From 8eb9803723a14fd12675641b953e4ccbd86187a8 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Sun, 29 May 2016 22:03:50 +1000 Subject: powerpc: Avoid load hit store in __giveup_fpu() and __giveup_altivec() In both __giveup_fpu() and __giveup_altivec() we make two modifications to tsk->thread.regs->msr. gcc decides to do a read/modify/write of each change, so we end up with a load hit store: ld r9,264(r10) rldicl r9,r9,50,1 rotldi r9,r9,14 std r9,264(r10) ... ld r9,264(r10) rldicl r9,r9,40,1 rotldi r9,r9,24 std r9,264(r10) Fix this by using a temporary. Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index e2f12cb..a2dd3b1 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -139,12 +139,16 @@ EXPORT_SYMBOL(__msr_check_and_clear); #ifdef CONFIG_PPC_FPU void __giveup_fpu(struct task_struct *tsk) { + unsigned long msr; + save_fpu(tsk); - tsk->thread.regs->msr &= ~MSR_FP; + msr = tsk->thread.regs->msr; + msr &= ~MSR_FP; #ifdef CONFIG_VSX if (cpu_has_feature(CPU_FTR_VSX)) - tsk->thread.regs->msr &= ~MSR_VSX; + msr &= ~MSR_VSX; #endif + tsk->thread.regs->msr = msr; } void giveup_fpu(struct task_struct *tsk) @@ -219,12 +223,16 @@ static int restore_fp(struct task_struct *tsk) { return 0; } static void __giveup_altivec(struct task_struct *tsk) { + unsigned long msr; + save_altivec(tsk); - tsk->thread.regs->msr &= ~MSR_VEC; + msr = tsk->thread.regs->msr; + msr &= ~MSR_VEC; #ifdef CONFIG_VSX if (cpu_has_feature(CPU_FTR_VSX)) - tsk->thread.regs->msr &= ~MSR_VSX; + msr &= ~MSR_VSX; #endif + tsk->thread.regs->msr = msr; } void giveup_altivec(struct task_struct *tsk) -- cgit v0.10.2 From d96f234f47aff593538f9e3d674967078f56bc28 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Sun, 29 May 2016 22:03:51 +1000 Subject: powerpc: Avoid load hit store in setup_sigcontext() In setup_sigcontext(), we set current->thread.vrsave then use it straight after. 
Since current is hidden from the compiler via inline assembly, it cannot optimise this and we end up with a load hit store. Fix this by using a temporary. Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 2552079..7e49984 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -104,6 +104,7 @@ static long setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, */ #ifdef CONFIG_ALTIVEC elf_vrreg_t __user *v_regs = sigcontext_vmx_regs(sc); + unsigned long vrsave; #endif unsigned long msr = regs->msr; long err = 0; @@ -125,9 +126,13 @@ static long setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, /* We always copy to/from vrsave, it's 0 if we don't have or don't * use altivec. */ - if (cpu_has_feature(CPU_FTR_ALTIVEC)) - current->thread.vrsave = mfspr(SPRN_VRSAVE); - err |= __put_user(current->thread.vrsave, (u32 __user *)&v_regs[33]); + vrsave = 0; + if (cpu_has_feature(CPU_FTR_ALTIVEC)) { + vrsave = mfspr(SPRN_VRSAVE); + current->thread.vrsave = vrsave; + } + + err |= __put_user(vrsave, (u32 __user *)&v_regs[33]); #else /* CONFIG_ALTIVEC */ err |= __put_user(0, &sc->v_regs); #endif /* CONFIG_ALTIVEC */ -- cgit v0.10.2 From 3ece16632b64120df2ef566ce32afbdb4aa8af1e Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 26 May 2016 08:38:13 +1000 Subject: powerpc: Remove assembly versions of strcpy, strcat, strlen and strcmp A number of our assembly implementations of string functions do not align their hot loops. I was going to align them manually, but I realised that they are almost instruction for instruction identical to what gcc produces, with the advantage that gcc does align them. In light of that, let's just remove the assembly versions. 
Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/include/asm/string.h b/arch/powerpc/include/asm/string.h index e40010a..da3cdff 100644 --- a/arch/powerpc/include/asm/string.h +++ b/arch/powerpc/include/asm/string.h @@ -3,12 +3,8 @@ #ifdef __KERNEL__ -#define __HAVE_ARCH_STRCPY #define __HAVE_ARCH_STRNCPY -#define __HAVE_ARCH_STRLEN -#define __HAVE_ARCH_STRCMP #define __HAVE_ARCH_STRNCMP -#define __HAVE_ARCH_STRCAT #define __HAVE_ARCH_MEMSET #define __HAVE_ARCH_MEMCPY #define __HAVE_ARCH_MEMMOVE diff --git a/arch/powerpc/lib/ppc_ksyms.c b/arch/powerpc/lib/ppc_ksyms.c index c422812..ae69d84 100644 --- a/arch/powerpc/lib/ppc_ksyms.c +++ b/arch/powerpc/lib/ppc_ksyms.c @@ -9,11 +9,7 @@ EXPORT_SYMBOL(memmove); EXPORT_SYMBOL(memcmp); EXPORT_SYMBOL(memchr); -EXPORT_SYMBOL(strcpy); EXPORT_SYMBOL(strncpy); -EXPORT_SYMBOL(strcat); -EXPORT_SYMBOL(strlen); -EXPORT_SYMBOL(strcmp); EXPORT_SYMBOL(strncmp); #ifndef CONFIG_GENERIC_CSUM diff --git a/arch/powerpc/lib/string.S b/arch/powerpc/lib/string.S index c80fb49..a947056 100644 --- a/arch/powerpc/lib/string.S +++ b/arch/powerpc/lib/string.S @@ -16,15 +16,6 @@ PPC_LONG_ALIGN .text -_GLOBAL(strcpy) - addi r5,r3,-1 - addi r4,r4,-1 -1: lbzu r0,1(r4) - cmpwi 0,r0,0 - stbu r0,1(r5) - bne 1b - blr - /* This clears out any unused part of the destination buffer, just as the libc version does. -- paulus */ _GLOBAL(strncpy) @@ -45,30 +36,6 @@ _GLOBAL(strncpy) bdnz 2b blr -_GLOBAL(strcat) - addi r5,r3,-1 - addi r4,r4,-1 -1: lbzu r0,1(r5) - cmpwi 0,r0,0 - bne 1b - addi r5,r5,-1 -1: lbzu r0,1(r4) - cmpwi 0,r0,0 - stbu r0,1(r5) - bne 1b - blr - -_GLOBAL(strcmp) - addi r5,r3,-1 - addi r4,r4,-1 -1: lbzu r3,1(r5) - cmpwi 1,r3,0 - lbzu r0,1(r4) - subf. 
r3,r0,r3 - beqlr 1 - beq 1b - blr - _GLOBAL(strncmp) PPC_LCMPI 0,r5,0 beq- 2f @@ -85,14 +52,6 @@ _GLOBAL(strncmp) 2: li r3,0 blr -_GLOBAL(strlen) - addi r4,r3,-1 -1: lbzu r0,1(r4) - cmpwi 0,r0,0 - bne 1b - subf r3,r3,r4 - blr - #ifdef CONFIG_PPC32 _GLOBAL(memcmp) PPC_LCMPI 0,r5,0 -- cgit v0.10.2 From 87a156fb18fe15d012c3db506b6b8b001af2e58d Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 26 May 2016 08:39:55 +1000 Subject: powerpc: Align hot loops of some string functions Align the hot loops in our assembly implementation of strncpy(), strncmp() and memchr(). Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/lib/string.S b/arch/powerpc/lib/string.S index a947056..beabc68 100644 --- a/arch/powerpc/lib/string.S +++ b/arch/powerpc/lib/string.S @@ -24,6 +24,7 @@ _GLOBAL(strncpy) mtctr r5 addi r6,r3,-1 addi r4,r4,-1 + .balign 16 1: lbzu r0,1(r4) cmpwi 0,r0,0 stbu r0,1(r6) @@ -42,6 +43,7 @@ _GLOBAL(strncmp) mtctr r5 addi r5,r3,-1 addi r4,r4,-1 + .balign 16 1: lbzu r3,1(r5) cmpwi 1,r3,0 lbzu r0,1(r4) @@ -73,6 +75,7 @@ _GLOBAL(memchr) beq- 2f mtctr r5 addi r3,r3,-1 + .balign 16 1: lbzu r0,1(r3) cmpw 0,r0,r4 bdnzf 2,1b -- cgit v0.10.2 From e289086f6530dd85d88967bfceded98bdbcd7f41 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 23 May 2016 10:45:49 +0200 Subject: powerpc/32: Get rid of sub_reloc_offset() sub_reloc_offset() has not been used since commit 917f0af9e5a9 ("powerpc: Remove arch/ppc and include/asm-ppc") which removed include/asm-ppc/prom.h. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index 285ca8c..d9c912b 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -104,20 +104,6 @@ _GLOBAL(mulhdu) blr /* - * sub_reloc_offset(x) returns x - reloc_offset(). 
- */ -_GLOBAL(sub_reloc_offset) - mflr r0 - bl 1f -1: mflr r5 - lis r4,1b@ha - addi r4,r4,1b@l - subf r5,r4,r5 - subf r3,r5,r3 - mtlr r0 - blr - -/* * reloc_got2 runs through the .got2 section adding an offset * to each entry. */ -- cgit v0.10.2 From 027dfac694fc27ef0273afb810d9b1f9da57d6e1 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 1 Jun 2016 16:34:37 +1000 Subject: powerpc: Various typo fixes Signed-off-by: Andrea Gelmini Signed-off-by: Michael Ellerman diff --git a/Documentation/devicetree/bindings/rtc/rtc-opal.txt b/Documentation/devicetree/bindings/rtc/rtc-opal.txt index a1734e5..2340938c 100644 --- a/Documentation/devicetree/bindings/rtc/rtc-opal.txt +++ b/Documentation/devicetree/bindings/rtc/rtc-opal.txt @@ -2,7 +2,7 @@ IBM OPAL real-time clock ------------------------ Required properties: -- comapatible: Should be "ibm,opal-rtc" +- compatible: Should be "ibm,opal-rtc" Optional properties: - wakeup-source: Decides if the wakeup is supported or not diff --git a/arch/powerpc/crypto/aes-spe-regs.h b/arch/powerpc/crypto/aes-spe-regs.h index 30d217b..2cc3a2c 100644 --- a/arch/powerpc/crypto/aes-spe-regs.h +++ b/arch/powerpc/crypto/aes-spe-regs.h @@ -18,7 +18,7 @@ #define rLN r7 /* length of data to be processed */ #define rIP r8 /* potiner to IV (CBC/CTR/XTS modes) */ #define rKT r9 /* pointer to tweak key (XTS mode) */ -#define rT0 r11 /* pointers to en-/decrpytion tables */ +#define rT0 r11 /* pointers to en-/decryption tables */ #define rT1 r10 #define rD0 r9 /* data */ #define rD1 r14 diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index 290157e..9643092 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -434,7 +434,7 @@ extern void slb_set_size(u16 size); * function. Used in slb_allocate() and do_stab_bolted. 
The function * computed is: (protovsid*VSID_MULTIPLIER) % VSID_MODULUS * - * rt = register continaing the proto-VSID and into which the + * rt = register containing the proto-VSID and into which the * VSID will be stored * rx = scratch register (clobbered) * diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index fb9f376..7d34f3d 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -57,7 +57,7 @@ struct pci_dn; /* * The struct is used to trace PE related EEH functionality. * In theory, there will have one instance of the struct to - * be created against particular PE. In nature, PEs corelate + * be created against particular PE. In nature, PEs correlate * to each other. the struct has to reflect that hierarchy in * order to easily pick up those affected PEs when one particular * PE has EEH errors. diff --git a/arch/powerpc/include/asm/nohash/32/pte-44x.h b/arch/powerpc/include/asm/nohash/32/pte-44x.h index fdab41c..0656ff8 100644 --- a/arch/powerpc/include/asm/nohash/32/pte-44x.h +++ b/arch/powerpc/include/asm/nohash/32/pte-44x.h @@ -32,7 +32,7 @@ * - - - - - - U0 U1 U2 U3 W I M G E - UX UW UR SX SW SR * * Newer 440 cores (440x6 as used on AMCC 460EX/460GT) have additional - * TLB2 storage attibute fields. Those are: + * TLB2 storage attribute fields. Those are: * * TLB2: * 0...10 11 12 13 14 15 16...31 diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h index 9bb8ddf..70b5cbc 100644 --- a/arch/powerpc/include/asm/opal-api.h +++ b/arch/powerpc/include/asm/opal-api.h @@ -802,7 +802,7 @@ struct opal_sg_entry { }; /* - * Candiate image SG list. + * Candidate image SG list. * * length = VER | length */ @@ -852,7 +852,7 @@ struct opal_i2c_request { * with individual elements being 16 bits wide to fetch the system * wide EPOW status. Each element in the buffer will contain the * EPOW status in it's bit representation for a particular EPOW sub - * class as defiend here. 
So multiple detailed EPOW status bits + * class as defined here. So multiple detailed EPOW status bits * specific for any sub class can be represented in a single buffer * element as it's bit representation. */ diff --git a/arch/powerpc/include/asm/pmac_feature.h b/arch/powerpc/include/asm/pmac_feature.h index 9256979..e08e829 100644 --- a/arch/powerpc/include/asm/pmac_feature.h +++ b/arch/powerpc/include/asm/pmac_feature.h @@ -210,7 +210,7 @@ static inline long pmac_call_feature(int selector, struct device_node* node, /* PMAC_FTR_SOUND_CHIP_ENABLE (struct device_node* node, 0, int value) * enable/disable the sound chip, whatever it is and provided it can - * acually be controlled + * actually be controlled */ #define PMAC_FTR_SOUND_CHIP_ENABLE PMAC_FTR_DEF(9) diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 009fab1..c0c27bd 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -224,7 +224,7 @@ struct thread_struct { unsigned int align_ctl; /* alignment handling control */ #ifdef CONFIG_PPC64 unsigned long start_tb; /* Start purr when proc switched in */ - unsigned long accum_tb; /* Total accumilated purr for process */ + unsigned long accum_tb; /* Total accumulated purr for process */ #ifdef CONFIG_HAVE_HW_BREAKPOINT struct perf_event *ptrace_bps[HBP_NUM]; /* diff --git a/arch/powerpc/include/asm/ps3av.h b/arch/powerpc/include/asm/ps3av.h index 0427b0b..a1dc784 100644 --- a/arch/powerpc/include/asm/ps3av.h +++ b/arch/powerpc/include/asm/ps3av.h @@ -104,7 +104,7 @@ #define PS3AV_CMD_AV_INPUTLEN_16 0x02 #define PS3AV_CMD_AV_INPUTLEN_20 0x0a #define PS3AV_CMD_AV_INPUTLEN_24 0x0b -/* alayout */ +/* av_layout */ #define PS3AV_CMD_AV_LAYOUT_32 (1 << 0) #define PS3AV_CMD_AV_LAYOUT_44 (1 << 1) #define PS3AV_CMD_AV_LAYOUT_48 (1 << 2) diff --git a/arch/powerpc/include/asm/pte-common.h b/arch/powerpc/include/asm/pte-common.h index 2eeaf80..4ba26dd 100644 --- 
a/arch/powerpc/include/asm/pte-common.h +++ b/arch/powerpc/include/asm/pte-common.h @@ -96,7 +96,7 @@ static inline bool pte_user(pte_t pte) #define PTE_RPN_SHIFT (PAGE_SHIFT) #endif -/* The mask convered by the RPN must be a ULL on 32-bit platforms with +/* The mask covered by the RPN must be a ULL on 32-bit platforms with * 64-bit PTEs */ #if defined(CONFIG_PPC32) && defined(CONFIG_PTE_64BIT) diff --git a/arch/powerpc/include/asm/smu.h b/arch/powerpc/include/asm/smu.h index f280dd1..9dc2de5 100644 --- a/arch/powerpc/include/asm/smu.h +++ b/arch/powerpc/include/asm/smu.h @@ -185,7 +185,7 @@ * x = processor mask * y = op. point index * z = processor freq. step index - * I haven't yet decyphered result codes + * I haven't yet deciphered result codes * */ #define SMU_CMD_POWER_COMMAND 0xaa diff --git a/arch/powerpc/include/asm/tsi108.h b/arch/powerpc/include/asm/tsi108.h index d531d9e..c2a955b 100644 --- a/arch/powerpc/include/asm/tsi108.h +++ b/arch/powerpc/include/asm/tsi108.h @@ -77,7 +77,7 @@ * nodes if your board uses the Broadcom PHYs */ #define TSI108_PHY_MV88E 0 /* Marvel 88Exxxx PHY */ -#define TSI108_PHY_BCM54XX 1 /* Broardcom BCM54xx PHY */ +#define TSI108_PHY_BCM54XX 1 /* Broadcom BCM54xx PHY */ /* Global variables */ diff --git a/arch/powerpc/kernel/cpu_setup_6xx.S b/arch/powerpc/kernel/cpu_setup_6xx.S index f8cd9fb..c5e5a94 100644 --- a/arch/powerpc/kernel/cpu_setup_6xx.S +++ b/arch/powerpc/kernel/cpu_setup_6xx.S @@ -156,7 +156,7 @@ setup_7410_workarounds: blr /* 740/750/7400/7410 - * Enable Store Gathering (SGE), Address Brodcast (ABE), + * Enable Store Gathering (SGE), Address Broadcast (ABE), * Branch History Table (BHTE), Branch Target ICache (BTIC) * Dynamic Power Management (DPM), Speculative (SPD) * Clear Instruction cache throttling (ICTC) diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 2714a3b..389b0d3 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -139,7 +139,7 @@ 
static void eeh_enable_irq(struct pci_dev *dev) * into it. * * That's just wrong.The warning in the core code is - * there to tell people to fix their assymetries in + * there to tell people to fix their asymmetries in * their own code, not by abusing the core information * to avoid it. * diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 488e631..2d3b40f 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -453,7 +453,7 @@ exc_##n##_bad_stack: \ sth r1,PACA_TRAP_SAVE(r13); /* store trap */ \ b bad_stack_book3e; /* bad stack error */ -/* WARNING: If you change the layout of this stub, make sure you chcek +/* WARNING: If you change the layout of this stub, make sure you check * the debug exception handler which handles single stepping * into exceptions from userspace, and the MM code in * arch/powerpc/mm/tlb_nohash.c which patches the branch here diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c index 3759df5..f71b79a 100644 --- a/arch/powerpc/kernel/pci_64.c +++ b/arch/powerpc/kernel/pci_64.c @@ -82,7 +82,7 @@ int pcibios_unmap_io_space(struct pci_bus *bus) /* If this is not a PHB, we only flush the hash table over * the area mapped by this bridge. We don't play with the PTE - * mappings since we might have to deal with sub-page alignemnts + * mappings since we might have to deal with sub-page alignments * so flushing the hash table is the only sane way to make sure * that no hash entries are covering that removed bridge area * while still allowing other busses overlapping those pages diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index a2dd3b1..c5c3ae2 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -802,7 +802,7 @@ static void tm_reclaim_thread(struct thread_struct *thr, * this state. 
* We do this using the current MSR, rather tracking it in * some specific thread_struct bit, as it has the additional - * benifit of checking for a potential TM bad thing exception. + * benefit of checking for a potential TM bad thing exception. */ if (!MSR_TM_SUSPENDED(mfmsr())) return; diff --git a/arch/powerpc/kernel/rtas-proc.c b/arch/powerpc/kernel/rtas-proc.c index fb2fb3e..c82eed9 100644 --- a/arch/powerpc/kernel/rtas-proc.c +++ b/arch/powerpc/kernel/rtas-proc.c @@ -698,7 +698,7 @@ static void check_location(struct seq_file *m, const char *c) /* * Format: * ${LETTER}${NUMBER}[[-/]${LETTER}${NUMBER} [ ... ] ] - * the '.' may be an abbrevation + * the '.' may be an abbreviation */ static void check_location_string(struct seq_file *m, const char *c) { diff --git a/arch/powerpc/lib/rheap.c b/arch/powerpc/lib/rheap.c index 69abf84..94058c2 100644 --- a/arch/powerpc/lib/rheap.c +++ b/arch/powerpc/lib/rheap.c @@ -325,7 +325,7 @@ void rh_init(rh_info_t * info, unsigned int alignment, int max_blocks, } EXPORT_SYMBOL_GPL(rh_init); -/* Attach a free memory region, coalesces regions if adjuscent */ +/* Attach a free memory region, coalesces regions if adjacent */ int rh_attach_region(rh_info_t * info, unsigned long start, int size) { rh_block_t *blk; diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index 40e05e7..dc57de1 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -55,7 +55,7 @@ static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize) * We need 14 to 65 bits of va for a tlibe of 4K page * With vpn we ignore the lower VPN_SHIFT bits already. * And top two bits are already ignored because we can - * only accomadate 76 bits in a 64 bit vpn with a VPN_SHIFT + * only accommodate 76 bits in a 64 bit vpn with a VPN_SHIFT * of 12. */ va = vpn << VPN_SHIFT; @@ -605,7 +605,7 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot, * crashdump and all bets are off anyway. 
* * TODO: add batching support when enabled. remember, no dynamic memory here, - * athough there is the control page available... + * although there is the control page available... */ static void native_hpte_clear(void) { diff --git a/arch/powerpc/oprofile/cell/spu_task_sync.c b/arch/powerpc/oprofile/cell/spu_task_sync.c index ed7b097..ef2142f 100644 --- a/arch/powerpc/oprofile/cell/spu_task_sync.c +++ b/arch/powerpc/oprofile/cell/spu_task_sync.c @@ -51,7 +51,7 @@ static void spu_buff_add(unsigned long int value, int spu) * That way we can tell the difference between the * buffer being full versus empty. * - * ASSUPTION: the buffer_lock is held when this function + * ASSUMPTION: the buffer_lock is held when this function * is called to lock the buffer, head and tail. */ int full = 1; diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 97a1d40..4708608 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -992,7 +992,7 @@ static u64 check_and_compute_delta(u64 prev, u64 val) * than the previous value it will cause the delta and the counter to * have bogus values unless we rolled a counter over. If a coutner is * rolled back, it will be smaller, but within 256, which is the maximum - * number of events to rollback at once. If we dectect a rollback + * number of events to rollback at once. If we detect a rollback * return 0. This can lead to a small lack of precision in the * counters. */ diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index 2da41b7..7b2ca16 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -1298,7 +1298,7 @@ static void h_24x7_event_read(struct perf_event *event) __this_cpu_write(hv_24x7_txn_err, ret); } else { /* - * Assoicate the event with the HCALL request index, + * Associate the event with the HCALL request index, * so ->commit_txn() can quickly find/update count. 
*/ i = request_buffer->num_requests - 1; diff --git a/arch/powerpc/perf/hv-24x7.h b/arch/powerpc/perf/hv-24x7.h index 791455e..634ef40 100644 --- a/arch/powerpc/perf/hv-24x7.h +++ b/arch/powerpc/perf/hv-24x7.h @@ -66,7 +66,7 @@ struct hv_24x7_result_element { /* -1 if @performance_domain does not refer to a virtual processor */ __be32 lpar_cfg_instance_id; - /* size = @result_element_data_size of cointaining result. */ + /* size = @result_element_data_size of containing result. */ __u64 element_data[1]; } __packed; diff --git a/arch/powerpc/platforms/512x/clock-commonclk.c b/arch/powerpc/platforms/512x/clock-commonclk.c index 6081fbd..add5a53 100644 --- a/arch/powerpc/platforms/512x/clock-commonclk.c +++ b/arch/powerpc/platforms/512x/clock-commonclk.c @@ -719,7 +719,7 @@ static void mpc512x_clk_setup_clock_tree(struct device_node *np, int busfreq) * most one of a mux, div, and gate each into one 'struct clk' * item * - PSC/MSCAN/SPDIF clock generation OTOH already is very - * specific and cannot get mapped to componsites (at least not + * specific and cannot get mapped to composites (at least not * a single one, maybe two of them, but then some of these * intermediate clock signals get referenced elsewhere (e.g. * in the clock frequency measurement, CFM) and thus need diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c index 14a582b..9027d7c 100644 --- a/arch/powerpc/platforms/cell/iommu.c +++ b/arch/powerpc/platforms/cell/iommu.c @@ -178,7 +178,7 @@ static int tce_build_cell(struct iommu_table *tbl, long index, long npages, * default for now.*/ #ifdef CELL_IOMMU_STRICT_PROTECTION /* to avoid referencing a global, we use a trick here to setup the - * protection bit. "prot" is setup to be 3 fields of 4 bits apprended + * protection bit. "prot" is setup to be 3 fields of 4 bits appended * together for each of the 3 supported direction values. 
It is then * shifted left so that the fields matching the desired direction * lands on the appropriate bits, and other bits are masked out. @@ -338,7 +338,7 @@ static unsigned long *cell_iommu_alloc_ptab(struct cbe_iommu *iommu, start_seg = base >> IO_SEGMENT_SHIFT; segments = size >> IO_SEGMENT_SHIFT; pages_per_segment = 1ull << IO_PAGENO_BITS(page_shift); - /* PTEs for each segment must start on a 4K bounday */ + /* PTEs for each segment must start on a 4K boundary */ pages_per_segment = max(pages_per_segment, (1 << 12) / sizeof(unsigned long)); diff --git a/arch/powerpc/platforms/cell/spider-pic.c b/arch/powerpc/platforms/cell/spider-pic.c index 54ee574..d06dcac 100644 --- a/arch/powerpc/platforms/cell/spider-pic.c +++ b/arch/powerpc/platforms/cell/spider-pic.c @@ -217,7 +217,7 @@ static void spider_irq_cascade(struct irq_desc *desc) chip->irq_eoi(&desc->irq_data); } -/* For hooking up the cascace we have a problem. Our device-tree is +/* For hooking up the cascade we have a problem. Our device-tree is * crap and we don't know on which BE iic interrupt we are hooked on at * least not the "standard" way. We can reconstitute it based on two * informations though: which BE node we are connected to and whether diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c index 3cbe38f..bb4a8e0 100644 --- a/arch/powerpc/platforms/cell/spu_base.c +++ b/arch/powerpc/platforms/cell/spu_base.c @@ -69,7 +69,7 @@ static DEFINE_SPINLOCK(spu_lock); * spu_full_list_lock and spu_full_list_mutex held, while iterating * through it requires either of these locks. * - * In addition spu_full_list_lock protects all assignmens to + * In addition spu_full_list_lock protects all assignments to * spu->mm. */ static LIST_HEAD(spu_full_list); @@ -253,7 +253,7 @@ static inline int __slb_present(struct copro_slb *slbs, int nr_slbs, * Setup the SPU kernel SLBs, in preparation for a context save/restore. 
We * need to map both the context save area, and the save/restore code. * - * Because the lscsa and code may cross segment boundaires, we check to see + * Because the lscsa and code may cross segment boundaries, we check to see * if mappings are required for the start and end of each range. We currently * assume that the mappings are smaller that one segment - if not, something * is seriously wrong. diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c index 2936a00..0625446 100644 --- a/arch/powerpc/platforms/cell/spufs/file.c +++ b/arch/powerpc/platforms/cell/spufs/file.c @@ -866,7 +866,7 @@ void spufs_wbox_callback(struct spu *spu) * - end of the mapped area * * If the file is opened without O_NONBLOCK, we wait here until - * space is availabyl, but return when we have been able to + * space is available, but return when we have been able to * write something. */ static ssize_t spufs_wbox_write(struct file *file, const char __user *buf, diff --git a/arch/powerpc/platforms/cell/spufs/run.c b/arch/powerpc/platforms/cell/spufs/run.c index 9f79004..cfacbee 100644 --- a/arch/powerpc/platforms/cell/spufs/run.c +++ b/arch/powerpc/platforms/cell/spufs/run.c @@ -435,7 +435,7 @@ long spufs_run_spu(struct spu_context *ctx, u32 *npc, u32 *event) /* Note: we don't need to force_sig SIGTRAP on single-step * since we have TIF_SINGLESTEP set, thus the kernel will do - * it upon return from the syscall anyawy + * it upon return from the syscall anyway. 
*/ if (unlikely(status & SPU_STATUS_SINGLE_STEP)) ret = -ERESTARTSYS; diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index 998f632..460f5f3 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -622,7 +622,7 @@ static struct spu *spu_get_idle(struct spu_context *ctx) /** * find_victim - find a lower priority context to preempt - * @ctx: canidate context for running + * @ctx: candidate context for running * * Returns the freed physical spu to run the new context on. */ diff --git a/arch/powerpc/platforms/powermac/low_i2c.c b/arch/powerpc/platforms/powermac/low_i2c.c index 7553b6a..6d6f277 100644 --- a/arch/powerpc/platforms/powermac/low_i2c.c +++ b/arch/powerpc/platforms/powermac/low_i2c.c @@ -15,7 +15,7 @@ * This file thus provides a simple low level unified i2c interface for * powermac that covers the various types of i2c busses used in Apple machines. * For now, keywest, PMU and SMU, though we could add Cuda, or other bit - * banging busses found on older chipstes in earlier machines if we ever need + * banging busses found on older chipsets in earlier machines if we ever need * one of them. * * The drivers in this file are synchronous/blocking. In addition, the diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 3a5ea82..1fc53e0 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -197,7 +197,7 @@ static int pnv_ioda2_init_m64(struct pnv_phb *phb) /* * Strip off the segment used by the reserved PE, which is - * expected to be 0 or last one of PE capabicity. + * expected to be 0 or last one of PE capability. 
*/ r = &phb->hose->mem_resources[1]; if (phb->ioda.reserved_pe_idx == 0) diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index 3998e0f..1c428f0 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -2,7 +2,7 @@ * The file intends to implement the platform dependent EEH operations on pseries. * Actually, the pseries platform is built based on RTAS heavily. That means the * pseries platform dependent EEH operations will be built on RTAS calls. The functions - * are devired from arch/powerpc/platforms/pseries/eeh.c and necessary cleanup has + * are derived from arch/powerpc/platforms/pseries/eeh.c and necessary cleanup has * been done. * * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2011. diff --git a/arch/powerpc/platforms/pseries/io_event_irq.c b/arch/powerpc/platforms/pseries/io_event_irq.c index 0240c4f..f053bda 100644 --- a/arch/powerpc/platforms/pseries/io_event_irq.c +++ b/arch/powerpc/platforms/pseries/io_event_irq.c @@ -113,7 +113,7 @@ static struct pseries_io_event * ioei_find_event(struct rtas_error_log *elog) * - The owner of an event is determined by combinations of scope, * event type, and sub-type. There is no easy way to pre-sort clients * by scope or event type alone. For example, Torrent ISR route change - * event is reported with scope 0x00 (Not Applicatable) rather than + * event is reported with scope 0x00 (Not Applicable) rather than * 0x3B (Torrent-hub). It is better to let the clients to identify * who owns the event. 
*/ diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 9883bc7..9a79c27 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -367,7 +367,7 @@ static void pseries_lpar_idle(void) { /* * Default handler to go into low thread priority and possibly - * low power mode by cedeing processor to hypervisor + * low power mode by ceding processor to hypervisor */ /* Indicate to hypervisor that we are idle. */ -- cgit v0.10.2 From e7da5dac4e9067a526136db2f23cc8696a5d4649 Mon Sep 17 00:00:00 2001 From: Rashmica Gupta Date: Mon, 30 May 2016 16:18:11 +1000 Subject: powerpc/pseries: Drop support for MPIC in pseries MPIC was only used by Power3 which is now unsupported, so drop support for MPIC. XICS is now the only supported interrupt controller for pSeries so make the XICS functions generic. Signed-off-by: Rashmica Gupta Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 9a79c27..39f21fb 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -57,7 +57,6 @@ #include #include #include -#include #include #include #include @@ -77,8 +76,6 @@ EXPORT_SYMBOL(CMO_PageSize); int fwnmi_active; /* TRUE if an FWNMI handler is present */ -static struct device_node *pSeries_mpic_node; - static void pSeries_show_cpuinfo(struct seq_file *m) { struct device_node *root; @@ -172,48 +169,7 @@ static void __init pseries_setup_i8259_cascade(void) irq_set_chained_handler(cascade, pseries_8259_cascade); } -static void __init pseries_mpic_init_IRQ(void) -{ - struct device_node *np; - const unsigned int *opprop; - unsigned long openpic_addr = 0; - int naddr, n, i, opplen; - struct mpic *mpic; - - np = of_find_node_by_path("/"); - naddr = of_n_addr_cells(np); - opprop = of_get_property(np, "platform-open-pic", &opplen); - if (opprop != NULL) { - openpic_addr = of_read_number(opprop, naddr); - 
printk(KERN_DEBUG "OpenPIC addr: %lx\n", openpic_addr); - } - of_node_put(np); - - BUG_ON(openpic_addr == 0); - - /* Setup the openpic driver */ - mpic = mpic_alloc(pSeries_mpic_node, openpic_addr, - MPIC_NO_RESET, 16, 0, " MPIC "); - BUG_ON(mpic == NULL); - - /* Add ISUs */ - opplen /= sizeof(u32); - for (n = 0, i = naddr; i < opplen; i += naddr, n++) { - unsigned long isuaddr = of_read_number(opprop + i, naddr); - mpic_assign_isu(mpic, n, isuaddr); - } - - /* Setup top-level get_irq */ - ppc_md.get_irq = mpic_get_irq; - - /* All ISUs are setup, complete initialization */ - mpic_init(mpic); - - /* Look for cascade */ - pseries_setup_i8259_cascade(); -} - -static void __init pseries_xics_init_IRQ(void) +static void __init pseries_init_irq(void) { xics_init(); pseries_setup_i8259_cascade(); @@ -228,32 +184,6 @@ static void pseries_lpar_enable_pmcs(void) plpar_hcall_norets(H_PERFMON, set, reset); } -static void __init pseries_discover_pic(void) -{ - struct device_node *np; - const char *typep; - - for_each_node_by_name(np, "interrupt-controller") { - typep = of_get_property(np, "compatible", NULL); - if (!typep) - continue; - if (strstr(typep, "open-pic")) { - pSeries_mpic_node = of_node_get(np); - ppc_md.init_IRQ = pseries_mpic_init_IRQ; - setup_kexec_cpu_down_mpic(); - smp_init_pseries_mpic(); - return; - } else if (strstr(typep, "ppc-xicp")) { - ppc_md.init_IRQ = pseries_xics_init_IRQ; - setup_kexec_cpu_down_xics(); - smp_init_pseries_xics(); - return; - } - } - printk(KERN_ERR "pSeries_discover_pic: failed to recognize" - " interrupt-controller\n"); -} - static int pci_dn_reconfig_notifier(struct notifier_block *nb, unsigned long action, void *data) { struct of_reconfig_data *rd = data; @@ -506,7 +436,9 @@ static void __init pSeries_setup_arch(void) set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT); /* Discover PIC type and setup ppc_md accordingly */ - pseries_discover_pic(); + setup_kexec_cpu_down_xics(); + smp_init_pseries_xics(); + /* openpic global 
configuration register (64-bit format). */ /* openpic Interrupt Source Unit pointer (64-bit format). */ @@ -838,6 +770,7 @@ define_machine(pseries) { .probe = pSeries_probe, .setup_arch = pSeries_setup_arch, .init_early = pSeries_init_early, + .init_IRQ = pseries_init_irq, .show_cpuinfo = pSeries_show_cpuinfo, .log_error = pSeries_log_error, .pcibios_fixup = pSeries_final_fixup, -- cgit v0.10.2 From 86425bedd5b69ea5969ff11bb07c8e5ff7117976 Mon Sep 17 00:00:00 2001 From: Rashmica Gupta Date: Mon, 30 May 2016 16:18:12 +1000 Subject: powerpc/pseries: Remove MPIC from pseries smp MPIC was only used by Power3 which is now unsupported, so remove MPIC code. XICS is now the only supported interrupt controller for pSeries so do some cleanups too. Signed-off-by: Rashmica Gupta Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h index 7aa83f0..edeaec7 100644 --- a/arch/powerpc/platforms/pseries/pseries.h +++ b/arch/powerpc/platforms/pseries/pseries.h @@ -31,11 +31,9 @@ extern int pSeries_system_reset_exception(struct pt_regs *regs); extern int pSeries_machine_check_exception(struct pt_regs *regs); #ifdef CONFIG_SMP -extern void smp_init_pseries_mpic(void); -extern void smp_init_pseries_xics(void); +extern void smp_init_pseries(void); #else -static inline void smp_init_pseries_mpic(void) { }; -static inline void smp_init_pseries_xics(void) { }; +static inline void smp_init_pseries(void) { }; #endif #ifdef CONFIG_KEXEC diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 39f21fb..4b94a1e 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -437,7 +437,7 @@ static void __init pSeries_setup_arch(void) /* Discover PIC type and setup ppc_md accordingly */ setup_kexec_cpu_down_xics(); - smp_init_pseries_xics(); + smp_init_pseries(); /* openpic global configuration register (64-bit format). 
*/ diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c index 6932ea8..f6f83ae 100644 --- a/arch/powerpc/platforms/pseries/smp.c +++ b/arch/powerpc/platforms/pseries/smp.c @@ -38,7 +38,6 @@ #include #include #include -#include #include #include #include @@ -140,7 +139,7 @@ out: return 1; } -static void smp_xics_setup_cpu(int cpu) +static void smp_setup_cpu(int cpu) { if (cpu != boot_cpuid) xics_setup_cpu(); @@ -207,28 +206,22 @@ static __init void pSeries_smp_probe(void) } } -static struct smp_ops_t pSeries_mpic_smp_ops = { - .message_pass = smp_mpic_message_pass, - .probe = smp_mpic_probe, - .kick_cpu = smp_pSeries_kick_cpu, - .setup_cpu = smp_mpic_setup_cpu, -}; - -static struct smp_ops_t pSeries_xics_smp_ops = { +static struct smp_ops_t pseries_smp_ops = { .message_pass = NULL, /* Use smp_muxed_ipi_message_pass */ .cause_ipi = NULL, /* Filled at runtime by pSeries_smp_probe() */ .probe = pSeries_smp_probe, .kick_cpu = smp_pSeries_kick_cpu, - .setup_cpu = smp_xics_setup_cpu, + .setup_cpu = smp_setup_cpu, .cpu_bootable = smp_generic_cpu_bootable, }; /* This is called very early */ -static void __init smp_init_pseries(void) +void __init smp_init_pseries(void) { int i; pr_debug(" -> smp_init_pSeries()\n"); + smp_ops = &pseries_smp_ops; alloc_bootmem_cpumask_var(&of_spin_mask); @@ -258,17 +251,3 @@ static void __init smp_init_pseries(void) pr_debug(" <- smp_init_pSeries()\n"); } - -void __init smp_init_pseries_mpic(void) -{ - smp_ops = &pSeries_mpic_smp_ops; - - smp_init_pseries(); -} - -void __init smp_init_pseries_xics(void) -{ - smp_ops = &pSeries_xics_smp_ops; - - smp_init_pseries(); -} -- cgit v0.10.2 From d739d2caa3d3e283ca4d41716cd317684e066ef9 Mon Sep 17 00:00:00 2001 From: Rashmica Gupta Date: Mon, 30 May 2016 16:18:13 +1000 Subject: powerpc/pseries: Remove MPIC from pseries kexec MPIC was only used by Power3 which is now unsupported, so remove MPIC code. 
XICS is now the only supported interrupt controller for pSeries so do some cleanups too. Signed-off-by: Rashmica Gupta Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/platforms/pseries/kexec.c b/arch/powerpc/platforms/pseries/kexec.c index 13fa95b3..6681ac9 100644 --- a/arch/powerpc/platforms/pseries/kexec.c +++ b/arch/powerpc/platforms/pseries/kexec.c @@ -14,14 +14,13 @@ #include #include #include -#include #include #include #include #include "pseries.h" -static void pseries_kexec_cpu_down(int crash_shutdown, int secondary) +void pseries_kexec_cpu_down(int crash_shutdown, int secondary) { /* Don't risk a hypervisor call if we're crashing */ if (firmware_has_feature(FW_FEATURE_SPLPAR) && !crash_shutdown) { @@ -51,26 +50,6 @@ static void pseries_kexec_cpu_down(int crash_shutdown, int secondary) "(hw %d) failed with %d\n", cpu, hwcpu, ret); } } -} - -static void pseries_kexec_cpu_down_mpic(int crash_shutdown, int secondary) -{ - pseries_kexec_cpu_down(crash_shutdown, secondary); - mpic_teardown_this_cpu(secondary); -} -void __init setup_kexec_cpu_down_mpic(void) -{ - ppc_md.kexec_cpu_down = pseries_kexec_cpu_down_mpic; -} - -static void pseries_kexec_cpu_down_xics(int crash_shutdown, int secondary) -{ - pseries_kexec_cpu_down(crash_shutdown, secondary); xics_kexec_teardown_cpu(secondary); } - -void __init setup_kexec_cpu_down_xics(void) -{ - ppc_md.kexec_cpu_down = pseries_kexec_cpu_down_xics; -} diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h index edeaec7..ddb9aa5 100644 --- a/arch/powerpc/platforms/pseries/pseries.h +++ b/arch/powerpc/platforms/pseries/pseries.h @@ -36,13 +36,7 @@ extern void smp_init_pseries(void); static inline void smp_init_pseries(void) { }; #endif -#ifdef CONFIG_KEXEC -extern void setup_kexec_cpu_down_xics(void); -extern void setup_kexec_cpu_down_mpic(void); -#else -static inline void setup_kexec_cpu_down_xics(void) { } -static inline void setup_kexec_cpu_down_mpic(void) { } -#endif 
+extern void pseries_kexec_cpu_down(int crash_shutdown, int secondary); extern void pSeries_final_fixup(void); diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 4b94a1e..34668f9 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -436,7 +436,6 @@ static void __init pSeries_setup_arch(void) set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT); /* Discover PIC type and setup ppc_md accordingly */ - setup_kexec_cpu_down_xics(); smp_init_pseries(); @@ -786,6 +785,7 @@ define_machine(pseries) { .machine_check_exception = pSeries_machine_check_exception, #ifdef CONFIG_KEXEC .machine_kexec = pSeries_machine_kexec, + .kexec_cpu_down = pseries_kexec_cpu_down, #endif #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE .memory_block_size = pseries_memory_block_size, -- cgit v0.10.2 From 8324947d6d318fbcf558d50ae348a85aacd68930 Mon Sep 17 00:00:00 2001 From: Rashmica Gupta Date: Mon, 30 May 2016 16:18:14 +1000 Subject: powerpc/pseries: Remove MPIC from pseries cpu hotplug MPIC was only used by Power3 which is now unsupported, so remove MPIC code. 
Signed-off-by: Rashmica Gupta Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index 282837a..a1b63e0 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -903,8 +903,6 @@ static int parse_cede_parameters(void) static int __init pseries_cpu_hotplug_init(void) { - struct device_node *np; - const char *typep; int cpu; int qcss_tok; @@ -913,17 +911,6 @@ static int __init pseries_cpu_hotplug_init(void) ppc_md.cpu_release = dlpar_cpu_release; #endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */ - for_each_node_by_name(np, "interrupt-controller") { - typep = of_get_property(np, "compatible", NULL); - if (strstr(typep, "open-pic")) { - of_node_put(np); - - printk(KERN_INFO "CPU Hotplug not supported on " - "systems using MPIC\n"); - return 0; - } - } - rtas_stop_self_token = rtas_token("stop-self"); qcss_tok = rtas_token("query-cpu-stopped-state"); -- cgit v0.10.2 From ac9cd1709c0be4334fbad4ab610ecb7acceac884 Mon Sep 17 00:00:00 2001 From: Rashmica Gupta Date: Mon, 30 May 2016 16:18:15 +1000 Subject: powerpc/pseries: Remove MPIC from pseries event sources MPIC was only used by Power3 which is now unsupported, so remove MPIC code. XICS is now the only supported interrupt controller for pSeries so do some cleanups too. Signed-off-by: Rashmica Gupta Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/platforms/pseries/event_sources.c b/arch/powerpc/platforms/pseries/event_sources.c index 18380e8..a6ddca8 100644 --- a/arch/powerpc/platforms/pseries/event_sources.c +++ b/arch/powerpc/platforms/pseries/event_sources.c @@ -26,48 +26,21 @@ void request_event_sources_irqs(struct device_node *np, { int i, index, count = 0; struct of_phandle_args oirq; - const u32 *opicprop; - unsigned int opicplen; unsigned int virqs[16]; - /* Check for obsolete "open-pic-interrupt" property. 
If present, then - * map those interrupts using the default interrupt host and default - * trigger - */ - opicprop = of_get_property(np, "open-pic-interrupt", &opicplen); - if (opicprop) { - opicplen /= sizeof(u32); - for (i = 0; i < opicplen; i++) { - if (count > 15) - break; - virqs[count] = irq_create_mapping(NULL, *(opicprop++)); - if (virqs[count] == NO_IRQ) { - pr_err("event-sources: Unable to allocate " - "interrupt number for %s\n", - np->full_name); - WARN_ON(1); - } - else - count++; - - } - } - /* Else use normal interrupt tree parsing */ - else { - /* First try to do a proper OF tree parsing */ - for (index = 0; of_irq_parse_one(np, index, &oirq) == 0; - index++) { - if (count > 15) - break; - virqs[count] = irq_create_of_mapping(&oirq); - if (virqs[count] == NO_IRQ) { - pr_err("event-sources: Unable to allocate " - "interrupt number for %s\n", - np->full_name); - WARN_ON(1); - } - else - count++; + /* First try to do a proper OF tree parsing */ + for (index = 0; of_irq_parse_one(np, index, &oirq) == 0; + index++) { + if (count > 15) + break; + virqs[count] = irq_create_of_mapping(&oirq); + if (virqs[count] == NO_IRQ) { + pr_err("event-sources: Unable to allocate " + "interrupt number for %s\n", + np->full_name); + WARN_ON(1); + } else { + count++; } } -- cgit v0.10.2 From f55d966536034d33476fdd43c45d47225344469f Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 6 Jun 2016 22:26:10 +0530 Subject: powerpc: Define and use PPC64_ELF_ABI_v2/v1 We're approaching 20 locations where we need to check for ELF ABI v2. That's fine, except the logic is a bit awkward, because we have to check that _CALL_ELF is defined and then what its value is. So check it once in asm/types.h and define PPC64_ELF_ABI_v2 when ELF ABI v2 is detected. We also have a few places where what we're really trying to check is that we are using the 64-bit v1 ABI, ie. function descriptors. So also add a #define for that, which simplifies several checks. Signed-off-by: Naveen N. 
Rao Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h index 994c60a..2015b07 100644 --- a/arch/powerpc/include/asm/code-patching.h +++ b/arch/powerpc/include/asm/code-patching.h @@ -49,8 +49,7 @@ void __patch_exception(int exc, unsigned long addr); static inline unsigned long ppc_function_entry(void *func) { -#if defined(CONFIG_PPC64) -#if defined(_CALL_ELF) && _CALL_ELF == 2 +#ifdef PPC64_ELF_ABI_v2 u32 *insn = func; /* @@ -75,14 +74,13 @@ static inline unsigned long ppc_function_entry(void *func) return (unsigned long)(insn + 2); else return (unsigned long)func; -#else +#elif defined(PPC64_ELF_ABI_v1) /* * On PPC64 ABIv1 the function pointer actually points to the * function's descriptor. The first entry in the descriptor is the * address of the function text. */ return ((func_descr_t *)func)->entry; -#endif #else return (unsigned long)func; #endif @@ -90,7 +88,7 @@ static inline unsigned long ppc_function_entry(void *func) static inline unsigned long ppc_global_function_entry(void *func) { -#if defined(CONFIG_PPC64) && defined(_CALL_ELF) && _CALL_ELF == 2 +#ifdef PPC64_ELF_ABI_v2 /* PPC64 ABIv2 the global entry point is at the address */ return (unsigned long)func; #else @@ -106,7 +104,7 @@ static inline unsigned long ppc_global_function_entry(void *func) */ /* This must match the definition of STK_GOT in */ -#if defined(_CALL_ELF) && _CALL_ELF == 2 +#ifdef PPC64_ELF_ABI_v2 #define R2_STACK_OFFSET 24 #else #define R2_STACK_OFFSET 40 diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index 50ca758..686c5f7 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -1,6 +1,8 @@ #ifndef _ASM_POWERPC_FTRACE #define _ASM_POWERPC_FTRACE +#include + #ifdef CONFIG_FUNCTION_TRACER #define MCOUNT_ADDR ((unsigned long)(_mcount)) #define MCOUNT_INSN_SIZE 4 /* sizeof mcount call */ @@ -65,8 +67,8 @@ struct dyn_arch_ftrace { #endif 
#endif -#if defined(CONFIG_FTRACE_SYSCALLS) && defined(CONFIG_PPC64) && !defined(__ASSEMBLY__) -#if !defined(_CALL_ELF) || _CALL_ELF != 2 +#if defined(CONFIG_FTRACE_SYSCALLS) && !defined(__ASSEMBLY__) +#ifdef PPC64_ELF_ABI_v1 #define ARCH_HAS_SYSCALL_MATCH_SYM_NAME static inline bool arch_syscall_match_sym_name(const char *sym, const char *name) { @@ -79,6 +81,6 @@ static inline bool arch_syscall_match_sym_name(const char *sym, const char *name return !strcmp(sym + 4, name + 3); } #endif -#endif /* CONFIG_FTRACE_SYSCALLS && CONFIG_PPC64 && !__ASSEMBLY__ */ +#endif /* CONFIG_FTRACE_SYSCALLS && !__ASSEMBLY__ */ #endif /* _ASM_POWERPC_FTRACE */ diff --git a/arch/powerpc/include/asm/kprobes.h b/arch/powerpc/include/asm/kprobes.h index 039b583..2c9759bd 100644 --- a/arch/powerpc/include/asm/kprobes.h +++ b/arch/powerpc/include/asm/kprobes.h @@ -40,8 +40,7 @@ struct kprobe; typedef ppc_opcode_t kprobe_opcode_t; #define MAX_INSN_SIZE 1 -#ifdef CONFIG_PPC64 -#if defined(_CALL_ELF) && _CALL_ELF == 2 +#ifdef PPC64_ELF_ABI_v2 /* PPC64 ABIv2 needs local entry point */ #define kprobe_lookup_name(name, addr) \ { \ @@ -49,7 +48,7 @@ typedef ppc_opcode_t kprobe_opcode_t; if (addr) \ addr = (kprobe_opcode_t *)ppc_function_entry(addr); \ } -#else +#elif defined(PPC64_ELF_ABI_v1) /* * 64bit powerpc ABIv1 uses function descriptors: * - Check for the dot variant of the symbol first. 
@@ -92,8 +91,7 @@ typedef ppc_opcode_t kprobe_opcode_t; addr = (kprobe_opcode_t *)kallsyms_lookup_name(name); \ } \ } -#endif /* defined(_CALL_ELF) && _CALL_ELF == 2 */ -#endif /* CONFIG_PPC64 */ +#endif #define flush_insn_slot(p) do { } while (0) #define kretprobe_blacklist_size 0 diff --git a/arch/powerpc/include/asm/linkage.h b/arch/powerpc/include/asm/linkage.h index e3ad5c7..0cf5e21 100644 --- a/arch/powerpc/include/asm/linkage.h +++ b/arch/powerpc/include/asm/linkage.h @@ -1,8 +1,9 @@ #ifndef _ASM_POWERPC_LINKAGE_H #define _ASM_POWERPC_LINKAGE_H -#ifdef CONFIG_PPC64 -#if !defined(_CALL_ELF) || _CALL_ELF != 2 +#include + +#ifdef PPC64_ELF_ABI_v1 #define cond_syscall(x) \ asm ("\t.weak " #x "\n\t.set " #x ", sys_ni_syscall\n" \ "\t.weak ." #x "\n\t.set ." #x ", .sys_ni_syscall\n") @@ -10,6 +11,5 @@ asm ("\t.globl " #alias "\n\t.set " #alias ", " #name "\n" \ "\t.globl ." #alias "\n\t.set ." #alias ", ." #name) #endif -#endif #endif /* _ASM_POWERPC_LINKAGE_H */ diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index 2b31632..7b591f9 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -189,7 +189,7 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR) #define __STK_REG(i) (112 + ((i)-14)*8) #define STK_REG(i) __STK_REG(__REG_##i) -#if defined(_CALL_ELF) && _CALL_ELF == 2 +#ifdef PPC64_ELF_ABI_v2 #define STK_GOT 24 #define __STK_PARAM(i) (32 + ((i)-3)*8) #else @@ -198,7 +198,7 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR) #endif #define STK_PARAM(i) __STK_PARAM(__REG_##i) -#if defined(_CALL_ELF) && _CALL_ELF == 2 +#ifdef PPC64_ELF_ABI_v2 #define _GLOBAL(name) \ .section ".text"; \ diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index c0c61fa..e492368 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -47,7 +47,7 @@ STACK_FRAME_OVERHEAD + KERNEL_REDZONE_SIZE) #define STACK_FRAME_MARKER 12 -#if defined(_CALL_ELF) && _CALL_ELF 
== 2 +#ifdef PPC64_ELF_ABI_v2 #define STACK_FRAME_MIN_SIZE 32 #else #define STACK_FRAME_MIN_SIZE STACK_FRAME_OVERHEAD diff --git a/arch/powerpc/include/asm/sections.h b/arch/powerpc/include/asm/sections.h index abf5866..7dc006b 100644 --- a/arch/powerpc/include/asm/sections.h +++ b/arch/powerpc/include/asm/sections.h @@ -62,7 +62,7 @@ static inline int overlaps_kvm_tmp(unsigned long start, unsigned long end) #endif } -#if !defined(_CALL_ELF) || _CALL_ELF != 2 +#ifdef PPC64_ELF_ABI_v1 #undef dereference_function_descriptor static inline void *dereference_function_descriptor(void *ptr) { @@ -73,7 +73,7 @@ static inline void *dereference_function_descriptor(void *ptr) ptr = p; return ptr; } -#endif +#endif /* PPC64_ELF_ABI_v1 */ #endif diff --git a/arch/powerpc/include/asm/types.h b/arch/powerpc/include/asm/types.h index bfb6ded..49a0678 100644 --- a/arch/powerpc/include/asm/types.h +++ b/arch/powerpc/include/asm/types.h @@ -15,6 +15,14 @@ #include +#ifdef __powerpc64__ +#if defined(_CALL_ELF) && _CALL_ELF == 2 +#define PPC64_ELF_ABI_v2 +#else +#define PPC64_ELF_ABI_v1 +#endif +#endif /* __powerpc64__ */ + #ifndef __ASSEMBLY__ typedef __vector128 vector128; diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 73e461a..2e0c565 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -453,7 +453,7 @@ _GLOBAL(ret_from_kernel_thread) REST_NVGPRS(r1) mtlr r14 mr r3,r15 -#if defined(_CALL_ELF) && _CALL_ELF == 2 +#ifdef PPC64_ELF_ABI_v2 mr r12,r14 #endif blrl diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c index 1123a4d..7af6c4d 100644 --- a/arch/powerpc/kernel/ftrace.c +++ b/arch/powerpc/kernel/ftrace.c @@ -608,7 +608,7 @@ unsigned long __init arch_syscall_addr(int nr) } #endif /* CONFIG_FTRACE_SYSCALLS && CONFIG_PPC64 */ -#if defined(CONFIG_PPC64) && (!defined(_CALL_ELF) || _CALL_ELF != 2) +#ifdef PPC64_ELF_ABI_v1 char *arch_ftrace_match_adjust(char *str, const char *search) { if (str[0] == 
'.' && search[0] != '.') @@ -616,4 +616,4 @@ char *arch_ftrace_match_adjust(char *str, const char *search) else return str; } -#endif /* defined(CONFIG_PPC64) && (!defined(_CALL_ELF) || _CALL_ELF != 2) */ +#endif /* PPC64_ELF_ABI_v1 */ diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 2d14774..064cd93 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -401,7 +401,7 @@ generic_secondary_common_init: ld r12,CPU_SPEC_RESTORE(r23) cmpdi 0,r12,0 beq 3f -#if !defined(_CALL_ELF) || _CALL_ELF != 2 +#ifdef PPC64_ELF_ABI_v1 ld r12,0(r12) #endif mtctr r12 diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 7c053f2..7d48e3b 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -506,13 +506,11 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) /* setup return addr to the jprobe handler routine */ regs->nip = arch_deref_entry_point(jp->entry); -#ifdef CONFIG_PPC64 -#if defined(_CALL_ELF) && _CALL_ELF == 2 +#ifdef PPC64_ELF_ABI_v2 regs->gpr[12] = (unsigned long)jp->entry; -#else +#elif defined(PPC64_ELF_ABI_v1) regs->gpr[2] = (unsigned long)(((func_descr_t *)jp->entry)->toc); #endif -#endif return 1; } diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index f28754c..7a85190 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -661,7 +661,7 @@ _GLOBAL(kexec_sequence) #ifndef CONFIG_PPC_BOOK3E /* clear out hardware hash page table and tlb */ -#if !defined(_CALL_ELF) || _CALL_ELF != 2 +#ifdef PPC64_ELF_ABI_v1 ld r12,0(r27) /* deref function descriptor */ #else mr r12,r27 diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index 9ce9a25..f703f34 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -41,7 +41,7 @@ this, and makes other things simpler. Anton? --RR. 
*/ -#if defined(_CALL_ELF) && _CALL_ELF == 2 +#ifdef PPC64_ELF_ABI_v2 /* An address is simply the address of the function. */ typedef unsigned long func_desc_t; @@ -132,7 +132,7 @@ static u32 ppc64_stub_insns[] = { /* Save current r2 value in magic place on the stack. */ 0xf8410000|R2_STACK_OFFSET, /* std r2,R2_STACK_OFFSET(r1) */ 0xe98b0020, /* ld r12,32(r11) */ -#if !defined(_CALL_ELF) || _CALL_ELF != 2 +#ifdef PPC64_ELF_ABI_v1 /* Set up new r2 from function descriptor */ 0xe84b0028, /* ld r2,40(r11) */ #endif diff --git a/arch/powerpc/kvm/book3s_interrupts.S b/arch/powerpc/kvm/book3s_interrupts.S index d044b8b..901e6fe 100644 --- a/arch/powerpc/kvm/book3s_interrupts.S +++ b/arch/powerpc/kvm/book3s_interrupts.S @@ -25,7 +25,7 @@ #include #if defined(CONFIG_PPC_BOOK3S_64) -#if defined(_CALL_ELF) && _CALL_ELF == 2 +#ifdef PPC64_ELF_ABI_v2 #define FUNC(name) name #else #define FUNC(name) GLUE(.,name) diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S index 16c4d88..42a4b23 100644 --- a/arch/powerpc/kvm/book3s_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_rmhandlers.S @@ -36,7 +36,7 @@ #if defined(CONFIG_PPC_BOOK3S_64) -#if defined(_CALL_ELF) && _CALL_ELF == 2 +#ifdef PPC64_ELF_ABI_v2 #define FUNC(name) name #else #define FUNC(name) GLUE(.,name) -- cgit v0.10.2 From 1e61423fb1b6a814e50ed7845c6c5f2bd5b4c6dc Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Wed, 8 Jun 2016 18:02:37 +0800 Subject: powerpc/pseries: Remove unused pstore headers in nvram.c Since the pstore code has moved away from nvram.c, remove unused pstore headers pstore.h and kmsg_dump.h. 
Signed-off-by: Geliang Tang Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/platforms/pseries/nvram.c b/arch/powerpc/platforms/pseries/nvram.c index 9f818417..79aef8c 100644 --- a/arch/powerpc/platforms/pseries/nvram.c +++ b/arch/powerpc/platforms/pseries/nvram.c @@ -17,8 +17,6 @@ #include #include #include -#include -#include #include #include #include -- cgit v0.10.2 From 6e45273eacc829a44fae1d3df14065d6947335ae Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Fri, 20 May 2016 16:18:57 +0200 Subject: powerpc/pseries: Fix trivial typo in function name Signed-off-by: Greg Kurz Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c index c638e24..e864b7c 100644 --- a/arch/powerpc/kernel/rtasd.c +++ b/arch/powerpc/kernel/rtasd.c @@ -483,7 +483,7 @@ static void rtas_event_scan(struct work_struct *w) } #ifdef CONFIG_PPC64 -static void retreive_nvram_error_log(void) +static void retrieve_nvram_error_log(void) { unsigned int err_type ; int rc ; @@ -501,7 +501,7 @@ static void retreive_nvram_error_log(void) } } #else /* CONFIG_PPC64 */ -static void retreive_nvram_error_log(void) +static void retrieve_nvram_error_log(void) { } #endif /* CONFIG_PPC64 */ @@ -513,7 +513,7 @@ static void start_event_scan(void) (30000 / rtas_event_scan_rate)); /* Retrieve errors from nvram if any */ - retreive_nvram_error_log(); + retrieve_nvram_error_log(); schedule_delayed_work_on(cpumask_first(cpu_online_mask), &event_scan_work, event_scan_delay); -- cgit v0.10.2 From 6262db7c088bbfc26480d10144cde70bbf077be3 Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Fri, 10 Jun 2016 11:51:28 +0800 Subject: powerpc/spinlock: Fix spin_unlock_wait() There is an ordering issue with spin_unlock_wait() on powerpc, because the spin_lock primitive is an ACQUIRE and an ACQUIRE is only ordering the load part of the operation with memory operations following it. 
Therefore the following event sequence can happen: CPU 1 CPU 2 CPU 3 ================== ==================== ============== spin_unlock(&lock); spin_lock(&lock): r1 = *lock; // r1 == 0; o = object; o = READ_ONCE(object); // reordered here object = NULL; smp_mb(); spin_unlock_wait(&lock); *lock = 1; smp_mb(); o->dead = true; < o = READ_ONCE(object); > // reordered upwards if (o) // true BUG_ON(o->dead); // true!! To fix this, we add a "nop" ll/sc loop in arch_spin_unlock_wait() on ppc, the "nop" ll/sc loop reads the lock value and writes it back atomically, in this way it will synchronize the view of the lock on CPU1 with that on CPU2. Therefore in the scenario above, either CPU2 will fail to get the lock at first or CPU1 will see the lock acquired by CPU2, both cases will eliminate this bug. This is a similar idea as what Will Deacon did for ARM64 in: d86b8da04dfa ("arm64: spinlock: serialise spin_unlock_wait against concurrent lockers") Furthermore, if the "nop" ll/sc figures out the lock is locked, we actually don't need to do the "nop" ll/sc trick again, we can just do a normal load+check loop for the lock to be released, because in that case, spin_unlock_wait() is called when someone is holding the lock, and the store part of the "nop" ll/sc happens before the lock release of the current lock holder: "nop" ll/sc -> spin_unlock() and the lock release happens before the next lock acquisition: spin_unlock() -> spin_lock() which means the "nop" ll/sc happens before the next lock acquisition: "nop" ll/sc -> spin_unlock() -> spin_lock() With a smp_mb() preceding spin_unlock_wait(), the store of object is guaranteed to be observed by the next lock holder: STORE -> smp_mb() -> "nop" ll/sc -> spin_unlock() -> spin_lock() This patch therefore fixes the issue and also cleans the arch_spin_unlock_wait() a little bit by removing superfluous memory barriers in loops and consolidating the implementations for PPC32 and PPC64 into one. Suggested-by: "Paul E. 
McKenney" Signed-off-by: Boqun Feng Reviewed-by: "Paul E. McKenney" [mpe: Inline the "nop" ll/sc loop and set EH=0, munge change log] Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h index 523673d..fa37fe9 100644 --- a/arch/powerpc/include/asm/spinlock.h +++ b/arch/powerpc/include/asm/spinlock.h @@ -162,12 +162,38 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock) lock->slock = 0; } -#ifdef CONFIG_PPC64 -extern void arch_spin_unlock_wait(arch_spinlock_t *lock); -#else -#define arch_spin_unlock_wait(lock) \ - do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0) -#endif +static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) +{ + arch_spinlock_t lock_val; + + smp_mb(); + + /* + * Atomically load and store back the lock value (unchanged). This + * ensures that our observation of the lock value is ordered with + * respect to other lock operations. + */ + __asm__ __volatile__( +"1: " PPC_LWARX(%0, 0, %2, 0) "\n" +" stwcx. 
%0, 0, %2\n" +" bne- 1b\n" + : "=&r" (lock_val), "+m" (*lock) + : "r" (lock) + : "cr0", "xer"); + + if (arch_spin_value_unlocked(lock_val)) + goto out; + + while (lock->slock) { + HMT_low(); + if (SHARED_PROCESSOR) + __spin_yield(lock); + } + HMT_medium(); + +out: + smp_mb(); +} /* * Read-write spinlocks, allowing multiple readers diff --git a/arch/powerpc/lib/locks.c b/arch/powerpc/lib/locks.c index f7deebd..b7b1237 100644 --- a/arch/powerpc/lib/locks.c +++ b/arch/powerpc/lib/locks.c @@ -68,19 +68,3 @@ void __rw_yield(arch_rwlock_t *rw) get_hard_smp_processor_id(holder_cpu), yield_count); } #endif - -void arch_spin_unlock_wait(arch_spinlock_t *lock) -{ - smp_mb(); - - while (lock->slock) { - HMT_low(); - if (SHARED_PROCESSOR) - __spin_yield(lock); - } - HMT_medium(); - - smp_mb(); -} - -EXPORT_SYMBOL(arch_spin_unlock_wait); -- cgit v0.10.2 From e70bd3ae914ec40d8505ed842d14285aac50b77a Mon Sep 17 00:00:00 2001 From: Bharata B Rao Date: Thu, 12 May 2016 19:04:14 +0530 Subject: powerpc/numa: Fix whitespace in hot_add_drconf_memory_max() Signed-off-by: Bharata B Rao Reviewed-by: David Gibson Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 669a15e..4a87ccb 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -1163,18 +1163,18 @@ int hot_add_scn_to_nid(unsigned long scn_addr) static u64 hot_add_drconf_memory_max(void) { - struct device_node *memory = NULL; - unsigned int drconf_cell_cnt = 0; - u64 lmb_size = 0; + struct device_node *memory = NULL; + unsigned int drconf_cell_cnt = 0; + u64 lmb_size = 0; const __be32 *dm = NULL; - memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); - if (memory) { - drconf_cell_cnt = of_get_drconf_memory(memory, &dm); - lmb_size = of_get_lmb_size(memory); - of_node_put(memory); - } - return lmb_size * drconf_cell_cnt; + memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); + if (memory) { + drconf_cell_cnt = of_get_drconf_memory(memory, 
&dm); + lmb_size = of_get_lmb_size(memory); + of_node_put(memory); + } + return lmb_size * drconf_cell_cnt; } /* -- cgit v0.10.2 From 45b64ee64970dee9392229302efe1d1567e8d304 Mon Sep 17 00:00:00 2001 From: Bharata B Rao Date: Thu, 12 May 2016 19:04:15 +0530 Subject: powerpc/numa: Fix multiple bugs in memory_hotplug_max() memory_hotplug_max() uses hot_add_drconf_memory_max() to get maximum addressable memory by referring to ibm,dynamic-memory property. There are three problems with the current approach: 1 hot_add_drconf_memory_max() assumes that ibm,dynamic-memory includes all the LMBs of the guest, but that is not true for PowerKVM which populates only DR LMBs (LMBs that can be hotplugged/removed) in that property. 2 hot_add_drconf_memory_max() multiplies lmb-size with lmb-count to arrive at the max possible address. Since ibm,dynamic-memory doesn't include RMA LMBs, the address thus obtained will be less than the actual max address. For example, if max possible memory size is 32G, with lmb-size of 256MB there can be 127 LMBs in ibm,dynamic-memory (1 LMB for RMA which won't be present here). hot_add_drconf_memory_max() would then return the max addressable memory as 127 * 256MB = 31.75GB, the max address should have been 32G which is what ibm,lrdr-capacity shows. 3 In PowerKVM, there can be a gap between the end of boot time RAM and beginning of hotplug RAM area. So just multiplying lmb-count with lmb-size will not provide the correct max possible address for PowerKVM. This patch fixes 1 by using ibm,lrdr-capacity property to return the max addressable memory whenever the property is present. Then it fixes 2 & 3 by fetching the address of the last LMB in ibm,dynamic-memory property.
Fixes: cd34206e949b ("powerpc: Add memory_hotplug_max()") Signed-off-by: Bharata B Rao Reviewed-by: David Gibson Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 4a87ccb..f8b1da7 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -1164,17 +1164,33 @@ int hot_add_scn_to_nid(unsigned long scn_addr) static u64 hot_add_drconf_memory_max(void) { struct device_node *memory = NULL; + struct device_node *dn = NULL; unsigned int drconf_cell_cnt = 0; u64 lmb_size = 0; const __be32 *dm = NULL; + const __be64 *lrdr = NULL; + struct of_drconf_cell drmem; + + dn = of_find_node_by_path("/rtas"); + if (dn) { + lrdr = of_get_property(dn, "ibm,lrdr-capacity", NULL); + of_node_put(dn); + if (lrdr) + return be64_to_cpup(lrdr); + } memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); if (memory) { drconf_cell_cnt = of_get_drconf_memory(memory, &dm); lmb_size = of_get_lmb_size(memory); + + /* Advance to the last cell, each cell has 6 32 bit integers */ + dm += (drconf_cell_cnt - 1) * 6; + read_drconf_cell(&drmem, &dm); of_node_put(memory); + return drmem.base_addr + lmb_size; } - return lmb_size * drconf_cell_cnt; + return 0; } /* -- cgit v0.10.2 From aac6a91fea93e6bdd7ac20365d7ecc9187ca61da Mon Sep 17 00:00:00 2001 From: Rashmica Gupta Date: Thu, 2 Jun 2016 08:56:47 +1000 Subject: powerpc/asm: Remove unused symbols in asm-offsets.c THREAD_DSCR: Added in efcac6589a27 "powerpc: Per process DSCR + some fixes (try#4)" Last usage removed in 152d523e6307 "powerpc: Create context switch helpers save_sprs() and restore_sprs()" THREAD_DSCR_INHERIT: Added in 714332858bfd "powerpc: Restore correct DSCR in context switch" Last usage removed in 152d523e6307 "powerpc: Create context switch helpers save_sprs() and restore_sprs()" THREAD_TAR: Added in 2468dcf641e4 "powerpc: Add support for context switching the TAR register" Last usage removed in 152d523e6307 "powerpc: Create context switch helpers save_sprs() and 
restore_sprs()" THREAD_BESCR, THREAD_EBBHR and THREAD_EBBRR: Added in 9353374b8e15 "powerpc: Context switch the new EBB SPRs" Last usage removed in 152d523e6307 "powerpc: Create context switch helpers save_sprs() and restore_sprs()" THREAD_SIAR, THREAD_SDAR, THREAD_SIER, THREAD_MMCR0, and THREAD_MMCR2: Added in 59affcd3e460 "powerpc: Context switch more PMU related SPRs" Last usage removed in b11ae95100f7 "powerpc: Partial revert of "Context switch more PMU related SPRs"" PACA_LOCK_TOKEN: Added in 9e368f291560 "KVM: PPC: book3s_hv: Add support for PPC970-family processors" Last usage removed in c17b98cf6028 "KVM: PPC: Book3S HV: Remove code for PPC970 processors" HCALL_STAT_SIZE, HCALL_STAT_CALLS, HCALL_STAT_TB and HCALL_STAT_PURR: Added in 57852a853b0d "[POWERPC] powerpc: Instrument Hypervisor Calls" Last usage removed in c8cd093a6e9f "powerpc: tracing: Add hypervisor call tracepoints" VCPU_EPLC: Added in d30f6e480055 "KVM: PPC: booke: category E.HV (GS-mode) support" Never used. CPU_DOWN_FLUSH: Added in e7affb1dba0e "powerpc/cache: add cache flush operation for various e500" Never used. CFG_STAMP_XSEC: Added in 14cf11af6cf6 "powerpc: Merge enough to start building in arch/powerpc." Last usage removed in 0e469db8f70c "powerpc: Rework VDSO gettimeofday to prevent time going backwards" KVM_LPCR: Added in aa04b4cc5be6 "KVM: PPC: Allocate RMAs (Real Mode Areas) at boot for use by guests" Last usage removed in a0144e2a6b0b "KVM: PPC: Book3S HV: Store LPCR value for each virtual core" GPR15, GPR16, GPR17, GPR18, GPR19, GPR20, GPR21, GPR22, GPR23, GPR24, GPR25, GPR26, GPR27, GPR28, GPR29, GPR30 and GPR31: Added in 14cf11af6cf6 "powerpc: Merge enough to start building in arch/powerpc." Never used. VCPU_SHADOW_FSCR: Added in 616dff860282 "KVM: PPC: Book3S PR: Handle Facility interrupt and FSCR" Never used. VCPU_SHADOW_SRR1: Added in a2d56020d1d9 "KVM: PPC: Book3S PR: Keep volatile reg values in vcpu rather than shadow_vcpu" Never used. 
KVM_SPLIT_SIZE: Added in b4deba5c41e9 "KVM: PPC: Book3S HV: Implement dynamicmicro-threading on POWER8" Never used. VCPU_VCPUID: Added in de56a948b918 "KVM: PPC: Add support for Book3S processors in hypervisor mode" Last usage removed 1b400ba0cd24 "KVM: PPC: Book3S HV: Improve handling of local vs. global TLB invalidations" _MQ: Added in 14cf11af6cf6 "powerpc: Merge enough to start building in arch/powerpc." Never used. AUDITCONTEXT: Added in 14cf11af6cf6 "powerpc: Merge enough to start building in arch/powerpc." Last usage removed in 401d1f029beb "[PATCH] syscall entry/exit revamp" CLONE_VM: Added in 14cf11af6cf6 "powerpc: Merge enough to start building in arch/powerpc." Currently unused. CLONE_UNTRACED: Added in 14cf11af6cf6 "powerpc: Merge enough to start building in arch/powerpc." Currently unused. Signed-off-by: Rashmica Gupta [mpe: Munge change log] Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 9ea0955..5b99f95 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -74,11 +74,8 @@ int main(void) DEFINE(MM, offsetof(struct task_struct, mm)); DEFINE(MMCONTEXTID, offsetof(struct mm_struct, context.id)); #ifdef CONFIG_PPC64 - DEFINE(AUDITCONTEXT, offsetof(struct task_struct, audit_context)); DEFINE(SIGSEGV, SIGSEGV); DEFINE(NMI_MASK, NMI_MASK); - DEFINE(THREAD_DSCR, offsetof(struct thread_struct, dscr)); - DEFINE(THREAD_DSCR_INHERIT, offsetof(struct thread_struct, dscr_inherit)); DEFINE(TASKTHREADPPR, offsetof(struct task_struct, thread.ppr)); #else DEFINE(THREAD_INFO, offsetof(struct task_struct, stack)); @@ -132,17 +129,6 @@ int main(void) DEFINE(THREAD_KVM_VCPU, offsetof(struct thread_struct, kvm_vcpu)); #endif -#ifdef CONFIG_PPC_BOOK3S_64 - DEFINE(THREAD_TAR, offsetof(struct thread_struct, tar)); - DEFINE(THREAD_BESCR, offsetof(struct thread_struct, bescr)); - DEFINE(THREAD_EBBHR, offsetof(struct thread_struct, ebbhr)); - DEFINE(THREAD_EBBRR, 
offsetof(struct thread_struct, ebbrr)); - DEFINE(THREAD_SIAR, offsetof(struct thread_struct, siar)); - DEFINE(THREAD_SDAR, offsetof(struct thread_struct, sdar)); - DEFINE(THREAD_SIER, offsetof(struct thread_struct, sier)); - DEFINE(THREAD_MMCR0, offsetof(struct thread_struct, mmcr0)); - DEFINE(THREAD_MMCR2, offsetof(struct thread_struct, mmcr2)); -#endif #ifdef CONFIG_PPC_TRANSACTIONAL_MEM DEFINE(PACATMSCRATCH, offsetof(struct paca_struct, tm_scratch)); DEFINE(THREAD_TM_TFHAR, offsetof(struct thread_struct, tm_tfhar)); @@ -178,7 +164,6 @@ int main(void) DEFINE(ICACHEL1LINESPERPAGE, offsetof(struct ppc64_caches, ilines_per_page)); /* paca */ DEFINE(PACA_SIZE, sizeof(struct paca_struct)); - DEFINE(PACA_LOCK_TOKEN, offsetof(struct paca_struct, lock_token)); DEFINE(PACAPACAINDEX, offsetof(struct paca_struct, paca_index)); DEFINE(PACAPROCSTART, offsetof(struct paca_struct, cpu_start)); DEFINE(PACAKSAVE, offsetof(struct paca_struct, kstack)); @@ -275,12 +260,6 @@ int main(void) /* Create extra stack space for SRR0 and SRR1 when calling prom/rtas. 
*/ DEFINE(PROM_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16); DEFINE(RTAS_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16); - - /* hcall statistics */ - DEFINE(HCALL_STAT_SIZE, sizeof(struct hcall_stats)); - DEFINE(HCALL_STAT_CALLS, offsetof(struct hcall_stats, num_calls)); - DEFINE(HCALL_STAT_TB, offsetof(struct hcall_stats, tb_total)); - DEFINE(HCALL_STAT_PURR, offsetof(struct hcall_stats, purr_total)); #endif /* CONFIG_PPC64 */ DEFINE(GPR0, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[0])); DEFINE(GPR1, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[1])); @@ -298,23 +277,6 @@ int main(void) DEFINE(GPR13, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[13])); #ifndef CONFIG_PPC64 DEFINE(GPR14, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[14])); - DEFINE(GPR15, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[15])); - DEFINE(GPR16, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[16])); - DEFINE(GPR17, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[17])); - DEFINE(GPR18, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[18])); - DEFINE(GPR19, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[19])); - DEFINE(GPR20, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[20])); - DEFINE(GPR21, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[21])); - DEFINE(GPR22, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[22])); - DEFINE(GPR23, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[23])); - DEFINE(GPR24, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[24])); - DEFINE(GPR25, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[25])); - DEFINE(GPR26, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[26])); - DEFINE(GPR27, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[27])); - DEFINE(GPR28, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[28])); - DEFINE(GPR29, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[29])); - DEFINE(GPR30, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[30])); - DEFINE(GPR31, 
STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[31])); #endif /* CONFIG_PPC64 */ /* * Note: these symbols include _ because they overlap with special @@ -332,7 +294,6 @@ int main(void) DEFINE(RESULT, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, result)); DEFINE(_TRAP, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, trap)); #ifndef CONFIG_PPC64 - DEFINE(_MQ, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, mq)); /* * The PowerPC 400-class & Book-E processors have neither the DAR * nor the DSISR SPRs. Hence, we overload them to hold the similar @@ -369,8 +330,6 @@ int main(void) DEFINE(SAVED_KSP_LIMIT, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, saved_ksp_limit)); #endif #endif - DEFINE(CLONE_VM, CLONE_VM); - DEFINE(CLONE_UNTRACED, CLONE_UNTRACED); #ifndef CONFIG_PPC64 DEFINE(MM_PGD, offsetof(struct mm_struct, pgd)); @@ -380,7 +339,6 @@ int main(void) DEFINE(CPU_SPEC_FEATURES, offsetof(struct cpu_spec, cpu_features)); DEFINE(CPU_SPEC_SETUP, offsetof(struct cpu_spec, cpu_setup)); DEFINE(CPU_SPEC_RESTORE, offsetof(struct cpu_spec, cpu_restore)); - DEFINE(CPU_DOWN_FLUSH, offsetof(struct cpu_spec, cpu_down_flush)); DEFINE(pbe_address, offsetof(struct pbe, address)); DEFINE(pbe_orig_address, offsetof(struct pbe, orig_address)); @@ -395,7 +353,6 @@ int main(void) DEFINE(CFG_TB_ORIG_STAMP, offsetof(struct vdso_data, tb_orig_stamp)); DEFINE(CFG_TB_TICKS_PER_SEC, offsetof(struct vdso_data, tb_ticks_per_sec)); DEFINE(CFG_TB_TO_XS, offsetof(struct vdso_data, tb_to_xs)); - DEFINE(CFG_STAMP_XSEC, offsetof(struct vdso_data, stamp_xsec)); DEFINE(CFG_TB_UPDATE_COUNT, offsetof(struct vdso_data, tb_update_count)); DEFINE(CFG_TZ_MINUTEWEST, offsetof(struct vdso_data, tz_minuteswest)); DEFINE(CFG_TZ_DSTTIME, offsetof(struct vdso_data, tz_dsttime)); @@ -517,7 +474,6 @@ int main(void) DEFINE(KVM_HOST_SDR1, offsetof(struct kvm, arch.host_sdr1)); DEFINE(KVM_NEED_FLUSH, offsetof(struct kvm, arch.need_tlb_flush.bits)); DEFINE(KVM_ENABLED_HCALLS, offsetof(struct kvm, 
arch.enabled_hcalls)); - DEFINE(KVM_LPCR, offsetof(struct kvm, arch.lpcr)); DEFINE(KVM_VRMA_SLB_V, offsetof(struct kvm, arch.vrma_slb_v)); DEFINE(VCPU_DSISR, offsetof(struct kvm_vcpu, arch.shregs.dsisr)); DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar)); @@ -528,7 +484,6 @@ int main(void) DEFINE(VCPU_THREAD_CPU, offsetof(struct kvm_vcpu, arch.thread_cpu)); #endif #ifdef CONFIG_PPC_BOOK3S - DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id)); DEFINE(VCPU_PURR, offsetof(struct kvm_vcpu, arch.purr)); DEFINE(VCPU_SPURR, offsetof(struct kvm_vcpu, arch.spurr)); DEFINE(VCPU_IC, offsetof(struct kvm_vcpu, arch.ic)); @@ -566,7 +521,6 @@ int main(void) DEFINE(VCPU_CFAR, offsetof(struct kvm_vcpu, arch.cfar)); DEFINE(VCPU_PPR, offsetof(struct kvm_vcpu, arch.ppr)); DEFINE(VCPU_FSCR, offsetof(struct kvm_vcpu, arch.fscr)); - DEFINE(VCPU_SHADOW_FSCR, offsetof(struct kvm_vcpu, arch.shadow_fscr)); DEFINE(VCPU_PSPB, offsetof(struct kvm_vcpu, arch.pspb)); DEFINE(VCPU_EBBHR, offsetof(struct kvm_vcpu, arch.ebbhr)); DEFINE(VCPU_EBBRR, offsetof(struct kvm_vcpu, arch.ebbrr)); @@ -576,7 +530,6 @@ int main(void) DEFINE(VCPU_TCSCR, offsetof(struct kvm_vcpu, arch.tcscr)); DEFINE(VCPU_ACOP, offsetof(struct kvm_vcpu, arch.acop)); DEFINE(VCPU_WORT, offsetof(struct kvm_vcpu, arch.wort)); - DEFINE(VCPU_SHADOW_SRR1, offsetof(struct kvm_vcpu, arch.shadow_srr1)); DEFINE(VCORE_ENTRY_EXIT, offsetof(struct kvmppc_vcore, entry_exit_map)); DEFINE(VCORE_IN_GUEST, offsetof(struct kvmppc_vcore, in_guest)); DEFINE(VCORE_NAPPING_THREADS, offsetof(struct kvmppc_vcore, napping_threads)); @@ -693,7 +646,6 @@ int main(void) DEFINE(KVM_SPLIT_RPR, offsetof(struct kvm_split_mode, rpr)); DEFINE(KVM_SPLIT_PMMAR, offsetof(struct kvm_split_mode, pmmar)); DEFINE(KVM_SPLIT_LDBAR, offsetof(struct kvm_split_mode, ldbar)); - DEFINE(KVM_SPLIT_SIZE, offsetof(struct kvm_split_mode, subcore_size)); DEFINE(KVM_SPLIT_DO_NAP, offsetof(struct kvm_split_mode, do_nap)); DEFINE(KVM_SPLIT_NAPPED, offsetof(struct 
kvm_split_mode, napped)); #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ @@ -756,7 +708,6 @@ int main(void) #ifdef CONFIG_KVM_BOOKE_HV DEFINE(VCPU_HOST_MAS4, offsetof(struct kvm_vcpu, arch.host_mas4)); DEFINE(VCPU_HOST_MAS6, offsetof(struct kvm_vcpu, arch.host_mas6)); - DEFINE(VCPU_EPLC, offsetof(struct kvm_vcpu, arch.eplc)); #endif #ifdef CONFIG_KVM_EXIT_TIMING -- cgit v0.10.2 From 3079abe555511031e2ba5d1e21ddc52edc9af349 Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Wed, 11 May 2016 19:22:18 +1000 Subject: powerpc/mm: Ensure "special" zones are empty The mm zone mechanism was traditionally used by arch specific code to partition memory into allocation zones. However there are several zones that are managed by the mm subsystem rather than the architecture. Most architectures set the max PFN of these special zones to zero, however on powerpc we set them to ~0ul. This, in conjunction with a bug in free_area_init_nodes() results in all of system memory being placed in ZONE_DEVICE when enabled. Device memory cannot be used for regular kernel memory allocations so this will cause a kernel panic at boot. Given the planned addition of more mm managed zones (ZONE_CMA) we should aim to be consistent with every other architecture and set the max PFN for these zones to zero. Signed-off-by: Oliver O'Halloran Reviewed-by: Balbir Singh Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 2fd57fa..9eac9d4 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -239,8 +239,14 @@ static int __init mark_nonram_nosave(void) static bool zone_limits_final; +/* + * The memory zones past TOP_ZONE are managed by generic mm code. + * These should be set to zero since that's what every other + * architecture does. + */ static unsigned long max_zone_pfns[MAX_NR_ZONES] = { - [0 ... MAX_NR_ZONES - 1] = ~0UL + [0 ... TOP_ZONE ] = ~0UL, + [TOP_ZONE + 1 ... 
MAX_NR_ZONES - 1] = 0 }; /* -- cgit v0.10.2 From 1d1451655bad9a6a5fd7a42de68420069ce3bee3 Mon Sep 17 00:00:00 2001 From: Suraj Jitindar Singh Date: Wed, 11 May 2016 10:57:32 +1000 Subject: powerpc: Add array bounds checking to crash_shutdown_handlers The array crash_shutdown_handles is an array of size CRASH_HANDLER_MAX+1 containing up to CRASH_HANDLER_MAX shutdown_handlers. It is assumed to be NULL terminated, which it is under normal circumstances. Array accesses in the functions crash_shutdown_unregister() and default_machine_crash_shutdown() rely on this NULL termination property when traversing this list and don't protect against out of bounds accesses. If the NULL terminator were somehow overwritten these functions could potentially access out of the bounds of the array. Shrink the array to size CRASH_HANDLER_MAX and implement explicit array bounds checking when accessing the elements of the crash_shutdown_handles[] array in crash_shutdown_unregister() and default_machine_crash_shutdown().
Signed-off-by: Suraj Jitindar Singh Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c index 2bb252c..3dc1fad 100644 --- a/arch/powerpc/kernel/crash.c +++ b/arch/powerpc/kernel/crash.c @@ -48,8 +48,8 @@ int crashing_cpu = -1; static int time_to_dump; #define CRASH_HANDLER_MAX 3 -/* NULL terminated list of shutdown handles */ -static crash_shutdown_t crash_shutdown_handles[CRASH_HANDLER_MAX+1]; +/* List of shutdown handles */ +static crash_shutdown_t crash_shutdown_handles[CRASH_HANDLER_MAX]; static DEFINE_SPINLOCK(crash_handlers_lock); static unsigned long crash_shutdown_buf[JMP_BUF_LEN]; @@ -288,9 +288,14 @@ int crash_shutdown_unregister(crash_shutdown_t handler) rc = 1; } else { /* Shift handles down */ - for (; crash_shutdown_handles[i]; i++) + for (; i < (CRASH_HANDLER_MAX - 1); i++) crash_shutdown_handles[i] = crash_shutdown_handles[i+1]; + /* + * Reset last entry to NULL now that it has been shifted down, + * this will allow new handles to be added here. + */ + crash_shutdown_handles[i] = NULL; rc = 0; } @@ -346,7 +351,7 @@ void default_machine_crash_shutdown(struct pt_regs *regs) old_handler = __debugger_fault_handler; __debugger_fault_handler = handle_fault; crash_shutdown_cpu = smp_processor_id(); - for (i = 0; crash_shutdown_handles[i]; i++) { + for (i = 0; crash_shutdown_handles[i] && i < CRASH_HANDLER_MAX; i++) { if (setjmp(crash_shutdown_buf) == 0) { /* * Insert syncs and delay to ensure -- cgit v0.10.2 From 34852ed5511ec5d07897f22d5607061a248fc82f Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Wed, 18 May 2016 11:16:49 +1000 Subject: powerpc/sparse: make some things static This is just a smattering of things picked up by sparse that should be made static. 
Signed-off-by: Daniel Axtens Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c index 3dc1fad..888bdf1 100644 --- a/arch/powerpc/kernel/crash.c +++ b/arch/powerpc/kernel/crash.c @@ -65,7 +65,7 @@ static int handle_fault(struct pt_regs *regs) #ifdef CONFIG_SMP static atomic_t cpus_in_crash; -void crash_ipi_callback(struct pt_regs *regs) +static void crash_ipi_callback(struct pt_regs *regs) { static cpumask_t cpus_state_saved = CPU_MASK_NONE; diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 692873b..c4f1d1f 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -35,7 +35,7 @@ static DEFINE_PER_CPU(struct cpu, cpu_devices); #ifdef CONFIG_PPC64 /* Time in microseconds we delay before sleeping in the idle loop */ -DEFINE_PER_CPU(long, smt_snooze_delay) = { 100 }; +static DEFINE_PER_CPU(long, smt_snooze_delay) = { 100 }; static ssize_t store_smt_snooze_delay(struct device *dev, struct device_attribute *attr, diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c index fcc8b68..92a8020 100644 --- a/arch/powerpc/platforms/powernv/idle.c +++ b/arch/powerpc/platforms/powernv/idle.c @@ -29,7 +29,7 @@ static u32 supported_cpuidle_states; -int pnv_save_sprs_for_winkle(void) +static int pnv_save_sprs_for_winkle(void) { int cpu; int rc; diff --git a/arch/powerpc/platforms/pseries/pseries_energy.c b/arch/powerpc/platforms/pseries/pseries_energy.c index 9276779..164a13d 100644 --- a/arch/powerpc/platforms/pseries/pseries_energy.c +++ b/arch/powerpc/platforms/pseries/pseries_energy.c @@ -208,19 +208,19 @@ static ssize_t percpu_deactivate_hint_show(struct device *dev, * Per-cpu value of the hint */ -struct device_attribute attr_cpu_activate_hint_list = +static struct device_attribute attr_cpu_activate_hint_list = __ATTR(pseries_activate_hint_list, 0444, cpu_activate_hint_list_show, NULL); -struct device_attribute attr_cpu_deactivate_hint_list = 
+static struct device_attribute attr_cpu_deactivate_hint_list = __ATTR(pseries_deactivate_hint_list, 0444, cpu_deactivate_hint_list_show, NULL); -struct device_attribute attr_percpu_activate_hint = +static struct device_attribute attr_percpu_activate_hint = __ATTR(pseries_activate_hint, 0444, percpu_activate_hint_show, NULL); -struct device_attribute attr_percpu_deactivate_hint = +static struct device_attribute attr_percpu_deactivate_hint = __ATTR(pseries_deactivate_hint, 0444, percpu_deactivate_hint_show, NULL); -- cgit v0.10.2 From 42f5b4cacd783faf05e3ff8bf85e8be31f3dfa9d Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Wed, 18 May 2016 11:16:50 +1000 Subject: powerpc: Introduce asm-prototypes.h Sparse picked up a number of functions that are implemented in C and then only referred to in asm code. This introduces asm-prototypes.h, which provides a place for prototypes of these functions. This silences some sparse warnings. Signed-off-by: Daniel Axtens [mpe: Add include guards, clean up copyright & GPL text] Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h new file mode 100644 index 0000000..e71b909 --- /dev/null +++ b/arch/powerpc/include/asm/asm-prototypes.h @@ -0,0 +1,75 @@ +#ifndef _ASM_POWERPC_ASM_PROTOTYPES_H +#define _ASM_POWERPC_ASM_PROTOTYPES_H +/* + * This file is for prototypes of C functions that are only called + * from asm, and any associated variables. + * + * Copyright 2016, Daniel Axtens, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. 
+ */ + +#include +#include + +/* SMP */ +extern struct thread_info *current_set[NR_CPUS]; +extern struct thread_info *secondary_ti; +void start_secondary(void *unused); + +/* kexec */ +struct paca_struct; +struct kimage; +extern struct paca_struct kexec_paca; +void kexec_copy_flush(struct kimage *image); + +/* pseries hcall tracing */ +extern struct static_key hcall_tracepoint_key; +void __trace_hcall_entry(unsigned long opcode, unsigned long *args); +void __trace_hcall_exit(long opcode, unsigned long retval, + unsigned long *retbuf); +/* OPAL tracing */ +#ifdef HAVE_JUMP_LABEL +extern struct static_key opal_tracepoint_key; +#endif + +void __trace_opal_entry(unsigned long opcode, unsigned long *args); +void __trace_opal_exit(long opcode, unsigned long retval); + +/* VMX copying */ +int enter_vmx_usercopy(void); +int exit_vmx_usercopy(void); +int enter_vmx_copy(void); +void * exit_vmx_copy(void *dest); + +/* Traps */ +long machine_check_early(struct pt_regs *regs); +long hmi_exception_realmode(struct pt_regs *regs); +void SMIException(struct pt_regs *regs); +void handle_hmi_exception(struct pt_regs *regs); +void instruction_breakpoint_exception(struct pt_regs *regs); +void RunModeException(struct pt_regs *regs); +void __kprobes single_step_exception(struct pt_regs *regs); +void __kprobes program_check_exception(struct pt_regs *regs); +void alignment_exception(struct pt_regs *regs); +void StackOverflow(struct pt_regs *regs); +void nonrecoverable_exception(struct pt_regs *regs); +void kernel_fp_unavailable_exception(struct pt_regs *regs); +void altivec_unavailable_exception(struct pt_regs *regs); +void vsx_unavailable_exception(struct pt_regs *regs); +void fp_unavailable_tm(struct pt_regs *regs); +void altivec_unavailable_tm(struct pt_regs *regs); +void vsx_unavailable_tm(struct pt_regs *regs); +void facility_unavailable_exception(struct pt_regs *regs); +void TAUException(struct pt_regs *regs); +void altivec_assist_exception(struct pt_regs *regs); +void 
unrecoverable_exception(struct pt_regs *regs); +void kernel_bad_stack(struct pt_regs *regs); +void system_reset_exception(struct pt_regs *regs); +void machine_check_exception(struct pt_regs *regs); +void __kprobes emulation_assist_interrupt(struct pt_regs *regs); + +#endif /* _ASM_POWERPC_ASM_PROTOTYPES_H */ diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c index b8c202d..50bf551 100644 --- a/arch/powerpc/kernel/machine_kexec_64.c +++ b/arch/powerpc/kernel/machine_kexec_64.c @@ -29,6 +29,7 @@ #include #include #include +#include #ifdef CONFIG_PPC_BOOK3E int default_machine_kexec_prepare(struct kimage *image) diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 55c924b..f1adc3c 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -53,6 +53,7 @@ #include #include #include +#include #ifdef DEBUG #include diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 9229ba6..11d15e7 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -60,6 +60,7 @@ #include #include #include +#include #include #if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC) diff --git a/arch/powerpc/lib/vmx-helper.c b/arch/powerpc/lib/vmx-helper.c index b27e030..bf925cd 100644 --- a/arch/powerpc/lib/vmx-helper.c +++ b/arch/powerpc/lib/vmx-helper.c @@ -21,6 +21,7 @@ #include #include #include +#include int enter_vmx_usercopy(void) { diff --git a/arch/powerpc/platforms/powernv/opal-tracepoints.c b/arch/powerpc/platforms/powernv/opal-tracepoints.c index e11273b..1e496b7 100644 --- a/arch/powerpc/platforms/powernv/opal-tracepoints.c +++ b/arch/powerpc/platforms/powernv/opal-tracepoints.c @@ -1,6 +1,7 @@ #include #include #include +#include #ifdef HAVE_JUMP_LABEL struct static_key opal_tracepoint_key = STATIC_KEY_INIT; diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 7f6100d..03ff986 100644 --- 
a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -45,6 +45,7 @@ #include #include #include +#include #include "pseries.h" -- cgit v0.10.2 From 665e87ffe1c400c525c3a4cd6fcb5db75972fadd Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Wed, 18 May 2016 11:16:51 +1000 Subject: powerpc/sparse: Include headers containing prototypes Sometimes headers that provide prototypes for functions are accidentally omitted from the files that define the functions. Fix a couple of times that occurs. Signed-off-by: Daniel Axtens Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index f1adc3c..1b55c78 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include diff --git a/arch/powerpc/platforms/pseries/power.c b/arch/powerpc/platforms/pseries/power.c index c26eadd..a4a0b57 100644 --- a/arch/powerpc/platforms/pseries/power.c +++ b/arch/powerpc/platforms/pseries/power.c @@ -27,6 +27,8 @@ #include #include +#include "pseries.h" + unsigned long rtas_poweron_auto; /* default and normal state is 0 */ static ssize_t auto_poweron_show(struct kobject *kobj, -- cgit v0.10.2 From a9650e9bc53239c30c39f77d9d8541e84641298a Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Wed, 18 May 2016 11:16:52 +1000 Subject: powerpc/align: Use #ifdef __BIG_ENDIAN__ #else for REG_BYTE Sparse complains that it doesn't know what REG_BYTE is: arch/powerpc/kernel/align.c:313:29: error: undefined identifier 'REG_BYTE' REG_BYTE is defined differently based on whether we're compiling for LE, BE32 or BE64. Sparse apparently doesn't provide __BIG_ENDIAN__ or __LITTLE_ENDIAN__, which means we get no definition. Rather than check for __BIG_ENDIAN__ and then separately for __LITTLE_ENDIAN__, just switch the #ifdef to check for __BIG_ENDIAN__ and then #else we define the little endian version. 
Technically that's dicey because PDP_ENDIAN is also a possibility, but we already do it in a lot of places so one more hardly matters. Signed-off-by: Daniel Axtens Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c index 8e7cb8e..d7ad66b 100644 --- a/arch/powerpc/kernel/align.c +++ b/arch/powerpc/kernel/align.c @@ -228,9 +228,7 @@ static int emulate_dcbz(struct pt_regs *regs, unsigned char __user *addr) #else #define REG_BYTE(rp, i) *((u8 *)(rp) + (i)) #endif -#endif - -#ifdef __LITTLE_ENDIAN__ +#else #define REG_BYTE(rp, i) (*(((u8 *)((rp) + ((i)>>2)) + ((i)&3)))) #endif -- cgit v0.10.2 From 64417a398973d964139306c0b152536e468bf77c Mon Sep 17 00:00:00 2001 From: Andrew Donnellan Date: Mon, 18 Apr 2016 15:03:50 +1000 Subject: cxl: static-ify variables to fix sparse warnings Make a couple more variables static. Found by sparse. Signed-off-by: Andrew Donnellan Reviewed-by: fbarrat@linux.vnet.ibm.com Reviewed-by: Matthew R. Ochs Acked-by: Ian Munsie Signed-off-by: Michael Ellerman diff --git a/drivers/misc/cxl/flash.c b/drivers/misc/cxl/flash.c index 68dd0b7..c63d61e 100644 --- a/drivers/misc/cxl/flash.c +++ b/drivers/misc/cxl/flash.c @@ -24,8 +24,8 @@ struct ai_header { }; static struct semaphore sem; -unsigned long *buffer[CXL_AI_MAX_ENTRIES]; -struct sg_list *le; +static unsigned long *buffer[CXL_AI_MAX_ENTRIES]; +static struct sg_list *le; static u64 continue_token; static unsigned int transfer; -- cgit v0.10.2 From 292841b09648ce7aee5df16ab72581f3b6c2bd7a Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Tue, 24 May 2016 02:14:05 +1000 Subject: cxl: Update process element after allocating interrupts In the kernel API, it is possible to attempt to allocate AFU interrupts after already starting a context. 
Since the process element structure used by the hardware is only filled out at the time the context is started, it will not be updated with the interrupt numbers that have just been allocated and therefore AFU interrupts will not work unless they were allocated prior to starting the context. This can present some difficulties as each CAPI enabled PCI device in the kernel API has a default context, which may need to be started very early to enable translations, potentially before interrupts can easily be set up. This patch makes the API more flexible to allow interrupts to be allocated after a context has already been started and takes care of updating the PE structure used by the hardware and notifying it to discard any cached copy it may have. The update is currently performed via a terminate/remove/add sequence. This is necessary on some hardware such as the XSL that does not properly support the update LLCMD. Note that this is only supported on powernv at present - attempting to perform this ordering on PowerVM will raise a warning. Signed-off-by: Ian Munsie Reviewed-by: Frederic Barrat Signed-off-by: Michael Ellerman diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c index 6d228cc..99081b8 100644 --- a/drivers/misc/cxl/api.c +++ b/drivers/misc/cxl/api.c @@ -102,7 +102,10 @@ int cxl_allocate_afu_irqs(struct cxl_context *ctx, int num) if (num == 0) num = ctx->afu->pp_irqs; res = afu_allocate_irqs(ctx, num); - if (!res && !cpu_has_feature(CPU_FTR_HVMODE)) { + if (res) + return res; + + if (!cpu_has_feature(CPU_FTR_HVMODE)) { /* In a guest, the PSL interrupt is not multiplexed. 
It was * allocated above, and we need to set its handler */ @@ -110,6 +113,13 @@ int cxl_allocate_afu_irqs(struct cxl_context *ctx, int num) if (hwirq) cxl_map_irq(ctx->afu->adapter, hwirq, cxl_ops->psl_interrupt, ctx, "psl"); } + + if (ctx->status == STARTED) { + if (cxl_ops->update_ivtes) + cxl_ops->update_ivtes(ctx); + else WARN(1, "BUG: cxl_allocate_afu_irqs must be called prior to starting the context on this platform\n"); + } + return res; } EXPORT_SYMBOL_GPL(cxl_allocate_afu_irqs); diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h index 4fe5078..fbec29b 100644 --- a/drivers/misc/cxl/cxl.h +++ b/drivers/misc/cxl/cxl.h @@ -855,6 +855,7 @@ struct cxl_backend_ops { int (*attach_process)(struct cxl_context *ctx, bool kernel, u64 wed, u64 amr); int (*detach_process)(struct cxl_context *ctx); + void (*update_ivtes)(struct cxl_context *ctx); bool (*support_attributes)(const char *attr_name, enum cxl_attrs type); bool (*link_ok)(struct cxl *cxl, struct cxl_afu *afu); void (*release_afu)(struct device *dev); diff --git a/drivers/misc/cxl/guest.c b/drivers/misc/cxl/guest.c index bc8d0b9..1edba52 100644 --- a/drivers/misc/cxl/guest.c +++ b/drivers/misc/cxl/guest.c @@ -1182,6 +1182,7 @@ const struct cxl_backend_ops cxl_guest_ops = { .ack_irq = guest_ack_irq, .attach_process = guest_attach_process, .detach_process = guest_detach_process, + .update_ivtes = NULL, .support_attributes = guest_support_attributes, .link_ok = guest_link_ok, .release_afu = guest_release_afu, diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c index 55d8a14..efe8af9 100644 --- a/drivers/misc/cxl/native.c +++ b/drivers/misc/cxl/native.c @@ -430,7 +430,6 @@ static int remove_process_element(struct cxl_context *ctx) return rc; } - void cxl_assign_psn_space(struct cxl_context *ctx) { if (!ctx->afu->pp_size || ctx->master) { @@ -507,10 +506,39 @@ static u64 calculate_sr(struct cxl_context *ctx) return sr; } +static void update_ivtes_directed(struct cxl_context *ctx) +{ + bool 
need_update = (ctx->status == STARTED); + int r; + + if (need_update) { + WARN_ON(terminate_process_element(ctx)); + WARN_ON(remove_process_element(ctx)); + } + + for (r = 0; r < CXL_IRQ_RANGES; r++) { + ctx->elem->ivte_offsets[r] = cpu_to_be16(ctx->irqs.offset[r]); + ctx->elem->ivte_ranges[r] = cpu_to_be16(ctx->irqs.range[r]); + } + + /* + * Theoretically we could use the update llcmd, instead of a + * terminate/remove/add (or if an atomic update was required we could + * do a suspend/update/resume), however it seems there might be issues + * with the update llcmd on some cards (including those using an XSL on + * an ASIC) so for now it's safest to go with the commands that are + * known to work. In the future if we come across a situation where the + * card may be performing transactions using the same PE while we are + * doing this update we might need to revisit this. + */ + if (need_update) + WARN_ON(add_process_element(ctx)); +} + static int attach_afu_directed(struct cxl_context *ctx, u64 wed, u64 amr) { u32 pid; - int r, result; + int result; cxl_assign_psn_space(ctx); @@ -545,10 +573,7 @@ static int attach_afu_directed(struct cxl_context *ctx, u64 wed, u64 amr) ctx->irqs.range[0] = 1; } - for (r = 0; r < CXL_IRQ_RANGES; r++) { - ctx->elem->ivte_offsets[r] = cpu_to_be16(ctx->irqs.offset[r]); - ctx->elem->ivte_ranges[r] = cpu_to_be16(ctx->irqs.range[r]); - } + update_ivtes_directed(ctx); ctx->elem->common.amr = cpu_to_be64(amr); ctx->elem->common.wed = cpu_to_be64(wed); @@ -600,6 +625,22 @@ static int activate_dedicated_process(struct cxl_afu *afu) return cxl_chardev_d_afu_add(afu); } +static void update_ivtes_dedicated(struct cxl_context *ctx) +{ + struct cxl_afu *afu = ctx->afu; + + cxl_p1n_write(afu, CXL_PSL_IVTE_Offset_An, + (((u64)ctx->irqs.offset[0] & 0xffff) << 48) | + (((u64)ctx->irqs.offset[1] & 0xffff) << 32) | + (((u64)ctx->irqs.offset[2] & 0xffff) << 16) | + ((u64)ctx->irqs.offset[3] & 0xffff)); + cxl_p1n_write(afu, CXL_PSL_IVTE_Limit_An, (u64) + 
(((u64)ctx->irqs.range[0] & 0xffff) << 48) | + (((u64)ctx->irqs.range[1] & 0xffff) << 32) | + (((u64)ctx->irqs.range[2] & 0xffff) << 16) | + ((u64)ctx->irqs.range[3] & 0xffff)); +} + static int attach_dedicated(struct cxl_context *ctx, u64 wed, u64 amr) { struct cxl_afu *afu = ctx->afu; @@ -618,16 +659,7 @@ static int attach_dedicated(struct cxl_context *ctx, u64 wed, u64 amr) cxl_prefault(ctx, wed); - cxl_p1n_write(afu, CXL_PSL_IVTE_Offset_An, - (((u64)ctx->irqs.offset[0] & 0xffff) << 48) | - (((u64)ctx->irqs.offset[1] & 0xffff) << 32) | - (((u64)ctx->irqs.offset[2] & 0xffff) << 16) | - ((u64)ctx->irqs.offset[3] & 0xffff)); - cxl_p1n_write(afu, CXL_PSL_IVTE_Limit_An, (u64) - (((u64)ctx->irqs.range[0] & 0xffff) << 48) | - (((u64)ctx->irqs.range[1] & 0xffff) << 32) | - (((u64)ctx->irqs.range[2] & 0xffff) << 16) | - ((u64)ctx->irqs.range[3] & 0xffff)); + update_ivtes_dedicated(ctx); cxl_p2n_write(afu, CXL_PSL_AMR_An, amr); @@ -709,6 +741,15 @@ static inline int detach_process_native_dedicated(struct cxl_context *ctx) return 0; } +static void native_update_ivtes(struct cxl_context *ctx) +{ + if (ctx->afu->current_mode == CXL_MODE_DIRECTED) + return update_ivtes_directed(ctx); + if (ctx->afu->current_mode == CXL_MODE_DEDICATED) + return update_ivtes_dedicated(ctx); + WARN(1, "native_update_ivtes: Bad mode\n"); +} + static inline int detach_process_native_afu_directed(struct cxl_context *ctx) { if (!ctx->pe_inserted) @@ -1128,6 +1169,7 @@ const struct cxl_backend_ops cxl_native_ops = { .irq_wait = native_irq_wait, .attach_process = native_attach_process, .detach_process = native_detach_process, + .update_ivtes = native_update_ivtes, .support_attributes = native_support_attributes, .link_ok = cxl_adapter_link_ok, .release_afu = cxl_pci_release_afu, -- cgit v0.10.2 From 6d382616ac2283ed65c7a6a52d05b064488aa8f8 Mon Sep 17 00:00:00 2001 From: Frederic Barrat Date: Tue, 24 May 2016 03:39:18 +1000 Subject: cxl: Abstract the differences between the PSL and XSL The XSL 
(Translation Service Layer) is a stripped-down version of the PSL (Power Service Layer) used in some cards such as the Mellanox CX4. Like the PSL, it implements the CAIA architecture, but has a number of differences, mostly in its implementation-dependent registers. This adds an ops structure to abstract these differences to bring initial support for XSL CAPI devices. The XSL does not implement the optional architected SERR register; however, while it treats it as a reserved register and should work with no special treatment, attempting to access it will cause the XSL_FEC (First Error Capture) register to be filled out, preventing it from capturing any subsequent errors. Therefore, this patch also prevents the kernel from trying to set up the SERR register so that the FEC register may still be useful, and to save one interrupt. The XSL also uses a special DMA cxl mode, which uses a slightly different init sequence for the CAPP and PHB. The kernel support for this will be in a future patch once the corresponding support has been merged into skiboot. 
Co-authored-by: Ian Munsie Signed-off-by: Ian Munsie Signed-off-by: Michael Ellerman diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h index fbec29b..790faeb 100644 --- a/drivers/misc/cxl/cxl.h +++ b/drivers/misc/cxl/cxl.h @@ -81,6 +81,7 @@ static const cxl_p1_reg_t CXL_PSL_TLBIA = {0x00A8}; static const cxl_p1_reg_t CXL_PSL_AFUSEL = {0x00B0}; /* 0x00C0:7EFF Implementation dependent area */ +/* PSL registers */ static const cxl_p1_reg_t CXL_PSL_FIR1 = {0x0100}; static const cxl_p1_reg_t CXL_PSL_FIR2 = {0x0108}; static const cxl_p1_reg_t CXL_PSL_Timebase = {0x0110}; @@ -91,6 +92,11 @@ static const cxl_p1_reg_t CXL_PSL_FIR_CNTL = {0x0148}; static const cxl_p1_reg_t CXL_PSL_DSNDCTL = {0x0150}; static const cxl_p1_reg_t CXL_PSL_SNWRALLOC = {0x0158}; static const cxl_p1_reg_t CXL_PSL_TRACE = {0x0170}; +/* XSL registers (Mellanox CX4) */ +static const cxl_p1_reg_t CXL_XSL_Timebase = {0x0100}; +static const cxl_p1_reg_t CXL_XSL_TB_CTLSTAT = {0x0108}; +static const cxl_p1_reg_t CXL_XSL_FEC = {0x0158}; +static const cxl_p1_reg_t CXL_XSL_DSNCTL = {0x0168}; /* 0x7F00:7FFF Reserved PCIe MSI-X Pending Bit Array area */ /* 0x8000:FFFF Reserved PCIe MSI-X Table Area */ @@ -525,6 +531,20 @@ struct cxl_context { struct rcu_head rcu; }; +struct cxl_service_layer_ops { + int (*adapter_regs_init)(struct cxl *adapter, struct pci_dev *dev); + int (*afu_regs_init)(struct cxl_afu *afu); + int (*register_serr_irq)(struct cxl_afu *afu); + void (*release_serr_irq)(struct cxl_afu *afu); + void (*debugfs_add_adapter_sl_regs)(struct cxl *adapter, struct dentry *dir); + void (*debugfs_add_afu_sl_regs)(struct cxl_afu *afu, struct dentry *dir); + void (*psl_irq_dump_registers)(struct cxl_context *ctx); + void (*err_irq_dump_registers)(struct cxl *adapter); + void (*debugfs_stop_trace)(struct cxl *adapter); + void (*write_timebase_ctrl)(struct cxl *adapter); + u64 (*timebase_read)(struct cxl *adapter); +}; + struct cxl_native { u64 afu_desc_off; u64 afu_desc_size; @@ -533,6 +553,7 @@ 
struct cxl_native { irq_hw_number_t err_hwirq; unsigned int err_virq; u64 ps_off; + const struct cxl_service_layer_ops *sl_ops; }; struct cxl_guest { @@ -805,6 +826,11 @@ int cxl_tlb_slb_invalidate(struct cxl *adapter); int cxl_afu_disable(struct cxl_afu *afu); int cxl_psl_purge(struct cxl_afu *afu); +void cxl_debugfs_add_adapter_psl_regs(struct cxl *adapter, struct dentry *dir); +void cxl_debugfs_add_adapter_xsl_regs(struct cxl *adapter, struct dentry *dir); +void cxl_debugfs_add_afu_psl_regs(struct cxl_afu *afu, struct dentry *dir); +void cxl_native_psl_irq_dump_regs(struct cxl_context *ctx); +void cxl_native_err_irq_dump_regs(struct cxl *adapter); void cxl_stop_trace(struct cxl *cxl); int cxl_pci_vphb_add(struct cxl_afu *afu); void cxl_pci_vphb_remove(struct cxl_afu *afu); diff --git a/drivers/misc/cxl/debugfs.c b/drivers/misc/cxl/debugfs.c index 5751899..ec7b8a0 100644 --- a/drivers/misc/cxl/debugfs.c +++ b/drivers/misc/cxl/debugfs.c @@ -51,6 +51,19 @@ static struct dentry *debugfs_create_io_x64(const char *name, umode_t mode, return debugfs_create_file(name, mode, parent, (void __force *)value, &fops_io_x64); } +void cxl_debugfs_add_adapter_psl_regs(struct cxl *adapter, struct dentry *dir) +{ + debugfs_create_io_x64("fir1", S_IRUSR, dir, _cxl_p1_addr(adapter, CXL_PSL_FIR1)); + debugfs_create_io_x64("fir2", S_IRUSR, dir, _cxl_p1_addr(adapter, CXL_PSL_FIR2)); + debugfs_create_io_x64("fir_cntl", S_IRUSR, dir, _cxl_p1_addr(adapter, CXL_PSL_FIR_CNTL)); + debugfs_create_io_x64("trace", S_IRUSR | S_IWUSR, dir, _cxl_p1_addr(adapter, CXL_PSL_TRACE)); +} + +void cxl_debugfs_add_adapter_xsl_regs(struct cxl *adapter, struct dentry *dir) +{ + debugfs_create_io_x64("fec", S_IRUSR, dir, _cxl_p1_addr(adapter, CXL_XSL_FEC)); +} + int cxl_debugfs_adapter_add(struct cxl *adapter) { struct dentry *dir; @@ -65,13 +78,10 @@ int cxl_debugfs_adapter_add(struct cxl *adapter) return PTR_ERR(dir); adapter->debugfs = dir; - debugfs_create_io_x64("fir1", S_IRUSR, dir, 
_cxl_p1_addr(adapter, CXL_PSL_FIR1)); - debugfs_create_io_x64("fir2", S_IRUSR, dir, _cxl_p1_addr(adapter, CXL_PSL_FIR2)); - debugfs_create_io_x64("fir_cntl", S_IRUSR, dir, _cxl_p1_addr(adapter, CXL_PSL_FIR_CNTL)); debugfs_create_io_x64("err_ivte", S_IRUSR, dir, _cxl_p1_addr(adapter, CXL_PSL_ErrIVTE)); - debugfs_create_io_x64("trace", S_IRUSR | S_IWUSR, dir, _cxl_p1_addr(adapter, CXL_PSL_TRACE)); - + if (adapter->native->sl_ops->debugfs_add_adapter_sl_regs) + adapter->native->sl_ops->debugfs_add_adapter_sl_regs(adapter, dir); return 0; } @@ -80,6 +90,14 @@ void cxl_debugfs_adapter_remove(struct cxl *adapter) debugfs_remove_recursive(adapter->debugfs); } +void cxl_debugfs_add_afu_psl_regs(struct cxl_afu *afu, struct dentry *dir) +{ + debugfs_create_io_x64("fir", S_IRUSR, dir, _cxl_p1n_addr(afu, CXL_PSL_FIR_SLICE_An)); + debugfs_create_io_x64("serr", S_IRUSR, dir, _cxl_p1n_addr(afu, CXL_PSL_SERR_An)); + debugfs_create_io_x64("afu_debug", S_IRUSR, dir, _cxl_p1n_addr(afu, CXL_AFU_DEBUG_An)); + debugfs_create_io_x64("trace", S_IRUSR | S_IWUSR, dir, _cxl_p1n_addr(afu, CXL_PSL_SLICE_TRACE)); +} + int cxl_debugfs_afu_add(struct cxl_afu *afu) { struct dentry *dir; @@ -94,18 +112,15 @@ int cxl_debugfs_afu_add(struct cxl_afu *afu) return PTR_ERR(dir); afu->debugfs = dir; - debugfs_create_io_x64("fir", S_IRUSR, dir, _cxl_p1n_addr(afu, CXL_PSL_FIR_SLICE_An)); - debugfs_create_io_x64("serr", S_IRUSR, dir, _cxl_p1n_addr(afu, CXL_PSL_SERR_An)); - debugfs_create_io_x64("afu_debug", S_IRUSR, dir, _cxl_p1n_addr(afu, CXL_AFU_DEBUG_An)); debugfs_create_io_x64("sr", S_IRUSR, dir, _cxl_p1n_addr(afu, CXL_PSL_SR_An)); - debugfs_create_io_x64("dsisr", S_IRUSR, dir, _cxl_p2n_addr(afu, CXL_PSL_DSISR_An)); debugfs_create_io_x64("dar", S_IRUSR, dir, _cxl_p2n_addr(afu, CXL_PSL_DAR_An)); debugfs_create_io_x64("sstp0", S_IRUSR, dir, _cxl_p2n_addr(afu, CXL_SSTP0_An)); debugfs_create_io_x64("sstp1", S_IRUSR, dir, _cxl_p2n_addr(afu, CXL_SSTP1_An)); debugfs_create_io_x64("err_status", S_IRUSR, dir, 
_cxl_p2n_addr(afu, CXL_PSL_ErrStat_An)); - debugfs_create_io_x64("trace", S_IRUSR | S_IWUSR, dir, _cxl_p1n_addr(afu, CXL_PSL_SLICE_TRACE)); + if (afu->adapter->native->sl_ops->debugfs_add_afu_sl_regs) + afu->adapter->native->sl_ops->debugfs_add_afu_sl_regs(afu, dir); return 0; } diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c index efe8af9..e80d8f7 100644 --- a/drivers/misc/cxl/native.c +++ b/drivers/misc/cxl/native.c @@ -795,26 +795,38 @@ static int native_get_irq_info(struct cxl_afu *afu, struct cxl_irq_info *info) return 0; } -static irqreturn_t native_handle_psl_slice_error(struct cxl_context *ctx, - u64 dsisr, u64 errstat) +void cxl_native_psl_irq_dump_regs(struct cxl_context *ctx) { u64 fir1, fir2, fir_slice, serr, afu_debug; fir1 = cxl_p1_read(ctx->afu->adapter, CXL_PSL_FIR1); fir2 = cxl_p1_read(ctx->afu->adapter, CXL_PSL_FIR2); fir_slice = cxl_p1n_read(ctx->afu, CXL_PSL_FIR_SLICE_An); - serr = cxl_p1n_read(ctx->afu, CXL_PSL_SERR_An); afu_debug = cxl_p1n_read(ctx->afu, CXL_AFU_DEBUG_An); - dev_crit(&ctx->afu->dev, "PSL ERROR STATUS: 0x%016llx\n", errstat); dev_crit(&ctx->afu->dev, "PSL_FIR1: 0x%016llx\n", fir1); dev_crit(&ctx->afu->dev, "PSL_FIR2: 0x%016llx\n", fir2); - dev_crit(&ctx->afu->dev, "PSL_SERR_An: 0x%016llx\n", serr); + if (ctx->afu->adapter->native->sl_ops->register_serr_irq) { + serr = cxl_p1n_read(ctx->afu, CXL_PSL_SERR_An); + dev_crit(&ctx->afu->dev, "PSL_SERR_An: 0x%016llx\n", serr); + } dev_crit(&ctx->afu->dev, "PSL_FIR_SLICE_An: 0x%016llx\n", fir_slice); dev_crit(&ctx->afu->dev, "CXL_PSL_AFU_DEBUG_An: 0x%016llx\n", afu_debug); +} + +static irqreturn_t native_handle_psl_slice_error(struct cxl_context *ctx, + u64 dsisr, u64 errstat) +{ + + dev_crit(&ctx->afu->dev, "PSL ERROR STATUS: 0x%016llx\n", errstat); - dev_crit(&ctx->afu->dev, "STOPPING CXL TRACE\n"); - cxl_stop_trace(ctx->afu->adapter); + if (ctx->afu->adapter->native->sl_ops->psl_irq_dump_registers) + ctx->afu->adapter->native->sl_ops->psl_irq_dump_registers(ctx); + 
+ if (ctx->afu->adapter->native->sl_ops->debugfs_stop_trace) { + dev_crit(&ctx->afu->dev, "STOPPING CXL TRACE\n"); + ctx->afu->adapter->native->sl_ops->debugfs_stop_trace(ctx->afu->adapter); + } return cxl_ops->ack_irq(ctx, 0, errstat); } @@ -892,6 +904,9 @@ static irqreturn_t native_slice_irq_err(int irq, void *data) struct cxl_afu *afu = data; u64 fir_slice, errstat, serr, afu_debug; + /* + * slice err interrupt is only used with full PSL (no XSL) + */ WARN(irq, "CXL SLICE ERROR interrupt %i\n", irq); serr = cxl_p1n_read(afu, CXL_PSL_SERR_An); @@ -908,23 +923,33 @@ static irqreturn_t native_slice_irq_err(int irq, void *data) return IRQ_HANDLED; } +void cxl_native_err_irq_dump_regs(struct cxl *adapter) +{ + u64 fir1, fir2; + + fir1 = cxl_p1_read(adapter, CXL_PSL_FIR1); + fir2 = cxl_p1_read(adapter, CXL_PSL_FIR2); + + dev_crit(&adapter->dev, "PSL_FIR1: 0x%016llx\nPSL_FIR2: 0x%016llx\n", fir1, fir2); +} + static irqreturn_t native_irq_err(int irq, void *data) { struct cxl *adapter = data; - u64 fir1, fir2, err_ivte; + u64 err_ivte; WARN(1, "CXL ERROR interrupt %i\n", irq); err_ivte = cxl_p1_read(adapter, CXL_PSL_ErrIVTE); dev_crit(&adapter->dev, "PSL_ErrIVTE: 0x%016llx\n", err_ivte); - dev_crit(&adapter->dev, "STOPPING CXL TRACE\n"); - cxl_stop_trace(adapter); - - fir1 = cxl_p1_read(adapter, CXL_PSL_FIR1); - fir2 = cxl_p1_read(adapter, CXL_PSL_FIR2); + if (adapter->native->sl_ops->debugfs_stop_trace) { + dev_crit(&adapter->dev, "STOPPING CXL TRACE\n"); + adapter->native->sl_ops->debugfs_stop_trace(adapter); + } - dev_crit(&adapter->dev, "PSL_FIR1: 0x%016llx\nPSL_FIR2: 0x%016llx\n", fir1, fir2); + if (adapter->native->sl_ops->err_irq_dump_registers) + adapter->native->sl_ops->err_irq_dump_registers(adapter); return IRQ_HANDLED; } diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index a08fcc8..556718d 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -352,13 +352,10 @@ static u64 get_capp_unit_id(struct device_node *np) return 0; } 
-static int init_implementation_adapter_regs(struct cxl *adapter, struct pci_dev *dev) +static int calc_capp_routing(struct pci_dev *dev, u64 *chipid, u64 *capp_unit_id) { struct device_node *np; const __be32 *prop; - u64 psl_dsnctl; - u64 chipid; - u64 capp_unit_id; if (!(np = pnv_pci_get_phb_node(dev))) return -ENODEV; @@ -367,14 +364,28 @@ static int init_implementation_adapter_regs(struct cxl *adapter, struct pci_dev np = of_get_next_parent(np); if (!np) return -ENODEV; - chipid = be32_to_cpup(prop); - capp_unit_id = get_capp_unit_id(np); + *chipid = be32_to_cpup(prop); + *capp_unit_id = get_capp_unit_id(np); of_node_put(np); - if (!capp_unit_id) { + if (!*capp_unit_id) { pr_err("cxl: invalid capp unit id\n"); return -ENODEV; } + return 0; +} + +static int init_implementation_adapter_psl_regs(struct cxl *adapter, struct pci_dev *dev) +{ + u64 psl_dsnctl; + u64 chipid; + u64 capp_unit_id; + int rc; + + rc = calc_capp_routing(dev, &chipid, &capp_unit_id); + if (rc) + return rc; + psl_dsnctl = 0x0000900000000000ULL; /* pteupd ttype, scdone */ psl_dsnctl |= (0x2ULL << (63-38)); /* MMIO hang pulse: 256 us */ /* Tell PSL where to route data to */ @@ -393,8 +404,61 @@ static int init_implementation_adapter_regs(struct cxl *adapter, struct pci_dev return 0; } +static int init_implementation_adapter_xsl_regs(struct cxl *adapter, struct pci_dev *dev) +{ + u64 xsl_dsnctl; + u64 chipid; + u64 capp_unit_id; + int rc; + + rc = calc_capp_routing(dev, &chipid, &capp_unit_id); + if (rc) + return rc; + + /* Tell XSL where to route data to */ + xsl_dsnctl = 0x0000600000000000ULL | (chipid << (63-5)); + xsl_dsnctl |= (capp_unit_id << (63-13)); + cxl_p1_write(adapter, CXL_XSL_DSNCTL, xsl_dsnctl); + + return 0; +} + +/* PSL & XSL */ +#define TBSYNC_CAL(n) (((u64)n & 0x7) << (63-3)) #define TBSYNC_CNT(n) (((u64)n & 0x7) << (63-6)) -#define _2048_250MHZ_CYCLES 1 +/* For the PSL this is a multiple for 0 < n <= 7: */ +#define PSL_2048_250MHZ_CYCLES 1 + +static void 
write_timebase_ctrl_psl(struct cxl *adapter) +{ + cxl_p1_write(adapter, CXL_PSL_TB_CTLSTAT, + TBSYNC_CNT(2 * PSL_2048_250MHZ_CYCLES)); +} + +/* XSL */ +#define TBSYNC_ENA (1ULL << 63) +/* For the XSL this is 2**n * 2000 clocks for 0 < n <= 6: */ +#define XSL_2000_CLOCKS 1 +#define XSL_4000_CLOCKS 2 +#define XSL_8000_CLOCKS 3 + +static void write_timebase_ctrl_xsl(struct cxl *adapter) +{ + cxl_p1_write(adapter, CXL_XSL_TB_CTLSTAT, + TBSYNC_ENA | + TBSYNC_CAL(3) | + TBSYNC_CNT(XSL_4000_CLOCKS)); +} + +static u64 timebase_read_psl(struct cxl *adapter) +{ + return cxl_p1_read(adapter, CXL_PSL_Timebase); +} + +static u64 timebase_read_xsl(struct cxl *adapter) +{ + return cxl_p1_read(adapter, CXL_XSL_Timebase); +} static void cxl_setup_psl_timebase(struct cxl *adapter, struct pci_dev *dev) { @@ -421,8 +485,7 @@ static void cxl_setup_psl_timebase(struct cxl *adapter, struct pci_dev *dev) * Setup PSL Timebase Control and Status register * with the recommended Timebase Sync Count value */ - cxl_p1_write(adapter, CXL_PSL_TB_CTLSTAT, - TBSYNC_CNT(2 * _2048_250MHZ_CYCLES)); + adapter->native->sl_ops->write_timebase_ctrl(adapter); /* Enable PSL Timebase */ cxl_p1_write(adapter, CXL_PSL_Control, 0x0000000000000000); @@ -435,7 +498,7 @@ static void cxl_setup_psl_timebase(struct cxl *adapter, struct pci_dev *dev) dev_info(&dev->dev, "PSL timebase can't synchronize\n"); return; } - psl_tb = cxl_p1_read(adapter, CXL_PSL_Timebase); + psl_tb = adapter->native->sl_ops->timebase_read(adapter); delta = mftb() - psl_tb; if (delta < 0) delta = -delta; @@ -445,7 +508,7 @@ static void cxl_setup_psl_timebase(struct cxl *adapter, struct pci_dev *dev) return; } -static int init_implementation_afu_regs(struct cxl_afu *afu) +static int init_implementation_afu_psl_regs(struct cxl_afu *afu) { /* read/write masks for this slice */ cxl_p1n_write(afu, CXL_PSL_APCALLOC_A, 0xFFFFFFFEFEFEFEFEULL); @@ -753,11 +816,13 @@ static int sanitise_afu_regs(struct cxl_afu *afu) else cxl_p2n_write(afu, 
CXL_PSL_TFC_An, CXL_PSL_TFC_An_A); } - reg = cxl_p1n_read(afu, CXL_PSL_SERR_An); - if (reg) { - if (reg & ~0xffff) - dev_warn(&afu->dev, "AFU had pending SERR: %#016llx\n", reg); - cxl_p1n_write(afu, CXL_PSL_SERR_An, reg & ~0xffff); + if (afu->adapter->native->sl_ops->register_serr_irq) { + reg = cxl_p1n_read(afu, CXL_PSL_SERR_An); + if (reg) { + if (reg & ~0xffff) + dev_warn(&afu->dev, "AFU had pending SERR: %#016llx\n", reg); + cxl_p1n_write(afu, CXL_PSL_SERR_An, reg & ~0xffff); + } } reg = cxl_p2n_read(afu, CXL_PSL_ErrStat_An); if (reg) { @@ -835,11 +900,13 @@ static int pci_configure_afu(struct cxl_afu *afu, struct cxl *adapter, struct pc if ((rc = cxl_afu_descriptor_looks_ok(afu))) goto err1; - if ((rc = init_implementation_afu_regs(afu))) - goto err1; + if (adapter->native->sl_ops->afu_regs_init) + if ((rc = adapter->native->sl_ops->afu_regs_init(afu))) + goto err1; - if ((rc = cxl_native_register_serr_irq(afu))) - goto err1; + if (adapter->native->sl_ops->register_serr_irq) + if ((rc = adapter->native->sl_ops->register_serr_irq(afu))) + goto err1; if ((rc = cxl_native_register_psl_irq(afu))) goto err2; @@ -847,7 +914,8 @@ static int pci_configure_afu(struct cxl_afu *afu, struct cxl *adapter, struct pc return 0; err2: - cxl_native_release_serr_irq(afu); + if (adapter->native->sl_ops->release_serr_irq) + adapter->native->sl_ops->release_serr_irq(afu); err1: pci_unmap_slice_regs(afu); return rc; @@ -856,7 +924,8 @@ err1: static void pci_deconfigure_afu(struct cxl_afu *afu) { cxl_native_release_psl_irq(afu); - cxl_native_release_serr_irq(afu); + if (afu->adapter->native->sl_ops->release_serr_irq) + afu->adapter->native->sl_ops->release_serr_irq(afu); pci_unmap_slice_regs(afu); } @@ -1177,7 +1246,7 @@ static int cxl_configure_adapter(struct cxl *adapter, struct pci_dev *dev) if ((rc = sanitise_adapter_regs(adapter))) goto err; - if ((rc = init_implementation_adapter_regs(adapter, dev))) + if ((rc = adapter->native->sl_ops->adapter_regs_init(adapter, dev))) goto 
err; if ((rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_CAPI))) @@ -1212,6 +1281,39 @@ static void cxl_deconfigure_adapter(struct cxl *adapter) pci_disable_device(pdev); } +static const struct cxl_service_layer_ops psl_ops = { + .adapter_regs_init = init_implementation_adapter_psl_regs, + .afu_regs_init = init_implementation_afu_psl_regs, + .register_serr_irq = cxl_native_register_serr_irq, + .release_serr_irq = cxl_native_release_serr_irq, + .debugfs_add_adapter_sl_regs = cxl_debugfs_add_adapter_psl_regs, + .debugfs_add_afu_sl_regs = cxl_debugfs_add_afu_psl_regs, + .psl_irq_dump_registers = cxl_native_psl_irq_dump_regs, + .err_irq_dump_registers = cxl_native_err_irq_dump_regs, + .debugfs_stop_trace = cxl_stop_trace, + .write_timebase_ctrl = write_timebase_ctrl_psl, + .timebase_read = timebase_read_psl, +}; + +static const struct cxl_service_layer_ops xsl_ops = { + .adapter_regs_init = init_implementation_adapter_xsl_regs, + .debugfs_add_adapter_sl_regs = cxl_debugfs_add_adapter_xsl_regs, + .write_timebase_ctrl = write_timebase_ctrl_xsl, + .timebase_read = timebase_read_xsl, +}; + +static void set_sl_ops(struct cxl *adapter, struct pci_dev *dev) +{ + if (dev->vendor == PCI_VENDOR_ID_MELLANOX && dev->device == 0x1013) { + dev_info(&adapter->dev, "Device uses an XSL\n"); + adapter->native->sl_ops = &xsl_ops; + } else { + dev_info(&adapter->dev, "Device uses a PSL\n"); + adapter->native->sl_ops = &psl_ops; + } +} + + static struct cxl *cxl_pci_init_adapter(struct pci_dev *dev) { struct cxl *adapter; @@ -1227,6 +1329,8 @@ static struct cxl *cxl_pci_init_adapter(struct pci_dev *dev) goto err_release; } + set_sl_ops(adapter, dev); + /* Set defaults for parameters which need to persist over * configure/reconfigure */ -- cgit v0.10.2 From b385c9e971468eb8816b26742449d6d1e49f55f1 Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Wed, 8 Jun 2016 15:09:54 +1000 Subject: cxl: Add support for CAPP DMA mode This adds support for using CAPP DMA mode, which is required for XSL 
based cards such as the Mellanox CX4 to function. This is currently an RFC as it depends on the corresponding support to be merged into skiboot first, which was submitted here: http://patchwork.ozlabs.org/patch/625582/ In the event that the skiboot on the system does not have the above support, it will indicate as such in the kernel log and abort the init process. Signed-off-by: Ian Munsie Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h index 70b5cbc..cd9371b 100644 --- a/arch/powerpc/include/asm/opal-api.h +++ b/arch/powerpc/include/asm/opal-api.h @@ -825,6 +825,7 @@ enum { OPAL_PHB_CAPI_MODE_CAPI = 1, OPAL_PHB_CAPI_MODE_SNOOP_OFF = 2, OPAL_PHB_CAPI_MODE_SNOOP_ON = 3, + OPAL_PHB_CAPI_MODE_DMA = 4, }; /* OPAL I2C request */ diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 1fc53e0..2c0e09f 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2793,7 +2793,9 @@ int pnv_phb_to_cxl_mode(struct pci_dev *dev, uint64_t mode) pe_info(pe, "Switching PHB to CXL\n"); rc = opal_pci_set_phb_cxl_mode(phb->opal_id, mode, pe->pe_number); - if (rc) + if (rc == OPAL_UNSUPPORTED) + dev_err(&dev->dev, "Required cxl mode not supported by firmware - update skiboot\n"); + else if (rc) dev_err(&dev->dev, "opal_pci_set_phb_cxl_mode failed: %i\n", rc); return rc; diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h index 790faeb..ce2b9d5 100644 --- a/drivers/misc/cxl/cxl.h +++ b/drivers/misc/cxl/cxl.h @@ -543,6 +543,7 @@ struct cxl_service_layer_ops { void (*debugfs_stop_trace)(struct cxl *adapter); void (*write_timebase_ctrl)(struct cxl *adapter); u64 (*timebase_read)(struct cxl *adapter); + int capi_mode; }; struct cxl_native { diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index 556718d..648817a 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -1249,7 +1249,7 @@ static 
int cxl_configure_adapter(struct cxl *adapter, struct pci_dev *dev) if ((rc = adapter->native->sl_ops->adapter_regs_init(adapter, dev))) goto err; - if ((rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_CAPI))) + if ((rc = pnv_phb_to_cxl_mode(dev, adapter->native->sl_ops->capi_mode))) goto err; /* If recovery happened, the last step is to turn on snooping. @@ -1293,6 +1293,7 @@ static const struct cxl_service_layer_ops psl_ops = { .debugfs_stop_trace = cxl_stop_trace, .write_timebase_ctrl = write_timebase_ctrl_psl, .timebase_read = timebase_read_psl, + .capi_mode = OPAL_PHB_CAPI_MODE_CAPI, }; static const struct cxl_service_layer_ops xsl_ops = { @@ -1300,6 +1301,7 @@ static const struct cxl_service_layer_ops xsl_ops = { .debugfs_add_adapter_sl_regs = cxl_debugfs_add_adapter_xsl_regs, .write_timebase_ctrl = write_timebase_ctrl_xsl, .timebase_read = timebase_read_xsl, + .capi_mode = OPAL_PHB_CAPI_MODE_DMA, }; static void set_sl_ops(struct cxl *adapter, struct pci_dev *dev) -- cgit v0.10.2 From a430739009384ba2c4804f3a427334ff395433cd Mon Sep 17 00:00:00 2001 From: Frederic Barrat Date: Wed, 15 Jun 2016 16:42:16 +0200 Subject: cxl: Make vPHB device node match adapter's On bare-metal, when a device is attached to the cxl card, lsvpd shows a location code such as (with cxlflash): # lsvpd -l sg22 ... *YL U78CB.001.WZS0073-P1-C33-B0-T0-L0 which makes it hard to easily identify the cxl adapter owning the flash device, since in this example C33 refers to a P8 processor. lsvpd looks in the parent devices until it finds a location code, so the device node for the vPHB ends up being used. By reusing the device node of the adapter for the vPHB, lsvpd shows: # lsvpd -l sg16 ... *YL U78C9.001.WZS09XA-P1-C7-B1-T0-L3 where C7 is the PCI slot of the cxl adapter. On powerVM, the vPHB was already using the adapter device node, so there's no change there. Tested by cxlflash on bare-metal and powerVM. Signed-off-by: Frederic Barrat Reviewed-by: Matthew R. 
Ochs Acked-by: Ian Munsie Signed-off-by: Michael Ellerman diff --git a/drivers/misc/cxl/vphb.c b/drivers/misc/cxl/vphb.c index cdc7723..012b6aa 100644 --- a/drivers/misc/cxl/vphb.c +++ b/drivers/misc/cxl/vphb.c @@ -208,20 +208,19 @@ static struct pci_controller_ops cxl_pci_controller_ops = int cxl_pci_vphb_add(struct cxl_afu *afu) { - struct pci_dev *phys_dev; - struct pci_controller *phb, *phys_phb; + struct pci_controller *phb; struct device_node *vphb_dn; struct device *parent; - if (cpu_has_feature(CPU_FTR_HVMODE)) { - phys_dev = to_pci_dev(afu->adapter->dev.parent); - phys_phb = pci_bus_to_host(phys_dev->bus); - vphb_dn = phys_phb->dn; - parent = &phys_dev->dev; - } else { - vphb_dn = afu->adapter->dev.parent->of_node; - parent = afu->adapter->dev.parent; - } + /* The parent device is the adapter. Reuse the device node of + * the adapter. + * We don't seem to care what device node is used for the vPHB, + * but tools such as lsvpd walk up the device parents looking + * for a valid location code, so we might as well show devices + * attached to the adapter as being located on that adapter. + */ + parent = afu->adapter->dev.parent; + vphb_dn = parent->of_node; /* Alloc and setup PHB data structure */ phb = pcibios_alloc_controller(vphb_dn); -- cgit v0.10.2 From fb36e90736938d50fdaa1be7afdb21608d402c2b Mon Sep 17 00:00:00 2001 From: Russell Currey Date: Fri, 17 Jun 2016 15:25:17 +1000 Subject: powerpc/pci: Fix SRIOV not building without EEH enabled On Book3E CPUs (and possibly other configs), it is possible to have SRIOV (CONFIG_PCI_IOV) set without CONFIG_EEH. The SRIOV code does not check for this, and if EEH is disabled, pci_dn.c fails to build. Fix this by gating the EEH-specific code in the SRIOV implementation behind CONFIG_EEH. 
Fixes: 39218cd0 ("powerpc/eeh: EEH device for VF") Reported-by: Michael Ellerman Signed-off-by: Russell Currey Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c index ecdccce..afeda26 100644 --- a/arch/powerpc/kernel/pci_dn.c +++ b/arch/powerpc/kernel/pci_dn.c @@ -181,7 +181,9 @@ struct pci_dn *add_dev_pci_data(struct pci_dev *pdev) { #ifdef CONFIG_PCI_IOV struct pci_dn *parent, *pdn; +#ifdef CONFIG_EEH struct eeh_dev *edev; +#endif /* CONFIG_EEH */ int i; /* Only support IOV for now */ @@ -208,11 +210,13 @@ struct pci_dn *add_dev_pci_data(struct pci_dev *pdev) return NULL; } +#ifdef CONFIG_EEH /* Create the EEH device for the VF */ eeh_dev_init(pdn, pci_bus_to_host(pdev->bus)); edev = pdn_to_eeh_dev(pdn); BUG_ON(!edev); edev->physfn = pdev; +#endif /* CONFIG_EEH */ } #endif /* CONFIG_PCI_IOV */ @@ -266,12 +270,14 @@ void remove_dev_pci_data(struct pci_dev *pdev) pdn->devfn != pci_iov_virtfn_devfn(pdev, i)) continue; +#ifdef CONFIG_EEH /* Release EEH device for the VF */ edev = pdn_to_eeh_dev(pdn); if (edev) { pdn->edev = NULL; kfree(edev); } +#endif /* CONFIG_EEH */ if (!list_empty(&pdn->list)) list_del(&pdn->list); -- cgit v0.10.2 From 61ed9cfb1b0951a3b4b98dd8bfb98eeb112cfee4 Mon Sep 17 00:00:00 2001 From: Thiago Jung Bauermann Date: Thu, 31 Mar 2016 17:10:40 -0300 Subject: powerpc/kprobes: Remove kretprobe_trampoline_holder. 
Fixes the following testsuite failure: $ sudo ./perf test -v kallsyms 1: vmlinux symtab matches kallsyms : --- start --- test child forked, pid 12489 Using /proc/kcore for kernel object code Looking at the vmlinux_path (8 entries long) Using /boot/vmlinux for symbols 0xc00000000003d300: diff name v: .kretprobe_trampoline_holder k: kretprobe_trampoline Maps only in vmlinux: c00000000086ca38-c000000000879b6c 87ca38 [kernel].text.unlikely c000000000879b6c-c000000000bf0000 889b6c [kernel].meminit.text c000000000bf0000-c000000000c53264 c00000 [kernel].init.text c000000000c53264-d000000004250000 c63264 [kernel].exit.text d000000004250000-d000000004450000 0 [libcrc32c] d000000004450000-d000000004620000 0 [xfs] d000000004620000-d000000004680000 0 [autofs4] d000000004680000-d0000000046e0000 0 [x_tables] d0000000046e0000-d000000004780000 0 [ip_tables] d000000004780000-d0000000047e0000 0 [rng_core] d0000000047e0000-ffffffffffffffff 0 [pseries_rng] Maps in vmlinux with a different name in kallsyms: Maps only in kallsyms: d000000000000000-f000000000000000 1000000000010000 [kernel.kallsyms] f000000000000000-ffffffffffffffff 3000000000010000 [kernel.kallsyms] test child finished with -1 ---- end ---- vmlinux symtab matches kallsyms: FAILED! The problem is that the kretprobe_trampoline symbol looks like this: $ eu-readelf -s /boot/vmlinux G kretprobe_trampoline 2431: c000000001302368 24 NOTYPE LOCAL DEFAULT 37 kretprobe_trampoline_holder 2432: c00000000003d300 8 FUNC LOCAL DEFAULT 1 .kretprobe_trampoline_holder 97543: c00000000003d300 0 NOTYPE GLOBAL DEFAULT 1 kretprobe_trampoline Its type is NOTYPE, and its size is 0, and this is a problem because symbol-elf.c:dso__load_sym skips function symbols that are not STT_FUNC or STT_GNU_IFUNC (this is determined by elf_sym__is_function). 
Even if the type is changed to STT_FUNC, when dso__load_sym calls symbols__fixup_duplicate, the kretprobe_trampoline symbol is dropped in favour of .kretprobe_trampoline_holder because the latter has non-zero size (as determined by choose_best_symbol). With this patch, all vmlinux symbols match /proc/kallsyms and the testcase passes. Commit c1c355ce14c0 ("x86/kprobes: Get rid of kretprobe_trampoline_holder()") gets rid of kretprobe_trampoline_holder altogether on x86. This commit does the same on powerpc. This change introduces no regressions on the perf and ftracetest testsuite results. Reviewed-by: Naveen N. Rao Signed-off-by: Thiago Jung Bauermann Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 7d48e3b..3ed8ec09 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -278,12 +278,11 @@ no_kprobe: * - When the probed function returns, this probe * causes the handlers to fire */ -static void __used kretprobe_trampoline_holder(void) -{ - asm volatile(".global kretprobe_trampoline\n" - "kretprobe_trampoline:\n" - "nop\n"); -} +asm(".global kretprobe_trampoline\n" + ".type kretprobe_trampoline, @function\n" + "kretprobe_trampoline:\n" + "nop\n" + ".size kretprobe_trampoline, .-kretprobe_trampoline\n"); /* * Called when the probe at kretprobe trampoline is hit -- cgit v0.10.2 From 103b7827d977ea34c982e6a9d2f960f731f7ee76 Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Fri, 4 Mar 2016 10:31:48 +0530 Subject: powerpc: Fix misleading comment in early_setup_secondary() The current comment in early_setup_secondary() for the paca->soft_enabled update is misleading. The comment should say that interrupts are marked "disabled" rather than "enabled". Fix the typo. 
Signed-off-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 96d4a2b..5530bb5 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -321,7 +321,7 @@ void __init early_setup(unsigned long dt_ptr) #ifdef CONFIG_SMP void early_setup_secondary(void) { - /* Mark interrupts enabled in PACA */ + /* Mark interrupts disabled in PACA */ get_paca()->soft_enabled = 0; /* Initialize the hash table or TLB handling */ -- cgit v0.10.2 From b57bd2de8c6c9aa03f1b899edd6f5582cc8b5b08 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Thu, 9 Jun 2016 12:31:08 +1000 Subject: powerpc: Improve FSCR init and context switching This fixes a few issues with FSCR init and switching. In commit 152d523e6307 ("powerpc: Create context switch helpers save_sprs() and restore_sprs()") we moved the setting of the FSCR register from inside a CPU_FTR_ARCH_207S section to inside just a CPU_FTR_DSCR section. Hence we are setting FSCR on POWER6/7 where the FSCR doesn't exist. This is harmless but we shouldn't do it. Also, we can simplify the FSCR context switch. We don't need to go through the calculation involving dscr_inherit. We can just restore what we saved last time. We also set an initial value in INIT_THREAD, so that pid 1, which is cloned from that, gets a sane value. Based on a patch by Jack Miller. 
Signed-off-by: Michael Neuling Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index c0c27bd..f6b1a5f 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -347,6 +347,7 @@ struct thread_struct { .fs = KERNEL_DS, \ .fpexc_mode = 0, \ .ppr = INIT_PPR, \ + .fscr = FSCR_TAR | FSCR_EBB \ } #endif diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index c5c3ae2..6d0a831 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1031,18 +1031,11 @@ static inline void restore_sprs(struct thread_struct *old_thread, #ifdef CONFIG_PPC_BOOK3S_64 if (cpu_has_feature(CPU_FTR_DSCR)) { u64 dscr = get_paca()->dscr_default; - u64 fscr = old_thread->fscr & ~FSCR_DSCR; - - if (new_thread->dscr_inherit) { + if (new_thread->dscr_inherit) dscr = new_thread->dscr; - fscr |= FSCR_DSCR; - } if (old_thread->dscr != dscr) mtspr(SPRN_DSCR, dscr); - - if (old_thread->fscr != fscr) - mtspr(SPRN_FSCR, fscr); } if (cpu_has_feature(CPU_FTR_ARCH_207S)) { @@ -1053,6 +1046,9 @@ static inline void restore_sprs(struct thread_struct *old_thread, if (old_thread->ebbrr != new_thread->ebbrr) mtspr(SPRN_EBBRR, new_thread->ebbrr); + if (old_thread->fscr != new_thread->fscr) + mtspr(SPRN_FSCR, new_thread->fscr); + if (old_thread->tar != new_thread->tar) mtspr(SPRN_TAR, new_thread->tar); } diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 11d15e7..d2518c3 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -1419,7 +1419,8 @@ void facility_unavailable_exception(struct pt_regs *regs) rd = (instword >> 21) & 0x1f; current->thread.dscr = regs->gpr[rd]; current->thread.dscr_inherit = 1; - mtspr(SPRN_FSCR, value | FSCR_DSCR); + current->thread.fscr |= FSCR_DSCR; + mtspr(SPRN_FSCR, current->thread.fscr); } /* Read from DSCR (mfspr RT, 0x03) */ -- cgit v0.10.2 From bd3ea317fddfd0f2044f94bed294b90c4bc8e69e 
Mon Sep 17 00:00:00 2001 From: Jack Miller Date: Thu, 9 Jun 2016 12:31:09 +1000 Subject: powerpc: Load Monitor Register Support This enables new registers, LMRR and LMSER, that can trigger an EBB in userspace code when a monitored load (via the new ldmx instruction) loads memory from a monitored space. This facility is controlled by a new FSCR bit, LM. This patch disables the FSCR LM control bit on task init and enables that bit when a load monitor facility unavailable exception is taken for using it. On context switch, this bit is then used to determine whether the two relevant registers are saved and restored. This is done lazily for performance reasons. Signed-off-by: Jack Miller Signed-off-by: Michael Neuling Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index f6b1a5f..b5925d5 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -314,6 +314,8 @@ struct thread_struct { unsigned long mmcr2; unsigned mmcr0; unsigned used_ebb; + unsigned long lmrr; + unsigned long lmser; #endif }; diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index a0948f4..ce44fe2 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -282,6 +282,8 @@ #define SPRN_HRMOR 0x139 /* Real mode offset register */ #define SPRN_HSRR0 0x13A /* Hypervisor Save/Restore 0 */ #define SPRN_HSRR1 0x13B /* Hypervisor Save/Restore 1 */ +#define SPRN_LMRR 0x32D /* Load Monitor Region Register */ +#define SPRN_LMSER 0x32E /* Load Monitor Section Enable Register */ #define SPRN_IC 0x350 /* Virtual Instruction Count */ #define SPRN_VTB 0x351 /* Virtual Time Base */ #define SPRN_LDBAR 0x352 /* LD Base Address Register */ @@ -291,6 +293,7 @@ #define SPRN_PMCR 0x374 /* Power Management Control Register */ /* HFSCR and FSCR bit numbers are the same */ +#define FSCR_LM_LG 11 /* Enable Load Monitor Registers */ #define FSCR_TAR_LG 8 /* Enable Target 
Address Register */ #define FSCR_EBB_LG 7 /* Enable Event Based Branching */ #define FSCR_TM_LG 5 /* Enable Transactional Memory */ @@ -300,10 +303,12 @@ #define FSCR_VECVSX_LG 1 /* Enable VMX/VSX */ #define FSCR_FP_LG 0 /* Enable Floating Point */ #define SPRN_FSCR 0x099 /* Facility Status & Control Register */ +#define FSCR_LM __MASK(FSCR_LM_LG) #define FSCR_TAR __MASK(FSCR_TAR_LG) #define FSCR_EBB __MASK(FSCR_EBB_LG) #define FSCR_DSCR __MASK(FSCR_DSCR_LG) #define SPRN_HFSCR 0xbe /* HV=1 Facility Status & Control Register */ +#define HFSCR_LM __MASK(FSCR_LM_LG) #define HFSCR_TAR __MASK(FSCR_TAR_LG) #define HFSCR_EBB __MASK(FSCR_EBB_LG) #define HFSCR_TM __MASK(FSCR_TM_LG) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 6d0a831..ddceeb9 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1017,6 +1017,14 @@ static inline void save_sprs(struct thread_struct *t) */ t->tar = mfspr(SPRN_TAR); } + + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + /* Conditionally save Load Monitor registers, if enabled */ + if (t->fscr & FSCR_LM) { + t->lmrr = mfspr(SPRN_LMRR); + t->lmser = mfspr(SPRN_LMSER); + } + } #endif } @@ -1052,6 +1060,16 @@ static inline void restore_sprs(struct thread_struct *old_thread, if (old_thread->tar != new_thread->tar) mtspr(SPRN_TAR, new_thread->tar); } + + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + /* Conditionally restore Load Monitor registers, if enabled */ + if (new_thread->fscr & FSCR_LM) { + if (old_thread->lmrr != new_thread->lmrr) + mtspr(SPRN_LMRR, new_thread->lmrr); + if (old_thread->lmser != new_thread->lmser) + mtspr(SPRN_LMSER, new_thread->lmser); + } + } #endif } diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index d2518c3..f7e2f2e 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -1377,6 +1377,7 @@ void facility_unavailable_exception(struct pt_regs *regs) [FSCR_TM_LG] = "TM", [FSCR_EBB_LG] = "EBB", [FSCR_TAR_LG] = "TAR", + 
[FSCR_LM_LG] = "LM", }; char *facility = "unknown"; u64 value; @@ -1434,6 +1435,14 @@ void facility_unavailable_exception(struct pt_regs *regs) emulate_single_step(regs); } return; + } else if ((status == FSCR_LM_LG) && cpu_has_feature(CPU_FTR_ARCH_300)) { + /* + * This process has touched LM, so turn it on forever + * for this process + */ + current->thread.fscr |= FSCR_LM; + mtspr(SPRN_FSCR, current->thread.fscr); + return; } if ((status < ARRAY_SIZE(facility_strings)) && -- cgit v0.10.2 From 16c19a2e983346c547501795aadffde1977b058d Mon Sep 17 00:00:00 2001 From: Jack Miller Date: Thu, 9 Jun 2016 12:31:10 +1000 Subject: selftests/powerpc: Load Monitor Register Tests Adds two tests. One is a simple test to ensure that the new registers LMRR and LMSER are properly maintained. The other actually uses the existing EBB test infrastructure to test that LMRR and LMSER behave as documented. Signed-off-by: Jack Miller Signed-off-by: Michael Neuling Signed-off-by: Michael Ellerman diff --git a/tools/testing/selftests/powerpc/pmu/ebb/.gitignore b/tools/testing/selftests/powerpc/pmu/ebb/.gitignore index 42bddbe..44b7df1 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/.gitignore +++ b/tools/testing/selftests/powerpc/pmu/ebb/.gitignore @@ -20,3 +20,5 @@ back_to_back_ebbs_test lost_exception_test no_handler_test cycles_with_mmcr2_test +ebb_lmr +ebb_lmr_regs \ No newline at end of file diff --git a/tools/testing/selftests/powerpc/pmu/ebb/Makefile b/tools/testing/selftests/powerpc/pmu/ebb/Makefile index 8d2279c4..6b0453e 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/Makefile +++ b/tools/testing/selftests/powerpc/pmu/ebb/Makefile @@ -14,7 +14,7 @@ TEST_PROGS := reg_access_test event_attributes_test cycles_test \ fork_cleanup_test ebb_on_child_test \ ebb_on_willing_child_test back_to_back_ebbs_test \ lost_exception_test no_handler_test \ - cycles_with_mmcr2_test + cycles_with_mmcr2_test ebb_lmr ebb_lmr_regs all: $(TEST_PROGS) diff --git 
a/tools/testing/selftests/powerpc/pmu/ebb/ebb_lmr.c b/tools/testing/selftests/powerpc/pmu/ebb/ebb_lmr.c new file mode 100644 index 0000000..c47ebd5 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/ebb/ebb_lmr.c @@ -0,0 +1,143 @@ +/* + * Copyright 2016, Jack Miller, IBM Corp. + * Licensed under GPLv2. + */ + +#include +#include + +#include "ebb.h" +#include "ebb_lmr.h" + +#define SIZE (32 * 1024 * 1024) /* 32M */ +#define LM_SIZE 0 /* Smallest encoding, 32M */ + +#define SECTIONS 64 /* 1 per bit in LMSER */ +#define SECTION_SIZE (SIZE / SECTIONS) +#define SECTION_LONGS (SECTION_SIZE / sizeof(long)) + +static unsigned long *test_mem; + +static int lmr_count = 0; + +void ebb_lmr_handler(void) +{ + lmr_count++; +} + +void ldmx_full_section(unsigned long *mem, int section) +{ + unsigned long *ptr; + int i; + + for (i = 0; i < SECTION_LONGS; i++) { + ptr = &mem[(SECTION_LONGS * section) + i]; + ldmx((unsigned long) &ptr); + ebb_lmr_reset(); + } +} + +unsigned long section_masks[] = { + 0x8000000000000000, + 0xFF00000000000000, + 0x0000000F70000000, + 0x8000000000000001, + 0xF0F0F0F0F0F0F0F0, + 0x0F0F0F0F0F0F0F0F, + 0x0 +}; + +int ebb_lmr_section_test(unsigned long *mem) +{ + unsigned long *mask = section_masks; + int i; + + for (; *mask; mask++) { + mtspr(SPRN_LMSER, *mask); + printf("Testing mask 0x%016lx\n", mfspr(SPRN_LMSER)); + + for (i = 0; i < 64; i++) { + lmr_count = 0; + ldmx_full_section(mem, i); + if (*mask & (1UL << (63 - i))) + FAIL_IF(lmr_count != SECTION_LONGS); + else + FAIL_IF(lmr_count); + } + } + + return 0; +} + +int ebb_lmr(void) +{ + int i; + + SKIP_IF(!lmr_is_supported()); + + setup_ebb_handler(ebb_lmr_handler); + + ebb_global_enable(); + + FAIL_IF(posix_memalign((void **)&test_mem, SIZE, SIZE) != 0); + + mtspr(SPRN_LMSER, 0); + + FAIL_IF(mfspr(SPRN_LMSER) != 0); + + mtspr(SPRN_LMRR, ((unsigned long)test_mem | LM_SIZE)); + + FAIL_IF(mfspr(SPRN_LMRR) != ((unsigned long)test_mem | LM_SIZE)); + + /* Read every single byte to ensure we get no 
false positives */ + for (i = 0; i < SECTIONS; i++) + ldmx_full_section(test_mem, i); + + FAIL_IF(lmr_count != 0); + + /* Turn on the first section */ + + mtspr(SPRN_LMSER, (1UL << 63)); + FAIL_IF(mfspr(SPRN_LMSER) != (1UL << 63)); + + /* Enable LM (BESCR) */ + + mtspr(SPRN_BESCR, mfspr(SPRN_BESCR) | BESCR_LME); + FAIL_IF(!(mfspr(SPRN_BESCR) & BESCR_LME)); + + ldmx((unsigned long)&test_mem); + + FAIL_IF(lmr_count != 1); // exactly one exception + FAIL_IF(mfspr(SPRN_BESCR) & BESCR_LME); // LM now disabled + FAIL_IF(!(mfspr(SPRN_BESCR) & BESCR_LMEO)); // occurred bit set + + printf("Simple LMR EBB OK\n"); + + /* This shouldn't cause an EBB since it's been disabled */ + ldmx((unsigned long)&test_mem); + FAIL_IF(lmr_count != 1); + + printf("LMR disable on EBB OK\n"); + + ebb_lmr_reset(); + + /* This should cause an EBB or reset is broken */ + ldmx((unsigned long)&test_mem); + FAIL_IF(lmr_count != 2); + + printf("LMR reset EBB OK\n"); + + ebb_lmr_reset(); + + return ebb_lmr_section_test(test_mem); +} + +int main(void) +{ + int ret = test_harness(ebb_lmr, "ebb_lmr"); + + if (test_mem) + free(test_mem); + + return ret; +} diff --git a/tools/testing/selftests/powerpc/pmu/ebb/ebb_lmr.h b/tools/testing/selftests/powerpc/pmu/ebb/ebb_lmr.h new file mode 100644 index 0000000..ef50abd --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/ebb/ebb_lmr.h @@ -0,0 +1,39 @@ +#ifndef _SELFTESTS_POWERPC_PMU_EBB_LMR_H +#define _SELFTESTS_POWERPC_PMU_EBB_LMR_H + +#include "reg.h" + +#ifndef PPC_FEATURE2_ARCH_3_00 +#define PPC_FEATURE2_ARCH_3_00 0x00800000 +#endif + +#define lmr_is_supported() have_hwcap2(PPC_FEATURE2_ARCH_3_00) + +static inline void ebb_lmr_reset(void) +{ + unsigned long bescr = mfspr(SPRN_BESCR); + bescr &= ~(BESCR_LMEO); + bescr |= BESCR_LME; + mtspr(SPRN_BESCR, bescr); +} + +#define LDMX(t, a, b)\ + (0x7c00026a | \ + (((t) & 0x1f) << 21) | \ + (((a) & 0x1f) << 16) | \ + (((b) & 0x1f) << 11)) + +static inline unsigned long ldmx(unsigned long address) +{ + unsigned 
long ret; + + asm volatile ("mr 9, %1\r\n" + ".long " __stringify(LDMX(9, 0, 9)) "\r\n" + "mr %0, 9\r\n":"=r"(ret) + :"r"(address) + :"r9"); + + return ret; +} + +#endif diff --git a/tools/testing/selftests/powerpc/pmu/ebb/ebb_lmr_regs.c b/tools/testing/selftests/powerpc/pmu/ebb/ebb_lmr_regs.c new file mode 100644 index 0000000..aff4241 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/ebb/ebb_lmr_regs.c @@ -0,0 +1,37 @@ +/* + * Copyright 2016, Jack Miller, IBM Corp. + * Licensed under GPLv2. + */ + +#include +#include +#include + +#include "ebb.h" +#include "ebb_lmr.h" + +#define CHECKS 10000 + +int ebb_lmr_regs(void) +{ + int i; + + SKIP_IF(!lmr_is_supported()); + + ebb_global_enable(); + + for (i = 0; i < CHECKS; i++) { + mtspr(SPRN_LMRR, i << 25); // skip size and rsvd bits + mtspr(SPRN_LMSER, i); + + FAIL_IF(mfspr(SPRN_LMRR) != (i << 25)); + FAIL_IF(mfspr(SPRN_LMSER) != i); + } + + return 0; +} + +int main(void) +{ + return test_harness(ebb_lmr_regs, "ebb_lmr_regs"); +} diff --git a/tools/testing/selftests/powerpc/reg.h b/tools/testing/selftests/powerpc/reg.h index 65bfdee..fddf368 100644 --- a/tools/testing/selftests/powerpc/reg.h +++ b/tools/testing/selftests/powerpc/reg.h @@ -34,6 +34,11 @@ #define BESCR_PMEO 0x1 /* PMU Event-based exception Occurred */ #define BESCR_PME (0x1ul << 32) /* PMU Event-based exception Enable */ +#define BESCR_LME (0x1ul << 34) /* Load Monitor Enable */ +#define BESCR_LMEO (0x1ul << 2) /* Load Monitor Exception Occurred */ + +#define SPRN_LMRR 813 /* Load Monitor Region Register */ +#define SPRN_LMSER 814 /* Load Monitor Section Enable Register */ #define SPRN_PMC1 771 #define SPRN_PMC2 772 -- cgit v0.10.2 From f8ab481066e7246e4b272233aa0b6948f5069f41 Mon Sep 17 00:00:00 2001 From: Mauricio Faria de Oliveira Date: Thu, 2 Jun 2016 08:45:14 -0300 Subject: powerpc: export cpu_to_core_id() Export cpu_to_core_id(). This will be used by the lpfc driver. 
This enables topology_core_id() from <linux/topology.h> (defined to cpu_to_core_id() in arch/powerpc/include/asm/topology.h) to be used by (non-builtin) modules. That is arch-neutral, already used by eg, drivers/base/topology.c, but it is builtin (obj-y in Makefile) thus didn't need the export. Since the module uses topology_core_id() and this is defined to cpu_to_core_id(), it needs the export, otherwise: ERROR: "cpu_to_core_id" [drivers/scsi/lpfc/lpfc.ko] undefined! Tested on next-20160601. Signed-off-by: Mauricio Faria de Oliveira Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 1b55c78..5a1f015 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -595,6 +595,7 @@ out: of_node_put(np); return id; } +EXPORT_SYMBOL_GPL(cpu_to_core_id); /* Helper routines for cpu to core mapping */ int cpu_core_index_of_thread(int cpu) -- cgit v0.10.2 From d366d28cd1325f11d582ec6d4a14b8329d3e1a20 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 20 May 2016 16:41:25 +1000 Subject: PCI: Add pcibios_setup_bridge() Currently, PowerPC PowerNV platform utilizes ppc_md.pcibios_fixup(), which is called once after PCI probing and resource assignment are completed, to allocate platform required resources for PCI devices: PE#, IO and MMIO mapping, DMA address translation (TCE) table etc. Obviously, it's not hotplug friendly. This adds weak function pcibios_setup_bridge(), which is called by pci_setup_bridge(). PowerPC PowerNV platform will reuse the function to assign above platform required resources to newly plugged PCI devices during PCI hotplug in subsequent patches.
Signed-off-by: Gavin Shan Acked-by: Bjorn Helgaas Signed-off-by: Michael Ellerman diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index 55641a3..d678c46 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c @@ -695,11 +695,16 @@ static void __pci_setup_bridge(struct pci_bus *bus, unsigned long type) pci_write_config_word(bridge, PCI_BRIDGE_CONTROL, bus->bridge_ctl); } +void __weak pcibios_setup_bridge(struct pci_bus *bus, unsigned long type) +{ +} + void pci_setup_bridge(struct pci_bus *bus) { unsigned long type = IORESOURCE_IO | IORESOURCE_MEM | IORESOURCE_PREFETCH; + pcibios_setup_bridge(bus, type); __pci_setup_bridge(bus, type); } diff --git a/include/linux/pci.h b/include/linux/pci.h index b67e4df..c40ac91 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -854,6 +854,7 @@ void pci_stop_and_remove_bus_device_locked(struct pci_dev *dev); void pci_stop_root_bus(struct pci_bus *bus); void pci_remove_root_bus(struct pci_bus *bus); void pci_setup_cardbus(struct pci_bus *bus); +void pcibios_setup_bridge(struct pci_bus *bus, unsigned long type); void pci_sort_breadthfirst(void); #define dev_is_pci(d) ((d)->bus == &pci_bus_type) #define dev_is_pf(d) ((dev_is_pci(d) ? to_pci_dev(d)->is_physfn : false)) -- cgit v0.10.2 From c5fcb29a649723806a350dcb8854610f2f6b8819 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 20 May 2016 16:41:26 +1000 Subject: powerpc/pci: Override pcibios_setup_bridge() This overrides pcibios_setup_bridge() that is called to update PCI bridge windows when PCI resource assignment is completed, to assign PE and setup various (resource) mapping for the PE in subsequent patches. 
Signed-off-by: Gavin Shan Reviewed-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index 467c0b0..b5e88e4 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -33,6 +33,8 @@ struct pci_controller_ops { /* Called during PCI resource reassignment */ resource_size_t (*window_alignment)(struct pci_bus *bus, unsigned long type); + void (*setup_bridge)(struct pci_bus *bus, + unsigned long type); void (*reset_secondary_bus)(struct pci_dev *pdev); #ifdef CONFIG_PCI_MSI diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 0f7a60f..40df3a5 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -124,6 +124,14 @@ resource_size_t pcibios_window_alignment(struct pci_bus *bus, return 1; } +void pcibios_setup_bridge(struct pci_bus *bus, unsigned long type) +{ + struct pci_controller *hose = pci_bus_to_host(bus); + + if (hose->controller_ops.setup_bridge) + hose->controller_ops.setup_bridge(bus, type); +} + void pcibios_reset_secondary_bus(struct pci_dev *dev) { struct pci_controller *phb = pci_bus_to_host(dev->bus); -- cgit v0.10.2 From e368e4ca9c0686514febd3f1767d465caf208db0 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 20 May 2016 16:41:27 +1000 Subject: powerpc/powernv: Remove PCI_RESET_DELAY_US The macro defined in arch/powerpc/platforms/powernv/pci.c isn't used by anyone. Just remove it. 
Signed-off-by: Gavin Shan Reviewed-by: Andrew Donnellan Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 1d92bd9..b1ee631 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -36,9 +36,6 @@ #include "powernv.h" #include "pci.h" -/* Delay in usec */ -#define PCI_RESET_DELAY_US 3000000 - #ifdef CONFIG_PCI_MSI int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) { -- cgit v0.10.2 From 577c8c886823ea81fae92ff54ae4225a3f1f6c75 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 20 May 2016 16:41:28 +1000 Subject: powerpc/powernv: Move pnv_pci_ioda_setup_opal_tce_kill() around pnv_pci_ioda_setup_opal_tce_kill() is called by pnv_ioda_setup_dma() to remap the TCE kill register. What's done in pnv_ioda_setup_dma() will be covered in pcibios_setup_bridge() which is invoked on each PCI bridge. It means we would possibly remap the TCE kill register multiple times, which is unnecessary. This moves pnv_pci_ioda_setup_opal_tce_kill() to where the PHB is initialized (pnv_pci_init_ioda_phb()) to avoid the above issue.
Signed-off-by: Gavin Shan Reviewed-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 2c0e09f..5bbee82 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2702,8 +2702,6 @@ static void pnv_ioda_setup_dma(struct pnv_phb *phb) pr_info("PCI: Domain %04x has %d available 32-bit DMA segments\n", hose->global_number, phb->ioda.dma32_count); - pnv_pci_ioda_setup_opal_tce_kill(phb); - /* Walk our PE list and configure their DMA segments */ list_for_each_entry(pe, &phb->ioda.pe_list, list) { weight = pnv_pci_ioda_pe_dma_weight(pe); @@ -3482,6 +3480,9 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, if (phb->regs == NULL) pr_err(" Failed to map registers !\n"); + /* Initialize TCE kill register */ + pnv_pci_ioda_setup_opal_tce_kill(phb); + /* Initialize more IODA stuff */ phb->ioda.total_pe_num = 1; prop32 = of_get_property(np, "ibm,opal-num-pes", NULL); -- cgit v0.10.2 From c127562ae1d85c7f79192ace0431f97add8f83ea Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 20 May 2016 16:41:29 +1000 Subject: powerpc/powernv: Increase PE# capacity Each PHB maintains an array helping to translate 2-bytes Request ID (RID) to PE# with the assumption that PE# takes one byte, meaning that we can't have more than 256 PEs. However, pci_dn->pe_number already had 4-bytes for the PE#. This extends the PE# capacity for every PHB. After that, the PE number is represented by 4-bytes value. Then we can reuse IODA_INVALID_PE to check the PE# in phb->pe_rmap[] is valid or not. 
Signed-off-by: Gavin Shan Reviewed-by: Daniel Axtens Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 5bbee82..393d324 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -761,7 +761,7 @@ static int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) /* Clear the reverse map */ for (rid = pe->rid; rid < rid_end; rid++) - phb->ioda.pe_rmap[rid] = 0; + phb->ioda.pe_rmap[rid] = IODA_INVALID_PE; /* Release from all parents PELT-V */ while (parent) { @@ -3492,6 +3492,10 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, if (prop32) phb->ioda.reserved_pe_idx = be32_to_cpup(prop32); + /* Invalidate RID to PE# mapping */ + for (segno = 0; segno < ARRAY_SIZE(phb->ioda.pe_rmap); segno++) + phb->ioda.pe_rmap[segno] = IODA_INVALID_PE; + /* Parse 64-bit MMIO range */ pnv_ioda_parse_m64_window(phb); diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index 7dee25e..de56ed2 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -152,11 +152,8 @@ struct pnv_phb { struct list_head pe_list; struct mutex pe_list_mutex; - /* Reverse map of PEs, will have to extend if - * we are to support more than 256 PEs, indexed - * bus { bus, devfn } - */ - unsigned char pe_rmap[0x10000]; + /* Reverse map of PEs, indexed by {bus, devfn} */ + unsigned int pe_rmap[0x10000]; /* TCE cache invalidate registers (physical and * remapped) -- cgit v0.10.2 From 9fcd6f4a2b264f15071ab5e2d61ac2d9125ebc1d Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 20 May 2016 16:41:30 +1000 Subject: powerpc/powernv: Allocate PE# in reverse order PE number for one particular PE can be allocated dynamically or reserved according to the consumed M64 (64-bits prefetchable) segments of the PE. 
The M64 segment can't be remapped to an arbitrary PE, meaning the PE number is determined according to the index of the consumed M64 segment. As the figure below shows, the M64 resource grows from the low to the high end, meaning the PE number reserved according to the M64 segment grows from the low to the high end as well, and so does the dynamically allocated PE number. This leads to a conflict: a PE number (M64 segment) taken by dynamic allocation may be required by a hot-added PCI adapter at a later point. The PCI hotplug then fails because the PE number can't be reserved based on the index of the consumed M64 segment. +---+---+---+---+---+--------------------------------+-----+ | 0 | 1 | 2 | 3 | 4 | ....... | 255 | +---+---+---+---+---+--------------------------------+-----+ PE number for dynamic allocation -----------------> PE number reserved for M64 segment -----------------> To resolve the above conflict, this forces the PE number to be allocated dynamically in reverse order. With this patch applied, the PE numbers are reserved in ascending order, but allocated dynamically in reverse order.
Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 393d324..e820973 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -141,16 +141,14 @@ static void pnv_ioda_reserve_pe(struct pnv_phb *phb, int pe_no) static struct pnv_ioda_pe *pnv_ioda_alloc_pe(struct pnv_phb *phb) { - unsigned long pe; + unsigned long pe = phb->ioda.total_pe_num - 1; - do { - pe = find_next_zero_bit(phb->ioda.pe_alloc, - phb->ioda.total_pe_num, 0); - if (pe >= phb->ioda.total_pe_num) - return NULL; - } while(test_and_set_bit(pe, phb->ioda.pe_alloc)); + for (pe = phb->ioda.total_pe_num - 1; pe >= 0; pe--) { + if (!test_and_set_bit(pe, phb->ioda.pe_alloc)) + return pnv_ioda_init_pe(phb, pe); + } - return pnv_ioda_init_pe(phb, pe); + return NULL; } static void pnv_ioda_free_pe(struct pnv_ioda_pe *pe) -- cgit v0.10.2 From ccd1c1911a70298cc0473de89ff0f95cd8d16ffe Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 20 May 2016 16:41:31 +1000 Subject: powerpc/powernv: Create PEs in pcibios_setup_bridge() Currently, the PEs and their associated resources are assigned in ppc_md.pcibios_fixup() except those used by SRIOV VFs. The function is called for once after PCI probing and resources assignment is completed. So it's obviously not hotplug friendly. This creates PEs dynamically in pcibios_setup_bridge() that is called for the event during system bootup and PCI hotplug: updating PCI bridge's windows after resource assignment/reassignment are done. In partial hotplug case, not all PCI devices included to one particular PE are unplugged and plugged again, we just need unbinding/binding the hot added PCI devices with the corresponding PE without creating new one. The change is applied to IODA1 and IODA2 PHBs only. The behaviour on NPU PHBs aren't changed. 
There are no PCI bridges on NPU PHBs, meaning pcibios_setup_bridge() won't be invoked there. We have to use old path (pnv_pci_ioda_fixup()) to setup PEs on NPU PHBs. Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index e820973..062f7fb 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1022,6 +1022,15 @@ static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe) pci_name(dev)); continue; } + + /* + * In partial hotplug case, the PCI device might be still + * associated with the PE and needn't attach it to the PE + * again. + */ + if (pdn->pe_number != IODA_INVALID_PE) + continue; + pdn->pcidev = dev; pdn->pe_number = pe->pe_number; if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate) @@ -1040,6 +1049,18 @@ static struct pnv_ioda_pe *pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all) struct pci_controller *hose = pci_bus_to_host(bus); struct pnv_phb *phb = hose->private_data; struct pnv_ioda_pe *pe = NULL; + unsigned int pe_num; + + /* + * In partial hotplug case, the PE instance might be still alive. + * We should reuse it instead of allocating a new one. 
+ */ + pe_num = phb->ioda.pe_rmap[bus->number << 8]; + if (pe_num != IODA_INVALID_PE) { + pe = &phb->ioda.pe_array[pe_num]; + pnv_ioda_setup_same_PE(bus, pe); + return NULL; + } /* Check if PE is determined by M64 */ if (phb->pick_m64_pe) @@ -1154,30 +1175,6 @@ static void pnv_ioda_setup_npu_PEs(struct pci_bus *bus) pnv_ioda_setup_npu_PE(pdev); } -static void pnv_ioda_setup_PEs(struct pci_bus *bus) -{ - struct pci_dev *dev; - - pnv_ioda_setup_bus_PE(bus, false); - - list_for_each_entry(dev, &bus->devices, bus_list) { - if (dev->subordinate) { - if (pci_pcie_type(dev) == PCI_EXP_TYPE_PCI_BRIDGE) - pnv_ioda_setup_bus_PE(dev->subordinate, true); - else - pnv_ioda_setup_PEs(dev->subordinate); - } - } -} - -/* - * Configure PEs so that the downstream PCI buses and devices - * could have their associated PE#. Unfortunately, we didn't - * figure out the way to identify the PLX bridge yet. So we - * simply put the PCI bus and the subordinate behind the root - * port to PE# here. The game rule here is expected to be changed - * as soon as we can detected PLX bridge correctly. - */ static void pnv_pci_ioda_setup_PEs(void) { struct pci_controller *hose, *tmp; @@ -1185,22 +1182,11 @@ static void pnv_pci_ioda_setup_PEs(void) list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { phb = hose->private_data; - - /* M64 layout might affect PE allocation */ - if (phb->reserve_m64_pe) - phb->reserve_m64_pe(hose->bus, NULL, true); - - /* - * On NPU PHB, we expect separate PEs for individual PCI - * functions. PCI bus dependent PEs are required for the - * remaining types of PHBs. 
- */ if (phb->type == PNV_PHB_NPU) { /* PE#0 is needed for error reporting */ pnv_ioda_reserve_pe(phb, 0); pnv_ioda_setup_npu_PEs(hose->bus); - } else - pnv_ioda_setup_PEs(hose->bus); + } } } @@ -2655,6 +2641,9 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, { int64_t rc; + if (!pnv_pci_ioda_pe_dma_weight(pe)) + return; + /* TVE #1 is selected by PCI address bit 59 */ pe->tce_bypass_base = 1ull << 59; @@ -2686,47 +2675,6 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, pnv_ioda_setup_bus_dma(pe, pe->pbus); } -static void pnv_ioda_setup_dma(struct pnv_phb *phb) -{ - struct pci_controller *hose = phb->hose; - struct pnv_ioda_pe *pe; - unsigned int weight; - - /* If we have more PE# than segments available, hand out one - * per PE until we run out and let the rest fail. If not, - * then we assign at least one segment per PE, plus more based - * on the amount of devices under that PE - */ - pr_info("PCI: Domain %04x has %d available 32-bit DMA segments\n", - hose->global_number, phb->ioda.dma32_count); - - /* Walk our PE list and configure their DMA segments */ - list_for_each_entry(pe, &phb->ioda.pe_list, list) { - weight = pnv_pci_ioda_pe_dma_weight(pe); - if (!weight) - continue; - - /* - * For IODA2 compliant PHB3, we needn't care about the weight. - * The all available 32-bits DMA space will be assigned to - * the specific PE. - */ - if (phb->type == PNV_PHB_IODA1) { - pnv_pci_ioda1_setup_dma_pe(phb, pe); - } else if (phb->type == PNV_PHB_IODA2) { - pe_info(pe, "Assign DMA32 space\n"); - pnv_pci_ioda2_setup_dma_pe(phb, pe); - } else if (phb->type == PNV_PHB_NPU) { - /* - * We initialise the DMA space for an NPU PHB - * after setup of the PHB is complete as we - * point the NPU TVT to the the same location - * as the PHB3 TVT. 
- */ - } - } -} - #ifdef CONFIG_PCI_MSI static void pnv_ioda2_msi_eoi(struct irq_data *d) { @@ -3195,41 +3143,6 @@ static void pnv_ioda_setup_pe_seg(struct pnv_ioda_pe *pe) } } -static void pnv_pci_ioda_setup_seg(void) -{ - struct pci_controller *tmp, *hose; - struct pnv_phb *phb; - struct pnv_ioda_pe *pe; - - list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { - phb = hose->private_data; - - /* NPU PHB does not support IO or MMIO segmentation */ - if (phb->type == PNV_PHB_NPU) - continue; - - list_for_each_entry(pe, &phb->ioda.pe_list, list) { - pnv_ioda_setup_pe_seg(pe); - } - } -} - -static void pnv_pci_ioda_setup_DMA(void) -{ - struct pci_controller *hose, *tmp; - struct pnv_phb *phb; - - list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { - pnv_ioda_setup_dma(hose->private_data); - - /* Mark the PHB initialization done */ - phb = hose->private_data; - phb->initialized = 1; - } - - pnv_pci_ioda_setup_iommu_api(); -} - static void pnv_pci_ioda_create_dbgfs(void) { #ifdef CONFIG_DEBUG_FS @@ -3240,6 +3153,9 @@ static void pnv_pci_ioda_create_dbgfs(void) list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { phb = hose->private_data; + /* Notify initialization of PHB done */ + phb->initialized = 1; + sprintf(name, "PCI%04x", hose->global_number); phb->dbgfs = debugfs_create_dir(name, powerpc_debugfs_root); if (!phb->dbgfs) @@ -3252,9 +3168,7 @@ static void pnv_pci_ioda_create_dbgfs(void) static void pnv_pci_ioda_fixup(void) { pnv_pci_ioda_setup_PEs(); - pnv_pci_ioda_setup_seg(); - pnv_pci_ioda_setup_DMA(); - + pnv_pci_ioda_setup_iommu_api(); pnv_pci_ioda_create_dbgfs(); #ifdef CONFIG_EEH @@ -3304,6 +3218,45 @@ static resource_size_t pnv_pci_window_alignment(struct pci_bus *bus, return phb->ioda.io_segsize; } +static void pnv_pci_setup_bridge(struct pci_bus *bus, unsigned long type) +{ + struct pci_controller *hose = pci_bus_to_host(bus); + struct pnv_phb *phb = hose->private_data; + struct pci_dev *bridge = bus->self; + struct pnv_ioda_pe 
*pe; + bool all = (pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE); + + /* Don't assign PE to PCI bus, which doesn't have subordinate devices */ + if (list_empty(&bus->devices)) + return; + + /* Reserve PEs according to used M64 resources */ + if (phb->reserve_m64_pe) + phb->reserve_m64_pe(bus, NULL, all); + + /* + * Assign PE. We might run here because of partial hotplug. + * For the case, we just pick up the existing PE and should + * not allocate resources again. + */ + pe = pnv_ioda_setup_bus_PE(bus, all); + if (!pe) + return; + + pnv_ioda_setup_pe_seg(pe); + switch (phb->type) { + case PNV_PHB_IODA1: + pnv_pci_ioda1_setup_dma_pe(phb, pe); + break; + case PNV_PHB_IODA2: + pnv_pci_ioda2_setup_dma_pe(phb, pe); + break; + default: + pr_warn("%s: No DMA for PHB#%d (type %d)\n", + __func__, phb->hose->global_number, phb->type); + } +} + #ifdef CONFIG_PCI_IOV static resource_size_t pnv_pci_iov_resource_alignment(struct pci_dev *pdev, int resno) @@ -3381,6 +3334,7 @@ static const struct pci_controller_ops pnv_pci_ioda_controller_ops = { #endif .enable_device_hook = pnv_pci_enable_device_hook, .window_alignment = pnv_pci_window_alignment, + .setup_bridge = pnv_pci_setup_bridge, .reset_secondary_bus = pnv_pci_reset_secondary_bus, .dma_set_mask = pnv_pci_ioda_dma_set_mask, .dma_get_required_mask = pnv_pci_ioda_dma_get_required_mask, -- cgit v0.10.2 From 63803c39c858e4c9156601c332b7a9ddffdbd37f Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 20 May 2016 16:41:32 +1000 Subject: powerpc/powernv: Setup PE for root bus There is no parent bridge for root bus, meaning pcibios_setup_bridge() isn't invoked for root bus. The PE for root bus is the ancestor of other PEs in PELTV. It means we need PE for root bus populated before all others. This populates the PE for root bus in pcibios_setup_bridge() path if it's not populated yet. The PE number next to the reserved one is used as the PE# to avoid holes in continuous M64 space. 
Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 062f7fb..74183ff 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -194,14 +194,14 @@ static int pnv_ioda2_init_m64(struct pnv_phb *phb) set_bit(phb->ioda.m64_bar_idx, &phb->ioda.m64_bar_alloc); /* - * Strip off the segment used by the reserved PE, which is - * expected to be 0 or last one of PE capability. + * Exclude the segments for reserved and root bus PE, which + * are first or last two PEs. */ r = &phb->hose->mem_resources[1]; if (phb->ioda.reserved_pe_idx == 0) - r->start += phb->ioda.m64_segsize; + r->start += (2 * phb->ioda.m64_segsize); else if (phb->ioda.reserved_pe_idx == (phb->ioda.total_pe_num - 1)) - r->end -= phb->ioda.m64_segsize; + r->end -= (2 * phb->ioda.m64_segsize); else pr_warn(" Cannot strip M64 segment for reserved PE#%d\n", phb->ioda.reserved_pe_idx); @@ -281,14 +281,14 @@ static int pnv_ioda1_init_m64(struct pnv_phb *phb) } /* - * Exclude the segment used by the reserved PE, which - * is expected to be 0 or last supported PE#. + * Exclude the segments for reserved and root bus PE, which + * are first or last two PEs. 
*/ r = &phb->hose->mem_resources[1]; if (phb->ioda.reserved_pe_idx == 0) - r->start += phb->ioda.m64_segsize; + r->start += (2 * phb->ioda.m64_segsize); else if (phb->ioda.reserved_pe_idx == (phb->ioda.total_pe_num - 1)) - r->end -= phb->ioda.m64_segsize; + r->end -= (2 * phb->ioda.m64_segsize); else WARN(1, "Wrong reserved PE#%d on PHB#%d\n", phb->ioda.reserved_pe_idx, phb->hose->global_number); @@ -1062,8 +1062,13 @@ static struct pnv_ioda_pe *pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all) return NULL; } + /* PE number for root bus should have been reserved */ + if (pci_is_root_bus(bus) && + phb->ioda.root_pe_idx != IODA_INVALID_PE) + pe = &phb->ioda.pe_array[phb->ioda.root_pe_idx]; + /* Check if PE is determined by M64 */ - if (phb->pick_m64_pe) + if (!pe && phb->pick_m64_pe) pe = phb->pick_m64_pe(bus, all); /* The PE number isn't pinned by M64 */ @@ -3226,6 +3231,15 @@ static void pnv_pci_setup_bridge(struct pci_bus *bus, unsigned long type) struct pnv_ioda_pe *pe; bool all = (pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE); + /* The PE for root bus should be realized before any one else */ + if (!phb->ioda.root_pe_populated) { + pe = pnv_ioda_setup_bus_PE(phb->hose->bus, false); + if (pe) { + phb->ioda.root_pe_idx = pe->pe_number; + phb->ioda.root_pe_populated = true; + } + } + /* Don't assign PE to PCI bus, which doesn't have subordinate devices */ if (list_empty(&bus->devices)) return; @@ -3499,7 +3513,22 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, phb->ioda.dma32_segmap[segno] = IODA_INVALID_PE; } phb->ioda.pe_array = aux + pemap_off; - set_bit(phb->ioda.reserved_pe_idx, phb->ioda.pe_alloc); + + /* + * Choose PE number for root bus, which shouldn't have + * M64 resources consumed by its child devices. To pick + * the PE number adjacent to the reserved one if possible. 
+ */ + pnv_ioda_reserve_pe(phb, phb->ioda.reserved_pe_idx); + if (phb->ioda.reserved_pe_idx == 0) { + phb->ioda.root_pe_idx = 1; + pnv_ioda_reserve_pe(phb, phb->ioda.root_pe_idx); + } else if (phb->ioda.reserved_pe_idx == (phb->ioda.total_pe_num - 1)) { + phb->ioda.root_pe_idx = phb->ioda.reserved_pe_idx - 1; + pnv_ioda_reserve_pe(phb, phb->ioda.root_pe_idx); + } else { + phb->ioda.root_pe_idx = IODA_INVALID_PE; + } INIT_LIST_HEAD(&phb->ioda.pe_list); mutex_init(&phb->ioda.pe_list_mutex); diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index de56ed2..8927e5d 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -110,6 +110,8 @@ struct pnv_phb { /* Global bridge info */ unsigned int total_pe_num; unsigned int reserved_pe_idx; + unsigned int root_pe_idx; + bool root_pe_populated; /* 32-bit MMIO window */ unsigned int m32_size; -- cgit v0.10.2 From 40e2a47e62dabad64f0d2955ec17064225305a52 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 20 May 2016 16:41:33 +1000 Subject: powerpc/powernv: Extend PCI bridge resources The PCI slots are associated with root port or downstream ports of the PCIe switch connected to root port. When adapter is hot added to the PCI slot, it usually requests more IO or memory resource from the directly connected parent bridge (port) and update the bridge's windows accordingly. The resource windows of upstream bridges can't be updated automatically. It possibly leads to unbalanced resource across the bridges: The window of downstream bridge is overruning that of upstream bridge. The IO or MMIO path won't work. This resolves the above issue by extending bridge windows of root port and upstream port of the PCIe switch connected to the root port to PHB's windows. The windows of root port and bridge behind that are extended to the PHB's windows to accomodate the PCI hotplug happening in future. 
The PHB's 64KB 32-bits MSI region is included in bridge's M32 windows (in hardware) though it's excluded in the corresponding resource, as the bridge's M32 windows have 1MB as their minimal alignment. We observed EEH error during system boot when the MSI region is included in bridge's M32 window. This excludes top 1MB (including 64KB 32-bits MSI region) region from bridge's M32 windows when extending them. Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 74183ff..f647c8d 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -3223,6 +3223,64 @@ static resource_size_t pnv_pci_window_alignment(struct pci_bus *bus, return phb->ioda.io_segsize; } +/* + * We are updating root port or the upstream port of the + * bridge behind the root port with PHB's windows in order + * to accommodate the changes on required resources during + * PCI (slot) hotplug, which is connected to either root + * port or the downstream ports of PCIe switch behind the + * root port. 
+ */ +static void pnv_pci_fixup_bridge_resources(struct pci_bus *bus, + unsigned long type) +{ + struct pci_controller *hose = pci_bus_to_host(bus); + struct pnv_phb *phb = hose->private_data; + struct pci_dev *bridge = bus->self; + struct resource *r, *w; + bool msi_region = false; + int i; + + /* Check if we need apply fixup to the bridge's windows */ + if (!pci_is_root_bus(bridge->bus) && + !pci_is_root_bus(bridge->bus->self->bus)) + return; + + /* Fixup the resources */ + for (i = 0; i < PCI_BRIDGE_RESOURCE_NUM; i++) { + r = &bridge->resource[PCI_BRIDGE_RESOURCES + i]; + if (!r->flags || !r->parent) + continue; + + w = NULL; + if (r->flags & type & IORESOURCE_IO) + w = &hose->io_resource; + else if (pnv_pci_is_mem_pref_64(r->flags) && + (type & IORESOURCE_PREFETCH) && + phb->ioda.m64_segsize) + w = &hose->mem_resources[1]; + else if (r->flags & type & IORESOURCE_MEM) { + w = &hose->mem_resources[0]; + msi_region = true; + } + + r->start = w->start; + r->end = w->end; + + /* The 64KB 32-bits MSI region shouldn't be included in + * the 32-bits bridge window. Otherwise, we can see strange + * issues. One of them is EEH error observed on Garrison. + * + * Exclude top 1MB region which is the minimal alignment of + * 32-bits bridge window. 
+ */ + if (msi_region) { + r->end += 0x10000; + r->end -= 0x100000; + } + } +} + static void pnv_pci_setup_bridge(struct pci_bus *bus, unsigned long type) { struct pci_controller *hose = pci_bus_to_host(bus); @@ -3231,6 +3289,9 @@ static void pnv_pci_setup_bridge(struct pci_bus *bus, unsigned long type) struct pnv_ioda_pe *pe; bool all = (pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE); + /* Extend bridge's windows if necessary */ + pnv_pci_fixup_bridge_resources(bus, type); + /* The PE for root bus should be realized before any one else */ if (!phb->ioda.root_pe_populated) { pe = pnv_ioda_setup_bus_PE(phb->hose->bus, false); -- cgit v0.10.2 From 93e01a5039915048e6b0be559ca4bfba1cce6f71 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 20 May 2016 16:41:34 +1000 Subject: powerpc/powernv: Make pnv_ioda_deconfigure_pe() visible pnv_ioda_deconfigure_pe() is visible only when CONFIG_PCI_IOV is enabled. The function will be used to tear down PE's associated mapping in PCI hotplug path that doesn't depend on CONFIG_PCI_IOV. This makes pnv_ioda_deconfigure_pe() visible and not depend on CONFIG_PCI_IOV. 
Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index f647c8d..58ca7df 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -712,7 +712,6 @@ static int pnv_ioda_set_peltv(struct pnv_phb *phb, return 0; } -#ifdef CONFIG_PCI_IOV static int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) { struct pci_dev *parent; @@ -747,9 +746,11 @@ static int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) } rid_end = pe->rid + (count << 8); } else { +#ifdef CONFIG_PCI_IOV if (pe->flags & PNV_IODA_PE_VF) parent = pe->parent_dev; else +#endif parent = pe->pdev->bus->self; bcomp = OpalPciBusAll; dcomp = OPAL_COMPARE_RID_DEVICE_NUMBER; @@ -787,11 +788,12 @@ static int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) pe->pbus = NULL; pe->pdev = NULL; +#ifdef CONFIG_PCI_IOV pe->parent_dev = NULL; +#endif return 0; } -#endif /* CONFIG_PCI_IOV */ static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) { -- cgit v0.10.2 From c5f7700bbd2e6b598738d85a6c46887b8c8fb6c5 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 20 May 2016 16:41:35 +1000 Subject: powerpc/powernv: Dynamically release PE This supports releasing PEs dynamically. A reference count is introduced to PE representing number of PCI devices associated with the PE. The reference count is increased when PCI device joins the PE and decreased when PCI device leaves the PE in pnv_pci_release_device(). When the count becomes zero, the PE and its consumed resources are released. Note that the count is accessed concurrently. So a counter with "int" type is enough here. 
In order to release the sources consumed by the PE, couple of helper functions are introduced as below: * pnv_pci_ioda1_unset_window() - Unset IODA1 DMA32 window * pnv_pci_ioda1_release_dma_pe() - Release IODA1 DMA32 segments * pnv_pci_ioda2_release_dma_pe() - Release IODA2 DMA resource * pnv_ioda_release_pe_seg() - Unmap IO/M32/M64 segments Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 58ca7df..4e0b2fc 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1033,6 +1033,7 @@ static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe) if (pdn->pe_number != IODA_INVALID_PE) continue; + pe->device_count++; pdn->pcidev = dev; pdn->pe_number = pe->pe_number; if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate) @@ -3394,6 +3395,178 @@ static bool pnv_pci_enable_device_hook(struct pci_dev *dev) return true; } +static long pnv_pci_ioda1_unset_window(struct iommu_table_group *table_group, + int num) +{ + struct pnv_ioda_pe *pe = container_of(table_group, + struct pnv_ioda_pe, table_group); + struct pnv_phb *phb = pe->phb; + unsigned int idx; + long rc; + + pe_info(pe, "Removing DMA window #%d\n", num); + for (idx = 0; idx < phb->ioda.dma32_count; idx++) { + if (phb->ioda.dma32_segmap[idx] != pe->pe_number) + continue; + + rc = opal_pci_map_pe_dma_window(phb->opal_id, pe->pe_number, + idx, 0, 0ul, 0ul, 0ul); + if (rc != OPAL_SUCCESS) { + pe_warn(pe, "Failure %ld unmapping DMA32 segment#%d\n", + rc, idx); + return rc; + } + + phb->ioda.dma32_segmap[idx] = IODA_INVALID_PE; + } + + pnv_pci_unlink_table_and_group(table_group->tables[num], table_group); + return OPAL_SUCCESS; +} + +static void pnv_pci_ioda1_release_pe_dma(struct pnv_ioda_pe *pe) +{ + unsigned int weight = pnv_pci_ioda_pe_dma_weight(pe); + struct iommu_table *tbl = pe->table_group.tables[0]; + int64_t rc; + + if (!weight) 
+ return; + + rc = pnv_pci_ioda1_unset_window(&pe->table_group, 0); + if (rc != OPAL_SUCCESS) + return; + + pnv_pci_ioda1_tce_invalidate(tbl, tbl->it_offset, tbl->it_size, false); + if (pe->table_group.group) { + iommu_group_put(pe->table_group.group); + WARN_ON(pe->table_group.group); + } + + free_pages(tbl->it_base, get_order(tbl->it_size << 3)); + iommu_free_table(tbl, "pnv"); +} + +static void pnv_pci_ioda2_release_pe_dma(struct pnv_ioda_pe *pe) +{ + struct iommu_table *tbl = pe->table_group.tables[0]; + unsigned int weight = pnv_pci_ioda_pe_dma_weight(pe); +#ifdef CONFIG_IOMMU_API + int64_t rc; +#endif + + if (!weight) + return; + +#ifdef CONFIG_IOMMU_API + rc = pnv_pci_ioda2_unset_window(&pe->table_group, 0); + if (rc) + pe_warn(pe, "OPAL error %ld release DMA window\n", rc); +#endif + + pnv_pci_ioda2_set_bypass(pe, false); + if (pe->table_group.group) { + iommu_group_put(pe->table_group.group); + WARN_ON(pe->table_group.group); + } + + pnv_pci_ioda2_table_free_pages(tbl); + iommu_free_table(tbl, "pnv"); +} + +static void pnv_ioda_free_pe_seg(struct pnv_ioda_pe *pe, + unsigned short win, + unsigned int *map) +{ + struct pnv_phb *phb = pe->phb; + int idx; + int64_t rc; + + for (idx = 0; idx < phb->ioda.total_pe_num; idx++) { + if (map[idx] != pe->pe_number) + continue; + + if (win == OPAL_M64_WINDOW_TYPE) + rc = opal_pci_map_pe_mmio_window(phb->opal_id, + phb->ioda.reserved_pe_idx, win, + idx / PNV_IODA1_M64_SEGS, + idx % PNV_IODA1_M64_SEGS); + else + rc = opal_pci_map_pe_mmio_window(phb->opal_id, + phb->ioda.reserved_pe_idx, win, 0, idx); + + if (rc != OPAL_SUCCESS) + pe_warn(pe, "Error %ld unmapping (%d) segment#%d\n", + rc, win, idx); + + map[idx] = IODA_INVALID_PE; + } +} + +static void pnv_ioda_release_pe_seg(struct pnv_ioda_pe *pe) +{ + struct pnv_phb *phb = pe->phb; + + if (phb->type == PNV_PHB_IODA1) { + pnv_ioda_free_pe_seg(pe, OPAL_IO_WINDOW_TYPE, + phb->ioda.io_segmap); + pnv_ioda_free_pe_seg(pe, OPAL_M32_WINDOW_TYPE, + phb->ioda.m32_segmap); + 
pnv_ioda_free_pe_seg(pe, OPAL_M64_WINDOW_TYPE, + phb->ioda.m64_segmap); + } else if (phb->type == PNV_PHB_IODA2) { + pnv_ioda_free_pe_seg(pe, OPAL_M32_WINDOW_TYPE, + phb->ioda.m32_segmap); + } +} + +static void pnv_ioda_release_pe(struct pnv_ioda_pe *pe) +{ + struct pnv_phb *phb = pe->phb; + struct pnv_ioda_pe *slave, *tmp; + + /* Release slave PEs in compound PE */ + if (pe->flags & PNV_IODA_PE_MASTER) { + list_for_each_entry_safe(slave, tmp, &pe->slaves, list) + pnv_ioda_release_pe(slave); + } + + list_del(&pe->list); + switch (phb->type) { + case PNV_PHB_IODA1: + pnv_pci_ioda1_release_pe_dma(pe); + break; + case PNV_PHB_IODA2: + pnv_pci_ioda2_release_pe_dma(pe); + break; + default: + WARN_ON(1); + } + + pnv_ioda_release_pe_seg(pe); + pnv_ioda_deconfigure_pe(pe->phb, pe); + pnv_ioda_free_pe(pe); +} + +static void pnv_pci_release_device(struct pci_dev *pdev) +{ + struct pci_controller *hose = pci_bus_to_host(pdev->bus); + struct pnv_phb *phb = hose->private_data; + struct pci_dn *pdn = pci_get_pdn(pdev); + struct pnv_ioda_pe *pe; + + if (pdev->is_virtfn) + return; + + if (!pdn || pdn->pe_number == IODA_INVALID_PE) + return; + + pe = &phb->ioda.pe_array[pdn->pe_number]; + WARN_ON(--pe->device_count < 0); + if (pe->device_count == 0) + pnv_ioda_release_pe(pe); +} + static void pnv_pci_ioda_shutdown(struct pci_controller *hose) { struct pnv_phb *phb = hose->private_data; @@ -3410,6 +3583,7 @@ static const struct pci_controller_ops pnv_pci_ioda_controller_ops = { .teardown_msi_irqs = pnv_teardown_msi_irqs, #endif .enable_device_hook = pnv_pci_enable_device_hook, + .release_device = pnv_pci_release_device, .window_alignment = pnv_pci_window_alignment, .setup_bridge = pnv_pci_setup_bridge, .reset_secondary_bus = pnv_pci_reset_secondary_bus, diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index 8927e5d..3a97990 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -30,6 +30,7 @@ struct 
pnv_phb; struct pnv_ioda_pe { unsigned long flags; struct pnv_phb *phb; + int device_count; /* A PE can be associated with a single device or an * entire bus (& children). In the former case, pdev -- cgit v0.10.2 From 7415c14c560e7378b9cd3564c4c4f6b5e058e19d Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 20 May 2016 16:41:36 +1000 Subject: powerpc/pci: Update bridge windows on PCI plug On the PCI plugging event, PCI slot's subordinate devices are scanned and their (IO and MMIO) resources are assigned. Platform dependent resources (PE#, IO/MMIO/DMA windows) are allocated or created on updating windows of the slot's upstream bridge. This updates the windows of the hot plugged slot's upstream bridge in pcibios_finish_adding_to_bus() so that the platform resources (PE#, IO/MMIO/DMA segments) are allocated or created accordingly. Signed-off-by: Gavin Shan Reviewed-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 40df3a5..be9e515 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -1444,8 +1444,12 @@ void pcibios_finish_adding_to_bus(struct pci_bus *bus) /* Allocate bus and devices resources */ pcibios_allocate_bus_resources(bus); pcibios_claim_one_bus(bus); - if (!pci_has_flag(PCI_PROBE_ONLY)) - pci_assign_unassigned_bus_resources(bus); + if (!pci_has_flag(PCI_PROBE_ONLY)) { + if (bus->self) + pci_assign_unassigned_bridge_resources(bus->self); + else + pci_assign_unassigned_bus_resources(bus); + } /* Fixup EEH */ eeh_add_device_tree_late(bus); -- cgit v0.10.2 From 8cc7581cdb84a232468c41bc417183a423dfbb07 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 20 May 2016 16:41:37 +1000 Subject: powerpc/pci: Delay populating pdn The pdn (struct pci_dn) instances are allocated from memblock or bootmem when creating PCI controller (hoses) in setup_arch(). 
PCI hotplug, which will be supported by subsequent patches, releases PCI device nodes and their corresponding pdn on an unplugging event. The memory chunks for pdn instances allocated from memblock or bootmem are hard to reuse after being released. This delays creating pdn by pci_devs_phb_init() from setup_arch() to core_initcall() so that they are allocated from slab. The memory consumed by pdn can then be released to the system without problem at PCI unplugging time. This means that pci_dn is unavailable in setup_arch() and the fixup on pdn (like AGP's) can't be carried out at that time. We have to do that in pcibios_root_bridge_prepare() on maple/pasemi/powermac platforms where/when the pdn is available. pcibios_root_bridge_prepare() is called from subsys_initcall(), which is executed after core_initcall(), so the code flow does not change. Meanwhile, the EEH device is created when pdn is populated, meaning pdn and EEH device have the same life cycle. In turn, we needn't call eeh_dev_init() to create the EEH device explicitly.
Signed-off-by: Gavin Shan Reviewed-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 7d34f3d..8e37b71 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -274,7 +274,7 @@ void eeh_pe_restore_bars(struct eeh_pe *pe); const char *eeh_pe_loc_get(struct eeh_pe *pe); struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe); -void *eeh_dev_init(struct pci_dn *pdn, void *data); +struct eeh_dev *eeh_dev_init(struct pci_dn *pdn); void eeh_dev_phb_init_dynamic(struct pci_controller *phb); int eeh_init(void); int __init eeh_ops_register(struct eeh_ops *ops); diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h index 8753e4e..0f73de0 100644 --- a/arch/powerpc/include/asm/ppc-pci.h +++ b/arch/powerpc/include/asm/ppc-pci.h @@ -39,8 +39,6 @@ void *pci_traverse_device_nodes(struct device_node *start, void *traverse_pci_dn(struct pci_dn *root, void *(*fn)(struct pci_dn *, void *), void *data); - -extern void pci_devs_phb_init(void); extern void pci_devs_phb_init_dynamic(struct pci_controller *phb); /* From rtas_pci.h */ diff --git a/arch/powerpc/kernel/eeh_dev.c b/arch/powerpc/kernel/eeh_dev.c index 7815095..d6b2ca7 100644 --- a/arch/powerpc/kernel/eeh_dev.c +++ b/arch/powerpc/kernel/eeh_dev.c @@ -44,14 +44,13 @@ /** * eeh_dev_init - Create EEH device according to OF node * @pdn: PCI device node - * @data: PHB * * It will create EEH device according to the given OF node. The function * might be called by PCI emunation, DR, PHB hotplug. 
*/ -void *eeh_dev_init(struct pci_dn *pdn, void *data) +struct eeh_dev *eeh_dev_init(struct pci_dn *pdn) { - struct pci_controller *phb = data; + struct pci_controller *phb = pdn->phb; struct eeh_dev *edev; /* Allocate EEH device */ @@ -69,7 +68,7 @@ void *eeh_dev_init(struct pci_dn *pdn, void *data) INIT_LIST_HEAD(&edev->list); INIT_LIST_HEAD(&edev->rmv_list); - return NULL; + return edev; } /** @@ -81,16 +80,8 @@ void *eeh_dev_init(struct pci_dn *pdn, void *data) */ void eeh_dev_phb_init_dynamic(struct pci_controller *phb) { - struct pci_dn *root = phb->pci_data; - /* EEH PE for PHB */ eeh_phb_pe_create(phb); - - /* EEH device for PHB */ - eeh_dev_init(root, phb); - - /* EEH devices for children OF nodes */ - traverse_pci_dn(root, eeh_dev_init, phb); } /** @@ -106,8 +97,6 @@ static int __init eeh_dev_phb_init(void) list_for_each_entry_safe(phb, tmp, &hose_list, list_node) eeh_dev_phb_init_dynamic(phb); - pr_info("EEH: devices created\n"); - return 0; } diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c index afeda26..bfe60a1 100644 --- a/arch/powerpc/kernel/pci_dn.c +++ b/arch/powerpc/kernel/pci_dn.c @@ -212,8 +212,7 @@ struct pci_dn *add_dev_pci_data(struct pci_dev *pdev) #ifdef CONFIG_EEH /* Create the EEH device for the VF */ - eeh_dev_init(pdn, pci_bus_to_host(pdev->bus)); - edev = pdn_to_eeh_dev(pdn); + edev = eeh_dev_init(pdn); BUG_ON(!edev); edev->physfn = pdev; #endif /* CONFIG_EEH */ @@ -295,8 +294,11 @@ struct pci_dn *pci_add_device_node_info(struct pci_controller *hose, const __be32 *regs; struct device_node *parent; struct pci_dn *pdn; +#ifdef CONFIG_EEH + struct eeh_dev *edev; +#endif - pdn = zalloc_maybe_bootmem(sizeof(*pdn), GFP_KERNEL); + pdn = kzalloc(sizeof(*pdn), GFP_KERNEL); if (pdn == NULL) return NULL; dn->data = pdn; @@ -325,6 +327,15 @@ struct pci_dn *pci_add_device_node_info(struct pci_controller *hose, /* Extended config space */ pdn->pci_ext_config_space = (type && of_read_number(type, 1) == 1); + /* Create EEH 
device */ +#ifdef CONFIG_EEH + edev = eeh_dev_init(pdn); + if (!edev) { + kfree(pdn); + return NULL; + } +#endif + /* Attach to parent node */ INIT_LIST_HEAD(&pdn->child_list); INIT_LIST_HEAD(&pdn->list); @@ -510,15 +521,19 @@ void pci_devs_phb_init_dynamic(struct pci_controller *phb) * pci device found underneath. This routine runs once, * early in the boot sequence. */ -void __init pci_devs_phb_init(void) +static int __init pci_devs_phb_init(void) { struct pci_controller *phb, *tmp; /* This must be done first so the device nodes have valid pci info! */ list_for_each_entry_safe(phb, tmp, &hose_list, list_node) pci_devs_phb_init_dynamic(phb); + + return 0; } +core_initcall(pci_devs_phb_init); + static void pci_dev_pdn_setup(struct pci_dev *pdev) { struct pci_dn *pdn; diff --git a/arch/powerpc/platforms/maple/pci.c b/arch/powerpc/platforms/maple/pci.c index a923230..a2f89e6 100644 --- a/arch/powerpc/platforms/maple/pci.c +++ b/arch/powerpc/platforms/maple/pci.c @@ -568,6 +568,26 @@ void maple_pci_irq_fixup(struct pci_dev *dev) DBG(" <- maple_pci_irq_fixup\n"); } +static int maple_pci_root_bridge_prepare(struct pci_host_bridge *bridge) +{ + struct pci_controller *hose = pci_bus_to_host(bridge->bus); + struct device_node *np, *child; + + if (hose != u3_agp) + return 0; + + /* Fixup the PCI<->OF mapping for U3 AGP due to bus renumbering. We + * assume there is no P2P bridge on the AGP bus, which should be a + * safe assumptions hopefully. + */ + np = hose->dn; + PCI_DN(np)->busno = 0xf0; + for_each_child_of_node(np, child) + PCI_DN(child)->busno = 0xf0; + + return 0; +} + void __init maple_pci_init(void) { struct device_node *np, *root; @@ -605,19 +625,7 @@ void __init maple_pci_init(void) if (ht && maple_add_bridge(ht) != 0) of_node_put(ht); - /* Setup the linkage between OF nodes and PHBs */ - pci_devs_phb_init(); - - /* Fixup the PCI<->OF mapping for U3 AGP due to bus renumbering. 
We - * assume there is no P2P bridge on the AGP bus, which should be a - * safe assumptions hopefully. - */ - if (u3_agp) { - struct device_node *np = u3_agp->dn; - PCI_DN(np)->busno = 0xf0; - for (np = np->child; np; np = np->sibling) - PCI_DN(np)->busno = 0xf0; - } + ppc_md.pcibios_root_bridge_prepare = maple_pci_root_bridge_prepare; /* Tell pci.c to not change any resource allocations. */ pci_add_flags(PCI_PROBE_ONLY); diff --git a/arch/powerpc/platforms/pasemi/pci.c b/arch/powerpc/platforms/pasemi/pci.c index f3a68a0..10c4e8f 100644 --- a/arch/powerpc/platforms/pasemi/pci.c +++ b/arch/powerpc/platforms/pasemi/pci.c @@ -229,9 +229,6 @@ void __init pas_pci_init(void) of_node_get(np); of_node_put(root); - - /* Setup the linkage between OF nodes and PHBs */ - pci_devs_phb_init(); } void __iomem *pasemi_pci_getcfgaddr(struct pci_dev *dev, int offset) diff --git a/arch/powerpc/platforms/powermac/pci.c b/arch/powerpc/platforms/powermac/pci.c index 59ab16f..6e06c3b 100644 --- a/arch/powerpc/platforms/powermac/pci.c +++ b/arch/powerpc/platforms/powermac/pci.c @@ -878,6 +878,29 @@ void pmac_pci_irq_fixup(struct pci_dev *dev) #endif /* CONFIG_PPC32 */ } +#ifdef CONFIG_PPC64 +static int pmac_pci_root_bridge_prepare(struct pci_host_bridge *bridge) +{ + struct pci_controller *hose = pci_bus_to_host(bridge->bus); + struct device_node *np, *child; + + if (hose != u3_agp) + return 0; + + /* Fixup the PCI<->OF mapping for U3 AGP due to bus renumbering. We + * assume there is no P2P bridge on the AGP bus, which should be a + * safe assumptions for now. 
We should do something better in the + * future though + */ + np = hose->dn; + PCI_DN(np)->busno = 0xf0; + for_each_child_of_node(np, child) + PCI_DN(child)->busno = 0xf0; + + return 0; +} +#endif /* CONFIG_PPC64 */ + void __init pmac_pci_init(void) { struct device_node *np, *root; @@ -914,20 +937,7 @@ void __init pmac_pci_init(void) if (ht && pmac_add_bridge(ht) != 0) of_node_put(ht); - /* Setup the linkage between OF nodes and PHBs */ - pci_devs_phb_init(); - - /* Fixup the PCI<->OF mapping for U3 AGP due to bus renumbering. We - * assume there is no P2P bridge on the AGP bus, which should be a - * safe assumptions for now. We should do something better in the - * future though - */ - if (u3_agp) { - struct device_node *np = u3_agp->dn; - PCI_DN(np)->busno = 0xf0; - for (np = np->child; np; np = np->sibling) - PCI_DN(np)->busno = 0xf0; - } + ppc_md.pcibios_root_bridge_prepare = pmac_pci_root_bridge_prepare; /* pmac_check_ht_link(); */ #else /* CONFIG_PPC64 */ diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index b1ee631..0f1b8bf 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -816,9 +816,6 @@ void __init pnv_pci_init(void) for_each_compatible_node(np, NULL, "ibm,ioda2-npu-phb") pnv_pci_init_npu_phb(np); - /* Setup the linkage between OF nodes and PHBs */ - pci_devs_phb_init(); - /* Configure IOMMU DMA hooks */ set_pci_dma_ops(&dma_iommu_ops); } diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 34668f9..e61e9b9 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -195,11 +195,8 @@ static int pci_dn_reconfig_notifier(struct notifier_block *nb, unsigned long act case OF_RECONFIG_ATTACH_NODE: parent = of_get_parent(np); pdn = parent ? 
PCI_DN(parent) : NULL; - if (pdn) { - /* Create pdn and EEH device */ + if (pdn) pci_add_device_node_info(pdn->phb, np); - eeh_dev_init(PCI_DN(np), pdn->phb); - } of_node_put(parent); break; @@ -422,7 +419,6 @@ static void __init find_and_init_phbs(void) } of_node_put(root); - pci_devs_phb_init(); /* * PCI_PROBE_ONLY and PCI_REASSIGN_ALL_BUS can be set via properties -- cgit v0.10.2 From ebe2253127395b884e1ace5fb48598e1eb4de689 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 20 May 2016 16:41:38 +1000 Subject: powerpc/powernv: Support PCI slot ID The reset and poll functionality from (OPAL) firmware supports PHB and PCI slot at same time. They are identified by ID. This supports PCI slot ID by: * Rename the argument name for opal_pci_reset() and opal_pci_poll() accordingly * Rename pnv_eeh_phb_poll() to pnv_eeh_poll() and adjust its argument name. * One macro is added to produce PCI slot ID. Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 9d86c66..348132c 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -131,7 +131,7 @@ int64_t opal_pci_map_pe_dma_window(uint64_t phb_id, uint16_t pe_number, uint16_t int64_t opal_pci_map_pe_dma_window_real(uint64_t phb_id, uint16_t pe_number, uint16_t dma_window_number, uint64_t pci_start_addr, uint64_t pci_mem_size); -int64_t opal_pci_reset(uint64_t phb_id, uint8_t reset_scope, uint8_t assert_state); +int64_t opal_pci_reset(uint64_t id, uint8_t reset_scope, uint8_t assert_state); int64_t opal_pci_get_hub_diag_data(uint64_t hub_id, void *diag_buffer, uint64_t diag_buffer_len); @@ -148,7 +148,7 @@ int64_t opal_get_dpo_status(__be64 *dpo_timeout); int64_t opal_set_system_attention_led(uint8_t led_action); int64_t opal_pci_next_error(uint64_t phb_id, __be64 *first_frozen_pe, __be16 *pci_error_type, __be16 *severity); -int64_t opal_pci_poll(uint64_t phb_id); +int64_t opal_pci_poll(uint64_t id); int64_t 
opal_return_cpu(void); int64_t opal_check_token(uint64_t token); int64_t opal_reinit_cpus(uint64_t flags); diff --git a/arch/powerpc/include/asm/pnv-pci.h b/arch/powerpc/include/asm/pnv-pci.h index 6f77f71..c607902 100644 --- a/arch/powerpc/include/asm/pnv-pci.h +++ b/arch/powerpc/include/asm/pnv-pci.h @@ -13,6 +13,10 @@ #include #include +#define PCI_SLOT_ID_PREFIX 0x8000000000000000 +#define PCI_SLOT_ID(phb_id, bdfn) \ + (PCI_SLOT_ID_PREFIX | ((uint64_t)(bdfn) << 16) | (phb_id)) + int pnv_phb_to_cxl_mode(struct pci_dev *dev, uint64_t mode); int pnv_cxl_ioda_msi_setup(struct pci_dev *dev, unsigned int hwirq, unsigned int virq); diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index 9226df1..26bb60b 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -717,12 +717,12 @@ static int pnv_eeh_get_state(struct eeh_pe *pe, int *delay) return ret; } -static s64 pnv_eeh_phb_poll(struct pnv_phb *phb) +static s64 pnv_eeh_poll(unsigned long id) { s64 rc = OPAL_HARDWARE; while (1) { - rc = opal_pci_poll(phb->opal_id); + rc = opal_pci_poll(id); if (rc <= 0) break; @@ -762,7 +762,7 @@ int pnv_eeh_phb_reset(struct pci_controller *hose, int option) * reset followed by hot reset on root bus. So we also * need the PCI bus settlement delay. 
*/ - rc = pnv_eeh_phb_poll(phb); + rc = pnv_eeh_poll(phb->opal_id); if (option == EEH_RESET_DEACTIVATE) { if (system_state < SYSTEM_RUNNING) udelay(1000 * EEH_PE_RST_SETTLE_TIME); @@ -805,7 +805,7 @@ static int pnv_eeh_root_reset(struct pci_controller *hose, int option) goto out; /* Poll state of the PHB until the request is done */ - rc = pnv_eeh_phb_poll(phb); + rc = pnv_eeh_poll(phb->opal_id); if (option == EEH_RESET_DEACTIVATE) msleep(EEH_PE_RST_SETTLE_TIME); out: -- cgit v0.10.2 From 9c0e1ecbe19540c23f941c1fdb59e79b16c17d55 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 20 May 2016 16:41:39 +1000 Subject: powerpc/powernv: Use PCI slot reset infrastructure The (OPAL) firmware might provide the PCI slot reset capability which is identified by property "ibm,reset-by-firmware" on the PCI slot associated device node. This routes the reset request to firmware if "ibm,reset-by-firmware" exists in the PCI slot device node. Otherwise, the reset is done inside kernel as before. Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index 26bb60b..86544ea 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -36,6 +36,7 @@ #include #include #include +#include #include "powernv.h" #include "pci.h" @@ -815,7 +816,7 @@ out: return 0; } -static int pnv_eeh_bridge_reset(struct pci_dev *dev, int option) +static int __pnv_eeh_bridge_reset(struct pci_dev *dev, int option) { struct pci_dn *pdn = pci_get_pdn_by_devfn(dev->bus, dev->devfn); struct eeh_dev *edev = pdn_to_eeh_dev(pdn); @@ -866,6 +867,44 @@ static int pnv_eeh_bridge_reset(struct pci_dev *dev, int option) return 0; } +static int pnv_eeh_bridge_reset(struct pci_dev *pdev, int option) +{ + struct pci_controller *hose = pci_bus_to_host(pdev->bus); + struct pnv_phb *phb = hose->private_data; + struct device_node *dn = pci_device_to_OF_node(pdev); + 
uint64_t id = PCI_SLOT_ID(phb->opal_id, + (pdev->bus->number << 8) | pdev->devfn); + uint8_t scope; + int64_t rc; + + /* Hot reset to the bus if firmware cannot handle */ + if (!dn || !of_get_property(dn, "ibm,reset-by-firmware", NULL)) + return __pnv_eeh_bridge_reset(pdev, option); + + switch (option) { + case EEH_RESET_FUNDAMENTAL: + scope = OPAL_RESET_PCI_FUNDAMENTAL; + break; + case EEH_RESET_HOT: + scope = OPAL_RESET_PCI_HOT; + break; + case EEH_RESET_DEACTIVATE: + return 0; + default: + dev_dbg(&pdev->dev, "%s: Unsupported reset %d\n", + __func__, option); + return -EINVAL; + } + + rc = opal_pci_reset(id, scope, OPAL_ASSERT_RESET); + if (rc <= OPAL_SUCCESS) + goto out; + + rc = pnv_eeh_poll(id); +out: + return (rc == OPAL_SUCCESS) ? 0 : -EIO; +} + void pnv_pci_reset_secondary_bus(struct pci_dev *dev) { struct pci_controller *hose; -- cgit v0.10.2 From 7e19bf32c8ac977e7702a67d7392a3e0a9644bc8 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 20 May 2016 16:41:40 +1000 Subject: powerpc/powernv: Introduce pnv_pci_get_slot_id() This introduces pnv_pci_get_slot_id() to get the hotpluggable PCI slot ID from the corresponding device node. It will be used by hotplug driver. 
Requested-by: Andrew Donnellan Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/include/asm/pnv-pci.h b/arch/powerpc/include/asm/pnv-pci.h index c607902..810cc9a 100644 --- a/arch/powerpc/include/asm/pnv-pci.h +++ b/arch/powerpc/include/asm/pnv-pci.h @@ -17,6 +17,8 @@ #define PCI_SLOT_ID(phb_id, bdfn) \ (PCI_SLOT_ID_PREFIX | ((uint64_t)(bdfn) << 16) | (phb_id)) +extern int pnv_pci_get_slot_id(struct device_node *np, uint64_t *id); + int pnv_phb_to_cxl_mode(struct pci_dev *dev, uint64_t mode); int pnv_cxl_ioda_msi_setup(struct pci_dev *dev, unsigned int hwirq, unsigned int virq); diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 0f1b8bf..2607d29 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -36,6 +37,43 @@ #include "powernv.h" #include "pci.h" +int pnv_pci_get_slot_id(struct device_node *np, uint64_t *id) +{ + struct device_node *parent = np; + u32 bdfn; + u64 phbid; + int ret; + + ret = of_property_read_u32(np, "reg", &bdfn); + if (ret) + return -ENXIO; + + bdfn = ((bdfn & 0x00ffff00) >> 8); + while ((parent = of_get_parent(parent))) { + if (!PCI_DN(parent)) { + of_node_put(parent); + break; + } + + if (!of_device_is_compatible(parent, "ibm,ioda2-phb")) { + of_node_put(parent); + continue; + } + + ret = of_property_read_u64(parent, "ibm,opal-phbid", &phbid); + if (ret) { + of_node_put(parent); + return -ENXIO; + } + + *id = PCI_SLOT_ID(phbid, bdfn); + return 0; + } + + return -ENODEV; +} +EXPORT_SYMBOL_GPL(pnv_pci_get_slot_id); + #ifdef CONFIG_PCI_MSI int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) { -- cgit v0.10.2 From ea0d856cb26d4d78b9529de49a0f89379224e2f9 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 20 May 2016 16:41:41 +1000 Subject: powerpc/powernv: Functions to get/set PCI slot state This exports 4 functions, which base on 
the corresponding OPAL APIs to get/set PCI slot status. Those functions are going to be used by PowerNV PCI hotplug driver: pnv_pci_get_device_tree() opal_get_device_tree() pnv_pci_get_presence_state() opal_pci_get_presence_state() pnv_pci_get_power_state() opal_pci_get_power_state() pnv_pci_set_power_state() opal_pci_set_power_state() Signed-off-by: Gavin Shan Reviewed-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h index cd9371b..72b5f27 100644 --- a/arch/powerpc/include/asm/opal-api.h +++ b/arch/powerpc/include/asm/opal-api.h @@ -158,7 +158,11 @@ #define OPAL_LEDS_SET_INDICATOR 115 #define OPAL_CEC_REBOOT2 116 #define OPAL_CONSOLE_FLUSH 117 -#define OPAL_LAST 117 +#define OPAL_GET_DEVICE_TREE 118 +#define OPAL_PCI_GET_PRESENCE_STATE 119 +#define OPAL_PCI_GET_POWER_STATE 120 +#define OPAL_PCI_SET_POWER_STATE 121 +#define OPAL_LAST 121 /* Device tree flags */ @@ -344,6 +348,18 @@ enum OpalPciResetState { OPAL_ASSERT_RESET = 1 }; +enum OpalPciSlotPresence { + OPAL_PCI_SLOT_EMPTY = 0, + OPAL_PCI_SLOT_PRESENT = 1 +}; + +enum OpalPciSlotPower { + OPAL_PCI_SLOT_POWER_OFF = 0, + OPAL_PCI_SLOT_POWER_ON = 1, + OPAL_PCI_SLOT_OFFLINE = 2, + OPAL_PCI_SLOT_ONLINE = 3 +}; + enum OpalSlotLedType { OPAL_SLOT_LED_TYPE_ID = 0, /* IDENTIFY LED */ OPAL_SLOT_LED_TYPE_FAULT = 1, /* FAULT LED */ diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 348132c..fa71fea 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -209,6 +209,12 @@ int64_t opal_flash_write(uint64_t id, uint64_t offset, uint64_t buf, uint64_t size, uint64_t token); int64_t opal_flash_erase(uint64_t id, uint64_t offset, uint64_t size, uint64_t token); +int64_t opal_get_device_tree(uint32_t phandle, uint64_t buf, uint64_t len); +int64_t opal_pci_get_presence_state(uint64_t id, uint64_t data); +int64_t opal_pci_get_power_state(uint64_t id, uint64_t data); 
+int64_t opal_pci_set_power_state(uint64_t async_token, uint64_t id, + uint64_t data); +int64_t opal_pci_poll2(uint64_t id, uint64_t data); /* Internal functions */ extern int early_init_dt_scan_opal(unsigned long node, const char *uname, diff --git a/arch/powerpc/include/asm/pnv-pci.h b/arch/powerpc/include/asm/pnv-pci.h index 810cc9a..791db1b 100644 --- a/arch/powerpc/include/asm/pnv-pci.h +++ b/arch/powerpc/include/asm/pnv-pci.h @@ -12,12 +12,18 @@ #include #include +#include #define PCI_SLOT_ID_PREFIX 0x8000000000000000 #define PCI_SLOT_ID(phb_id, bdfn) \ (PCI_SLOT_ID_PREFIX | ((uint64_t)(bdfn) << 16) | (phb_id)) extern int pnv_pci_get_slot_id(struct device_node *np, uint64_t *id); +extern int pnv_pci_get_device_tree(uint32_t phandle, void *buf, uint64_t len); +extern int pnv_pci_get_presence_state(uint64_t id, uint8_t *state); +extern int pnv_pci_get_power_state(uint64_t id, uint8_t *state); +extern int pnv_pci_set_power_state(uint64_t id, uint8_t state, + struct opal_msg *msg); int pnv_phb_to_cxl_mode(struct pci_dev *dev, uint64_t mode); int pnv_cxl_ioda_msi_setup(struct pci_dev *dev, unsigned int hwirq, diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index e45b88a..3ea1a855 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -302,3 +302,7 @@ OPAL_CALL(opal_prd_msg, OPAL_PRD_MSG); OPAL_CALL(opal_leds_get_ind, OPAL_LEDS_GET_INDICATOR); OPAL_CALL(opal_leds_set_ind, OPAL_LEDS_SET_INDICATOR); OPAL_CALL(opal_console_flush, OPAL_CONSOLE_FLUSH); +OPAL_CALL(opal_get_device_tree, OPAL_GET_DEVICE_TREE); +OPAL_CALL(opal_pci_get_presence_state, OPAL_PCI_GET_PRESENCE_STATE); +OPAL_CALL(opal_pci_get_power_state, OPAL_PCI_GET_POWER_STATE); +OPAL_CALL(opal_pci_set_power_state, OPAL_PCI_SET_POWER_STATE); diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 2607d29..62c7637 100644 --- 
a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -74,6 +74,88 @@ int pnv_pci_get_slot_id(struct device_node *np, uint64_t *id) } EXPORT_SYMBOL_GPL(pnv_pci_get_slot_id); +int pnv_pci_get_device_tree(uint32_t phandle, void *buf, uint64_t len) +{ + int64_t rc; + + if (!opal_check_token(OPAL_GET_DEVICE_TREE)) + return -ENXIO; + + rc = opal_get_device_tree(phandle, (uint64_t)buf, len); + if (rc < OPAL_SUCCESS) + return -EIO; + + return rc; +} +EXPORT_SYMBOL_GPL(pnv_pci_get_device_tree); + +int pnv_pci_get_presence_state(uint64_t id, uint8_t *state) +{ + int64_t rc; + + if (!opal_check_token(OPAL_PCI_GET_PRESENCE_STATE)) + return -ENXIO; + + rc = opal_pci_get_presence_state(id, (uint64_t)state); + if (rc != OPAL_SUCCESS) + return -EIO; + + return 0; +} +EXPORT_SYMBOL_GPL(pnv_pci_get_presence_state); + +int pnv_pci_get_power_state(uint64_t id, uint8_t *state) +{ + int64_t rc; + + if (!opal_check_token(OPAL_PCI_GET_POWER_STATE)) + return -ENXIO; + + rc = opal_pci_get_power_state(id, (uint64_t)state); + if (rc != OPAL_SUCCESS) + return -EIO; + + return 0; +} +EXPORT_SYMBOL_GPL(pnv_pci_get_power_state); + +int pnv_pci_set_power_state(uint64_t id, uint8_t state, struct opal_msg *msg) +{ + struct opal_msg m; + int token, ret; + int64_t rc; + + if (!opal_check_token(OPAL_PCI_SET_POWER_STATE)) + return -ENXIO; + + token = opal_async_get_token_interruptible(); + if (unlikely(token < 0)) + return token; + + rc = opal_pci_set_power_state(token, id, (uint64_t)&state); + if (rc == OPAL_SUCCESS) { + ret = 0; + goto exit; + } else if (rc != OPAL_ASYNC_COMPLETION) { + ret = -EIO; + goto exit; + } + + ret = opal_async_wait_response(token, &m); + if (ret < 0) + goto exit; + + if (msg) { + ret = 1; + memcpy(msg, &m, sizeof(m)); + } + +exit: + opal_async_release_token(token); + return ret; +} +EXPORT_SYMBOL_GPL(pnv_pci_set_power_state); + #ifdef CONFIG_PCI_MSI int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) { -- cgit v0.10.2 From 
66725152fb9f17bbc17d976da7678b3e098b3b01 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 20 May 2016 16:41:42 +1000 Subject: PCI/hotplug: PowerPC PowerNV PCI hotplug driver This adds a standalone driver to support PCI hotplug for the PowerPC PowerNV platform that runs on top of skiboot firmware. The firmware identifies hotpluggable slots and marks their device tree nodes with the proper "ibm,slot-pluggable" and "ibm,reset-by-firmware" properties. The driver scans device tree nodes to create/register PCI hotplug slots accordingly. The PCI slots are organized as a tree, which means one PCI slot might have a parent PCI slot, and a parent PCI slot may contain multiple child PCI slots. At plugging time, the parent PCI slot is populated before its children. The child PCI slots are removed before their parent PCI slot can be removed from the system. If the skiboot firmware doesn't support slot status retrieval, the PCI slot device node shouldn't have the property "ibm,reset-by-firmware". In that case, no valid PCI slots will be detected from the device tree. The skiboot firmware doesn't export the capability to access attention LEDs yet; that is still to be done. Signed-off-by: Gavin Shan Acked-by: Bjorn Helgaas Signed-off-by: Michael Ellerman diff --git a/MAINTAINERS b/MAINTAINERS index 16700e4..ea80d71 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6857,6 +6857,7 @@ F: drivers/crypto/nx/ F: drivers/crypto/vmx/ F: drivers/net/ethernet/ibm/ibmveth.* F: drivers/net/ethernet/ibm/ibmvnic.* +F: drivers/pci/hotplug/pnv_php.c F: drivers/pci/hotplug/rpa* F: drivers/scsi/ibmvscsi/ N: opal diff --git a/drivers/pci/hotplug/Kconfig b/drivers/pci/hotplug/Kconfig index df8caec..aadce45 100644 --- a/drivers/pci/hotplug/Kconfig +++ b/drivers/pci/hotplug/Kconfig @@ -113,6 +113,19 @@ config HOTPLUG_PCI_SHPC When in doubt, say N.
+config HOTPLUG_PCI_POWERNV + tristate "PowerPC PowerNV PCI Hotplug driver" + depends on PPC_POWERNV && EEH + select OF_DYNAMIC + help + Say Y here if you run PowerPC PowerNV platform that supports + PCI Hotplug + + To compile this driver as a module, choose M here: the + module will be called pnv-php. + + When in doubt, say N. + config HOTPLUG_PCI_RPA tristate "RPA PCI Hotplug driver" depends on PPC_PSERIES && EEH diff --git a/drivers/pci/hotplug/Makefile b/drivers/pci/hotplug/Makefile index b616e75..e33cdda 100644 --- a/drivers/pci/hotplug/Makefile +++ b/drivers/pci/hotplug/Makefile @@ -14,6 +14,7 @@ obj-$(CONFIG_HOTPLUG_PCI_PCIE) += pciehp.o obj-$(CONFIG_HOTPLUG_PCI_CPCI_ZT5550) += cpcihp_zt5550.o obj-$(CONFIG_HOTPLUG_PCI_CPCI_GENERIC) += cpcihp_generic.o obj-$(CONFIG_HOTPLUG_PCI_SHPC) += shpchp.o +obj-$(CONFIG_HOTPLUG_PCI_POWERNV) += pnv-php.o obj-$(CONFIG_HOTPLUG_PCI_RPA) += rpaphp.o obj-$(CONFIG_HOTPLUG_PCI_RPA_DLPAR) += rpadlpar_io.o obj-$(CONFIG_HOTPLUG_PCI_SGI) += sgi_hotplug.o @@ -50,6 +51,8 @@ ibmphp-objs := ibmphp_core.o \ acpiphp-objs := acpiphp_core.o \ acpiphp_glue.o +pnv-php-objs := pnv_php.o + rpaphp-objs := rpaphp_core.o \ rpaphp_pci.o \ rpaphp_slot.o diff --git a/drivers/pci/hotplug/pnv_php.c b/drivers/pci/hotplug/pnv_php.c new file mode 100644 index 0000000..6086db6 --- /dev/null +++ b/drivers/pci/hotplug/pnv_php.c @@ -0,0 +1,733 @@ +/* + * PCI Hotplug Driver for PowerPC PowerNV platform. + * + * Copyright Gavin Shan, IBM Corporation 2016. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ */ + +#include +#include +#include +#include + +#include +#include +#include + +#define DRIVER_VERSION "0.1" +#define DRIVER_AUTHOR "Gavin Shan, IBM Corporation" +#define DRIVER_DESC "PowerPC PowerNV PCI Hotplug Driver" + +struct pnv_php_slot { + struct hotplug_slot slot; + struct hotplug_slot_info slot_info; + uint64_t id; + char *name; + int slot_no; + struct kref kref; +#define PNV_PHP_STATE_INITIALIZED 0 +#define PNV_PHP_STATE_REGISTERED 1 +#define PNV_PHP_STATE_POPULATED 2 +#define PNV_PHP_STATE_OFFLINE 3 + int state; + struct device_node *dn; + struct pci_dev *pdev; + struct pci_bus *bus; + bool power_state_check; + void *fdt; + void *dt; + struct of_changeset ocs; + struct pnv_php_slot *parent; + struct list_head children; + struct list_head link; +}; + +static LIST_HEAD(pnv_php_slot_list); +static DEFINE_SPINLOCK(pnv_php_lock); + +static void pnv_php_register(struct device_node *dn); +static void pnv_php_unregister_one(struct device_node *dn); +static void pnv_php_unregister(struct device_node *dn); + +static void pnv_php_free_slot(struct kref *kref) +{ + struct pnv_php_slot *php_slot = container_of(kref, + struct pnv_php_slot, kref); + + WARN_ON(!list_empty(&php_slot->children)); + kfree(php_slot->name); + kfree(php_slot); +} + +static inline void pnv_php_put_slot(struct pnv_php_slot *php_slot) +{ + + if (WARN_ON(!php_slot)) + return; + + kref_put(&php_slot->kref, pnv_php_free_slot); +} + +static struct pnv_php_slot *pnv_php_match(struct device_node *dn, + struct pnv_php_slot *php_slot) +{ + struct pnv_php_slot *target, *tmp; + + if (php_slot->dn == dn) { + kref_get(&php_slot->kref); + return php_slot; + } + + list_for_each_entry(tmp, &php_slot->children, link) { + target = pnv_php_match(dn, tmp); + if (target) + return target; + } + + return NULL; +} + +static struct pnv_php_slot *pnv_php_find_slot(struct device_node *dn) +{ + struct pnv_php_slot *php_slot, *tmp; + unsigned long flags; + + spin_lock_irqsave(&pnv_php_lock, flags); + 
list_for_each_entry(tmp, &pnv_php_slot_list, link) { + php_slot = pnv_php_match(dn, tmp); + if (php_slot) { + spin_unlock_irqrestore(&pnv_php_lock, flags); + return php_slot; + } + } + spin_unlock_irqrestore(&pnv_php_lock, flags); + + return NULL; +} + +/* + * Remove pdn for all children of the indicated device node. + * The function should remove pdn in a depth-first manner. + */ +static void pnv_php_rmv_pdns(struct device_node *dn) +{ + struct device_node *child; + + for_each_child_of_node(dn, child) { + pnv_php_rmv_pdns(child); + + pci_remove_device_node_info(child); + } +} + +/* + * Detach all child nodes of the indicated device nodes. The + * function should handle device nodes in depth-first manner. + * + * We should not invoke of_node_release() as the memory for + * individual device node is part of large memory block. The + * large block is allocated from memblock (system bootup) or + * kmalloc() when unflattening the device tree by OF changeset. + * We can not free the large block allocated from memblock. For + * later case, it should be released at once. + */ +static void pnv_php_detach_device_nodes(struct device_node *parent) +{ + struct device_node *dn; + int refcount; + + for_each_child_of_node(parent, dn) { + pnv_php_detach_device_nodes(dn); + + of_node_put(dn); + refcount = atomic_read(&dn->kobj.kref.refcount); + if (unlikely(refcount != 1)) + pr_warn("Invalid refcount %d on <%s>\n", + refcount, of_node_full_name(dn)); + + of_detach_node(dn); + } +} + +static void pnv_php_rmv_devtree(struct pnv_php_slot *php_slot) +{ + pnv_php_rmv_pdns(php_slot->dn); + + /* + * Decrease the refcount if the device nodes were created + * through OF changeset before detaching them. 
+ */ + if (php_slot->fdt) + of_changeset_destroy(&php_slot->ocs); + pnv_php_detach_device_nodes(php_slot->dn); + + if (php_slot->fdt) { + kfree(php_slot->dt); + kfree(php_slot->fdt); + php_slot->dt = NULL; + php_slot->dn->child = NULL; + php_slot->fdt = NULL; + } +} + +/* + * As the nodes in OF changeset are applied in reverse order, we + * need revert the nodes in advance so that we have correct node + * order after the changeset is applied. + */ +static void pnv_php_reverse_nodes(struct device_node *parent) +{ + struct device_node *child, *next; + + /* In-depth first */ + for_each_child_of_node(parent, child) + pnv_php_reverse_nodes(child); + + /* Reverse the nodes in the child list */ + child = parent->child; + parent->child = NULL; + while (child) { + next = child->sibling; + + child->sibling = parent->child; + parent->child = child; + child = next; + } +} + +static int pnv_php_populate_changeset(struct of_changeset *ocs, + struct device_node *dn) +{ + struct device_node *child; + int ret = 0; + + for_each_child_of_node(dn, child) { + ret = of_changeset_attach_node(ocs, child); + if (unlikely(ret)) + break; + + ret = pnv_php_populate_changeset(ocs, child); + if (unlikely(ret)) + break; + } + + return ret; +} + +static void *pnv_php_add_one_pdn(struct device_node *dn, void *data) +{ + struct pci_controller *hose = (struct pci_controller *)data; + struct pci_dn *pdn; + + pdn = pci_add_device_node_info(hose, dn); + if (unlikely(!pdn)) + return ERR_PTR(-ENOMEM); + + return NULL; +} + +static void pnv_php_add_pdns(struct pnv_php_slot *slot) +{ + struct pci_controller *hose = pci_bus_to_host(slot->bus); + + pci_traverse_device_nodes(slot->dn, pnv_php_add_one_pdn, hose); +} + +static int pnv_php_add_devtree(struct pnv_php_slot *php_slot) +{ + void *fdt, *fdt1, *dt; + int ret; + + /* We don't know the FDT blob size. We try to get it through + * maximal memory chunk and then copy it to another chunk that + * fits the real size. 
+ */ + fdt1 = kzalloc(0x10000, GFP_KERNEL); + if (unlikely(!fdt1)) { + ret = -ENOMEM; + dev_warn(&php_slot->pdev->dev, "Cannot alloc FDT blob\n"); + goto out; + } + + ret = pnv_pci_get_device_tree(php_slot->dn->phandle, fdt1, 0x10000); + if (unlikely(ret)) { + dev_warn(&php_slot->pdev->dev, "Error %d getting FDT blob\n", + ret); + goto free_fdt1; + } + + fdt = kzalloc(fdt_totalsize(fdt1), GFP_KERNEL); + if (unlikely(!fdt)) { + ret = -ENOMEM; + dev_warn(&php_slot->pdev->dev, "Cannot %d bytes memory\n", + fdt_totalsize(fdt1)); + goto free_fdt1; + } + + /* Unflatten device tree blob */ + memcpy(fdt, fdt1, fdt_totalsize(fdt1)); + dt = of_fdt_unflatten_tree(fdt, php_slot->dn, NULL); + if (unlikely(!dt)) { + ret = -EINVAL; + dev_warn(&php_slot->pdev->dev, "Cannot unflatten FDT\n"); + goto free_fdt; + } + + /* Initialize and apply the changeset */ + of_changeset_init(&php_slot->ocs); + pnv_php_reverse_nodes(php_slot->dn); + ret = pnv_php_populate_changeset(&php_slot->ocs, php_slot->dn); + if (unlikely(ret)) { + pnv_php_reverse_nodes(php_slot->dn); + dev_warn(&php_slot->pdev->dev, "Error %d populating changeset\n", + ret); + goto free_dt; + } + + php_slot->dn->child = NULL; + ret = of_changeset_apply(&php_slot->ocs); + if (unlikely(ret)) { + dev_warn(&php_slot->pdev->dev, "Error %d applying changeset\n", + ret); + goto destroy_changeset; + } + + /* Add device node firmware data */ + pnv_php_add_pdns(php_slot); + php_slot->fdt = fdt; + php_slot->dt = dt; + kfree(fdt1); + goto out; + +destroy_changeset: + of_changeset_destroy(&php_slot->ocs); +free_dt: + kfree(dt); + php_slot->dn->child = NULL; +free_fdt: + kfree(fdt); +free_fdt1: + kfree(fdt1); +out: + return ret; +} + +static int pnv_php_set_slot_power_state(struct hotplug_slot *slot, + uint8_t state) +{ + struct pnv_php_slot *php_slot = slot->private; + struct opal_msg msg; + int ret; + + ret = pnv_pci_set_power_state(php_slot->id, state, &msg); + if (likely(ret > 0)) { + if (be64_to_cpu(msg.params[1]) != 
php_slot->dn->phandle || + be64_to_cpu(msg.params[2]) != state || + be64_to_cpu(msg.params[3]) != OPAL_SUCCESS) { + dev_warn(&php_slot->pdev->dev, "Wrong msg (%lld, %lld, %lld)\n", + be64_to_cpu(msg.params[1]), + be64_to_cpu(msg.params[2]), + be64_to_cpu(msg.params[3])); + return -ENOMSG; + } + } else if (unlikely(ret < 0)) { + dev_warn(&php_slot->pdev->dev, "Error %d powering %s\n", + ret, (state == OPAL_PCI_SLOT_POWER_ON) ? "on" : "off"); + return ret; + } + + if (state == OPAL_PCI_SLOT_POWER_OFF) + pnv_php_rmv_devtree(php_slot); + else + ret = pnv_php_add_devtree(php_slot); + + return ret; +} + +static int pnv_php_get_power_state(struct hotplug_slot *slot, u8 *state) +{ + struct pnv_php_slot *php_slot = slot->private; + uint8_t power_state = OPAL_PCI_SLOT_POWER_ON; + int ret; + + /* + * Retrieve power status from firmware. If we fail + * getting that, the power status fails back to + * be on. + */ + ret = pnv_pci_get_power_state(php_slot->id, &power_state); + if (unlikely(ret)) { + dev_warn(&php_slot->pdev->dev, "Error %d getting power status\n", + ret); + } else { + *state = power_state; + slot->info->power_status = power_state; + } + + return 0; +} + +static int pnv_php_get_adapter_state(struct hotplug_slot *slot, u8 *state) +{ + struct pnv_php_slot *php_slot = slot->private; + uint8_t presence = OPAL_PCI_SLOT_EMPTY; + int ret; + + /* + * Retrieve presence status from firmware. If we can't + * get that, it will fail back to be empty. 
+ */ + ret = pnv_pci_get_presence_state(php_slot->id, &presence); + if (likely(ret >= 0)) { + *state = presence; + slot->info->adapter_status = presence; + ret = 0; + } else { + dev_warn(&php_slot->pdev->dev, "Error %d getting presence\n", + ret); + } + + return ret; +} + +static int pnv_php_set_attention_state(struct hotplug_slot *slot, u8 state) +{ + /* FIXME: Make it real once firmware supports it */ + slot->info->attention_status = state; + + return 0; +} + +static int pnv_php_enable(struct pnv_php_slot *php_slot, bool rescan) +{ + struct hotplug_slot *slot = &php_slot->slot; + uint8_t presence = OPAL_PCI_SLOT_EMPTY; + uint8_t power_status = OPAL_PCI_SLOT_POWER_ON; + int ret; + + /* Check if the slot has been configured */ + if (php_slot->state != PNV_PHP_STATE_REGISTERED) + return 0; + + /* Retrieve slot presence status */ + ret = pnv_php_get_adapter_state(slot, &presence); + if (unlikely(ret)) + return ret; + + /* Proceed if there have nothing behind the slot */ + if (presence == OPAL_PCI_SLOT_EMPTY) + goto scan; + + /* + * If the power supply to the slot is off, we can't detect + * adapter presence state. That means we have to turn the + * slot on before going to probe slot's presence state. + * + * On the first time, we don't change the power status to + * boost system boot with assumption that the firmware + * supplies consistent slot power status: empty slot always + * has its power off and non-empty slot has its power on. + */ + if (!php_slot->power_state_check) { + php_slot->power_state_check = true; + + ret = pnv_php_get_power_state(slot, &power_status); + if (unlikely(ret)) + return ret; + + if (power_status != OPAL_PCI_SLOT_POWER_ON) + return 0; + } + + /* Check the power status. 
Scan the slot if it is already on */ + ret = pnv_php_get_power_state(slot, &power_status); + if (unlikely(ret)) + return ret; + + if (power_status == OPAL_PCI_SLOT_POWER_ON) + goto scan; + + /* Power is off, turn it on and then scan the slot */ + ret = pnv_php_set_slot_power_state(slot, OPAL_PCI_SLOT_POWER_ON); + if (unlikely(ret)) + return ret; + +scan: + if (presence == OPAL_PCI_SLOT_PRESENT) { + if (rescan) { + pci_lock_rescan_remove(); + pci_hp_add_devices(php_slot->bus); + pci_unlock_rescan_remove(); + } + + /* Rescan for child hotpluggable slots */ + php_slot->state = PNV_PHP_STATE_POPULATED; + if (rescan) + pnv_php_register(php_slot->dn); + } else { + php_slot->state = PNV_PHP_STATE_POPULATED; + } + + return 0; +} + +static int pnv_php_enable_slot(struct hotplug_slot *slot) +{ + struct pnv_php_slot *php_slot = container_of(slot, + struct pnv_php_slot, slot); + + return pnv_php_enable(php_slot, true); +} + +static int pnv_php_disable_slot(struct hotplug_slot *slot) +{ + struct pnv_php_slot *php_slot = slot->private; + int ret; + + if (php_slot->state != PNV_PHP_STATE_POPULATED) + return 0; + + /* Remove all devices behind the slot */ + pci_lock_rescan_remove(); + pci_hp_remove_devices(php_slot->bus); + pci_unlock_rescan_remove(); + + /* Detach the child hotpluggable slots */ + pnv_php_unregister(php_slot->dn); + + /* Notify firmware and remove device nodes */ + ret = pnv_php_set_slot_power_state(slot, OPAL_PCI_SLOT_POWER_OFF); + + php_slot->state = PNV_PHP_STATE_REGISTERED; + return ret; +} + +static struct hotplug_slot_ops php_slot_ops = { + .get_power_status = pnv_php_get_power_state, + .get_adapter_status = pnv_php_get_adapter_state, + .set_attention_status = pnv_php_set_attention_state, + .enable_slot = pnv_php_enable_slot, + .disable_slot = pnv_php_disable_slot, +}; + +static void pnv_php_release(struct hotplug_slot *slot) +{ + struct pnv_php_slot *php_slot = slot->private; + unsigned long flags; + + /* Remove from global or child list */ + 
spin_lock_irqsave(&pnv_php_lock, flags); + list_del(&php_slot->link); + spin_unlock_irqrestore(&pnv_php_lock, flags); + + /* Detach from parent */ + pnv_php_put_slot(php_slot); + pnv_php_put_slot(php_slot->parent); +} + +static struct pnv_php_slot *pnv_php_alloc_slot(struct device_node *dn) +{ + struct pnv_php_slot *php_slot; + struct pci_bus *bus; + const char *label; + uint64_t id; + + label = of_get_property(dn, "ibm,slot-label", NULL); + if (unlikely(!label)) + return NULL; + + if (pnv_pci_get_slot_id(dn, &id)) + return NULL; + + bus = pci_find_bus_by_node(dn); + if (unlikely(!bus)) + return NULL; + + php_slot = kzalloc(sizeof(*php_slot), GFP_KERNEL); + if (unlikely(!php_slot)) + return NULL; + + php_slot->name = kstrdup(label, GFP_KERNEL); + if (unlikely(!php_slot->name)) { + kfree(php_slot); + return NULL; + } + + if (likely(dn->child && PCI_DN(dn->child))) + php_slot->slot_no = PCI_SLOT(PCI_DN(dn->child)->devfn); + else + php_slot->slot_no = -1; /* Placeholder slot */ + + kref_init(&php_slot->kref); + php_slot->state = PNV_PHP_STATE_INITIALIZED; + php_slot->dn = dn; + php_slot->pdev = bus->self; + php_slot->bus = bus; + php_slot->id = id; + php_slot->power_state_check = false; + php_slot->slot.ops = &php_slot_ops; + php_slot->slot.info = &php_slot->slot_info; + php_slot->slot.release = pnv_php_release; + php_slot->slot.private = php_slot; + + INIT_LIST_HEAD(&php_slot->children); + INIT_LIST_HEAD(&php_slot->link); + + return php_slot; +} + +static int pnv_php_register_slot(struct pnv_php_slot *php_slot) +{ + struct pnv_php_slot *parent; + struct device_node *dn = php_slot->dn; + unsigned long flags; + int ret; + + /* Check if the slot is registered or not */ + parent = pnv_php_find_slot(php_slot->dn); + if (unlikely(parent)) { + pnv_php_put_slot(parent); + return -EEXIST; + } + + /* Register PCI slot */ + ret = pci_hp_register(&php_slot->slot, php_slot->bus, + php_slot->slot_no, php_slot->name); + if (unlikely(ret)) { + dev_warn(&php_slot->pdev->dev, "Error 
%d registering slot\n", + ret); + return ret; + } + + /* Attach to the parent's child list or global list */ + while ((dn = of_get_parent(dn))) { + if (!PCI_DN(dn)) { + of_node_put(dn); + break; + } + + parent = pnv_php_find_slot(dn); + if (parent) { + of_node_put(dn); + break; + } + + of_node_put(dn); + } + + spin_lock_irqsave(&pnv_php_lock, flags); + php_slot->parent = parent; + if (parent) + list_add_tail(&php_slot->link, &parent->children); + else + list_add_tail(&php_slot->link, &pnv_php_slot_list); + spin_unlock_irqrestore(&pnv_php_lock, flags); + + php_slot->state = PNV_PHP_STATE_REGISTERED; + return 0; +} + +static int pnv_php_register_one(struct device_node *dn) +{ + struct pnv_php_slot *php_slot; + const __be32 *prop32; + int ret; + + /* Check if it's hotpluggable slot */ + prop32 = of_get_property(dn, "ibm,slot-pluggable", NULL); + if (!prop32 || !of_read_number(prop32, 1)) + return -ENXIO; + + prop32 = of_get_property(dn, "ibm,reset-by-firmware", NULL); + if (!prop32 || !of_read_number(prop32, 1)) + return -ENXIO; + + php_slot = pnv_php_alloc_slot(dn); + if (unlikely(!php_slot)) + return -ENODEV; + + ret = pnv_php_register_slot(php_slot); + if (unlikely(ret)) + goto free_slot; + + ret = pnv_php_enable(php_slot, false); + if (unlikely(ret)) + goto unregister_slot; + + return 0; + +unregister_slot: + pnv_php_unregister_one(php_slot->dn); +free_slot: + pnv_php_put_slot(php_slot); + return ret; +} + +static void pnv_php_register(struct device_node *dn) +{ + struct device_node *child; + + /* + * The parent slots should be registered before their + * child slots. 
+ */ + for_each_child_of_node(dn, child) { + pnv_php_register_one(child); + pnv_php_register(child); + } +} + +static void pnv_php_unregister_one(struct device_node *dn) +{ + struct pnv_php_slot *php_slot; + + php_slot = pnv_php_find_slot(dn); + if (!php_slot) + return; + + php_slot->state = PNV_PHP_STATE_OFFLINE; + pnv_php_put_slot(php_slot); + pci_hp_deregister(&php_slot->slot); +} + +static void pnv_php_unregister(struct device_node *dn) +{ + struct device_node *child; + + /* The child slots should go before their parent slots */ + for_each_child_of_node(dn, child) { + pnv_php_unregister(child); + pnv_php_unregister_one(child); + } +} + +static int __init pnv_php_init(void) +{ + struct device_node *dn; + + pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n"); + for_each_compatible_node(dn, NULL, "ibm,ioda2-phb") + pnv_php_register(dn); + + return 0; +} + +static void __exit pnv_php_exit(void) +{ + struct device_node *dn; + + for_each_compatible_node(dn, NULL, "ibm,ioda2-phb") + pnv_php_unregister(dn); +} + +module_init(pnv_php_init); +module_exit(pnv_php_exit); + +MODULE_VERSION(DRIVER_VERSION); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR(DRIVER_AUTHOR); +MODULE_DESCRIPTION(DRIVER_DESC); -- cgit v0.10.2 From 9497a1c1c5b4de2a359b6d8648b4000679363473 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 21 Jun 2016 12:35:56 +1000 Subject: powerpc/powernv: Print correct PHB type names We're initializing "IODA1" and "IODA2" PHBs though they are IODA2 and NPU PHBs as below kernel log indicates. Initializing IODA1 OPAL PHB /pciex@3fffe40700000 Initializing IODA2 OPAL PHB /pciex@3fff000400000 This fixes the PHB names. 
After it's applied, we get: Initializing IODA2 PHB (/pciex@3fffe40700000) Initializing NPU PHB (/pciex@3fff000400000) Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 4e0b2fc..2115ed7 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -55,6 +55,7 @@ #define POWERNV_IOMMU_DEFAULT_LEVELS 1 #define POWERNV_IOMMU_MAX_LEVELS 5 +static const char * const pnv_phb_names[] = { "IODA1", "IODA2", "NPU" }; static void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl); void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level, @@ -3628,7 +3629,8 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, void *aux; long rc; - pr_info("Initializing IODA%d OPAL PHB %s\n", ioda_type, np->full_name); + pr_info("Initializing %s PHB (%s)\n", + pnv_phb_names[ioda_type], of_node_full_name(np)); prop64 = of_get_property(np, "ibm,opal-phbid", NULL); if (!prop64) { -- cgit v0.10.2 From 5593e3032736ccba30d28bd27ebf9e8671980ca9 Mon Sep 17 00:00:00 2001 From: "Shreyas B. Prabhu" Date: Wed, 8 Jun 2016 11:54:27 -0500 Subject: powerpc/powernv: set power_save func after the idle states are initialized pnv_init_idle_states() discovers supported idle states from the device tree and does the required initialization. Set power_save function pointer only after this initialization is done Otherwise on machines which don't support nap, eg. Power9, the kernel will crash when it tries to nap. Reviewed-by: Gautham R. Shenoy Signed-off-by: Shreyas B. 
Prabhu Acked-by: Benjamin Herrenschmidt Acked-by: Michael Neuling Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c index 92a8020..8a77f5c 100644 --- a/arch/powerpc/platforms/powernv/idle.c +++ b/arch/powerpc/platforms/powernv/idle.c @@ -285,6 +285,9 @@ static int __init pnv_init_idle_states(void) } pnv_alloc_idle_core_states(); + + if (supported_cpuidle_states & OPAL_PM_NAP_ENABLED) + ppc_md.power_save = power7_idle; out_free: kfree(flags); out: diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index ee6430b..8492bbb 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -315,7 +315,7 @@ define_machine(powernv) { .get_proc_freq = pnv_get_proc_freq, .progress = pnv_progress, .machine_shutdown = pnv_shutdown, - .power_save = power7_idle, + .power_save = NULL, .calibrate_decr = generic_calibrate_decr, #ifdef CONFIG_KEXEC .kexec_cpu_down = pnv_kexec_cpu_down, -- cgit v0.10.2 From aaf2f7e09932a08c1287d8e4c602bccbe98f60a6 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Wed, 22 Jun 2016 21:55:02 +0530 Subject: powerpc/bpf/jit: Fix/enhance 32-bit Load Immediate implementation The existing LI32() macro can sometimes result in a sign-extended 32-bit load that does not clear the top 32-bits properly. As an example, loading 0x7fffffff results in the register containing 0xffffffff7fffffff. While this does not impact classic BPF JIT implementation (since that only uses the lower word for all operations), we would like to share this macro between classic BPF JIT and extended BPF JIT, wherein the entire 64-bit value in the register matters. Fix this by first doing a shifted LI followed by ORI. An additional optimization is with loading values between -32768 to -1, where we now only need a single LI. The new implementation now generates the same or less number of instructions. 
Acked-by: Alexei Starovoitov Signed-off-by: Naveen N. Rao Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h index 889fd19..a9882db 100644 --- a/arch/powerpc/net/bpf_jit.h +++ b/arch/powerpc/net/bpf_jit.h @@ -232,10 +232,17 @@ DECLARE_LOAD_FUNC(sk_load_byte_msh); (((cond) & 0x3ff) << 16) | \ (((dest) - (ctx->idx * 4)) & \ 0xfffc)) -#define PPC_LI32(d, i) do { PPC_LI(d, IMM_L(i)); \ - if ((u32)(uintptr_t)(i) >= 32768) { \ - PPC_ADDIS(d, d, IMM_HA(i)); \ +/* Sign-extended 32-bit immediate load */ +#define PPC_LI32(d, i) do { \ + if ((int)(uintptr_t)(i) >= -32768 && \ + (int)(uintptr_t)(i) < 32768) \ + PPC_LI(d, i); \ + else { \ + PPC_LIS(d, IMM_H(i)); \ + if (IMM_L(i)) \ + PPC_ORI(d, d, IMM_L(i)); \ } } while(0) + #define PPC_LI64(d, i) do { \ if (!((uintptr_t)(i) & 0xffffffff00000000ULL)) \ PPC_LI32(d, i); \ -- cgit v0.10.2 From b1a057879af03f6e3b1700c909a6d089e83d1254 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Wed, 22 Jun 2016 21:55:03 +0530 Subject: powerpc/bpf/jit: Optimize 64-bit Immediate loads Similar to the LI32() optimization, if the value can be represented in 32-bits, use LI32(). Also handle loading a few specific forms of immediate values in an optimum manner. Acked-by: Alexei Starovoitov Signed-off-by: Naveen N. 
Rao Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h index a9882db..4c1e055 100644 --- a/arch/powerpc/net/bpf_jit.h +++ b/arch/powerpc/net/bpf_jit.h @@ -244,20 +244,25 @@ DECLARE_LOAD_FUNC(sk_load_byte_msh); } } while(0) #define PPC_LI64(d, i) do { \ - if (!((uintptr_t)(i) & 0xffffffff00000000ULL)) \ + if ((long)(i) >= -2147483648 && \ + (long)(i) < 2147483648) \ PPC_LI32(d, i); \ else { \ - PPC_LIS(d, ((uintptr_t)(i) >> 48)); \ - if ((uintptr_t)(i) & 0x0000ffff00000000ULL) \ - PPC_ORI(d, d, \ - ((uintptr_t)(i) >> 32) & 0xffff); \ + if (!((uintptr_t)(i) & 0xffff800000000000ULL)) \ + PPC_LI(d, ((uintptr_t)(i) >> 32) & 0xffff); \ + else { \ + PPC_LIS(d, ((uintptr_t)(i) >> 48)); \ + if ((uintptr_t)(i) & 0x0000ffff00000000ULL) \ + PPC_ORI(d, d, \ + ((uintptr_t)(i) >> 32) & 0xffff); \ + } \ PPC_SLDI(d, d, 32); \ if ((uintptr_t)(i) & 0x00000000ffff0000ULL) \ PPC_ORIS(d, d, \ ((uintptr_t)(i) >> 16) & 0xffff); \ if ((uintptr_t)(i) & 0x000000000000ffffULL) \ PPC_ORI(d, d, (uintptr_t)(i) & 0xffff); \ - } } while (0); + } } while (0) #ifdef CONFIG_PPC64 #define PPC_FUNC_ADDR(d,i) do { PPC_LI64(d, i); } while(0) -- cgit v0.10.2 From 277285b854c666308cf6cb92a696748f976d6f64 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Wed, 22 Jun 2016 21:55:04 +0530 Subject: powerpc/bpf/jit: Introduce rotate immediate instructions Since we will be using the rotate immediate instructions for extended BPF JIT, let's introduce macros for the same. And since the shift immediate operations use the rotate immediate instructions, let's redo those macros to use the newly introduced instructions. Acked-by: Alexei Starovoitov Signed-off-by: Naveen N. 
Rao Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index 1d035c1..fd8d640 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -272,6 +272,8 @@ #define __PPC_SH(s) __PPC_WS(s) #define __PPC_MB(s) (((s) & 0x1f) << 6) #define __PPC_ME(s) (((s) & 0x1f) << 1) +#define __PPC_MB64(s) (__PPC_MB(s) | ((s) & 0x20)) +#define __PPC_ME64(s) __PPC_MB64(s) #define __PPC_BI(s) (((s) & 0x1f) << 16) #define __PPC_CT(t) (((t) & 0x0f) << 21) diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h index 4c1e055..95d0e38 100644 --- a/arch/powerpc/net/bpf_jit.h +++ b/arch/powerpc/net/bpf_jit.h @@ -210,18 +210,20 @@ DECLARE_LOAD_FUNC(sk_load_byte_msh); ___PPC_RS(a) | ___PPC_RB(s)) #define PPC_SRW(d, a, s) EMIT(PPC_INST_SRW | ___PPC_RA(d) | \ ___PPC_RS(a) | ___PPC_RB(s)) +#define PPC_RLWINM(d, a, i, mb, me) EMIT(PPC_INST_RLWINM | ___PPC_RA(d) | \ + ___PPC_RS(a) | __PPC_SH(i) | \ + __PPC_MB(mb) | __PPC_ME(me)) +#define PPC_RLDICR(d, a, i, me) EMIT(PPC_INST_RLDICR | ___PPC_RA(d) | \ + ___PPC_RS(a) | __PPC_SH(i) | \ + __PPC_ME64(me) | (((i) & 0x20) >> 4)) + /* slwi = rlwinm Rx, Ry, n, 0, 31-n */ -#define PPC_SLWI(d, a, i) EMIT(PPC_INST_RLWINM | ___PPC_RA(d) | \ - ___PPC_RS(a) | __PPC_SH(i) | \ - __PPC_MB(0) | __PPC_ME(31-(i))) +#define PPC_SLWI(d, a, i) PPC_RLWINM(d, a, i, 0, 31-(i)) /* srwi = rlwinm Rx, Ry, 32-n, n, 31 */ -#define PPC_SRWI(d, a, i) EMIT(PPC_INST_RLWINM | ___PPC_RA(d) | \ - ___PPC_RS(a) | __PPC_SH(32-(i)) | \ - __PPC_MB(i) | __PPC_ME(31)) +#define PPC_SRWI(d, a, i) PPC_RLWINM(d, a, 32-(i), i, 31) /* sldi = rldicr Rx, Ry, n, 63-n */ -#define PPC_SLDI(d, a, i) EMIT(PPC_INST_RLDICR | ___PPC_RA(d) | \ - ___PPC_RS(a) | __PPC_SH(i) | \ - __PPC_MB(63-(i)) | (((i) & 0x20) >> 4)) +#define PPC_SLDI(d, a, i) PPC_RLDICR(d, a, i, 63-(i)) + #define PPC_NEG(d, a) EMIT(PPC_INST_NEG | ___PPC_RT(d) | ___PPC_RA(a)) /* Long jump; (unconditional 'branch') */ -- 
cgit v0.10.2 From cef1e8cdcdb50513e7d3351f536e7e1e3e347827 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Wed, 22 Jun 2016 21:55:05 +0530 Subject: powerpc/bpf/jit: A few cleanups 1. Per the ISA, ADDIS actually uses RT, rather than RS. Though the result is the same, make the usage clear. 2. The multiply instruction used is a 32-bit multiply. Rename PPC_MUL() to PPC_MULW() to make the same clear. 3. PPC_STW[U] take the entire 16-bit immediate value and do not require word-alignment, per the ISA. Change the macros to use IMM_L(). 4. A few white-space cleanups to satisfy checkpatch.pl. Acked-by: Alexei Starovoitov Signed-off-by: Naveen N. Rao Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h index 95d0e38..9041d3f 100644 --- a/arch/powerpc/net/bpf_jit.h +++ b/arch/powerpc/net/bpf_jit.h @@ -83,7 +83,7 @@ DECLARE_LOAD_FUNC(sk_load_byte_msh); */ #define IMM_H(i) ((uintptr_t)(i)>>16) #define IMM_HA(i) (((uintptr_t)(i)>>16) + \ - (((uintptr_t)(i) & 0x8000) >> 15)) + (((uintptr_t)(i) & 0x8000) >> 15)) #define IMM_L(i) ((uintptr_t)(i) & 0xffff) #define PLANT_INSTR(d, idx, instr) \ @@ -99,16 +99,16 @@ DECLARE_LOAD_FUNC(sk_load_byte_msh); #define PPC_MR(d, a) PPC_OR(d, a, a) #define PPC_LI(r, i) PPC_ADDI(r, 0, i) #define PPC_ADDIS(d, a, i) EMIT(PPC_INST_ADDIS | \ - ___PPC_RS(d) | ___PPC_RA(a) | IMM_L(i)) + ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i)) #define PPC_LIS(r, i) PPC_ADDIS(r, 0, i) #define PPC_STD(r, base, i) EMIT(PPC_INST_STD | ___PPC_RS(r) | \ ___PPC_RA(base) | ((i) & 0xfffc)) #define PPC_STDU(r, base, i) EMIT(PPC_INST_STDU | ___PPC_RS(r) | \ ___PPC_RA(base) | ((i) & 0xfffc)) #define PPC_STW(r, base, i) EMIT(PPC_INST_STW | ___PPC_RS(r) | \ - ___PPC_RA(base) | ((i) & 0xfffc)) + ___PPC_RA(base) | IMM_L(i)) #define PPC_STWU(r, base, i) EMIT(PPC_INST_STWU | ___PPC_RS(r) | \ - ___PPC_RA(base) | ((i) & 0xfffc)) + ___PPC_RA(base) | IMM_L(i)) #define PPC_LBZ(r, base, i) EMIT(PPC_INST_LBZ | ___PPC_RT(r) | \ ___PPC_RA(base) | 
IMM_L(i)) @@ -174,13 +174,14 @@ DECLARE_LOAD_FUNC(sk_load_byte_msh); #define PPC_CMPWI(a, i) EMIT(PPC_INST_CMPWI | ___PPC_RA(a) | IMM_L(i)) #define PPC_CMPDI(a, i) EMIT(PPC_INST_CMPDI | ___PPC_RA(a) | IMM_L(i)) #define PPC_CMPLWI(a, i) EMIT(PPC_INST_CMPLWI | ___PPC_RA(a) | IMM_L(i)) -#define PPC_CMPLW(a, b) EMIT(PPC_INST_CMPLW | ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_CMPLW(a, b) EMIT(PPC_INST_CMPLW | ___PPC_RA(a) | \ + ___PPC_RB(b)) #define PPC_SUB(d, a, b) EMIT(PPC_INST_SUB | ___PPC_RT(d) | \ ___PPC_RB(a) | ___PPC_RA(b)) #define PPC_ADD(d, a, b) EMIT(PPC_INST_ADD | ___PPC_RT(d) | \ ___PPC_RA(a) | ___PPC_RB(b)) -#define PPC_MUL(d, a, b) EMIT(PPC_INST_MULLW | ___PPC_RT(d) | \ +#define PPC_MULW(d, a, b) EMIT(PPC_INST_MULLW | ___PPC_RT(d) | \ ___PPC_RA(a) | ___PPC_RB(b)) #define PPC_MULHWU(d, a, b) EMIT(PPC_INST_MULHWU | ___PPC_RT(d) | \ ___PPC_RA(a) | ___PPC_RB(b)) diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 2d66a84..6012aac 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -161,14 +161,14 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, break; case BPF_ALU | BPF_MUL | BPF_X: /* A *= X; */ ctx->seen |= SEEN_XREG; - PPC_MUL(r_A, r_A, r_X); + PPC_MULW(r_A, r_A, r_X); break; case BPF_ALU | BPF_MUL | BPF_K: /* A *= K */ if (K < 32768) PPC_MULI(r_A, r_A, K); else { PPC_LI32(r_scratch1, K); - PPC_MUL(r_A, r_A, r_scratch1); + PPC_MULW(r_A, r_A, r_scratch1); } break; case BPF_ALU | BPF_MOD | BPF_X: /* A %= X; */ @@ -184,7 +184,7 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, } if (code == (BPF_ALU | BPF_MOD | BPF_X)) { PPC_DIVWU(r_scratch1, r_A, r_X); - PPC_MUL(r_scratch1, r_X, r_scratch1); + PPC_MULW(r_scratch1, r_X, r_scratch1); PPC_SUB(r_A, r_A, r_scratch1); } else { PPC_DIVWU(r_A, r_A, r_X); @@ -193,7 +193,7 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, case BPF_ALU | BPF_MOD | BPF_K: /* A %= K; */ PPC_LI32(r_scratch2, K); 
PPC_DIVWU(r_scratch1, r_A, r_scratch2); - PPC_MUL(r_scratch1, r_scratch2, r_scratch1); + PPC_MULW(r_scratch1, r_scratch2, r_scratch1); PPC_SUB(r_A, r_A, r_scratch1); break; case BPF_ALU | BPF_DIV | BPF_K: /* A /= K */ -- cgit v0.10.2 From 6ac0ba5a4f82b40b4f6b3a75e7e4f0a15a3d7b9b Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Wed, 22 Jun 2016 21:55:06 +0530 Subject: powerpc/bpf/jit: Isolate classic BPF JIT specifics into a separate header Break out classic BPF JIT specifics into a separate header in preparation for eBPF JIT implementation. Note that ppc32 will still need the classic BPF JIT. Acked-by: Alexei Starovoitov Signed-off-by: Naveen N. Rao Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h index 9041d3f..313cfaf 100644 --- a/arch/powerpc/net/bpf_jit.h +++ b/arch/powerpc/net/bpf_jit.h @@ -1,4 +1,5 @@ -/* bpf_jit.h: BPF JIT compiler for PPC64 +/* + * bpf_jit.h: BPF JIT compiler for PPC * * Copyright 2011 Matt Evans , IBM Corporation * @@ -10,66 +11,8 @@ #ifndef _BPF_JIT_H #define _BPF_JIT_H -#ifdef CONFIG_PPC64 -#define BPF_PPC_STACK_R3_OFF 48 -#define BPF_PPC_STACK_LOCALS 32 -#define BPF_PPC_STACK_BASIC (48+64) -#define BPF_PPC_STACK_SAVE (18*8) -#define BPF_PPC_STACKFRAME (BPF_PPC_STACK_BASIC+BPF_PPC_STACK_LOCALS+ \ - BPF_PPC_STACK_SAVE) -#define BPF_PPC_SLOWPATH_FRAME (48+64) -#else -#define BPF_PPC_STACK_R3_OFF 24 -#define BPF_PPC_STACK_LOCALS 16 -#define BPF_PPC_STACK_BASIC (24+32) -#define BPF_PPC_STACK_SAVE (18*4) -#define BPF_PPC_STACKFRAME (BPF_PPC_STACK_BASIC+BPF_PPC_STACK_LOCALS+ \ - BPF_PPC_STACK_SAVE) -#define BPF_PPC_SLOWPATH_FRAME (24+32) -#endif - -#define REG_SZ (BITS_PER_LONG/8) - -/* - * Generated code register usage: - * - * As normal PPC C ABI (e.g. r1=sp, r2=TOC), with: - * - * skb r3 (Entry parameter) - * A register r4 - * X register r5 - * addr param r6 - * r7-r10 scratch - * skb->data r14 - * skb headlen r15 (skb->len - skb->data_len) - * m[0] r16 - * m[...] ... 
- * m[15] r31 - */ -#define r_skb 3 -#define r_ret 3 -#define r_A 4 -#define r_X 5 -#define r_addr 6 -#define r_scratch1 7 -#define r_scratch2 8 -#define r_D 14 -#define r_HL 15 -#define r_M 16 - #ifndef __ASSEMBLY__ -/* - * Assembly helpers from arch/powerpc/net/bpf_jit.S: - */ -#define DECLARE_LOAD_FUNC(func) \ - extern u8 func[], func##_negative_offset[], func##_positive_offset[] - -DECLARE_LOAD_FUNC(sk_load_word); -DECLARE_LOAD_FUNC(sk_load_half); -DECLARE_LOAD_FUNC(sk_load_byte); -DECLARE_LOAD_FUNC(sk_load_byte_msh); - #ifdef CONFIG_PPC64 #define FUNCTION_DESCR_SIZE 24 #else @@ -131,46 +74,6 @@ DECLARE_LOAD_FUNC(sk_load_byte_msh); #define PPC_BPF_STLU(r, base, i) do { PPC_STWU(r, base, i); } while(0) #endif -/* Convenience helpers for the above with 'far' offsets: */ -#define PPC_LBZ_OFFS(r, base, i) do { if ((i) < 32768) PPC_LBZ(r, base, i); \ - else { PPC_ADDIS(r, base, IMM_HA(i)); \ - PPC_LBZ(r, r, IMM_L(i)); } } while(0) - -#define PPC_LD_OFFS(r, base, i) do { if ((i) < 32768) PPC_LD(r, base, i); \ - else { PPC_ADDIS(r, base, IMM_HA(i)); \ - PPC_LD(r, r, IMM_L(i)); } } while(0) - -#define PPC_LWZ_OFFS(r, base, i) do { if ((i) < 32768) PPC_LWZ(r, base, i); \ - else { PPC_ADDIS(r, base, IMM_HA(i)); \ - PPC_LWZ(r, r, IMM_L(i)); } } while(0) - -#define PPC_LHZ_OFFS(r, base, i) do { if ((i) < 32768) PPC_LHZ(r, base, i); \ - else { PPC_ADDIS(r, base, IMM_HA(i)); \ - PPC_LHZ(r, r, IMM_L(i)); } } while(0) - -#ifdef CONFIG_PPC64 -#define PPC_LL_OFFS(r, base, i) do { PPC_LD_OFFS(r, base, i); } while(0) -#else -#define PPC_LL_OFFS(r, base, i) do { PPC_LWZ_OFFS(r, base, i); } while(0) -#endif - -#ifdef CONFIG_SMP -#ifdef CONFIG_PPC64 -#define PPC_BPF_LOAD_CPU(r) \ - do { BUILD_BUG_ON(FIELD_SIZEOF(struct paca_struct, paca_index) != 2); \ - PPC_LHZ_OFFS(r, 13, offsetof(struct paca_struct, paca_index)); \ - } while (0) -#else -#define PPC_BPF_LOAD_CPU(r) \ - do { BUILD_BUG_ON(FIELD_SIZEOF(struct thread_info, cpu) != 4); \ - PPC_LHZ_OFFS(r, (1 & ~(THREAD_SIZE - 1)), \ - 
offsetof(struct thread_info, cpu)); \ - } while(0) -#endif -#else -#define PPC_BPF_LOAD_CPU(r) do { PPC_LI(r, 0); } while(0) -#endif - #define PPC_CMPWI(a, i) EMIT(PPC_INST_CMPWI | ___PPC_RA(a) | IMM_L(i)) #define PPC_CMPDI(a, i) EMIT(PPC_INST_CMPDI | ___PPC_RA(a) | IMM_L(i)) #define PPC_CMPLWI(a, i) EMIT(PPC_INST_CMPLWI | ___PPC_RA(a) | IMM_L(i)) @@ -273,14 +176,6 @@ DECLARE_LOAD_FUNC(sk_load_byte_msh); #define PPC_FUNC_ADDR(d,i) do { PPC_LI32(d, i); } while(0) #endif -#define PPC_LHBRX_OFFS(r, base, i) \ - do { PPC_LI32(r, i); PPC_LHBRX(r, r, base); } while(0) -#ifdef __LITTLE_ENDIAN__ -#define PPC_NTOHS_OFFS(r, base, i) PPC_LHBRX_OFFS(r, base, i) -#else -#define PPC_NTOHS_OFFS(r, base, i) PPC_LHZ_OFFS(r, base, i) -#endif - static inline bool is_nearbranch(int offset) { return (offset < 32768) && (offset >= -32768); @@ -317,18 +212,6 @@ static inline bool is_nearbranch(int offset) #define COND_NE (CR0_EQ | COND_CMP_FALSE) #define COND_LT (CR0_LT | COND_CMP_TRUE) -#define SEEN_DATAREF 0x10000 /* might call external helpers */ -#define SEEN_XREG 0x20000 /* X reg is used */ -#define SEEN_MEM 0x40000 /* SEEN_MEM+(1<, IBM Corporation + * + * Split from bpf_jit.h + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. 
+ */ +#ifndef _BPF_JIT32_H +#define _BPF_JIT32_H + +#include "bpf_jit.h" + +#ifdef CONFIG_PPC64 +#define BPF_PPC_STACK_R3_OFF 48 +#define BPF_PPC_STACK_LOCALS 32 +#define BPF_PPC_STACK_BASIC (48+64) +#define BPF_PPC_STACK_SAVE (18*8) +#define BPF_PPC_STACKFRAME (BPF_PPC_STACK_BASIC+BPF_PPC_STACK_LOCALS+ \ + BPF_PPC_STACK_SAVE) +#define BPF_PPC_SLOWPATH_FRAME (48+64) +#else +#define BPF_PPC_STACK_R3_OFF 24 +#define BPF_PPC_STACK_LOCALS 16 +#define BPF_PPC_STACK_BASIC (24+32) +#define BPF_PPC_STACK_SAVE (18*4) +#define BPF_PPC_STACKFRAME (BPF_PPC_STACK_BASIC+BPF_PPC_STACK_LOCALS+ \ + BPF_PPC_STACK_SAVE) +#define BPF_PPC_SLOWPATH_FRAME (24+32) +#endif + +#define REG_SZ (BITS_PER_LONG/8) + +/* + * Generated code register usage: + * + * As normal PPC C ABI (e.g. r1=sp, r2=TOC), with: + * + * skb r3 (Entry parameter) + * A register r4 + * X register r5 + * addr param r6 + * r7-r10 scratch + * skb->data r14 + * skb headlen r15 (skb->len - skb->data_len) + * m[0] r16 + * m[...] ... + * m[15] r31 + */ +#define r_skb 3 +#define r_ret 3 +#define r_A 4 +#define r_X 5 +#define r_addr 6 +#define r_scratch1 7 +#define r_scratch2 8 +#define r_D 14 +#define r_HL 15 +#define r_M 16 + +#ifndef __ASSEMBLY__ + +/* + * Assembly helpers from arch/powerpc/net/bpf_jit.S: + */ +#define DECLARE_LOAD_FUNC(func) \ + extern u8 func[], func##_negative_offset[], func##_positive_offset[] + +DECLARE_LOAD_FUNC(sk_load_word); +DECLARE_LOAD_FUNC(sk_load_half); +DECLARE_LOAD_FUNC(sk_load_byte); +DECLARE_LOAD_FUNC(sk_load_byte_msh); + +#define PPC_LBZ_OFFS(r, base, i) do { if ((i) < 32768) PPC_LBZ(r, base, i); \ + else { PPC_ADDIS(r, base, IMM_HA(i)); \ + PPC_LBZ(r, r, IMM_L(i)); } } while(0) + +#define PPC_LD_OFFS(r, base, i) do { if ((i) < 32768) PPC_LD(r, base, i); \ + else { PPC_ADDIS(r, base, IMM_HA(i)); \ + PPC_LD(r, r, IMM_L(i)); } } while(0) + +#define PPC_LWZ_OFFS(r, base, i) do { if ((i) < 32768) PPC_LWZ(r, base, i); \ + else { PPC_ADDIS(r, base, IMM_HA(i)); \ + PPC_LWZ(r, r, IMM_L(i)); } } 
while(0) + +#define PPC_LHZ_OFFS(r, base, i) do { if ((i) < 32768) PPC_LHZ(r, base, i); \ + else { PPC_ADDIS(r, base, IMM_HA(i)); \ + PPC_LHZ(r, r, IMM_L(i)); } } while(0) + +#ifdef CONFIG_PPC64 +#define PPC_LL_OFFS(r, base, i) do { PPC_LD_OFFS(r, base, i); } while(0) +#else +#define PPC_LL_OFFS(r, base, i) do { PPC_LWZ_OFFS(r, base, i); } while(0) +#endif + +#ifdef CONFIG_SMP +#ifdef CONFIG_PPC64 +#define PPC_BPF_LOAD_CPU(r) \ + do { BUILD_BUG_ON(FIELD_SIZEOF(struct paca_struct, paca_index) != 2); \ + PPC_LHZ_OFFS(r, 13, offsetof(struct paca_struct, paca_index)); \ + } while (0) +#else +#define PPC_BPF_LOAD_CPU(r) \ + do { BUILD_BUG_ON(FIELD_SIZEOF(struct thread_info, cpu) != 4); \ + PPC_LHZ_OFFS(r, (1 & ~(THREAD_SIZE - 1)), \ + offsetof(struct thread_info, cpu)); \ + } while(0) +#endif +#else +#define PPC_BPF_LOAD_CPU(r) do { PPC_LI(r, 0); } while(0) +#endif + +#define PPC_LHBRX_OFFS(r, base, i) \ + do { PPC_LI32(r, i); PPC_LHBRX(r, r, base); } while(0) +#ifdef __LITTLE_ENDIAN__ +#define PPC_NTOHS_OFFS(r, base, i) PPC_LHBRX_OFFS(r, base, i) +#else +#define PPC_NTOHS_OFFS(r, base, i) PPC_LHZ_OFFS(r, base, i) +#endif + +#define SEEN_DATAREF 0x10000 /* might call external helpers */ +#define SEEN_XREG 0x20000 /* X reg is used */ +#define SEEN_MEM 0x40000 /* SEEN_MEM+(1< -#include "bpf_jit.h" +#include "bpf_jit32.h" /* * All of these routines are called directly from generated code, diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 6012aac..7e706f3 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -16,7 +16,7 @@ #include #include -#include "bpf_jit.h" +#include "bpf_jit32.h" int bpf_jit_enable __read_mostly; -- cgit v0.10.2 From 156d0e290e969caba25f1851c52417c14d141b24 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Wed, 22 Jun 2016 21:55:07 +0530 Subject: powerpc/ebpf/jit: Implement JIT compiler for extended BPF PPC64 eBPF JIT compiler. 
Enable with: echo 1 > /proc/sys/net/core/bpf_jit_enable or echo 2 > /proc/sys/net/core/bpf_jit_enable ... to see the generated JIT code. This can further be processed with tools/net/bpf_jit_disasm. With CONFIG_TEST_BPF=m and 'modprobe test_bpf': test_bpf: Summary: 305 PASSED, 0 FAILED, [297/297 JIT'ed] ... on both ppc64 BE and LE. The details of the approach are documented through various comments in the code. Acked-by: Alexei Starovoitov Signed-off-by: Naveen N. Rao Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 01f7464..ee82f9a 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -128,7 +128,8 @@ config PPC select IRQ_FORCED_THREADING select HAVE_RCU_TABLE_FREE if SMP select HAVE_SYSCALL_TRACEPOINTS - select HAVE_CBPF_JIT + select HAVE_CBPF_JIT if !PPC64 + select HAVE_EBPF_JIT if PPC64 select HAVE_ARCH_JUMP_LABEL select ARCH_HAVE_NMI_SAFE_CMPXCHG select ARCH_HAS_GCOV_PROFILE_ALL diff --git a/arch/powerpc/include/asm/asm-compat.h b/arch/powerpc/include/asm/asm-compat.h index dc85dcb..cee3aa0 100644 --- a/arch/powerpc/include/asm/asm-compat.h +++ b/arch/powerpc/include/asm/asm-compat.h @@ -36,11 +36,13 @@ #define PPC_MIN_STKFRM 112 #ifdef __BIG_ENDIAN__ +#define LHZX_BE stringify_in_c(lhzx) #define LWZX_BE stringify_in_c(lwzx) #define LDX_BE stringify_in_c(ldx) #define STWX_BE stringify_in_c(stwx) #define STDX_BE stringify_in_c(stdx) #else +#define LHZX_BE stringify_in_c(lhbrx) #define LWZX_BE stringify_in_c(lwbrx) #define LDX_BE stringify_in_c(ldbrx) #define STWX_BE stringify_in_c(stwbrx) diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index fd8d640..6a77d130 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -142,9 +142,11 @@ #define PPC_INST_ISEL 0x7c00001e #define PPC_INST_ISEL_MASK 0xfc00003e #define PPC_INST_LDARX 0x7c0000a8 +#define PPC_INST_STDCX 0x7c0001ad #define PPC_INST_LSWI 0x7c0004aa #define PPC_INST_LSWX 
0x7c00042a #define PPC_INST_LWARX 0x7c000028 +#define PPC_INST_STWCX 0x7c00012d #define PPC_INST_LWSYNC 0x7c2004ac #define PPC_INST_SYNC 0x7c0004ac #define PPC_INST_SYNC_MASK 0xfc0007fe @@ -211,8 +213,11 @@ #define PPC_INST_LBZ 0x88000000 #define PPC_INST_LD 0xe8000000 #define PPC_INST_LHZ 0xa0000000 -#define PPC_INST_LHBRX 0x7c00062c #define PPC_INST_LWZ 0x80000000 +#define PPC_INST_LHBRX 0x7c00062c +#define PPC_INST_LDBRX 0x7c000428 +#define PPC_INST_STB 0x98000000 +#define PPC_INST_STH 0xb0000000 #define PPC_INST_STD 0xf8000000 #define PPC_INST_STDU 0xf8000001 #define PPC_INST_STW 0x90000000 @@ -221,22 +226,34 @@ #define PPC_INST_MTLR 0x7c0803a6 #define PPC_INST_CMPWI 0x2c000000 #define PPC_INST_CMPDI 0x2c200000 +#define PPC_INST_CMPW 0x7c000000 +#define PPC_INST_CMPD 0x7c200000 #define PPC_INST_CMPLW 0x7c000040 +#define PPC_INST_CMPLD 0x7c200040 #define PPC_INST_CMPLWI 0x28000000 +#define PPC_INST_CMPLDI 0x28200000 #define PPC_INST_ADDI 0x38000000 #define PPC_INST_ADDIS 0x3c000000 #define PPC_INST_ADD 0x7c000214 #define PPC_INST_SUB 0x7c000050 #define PPC_INST_BLR 0x4e800020 #define PPC_INST_BLRL 0x4e800021 +#define PPC_INST_MULLD 0x7c0001d2 #define PPC_INST_MULLW 0x7c0001d6 #define PPC_INST_MULHWU 0x7c000016 #define PPC_INST_MULLI 0x1c000000 #define PPC_INST_DIVWU 0x7c000396 +#define PPC_INST_DIVD 0x7c0003d2 #define PPC_INST_RLWINM 0x54000000 +#define PPC_INST_RLWIMI 0x50000000 +#define PPC_INST_RLDICL 0x78000000 #define PPC_INST_RLDICR 0x78000004 #define PPC_INST_SLW 0x7c000030 +#define PPC_INST_SLD 0x7c000036 #define PPC_INST_SRW 0x7c000430 +#define PPC_INST_SRD 0x7c000436 +#define PPC_INST_SRAD 0x7c000634 +#define PPC_INST_SRADI 0x7c000674 #define PPC_INST_AND 0x7c000038 #define PPC_INST_ANDDOT 0x7c000039 #define PPC_INST_OR 0x7c000378 @@ -247,6 +264,7 @@ #define PPC_INST_XORI 0x68000000 #define PPC_INST_XORIS 0x6c000000 #define PPC_INST_NEG 0x7c0000d0 +#define PPC_INST_EXTSW 0x7c0007b4 #define PPC_INST_BRANCH 0x48000000 #define PPC_INST_BRANCH_COND 
0x40800000 #define PPC_INST_LBZCIX 0x7c0006aa diff --git a/arch/powerpc/net/Makefile b/arch/powerpc/net/Makefile index 1306a58..c1ff16a 100644 --- a/arch/powerpc/net/Makefile +++ b/arch/powerpc/net/Makefile @@ -1,4 +1,8 @@ # # Arch-specific network modules # +ifeq ($(CONFIG_PPC64),y) +obj-$(CONFIG_BPF_JIT) += bpf_jit_asm64.o bpf_jit_comp64.o +else obj-$(CONFIG_BPF_JIT) += bpf_jit_asm.o bpf_jit_comp.o +endif diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h index 313cfaf..d5301b6 100644 --- a/arch/powerpc/net/bpf_jit.h +++ b/arch/powerpc/net/bpf_jit.h @@ -2,6 +2,7 @@ * bpf_jit.h: BPF JIT compiler for PPC * * Copyright 2011 Matt Evans , IBM Corporation + * 2016 Naveen N. Rao * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -13,7 +14,9 @@ #ifndef __ASSEMBLY__ -#ifdef CONFIG_PPC64 +#include + +#ifdef PPC64_ELF_ABI_v1 #define FUNCTION_DESCR_SIZE 24 #else #define FUNCTION_DESCR_SIZE 0 @@ -52,6 +55,10 @@ ___PPC_RA(base) | IMM_L(i)) #define PPC_STWU(r, base, i) EMIT(PPC_INST_STWU | ___PPC_RS(r) | \ ___PPC_RA(base) | IMM_L(i)) +#define PPC_STH(r, base, i) EMIT(PPC_INST_STH | ___PPC_RS(r) | \ + ___PPC_RA(base) | IMM_L(i)) +#define PPC_STB(r, base, i) EMIT(PPC_INST_STB | ___PPC_RS(r) | \ + ___PPC_RA(base) | IMM_L(i)) #define PPC_LBZ(r, base, i) EMIT(PPC_INST_LBZ | ___PPC_RT(r) | \ ___PPC_RA(base) | IMM_L(i)) @@ -63,6 +70,19 @@ ___PPC_RA(base) | IMM_L(i)) #define PPC_LHBRX(r, base, b) EMIT(PPC_INST_LHBRX | ___PPC_RT(r) | \ ___PPC_RA(base) | ___PPC_RB(b)) +#define PPC_LDBRX(r, base, b) EMIT(PPC_INST_LDBRX | ___PPC_RT(r) | \ + ___PPC_RA(base) | ___PPC_RB(b)) + +#define PPC_BPF_LDARX(t, a, b, eh) EMIT(PPC_INST_LDARX | ___PPC_RT(t) | \ + ___PPC_RA(a) | ___PPC_RB(b) | \ + __PPC_EH(eh)) +#define PPC_BPF_LWARX(t, a, b, eh) EMIT(PPC_INST_LWARX | ___PPC_RT(t) | \ + ___PPC_RA(a) | ___PPC_RB(b) | \ + __PPC_EH(eh)) +#define PPC_BPF_STWCX(s, a, b) EMIT(PPC_INST_STWCX | ___PPC_RS(s) | \ + 
___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_BPF_STDCX(s, a, b) EMIT(PPC_INST_STDCX | ___PPC_RS(s) | \ + ___PPC_RA(a) | ___PPC_RB(b)) #ifdef CONFIG_PPC64 #define PPC_BPF_LL(r, base, i) do { PPC_LD(r, base, i); } while(0) @@ -76,14 +96,23 @@ #define PPC_CMPWI(a, i) EMIT(PPC_INST_CMPWI | ___PPC_RA(a) | IMM_L(i)) #define PPC_CMPDI(a, i) EMIT(PPC_INST_CMPDI | ___PPC_RA(a) | IMM_L(i)) +#define PPC_CMPW(a, b) EMIT(PPC_INST_CMPW | ___PPC_RA(a) | \ + ___PPC_RB(b)) +#define PPC_CMPD(a, b) EMIT(PPC_INST_CMPD | ___PPC_RA(a) | \ + ___PPC_RB(b)) #define PPC_CMPLWI(a, i) EMIT(PPC_INST_CMPLWI | ___PPC_RA(a) | IMM_L(i)) +#define PPC_CMPLDI(a, i) EMIT(PPC_INST_CMPLDI | ___PPC_RA(a) | IMM_L(i)) #define PPC_CMPLW(a, b) EMIT(PPC_INST_CMPLW | ___PPC_RA(a) | \ ___PPC_RB(b)) +#define PPC_CMPLD(a, b) EMIT(PPC_INST_CMPLD | ___PPC_RA(a) | \ + ___PPC_RB(b)) #define PPC_SUB(d, a, b) EMIT(PPC_INST_SUB | ___PPC_RT(d) | \ ___PPC_RB(a) | ___PPC_RA(b)) #define PPC_ADD(d, a, b) EMIT(PPC_INST_ADD | ___PPC_RT(d) | \ ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_MULD(d, a, b) EMIT(PPC_INST_MULLD | ___PPC_RT(d) | \ + ___PPC_RA(a) | ___PPC_RB(b)) #define PPC_MULW(d, a, b) EMIT(PPC_INST_MULLW | ___PPC_RT(d) | \ ___PPC_RA(a) | ___PPC_RB(b)) #define PPC_MULHWU(d, a, b) EMIT(PPC_INST_MULHWU | ___PPC_RT(d) | \ @@ -92,6 +121,8 @@ ___PPC_RA(a) | IMM_L(i)) #define PPC_DIVWU(d, a, b) EMIT(PPC_INST_DIVWU | ___PPC_RT(d) | \ ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_DIVD(d, a, b) EMIT(PPC_INST_DIVD | ___PPC_RT(d) | \ + ___PPC_RA(a) | ___PPC_RB(b)) #define PPC_AND(d, a, b) EMIT(PPC_INST_AND | ___PPC_RA(d) | \ ___PPC_RS(a) | ___PPC_RB(b)) #define PPC_ANDI(d, a, i) EMIT(PPC_INST_ANDI | ___PPC_RA(d) | \ @@ -100,6 +131,7 @@ ___PPC_RS(a) | ___PPC_RB(b)) #define PPC_OR(d, a, b) EMIT(PPC_INST_OR | ___PPC_RA(d) | \ ___PPC_RS(a) | ___PPC_RB(b)) +#define PPC_MR(d, a) PPC_OR(d, a, a) #define PPC_ORI(d, a, i) EMIT(PPC_INST_ORI | ___PPC_RA(d) | \ ___PPC_RS(a) | IMM_L(i)) #define PPC_ORIS(d, a, i) EMIT(PPC_INST_ORIS | ___PPC_RA(d) | \ 
@@ -110,13 +142,30 @@ ___PPC_RS(a) | IMM_L(i)) #define PPC_XORIS(d, a, i) EMIT(PPC_INST_XORIS | ___PPC_RA(d) | \ ___PPC_RS(a) | IMM_L(i)) +#define PPC_EXTSW(d, a) EMIT(PPC_INST_EXTSW | ___PPC_RA(d) | \ + ___PPC_RS(a)) #define PPC_SLW(d, a, s) EMIT(PPC_INST_SLW | ___PPC_RA(d) | \ ___PPC_RS(a) | ___PPC_RB(s)) +#define PPC_SLD(d, a, s) EMIT(PPC_INST_SLD | ___PPC_RA(d) | \ + ___PPC_RS(a) | ___PPC_RB(s)) #define PPC_SRW(d, a, s) EMIT(PPC_INST_SRW | ___PPC_RA(d) | \ ___PPC_RS(a) | ___PPC_RB(s)) +#define PPC_SRD(d, a, s) EMIT(PPC_INST_SRD | ___PPC_RA(d) | \ + ___PPC_RS(a) | ___PPC_RB(s)) +#define PPC_SRAD(d, a, s) EMIT(PPC_INST_SRAD | ___PPC_RA(d) | \ + ___PPC_RS(a) | ___PPC_RB(s)) +#define PPC_SRADI(d, a, i) EMIT(PPC_INST_SRADI | ___PPC_RA(d) | \ + ___PPC_RS(a) | __PPC_SH(i) | \ + (((i) & 0x20) >> 4)) #define PPC_RLWINM(d, a, i, mb, me) EMIT(PPC_INST_RLWINM | ___PPC_RA(d) | \ ___PPC_RS(a) | __PPC_SH(i) | \ __PPC_MB(mb) | __PPC_ME(me)) +#define PPC_RLWIMI(d, a, i, mb, me) EMIT(PPC_INST_RLWIMI | ___PPC_RA(d) | \ + ___PPC_RS(a) | __PPC_SH(i) | \ + __PPC_MB(mb) | __PPC_ME(me)) +#define PPC_RLDICL(d, a, i, mb) EMIT(PPC_INST_RLDICL | ___PPC_RA(d) | \ + ___PPC_RS(a) | __PPC_SH(i) | \ + __PPC_MB64(mb) | (((i) & 0x20) >> 4)) #define PPC_RLDICR(d, a, i, me) EMIT(PPC_INST_RLDICR | ___PPC_RA(d) | \ ___PPC_RS(a) | __PPC_SH(i) | \ __PPC_ME64(me) | (((i) & 0x20) >> 4)) @@ -127,6 +176,8 @@ #define PPC_SRWI(d, a, i) PPC_RLWINM(d, a, 32-(i), i, 31) /* sldi = rldicr Rx, Ry, n, 63-n */ #define PPC_SLDI(d, a, i) PPC_RLDICR(d, a, i, 63-(i)) +/* srdi = rldicl Rx, Ry, 64-n, n */ +#define PPC_SRDI(d, a, i) PPC_RLDICL(d, a, 64-(i), i) #define PPC_NEG(d, a) EMIT(PPC_INST_NEG | ___PPC_RT(d) | ___PPC_RA(a)) diff --git a/arch/powerpc/net/bpf_jit64.h b/arch/powerpc/net/bpf_jit64.h new file mode 100644 index 0000000..5046d6f --- /dev/null +++ b/arch/powerpc/net/bpf_jit64.h @@ -0,0 +1,102 @@ +/* + * bpf_jit64.h: BPF JIT compiler for PPC64 + * + * Copyright 2016 Naveen N.
Rao + * IBM Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ +#ifndef _BPF_JIT64_H +#define _BPF_JIT64_H + +#include "bpf_jit.h" + +/* + * Stack layout: + * + * [ prev sp ] <------------- + * [ nv gpr save area ] 8*8 | + * fp (r31) --> [ ebpf stack space ] 512 | + * [ local/tmp var space ] 16 | + * [ frame header ] 32/112 | + * sp (r1) ---> [ stack pointer ] -------------- + */ + +/* for bpf JIT code internal usage */ +#define BPF_PPC_STACK_LOCALS 16 +/* for gpr non volatile registers BPF_REG_6 to 10, plus skb cache registers */ +#define BPF_PPC_STACK_SAVE (8*8) +/* Ensure this is quadword aligned */ +#define BPF_PPC_STACKFRAME (STACK_FRAME_MIN_SIZE + BPF_PPC_STACK_LOCALS + \ + MAX_BPF_STACK + BPF_PPC_STACK_SAVE) + +#ifndef __ASSEMBLY__ + +/* BPF register usage */ +#define SKB_HLEN_REG (MAX_BPF_REG + 0) +#define SKB_DATA_REG (MAX_BPF_REG + 1) +#define TMP_REG_1 (MAX_BPF_REG + 2) +#define TMP_REG_2 (MAX_BPF_REG + 3) + +/* BPF to ppc register mappings */ +static const int b2p[] = { + /* function return value */ + [BPF_REG_0] = 8, + /* function arguments */ + [BPF_REG_1] = 3, + [BPF_REG_2] = 4, + [BPF_REG_3] = 5, + [BPF_REG_4] = 6, + [BPF_REG_5] = 7, + /* non volatile registers */ + [BPF_REG_6] = 27, + [BPF_REG_7] = 28, + [BPF_REG_8] = 29, + [BPF_REG_9] = 30, + /* frame pointer aka BPF_REG_10 */ + [BPF_REG_FP] = 31, + /* eBPF jit internal registers */ + [SKB_HLEN_REG] = 25, + [SKB_DATA_REG] = 26, + [TMP_REG_1] = 9, + [TMP_REG_2] = 10 +}; + +/* Assembly helpers */ +#define DECLARE_LOAD_FUNC(func) u64 func(u64 r3, u64 r4); \ + u64 func##_negative_offset(u64 r3, u64 r4); \ + u64 func##_positive_offset(u64 r3, u64 r4); + +DECLARE_LOAD_FUNC(sk_load_word); +DECLARE_LOAD_FUNC(sk_load_half); +DECLARE_LOAD_FUNC(sk_load_byte); + +#define CHOOSE_LOAD_FUNC(imm, func) \ + (imm < 0 ?
\ + (imm >= SKF_LL_OFF ? func##_negative_offset : func) : \ + func##_positive_offset) + +#define SEEN_FUNC 0x1000 /* might call external helpers */ +#define SEEN_STACK 0x2000 /* uses BPF stack */ +#define SEEN_SKB 0x4000 /* uses sk_buff */ + +struct codegen_context { + /* + * This is used to track register usage as well + * as calls to external helpers. + * - register usage is tracked with corresponding + * bits (r3-r10 and r25-r31) + * - rest of the bits can be used to track other + * things -- for now, we use bits 16 to 23 + * encoded in SEEN_* macros above + */ + unsigned int seen; + unsigned int idx; +}; + +#endif /* !__ASSEMBLY__ */ + +#endif diff --git a/arch/powerpc/net/bpf_jit_asm64.S b/arch/powerpc/net/bpf_jit_asm64.S new file mode 100644 index 0000000..7e4c514 --- /dev/null +++ b/arch/powerpc/net/bpf_jit_asm64.S @@ -0,0 +1,180 @@ +/* + * bpf_jit_asm64.S: Packet/header access helper functions + * for PPC64 BPF compiler. + * + * Copyright 2016, Naveen N. Rao + * IBM Corporation + * + * Based on bpf_jit_asm.S by Matt Evans + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ + +#include +#include +#include "bpf_jit64.h" + +/* + * All of these routines are called directly from generated code, + * with the below register usage: + * r27 skb pointer (ctx) + * r25 skb header length + * r26 skb->data pointer + * r4 offset + * + * Result is passed back in: + * r8 data read in host endian format (accumulator) + * + * r9 is used as a temporary register + */ + +#define r_skb r27 +#define r_hlen r25 +#define r_data r26 +#define r_off r4 +#define r_val r8 +#define r_tmp r9 + +_GLOBAL_TOC(sk_load_word) + cmpdi r_off, 0 + blt bpf_slow_path_word_neg + b sk_load_word_positive_offset + +_GLOBAL_TOC(sk_load_word_positive_offset) + /* Are we accessing past headlen? 
*/ + subi r_tmp, r_hlen, 4 + cmpd r_tmp, r_off + blt bpf_slow_path_word + /* Nope, just hitting the header. cr0 here is eq or gt! */ + LWZX_BE r_val, r_data, r_off + blr /* Return success, cr0 != LT */ + +_GLOBAL_TOC(sk_load_half) + cmpdi r_off, 0 + blt bpf_slow_path_half_neg + b sk_load_half_positive_offset + +_GLOBAL_TOC(sk_load_half_positive_offset) + subi r_tmp, r_hlen, 2 + cmpd r_tmp, r_off + blt bpf_slow_path_half + LHZX_BE r_val, r_data, r_off + blr + +_GLOBAL_TOC(sk_load_byte) + cmpdi r_off, 0 + blt bpf_slow_path_byte_neg + b sk_load_byte_positive_offset + +_GLOBAL_TOC(sk_load_byte_positive_offset) + cmpd r_hlen, r_off + ble bpf_slow_path_byte + lbzx r_val, r_data, r_off + blr + +/* + * Call out to skb_copy_bits: + * Allocate a new stack frame here to remain ABI-compliant in + * stashing LR. + */ +#define bpf_slow_path_common(SIZE) \ + mflr r0; \ + std r0, PPC_LR_STKOFF(r1); \ + stdu r1, -(STACK_FRAME_MIN_SIZE + BPF_PPC_STACK_LOCALS)(r1); \ + mr r3, r_skb; \ + /* r4 = r_off as passed */ \ + addi r5, r1, STACK_FRAME_MIN_SIZE; \ + li r6, SIZE; \ + bl skb_copy_bits; \ + nop; \ + /* save r5 */ \ + addi r5, r1, STACK_FRAME_MIN_SIZE; \ + /* r3 = 0 on success */ \ + addi r1, r1, STACK_FRAME_MIN_SIZE + BPF_PPC_STACK_LOCALS; \ + ld r0, PPC_LR_STKOFF(r1); \ + mtlr r0; \ + cmpdi r3, 0; \ + blt bpf_error; /* cr0 = LT */ + +bpf_slow_path_word: + bpf_slow_path_common(4) + /* Data value is on stack, and cr0 != LT */ + LWZX_BE r_val, 0, r5 + blr + +bpf_slow_path_half: + bpf_slow_path_common(2) + LHZX_BE r_val, 0, r5 + blr + +bpf_slow_path_byte: + bpf_slow_path_common(1) + lbzx r_val, 0, r5 + blr + +/* + * Call out to bpf_internal_load_pointer_neg_helper + */ +#define sk_negative_common(SIZE) \ + mflr r0; \ + std r0, PPC_LR_STKOFF(r1); \ + stdu r1, -STACK_FRAME_MIN_SIZE(r1); \ + mr r3, r_skb; \ + /* r4 = r_off, as passed */ \ + li r5, SIZE; \ + bl bpf_internal_load_pointer_neg_helper; \ + nop; \ + addi r1, r1, STACK_FRAME_MIN_SIZE; \ + ld r0, PPC_LR_STKOFF(r1); \ + mtlr r0; 
\ + /* R3 != 0 on success */ \ + cmpldi r3, 0; \ + beq bpf_error_slow; /* cr0 = EQ */ + +bpf_slow_path_word_neg: + lis r_tmp, -32 /* SKF_LL_OFF */ + cmpd r_off, r_tmp /* addr < SKF_* */ + blt bpf_error /* cr0 = LT */ + b sk_load_word_negative_offset + +_GLOBAL_TOC(sk_load_word_negative_offset) + sk_negative_common(4) + LWZX_BE r_val, 0, r3 + blr + +bpf_slow_path_half_neg: + lis r_tmp, -32 /* SKF_LL_OFF */ + cmpd r_off, r_tmp /* addr < SKF_* */ + blt bpf_error /* cr0 = LT */ + b sk_load_half_negative_offset + +_GLOBAL_TOC(sk_load_half_negative_offset) + sk_negative_common(2) + LHZX_BE r_val, 0, r3 + blr + +bpf_slow_path_byte_neg: + lis r_tmp, -32 /* SKF_LL_OFF */ + cmpd r_off, r_tmp /* addr < SKF_* */ + blt bpf_error /* cr0 = LT */ + b sk_load_byte_negative_offset + +_GLOBAL_TOC(sk_load_byte_negative_offset) + sk_negative_common(1) + lbzx r_val, 0, r3 + blr + +bpf_error_slow: + /* fabricate a cr0 = lt */ + li r_tmp, -1 + cmpdi r_tmp, 0 +bpf_error: + /* + * Entered with cr0 = lt + * Generated code will 'blt epilogue', returning 0. + */ + li r_val, 0 + blr diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c new file mode 100644 index 0000000..6073b78 --- /dev/null +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -0,0 +1,954 @@ +/* + * bpf_jit_comp64.c: eBPF JIT compiler + * + * Copyright 2016 Naveen N. Rao + * IBM Corporation + * + * Based on the powerpc classic BPF JIT compiler by Matt Evans + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. 
+ */ +#include +#include +#include +#include +#include +#include + +#include "bpf_jit64.h" + +int bpf_jit_enable __read_mostly; + +static void bpf_jit_fill_ill_insns(void *area, unsigned int size) +{ + int *p = area; + + /* Fill whole space with trap instructions */ + while (p < (int *)((char *)area + size)) + *p++ = BREAKPOINT_INSTRUCTION; +} + +static inline void bpf_flush_icache(void *start, void *end) +{ + smp_wmb(); + flush_icache_range((unsigned long)start, (unsigned long)end); +} + +static inline bool bpf_is_seen_register(struct codegen_context *ctx, int i) +{ + return (ctx->seen & (1 << (31 - b2p[i]))); +} + +static inline void bpf_set_seen_register(struct codegen_context *ctx, int i) +{ + ctx->seen |= (1 << (31 - b2p[i])); +} + +static inline bool bpf_has_stack_frame(struct codegen_context *ctx) +{ + /* + * We only need a stack frame if: + * - we call other functions (kernel helpers), or + * - the bpf program uses its stack area + * The latter condition is deduced from the usage of BPF_REG_FP + */ + return ctx->seen & SEEN_FUNC || bpf_is_seen_register(ctx, BPF_REG_FP); +} + +static void bpf_jit_emit_skb_loads(u32 *image, struct codegen_context *ctx) +{ + /* + * Load skb->len and skb->data_len + * r3 points to skb + */ + PPC_LWZ(b2p[SKB_HLEN_REG], 3, offsetof(struct sk_buff, len)); + PPC_LWZ(b2p[TMP_REG_1], 3, offsetof(struct sk_buff, data_len)); + /* header_len = len - data_len */ + PPC_SUB(b2p[SKB_HLEN_REG], b2p[SKB_HLEN_REG], b2p[TMP_REG_1]); + + /* skb->data pointer */ + PPC_BPF_LL(b2p[SKB_DATA_REG], 3, offsetof(struct sk_buff, data)); +} + +static void bpf_jit_emit_func_call(u32 *image, struct codegen_context *ctx, u64 func) +{ +#ifdef PPC64_ELF_ABI_v1 + /* func points to the function descriptor */ + PPC_LI64(b2p[TMP_REG_2], func); + /* Load actual entry point from function descriptor */ + PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_2], 0); + /* ... and move it to LR */ + PPC_MTLR(b2p[TMP_REG_1]); + /* + * Load TOC from function descriptor at offset 8. 
+ * We can clobber r2 since we get called through a + * function pointer (so caller will save/restore r2) + * and since we don't use a TOC ourself. + */ + PPC_BPF_LL(2, b2p[TMP_REG_2], 8); +#else + /* We can clobber r12 */ + PPC_FUNC_ADDR(12, func); + PPC_MTLR(12); +#endif + PPC_BLRL(); +} + +static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) +{ + int i; + bool new_stack_frame = bpf_has_stack_frame(ctx); + + if (new_stack_frame) { + /* + * We need a stack frame, but we don't necessarily need to + * save/restore LR unless we call other functions + */ + if (ctx->seen & SEEN_FUNC) { + EMIT(PPC_INST_MFLR | __PPC_RT(R0)); + PPC_BPF_STL(0, 1, PPC_LR_STKOFF); + } + + PPC_BPF_STLU(1, 1, -BPF_PPC_STACKFRAME); + } + + /* + * Back up non-volatile regs -- BPF registers 6-10 + * If we haven't created our own stack frame, we save these + * in the protected zone below the previous stack frame + */ + for (i = BPF_REG_6; i <= BPF_REG_10; i++) + if (bpf_is_seen_register(ctx, i)) + PPC_BPF_STL(b2p[i], 1, + (new_stack_frame ? BPF_PPC_STACKFRAME : 0) - + (8 * (32 - b2p[i]))); + + /* + * Save additional non-volatile regs if we cache skb + * Also, setup skb data + */ + if (ctx->seen & SEEN_SKB) { + PPC_BPF_STL(b2p[SKB_HLEN_REG], 1, + BPF_PPC_STACKFRAME - (8 * (32 - b2p[SKB_HLEN_REG]))); + PPC_BPF_STL(b2p[SKB_DATA_REG], 1, + BPF_PPC_STACKFRAME - (8 * (32 - b2p[SKB_DATA_REG]))); + bpf_jit_emit_skb_loads(image, ctx); + } + + /* Setup frame pointer to point to the bpf stack area */ + if (bpf_is_seen_register(ctx, BPF_REG_FP)) + PPC_ADDI(b2p[BPF_REG_FP], 1, + BPF_PPC_STACKFRAME - BPF_PPC_STACK_SAVE); +} + +static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) +{ + int i; + bool new_stack_frame = bpf_has_stack_frame(ctx); + + /* Move result to r3 */ + PPC_MR(3, b2p[BPF_REG_0]); + + /* Restore NVRs */ + for (i = BPF_REG_6; i <= BPF_REG_10; i++) + if (bpf_is_seen_register(ctx, i)) + PPC_BPF_LL(b2p[i], 1, + (new_stack_frame ? 
BPF_PPC_STACKFRAME : 0) - + (8 * (32 - b2p[i]))); + + /* Restore non-volatile registers used for skb cache */ + if (ctx->seen & SEEN_SKB) { + PPC_BPF_LL(b2p[SKB_HLEN_REG], 1, + BPF_PPC_STACKFRAME - (8 * (32 - b2p[SKB_HLEN_REG]))); + PPC_BPF_LL(b2p[SKB_DATA_REG], 1, + BPF_PPC_STACKFRAME - (8 * (32 - b2p[SKB_DATA_REG]))); + } + + /* Tear down our stack frame */ + if (new_stack_frame) { + PPC_ADDI(1, 1, BPF_PPC_STACKFRAME); + if (ctx->seen & SEEN_FUNC) { + PPC_BPF_LL(0, 1, PPC_LR_STKOFF); + PPC_MTLR(0); + } + } + + PPC_BLR(); +} + +/* Assemble the body code between the prologue & epilogue */ +static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, + struct codegen_context *ctx, + u32 *addrs) +{ + const struct bpf_insn *insn = fp->insnsi; + int flen = fp->len; + int i; + + /* Start of epilogue code - will only be valid 2nd pass onwards */ + u32 exit_addr = addrs[flen]; + + for (i = 0; i < flen; i++) { + u32 code = insn[i].code; + u32 dst_reg = b2p[insn[i].dst_reg]; + u32 src_reg = b2p[insn[i].src_reg]; + s16 off = insn[i].off; + s32 imm = insn[i].imm; + u64 imm64; + u8 *func; + u32 true_cond; + int stack_local_off; + + /* + * addrs[] maps a BPF bytecode address into a real offset from + * the start of the body code. + */ + addrs[i] = ctx->idx * 4; + + /* + * As an optimization, we note down which non-volatile registers + * are used so that we can only save/restore those in our + * prologue and epilogue. We do this here regardless of whether + * the actual BPF instruction uses src/dst registers or not + * (for instance, BPF_CALL does not use them). The expectation + * is that those instructions will have src_reg/dst_reg set to + * 0. Even otherwise, we just lose some prologue/epilogue + * optimization but everything else should work without + * any issues. 
+ */ + if (dst_reg >= 24 && dst_reg <= 31) + bpf_set_seen_register(ctx, insn[i].dst_reg); + if (src_reg >= 24 && src_reg <= 31) + bpf_set_seen_register(ctx, insn[i].src_reg); + + switch (code) { + /* + * Arithmetic operations: ADD/SUB/MUL/DIV/MOD/NEG + */ + case BPF_ALU | BPF_ADD | BPF_X: /* (u32) dst += (u32) src */ + case BPF_ALU64 | BPF_ADD | BPF_X: /* dst += src */ + PPC_ADD(dst_reg, dst_reg, src_reg); + goto bpf_alu32_trunc; + case BPF_ALU | BPF_SUB | BPF_X: /* (u32) dst -= (u32) src */ + case BPF_ALU64 | BPF_SUB | BPF_X: /* dst -= src */ + PPC_SUB(dst_reg, dst_reg, src_reg); + goto bpf_alu32_trunc; + case BPF_ALU | BPF_ADD | BPF_K: /* (u32) dst += (u32) imm */ + case BPF_ALU | BPF_SUB | BPF_K: /* (u32) dst -= (u32) imm */ + case BPF_ALU64 | BPF_ADD | BPF_K: /* dst += imm */ + case BPF_ALU64 | BPF_SUB | BPF_K: /* dst -= imm */ + if (BPF_OP(code) == BPF_SUB) + imm = -imm; + if (imm) { + if (imm >= -32768 && imm < 32768) + PPC_ADDI(dst_reg, dst_reg, IMM_L(imm)); + else { + PPC_LI32(b2p[TMP_REG_1], imm); + PPC_ADD(dst_reg, dst_reg, b2p[TMP_REG_1]); + } + } + goto bpf_alu32_trunc; + case BPF_ALU | BPF_MUL | BPF_X: /* (u32) dst *= (u32) src */ + case BPF_ALU64 | BPF_MUL | BPF_X: /* dst *= src */ + if (BPF_CLASS(code) == BPF_ALU) + PPC_MULW(dst_reg, dst_reg, src_reg); + else + PPC_MULD(dst_reg, dst_reg, src_reg); + goto bpf_alu32_trunc; + case BPF_ALU | BPF_MUL | BPF_K: /* (u32) dst *= (u32) imm */ + case BPF_ALU64 | BPF_MUL | BPF_K: /* dst *= imm */ + if (imm >= -32768 && imm < 32768) + PPC_MULI(dst_reg, dst_reg, IMM_L(imm)); + else { + PPC_LI32(b2p[TMP_REG_1], imm); + if (BPF_CLASS(code) == BPF_ALU) + PPC_MULW(dst_reg, dst_reg, + b2p[TMP_REG_1]); + else + PPC_MULD(dst_reg, dst_reg, + b2p[TMP_REG_1]); + } + goto bpf_alu32_trunc; + case BPF_ALU | BPF_DIV | BPF_X: /* (u32) dst /= (u32) src */ + case BPF_ALU | BPF_MOD | BPF_X: /* (u32) dst %= (u32) src */ + PPC_CMPWI(src_reg, 0); + PPC_BCC_SHORT(COND_NE, (ctx->idx * 4) + 12); + PPC_LI(b2p[BPF_REG_0], 0); + 
PPC_JMP(exit_addr); + if (BPF_OP(code) == BPF_MOD) { + PPC_DIVWU(b2p[TMP_REG_1], dst_reg, src_reg); + PPC_MULW(b2p[TMP_REG_1], src_reg, + b2p[TMP_REG_1]); + PPC_SUB(dst_reg, dst_reg, b2p[TMP_REG_1]); + } else + PPC_DIVWU(dst_reg, dst_reg, src_reg); + goto bpf_alu32_trunc; + case BPF_ALU64 | BPF_DIV | BPF_X: /* dst /= src */ + case BPF_ALU64 | BPF_MOD | BPF_X: /* dst %= src */ + PPC_CMPDI(src_reg, 0); + PPC_BCC_SHORT(COND_NE, (ctx->idx * 4) + 12); + PPC_LI(b2p[BPF_REG_0], 0); + PPC_JMP(exit_addr); + if (BPF_OP(code) == BPF_MOD) { + PPC_DIVD(b2p[TMP_REG_1], dst_reg, src_reg); + PPC_MULD(b2p[TMP_REG_1], src_reg, + b2p[TMP_REG_1]); + PPC_SUB(dst_reg, dst_reg, b2p[TMP_REG_1]); + } else + PPC_DIVD(dst_reg, dst_reg, src_reg); + break; + case BPF_ALU | BPF_MOD | BPF_K: /* (u32) dst %= (u32) imm */ + case BPF_ALU | BPF_DIV | BPF_K: /* (u32) dst /= (u32) imm */ + case BPF_ALU64 | BPF_MOD | BPF_K: /* dst %= imm */ + case BPF_ALU64 | BPF_DIV | BPF_K: /* dst /= imm */ + if (imm == 0) + return -EINVAL; + else if (imm == 1) + goto bpf_alu32_trunc; + + PPC_LI32(b2p[TMP_REG_1], imm); + switch (BPF_CLASS(code)) { + case BPF_ALU: + if (BPF_OP(code) == BPF_MOD) { + PPC_DIVWU(b2p[TMP_REG_2], dst_reg, + b2p[TMP_REG_1]); + PPC_MULW(b2p[TMP_REG_1], + b2p[TMP_REG_1], + b2p[TMP_REG_2]); + PPC_SUB(dst_reg, dst_reg, + b2p[TMP_REG_1]); + } else + PPC_DIVWU(dst_reg, dst_reg, + b2p[TMP_REG_1]); + break; + case BPF_ALU64: + if (BPF_OP(code) == BPF_MOD) { + PPC_DIVD(b2p[TMP_REG_2], dst_reg, + b2p[TMP_REG_1]); + PPC_MULD(b2p[TMP_REG_1], + b2p[TMP_REG_1], + b2p[TMP_REG_2]); + PPC_SUB(dst_reg, dst_reg, + b2p[TMP_REG_1]); + } else + PPC_DIVD(dst_reg, dst_reg, + b2p[TMP_REG_1]); + break; + } + goto bpf_alu32_trunc; + case BPF_ALU | BPF_NEG: /* (u32) dst = -dst */ + case BPF_ALU64 | BPF_NEG: /* dst = -dst */ + PPC_NEG(dst_reg, dst_reg); + goto bpf_alu32_trunc; + + /* + * Logical operations: AND/OR/XOR/[A]LSH/[A]RSH + */ + case BPF_ALU | BPF_AND | BPF_X: /* (u32) dst = dst & src */ + case BPF_ALU64 | 
BPF_AND | BPF_X: /* dst = dst & src */ + PPC_AND(dst_reg, dst_reg, src_reg); + goto bpf_alu32_trunc; + case BPF_ALU | BPF_AND | BPF_K: /* (u32) dst = dst & imm */ + case BPF_ALU64 | BPF_AND | BPF_K: /* dst = dst & imm */ + if (!IMM_H(imm)) + PPC_ANDI(dst_reg, dst_reg, IMM_L(imm)); + else { + /* Sign-extended */ + PPC_LI32(b2p[TMP_REG_1], imm); + PPC_AND(dst_reg, dst_reg, b2p[TMP_REG_1]); + } + goto bpf_alu32_trunc; + case BPF_ALU | BPF_OR | BPF_X: /* dst = (u32) dst | (u32) src */ + case BPF_ALU64 | BPF_OR | BPF_X: /* dst = dst | src */ + PPC_OR(dst_reg, dst_reg, src_reg); + goto bpf_alu32_trunc; + case BPF_ALU | BPF_OR | BPF_K:/* dst = (u32) dst | (u32) imm */ + case BPF_ALU64 | BPF_OR | BPF_K:/* dst = dst | imm */ + if (imm < 0 && BPF_CLASS(code) == BPF_ALU64) { + /* Sign-extended */ + PPC_LI32(b2p[TMP_REG_1], imm); + PPC_OR(dst_reg, dst_reg, b2p[TMP_REG_1]); + } else { + if (IMM_L(imm)) + PPC_ORI(dst_reg, dst_reg, IMM_L(imm)); + if (IMM_H(imm)) + PPC_ORIS(dst_reg, dst_reg, IMM_H(imm)); + } + goto bpf_alu32_trunc; + case BPF_ALU | BPF_XOR | BPF_X: /* (u32) dst ^= src */ + case BPF_ALU64 | BPF_XOR | BPF_X: /* dst ^= src */ + PPC_XOR(dst_reg, dst_reg, src_reg); + goto bpf_alu32_trunc; + case BPF_ALU | BPF_XOR | BPF_K: /* (u32) dst ^= (u32) imm */ + case BPF_ALU64 | BPF_XOR | BPF_K: /* dst ^= imm */ + if (imm < 0 && BPF_CLASS(code) == BPF_ALU64) { + /* Sign-extended */ + PPC_LI32(b2p[TMP_REG_1], imm); + PPC_XOR(dst_reg, dst_reg, b2p[TMP_REG_1]); + } else { + if (IMM_L(imm)) + PPC_XORI(dst_reg, dst_reg, IMM_L(imm)); + if (IMM_H(imm)) + PPC_XORIS(dst_reg, dst_reg, IMM_H(imm)); + } + goto bpf_alu32_trunc; + case BPF_ALU | BPF_LSH | BPF_X: /* (u32) dst <<= (u32) src */ + /* slw clears top 32 bits */ + PPC_SLW(dst_reg, dst_reg, src_reg); + break; + case BPF_ALU64 | BPF_LSH | BPF_X: /* dst <<= src; */ + PPC_SLD(dst_reg, dst_reg, src_reg); + break; + case BPF_ALU | BPF_LSH | BPF_K: /* (u32) dst <<= (u32) imm */ + /* with imm 0, we still need to clear top 32 bits */ +
PPC_SLWI(dst_reg, dst_reg, imm); + break; + case BPF_ALU64 | BPF_LSH | BPF_K: /* dst <<= imm */ + if (imm != 0) + PPC_SLDI(dst_reg, dst_reg, imm); + break; + case BPF_ALU | BPF_RSH | BPF_X: /* (u32) dst >>= (u32) src */ + PPC_SRW(dst_reg, dst_reg, src_reg); + break; + case BPF_ALU64 | BPF_RSH | BPF_X: /* dst >>= src */ + PPC_SRD(dst_reg, dst_reg, src_reg); + break; + case BPF_ALU | BPF_RSH | BPF_K: /* (u32) dst >>= (u32) imm */ + PPC_SRWI(dst_reg, dst_reg, imm); + break; + case BPF_ALU64 | BPF_RSH | BPF_K: /* dst >>= imm */ + if (imm != 0) + PPC_SRDI(dst_reg, dst_reg, imm); + break; + case BPF_ALU64 | BPF_ARSH | BPF_X: /* (s64) dst >>= src */ + PPC_SRAD(dst_reg, dst_reg, src_reg); + break; + case BPF_ALU64 | BPF_ARSH | BPF_K: /* (s64) dst >>= imm */ + if (imm != 0) + PPC_SRADI(dst_reg, dst_reg, imm); + break; + + /* + * MOV + */ + case BPF_ALU | BPF_MOV | BPF_X: /* (u32) dst = src */ + case BPF_ALU64 | BPF_MOV | BPF_X: /* dst = src */ + PPC_MR(dst_reg, src_reg); + goto bpf_alu32_trunc; + case BPF_ALU | BPF_MOV | BPF_K: /* (u32) dst = imm */ + case BPF_ALU64 | BPF_MOV | BPF_K: /* dst = (s64) imm */ + PPC_LI32(dst_reg, imm); + if (imm < 0) + goto bpf_alu32_trunc; + break; + +bpf_alu32_trunc: + /* Truncate to 32-bits */ + if (BPF_CLASS(code) == BPF_ALU) + PPC_RLWINM(dst_reg, dst_reg, 0, 0, 31); + break; + + /* + * BPF_FROM_BE/LE + */ + case BPF_ALU | BPF_END | BPF_FROM_LE: + case BPF_ALU | BPF_END | BPF_FROM_BE: +#ifdef __BIG_ENDIAN__ + if (BPF_SRC(code) == BPF_FROM_BE) + goto emit_clear; +#else /* !__BIG_ENDIAN__ */ + if (BPF_SRC(code) == BPF_FROM_LE) + goto emit_clear; +#endif + switch (imm) { + case 16: + /* Rotate 8 bits left & mask with 0x0000ff00 */ + PPC_RLWINM(b2p[TMP_REG_1], dst_reg, 8, 16, 23); + /* Rotate 8 bits right & insert LSB to reg */ + PPC_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 24, 31); + /* Move result back to dst_reg */ + PPC_MR(dst_reg, b2p[TMP_REG_1]); + break; + case 32: + /* + * Rotate word left by 8 bits: + * 2 bytes are already in their final
position + * -- byte 2 and 4 (of bytes 1, 2, 3 and 4) + */ + PPC_RLWINM(b2p[TMP_REG_1], dst_reg, 8, 0, 31); + /* Rotate 24 bits and insert byte 1 */ + PPC_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 0, 7); + /* Rotate 24 bits and insert byte 3 */ + PPC_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 16, 23); + PPC_MR(dst_reg, b2p[TMP_REG_1]); + break; + case 64: + /* + * Way easier and faster(?) to store the value + * into stack and then use ldbrx + * + * First, determine where in stack we can store + * this: + * - if we have allotted a stack frame, then we + * will utilize the area set aside by + * BPF_PPC_STACK_LOCALS + * - else, we use the area beneath the NV GPR + * save area + * + * ctx->seen will be reliable in pass2, but + * the instructions generated will remain the + * same across all passes + */ + if (bpf_has_stack_frame(ctx)) + stack_local_off = STACK_FRAME_MIN_SIZE; + else + stack_local_off = -(BPF_PPC_STACK_SAVE + 8); + + PPC_STD(dst_reg, 1, stack_local_off); + PPC_ADDI(b2p[TMP_REG_1], 1, stack_local_off); + PPC_LDBRX(dst_reg, 0, b2p[TMP_REG_1]); + break; + } + break; + +emit_clear: + switch (imm) { + case 16: + /* zero-extend 16 bits into 64 bits */ + PPC_RLDICL(dst_reg, dst_reg, 0, 48); + break; + case 32: + /* zero-extend 32 bits into 64 bits */ + PPC_RLDICL(dst_reg, dst_reg, 0, 32); + break; + case 64: + /* nop */ + break; + } + break; + + /* + * BPF_ST(X) + */ + case BPF_STX | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = src */ + case BPF_ST | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = imm */ + if (BPF_CLASS(code) == BPF_ST) { + PPC_LI(b2p[TMP_REG_1], imm); + src_reg = b2p[TMP_REG_1]; + } + PPC_STB(src_reg, dst_reg, off); + break; + case BPF_STX | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = src */ + case BPF_ST | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = imm */ + if (BPF_CLASS(code) == BPF_ST) { + PPC_LI(b2p[TMP_REG_1], imm); + src_reg = b2p[TMP_REG_1]; + } + PPC_STH(src_reg, dst_reg, off); + break; + case BPF_STX | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = src */ + case 
BPF_ST | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = imm */ + if (BPF_CLASS(code) == BPF_ST) { + PPC_LI32(b2p[TMP_REG_1], imm); + src_reg = b2p[TMP_REG_1]; + } + PPC_STW(src_reg, dst_reg, off); + break; + case BPF_STX | BPF_MEM | BPF_DW: /* (u64 *)(dst + off) = src */ + case BPF_ST | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = imm */ + if (BPF_CLASS(code) == BPF_ST) { + PPC_LI32(b2p[TMP_REG_1], imm); + src_reg = b2p[TMP_REG_1]; + } + PPC_STD(src_reg, dst_reg, off); + break; + + /* + * BPF_STX XADD (atomic_add) + */ + /* *(u32 *)(dst + off) += src */ + case BPF_STX | BPF_XADD | BPF_W: + /* Get EA into TMP_REG_1 */ + PPC_ADDI(b2p[TMP_REG_1], dst_reg, off); + /* error if EA is not word-aligned */ + PPC_ANDI(b2p[TMP_REG_2], b2p[TMP_REG_1], 0x03); + PPC_BCC_SHORT(COND_EQ, (ctx->idx * 4) + 12); + PPC_LI(b2p[BPF_REG_0], 0); + PPC_JMP(exit_addr); + /* load value from memory into TMP_REG_2 */ + PPC_BPF_LWARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0); + /* add value from src_reg into this */ + PPC_ADD(b2p[TMP_REG_2], b2p[TMP_REG_2], src_reg); + /* store result back */ + PPC_BPF_STWCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1]); + /* we're done if this succeeded */ + PPC_BCC_SHORT(COND_EQ, (ctx->idx * 4) + (7*4)); + /* otherwise, let's try once more */ + PPC_BPF_LWARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0); + PPC_ADD(b2p[TMP_REG_2], b2p[TMP_REG_2], src_reg); + PPC_BPF_STWCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1]); + /* exit if the store was not successful */ + PPC_LI(b2p[BPF_REG_0], 0); + PPC_BCC(COND_NE, exit_addr); + break; + /* *(u64 *)(dst + off) += src */ + case BPF_STX | BPF_XADD | BPF_DW: + PPC_ADDI(b2p[TMP_REG_1], dst_reg, off); + /* error if EA is not doubleword-aligned */ + PPC_ANDI(b2p[TMP_REG_2], b2p[TMP_REG_1], 0x07); + PPC_BCC_SHORT(COND_EQ, (ctx->idx * 4) + (3*4)); + PPC_LI(b2p[BPF_REG_0], 0); + PPC_JMP(exit_addr); + PPC_BPF_LDARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0); + PPC_ADD(b2p[TMP_REG_2], b2p[TMP_REG_2], src_reg); + PPC_BPF_STDCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1]); + 
PPC_BCC_SHORT(COND_EQ, (ctx->idx * 4) + (7*4)); + PPC_BPF_LDARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0); + PPC_ADD(b2p[TMP_REG_2], b2p[TMP_REG_2], src_reg); + PPC_BPF_STDCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1]); + PPC_LI(b2p[BPF_REG_0], 0); + PPC_BCC(COND_NE, exit_addr); + break; + + /* + * BPF_LDX + */ + /* dst = *(u8 *)(ul) (src + off) */ + case BPF_LDX | BPF_MEM | BPF_B: + PPC_LBZ(dst_reg, src_reg, off); + break; + /* dst = *(u16 *)(ul) (src + off) */ + case BPF_LDX | BPF_MEM | BPF_H: + PPC_LHZ(dst_reg, src_reg, off); + break; + /* dst = *(u32 *)(ul) (src + off) */ + case BPF_LDX | BPF_MEM | BPF_W: + PPC_LWZ(dst_reg, src_reg, off); + break; + /* dst = *(u64 *)(ul) (src + off) */ + case BPF_LDX | BPF_MEM | BPF_DW: + PPC_LD(dst_reg, src_reg, off); + break; + + /* + * Doubleword load + * 16 byte instruction that uses two 'struct bpf_insn' + */ + case BPF_LD | BPF_IMM | BPF_DW: /* dst = (u64) imm */ + imm64 = ((u64)(u32) insn[i].imm) | + (((u64)(u32) insn[i+1].imm) << 32); + /* Adjust for two bpf instructions */ + addrs[++i] = ctx->idx * 4; + PPC_LI64(dst_reg, imm64); + break; + + /* + * Return/Exit + */ + case BPF_JMP | BPF_EXIT: + /* + * If this isn't the very last instruction, branch to + * the epilogue. If we _are_ the last instruction, + * we'll just fall through to the epilogue. 
+ */ + if (i != flen - 1) + PPC_JMP(exit_addr); + /* else fall through to the epilogue */ + break; + + /* + * Call kernel helper + */ + case BPF_JMP | BPF_CALL: + ctx->seen |= SEEN_FUNC; + func = (u8 *) __bpf_call_base + imm; + + /* Save skb pointer if we need to re-cache skb data */ + if (bpf_helper_changes_skb_data(func)) + PPC_BPF_STL(3, 1, STACK_FRAME_MIN_SIZE); + + bpf_jit_emit_func_call(image, ctx, (u64)func); + + /* move return value from r3 to BPF_REG_0 */ + PPC_MR(b2p[BPF_REG_0], 3); + + /* refresh skb cache */ + if (bpf_helper_changes_skb_data(func)) { + /* reload skb pointer to r3 */ + PPC_BPF_LL(3, 1, STACK_FRAME_MIN_SIZE); + bpf_jit_emit_skb_loads(image, ctx); + } + break; + + /* + * Jumps and branches + */ + case BPF_JMP | BPF_JA: + PPC_JMP(addrs[i + 1 + off]); + break; + + case BPF_JMP | BPF_JGT | BPF_K: + case BPF_JMP | BPF_JGT | BPF_X: + case BPF_JMP | BPF_JSGT | BPF_K: + case BPF_JMP | BPF_JSGT | BPF_X: + true_cond = COND_GT; + goto cond_branch; + case BPF_JMP | BPF_JGE | BPF_K: + case BPF_JMP | BPF_JGE | BPF_X: + case BPF_JMP | BPF_JSGE | BPF_K: + case BPF_JMP | BPF_JSGE | BPF_X: + true_cond = COND_GE; + goto cond_branch; + case BPF_JMP | BPF_JEQ | BPF_K: + case BPF_JMP | BPF_JEQ | BPF_X: + true_cond = COND_EQ; + goto cond_branch; + case BPF_JMP | BPF_JNE | BPF_K: + case BPF_JMP | BPF_JNE | BPF_X: + true_cond = COND_NE; + goto cond_branch; + case BPF_JMP | BPF_JSET | BPF_K: + case BPF_JMP | BPF_JSET | BPF_X: + true_cond = COND_NE; + /* Fall through */ + +cond_branch: + switch (code) { + case BPF_JMP | BPF_JGT | BPF_X: + case BPF_JMP | BPF_JGE | BPF_X: + case BPF_JMP | BPF_JEQ | BPF_X: + case BPF_JMP | BPF_JNE | BPF_X: + /* unsigned comparison */ + PPC_CMPLD(dst_reg, src_reg); + break; + case BPF_JMP | BPF_JSGT | BPF_X: + case BPF_JMP | BPF_JSGE | BPF_X: + /* signed comparison */ + PPC_CMPD(dst_reg, src_reg); + break; + case BPF_JMP | BPF_JSET | BPF_X: + PPC_AND_DOT(b2p[TMP_REG_1], dst_reg, src_reg); + break; + case BPF_JMP | BPF_JNE | BPF_K: + 
case BPF_JMP | BPF_JEQ | BPF_K: + case BPF_JMP | BPF_JGT | BPF_K: + case BPF_JMP | BPF_JGE | BPF_K: + /* + * Need sign-extended load, so only positive + * values can be used as imm in cmpldi + */ + if (imm >= 0 && imm < 32768) + PPC_CMPLDI(dst_reg, imm); + else { + /* sign-extending load */ + PPC_LI32(b2p[TMP_REG_1], imm); + /* ... but unsigned comparison */ + PPC_CMPLD(dst_reg, b2p[TMP_REG_1]); + } + break; + case BPF_JMP | BPF_JSGT | BPF_K: + case BPF_JMP | BPF_JSGE | BPF_K: + /* + * signed comparison, so any 16-bit value + * can be used in cmpdi + */ + if (imm >= -32768 && imm < 32768) + PPC_CMPDI(dst_reg, imm); + else { + PPC_LI32(b2p[TMP_REG_1], imm); + PPC_CMPD(dst_reg, b2p[TMP_REG_1]); + } + break; + case BPF_JMP | BPF_JSET | BPF_K: + /* andi does not sign-extend the immediate */ + if (imm >= 0 && imm < 32768) + /* PPC_ANDI is _only/always_ dot-form */ + PPC_ANDI(b2p[TMP_REG_1], dst_reg, imm); + else { + PPC_LI32(b2p[TMP_REG_1], imm); + PPC_AND_DOT(b2p[TMP_REG_1], dst_reg, + b2p[TMP_REG_1]); + } + break; + } + PPC_BCC(true_cond, addrs[i + 1 + off]); + break; + + /* + * Loads from packet header/data + * Assume 32-bit input value in imm and X (src_reg) + */ + + /* Absolute loads */ + case BPF_LD | BPF_W | BPF_ABS: + func = (u8 *)CHOOSE_LOAD_FUNC(imm, sk_load_word); + goto common_load_abs; + case BPF_LD | BPF_H | BPF_ABS: + func = (u8 *)CHOOSE_LOAD_FUNC(imm, sk_load_half); + goto common_load_abs; + case BPF_LD | BPF_B | BPF_ABS: + func = (u8 *)CHOOSE_LOAD_FUNC(imm, sk_load_byte); +common_load_abs: + /* + * Load from [imm] + * Load into r4, which can just be passed onto + * skb load helpers as the second parameter + */ + PPC_LI32(4, imm); + goto common_load; + + /* Indirect loads */ + case BPF_LD | BPF_W | BPF_IND: + func = (u8 *)sk_load_word; + goto common_load_ind; + case BPF_LD | BPF_H | BPF_IND: + func = (u8 *)sk_load_half; + goto common_load_ind; + case BPF_LD | BPF_B | BPF_IND: + func = (u8 *)sk_load_byte; +common_load_ind: + /* + * Load from [src_reg + 
imm] + * Treat src_reg as a 32-bit value + */ + PPC_EXTSW(4, src_reg); + if (imm) { + if (imm >= -32768 && imm < 32768) + PPC_ADDI(4, 4, IMM_L(imm)); + else { + PPC_LI32(b2p[TMP_REG_1], imm); + PPC_ADD(4, 4, b2p[TMP_REG_1]); + } + } + +common_load: + ctx->seen |= SEEN_SKB; + ctx->seen |= SEEN_FUNC; + bpf_jit_emit_func_call(image, ctx, (u64)func); + + /* + * Helper returns 'lt' condition on error, and an + * appropriate return value in BPF_REG_0 + */ + PPC_BCC(COND_LT, exit_addr); + break; + + /* + * TODO: Tail call + */ + case BPF_JMP | BPF_CALL | BPF_X: + + default: + /* + * The filter contains something cruel & unusual. + * We don't handle it, but also there shouldn't be + * anything missing from our list. + */ + pr_err_ratelimited("eBPF filter opcode %04x (@%d) unsupported\n", + code, i); + return -ENOTSUPP; + } + } + + /* Set end-of-body-code address for exit. */ + addrs[i] = ctx->idx * 4; + + return 0; +} + +void bpf_jit_compile(struct bpf_prog *fp) { } + +struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) +{ + u32 proglen; + u32 alloclen; + u8 *image = NULL; + u32 *code_base; + u32 *addrs; + struct codegen_context cgctx; + int pass; + int flen; + struct bpf_binary_header *bpf_hdr; + + if (!bpf_jit_enable) + return fp; + + flen = fp->len; + addrs = kzalloc((flen+1) * sizeof(*addrs), GFP_KERNEL); + if (addrs == NULL) + return fp; + + cgctx.idx = 0; + cgctx.seen = 0; + /* Scouting faux-generate pass 0 */ + if (bpf_jit_build_body(fp, 0, &cgctx, addrs)) + /* We hit something illegal or unsupported. */ + goto out; + + /* + * Pretend to build prologue, given the features we've seen. This will + * update ctgtx.idx as it pretends to output instructions, then we can + * calculate total size from idx. 
+ */ + bpf_jit_build_prologue(0, &cgctx); + bpf_jit_build_epilogue(0, &cgctx); + + proglen = cgctx.idx * 4; + alloclen = proglen + FUNCTION_DESCR_SIZE; + + bpf_hdr = bpf_jit_binary_alloc(alloclen, &image, 4, + bpf_jit_fill_ill_insns); + if (!bpf_hdr) + goto out; + + code_base = (u32 *)(image + FUNCTION_DESCR_SIZE); + + /* Code generation passes 1-2 */ + for (pass = 1; pass < 3; pass++) { + /* Now build the prologue, body code & epilogue for real. */ + cgctx.idx = 0; + bpf_jit_build_prologue(code_base, &cgctx); + bpf_jit_build_body(fp, code_base, &cgctx, addrs); + bpf_jit_build_epilogue(code_base, &cgctx); + + if (bpf_jit_enable > 1) + pr_info("Pass %d: shrink = %d, seen = 0x%x\n", pass, + proglen - (cgctx.idx * 4), cgctx.seen); + } + + if (bpf_jit_enable > 1) + /* + * Note that we output the base address of the code_base + * rather than image, since opcodes are in code_base. + */ + bpf_jit_dump(flen, proglen, pass, code_base); + + if (image) { + bpf_flush_icache(bpf_hdr, image + alloclen); +#ifdef PPC64_ELF_ABI_v1 + /* Function descriptor nastiness: Address + TOC */ + ((u64 *)image)[0] = (u64)code_base; + ((u64 *)image)[1] = local_paca->kernel_toc; +#endif + fp->bpf_func = (void *)image; + fp->jited = 1; + } + +out: + kfree(addrs); + return fp; +} + +void bpf_jit_free(struct bpf_prog *fp) +{ + unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK; + struct bpf_binary_header *bpf_hdr = (void *)addr; + + if (fp->jited) + bpf_jit_binary_free(bpf_hdr); + + bpf_prog_unlock_free(fp); +} -- cgit v0.10.2 From cdb1b3424dba7d38a2835f6f5f5aaeae74885410 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 22 Jun 2016 17:23:07 +1000 Subject: powerpc/pci: Reduce log level of PCI I/O space warning If a PHB has no I/O space, there's no need to make it look like something bad happened, a pr_debug() is plenty enough since this is the case of all our modern POWER chips. 
Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index be9e515..d1f91e1 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -1497,9 +1497,9 @@ static void pcibios_setup_phb_resources(struct pci_controller *hose, res = &hose->io_resource; if (!res->flags) { - pr_info("PCI: I/O resource not set for host" - " bridge %s (domain %d)\n", - hose->dn->full_name, hose->global_number); + pr_debug("PCI: I/O resource not set for host" + " bridge %s (domain %d)\n", + hose->dn->full_name, hose->global_number); } else { offset = pcibios_io_space_offset(hose); -- cgit v0.10.2 From 4a03749f140cbee6fee66b674ba763942d1446f2 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 27 Jun 2016 12:07:41 +0100 Subject: powerpc/fadump: Trivial fix of spelling mistake, clean up message Fix trivial spelling mistake "rgistration". Also use pr_err() instead of printk() and unsplit the string to keep it all on one line. Signed-off-by: Colin Ian King [mpe: Keep rc on the same line, splitting it doesn't help] Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index 3cb3b02a..f066486 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -1009,9 +1009,8 @@ static int fadump_invalidate_dump(struct fadump_mem_struct *fdm) } while (wait_time); if (rc) { - printk(KERN_ERR "Failed to invalidate firmware-assisted dump " - "rgistration. unexpected error(%d).\n", rc); - return rc; + pr_err("Failed to invalidate firmware-assisted dump registration. 
Unexpected error (%d).\n", rc); + return rc; } fw_dump.dump_active = 0; fdm_active = NULL; -- cgit v0.10.2 From 6e8a9279a85abd07d05e9322844b0f254e8437ac Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 23 Jun 2016 18:05:56 +0100 Subject: powerpc/powernv: Fix spelling mistake "Retrived" -> "Retrieved" Trivial fix to spelling mistake in pr_debug() message. Signed-off-by: Colin Ian King Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/platforms/powernv/opal-memory-errors.c b/arch/powerpc/platforms/powernv/opal-memory-errors.c index 00a2943..4495f42 100644 --- a/arch/powerpc/platforms/powernv/opal-memory-errors.c +++ b/arch/powerpc/platforms/powernv/opal-memory-errors.c @@ -44,7 +44,7 @@ static void handle_memory_error_event(struct OpalMemoryErrorData *merr_evt) { uint64_t paddr_start, paddr_end; - pr_debug("%s: Retrived memory error event, type: 0x%x\n", + pr_debug("%s: Retrieved memory error event, type: 0x%x\n", __func__, merr_evt->type); switch (merr_evt->type) { case OPAL_MEM_ERR_TYPE_RESILIENCE: -- cgit v0.10.2 From b810253bd9342f863a86ec7dfff4a5a7a0394d2f Mon Sep 17 00:00:00 2001 From: Philippe Bergheaud Date: Thu, 23 Jun 2016 15:03:53 +0200 Subject: cxl: Add mechanism for delivering AFU driver specific events This adds an afu_driver_ops structure with fetch_event() and event_delivered() callbacks. An AFU driver such as cxlflash can fill this out and associate it with a context to enable passing custom AFU specific events to userspace. This also adds a new kernel API function cxl_context_events_pending(), that the AFU driver can use to notify the cxl driver that new specific events are ready to be delivered, and wake up anyone waiting on the context wait queue. The current count of AFU driver specific events is stored in the field afu_driver_events of the context structure. The cxl driver checks the afu_driver_events count during poll, select, read, etc.
calls to check if an AFU driver specific event is pending, and calls fetch_event() to obtain and deliver that event. This way, the cxl driver takes care of all the usual locking semantics around these calls and handles all the generic cxl events, so that the AFU driver only needs to worry about its own events. fetch_event() returns a struct cxl_event_afu_driver_reserved, allocated by the AFU driver, and filled in with the specific event information and size. Total event size (header + data) should not be greater than CXL_READ_MIN_SIZE (4K). The cxl driver prepends an appropriate cxl event header, copies the event to userspace, and finally calls event_delivered() to return the status of the operation to the AFU driver. The event is identified by the context and cxl_event_afu_driver_reserved pointers. Since AFU drivers provide their own means for userspace to obtain the AFU file descriptor (e.g. cxlflash uses an ioctl on their scsi file descriptor to obtain the AFU file descriptor) and the generic cxl driver will never use this event, the ABI of the event is up to each individual AFU driver.
Signed-off-by: Philippe Bergheaud Signed-off-by: Michael Ellerman diff --git a/drivers/misc/cxl/Kconfig b/drivers/misc/cxl/Kconfig index 8756d06..560412c 100644 --- a/drivers/misc/cxl/Kconfig +++ b/drivers/misc/cxl/Kconfig @@ -15,12 +15,17 @@ config CXL_EEH bool default n +config CXL_AFU_DRIVER_OPS + bool + default n + config CXL tristate "Support for IBM Coherent Accelerators (CXL)" depends on PPC_POWERNV && PCI_MSI && EEH select CXL_BASE select CXL_KERNEL_API select CXL_EEH + select CXL_AFU_DRIVER_OPS default m help Select this option to enable driver support for IBM Coherent diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c index 99081b8..f11dc0e 100644 --- a/drivers/misc/cxl/api.c +++ b/drivers/misc/cxl/api.c @@ -333,6 +333,23 @@ struct cxl_context *cxl_fops_get_context(struct file *file) } EXPORT_SYMBOL_GPL(cxl_fops_get_context); +void cxl_set_driver_ops(struct cxl_context *ctx, + struct cxl_afu_driver_ops *ops) +{ + WARN_ON(!ops->fetch_event || !ops->event_delivered); + atomic_set(&ctx->afu_driver_events, 0); + ctx->afu_driver_ops = ops; +} +EXPORT_SYMBOL_GPL(cxl_set_driver_ops); + +void cxl_context_events_pending(struct cxl_context *ctx, + unsigned int new_events) +{ + atomic_add(new_events, &ctx->afu_driver_events); + wake_up_all(&ctx->wq); +} +EXPORT_SYMBOL_GPL(cxl_context_events_pending); + int cxl_start_work(struct cxl_context *ctx, struct cxl_ioctl_start_work *work) { diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h index ce2b9d5..422ee53 100644 --- a/drivers/misc/cxl/cxl.h +++ b/drivers/misc/cxl/cxl.h @@ -24,6 +24,7 @@ #include #include +#include #include extern uint cxl_verbose; @@ -34,7 +35,7 @@ extern uint cxl_verbose; * Bump version each time a user API change is made, whether it is * backwards compatible ot not. 
*/ -#define CXL_API_VERSION 2 +#define CXL_API_VERSION 3 #define CXL_API_VERSION_COMPATIBLE 1 /* @@ -528,6 +529,10 @@ struct cxl_context { bool pending_fault; bool pending_afu_err; + /* Used by AFU drivers for driver specific event delivery */ + struct cxl_afu_driver_ops *afu_driver_ops; + atomic_t afu_driver_events; + struct rcu_head rcu; }; diff --git a/drivers/misc/cxl/file.c b/drivers/misc/cxl/file.c index eec468f..5fb9894 100644 --- a/drivers/misc/cxl/file.c +++ b/drivers/misc/cxl/file.c @@ -293,6 +293,17 @@ int afu_mmap(struct file *file, struct vm_area_struct *vm) return cxl_context_iomap(ctx, vm); } +static inline bool ctx_event_pending(struct cxl_context *ctx) +{ + if (ctx->pending_irq || ctx->pending_fault || ctx->pending_afu_err) + return true; + + if (ctx->afu_driver_ops && atomic_read(&ctx->afu_driver_events)) + return true; + + return false; +} + unsigned int afu_poll(struct file *file, struct poll_table_struct *poll) { struct cxl_context *ctx = file->private_data; @@ -305,8 +316,7 @@ unsigned int afu_poll(struct file *file, struct poll_table_struct *poll) pr_devel("afu_poll wait done pe: %i\n", ctx->pe); spin_lock_irqsave(&ctx->lock, flags); - if (ctx->pending_irq || ctx->pending_fault || - ctx->pending_afu_err) + if (ctx_event_pending(ctx)) mask |= POLLIN | POLLRDNORM; else if (ctx->status == CLOSED) /* Only error on closed when there are no futher events pending @@ -319,16 +329,46 @@ unsigned int afu_poll(struct file *file, struct poll_table_struct *poll) return mask; } -static inline int ctx_event_pending(struct cxl_context *ctx) +static ssize_t afu_driver_event_copy(struct cxl_context *ctx, + char __user *buf, + struct cxl_event *event, + struct cxl_event_afu_driver_reserved *pl) { - return (ctx->pending_irq || ctx->pending_fault || - ctx->pending_afu_err || (ctx->status == CLOSED)); + /* Check event */ + if (!pl) { + ctx->afu_driver_ops->event_delivered(ctx, pl, -EINVAL); + return -EFAULT; + } + + /* Check event size */ + event->header.size += 
pl->data_size; + if (event->header.size > CXL_READ_MIN_SIZE) { + ctx->afu_driver_ops->event_delivered(ctx, pl, -EINVAL); + return -EFAULT; + } + + /* Copy event header */ + if (copy_to_user(buf, event, sizeof(struct cxl_event_header))) { + ctx->afu_driver_ops->event_delivered(ctx, pl, -EFAULT); + return -EFAULT; + } + + /* Copy event data */ + buf += sizeof(struct cxl_event_header); + if (copy_to_user(buf, &pl->data, pl->data_size)) { + ctx->afu_driver_ops->event_delivered(ctx, pl, -EFAULT); + return -EFAULT; + } + + ctx->afu_driver_ops->event_delivered(ctx, pl, 0); /* Success */ + return event->header.size; } ssize_t afu_read(struct file *file, char __user *buf, size_t count, loff_t *off) { struct cxl_context *ctx = file->private_data; + struct cxl_event_afu_driver_reserved *pl = NULL; struct cxl_event event; unsigned long flags; int rc; @@ -344,7 +384,7 @@ ssize_t afu_read(struct file *file, char __user *buf, size_t count, for (;;) { prepare_to_wait(&ctx->wq, &wait, TASK_INTERRUPTIBLE); - if (ctx_event_pending(ctx)) + if (ctx_event_pending(ctx) || (ctx->status == CLOSED)) break; if (!cxl_ops->link_ok(ctx->afu->adapter, ctx->afu)) { @@ -374,7 +414,12 @@ ssize_t afu_read(struct file *file, char __user *buf, size_t count, memset(&event, 0, sizeof(event)); event.header.process_element = ctx->pe; event.header.size = sizeof(struct cxl_event_header); - if (ctx->pending_irq) { + if (ctx->afu_driver_ops && atomic_read(&ctx->afu_driver_events)) { + pr_devel("afu_read delivering AFU driver specific event\n"); + pl = ctx->afu_driver_ops->fetch_event(ctx); + atomic_dec(&ctx->afu_driver_events); + event.header.type = CXL_EVENT_AFU_DRIVER; + } else if (ctx->pending_irq) { pr_devel("afu_read delivering AFU interrupt\n"); event.header.size += sizeof(struct cxl_event_afu_interrupt); event.header.type = CXL_EVENT_AFU_INTERRUPT; @@ -404,6 +449,9 @@ ssize_t afu_read(struct file *file, char __user *buf, size_t count, spin_unlock_irqrestore(&ctx->lock, flags); + if (event.header.type 
== CXL_EVENT_AFU_DRIVER) + return afu_driver_event_copy(ctx, buf, &event, pl); + if (copy_to_user(buf, &event, event.header.size)) return -EFAULT; return event.header.size; @@ -558,7 +606,7 @@ int __init cxl_file_init(void) * If these change we really need to update API. Either change some * flags or update API version number CXL_API_VERSION. */ - BUILD_BUG_ON(CXL_API_VERSION != 2); + BUILD_BUG_ON(CXL_API_VERSION != 3); BUILD_BUG_ON(sizeof(struct cxl_ioctl_start_work) != 64); BUILD_BUG_ON(sizeof(struct cxl_event_header) != 8); BUILD_BUG_ON(sizeof(struct cxl_event_afu_interrupt) != 8); diff --git a/include/misc/cxl.h b/include/misc/cxl.h index 56560c5..17419f6 100644 --- a/include/misc/cxl.h +++ b/include/misc/cxl.h @@ -220,4 +220,52 @@ void cxl_perst_reloads_same_image(struct cxl_afu *afu, */ ssize_t cxl_read_adapter_vpd(struct pci_dev *dev, void *buf, size_t count); +/* + * AFU driver ops allow an AFU driver to create their own events to pass to + * userspace through the file descriptor as a simpler alternative to overriding + * the read() and poll() calls that works with the generic cxl events. These + * events are given priority over the generic cxl events, so they will be + * delivered first if multiple types of events are pending. + * + * The AFU driver must call cxl_context_events_pending() to notify the cxl + * driver that new events are ready to be delivered for a specific context. + * cxl_context_events_pending() will adjust the current count of AFU driver + * events for this context, and wake up anyone waiting on the context wait + * queue. + * + * The cxl driver will then call fetch_event() to get a structure defining + * the size and address of the driver specific event data. The cxl driver + * will build a cxl header with type and process_element fields filled in, + * and header.size set to sizeof(struct cxl_event_header) + data_size. + * The total size of the event is limited to CXL_READ_MIN_SIZE (4K). 
+ * + * fetch_event() is called with a spin lock held, so it must not sleep. + * + * The cxl driver will then deliver the event to userspace, and finally + * call event_delivered() to return the status of the operation, identified + * by cxl context and AFU driver event data pointers. + * 0 Success + * -EFAULT copy_to_user() has failed + * -EINVAL Event data pointer is NULL, or event size is greater than + * CXL_READ_MIN_SIZE. + */ +struct cxl_afu_driver_ops { + struct cxl_event_afu_driver_reserved *(*fetch_event) ( + struct cxl_context *ctx); + void (*event_delivered) (struct cxl_context *ctx, + struct cxl_event_afu_driver_reserved *event, + int rc); +}; + +/* + * Associate the above driver ops with a specific context. + * Reset the current count of AFU driver events. + */ +void cxl_set_driver_ops(struct cxl_context *ctx, + struct cxl_afu_driver_ops *ops); + +/* Notify cxl driver that new events are ready to be delivered for context */ +void cxl_context_events_pending(struct cxl_context *ctx, + unsigned int new_events); + #endif /* _MISC_CXL_H */ diff --git a/include/uapi/misc/cxl.h b/include/uapi/misc/cxl.h index 8cd334f..cbae529 100644 --- a/include/uapi/misc/cxl.h +++ b/include/uapi/misc/cxl.h @@ -93,6 +93,7 @@ enum cxl_event_type { CXL_EVENT_AFU_INTERRUPT = 1, CXL_EVENT_DATA_STORAGE = 2, CXL_EVENT_AFU_ERROR = 3, + CXL_EVENT_AFU_DRIVER = 4, }; struct cxl_event_header { @@ -124,12 +125,28 @@ struct cxl_event_afu_error { __u64 error; }; +struct cxl_event_afu_driver_reserved { + /* + * Defines the buffer passed to the cxl driver by the AFU driver. + * + * This is not ABI since the event header.size passed to the user for + * existing events is set in the read call to sizeof(cxl_event_header) + * + sizeof(whatever event is being dispatched) and the user is already + * required to use a 4K buffer on the read call. + * + * Of course the contents will be ABI, but that's up the AFU driver. 
+ */ + size_t data_size; + u8 data[]; +}; + struct cxl_event { struct cxl_event_header header; union { struct cxl_event_afu_interrupt irq; struct cxl_event_data_storage fault; struct cxl_event_afu_error afu_error; + struct cxl_event_afu_driver_reserved afu_driver_event; }; }; -- cgit v0.10.2 From ad42de859ff14c079e966e61cbcba85265b982e1 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Fri, 24 Jun 2016 08:47:07 +0200 Subject: cxl: Add set and get private data to context struct This provides AFU drivers a means to associate private data with a cxl context. This is particularly intended for make the new callbacks for driver specific events easier for AFU drivers to use, as they can easily get back to any private data structures they may use. Signed-off-by: Michael Neuling Signed-off-by: Ian Munsie Signed-off-by: Philippe Bergheaud Reviewed-by: Andrew Donnellan Signed-off-by: Michael Ellerman diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c index f11dc0e..7707055 100644 --- a/drivers/misc/cxl/api.c +++ b/drivers/misc/cxl/api.c @@ -94,6 +94,27 @@ static irq_hw_number_t cxl_find_afu_irq(struct cxl_context *ctx, int num) return 0; } + +int cxl_set_priv(struct cxl_context *ctx, void *priv) +{ + if (!ctx) + return -EINVAL; + + ctx->priv = priv; + + return 0; +} +EXPORT_SYMBOL_GPL(cxl_set_priv); + +void *cxl_get_priv(struct cxl_context *ctx) +{ + if (!ctx) + return ERR_PTR(-EINVAL); + + return ctx->priv; +} +EXPORT_SYMBOL_GPL(cxl_get_priv); + int cxl_allocate_afu_irqs(struct cxl_context *ctx, int num) { int res; diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h index 422ee53..27578fc 100644 --- a/drivers/misc/cxl/cxl.h +++ b/drivers/misc/cxl/cxl.h @@ -484,6 +484,9 @@ struct cxl_context { /* Only used in PR mode */ u64 process_token; + /* driver private data */ + void *priv; + unsigned long *irq_bitmap; /* Accessed from IRQ context */ struct cxl_irq_ranges irqs; struct list_head irq_names; diff --git a/include/misc/cxl.h b/include/misc/cxl.h index 
17419f6..b6d040f 100644 --- a/include/misc/cxl.h +++ b/include/misc/cxl.h @@ -86,6 +86,13 @@ struct cxl_context *cxl_dev_context_init(struct pci_dev *dev); int cxl_release_context(struct cxl_context *ctx); /* + * Set and get private data associated with a context. Allows drivers to have a + * back pointer to some useful structure. + */ +int cxl_set_priv(struct cxl_context *ctx, void *priv); +void *cxl_get_priv(struct cxl_context *ctx); + +/* * Allocate AFU interrupts for this context. num=0 will allocate the default * for this AFU as given in the AFU descriptor. This number doesn't include the * interrupt 0 (CAIA defines AFU IRQ 0 for page faults). Each interrupt to be -- cgit v0.10.2 From 1ae88fd54c3ac31f68f91e37f719be7e2dbcc810 Mon Sep 17 00:00:00 2001 From: Suraj Jitindar Singh Date: Wed, 29 Jun 2016 13:38:37 +1000 Subject: devicetree/bindings: Add binding for operator panel on FSP machines Add a binding to Documentation/devicetree/bindings/powerpc/opal (oppanel-opal.txt) for the operator panel which is present on IBM Power Systems machines with FSPs. Signed-off-by: Suraj Jitindar Singh Acked-by: Rob Herring Acked-by: Stewart Smith Signed-off-by: Michael Ellerman diff --git a/Documentation/devicetree/bindings/powerpc/opal/oppanel-opal.txt b/Documentation/devicetree/bindings/powerpc/opal/oppanel-opal.txt new file mode 100644 index 0000000..dffb791 --- /dev/null +++ b/Documentation/devicetree/bindings/powerpc/opal/oppanel-opal.txt @@ -0,0 +1,14 @@ +IBM OPAL Operator Panel Binding +------------------------------- + +Required properties: +- compatible : Should be "ibm,opal-oppanel". +- #lines : Number of lines on the operator panel e.g. <0x2>. +- #length : Number of characters per line of the operator panel e.g. <0x10>. 
+ +Example: + oppanel { + compatible = "ibm,opal-oppanel"; + #lines = <0x2>; + #length = <0x10>; + }; -- cgit v0.10.2 From d0226d315dba5e401a124b394a1af5e35e082b08 Mon Sep 17 00:00:00 2001 From: Suraj Jitindar Singh Date: Wed, 29 Jun 2016 13:38:38 +1000 Subject: powerpc/opal: Add inline function to get rc from an ASYNC_COMP opal_msg An opal_msg of type OPAL_MSG_ASYNC_COMP contains the return code in the params[1] struct member. However this isn't intuitive or obvious when reading the code and requires that a user look at the skiboot documentation or opal-api.h to verify this. Add an inline function to get the return code from an opal_msg and update call sites accordingly. Signed-off-by: Suraj Jitindar Singh Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index fa71fea..9ab52e2 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -282,6 +282,14 @@ extern int opal_error_code(int rc); ssize_t opal_msglog_copy(char *to, loff_t pos, size_t count); +static inline int opal_get_async_rc(struct opal_msg msg) +{ + if (msg.msg_type != OPAL_MSG_ASYNC_COMP) + return OPAL_PARAMETER; + else + return be64_to_cpu(msg.params[1]); +} + #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_OPAL_H */ diff --git a/arch/powerpc/platforms/powernv/opal-sensor.c b/arch/powerpc/platforms/powernv/opal-sensor.c index a06059d..308efd1 100644 --- a/arch/powerpc/platforms/powernv/opal-sensor.c +++ b/arch/powerpc/platforms/powernv/opal-sensor.c @@ -55,7 +55,7 @@ int opal_get_sensor_data(u32 sensor_hndl, u32 *sensor_data) goto out_token; } - ret = opal_error_code(be64_to_cpu(msg.params[1])); + ret = opal_error_code(opal_get_async_rc(msg)); *sensor_data = be32_to_cpu(data); break; diff --git a/arch/powerpc/platforms/powernv/opal-sysparam.c b/arch/powerpc/platforms/powernv/opal-sysparam.c index afe66c5..23fb664 100644 --- a/arch/powerpc/platforms/powernv/opal-sysparam.c +++ 
b/arch/powerpc/platforms/powernv/opal-sysparam.c @@ -67,7 +67,7 @@ static ssize_t opal_get_sys_param(u32 param_id, u32 length, void *buffer) goto out_token; } - ret = opal_error_code(be64_to_cpu(msg.params[1])); + ret = opal_error_code(opal_get_async_rc(msg)); out_token: opal_async_release_token(token); @@ -103,7 +103,7 @@ static int opal_set_sys_param(u32 param_id, u32 length, void *buffer) goto out_token; } - ret = opal_error_code(be64_to_cpu(msg.params[1])); + ret = opal_error_code(opal_get_async_rc(msg)); out_token: opal_async_release_token(token); diff --git a/drivers/i2c/busses/i2c-opal.c b/drivers/i2c/busses/i2c-opal.c index 75dd6d0..11e2a1f 100644 --- a/drivers/i2c/busses/i2c-opal.c +++ b/drivers/i2c/busses/i2c-opal.c @@ -71,7 +71,7 @@ static int i2c_opal_send_request(u32 bus_id, struct opal_i2c_request *req) if (rc) goto exit; - rc = be64_to_cpu(msg.params[1]); + rc = opal_get_async_rc(msg); if (rc != OPAL_SUCCESS) { rc = i2c_opal_translate_error(rc); goto exit; diff --git a/drivers/leds/leds-powernv.c b/drivers/leds/leds-powernv.c index dfb8bd3..b2a98c7 100644 --- a/drivers/leds/leds-powernv.c +++ b/drivers/leds/leds-powernv.c @@ -118,7 +118,7 @@ static int powernv_led_set(struct powernv_led_data *powernv_led, goto out_token; } - rc = be64_to_cpu(msg.params[1]); + rc = opal_get_async_rc(msg); if (rc != OPAL_SUCCESS) dev_err(dev, "%s : OAPL async call returned failed [rc=%d]\n", __func__, rc); diff --git a/drivers/mtd/devices/powernv_flash.c b/drivers/mtd/devices/powernv_flash.c index d5b870b..f5396f2 100644 --- a/drivers/mtd/devices/powernv_flash.c +++ b/drivers/mtd/devices/powernv_flash.c @@ -95,7 +95,7 @@ static int powernv_flash_async_op(struct mtd_info *mtd, enum flash_op op, return -EIO; } - rc = be64_to_cpu(msg.params[1]); + rc = opal_get_async_rc(msg); if (rc == OPAL_SUCCESS) { rc = 0; if (retlen) diff --git a/drivers/rtc/rtc-opal.c b/drivers/rtc/rtc-opal.c index 9c18d6f..ea20f62 100644 --- a/drivers/rtc/rtc-opal.c +++ b/drivers/rtc/rtc-opal.c @@ 
-134,7 +134,7 @@ static int opal_get_tpo_time(struct device *dev, struct rtc_wkalrm *alarm) goto exit; } - rc = be64_to_cpu(msg.params[1]); + rc = opal_get_async_rc(msg); if (rc != OPAL_SUCCESS) { rc = -EIO; goto exit; @@ -181,7 +181,7 @@ static int opal_set_tpo_time(struct device *dev, struct rtc_wkalrm *alarm) goto exit; } - rc = be64_to_cpu(msg.params[1]); + rc = opal_get_async_rc(msg); if (rc != OPAL_SUCCESS) rc = -EIO; -- cgit v0.10.2 From 43a1dd9b5fc64184e578ac1570d016d2862e00b2 Mon Sep 17 00:00:00 2001 From: Suraj Jitindar Singh Date: Wed, 29 Jun 2016 13:38:39 +1000 Subject: powerpc/powernv: Add driver for operator panel on FSP machines Implement new character device driver to allow access from user space to the operator panel display present on IBM Power Systems machines with FSPs. This will allow status information to be presented on the display which is visible to a user. The driver implements a character buffer which a user can read/write by accessing the device (/dev/op_panel). This buffer is then displayed on the operator panel display. Any attempt to write past the last character position will have no effect and attempts to write more characters than the size of the display will be truncated. The device may only be accessed by a single process at a time. Signed-off-by: Suraj Jitindar Singh Reviewed-by: Andrew Donnellan Signed-off-by: Michael Ellerman diff --git a/MAINTAINERS b/MAINTAINERS index ea80d71..3318633 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9081,6 +9081,12 @@ F: drivers/firmware/psci.c F: include/linux/psci.h F: include/uapi/linux/psci.h +POWERNV OPERATOR PANEL LCD DISPLAY DRIVER +M: Suraj Jitindar Singh +L: linuxppc-dev@lists.ozlabs.org +S: Maintained +F: drivers/char/powernv-op-panel.c + PNP SUPPORT M: "Rafael J. 
Wysocki" S: Maintained diff --git a/arch/powerpc/configs/powernv_defconfig b/arch/powerpc/configs/powernv_defconfig index 0450310..959d32b 100644 --- a/arch/powerpc/configs/powernv_defconfig +++ b/arch/powerpc/configs/powernv_defconfig @@ -181,6 +181,7 @@ CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_JSM=m CONFIG_VIRTIO_CONSOLE=m +CONFIG_POWERNV_OP_PANEL=m CONFIG_IPMI_HANDLER=y CONFIG_IPMI_DEVICE_INTERFACE=y CONFIG_IPMI_POWERNV=y diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 9ab52e2..3b369e9 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -178,6 +178,8 @@ int64_t opal_dump_ack(uint32_t dump_id); int64_t opal_dump_resend_notification(void); int64_t opal_get_msg(uint64_t buffer, uint64_t size); +int64_t opal_write_oppanel_async(uint64_t token, oppanel_line_t *lines, + uint64_t num_lines); int64_t opal_check_completion(uint64_t buffer, uint64_t size, uint64_t token); int64_t opal_sync_host_reboot(void); int64_t opal_get_param(uint64_t token, uint32_t param_id, uint64_t buffer, diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index 3ea1a855..7979d6d 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -278,6 +278,7 @@ OPAL_CALL(opal_dump_info2, OPAL_DUMP_INFO2); OPAL_CALL(opal_dump_read, OPAL_DUMP_READ); OPAL_CALL(opal_dump_ack, OPAL_DUMP_ACK); OPAL_CALL(opal_get_msg, OPAL_GET_MSG); +OPAL_CALL(opal_write_oppanel_async, OPAL_WRITE_OPPANEL_ASYNC); OPAL_CALL(opal_check_completion, OPAL_CHECK_ASYNC_COMPLETION); OPAL_CALL(opal_dump_resend_notification, OPAL_DUMP_RESEND); OPAL_CALL(opal_sync_host_reboot, OPAL_SYNC_HOST_REBOOT); diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 0256d07..228751a 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -751,6 +751,9 @@ static 
int __init opal_init(void) opal_pdev_init(opal_node, "ibm,opal-flash"); opal_pdev_init(opal_node, "ibm,opal-prd"); + /* Initialise platform device: oppanel interface */ + opal_pdev_init(opal_node, "ibm,opal-oppanel"); + /* Initialise OPAL kmsg dumper for flushing console on panic */ opal_kmsg_init(); @@ -885,3 +888,5 @@ EXPORT_SYMBOL_GPL(opal_i2c_request); /* Export these symbols for PowerNV LED class driver */ EXPORT_SYMBOL_GPL(opal_leds_get_ind); EXPORT_SYMBOL_GPL(opal_leds_set_ind); +/* Export this symbol for PowerNV Operator Panel class driver */ +EXPORT_SYMBOL_GPL(opal_write_oppanel_async); diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index 601f64f..fdb8f3e 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -178,6 +178,20 @@ config IBM_BSR of threads across a large system which avoids bouncing a cacheline between several cores on a system +config POWERNV_OP_PANEL + tristate "IBM POWERNV Operator Panel Display support" + depends on PPC_POWERNV + default m + help + If you say Y here, a special character device node, /dev/op_panel, + will be created which exposes the operator panel display on IBM + Power Systems machines with FSPs. + + If you don't require access to the operator panel display from user + space, say N. + + If unsure, say M here to build it as a module called powernv-op-panel. + source "drivers/char/ipmi/Kconfig" config DS1620 diff --git a/drivers/char/Makefile b/drivers/char/Makefile index d8a7579..55d16bf 100644 --- a/drivers/char/Makefile +++ b/drivers/char/Makefile @@ -60,3 +60,4 @@ js-rtc-y = rtc.o obj-$(CONFIG_TILE_SROM) += tile-srom.o obj-$(CONFIG_XILLYBUS) += xillybus/ +obj-$(CONFIG_POWERNV_OP_PANEL) += powernv-op-panel.o diff --git a/drivers/char/powernv-op-panel.c b/drivers/char/powernv-op-panel.c new file mode 100644 index 0000000..a45dabc --- /dev/null +++ b/drivers/char/powernv-op-panel.c @@ -0,0 +1,223 @@ +/* + * OPAL Operator Panel Display Driver + * + * Copyright 2016, Suraj Jitindar Singh, IBM Corporation. 
+ */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +/* + * This driver creates a character device (/dev/op_panel) which exposes the + * operator panel (character LCD display) on IBM Power Systems machines + * with FSPs. + * A character buffer written to the device will be displayed on the + * operator panel. + */ + +static DEFINE_MUTEX(oppanel_mutex); + +static u32 num_lines, oppanel_size; +static oppanel_line_t *oppanel_lines; +static char *oppanel_data; + +static loff_t oppanel_llseek(struct file *filp, loff_t offset, int whence) +{ + return fixed_size_llseek(filp, offset, whence, oppanel_size); +} + +static ssize_t oppanel_read(struct file *filp, char __user *userbuf, size_t len, + loff_t *f_pos) +{ + return simple_read_from_buffer(userbuf, len, f_pos, oppanel_data, + oppanel_size); +} + +static int __op_panel_update_display(void) +{ + struct opal_msg msg; + int rc, token; + + token = opal_async_get_token_interruptible(); + if (token < 0) { + if (token != -ERESTARTSYS) + pr_debug("Couldn't get OPAL async token [token=%d]\n", + token); + return token; + } + + rc = opal_write_oppanel_async(token, oppanel_lines, num_lines); + switch (rc) { + case OPAL_ASYNC_COMPLETION: + rc = opal_async_wait_response(token, &msg); + if (rc) { + pr_debug("Failed to wait for async response [rc=%d]\n", + rc); + break; + } + rc = opal_get_async_rc(msg); + if (rc != OPAL_SUCCESS) { + pr_debug("OPAL async call returned failed [rc=%d]\n", + rc); + break; + } + case OPAL_SUCCESS: + break; + default: + pr_debug("OPAL write op-panel call failed [rc=%d]\n", rc); + } + + opal_async_release_token(token); + return rc; +} + +static ssize_t oppanel_write(struct file *filp, const char __user *userbuf, + size_t len, loff_t *f_pos) +{ + loff_t f_pos_prev = *f_pos; + ssize_t ret; + int rc; + + if (!*f_pos) + memset(oppanel_data, ' ', oppanel_size); + else if (*f_pos >= 
oppanel_size) + return -EFBIG; + + ret = simple_write_to_buffer(oppanel_data, oppanel_size, f_pos, userbuf, + len); + if (ret > 0) { + rc = __op_panel_update_display(); + if (rc != OPAL_SUCCESS) { + pr_err_ratelimited("OPAL call failed to write to op panel display [rc=%d]\n", + rc); + *f_pos = f_pos_prev; + return -EIO; + } + } + return ret; +} + +static int oppanel_open(struct inode *inode, struct file *filp) +{ + if (!mutex_trylock(&oppanel_mutex)) { + pr_debug("Device Busy\n"); + return -EBUSY; + } + return 0; +} + +static int oppanel_release(struct inode *inode, struct file *filp) +{ + mutex_unlock(&oppanel_mutex); + return 0; +} + +static const struct file_operations oppanel_fops = { + .owner = THIS_MODULE, + .llseek = oppanel_llseek, + .read = oppanel_read, + .write = oppanel_write, + .open = oppanel_open, + .release = oppanel_release +}; + +static struct miscdevice oppanel_dev = { + .minor = MISC_DYNAMIC_MINOR, + .name = "op_panel", + .fops = &oppanel_fops +}; + +static int oppanel_probe(struct platform_device *pdev) +{ + struct device_node *np = pdev->dev.of_node; + u32 line_len; + int rc, i; + + rc = of_property_read_u32(np, "#length", &line_len); + if (rc) { + pr_err_ratelimited("Operator panel length property not found\n"); + return rc; + } + rc = of_property_read_u32(np, "#lines", &num_lines); + if (rc) { + pr_err_ratelimited("Operator panel lines property not found\n"); + return rc; + } + oppanel_size = line_len * num_lines; + + pr_devel("Operator panel of size %u found with %u lines of length %u\n", + oppanel_size, num_lines, line_len); + + oppanel_data = kcalloc(oppanel_size, sizeof(*oppanel_data), GFP_KERNEL); + if (!oppanel_data) + return -ENOMEM; + + oppanel_lines = kcalloc(num_lines, sizeof(oppanel_line_t), GFP_KERNEL); + if (!oppanel_lines) { + rc = -ENOMEM; + goto free_oppanel_data; + } + + memset(oppanel_data, ' ', oppanel_size); + for (i = 0; i < num_lines; i++) { + oppanel_lines[i].line_len = cpu_to_be64(line_len); + oppanel_lines[i].line = 
cpu_to_be64(__pa(&oppanel_data[i * + line_len])); + } + + rc = misc_register(&oppanel_dev); + if (rc) { + pr_err_ratelimited("Failed to register as misc device\n"); + goto free_oppanel; + } + + return 0; + +free_oppanel: + kfree(oppanel_lines); +free_oppanel_data: + kfree(oppanel_data); + return rc; +} + +static int oppanel_remove(struct platform_device *pdev) +{ + misc_deregister(&oppanel_dev); + kfree(oppanel_lines); + kfree(oppanel_data); + return 0; +} + +static const struct of_device_id oppanel_match[] = { + { .compatible = "ibm,opal-oppanel" }, + { }, +}; + +static struct platform_driver oppanel_driver = { + .driver = { + .name = "powernv-op-panel", + .of_match_table = oppanel_match, + }, + .probe = oppanel_probe, + .remove = oppanel_remove, +}; + +module_platform_driver(oppanel_driver); + +MODULE_DEVICE_TABLE(of, oppanel_match); +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("PowerNV Operator Panel LCD Display Driver"); +MODULE_AUTHOR("Suraj Jitindar Singh "); -- cgit v0.10.2 From b5b1cfc5d4d8457e98bbab0b8402c07b3938c3e6 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 5 Jul 2016 23:45:56 +1000 Subject: powerpc/fadump: Fix build error introduced by recent cleanup We spent so much time bike-shedding the printk() we missed that the next line was missing a semi-colon. And it seems none of our defconfigs turn on CONFIG_FA_DUMP. Fixes: 4a03749f140c ("powerpc/fadump: Trivial fix of spelling mistake, clean up message") Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index f066486..b3a6633 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -1010,7 +1010,7 @@ static int fadump_invalidate_dump(struct fadump_mem_struct *fdm) if (rc) { pr_err("Failed to invalidate firmware-assisted dump registration. 
Unexpected error (%d).\n", rc); - return rc + return rc; } fw_dump.dump_active = 0; fdm_active = NULL; -- cgit v0.10.2 From 4d3576b207167bdb7af31408871d1bb1a2c1a5c7 Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Sun, 26 Jun 2016 23:07:04 +0530 Subject: powerpc/perf: factor out power8 pmu macros and defines Factor out some of the power8 pmu macros to a new header file to share with power9 pmu code. Just code movement and no logic change. Signed-off-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/perf/isa207-common.h b/arch/powerpc/perf/isa207-common.h new file mode 100644 index 0000000..03205f5 --- /dev/null +++ b/arch/powerpc/perf/isa207-common.h @@ -0,0 +1,230 @@ +/* + * Copyright 2009 Paul Mackerras, IBM Corporation. + * Copyright 2013 Michael Ellerman, IBM Corporation. + * Copyright 2016 Madhavan Srinivasan, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or any later version. + */ + +#ifndef _LINUX_POWERPC_PERF_ISA207_COMMON_H_ +#define _LINUX_POWERPC_PERF_ISA207_COMMON_H_ + +#include +#include +#include +#include + +/* + * Raw event encoding for PowerISA v2.07: + * + * 60 56 52 48 44 40 36 32 + * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | + * | | [ ] [ thresh_cmp ] [ thresh_ctl ] + * | | | | + * | | *- IFM (Linux) thresh start/stop OR FAB match -* + * | *- BHRB (Linux) + * *- EBB (Linux) + * + * 28 24 20 16 12 8 4 0 + * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | + * [ ] [ sample ] [cache] [ pmc ] [unit ] c m [ pmcxsel ] + * | | | | | + * | | | | *- mark + * | | *- L1/L2/L3 cache_sel | + * | | | + * | *- sampling mode for marked events *- combine + * | + * *- thresh_sel + * + * Below uses IBM bit numbering.
+ * + * MMCR1[x:y] = unit (PMCxUNIT) + * MMCR1[x] = combine (PMCxCOMB) + * + * if pmc == 3 and unit == 0 and pmcxsel[0:6] == 0b0101011 + * # PM_MRK_FAB_RSP_MATCH + * MMCR1[20:27] = thresh_ctl (FAB_CRESP_MATCH / FAB_TYPE_MATCH) + * else if pmc == 4 and unit == 0xf and pmcxsel[0:6] == 0b0101001 + * # PM_MRK_FAB_RSP_MATCH_CYC + * MMCR1[20:27] = thresh_ctl (FAB_CRESP_MATCH / FAB_TYPE_MATCH) + * else + * MMCRA[48:55] = thresh_ctl (THRESH START/END) + * + * if thresh_sel: + * MMCRA[45:47] = thresh_sel + * + * if thresh_cmp: + * MMCRA[22:24] = thresh_cmp[0:2] + * MMCRA[25:31] = thresh_cmp[3:9] + * + * if unit == 6 or unit == 7 + * MMCRC[53:55] = cache_sel[1:3] (L2EVENT_SEL) + * else if unit == 8 or unit == 9: + * if cache_sel[0] == 0: # L3 bank + * MMCRC[47:49] = cache_sel[1:3] (L3EVENT_SEL0) + * else if cache_sel[0] == 1: + * MMCRC[50:51] = cache_sel[2:3] (L3EVENT_SEL1) + * else if cache_sel[1]: # L1 event + * MMCR1[16] = cache_sel[2] + * MMCR1[17] = cache_sel[3] + * + * if mark: + * MMCRA[63] = 1 (SAMPLE_ENABLE) + * MMCRA[57:59] = sample[0:2] (RAND_SAMP_ELIG) + * MMCRA[61:62] = sample[3:4] (RAND_SAMP_MODE) + * + * if EBB and BHRB: + * MMCRA[32:33] = IFM + * + */ + +#define EVENT_EBB_MASK 1ull +#define EVENT_EBB_SHIFT PERF_EVENT_CONFIG_EBB_SHIFT +#define EVENT_BHRB_MASK 1ull +#define EVENT_BHRB_SHIFT 62 +#define EVENT_WANTS_BHRB (EVENT_BHRB_MASK << EVENT_BHRB_SHIFT) +#define EVENT_IFM_MASK 3ull +#define EVENT_IFM_SHIFT 60 +#define EVENT_THR_CMP_SHIFT 40 /* Threshold CMP value */ +#define EVENT_THR_CMP_MASK 0x3ff +#define EVENT_THR_CTL_SHIFT 32 /* Threshold control value (start/stop) */ +#define EVENT_THR_CTL_MASK 0xffull +#define EVENT_THR_SEL_SHIFT 29 /* Threshold select value */ +#define EVENT_THR_SEL_MASK 0x7 +#define EVENT_THRESH_SHIFT 29 /* All threshold bits */ +#define EVENT_THRESH_MASK 0x1fffffull +#define EVENT_SAMPLE_SHIFT 24 /* Sampling mode & eligibility */ +#define EVENT_SAMPLE_MASK 0x1f +#define EVENT_CACHE_SEL_SHIFT 20 /* L2/L3 cache select */ +#define 
EVENT_CACHE_SEL_MASK 0xf +#define EVENT_IS_L1 (4 << EVENT_CACHE_SEL_SHIFT) +#define EVENT_PMC_SHIFT 16 /* PMC number (1-based) */ +#define EVENT_PMC_MASK 0xf +#define EVENT_UNIT_SHIFT 12 /* Unit */ +#define EVENT_UNIT_MASK 0xf +#define EVENT_COMBINE_SHIFT 11 /* Combine bit */ +#define EVENT_COMBINE_MASK 0x1 +#define EVENT_MARKED_SHIFT 8 /* Marked bit */ +#define EVENT_MARKED_MASK 0x1 +#define EVENT_IS_MARKED (EVENT_MARKED_MASK << EVENT_MARKED_SHIFT) +#define EVENT_PSEL_MASK 0xff /* PMCxSEL value */ + +/* Bits defined by Linux */ +#define EVENT_LINUX_MASK \ + ((EVENT_EBB_MASK << EVENT_EBB_SHIFT) | \ + (EVENT_BHRB_MASK << EVENT_BHRB_SHIFT) | \ + (EVENT_IFM_MASK << EVENT_IFM_SHIFT)) + +#define EVENT_VALID_MASK \ + ((EVENT_THRESH_MASK << EVENT_THRESH_SHIFT) | \ + (EVENT_SAMPLE_MASK << EVENT_SAMPLE_SHIFT) | \ + (EVENT_CACHE_SEL_MASK << EVENT_CACHE_SEL_SHIFT) | \ + (EVENT_PMC_MASK << EVENT_PMC_SHIFT) | \ + (EVENT_UNIT_MASK << EVENT_UNIT_SHIFT) | \ + (EVENT_COMBINE_MASK << EVENT_COMBINE_SHIFT) | \ + (EVENT_MARKED_MASK << EVENT_MARKED_SHIFT) | \ + EVENT_LINUX_MASK | \ + EVENT_PSEL_MASK) + +#define ONLY_PLM \ + (PERF_SAMPLE_BRANCH_USER |\ + PERF_SAMPLE_BRANCH_KERNEL |\ + PERF_SAMPLE_BRANCH_HV) + +/* + * Layout of constraint bits: + * + * 60 56 52 48 44 40 36 32 + * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | + * [ fab_match ] [ thresh_cmp ] [ thresh_ctl ] [ ] + * | + * thresh_sel -* + * + * 28 24 20 16 12 8 4 0 + * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | + * [ ] | [ ] [ sample ] [ ] [6] [5] [4] [3] [2] [1] + * | | | | + * BHRB IFM -* | | | Count of events for each PMC. + * EBB -* | | p1, p2, p3, p4, p5, p6. + * L1 I/D qualifier -* | + * nc - number of counters -* + * + * The PMC fields P1..P6, and NC, are adder fields. As we accumulate constraints + * we want the low bit of each field to be added to any existing value. + * + * Everything else is a value field. 
+ */ + +#define CNST_FAB_MATCH_VAL(v) (((v) & EVENT_THR_CTL_MASK) << 56) +#define CNST_FAB_MATCH_MASK CNST_FAB_MATCH_VAL(EVENT_THR_CTL_MASK) + +/* We just throw all the threshold bits into the constraint */ +#define CNST_THRESH_VAL(v) (((v) & EVENT_THRESH_MASK) << 32) +#define CNST_THRESH_MASK CNST_THRESH_VAL(EVENT_THRESH_MASK) + +#define CNST_EBB_VAL(v) (((v) & EVENT_EBB_MASK) << 24) +#define CNST_EBB_MASK CNST_EBB_VAL(EVENT_EBB_MASK) + +#define CNST_IFM_VAL(v) (((v) & EVENT_IFM_MASK) << 25) +#define CNST_IFM_MASK CNST_IFM_VAL(EVENT_IFM_MASK) + +#define CNST_L1_QUAL_VAL(v) (((v) & 3) << 22) +#define CNST_L1_QUAL_MASK CNST_L1_QUAL_VAL(3) + +#define CNST_SAMPLE_VAL(v) (((v) & EVENT_SAMPLE_MASK) << 16) +#define CNST_SAMPLE_MASK CNST_SAMPLE_VAL(EVENT_SAMPLE_MASK) + +/* + * For NC we are counting up to 4 events. This requires three bits, and we need + * the fifth event to overflow and set the 4th bit. To achieve that we bias the + * fields by 3 in test_adder. + */ +#define CNST_NC_SHIFT 12 +#define CNST_NC_VAL (1 << CNST_NC_SHIFT) +#define CNST_NC_MASK (8 << CNST_NC_SHIFT) +#define ISA207_TEST_ADDER (3 << CNST_NC_SHIFT) + +/* + * For the per-PMC fields we have two bits. The low bit is added, so if two + * events ask for the same PMC the sum will overflow, setting the high bit, + * indicating an error. So our mask sets the high bit. 
+ */ +#define CNST_PMC_SHIFT(pmc) ((pmc - 1) * 2) +#define CNST_PMC_VAL(pmc) (1 << CNST_PMC_SHIFT(pmc)) +#define CNST_PMC_MASK(pmc) (2 << CNST_PMC_SHIFT(pmc)) + +/* Our add_fields is defined as: */ +#define ISA207_ADD_FIELDS \ + CNST_PMC_VAL(1) | CNST_PMC_VAL(2) | CNST_PMC_VAL(3) | \ + CNST_PMC_VAL(4) | CNST_PMC_VAL(5) | CNST_PMC_VAL(6) | CNST_NC_VAL + + +/* Bits in MMCR1 for PowerISA v2.07 */ +#define MMCR1_UNIT_SHIFT(pmc) (60 - (4 * ((pmc) - 1))) +#define MMCR1_COMBINE_SHIFT(pmc) (35 - ((pmc) - 1)) +#define MMCR1_PMCSEL_SHIFT(pmc) (24 - (((pmc) - 1)) * 8) +#define MMCR1_FAB_SHIFT 36 +#define MMCR1_DC_QUAL_SHIFT 47 +#define MMCR1_IC_QUAL_SHIFT 46 + +/* Bits in MMCRA for PowerISA v2.07 */ +#define MMCRA_SAMP_MODE_SHIFT 1 +#define MMCRA_SAMP_ELIG_SHIFT 4 +#define MMCRA_THR_CTL_SHIFT 8 +#define MMCRA_THR_SEL_SHIFT 16 +#define MMCRA_THR_CMP_SHIFT 32 +#define MMCRA_SDAR_MODE_TLB (1ull << 42) +#define MMCRA_IFM_SHIFT 30 + +/* Bits in MMCR2 for PowerISA v2.07 */ +#define MMCR2_FCS(pmc) (1ull << (63 - (((pmc) - 1) * 9))) +#define MMCR2_FCP(pmc) (1ull << (62 - (((pmc) - 1) * 9))) +#define MMCR2_FCH(pmc) (1ull << (57 - (((pmc) - 1) * 9))) + +#define MAX_ALT 2 +#define MAX_PMU_COUNTERS 6 + +#endif diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c index 7cf3b43..4303e9b 100644 --- a/arch/powerpc/perf/power8-pmu.c +++ b/arch/powerpc/perf/power8-pmu.c @@ -12,10 +12,7 @@ #define pr_fmt(fmt) "power8-pmu: " fmt -#include -#include -#include -#include +#include "isa207-common.h" /* * Some power8 event codes. 
@@ -28,219 +25,11 @@ enum { #undef EVENT -/* - * Raw event encoding for POWER8: - * - * 60 56 52 48 44 40 36 32 - * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - * | | [ ] [ thresh_cmp ] [ thresh_ctl ] - * | | | | - * | | *- IFM (Linux) thresh start/stop OR FAB match -* - * | *- BHRB (Linux) - * *- EBB (Linux) - * - * 28 24 20 16 12 8 4 0 - * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - * [ ] [ sample ] [cache] [ pmc ] [unit ] c m [ pmcxsel ] - * | | | | | - * | | | | *- mark - * | | *- L1/L2/L3 cache_sel | - * | | | - * | *- sampling mode for marked events *- combine - * | - * *- thresh_sel - * - * Below uses IBM bit numbering. - * - * MMCR1[x:y] = unit (PMCxUNIT) - * MMCR1[x] = combine (PMCxCOMB) - * - * if pmc == 3 and unit == 0 and pmcxsel[0:6] == 0b0101011 - * # PM_MRK_FAB_RSP_MATCH - * MMCR1[20:27] = thresh_ctl (FAB_CRESP_MATCH / FAB_TYPE_MATCH) - * else if pmc == 4 and unit == 0xf and pmcxsel[0:6] == 0b0101001 - * # PM_MRK_FAB_RSP_MATCH_CYC - * MMCR1[20:27] = thresh_ctl (FAB_CRESP_MATCH / FAB_TYPE_MATCH) - * else - * MMCRA[48:55] = thresh_ctl (THRESH START/END) - * - * if thresh_sel: - * MMCRA[45:47] = thresh_sel - * - * if thresh_cmp: - * MMCRA[22:24] = thresh_cmp[0:2] - * MMCRA[25:31] = thresh_cmp[3:9] - * - * if unit == 6 or unit == 7 - * MMCRC[53:55] = cache_sel[1:3] (L2EVENT_SEL) - * else if unit == 8 or unit == 9: - * if cache_sel[0] == 0: # L3 bank - * MMCRC[47:49] = cache_sel[1:3] (L3EVENT_SEL0) - * else if cache_sel[0] == 1: - * MMCRC[50:51] = cache_sel[2:3] (L3EVENT_SEL1) - * else if cache_sel[1]: # L1 event - * MMCR1[16] = cache_sel[2] - * MMCR1[17] = cache_sel[3] - * - * if mark: - * MMCRA[63] = 1 (SAMPLE_ENABLE) - * MMCRA[57:59] = sample[0:2] (RAND_SAMP_ELIG) - * MMCRA[61:62] = sample[3:4] (RAND_SAMP_MODE) - * - * if EBB and BHRB: - * MMCRA[32:33] = IFM - * - */ - -#define EVENT_EBB_MASK 1ull -#define EVENT_EBB_SHIFT PERF_EVENT_CONFIG_EBB_SHIFT -#define EVENT_BHRB_MASK 
1ull -#define EVENT_BHRB_SHIFT 62 -#define EVENT_WANTS_BHRB (EVENT_BHRB_MASK << EVENT_BHRB_SHIFT) -#define EVENT_IFM_MASK 3ull -#define EVENT_IFM_SHIFT 60 -#define EVENT_THR_CMP_SHIFT 40 /* Threshold CMP value */ -#define EVENT_THR_CMP_MASK 0x3ff -#define EVENT_THR_CTL_SHIFT 32 /* Threshold control value (start/stop) */ -#define EVENT_THR_CTL_MASK 0xffull -#define EVENT_THR_SEL_SHIFT 29 /* Threshold select value */ -#define EVENT_THR_SEL_MASK 0x7 -#define EVENT_THRESH_SHIFT 29 /* All threshold bits */ -#define EVENT_THRESH_MASK 0x1fffffull -#define EVENT_SAMPLE_SHIFT 24 /* Sampling mode & eligibility */ -#define EVENT_SAMPLE_MASK 0x1f -#define EVENT_CACHE_SEL_SHIFT 20 /* L2/L3 cache select */ -#define EVENT_CACHE_SEL_MASK 0xf -#define EVENT_IS_L1 (4 << EVENT_CACHE_SEL_SHIFT) -#define EVENT_PMC_SHIFT 16 /* PMC number (1-based) */ -#define EVENT_PMC_MASK 0xf -#define EVENT_UNIT_SHIFT 12 /* Unit */ -#define EVENT_UNIT_MASK 0xf -#define EVENT_COMBINE_SHIFT 11 /* Combine bit */ -#define EVENT_COMBINE_MASK 0x1 -#define EVENT_MARKED_SHIFT 8 /* Marked bit */ -#define EVENT_MARKED_MASK 0x1 -#define EVENT_IS_MARKED (EVENT_MARKED_MASK << EVENT_MARKED_SHIFT) -#define EVENT_PSEL_MASK 0xff /* PMCxSEL value */ - -/* Bits defined by Linux */ -#define EVENT_LINUX_MASK \ - ((EVENT_EBB_MASK << EVENT_EBB_SHIFT) | \ - (EVENT_BHRB_MASK << EVENT_BHRB_SHIFT) | \ - (EVENT_IFM_MASK << EVENT_IFM_SHIFT)) - -#define EVENT_VALID_MASK \ - ((EVENT_THRESH_MASK << EVENT_THRESH_SHIFT) | \ - (EVENT_SAMPLE_MASK << EVENT_SAMPLE_SHIFT) | \ - (EVENT_CACHE_SEL_MASK << EVENT_CACHE_SEL_SHIFT) | \ - (EVENT_PMC_MASK << EVENT_PMC_SHIFT) | \ - (EVENT_UNIT_MASK << EVENT_UNIT_SHIFT) | \ - (EVENT_COMBINE_MASK << EVENT_COMBINE_SHIFT) | \ - (EVENT_MARKED_MASK << EVENT_MARKED_SHIFT) | \ - EVENT_LINUX_MASK | \ - EVENT_PSEL_MASK) - /* MMCRA IFM bits - POWER8 */ #define POWER8_MMCRA_IFM1 0x0000000040000000UL #define POWER8_MMCRA_IFM2 0x0000000080000000UL #define POWER8_MMCRA_IFM3 0x00000000C0000000UL -#define ONLY_PLM \ 
- (PERF_SAMPLE_BRANCH_USER |\ - PERF_SAMPLE_BRANCH_KERNEL |\ - PERF_SAMPLE_BRANCH_HV) - -/* - * Layout of constraint bits: - * - * 60 56 52 48 44 40 36 32 - * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - * [ fab_match ] [ thresh_cmp ] [ thresh_ctl ] [ ] - * | - * thresh_sel -* - * - * 28 24 20 16 12 8 4 0 - * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - * [ ] | [ ] [ sample ] [ ] [6] [5] [4] [3] [2] [1] - * | | | | - * BHRB IFM -* | | | Count of events for each PMC. - * EBB -* | | p1, p2, p3, p4, p5, p6. - * L1 I/D qualifier -* | - * nc - number of counters -* - * - * The PMC fields P1..P6, and NC, are adder fields. As we accumulate constraints - * we want the low bit of each field to be added to any existing value. - * - * Everything else is a value field. - */ - -#define CNST_FAB_MATCH_VAL(v) (((v) & EVENT_THR_CTL_MASK) << 56) -#define CNST_FAB_MATCH_MASK CNST_FAB_MATCH_VAL(EVENT_THR_CTL_MASK) - -/* We just throw all the threshold bits into the constraint */ -#define CNST_THRESH_VAL(v) (((v) & EVENT_THRESH_MASK) << 32) -#define CNST_THRESH_MASK CNST_THRESH_VAL(EVENT_THRESH_MASK) - -#define CNST_EBB_VAL(v) (((v) & EVENT_EBB_MASK) << 24) -#define CNST_EBB_MASK CNST_EBB_VAL(EVENT_EBB_MASK) - -#define CNST_IFM_VAL(v) (((v) & EVENT_IFM_MASK) << 25) -#define CNST_IFM_MASK CNST_IFM_VAL(EVENT_IFM_MASK) - -#define CNST_L1_QUAL_VAL(v) (((v) & 3) << 22) -#define CNST_L1_QUAL_MASK CNST_L1_QUAL_VAL(3) - -#define CNST_SAMPLE_VAL(v) (((v) & EVENT_SAMPLE_MASK) << 16) -#define CNST_SAMPLE_MASK CNST_SAMPLE_VAL(EVENT_SAMPLE_MASK) - -/* - * For NC we are counting up to 4 events. This requires three bits, and we need - * the fifth event to overflow and set the 4th bit. To achieve that we bias the - * fields by 3 in test_adder. 
- */ -#define CNST_NC_SHIFT 12 -#define CNST_NC_VAL (1 << CNST_NC_SHIFT) -#define CNST_NC_MASK (8 << CNST_NC_SHIFT) -#define POWER8_TEST_ADDER (3 << CNST_NC_SHIFT) - -/* - * For the per-PMC fields we have two bits. The low bit is added, so if two - * events ask for the same PMC the sum will overflow, setting the high bit, - * indicating an error. So our mask sets the high bit. - */ -#define CNST_PMC_SHIFT(pmc) ((pmc - 1) * 2) -#define CNST_PMC_VAL(pmc) (1 << CNST_PMC_SHIFT(pmc)) -#define CNST_PMC_MASK(pmc) (2 << CNST_PMC_SHIFT(pmc)) - -/* Our add_fields is defined as: */ -#define POWER8_ADD_FIELDS \ - CNST_PMC_VAL(1) | CNST_PMC_VAL(2) | CNST_PMC_VAL(3) | \ - CNST_PMC_VAL(4) | CNST_PMC_VAL(5) | CNST_PMC_VAL(6) | CNST_NC_VAL - - -/* Bits in MMCR1 for POWER8 */ -#define MMCR1_UNIT_SHIFT(pmc) (60 - (4 * ((pmc) - 1))) -#define MMCR1_COMBINE_SHIFT(pmc) (35 - ((pmc) - 1)) -#define MMCR1_PMCSEL_SHIFT(pmc) (24 - (((pmc) - 1)) * 8) -#define MMCR1_FAB_SHIFT 36 -#define MMCR1_DC_QUAL_SHIFT 47 -#define MMCR1_IC_QUAL_SHIFT 46 - -/* Bits in MMCRA for POWER8 */ -#define MMCRA_SAMP_MODE_SHIFT 1 -#define MMCRA_SAMP_ELIG_SHIFT 4 -#define MMCRA_THR_CTL_SHIFT 8 -#define MMCRA_THR_SEL_SHIFT 16 -#define MMCRA_THR_CMP_SHIFT 32 -#define MMCRA_SDAR_MODE_TLB (1ull << 42) -#define MMCRA_IFM_SHIFT 30 - -/* Bits in MMCR2 for POWER8 */ -#define MMCR2_FCS(pmc) (1ull << (63 - (((pmc) - 1) * 9))) -#define MMCR2_FCP(pmc) (1ull << (62 - (((pmc) - 1) * 9))) -#define MMCR2_FCH(pmc) (1ull << (57 - (((pmc) - 1) * 9))) - - static inline bool event_is_fab_match(u64 event) { /* Only check pmc, unit and pmcxsel, ignore the edge bit (0) */ @@ -485,8 +274,6 @@ static int power8_compute_mmcr(u64 event[], int n_ev, return 0; } -#define MAX_ALT 2 - /* Table of alternatives, sorted by column 0 */ static const unsigned int event_alternatives[][MAX_ALT] = { { PM_MRK_ST_CMPL, PM_MRK_ST_CMPL_ALT }, @@ -841,10 +628,10 @@ static int power8_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { static struct power_pmu 
power8_pmu = { .name = "POWER8", - .n_counter = 6, + .n_counter = MAX_PMU_COUNTERS, .max_alternatives = MAX_ALT + 1, - .add_fields = POWER8_ADD_FIELDS, - .test_adder = POWER8_TEST_ADDER, + .add_fields = ISA207_ADD_FIELDS, + .test_adder = ISA207_TEST_ADDER, .compute_mmcr = power8_compute_mmcr, .config_bhrb = power8_config_bhrb, .bhrb_filter_map = power8_bhrb_filter_map, -- cgit v0.10.2 From 7ffd948fae4cd4f0207bece20132edd9afb9abcc Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Sun, 26 Jun 2016 23:07:05 +0530 Subject: powerpc/perf: factor out power8 pmu functions Factor out some of the power8 pmu functions to new file "isa207-common.c" to share with power9 pmu code. Only code movement and no logic change Signed-off-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile index 77b6394..92f8ea4 100644 --- a/arch/powerpc/perf/Makefile +++ b/arch/powerpc/perf/Makefile @@ -5,7 +5,7 @@ obj-$(CONFIG_PERF_EVENTS) += callchain.o perf_regs.o obj-$(CONFIG_PPC_PERF_CTRS) += core-book3s.o bhrb.o obj64-$(CONFIG_PPC_PERF_CTRS) += power4-pmu.o ppc970-pmu.o power5-pmu.o \ power5+-pmu.o power6-pmu.o power7-pmu.o \ - power8-pmu.o + isa207-common.o power8-pmu.o obj32-$(CONFIG_PPC_PERF_CTRS) += mpc7450-pmu.o obj-$(CONFIG_FSL_EMB_PERF_EVENT) += core-fsl-emb.o diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c new file mode 100644 index 0000000..6143c99 --- /dev/null +++ b/arch/powerpc/perf/isa207-common.c @@ -0,0 +1,263 @@ +/* + * Common Performance counter support functions for PowerISA v2.07 processors. + * + * Copyright 2009 Paul Mackerras, IBM Corporation. + * Copyright 2013 Michael Ellerman, IBM Corporation. + * Copyright 2016 Madhavan Srinivasan, IBM Corporation. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include "isa207-common.h" + +static inline bool event_is_fab_match(u64 event) +{ + /* Only check pmc, unit and pmcxsel, ignore the edge bit (0) */ + event &= 0xff0fe; + + /* PM_MRK_FAB_RSP_MATCH & PM_MRK_FAB_RSP_MATCH_CYC */ + return (event == 0x30056 || event == 0x4f052); +} + +int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp) +{ + unsigned int unit, pmc, cache, ebb; + unsigned long mask, value; + + mask = value = 0; + + if (event & ~EVENT_VALID_MASK) + return -1; + + pmc = (event >> EVENT_PMC_SHIFT) & EVENT_PMC_MASK; + unit = (event >> EVENT_UNIT_SHIFT) & EVENT_UNIT_MASK; + cache = (event >> EVENT_CACHE_SEL_SHIFT) & EVENT_CACHE_SEL_MASK; + ebb = (event >> EVENT_EBB_SHIFT) & EVENT_EBB_MASK; + + if (pmc) { + u64 base_event; + + if (pmc > 6) + return -1; + + /* Ignore Linux defined bits when checking event below */ + base_event = event & ~EVENT_LINUX_MASK; + + if (pmc >= 5 && base_event != 0x500fa && + base_event != 0x600f4) + return -1; + + mask |= CNST_PMC_MASK(pmc); + value |= CNST_PMC_VAL(pmc); + } + + if (pmc <= 4) { + /* + * Add to number of counters in use. Note this includes events with + * a PMC of 0 - they still need a PMC, it's just assigned later. + * Don't count events on PMC 5 & 6, there is only one valid event + * on each of those counters, and they are handled above. + */ + mask |= CNST_NC_MASK; + value |= CNST_NC_VAL; + } + + if (unit >= 6 && unit <= 9) { + /* + * L2/L3 events contain a cache selector field, which is + * supposed to be programmed into MMCRC. However MMCRC is only + * HV writable, and there is no API for guest kernels to modify + * it. 
The solution is for the hypervisor to initialise the + * field to zeroes, and for us to only ever allow events that + * have a cache selector of zero. The bank selector (bit 3) is + * irrelevant, as long as the rest of the value is 0. + */ + if (cache & 0x7) + return -1; + + } else if (event & EVENT_IS_L1) { + mask |= CNST_L1_QUAL_MASK; + value |= CNST_L1_QUAL_VAL(cache); + } + + if (event & EVENT_IS_MARKED) { + mask |= CNST_SAMPLE_MASK; + value |= CNST_SAMPLE_VAL(event >> EVENT_SAMPLE_SHIFT); + } + + /* + * Special case for PM_MRK_FAB_RSP_MATCH and PM_MRK_FAB_RSP_MATCH_CYC, + * the threshold control bits are used for the match value. + */ + if (event_is_fab_match(event)) { + mask |= CNST_FAB_MATCH_MASK; + value |= CNST_FAB_MATCH_VAL(event >> EVENT_THR_CTL_SHIFT); + } else { + /* + * Check the mantissa upper two bits are not zero, unless the + * exponent is also zero. See the THRESH_CMP_MANTISSA doc. + */ + unsigned int cmp, exp; + + cmp = (event >> EVENT_THR_CMP_SHIFT) & EVENT_THR_CMP_MASK; + exp = cmp >> 7; + + if (exp && (cmp & 0x60) == 0) + return -1; + + mask |= CNST_THRESH_MASK; + value |= CNST_THRESH_VAL(event >> EVENT_THRESH_SHIFT); + } + + if (!pmc && ebb) + /* EBB events must specify the PMC */ + return -1; + + if (event & EVENT_WANTS_BHRB) { + if (!ebb) + /* Only EBB events can request BHRB */ + return -1; + + mask |= CNST_IFM_MASK; + value |= CNST_IFM_VAL(event >> EVENT_IFM_SHIFT); + } + + /* + * All events must agree on EBB, either all request it or none. + * EBB events are pinned & exclusive, so this should never actually + * hit, but we leave it as a fallback in case. 
+ */ + mask |= CNST_EBB_VAL(ebb); + value |= CNST_EBB_MASK; + + *maskp = mask; + *valp = value; + + return 0; +} + +int isa207_compute_mmcr(u64 event[], int n_ev, + unsigned int hwc[], unsigned long mmcr[], + struct perf_event *pevents[]) +{ + unsigned long mmcra, mmcr1, mmcr2, unit, combine, psel, cache, val; + unsigned int pmc, pmc_inuse; + int i; + + pmc_inuse = 0; + + /* First pass to count resource use */ + for (i = 0; i < n_ev; ++i) { + pmc = (event[i] >> EVENT_PMC_SHIFT) & EVENT_PMC_MASK; + if (pmc) + pmc_inuse |= 1 << pmc; + } + + /* In continuous sampling mode, update SDAR on TLB miss */ + mmcra = MMCRA_SDAR_MODE_TLB; + mmcr1 = mmcr2 = 0; + + /* Second pass: assign PMCs, set all MMCR1 fields */ + for (i = 0; i < n_ev; ++i) { + pmc = (event[i] >> EVENT_PMC_SHIFT) & EVENT_PMC_MASK; + unit = (event[i] >> EVENT_UNIT_SHIFT) & EVENT_UNIT_MASK; + combine = (event[i] >> EVENT_COMBINE_SHIFT) & EVENT_COMBINE_MASK; + psel = event[i] & EVENT_PSEL_MASK; + + if (!pmc) { + for (pmc = 1; pmc <= 4; ++pmc) { + if (!(pmc_inuse & (1 << pmc))) + break; + } + + pmc_inuse |= 1 << pmc; + } + + if (pmc <= 4) { + mmcr1 |= unit << MMCR1_UNIT_SHIFT(pmc); + mmcr1 |= combine << MMCR1_COMBINE_SHIFT(pmc); + mmcr1 |= psel << MMCR1_PMCSEL_SHIFT(pmc); + } + + if (event[i] & EVENT_IS_L1) { + cache = event[i] >> EVENT_CACHE_SEL_SHIFT; + mmcr1 |= (cache & 1) << MMCR1_IC_QUAL_SHIFT; + cache >>= 1; + mmcr1 |= (cache & 1) << MMCR1_DC_QUAL_SHIFT; + } + + if (event[i] & EVENT_IS_MARKED) { + mmcra |= MMCRA_SAMPLE_ENABLE; + + val = (event[i] >> EVENT_SAMPLE_SHIFT) & EVENT_SAMPLE_MASK; + if (val) { + mmcra |= (val & 3) << MMCRA_SAMP_MODE_SHIFT; + mmcra |= (val >> 2) << MMCRA_SAMP_ELIG_SHIFT; + } + } + + /* + * PM_MRK_FAB_RSP_MATCH and PM_MRK_FAB_RSP_MATCH_CYC, + * the threshold bits are used for the match value. 
+ */ + if (event_is_fab_match(event[i])) { + mmcr1 |= ((event[i] >> EVENT_THR_CTL_SHIFT) & + EVENT_THR_CTL_MASK) << MMCR1_FAB_SHIFT; + } else { + val = (event[i] >> EVENT_THR_CTL_SHIFT) & EVENT_THR_CTL_MASK; + mmcra |= val << MMCRA_THR_CTL_SHIFT; + val = (event[i] >> EVENT_THR_SEL_SHIFT) & EVENT_THR_SEL_MASK; + mmcra |= val << MMCRA_THR_SEL_SHIFT; + val = (event[i] >> EVENT_THR_CMP_SHIFT) & EVENT_THR_CMP_MASK; + mmcra |= val << MMCRA_THR_CMP_SHIFT; + } + + if (event[i] & EVENT_WANTS_BHRB) { + val = (event[i] >> EVENT_IFM_SHIFT) & EVENT_IFM_MASK; + mmcra |= val << MMCRA_IFM_SHIFT; + } + + if (pevents[i]->attr.exclude_user) + mmcr2 |= MMCR2_FCP(pmc); + + if (pevents[i]->attr.exclude_hv) + mmcr2 |= MMCR2_FCH(pmc); + + if (pevents[i]->attr.exclude_kernel) { + if (cpu_has_feature(CPU_FTR_HVMODE)) + mmcr2 |= MMCR2_FCH(pmc); + else + mmcr2 |= MMCR2_FCS(pmc); + } + + hwc[i] = pmc - 1; + } + + /* Return MMCRx values */ + mmcr[0] = 0; + + /* pmc_inuse is 1-based */ + if (pmc_inuse & 2) + mmcr[0] = MMCR0_PMC1CE; + + if (pmc_inuse & 0x7c) + mmcr[0] |= MMCR0_PMCjCE; + + /* If we're not using PMC 5 or 6, freeze them */ + if (!(pmc_inuse & 0x60)) + mmcr[0] |= MMCR0_FC56; + + mmcr[1] = mmcr1; + mmcr[2] = mmcra; + mmcr[3] = mmcr2; + + return 0; +} + +void isa207_disable_pmc(unsigned int pmc, unsigned long mmcr[]) +{ + if (pmc <= 3) + mmcr[1] &= ~(0xffUL << MMCR1_PMCSEL_SHIFT(pmc + 1)); +} diff --git a/arch/powerpc/perf/isa207-common.h b/arch/powerpc/perf/isa207-common.h index 03205f5..4d0a4e5 100644 --- a/arch/powerpc/perf/isa207-common.h +++ b/arch/powerpc/perf/isa207-common.h @@ -227,4 +227,10 @@ #define MAX_ALT 2 #define MAX_PMU_COUNTERS 6 +int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp); +int isa207_compute_mmcr(u64 event[], int n_ev, + unsigned int hwc[], unsigned long mmcr[], + struct perf_event *pevents[]); +void isa207_disable_pmc(unsigned int pmc, unsigned long mmcr[]); + #endif diff --git a/arch/powerpc/perf/power8-pmu.c 
b/arch/powerpc/perf/power8-pmu.c index 4303e9b..5fde2b1 100644 --- a/arch/powerpc/perf/power8-pmu.c +++ b/arch/powerpc/perf/power8-pmu.c @@ -30,250 +30,6 @@ enum { #define POWER8_MMCRA_IFM2 0x0000000080000000UL #define POWER8_MMCRA_IFM3 0x00000000C0000000UL -static inline bool event_is_fab_match(u64 event) -{ - /* Only check pmc, unit and pmcxsel, ignore the edge bit (0) */ - event &= 0xff0fe; - - /* PM_MRK_FAB_RSP_MATCH & PM_MRK_FAB_RSP_MATCH_CYC */ - return (event == 0x30056 || event == 0x4f052); -} - -static int power8_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp) -{ - unsigned int unit, pmc, cache, ebb; - unsigned long mask, value; - - mask = value = 0; - - if (event & ~EVENT_VALID_MASK) - return -1; - - pmc = (event >> EVENT_PMC_SHIFT) & EVENT_PMC_MASK; - unit = (event >> EVENT_UNIT_SHIFT) & EVENT_UNIT_MASK; - cache = (event >> EVENT_CACHE_SEL_SHIFT) & EVENT_CACHE_SEL_MASK; - ebb = (event >> EVENT_EBB_SHIFT) & EVENT_EBB_MASK; - - if (pmc) { - u64 base_event; - - if (pmc > 6) - return -1; - - /* Ignore Linux defined bits when checking event below */ - base_event = event & ~EVENT_LINUX_MASK; - - if (pmc >= 5 && base_event != PM_RUN_INST_CMPL && - base_event != PM_RUN_CYC) - return -1; - - mask |= CNST_PMC_MASK(pmc); - value |= CNST_PMC_VAL(pmc); - } - - if (pmc <= 4) { - /* - * Add to number of counters in use. Note this includes events with - * a PMC of 0 - they still need a PMC, it's just assigned later. - * Don't count events on PMC 5 & 6, there is only one valid event - * on each of those counters, and they are handled above. - */ - mask |= CNST_NC_MASK; - value |= CNST_NC_VAL; - } - - if (unit >= 6 && unit <= 9) { - /* - * L2/L3 events contain a cache selector field, which is - * supposed to be programmed into MMCRC. However MMCRC is only - * HV writable, and there is no API for guest kernels to modify - * it. 
The solution is for the hypervisor to initialise the - * field to zeroes, and for us to only ever allow events that - * have a cache selector of zero. The bank selector (bit 3) is - * irrelevant, as long as the rest of the value is 0. - */ - if (cache & 0x7) - return -1; - - } else if (event & EVENT_IS_L1) { - mask |= CNST_L1_QUAL_MASK; - value |= CNST_L1_QUAL_VAL(cache); - } - - if (event & EVENT_IS_MARKED) { - mask |= CNST_SAMPLE_MASK; - value |= CNST_SAMPLE_VAL(event >> EVENT_SAMPLE_SHIFT); - } - - /* - * Special case for PM_MRK_FAB_RSP_MATCH and PM_MRK_FAB_RSP_MATCH_CYC, - * the threshold control bits are used for the match value. - */ - if (event_is_fab_match(event)) { - mask |= CNST_FAB_MATCH_MASK; - value |= CNST_FAB_MATCH_VAL(event >> EVENT_THR_CTL_SHIFT); - } else { - /* - * Check the mantissa upper two bits are not zero, unless the - * exponent is also zero. See the THRESH_CMP_MANTISSA doc. - */ - unsigned int cmp, exp; - - cmp = (event >> EVENT_THR_CMP_SHIFT) & EVENT_THR_CMP_MASK; - exp = cmp >> 7; - - if (exp && (cmp & 0x60) == 0) - return -1; - - mask |= CNST_THRESH_MASK; - value |= CNST_THRESH_VAL(event >> EVENT_THRESH_SHIFT); - } - - if (!pmc && ebb) - /* EBB events must specify the PMC */ - return -1; - - if (event & EVENT_WANTS_BHRB) { - if (!ebb) - /* Only EBB events can request BHRB */ - return -1; - - mask |= CNST_IFM_MASK; - value |= CNST_IFM_VAL(event >> EVENT_IFM_SHIFT); - } - - /* - * All events must agree on EBB, either all request it or none. - * EBB events are pinned & exclusive, so this should never actually - * hit, but we leave it as a fallback in case. 
- */ - mask |= CNST_EBB_VAL(ebb); - value |= CNST_EBB_MASK; - - *maskp = mask; - *valp = value; - - return 0; -} - -static int power8_compute_mmcr(u64 event[], int n_ev, - unsigned int hwc[], unsigned long mmcr[], - struct perf_event *pevents[]) -{ - unsigned long mmcra, mmcr1, mmcr2, unit, combine, psel, cache, val; - unsigned int pmc, pmc_inuse; - int i; - - pmc_inuse = 0; - - /* First pass to count resource use */ - for (i = 0; i < n_ev; ++i) { - pmc = (event[i] >> EVENT_PMC_SHIFT) & EVENT_PMC_MASK; - if (pmc) - pmc_inuse |= 1 << pmc; - } - - /* In continuous sampling mode, update SDAR on TLB miss */ - mmcra = MMCRA_SDAR_MODE_TLB; - mmcr1 = mmcr2 = 0; - - /* Second pass: assign PMCs, set all MMCR1 fields */ - for (i = 0; i < n_ev; ++i) { - pmc = (event[i] >> EVENT_PMC_SHIFT) & EVENT_PMC_MASK; - unit = (event[i] >> EVENT_UNIT_SHIFT) & EVENT_UNIT_MASK; - combine = (event[i] >> EVENT_COMBINE_SHIFT) & EVENT_COMBINE_MASK; - psel = event[i] & EVENT_PSEL_MASK; - - if (!pmc) { - for (pmc = 1; pmc <= 4; ++pmc) { - if (!(pmc_inuse & (1 << pmc))) - break; - } - - pmc_inuse |= 1 << pmc; - } - - if (pmc <= 4) { - mmcr1 |= unit << MMCR1_UNIT_SHIFT(pmc); - mmcr1 |= combine << MMCR1_COMBINE_SHIFT(pmc); - mmcr1 |= psel << MMCR1_PMCSEL_SHIFT(pmc); - } - - if (event[i] & EVENT_IS_L1) { - cache = event[i] >> EVENT_CACHE_SEL_SHIFT; - mmcr1 |= (cache & 1) << MMCR1_IC_QUAL_SHIFT; - cache >>= 1; - mmcr1 |= (cache & 1) << MMCR1_DC_QUAL_SHIFT; - } - - if (event[i] & EVENT_IS_MARKED) { - mmcra |= MMCRA_SAMPLE_ENABLE; - - val = (event[i] >> EVENT_SAMPLE_SHIFT) & EVENT_SAMPLE_MASK; - if (val) { - mmcra |= (val & 3) << MMCRA_SAMP_MODE_SHIFT; - mmcra |= (val >> 2) << MMCRA_SAMP_ELIG_SHIFT; - } - } - - /* - * PM_MRK_FAB_RSP_MATCH and PM_MRK_FAB_RSP_MATCH_CYC, - * the threshold bits are used for the match value. 
- */ - if (event_is_fab_match(event[i])) { - mmcr1 |= ((event[i] >> EVENT_THR_CTL_SHIFT) & - EVENT_THR_CTL_MASK) << MMCR1_FAB_SHIFT; - } else { - val = (event[i] >> EVENT_THR_CTL_SHIFT) & EVENT_THR_CTL_MASK; - mmcra |= val << MMCRA_THR_CTL_SHIFT; - val = (event[i] >> EVENT_THR_SEL_SHIFT) & EVENT_THR_SEL_MASK; - mmcra |= val << MMCRA_THR_SEL_SHIFT; - val = (event[i] >> EVENT_THR_CMP_SHIFT) & EVENT_THR_CMP_MASK; - mmcra |= val << MMCRA_THR_CMP_SHIFT; - } - - if (event[i] & EVENT_WANTS_BHRB) { - val = (event[i] >> EVENT_IFM_SHIFT) & EVENT_IFM_MASK; - mmcra |= val << MMCRA_IFM_SHIFT; - } - - if (pevents[i]->attr.exclude_user) - mmcr2 |= MMCR2_FCP(pmc); - - if (pevents[i]->attr.exclude_hv) - mmcr2 |= MMCR2_FCH(pmc); - - if (pevents[i]->attr.exclude_kernel) { - if (cpu_has_feature(CPU_FTR_HVMODE)) - mmcr2 |= MMCR2_FCH(pmc); - else - mmcr2 |= MMCR2_FCS(pmc); - } - - hwc[i] = pmc - 1; - } - - /* Return MMCRx values */ - mmcr[0] = 0; - - /* pmc_inuse is 1-based */ - if (pmc_inuse & 2) - mmcr[0] = MMCR0_PMC1CE; - - if (pmc_inuse & 0x7c) - mmcr[0] |= MMCR0_PMCjCE; - - /* If we're not using PMC 5 or 6, freeze them */ - if (!(pmc_inuse & 0x60)) - mmcr[0] |= MMCR0_FC56; - - mmcr[1] = mmcr1; - mmcr[2] = mmcra; - mmcr[3] = mmcr2; - - return 0; -} - /* Table of alternatives, sorted by column 0 */ static const unsigned int event_alternatives[][MAX_ALT] = { { PM_MRK_ST_CMPL, PM_MRK_ST_CMPL_ALT }, @@ -354,12 +110,6 @@ static int power8_get_alternatives(u64 event, unsigned int flags, u64 alt[]) return num_alt; } -static void power8_disable_pmc(unsigned int pmc, unsigned long mmcr[]) -{ - if (pmc <= 3) - mmcr[1] &= ~(0xffUL << MMCR1_PMCSEL_SHIFT(pmc + 1)); -} - GENERIC_EVENT_ATTR(cpu-cycles, PM_CYC); GENERIC_EVENT_ATTR(stalled-cycles-frontend, PM_GCT_NOSLOT_CYC); GENERIC_EVENT_ATTR(stalled-cycles-backend, PM_CMPLU_STALL); @@ -632,12 +382,12 @@ static struct power_pmu power8_pmu = { .max_alternatives = MAX_ALT + 1, .add_fields = ISA207_ADD_FIELDS, .test_adder = ISA207_TEST_ADDER, - 
.compute_mmcr = power8_compute_mmcr, + .compute_mmcr = isa207_compute_mmcr, .config_bhrb = power8_config_bhrb, .bhrb_filter_map = power8_bhrb_filter_map, - .get_constraint = power8_get_constraint, + .get_constraint = isa207_get_constraint, .get_alternatives = power8_get_alternatives, - .disable_pmc = power8_disable_pmc, + .disable_pmc = isa207_disable_pmc, .flags = PPMU_HAS_SIER | PPMU_ARCH_207S, .n_generic = ARRAY_SIZE(power8_generic_events), .generic_events = power8_generic_events, -- cgit v0.10.2 From 393eb79ad32fedbdcbcd51bca38cf66291f6d528 Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Sun, 26 Jun 2016 23:07:06 +0530 Subject: powerpc/perf: factor out power8 __init_pmu code Factor out the power8 pmu init functions to share with power9. Monitor Mode Control Register S(MMCRS) and Monitor Mode Control Register H(MMCRH) registers are dropped in Power9. These registers are added to new function which are included for power8 init. Signed-off-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S index 584e119..ec8a228 100644 --- a/arch/powerpc/kernel/cpu_setup_power.S +++ b/arch/powerpc/kernel/cpu_setup_power.S @@ -51,6 +51,7 @@ _GLOBAL(__setup_cpu_power8) mflr r11 bl __init_FSCR bl __init_PMU + bl __init_PMU_ISA207 bl __init_hvmode_206 mtlr r11 beqlr @@ -62,6 +63,7 @@ _GLOBAL(__setup_cpu_power8) bl __init_HFSCR bl __init_tlb_power8 bl __init_PMU_HV + bl __init_PMU_HV_ISA207 mtlr r11 blr @@ -69,6 +71,7 @@ _GLOBAL(__restore_cpu_power8) mflr r11 bl __init_FSCR bl __init_PMU + bl __init_PMU_ISA207 mfmsr r3 rldicl. 
r0,r3,4,63 mtlr r11 @@ -81,12 +84,14 @@ _GLOBAL(__restore_cpu_power8) bl __init_HFSCR bl __init_tlb_power8 bl __init_PMU_HV + bl __init_PMU_HV_ISA207 mtlr r11 blr _GLOBAL(__setup_cpu_power9) mflr r11 bl __init_FSCR + bl __init_PMU bl __init_hvmode_206 mtlr r11 beqlr @@ -97,12 +102,14 @@ _GLOBAL(__setup_cpu_power9) bl __init_LPCR bl __init_HFSCR bl __init_tlb_power9 + bl __init_PMU_HV mtlr r11 blr _GLOBAL(__restore_cpu_power9) mflr r11 bl __init_FSCR + bl __init_PMU mfmsr r3 rldicl. r0,r3,4,63 mtlr r11 @@ -114,6 +121,7 @@ _GLOBAL(__restore_cpu_power9) bl __init_LPCR bl __init_HFSCR bl __init_tlb_power9 + bl __init_PMU_HV mtlr r11 blr @@ -208,14 +216,22 @@ __init_tlb_power9: __init_PMU_HV: li r5,0 mtspr SPRN_MMCRC,r5 + blr + +__init_PMU_HV_ISA207: + li r5,0 mtspr SPRN_MMCRH,r5 blr __init_PMU: li r5,0 - mtspr SPRN_MMCRS,r5 mtspr SPRN_MMCRA,r5 mtspr SPRN_MMCR0,r5 mtspr SPRN_MMCR1,r5 mtspr SPRN_MMCR2,r5 blr + +__init_PMU_ISA207: + li r5,0 + mtspr SPRN_MMCRS,r5 + blr -- cgit v0.10.2 From 34922527a2bcb36bee9b4e9b813cf7af1e4ada1d Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Sun, 26 Jun 2016 23:07:07 +0530 Subject: powerpc/perf: Add power9 event list macros for generic and cache events Add macros for the generic and cache events on Power9 Signed-off-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/perf/power9-events-list.h b/arch/powerpc/perf/power9-events-list.h new file mode 100644 index 0000000..cda6fcb --- /dev/null +++ b/arch/powerpc/perf/power9-events-list.h @@ -0,0 +1,55 @@ +/* + * Performance counter support for POWER9 processors. + * + * Copyright 2016 Madhavan Srinivasan, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +/* + * Power9 event codes. 
+ */ +EVENT(PM_CYC, 0x0001e) +EVENT(PM_ICT_NOSLOT_CYC, 0x100f8) +EVENT(PM_CMPLU_STALL, 0x1e054) +EVENT(PM_INST_CMPL, 0x00002) +EVENT(PM_BRU_CMPL, 0x40060) +EVENT(PM_BR_MPRED_CMPL, 0x400f6) + +/* All L1 D cache load references counted at finish, gated by reject */ +EVENT(PM_LD_REF_L1, 0x100fc) +/* Load Missed L1 */ +EVENT(PM_LD_MISS_L1_FIN, 0x2c04e) +/* Store Missed L1 */ +EVENT(PM_ST_MISS_L1, 0x300f0) +/* L1 cache data prefetches */ +EVENT(PM_L1_PREF, 0x20054) +/* Instruction fetches from L1 */ +EVENT(PM_INST_FROM_L1, 0x04080) +/* Demand iCache Miss */ +EVENT(PM_L1_ICACHE_MISS, 0x200fd) +/* Instruction Demand sectors written into IL1 */ +EVENT(PM_L1_DEMAND_WRITE, 0x0408c) +/* Instruction prefetch written into IL1 */ +EVENT(PM_IC_PREF_WRITE, 0x0408e) +/* The data cache was reloaded from local core's L3 due to a demand load */ +EVENT(PM_DATA_FROM_L3, 0x4c042) +/* Demand LD - L3 Miss (not L2 hit and not L3 hit) */ +EVENT(PM_DATA_FROM_L3MISS, 0x300fe) +/* All successful D-side store dispatches for this thread */ +EVENT(PM_L2_ST, 0x16081) +/* All successful D-side store dispatches for this thread that were L2 Miss */ +EVENT(PM_L2_ST_MISS, 0x26081) +/* Total HW L3 prefetches(Load+store) */ +EVENT(PM_L3_PREF_ALL, 0x4e052) +/* Data PTEG reload */ +EVENT(PM_DTLB_MISS, 0x300fc) +/* ITLB Reloaded */ +EVENT(PM_ITLB_MISS, 0x400fc) +/* Run_Instructions */ +EVENT(PM_RUN_INST_CMPL, 0x500fa) +/* Run_cycles */ +EVENT(PM_RUN_CYC, 0x600f4) -- cgit v0.10.2 From 8c002dbd05eecbb2933e9668da9614b33c7a97d2 Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Sun, 26 Jun 2016 23:07:08 +0530 Subject: powerpc/perf: Power9 PMU support This patch adds base enablement for the power9 PMU.
Signed-off-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile index 92f8ea4..f102d53 100644 --- a/arch/powerpc/perf/Makefile +++ b/arch/powerpc/perf/Makefile @@ -5,7 +5,7 @@ obj-$(CONFIG_PERF_EVENTS) += callchain.o perf_regs.o obj-$(CONFIG_PPC_PERF_CTRS) += core-book3s.o bhrb.o obj64-$(CONFIG_PPC_PERF_CTRS) += power4-pmu.o ppc970-pmu.o power5-pmu.o \ power5+-pmu.o power6-pmu.o power7-pmu.o \ - isa207-common.o power8-pmu.o + isa207-common.o power8-pmu.o power9-pmu.o obj32-$(CONFIG_PPC_PERF_CTRS) += mpc7450-pmu.o obj-$(CONFIG_FSL_EMB_PERF_EVENT) += core-fsl-emb.o diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c new file mode 100644 index 0000000..a2798b5 --- /dev/null +++ b/arch/powerpc/perf/power9-pmu.c @@ -0,0 +1,271 @@ +/* + * Performance counter support for POWER9 processors. + * + * Copyright 2009 Paul Mackerras, IBM Corporation. + * Copyright 2013 Michael Ellerman, IBM Corporation. + * Copyright 2016 Madhavan Srinivasan, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or later version. + */ + +#define pr_fmt(fmt) "power9-pmu: " fmt + +#include "isa207-common.h" + +/* + * Some power9 event codes. 
+ */ +#define EVENT(_name, _code) _name = _code, + +enum { +#include "power9-events-list.h" +}; + +#undef EVENT + +/* MMCRA IFM bits - POWER9 */ +#define POWER9_MMCRA_IFM1 0x0000000040000000UL +#define POWER9_MMCRA_IFM2 0x0000000080000000UL +#define POWER9_MMCRA_IFM3 0x00000000C0000000UL + + +PMU_FORMAT_ATTR(event, "config:0-49"); +PMU_FORMAT_ATTR(pmcxsel, "config:0-7"); +PMU_FORMAT_ATTR(mark, "config:8"); +PMU_FORMAT_ATTR(combine, "config:11"); +PMU_FORMAT_ATTR(unit, "config:12-15"); +PMU_FORMAT_ATTR(pmc, "config:16-19"); +PMU_FORMAT_ATTR(cache_sel, "config:20-23"); +PMU_FORMAT_ATTR(sample_mode, "config:24-28"); +PMU_FORMAT_ATTR(thresh_sel, "config:29-31"); +PMU_FORMAT_ATTR(thresh_stop, "config:32-35"); +PMU_FORMAT_ATTR(thresh_start, "config:36-39"); +PMU_FORMAT_ATTR(thresh_cmp, "config:40-49"); + +static struct attribute *power9_pmu_format_attr[] = { + &format_attr_event.attr, + &format_attr_pmcxsel.attr, + &format_attr_mark.attr, + &format_attr_combine.attr, + &format_attr_unit.attr, + &format_attr_pmc.attr, + &format_attr_cache_sel.attr, + &format_attr_sample_mode.attr, + &format_attr_thresh_sel.attr, + &format_attr_thresh_stop.attr, + &format_attr_thresh_start.attr, + &format_attr_thresh_cmp.attr, + NULL, +}; + +struct attribute_group power9_pmu_format_group = { + .name = "format", + .attrs = power9_pmu_format_attr, +}; + +static const struct attribute_group *power9_pmu_attr_groups[] = { + &power9_pmu_format_group, + NULL, +}; + +static int power9_generic_events[] = { + [PERF_COUNT_HW_CPU_CYCLES] = PM_CYC, + [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = PM_ICT_NOSLOT_CYC, + [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = PM_CMPLU_STALL, + [PERF_COUNT_HW_INSTRUCTIONS] = PM_INST_CMPL, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = PM_BRU_CMPL, + [PERF_COUNT_HW_BRANCH_MISSES] = PM_BR_MPRED_CMPL, + [PERF_COUNT_HW_CACHE_REFERENCES] = PM_LD_REF_L1, + [PERF_COUNT_HW_CACHE_MISSES] = PM_LD_MISS_L1_FIN, +}; + +static u64 power9_bhrb_filter_map(u64 branch_sample_type) +{ + u64 
pmu_bhrb_filter = 0; + + /* BHRB and regular PMU events share the same privilege state + * filter configuration. BHRB is always recorded along with a + * regular PMU event. As the privilege state filter is handled + * in the basic PMC configuration of the accompanying regular + * PMU event, we ignore any separate BHRB specific request. + */ + + /* No branch filter requested */ + if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY) + return pmu_bhrb_filter; + + /* Invalid branch filter options - HW does not support */ + if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY_RETURN) + return -1; + + if (branch_sample_type & PERF_SAMPLE_BRANCH_IND_CALL) + return -1; + + if (branch_sample_type & PERF_SAMPLE_BRANCH_CALL) + return -1; + + if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY_CALL) { + pmu_bhrb_filter |= POWER9_MMCRA_IFM1; + return pmu_bhrb_filter; + } + + /* Every thing else is unsupported */ + return -1; +} + +static void power9_config_bhrb(u64 pmu_bhrb_filter) +{ + /* Enable BHRB filter in PMU */ + mtspr(SPRN_MMCRA, (mfspr(SPRN_MMCRA) | pmu_bhrb_filter)); +} + +#define C(x) PERF_COUNT_HW_CACHE_##x + +/* + * Table of generalized cache-related events. + * 0 means not supported, -1 means nonsensical, other values + * are event codes. 
+ */ +static int power9_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { + [ C(L1D) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = PM_LD_REF_L1, + [ C(RESULT_MISS) ] = PM_LD_MISS_L1_FIN, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = PM_ST_MISS_L1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = PM_L1_PREF, + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(L1I) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = PM_INST_FROM_L1, + [ C(RESULT_MISS) ] = PM_L1_ICACHE_MISS, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = PM_L1_DEMAND_WRITE, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = PM_IC_PREF_WRITE, + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(LL) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = PM_DATA_FROM_L3, + [ C(RESULT_MISS) ] = PM_DATA_FROM_L3MISS, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = PM_L2_ST, + [ C(RESULT_MISS) ] = PM_L2_ST_MISS, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = PM_L3_PREF_ALL, + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(DTLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = PM_DTLB_MISS, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, + [ C(ITLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = PM_ITLB_MISS, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, + [ C(BPU) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = PM_BRU_CMPL, + [ C(RESULT_MISS) ] = PM_BR_MPRED_CMPL, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, + [ C(NODE) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) 
] = -1, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, +}; + +#undef C + +static struct power_pmu power9_pmu = { + .name = "POWER9", + .n_counter = MAX_PMU_COUNTERS, + .add_fields = ISA207_ADD_FIELDS, + .test_adder = ISA207_TEST_ADDER, + .compute_mmcr = isa207_compute_mmcr, + .config_bhrb = power9_config_bhrb, + .bhrb_filter_map = power9_bhrb_filter_map, + .get_constraint = isa207_get_constraint, + .disable_pmc = isa207_disable_pmc, + .flags = PPMU_HAS_SIER | PPMU_ARCH_207S, + .n_generic = ARRAY_SIZE(power9_generic_events), + .generic_events = power9_generic_events, + .cache_events = &power9_cache_events, + .attr_groups = power9_pmu_attr_groups, + .bhrb_nr = 32, +}; + +static int __init init_power9_pmu(void) +{ + int rc; + + /* Comes from cpu_specs[] */ + if (!cur_cpu_spec->oprofile_cpu_type || + strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power9")) + return -ENODEV; + + rc = register_power_pmu(&power9_pmu); + if (rc) + return rc; + + /* Tell userspace that EBB is supported */ + cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_EBB; + + return 0; +} +early_initcall(init_power9_pmu); -- cgit v0.10.2 From f1fb60bfde65fe4c4372d480d1b5d57bdba20367 Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Sun, 26 Jun 2016 23:07:09 +0530 Subject: powerpc/perf: Export Power9 generic and cache events to sysfs Export the generic hardware and cache perf events for Power9 to sysfs, so users can determine the PMU event monitored. 
Signed-off-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c index a2798b5..7883463 100644 --- a/arch/powerpc/perf/power9-pmu.c +++ b/arch/powerpc/perf/power9-pmu.c @@ -31,6 +31,64 @@ enum { #define POWER9_MMCRA_IFM2 0x0000000080000000UL #define POWER9_MMCRA_IFM3 0x00000000C0000000UL +GENERIC_EVENT_ATTR(cpu-cycles, PM_CYC); +GENERIC_EVENT_ATTR(stalled-cycles-frontend, PM_ICT_NOSLOT_CYC); +GENERIC_EVENT_ATTR(stalled-cycles-backend, PM_CMPLU_STALL); +GENERIC_EVENT_ATTR(instructions, PM_INST_CMPL); +GENERIC_EVENT_ATTR(branch-instructions, PM_BRU_CMPL); +GENERIC_EVENT_ATTR(branch-misses, PM_BR_MPRED_CMPL); +GENERIC_EVENT_ATTR(cache-references, PM_LD_REF_L1); +GENERIC_EVENT_ATTR(cache-misses, PM_LD_MISS_L1_FIN); + +CACHE_EVENT_ATTR(L1-dcache-load-misses, PM_LD_MISS_L1_FIN); +CACHE_EVENT_ATTR(L1-dcache-loads, PM_LD_REF_L1); +CACHE_EVENT_ATTR(L1-dcache-prefetches, PM_L1_PREF); +CACHE_EVENT_ATTR(L1-dcache-store-misses, PM_ST_MISS_L1); +CACHE_EVENT_ATTR(L1-icache-load-misses, PM_L1_ICACHE_MISS); +CACHE_EVENT_ATTR(L1-icache-loads, PM_INST_FROM_L1); +CACHE_EVENT_ATTR(L1-icache-prefetches, PM_IC_PREF_WRITE); +CACHE_EVENT_ATTR(LLC-load-misses, PM_DATA_FROM_L3MISS); +CACHE_EVENT_ATTR(LLC-loads, PM_DATA_FROM_L3); +CACHE_EVENT_ATTR(LLC-prefetches, PM_L3_PREF_ALL); +CACHE_EVENT_ATTR(LLC-store-misses, PM_L2_ST_MISS); +CACHE_EVENT_ATTR(LLC-stores, PM_L2_ST); +CACHE_EVENT_ATTR(branch-load-misses, PM_BR_MPRED_CMPL); +CACHE_EVENT_ATTR(branch-loads, PM_BRU_CMPL); +CACHE_EVENT_ATTR(dTLB-load-misses, PM_DTLB_MISS); +CACHE_EVENT_ATTR(iTLB-load-misses, PM_ITLB_MISS); + +static struct attribute *power9_events_attr[] = { + GENERIC_EVENT_PTR(PM_CYC), + GENERIC_EVENT_PTR(PM_ICT_NOSLOT_CYC), + GENERIC_EVENT_PTR(PM_CMPLU_STALL), + GENERIC_EVENT_PTR(PM_INST_CMPL), + GENERIC_EVENT_PTR(PM_BRU_CMPL), + GENERIC_EVENT_PTR(PM_BR_MPRED_CMPL), + GENERIC_EVENT_PTR(PM_LD_REF_L1), + GENERIC_EVENT_PTR(PM_LD_MISS_L1_FIN), + 
CACHE_EVENT_PTR(PM_LD_MISS_L1_FIN), + CACHE_EVENT_PTR(PM_LD_REF_L1), + CACHE_EVENT_PTR(PM_L1_PREF), + CACHE_EVENT_PTR(PM_ST_MISS_L1), + CACHE_EVENT_PTR(PM_L1_ICACHE_MISS), + CACHE_EVENT_PTR(PM_INST_FROM_L1), + CACHE_EVENT_PTR(PM_IC_PREF_WRITE), + CACHE_EVENT_PTR(PM_DATA_FROM_L3MISS), + CACHE_EVENT_PTR(PM_DATA_FROM_L3), + CACHE_EVENT_PTR(PM_L3_PREF_ALL), + CACHE_EVENT_PTR(PM_L2_ST_MISS), + CACHE_EVENT_PTR(PM_L2_ST), + CACHE_EVENT_PTR(PM_BR_MPRED_CMPL), + CACHE_EVENT_PTR(PM_BRU_CMPL), + CACHE_EVENT_PTR(PM_DTLB_MISS), + CACHE_EVENT_PTR(PM_ITLB_MISS), + NULL +}; + +static struct attribute_group power9_pmu_events_group = { + .name = "events", + .attrs = power9_events_attr, +}; PMU_FORMAT_ATTR(event, "config:0-49"); PMU_FORMAT_ATTR(pmcxsel, "config:0-7"); @@ -68,6 +126,7 @@ struct attribute_group power9_pmu_format_group = { static const struct attribute_group *power9_pmu_attr_groups[] = { &power9_pmu_format_group, + &power9_pmu_events_group, NULL, }; -- cgit v0.10.2 From d4ecdff2ecdb5c01c46f2a6b1bf3f161279b330e Mon Sep 17 00:00:00 2001 From: Cyril Bur Date: Tue, 1 Mar 2016 15:26:36 +1100 Subject: selftests/powerpc/pmu: Use signed long to read perf_event_paranoid Excerpt from man 2 perf_event_open: /proc/sys/kernel/perf_event_paranoid The perf_event_paranoid file can be set to restrict access to the performance counters. 2 allow only user-space measurements. 1 allow both kernel and user measurements (default). 0 allow access to CPU-specific data but not raw tracepoint samples. -1 no restrictions. require_paranoia_below() should return 0 if perf_event_paranoid is below a specified level, the value from perf_event_paranoid is read into an unsigned long so the incorrect value is returned when perf_event_paranoid is set to -1. Without this patch applied there is the same number of selftests/powerpc which skip when /proc/sys/kernel/perf_event_paranoid is set to 1 or -1 but no skips when set to zero. 
With this patch applied there are no skipped selftests/powerpc test when /proc/sys/kernel/perf_event_paranoid is set to 0 or -1. Signed-off-by: Cyril Bur Signed-off-by: Michael Ellerman diff --git a/tools/testing/selftests/powerpc/pmu/lib.c b/tools/testing/selftests/powerpc/pmu/lib.c index a361ad3..8b992fa 100644 --- a/tools/testing/selftests/powerpc/pmu/lib.c +++ b/tools/testing/selftests/powerpc/pmu/lib.c @@ -190,7 +190,7 @@ int parse_proc_maps(void) bool require_paranoia_below(int level) { - unsigned long current; + long current; char *end, buf[16]; FILE *f; int rc; @@ -208,7 +208,7 @@ bool require_paranoia_below(int level) goto out_close; } - current = strtoul(buf, &end, 10); + current = strtol(buf, &end, 10); if (end == buf) { printf("Couldn't parse " PARANOID_PATH "?\n"); @@ -216,7 +216,7 @@ bool require_paranoia_below(int level) } if (current >= level) - goto out; + goto out_close; rc = 0; out_close: -- cgit v0.10.2 From 94fa56a96a39914551694673fdb483b8924b0e56 Mon Sep 17 00:00:00 2001 From: Cyril Bur Date: Fri, 4 Mar 2016 10:06:39 +1100 Subject: selftests/powerpc: Fix usage message in context_switch When we inverted the behaviour of the flags we forgot to update the usage message. 
Fixes: 51c21e72eb99 ("selftests/powerpc: Make context_switch touch FP/altivec/vector by default") Signed-off-by: Cyril Bur Signed-off-by: Michael Ellerman diff --git a/tools/testing/selftests/powerpc/benchmarks/context_switch.c b/tools/testing/selftests/powerpc/benchmarks/context_switch.c index 7b78594..e6af382 100644 --- a/tools/testing/selftests/powerpc/benchmarks/context_switch.c +++ b/tools/testing/selftests/powerpc/benchmarks/context_switch.c @@ -369,11 +369,11 @@ static void usage(void) fprintf(stderr, "\t\t--process\tUse processes (default threads)\n"); fprintf(stderr, "\t\t--timeout=X\tDuration in seconds to run (default 30)\n"); fprintf(stderr, "\t\t--vdso\t\ttouch VDSO\n"); - fprintf(stderr, "\t\t--fp\t\ttouch FP\n"); + fprintf(stderr, "\t\t--no-fp\t\tDon't touch FP\n"); #ifdef __powerpc__ - fprintf(stderr, "\t\t--altivec\ttouch altivec\n"); + fprintf(stderr, "\t\t--no-altivec\tDon't touch altivec\n"); #endif - fprintf(stderr, "\t\t--vector\ttouch vector\n"); + fprintf(stderr, "\t\t--no-vector\tDon't touch vector\n"); } int main(int argc, char *argv[]) -- cgit v0.10.2 From f2418ae8a81760b4dec8d5e3e7f1faf45c422e9d Mon Sep 17 00:00:00 2001 From: Cyril Bur Date: Fri, 4 Mar 2016 10:06:40 +1100 Subject: selftests/powerpc: Fix generation of vector instructions/types in context_switch Currently it doesn't appear the resulting binary actually uses any Altivec or VSX instructions; the solution is to explicitly tell GCC to use vector instructions and use vector types in the code. Part of this issue can be GCC version specific: GCC 4.9.x is happy to use Altivec and VSX instructions if altivec.h is included (and possibly if vector types are used), this also means that 4.9.x will use VSX instructions even if only -maltivec is passed. It is also possible that Altivec instructions will be used even without -maltivec or -mabi=altivec.
GCC 5.2.x complains about the lack of -maltivec parameter if altivec.h is included and will not use VSX unless -mvsx is present on commandline. GCC 5.3.0 has a regression that means __attribute__((__target__("no-vsx"))) fails to build. A fix is targeted for 5.4. Furthermore LTO (Link Time Optimisation) doesn't play well with __attribute__((__target__("no-vsx"))), LTO can cause GCC to forget about the attribute and compile with VSX instructions regardless. Be wary when enabling -flto for this test. Signed-off-by: Cyril Bur Signed-off-by: Michael Ellerman diff --git a/tools/testing/selftests/powerpc/benchmarks/Makefile b/tools/testing/selftests/powerpc/benchmarks/Makefile index 912445f..6816fc2 100644 --- a/tools/testing/selftests/powerpc/benchmarks/Makefile +++ b/tools/testing/selftests/powerpc/benchmarks/Makefile @@ -7,6 +7,7 @@ all: $(TEST_PROGS) $(TEST_PROGS): ../harness.c context_switch: ../utils.c +context_switch: CFLAGS += -maltivec -mvsx -mabi=altivec context_switch: LDLIBS += -lpthread include ../../lib.mk diff --git a/tools/testing/selftests/powerpc/benchmarks/context_switch.c b/tools/testing/selftests/powerpc/benchmarks/context_switch.c index e6af382..a36883a 100644 --- a/tools/testing/selftests/powerpc/benchmarks/context_switch.c +++ b/tools/testing/selftests/powerpc/benchmarks/context_switch.c @@ -25,7 +25,9 @@ #include #include #include - +#ifdef __powerpc__ +#include <altivec.h> +#endif #include "../utils.h" static unsigned int timeout = 30; @@ -37,12 +39,15 @@ static int touch_fp = 1; double fp; static int touch_vector = 1; -typedef int v4si __attribute__ ((vector_size (16))); -v4si a, b, c; +vector int a, b, c; #ifdef __powerpc__ static int touch_altivec = 1; +/* + * Note: LTO (Link Time Optimisation) doesn't play well with this function + * attribute. Be very careful enabling LTO for this test.
+ */ static void __attribute__((__target__("no-vsx"))) altivec_touch_fn(void) { c = a + b; -- cgit v0.10.2 From 0c63e8b7b97fb72ef38c8edbfbe751d3602e03a1 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 2 Jun 2016 22:02:01 +1000 Subject: selftests/powerpc: Import Anton's mmap & futex micro benchmarks These are useful little loops for smoke testing performance. Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman diff --git a/tools/testing/selftests/powerpc/benchmarks/.gitignore b/tools/testing/selftests/powerpc/benchmarks/.gitignore index 6fa6733..bce49eb 100644 --- a/tools/testing/selftests/powerpc/benchmarks/.gitignore +++ b/tools/testing/selftests/powerpc/benchmarks/.gitignore @@ -1,2 +1,4 @@ gettimeofday context_switch +mmap_bench +futex_bench \ No newline at end of file diff --git a/tools/testing/selftests/powerpc/benchmarks/Makefile b/tools/testing/selftests/powerpc/benchmarks/Makefile index 6816fc2..a9adfb7 100644 --- a/tools/testing/selftests/powerpc/benchmarks/Makefile +++ b/tools/testing/selftests/powerpc/benchmarks/Makefile @@ -1,4 +1,4 @@ -TEST_PROGS := gettimeofday context_switch +TEST_PROGS := gettimeofday context_switch mmap_bench futex_bench CFLAGS += -O2 diff --git a/tools/testing/selftests/powerpc/benchmarks/futex_bench.c b/tools/testing/selftests/powerpc/benchmarks/futex_bench.c new file mode 100644 index 0000000..2fc711d9 --- /dev/null +++ b/tools/testing/selftests/powerpc/benchmarks/futex_bench.c @@ -0,0 +1,42 @@ +/* + * Copyright 2016, Anton Blanchard, Michael Ellerman, IBM Corp. + * Licensed under GPLv2. 
+ */ + +#define _GNU_SOURCE + +#include +#include +#include +#include +#include + +#include "utils.h" + +#define ITERATIONS 100000000 + +#define futex(A, B, C, D, E, F) syscall(__NR_futex, A, B, C, D, E, F) + +int test_futex(void) +{ + struct timespec ts_start, ts_end; + unsigned long i = ITERATIONS; + + clock_gettime(CLOCK_MONOTONIC, &ts_start); + + while (i--) { + unsigned int addr = 0; + futex(&addr, FUTEX_WAKE, 1, NULL, NULL, 0); + } + + clock_gettime(CLOCK_MONOTONIC, &ts_end); + + printf("time = %.6f\n", ts_end.tv_sec - ts_start.tv_sec + (ts_end.tv_nsec - ts_start.tv_nsec) / 1e9); + + return 0; +} + +int main(void) +{ + return test_harness(test_futex, "futex_bench"); +} diff --git a/tools/testing/selftests/powerpc/benchmarks/mmap_bench.c b/tools/testing/selftests/powerpc/benchmarks/mmap_bench.c new file mode 100644 index 0000000..8d084a2 --- /dev/null +++ b/tools/testing/selftests/powerpc/benchmarks/mmap_bench.c @@ -0,0 +1,41 @@ +/* + * Copyright 2016, Anton Blanchard, Michael Ellerman, IBM Corp. + * Licensed under GPLv2. 
+ */ + +#include +#include +#include +#include + +#include "utils.h" + +#define ITERATIONS 5000000 + +#define MEMSIZE (128 * 1024 * 1024) + +int test_mmap(void) +{ + struct timespec ts_start, ts_end; + unsigned long i = ITERATIONS; + + clock_gettime(CLOCK_MONOTONIC, &ts_start); + + while (i--) { + char *c = mmap(NULL, MEMSIZE, PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); + FAIL_IF(c == MAP_FAILED); + munmap(c, MEMSIZE); + } + + clock_gettime(CLOCK_MONOTONIC, &ts_end); + + printf("time = %.6f\n", ts_end.tv_sec - ts_start.tv_sec + (ts_end.tv_nsec - ts_start.tv_nsec) / 1e9); + + return 0; +} + +int main(void) +{ + return test_harness(test_mmap, "mmap_bench"); +} -- cgit v0.10.2 From ae26b36f8098c793a754549662771099215904ed Mon Sep 17 00:00:00 2001 From: Chris Smart Date: Fri, 17 Jun 2016 09:33:45 +1000 Subject: powerpc: Send SIGBUS on unaligned copy and paste Calling ISA 3.0 instructions copy, copy_first, paste and paste_last generates an alignment fault when copying or pasting unaligned data (128 byte). We catch this and send SIGBUS to the userspace process that caused it. We do not emulate these because paste may contain additional metadata when pasting to a co-processor and paste_last is the synchronisation point for preceding copy/paste sequences. Thanks to Michael Neuling for his help. 
Signed-off-by: Chris Smart Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index 6a77d130..9de9df1 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -131,6 +131,8 @@ /* sorted alphabetically */ #define PPC_INST_BHRBE 0x7c00025c #define PPC_INST_CLRBHRB 0x7c00035c +#define PPC_INST_COPY 0x7c00060c +#define PPC_INST_COPY_FIRST 0x7c20060c #define PPC_INST_CP_ABORT 0x7c00068c #define PPC_INST_DCBA 0x7c0005ec #define PPC_INST_DCBA_MASK 0xfc0007fe @@ -161,6 +163,8 @@ #define PPC_INST_MSGSNDP 0x7c00011c #define PPC_INST_MTTMR 0x7c0003dc #define PPC_INST_NOP 0x60000000 +#define PPC_INST_PASTE 0x7c00070c +#define PPC_INST_PASTE_LAST 0x7c20070d #define PPC_INST_POPCNTB 0x7c0000f4 #define PPC_INST_POPCNTB_MASK 0xfc0007fe #define PPC_INST_POPCNTD 0x7c0003f4 diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c index d7ad66b..c7097f9 100644 --- a/arch/powerpc/kernel/align.c +++ b/arch/powerpc/kernel/align.c @@ -873,6 +873,20 @@ int fix_alignment(struct pt_regs *regs) return emulate_vsx(addr, reg, areg, regs, flags, nb, elsize); } #endif + + /* + * ISA 3.0 (such as P9) copy, copy_first, paste and paste_last alignment + * check. + * + * Send a SIGBUS to the process that caused the fault. + * + * We do not emulate these because paste may contain additional metadata + * when pasting to a co-processor. Furthermore, paste_last is the + * synchronisation point for preceding copy/paste sequences. 
+ */ + if ((instruction & 0xfc0006fe) == PPC_INST_COPY) + return -EIO; + /* A size of 0 indicates an instruction we don't support, with * the exception of DCBZ which is handled as a special case here */ -- cgit v0.10.2 From 4375088072295b09cc5d9bf7a8cd2333b608492a Mon Sep 17 00:00:00 2001 From: Chris Smart Date: Fri, 17 Jun 2016 09:34:47 +1000 Subject: selftests/powerpc: Test unaligned copy and paste Test that an ISA 3.0 compliant machine performing an unaligned copy, copy_first, paste or paste_last is sent a SIGBUS. Signed-off-by: Chris Smart Signed-off-by: Michael Ellerman diff --git a/tools/testing/selftests/powerpc/Makefile b/tools/testing/selftests/powerpc/Makefile index 4ca83fe..3c40c9d 100644 --- a/tools/testing/selftests/powerpc/Makefile +++ b/tools/testing/selftests/powerpc/Makefile @@ -12,7 +12,8 @@ CFLAGS := -Wall -O2 -Wall -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(CURDIR) $ export CFLAGS -SUB_DIRS = benchmarks \ +SUB_DIRS = alignment \ + benchmarks \ copyloops \ context_switch \ dscr \ diff --git a/tools/testing/selftests/powerpc/alignment/.gitignore b/tools/testing/selftests/powerpc/alignment/.gitignore new file mode 100644 index 0000000..1d980e3 --- /dev/null +++ b/tools/testing/selftests/powerpc/alignment/.gitignore @@ -0,0 +1,5 @@ +copy_unaligned +copy_first_unaligned +paste_unaligned +paste_last_unaligned +copy_paste_unaligned_common diff --git a/tools/testing/selftests/powerpc/alignment/Makefile b/tools/testing/selftests/powerpc/alignment/Makefile new file mode 100644 index 0000000..ad6a4e4 --- /dev/null +++ b/tools/testing/selftests/powerpc/alignment/Makefile @@ -0,0 +1,10 @@ +TEST_PROGS := copy_unaligned copy_first_unaligned paste_unaligned paste_last_unaligned + +all: $(TEST_PROGS) + +$(TEST_PROGS): ../harness.c ../utils.c copy_paste_unaligned_common.c + +include ../../lib.mk + +clean: + rm -f $(TEST_PROGS) diff --git a/tools/testing/selftests/powerpc/alignment/copy_first_unaligned.c 
b/tools/testing/selftests/powerpc/alignment/copy_first_unaligned.c new file mode 100644 index 0000000..47b73b3 --- /dev/null +++ b/tools/testing/selftests/powerpc/alignment/copy_first_unaligned.c @@ -0,0 +1,41 @@ +/* + * Copyright 2016, Chris Smart, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Calls to copy_first which are not 128-byte aligned should be + * caught and sent a SIGBUS. + * + */ + +#include +#include +#include "utils.h" +#include "instructions.h" +#include "copy_paste_unaligned_common.h" + +unsigned int expected_instruction = PPC_INST_COPY_FIRST; +unsigned int instruction_mask = 0xfc2007fe; + +int test_copy_first_unaligned(void) +{ + /* Only run this test on a P9 or later */ + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_00)); + + /* Register our signal handler with SIGBUS */ + setup_signal_handler(); + + /* +1 makes buf unaligned */ + copy_first(cacheline_buf+1); + + /* We should not get here */ + return 1; +} + +int main(int argc, char *argv[]) +{ + return test_harness(test_copy_first_unaligned, "test_copy_first_unaligned"); +} diff --git a/tools/testing/selftests/powerpc/alignment/copy_paste_unaligned_common.c b/tools/testing/selftests/powerpc/alignment/copy_paste_unaligned_common.c new file mode 100644 index 0000000..d35fa5f --- /dev/null +++ b/tools/testing/selftests/powerpc/alignment/copy_paste_unaligned_common.c @@ -0,0 +1,53 @@ +/* + * Copyright 2016, Chris Smart, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + * Common code for copy, copy_first, paste and paste_last unaligned + * tests. + * + */ + +#include +#include +#include +#include "utils.h" +#include "instructions.h" +#include "copy_paste_unaligned_common.h" + +unsigned int expected_instruction; +unsigned int instruction_mask; + +char cacheline_buf[128] __cacheline_aligned; + +void signal_action_handler(int signal_num, siginfo_t *info, void *ptr) +{ + ucontext_t *ctx = ptr; +#if defined(__powerpc64__) + unsigned int *pc = (unsigned int *)ctx->uc_mcontext.gp_regs[PT_NIP]; +#else + unsigned int *pc = (unsigned int *)ctx->uc_mcontext.uc_regs->gregs[PT_NIP]; +#endif + + /* + * Check that the signal was on the correct instruction, using a + * mask because the compiler assigns the register at RB. + */ + if ((*pc & instruction_mask) == expected_instruction) + _exit(0); /* We hit the right instruction */ + + _exit(1); +} + +void setup_signal_handler(void) +{ + struct sigaction signal_action; + + memset(&signal_action, 0, sizeof(signal_action)); + signal_action.sa_sigaction = signal_action_handler; + signal_action.sa_flags = SA_SIGINFO; + sigaction(SIGBUS, &signal_action, NULL); +} diff --git a/tools/testing/selftests/powerpc/alignment/copy_paste_unaligned_common.h b/tools/testing/selftests/powerpc/alignment/copy_paste_unaligned_common.h new file mode 100644 index 0000000..053899f --- /dev/null +++ b/tools/testing/selftests/powerpc/alignment/copy_paste_unaligned_common.h @@ -0,0 +1,26 @@ +/* + * Copyright 2016, Chris Smart, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Declarations for common code for copy, copy_first, paste and + * paste_last unaligned tests. 
+ * + */ + +#ifndef _SELFTESTS_POWERPC_COPY_PASTE_H +#define _SELFTESTS_POWERPC_COPY_PASTE_H + +#include + +int main(int argc, char *argv[]); +void signal_action_handler(int signal_num, siginfo_t *info, void *ptr); +void setup_signal_handler(void); +extern char cacheline_buf[128] __cacheline_aligned; +extern unsigned int expected_instruction; +extern unsigned int instruction_mask; + +#endif /* _SELFTESTS_POWERPC_COPY_PASTE_H */ diff --git a/tools/testing/selftests/powerpc/alignment/copy_unaligned.c b/tools/testing/selftests/powerpc/alignment/copy_unaligned.c new file mode 100644 index 0000000..3a4e264 --- /dev/null +++ b/tools/testing/selftests/powerpc/alignment/copy_unaligned.c @@ -0,0 +1,41 @@ +/* + * Copyright 2016, Chris Smart, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Calls to copy which are not 128-byte aligned should be caught + * and sent a SIGBUS. 
+ * + */ + +#include +#include +#include "utils.h" +#include "instructions.h" +#include "copy_paste_unaligned_common.h" + +unsigned int expected_instruction = PPC_INST_COPY; +unsigned int instruction_mask = 0xfc0007fe; + +int test_copy_unaligned(void) +{ + /* Only run this test on a P9 or later */ + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_00)); + + /* Register our signal handler with SIGBUS */ + setup_signal_handler(); + + /* +1 makes buf unaligned */ + copy(cacheline_buf+1); + + /* We should not get here */ + return 1; +} + +int main(int argc, char *argv[]) +{ + return test_harness(test_copy_unaligned, "test_copy_unaligned"); +} diff --git a/tools/testing/selftests/powerpc/alignment/paste_last_unaligned.c b/tools/testing/selftests/powerpc/alignment/paste_last_unaligned.c new file mode 100644 index 0000000..6e0ad04 --- /dev/null +++ b/tools/testing/selftests/powerpc/alignment/paste_last_unaligned.c @@ -0,0 +1,43 @@ +/* + * Copyright 2016, Chris Smart, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Calls to paste_last which are not 128-byte aligned should be + * caught and sent a SIGBUS. 
+ * + */ + +#include +#include +#include "utils.h" +#include "instructions.h" +#include "copy_paste_unaligned_common.h" + +unsigned int expected_instruction = PPC_INST_PASTE_LAST; +unsigned int instruction_mask = 0xfc2007ff; + +int test_paste_last_unaligned(void) +{ + /* Only run this test on a P9 or later */ + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_00)); + + /* Register our signal handler with SIGBUS */ + setup_signal_handler(); + + copy(cacheline_buf); + + /* +1 makes buf unaligned */ + paste_last(cacheline_buf+1); + + /* We should not get here */ + return 1; +} + +int main(int argc, char *argv[]) +{ + return test_harness(test_paste_last_unaligned, "test_paste_last_unaligned"); +} diff --git a/tools/testing/selftests/powerpc/alignment/paste_unaligned.c b/tools/testing/selftests/powerpc/alignment/paste_unaligned.c new file mode 100644 index 0000000..6f982b4 --- /dev/null +++ b/tools/testing/selftests/powerpc/alignment/paste_unaligned.c @@ -0,0 +1,43 @@ +/* + * Copyright 2016, Chris Smart, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Calls to paste which are not 128-byte aligned should be caught + * and sent a SIGBUS. 
+ * + */ + +#include +#include +#include "utils.h" +#include "instructions.h" +#include "copy_paste_unaligned_common.h" + +unsigned int expected_instruction = PPC_INST_PASTE; +unsigned int instruction_mask = 0xfc0007fe; + +int test_paste_unaligned(void) +{ + /* Only run this test on a P9 or later */ + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_00)); + + /* Register our signal handler with SIGBUS */ + setup_signal_handler(); + + copy(cacheline_buf); + + /* +1 makes buf unaligned */ + paste(cacheline_buf+1); + + /* We should not get here */ + return 1; +} + +int main(int argc, char *argv[]) +{ + return test_harness(test_paste_unaligned, "test_paste_unaligned"); +} diff --git a/tools/testing/selftests/powerpc/instructions.h b/tools/testing/selftests/powerpc/instructions.h new file mode 100644 index 0000000..0fb0bd3 --- /dev/null +++ b/tools/testing/selftests/powerpc/instructions.h @@ -0,0 +1,68 @@ +#ifndef _SELFTESTS_POWERPC_INSTRUCTIONS_H +#define _SELFTESTS_POWERPC_INSTRUCTIONS_H + +#include +#include + +/* This defines the "copy" instruction from Power ISA 3.0 Book II, section 4.4. */ +#define __COPY(RA, RB, L) \ + (0x7c00060c | (RA) << (31-15) | (RB) << (31-20) | (L) << (31-10)) +#define COPY(RA, RB, L) \ + .long __COPY((RA), (RB), (L)) + +static inline void copy(void *i) +{ + asm volatile(str(COPY(0, %0, 0))";" + : + : "b" (i) + : "memory" + ); +} + +static inline void copy_first(void *i) +{ + asm volatile(str(COPY(0, %0, 1))";" + : + : "b" (i) + : "memory" + ); +} + +/* This defines the "paste" instruction from Power ISA 3.0 Book II, section 4.4. 
*/ +#define __PASTE(RA, RB, L, RC) \ + (0x7c00070c | (RA) << (31-15) | (RB) << (31-20) | (L) << (31-10) | (RC) << (31-31)) +#define PASTE(RA, RB, L, RC) \ + .long __PASTE((RA), (RB), (L), (RC)) + +static inline int paste(void *i) +{ + int cr; + + asm volatile(str(PASTE(0, %1, 0, 0))";" + "mfcr %0;" + : "=r" (cr) + : "b" (i) + : "memory" + ); + return cr; +} + +static inline int paste_last(void *i) +{ + int cr; + + asm volatile(str(PASTE(0, %1, 1, 1))";" + "mfcr %0;" + : "=r" (cr) + : "b" (i) + : "memory" + ); + return cr; +} + +#define PPC_INST_COPY __COPY(0, 0, 0) +#define PPC_INST_COPY_FIRST __COPY(0, 0, 1) +#define PPC_INST_PASTE __PASTE(0, 0, 0, 0) +#define PPC_INST_PASTE_LAST __PASTE(0, 0, 1, 1) + +#endif /* _SELFTESTS_POWERPC_INSTRUCTIONS_H */ -- cgit v0.10.2 From a9862c7440f191439a51f77233f89f7e40efe02e Mon Sep 17 00:00:00 2001 From: Andrew Donnellan Date: Fri, 18 Mar 2016 17:36:33 +1100 Subject: powerpc/rtas: Fix array overrun in ppc_rtas() syscall If ppc_rtas() is called with args.nargs == 16 and args.nret == 0, args.rets is set to point to &args.args[16], which is beyond the end of the args.args array. This results in a minor read overrun of the array when we check the first return code (which, per PAPR, is a required output of all RTAS calls) to see if there's been a hardware error. Change the nargs/nret check to ensure nargs is <= 15, allowing room for the status code. Users shouldn't be calling with nret == 0, but there's no real harm if they do, so we don't stop them. 
Signed-off-by: Andrew Donnellan Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 28736ff..8da209f 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -1070,7 +1070,7 @@ asmlinkage int ppc_rtas(struct rtas_args __user *uargs) nret = be32_to_cpu(args.nret); token = be32_to_cpu(args.token); - if (nargs > ARRAY_SIZE(args.args) + if (nargs >= ARRAY_SIZE(args.args) || nret > ARRAY_SIZE(args.args) || nargs + nret > ARRAY_SIZE(args.args)) return -EINVAL; -- cgit v0.10.2 From e2be23712a9c8e3710c9e2684a3204bd20c685d6 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Tue, 8 Mar 2016 22:26:17 +0100 Subject: powerpc/pseries: Fix error return value in cmm_mem_going_offline() cmm_mem_going_offline() is (only) called from cmm_memory_cb(), which sends the return value through notifier_from_errno(). The latter expects 0 or -errno (notifier_to_errno(notifier_from_errno(x)) is 0 for any x >= 0, so passing a positive value cannot make sense). Hence negate ENOMEM. Signed-off-by: Rasmus Villemoes Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c index fc44ad0..66e7227 100644 --- a/arch/powerpc/platforms/pseries/cmm.c +++ b/arch/powerpc/platforms/pseries/cmm.c @@ -574,7 +574,7 @@ static int cmm_mem_going_offline(void *arg) cmm_dbg("Failed to allocate memory for list " "management. Memory hotplug " "failed.\n"); - return ENOMEM; + return -ENOMEM; } memcpy(npa, pa_curr, PAGE_SIZE); if (pa_curr == cmm_page_list) -- cgit v0.10.2 From 9ddf0075f9184f1e1dabf7bfea6397b00ee17e46 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 26 Nov 2015 10:45:49 +1100 Subject: powerpc: Avoid -maltivec when using clang integrated assembler Check the assembler supports -maltivec by wrapping it with call as-option. 
Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 709a22a..b41f4c6 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -221,7 +221,7 @@ KBUILD_CFLAGS += -mno-sched-epilog endif cpu-as-$(CONFIG_4xx) += -Wa,-m405 -cpu-as-$(CONFIG_ALTIVEC) += -Wa,-maltivec +cpu-as-$(CONFIG_ALTIVEC) += $(call as-option,-Wa$(comma)-maltivec) cpu-as-$(CONFIG_E200) += -Wa,-me200 KBUILD_AFLAGS += $(cpu-as-y) -- cgit v0.10.2 From 799010244685334b34e674d354a1a71a3a6b6148 Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Fri, 1 Jul 2016 16:20:39 +1000 Subject: powerpc/timer: Large Decrementer support Power ISAv3 adds a large decrementer (LD) mode which increases the size of the decrementer register. The size of the enlarged decrementer register is between 32 and 64 bits with the exact size being dependent on the implementation. When in LD mode, reads are sign extended to 64 bits and a decrementer exception is raised when the high bit is set (i.e the value goes below zero). Writes however are truncated to the physical register width so some care needs to be taken to ensure that the high bit is not set when reloading the decrementer. This patch adds support for using the LD inside the host kernel on processors that support it. When LD mode is supported firmware will supply the ibm,dec-bits property for CPU nodes to allow the kernel to determine the maximum decrementer value. Enabling LD mode is a hypervisor privileged operation so the kernel can only enable it manually when running in hypervisor mode. Guests that support LD mode can request it using the "ibm,client-architecture-support" firmware call (not implemented in this patch) or some other platform specific method. If this property is not supplied then the traditional decrementer width of 32 bit is assumed and LD mode will not be enabled. This patch was based on initial work by Jack Miller. 
Signed-off-by: Oliver O'Halloran Signed-off-by: Balbir Singh Acked-by: Michael Neuling Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index ce44fe2..320136f 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -337,6 +337,7 @@ #define LPCR_AIL_0 0x00000000 /* MMU off exception offset 0x0 */ #define LPCR_AIL_3 0x01800000 /* MMU on exception offset 0xc00...4xxx */ #define LPCR_ONL 0x00040000 /* online - PURR/SPURR count */ +#define LPCR_LD 0x00020000 /* large decrementer */ #define LPCR_PECE 0x0001f000 /* powersave exit cause enable */ #define LPCR_PECEDP 0x00010000 /* directed priv dbells cause exit */ #define LPCR_PECEDH 0x00008000 /* directed hyp dbells cause exit */ diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h index 1092fdd..0921164 100644 --- a/arch/powerpc/include/asm/time.h +++ b/arch/powerpc/include/asm/time.h @@ -146,7 +146,7 @@ static inline void set_tb(unsigned int upper, unsigned int lower) * in auto-reload mode. The problem is PIT stops counting when it * hits zero. If it would wrap, we could use it just like a decrementer. */ -static inline unsigned int get_dec(void) +static inline u64 get_dec(void) { #if defined(CONFIG_40x) return (mfspr(SPRN_PIT)); @@ -160,10 +160,10 @@ static inline unsigned int get_dec(void) * in when the decrementer generates its interrupt: on the 1 to 0 * transition for Book E/4xx, but on the 0 to -1 transition for others.
*/ -static inline void set_dec(int val) +static inline void set_dec(u64 val) { #if defined(CONFIG_40x) - mtspr(SPRN_PIT, val); + mtspr(SPRN_PIT, (u32) val); #else #ifndef CONFIG_BOOKE --val; diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 3ed9a5a..6b4d01d 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -96,7 +96,8 @@ static struct clocksource clocksource_timebase = { .read = timebase_read, }; -#define DECREMENTER_MAX 0x7fffffff +#define DECREMENTER_DEFAULT_MAX 0x7FFFFFFF +u64 decrementer_max = DECREMENTER_DEFAULT_MAX; static int decrementer_set_next_event(unsigned long evt, struct clock_event_device *dev); @@ -504,8 +505,8 @@ static void __timer_interrupt(void) __this_cpu_inc(irq_stat.timer_irqs_event); } else { now = *next_tb - now; - if (now <= DECREMENTER_MAX) - set_dec((int)now); + if (now <= decrementer_max) + set_dec(now); /* We may have raced with new irq work */ if (test_irq_work_pending()) set_dec(1); @@ -535,7 +536,7 @@ void timer_interrupt(struct pt_regs * regs) /* Ensure a positive value is written to the decrementer, or else * some CPUs will continue to take decrementer exceptions. */ - set_dec(DECREMENTER_MAX); + set_dec(decrementer_max); /* Some implementations of hotplug will get timer interrupts while * offline, just ignore these and we also need to set @@ -583,9 +584,9 @@ static void generic_suspend_disable_irqs(void) * with suspending. 
*/ - set_dec(DECREMENTER_MAX); + set_dec(decrementer_max); local_irq_disable(); - set_dec(DECREMENTER_MAX); + set_dec(decrementer_max); } static void generic_suspend_enable_irqs(void) @@ -866,7 +867,7 @@ static int decrementer_set_next_event(unsigned long evt, static int decrementer_shutdown(struct clock_event_device *dev) { - decrementer_set_next_event(DECREMENTER_MAX, dev); + decrementer_set_next_event(decrementer_max, dev); return 0; } @@ -892,6 +893,49 @@ static void register_decrementer_clockevent(int cpu) clockevents_register_device(dec); } +static void enable_large_decrementer(void) +{ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + return; + + if (decrementer_max <= DECREMENTER_DEFAULT_MAX) + return; + + /* + * If we're running as the hypervisor we need to enable the LD manually + * otherwise firmware should have done it for us. + */ + if (cpu_has_feature(CPU_FTR_HVMODE)) + mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) | LPCR_LD); +} + +static void __init set_decrementer_max(void) +{ + struct device_node *cpu; + u32 bits = 32; + + /* Prior to ISAv3 the decrementer is always 32 bit */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + return; + + cpu = of_find_node_by_type(NULL, "cpu"); + + if (of_property_read_u32(cpu, "ibm,dec-bits", &bits) == 0) { + if (bits > 64 || bits < 32) { + pr_warn("time_init: firmware supplied invalid ibm,dec-bits"); + bits = 32; + } + + /* calculate the signed maximum given this many bits */ + decrementer_max = (1ul << (bits - 1)) - 1; + } + + of_node_put(cpu); + + pr_info("time_init: %u bit decrementer (max: %llx)\n", + bits, decrementer_max); +} + static void __init init_decrementer_clockevent(void) { int cpu = smp_processor_id(); @@ -899,7 +943,7 @@ static void __init init_decrementer_clockevent(void) clockevents_calc_mult_shift(&decrementer_clockevent, ppc_tb_freq, 4); decrementer_clockevent.max_delta_ns = - clockevent_delta2ns(DECREMENTER_MAX, &decrementer_clockevent); + clockevent_delta2ns(decrementer_max, &decrementer_clockevent); 
decrementer_clockevent.min_delta_ns = clockevent_delta2ns(2, &decrementer_clockevent); @@ -908,6 +952,9 @@ static void __init init_decrementer_clockevent(void) void secondary_cpu_time_init(void) { + /* Enable and test the large decrementer for this cpu */ + enable_large_decrementer(); + /* Start the decrementer on CPUs that have manual control * such as BookE */ @@ -973,6 +1020,10 @@ void __init time_init(void) vdso_data->tb_update_count = 0; vdso_data->tb_ticks_per_sec = tb_ticks_per_sec; + /* initialise and enable the large decrementer (if we have one) */ + set_decrementer_max(); + enable_large_decrementer(); + /* Start the decrementer on CPUs that have manual control * such as BookE */ -- cgit v0.10.2 From faf7882962e78a4c8ebb846f4520c858ee184dca Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Tue, 5 Jul 2016 11:43:21 +1000 Subject: powerpc/mm: Add a parameter to disable 1TB segs This patch adds the kernel command line parameter "disable_1tb_segments" which forces the kernel to use 256MB rather than 1TB segments. Forcing the use of 256MB segments makes it considerably easier to test code that depends on an SLB miss occurring. Suggested-by: Michael Neuling Suggested-by: Michael Ellerman Signed-off-by: Oliver O'Halloran Signed-off-by: Michael Ellerman diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 82b42c9..738bae4 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -920,6 +920,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted. dhash_entries= [KNL] Set number of hash buckets for dentry cache. + disable_1tb_segments [PPC] + Disables the use of 1TB hash page table segments. This + causes the kernel to fall back to 256MB segments which + can be useful when debugging issues that require an SLB + miss to occur. + disable= [IPV6] See Documentation/networking/ipv6.txt.
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index b2740c6..fab46db 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -317,6 +317,15 @@ int htab_remove_mapping(unsigned long vstart, unsigned long vend, return ret; } +static bool disable_1tb_segments = false; + +static int __init parse_disable_1tb_segments(char *p) +{ + disable_1tb_segments = true; + return 0; +} +early_param("disable_1tb_segments", parse_disable_1tb_segments); + static int __init htab_dt_scan_seg_sizes(unsigned long node, const char *uname, int depth, void *data) @@ -335,6 +344,12 @@ static int __init htab_dt_scan_seg_sizes(unsigned long node, for (; size >= 4; size -= 4, ++prop) { if (be32_to_cpu(prop[0]) == 40) { DBG("1T segment support detected\n"); + + if (disable_1tb_segments) { + DBG("1T segments disabled by command line\n"); + break; + } + cur_cpu_spec->mmu_features |= MMU_FTR_1T_SEGMENT; return 1; } -- cgit v0.10.2 From 656ad58ef19e2a763fa5c938b20ae0f6b8d67242 Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Fri, 1 Jul 2016 00:34:37 +1000 Subject: powerpc/boot: Add OPAL console to epapr wrappers This patch adds an OPAL console backend to the powerpc boot wrapper so that decompression failures inside the wrapper can be reported to the user. This is important since it typically indicates data corruption in the firmware and other nasty things. Currently this only works when building a little endian kernel. When compiling a 64 bit BE kernel the wrapper is always built 32 bit to be compatible with some 32 bit firmwares. BE support will be added at a later date. Another limitation of this is that only the "raw" type of OPAL console is supported, however machines that provide a hvsi console also provide a raw console so this is not an issue in practice.
Actually-written-by: Benjamin Herrenschmidt Signed-off-by: Oliver O'Halloran [mpe: Move #ifdef __powerpc64__ to avoid warnings on 32-bit] Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index 8fe78a3..00cf88a 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -70,7 +70,7 @@ $(addprefix $(obj)/,$(zlib) cuboot-c2k.o gunzip_util.o main.o): \ libfdt := fdt.c fdt_ro.c fdt_wip.c fdt_sw.c fdt_rw.c fdt_strerror.c libfdtheader := fdt.h libfdt.h libfdt_internal.h -$(addprefix $(obj)/,$(libfdt) libfdt-wrapper.o simpleboot.o epapr.o): \ +$(addprefix $(obj)/,$(libfdt) libfdt-wrapper.o simpleboot.o epapr.o opal.o): \ $(addprefix $(obj)/,$(libfdtheader)) src-wlib-y := string.S crt0.S crtsavres.S stdio.c main.c \ @@ -78,7 +78,7 @@ src-wlib-y := string.S crt0.S crtsavres.S stdio.c main.c \ ns16550.c serial.c simple_alloc.c div64.S util.S \ gunzip_util.c elf_util.c $(zlib) devtree.c stdlib.c \ oflib.c ofconsole.c cuboot.c mpsc.c cpm-serial.c \ - uartlite.c mpc52xx-psc.c + uartlite.c mpc52xx-psc.c opal.c opal-calls.S src-wlib-$(CONFIG_40x) += 4xx.c planetcore.c src-wlib-$(CONFIG_44x) += 4xx.c ebony.c bamboo.c src-wlib-$(CONFIG_8xx) += mpc8xx.c planetcore.c fsl-soc.c diff --git a/arch/powerpc/boot/opal-calls.S b/arch/powerpc/boot/opal-calls.S new file mode 100644 index 0000000..ff2f1b9 --- /dev/null +++ b/arch/powerpc/boot/opal-calls.S @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2016 IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include "ppc_asm.h" +#include "../include/asm/opal-api.h" + + .text + +#define OPAL_CALL(name, token) \ + .globl name; \ +name: \ + li r0, token; \ + b opal_call; + +opal_call: + mflr r11 + std r11,16(r1) + mfcr r12 + stw r12,8(r1) + mr r13,r2 + + /* Set opal return address */ + ld r11,opal_return@got(r2) + mtlr r11 + mfmsr r12 + + /* switch to BE when we enter OPAL */ + li r11,MSR_LE + andc r12,r12,r11 + mtspr SPRN_HSRR1,r12 + + /* load the opal call entry point and base */ + ld r11,opal@got(r2) + ld r12,8(r11) + ld r2,0(r11) + mtspr SPRN_HSRR0,r12 + hrfid + +opal_return: + FIXUP_ENDIAN + mr r2,r13; + lwz r11,8(r1); + ld r12,16(r1) + mtcr r11; + mtlr r12 + blr + +OPAL_CALL(opal_console_write, OPAL_CONSOLE_WRITE); +OPAL_CALL(opal_console_read, OPAL_CONSOLE_READ); +OPAL_CALL(opal_console_write_buffer_space, OPAL_CONSOLE_WRITE_BUFFER_SPACE); +OPAL_CALL(opal_poll_events, OPAL_POLL_EVENTS); +OPAL_CALL(opal_console_flush, OPAL_CONSOLE_FLUSH); diff --git a/arch/powerpc/boot/opal.c b/arch/powerpc/boot/opal.c new file mode 100644 index 0000000..1f37e1c --- /dev/null +++ b/arch/powerpc/boot/opal.c @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2016 IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include "ops.h" +#include "stdio.h" +#include "io.h" +#include +#include "../include/asm/opal-api.h" + +#ifdef __powerpc64__ + +/* Global OPAL struct used by opal-calls.S */ +struct opal { + u64 base; + u64 entry; +} opal; + +static u32 opal_con_id; + +int64_t opal_console_write(int64_t term_number, u64 *length, const u8 *buffer); +int64_t opal_console_read(int64_t term_number, uint64_t *length, u8 *buffer); +int64_t opal_console_write_buffer_space(uint64_t term_number, uint64_t *length); +int64_t opal_console_flush(uint64_t term_number); +int64_t opal_poll_events(uint64_t *outstanding_event_mask); + +static int opal_con_open(void) +{ + return 0; +} + +static void opal_con_putc(unsigned char c) +{ + int64_t rc; + uint64_t olen, len; + + do { + rc = opal_console_write_buffer_space(opal_con_id, &olen); + len = be64_to_cpu(olen); + if (rc) + return; + opal_poll_events(NULL); + } while (len < 1); + + + olen = cpu_to_be64(1); + opal_console_write(opal_con_id, &olen, &c); +} + +static void opal_con_close(void) +{ + opal_console_flush(opal_con_id); +} + +static void opal_init(void) +{ + void *opal_node; + + opal_node = finddevice("/ibm,opal"); + if (!opal_node) + return; + if (getprop(opal_node, "opal-base-address", &opal.base, sizeof(u64)) < 0) + return; + opal.base = be64_to_cpu(opal.base); + if (getprop(opal_node, "opal-entry-address", &opal.entry, sizeof(u64)) < 0) + return; + opal.entry = be64_to_cpu(opal.entry); +} + +int opal_console_init(void *devp, struct serial_console_data *scdp) +{ + opal_init(); + + if (devp) { + int n = getprop(devp, "reg", &opal_con_id, sizeof(u32)); + if (n != sizeof(u32)) + return -1; + opal_con_id = be32_to_cpu(opal_con_id); + } else + opal_con_id = 0; + + scdp->open = opal_con_open; + scdp->putc = opal_con_putc; + scdp->close = opal_con_close; + + return 0; +} +#else +int opal_console_init(void *devp, struct serial_console_data *scdp) +{ + return -1; +} +#endif /* __powerpc64__ */ diff --git a/arch/powerpc/boot/ops.h
b/arch/powerpc/boot/ops.h index 5e75e1c..e19b64e 100644 --- a/arch/powerpc/boot/ops.h +++ b/arch/powerpc/boot/ops.h @@ -89,6 +89,7 @@ int mpsc_console_init(void *devp, struct serial_console_data *scdp); int cpm_console_init(void *devp, struct serial_console_data *scdp); int mpc5200_psc_console_init(void *devp, struct serial_console_data *scdp); int uartlite_console_init(void *devp, struct serial_console_data *scdp); +int opal_console_init(void *devp, struct serial_console_data *scdp); void *simple_alloc_init(char *base, unsigned long heap_size, unsigned long granularity, unsigned long max_allocs); extern void flush_cache(void *, unsigned long); diff --git a/arch/powerpc/boot/ppc_asm.h b/arch/powerpc/boot/ppc_asm.h index 35ea60c..b03373d 100644 --- a/arch/powerpc/boot/ppc_asm.h +++ b/arch/powerpc/boot/ppc_asm.h @@ -61,6 +61,10 @@ #define SPRN_TBRL 268 #define SPRN_TBRU 269 +#define SPRN_HSRR0 0x13A /* Hypervisor Save/Restore 0 */ +#define SPRN_HSRR1 0x13B /* Hypervisor Save/Restore 1 */ + +#define MSR_LE 0x0000000000000001 #define FIXUP_ENDIAN \ tdi 0, 0, 0x48; /* Reverse endian of b . 
+ 8 */ \ diff --git a/arch/powerpc/boot/serial.c b/arch/powerpc/boot/serial.c index 167ee94..e04c1e4 100644 --- a/arch/powerpc/boot/serial.c +++ b/arch/powerpc/boot/serial.c @@ -132,6 +132,8 @@ int serial_console_init(void) else if (dt_is_compatible(devp, "xlnx,opb-uartlite-1.00.b") || dt_is_compatible(devp, "xlnx,xps-uartlite-1.00.a")) rc = uartlite_console_init(devp, &serial_cd); + else if (dt_is_compatible(devp, "ibm,opal-console-raw")) + rc = opal_console_init(devp, &serial_cd); /* Add other serial console driver calls here */ diff --git a/arch/powerpc/boot/types.h b/arch/powerpc/boot/types.h index 31393d1..85565a8 100644 --- a/arch/powerpc/boot/types.h +++ b/arch/powerpc/boot/types.h @@ -12,6 +12,16 @@ typedef short s16; typedef int s32; typedef long long s64; +/* required for opal-api.h */ +typedef u8 uint8_t; +typedef u16 uint16_t; +typedef u32 uint32_t; +typedef u64 uint64_t; +typedef s8 int8_t; +typedef s16 int16_t; +typedef s32 int32_t; +typedef s64 int64_t; + #define min(x,y) ({ \ typeof(x) _x = (x); \ typeof(y) _y = (y); \ -- cgit v0.10.2 From bc5c0a0d7fa94777acb9e8857177d9e4586df12f Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 6 Jul 2016 15:17:34 +1000 Subject: selftests/powerpc: Use "Delta" rather than "Error" in normal output Use "Delta" to refer to the difference between measurements, rather than "Error", so scripts that look for "Error" aren't confused into thinking there was a failure. 
Signed-off-by: Michael Ellerman diff --git a/tools/testing/selftests/powerpc/pmu/ebb/instruction_count_test.c b/tools/testing/selftests/powerpc/pmu/ebb/instruction_count_test.c index 5da3551..ae9a790 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/instruction_count_test.c +++ b/tools/testing/selftests/powerpc/pmu/ebb/instruction_count_test.c @@ -51,7 +51,7 @@ static int do_count_loop(struct event *event, uint64_t instructions, printf("Looped for %lu instructions, overhead %lu\n", instructions, overhead); printf("Expected %lu\n", expected); printf("Actual %llu\n", event->result.value); - printf("Error %ld, %f%%\n", difference, percentage); + printf("Delta %ld, %f%%\n", difference, percentage); printf("Took %d EBBs\n", ebb_state.stats.ebb_count); } -- cgit v0.10.2 From e93d8e67737e5b1405792d0a5b71cbe70edf263e Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 5 Jul 2016 15:07:52 +1000 Subject: powerpc/mm: Fix build of Book3E/64 with 64K pages Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/include/asm/nohash/64/pgtable-64k.h b/arch/powerpc/include/asm/nohash/64/pgtable-64k.h index 570fb30..9083245 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable-64k.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable-64k.h @@ -23,6 +23,7 @@ #ifndef __ASSEMBLY__ #define PTE_TABLE_SIZE PTE_FRAG_SIZE #define PMD_TABLE_SIZE (sizeof(pmd_t) << PMD_INDEX_SIZE) +#define PUD_TABLE_SIZE (0) #define PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE) #endif /* __ASSEMBLY__ */ -- cgit v0.10.2 From fecbfabe1dc940525f26eb1683cf64137befc3c2 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 5 Jul 2016 15:07:54 +1000 Subject: powerpc: Fix build with CONFIG_MEMORY_HOTPLUG on some configs For memory hotplug to work, the MMU code needs to provide the functions create_section_mapping() and remove_section_mapping() to respectively map and unmap portions of the linear mapping. 
At the moment only hash64 provides these, so we provide weak stubs that just error out. This fixes the build with configurations such as 64-bit BookE with CONFIG_MEMORY_HOTPLUG enabled. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 9eac9d4..5f84433 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -116,6 +116,16 @@ int memory_add_physaddr_to_nid(u64 start) } #endif +int __weak create_section_mapping(unsigned long start, unsigned long end) +{ + return -ENODEV; +} + +int __weak remove_section_mapping(unsigned long start, unsigned long end) +{ + return -ENODEV; +} + int arch_add_memory(int nid, u64 start, u64 size, bool for_device) { struct pglist_data *pgdata; -- cgit v0.10.2 From d468fcafb7a42f4e5a73219692dc4fd34b8440f3 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 5 Jul 2016 14:07:07 +1000 Subject: powerpc/pci: Fix build with PCI_IOV=y and EEH=n MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Despite attempting to fix this in commit fb36e9073693 ("powerpc/pci: Fix SRIOV not building without EEH enabled"), the build is still broken when PCI_IOV=y and EEH=n (eg. g5_defconfig with PCI_IOV=y): arch/powerpc/kernel/pci_dn.c: In function ‘remove_dev_pci_data’: arch/powerpc/kernel/pci_dn.c:230:18: error: unused variable ‘edev’ Incorporate Ben's idea of using __maybe_unused to avoid so many #ifdefs. 
Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c index bfe60a1..5926934 100644 --- a/arch/powerpc/kernel/pci_dn.c +++ b/arch/powerpc/kernel/pci_dn.c @@ -31,6 +31,7 @@ #include #include #include +#include /* * The function is used to find the firmware data of one @@ -181,9 +182,6 @@ struct pci_dn *add_dev_pci_data(struct pci_dev *pdev) { #ifdef CONFIG_PCI_IOV struct pci_dn *parent, *pdn; -#ifdef CONFIG_EEH - struct eeh_dev *edev; -#endif /* CONFIG_EEH */ int i; /* Only support IOV for now */ @@ -201,6 +199,8 @@ struct pci_dn *add_dev_pci_data(struct pci_dev *pdev) return NULL; for (i = 0; i < pci_sriov_get_totalvfs(pdev); i++) { + struct eeh_dev *edev __maybe_unused; + pdn = add_one_dev_pci_data(parent, NULL, i, pci_iov_virtfn_bus(pdev, i), pci_iov_virtfn_devfn(pdev, i)); @@ -227,7 +227,6 @@ void remove_dev_pci_data(struct pci_dev *pdev) #ifdef CONFIG_PCI_IOV struct pci_dn *parent; struct pci_dn *pdn, *tmp; - struct eeh_dev *edev; int i; /* @@ -263,6 +262,8 @@ void remove_dev_pci_data(struct pci_dev *pdev) * a batch mode. */ for (i = 0; i < pci_sriov_get_totalvfs(pdev); i++) { + struct eeh_dev *edev __maybe_unused; + list_for_each_entry_safe(pdn, tmp, &parent->child_list, list) { if (pdn->busno != pci_iov_virtfn_bus(pdev, i) || -- cgit v0.10.2 From fc022fdf41b7f8c48714af154bec951a92cb6cb6 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 29 Jun 2016 21:25:33 +1000 Subject: powerpc/kernel: Drop unused extern for current_set Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index c899fe3..e2d7ba1 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -80,9 +80,6 @@ EXPORT_SYMBOL(agp_special_page); void MMU_init(void); -/* XXX should be in current.h -- paulus */ -extern struct task_struct *current_set[NR_CPUS]; - /* * this tells the system to map all of ram with the segregs * (i.e. page tables) instead of the bats. 
-- cgit v0.10.2 From 63a72284b159c569ec52f380c9a8dd9342d43bb8 Mon Sep 17 00:00:00 2001 From: "Guilherme G. Piccoli" Date: Wed, 29 Jun 2016 15:14:22 -0300 Subject: powerpc/pci: Assign fixed PHB number based on device-tree properties The domain/PHB field of PCI addresses has its value obtained from a global variable, incremented each time a new domain (represented by struct pci_controller) is added on the system. The domain addition process happens during boot or due to PHB hotplug add. As recent kernels are using predictable naming for network interfaces, the network stack is more tied to PCI naming. This can be a problem in hotplug scenarios, because PCI addresses will change if devices are removed and then re-added. This situation seems unusual, but it can happen if a user wants to replace a NIC without rebooting the machine, for example. This patch changes the way PCI domain values are generated: now, we use device-tree properties to assign fixed PHB numbers to PCI addresses when available (meaning pSeries and PowerNV cases). We also use a bitmap to allow dynamic PHB numbering when device-tree properties are not used. This bitmap keeps track of used PHB numbers and if a PHB is released (by hotplug operations for example), it allows the reuse of this PHB number, avoiding PCI address to change in case of device remove and re-add soon after. No functional changes were introduced. Signed-off-by: Guilherme G. Piccoli Reviewed-by: Gavin Shan Reviewed-by: Ian Munsie Acked-by: Gavin Shan [mpe: Drop unnecessary machine_is(pseries) test] Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index d1f91e1..c6ac4f0 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -41,11 +41,18 @@ #include #include +/* hose_spinlock protects accesses to the the phb_bitmap. */ static DEFINE_SPINLOCK(hose_spinlock); LIST_HEAD(hose_list); -/* XXX kill that some day ... 
*/ -static int global_phb_number; /* Global phb counter */ +/* For dynamic PHB numbering on get_phb_number(): max number of PHBs. */ +#define MAX_PHBS 0x10000 + +/* + * For dynamic PHB numbering: used/free PHBs tracking bitmap. + * Accesses to this bitmap should be protected by hose_spinlock. + */ +static DECLARE_BITMAP(phb_bitmap, MAX_PHBS); /* ISA Memory physical address */ resource_size_t isa_mem_base; @@ -64,6 +71,42 @@ struct dma_map_ops *get_pci_dma_ops(void) } EXPORT_SYMBOL(get_pci_dma_ops); +/* + * This function should run under locking protection, specifically + * hose_spinlock. + */ +static int get_phb_number(struct device_node *dn) +{ + int ret, phb_id = -1; + u64 prop; + + /* + * Try fixed PHB numbering first, by checking archs and reading + * the respective device-tree properties. Firstly, try powernv by + * reading "ibm,opal-phbid", only present in OPAL environment. + */ + ret = of_property_read_u64(dn, "ibm,opal-phbid", &prop); + if (ret) + ret = of_property_read_u32_index(dn, "reg", 1, (u32 *)&prop); + + if (!ret) + phb_id = (int)(prop & (MAX_PHBS - 1)); + + /* We need to be sure to not use the same PHB number twice. */ + if ((phb_id >= 0) && !test_and_set_bit(phb_id, phb_bitmap)) + return phb_id; + + /* + * If not pseries nor powernv, or if fixed PHB numbering tried to add + * the same PHB number twice, then fallback to dynamic PHB numbering. 
+ */ + phb_id = find_first_zero_bit(phb_bitmap, MAX_PHBS); + BUG_ON(phb_id >= MAX_PHBS); + set_bit(phb_id, phb_bitmap); + + return phb_id; +} + struct pci_controller *pcibios_alloc_controller(struct device_node *dev) { struct pci_controller *phb; @@ -72,7 +115,7 @@ struct pci_controller *pcibios_alloc_controller(struct device_node *dev) if (phb == NULL) return NULL; spin_lock(&hose_spinlock); - phb->global_number = global_phb_number++; + phb->global_number = get_phb_number(dev); list_add_tail(&phb->list_node, &hose_list); spin_unlock(&hose_spinlock); phb->dn = dev; @@ -94,6 +137,11 @@ EXPORT_SYMBOL_GPL(pcibios_alloc_controller); void pcibios_free_controller(struct pci_controller *phb) { spin_lock(&hose_spinlock); + + /* Clear bit of phb_bitmap to allow reuse of this PHB number. */ + if (phb->global_number < MAX_PHBS) + clear_bit(phb->global_number, phb_bitmap); + list_del(&phb->list_node); spin_unlock(&hose_spinlock); -- cgit v0.10.2 From 8c6a0a1f4041f19559538649e0b9f3d9224b03a8 Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Wed, 15 Jun 2016 22:26:41 +0200 Subject: powerpc/pseries: start rtasd before PCI probing A strange behaviour is observed when comparing PCI hotplug in QEMU, between x86 and pseries. If you consider the following steps: - start a VM - add a PCI device via the QEMU monitor before the rtasd has started (for example starting the VM in paused state, or hotplug during FW or boot loader) - resume the VM execution The x86 kernel detects the PCI device, but the pseries one does not. This happens because the rtasd kernel worker is currently started under device_initcall, while PCI probing happens earlier under subsys_initcall. As a consequence, if we have a pending RTAS event at boot time, a message is printed and the event is dropped. This patch moves all the initialization of rtasd to arch_initcall, which is run before subsys_initcall: this way, logging_enabled is true when the RTAS event pops up and it is not lost anymore.
The proc fs bits stay at device_initcall because they cannot be run before fs_initcall. Signed-off-by: Greg Kurz Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c index e864b7c..a26a020 100644 --- a/arch/powerpc/kernel/rtasd.c +++ b/arch/powerpc/kernel/rtasd.c @@ -526,10 +526,8 @@ void rtas_cancel_event_scan(void) } EXPORT_SYMBOL_GPL(rtas_cancel_event_scan); -static int __init rtas_init(void) +static int __init rtas_event_scan_init(void) { - struct proc_dir_entry *entry; - if (!machine_is(pseries) && !machine_is(chrp)) return 0; @@ -562,13 +560,27 @@ static int __init rtas_init(void) return -ENOMEM; } + start_event_scan(); + + return 0; +} +arch_initcall(rtas_event_scan_init); + +static int __init rtas_init(void) +{ + struct proc_dir_entry *entry; + + if (!machine_is(pseries) && !machine_is(chrp)) + return 0; + + if (!rtas_log_buf) + return -ENODEV; + entry = proc_create("powerpc/rtas/error_log", S_IRUSR, NULL, &proc_rtas_log_operations); if (!entry) printk(KERN_ERR "Failed to create error_log proc entry\n"); - start_event_scan(); - return 0; } __initcall(rtas_init); -- cgit v0.10.2 From 68a2d80c80e1563b8dc942d4ffd5b3773b356bb6 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Fri, 8 Jul 2016 13:13:15 +1000 Subject: powerpc: Add MTD_BLOCK to powernv_defconfig This is so we can use the powernv_flash mtd driver as a block device.
Signed-off-by: Michael Neuling Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/configs/powernv_defconfig b/arch/powerpc/configs/powernv_defconfig index 959d32b..c357458 100644 --- a/arch/powerpc/configs/powernv_defconfig +++ b/arch/powerpc/configs/powernv_defconfig @@ -80,6 +80,7 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y CONFIG_MTD=y +CONFIG_MTD_BLOCK=y CONFIG_MTD_POWERNV_FLASH=y CONFIG_PARPORT=m CONFIG_PARPORT_PC=m -- cgit v0.10.2 From a203658b5ed37c11e5016d3fbbbab9ce018c1b78 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 4 Jul 2016 14:51:44 +1000 Subject: powerpc/opal: Wake up kopald polling thread before waiting for events On some environments (prototype machines, some simulators, etc...) there is no functional interrupt source to signal completion, so we rely on the fairly slow OPAL heartbeat. In a number of cases, the calls complete very quickly or even immediately. We've observed that it helps a lot to wake up the OPAL heartbeat thread before waiting for events in those cases: it will call OPAL immediately to collect completions for anything that finished fast enough.
Signed-off-by: Benjamin Herrenschmidt Acked-By: Michael Neuling Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 3b369e9..52b9f4a 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -292,6 +292,8 @@ static inline int opal_get_async_rc(struct opal_msg msg) return be64_to_cpu(msg.params[1]); } +void opal_wake_poller(void); + #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_OPAL_H */ diff --git a/arch/powerpc/platforms/powernv/opal-async.c b/arch/powerpc/platforms/powernv/opal-async.c index bdc8c0c..83bebee 100644 --- a/arch/powerpc/platforms/powernv/opal-async.c +++ b/arch/powerpc/platforms/powernv/opal-async.c @@ -117,6 +117,11 @@ int opal_async_wait_response(uint64_t token, struct opal_msg *msg) return -EINVAL; } + /* Wakeup the poller before we wait for events to speed things + * up on platforms or simulators where the interrupts aren't + * functional. + */ + opal_wake_poller(); wait_event(opal_async_wait, test_bit(token, opal_async_complete_map)); memcpy(msg, &opal_async_responses[token], sizeof(*msg)); diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 228751a..da40d6b 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -55,6 +55,7 @@ struct device_node *opal_node; static DEFINE_SPINLOCK(opal_write_lock); static struct atomic_notifier_head opal_msg_notifier_head[OPAL_MSG_TYPE_MAX]; static uint32_t opal_heartbeat; +static struct task_struct *kopald_tsk; static void opal_reinit_cores(void) { @@ -653,6 +654,7 @@ static void opal_i2c_create_devs(void) static int kopald(void *unused) { + unsigned long timeout = msecs_to_jiffies(opal_heartbeat) + 1; __be64 events; set_freezable(); @@ -660,12 +662,18 @@ static int kopald(void *unused) try_to_freeze(); opal_poll_events(&events); opal_handle_events(be64_to_cpu(events)); - msleep_interruptible(opal_heartbeat); + 
schedule_timeout_interruptible(timeout); } while (!kthread_should_stop()); return 0; } +void opal_wake_poller(void) +{ + if (kopald_tsk) + wake_up_process(kopald_tsk); +} + static void opal_init_heartbeat(void) { /* Old firwmware, we assume the HVC heartbeat is sufficient */ @@ -674,7 +682,7 @@ static void opal_init_heartbeat(void) opal_heartbeat = 0; if (opal_heartbeat) - kthread_run(kopald, NULL, "kopald"); + kopald_tsk = kthread_run(kopald, NULL, "kopald"); } static int __init opal_init(void) -- cgit v0.10.2 From 91dc068202a61741a458232de7de0627d6ac9952 Mon Sep 17 00:00:00 2001 From: Andrew Donnellan Date: Fri, 24 Jun 2016 15:54:22 +1000 Subject: powerpc/eeh: Fix pr_debug()s in eeh_cache.c eeh_cache.c doesn't build cleanly with -DDEBUG when CONFIG_PHYS_ADDR_T_64BIT is set, as a couple of pr_debug()s use "%lx" for resource_size_t parameters. Use "%pap" instead, as it's the correct format specifier for types deriving from phys_addr_t. Signed-off-by: Andrew Donnellan Reviewed-by: Gavin Shan Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/kernel/eeh_cache.c b/arch/powerpc/kernel/eeh_cache.c index ddbcfab..d4cc266 100644 --- a/arch/powerpc/kernel/eeh_cache.c +++ b/arch/powerpc/kernel/eeh_cache.c @@ -114,9 +114,9 @@ static void eeh_addr_cache_print(struct pci_io_addr_cache *cache) while (n) { struct pci_io_addr_range *piar; piar = rb_entry(n, struct pci_io_addr_range, rb_node); - pr_debug("PCI: %s addr range %d [%lx-%lx]: %s\n", + pr_debug("PCI: %s addr range %d [%pap-%pap]: %s\n", (piar->flags & IORESOURCE_IO) ? 
"i/o" : "mem", cnt, - piar->addr_lo, piar->addr_hi, pci_name(piar->pcidev)); + &piar->addr_lo, &piar->addr_hi, pci_name(piar->pcidev)); cnt++; n = rb_next(n); } @@ -159,8 +159,8 @@ eeh_addr_cache_insert(struct pci_dev *dev, resource_size_t alo, piar->flags = flags; #ifdef DEBUG - pr_debug("PIAR: insert range=[%lx:%lx] dev=%s\n", - alo, ahi, pci_name(dev)); + pr_debug("PIAR: insert range=[%pap:%pap] dev=%s\n", + &alo, &ahi, pci_name(dev)); #endif rb_link_node(&piar->rb_node, parent, p); -- cgit v0.10.2 From 53775c43feae462e145b4bb88aa2115123c3c565 Mon Sep 17 00:00:00 2001 From: Andrew Donnellan Date: Tue, 5 Jul 2016 16:12:33 +1000 Subject: powerpc/ps3: Fix typo in comment reference to CONFIG_PS3_REPOSITORY_WRITE Signed-off-by: Andrew Donnellan Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/platforms/ps3/repository.c b/arch/powerpc/platforms/ps3/repository.c index bfccdc7..814a7ea 100644 --- a/arch/powerpc/platforms/ps3/repository.c +++ b/arch/powerpc/platforms/ps3/repository.c @@ -1198,7 +1198,7 @@ int ps3_repository_delete_highmem_info(unsigned int region_index) return result ? 
-1 : 0; } -#endif /* defined(CONFIG_PS3_WRITE_REPOSITORY) */ +#endif /* defined(CONFIG_PS3_REPOSITORY_WRITE) */ #if defined(DEBUG) -- cgit v0.10.2 From fa2cff3f54cfec5c0b83afdb4f79975f5447a0b4 Mon Sep 17 00:00:00 2001 From: Andrew Donnellan Date: Tue, 5 Jul 2016 16:12:34 +1000 Subject: powerpc: Fix typo in comment reference to CONFIG_TRACE_IRQFLAGS Signed-off-by: Andrew Donnellan Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 3cb46a3..58217ae 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -250,7 +250,7 @@ notrace void arch_local_irq_restore(unsigned long en) if (WARN_ON(mfmsr() & MSR_EE)) __hard_irq_disable(); } -#endif /* CONFIG_TRACE_IRQFLAG */ +#endif /* CONFIG_TRACE_IRQFLAGS */ set_soft_enabled(0); -- cgit v0.10.2 From bdd910017c6a4d0a53e4e69d1bf92d576e305a47 Mon Sep 17 00:00:00 2001 From: Andrew Donnellan Date: Tue, 5 Jul 2016 16:12:35 +1000 Subject: powerpc/configs: Remove old symbols from defconfigs Update defconfigs to remove old symbols and comments referencing old symbols. 
Dropped: * AVERAGE * INET_LRO * EXT3_DEFAULTS_TO_ORDERED * EXT3_FS_XATTR * I2O * INFINIBAND_AMSO1100 * INFINIBAND_EHCA * IP1000 Replaced: * BLK_DEV_XIP -> BLK_DEV_RAM_DAX * CLK_PPC_CORENET -> CLK_QORIQ * EXT2_FS_XIP -> FS_DAX * EXT3_FS* -> EXT4_FS* Signed-off-by: Andrew Donnellan Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/configs/40x/acadia_defconfig b/arch/powerpc/configs/40x/acadia_defconfig index 9110a5c..3438ed9 100644 --- a/arch/powerpc/configs/40x/acadia_defconfig +++ b/arch/powerpc/configs/40x/acadia_defconfig @@ -21,7 +21,6 @@ CONFIG_IP_PNP_BOOTP=y # CONFIG_INET_XFRM_MODE_TRANSPORT is not set # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_CONNECTOR=y diff --git a/arch/powerpc/configs/40x/ep405_defconfig b/arch/powerpc/configs/40x/ep405_defconfig index 7903666..36c44c0 100644 --- a/arch/powerpc/configs/40x/ep405_defconfig +++ b/arch/powerpc/configs/40x/ep405_defconfig @@ -20,7 +20,6 @@ CONFIG_IP_PNP_BOOTP=y # CONFIG_INET_XFRM_MODE_TRANSPORT is not set # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_CONNECTOR=y diff --git a/arch/powerpc/configs/40x/kilauea_defconfig b/arch/powerpc/configs/40x/kilauea_defconfig index 01bd71b..ad2156c 100644 --- a/arch/powerpc/configs/40x/kilauea_defconfig +++ b/arch/powerpc/configs/40x/kilauea_defconfig @@ -23,7 +23,6 @@ CONFIG_IP_PNP_BOOTP=y # CONFIG_INET_XFRM_MODE_TRANSPORT is not set # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_CONNECTOR=y diff --git a/arch/powerpc/configs/40x/klondike_defconfig b/arch/powerpc/configs/40x/klondike_defconfig index e2036b7..28adb78 100644 --- 
a/arch/powerpc/configs/40x/klondike_defconfig +++ b/arch/powerpc/configs/40x/klondike_defconfig @@ -32,8 +32,6 @@ CONFIG_SCSI_SAS_ATTRS=y # CONFIG_USB_SUPPORT is not set # CONFIG_IOMMU_SUPPORT is not set CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_EXT4_FS=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y @@ -44,7 +42,6 @@ CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ASCII=y CONFIG_NLS_ISO8859_1=y CONFIG_NLS_UTF8=y -CONFIG_AVERAGE=y CONFIG_MAGIC_SYSRQ=y # CONFIG_SCHED_DEBUG is not set # CONFIG_DEBUG_BUGVERBOSE is not set diff --git a/arch/powerpc/configs/40x/makalu_defconfig b/arch/powerpc/configs/40x/makalu_defconfig index efd5170..a00f434 100644 --- a/arch/powerpc/configs/40x/makalu_defconfig +++ b/arch/powerpc/configs/40x/makalu_defconfig @@ -20,7 +20,6 @@ CONFIG_IP_PNP_BOOTP=y # CONFIG_INET_XFRM_MODE_TRANSPORT is not set # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_CONNECTOR=y diff --git a/arch/powerpc/configs/40x/obs600_defconfig b/arch/powerpc/configs/40x/obs600_defconfig index 5ded3dcd..e500e6a 100644 --- a/arch/powerpc/configs/40x/obs600_defconfig +++ b/arch/powerpc/configs/40x/obs600_defconfig @@ -23,7 +23,6 @@ CONFIG_IP_PNP_BOOTP=y # CONFIG_INET_XFRM_MODE_TRANSPORT is not set # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_CONNECTOR=y diff --git a/arch/powerpc/configs/40x/virtex_defconfig b/arch/powerpc/configs/40x/virtex_defconfig index bcb0c4d..65dc084 100644 --- a/arch/powerpc/configs/40x/virtex_defconfig +++ b/arch/powerpc/configs/40x/virtex_defconfig @@ -27,7 +27,6 @@ CONFIG_IP_MULTICAST=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y -# CONFIG_INET_LRO is not set CONFIG_NETFILTER=y CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_FILTER=m diff --git 
a/arch/powerpc/configs/40x/walnut_defconfig b/arch/powerpc/configs/40x/walnut_defconfig index 37c838f..567f99b 100644 --- a/arch/powerpc/configs/40x/walnut_defconfig +++ b/arch/powerpc/configs/40x/walnut_defconfig @@ -18,7 +18,6 @@ CONFIG_IP_PNP_BOOTP=y # CONFIG_INET_XFRM_MODE_TRANSPORT is not set # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_CONNECTOR=y diff --git a/arch/powerpc/configs/44x/akebono_defconfig b/arch/powerpc/configs/44x/akebono_defconfig index ea4ef02a..143b2fb 100644 --- a/arch/powerpc/configs/44x/akebono_defconfig +++ b/arch/powerpc/configs/44x/akebono_defconfig @@ -32,7 +32,6 @@ CONFIG_IP_PNP_BOOTP=y # CONFIG_INET_XFRM_MODE_TRANSPORT is not set # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEVTMPFS=y @@ -110,10 +109,9 @@ CONFIG_MMC=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_M41T80=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y # CONFIG_DNOTIFY is not set # CONFIG_INOTIFY_USER is not set CONFIG_VFAT_FS=y diff --git a/arch/powerpc/configs/44x/arches_defconfig b/arch/powerpc/configs/44x/arches_defconfig index 9549420..6bba1a5 100644 --- a/arch/powerpc/configs/44x/arches_defconfig +++ b/arch/powerpc/configs/44x/arches_defconfig @@ -23,7 +23,6 @@ CONFIG_IP_PNP_BOOTP=y # CONFIG_INET_XFRM_MODE_TRANSPORT is not set # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_CONNECTOR=y diff --git a/arch/powerpc/configs/44x/bamboo_defconfig b/arch/powerpc/configs/44x/bamboo_defconfig index 
a046f08..477d99f 100644 --- a/arch/powerpc/configs/44x/bamboo_defconfig +++ b/arch/powerpc/configs/44x/bamboo_defconfig @@ -21,7 +21,6 @@ CONFIG_IP_PNP_BOOTP=y # CONFIG_INET_XFRM_MODE_TRANSPORT is not set # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_CONNECTOR=y diff --git a/arch/powerpc/configs/44x/bluestone_defconfig b/arch/powerpc/configs/44x/bluestone_defconfig index a326b77..6b77aea 100644 --- a/arch/powerpc/configs/44x/bluestone_defconfig +++ b/arch/powerpc/configs/44x/bluestone_defconfig @@ -49,7 +49,7 @@ CONFIG_SENSORS_AD7414=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_M41T80=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_CRAMFS=y diff --git a/arch/powerpc/configs/44x/canyonlands_defconfig b/arch/powerpc/configs/44x/canyonlands_defconfig index d939e71..c8e6f04 100644 --- a/arch/powerpc/configs/44x/canyonlands_defconfig +++ b/arch/powerpc/configs/44x/canyonlands_defconfig @@ -23,7 +23,6 @@ CONFIG_IP_PNP_BOOTP=y # CONFIG_INET_XFRM_MODE_TRANSPORT is not set # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_CONNECTOR=y diff --git a/arch/powerpc/configs/44x/currituck_defconfig b/arch/powerpc/configs/44x/currituck_defconfig index 5aa312a..3799a26 100644 --- a/arch/powerpc/configs/44x/currituck_defconfig +++ b/arch/powerpc/configs/44x/currituck_defconfig @@ -30,7 +30,6 @@ CONFIG_IP_PNP_BOOTP=y # CONFIG_INET_XFRM_MODE_TRANSPORT is not set # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEVTMPFS=y @@ -71,10 +70,9 @@ CONFIG_USB_OHCI_HCD=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_M41T80=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# 
CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_CRAMFS=y diff --git a/arch/powerpc/configs/44x/ebony_defconfig b/arch/powerpc/configs/44x/ebony_defconfig index 5909e01..c265f54 100644 --- a/arch/powerpc/configs/44x/ebony_defconfig +++ b/arch/powerpc/configs/44x/ebony_defconfig @@ -19,7 +19,6 @@ CONFIG_IP_PNP_BOOTP=y # CONFIG_INET_XFRM_MODE_TRANSPORT is not set # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_CONNECTOR=y diff --git a/arch/powerpc/configs/44x/eiger_defconfig b/arch/powerpc/configs/44x/eiger_defconfig index 57499d2..bb6bd6d 100644 --- a/arch/powerpc/configs/44x/eiger_defconfig +++ b/arch/powerpc/configs/44x/eiger_defconfig @@ -24,7 +24,6 @@ CONFIG_IP_PNP_BOOTP=y # CONFIG_INET_XFRM_MODE_TRANSPORT is not set # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_CONNECTOR=y @@ -43,7 +42,6 @@ CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_SG=y CONFIG_FUSION=y CONFIG_FUSION_SAS=y -CONFIG_I2O=y CONFIG_NETDEVICES=y CONFIG_IBM_EMAC=y CONFIG_IBM_EMAC_RXB=256 diff --git a/arch/powerpc/configs/44x/icon_defconfig b/arch/powerpc/configs/44x/icon_defconfig index 5d52185..060f2ed 100644 --- a/arch/powerpc/configs/44x/icon_defconfig +++ b/arch/powerpc/configs/44x/icon_defconfig @@ -23,7 +23,6 @@ CONFIG_IP_PNP_BOOTP=y # CONFIG_INET_XFRM_MODE_TRANSPORT is not set # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_CONNECTOR=y @@ -76,8 +75,7 @@ CONFIG_LOGO=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_DS1307=y CONFIG_EXT2_FS=y 
-CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y CONFIG_VFAT_FS=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y diff --git a/arch/powerpc/configs/44x/iss476-smp_defconfig b/arch/powerpc/configs/44x/iss476-smp_defconfig index 0ad3e44..115a6b2 100644 --- a/arch/powerpc/configs/44x/iss476-smp_defconfig +++ b/arch/powerpc/configs/44x/iss476-smp_defconfig @@ -32,7 +32,6 @@ CONFIG_IP_PNP_BOOTP=y # CONFIG_INET_XFRM_MODE_TRANSPORT is not set # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_CONNECTOR=y @@ -56,10 +55,9 @@ CONFIG_SERIAL_OF_PLATFORM=y CONFIG_THERMAL=y # CONFIG_USB_SUPPORT is not set CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_CRAMFS=y diff --git a/arch/powerpc/configs/44x/katmai_defconfig b/arch/powerpc/configs/44x/katmai_defconfig index a042335..b999048 100644 --- a/arch/powerpc/configs/44x/katmai_defconfig +++ b/arch/powerpc/configs/44x/katmai_defconfig @@ -21,7 +21,6 @@ CONFIG_IP_PNP_BOOTP=y # CONFIG_INET_XFRM_MODE_TRANSPORT is not set # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_CONNECTOR=y diff --git a/arch/powerpc/configs/44x/rainier_defconfig b/arch/powerpc/configs/44x/rainier_defconfig index 91c2aff..b8c9ee4 100644 --- a/arch/powerpc/configs/44x/rainier_defconfig +++ b/arch/powerpc/configs/44x/rainier_defconfig @@ -22,7 +22,6 @@ CONFIG_IP_PNP_BOOTP=y # CONFIG_INET_XFRM_MODE_TRANSPORT is not set # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set 
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_CONNECTOR=y diff --git a/arch/powerpc/configs/44x/redwood_defconfig b/arch/powerpc/configs/44x/redwood_defconfig index 7fddf3f..a4bb0484 100644 --- a/arch/powerpc/configs/44x/redwood_defconfig +++ b/arch/powerpc/configs/44x/redwood_defconfig @@ -24,7 +24,6 @@ CONFIG_IP_PNP_BOOTP=y # CONFIG_INET_XFRM_MODE_TRANSPORT is not set # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_CONNECTOR=y @@ -41,7 +40,6 @@ CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_SG=y CONFIG_FUSION=y CONFIG_FUSION_SAS=y -CONFIG_I2O=y CONFIG_NETDEVICES=y CONFIG_IBM_EMAC=y CONFIG_IBM_EMAC_RXB=256 diff --git a/arch/powerpc/configs/44x/sam440ep_defconfig b/arch/powerpc/configs/44x/sam440ep_defconfig index 6928012..63302fb 100644 --- a/arch/powerpc/configs/44x/sam440ep_defconfig +++ b/arch/powerpc/configs/44x/sam440ep_defconfig @@ -26,7 +26,6 @@ CONFIG_IP_PNP_BOOTP=y # CONFIG_INET_XFRM_MODE_TRANSPORT is not set # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_CONNECTOR=y @@ -85,9 +84,8 @@ CONFIG_RTC_DRV_M41T80_WDT=y CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_EXT3_FS_POSIX_ACL=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_REISERFS_FS=y CONFIG_AUTOFS4_FS=y CONFIG_ISO9660_FS=y diff --git a/arch/powerpc/configs/44x/sequoia_defconfig b/arch/powerpc/configs/44x/sequoia_defconfig index c294369..b3792fd 100644 --- a/arch/powerpc/configs/44x/sequoia_defconfig +++ b/arch/powerpc/configs/44x/sequoia_defconfig @@ -23,7 +23,6 @@ CONFIG_IP_PNP_BOOTP=y # CONFIG_INET_XFRM_MODE_TRANSPORT is not set # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set -# CONFIG_INET_LRO is not set # 
CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_CONNECTOR=y diff --git a/arch/powerpc/configs/44x/taishan_defconfig b/arch/powerpc/configs/44x/taishan_defconfig index e779228..ff6f862 100644 --- a/arch/powerpc/configs/44x/taishan_defconfig +++ b/arch/powerpc/configs/44x/taishan_defconfig @@ -21,7 +21,6 @@ CONFIG_IP_PNP_BOOTP=y # CONFIG_INET_XFRM_MODE_TRANSPORT is not set # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_CONNECTOR=y diff --git a/arch/powerpc/configs/44x/virtex5_defconfig b/arch/powerpc/configs/44x/virtex5_defconfig index 53d0300..ce05206 100644 --- a/arch/powerpc/configs/44x/virtex5_defconfig +++ b/arch/powerpc/configs/44x/virtex5_defconfig @@ -26,7 +26,6 @@ CONFIG_IP_MULTICAST=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y -# CONFIG_INET_LRO is not set CONFIG_NETFILTER=y CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_FILTER=m diff --git a/arch/powerpc/configs/44x/warp_defconfig b/arch/powerpc/configs/44x/warp_defconfig index ee43437..ab93248 100644 --- a/arch/powerpc/configs/44x/warp_defconfig +++ b/arch/powerpc/configs/44x/warp_defconfig @@ -23,7 +23,6 @@ CONFIG_UNIX=y CONFIG_INET=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_NETFILTER=y CONFIG_VLAN_8021Q=y @@ -73,9 +72,7 @@ CONFIG_LEDS_GPIO=y CONFIG_LEDS_TRIGGERS=y CONFIG_LEDS_TRIGGER_DEFAULT_ON=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -# CONFIG_EXT3_FS_XATTR is not set +CONFIG_EXT4_FS=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_PROC_KCORE=y diff --git a/arch/powerpc/configs/52xx/cm5200_defconfig b/arch/powerpc/configs/52xx/cm5200_defconfig index 19fad0e..c1faac8 100644 --- a/arch/powerpc/configs/52xx/cm5200_defconfig +++ b/arch/powerpc/configs/52xx/cm5200_defconfig @@ -22,7 +22,6 @@ CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y 
CONFIG_IP_PNP_BOOTP=y CONFIG_SYN_COOKIES=y -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set @@ -61,8 +60,7 @@ CONFIG_USB_STORAGE=y CONFIG_DMADEVICES=y CONFIG_PPC_BESTCOMM=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_PROC_KCORE=y diff --git a/arch/powerpc/configs/52xx/lite5200b_defconfig b/arch/powerpc/configs/52xx/lite5200b_defconfig index 5f40ba9..9493b02 100644 --- a/arch/powerpc/configs/52xx/lite5200b_defconfig +++ b/arch/powerpc/configs/52xx/lite5200b_defconfig @@ -24,7 +24,6 @@ CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_SYN_COOKIES=y -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set @@ -53,8 +52,7 @@ CONFIG_I2C_MPC=y CONFIG_DMADEVICES=y CONFIG_PPC_BESTCOMM=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_NFS_FS=y diff --git a/arch/powerpc/configs/52xx/motionpro_defconfig b/arch/powerpc/configs/52xx/motionpro_defconfig index 909e185..fe8126b 100644 --- a/arch/powerpc/configs/52xx/motionpro_defconfig +++ b/arch/powerpc/configs/52xx/motionpro_defconfig @@ -22,7 +22,6 @@ CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_SYN_COOKIES=y -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set @@ -73,8 +72,7 @@ CONFIG_RTC_DRV_DS1307=y CONFIG_DMADEVICES=y CONFIG_PPC_BESTCOMM=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_PROC_KCORE=y diff --git a/arch/powerpc/configs/52xx/pcm030_defconfig b/arch/powerpc/configs/52xx/pcm030_defconfig index 649a01a..1554de6 100644 --- a/arch/powerpc/configs/52xx/pcm030_defconfig +++ 
b/arch/powerpc/configs/52xx/pcm030_defconfig @@ -34,7 +34,6 @@ CONFIG_IP_PNP_BOOTP=y # CONFIG_INET_XFRM_MODE_TRANSPORT is not set # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set -# CONFIG_INET_LRO is not set # CONFIG_INET_DIAG is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" @@ -74,8 +73,7 @@ CONFIG_RTC_DRV_PCF8563=m CONFIG_DMADEVICES=y CONFIG_PPC_BESTCOMM=y CONFIG_EXT2_FS=m -CONFIG_EXT3_FS=m -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=m # CONFIG_DNOTIFY is not set CONFIG_VFAT_FS=m CONFIG_FAT_DEFAULT_CODEPAGE=850 diff --git a/arch/powerpc/configs/52xx/tqm5200_defconfig b/arch/powerpc/configs/52xx/tqm5200_defconfig index efab838..b8b316b 100644 --- a/arch/powerpc/configs/52xx/tqm5200_defconfig +++ b/arch/powerpc/configs/52xx/tqm5200_defconfig @@ -26,7 +26,6 @@ CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_SYN_COOKIES=y -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set @@ -75,8 +74,7 @@ CONFIG_RTC_DRV_DS1374=y CONFIG_DMADEVICES=y CONFIG_PPC_BESTCOMM=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_PROC_KCORE=y diff --git a/arch/powerpc/configs/83xx/asp8347_defconfig b/arch/powerpc/configs/83xx/asp8347_defconfig index bcdfb07..b60cac0 100644 --- a/arch/powerpc/configs/83xx/asp8347_defconfig +++ b/arch/powerpc/configs/83xx/asp8347_defconfig @@ -26,7 +26,6 @@ CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_SYN_COOKIES=y -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set @@ -63,8 +62,7 @@ CONFIG_USB_EHCI_FSL=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_DS1374=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_JFFS2_FS=y diff --git 
a/arch/powerpc/configs/83xx/kmeter1_defconfig b/arch/powerpc/configs/83xx/kmeter1_defconfig index 11a9592..9547dcd 100644 --- a/arch/powerpc/configs/83xx/kmeter1_defconfig +++ b/arch/powerpc/configs/83xx/kmeter1_defconfig @@ -28,7 +28,6 @@ CONFIG_IP_PNP=y # CONFIG_INET_XFRM_MODE_TRANSPORT is not set # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_TIPC=y CONFIG_BRIDGE=m diff --git a/arch/powerpc/configs/83xx/mpc8313_rdb_defconfig b/arch/powerpc/configs/83xx/mpc8313_rdb_defconfig index b47a41f..80aa844 100644 --- a/arch/powerpc/configs/83xx/mpc8313_rdb_defconfig +++ b/arch/powerpc/configs/83xx/mpc8313_rdb_defconfig @@ -23,7 +23,6 @@ CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_SYN_COOKIES=y -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set @@ -79,8 +78,7 @@ CONFIG_RTC_CLASS=y CONFIG_RTC_INTF_DEV_UIE_EMUL=y CONFIG_RTC_DRV_DS1307=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_JFFS2_FS=y diff --git a/arch/powerpc/configs/83xx/mpc8315_rdb_defconfig b/arch/powerpc/configs/83xx/mpc8315_rdb_defconfig index e28c83f..d89d13b 100644 --- a/arch/powerpc/configs/83xx/mpc8315_rdb_defconfig +++ b/arch/powerpc/configs/83xx/mpc8315_rdb_defconfig @@ -23,7 +23,6 @@ CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_SYN_COOKIES=y -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set @@ -77,8 +76,7 @@ CONFIG_RTC_CLASS=y CONFIG_RTC_INTF_DEV_UIE_EMUL=y CONFIG_RTC_DRV_DS1307=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_JFFS2_FS=y diff --git a/arch/powerpc/configs/83xx/mpc832x_mds_defconfig 
b/arch/powerpc/configs/83xx/mpc832x_mds_defconfig index e84d35b..e789518 100644 --- a/arch/powerpc/configs/83xx/mpc832x_mds_defconfig +++ b/arch/powerpc/configs/83xx/mpc832x_mds_defconfig @@ -26,7 +26,6 @@ CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_SYN_COOKIES=y -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set @@ -52,8 +51,7 @@ CONFIG_WATCHDOG=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_DS1374=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_NFS_FS=y diff --git a/arch/powerpc/configs/83xx/mpc832x_rdb_defconfig b/arch/powerpc/configs/83xx/mpc832x_rdb_defconfig index ae145f4..917a49c 100644 --- a/arch/powerpc/configs/83xx/mpc832x_rdb_defconfig +++ b/arch/powerpc/configs/83xx/mpc832x_rdb_defconfig @@ -26,7 +26,6 @@ CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_SYN_COOKIES=y -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set @@ -64,8 +63,7 @@ CONFIG_USB_STORAGE=y CONFIG_MMC=y CONFIG_MMC_SPI=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_PROC_KCORE=y diff --git a/arch/powerpc/configs/83xx/mpc834x_itx_defconfig b/arch/powerpc/configs/83xx/mpc834x_itx_defconfig index 87fc15b..00f636e 100644 --- a/arch/powerpc/configs/83xx/mpc834x_itx_defconfig +++ b/arch/powerpc/configs/83xx/mpc834x_itx_defconfig @@ -24,7 +24,6 @@ CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_SYN_COOKIES=y -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set @@ -75,8 +74,7 @@ CONFIG_RTC_CLASS=y CONFIG_RTC_INTF_DEV_UIE_EMUL=y CONFIG_RTC_DRV_DS1307=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set 
+CONFIG_EXT4_FS=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_PROC_KCORE=y diff --git a/arch/powerpc/configs/83xx/mpc834x_itxgp_defconfig b/arch/powerpc/configs/83xx/mpc834x_itxgp_defconfig index 9a2ff25..a539d44 100644 --- a/arch/powerpc/configs/83xx/mpc834x_itxgp_defconfig +++ b/arch/powerpc/configs/83xx/mpc834x_itxgp_defconfig @@ -24,7 +24,6 @@ CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_SYN_COOKIES=y -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set @@ -66,8 +65,7 @@ CONFIG_RTC_CLASS=y CONFIG_RTC_INTF_DEV_UIE_EMUL=y CONFIG_RTC_DRV_DS1307=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_PROC_KCORE=y diff --git a/arch/powerpc/configs/83xx/mpc834x_mds_defconfig b/arch/powerpc/configs/83xx/mpc834x_mds_defconfig index e44edc5..9f0ddc83 100644 --- a/arch/powerpc/configs/83xx/mpc834x_mds_defconfig +++ b/arch/powerpc/configs/83xx/mpc834x_mds_defconfig @@ -25,7 +25,6 @@ CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_SYN_COOKIES=y -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set @@ -51,8 +50,7 @@ CONFIG_WATCHDOG=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_DS1374=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_NFS_FS=y diff --git a/arch/powerpc/configs/83xx/mpc836x_mds_defconfig b/arch/powerpc/configs/83xx/mpc836x_mds_defconfig index 94a7d85..ceed4c1 100644 --- a/arch/powerpc/configs/83xx/mpc836x_mds_defconfig +++ b/arch/powerpc/configs/83xx/mpc836x_mds_defconfig @@ -25,7 +25,6 @@ CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_SYN_COOKIES=y -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set @@ -57,8 +56,7 @@ 
CONFIG_WATCHDOG=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_DS1374=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_NFS_FS=y diff --git a/arch/powerpc/configs/83xx/mpc836x_rdk_defconfig b/arch/powerpc/configs/83xx/mpc836x_rdk_defconfig index 761ed8e..a6819bf 100644 --- a/arch/powerpc/configs/83xx/mpc836x_rdk_defconfig +++ b/arch/powerpc/configs/83xx/mpc836x_rdk_defconfig @@ -24,7 +24,6 @@ CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_SYN_COOKIES=y -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_MTD=y @@ -65,8 +64,7 @@ CONFIG_LOGO=y # CONFIG_LOGO_LINUX_MONO is not set # CONFIG_USB_SUPPORT is not set CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_JFFS2_FS=y diff --git a/arch/powerpc/configs/83xx/mpc837x_mds_defconfig b/arch/powerpc/configs/83xx/mpc837x_mds_defconfig index bcf1b48..4bd1992 100644 --- a/arch/powerpc/configs/83xx/mpc837x_mds_defconfig +++ b/arch/powerpc/configs/83xx/mpc837x_mds_defconfig @@ -22,7 +22,6 @@ CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_SYN_COOKIES=y -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set @@ -50,8 +49,7 @@ CONFIG_I2C_CHARDEV=y CONFIG_I2C_MPC=y CONFIG_WATCHDOG=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_NFS_FS=y diff --git a/arch/powerpc/configs/83xx/mpc837x_rdb_defconfig b/arch/powerpc/configs/83xx/mpc837x_rdb_defconfig index f0f0ebf..2d4bb63 100644 --- a/arch/powerpc/configs/83xx/mpc837x_rdb_defconfig +++ b/arch/powerpc/configs/83xx/mpc837x_rdb_defconfig @@ -24,7 +24,6 @@ CONFIG_SYN_COOKIES=y # CONFIG_INET_XFRM_MODE_TRANSPORT is not set # CONFIG_INET_XFRM_MODE_TUNNEL is not set # 
CONFIG_INET_XFRM_MODE_BEET is not set -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set @@ -77,8 +76,7 @@ CONFIG_USB_MON=y CONFIG_USB_EHCI_HCD=y CONFIG_USB_EHCI_FSL=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_NFS_FS=y diff --git a/arch/powerpc/configs/83xx/sbc834x_defconfig b/arch/powerpc/configs/83xx/sbc834x_defconfig index d2e4d82..b3380db 100644 --- a/arch/powerpc/configs/83xx/sbc834x_defconfig +++ b/arch/powerpc/configs/83xx/sbc834x_defconfig @@ -22,7 +22,6 @@ CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_SYN_COOKIES=y -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set @@ -65,9 +64,7 @@ CONFIG_USB_EHCI_HCD=y CONFIG_USB_EHCI_FSL=y CONFIG_USB_STORAGE=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -# CONFIG_EXT3_FS_XATTR is not set +CONFIG_EXT4_FS=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_NFS_FS=y diff --git a/arch/powerpc/configs/85xx/ge_imp3a_defconfig b/arch/powerpc/configs/85xx/ge_imp3a_defconfig index b0939dd..c79283b 100644 --- a/arch/powerpc/configs/85xx/ge_imp3a_defconfig +++ b/arch/powerpc/configs/85xx/ge_imp3a_defconfig @@ -165,10 +165,8 @@ CONFIG_FSL_DMA=y CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_EXT3_FS_POSIX_ACL=y CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_FUSE_FS=y CONFIG_ISO9660_FS=y CONFIG_JOLIET=y diff --git a/arch/powerpc/configs/85xx/kmp204x_defconfig b/arch/powerpc/configs/85xx/kmp204x_defconfig index e94d3eb..aaaaa60 100644 --- a/arch/powerpc/configs/85xx/kmp204x_defconfig +++ b/arch/powerpc/configs/85xx/kmp204x_defconfig @@ -64,7 +64,6 @@ CONFIG_IP_PIMSM_V2=y CONFIG_INET_AH=y CONFIG_INET_ESP=y CONFIG_INET_IPCOMP=y -# CONFIG_INET_LRO 
is not set CONFIG_IPV6=y CONFIG_IP_SCTP=m CONFIG_TIPC=y @@ -189,7 +188,7 @@ CONFIG_RTC_DRV_DS3232=y CONFIG_RTC_DRV_CMOS=y CONFIG_UIO=y CONFIG_STAGING=y -CONFIG_CLK_PPC_CORENET=y +CONFIG_CLK_QORIQ=y CONFIG_EXT2_FS=y CONFIG_NTFS_FS=y CONFIG_PROC_KCORE=y diff --git a/arch/powerpc/configs/85xx/ksi8560_defconfig b/arch/powerpc/configs/85xx/ksi8560_defconfig index 6f753a7..bd814df 100644 --- a/arch/powerpc/configs/85xx/ksi8560_defconfig +++ b/arch/powerpc/configs/85xx/ksi8560_defconfig @@ -21,7 +21,6 @@ CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_SYN_COOKIES=y -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set @@ -49,8 +48,7 @@ CONFIG_SERIAL_CPM=y CONFIG_SERIAL_CPM_CONSOLE=y CONFIG_GEN_RTC=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_NFS_FS=y diff --git a/arch/powerpc/configs/85xx/mpc8540_ads_defconfig b/arch/powerpc/configs/85xx/mpc8540_ads_defconfig index e38c373..32af10d 100644 --- a/arch/powerpc/configs/85xx/mpc8540_ads_defconfig +++ b/arch/powerpc/configs/85xx/mpc8540_ads_defconfig @@ -22,7 +22,6 @@ CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_SYN_COOKIES=y -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set @@ -41,8 +40,7 @@ CONFIG_SERIAL_8250_CONSOLE=y # CONFIG_HW_RANDOM is not set CONFIG_GEN_RTC=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_NFS_FS=y diff --git a/arch/powerpc/configs/85xx/mpc8560_ads_defconfig b/arch/powerpc/configs/85xx/mpc8560_ads_defconfig index 48fc8e3..a52b217 100644 --- a/arch/powerpc/configs/85xx/mpc8560_ads_defconfig +++ b/arch/powerpc/configs/85xx/mpc8560_ads_defconfig @@ -21,7 +21,6 @@ CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y 
CONFIG_SYN_COOKIES=y -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set @@ -44,8 +43,7 @@ CONFIG_SERIAL_CPM=y CONFIG_SERIAL_CPM_CONSOLE=y CONFIG_GEN_RTC=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_NFS_FS=y diff --git a/arch/powerpc/configs/85xx/mpc85xx_cds_defconfig b/arch/powerpc/configs/85xx/mpc85xx_cds_defconfig index ecb0c3b..002bb48 100644 --- a/arch/powerpc/configs/85xx/mpc85xx_cds_defconfig +++ b/arch/powerpc/configs/85xx/mpc85xx_cds_defconfig @@ -23,7 +23,6 @@ CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_SYN_COOKIES=y -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set @@ -46,8 +45,7 @@ CONFIG_SERIAL_8250_CONSOLE=y # CONFIG_HW_RANDOM is not set CONFIG_GEN_RTC=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_NFS_FS=y diff --git a/arch/powerpc/configs/85xx/sbc8548_defconfig b/arch/powerpc/configs/85xx/sbc8548_defconfig index 72b7ccf..97ae023 100644 --- a/arch/powerpc/configs/85xx/sbc8548_defconfig +++ b/arch/powerpc/configs/85xx/sbc8548_defconfig @@ -20,7 +20,6 @@ CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_SYN_COOKIES=y -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set diff --git a/arch/powerpc/configs/85xx/socrates_defconfig b/arch/powerpc/configs/85xx/socrates_defconfig index 0ad7bd5..13579cb 100644 --- a/arch/powerpc/configs/85xx/socrates_defconfig +++ b/arch/powerpc/configs/85xx/socrates_defconfig @@ -22,7 +22,6 @@ CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_SYN_COOKIES=y -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_CAN=y CONFIG_MTD=y @@ -79,8 +78,7 @@ 
CONFIG_USB_OHCI_HCD_PPC_OF_BE=y CONFIG_USB_STORAGE=y CONFIG_RTC_CLASS=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_JFFS2_FS=y diff --git a/arch/powerpc/configs/85xx/stx_gp3_defconfig b/arch/powerpc/configs/85xx/stx_gp3_defconfig index b451905..384926f 100644 --- a/arch/powerpc/configs/85xx/stx_gp3_defconfig +++ b/arch/powerpc/configs/85xx/stx_gp3_defconfig @@ -17,7 +17,6 @@ CONFIG_UNIX=y CONFIG_INET=y CONFIG_IP_PNP=y CONFIG_IP_PNP_BOOTP=y -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_NETFILTER=y CONFIG_IP_NF_IPTABLES=m @@ -53,8 +52,7 @@ CONFIG_AGP=m CONFIG_DRM=m CONFIG_SOUND=m CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y CONFIG_AUTOFS4_FS=y CONFIG_ISO9660_FS=m CONFIG_UDF_FS=m diff --git a/arch/powerpc/configs/85xx/tqm8540_defconfig b/arch/powerpc/configs/85xx/tqm8540_defconfig index 4daaf29..908f388 100644 --- a/arch/powerpc/configs/85xx/tqm8540_defconfig +++ b/arch/powerpc/configs/85xx/tqm8540_defconfig @@ -20,7 +20,6 @@ CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_SYN_COOKIES=y -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y @@ -50,8 +49,7 @@ CONFIG_I2C_MPC=y CONFIG_HWMON_DEBUG_CHIP=y CONFIG_SENSORS_LM75=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_JFFS2_FS=y diff --git a/arch/powerpc/configs/85xx/tqm8541_defconfig b/arch/powerpc/configs/85xx/tqm8541_defconfig index bb402b3..f47e576 100644 --- a/arch/powerpc/configs/85xx/tqm8541_defconfig +++ b/arch/powerpc/configs/85xx/tqm8541_defconfig @@ -20,7 +20,6 @@ CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_SYN_COOKIES=y -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y @@ -52,8 +51,7 @@ CONFIG_I2C_MPC=y 
CONFIG_HWMON_DEBUG_CHIP=y CONFIG_SENSORS_LM75=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_JFFS2_FS=y diff --git a/arch/powerpc/configs/85xx/tqm8548_defconfig b/arch/powerpc/configs/85xx/tqm8548_defconfig index 685d0fb..42f5d0a 100644 --- a/arch/powerpc/configs/85xx/tqm8548_defconfig +++ b/arch/powerpc/configs/85xx/tqm8548_defconfig @@ -28,7 +28,6 @@ CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_SYN_COOKIES=y -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set diff --git a/arch/powerpc/configs/85xx/tqm8555_defconfig b/arch/powerpc/configs/85xx/tqm8555_defconfig index 02a931d..71552b7 100644 --- a/arch/powerpc/configs/85xx/tqm8555_defconfig +++ b/arch/powerpc/configs/85xx/tqm8555_defconfig @@ -20,7 +20,6 @@ CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_SYN_COOKIES=y -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y @@ -52,8 +51,7 @@ CONFIG_I2C_MPC=y CONFIG_HWMON_DEBUG_CHIP=y CONFIG_SENSORS_LM75=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_JFFS2_FS=y diff --git a/arch/powerpc/configs/85xx/tqm8560_defconfig b/arch/powerpc/configs/85xx/tqm8560_defconfig index 633d5b7..25aac97 100644 --- a/arch/powerpc/configs/85xx/tqm8560_defconfig +++ b/arch/powerpc/configs/85xx/tqm8560_defconfig @@ -20,7 +20,6 @@ CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_SYN_COOKIES=y -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y @@ -52,8 +51,7 @@ CONFIG_I2C_MPC=y CONFIG_HWMON_DEBUG_CHIP=y CONFIG_SENSORS_LM75=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_JFFS2_FS=y diff --git 
a/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig b/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig index 858b539..dbd961d 100644 --- a/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig +++ b/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig @@ -54,7 +54,6 @@ CONFIG_IP_PIMSM_V2=y # CONFIG_INET_XFRM_MODE_TRANSPORT is not set # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set -# CONFIG_INET_LRO is not set CONFIG_IPV6=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_MTD=y @@ -124,8 +123,7 @@ CONFIG_RTC_DRV_CMOS=y CONFIG_DMADEVICES=y CONFIG_FSL_DMA=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y CONFIG_ISO9660_FS=y CONFIG_JOLIET=y CONFIG_ZISOFS=y diff --git a/arch/powerpc/configs/adder875_defconfig b/arch/powerpc/configs/adder875_defconfig index d89ff40..6a3f825 100644 --- a/arch/powerpc/configs/adder875_defconfig +++ b/arch/powerpc/configs/adder875_defconfig @@ -24,7 +24,6 @@ CONFIG_SYN_COOKIES=y # CONFIG_INET_XFRM_MODE_TRANSPORT is not set # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set diff --git a/arch/powerpc/configs/amigaone_defconfig b/arch/powerpc/configs/amigaone_defconfig index 84f1b41..8b83ce8 100644 --- a/arch/powerpc/configs/amigaone_defconfig +++ b/arch/powerpc/configs/amigaone_defconfig @@ -29,7 +29,6 @@ CONFIG_SYN_COOKIES=y # CONFIG_INET_XFRM_MODE_TRANSPORT is not set # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_NETFILTER=y # CONFIG_NETFILTER_ADVANCED is not set @@ -106,7 +105,6 @@ CONFIG_USB_STORAGE=m CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_CMOS=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y CONFIG_EXT4_FS=y CONFIG_ISO9660_FS=y CONFIG_MSDOS_FS=m diff --git a/arch/powerpc/configs/c2k_defconfig b/arch/powerpc/configs/c2k_defconfig 
index 340685c..7c9d953 100644 --- a/arch/powerpc/configs/c2k_defconfig +++ b/arch/powerpc/configs/c2k_defconfig @@ -306,15 +306,13 @@ CONFIG_INFINIBAND=m CONFIG_INFINIBAND_USER_MAD=m CONFIG_INFINIBAND_USER_ACCESS=m CONFIG_INFINIBAND_MTHCA=m -CONFIG_INFINIBAND_AMSO1100=m CONFIG_INFINIBAND_IPOIB=m CONFIG_INFINIBAND_IPOIB_CM=y CONFIG_INFINIBAND_SRP=m CONFIG_DMADEVICES=y -CONFIG_EXT3_FS=m -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=m +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_QUOTA=y CONFIG_QFMT_V2=y CONFIG_AUTOFS4_FS=m diff --git a/arch/powerpc/configs/cell_defconfig b/arch/powerpc/configs/cell_defconfig index db328e6..7b6f30d 100644 --- a/arch/powerpc/configs/cell_defconfig +++ b/arch/powerpc/configs/cell_defconfig @@ -184,7 +184,7 @@ CONFIG_EDAC_MM_EDAC=y CONFIG_EDAC_CELL=y CONFIG_UIO=m CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_AUTOFS4_FS=m CONFIG_ISO9660_FS=m CONFIG_JOLIET=y diff --git a/arch/powerpc/configs/chrp32_defconfig b/arch/powerpc/configs/chrp32_defconfig index 253a9f2..ac9a50d 100644 --- a/arch/powerpc/configs/chrp32_defconfig +++ b/arch/powerpc/configs/chrp32_defconfig @@ -110,7 +110,6 @@ CONFIG_USB_OHCI_HCD=y CONFIG_USB_UHCI_HCD=y CONFIG_USB_STORAGE=m CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y CONFIG_EXT4_FS=y CONFIG_ISO9660_FS=y CONFIG_MSDOS_FS=m diff --git a/arch/powerpc/configs/ep8248e_defconfig b/arch/powerpc/configs/ep8248e_defconfig index 7c13704..3403b85 100644 --- a/arch/powerpc/configs/ep8248e_defconfig +++ b/arch/powerpc/configs/ep8248e_defconfig @@ -23,7 +23,6 @@ CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_SYN_COOKIES=y -# CONFIG_INET_LRO is not set CONFIG_NETFILTER=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set @@ -50,9 +49,7 @@ CONFIG_SERIAL_CPM_CONSOLE=y # CONFIG_HWMON is not set # CONFIG_USB_SUPPORT is not set CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set 
-# CONFIG_EXT3_FS_XATTR is not set +CONFIG_EXT4_FS=y CONFIG_AUTOFS4_FS=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y diff --git a/arch/powerpc/configs/ep88xc_defconfig b/arch/powerpc/configs/ep88xc_defconfig index ee96be8..95411ae 100644 --- a/arch/powerpc/configs/ep88xc_defconfig +++ b/arch/powerpc/configs/ep88xc_defconfig @@ -26,7 +26,6 @@ CONFIG_SYN_COOKIES=y # CONFIG_INET_XFRM_MODE_TRANSPORT is not set # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set diff --git a/arch/powerpc/configs/fsl-emb-nonhw.config b/arch/powerpc/configs/fsl-emb-nonhw.config index 41e4d35..1a61e81 100644 --- a/arch/powerpc/configs/fsl-emb-nonhw.config +++ b/arch/powerpc/configs/fsl-emb-nonhw.config @@ -33,8 +33,7 @@ CONFIG_DUMMY=y CONFIG_EFS_FS=m CONFIG_EXPERT=y CONFIG_EXT2_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_FB=y CONFIG_FHANDLE=y CONFIG_FIXED_PHY=y @@ -55,7 +54,6 @@ CONFIG_IKCONFIG=y CONFIG_INET_AH=y CONFIG_INET_ESP=y CONFIG_INET_IPCOMP=y -# CONFIG_INET_LRO is not set # CONFIG_INET_XFRM_MODE_BEET is not set CONFIG_INET=y CONFIG_IP_ADVANCED_ROUTER=y diff --git a/arch/powerpc/configs/g5_defconfig b/arch/powerpc/configs/g5_defconfig index 1d9ad85..3b2511c 100644 --- a/arch/powerpc/configs/g5_defconfig +++ b/arch/powerpc/configs/g5_defconfig @@ -216,14 +216,13 @@ CONFIG_USB_SERIAL_CYBERJACK=m CONFIG_USB_SERIAL_XIRCOM=m CONFIG_USB_SERIAL_OMNINET=m CONFIG_USB_APPLEDISPLAY=m +CONFIG_FS_DAX=y CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT2_FS_XIP=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_EXT4_FS=y CONFIG_REISERFS_FS=y CONFIG_REISERFS_FS_XATTR=y diff --git a/arch/powerpc/configs/gamecube_defconfig b/arch/powerpc/configs/gamecube_defconfig 
index 6c6c60f..c0eec4a 100644 --- a/arch/powerpc/configs/gamecube_defconfig +++ b/arch/powerpc/configs/gamecube_defconfig @@ -32,7 +32,6 @@ CONFIG_IP_PNP_RARP=y # CONFIG_INET_XFRM_MODE_TRANSPORT is not set # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set -# CONFIG_INET_LRO is not set # CONFIG_INET_DIAG is not set # CONFIG_IPV6 is not set # CONFIG_WIRELESS is not set @@ -76,9 +75,7 @@ CONFIG_SND_SEQUENCER_OSS=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_GENERIC=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -# CONFIG_EXT3_FS_XATTR is not set +CONFIG_EXT4_FS=y CONFIG_ISO9660_FS=y CONFIG_JOLIET=y CONFIG_MSDOS_FS=y diff --git a/arch/powerpc/configs/holly_defconfig b/arch/powerpc/configs/holly_defconfig index 5e0f255..e56e800 100644 --- a/arch/powerpc/configs/holly_defconfig +++ b/arch/powerpc/configs/holly_defconfig @@ -25,7 +25,6 @@ CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_SYN_COOKIES=y -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set @@ -52,7 +51,7 @@ CONFIG_SERIAL_OF_PLATFORM=y # CONFIG_HW_RANDOM is not set CONFIG_GEN_RTC=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_NFS_FS=y diff --git a/arch/powerpc/configs/linkstation_defconfig b/arch/powerpc/configs/linkstation_defconfig index 62ae929..b413c19 100644 --- a/arch/powerpc/configs/linkstation_defconfig +++ b/arch/powerpc/configs/linkstation_defconfig @@ -23,7 +23,6 @@ CONFIG_IP_MULTICAST=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_NETFILTER=y CONFIG_NF_CONNTRACK=m @@ -109,8 +108,7 @@ CONFIG_USB_SERIAL_FTDI_SIO=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_RS5C372=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y CONFIG_XFS_FS=m CONFIG_ISO9660_FS=m CONFIG_JOLIET=y diff --git 
a/arch/powerpc/configs/maple_defconfig b/arch/powerpc/configs/maple_defconfig index ac9666f..27abfab 100644 --- a/arch/powerpc/configs/maple_defconfig +++ b/arch/powerpc/configs/maple_defconfig @@ -35,7 +35,6 @@ CONFIG_INET=y CONFIG_IP_MULTICAST=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_BLK_DEV_RAM=y @@ -102,9 +101,7 @@ CONFIG_USB_SERIAL_KEYSPAN_USA49W=y CONFIG_USB_SERIAL_KEYSPAN_USA49WLC=y CONFIG_USB_SERIAL_TI=m CONFIG_EXT2_FS=y -CONFIG_EXT2_FS_XIP=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_FS_XATTR is not set +CONFIG_FS_DAX=y CONFIG_EXT4_FS=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y diff --git a/arch/powerpc/configs/mgcoge_defconfig b/arch/powerpc/configs/mgcoge_defconfig index 666922c..197acaa 100644 --- a/arch/powerpc/configs/mgcoge_defconfig +++ b/arch/powerpc/configs/mgcoge_defconfig @@ -27,7 +27,6 @@ CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_SYN_COOKIES=y -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_NETFILTER=y CONFIG_TIPC=y diff --git a/arch/powerpc/configs/mpc512x_defconfig b/arch/powerpc/configs/mpc512x_defconfig index d16d6c5..0b4854c 100644 --- a/arch/powerpc/configs/mpc512x_defconfig +++ b/arch/powerpc/configs/mpc512x_defconfig @@ -27,7 +27,6 @@ CONFIG_IP_PNP=y # CONFIG_INET_XFRM_MODE_TRANSPORT is not set # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set -# CONFIG_INET_LRO is not set # CONFIG_INET_DIAG is not set # CONFIG_IPV6 is not set CONFIG_CAN=y @@ -53,7 +52,7 @@ CONFIG_MTD_UBI=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_COUNT=1 CONFIG_BLK_DEV_RAM_SIZE=8192 -CONFIG_BLK_DEV_XIP=y +CONFIG_BLK_DEV_RAM_DAX=y CONFIG_EEPROM_AT24=y CONFIG_EEPROM_AT25=y CONFIG_SCSI=y @@ -113,10 +112,9 @@ CONFIG_RTC_DRV_MPC5121=y CONFIG_DMADEVICES=y CONFIG_MPC512X_DMA=y CONFIG_MPC512x_LPBFIFO=y +CONFIG_FS_DAX=y CONFIG_EXT2_FS=y -CONFIG_EXT2_FS_XIP=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set 
+CONFIG_EXT4_FS=y # CONFIG_DNOTIFY is not set CONFIG_VFAT_FS=y CONFIG_TMPFS=y diff --git a/arch/powerpc/configs/mpc5200_defconfig b/arch/powerpc/configs/mpc5200_defconfig index 9fd041b..88336d0 100644 --- a/arch/powerpc/configs/mpc5200_defconfig +++ b/arch/powerpc/configs/mpc5200_defconfig @@ -26,7 +26,6 @@ CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_SYN_COOKIES=y -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_MTD=y @@ -114,8 +113,7 @@ CONFIG_RTC_DRV_PCF8563=m CONFIG_DMADEVICES=y CONFIG_PPC_BESTCOMM=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_PROC_KCORE=y diff --git a/arch/powerpc/configs/mpc7448_hpc2_defconfig b/arch/powerpc/configs/mpc7448_hpc2_defconfig index e2647d5..d933326 100644 --- a/arch/powerpc/configs/mpc7448_hpc2_defconfig +++ b/arch/powerpc/configs/mpc7448_hpc2_defconfig @@ -23,7 +23,6 @@ CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_SYN_COOKIES=y -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set @@ -49,8 +48,7 @@ CONFIG_SERIAL_8250_CONSOLE=y # CONFIG_HW_RANDOM is not set CONFIG_GEN_RTC=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_NFS_FS=y diff --git a/arch/powerpc/configs/mpc8272_ads_defconfig b/arch/powerpc/configs/mpc8272_ads_defconfig index 825b052..4cb0f61 100644 --- a/arch/powerpc/configs/mpc8272_ads_defconfig +++ b/arch/powerpc/configs/mpc8272_ads_defconfig @@ -22,7 +22,6 @@ CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_SYN_COOKIES=y -# CONFIG_INET_LRO is not set CONFIG_NETFILTER=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set @@ -56,8 +55,7 @@ CONFIG_SERIAL_CPM_CONSOLE=y # CONFIG_HWMON is not set # CONFIG_USB_SUPPORT is not set 
CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y CONFIG_AUTOFS4_FS=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y diff --git a/arch/powerpc/configs/mpc83xx_defconfig b/arch/powerpc/configs/mpc83xx_defconfig index 671e220..6574477 100644 --- a/arch/powerpc/configs/mpc83xx_defconfig +++ b/arch/powerpc/configs/mpc83xx_defconfig @@ -37,7 +37,6 @@ CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_SYN_COOKIES=y CONFIG_INET_ESP=y -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEVTMPFS=y @@ -101,8 +100,7 @@ CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_DS1307=y CONFIG_RTC_DRV_DS1374=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_NFS_FS=y diff --git a/arch/powerpc/configs/mpc866_ads_defconfig b/arch/powerpc/configs/mpc866_ads_defconfig index 321412c..9984544 100644 --- a/arch/powerpc/configs/mpc866_ads_defconfig +++ b/arch/powerpc/configs/mpc866_ads_defconfig @@ -24,7 +24,6 @@ CONFIG_INET=y CONFIG_IP_MULTICAST=y CONFIG_IP_PNP=y CONFIG_SYN_COOKIES=y -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_BLK_DEV_LOOP=y CONFIG_NETDEVICES=y @@ -37,8 +36,7 @@ CONFIG_SERIAL_CPM_CONSOLE=y CONFIG_GEN_RTC=y CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y CONFIG_TMPFS=y CONFIG_CRAMFS=y CONFIG_NFS_FS=y diff --git a/arch/powerpc/configs/mpc885_ads_defconfig b/arch/powerpc/configs/mpc885_ads_defconfig index 2a10f98..91f53f1 100644 --- a/arch/powerpc/configs/mpc885_ads_defconfig +++ b/arch/powerpc/configs/mpc885_ads_defconfig @@ -25,7 +25,6 @@ CONFIG_SYN_COOKIES=y # CONFIG_INET_XFRM_MODE_TRANSPORT is not set # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set diff --git 
a/arch/powerpc/configs/mvme5100_defconfig b/arch/powerpc/configs/mvme5100_defconfig index 525a2cb..139add9 100644 --- a/arch/powerpc/configs/mvme5100_defconfig +++ b/arch/powerpc/configs/mvme5100_defconfig @@ -32,7 +32,6 @@ CONFIG_IP_MULTICAST=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_NETFILTER=y CONFIG_NF_CONNTRACK=m @@ -92,8 +91,7 @@ CONFIG_I2C_MPC=y # CONFIG_USB_SUPPORT is not set # CONFIG_IOMMU_SUPPORT is not set CONFIG_EXT2_FS=m -CONFIG_EXT3_FS=m -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=m CONFIG_XFS_FS=m CONFIG_ISO9660_FS=m CONFIG_JOLIET=y diff --git a/arch/powerpc/configs/pasemi_defconfig b/arch/powerpc/configs/pasemi_defconfig index 8f94782..76f4edd 100644 --- a/arch/powerpc/configs/pasemi_defconfig +++ b/arch/powerpc/configs/pasemi_defconfig @@ -152,8 +152,7 @@ CONFIG_RTC_DRV_DS1307=y CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y CONFIG_AUTOFS4_FS=y CONFIG_ISO9660_FS=y CONFIG_UDF_FS=y diff --git a/arch/powerpc/configs/pmac32_defconfig b/arch/powerpc/configs/pmac32_defconfig index ea8705f..88d5b49 100644 --- a/arch/powerpc/configs/pmac32_defconfig +++ b/arch/powerpc/configs/pmac32_defconfig @@ -40,7 +40,6 @@ CONFIG_INET_AH=y CONFIG_INET_ESP=y # CONFIG_INET_XFRM_MODE_TRANSPORT is not set # CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_NETFILTER=y CONFIG_NF_CONNTRACK=m @@ -281,10 +280,8 @@ CONFIG_USB_SERIAL_KEYSPAN_USA49WLC=y CONFIG_USB_APPLEDISPLAY=m CONFIG_LEDS_TRIGGER_DEFAULT_ON=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_EXT3_FS_POSIX_ACL=y CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_AUTOFS4_FS=m CONFIG_FUSE_FS=m CONFIG_ISO9660_FS=y diff --git a/arch/powerpc/configs/powernv_defconfig b/arch/powerpc/configs/powernv_defconfig index c357458..dce352e 
100644 --- a/arch/powerpc/configs/powernv_defconfig +++ b/arch/powerpc/configs/powernv_defconfig @@ -235,9 +235,9 @@ CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_REISERFS_FS=y CONFIG_REISERFS_FS_XATTR=y CONFIG_REISERFS_FS_POSIX_ACL=y diff --git a/arch/powerpc/configs/ppc40x_defconfig b/arch/powerpc/configs/ppc40x_defconfig index a43bf6e..370c0bb 100644 --- a/arch/powerpc/configs/ppc40x_defconfig +++ b/arch/powerpc/configs/ppc40x_defconfig @@ -25,7 +25,6 @@ CONFIG_IP_PNP_BOOTP=y # CONFIG_INET_XFRM_MODE_TRANSPORT is not set # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set -# CONFIG_INET_LRO is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_CONNECTOR=y CONFIG_MTD=y @@ -67,8 +66,7 @@ CONFIG_THERMAL=y CONFIG_FB=m CONFIG_FB_XILINX=m CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=m -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=m CONFIG_VFAT_FS=m CONFIG_PROC_KCORE=y CONFIG_TMPFS=y diff --git a/arch/powerpc/configs/ppc44x_defconfig b/arch/powerpc/configs/ppc44x_defconfig index bbc7f76..2766e8f 100644 --- a/arch/powerpc/configs/ppc44x_defconfig +++ b/arch/powerpc/configs/ppc44x_defconfig @@ -35,7 +35,6 @@ CONFIG_IP_PNP_BOOTP=y # CONFIG_INET_XFRM_MODE_TRANSPORT is not set # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set -# CONFIG_INET_LRO is not set CONFIG_BRIDGE=m CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_CONNECTOR=y @@ -89,8 +88,7 @@ CONFIG_USB_OHCI_HCD_PPC_OF_BE=y # CONFIG_USB_OHCI_HCD_PCI is not set CONFIG_USB_STORAGE=m CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=m -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=m CONFIG_VFAT_FS=m CONFIG_PROC_KCORE=y CONFIG_TMPFS=y diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig index b041fb6..0a8d250 100644 --- 
a/arch/powerpc/configs/ppc64_defconfig +++ b/arch/powerpc/configs/ppc64_defconfig @@ -247,7 +247,6 @@ CONFIG_INFINIBAND=m CONFIG_INFINIBAND_USER_MAD=m CONFIG_INFINIBAND_USER_ACCESS=m CONFIG_INFINIBAND_MTHCA=m -CONFIG_INFINIBAND_EHCA=m CONFIG_INFINIBAND_CXGB3=m CONFIG_INFINIBAND_CXGB4=m CONFIG_MLX4_INFINIBAND=m @@ -262,14 +261,11 @@ CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_DS1307=y CONFIG_VIRTIO_PCI=m CONFIG_VIRTIO_BALLOON=m +CONFIG_FS_DAX=y CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT2_FS_XIP=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y diff --git a/arch/powerpc/configs/ppc64e_defconfig b/arch/powerpc/configs/ppc64e_defconfig index ddf9773..fd2edd6 100644 --- a/arch/powerpc/configs/ppc64e_defconfig +++ b/arch/powerpc/configs/ppc64e_defconfig @@ -178,15 +178,11 @@ CONFIG_EDAC=y CONFIG_EDAC_MM_EDAC=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_DS1307=y +CONFIG_FS_DAX=y CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT2_FS_XIP=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig index 99ccbeba..db1bde3 100644 --- a/arch/powerpc/configs/ppc6xx_defconfig +++ b/arch/powerpc/configs/ppc6xx_defconfig @@ -512,7 +512,6 @@ CONFIG_E1000E=m CONFIG_IGB=m CONFIG_IXGB=m CONFIG_IXGBE=m -CONFIG_IP1000=m CONFIG_MV643XX_ETH=m CONFIG_SKGE=m CONFIG_SKY2=m @@ -1029,14 +1028,14 @@ CONFIG_UIO_CIF=m CONFIG_UIO_PDRV_GENIRQ=m CONFIG_VIRTIO_PCI=m CONFIG_VIRTIO_BALLOON=m +CONFIG_FS_DAX=y CONFIG_EXT2_FS=m CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT2_FS_XIP=y -CONFIG_EXT3_FS=m -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y 
+CONFIG_EXT4_FS=m +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_EXT4_FS=y CONFIG_JBD2_DEBUG=y CONFIG_REISERFS_FS=m diff --git a/arch/powerpc/configs/pq2fads_defconfig b/arch/powerpc/configs/pq2fads_defconfig index 3e336ee..5432c7a 100644 --- a/arch/powerpc/configs/pq2fads_defconfig +++ b/arch/powerpc/configs/pq2fads_defconfig @@ -23,7 +23,6 @@ CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_SYN_COOKIES=y -# CONFIG_INET_LRO is not set CONFIG_NETFILTER=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set @@ -59,8 +58,7 @@ CONFIG_SERIAL_CPM_CONSOLE=y CONFIG_USB_GADGET=y CONFIG_USB_ETH=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y CONFIG_AUTOFS4_FS=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y diff --git a/arch/powerpc/configs/ps3_defconfig b/arch/powerpc/configs/ps3_defconfig index c400460..ee0ec5a 100644 --- a/arch/powerpc/configs/ps3_defconfig +++ b/arch/powerpc/configs/ps3_defconfig @@ -51,7 +51,6 @@ CONFIG_IP_PNP_DHCP=y # CONFIG_INET_XFRM_MODE_TRANSPORT is not set # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set -# CONFIG_INET_LRO is not set # CONFIG_INET_DIAG is not set CONFIG_BT=m CONFIG_BT_RFCOMM=m diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig index 36871a4..99dec9d 100644 --- a/arch/powerpc/configs/pseries_defconfig +++ b/arch/powerpc/configs/pseries_defconfig @@ -223,7 +223,6 @@ CONFIG_INFINIBAND=m CONFIG_INFINIBAND_USER_MAD=m CONFIG_INFINIBAND_USER_ACCESS=m CONFIG_INFINIBAND_MTHCA=m -CONFIG_INFINIBAND_EHCA=m CONFIG_INFINIBAND_CXGB3=m CONFIG_INFINIBAND_CXGB4=m CONFIG_MLX4_INFINIBAND=m @@ -233,14 +232,11 @@ CONFIG_INFINIBAND_SRP=m CONFIG_INFINIBAND_ISER=m CONFIG_VIRTIO_PCI=m CONFIG_VIRTIO_BALLOON=m +CONFIG_FS_DAX=y CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT2_FS_XIP=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y 
-CONFIG_EXT3_FS_SECURITY=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y diff --git a/arch/powerpc/configs/storcenter_defconfig b/arch/powerpc/configs/storcenter_defconfig index b5db7df..e9122b1 100644 --- a/arch/powerpc/configs/storcenter_defconfig +++ b/arch/powerpc/configs/storcenter_defconfig @@ -25,7 +25,6 @@ CONFIG_IP_PNP_DHCP=y # CONFIG_INET_XFRM_MODE_TRANSPORT is not set # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_MTD=y @@ -72,8 +71,7 @@ CONFIG_USB_STORAGE=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_DS1307=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_EXT4_FS=y CONFIG_XFS_FS=m CONFIG_PROC_KCORE=y CONFIG_TMPFS=y diff --git a/arch/powerpc/configs/tqm8xx_defconfig b/arch/powerpc/configs/tqm8xx_defconfig index 4c973c5..78fddf2 100644 --- a/arch/powerpc/configs/tqm8xx_defconfig +++ b/arch/powerpc/configs/tqm8xx_defconfig @@ -29,7 +29,6 @@ CONFIG_SYN_COOKIES=y # CONFIG_INET_XFRM_MODE_TRANSPORT is not set # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set # CONFIG_WIRELESS is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" diff --git a/arch/powerpc/configs/wii_defconfig b/arch/powerpc/configs/wii_defconfig index 34eaf52..dcdd51b 100644 --- a/arch/powerpc/configs/wii_defconfig +++ b/arch/powerpc/configs/wii_defconfig @@ -32,7 +32,6 @@ CONFIG_IP_PNP_RARP=y # CONFIG_INET_XFRM_MODE_TRANSPORT is not set # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set -# CONFIG_INET_LRO is not set # CONFIG_INET_DIAG is not set # CONFIG_IPV6 is not set CONFIG_BT=y @@ -96,9 +95,7 @@ CONFIG_MMC_SDHCI=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_GENERIC=y CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -# CONFIG_EXT3_FS_XATTR is not set 
+CONFIG_EXT4_FS=y CONFIG_FUSE_FS=m CONFIG_ISO9660_FS=y CONFIG_JOLIET=y -- cgit v0.10.2 From 49e9c99f47fc43abc9598f9fcf5ba3336d0c09a6 Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Wed, 29 Jun 2016 22:16:25 +1000 Subject: cxl: Fix allowing bogus AFU descriptors with 0 maximum processes If the AFU descriptor of an AFU directed AFU indicates that it supports 0 maximum processes, we will accept that value and attempt to use it. The SPA will still be allocated (with 2 pages due to another minor bug and room for 958 processes), and when a context is allocated we will pass the value of 0 to idr_alloc as the maximum. However, idr_alloc will treat that as meaning no maximum and will allocate a context number and we return a valid context. Conceivably, this could lead to a buffer overflow of the SPA if more than 958 contexts were allocated, however this is mitigated by the fact that there are no known AFUs in the wild with a bogus AFU descriptor like this, and that only the root user is allowed to flash an AFU image to a card. Add a check when validating the AFU descriptor to reject any with 0 maximum processes. We do still allow a dedicated process only AFU to indicate that it supports 0 contexts even though that is forbidden in the architecture, as in that case we ignore the value and use 1 instead. This is just on the off-chance that such a dedicated process AFU may exist (not that I am aware of any), since their developers are less likely to have cared about this value at all. 
Signed-off-by: Ian Munsie Reviewed-by: Frederic Barrat Reviewed-by: Andrew Donnellan Signed-off-by: Michael Ellerman diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index 648817a..58d7d821 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -775,6 +775,21 @@ static int cxl_afu_descriptor_looks_ok(struct cxl_afu *afu) } } + if ((afu->modes_supported & ~CXL_MODE_DEDICATED) && afu->max_procs_virtualised == 0) { + /* + * We could also check this for the dedicated process model + * since the architecture indicates it should be set to 1, but + * in that case we ignore the value and I'd rather not risk + * breaking any existing dedicated process AFUs that left it as + * 0 (not that I'm aware of any). It is clearly an error for an + * AFU directed AFU to set this to 0, and would have previously + * triggered a bug resulting in the maximum not being enforced + * at all since idr_alloc treats 0 as no maximum. + */ + dev_err(&afu->dev, "AFU does not support any processes\n"); + return -EINVAL; + } + return 0; } -- cgit v0.10.2 From 2224b6719b09052a9fbf29422a0e9b4f42407c35 Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Wed, 29 Jun 2016 22:16:26 +1000 Subject: cxl: Fix allocating a minimum of 2 pages for the SPA The Scheduled Process Area is allocated dynamically with enough pages to fit at least as many processes as the AFU descriptor indicated. Since the calculation is non-trivial, it does this by calculating how many processes could fit in an allocation of a given order, and increasing that order until it can fit enough processes or hits the maximum supported size. Currently, it will start this search using a SPA of 2 pages instead of 1. This can waste a page of memory if the AFU's maximum number of supported processes was small enough to fit in one page. Fix the algorithm to start the search at 1 page. 
Signed-off-by: Ian Munsie Reviewed-by: Frederic Barrat Reviewed-by: Andrew Donnellan Signed-off-by: Michael Ellerman diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c index e80d8f7..120c468 100644 --- a/drivers/misc/cxl/native.c +++ b/drivers/misc/cxl/native.c @@ -189,7 +189,7 @@ int cxl_alloc_spa(struct cxl_afu *afu) unsigned spa_size; /* Work out how many pages to allocate */ - afu->native->spa_order = 0; + afu->native->spa_order = -1; do { afu->native->spa_order++; spa_size = (1 << afu->native->spa_order) * PAGE_SIZE; -- cgit v0.10.2 From 5e7823c9bc44965c2e7d1d755b382109830c4916 Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Fri, 1 Jul 2016 02:50:40 +1000 Subject: cxl: Fix bug where AFU disable operation had no effect The AFU disable operation has a bug where it will not clear the enable bit and therefore will have no effect. To date this has likely been masked by the fact that we perform an AFU reset before the disable, which also has the effect of clearing the enable bit, making the following disable operation effectively a noop on most hardware. This patch modifies the afu_control function to take a parameter to clear from the AFU control register so that the disable operation can clear the appropriate bit. This bug was uncovered on the Mellanox CX4, which uses an XSL rather than a PSL. On the XSL the reset operation will not complete while the AFU is enabled, meaning the enable bit was still set at the start of the disable and as a result this bug was hit and the disable also timed out. Because of this difference in behaviour between the PSL and XSL, this patch now makes the reset dependent on the card using a PSL to avoid waiting for a timeout on the XSL. It is entirely possible that we may be able to drop the reset altogether if it turns out we only ever needed it due to this bug - however I am not willing to drop it without further regression testing and have added comments to the code explaining the background.
This also fixes a small issue where the AFU_Cntl register was read outside of the lock that protects it. Signed-off-by: Ian Munsie Reviewed-by: Frederic Barrat Signed-off-by: Michael Ellerman diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h index 27578fc..aafffa8 100644 --- a/drivers/misc/cxl/cxl.h +++ b/drivers/misc/cxl/cxl.h @@ -552,6 +552,7 @@ struct cxl_service_layer_ops { void (*write_timebase_ctrl)(struct cxl *adapter); u64 (*timebase_read)(struct cxl *adapter); int capi_mode; + bool needs_reset_before_disable; }; struct cxl_native { diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c index 120c468..e774505 100644 --- a/drivers/misc/cxl/native.c +++ b/drivers/misc/cxl/native.c @@ -21,10 +21,10 @@ #include "cxl.h" #include "trace.h" -static int afu_control(struct cxl_afu *afu, u64 command, +static int afu_control(struct cxl_afu *afu, u64 command, u64 clear, u64 result, u64 mask, bool enabled) { - u64 AFU_Cntl = cxl_p2n_read(afu, CXL_AFU_Cntl_An); + u64 AFU_Cntl; unsigned long timeout = jiffies + (HZ * CXL_TIMEOUT); int rc = 0; @@ -33,7 +33,8 @@ static int afu_control(struct cxl_afu *afu, u64 command, trace_cxl_afu_ctrl(afu, command); - cxl_p2n_write(afu, CXL_AFU_Cntl_An, AFU_Cntl | command); + AFU_Cntl = cxl_p2n_read(afu, CXL_AFU_Cntl_An); + cxl_p2n_write(afu, CXL_AFU_Cntl_An, (AFU_Cntl & ~clear) | command); AFU_Cntl = cxl_p2n_read(afu, CXL_AFU_Cntl_An); while ((AFU_Cntl & mask) != result) { @@ -67,7 +68,7 @@ static int afu_enable(struct cxl_afu *afu) { pr_devel("AFU enable request\n"); - return afu_control(afu, CXL_AFU_Cntl_An_E, + return afu_control(afu, CXL_AFU_Cntl_An_E, 0, CXL_AFU_Cntl_An_ES_Enabled, CXL_AFU_Cntl_An_ES_MASK, true); } @@ -76,7 +77,8 @@ int cxl_afu_disable(struct cxl_afu *afu) { pr_devel("AFU disable request\n"); - return afu_control(afu, 0, CXL_AFU_Cntl_An_ES_Disabled, + return afu_control(afu, 0, CXL_AFU_Cntl_An_E, + CXL_AFU_Cntl_An_ES_Disabled, CXL_AFU_Cntl_An_ES_MASK, false); } @@ -85,7 +87,7 @@ static int 
native_afu_reset(struct cxl_afu *afu) { pr_devel("AFU reset request\n"); - return afu_control(afu, CXL_AFU_Cntl_An_RA, + return afu_control(afu, CXL_AFU_Cntl_An_RA, 0, CXL_AFU_Cntl_An_RS_Complete | CXL_AFU_Cntl_An_ES_Disabled, CXL_AFU_Cntl_An_RS_MASK | CXL_AFU_Cntl_An_ES_MASK, false); @@ -595,7 +597,33 @@ static int deactivate_afu_directed(struct cxl_afu *afu) cxl_sysfs_afu_m_remove(afu); cxl_chardev_afu_remove(afu); - cxl_ops->afu_reset(afu); + /* + * The CAIA section 2.2.1 indicates that the procedure for starting and + * stopping an AFU in AFU directed mode is AFU specific, which is not + * ideal since this code is generic and with one exception has no + * knowledge of the AFU. This is in contrast to the procedure for + * disabling a dedicated process AFU, which is documented to just + * require a reset. The architecture does indicate that both an AFU + * reset and an AFU disable should result in the AFU being disabled and + * we do both followed by a PSL purge for safety. + * + * Notably we used to have some issues with the disable sequence on PSL + * cards, which is why we ended up using this heavy weight procedure in + * the first place, however a bug was discovered that had rendered the + * disable operation ineffective, so it is conceivable that was the + * sole explanation for those difficulties. Careful regression testing + * is recommended if anyone attempts to remove or reorder these + * operations. + * + * The XSL on the Mellanox CX4 behaves a little differently from the + * PSL based cards and will time out an AFU reset if the AFU is still + * enabled. That card is special in that we do have a means to identify + * it from this code, so in that case we skip the reset and just use a + * disable/purge to avoid the timeout and corresponding noise in the + * kernel log. 
+ */ + if (afu->adapter->native->sl_ops->needs_reset_before_disable) + cxl_ops->afu_reset(afu); cxl_afu_disable(afu); cxl_psl_purge(afu); @@ -735,6 +763,22 @@ static int native_attach_process(struct cxl_context *ctx, bool kernel, static inline int detach_process_native_dedicated(struct cxl_context *ctx) { + /* + * The CAIA section 2.1.1 indicates that we need to do an AFU reset to + * stop the AFU in dedicated mode (we therefore do not make that + * optional like we do in the afu directed path). It does not indicate + * that we need to do an explicit disable (which should occur + * implicitly as part of the reset) or purge, but we do these as well + * to be on the safe side. + * + * Notably we used to have some issues with the disable sequence + * (before the sequence was spelled out in the architecture) which is + * why we were so heavy weight in the first place, however a bug was + * discovered that had rendered the disable operation ineffective, so + * it is conceivable that was the sole explanation for those + * difficulties. Point is, we should be careful and do some regression + * testing if we ever attempt to remove any part of this procedure. 
+ */ cxl_ops->afu_reset(ctx->afu); cxl_afu_disable(ctx->afu); cxl_psl_purge(ctx->afu); diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index 58d7d821..b7f2e96 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -1309,6 +1309,7 @@ static const struct cxl_service_layer_ops psl_ops = { .write_timebase_ctrl = write_timebase_ctrl_psl, .timebase_read = timebase_read_psl, .capi_mode = OPAL_PHB_CAPI_MODE_CAPI, + .needs_reset_before_disable = true, }; static const struct cxl_service_layer_ops xsl_ops = { -- cgit v0.10.2 From 2a4f667aadb2d61c289a52a0d6dbc7e4e3aa009f Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Thu, 30 Jun 2016 04:51:26 +1000 Subject: cxl: Workaround XSL bug that does not clear the RA bit after a reset An issue was noted in our debug logs where the XSL would leave the RA bit asserted after an AFU reset operation, which would effectively prevent further AFU reset operations from working. Workaround the issue by clearing the RA bit with an MMIO write if it is still asserted after any AFU control operation. Signed-off-by: Ian Munsie Reviewed-by: Frederic Barrat Signed-off-by: Michael Ellerman diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c index e774505..04c27e4 100644 --- a/drivers/misc/cxl/native.c +++ b/drivers/misc/cxl/native.c @@ -55,6 +55,16 @@ static int afu_control(struct cxl_afu *afu, u64 command, u64 clear, cpu_relax(); AFU_Cntl = cxl_p2n_read(afu, CXL_AFU_Cntl_An); }; + + if (AFU_Cntl & CXL_AFU_Cntl_An_RA) { + /* + * Workaround for a bug in the XSL used in the Mellanox CX4 + * that fails to clear the RA bit after an AFU reset, + * preventing subsequent AFU resets from working. 
+ */ + cxl_p2n_write(afu, CXL_AFU_Cntl_An, AFU_Cntl & ~CXL_AFU_Cntl_An_RA); + } + pr_devel("AFU command complete: %llx\n", command); afu->enabled = enabled; out: -- cgit v0.10.2 From f5c9df9a442f586b183947627210e167ded81d19 Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Thu, 30 Jun 2016 04:55:17 +1000 Subject: cxl: Fix NULL pointer dereference on kernel contexts with no AFU interrupts If a kernel context is initialised and does not have any AFU interrupts allocated it will cause a NULL pointer dereference when the context is detached since the irq_names list will not have been initialised. Move the initialisation of the irq_names list into the cxl_context_init routine so that it will be valid for the entire lifetime of the context and will not cause a NULL pointer dereference. Signed-off-by: Ian Munsie Reviewed-by: Andrew Donnellan Signed-off-by: Michael Ellerman diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c index 26d206b..edbb99e 100644 --- a/drivers/misc/cxl/context.c +++ b/drivers/misc/cxl/context.c @@ -67,6 +67,8 @@ int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master, ctx->pending_fault = false; ctx->pending_afu_err = false; + INIT_LIST_HEAD(&ctx->irq_names); + /* * When we have to destroy all contexts in cxl_context_detach_all() we * end up with afu_release_irqs() called from inside a diff --git a/drivers/misc/cxl/irq.c b/drivers/misc/cxl/irq.c index 8def455..f3a7d4a 100644 --- a/drivers/misc/cxl/irq.c +++ b/drivers/misc/cxl/irq.c @@ -260,9 +260,6 @@ int afu_allocate_irqs(struct cxl_context *ctx, u32 count) else alloc_count = count + 1; - /* Initialize the list head to hold irq names */ - INIT_LIST_HEAD(&ctx->irq_names); - if ((rc = cxl_ops->alloc_irq_ranges(&ctx->irqs, ctx->afu->adapter, alloc_count))) return rc; -- cgit v0.10.2 From 6e0c50f9e814220ada60497c522b60a8e1cc1e92 Mon Sep 17 00:00:00 2001 From: Philippe Bergheaud Date: Tue, 5 Jul 2016 13:08:06 +0200 Subject: cxl: Refine slice error debug messages 
The PSL Slice Error Register (PSL_SERR_An) reports implementation dependent AFU errors, in the form of a bitmap. The PSL_SERR_An register content is printed in the form of a hex dump debug message. This patch decodes the PSL_SERR_An register contents, and prints a specific error message for each possible error bit. It also dumps the secondary registers AFU_ERR_An and PSL_DSISR_An, that may contain extra debug information. This patch also removes the large WARN message that used to report the cxl slice error interrupt, and replaces it by a short informative message, that draws attention to AFU implementation errors. Signed-off-by: Philippe Bergheaud Acked-by: Ian Munsie Signed-off-by: Michael Ellerman diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h index aafffa8..36b3237 100644 --- a/drivers/misc/cxl/cxl.h +++ b/drivers/misc/cxl/cxl.h @@ -189,6 +189,18 @@ static const cxl_p2n_reg_t CXL_PSL_WED_An = {0x0A0}; #define CXL_PSL_ID_An_F (1ull << (63-31)) #define CXL_PSL_ID_An_L (1ull << (63-30)) +/****** CXL_PSL_SERR_An ****************************************************/ +#define CXL_PSL_SERR_An_afuto (1ull << (63-0)) +#define CXL_PSL_SERR_An_afudis (1ull << (63-1)) +#define CXL_PSL_SERR_An_afuov (1ull << (63-2)) +#define CXL_PSL_SERR_An_badsrc (1ull << (63-3)) +#define CXL_PSL_SERR_An_badctx (1ull << (63-4)) +#define CXL_PSL_SERR_An_llcmdis (1ull << (63-5)) +#define CXL_PSL_SERR_An_llcmdto (1ull << (63-6)) +#define CXL_PSL_SERR_An_afupar (1ull << (63-7)) +#define CXL_PSL_SERR_An_afudup (1ull << (63-8)) +#define CXL_PSL_SERR_An_AE (1ull << (63-30)) + /****** CXL_PSL_SCNTL_An ****************************************************/ #define CXL_PSL_SCNTL_An_CR (0x1ull << (63-15)) /* Programming Modes: */ @@ -916,4 +928,7 @@ extern const struct cxl_backend_ops *cxl_ops; /* check if the given pci_dev is on the the cxl vphb bus */ bool cxl_pci_is_vphb_device(struct pci_dev *dev); + +/* decode AFU error bits in the PSL register PSL_SERR_An */ +void
cxl_afu_decode_psl_serr(struct cxl_afu *afu, u64 serr); #endif diff --git a/drivers/misc/cxl/guest.c b/drivers/misc/cxl/guest.c index 1edba52..ee7148e 100644 --- a/drivers/misc/cxl/guest.c +++ b/drivers/misc/cxl/guest.c @@ -196,15 +196,18 @@ static irqreturn_t guest_slice_irq_err(int irq, void *data) { struct cxl_afu *afu = data; int rc; - u64 serr; + u64 serr, afu_error, dsisr; - WARN(irq, "CXL SLICE ERROR interrupt %i\n", irq); rc = cxl_h_get_fn_error_interrupt(afu->guest->handle, &serr); if (rc) { dev_crit(&afu->dev, "Couldn't read PSL_SERR_An: %d\n", rc); return IRQ_HANDLED; } - dev_crit(&afu->dev, "PSL_SERR_An: 0x%.16llx\n", serr); + afu_error = cxl_p2n_read(afu, CXL_AFU_ERR_An); + dsisr = cxl_p2n_read(afu, CXL_PSL_DSISR_An); + cxl_afu_decode_psl_serr(afu, serr); + dev_crit(&afu->dev, "AFU_ERR_An: 0x%.16llx\n", afu_error); + dev_crit(&afu->dev, "PSL_DSISR_An: 0x%.16llx\n", dsisr); rc = cxl_h_ack_fn_error_interrupt(afu->guest->handle, serr); if (rc) diff --git a/drivers/misc/cxl/irq.c b/drivers/misc/cxl/irq.c index f3a7d4a..dec60f5 100644 --- a/drivers/misc/cxl/irq.c +++ b/drivers/misc/cxl/irq.c @@ -371,3 +371,32 @@ void afu_release_irqs(struct cxl_context *ctx, void *cookie) ctx->irq_count = 0; } + +void cxl_afu_decode_psl_serr(struct cxl_afu *afu, u64 serr) +{ + dev_crit(&afu->dev, + "PSL Slice error received. 
Check AFU for root cause.\n"); + dev_crit(&afu->dev, "PSL_SERR_An: 0x%016llx\n", serr); + if (serr & CXL_PSL_SERR_An_afuto) + dev_crit(&afu->dev, "AFU MMIO Timeout\n"); + if (serr & CXL_PSL_SERR_An_afudis) + dev_crit(&afu->dev, + "MMIO targeted Accelerator that was not enabled\n"); + if (serr & CXL_PSL_SERR_An_afuov) + dev_crit(&afu->dev, "AFU CTAG Overflow\n"); + if (serr & CXL_PSL_SERR_An_badsrc) + dev_crit(&afu->dev, "Bad Interrupt Source\n"); + if (serr & CXL_PSL_SERR_An_badctx) + dev_crit(&afu->dev, "Bad Context Handle\n"); + if (serr & CXL_PSL_SERR_An_llcmdis) + dev_crit(&afu->dev, "LLCMD to Disabled AFU\n"); + if (serr & CXL_PSL_SERR_An_llcmdto) + dev_crit(&afu->dev, "LLCMD Timeout to AFU\n"); + if (serr & CXL_PSL_SERR_An_afupar) + dev_crit(&afu->dev, "AFU MMIO Parity Error\n"); + if (serr & CXL_PSL_SERR_An_afudup) + dev_crit(&afu->dev, "AFU MMIO Duplicate CTAG Error\n"); + if (serr & CXL_PSL_SERR_An_AE) + dev_crit(&afu->dev, + "AFU asserted JDONE with JERROR in AFU Directed Mode\n"); +} diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c index 04c27e4..3bcdaee 100644 --- a/drivers/misc/cxl/native.c +++ b/drivers/misc/cxl/native.c @@ -862,7 +862,7 @@ void cxl_native_psl_irq_dump_regs(struct cxl_context *ctx) dev_crit(&ctx->afu->dev, "PSL_FIR2: 0x%016llx\n", fir2); if (ctx->afu->adapter->native->sl_ops->register_serr_irq) { serr = cxl_p1n_read(ctx->afu, CXL_PSL_SERR_An); - dev_crit(&ctx->afu->dev, "PSL_SERR_An: 0x%016llx\n", serr); + cxl_afu_decode_psl_serr(ctx->afu, serr); } dev_crit(&ctx->afu->dev, "PSL_FIR_SLICE_An: 0x%016llx\n", fir_slice); dev_crit(&ctx->afu->dev, "CXL_PSL_AFU_DEBUG_An: 0x%016llx\n", afu_debug); @@ -956,21 +956,23 @@ void native_irq_wait(struct cxl_context *ctx) static irqreturn_t native_slice_irq_err(int irq, void *data) { struct cxl_afu *afu = data; - u64 fir_slice, errstat, serr, afu_debug; + u64 fir_slice, errstat, serr, afu_debug, afu_error, dsisr; /* * slice err interrupt is only used with full PSL (no XSL) */ - 
WARN(irq, "CXL SLICE ERROR interrupt %i\n", irq); - serr = cxl_p1n_read(afu, CXL_PSL_SERR_An); fir_slice = cxl_p1n_read(afu, CXL_PSL_FIR_SLICE_An); errstat = cxl_p2n_read(afu, CXL_PSL_ErrStat_An); afu_debug = cxl_p1n_read(afu, CXL_AFU_DEBUG_An); - dev_crit(&afu->dev, "PSL_SERR_An: 0x%016llx\n", serr); + afu_error = cxl_p2n_read(afu, CXL_AFU_ERR_An); + dsisr = cxl_p2n_read(afu, CXL_PSL_DSISR_An); + cxl_afu_decode_psl_serr(afu, serr); dev_crit(&afu->dev, "PSL_FIR_SLICE_An: 0x%016llx\n", fir_slice); dev_crit(&afu->dev, "CXL_PSL_ErrStat_An: 0x%016llx\n", errstat); dev_crit(&afu->dev, "CXL_PSL_AFU_DEBUG_An: 0x%016llx\n", afu_debug); + dev_crit(&afu->dev, "AFU_ERR_An: 0x%.16llx\n", afu_error); + dev_crit(&afu->dev, "PSL_DSISR_An: 0x%.16llx\n", dsisr); cxl_p1n_write(afu, CXL_PSL_SERR_An, serr); -- cgit v0.10.2 From e00878be3f414a99234b3c9613e2fcab7497cd4b Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Sun, 3 Jul 2016 16:31:53 -0400 Subject: cxl: make base more explicitly non-modular The Kconfig/Makefile currently controlling compilation of this code is: drivers/misc/cxl/Kconfig:config CXL_BASE drivers/misc/cxl/Kconfig: bool drivers/misc/cxl/Makefile:obj-$(CONFIG_CXL_BASE) += base.o ...meaning that it currently is not being built as a module by anyone. Let's convert the one module_init into device_initcall so that when reading the driver it is more clear that it is builtin-only. Since module_init translates to device_initcall in the non-modular case, the init ordering remains unchanged with this commit. We don't replace module.h with init.h since the file is doing other modular stuff (module_get/put) even though it is built-in.
Cc: Ian Munsie Cc: Michael Neuling Cc: linuxppc-dev@lists.ozlabs.org Signed-off-by: Paul Gortmaker Acked-by: Ian Munsie Signed-off-by: Michael Ellerman diff --git a/drivers/misc/cxl/base.c b/drivers/misc/cxl/base.c index 9b90ec6..e6f49ac 100644 --- a/drivers/misc/cxl/base.c +++ b/drivers/misc/cxl/base.c @@ -114,5 +114,4 @@ static int __init cxl_base_init(void) pr_devel("Found %d cxl device(s)\n", count); return 0; } - -module_init(cxl_base_init); +device_initcall(cxl_base_init); -- cgit v0.10.2 From 3b3dcd61fa4e3604d8f1bdfd8471fca7b7c012e4 Mon Sep 17 00:00:00 2001 From: Philippe Bergheaud Date: Fri, 1 Jul 2016 13:32:52 +0200 Subject: cxl: Ignore CAPI adapters misplaced in switched slots One should not attempt to switch a PHB into CAPI mode if there is a switch between the PHB and the adapter. This patch modifies the cxl driver to ignore CAPI adapters misplaced in switched slots. Signed-off-by: Philippe Bergheaud Reviewed-by: Frederic Barrat Acked-by: Ian Munsie Signed-off-by: Michael Ellerman diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index b7f2e96..3a5f980 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -1402,6 +1402,30 @@ static void cxl_pci_remove_adapter(struct cxl *adapter) device_unregister(&adapter->dev); } +#define CXL_MAX_PCIEX_PARENT 2 + +static int cxl_slot_is_switched(struct pci_dev *dev) +{ + struct device_node *np; + int depth = 0; + const __be32 *prop; + + if (!(np = pci_device_to_OF_node(dev))) { + pr_err("cxl: np = NULL\n"); + return -ENODEV; + } + of_node_get(np); + while (np) { + np = of_get_next_parent(np); + prop = of_get_property(np, "device_type", NULL); + if (!prop || strcmp((char *)prop, "pciex")) + break; + depth++; + } + of_node_put(np); + return (depth > CXL_MAX_PCIEX_PARENT); +} + static int cxl_probe(struct pci_dev *dev, const struct pci_device_id *id) { struct cxl *adapter; @@ -1413,6 +1437,11 @@ static int cxl_probe(struct pci_dev *dev, const struct pci_device_id *id) return -ENODEV; } + if 
(cxl_slot_is_switched(dev)) { + dev_info(&dev->dev, "Ignoring card on incompatible PCI slot\n"); + return -ENODEV; + } + if (cxl_verbose) dump_cxl_config_space(dev); -- cgit v0.10.2 From 8ebf506ab2729012a3fe7dd8765a7d24202d409b Mon Sep 17 00:00:00 2001 From: Claudiu Manoil Date: Thu, 7 Jul 2016 15:49:02 +0300 Subject: powerpc/85xx: Don't report SRAM to L2 cache fallback as error If the SRAM region parameters are missing the SRAM driver probing exits and the L2 region is configured as L2 cache entirely. This is the expected default behaviour, so it makes no sense to report it as an error. Signed-off-by: Claudiu Manoil Signed-off-by: Scott Wood diff --git a/arch/powerpc/sysdev/fsl_85xx_l2ctlr.c b/arch/powerpc/sysdev/fsl_85xx_l2ctlr.c index 861cebf..c27058e 100644 --- a/arch/powerpc/sysdev/fsl_85xx_l2ctlr.c +++ b/arch/powerpc/sysdev/fsl_85xx_l2ctlr.c @@ -90,12 +90,8 @@ static int mpc85xx_l2ctlr_of_probe(struct platform_device *dev) } l2cache_size = *prop; - if (get_cache_sram_params(&sram_params)) { - dev_err(&dev->dev, - "Entire L2 as cache, provide valid sram offset and size\n"); - return -EINVAL; - } - + if (get_cache_sram_params(&sram_params)) + return 0; /* fall back to L2 cache only */ rem = l2cache_size % sram_params.sram_size; ways = LOCK_WAYS_FULL * sram_params.sram_size / l2cache_size; -- cgit v0.10.2 From 3dde317654b3624ec00bebbe068392b959c2e3ac Mon Sep 17 00:00:00 2001 From: Sriram Dash Date: Fri, 10 Jun 2016 13:36:56 +0530 Subject: powerpc/85xx: Change T1040si USB controller version Change USB controller version name to 2.5 in compatible string for T1040 Signed-off-by: Sriram Dash Signed-off-by: Scott Wood diff --git a/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi b/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi index 507649e..d3fbe72 100644 --- a/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi @@ -607,7 +607,7 @@ /include/ "qoriq-gpio-3.dtsi" /include/ "qoriq-usb2-mph-0.dtsi" usb0: usb@210000 { - compatible = 
"fsl-usb2-mph-v2.4", "fsl-usb2-mph"; + compatible = "fsl-usb2-mph-v2.5", "fsl-usb2-mph"; fsl,iommu-parent = <&pamu0>; fsl,liodn-reg = <&guts 0x520>; /* USB1LIODNR */ phy_type = "utmi"; @@ -615,7 +615,7 @@ }; /include/ "qoriq-usb2-dr-0.dtsi" usb1: usb@211000 { - compatible = "fsl-usb2-dr-v2.4", "fsl-usb2-dr"; + compatible = "fsl-usb2-dr-v2.5", "fsl-usb2-dr"; fsl,iommu-parent = <&pamu0>; fsl,liodn-reg = <&guts 0x524>; /* USB2LIODNR */ dr_mode = "host"; -- cgit v0.10.2 From ae9ac1d3299ab8334f0b5293ddb77e18f2a9c9e5 Mon Sep 17 00:00:00 2001 From: Sriram Dash Date: Fri, 10 Jun 2016 13:36:39 +0530 Subject: powerpc/85xx: add aliases for usb nodes on t4240, b4860, and b4420 Add usb aliases for consistency with the other platforms. Signed-off-by: Laurentiu Tudor Signed-off-by: Sriram Dash Signed-off-by: Scott Wood diff --git a/arch/powerpc/boot/dts/fsl/b4420si-pre.dtsi b/arch/powerpc/boot/dts/fsl/b4420si-pre.dtsi index bc3bf93..88d8423 100644 --- a/arch/powerpc/boot/dts/fsl/b4420si-pre.dtsi +++ b/arch/powerpc/boot/dts/fsl/b4420si-pre.dtsi @@ -51,6 +51,7 @@ serial2 = &serial2; serial3 = &serial3; pci0 = &pci0; + usb0 = &usb0; dma0 = &dma0; dma1 = &dma1; sdhc = &sdhc; diff --git a/arch/powerpc/boot/dts/fsl/b4860si-pre.dtsi b/arch/powerpc/boot/dts/fsl/b4860si-pre.dtsi index 8797ce1..f3f968c 100644 --- a/arch/powerpc/boot/dts/fsl/b4860si-pre.dtsi +++ b/arch/powerpc/boot/dts/fsl/b4860si-pre.dtsi @@ -51,6 +51,7 @@ serial2 = &serial2; serial3 = &serial3; pci0 = &pci0; + usb0 = &usb0; dma0 = &dma0; dma1 = &dma1; sdhc = &sdhc; diff --git a/arch/powerpc/boot/dts/fsl/qonverge-usb2-dr-0.dtsi b/arch/powerpc/boot/dts/fsl/qonverge-usb2-dr-0.dtsi index 29dad72..fcc7e5b 100644 --- a/arch/powerpc/boot/dts/fsl/qonverge-usb2-dr-0.dtsi +++ b/arch/powerpc/boot/dts/fsl/qonverge-usb2-dr-0.dtsi @@ -32,7 +32,7 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ -usb@210000 { +usb0: usb@210000 { compatible = "fsl-usb2-dr"; reg = <0x210000 0x1000>; #address-cells = <1>; diff --git a/arch/powerpc/boot/dts/fsl/t4240si-pre.dtsi b/arch/powerpc/boot/dts/fsl/t4240si-pre.dtsi index 1184a74..038cf8f 100644 --- a/arch/powerpc/boot/dts/fsl/t4240si-pre.dtsi +++ b/arch/powerpc/boot/dts/fsl/t4240si-pre.dtsi @@ -56,6 +56,8 @@ pci1 = &pci1; pci2 = &pci2; pci3 = &pci3; + usb0 = &usb0; + usb1 = &usb1; dma0 = &dma0; dma1 = &dma1; dma2 = &dma2; -- cgit v0.10.2 From 97493e2e9eeddfecaca741454f97a689d8141dcf Mon Sep 17 00:00:00 2001 From: Alessio Igor Bogani Date: Mon, 30 May 2016 11:47:16 +0200 Subject: powerpc/86xx: Add support for Emerson/Artesyn MVME7100 Add support for the Artesyn MVME7100 Single Board Computer. The MVME7100 is a 6U form factor VME64 computer with: - A two e600 cores Freescale MPC8641D CPU - 2 GB of DDR2 onboard memory - Four Gigabit Ethernets - Five 16550 compatible UARTs - One USB 2.0 port - Two PCI/PCI eXpress Mezzanine Card (PMC/XMC) Slots - A DS1375 Real Time Clock (RTC) - 512 KB of Non-Volatile Memory (NVRAM) - Two 64 KB EEPROMs - 128 MB NOR and 4/8 GB NAND Flash This patch is based on linux-4.7-rc1 and has been only boot tested. 
Limitations: This patch covers only models 171 and 173 No plans to support CPLD timers Known issues: All four PHYs work in polling mode Configuration is missing for: PCI IDSEL and PCI Interrupt definition Support is missing for: Cache and memory controllers (which are very similar to the 85xx ones but right now I don't know if we can re-use their support) Watchdog, USB, NVRAM, NOR, NAND, EEPROMs, VME, PMC/XMC and RTC Signed-off-by: Alessio Igor Bogani Signed-off-by: Scott Wood diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index 00cf88a..4cd612a 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -113,6 +113,7 @@ src-plat-$(CONFIG_EPAPR_BOOT) += epapr.c epapr-wrapper.c src-plat-$(CONFIG_PPC_PSERIES) += pseries-head.S src-plat-$(CONFIG_PPC_POWERNV) += pseries-head.S src-plat-$(CONFIG_PPC_IBM_CELL_BLADE) += pseries-head.S +src-plat-$(CONFIG_MVME7100) += motload-head.S mvme7100.c src-wlib := $(sort $(src-wlib-y)) src-plat := $(sort $(src-plat-y)) @@ -296,6 +297,9 @@ image-$(CONFIG_TQM8560) += cuImage.tqm8560 image-$(CONFIG_SBC8548) += cuImage.sbc8548 image-$(CONFIG_KSI8560) += cuImage.ksi8560 +# Board ports in arch/powerpc/platform/86xx/Kconfig +image-$(CONFIG_MVME7100) += dtbImage.mvme7100 + # Board ports in arch/powerpc/platform/embedded6xx/Kconfig image-$(CONFIG_STORCENTER) += cuImage.storcenter image-$(CONFIG_MPC7448HPC2) += cuImage.mpc7448hpc2 diff --git a/arch/powerpc/boot/dts/fsl/mvme7100.dts b/arch/powerpc/boot/dts/fsl/mvme7100.dts new file mode 100644 index 0000000..e2d306a --- /dev/null +++ b/arch/powerpc/boot/dts/fsl/mvme7100.dts @@ -0,0 +1,153 @@ +/* + * Device tree source for the Emerson/Artesyn MVME7100 + * + * Copyright 2016 Elettra-Sincrotrone Trieste S.C.p.A.
+ * + * Author: Alessio Igor Bogani + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ + +/include/ "mpc8641si-pre.dtsi" + +/ { + model = "MVME7100"; + compatible = "artesyn,MVME7100"; + + memory { + device_type = "memory"; + reg = <0x00000000 0x80000000>; + }; + + soc: soc@f1000000 { + ranges = <0x00000000 0xf1000000 0x00100000>; + + i2c@3000 { + hwmon@4c { + compatible = "dallas,max6649"; + reg = <0x4c>; + }; + + rtc@68 { + status = "disabled"; + }; + }; + + + enet0: ethernet@24000 { + phy-handle = <&phy0>; + phy-connection-type = "rgmii-id"; + }; + + mdio@24520 { + phy0: ethernet-phy@1 { + reg = <1>; + }; + phy1: ethernet-phy@2 { + reg = <2>; + }; + phy2: ethernet-phy@3 { + reg = <3>; + }; + phy3: ethernet-phy@4 { + reg = <4>; + }; + }; + + enet1: ethernet@25000 { + phy-handle = <&phy1>; + phy-connection-type = "rgmii-id"; + }; + + mdio@25520 { + status = "disabled"; + }; + + enet2: ethernet@26000 { + phy-handle = <&phy2>; + phy-connection-type = "rgmii-id"; + }; + + mdio@26520 { + status = "disabled"; + }; + + enet3: ethernet@27000 { + phy-handle = <&phy3>; + phy-connection-type = "rgmii-id"; + }; + + mdio@27520 { + status = "disabled"; + }; + + serial1: serial@4600 { + status = "disabled"; + }; + }; + + lbc: localbus@f1005000 { + reg = <0xf1005000 0x1000>; + + ranges = <0 0 0xf8000000 0x08000000 // NOR Flash (128MB) + 2 0 0xf2030000 0x00010000 // NAND Flash (8GB) + 3 0 0xf2400000 0x00080000 // MRAM (512KB) + 4 0 0xf2000000 0x00010000 // BCSR + 5 0 0xf2010000 0x00010000>; // QUART + + bcsr@4,0 { + compatible = "artesyn,mvme7100-bcsr"; + reg = <4 0 0x10000>; + }; + + serial@5,1000 { + device_type = "serial"; + compatible = "ns16550"; + reg = <5 0x1000 0x100>; + clock-frequency = <1843200>; + interrupts = <11 1 0 0>; + }; + + serial@5,2000 { + 
device_type = "serial"; + compatible = "ns16550"; + reg = <5 0x2000 0x100>; + clock-frequency = <1843200>; + interrupts = <11 1 0 0>; + }; + + serial@5,3000 { + device_type = "serial"; + compatible = "ns16550"; + reg = <5 0x3000 0x100>; + clock-frequency = <1843200>; + interrupts = <11 1 0 0>; + }; + + serial@5,4000 { + device_type = "serial"; + compatible = "ns16550"; + reg = <5 0x4000 0x100>; + clock-frequency = <1843200>; + interrupts = <11 1 0 0>; + }; + }; + + pci0: pcie@f1008000 { + status = "disabled"; + }; + + pci1: pcie@f1009000 { + status = "disabled"; + }; + + chosen { + linux,stdout-path = &serial0; + }; +}; + +/include/ "mpc8641si-post.dtsi" diff --git a/arch/powerpc/boot/motload-head.S b/arch/powerpc/boot/motload-head.S new file mode 100644 index 0000000..41cabb4 --- /dev/null +++ b/arch/powerpc/boot/motload-head.S @@ -0,0 +1,11 @@ +#include "ppc_asm.h" + + .text + .globl _zimage_start +_zimage_start: + mfmsr r10 + rlwinm r10,r10,0,~(1<<15) /* Clear MSR_EE */ + sync + mtmsr r10 + isync + b _zimage_start_lib diff --git a/arch/powerpc/boot/mvme7100.c b/arch/powerpc/boot/mvme7100.c new file mode 100644 index 0000000..8b0a932 --- /dev/null +++ b/arch/powerpc/boot/mvme7100.c @@ -0,0 +1,59 @@ +/* + * Motload compatibility for the Emerson/Artesyn MVME7100 + * + * Copyright 2016 Elettra-Sincrotrone Trieste S.C.p.A. + * + * Author: Alessio Igor Bogani + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ * + */ + +#include "ops.h" +#include "stdio.h" +#include "cuboot.h" + +#define TARGET_86xx +#define TARGET_HAS_ETH1 +#define TARGET_HAS_ETH2 +#define TARGET_HAS_ETH3 +#include "ppcboot.h" + +static bd_t bd; + +BSS_STACK(16384); + +static void mvme7100_fixups(void) +{ + void *devp; + unsigned long busfreq = bd.bi_busfreq * 1000000; + + dt_fixup_cpu_clocks(bd.bi_intfreq * 1000000, busfreq / 4, busfreq); + + devp = finddevice("/soc@f1000000"); + if (devp) + setprop(devp, "bus-frequency", &busfreq, sizeof(busfreq)); + + devp = finddevice("/soc/serial@4500"); + if (devp) + setprop(devp, "clock-frequency", &busfreq, sizeof(busfreq)); + + dt_fixup_memory(bd.bi_memstart, bd.bi_memsize); + + dt_fixup_mac_address_by_alias("ethernet0", bd.bi_enetaddr); + dt_fixup_mac_address_by_alias("ethernet1", bd.bi_enet1addr); + dt_fixup_mac_address_by_alias("ethernet2", bd.bi_enet2addr); + dt_fixup_mac_address_by_alias("ethernet3", bd.bi_enet3addr); +} + +void platform_init(unsigned long r3, unsigned long r4, unsigned long r5, + unsigned long r6, unsigned long r7) +{ + CUBOOT_INIT(); + fdt_init(_dtb_start); + serial_console_init(); + platform_ops.fixups = mvme7100_fixups; +} diff --git a/arch/powerpc/boot/ppcboot.h b/arch/powerpc/boot/ppcboot.h index 6ae6f90..453df42 100644 --- a/arch/powerpc/boot/ppcboot.h +++ b/arch/powerpc/boot/ppcboot.h @@ -43,7 +43,7 @@ typedef struct bd_info { unsigned long bi_sramstart; /* start of SRAM memory */ unsigned long bi_sramsize; /* size of SRAM memory */ #if defined(TARGET_8xx) || defined(TARGET_CPM2) || defined(TARGET_85xx) ||\ - defined(TARGET_83xx) + defined(TARGET_83xx) || defined(TARGET_86xx) unsigned long bi_immr_base; /* base of IMMR register */ #endif #if defined(TARGET_PPC_MPC52xx) diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper index 6a19fce..6681ec3 100755 --- a/arch/powerpc/boot/wrapper +++ b/arch/powerpc/boot/wrapper @@ -302,6 +302,11 @@ mvme5100) platformo="$object/fixed-head.o $object/mvme5100.o" binary=y ;; 
+mvme7100) + platformo="$object/motload-head.o $object/mvme7100.o" + link_address='0x4000000' + binary=y + ;; esac vmz="$tmpdir/`basename \"$kernel\"`.$ext" diff --git a/arch/powerpc/configs/86xx-hw.config b/arch/powerpc/configs/86xx-hw.config index f91f889..d3dd6b8 100644 --- a/arch/powerpc/configs/86xx-hw.config +++ b/arch/powerpc/configs/86xx-hw.config @@ -74,9 +74,9 @@ CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_8250_DETECT_IRQ=y CONFIG_SERIAL_8250_EXTENDED=y CONFIG_SERIAL_8250_MANY_PORTS=y -CONFIG_SERIAL_8250_NR_UARTS=2 +CONFIG_SERIAL_8250_NR_UARTS=5 CONFIG_SERIAL_8250_RSA=y -CONFIG_SERIAL_8250_RUNTIME_UARTS=2 +CONFIG_SERIAL_8250_RUNTIME_UARTS=5 CONFIG_SERIAL_8250_SHARE_IRQ=y CONFIG_SERIAL_8250=y CONFIG_SERIO_LIBPS2=y diff --git a/arch/powerpc/configs/mpc86xx_basic_defconfig b/arch/powerpc/configs/mpc86xx_basic_defconfig index 33af5c5..3283f05 100644 --- a/arch/powerpc/configs/mpc86xx_basic_defconfig +++ b/arch/powerpc/configs/mpc86xx_basic_defconfig @@ -8,3 +8,4 @@ CONFIG_GEF_SBC610=y CONFIG_MPC8610_HPCD=y CONFIG_MPC8641_HPCN=y CONFIG_SBC8641D=y +CONFIG_MVME7100=y diff --git a/arch/powerpc/platforms/86xx/Kconfig b/arch/powerpc/platforms/86xx/Kconfig index 1afd1e4..37b166e 100644 --- a/arch/powerpc/platforms/86xx/Kconfig +++ b/arch/powerpc/platforms/86xx/Kconfig @@ -61,6 +61,11 @@ config GEF_SBC610 help This option enables support for the GE SBC610. +config MVME7100 + bool "Artesyn MVME7100" + help + This option enables support for the Emerson/Artesyn MVME7100 board. 
+ endif config MPC8641 @@ -69,7 +74,8 @@ config MPC8641 select FSL_PCI if PCI select PPC_UDBG_16550 select MPIC - default y if MPC8641_HPCN || SBC8641D || GEF_SBC610 || GEF_SBC310 || GEF_PPC9A + default y if MPC8641_HPCN || SBC8641D || GEF_SBC610 || GEF_SBC310 || GEF_PPC9A \ + || MVME7100 config MPC8610 bool diff --git a/arch/powerpc/platforms/86xx/Makefile b/arch/powerpc/platforms/86xx/Makefile index 2d889ad..01958fe 100644 --- a/arch/powerpc/platforms/86xx/Makefile +++ b/arch/powerpc/platforms/86xx/Makefile @@ -10,3 +10,4 @@ obj-$(CONFIG_MPC8610_HPCD) += mpc8610_hpcd.o obj-$(CONFIG_GEF_SBC610) += gef_sbc610.o obj-$(CONFIG_GEF_SBC310) += gef_sbc310.o obj-$(CONFIG_GEF_PPC9A) += gef_ppc9a.o +obj-$(CONFIG_MVME7100) += mvme7100.o diff --git a/arch/powerpc/platforms/86xx/mvme7100.c b/arch/powerpc/platforms/86xx/mvme7100.c new file mode 100644 index 0000000..addb41e --- /dev/null +++ b/arch/powerpc/platforms/86xx/mvme7100.c @@ -0,0 +1,121 @@ +/* + * Board setup routines for the Emerson/Artesyn MVME7100 + * + * Copyright 2016 Elettra-Sincrotrone Trieste S.C.p.A. + * + * Author: Alessio Igor Bogani + * + * Based on earlier code by: + * + * Ajit Prem + * Copyright 2008 Emerson + * + * USB host fixup is borrowed by: + * + * Martyn Welch + * Copyright 2008 GE Intelligent Platforms Embedded Systems, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "mpc86xx.h" + +#define MVME7100_INTERRUPT_REG_2_OFFSET 0x05 +#define MVME7100_DS1375_MASK 0x40 +#define MVME7100_MAX6649_MASK 0x20 +#define MVME7100_ABORT_MASK 0x10 + +/* + * Setup the architecture + */ +static void __init mvme7100_setup_arch(void) +{ + struct device_node *bcsr_node; + void __iomem *mvme7100_regs = NULL; + u8 reg; + + if (ppc_md.progress) + ppc_md.progress("mvme7100_setup_arch()", 0); + +#ifdef CONFIG_SMP + mpc86xx_smp_init(); +#endif + + fsl_pci_assign_primary(); + + /* Remap BCSR registers */ + bcsr_node = of_find_compatible_node(NULL, NULL, + "artesyn,mvme7100-bcsr"); + if (bcsr_node) { + mvme7100_regs = of_iomap(bcsr_node, 0); + of_node_put(bcsr_node); + } + + if (mvme7100_regs) { + /* Disable ds1375, max6649, and abort interrupts */ + reg = readb(mvme7100_regs + MVME7100_INTERRUPT_REG_2_OFFSET); + reg |= MVME7100_DS1375_MASK | MVME7100_MAX6649_MASK + | MVME7100_ABORT_MASK; + writeb(reg, mvme7100_regs + MVME7100_INTERRUPT_REG_2_OFFSET); + } else + pr_warn("Unable to map board registers\n"); + + pr_info("MVME7100 board from Artesyn\n"); +} + +/* + * Called very early, device-tree isn't unflattened + */ +static int __init mvme7100_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + + return of_flat_dt_is_compatible(root, "artesyn,MVME7100"); +} + +static void mvme7100_usb_host_fixup(struct pci_dev *pdev) +{ + unsigned int val; + + if (!machine_is(mvme7100)) + return; + + /* Ensure only ports 1 & 2 are enabled */ + pci_read_config_dword(pdev, 0xe0, &val); + pci_write_config_dword(pdev, 0xe0, (val & ~7) | 0x2); + + /* System clock is 48-MHz Oscillator and EHCI Enabled. 
*/ + pci_write_config_dword(pdev, 0xe4, 1 << 5); +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NEC, PCI_DEVICE_ID_NEC_USB, + mvme7100_usb_host_fixup); + +machine_arch_initcall(mvme7100, mpc86xx_common_publish_devices); + +define_machine(mvme7100) { + .name = "MVME7100", + .probe = mvme7100_probe, + .setup_arch = mvme7100_setup_arch, + .init_IRQ = mpc86xx_init_irq, + .get_irq = mpic_get_irq, + .restart = fsl_rstcr_restart, + .time_init = mpc86xx_time_init, + .calibrate_decr = generic_calibrate_decr, + .progress = udbg_progress, +#ifdef CONFIG_PCI + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, +#endif +}; -- cgit v0.10.2 From b58dfa6d884cc1197746db880f909368e54ffde6 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 3 Feb 2016 16:50:28 +0100 Subject: powerpc: disable IDE subsystem in pq2fads_defconfig This patch disables deprecated IDE subsystem in pq2fads_defconfig (no IDE host drivers are selected in this config so there is no valid reason to enable IDE subsystem itself). Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Scott Wood diff --git a/arch/powerpc/configs/pq2fads_defconfig b/arch/powerpc/configs/pq2fads_defconfig index 3e336ee..1e77d45 100644 --- a/arch/powerpc/configs/pq2fads_defconfig +++ b/arch/powerpc/configs/pq2fads_defconfig @@ -40,7 +40,6 @@ CONFIG_MTD_CFI_I4=y CONFIG_MTD_CFI_INTELEXT=y CONFIG_MTD_PHYSMAP_OF=y CONFIG_BLK_DEV_LOOP=y -CONFIG_IDE=y CONFIG_NETDEVICES=y CONFIG_TUN=y CONFIG_FS_ENET=y -- cgit v0.10.2 From 2dc32d6d7f52f3f451b366241ffefaedb8a49243 Mon Sep 17 00:00:00 2001 From: Valentin Longchamp Date: Tue, 19 Apr 2016 11:37:21 +0200 Subject: powerpc: define the fman node for the kmcoge4 DTS Now that the FMAN mac driver has been merged the fman node is relevant. The kmcoge4 board implements 3 ethernet interfaces, 1 with a RGMII phy and 2 with fixed 1 Giga SGMII links. 
Signed-off-by: Valentin Longchamp Signed-off-by: Scott Wood diff --git a/arch/powerpc/boot/dts/fsl/kmcoge4.dts b/arch/powerpc/boot/dts/fsl/kmcoge4.dts index 2d4b64f..ae70a24 100644 --- a/arch/powerpc/boot/dts/fsl/kmcoge4.dts +++ b/arch/powerpc/boot/dts/fsl/kmcoge4.dts @@ -106,6 +106,43 @@ sata@221000 { status = "disabled"; }; + + fman0: fman@400000 { + enet0: ethernet@e0000 { + phy-connection-type = "sgmii"; + fixed-link { + speed = <1000>; + full-duplex; + }; + }; + mdio0: mdio@e1120 { + front_phy: ethernet-phy@11 { + reg = <0x11>; + }; + }; + + enet1: ethernet@e2000 { + phy-connection-type = "sgmii"; + fixed-link { + speed = <1000>; + full-duplex; + }; + }; + enet2: ethernet@e4000 { + status = "disabled"; + }; + + enet3: ethernet@e6000 { + status = "disabled"; + }; + enet4: ethernet@e8000 { + phy-handle = <&front_phy>; + phy-connection-type = "rgmii"; + }; + enet5: ethernet@f0000 { + status = "disabled"; + }; + }; }; rio: rapidio@ffe0c0000 { -- cgit v0.10.2 From f0590990ba6db16d5ca8af41a3ad9d3cd8567844 Mon Sep 17 00:00:00 2001 From: Zhao Qiang Date: Tue, 17 May 2016 10:38:57 +0800 Subject: QE: Add IC, SI and SIRAM document to device tree bindings. Add IC, SI and SIRAM document of QE to Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/qe.txt Signed-off-by: Zhao Qiang Acked-by: Rob Herring Signed-off-by: Scott Wood diff --git a/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/qe.txt b/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/qe.txt index 4f89302..d7afaff 100644 --- a/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/qe.txt +++ b/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/qe.txt @@ -69,6 +69,58 @@ Example: }; }; +* Interrupt Controller (IC) + +Required properties: +- compatible : should be "fsl,qe-ic". +- reg : Address range of IC register set. +- interrupts : interrupts generated by the device. +- interrupt-controller : this device is a interrupt controller. 
+ +Example: + + qeic: interrupt-controller@80 { + interrupt-controller; + compatible = "fsl,qe-ic"; + #address-cells = <0>; + #interrupt-cells = <1>; + reg = <0x80 0x80>; + interrupts = <95 2 0 0 94 2 0 0>; + }; + +* Serial Interface Block (SI) + +The SI manages the routing of eight TDM lines to the QE block serial drivers +, the MCC and the UCCs, for receive and transmit. + +Required properties: +- compatible : must be "fsl,<chip>-qe-si". For t1040, must contain + "fsl,t1040-qe-si". +- reg : Address range of SI register set. + +Example: + + si1: si@700 { + compatible = "fsl,t1040-qe-si"; + reg = <0x700 0x80>; + }; + +* Serial Interface Block RAM(SIRAM) + +store the routing entries of SI + +Required properties: +- compatible : should be "fsl,<chip>-qe-siram". For t1040, must contain + "fsl,t1040-qe-siram". +- reg : Address range of SI RAM. + +Example: + + siram1: siram@1000 { + compatible = "fsl,t1040-qe-siram"; + reg = <0x1000 0x800>; + }; + * QE Firmware Node This node defines a firmware binary that is embedded in the device tree, for -- cgit v0.10.2 From 44a0554c1b8c5e4a6cd4cf776e1bbc9093c899ca Mon Sep 17 00:00:00 2001 From: Zhao Qiang Date: Tue, 17 May 2016 10:38:58 +0800 Subject: QE: Add ucc hdlc document to bindings Add ucc hdlc document to Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/network.txt Signed-off-by: Zhao Qiang Acked-by: Rob Herring Signed-off-by: Scott Wood diff --git a/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/network.txt b/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/network.txt index 29b28b8..03c7416 100644 --- a/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/network.txt +++ b/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/network.txt @@ -41,3 +41,84 @@ Example: fsl,mdio-pin = <12>; fsl,mdc-pin = <13>; }; + +* HDLC + +Currently defined compatibles: +- fsl,ucc-hdlc + +Properties for fsl,ucc-hdlc: +- rx-clock-name +- tx-clock-name + Usage: required + Value type: + Definition : Must be "brg1"-"brg16" for internal clock
source, + Must be "clk1"-"clk24" for external clock source. + +- fsl,tdm-interface + Usage: optional + Value type: + Definition : Specify that hdlc is based on tdm-interface + +The property below is dependent on fsl,tdm-interface: +- fsl,rx-sync-clock + Usage: required + Value type: + Definition : Must be "none", "rsync_pin", "brg9-11" and "brg13-15". + +- fsl,tx-sync-clock + Usage: required + Value type: + Definition : Must be "none", "tsync_pin", "brg9-11" and "brg13-15". + +- fsl,tdm-framer-type + Usage: required for tdm interface + Value type: + Definition : "e1" or "t1".Now e1 and t1 are used, other framer types + are not supported. + +- fsl,tdm-id + Usage: required for tdm interface + Value type: + Definition : number of TDM ID + +- fsl,tx-timeslot-mask +- fsl,rx-timeslot-mask + Usage: required for tdm interface + Value type: + Definition : time slot mask for TDM operation. Indicates which time + slots used for transmitting and receiving. + +- fsl,siram-entry-id + Usage: required for tdm interface + Value type: + Definition : Must be 0,2,4...64. the number of TDM entry. + +- fsl,tdm-internal-loopback + usage: optional for tdm interface + value type: + Definition : Internal loopback connecting on TDM layer. 
+ +Example for tdm interface: + + ucc@2000 { + compatible = "fsl,ucc-hdlc"; + rx-clock-name = "clk8"; + tx-clock-name = "clk9"; + fsl,rx-sync-clock = "rsync_pin"; + fsl,tx-sync-clock = "tsync_pin"; + fsl,tx-timeslot-mask = <0xfffffffe>; + fsl,rx-timeslot-mask = <0xfffffffe>; + fsl,tdm-framer-type = "e1"; + fsl,tdm-id = <0>; + fsl,siram-entry-id = <0>; + fsl,tdm-interface; + }; + +Example for hdlc without tdm interface: + + ucc@2000 { + compatible = "fsl,ucc-hdlc"; + rx-clock-name = "brg1"; + tx-clock-name = "brg1"; + }; -- cgit v0.10.2 From ec31977aa50c390b8ab7d7d5af7414a00486ee4c Mon Sep 17 00:00:00 2001 From: Zhao Qiang Date: Tue, 17 May 2016 10:38:59 +0800 Subject: QE: Add uqe_serial document to bindings Add uqe_serial document to Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/uqe_serial.txt Signed-off-by: Zhao Qiang Acked-by: Rob Herring Signed-off-by: Scott Wood diff --git a/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/uqe_serial.txt b/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/uqe_serial.txt new file mode 100644 index 0000000..8823c86 --- /dev/null +++ b/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/uqe_serial.txt @@ -0,0 +1,17 @@ +* Serial + +Required Properties: +compatible : must be "fsl,<chip>-ucc-uart". For t1040, must be +"fsl,t1040-ucc-uart". +port-number : port number of UCC-UART +tx/rx-clock-name : should be "brg1"-"brg16" for internal clock source, + should be "clk1"-"clk28" for external clock source. + +Example: + + ucc_serial: ucc@2200 { + compatible = "fsl,t1040-ucc-uart"; + port-number = <0>; + rx-clock-name = "brg2"; + tx-clock-name = "brg2"; + }; -- cgit v0.10.2 From 0883c2c06fb5bcf5b9e008270827e63c09a88c1e Mon Sep 17 00:00:00 2001 From: Zhao Qiang Date: Tue, 17 May 2016 10:39:00 +0800 Subject: bindings: move cpm_qe binding from powerpc/fsl to soc/fsl cpm_qe is supported on both powerpc and arm.
and the QE code has been moved from arch/powerpc into drivers/soc/fsl, so move cpm_qe binding from powerpc/fsl to soc/fsl Signed-off-by: Zhao Qiang Acked-by: Rob Herring Signed-off-by: Scott Wood diff --git a/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/cpm.txt b/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/cpm.txt deleted file mode 100644 index 160c752..0000000 --- a/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/cpm.txt +++ /dev/null @@ -1,67 +0,0 @@ -* Freescale Communications Processor Module - -NOTE: This is an interim binding, and will likely change slightly, -as more devices are supported. The QE bindings especially are -incomplete. - -* Root CPM node - -Properties: -- compatible : "fsl,cpm1", "fsl,cpm2", or "fsl,qe". -- reg : A 48-byte region beginning with CPCR. - -Example: - cpm@119c0 { - #address-cells = <1>; - #size-cells = <1>; - #interrupt-cells = <2>; - compatible = "fsl,mpc8272-cpm", "fsl,cpm2"; - reg = <119c0 30>; - } - -* Properties common to multiple CPM/QE devices - -- fsl,cpm-command : This value is ORed with the opcode and command flag - to specify the device on which a CPM command operates. - -- fsl,cpm-brg : Indicates which baud rate generator the device - is associated with. If absent, an unused BRG - should be dynamically allocated. If zero, the - device uses an external clock rather than a BRG. - -- reg : Unless otherwise specified, the first resource represents the - scc/fcc/ucc registers, and the second represents the device's - parameter RAM region (if it has one). - -* Multi-User RAM (MURAM) - -The multi-user/dual-ported RAM is expressed as a bus under the CPM node. - -Ranges must be set up subject to the following restrictions: - -- Children's reg nodes must be offsets from the start of all muram, even - if the user-data area does not begin at zero. 
-- If multiple range entries are used, the difference between the parent - address and the child address must be the same in all, so that a single - mapping can cover them all while maintaining the ability to determine - CPM-side offsets with pointer subtraction. It is recommended that - multiple range entries not be used. -- A child address of zero must be translatable, even if no reg resources - contain it. - -A child "data" node must exist, compatible with "fsl,cpm-muram-data", to -indicate the portion of muram that is usable by the OS for arbitrary -purposes. The data node may have an arbitrary number of reg resources, -all of which contribute to the allocatable muram pool. - -Example, based on mpc8272: - muram@0 { - #address-cells = <1>; - #size-cells = <1>; - ranges = <0 0 10000>; - - data@0 { - compatible = "fsl,cpm-muram-data"; - reg = <0 2000 9800 800>; - }; - }; diff --git a/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/cpm/brg.txt b/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/cpm/brg.txt deleted file mode 100644 index 4c7d45e..0000000 --- a/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/cpm/brg.txt +++ /dev/null @@ -1,21 +0,0 @@ -* Baud Rate Generators - -Currently defined compatibles: -fsl,cpm-brg -fsl,cpm1-brg -fsl,cpm2-brg - -Properties: -- reg : There may be an arbitrary number of reg resources; BRG - numbers are assigned to these in order. -- clock-frequency : Specifies the base frequency driving - the BRG. - -Example: - brg@119f0 { - compatible = "fsl,mpc8272-brg", - "fsl,cpm2-brg", - "fsl,cpm-brg"; - reg = <119f0 10 115f0 10>; - clock-frequency = ; - }; diff --git a/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/cpm/i2c.txt b/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/cpm/i2c.txt deleted file mode 100644 index 87bc604..0000000 --- a/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/cpm/i2c.txt +++ /dev/null @@ -1,41 +0,0 @@ -* I2C - -The I2C controller is expressed as a bus under the CPM node. 
- -Properties: -- compatible : "fsl,cpm1-i2c", "fsl,cpm2-i2c" -- reg : On CPM2 devices, the second resource doesn't specify the I2C - Parameter RAM itself, but the I2C_BASE field of the CPM2 Parameter RAM - (typically 0x8afc 0x2). -- #address-cells : Should be one. The cell is the i2c device address with - the r/w bit set to zero. -- #size-cells : Should be zero. -- clock-frequency : Can be used to set the i2c clock frequency. If - unspecified, a default frequency of 60kHz is being used. -The following two properties are deprecated. They are only used by legacy -i2c drivers to find the bus to probe: -- linux,i2c-index : Can be used to hard code an i2c bus number. By default, - the bus number is dynamically assigned by the i2c core. -- linux,i2c-class : Can be used to override the i2c class. The class is used - by legacy i2c device drivers to find a bus in a specific context like - system management, video or sound. By default, I2C_CLASS_HWMON (1) is - being used. The definition of the classes can be found in - include/i2c/i2c.h - -Example, based on mpc823: - - i2c@860 { - compatible = "fsl,mpc823-i2c", - "fsl,cpm1-i2c"; - reg = <0x860 0x20 0x3c80 0x30>; - interrupts = <16>; - interrupt-parent = <&CPM_PIC>; - fsl,cpm-command = <0x10>; - #address-cells = <1>; - #size-cells = <0>; - - rtc@68 { - compatible = "dallas,ds1307"; - reg = <0x68>; - }; - }; diff --git a/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/cpm/pic.txt b/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/cpm/pic.txt deleted file mode 100644 index 8e3ee16..0000000 --- a/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/cpm/pic.txt +++ /dev/null @@ -1,18 +0,0 @@ -* Interrupt Controllers - -Currently defined compatibles: -- fsl,cpm1-pic - - only one interrupt cell -- fsl,pq1-pic -- fsl,cpm2-pic - - second interrupt cell is level/sense: - - 2 is falling edge - - 8 is active low - -Example: - interrupt-controller@10c00 { - #interrupt-cells = <2>; - interrupt-controller; - reg = <10c00 80>; - 
compatible = "mpc8272-pic", "fsl,cpm2-pic"; - }; diff --git a/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/cpm/usb.txt b/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/cpm/usb.txt deleted file mode 100644 index 74bfda4..0000000 --- a/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/cpm/usb.txt +++ /dev/null @@ -1,15 +0,0 @@ -* USB (Universal Serial Bus Controller) - -Properties: -- compatible : "fsl,cpm1-usb", "fsl,cpm2-usb", "fsl,qe-usb" - -Example: - usb@11bc0 { - #address-cells = <1>; - #size-cells = <0>; - compatible = "fsl,cpm2-usb"; - reg = <11b60 18 8b00 100>; - interrupts = ; - interrupt-parent = <&PIC>; - fsl,cpm-command = <2e600000>; - }; diff --git a/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/gpio.txt b/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/gpio.txt deleted file mode 100644 index 349f79f..0000000 --- a/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/gpio.txt +++ /dev/null @@ -1,38 +0,0 @@ -Every GPIO controller node must have #gpio-cells property defined, -this information will be used to translate gpio-specifiers. - -On CPM1 devices, all ports are using slightly different register layouts. -Ports A, C and D are 16bit ports and Ports B and E are 32bit ports. - -On CPM2 devices, all ports are 32bit ports and use a common register layout. - -Required properties: -- compatible : "fsl,cpm1-pario-bank-a", "fsl,cpm1-pario-bank-b", - "fsl,cpm1-pario-bank-c", "fsl,cpm1-pario-bank-d", - "fsl,cpm1-pario-bank-e", "fsl,cpm2-pario-bank" -- #gpio-cells : Should be two. The first cell is the pin number and the - second cell is used to specify optional parameters (currently unused). -- gpio-controller : Marks the port as GPIO controller. 
- -Example of three SOC GPIO banks defined as gpio-controller nodes: - - CPM1_PIO_A: gpio-controller@950 { - #gpio-cells = <2>; - compatible = "fsl,cpm1-pario-bank-a"; - reg = <0x950 0x10>; - gpio-controller; - }; - - CPM1_PIO_B: gpio-controller@ab8 { - #gpio-cells = <2>; - compatible = "fsl,cpm1-pario-bank-b"; - reg = <0xab8 0x10>; - gpio-controller; - }; - - CPM1_PIO_E: gpio-controller@ac8 { - #gpio-cells = <2>; - compatible = "fsl,cpm1-pario-bank-e"; - reg = <0xac8 0x18>; - gpio-controller; - }; diff --git a/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/network.txt b/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/network.txt deleted file mode 100644 index 03c7416..0000000 --- a/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/network.txt +++ /dev/null @@ -1,124 +0,0 @@ -* Network - -Currently defined compatibles: -- fsl,cpm1-scc-enet -- fsl,cpm2-scc-enet -- fsl,cpm1-fec-enet -- fsl,cpm2-fcc-enet (third resource is GFEMR) -- fsl,qe-enet - -Example: - - ethernet@11300 { - compatible = "fsl,mpc8272-fcc-enet", - "fsl,cpm2-fcc-enet"; - reg = <11300 20 8400 100 11390 1>; - local-mac-address = [ 00 00 00 00 00 00 ]; - interrupts = <20 8>; - interrupt-parent = <&PIC>; - phy-handle = <&PHY0>; - fsl,cpm-command = <12000300>; - }; - -* MDIO - -Currently defined compatibles: -fsl,pq1-fec-mdio (reg is same as first resource of FEC device) -fsl,cpm2-mdio-bitbang (reg is port C registers) - -Properties for fsl,cpm2-mdio-bitbang: -fsl,mdio-pin : pin of port C controlling mdio data -fsl,mdc-pin : pin of port C controlling mdio clock - -Example: - mdio@10d40 { - compatible = "fsl,mpc8272ads-mdio-bitbang", - "fsl,mpc8272-mdio-bitbang", - "fsl,cpm2-mdio-bitbang"; - reg = <10d40 14>; - #address-cells = <1>; - #size-cells = <0>; - fsl,mdio-pin = <12>; - fsl,mdc-pin = <13>; - }; - -* HDLC - -Currently defined compatibles: -- fsl,ucc-hdlc - -Properties for fsl,ucc-hdlc: -- rx-clock-name -- tx-clock-name - Usage: required - Value type: - Definition : Must be 
"brg1"-"brg16" for internal clock source, - Must be "clk1"-"clk24" for external clock source. - -- fsl,tdm-interface - Usage: optional - Value type: - Definition : Specify that hdlc is based on tdm-interface - -The property below is dependent on fsl,tdm-interface: -- fsl,rx-sync-clock - Usage: required - Value type: - Definition : Must be "none", "rsync_pin", "brg9-11" and "brg13-15". - -- fsl,tx-sync-clock - Usage: required - Value type: - Definition : Must be "none", "tsync_pin", "brg9-11" and "brg13-15". - -- fsl,tdm-framer-type - Usage: required for tdm interface - Value type: - Definition : "e1" or "t1".Now e1 and t1 are used, other framer types - are not supported. - -- fsl,tdm-id - Usage: required for tdm interface - Value type: - Definition : number of TDM ID - -- fsl,tx-timeslot-mask -- fsl,rx-timeslot-mask - Usage: required for tdm interface - Value type: - Definition : time slot mask for TDM operation. Indicates which time - slots used for transmitting and receiving. - -- fsl,siram-entry-id - Usage: required for tdm interface - Value type: - Definition : Must be 0,2,4...64. the number of TDM entry. - -- fsl,tdm-internal-loopback - usage: optional for tdm interface - value type: - Definition : Internal loopback connecting on TDM layer. 
- -Example for tdm interface: - - ucc@2000 { - compatible = "fsl,ucc-hdlc"; - rx-clock-name = "clk8"; - tx-clock-name = "clk9"; - fsl,rx-sync-clock = "rsync_pin"; - fsl,tx-sync-clock = "tsync_pin"; - fsl,tx-timeslot-mask = <0xfffffffe>; - fsl,rx-timeslot-mask = <0xfffffffe>; - fsl,tdm-framer-type = "e1"; - fsl,tdm-id = <0>; - fsl,siram-entry-id = <0>; - fsl,tdm-interface; - }; - -Example for hdlc without tdm interface: - - ucc@2000 { - compatible = "fsl,ucc-hdlc"; - rx-clock-name = "brg1"; - tx-clock-name = "brg1"; - }; diff --git a/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/qe.txt b/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/qe.txt deleted file mode 100644 index d7afaff..0000000 --- a/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/qe.txt +++ /dev/null @@ -1,167 +0,0 @@ -* Freescale QUICC Engine module (QE) -This represents qe module that is installed on PowerQUICC II Pro. - -NOTE: This is an interim binding; it should be updated to fit -in with the CPM binding later in this document. - -Basically, it is a bus of devices, that could act more or less -as a complete entity (UCC, USB etc ). All of them should be siblings on -the "root" qe node, using the common properties from there. -The description below applies to the qe of MPC8360 and -more nodes and properties would be extended in the future. - -i) Root QE device - -Required properties: -- compatible : should be "fsl,qe"; -- model : precise model of the QE, Can be "QE", "CPM", or "CPM2" -- reg : offset and length of the device registers. -- bus-frequency : the clock frequency for QUICC Engine. -- fsl,qe-num-riscs: define how many RISC engines the QE has. -- fsl,qe-num-snums: define how many serial number(SNUM) the QE can use for the - threads. 
- -Optional properties: -- fsl,firmware-phandle: - Usage: required only if there is no fsl,qe-firmware child node - Value type: - Definition: Points to a firmware node (see "QE Firmware Node" below) - that contains the firmware that should be uploaded for this QE. - The compatible property for the firmware node should say, - "fsl,qe-firmware". - -Recommended properties -- brg-frequency : the internal clock source frequency for baud-rate - generators in Hz. - -Example: - qe@e0100000 { - #address-cells = <1>; - #size-cells = <1>; - #interrupt-cells = <2>; - compatible = "fsl,qe"; - ranges = <0 e0100000 00100000>; - reg = ; - brg-frequency = <0>; - bus-frequency = <179A7B00>; - } - -* Multi-User RAM (MURAM) - -Required properties: -- compatible : should be "fsl,qe-muram", "fsl,cpm-muram". -- mode : the could be "host" or "slave". -- ranges : Should be defined as specified in 1) to describe the - translation of MURAM addresses. -- data-only : sub-node which defines the address area under MURAM - bus that can be allocated as data/parameter - -Example: - - muram@10000 { - compatible = "fsl,qe-muram", "fsl,cpm-muram"; - ranges = <0 00010000 0000c000>; - - data-only@0{ - compatible = "fsl,qe-muram-data", - "fsl,cpm-muram-data"; - reg = <0 c000>; - }; - }; - -* Interrupt Controller (IC) - -Required properties: -- compatible : should be "fsl,qe-ic". -- reg : Address range of IC register set. -- interrupts : interrupts generated by the device. -- interrupt-controller : this device is a interrupt controller. - -Example: - - qeic: interrupt-controller@80 { - interrupt-controller; - compatible = "fsl,qe-ic"; - #address-cells = <0>; - #interrupt-cells = <1>; - reg = <0x80 0x80>; - interrupts = <95 2 0 0 94 2 0 0>; - }; - -* Serial Interface Block (SI) - -The SI manages the routing of eight TDM lines to the QE block serial drivers -, the MCC and the UCCs, for receive and transmit. - -Required properties: -- compatible : must be "fsl,-qe-si". 
For t1040, must contain - "fsl,t1040-qe-si". -- reg : Address range of SI register set. - -Example: - - si1: si@700 { - compatible = "fsl,t1040-qe-si"; - reg = <0x700 0x80>; - }; - -* Serial Interface Block RAM(SIRAM) - -store the routing entries of SI - -Required properties: -- compatible : should be "fsl,-qe-siram". For t1040, must contain - "fsl,t1040-qe-siram". -- reg : Address range of SI RAM. - -Example: - - siram1: siram@1000 { - compatible = "fsl,t1040-qe-siram"; - reg = <0x1000 0x800>; - }; - -* QE Firmware Node - -This node defines a firmware binary that is embedded in the device tree, for -the purpose of passing the firmware from bootloader to the kernel, or from -the hypervisor to the guest. - -The firmware node itself contains the firmware binary contents, a compatible -property, and any firmware-specific properties. The node should be placed -inside a QE node that needs it. Doing so eliminates the need for a -fsl,firmware-phandle property. Other QE nodes that need the same firmware -should define an fsl,firmware-phandle property that points to the firmware node -in the first QE node. - -The fsl,firmware property can be specified in the DTS (possibly using incbin) -or can be inserted by the boot loader at boot time. - -Required properties: - - compatible - Usage: required - Value type: - Definition: A standard property. Specify a string that indicates what - kind of firmware it is. For QE, this should be "fsl,qe-firmware". - - - fsl,firmware - Usage: required - Value type: , encoded as an array of bytes - Definition: A standard property. This property contains the firmware - binary "blob". - -Example: - qe1@e0080000 { - compatible = "fsl,qe"; - qe_firmware:qe-firmware { - compatible = "fsl,qe-firmware"; - fsl,firmware = [0x70 0xcd 0x00 0x00 0x01 0x46 0x45 ...]; - }; - ... - }; - - qe2@e0090000 { - compatible = "fsl,qe"; - fsl,firmware-phandle = <&qe_firmware>; - ... 
- }; diff --git a/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/qe/firmware.txt b/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/qe/firmware.txt deleted file mode 100644 index 249db3a..0000000 --- a/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/qe/firmware.txt +++ /dev/null @@ -1,24 +0,0 @@ -* Uploaded QE firmware - - If a new firmware has been uploaded to the QE (usually by the - boot loader), then a 'firmware' child node should be added to the QE - node. This node provides information on the uploaded firmware that - device drivers may need. - - Required properties: - - id: The string name of the firmware. This is taken from the 'id' - member of the qe_firmware structure of the uploaded firmware. - Device drivers can search this string to determine if the - firmware they want is already present. - - extended-modes: The Extended Modes bitfield, taken from the - firmware binary. It is a 64-bit number represented - as an array of two 32-bit numbers. - - virtual-traps: The virtual traps, taken from the firmware binary. - It is an array of 8 32-bit numbers. - -Example: - firmware { - id = "Soft-UART"; - extended-modes = <0 0>; - virtual-traps = <0 0 0 0 0 0 0 0>; - }; diff --git a/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/qe/par_io.txt b/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/qe/par_io.txt deleted file mode 100644 index 6098426..0000000 --- a/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/qe/par_io.txt +++ /dev/null @@ -1,51 +0,0 @@ -* Parallel I/O Ports - -This node configures Parallel I/O ports for CPUs with QE support. -The node should reside in the "soc" node of the tree. For each -device that using parallel I/O ports, a child node should be created. -See the definition of the Pin configuration nodes below for more -information. - -Required properties: -- device_type : should be "par_io". -- reg : offset to the register set and its length. 
-- num-ports : number of Parallel I/O ports - -Example: -par_io@1400 { - reg = <1400 100>; - #address-cells = <1>; - #size-cells = <0>; - device_type = "par_io"; - num-ports = <7>; - ucc_pin@01 { - ...... - }; - -Note that "par_io" nodes are obsolete, and should not be used for -the new device trees. Instead, each Par I/O bank should be represented -via its own gpio-controller node: - -Required properties: -- #gpio-cells : should be "2". -- compatible : should be "fsl,<chip>-qe-pario-bank", - "fsl,mpc8323-qe-pario-bank". -- reg : offset to the register set and its length. -- gpio-controller : node to identify gpio controllers. - -Example: - qe_pio_a: gpio-controller@1400 { - #gpio-cells = <2>; - compatible = "fsl,mpc8360-qe-pario-bank", - "fsl,mpc8323-qe-pario-bank"; - reg = <0x1400 0x18>; - gpio-controller; - }; - - qe_pio_e: gpio-controller@1460 { - #gpio-cells = <2>; - compatible = "fsl,mpc8360-qe-pario-bank", - "fsl,mpc8323-qe-pario-bank"; - reg = <0x1460 0x18>; - gpio-controller; - }; diff --git a/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/qe/pincfg.txt b/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/qe/pincfg.txt deleted file mode 100644 index ec6ee2e..0000000 --- a/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/qe/pincfg.txt +++ /dev/null @@ -1,57 +0,0 @@ -* Pin configuration nodes - -Required properties: -- pio-map : array of pin configurations. Each pin is defined by 6 - integers. The six numbers are respectively: port, pin, dir, - open_drain, assignment, has_irq. - - port : port number of the pin; 0-6 represent port A-G in UM. - - pin : pin number in the port. - - dir : direction of the pin, should encode as follows: - - 0 = The pin is disabled - 1 = The pin is an output - 2 = The pin is an input - 3 = The pin is I/O - - - open_drain : indicates the pin is normal or wired-OR: - - 0 = The pin is actively driven as an output - 1 = The pin is an open-drain driver. As an output, the pin is - driven active-low, otherwise it is three-stated.
- - - assignment : function number of the pin according to the Pin Assignment - tables in User Manual. Each pin can have up to 4 possible functions in - QE and two options for CPM. - - has_irq : indicates if the pin is used as source of external - interrupts. - -Example: - ucc_pin@01 { - pio-map = < - /* port pin dir open_drain assignment has_irq */ - 0 3 1 0 1 0 /* TxD0 */ - 0 4 1 0 1 0 /* TxD1 */ - 0 5 1 0 1 0 /* TxD2 */ - 0 6 1 0 1 0 /* TxD3 */ - 1 6 1 0 3 0 /* TxD4 */ - 1 7 1 0 1 0 /* TxD5 */ - 1 9 1 0 2 0 /* TxD6 */ - 1 a 1 0 2 0 /* TxD7 */ - 0 9 2 0 1 0 /* RxD0 */ - 0 a 2 0 1 0 /* RxD1 */ - 0 b 2 0 1 0 /* RxD2 */ - 0 c 2 0 1 0 /* RxD3 */ - 0 d 2 0 1 0 /* RxD4 */ - 1 1 2 0 2 0 /* RxD5 */ - 1 0 2 0 2 0 /* RxD6 */ - 1 4 2 0 2 0 /* RxD7 */ - 0 7 1 0 1 0 /* TX_EN */ - 0 8 1 0 1 0 /* TX_ER */ - 0 f 2 0 1 0 /* RX_DV */ - 0 10 2 0 1 0 /* RX_ER */ - 0 0 2 0 1 0 /* RX_CLK */ - 2 9 1 0 3 0 /* GTX_CLK - CLK10 */ - 2 8 2 0 1 0>; /* GTX125 - CLK9 */ - }; - - diff --git a/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/qe/ucc.txt b/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/qe/ucc.txt deleted file mode 100644 index e47734b..0000000 --- a/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/qe/ucc.txt +++ /dev/null @@ -1,70 +0,0 @@ -* UCC (Unified Communications Controllers) - -Required properties: -- device_type : should be "network", "hldc", "uart", "transparent" - "bisync", "atm", or "serial". -- compatible : could be "ucc_geth" or "fsl_atm" and so on. -- cell-index : the ucc number(1-8), corresponding to UCCx in UM. -- reg : Offset and length of the register set for the device -- interrupts : where a is the interrupt number and b is a - field that represents an encoding of the sense and level - information for the interrupt. This should be encoded based on - the information in section 2) depending on the type of interrupt - controller you have. -- interrupt-parent : the phandle for the interrupt controller that - services interrupts for this device. 
-- pio-handle : The phandle for the Parallel I/O port configuration. -- port-number : for UART drivers, the port number to use, between 0 and 3. - This usually corresponds to the /dev/ttyQE device, e.g. <0> = /dev/ttyQE0. - The port number is added to the minor number of the device. Unlike the - CPM UART driver, the port-number is required for the QE UART driver. -- soft-uart : for UART drivers, if specified this means the QE UART device - driver should use "Soft-UART" mode, which is needed on some SOCs that have - broken UART hardware. Soft-UART is provided via a microcode upload. -- rx-clock-name: the UCC receive clock source - "none": clock source is disabled - "brg1" through "brg16": clock source is BRG1-BRG16, respectively - "clk1" through "clk24": clock source is CLK1-CLK24, respectively -- tx-clock-name: the UCC transmit clock source - "none": clock source is disabled - "brg1" through "brg16": clock source is BRG1-BRG16, respectively - "clk1" through "clk24": clock source is CLK1-CLK24, respectively -The following two properties are deprecated. rx-clock has been replaced -with rx-clock-name, and tx-clock has been replaced with tx-clock-name. -Drivers that currently use the deprecated properties should continue to -do so, in order to support older device trees, but they should be updated -to check for the new properties first. -- rx-clock : represents the UCC receive clock source. - 0x00 : clock source is disabled; - 0x1~0x10 : clock source is BRG1~BRG16 respectively; - 0x11~0x28: clock source is QE_CLK1~QE_CLK24 respectively. -- tx-clock: represents the UCC transmit clock source; - 0x00 : clock source is disabled; - 0x1~0x10 : clock source is BRG1~BRG16 respectively; - 0x11~0x28: clock source is QE_CLK1~QE_CLK24 respectively. - -Required properties for network device_type: -- mac-address : list of bytes representing the ethernet address. -- phy-handle : The phandle for the PHY connected to this controller. 
- -Recommended properties: -- phy-connection-type : a string naming the controller/PHY interface type, - i.e., "mii" (default), "rmii", "gmii", "rgmii", "rgmii-id" (Internal - Delay), "rgmii-txid" (delay on TX only), "rgmii-rxid" (delay on RX only), - "tbi", or "rtbi". - -Example: - ucc@2000 { - device_type = "network"; - compatible = "ucc_geth"; - cell-index = <1>; - reg = <2000 200>; - interrupts = ; - interrupt-parent = <700>; - mac-address = [ 00 04 9f 00 23 23 ]; - rx-clock = "none"; - tx-clock = "clk9"; - phy-handle = <212000>; - phy-connection-type = "gmii"; - pio-handle = <140001>; - }; diff --git a/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/qe/usb.txt b/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/qe/usb.txt deleted file mode 100644 index 9ccd5f3..0000000 --- a/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/qe/usb.txt +++ /dev/null @@ -1,37 +0,0 @@ -Freescale QUICC Engine USB Controller - -Required properties: -- compatible : should be "fsl,-qe-usb", "fsl,mpc8323-qe-usb". -- reg : the first two cells should contain usb registers location and - length, the next two two cells should contain PRAM location and - length. -- interrupts : should contain USB interrupt. -- interrupt-parent : interrupt source phandle. -- fsl,fullspeed-clock : specifies the full speed USB clock source: - "none": clock source is disabled - "brg1" through "brg16": clock source is BRG1-BRG16, respectively - "clk1" through "clk24": clock source is CLK1-CLK24, respectively -- fsl,lowspeed-clock : specifies the low speed USB clock source: - "none": clock source is disabled - "brg1" through "brg16": clock source is BRG1-BRG16, respectively - "clk1" through "clk24": clock source is CLK1-CLK24, respectively -- hub-power-budget : USB power budget for the root hub, in mA. -- gpios : should specify GPIOs in this order: USBOE, USBTP, USBTN, USBRP, - USBRN, SPEED (optional), and POWER (optional). 
- -Example: - -usb@6c0 { - compatible = "fsl,mpc8360-qe-usb", "fsl,mpc8323-qe-usb"; - reg = <0x6c0 0x40 0x8b00 0x100>; - interrupts = <11>; - interrupt-parent = <&qeic>; - fsl,fullspeed-clock = "clk21"; - gpios = <&qe_pio_b 2 0 /* USBOE */ - &qe_pio_b 3 0 /* USBTP */ - &qe_pio_b 8 0 /* USBTN */ - &qe_pio_b 9 0 /* USBRP */ - &qe_pio_b 11 0 /* USBRN */ - &qe_pio_e 20 0 /* SPEED */ - &qe_pio_e 21 0 /* POWER */>; -}; diff --git a/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/serial.txt b/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/serial.txt deleted file mode 100644 index 2ea76d9..0000000 --- a/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/serial.txt +++ /dev/null @@ -1,32 +0,0 @@ -* Serial - -Currently defined compatibles: -- fsl,cpm1-smc-uart -- fsl,cpm2-smc-uart -- fsl,cpm1-scc-uart -- fsl,cpm2-scc-uart -- fsl,qe-uart - -Modem control lines connected to GPIO controllers are listed in the gpios -property as described in booting-without-of.txt, section IX.1 in the following -order: - -CTS, RTS, DCD, DSR, DTR, and RI. - -The gpios property is optional and can be left out when control lines are -not used. - -Example: - - serial@11a00 { - device_type = "serial"; - compatible = "fsl,mpc8272-scc-uart", - "fsl,cpm2-scc-uart"; - reg = <11a00 20 8000 100>; - interrupts = <28 8>; - interrupt-parent = <&PIC>; - fsl,cpm-brg = <1>; - fsl,cpm-command = <00800000>; - gpios = <&gpio_c 15 0 - &gpio_d 29 0>; - }; diff --git a/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/uqe_serial.txt b/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/uqe_serial.txt deleted file mode 100644 index 8823c86..0000000 --- a/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/uqe_serial.txt +++ /dev/null @@ -1,17 +0,0 @@ -* Serial - -Required Properties: -compatible : must be "fsl,<chip>-ucc-uart". For t1040, must be -"fsl,t1040-ucc-uart".
-port-number : port number of UCC-UART -tx/rx-clock-name : should be "brg1"-"brg16" for internal clock source, - should be "clk1"-"clk28" for external clock source. - -Example: - - ucc_serial: ucc@2200 { - compatible = "fsl,t1040-ucc-uart"; - port-number = <0>; - rx-clock-name = "brg2"; - tx-clock-name = "brg2"; - }; diff --git a/Documentation/devicetree/bindings/soc/fsl/cpm_qe/cpm.txt b/Documentation/devicetree/bindings/soc/fsl/cpm_qe/cpm.txt new file mode 100644 index 0000000..160c752 --- /dev/null +++ b/Documentation/devicetree/bindings/soc/fsl/cpm_qe/cpm.txt @@ -0,0 +1,67 @@ +* Freescale Communications Processor Module + +NOTE: This is an interim binding, and will likely change slightly, +as more devices are supported. The QE bindings especially are +incomplete. + +* Root CPM node + +Properties: +- compatible : "fsl,cpm1", "fsl,cpm2", or "fsl,qe". +- reg : A 48-byte region beginning with CPCR. + +Example: + cpm@119c0 { + #address-cells = <1>; + #size-cells = <1>; + #interrupt-cells = <2>; + compatible = "fsl,mpc8272-cpm", "fsl,cpm2"; + reg = <119c0 30>; + } + +* Properties common to multiple CPM/QE devices + +- fsl,cpm-command : This value is ORed with the opcode and command flag + to specify the device on which a CPM command operates. + +- fsl,cpm-brg : Indicates which baud rate generator the device + is associated with. If absent, an unused BRG + should be dynamically allocated. If zero, the + device uses an external clock rather than a BRG. + +- reg : Unless otherwise specified, the first resource represents the + scc/fcc/ucc registers, and the second represents the device's + parameter RAM region (if it has one). + +* Multi-User RAM (MURAM) + +The multi-user/dual-ported RAM is expressed as a bus under the CPM node. + +Ranges must be set up subject to the following restrictions: + +- Children's reg nodes must be offsets from the start of all muram, even + if the user-data area does not begin at zero. 
+- If multiple range entries are used, the difference between the parent + address and the child address must be the same in all, so that a single + mapping can cover them all while maintaining the ability to determine + CPM-side offsets with pointer subtraction. It is recommended that + multiple range entries not be used. +- A child address of zero must be translatable, even if no reg resources + contain it. + +A child "data" node must exist, compatible with "fsl,cpm-muram-data", to +indicate the portion of muram that is usable by the OS for arbitrary +purposes. The data node may have an arbitrary number of reg resources, +all of which contribute to the allocatable muram pool. + +Example, based on mpc8272: + muram@0 { + #address-cells = <1>; + #size-cells = <1>; + ranges = <0 0 10000>; + + data@0 { + compatible = "fsl,cpm-muram-data"; + reg = <0 2000 9800 800>; + }; + }; diff --git a/Documentation/devicetree/bindings/soc/fsl/cpm_qe/cpm/brg.txt b/Documentation/devicetree/bindings/soc/fsl/cpm_qe/cpm/brg.txt new file mode 100644 index 0000000..4c7d45e --- /dev/null +++ b/Documentation/devicetree/bindings/soc/fsl/cpm_qe/cpm/brg.txt @@ -0,0 +1,21 @@ +* Baud Rate Generators + +Currently defined compatibles: +fsl,cpm-brg +fsl,cpm1-brg +fsl,cpm2-brg + +Properties: +- reg : There may be an arbitrary number of reg resources; BRG + numbers are assigned to these in order. +- clock-frequency : Specifies the base frequency driving + the BRG. + +Example: + brg@119f0 { + compatible = "fsl,mpc8272-brg", + "fsl,cpm2-brg", + "fsl,cpm-brg"; + reg = <119f0 10 115f0 10>; + clock-frequency = ; + }; diff --git a/Documentation/devicetree/bindings/soc/fsl/cpm_qe/cpm/i2c.txt b/Documentation/devicetree/bindings/soc/fsl/cpm_qe/cpm/i2c.txt new file mode 100644 index 0000000..87bc604 --- /dev/null +++ b/Documentation/devicetree/bindings/soc/fsl/cpm_qe/cpm/i2c.txt @@ -0,0 +1,41 @@ +* I2C + +The I2C controller is expressed as a bus under the CPM node. 
+ +Properties: +- compatible : "fsl,cpm1-i2c", "fsl,cpm2-i2c" +- reg : On CPM2 devices, the second resource doesn't specify the I2C + Parameter RAM itself, but the I2C_BASE field of the CPM2 Parameter RAM + (typically 0x8afc 0x2). +- #address-cells : Should be one. The cell is the i2c device address with + the r/w bit set to zero. +- #size-cells : Should be zero. +- clock-frequency : Can be used to set the i2c clock frequency. If + unspecified, a default frequency of 60kHz is being used. +The following two properties are deprecated. They are only used by legacy +i2c drivers to find the bus to probe: +- linux,i2c-index : Can be used to hard code an i2c bus number. By default, + the bus number is dynamically assigned by the i2c core. +- linux,i2c-class : Can be used to override the i2c class. The class is used + by legacy i2c device drivers to find a bus in a specific context like + system management, video or sound. By default, I2C_CLASS_HWMON (1) is + being used. The definition of the classes can be found in + include/i2c/i2c.h + +Example, based on mpc823: + + i2c@860 { + compatible = "fsl,mpc823-i2c", + "fsl,cpm1-i2c"; + reg = <0x860 0x20 0x3c80 0x30>; + interrupts = <16>; + interrupt-parent = <&CPM_PIC>; + fsl,cpm-command = <0x10>; + #address-cells = <1>; + #size-cells = <0>; + + rtc@68 { + compatible = "dallas,ds1307"; + reg = <0x68>; + }; + }; diff --git a/Documentation/devicetree/bindings/soc/fsl/cpm_qe/cpm/pic.txt b/Documentation/devicetree/bindings/soc/fsl/cpm_qe/cpm/pic.txt new file mode 100644 index 0000000..8e3ee16 --- /dev/null +++ b/Documentation/devicetree/bindings/soc/fsl/cpm_qe/cpm/pic.txt @@ -0,0 +1,18 @@ +* Interrupt Controllers + +Currently defined compatibles: +- fsl,cpm1-pic + - only one interrupt cell +- fsl,pq1-pic +- fsl,cpm2-pic + - second interrupt cell is level/sense: + - 2 is falling edge + - 8 is active low + +Example: + interrupt-controller@10c00 { + #interrupt-cells = <2>; + interrupt-controller; + reg = <10c00 80>; + compatible = 
"mpc8272-pic", "fsl,cpm2-pic"; + }; diff --git a/Documentation/devicetree/bindings/soc/fsl/cpm_qe/cpm/usb.txt b/Documentation/devicetree/bindings/soc/fsl/cpm_qe/cpm/usb.txt new file mode 100644 index 0000000..74bfda4 --- /dev/null +++ b/Documentation/devicetree/bindings/soc/fsl/cpm_qe/cpm/usb.txt @@ -0,0 +1,15 @@ +* USB (Universal Serial Bus Controller) + +Properties: +- compatible : "fsl,cpm1-usb", "fsl,cpm2-usb", "fsl,qe-usb" + +Example: + usb@11bc0 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,cpm2-usb"; + reg = <11b60 18 8b00 100>; + interrupts = ; + interrupt-parent = <&PIC>; + fsl,cpm-command = <2e600000>; + }; diff --git a/Documentation/devicetree/bindings/soc/fsl/cpm_qe/gpio.txt b/Documentation/devicetree/bindings/soc/fsl/cpm_qe/gpio.txt new file mode 100644 index 0000000..349f79f --- /dev/null +++ b/Documentation/devicetree/bindings/soc/fsl/cpm_qe/gpio.txt @@ -0,0 +1,38 @@ +Every GPIO controller node must have #gpio-cells property defined, +this information will be used to translate gpio-specifiers. + +On CPM1 devices, all ports are using slightly different register layouts. +Ports A, C and D are 16bit ports and Ports B and E are 32bit ports. + +On CPM2 devices, all ports are 32bit ports and use a common register layout. + +Required properties: +- compatible : "fsl,cpm1-pario-bank-a", "fsl,cpm1-pario-bank-b", + "fsl,cpm1-pario-bank-c", "fsl,cpm1-pario-bank-d", + "fsl,cpm1-pario-bank-e", "fsl,cpm2-pario-bank" +- #gpio-cells : Should be two. The first cell is the pin number and the + second cell is used to specify optional parameters (currently unused). +- gpio-controller : Marks the port as GPIO controller. 
+ +Example of three SOC GPIO banks defined as gpio-controller nodes: + + CPM1_PIO_A: gpio-controller@950 { + #gpio-cells = <2>; + compatible = "fsl,cpm1-pario-bank-a"; + reg = <0x950 0x10>; + gpio-controller; + }; + + CPM1_PIO_B: gpio-controller@ab8 { + #gpio-cells = <2>; + compatible = "fsl,cpm1-pario-bank-b"; + reg = <0xab8 0x10>; + gpio-controller; + }; + + CPM1_PIO_E: gpio-controller@ac8 { + #gpio-cells = <2>; + compatible = "fsl,cpm1-pario-bank-e"; + reg = <0xac8 0x18>; + gpio-controller; + }; diff --git a/Documentation/devicetree/bindings/soc/fsl/cpm_qe/network.txt b/Documentation/devicetree/bindings/soc/fsl/cpm_qe/network.txt new file mode 100644 index 0000000..03c7416 --- /dev/null +++ b/Documentation/devicetree/bindings/soc/fsl/cpm_qe/network.txt @@ -0,0 +1,124 @@ +* Network + +Currently defined compatibles: +- fsl,cpm1-scc-enet +- fsl,cpm2-scc-enet +- fsl,cpm1-fec-enet +- fsl,cpm2-fcc-enet (third resource is GFEMR) +- fsl,qe-enet + +Example: + + ethernet@11300 { + compatible = "fsl,mpc8272-fcc-enet", + "fsl,cpm2-fcc-enet"; + reg = <11300 20 8400 100 11390 1>; + local-mac-address = [ 00 00 00 00 00 00 ]; + interrupts = <20 8>; + interrupt-parent = <&PIC>; + phy-handle = <&PHY0>; + fsl,cpm-command = <12000300>; + }; + +* MDIO + +Currently defined compatibles: +fsl,pq1-fec-mdio (reg is same as first resource of FEC device) +fsl,cpm2-mdio-bitbang (reg is port C registers) + +Properties for fsl,cpm2-mdio-bitbang: +fsl,mdio-pin : pin of port C controlling mdio data +fsl,mdc-pin : pin of port C controlling mdio clock + +Example: + mdio@10d40 { + compatible = "fsl,mpc8272ads-mdio-bitbang", + "fsl,mpc8272-mdio-bitbang", + "fsl,cpm2-mdio-bitbang"; + reg = <10d40 14>; + #address-cells = <1>; + #size-cells = <0>; + fsl,mdio-pin = <12>; + fsl,mdc-pin = <13>; + }; + +* HDLC + +Currently defined compatibles: +- fsl,ucc-hdlc + +Properties for fsl,ucc-hdlc: +- rx-clock-name +- tx-clock-name + Usage: required + Value type: + Definition : Must be "brg1"-"brg16" for internal 
clock source, + Must be "clk1"-"clk24" for external clock source. + +- fsl,tdm-interface + Usage: optional + Value type: <empty> + Definition : Specify that hdlc is based on tdm-interface + +The properties below are dependent on fsl,tdm-interface: +- fsl,rx-sync-clock + Usage: required + Value type: <string> + Definition : Must be "none", "rsync_pin", "brg9-11" or "brg13-15". + +- fsl,tx-sync-clock + Usage: required + Value type: <string> + Definition : Must be "none", "tsync_pin", "brg9-11" or "brg13-15". + +- fsl,tdm-framer-type + Usage: required for tdm interface + Value type: <string> + Definition : "e1" or "t1". Only e1 and t1 are used for now; other framer types + are not supported. + +- fsl,tdm-id + Usage: required for tdm interface + Value type: <u32> + Definition : number of TDM ID + +- fsl,tx-timeslot-mask +- fsl,rx-timeslot-mask + Usage: required for tdm interface + Value type: <u32> + Definition : time slot mask for TDM operation. Indicates which time + slots are used for transmitting and receiving. + +- fsl,siram-entry-id + Usage: required for tdm interface + Value type: <u32> + Definition : Must be 0,2,4...64. The number of the TDM entry. + +- fsl,tdm-internal-loopback + Usage: optional for tdm interface + Value type: <empty> + Definition : Internal loopback connecting on TDM layer. 
+ +Example for tdm interface: + + ucc@2000 { + compatible = "fsl,ucc-hdlc"; + rx-clock-name = "clk8"; + tx-clock-name = "clk9"; + fsl,rx-sync-clock = "rsync_pin"; + fsl,tx-sync-clock = "tsync_pin"; + fsl,tx-timeslot-mask = <0xfffffffe>; + fsl,rx-timeslot-mask = <0xfffffffe>; + fsl,tdm-framer-type = "e1"; + fsl,tdm-id = <0>; + fsl,siram-entry-id = <0>; + fsl,tdm-interface; + }; + +Example for hdlc without tdm interface: + + ucc@2000 { + compatible = "fsl,ucc-hdlc"; + rx-clock-name = "brg1"; + tx-clock-name = "brg1"; + }; diff --git a/Documentation/devicetree/bindings/soc/fsl/cpm_qe/qe.txt b/Documentation/devicetree/bindings/soc/fsl/cpm_qe/qe.txt new file mode 100644 index 0000000..d7afaff --- /dev/null +++ b/Documentation/devicetree/bindings/soc/fsl/cpm_qe/qe.txt @@ -0,0 +1,167 @@ +* Freescale QUICC Engine module (QE) +This represents the QE module that is installed on PowerQUICC II Pro. + +NOTE: This is an interim binding; it should be updated to fit +in with the CPM binding later in this document. + +Basically, it is a bus of devices that could act more or less +as a complete entity (UCC, USB, etc.). All of them should be siblings on +the "root" qe node, using the common properties from there. +The description below applies to the qe of MPC8360, and +more nodes and properties will be added in the future. + +i) Root QE device + +Required properties: +- compatible : should be "fsl,qe"; +- model : precise model of the QE, can be "QE", "CPM", or "CPM2" +- reg : offset and length of the device registers. +- bus-frequency : the clock frequency for QUICC Engine. +- fsl,qe-num-riscs: define how many RISC engines the QE has. +- fsl,qe-num-snums: define how many serial numbers (SNUM) the QE can use for the + threads. + +Optional properties: +- fsl,firmware-phandle: + Usage: required only if there is no fsl,qe-firmware child node + Value type: <phandle> + Definition: Points to a firmware node (see "QE Firmware Node" below) + that contains the firmware that should be uploaded for this QE. 
+ The compatible property for the firmware node should say, + "fsl,qe-firmware". + +Recommended properties: +- brg-frequency : the internal clock source frequency for baud-rate + generators in Hz. + +Example: + qe@e0100000 { + #address-cells = <1>; + #size-cells = <1>; + #interrupt-cells = <2>; + compatible = "fsl,qe"; + ranges = <0 e0100000 00100000>; + reg = <e0100000 480>; + brg-frequency = <0>; + bus-frequency = <179A7B00>; + }; + +* Multi-User RAM (MURAM) + +Required properties: +- compatible : should be "fsl,qe-muram", "fsl,cpm-muram". +- mode : the mode could be "host" or "slave". +- ranges : Should be defined as specified in 1) to describe the + translation of MURAM addresses. +- data-only : sub-node which defines the address area under MURAM + bus that can be allocated as data/parameter + +Example: + + muram@10000 { + compatible = "fsl,qe-muram", "fsl,cpm-muram"; + ranges = <0 00010000 0000c000>; + + data-only@0{ + compatible = "fsl,qe-muram-data", + "fsl,cpm-muram-data"; + reg = <0 c000>; + }; + }; + +* Interrupt Controller (IC) + +Required properties: +- compatible : should be "fsl,qe-ic". +- reg : Address range of IC register set. +- interrupts : interrupts generated by the device. +- interrupt-controller : this device is an interrupt controller. + +Example: + + qeic: interrupt-controller@80 { + interrupt-controller; + compatible = "fsl,qe-ic"; + #address-cells = <0>; + #interrupt-cells = <1>; + reg = <0x80 0x80>; + interrupts = <95 2 0 0 94 2 0 0>; + }; + +* Serial Interface Block (SI) + +The SI manages the routing of eight TDM lines to the QE block serial drivers, +the MCC and the UCCs, for receive and transmit. + +Required properties: +- compatible : must be "fsl,<chip>-qe-si". For t1040, must contain + "fsl,t1040-qe-si". +- reg : Address range of SI register set. 
+ +Example: + + si1: si@700 { + compatible = "fsl,t1040-qe-si"; + reg = <0x700 0x80>; + }; + +* Serial Interface Block RAM(SIRAM) + +The SIRAM stores the routing entries of the SI. + +Required properties: +- compatible : should be "fsl,<chip>-qe-siram". For t1040, must contain + "fsl,t1040-qe-siram". +- reg : Address range of SI RAM. + +Example: + + siram1: siram@1000 { + compatible = "fsl,t1040-qe-siram"; + reg = <0x1000 0x800>; + }; + +* QE Firmware Node + +This node defines a firmware binary that is embedded in the device tree, for +the purpose of passing the firmware from the bootloader to the kernel, or from +the hypervisor to the guest. + +The firmware node itself contains the firmware binary contents, a compatible +property, and any firmware-specific properties. The node should be placed +inside a QE node that needs it. Doing so eliminates the need for a +fsl,firmware-phandle property. Other QE nodes that need the same firmware +should define an fsl,firmware-phandle property that points to the firmware node +in the first QE node. + +The fsl,firmware property can be specified in the DTS (possibly using incbin) +or can be inserted by the boot loader at boot time. + +Required properties: + - compatible + Usage: required + Value type: <string> + Definition: A standard property. Specify a string that indicates what + kind of firmware it is. For QE, this should be "fsl,qe-firmware". + + - fsl,firmware + Usage: required + Value type: <prop-encoded-array>, encoded as an array of bytes + Definition: A standard property. This property contains the firmware + binary "blob". + +Example: + qe1@e0080000 { + compatible = "fsl,qe"; + qe_firmware:qe-firmware { + compatible = "fsl,qe-firmware"; + fsl,firmware = [0x70 0xcd 0x00 0x00 0x01 0x46 0x45 ...]; + }; + ... + }; + + qe2@e0090000 { + compatible = "fsl,qe"; + fsl,firmware-phandle = <&qe_firmware>; + ... 
+ }; diff --git a/Documentation/devicetree/bindings/soc/fsl/cpm_qe/qe/firmware.txt b/Documentation/devicetree/bindings/soc/fsl/cpm_qe/qe/firmware.txt new file mode 100644 index 0000000..249db3a --- /dev/null +++ b/Documentation/devicetree/bindings/soc/fsl/cpm_qe/qe/firmware.txt @@ -0,0 +1,24 @@ +* Uploaded QE firmware + + If a new firmware has been uploaded to the QE (usually by the + boot loader), then a 'firmware' child node should be added to the QE + node. This node provides information on the uploaded firmware that + device drivers may need. + + Required properties: + - id: The string name of the firmware. This is taken from the 'id' + member of the qe_firmware structure of the uploaded firmware. + Device drivers can search this string to determine if the + firmware they want is already present. + - extended-modes: The Extended Modes bitfield, taken from the + firmware binary. It is a 64-bit number represented + as an array of two 32-bit numbers. + - virtual-traps: The virtual traps, taken from the firmware binary. + It is an array of 8 32-bit numbers. + +Example: + firmware { + id = "Soft-UART"; + extended-modes = <0 0>; + virtual-traps = <0 0 0 0 0 0 0 0>; + }; diff --git a/Documentation/devicetree/bindings/soc/fsl/cpm_qe/qe/par_io.txt b/Documentation/devicetree/bindings/soc/fsl/cpm_qe/qe/par_io.txt new file mode 100644 index 0000000..6098426 --- /dev/null +++ b/Documentation/devicetree/bindings/soc/fsl/cpm_qe/qe/par_io.txt @@ -0,0 +1,51 @@ +* Parallel I/O Ports + +This node configures Parallel I/O ports for CPUs with QE support. +The node should reside in the "soc" node of the tree. For each +device that uses parallel I/O ports, a child node should be created. +See the definition of the Pin configuration nodes below for more +information. + +Required properties: +- device_type : should be "par_io". +- reg : offset to the register set and its length. 
+- num-ports : number of Parallel I/O ports + +Example: +par_io@1400 { + reg = <1400 100>; + #address-cells = <1>; + #size-cells = <0>; + device_type = "par_io"; + num-ports = <7>; + ucc_pin@01 { + ...... + }; + +Note that "par_io" nodes are obsolete, and should not be used for +new device trees. Instead, each Par I/O bank should be represented +via its own gpio-controller node: + +Required properties: +- #gpio-cells : should be "2". +- compatible : should be "fsl,<chip>-qe-pario-bank", + "fsl,mpc8323-qe-pario-bank". +- reg : offset to the register set and its length. +- gpio-controller : node to identify gpio controllers. + +Example: + qe_pio_a: gpio-controller@1400 { + #gpio-cells = <2>; + compatible = "fsl,mpc8360-qe-pario-bank", + "fsl,mpc8323-qe-pario-bank"; + reg = <0x1400 0x18>; + gpio-controller; + }; + + qe_pio_e: gpio-controller@1460 { + #gpio-cells = <2>; + compatible = "fsl,mpc8360-qe-pario-bank", + "fsl,mpc8323-qe-pario-bank"; + reg = <0x1460 0x18>; + gpio-controller; + }; diff --git a/Documentation/devicetree/bindings/soc/fsl/cpm_qe/qe/pincfg.txt b/Documentation/devicetree/bindings/soc/fsl/cpm_qe/qe/pincfg.txt new file mode 100644 index 0000000..ec6ee2e --- /dev/null +++ b/Documentation/devicetree/bindings/soc/fsl/cpm_qe/qe/pincfg.txt @@ -0,0 +1,57 @@ +* Pin configuration nodes + +Required properties: +- pio-map : array of pin configurations. Each pin is defined by 6 + integers. The six numbers are respectively: port, pin, dir, + open_drain, assignment, has_irq. + - port : port number of the pin; 0-6 represent port A-G in UM. + - pin : pin number in the port. + - dir : direction of the pin, should encode as follows: + + 0 = The pin is disabled + 1 = The pin is an output + 2 = The pin is an input + 3 = The pin is I/O + + - open_drain : indicates the pin is normal or wired-OR: + + 0 = The pin is actively driven as an output + 1 = The pin is an open-drain driver. As an output, the pin is + driven active-low, otherwise it is three-stated. 
+ + - assignment : function number of the pin according to the Pin Assignment + tables in the User Manual. Each pin can have up to 4 possible functions in + QE and two options for CPM. + - has_irq : indicates if the pin is used as a source of external + interrupts. + +Example: + ucc_pin@01 { + pio-map = < + /* port pin dir open_drain assignment has_irq */ + 0 3 1 0 1 0 /* TxD0 */ + 0 4 1 0 1 0 /* TxD1 */ + 0 5 1 0 1 0 /* TxD2 */ + 0 6 1 0 1 0 /* TxD3 */ + 1 6 1 0 3 0 /* TxD4 */ + 1 7 1 0 1 0 /* TxD5 */ + 1 9 1 0 2 0 /* TxD6 */ + 1 a 1 0 2 0 /* TxD7 */ + 0 9 2 0 1 0 /* RxD0 */ + 0 a 2 0 1 0 /* RxD1 */ + 0 b 2 0 1 0 /* RxD2 */ + 0 c 2 0 1 0 /* RxD3 */ + 0 d 2 0 1 0 /* RxD4 */ + 1 1 2 0 2 0 /* RxD5 */ + 1 0 2 0 2 0 /* RxD6 */ + 1 4 2 0 2 0 /* RxD7 */ + 0 7 1 0 1 0 /* TX_EN */ + 0 8 1 0 1 0 /* TX_ER */ + 0 f 2 0 1 0 /* RX_DV */ + 0 10 2 0 1 0 /* RX_ER */ + 0 0 2 0 1 0 /* RX_CLK */ + 2 9 1 0 3 0 /* GTX_CLK - CLK10 */ + 2 8 2 0 1 0>; /* GTX125 - CLK9 */ + }; + + diff --git a/Documentation/devicetree/bindings/soc/fsl/cpm_qe/qe/ucc.txt b/Documentation/devicetree/bindings/soc/fsl/cpm_qe/qe/ucc.txt new file mode 100644 index 0000000..e47734b --- /dev/null +++ b/Documentation/devicetree/bindings/soc/fsl/cpm_qe/qe/ucc.txt @@ -0,0 +1,70 @@ +* UCC (Unified Communications Controllers) + +Required properties: +- device_type : should be "network", "hdlc", "uart", "transparent", + "bisync", "atm", or "serial". +- compatible : could be "ucc_geth" or "fsl_atm" and so on. +- cell-index : the ucc number (1-8), corresponding to UCCx in UM. +- reg : Offset and length of the register set for the device +- interrupts : <a b> where a is the interrupt number and b is a + field that represents an encoding of the sense and level + information for the interrupt. This should be encoded based on + the information in section 2) depending on the type of interrupt + controller you have. +- interrupt-parent : the phandle for the interrupt controller that + services interrupts for this device. 
+- pio-handle : The phandle for the Parallel I/O port configuration. +- port-number : for UART drivers, the port number to use, between 0 and 3. + This usually corresponds to the /dev/ttyQE device, e.g. <0> = /dev/ttyQE0. + The port number is added to the minor number of the device. Unlike the + CPM UART driver, the port-number is required for the QE UART driver. +- soft-uart : for UART drivers, if specified this means the QE UART device + driver should use "Soft-UART" mode, which is needed on some SOCs that have + broken UART hardware. Soft-UART is provided via a microcode upload. +- rx-clock-name: the UCC receive clock source + "none": clock source is disabled + "brg1" through "brg16": clock source is BRG1-BRG16, respectively + "clk1" through "clk24": clock source is CLK1-CLK24, respectively +- tx-clock-name: the UCC transmit clock source + "none": clock source is disabled + "brg1" through "brg16": clock source is BRG1-BRG16, respectively + "clk1" through "clk24": clock source is CLK1-CLK24, respectively +The following two properties are deprecated. rx-clock has been replaced +with rx-clock-name, and tx-clock has been replaced with tx-clock-name. +Drivers that currently use the deprecated properties should continue to +do so, in order to support older device trees, but they should be updated +to check for the new properties first. +- rx-clock : represents the UCC receive clock source. + 0x00 : clock source is disabled; + 0x1~0x10 : clock source is BRG1~BRG16 respectively; + 0x11~0x28: clock source is QE_CLK1~QE_CLK24 respectively. +- tx-clock: represents the UCC transmit clock source; + 0x00 : clock source is disabled; + 0x1~0x10 : clock source is BRG1~BRG16 respectively; + 0x11~0x28: clock source is QE_CLK1~QE_CLK24 respectively. + +Required properties for network device_type: +- mac-address : list of bytes representing the ethernet address. +- phy-handle : The phandle for the PHY connected to this controller. 
+ +Recommended properties: +- phy-connection-type : a string naming the controller/PHY interface type, + i.e., "mii" (default), "rmii", "gmii", "rgmii", "rgmii-id" (Internal + Delay), "rgmii-txid" (delay on TX only), "rgmii-rxid" (delay on RX only), + "tbi", or "rtbi". + +Example: + ucc@2000 { + device_type = "network"; + compatible = "ucc_geth"; + cell-index = <1>; + reg = <2000 200>; + interrupts = <a0 0>; + interrupt-parent = <700>; + mac-address = [ 00 04 9f 00 23 23 ]; + rx-clock-name = "none"; + tx-clock-name = "clk9"; + phy-handle = <212000>; + phy-connection-type = "gmii"; + pio-handle = <140001>; + }; diff --git a/Documentation/devicetree/bindings/soc/fsl/cpm_qe/qe/usb.txt b/Documentation/devicetree/bindings/soc/fsl/cpm_qe/qe/usb.txt new file mode 100644 index 0000000..9ccd5f3 --- /dev/null +++ b/Documentation/devicetree/bindings/soc/fsl/cpm_qe/qe/usb.txt @@ -0,0 +1,37 @@ +Freescale QUICC Engine USB Controller + +Required properties: +- compatible : should be "fsl,<chip>-qe-usb", "fsl,mpc8323-qe-usb". +- reg : the first two cells should contain usb registers location and + length, the next two cells should contain PRAM location and + length. +- interrupts : should contain USB interrupt. +- interrupt-parent : interrupt source phandle. +- fsl,fullspeed-clock : specifies the full speed USB clock source: + "none": clock source is disabled + "brg1" through "brg16": clock source is BRG1-BRG16, respectively + "clk1" through "clk24": clock source is CLK1-CLK24, respectively +- fsl,lowspeed-clock : specifies the low speed USB clock source: + "none": clock source is disabled + "brg1" through "brg16": clock source is BRG1-BRG16, respectively + "clk1" through "clk24": clock source is CLK1-CLK24, respectively +- hub-power-budget : USB power budget for the root hub, in mA. +- gpios : should specify GPIOs in this order: USBOE, USBTP, USBTN, USBRP, + USBRN, SPEED (optional), and POWER (optional). 
+ +Example: + +usb@6c0 { + compatible = "fsl,mpc8360-qe-usb", "fsl,mpc8323-qe-usb"; + reg = <0x6c0 0x40 0x8b00 0x100>; + interrupts = <11>; + interrupt-parent = <&qeic>; + fsl,fullspeed-clock = "clk21"; + gpios = <&qe_pio_b 2 0 /* USBOE */ + &qe_pio_b 3 0 /* USBTP */ + &qe_pio_b 8 0 /* USBTN */ + &qe_pio_b 9 0 /* USBRP */ + &qe_pio_b 11 0 /* USBRN */ + &qe_pio_e 20 0 /* SPEED */ + &qe_pio_e 21 0 /* POWER */>; +}; diff --git a/Documentation/devicetree/bindings/soc/fsl/cpm_qe/serial.txt b/Documentation/devicetree/bindings/soc/fsl/cpm_qe/serial.txt new file mode 100644 index 0000000..2ea76d9 --- /dev/null +++ b/Documentation/devicetree/bindings/soc/fsl/cpm_qe/serial.txt @@ -0,0 +1,32 @@ +* Serial + +Currently defined compatibles: +- fsl,cpm1-smc-uart +- fsl,cpm2-smc-uart +- fsl,cpm1-scc-uart +- fsl,cpm2-scc-uart +- fsl,qe-uart + +Modem control lines connected to GPIO controllers are listed in the gpios +property as described in booting-without-of.txt, section IX.1 in the following +order: + +CTS, RTS, DCD, DSR, DTR, and RI. + +The gpios property is optional and can be left out when control lines are +not used. + +Example: + + serial@11a00 { + device_type = "serial"; + compatible = "fsl,mpc8272-scc-uart", + "fsl,cpm2-scc-uart"; + reg = <11a00 20 8000 100>; + interrupts = <28 8>; + interrupt-parent = <&PIC>; + fsl,cpm-brg = <1>; + fsl,cpm-command = <00800000>; + gpios = <&gpio_c 15 0 + &gpio_d 29 0>; + }; diff --git a/Documentation/devicetree/bindings/soc/fsl/cpm_qe/uqe_serial.txt b/Documentation/devicetree/bindings/soc/fsl/cpm_qe/uqe_serial.txt new file mode 100644 index 0000000..8823c86 --- /dev/null +++ b/Documentation/devicetree/bindings/soc/fsl/cpm_qe/uqe_serial.txt @@ -0,0 +1,17 @@ +* Serial + +Required Properties: +compatible : must be "fsl,<chip>-ucc-uart". For t1040, must be +"fsl,t1040-ucc-uart". +port-number : port number of UCC-UART +tx/rx-clock-name : should be "brg1"-"brg16" for internal clock source, + should be "clk1"-"clk24" for external clock source. 
+ +Example: + + ucc_serial: ucc@2200 { + compatible = "fsl,t1040-ucc-uart"; + port-number = <0>; + rx-clock-name = "brg2"; + tx-clock-name = "brg2"; + }; -- cgit v0.10.2 From b7a7085204f42a2b0095396287341ad313c47444 Mon Sep 17 00:00:00 2001 From: Zhao Qiang Date: Tue, 17 May 2016 10:39:01 +0800 Subject: T104xD4RDB: Add qe node to t104xd4rdb add qe node to t104xd4rdb.dtsi and t1040si-post.dtsi. Signed-off-by: Zhao Qiang Signed-off-by: Scott Wood diff --git a/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi b/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi index d3fbe72..44e399b 100644 --- a/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi @@ -673,3 +673,48 @@ }; }; }; + +&qe { + #address-cells = <1>; + #size-cells = <1>; + device_type = "qe"; + compatible = "fsl,qe"; + fsl,qe-num-riscs = <1>; + fsl,qe-num-snums = <28>; + + qeic: interrupt-controller@80 { + interrupt-controller; + compatible = "fsl,qe-ic"; + #address-cells = <0>; + #interrupt-cells = <1>; + reg = <0x80 0x80>; + interrupts = <95 2 0 0 94 2 0 0>; //high:79 low:78 + }; + + ucc@2000 { + cell-index = <1>; + reg = <0x2000 0x200>; + interrupts = <32>; + interrupt-parent = <&qeic>; + }; + + ucc@2200 { + cell-index = <3>; + reg = <0x2200 0x200>; + interrupts = <34>; + interrupt-parent = <&qeic>; + }; + + muram@10000 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "fsl,qe-muram", "fsl,cpm-muram"; + ranges = <0x0 0x10000 0x6000>; + + data-only@0 { + compatible = "fsl,qe-muram-data", + "fsl,cpm-muram-data"; + reg = <0x0 0x6000>; + }; + }; +}; diff --git a/arch/powerpc/boot/dts/fsl/t104xd4rdb.dtsi b/arch/powerpc/boot/dts/fsl/t104xd4rdb.dtsi index 8c7ea6c..863f943 100644 --- a/arch/powerpc/boot/dts/fsl/t104xd4rdb.dtsi +++ b/arch/powerpc/boot/dts/fsl/t104xd4rdb.dtsi @@ -212,4 +212,42 @@ 0 0x00010000>; }; }; + + qe: qe@ffe140000 { + ranges = <0x0 0xf 0xfe140000 0x40000>; + reg = <0xf 0xfe140000 0 0x480>; + brg-frequency = <0>; + bus-frequency = <0>; + + si1: si@700 { 
+ compatible = "fsl,t1040-qe-si"; + reg = <0x700 0x80>; + }; + + siram1: siram@1000 { + compatible = "fsl,t1040-qe-siram"; + reg = <0x1000 0x800>; + }; + + ucc_hdlc: ucc@2000 { + compatible = "fsl,ucc-hdlc"; + rx-clock-name = "clk8"; + tx-clock-name = "clk9"; + fsl,rx-sync-clock = "rsync_pin"; + fsl,tx-sync-clock = "tsync_pin"; + fsl,tx-timeslot-mask = <0xfffffffe>; + fsl,rx-timeslot-mask = <0xfffffffe>; + fsl,tdm-framer-type = "e1"; + fsl,tdm-id = <0>; + fsl,siram-entry-id = <0>; + fsl,tdm-interface; + }; + + ucc_serial: ucc@2200 { + compatible = "fsl,t1040-ucc-uart"; + port-number = <0>; + rx-clock-name = "brg2"; + tx-clock-name = "brg2"; + }; + }; }; -- cgit v0.10.2 From df02087d271ca3568ce0b8abd334305ecdda9060 Mon Sep 17 00:00:00 2001 From: Zhao Qiang Date: Tue, 17 May 2016 10:39:02 +0800 Subject: T104xRDB: Add qe node to t104xrdb add qe node to t104xrdb.dtsi Signed-off-by: Zhao Qiang Signed-off-by: Scott Wood diff --git a/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi b/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi index 7c4afdb..5fdddbd 100644 --- a/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi +++ b/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi @@ -222,4 +222,42 @@ 0 0x00010000>; }; }; + + qe: qe@ffe140000 { + ranges = <0x0 0xf 0xfe140000 0x40000>; + reg = <0xf 0xfe140000 0 0x480>; + brg-frequency = <0>; + bus-frequency = <0>; + + si1: si@700 { + compatible = "fsl,t1040-qe-si"; + reg = <0x700 0x80>; + }; + + siram1: siram@1000 { + compatible = "fsl,t1040-qe-siram"; + reg = <0x1000 0x800>; + }; + + ucc_hdlc: ucc@2000 { + compatible = "fsl,ucc-hdlc"; + rx-clock-name = "clk8"; + tx-clock-name = "clk9"; + fsl,rx-sync-clock = "rsync_pin"; + fsl,tx-sync-clock = "tsync_pin"; + fsl,tx-timeslot-mask = <0xfffffffe>; + fsl,rx-timeslot-mask = <0xfffffffe>; + fsl,tdm-framer-type = "e1"; + fsl,tdm-id = <0>; + fsl,siram-entry-id = <0>; + fsl,tdm-interface; + }; + + ucc_serial: ucc@2200 { + compatible = "fsl,t1040-ucc-uart"; + port-number = <0>; + rx-clock-name = "brg2"; + tx-clock-name = "brg2"; + 
}; + }; }; -- cgit v0.10.2 From 1afbf61750364864adf6a17818c5bbbea4dea531 Mon Sep 17 00:00:00 2001 From: Zhao Qiang Date: Tue, 17 May 2016 10:39:03 +0800 Subject: T104xQDS: Add qe node to t104xqds add qe node to t104xqds.dtsi Signed-off-by: Zhao Qiang Signed-off-by: Scott Wood diff --git a/arch/powerpc/boot/dts/fsl/t104xqds.dtsi b/arch/powerpc/boot/dts/fsl/t104xqds.dtsi index 977af35..2fd4cbe 100644 --- a/arch/powerpc/boot/dts/fsl/t104xqds.dtsi +++ b/arch/powerpc/boot/dts/fsl/t104xqds.dtsi @@ -366,4 +366,42 @@ 0 0x00010000>; }; }; + + qe: qe@ffe140000 { + ranges = <0x0 0xf 0xfe140000 0x40000>; + reg = <0xf 0xfe140000 0 0x480>; + brg-frequency = <0>; + bus-frequency = <0>; + + si1: si@700 { + compatible = "fsl,t1040-qe-si"; + reg = <0x700 0x80>; + }; + + siram1: siram@1000 { + compatible = "fsl,t1040-qe-siram"; + reg = <0x1000 0x800>; + }; + + ucc_hdlc: ucc@2000 { + compatible = "fsl,ucc-hdlc"; + rx-clock-name = "clk8"; + tx-clock-name = "clk9"; + fsl,rx-sync-clock = "rsync_pin"; + fsl,tx-sync-clock = "tsync_pin"; + fsl,tx-timeslot-mask = <0xfffffffe>; + fsl,rx-timeslot-mask = <0xfffffffe>; + fsl,tdm-framer-type = "e1"; + fsl,tdm-id = <0>; + fsl,siram-entry-id = <0>; + fsl,tdm-interface; + }; + + ucc_serial: ucc@2200 { + compatible = "fsl,t1040-ucc-uart"; + port-number = <0>; + rx-clock-name = "brg2"; + tx-clock-name = "brg2"; + }; + }; }; -- cgit v0.10.2 From c223c90386bc2306510e0ceacd768a0123ff2a2f Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 17 May 2016 08:33:46 +0200 Subject: powerpc32: provide VIRT_CPU_ACCOUNTING This patch provides VIRT_CPU_ACCOUTING to PPC32 architecture. PPC32 doesn't have the PACA structure, so we use the task_info structure to store the accounting data. 
In order to reuse on PPC32 the PPC64 functions, all u64 data has been replaced by 'unsigned long' so that it is u32 on PPC32 and u64 on PPC64 Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index ee82f9a..394f9dc 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -165,6 +165,7 @@ config PPC select ARCH_HAS_UBSAN_SANITIZE_ALL select ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT select HAVE_LIVEPATCH if HAVE_DYNAMIC_FTRACE_WITH_REGS + select HAVE_VIRT_CPU_ACCOUNTING config GENERIC_CSUM def_bool CPU_LITTLE_ENDIAN diff --git a/arch/powerpc/include/asm/accounting.h b/arch/powerpc/include/asm/accounting.h new file mode 100644 index 0000000..c133246 --- /dev/null +++ b/arch/powerpc/include/asm/accounting.h @@ -0,0 +1,24 @@ +/* + * Common time accounting prototypes and such for all ppc machines. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#ifndef __POWERPC_ACCOUNTING_H +#define __POWERPC_ACCOUNTING_H + +/* Stuff for accurate time accounting */ +struct cpu_accounting_data { + unsigned long user_time; /* accumulated usermode TB ticks */ + unsigned long system_time; /* accumulated system TB ticks */ + unsigned long user_time_scaled; /* accumulated usermode SPURR ticks */ + unsigned long starttime; /* TB value snapshot */ + unsigned long starttime_user; /* TB value on exit to usermode */ + unsigned long startspurr; /* SPURR value snapshot */ + unsigned long utime_sspurr; /* ->user_time when ->startspurr set */ +}; + +#endif diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h index e245255..2dfd4fc 100644 --- a/arch/powerpc/include/asm/cputime.h +++ b/arch/powerpc/include/asm/cputime.h @@ -90,11 +90,10 @@ static inline void setup_cputime_one_jiffy(void) static inline cputime64_t jiffies64_to_cputime64(const u64 jif) { u64 ct; - u64 sec; + u64 sec = jif; /* have to be a little careful about overflow */ - ct = jif % HZ; - sec = jif / HZ; + ct = do_div(sec, HZ); if (ct) { ct *= tb_ticks_per_sec; do_div(ct, HZ); @@ -230,7 +229,16 @@ static inline cputime_t clock_t_to_cputime(const unsigned long clk) #define cputime64_to_clock_t(ct) cputime_to_clock_t((cputime_t)(ct)) +/* + * PPC64 uses PACA which is task independent for storing accounting data while + * PPC32 uses struct thread_info, therefore at task switch the accounting data + * has to be populated in the new task + */ +#ifdef CONFIG_PPC64 static inline void arch_vtime_task_switch(struct task_struct *tsk) { } +#else +void arch_vtime_task_switch(struct task_struct *tsk); +#endif #endif /* __KERNEL__ */ #endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 93ae809..8bc38d1 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -287,7 +287,7 @@ do_kvm_##n: \ std r0,GPR0(r1); 
/* save r0 in stackframe */ \ std r10,GPR1(r1); /* save r1 in stackframe */ \ beq 4f; /* if from kernel mode */ \ - ACCOUNT_CPU_USER_ENTRY(r9, r10); \ + ACCOUNT_CPU_USER_ENTRY(r13, r9, r10); \ SAVE_PPR(area, r9, r10); \ 4: EXCEPTION_PROLOG_COMMON_2(area) \ EXCEPTION_PROLOG_COMMON_3(n) \ diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index 546540b..ad171e9 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -25,6 +25,7 @@ #ifdef CONFIG_KVM_BOOK3S_64_HANDLER #include #endif +#include register struct paca_struct *local_paca asm("r13"); @@ -184,13 +185,7 @@ struct paca_struct { #endif /* Stuff for accurate time accounting */ - u64 user_time; /* accumulated usermode TB ticks */ - u64 system_time; /* accumulated system TB ticks */ - u64 user_time_scaled; /* accumulated usermode SPURR ticks */ - u64 starttime; /* TB value snapshot */ - u64 starttime_user; /* TB value on exit to usermode */ - u64 startspurr; /* SPURR value snapshot */ - u64 utime_sspurr; /* ->user_time when ->startspurr set */ + struct cpu_accounting_data accounting; u64 stolen_time; /* TB ticks taken by hypervisor */ u64 dtl_ridx; /* read index in dispatch log */ struct dtl_entry *dtl_curr; /* pointer corresponding to dtl_ridx */ diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index 7b591f9..96b06dc 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -24,27 +24,27 @@ */ #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE -#define ACCOUNT_CPU_USER_ENTRY(ra, rb) -#define ACCOUNT_CPU_USER_EXIT(ra, rb) +#define ACCOUNT_CPU_USER_ENTRY(ptr, ra, rb) +#define ACCOUNT_CPU_USER_EXIT(ptr, ra, rb) #define ACCOUNT_STOLEN_TIME #else -#define ACCOUNT_CPU_USER_ENTRY(ra, rb) \ +#define ACCOUNT_CPU_USER_ENTRY(ptr, ra, rb) \ MFTB(ra); /* get timebase */ \ - ld rb,PACA_STARTTIME_USER(r13); \ - std ra,PACA_STARTTIME(r13); \ + PPC_LL rb, ACCOUNT_STARTTIME_USER(ptr); \ + PPC_STL ra, 
ACCOUNT_STARTTIME(ptr); \ subf rb,rb,ra; /* subtract start value */ \ - ld ra,PACA_USER_TIME(r13); \ + PPC_LL ra, ACCOUNT_USER_TIME(ptr); \ add ra,ra,rb; /* add on to user time */ \ - std ra,PACA_USER_TIME(r13); \ + PPC_STL ra, ACCOUNT_USER_TIME(ptr); \ -#define ACCOUNT_CPU_USER_EXIT(ra, rb) \ +#define ACCOUNT_CPU_USER_EXIT(ptr, ra, rb) \ MFTB(ra); /* get timebase */ \ - ld rb,PACA_STARTTIME(r13); \ - std ra,PACA_STARTTIME_USER(r13); \ + PPC_LL rb, ACCOUNT_STARTTIME(ptr); \ + PPC_STL ra, ACCOUNT_STARTTIME_USER(ptr); \ subf rb,rb,ra; /* subtract start value */ \ - ld ra,PACA_SYSTEM_TIME(r13); \ + PPC_LL ra, ACCOUNT_SYSTEM_TIME(ptr); \ add ra,ra,rb; /* add on to system time */ \ - std ra,PACA_SYSTEM_TIME(r13) + PPC_STL ra, ACCOUNT_SYSTEM_TIME(ptr) #ifdef CONFIG_PPC_SPLPAR #define ACCOUNT_STOLEN_TIME \ diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 320136f..d383f13 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -1294,6 +1294,7 @@ static inline unsigned long mfvtb (void) asm volatile("mfspr %0, %1" : "=r" (rval) : \ "i" (SPRN_TBRU)); rval;}) #endif +#define mftb() mftbl() #endif /* !__powerpc64__ */ #define mttbl(v) asm volatile("mttbl %0":: "r"(v)) diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index 8febc3f..b21bb1f 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -33,6 +33,7 @@ #include #include #include +#include /* * low level task data. 
@@ -46,6 +47,9 @@ struct thread_info { #ifdef CONFIG_LIVEPATCH unsigned long *livepatch_sp; #endif +#if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && defined(CONFIG_PPC32) + struct cpu_accounting_data accounting; +#endif /* low level flags - has atomic operations done on it */ unsigned long flags ____cacheline_aligned_in_smp; }; diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 5b99f95..0478928 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -240,13 +240,28 @@ int main(void) DEFINE(PACAHWCPUID, offsetof(struct paca_struct, hw_cpu_id)); DEFINE(PACAKEXECSTATE, offsetof(struct paca_struct, kexec_state)); DEFINE(PACA_DSCR_DEFAULT, offsetof(struct paca_struct, dscr_default)); - DEFINE(PACA_STARTTIME, offsetof(struct paca_struct, starttime)); - DEFINE(PACA_STARTTIME_USER, offsetof(struct paca_struct, starttime_user)); - DEFINE(PACA_USER_TIME, offsetof(struct paca_struct, user_time)); - DEFINE(PACA_SYSTEM_TIME, offsetof(struct paca_struct, system_time)); + DEFINE(ACCOUNT_STARTTIME, + offsetof(struct paca_struct, accounting.starttime)); + DEFINE(ACCOUNT_STARTTIME_USER, + offsetof(struct paca_struct, accounting.starttime_user)); + DEFINE(ACCOUNT_USER_TIME, + offsetof(struct paca_struct, accounting.user_time)); + DEFINE(ACCOUNT_SYSTEM_TIME, + offsetof(struct paca_struct, accounting.system_time)); DEFINE(PACA_TRAP_SAVE, offsetof(struct paca_struct, trap_save)); DEFINE(PACA_NAPSTATELOST, offsetof(struct paca_struct, nap_state_lost)); DEFINE(PACA_SPRG_VDSO, offsetof(struct paca_struct, sprg_vdso)); +#else /* CONFIG_PPC64 */ +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE + DEFINE(ACCOUNT_STARTTIME, + offsetof(struct thread_info, accounting.starttime)); + DEFINE(ACCOUNT_STARTTIME_USER, + offsetof(struct thread_info, accounting.starttime_user)); + DEFINE(ACCOUNT_USER_TIME, + offsetof(struct thread_info, accounting.user_time)); + DEFINE(ACCOUNT_SYSTEM_TIME, + offsetof(struct thread_info, 
accounting.system_time)); +#endif #endif /* CONFIG_PPC64 */ /* RTAS */ diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 2405631..9899032 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -175,6 +175,12 @@ transfer_to_handler: addi r12,r12,-1 stw r12,4(r11) #endif +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE + CURRENT_THREAD_INFO(r9, r1) + tophys(r9, r9) + ACCOUNT_CPU_USER_ENTRY(r9, r11, r12) +#endif + b 3f 2: /* if from kernel, check interrupted DOZE/NAP mode and @@ -398,6 +404,13 @@ BEGIN_FTR_SECTION lwarx r7,0,r1 END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX) stwcx. r0,0,r1 /* to clear the reservation */ +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE + andi. r4,r8,MSR_PR + beq 3f + CURRENT_THREAD_INFO(r4, r1) + ACCOUNT_CPU_USER_EXIT(r4, r5, r7) +3: +#endif lwz r4,_LINK(r1) lwz r5,_CCR(r1) mtlr r4 @@ -769,6 +782,10 @@ restore_user: andis. r10,r0,DBCR0_IDM@h bnel- load_dbcr0 #endif +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE + CURRENT_THREAD_INFO(r9, r1) + ACCOUNT_CPU_USER_EXIT(r9, r10, r11) +#endif b restore diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 2e0c565..fcb2887 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -72,7 +72,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM) std r0,GPR0(r1) std r10,GPR1(r1) beq 2f /* if from kernel mode */ - ACCOUNT_CPU_USER_ENTRY(r10, r11) + ACCOUNT_CPU_USER_ENTRY(r13, r10, r11) 2: std r2,GPR2(r1) std r3,GPR3(r1) mfcr r2 @@ -246,7 +246,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) ld r4,_LINK(r1) beq- 1f - ACCOUNT_CPU_USER_EXIT(r11, r12) + ACCOUNT_CPU_USER_EXIT(r13, r11, r12) BEGIN_FTR_SECTION HMT_MEDIUM_LOW @@ -859,7 +859,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) BEGIN_FTR_SECTION mtspr SPRN_PPR,r2 /* Restore PPR */ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) - ACCOUNT_CPU_USER_EXIT(r2, r4) + ACCOUNT_CPU_USER_EXIT(r13, r2, r4) REST_GPR(13, r1) 1: mtspr SPRN_SRR1,r3 diff --git 
a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 2d3b40f..38a1f96 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -386,7 +386,7 @@ exc_##n##_common: \ std r10,_NIP(r1); /* save SRR0 to stackframe */ \ std r11,_MSR(r1); /* save SRR1 to stackframe */ \ beq 2f; /* if from kernel mode */ \ - ACCOUNT_CPU_USER_ENTRY(r10,r11);/* accounting (uses cr0+eq) */ \ + ACCOUNT_CPU_USER_ENTRY(r13,r10,r11);/* accounting (uses cr0+eq) */ \ 2: ld r3,excf+EX_R10(r13); /* get back r10 */ \ ld r4,excf+EX_R11(r13); /* get back r11 */ \ mfspr r5,scratch; /* get back r13 */ \ @@ -1059,7 +1059,7 @@ fast_exception_return: andi. r6,r10,MSR_PR REST_2GPRS(6, r1) beq 1f - ACCOUNT_CPU_USER_EXIT(r10, r11) + ACCOUNT_CPU_USER_EXIT(r13, r10, r11) ld r0,GPR13(r1) 1: stdcx. r0,0,r1 /* to clear the reservation */ diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 6b4d01d..4e7759c 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -167,7 +167,15 @@ DEFINE_PER_CPU(unsigned long, cputime_scaled_last_delta); cputime_t cputime_one_jiffy; +#ifdef CONFIG_PPC_SPLPAR void (*dtl_consumer)(struct dtl_entry *, u64); +#endif + +#ifdef CONFIG_PPC64 +#define get_accounting(tsk) (&get_paca()->accounting) +#else +#define get_accounting(tsk) (&task_thread_info(tsk)->accounting) +#endif static void calc_cputime_factors(void) { @@ -187,7 +195,7 @@ static void calc_cputime_factors(void) * Read the SPURR on systems that have it, otherwise the PURR, * or if that doesn't exist return the timebase value passed in. 
*/ -static u64 read_spurr(u64 tb) +static unsigned long read_spurr(unsigned long tb) { if (cpu_has_feature(CPU_FTR_SPURR)) return mfspr(SPRN_SPURR); @@ -250,8 +258,8 @@ static u64 scan_dispatch_log(u64 stop_tb) void accumulate_stolen_time(void) { u64 sst, ust; - u8 save_soft_enabled = local_paca->soft_enabled; + struct cpu_accounting_data *acct = &local_paca->accounting; /* We are called early in the exception entry, before * soft/hard_enabled are sync'ed to the expected state @@ -261,10 +269,10 @@ void accumulate_stolen_time(void) */ local_paca->soft_enabled = 0; - sst = scan_dispatch_log(local_paca->starttime_user); - ust = scan_dispatch_log(local_paca->starttime); - local_paca->system_time -= sst; - local_paca->user_time -= ust; + sst = scan_dispatch_log(acct->starttime_user); + ust = scan_dispatch_log(acct->starttime); + acct->system_time -= sst; + acct->user_time -= ust; local_paca->stolen_time += ust + sst; local_paca->soft_enabled = save_soft_enabled; @@ -276,7 +284,7 @@ static inline u64 calculate_stolen_time(u64 stop_tb) if (get_paca()->dtl_ridx != be64_to_cpu(get_lppaca()->dtl_idx)) { stolen = scan_dispatch_log(stop_tb); - get_paca()->system_time -= stolen; + get_paca()->accounting.system_time -= stolen; } stolen += get_paca()->stolen_time; @@ -296,27 +304,29 @@ static inline u64 calculate_stolen_time(u64 stop_tb) * Account time for a transition between system, hard irq * or soft irq state. 
*/ -static u64 vtime_delta(struct task_struct *tsk, - u64 *sys_scaled, u64 *stolen) +static unsigned long vtime_delta(struct task_struct *tsk, + unsigned long *sys_scaled, + unsigned long *stolen) { - u64 now, nowscaled, deltascaled; - u64 udelta, delta, user_scaled; + unsigned long now, nowscaled, deltascaled; + unsigned long udelta, delta, user_scaled; + struct cpu_accounting_data *acct = get_accounting(tsk); WARN_ON_ONCE(!irqs_disabled()); now = mftb(); nowscaled = read_spurr(now); - get_paca()->system_time += now - get_paca()->starttime; - get_paca()->starttime = now; - deltascaled = nowscaled - get_paca()->startspurr; - get_paca()->startspurr = nowscaled; + acct->system_time += now - acct->starttime; + acct->starttime = now; + deltascaled = nowscaled - acct->startspurr; + acct->startspurr = nowscaled; *stolen = calculate_stolen_time(now); - delta = get_paca()->system_time; - get_paca()->system_time = 0; - udelta = get_paca()->user_time - get_paca()->utime_sspurr; - get_paca()->utime_sspurr = get_paca()->user_time; + delta = acct->system_time; + acct->system_time = 0; + udelta = acct->user_time - acct->utime_sspurr; + acct->utime_sspurr = acct->user_time; /* * Because we don't read the SPURR on every kernel entry/exit, @@ -338,14 +348,14 @@ static u64 vtime_delta(struct task_struct *tsk, *sys_scaled = deltascaled; } } - get_paca()->user_time_scaled += user_scaled; + acct->user_time_scaled += user_scaled; return delta; } void vtime_account_system(struct task_struct *tsk) { - u64 delta, sys_scaled, stolen; + unsigned long delta, sys_scaled, stolen; delta = vtime_delta(tsk, &sys_scaled, &stolen); account_system_time(tsk, 0, delta, sys_scaled); @@ -356,7 +366,7 @@ EXPORT_SYMBOL_GPL(vtime_account_system); void vtime_account_idle(struct task_struct *tsk) { - u64 delta, sys_scaled, stolen; + unsigned long delta, sys_scaled, stolen; delta = vtime_delta(tsk, &sys_scaled, &stolen); account_idle_time(delta + stolen); @@ -374,15 +384,32 @@ void vtime_account_idle(struct 
task_struct *tsk) void vtime_account_user(struct task_struct *tsk) { cputime_t utime, utimescaled; + struct cpu_accounting_data *acct = get_accounting(tsk); - utime = get_paca()->user_time; - utimescaled = get_paca()->user_time_scaled; - get_paca()->user_time = 0; - get_paca()->user_time_scaled = 0; - get_paca()->utime_sspurr = 0; + utime = acct->user_time; + utimescaled = acct->user_time_scaled; + acct->user_time = 0; + acct->user_time_scaled = 0; + acct->utime_sspurr = 0; account_user_time(tsk, utime, utimescaled); } +#ifdef CONFIG_PPC32 +/* + * Called from the context switch with interrupts disabled, to charge all + * accumulated times to the current process, and to prepare accounting on + * the next process. + */ +void arch_vtime_task_switch(struct task_struct *prev) +{ + struct cpu_accounting_data *acct = get_accounting(current); + + acct->starttime = get_accounting(prev)->starttime; + acct->system_time = 0; + acct->user_time = 0; +} +#endif /* CONFIG_PPC32 */ + #else /* ! CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ #define calc_cputime_factors() #endif diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 77e9b8d..f32edec 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -1,7 +1,6 @@ config PPC64 bool "64-bit kernel" default n - select HAVE_VIRT_CPU_ACCOUNTING select ZLIB_DEFLATE help This option selects whether a 32-bit or a 64-bit kernel diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index c5e1551..4f7c29d 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -2213,13 +2213,13 @@ static void dump_one_paca(int cpu) DUMP(p, subcore_sibling_mask, "x"); #endif - DUMP(p, user_time, "llx"); - DUMP(p, system_time, "llx"); - DUMP(p, user_time_scaled, "llx"); - DUMP(p, starttime, "llx"); - DUMP(p, starttime_user, "llx"); - DUMP(p, startspurr, "llx"); - DUMP(p, utime_sspurr, "llx"); + DUMP(p, accounting.user_time, "llx"); + DUMP(p, 
accounting.system_time, "llx"); + DUMP(p, accounting.user_time_scaled, "llx"); + DUMP(p, accounting.starttime, "llx"); + DUMP(p, accounting.starttime_user, "llx"); + DUMP(p, accounting.startspurr, "llx"); + DUMP(p, accounting.utime_sspurr, "llx"); DUMP(p, stolen_time, "llx"); #undef DUMP -- cgit v0.10.2 From f86ef74ed9193c52411277eeac2eec69af553392 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 17 May 2016 09:02:43 +0200 Subject: powerpc/8xx: Fix vaddr for IMMR early remap Memory: 124428K/131072K available (3748K kernel code, 188K rwdata, 648K rodata, 508K init, 290K bss, 6644K reserved) Kernel virtual memory layout: * 0xfffdf000..0xfffff000 : fixmap * 0xfde00000..0xfe000000 : consistent mem * 0xfddf6000..0xfde00000 : early ioremap * 0xc9000000..0xfddf6000 : vmalloc & ioremap SLUB: HWalign=16, Order=0-3, MinObjects=0, CPUs=1, Nodes=1 Today, IMMR is mapped 1:1 at startup Mapping IMMR 1:1 is just wrong because it may overlap with another area. On most mpc8xx boards it is OK as IMMR is set to 0xff000000 but for instance on EP88xC board, IMMR is at 0xfa200000 which overlaps with VM ioremap area This patch fixes the virtual address for remapping IMMR with the fixmap regardless of the value of IMMR. 
The size of IMMR area is 256kbytes (CPM at offset 0, security engine at offset 128k) so a 512k page is enough Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood diff --git a/arch/powerpc/include/asm/fixmap.h b/arch/powerpc/include/asm/fixmap.h index 90f604b..4508b32 100644 --- a/arch/powerpc/include/asm/fixmap.h +++ b/arch/powerpc/include/asm/fixmap.h @@ -51,6 +51,13 @@ enum fixed_addresses { FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1, #endif +#ifdef CONFIG_PPC_8xx + /* For IMMR we need an aligned 512K area */ +#define FIX_IMMR_SIZE (512 * 1024 / PAGE_SIZE) + FIX_IMMR_START, + FIX_IMMR_BASE = __ALIGN_MASK(FIX_IMMR_START, FIX_IMMR_SIZE - 1) - 1 + + FIX_IMMR_SIZE, +#endif /* FIX_PCIE_MCFG, */ __end_of_fixed_addresses }; diff --git a/arch/powerpc/include/asm/mmu-8xx.h b/arch/powerpc/include/asm/mmu-8xx.h index 0a566f1..3e0e492 100644 --- a/arch/powerpc/include/asm/mmu-8xx.h +++ b/arch/powerpc/include/asm/mmu-8xx.h @@ -169,6 +169,9 @@ typedef struct { unsigned int active; unsigned long vdso_base; } mm_context_t; + +#define PHYS_IMMR_BASE (mfspr(SPRN_IMMR) & 0xfff80000) +#define VIRT_IMMR_BASE (__fix_to_virt(FIX_IMMR_BASE)) #endif /* !__ASSEMBLY__ */ #if defined(CONFIG_PPC_4K_PAGES) diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 0478928..247f640 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -68,6 +68,10 @@ #include "../mm/mmu_decl.h" #endif +#ifdef CONFIG_PPC_8xx +#include +#endif + int main(void) { DEFINE(THREAD, offsetof(struct task_struct, thread)); @@ -749,5 +753,9 @@ int main(void) DEFINE(PPC_DBELL_SERVER, PPC_DBELL_SERVER); +#ifdef CONFIG_PPC_8xx + DEFINE(VIRT_IMMR_BASE, __fix_to_virt(FIX_IMMR_BASE)); +#endif + return 0; } diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 80c6947..378a185 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ 
b/arch/powerpc/kernel/head_8xx.S @@ -30,6 +30,7 @@ #include #include #include +#include /* Macro to make the code more readable. */ #ifdef CONFIG_8xx_CPU6 @@ -763,7 +764,7 @@ start_here: * virtual to physical. Also, set the cache mode since that is defined * by TLB entries and perform any additional mapping (like of the IMMR). * If configured to pin some TLBs, we pin the first 8 Mbytes of kernel, - * 24 Mbytes of data, and the 8M IMMR space. Anything not covered by + * 24 Mbytes of data, and the 512k IMMR space. Anything not covered by * these mappings is mapped by page tables. */ initial_mmu: @@ -812,7 +813,7 @@ initial_mmu: ori r8, r8, MD_APG_INIT@l mtspr SPRN_MD_AP, r8 - /* Map another 8 MByte at the IMMR to get the processor + /* Map a 512k page for the IMMR to get the processor * internal registers (among other things). */ #ifdef CONFIG_PIN_TLB @@ -820,12 +821,12 @@ initial_mmu: mtspr SPRN_MD_CTR, r10 #endif mfspr r9, 638 /* Get current IMMR */ - andis. r9, r9, 0xff80 /* Get 8Mbyte boundary */ + andis. 
r9, r9, 0xfff8 /* Get 512 kbytes boundary */ - mr r8, r9 /* Create vaddr for TLB */ + lis r8, VIRT_IMMR_BASE@h /* Create vaddr for TLB */ ori r8, r8, MD_EVALID /* Mark it valid */ mtspr SPRN_MD_EPN, r8 - li r8, MD_PS8MEG /* Set 8M byte page */ + li r8, MD_PS512K | MD_GUARDED /* Set 512k byte page */ ori r8, r8, MD_SVALID /* Make it valid */ mtspr SPRN_MD_TWC, r8 mr r8, r9 /* Create paddr for TLB */ diff --git a/arch/powerpc/sysdev/cpm_common.c b/arch/powerpc/sysdev/cpm_common.c index 0ac12e5..911456d 100644 --- a/arch/powerpc/sysdev/cpm_common.c +++ b/arch/powerpc/sysdev/cpm_common.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -37,25 +38,36 @@ #endif #ifdef CONFIG_PPC_EARLY_DEBUG_CPM -static u32 __iomem *cpm_udbg_txdesc = - (u32 __iomem __force *)CONFIG_PPC_EARLY_DEBUG_CPM_ADDR; +static u32 __iomem *cpm_udbg_txdesc; +static u8 __iomem *cpm_udbg_txbuf; static void udbg_putc_cpm(char c) { - u8 __iomem *txbuf = (u8 __iomem __force *)in_be32(&cpm_udbg_txdesc[1]); - if (c == '\n') udbg_putc_cpm('\r'); while (in_be32(&cpm_udbg_txdesc[0]) & 0x80000000) ; - out_8(txbuf, c); + out_8(cpm_udbg_txbuf, c); out_be32(&cpm_udbg_txdesc[0], 0xa0000001); } void __init udbg_init_cpm(void) { +#ifdef CONFIG_PPC_8xx + cpm_udbg_txdesc = (u32 __iomem __force *) + (CONFIG_PPC_EARLY_DEBUG_CPM_ADDR - PHYS_IMMR_BASE + + VIRT_IMMR_BASE); + cpm_udbg_txbuf = (u8 __iomem __force *) + (in_be32(&cpm_udbg_txdesc[1]) - PHYS_IMMR_BASE + + VIRT_IMMR_BASE); +#else + cpm_udbg_txdesc = (u32 __iomem __force *) + CONFIG_PPC_EARLY_DEBUG_CPM_ADDR; + cpm_udbg_txbuf = (u8 __iomem __force *)in_be32(&cpm_udbg_txdesc[1]); +#endif + if (cpm_udbg_txdesc) { #ifdef CONFIG_CPM2 setbat(1, 0xf0000000, 0xf0000000, 1024*1024, PAGE_KERNEL_NCG); -- cgit v0.10.2 From 4badd43ae44109c88438cc6421d208f513cf537f Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 17 May 2016 09:02:45 +0200 Subject: powerpc/8xx: Map IMMR area with 512k page at a fixed address Once the linear memory space has 
been mapped with 8Mb pages, as seen in the related commit, we get 11 million DTLB misses during the reference 600s period. 77% of the misses are on user addresses and 23% are on kernel addresses (one fourth for linear address space and three fourths for virtual address space). Traditionally, each driver manages one computer board which has its own components with its own memory maps. But on embedded chips like the MPC8xx, the SOC has all registers located in the same IO area. When looking at ioremaps done during startup, we see that many drivers are re-mapping small parts of the IMMR for their own use and all those small pieces get their own 4k page, amplifying the number of TLB misses: in our system we get 0xff000000 mapped 31 times and 0xff003000 mapped 9 times. Even if each part of IMMR was mapped only once with 4k pages, it would still be several small mappings towards linear area. This patch maps the IMMR with a single 512k page. With this patch applied, the number of DTLB misses during the 10 min period is reduced to 11.8 million for a duration of 5.8s, which represents 2% of the non-idle time hence yet another 10% reduction.
Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 378a185..44f4edb 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -384,6 +384,27 @@ InstructionTLBMiss: EXCEPTION_EPILOG_0 rfi +/* + * Bottom part of DataStoreTLBMiss handler for IMMR area + * not enough space in the DataStoreTLBMiss area + */ +DTLBMissIMMR: + mtcr r3 + /* Set 512k byte guarded page and mark it valid */ + li r10, MD_PS512K | MD_GUARDED | MD_SVALID + MTSPR_CPU6(SPRN_MD_TWC, r10, r3) + mfspr r10, SPRN_IMMR /* Get current IMMR */ + rlwinm r10, r10, 0, 0xfff80000 /* Get 512 kbytes boundary */ + ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_SHARED | _PAGE_DIRTY | \ + _PAGE_PRESENT | _PAGE_NO_CACHE + MTSPR_CPU6(SPRN_MD_RPN, r10, r3) /* Update TLB entry */ + + li r11, RPN_PATTERN + mfspr r3, SPRN_SPRG_SCRATCH2 + mtspr SPRN_DAR, r11 /* Tag DAR */ + EXCEPTION_EPILOG_0 + rfi + . = 0x1200 DataStoreTLBMiss: mtspr SPRN_SPRG_SCRATCH2, r3 @@ -397,6 +418,14 @@ DataStoreTLBMiss: IS_KERNEL(r11, r10) mfspr r11, SPRN_M_TW /* Get level 1 table */ BRANCH_UNLESS_KERNEL(3f) + + rlwinm r11, r10, 16, 0xfff8 +#ifndef CONFIG_PIN_TLB + cmpli cr0, r11, VIRT_IMMR_BASE@h +_ENTRY(DTLBMiss_jmp) + beq- DTLBMissIMMR +#endif + lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha 3: diff --git a/arch/powerpc/mm/8xx_mmu.c b/arch/powerpc/mm/8xx_mmu.c index 9491005..2207725 100644 --- a/arch/powerpc/mm/8xx_mmu.c +++ b/arch/powerpc/mm/8xx_mmu.c @@ -13,10 +13,43 @@ */ #include +#include +#include #include "mmu_decl.h" +#define IMMR_SIZE (FIX_IMMR_SIZE << PAGE_SHIFT) + extern int __map_without_ltlbs; + +/* + * Return PA for this VA if it is in IMMR area, or 0 + */ +phys_addr_t v_block_mapped(unsigned long va) +{ + unsigned long p = PHYS_IMMR_BASE; + + if (__map_without_ltlbs) + return 0; + if (va >= VIRT_IMMR_BASE && va < VIRT_IMMR_BASE + IMMR_SIZE) + return p + va - VIRT_IMMR_BASE; + return 0; +} + +/* + * Return VA for a given PA or 0 
if not mapped + */ +unsigned long p_block_mapped(phys_addr_t pa) +{ + unsigned long p = PHYS_IMMR_BASE; + + if (__map_without_ltlbs) + return 0; + if (pa >= p && pa < p + IMMR_SIZE) + return VIRT_IMMR_BASE + pa - p; + return 0; +} + /* * MMU_init_hw does the chip-specific initialization of the MMU hardware. */ @@ -29,6 +62,22 @@ void __init MMU_init_hw(void) #define LARGE_PAGE_SIZE_8M (1<<23) #define LARGE_PAGE_SIZE_64M (1<<26) +static void mmu_mapin_immr(void) +{ + unsigned long p = PHYS_IMMR_BASE; + unsigned long v = VIRT_IMMR_BASE; + unsigned long f = pgprot_val(PAGE_KERNEL_NCG); + int offset; + + for (offset = 0; offset < IMMR_SIZE; offset += PAGE_SIZE) + map_page(v + offset, p + offset, f); +} + +/* Address of instructions to patch */ +#ifndef CONFIG_PIN_TLB +extern unsigned int DTLBMiss_jmp; +#endif + unsigned long __init mmu_mapin_ram(unsigned long top) { unsigned long v, s, mapped; @@ -38,8 +87,13 @@ unsigned long __init mmu_mapin_ram(unsigned long top) p = 0; s = top; - if (__map_without_ltlbs) + if (__map_without_ltlbs) { + mmu_mapin_immr(); +#ifndef CONFIG_PIN_TLB + patch_instruction(&DTLBMiss_jmp, PPC_INST_NOP); +#endif return 0; + } #ifdef CONFIG_PPC_4K_PAGES while (s >= LARGE_PAGE_SIZE_8M) { diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index 6af6532..f988db6 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -154,9 +154,10 @@ struct tlbcam { }; #endif -#if defined(CONFIG_6xx) || defined(CONFIG_FSL_BOOKE) +#if defined(CONFIG_6xx) || defined(CONFIG_FSL_BOOKE) || defined(CONFIG_PPC_8xx) /* 6xx have BATS */ /* FSL_BOOKE have TLBCAM */ +/* 8xx have LTLB */ phys_addr_t v_block_mapped(unsigned long va); unsigned long p_block_mapped(phys_addr_t pa); #else -- cgit v0.10.2 From 567a16d15296f42dd3df640ef60d04f642443251 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 17 May 2016 09:02:47 +0200 Subject: powerpc/8xx: CONFIG_PIN_TLB unneeded for CONFIG_PPC_EARLY_DEBUG_CPM IMMR is now mapped by a fixed 512k 
page managed by the TLB miss handler so it is not anymore necessary to PIN TLBs Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index d3fcf7e..5eccbb5 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -212,7 +212,6 @@ config PPC_EARLY_DEBUG_40x config PPC_EARLY_DEBUG_CPM bool "Early serial debugging for Freescale CPM-based serial ports" depends on SERIAL_CPM - select PIN_TLB if PPC_8xx help Select this to enable early debugging for Freescale chips using a CPM-based serial port. This assumes that the bootwrapper -- cgit v0.10.2 From 6264dbb98ff762d71c65e04ae3b2e632d28a5b84 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 17 May 2016 09:02:49 +0200 Subject: powerpc/8xx: unpin all TLBs before flushing Bootloader may have pinned some TLB entries so the kernel must unpin them before flushing TLBs with tlbia otherwise pinned TLB entries won't get flushed Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 44f4edb..d9a1656 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -797,6 +797,14 @@ start_here: * these mappings is mapped by page tables. 
*/ initial_mmu: + li r8, 0 + mtspr SPRN_MI_CTR, r8 /* remove PINNED ITLB entries */ + lis r10, MD_RESETVAL@h +#ifndef CONFIG_8xx_COPYBACK + oris r10, r10, MD_WTDEF@h +#endif + mtspr SPRN_MD_CTR, r10 /* remove PINNED DTLB entries */ + tlbia /* Invalidate all TLB entries */ /* Always pin the first 8 MB ITLB to prevent ITLB misses while mucking around with SRR0/SRR1 in asm @@ -807,16 +815,10 @@ initial_mmu: mtspr SPRN_MI_CTR, r8 /* Set instruction MMU control */ #ifdef CONFIG_PIN_TLB - lis r10, (MD_RSV4I | MD_RESETVAL)@h + oris r10, r10, MD_RSV4I@h ori r10, r10, 0x1c00 - mr r8, r10 -#else - lis r10, MD_RESETVAL@h -#endif -#ifndef CONFIG_8xx_COPYBACK - oris r10, r10, MD_WTDEF@h -#endif mtspr SPRN_MD_CTR, r10 /* Set data TLB control */ +#endif /* Now map the lower 8 Meg into the TLBs. For this quick hack, * we can load the instruction and data TLB registers with the -- cgit v0.10.2 From bb7f380849f8c8722ea383ec5867a79d365d4574 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 17 May 2016 09:02:51 +0200 Subject: powerpc/8xx: Don't use page table for linear memory space Instead of using the first level page table to define mappings for the linear memory space, we can use direct mapping from the TLB handling routines. This has several advantages: * No need to read the tables at each TLB miss * No issue in 16k pages mode where the 1st level table maps 64 Mbytes The size of the available linear space is known at system startup. 
In order to avoid data access at each TLB miss to know the memory size, the TLB routine is patched at startup with the proper size. This patch provides a 10%-15% improvement of TLB miss handling for kernel addresses. Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index d9a1656..3de7d02 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -389,52 +389,52 @@ InstructionTLBMiss: * not enough space in the DataStoreTLBMiss area */ DTLBMissIMMR: - mtcr r3 + mtcr r10 /* Set 512k byte guarded page and mark it valid */ li r10, MD_PS512K | MD_GUARDED | MD_SVALID - MTSPR_CPU6(SPRN_MD_TWC, r10, r3) + MTSPR_CPU6(SPRN_MD_TWC, r10, r11) mfspr r10, SPRN_IMMR /* Get current IMMR */ rlwinm r10, r10, 0, 0xfff80000 /* Get 512 kbytes boundary */ ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_SHARED | _PAGE_DIRTY | \ _PAGE_PRESENT | _PAGE_NO_CACHE - MTSPR_CPU6(SPRN_MD_RPN, r10, r3) /* Update TLB entry */ + MTSPR_CPU6(SPRN_MD_RPN, r10, r11) /* Update TLB entry */ li r11, RPN_PATTERN - mfspr r3, SPRN_SPRG_SCRATCH2 mtspr SPRN_DAR, r11 /* Tag DAR */ EXCEPTION_EPILOG_0 rfi . = 0x1200 DataStoreTLBMiss: - mtspr SPRN_SPRG_SCRATCH2, r3 EXCEPTION_PROLOG_0 - mfcr r3 + mfcr r10 /* If we are faulting a kernel address, we have to use the * kernel page tables.
*/ - mfspr r10, SPRN_MD_EPN - IS_KERNEL(r11, r10) - mfspr r11, SPRN_M_TW /* Get level 1 table */ - BRANCH_UNLESS_KERNEL(3f) - - rlwinm r11, r10, 16, 0xfff8 + mfspr r11, SPRN_MD_EPN + rlwinm r11, r11, 16, 0xfff8 #ifndef CONFIG_PIN_TLB cmpli cr0, r11, VIRT_IMMR_BASE@h +#endif + cmpli cr7, r11, PAGE_OFFSET@h +#ifndef CONFIG_PIN_TLB _ENTRY(DTLBMiss_jmp) beq- DTLBMissIMMR #endif + bge- cr7, 4f - lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha + mfspr r11, SPRN_M_TW /* Get level 1 table */ 3: + mtcr r10 +#ifdef CONFIG_8xx_CPU6 + mtspr SPRN_SPRG_SCRATCH2, r3 +#endif + mfspr r10, SPRN_MD_EPN /* Insert level 1 index */ rlwimi r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 lwz r11, (swapper_pg_dir-PAGE_OFFSET)@l(r11) /* Get the level 1 entry */ - mtcr r11 - bt- 28,DTLBMiss8M /* bit 28 = Large page (8M) */ - mtcr r3 /* We have a pte table, so load fetch the pte from the table. */ @@ -482,29 +482,30 @@ _ENTRY(DTLBMiss_jmp) MTSPR_CPU6(SPRN_MD_RPN, r10, r3) /* Update TLB entry */ /* Restore registers */ +#ifdef CONFIG_8xx_CPU6 mfspr r3, SPRN_SPRG_SCRATCH2 +#endif mtspr SPRN_DAR, r11 /* Tag DAR */ EXCEPTION_EPILOG_0 rfi -DTLBMiss8M: - mtcr r3 - ori r11, r11, MD_SVALID - MTSPR_CPU6(SPRN_MD_TWC, r11, r3) -#ifdef CONFIG_PPC_16K_PAGES - /* - * In 16k pages mode, each PGD entry defines a 64M block. - * Here we select the 8M page within the block. 
- */ - rlwimi r11, r10, 0, 0x03800000 -#endif - rlwinm r10, r11, 0, 0xff800000 +4: +_ENTRY(DTLBMiss_cmp) + cmpli cr0, r11, PAGE_OFFSET@h + lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha + bge- 3b + + mtcr r10 + /* Set 8M byte page and mark it valid */ + li r10, MD_PS8MEG | MD_SVALID + MTSPR_CPU6(SPRN_MD_TWC, r10, r11) + mfspr r10, SPRN_MD_EPN + rlwinm r10, r10, 0, 0x0f800000 /* 8xx supports max 256Mb RAM */ ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_SHARED | _PAGE_DIRTY | \ _PAGE_PRESENT - MTSPR_CPU6(SPRN_MD_RPN, r10, r3) /* Update TLB entry */ + MTSPR_CPU6(SPRN_MD_RPN, r10, r11) /* Update TLB entry */ li r11, RPN_PATTERN - mfspr r3, SPRN_SPRG_SCRATCH2 mtspr SPRN_DAR, r11 /* Tag DAR */ EXCEPTION_EPILOG_0 rfi @@ -583,12 +584,14 @@ FixupDAR:/* Entry point for dcbx workaround. */ IS_KERNEL(r11, r10) mfspr r11, SPRN_M_TW /* Get level 1 table */ BRANCH_UNLESS_KERNEL(3f) + rlwinm r11, r10, 16, 0xfff8 +_ENTRY(FixupDAR_cmp) + cmpli cr7, r11, PAGE_OFFSET@h + blt- cr7, 200f lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha /* Insert level 1 index */ 3: rlwimi r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 lwz r11, (swapper_pg_dir-PAGE_OFFSET)@l(r11) /* Get the level 1 entry */ - mtcr r11 - bt 28,200f /* bit 28 = Large page (8M) */ rlwinm r11, r11,0,0,19 /* Extract page descriptor page address */ /* Insert level 2 index */ rlwimi r11, r10, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29 @@ -614,8 +617,8 @@ FixupDAR:/* Entry point for dcbx workaround. 
*/ 141: mfspr r10,SPRN_SPRG_SCRATCH2 b DARFixed /* Nope, go back to normal TLB processing */ - /* concat physical page address(r11) and page offset(r10) */ -200: rlwimi r11, r10, 0, 32 - (PAGE_SHIFT << 1), 31 + /* create physical page address from effective address */ +200: tophys(r11, r10) b 201b 144: mfspr r10, SPRN_DSISR diff --git a/arch/powerpc/mm/8xx_mmu.c b/arch/powerpc/mm/8xx_mmu.c index 2207725..996dfaa 100644 --- a/arch/powerpc/mm/8xx_mmu.c +++ b/arch/powerpc/mm/8xx_mmu.c @@ -58,9 +58,7 @@ void __init MMU_init_hw(void) /* Nothing to do for the time being but keep it similar to other PPC */ } -#define LARGE_PAGE_SIZE_4M (1<<22) #define LARGE_PAGE_SIZE_8M (1<<23) -#define LARGE_PAGE_SIZE_64M (1<<26) static void mmu_mapin_immr(void) { @@ -77,52 +75,33 @@ static void mmu_mapin_immr(void) #ifndef CONFIG_PIN_TLB extern unsigned int DTLBMiss_jmp; #endif +extern unsigned int DTLBMiss_cmp, FixupDAR_cmp; -unsigned long __init mmu_mapin_ram(unsigned long top) +void mmu_patch_cmp_limit(unsigned int *addr, unsigned long mapped) { - unsigned long v, s, mapped; - phys_addr_t p; + unsigned int instr = *addr; - v = KERNELBASE; - p = 0; - s = top; + instr &= 0xffff0000; + instr |= (unsigned long)__va(mapped) >> 16; + patch_instruction(addr, instr); +} + +unsigned long __init mmu_mapin_ram(unsigned long top) +{ + unsigned long mapped; if (__map_without_ltlbs) { + mapped = 0; mmu_mapin_immr(); #ifndef CONFIG_PIN_TLB patch_instruction(&DTLBMiss_jmp, PPC_INST_NOP); #endif - return 0; - } - -#ifdef CONFIG_PPC_4K_PAGES - while (s >= LARGE_PAGE_SIZE_8M) { - pmd_t *pmdp; - unsigned long val = p | MD_PS8MEG; - - pmdp = pmd_offset(pud_offset(pgd_offset_k(v), v), v); - *pmdp++ = __pmd(val); - *pmdp++ = __pmd(val + LARGE_PAGE_SIZE_4M); - - v += LARGE_PAGE_SIZE_8M; - p += LARGE_PAGE_SIZE_8M; - s -= LARGE_PAGE_SIZE_8M; + } else { + mapped = top & ~(LARGE_PAGE_SIZE_8M - 1); } -#else /* CONFIG_PPC_16K_PAGES */ - while (s >= LARGE_PAGE_SIZE_64M) { - pmd_t *pmdp; - unsigned long val = p | 
MD_PS8MEG; - - pmdp = pmd_offset(pud_offset(pgd_offset_k(v), v), v); - *pmdp++ = __pmd(val); - - v += LARGE_PAGE_SIZE_64M; - p += LARGE_PAGE_SIZE_64M; - s -= LARGE_PAGE_SIZE_64M; - } -#endif - mapped = top - s; + mmu_patch_cmp_limit(&DTLBMiss_cmp, mapped); + mmu_patch_cmp_limit(&FixupDAR_cmp, mapped); /* If the size of RAM is not an exact power of two, we may not * have covered RAM in its entirety with 8 MiB @@ -131,7 +110,8 @@ unsigned long __init mmu_mapin_ram(unsigned long top) * coverage with normal-sized pages (or other reasons) do not * attempt to allocate outside the allowed range. */ - memblock_set_current_limit(mapped); + if (mapped) + memblock_set_current_limit(mapped); return mapped; } -- cgit v0.10.2 From 4ad274502f66614eec3093aaa0cdeb4b70697ddf Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 17 May 2016 09:02:54 +0200 Subject: powerpc/8xx: Rework CONFIG_PIN_TLB handling On recent kernels, with some debug options like for instance CONFIG_LOCKDEP, the BSS requires more than 8M memory, although the kernel code fits in the first 8M. Today, it is necessary to activate CONFIG_PIN_TLB to get more than 8M at startup, although pinning TLB is not necessary for that. We could have unconditionally mapped 16 or 24M bytes at startup but some old hardware only has 8M and mapping non-existing RAM would be an issue due to speculative accesses. With the preceding patch however, the TLB entries are populated on demand. By setting up the TLB miss handler to handle up to 24M until the handler is patched for the entire memory space, it is possible to allow access up to more memory without mapping non-existing RAM. It is therefore not needed anymore to map memory data at all at startup. It will be handled by the TLB miss handler. One might still want to PIN the IMMR and the first 24M of RAM. It is now possible to do it in the C memory initialisation functions.
In addition, we now know how much memory we have when we do it, so we are able to adapt the pinning to the real amount of memory available. So boards with less than 24M can now also benefit from PIN_TLB. Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 3de7d02..00cc9df 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -491,7 +491,7 @@ _ENTRY(DTLBMiss_jmp) 4: _ENTRY(DTLBMiss_cmp) - cmpli cr0, r11, PAGE_OFFSET@h + cmpli cr0, r11, (PAGE_OFFSET + 0x1800000)@h lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha bge- 3b @@ -586,7 +586,7 @@ FixupDAR:/* Entry point for dcbx workaround. */ BRANCH_UNLESS_KERNEL(3f) rlwinm r11, r10, 16, 0xfff8 _ENTRY(FixupDAR_cmp) - cmpli cr7, r11, PAGE_OFFSET@h + cmpli cr7, r11, (PAGE_OFFSET + 0x1800000)@h blt- cr7, 200f lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha /* Insert level 1 index */ @@ -823,23 +823,16 @@ initial_mmu: mtspr SPRN_MD_CTR, r10 /* Set data TLB control */ #endif - /* Now map the lower 8 Meg into the TLBs. For this quick hack, - * we can load the instruction and data TLB registers with the - * same values. - */ + /* Now map the lower 8 Meg into the ITLB. */ lis r8, KERNELBASE@h /* Create vaddr for TLB */ ori r8, r8, MI_EVALID /* Mark it valid */ mtspr SPRN_MI_EPN, r8 - mtspr SPRN_MD_EPN, r8 li r8, MI_PS8MEG | (2 << 5) /* Set 8M byte page, APG 2 */ ori r8, r8, MI_SVALID /* Make it valid */ mtspr SPRN_MI_TWC, r8 - li r8, MI_PS8MEG /* Set 8M byte page, APG 0 */ - ori r8, r8, MI_SVALID /* Make it valid */ - mtspr SPRN_MD_TWC, r8 li r8, MI_BOOTINIT /* Create RPN for address 0 */ mtspr SPRN_MI_RPN, r8 /* Store TLB entry */ - mtspr SPRN_MD_RPN, r8 + lis r8, MI_APG_INIT@h /* Set protection modes */ ori r8, r8, MI_APG_INIT@l mtspr SPRN_MI_AP, r8 @@ -851,9 +844,6 @@ initial_mmu: * internal registers (among other things).
*/ #ifdef CONFIG_PIN_TLB - addi r10, r10, 0x0100 - mtspr SPRN_MD_CTR, r10 -#endif mfspr r9, 638 /* Get current IMMR */ andis. r9, r9, 0xfff8 /* Get 512 kbytes boundary */ @@ -866,32 +856,6 @@ initial_mmu: mr r8, r9 /* Create paddr for TLB */ ori r8, r8, MI_BOOTINIT|0x2 /* Inhibit cache -- Cort */ mtspr SPRN_MD_RPN, r8 - -#ifdef CONFIG_PIN_TLB - /* Map two more 8M kernel data pages. - */ - addi r10, r10, 0x0100 - mtspr SPRN_MD_CTR, r10 - - lis r8, KERNELBASE@h /* Create vaddr for TLB */ - addis r8, r8, 0x0080 /* Add 8M */ - ori r8, r8, MI_EVALID /* Mark it valid */ - mtspr SPRN_MD_EPN, r8 - li r9, MI_PS8MEG /* Set 8M byte page */ - ori r9, r9, MI_SVALID /* Make it valid */ - mtspr SPRN_MD_TWC, r9 - li r11, MI_BOOTINIT /* Create RPN for address 0 */ - addis r11, r11, 0x0080 /* Add 8M */ - mtspr SPRN_MD_RPN, r11 - - addi r10, r10, 0x0100 - mtspr SPRN_MD_CTR, r10 - - addis r8, r8, 0x0080 /* Add 8M */ - mtspr SPRN_MD_EPN, r8 - mtspr SPRN_MD_TWC, r9 - addis r11, r11, 0x0080 /* Add 8M */ - mtspr SPRN_MD_RPN, r11 #endif /* Since the cache is enabled according to the information we diff --git a/arch/powerpc/mm/8xx_mmu.c b/arch/powerpc/mm/8xx_mmu.c index 996dfaa..0f0a83e 100644 --- a/arch/powerpc/mm/8xx_mmu.c +++ b/arch/powerpc/mm/8xx_mmu.c @@ -50,16 +50,32 @@ unsigned long p_block_mapped(phys_addr_t pa) return 0; } +#define LARGE_PAGE_SIZE_8M (1<<23) + /* * MMU_init_hw does the chip-specific initialization of the MMU hardware. 
*/ void __init MMU_init_hw(void) { - /* Nothing to do for the time being but keep it similar to other PPC */ + /* PIN up to the 3 first 8Mb after IMMR in DTLB table */ +#ifdef CONFIG_PIN_TLB + unsigned long ctr = mfspr(SPRN_MD_CTR) & 0xfe000000; + unsigned long flags = 0xf0 | MD_SPS16K | _PAGE_SHARED | _PAGE_DIRTY; + int i; + unsigned long addr = 0; + unsigned long mem = total_lowmem; + + for (i = 29; i < 32 && mem >= LARGE_PAGE_SIZE_8M; i++) { + mtspr(SPRN_MD_CTR, ctr | (i << 8)); + mtspr(SPRN_MD_EPN, (unsigned long)__va(addr) | MD_EVALID); + mtspr(SPRN_MD_TWC, MD_PS8MEG | MD_SVALID); + mtspr(SPRN_MD_RPN, addr | flags | _PAGE_PRESENT); + addr += LARGE_PAGE_SIZE_8M; + mem -= LARGE_PAGE_SIZE_8M; + } +#endif } -#define LARGE_PAGE_SIZE_8M (1<<23) - static void mmu_mapin_immr(void) { unsigned long p = PHYS_IMMR_BASE; @@ -124,13 +140,8 @@ void setup_initial_memory_limit(phys_addr_t first_memblock_base, */ BUG_ON(first_memblock_base != 0); -#ifdef CONFIG_PIN_TLB /* 8xx can only access 24MB at the moment */ memblock_set_current_limit(min_t(u64, first_memblock_size, 0x01800000)); -#else - /* 8xx can only access 8MB at the moment */ - memblock_set_current_limit(min_t(u64, first_memblock_size, 0x00800000)); -#endif } /* -- cgit v0.10.2 From 62f64b49d04dc70687cd713c804fecd80216b2d6 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 17 May 2016 09:02:56 +0200 Subject: powerpc/8xx: add CONFIG_PIN_TLB_IMMR CONFIG_PIN_TLB maps IMMR area and the first 24 Mbytes of memory. In some circumstances it might be more interesting to not map IMMR but map 32 Mbytes of memory instead.
Therefore we add config option CONFIG_PIN_TLB_IMMR to select if IMMR shall be pinned or not, hence whether we pin 24 or 32 Mbytes of RAM Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 394f9dc..fcfe533 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -1056,6 +1056,11 @@ config CONSISTENT_SIZE config PIN_TLB bool "Pinned Kernel TLBs (860 ONLY)" depends on ADVANCED_OPTIONS && 8xx + +config PIN_TLB_IMMR + bool "Pinned TLB for IMMR" + depends on PIN_TLB + default y endmenu if PPC64 diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 00cc9df..43ddaae 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -414,11 +414,11 @@ DataStoreTLBMiss: */ mfspr r11, SPRN_MD_EPN rlwinm r11, r11, 16, 0xfff8 -#ifndef CONFIG_PIN_TLB +#ifndef CONFIG_PIN_TLB_IMMR cmpli cr0, r11, VIRT_IMMR_BASE@h #endif cmpli cr7, r11, PAGE_OFFSET@h -#ifndef CONFIG_PIN_TLB +#ifndef CONFIG_PIN_TLB_IMMR _ENTRY(DTLBMiss_jmp) beq- DTLBMissIMMR #endif @@ -819,7 +819,6 @@ initial_mmu: #ifdef CONFIG_PIN_TLB oris r10, r10, MD_RSV4I@h - ori r10, r10, 0x1c00 mtspr SPRN_MD_CTR, r10 /* Set data TLB control */ #endif @@ -843,7 +842,10 @@ initial_mmu: /* Map a 512k page for the IMMR to get the processor * internal registers (among other things). */ -#ifdef CONFIG_PIN_TLB +#ifdef CONFIG_PIN_TLB_IMMR + ori r10, r10, 0x1c00 + mtspr SPRN_MD_CTR, r10 + mfspr r9, 638 /* Get current IMMR */ andis. 
r9, r9, 0xfff8 /* Get 512 kbytes boundary */ diff --git a/arch/powerpc/mm/8xx_mmu.c b/arch/powerpc/mm/8xx_mmu.c index 0f0a83e..6c5025e 100644 --- a/arch/powerpc/mm/8xx_mmu.c +++ b/arch/powerpc/mm/8xx_mmu.c @@ -61,11 +61,15 @@ void __init MMU_init_hw(void) #ifdef CONFIG_PIN_TLB unsigned long ctr = mfspr(SPRN_MD_CTR) & 0xfe000000; unsigned long flags = 0xf0 | MD_SPS16K | _PAGE_SHARED | _PAGE_DIRTY; - int i; +#ifdef CONFIG_PIN_TLB_IMMR + int i = 29; +#else + int i = 28; +#endif unsigned long addr = 0; unsigned long mem = total_lowmem; - for (i = 29; i < 32 && mem >= LARGE_PAGE_SIZE_8M; i++) { + for (; i < 32 && mem >= LARGE_PAGE_SIZE_8M; i++) { mtspr(SPRN_MD_CTR, ctr | (i << 8)); mtspr(SPRN_MD_EPN, (unsigned long)__va(addr) | MD_EVALID); mtspr(SPRN_MD_TWC, MD_PS8MEG | MD_SVALID); @@ -88,7 +92,7 @@ static void mmu_mapin_immr(void) } /* Address of instructions to patch */ -#ifndef CONFIG_PIN_TLB +#ifndef CONFIG_PIN_TLB_IMMR extern unsigned int DTLBMiss_jmp; #endif extern unsigned int DTLBMiss_cmp, FixupDAR_cmp; @@ -109,7 +113,7 @@ unsigned long __init mmu_mapin_ram(unsigned long top) if (__map_without_ltlbs) { mapped = 0; mmu_mapin_immr(); -#ifndef CONFIG_PIN_TLB +#ifndef CONFIG_PIN_TLB_IMMR patch_instruction(&DTLBMiss_jmp, PPC_INST_NOP); #endif } else { -- cgit v0.10.2 From 9f595fd8b54809fed13fc30906ef1e90a3fcfbc9 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Sat, 9 Jul 2016 03:22:39 -0500 Subject: powerpc/8xx: Force VIRT_IMMR_BASE to be a positive number The asm-offsets mechanism generates signed numbers, even if the input value is explicitly unsigned. This causes a problem with older binutils (e.g. 2.23), which sign-extend a negative number when @h is applied. Thus, this instruction: cmpli cr0, r11, VIRT_IMMR_BASE@h resulted in this: Error: operand out of range (0xfffffff0 is not between 0x00000000 and 0x0000ffff) By casting to a larger type, we can force the output to be expressed as a positive number. 
Signed-off-by: Scott Wood Cc: Christophe Leroy diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 247f640..b89d14c 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -754,7 +754,7 @@ int main(void) DEFINE(PPC_DBELL_SERVER, PPC_DBELL_SERVER); #ifdef CONFIG_PPC_8xx - DEFINE(VIRT_IMMR_BASE, __fix_to_virt(FIX_IMMR_BASE)); + DEFINE(VIRT_IMMR_BASE, (u64)__fix_to_virt(FIX_IMMR_BASE)); #endif return 0; -- cgit v0.10.2 From bd7c93cca36911baf2eb2bc386956612af3b842d Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 5 Jul 2016 15:03:45 +1000 Subject: powerpc: Update obsolete comments in setup_32.c about entry conditions early_init() is called in-place before kernel relocation and using whatever MMU setup exists at the point the kernel is entered. machine_init() is called after relocation and after some initial mapping of PAGE_OFFSET has been established (typically using BATs on 6xx/7xx/7xxx processors or some form of bolted TLB on others). Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index d544fa3..2fc27ac 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -62,9 +62,7 @@ int icache_bsize; int ucache_bsize; /* - * We're called here very early in the boot. We determine the machine - * type and call the appropriate low-level setup functions. - * -- Cort + * We're called here very early in the boot. 
* * Note that the kernel may be running at an address which is different * from the address that it was linked at, so we must use RELOC/PTRRELOC @@ -105,6 +103,10 @@ notrace unsigned long __init early_init(unsigned long dt_ptr) /* + * This is run before start_kernel(), the kernel has been relocated + * and we are running with enough of the MMU enabled to have our + * proper kernel virtual addresses + * * Find out what kind of machine we're on and save any data we need * from the early boot process (devtree is copied on pmac by prom_init()). * This is called very early on the boot process, after a minimal -- cgit v0.10.2 From 63c254a501049f70c53aea602525c6912362079e Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 5 Jul 2016 15:03:46 +1000 Subject: powerpc: Add comment explaining the purpose of setup_kdump_trampoline() Anything in early_setup() needs to be justified to be there, in this case, we need the trampolines before we can take exceptions and thus before we turn on the MMU. Also remove a pretty meaningless and misplaced debug message Signed-off-by: Benjamin Herrenschmidt [mpe: Fix comment formatting] Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 5530bb5..98f72c6 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -279,10 +279,12 @@ void __init early_setup(unsigned long dt_ptr) /* Probe the machine type */ probe_machine(); + /* + * Setup the trampolines from the lowmem exception vectors + * to the kdump kernel when not using a relocatable kernel. 
+ */ setup_kdump_trampoline(); - DBG("Found, Initializing memory management...\n"); - /* Initialize the hash table or TLB handling */ early_init_mmu(); -- cgit v0.10.2 From da6a97bf12d57e341029b3624ed112175ecff514 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 5 Jul 2016 15:03:44 +1000 Subject: powerpc: Move epapr_paravirt_early_init() to early_init_devtree() The function is called by both 32-bit and 64-bit early setup right after early_init_devtree(). All it does is run yet another early DT parser which is precisely what early_init_devtree() is about, so move it in there. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 946e34f..48434be 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -56,6 +56,7 @@ #include #include #include +#include #include @@ -739,6 +740,7 @@ void __init early_init_devtree(void *params) /* Scan and build the list of machine check recoverable ranges */ of_scan_flat_dt(early_init_dt_scan_recoverable_ranges, NULL); #endif + epapr_paravirt_early_init(); DBG(" <- early_init_devtree()\n"); } diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 2fc27ac..4abefb5 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -37,7 +37,6 @@ #include #include #include -#include #include #define DBG(fmt...) 
@@ -125,8 +124,6 @@ notrace void __init machine_init(u64 dt_ptr) /* Do some early initialization based on the flat device tree */ early_init_devtree(__va(dt_ptr)); - epapr_paravirt_early_init(); - early_init_mmu(); probe_machine(); diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 98f72c6..521846c 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -68,7 +68,6 @@ #include #include #include -#include #include #ifdef DEBUG @@ -270,8 +269,6 @@ void __init early_setup(unsigned long dt_ptr) */ early_init_devtree(__va(dt_ptr)); - epapr_paravirt_early_init(); - /* Now we know the logical id of our boot cpu, setup the paca. */ setup_paca(&paca[boot_cpuid]); fixup_boot_paca(); -- cgit v0.10.2 From 69a94d84c7efc7bc146b5a8d6f05f6ed6f2d4e8f Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 5 Jul 2016 15:04:01 +1000 Subject: powerpc/cell: Don't use flat device-tree after boot Some bit of SPU code was using the FDT rather than the expanded device-tree. Fix it. 
Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/platforms/cell/spu_manage.c b/arch/powerpc/platforms/cell/spu_manage.c index c3327f3..21b4bfb 100644 --- a/arch/powerpc/platforms/cell/spu_manage.c +++ b/arch/powerpc/platforms/cell/spu_manage.c @@ -535,8 +535,7 @@ static int __init init_affinity(void) if (of_has_vicinity()) { init_affinity_fw(); } else { - long root = of_get_flat_dt_root(); - if (of_flat_dt_is_compatible(root, "IBM,CPBW-1.0")) + if (of_machine_is_compatible("IBM,CPBW-1.0")) init_affinity_qs20_harcoded(); else printk("No affinity configuration found\n"); -- cgit v0.10.2 From b282788341933c4dcd462f3c93cb39b90334ed76 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 5 Jul 2016 15:04:02 +1000 Subject: powerpc/85xx/ge_imp3a: Don't use the flat device-tree after boot ge_imp3a_pic_init() is called way beyond the unflattening of the tree, it shouldn't be using of_flat_dt_* Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/platforms/85xx/ge_imp3a.c b/arch/powerpc/platforms/85xx/ge_imp3a.c index 11790e0..55eefef 100644 --- a/arch/powerpc/platforms/85xx/ge_imp3a.c +++ b/arch/powerpc/platforms/85xx/ge_imp3a.c @@ -47,9 +47,8 @@ void __init ge_imp3a_pic_init(void) struct mpic *mpic; struct device_node *np; struct device_node *cascade_node = NULL; - unsigned long root = of_get_flat_dt_root(); - if (of_flat_dt_is_compatible(root, "fsl,MPC8572DS-CAMP")) { + if (of_machine_is_compatible("fsl,MPC8572DS-CAMP")) { mpic = mpic_alloc(NULL, 0, MPIC_NO_RESET | MPIC_BIG_ENDIAN | -- cgit v0.10.2 From 5b0f9f83684dff40014ce1d3c09a6cad749d351f Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 5 Jul 2016 15:04:03 +1000 Subject: powerpc/85xx/mpc85xx_ds: Don't use the flat device-tree after boot Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/platforms/85xx/mpc85xx_ds.c b/arch/powerpc/platforms/85xx/mpc85xx_ds.c 
index f858306..64a7e8c 100644 --- a/arch/powerpc/platforms/85xx/mpc85xx_ds.c +++ b/arch/powerpc/platforms/85xx/mpc85xx_ds.c @@ -66,9 +66,7 @@ void __init mpc85xx_ds_pic_init(void) struct device_node *cascade_node = NULL; int cascade_irq; #endif - unsigned long root = of_get_flat_dt_root(); - - if (of_flat_dt_is_compatible(root, "fsl,MPC8572DS-CAMP")) { + if (of_machine_is_compatible("fsl,MPC8572DS-CAMP")) { mpic = mpic_alloc(NULL, 0, MPIC_NO_RESET | MPIC_BIG_ENDIAN | -- cgit v0.10.2 From acd3578ed9100565ef1b39685ec0e75e5124a0d6 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 5 Jul 2016 15:04:04 +1000 Subject: powerpc/85xx/mpc85xx_rdb: Don't use the flat device-tree after boot Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/platforms/85xx/mpc85xx_rdb.c b/arch/powerpc/platforms/85xx/mpc85xx_rdb.c index 3f4dad1..761e504 100644 --- a/arch/powerpc/platforms/85xx/mpc85xx_rdb.c +++ b/arch/powerpc/platforms/85xx/mpc85xx_rdb.c @@ -47,13 +47,12 @@ void __init mpc85xx_rdb_pic_init(void) { struct mpic *mpic; - unsigned long root = of_get_flat_dt_root(); #ifdef CONFIG_QUICC_ENGINE struct device_node *np; #endif - if (of_flat_dt_is_compatible(root, "fsl,MPC85XXRDB-CAMP")) { + if (of_machine_is_compatible("fsl,MPC85XXRDB-CAMP")) { mpic = mpic_alloc(NULL, 0, MPIC_NO_RESET | MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU, -- cgit v0.10.2 From 484cc1ed3c6b90459f02977f6f5ab7810db18705 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 5 Jul 2016 15:03:54 +1000 Subject: powerpc/rtas: Don't test for machine type in rtas_initialize() The test is unnecessary, the FW_FEATURE_LPAR is sufficient as there exist no other LPAR type that has RTAS. 
Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 8da209f..286354f 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -1174,7 +1174,7 @@ void __init rtas_initialize(void) * the stop-self token if any */ #ifdef CONFIG_PPC64 - if (machine_is(pseries) && firmware_has_feature(FW_FEATURE_LPAR)) { + if (firmware_has_feature(FW_FEATURE_LPAR)) { rtas_region = min(ppc64_rma_size, RTAS_INSTANTIATE_MAX); ibm_suspend_me_token = rtas_token("ibm,suspend-me"); } -- cgit v0.10.2 From 0f2b3442fb850626d50a9d7e533c9f859ef15e6a Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 5 Jul 2016 15:03:55 +1000 Subject: powerpc: Don't test for machine type in smp_setup_cpu_maps() The subsequent test for RTAS along with the LPAR test are sufficient Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 8ca79b7..2a3564c 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -494,7 +494,7 @@ void __init smp_setup_cpu_maps(void) * On pSeries LPAR, we need to know how many cpus * could possibly be added to this partition. */ - if (machine_is(pseries) && firmware_has_feature(FW_FEATURE_LPAR) && + if (firmware_has_feature(FW_FEATURE_LPAR) && (dn = of_find_node_by_path("/rtas"))) { int num_addr_cell, num_size_cell, maxcpus; const __be32 *ireg; -- cgit v0.10.2 From a7d6392866e9777cb287ad194ce8eca00737066f Mon Sep 17 00:00:00 2001 From: Suraj Jitindar Singh Date: Mon, 11 Jul 2016 14:17:31 +1000 Subject: powerpc/crash: Rearrange loop condition to avoid out of bounds array access The array crash_shutdown_handles[] has size CRASH_HANDLER_MAX, thus when we loop over the elements of the list we check crash_shutdown_handles[i] && i < CRASH_HANDLER_MAX. 
However this means that when we increment i to CRASH_HANDLER_MAX we will perform an out of bound array access checking the first condition before exiting on the second condition. To avoid the out of bounds access, simply reorder the loop conditions. Fixes: 1d1451655bad ("powerpc: Add array bounds checking to crash_shutdown_handlers") Signed-off-by: Suraj Jitindar Singh Reviewed-by: Andrew Donnellan Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c index 888bdf1..47b63de 100644 --- a/arch/powerpc/kernel/crash.c +++ b/arch/powerpc/kernel/crash.c @@ -351,7 +351,7 @@ void default_machine_crash_shutdown(struct pt_regs *regs) old_handler = __debugger_fault_handler; __debugger_fault_handler = handle_fault; crash_shutdown_cpu = smp_processor_id(); - for (i = 0; crash_shutdown_handles[i] && i < CRASH_HANDLER_MAX; i++) { + for (i = 0; i < CRASH_HANDLER_MAX && crash_shutdown_handles[i]; i++) { if (setjmp(crash_shutdown_buf) == 0) { /* * Insert syncs and delay to ensure -- cgit v0.10.2 From 5b71eff78267a1e0d2f178a8b5397f4b23dfdf97 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 7 Jul 2016 22:54:26 +1000 Subject: powerpc/xmon: Remove unused externs None of these are used, or have been since we merged ppc & ppc64. 
Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index c5e1551..f351ba6 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -184,9 +184,6 @@ static void dump_tlb_book3e(void); static int xmon_no_auto_backtrace; -extern void xmon_enter(void); -extern void xmon_leave(void); - #ifdef CONFIG_PPC64 #define REG "%.16lx" #else @@ -1686,8 +1683,6 @@ write_spr(int n, unsigned long val) } static unsigned long regno; -extern char exc_prolog; -extern char dec_exc; static void dump_one_spr(int spr, bool show_unimplemented) { -- cgit v0.10.2 From 13629dad1e30e310bb21baa102d1c0dcc17b47ae Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 7 Jul 2016 22:54:27 +1000 Subject: powerpc/xmon: Move static regno into its only user Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index f351ba6..3695012 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -1682,7 +1682,6 @@ write_spr(int n, unsigned long val) catch_spr_faults = 0; } -static unsigned long regno; static void dump_one_spr(int spr, bool show_unimplemented) { @@ -1714,6 +1713,7 @@ static void dump_one_spr(int spr, bool show_unimplemented) static void super_regs(void) { + static unsigned long regno; int cmd; int spr; -- cgit v0.10.2 From 56346ad88d65fd60dde7b0535ff08daac45b560b Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 7 Jul 2016 22:54:28 +1000 Subject: powerpc/xmon: Adjust spacing of existing SPRs to make room for more Purely to make it pleasing to the eye. 
Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 3695012..4300ad6 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -1725,14 +1725,15 @@ static void super_regs(void) asm("mr %0,1" : "=r" (sp) :); asm("mr %0,2" : "=r" (toc) :); - printf("msr = "REG" sprg0= "REG"\n", + printf("msr = "REG" sprg0 = "REG"\n", mfmsr(), mfspr(SPRN_SPRG0)); - printf("pvr = "REG" sprg1= "REG"\n", + printf("pvr = "REG" sprg1 = "REG"\n", mfspr(SPRN_PVR), mfspr(SPRN_SPRG1)); - printf("dec = "REG" sprg2= "REG"\n", + printf("dec = "REG" sprg2 = "REG"\n", mfspr(SPRN_DEC), mfspr(SPRN_SPRG2)); - printf("sp = "REG" sprg3= "REG"\n", sp, mfspr(SPRN_SPRG3)); - printf("toc = "REG" dar = "REG"\n", toc, mfspr(SPRN_DAR)); + printf("sp = "REG" sprg3 = "REG"\n", sp, mfspr(SPRN_SPRG3)); + printf("toc = "REG" dar = "REG"\n", toc, mfspr(SPRN_DAR)); + return; } case 'w': { -- cgit v0.10.2 From 1846193b178dcc58435fdc57352db7b74826ef37 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 7 Jul 2016 22:54:29 +1000 Subject: powerpc/xmon: Dump ISA 2.06 SPRs Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 4300ad6..8d95793 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -1682,6 +1682,35 @@ write_spr(int n, unsigned long val) catch_spr_faults = 0; } +static void dump_206_sprs(void) +{ +#ifdef CONFIG_PPC64 + if (!cpu_has_feature(CPU_FTR_ARCH_206)) + return; + + /* Actually some of these pre-date 2.06, but whatevs */ + + printf("srr0 = %.16x srr1 = %.16x dsisr = %.8x\n", + mfspr(SPRN_SRR0), mfspr(SPRN_SRR1), mfspr(SPRN_DSISR)); + printf("dscr = %.16x ppr = %.16x pir = %.8x\n", + mfspr(SPRN_DSCR), mfspr(SPRN_PPR), mfspr(SPRN_PIR)); + + if (!(mfmsr() & MSR_HV)) + return; + + printf("sdr1 = %.16x hdar = %.16x hdsisr = %.8x\n", + mfspr(SPRN_SDR1), mfspr(SPRN_HDAR), mfspr(SPRN_HDSISR)); + printf("hsrr0 = %.16x hsrr1 = %.16x hdec = %.8x\n", + mfspr(SPRN_HSRR0), 
mfspr(SPRN_HSRR1), mfspr(SPRN_HDEC)); + printf("lpcr = %.16x pcr = %.16x lpidr = %.8x\n", + mfspr(SPRN_LPCR), mfspr(SPRN_PCR), mfspr(SPRN_LPID)); + printf("hsprg0 = %.16x hsprg1 = %.16x\n", + mfspr(SPRN_HSPRG0), mfspr(SPRN_HSPRG1)); + printf("dabr = %.16x dabrx = %.16x\n", + mfspr(SPRN_DABR), mfspr(SPRN_DABRX)); +#endif +} + static void dump_one_spr(int spr, bool show_unimplemented) { @@ -1734,6 +1763,8 @@ static void super_regs(void) printf("sp = "REG" sprg3 = "REG"\n", sp, mfspr(SPRN_SPRG3)); printf("toc = "REG" dar = "REG"\n", toc, mfspr(SPRN_DAR)); + dump_206_sprs(); + return; } case 'w': { -- cgit v0.10.2 From e0ddf7a24558b356d5cf5ecc12cb4e305c800953 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 7 Jul 2016 22:54:30 +1000 Subject: powerpc/xmon: Dump ISA 2.07 SPRs Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 320136f..ac4be83 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -268,6 +268,7 @@ #define DSISR_KEYFAULT 0x00200000 /* Key fault */ #define SPRN_TBRL 0x10C /* Time Base Read Lower Register (user, R/O) */ #define SPRN_TBRU 0x10D /* Time Base Read Upper Register (user, R/O) */ +#define SPRN_CIR 0x11B /* Chip Information Register (hyper, R/0) */ #define SPRN_TBWL 0x11C /* Time Base Lower Register (super, R/W) */ #define SPRN_TBWU 0x11D /* Time Base Upper Register (super, R/W) */ #define SPRN_TBU40 0x11E /* Timebase upper 40 bits (hyper, R/W) */ diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 8d95793..2555233 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -1711,6 +1711,49 @@ static void dump_206_sprs(void) #endif } +static void dump_207_sprs(void) +{ +#ifdef CONFIG_PPC64 + unsigned long msr; + + if (!cpu_has_feature(CPU_FTR_ARCH_207S)) + return; + + printf("dpdes = %.16x tir = %.16x cir = %.8x\n", + mfspr(SPRN_DPDES), mfspr(SPRN_TIR), mfspr(SPRN_CIR)); + + printf("fscr = %.16x tar = %.16x pspb = 
%.8x\n", + mfspr(SPRN_FSCR), mfspr(SPRN_TAR), mfspr(SPRN_PSPB)); + + msr = mfmsr(); + if (msr & MSR_TM) { + /* Only if TM has been enabled in the kernel */ + printf("tfhar = %.16x tfiar = %.16x texasr = %.16x\n", + mfspr(SPRN_TFHAR), mfspr(SPRN_TFIAR), + mfspr(SPRN_TEXASR)); + } + + printf("mmcr0 = %.16x mmcr1 = %.16x mmcr2 = %.16x\n", + mfspr(SPRN_MMCR0), mfspr(SPRN_MMCR1), mfspr(SPRN_MMCR2)); + printf("pmc1 = %.8x pmc2 = %.8x pmc3 = %.8x pmc4 = %.8x\n", + mfspr(SPRN_PMC1), mfspr(SPRN_PMC2), + mfspr(SPRN_PMC3), mfspr(SPRN_PMC4)); + printf("mmcra = %.16x siar = %.16x pmc5 = %.8x\n", + mfspr(SPRN_MMCRA), mfspr(SPRN_SIAR), mfspr(SPRN_PMC5)); + printf("sdar = %.16x sier = %.16x pmc6 = %.8x\n", + mfspr(SPRN_SDAR), mfspr(SPRN_SIER), mfspr(SPRN_PMC6)); + printf("ebbhr = %.16x ebbrr = %.16x bescr = %.16x\n", + mfspr(SPRN_EBBHR), mfspr(SPRN_EBBRR), mfspr(SPRN_BESCR)); + + if (!(msr & MSR_HV)) + return; + + printf("hfscr = %.16x dhdes = %.16x rpr = %.16x\n", + mfspr(SPRN_HFSCR), mfspr(SPRN_DHDES), mfspr(SPRN_RPR)); + printf("dawr = %.16x dawrx = %.16x ciabr = %.16x\n", + mfspr(SPRN_DAWR), mfspr(SPRN_DAWRX), mfspr(SPRN_CIABR)); +#endif +} static void dump_one_spr(int spr, bool show_unimplemented) { @@ -1764,6 +1807,7 @@ static void super_regs(void) printf("toc = "REG" dar = "REG"\n", toc, mfspr(SPRN_DAR)); dump_206_sprs(); + dump_207_sprs(); return; } -- cgit v0.10.2 From 24af8c5a52a70bbfd275f59836feadd9b9ebc83b Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 11 Jul 2016 15:25:18 +1000 Subject: selftests/powerpc: Add a test for PROT_SAO PROT_SAO is a powerpc-specific flag to mmap(), and we rely on arch specific logic to allow it to be passed to mmap(). Add a small test to ensure mmap() accepts PROT_SAO. We don't have a good way to test that it actually causes the mapping to be created with the right flags, so for now we just touch the mapping so it's faulted in. In future we might be able to do something better. 
Signed-off-by: Michael Ellerman diff --git a/tools/testing/selftests/powerpc/mm/.gitignore b/tools/testing/selftests/powerpc/mm/.gitignore index b43ade0..e715a3f 100644 --- a/tools/testing/selftests/powerpc/mm/.gitignore +++ b/tools/testing/selftests/powerpc/mm/.gitignore @@ -1,3 +1,4 @@ hugetlb_vs_thp_test subpage_prot tempfile +prot_sao \ No newline at end of file diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile index ee179e2..3bdb96e 100644 --- a/tools/testing/selftests/powerpc/mm/Makefile +++ b/tools/testing/selftests/powerpc/mm/Makefile @@ -1,13 +1,15 @@ noarg: $(MAKE) -C ../ -TEST_PROGS := hugetlb_vs_thp_test subpage_prot +TEST_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao TEST_FILES := tempfile all: $(TEST_PROGS) $(TEST_FILES) $(TEST_PROGS): ../harness.c +prot_sao: ../utils.c + include ../../lib.mk tempfile: diff --git a/tools/testing/selftests/powerpc/mm/prot_sao.c b/tools/testing/selftests/powerpc/mm/prot_sao.c new file mode 100644 index 0000000..611530d --- /dev/null +++ b/tools/testing/selftests/powerpc/mm/prot_sao.c @@ -0,0 +1,42 @@ +/* + * Copyright 2016, Michael Ellerman, IBM Corp. + * Licensed under GPLv2. + */ + +#include +#include +#include +#include + +#include + +#include "utils.h" + +#define SIZE (64 * 1024) + +int test_prot_sao(void) +{ + char *p; + + /* 2.06 or later should support SAO */ + SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06)); + + /* + * Ensure we can ask for PROT_SAO. + * We can't really verify that it does the right thing, but at least we + * confirm the kernel will accept it. 
+ */ + p = mmap(NULL, SIZE, PROT_READ | PROT_WRITE | PROT_SAO, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + FAIL_IF(p == MAP_FAILED); + + /* Write to the mapping, to at least cause a fault */ + memset(p, 0xaa, SIZE); + + return 0; +} + +int main(void) +{ + return test_harness(test_prot_sao, "prot-sao"); +} diff --git a/tools/testing/selftests/powerpc/utils.h b/tools/testing/selftests/powerpc/utils.h index a985cfa..fbd33e5 100644 --- a/tools/testing/selftests/powerpc/utils.h +++ b/tools/testing/selftests/powerpc/utils.h @@ -27,6 +27,11 @@ int test_harness(int (test_function)(void), char *name); extern void *get_auxv_entry(int type); int pick_online_cpu(void); +static inline bool have_hwcap(unsigned long ftr) +{ + return ((unsigned long)get_auxv_entry(AT_HWCAP) & ftr) == ftr; +} + static inline bool have_hwcap2(unsigned long ftr2) { return ((unsigned long)get_auxv_entry(AT_HWCAP2) & ftr2) == ftr2; -- cgit v0.10.2 From fc9f75ef2fdf46fc859b991dbf473a583edfb0e2 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 12 Jul 2016 11:30:11 +0000 Subject: cxl: Use for_each_compatible_node() macro Use for_each_compatible_node() macro instead of open coding it. Generated by Coccinelle. 
Signed-off-by: Wei Yongjun Reviewed-by: Andrew Donnellan Acked-by: Ian Munsie Signed-off-by: Michael Ellerman diff --git a/drivers/misc/cxl/base.c b/drivers/misc/cxl/base.c index e6f49ac..2330980 100644 --- a/drivers/misc/cxl/base.c +++ b/drivers/misc/cxl/base.c @@ -95,7 +95,7 @@ EXPORT_SYMBOL_GPL(cxl_update_properties); static int __init cxl_base_init(void) { - struct device_node *np = NULL; + struct device_node *np; struct platform_device *dev; int count = 0; @@ -105,8 +105,7 @@ static int __init cxl_base_init(void) if (cpu_has_feature(CPU_FTR_HVMODE)) return 0; - while ((np = of_find_compatible_node(np, NULL, - "ibm,coherent-platform-facility"))) { + for_each_compatible_node(np, NULL, "ibm,coherent-platform-facility") { dev = of_platform_device_create(np, NULL, NULL); if (dev) count++; -- cgit v0.10.2 From f456834a6c1db36c290fdfe8ab53107adaf334e7 Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Thu, 14 Jul 2016 07:17:00 +1000 Subject: powerpc/powernv: Split cxl code out into a separate file The support for using the Mellanox CX4 in cxl mode will require additions to the PHB code. In preparation for this, move the existing cxl code out of pci-ioda.c into a separate pci-cxl.c file to keep things more organised. 
Signed-off-by: Ian Munsie Reviewed-by: Andrew Donnellan Reviewed-by: Frederic Barrat Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile index cd9711e..b5d98cb 100644 --- a/arch/powerpc/platforms/powernv/Makefile +++ b/arch/powerpc/platforms/powernv/Makefile @@ -6,6 +6,7 @@ obj-y += opal-kmsg.o obj-$(CONFIG_SMP) += smp.o subcore.o subcore-asm.o obj-$(CONFIG_PCI) += pci.o pci-ioda.o npu-dma.o +obj-$(CONFIG_CXL_BASE) += pci-cxl.o obj-$(CONFIG_EEH) += eeh-powernv.o obj-$(CONFIG_PPC_SCOM) += opal-xscom.o obj-$(CONFIG_MEMORY_FAILURE) += opal-memory-errors.o diff --git a/arch/powerpc/platforms/powernv/pci-cxl.c b/arch/powerpc/platforms/powernv/pci-cxl.c new file mode 100644 index 0000000..e0eeb00 --- /dev/null +++ b/arch/powerpc/platforms/powernv/pci-cxl.c @@ -0,0 +1,163 @@ +/* + * Copyright 2014-2016 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include +#include + +#include "pci.h" + +struct device_node *pnv_pci_get_phb_node(struct pci_dev *dev) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + + return of_node_get(hose->dn); +} +EXPORT_SYMBOL(pnv_pci_get_phb_node); + +int pnv_phb_to_cxl_mode(struct pci_dev *dev, uint64_t mode) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pnv_phb *phb = hose->private_data; + struct pnv_ioda_pe *pe; + int rc; + + pe = pnv_ioda_get_pe(dev); + if (!pe) + return -ENODEV; + + pe_info(pe, "Switching PHB to CXL\n"); + + rc = opal_pci_set_phb_cxl_mode(phb->opal_id, mode, pe->pe_number); + if (rc == OPAL_UNSUPPORTED) + dev_err(&dev->dev, "Required cxl mode not supported by firmware - update skiboot\n"); + else if (rc) + dev_err(&dev->dev, "opal_pci_set_phb_cxl_mode failed: %i\n", rc); + + return rc; +} +EXPORT_SYMBOL(pnv_phb_to_cxl_mode); + +/* Find PHB for cxl dev and allocate MSI hwirqs? + * Returns the absolute hardware IRQ number + */ +int pnv_cxl_alloc_hwirqs(struct pci_dev *dev, int num) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pnv_phb *phb = hose->private_data; + int hwirq = msi_bitmap_alloc_hwirqs(&phb->msi_bmp, num); + + if (hwirq < 0) { + dev_warn(&dev->dev, "Failed to find a free MSI\n"); + return -ENOSPC; + } + + return phb->msi_base + hwirq; +} +EXPORT_SYMBOL(pnv_cxl_alloc_hwirqs); + +void pnv_cxl_release_hwirqs(struct pci_dev *dev, int hwirq, int num) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pnv_phb *phb = hose->private_data; + + msi_bitmap_free_hwirqs(&phb->msi_bmp, hwirq - phb->msi_base, num); +} +EXPORT_SYMBOL(pnv_cxl_release_hwirqs); + +void pnv_cxl_release_hwirq_ranges(struct cxl_irq_ranges *irqs, + struct pci_dev *dev) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pnv_phb *phb = hose->private_data; + int i, hwirq; + + for (i = 1; i < CXL_IRQ_RANGES; i++) { + if (!irqs->range[i]) + continue; + pr_devel("cxl release irq range 0x%x: 
offset: 0x%lx limit: %ld\n", + i, irqs->offset[i], + irqs->range[i]); + hwirq = irqs->offset[i] - phb->msi_base; + msi_bitmap_free_hwirqs(&phb->msi_bmp, hwirq, + irqs->range[i]); + } +} +EXPORT_SYMBOL(pnv_cxl_release_hwirq_ranges); + +int pnv_cxl_alloc_hwirq_ranges(struct cxl_irq_ranges *irqs, + struct pci_dev *dev, int num) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pnv_phb *phb = hose->private_data; + int i, hwirq, try; + + memset(irqs, 0, sizeof(struct cxl_irq_ranges)); + + /* 0 is reserved for the multiplexed PSL DSI interrupt */ + for (i = 1; i < CXL_IRQ_RANGES && num; i++) { + try = num; + while (try) { + hwirq = msi_bitmap_alloc_hwirqs(&phb->msi_bmp, try); + if (hwirq >= 0) + break; + try /= 2; + } + if (!try) + goto fail; + + irqs->offset[i] = phb->msi_base + hwirq; + irqs->range[i] = try; + pr_devel("cxl alloc irq range 0x%x: offset: 0x%lx limit: %li\n", + i, irqs->offset[i], irqs->range[i]); + num -= try; + } + if (num) + goto fail; + + return 0; +fail: + pnv_cxl_release_hwirq_ranges(irqs, dev); + return -ENOSPC; +} +EXPORT_SYMBOL(pnv_cxl_alloc_hwirq_ranges); + +int pnv_cxl_get_irq_count(struct pci_dev *dev) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pnv_phb *phb = hose->private_data; + + return phb->msi_bmp.irq_count; +} +EXPORT_SYMBOL(pnv_cxl_get_irq_count); + +int pnv_cxl_ioda_msi_setup(struct pci_dev *dev, unsigned int hwirq, + unsigned int virq) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pnv_phb *phb = hose->private_data; + unsigned int xive_num = hwirq - phb->msi_base; + struct pnv_ioda_pe *pe; + int rc; + + if (!(pe = pnv_ioda_get_pe(dev))) + return -ENODEV; + + /* Assign XIVE to PE */ + rc = opal_pci_set_xive_pe(phb->opal_id, pe->pe_number, xive_num); + if (rc) { + pe_warn(pe, "%s: OPAL error %d setting msi_base 0x%x " + "hwirq 0x%x XIVE 0x%x PE\n", + pci_name(dev), rc, phb->msi_base, hwirq, xive_num); + return -EIO; + } + pnv_set_msi_irq_chip(phb, virq); + + 
return 0; +} +EXPORT_SYMBOL(pnv_cxl_ioda_msi_setup); diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 2115ed7..e0d8103 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -595,7 +595,7 @@ static int pnv_ioda_get_pe_state(struct pnv_phb *phb, int pe_no) * but in the meantime, we need to protect them to avoid warnings */ #ifdef CONFIG_PCI_MSI -static struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev) +struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev) { struct pci_controller *hose = pci_bus_to_host(dev->bus); struct pnv_phb *phb = hose->private_data; @@ -2700,7 +2700,7 @@ static void pnv_ioda2_msi_eoi(struct irq_data *d) } -static void set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq) +void pnv_set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq) { struct irq_data *idata; struct irq_chip *ichip; @@ -2722,159 +2722,6 @@ static void set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq) irq_set_chip(virq, &phb->ioda.irq_chip); } -#ifdef CONFIG_CXL_BASE - -struct device_node *pnv_pci_get_phb_node(struct pci_dev *dev) -{ - struct pci_controller *hose = pci_bus_to_host(dev->bus); - - return of_node_get(hose->dn); -} -EXPORT_SYMBOL(pnv_pci_get_phb_node); - -int pnv_phb_to_cxl_mode(struct pci_dev *dev, uint64_t mode) -{ - struct pci_controller *hose = pci_bus_to_host(dev->bus); - struct pnv_phb *phb = hose->private_data; - struct pnv_ioda_pe *pe; - int rc; - - pe = pnv_ioda_get_pe(dev); - if (!pe) - return -ENODEV; - - pe_info(pe, "Switching PHB to CXL\n"); - - rc = opal_pci_set_phb_cxl_mode(phb->opal_id, mode, pe->pe_number); - if (rc == OPAL_UNSUPPORTED) - dev_err(&dev->dev, "Required cxl mode not supported by firmware - update skiboot\n"); - else if (rc) - dev_err(&dev->dev, "opal_pci_set_phb_cxl_mode failed: %i\n", rc); - - return rc; -} -EXPORT_SYMBOL(pnv_phb_to_cxl_mode); - -/* Find PHB for cxl dev and allocate MSI hwirqs? 
- * Returns the absolute hardware IRQ number - */ -int pnv_cxl_alloc_hwirqs(struct pci_dev *dev, int num) -{ - struct pci_controller *hose = pci_bus_to_host(dev->bus); - struct pnv_phb *phb = hose->private_data; - int hwirq = msi_bitmap_alloc_hwirqs(&phb->msi_bmp, num); - - if (hwirq < 0) { - dev_warn(&dev->dev, "Failed to find a free MSI\n"); - return -ENOSPC; - } - - return phb->msi_base + hwirq; -} -EXPORT_SYMBOL(pnv_cxl_alloc_hwirqs); - -void pnv_cxl_release_hwirqs(struct pci_dev *dev, int hwirq, int num) -{ - struct pci_controller *hose = pci_bus_to_host(dev->bus); - struct pnv_phb *phb = hose->private_data; - - msi_bitmap_free_hwirqs(&phb->msi_bmp, hwirq - phb->msi_base, num); -} -EXPORT_SYMBOL(pnv_cxl_release_hwirqs); - -void pnv_cxl_release_hwirq_ranges(struct cxl_irq_ranges *irqs, - struct pci_dev *dev) -{ - struct pci_controller *hose = pci_bus_to_host(dev->bus); - struct pnv_phb *phb = hose->private_data; - int i, hwirq; - - for (i = 1; i < CXL_IRQ_RANGES; i++) { - if (!irqs->range[i]) - continue; - pr_devel("cxl release irq range 0x%x: offset: 0x%lx limit: %ld\n", - i, irqs->offset[i], - irqs->range[i]); - hwirq = irqs->offset[i] - phb->msi_base; - msi_bitmap_free_hwirqs(&phb->msi_bmp, hwirq, - irqs->range[i]); - } -} -EXPORT_SYMBOL(pnv_cxl_release_hwirq_ranges); - -int pnv_cxl_alloc_hwirq_ranges(struct cxl_irq_ranges *irqs, - struct pci_dev *dev, int num) -{ - struct pci_controller *hose = pci_bus_to_host(dev->bus); - struct pnv_phb *phb = hose->private_data; - int i, hwirq, try; - - memset(irqs, 0, sizeof(struct cxl_irq_ranges)); - - /* 0 is reserved for the multiplexed PSL DSI interrupt */ - for (i = 1; i < CXL_IRQ_RANGES && num; i++) { - try = num; - while (try) { - hwirq = msi_bitmap_alloc_hwirqs(&phb->msi_bmp, try); - if (hwirq >= 0) - break; - try /= 2; - } - if (!try) - goto fail; - - irqs->offset[i] = phb->msi_base + hwirq; - irqs->range[i] = try; - pr_devel("cxl alloc irq range 0x%x: offset: 0x%lx limit: %li\n", - i, irqs->offset[i], 
irqs->range[i]); - num -= try; - } - if (num) - goto fail; - - return 0; -fail: - pnv_cxl_release_hwirq_ranges(irqs, dev); - return -ENOSPC; -} -EXPORT_SYMBOL(pnv_cxl_alloc_hwirq_ranges); - -int pnv_cxl_get_irq_count(struct pci_dev *dev) -{ - struct pci_controller *hose = pci_bus_to_host(dev->bus); - struct pnv_phb *phb = hose->private_data; - - return phb->msi_bmp.irq_count; -} -EXPORT_SYMBOL(pnv_cxl_get_irq_count); - -int pnv_cxl_ioda_msi_setup(struct pci_dev *dev, unsigned int hwirq, - unsigned int virq) -{ - struct pci_controller *hose = pci_bus_to_host(dev->bus); - struct pnv_phb *phb = hose->private_data; - unsigned int xive_num = hwirq - phb->msi_base; - struct pnv_ioda_pe *pe; - int rc; - - if (!(pe = pnv_ioda_get_pe(dev))) - return -ENODEV; - - /* Assign XIVE to PE */ - rc = opal_pci_set_xive_pe(phb->opal_id, pe->pe_number, xive_num); - if (rc) { - pe_warn(pe, "%s: OPAL error %d setting msi_base 0x%x " - "hwirq 0x%x XIVE 0x%x PE\n", - pci_name(dev), rc, phb->msi_base, hwirq, xive_num); - return -EIO; - } - set_msi_irq_chip(phb, virq); - - return 0; -} -EXPORT_SYMBOL(pnv_cxl_ioda_msi_setup); -#endif - static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev, unsigned int hwirq, unsigned int virq, unsigned int is_64, struct msi_msg *msg) @@ -2931,7 +2778,7 @@ static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev, } msg->data = be32_to_cpu(data); - set_msi_irq_chip(phb, virq); + pnv_set_msi_irq_chip(phb, virq); pr_devel("%s: %s-bit MSI on hwirq %x (xive #%d)," " address=%x_%08x data=%x PE# %d\n", diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index 3a97990..49c2997 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -1,6 +1,10 @@ #ifndef __POWERNV_PCI_H #define __POWERNV_PCI_H +#include +#include +#include + struct pci_dn; enum pnv_phb_type { @@ -212,6 +216,8 @@ extern void pnv_pci_dma_dev_setup(struct pci_dev *pdev); extern void 
pnv_pci_dma_bus_setup(struct pci_bus *bus); extern int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type); extern void pnv_teardown_msi_irqs(struct pci_dev *pdev); +extern struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev); +extern void pnv_set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq); extern void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level, const char *fmt, ...); -- cgit v0.10.2 From 4e56f858bdde5cbfb70f61baddfaa56a8ed851bf Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Thu, 14 Jul 2016 07:17:01 +1000 Subject: cxl: Add cxl_slot_is_supported API This extends the check that the adapter is in a CAPI capable slot so that it may be called by external users in the kernel API. This will be used by the upcoming Mellanox CX4 support, which needs to know ahead of time if the card can be switched to cxl mode so that it can leave it in PCI mode if it is not. This API takes a parameter to check if CAPP DMA mode is supported, which it currently only allows on P8NVL systems, since that mode currently has issues accessing memory < 4GB on P8, and we cannot realistically avoid that. This API does not currently check if a CAPP unit is available (i.e. not already assigned to another PHB) on P8. Doing so would be racy since it is assigned on a first come first serve basis, and so long as CAPP DMA mode is not supported on P8 we don't need this, since the only anticipated user of this API requires CAPP DMA mode. 
Cc: Philippe Bergheaud Signed-off-by: Ian Munsie Reviewed-by: Andrew Donnellan Reviewed-by: Frederic Barrat Signed-off-by: Michael Ellerman diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index 3a5f980..6ac6b05 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -1426,6 +1426,43 @@ static int cxl_slot_is_switched(struct pci_dev *dev) return (depth > CXL_MAX_PCIEX_PARENT); } +bool cxl_slot_is_supported(struct pci_dev *dev, int flags) +{ + if (!cpu_has_feature(CPU_FTR_HVMODE)) + return false; + + if ((flags & CXL_SLOT_FLAG_DMA) && (!pvr_version_is(PVR_POWER8NVL))) { + /* + * CAPP DMA mode is technically supported on regular P8, but + * will EEH if the card attempts to access memory < 4GB, which + * we cannot realistically avoid. We might be able to work + * around the issue, but until then return unsupported: + */ + return false; + } + + if (cxl_slot_is_switched(dev)) + return false; + + /* + * XXX: This gets a little tricky on regular P8 (not POWER8NVL) since + * the CAPP can be connected to PHB 0, 1 or 2 on a first come first + * served basis, which is racy to check from here. If we need to + * support this in future we might need to consider having this + * function effectively reserve it ahead of time. + * + * Currently, the only user of this API is the Mellanox CX4, which is + * only supported on P8NVL due to the above mentioned limitation of + * CAPP DMA mode and therefore does not need to worry about this. If the + * issue with CAPP DMA mode is later worked around on P8 we might need + * to revisit this. + */ + + return true; +} +EXPORT_SYMBOL_GPL(cxl_slot_is_supported); + + static int cxl_probe(struct pci_dev *dev, const struct pci_device_id *id) { struct cxl *adapter; diff --git a/include/misc/cxl.h b/include/misc/cxl.h index b6d040f..dd9eebb 100644 --- a/include/misc/cxl.h +++ b/include/misc/cxl.h @@ -24,6 +24,21 @@ * generic PCI API. This API is agnostic to the actual AFU. 
*/ +#define CXL_SLOT_FLAG_DMA 0x1 + +/* + * Checks if the given card is in a cxl capable slot. Pass CXL_SLOT_FLAG_DMA if + * the card requires CAPP DMA mode to also check if the system supports it. + * This is intended to be used by bi-modal devices to determine if they can use + * cxl mode or if they should continue running in PCI mode. + * + * Note that this only checks if the slot is cxl capable - it does not + * currently check if the CAPP is currently available for chips where it can be + * assigned to different PHBs on a first come first serve basis (i.e. P8) + */ +bool cxl_slot_is_supported(struct pci_dev *dev, int flags); + + /* Get the AFU associated with a pci_dev */ struct cxl_afu *cxl_pci_to_afu(struct pci_dev *dev); -- cgit v0.10.2 From 48b3adf33459c1c42766d9c2068a592216fe7812 Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Thu, 14 Jul 2016 07:17:02 +1000 Subject: cxl: Enable bus mastering for devices using CAPP DMA mode Devices that use CAPP DMA mode (such as the Mellanox CX4) require bus master to be enabled in order for the CAPI traffic to flow. This should be harmless to enable for other cxl devices, so unconditionally enable it in the adapter init flow. 
Signed-off-by: Ian Munsie Reviewed-by: Andrew Donnellan Reviewed-by: Frederic Barrat Signed-off-by: Michael Ellerman diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index 6ac6b05..deef9c7 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -1264,6 +1264,9 @@ static int cxl_configure_adapter(struct cxl *adapter, struct pci_dev *dev) if ((rc = adapter->native->sl_ops->adapter_regs_init(adapter, dev))) goto err; + /* Required for devices using CAPP DMA mode, harmless for others */ + pci_set_master(dev); + if ((rc = pnv_phb_to_cxl_mode(dev, adapter->native->sl_ops->capi_mode))) goto err; -- cgit v0.10.2 From 62ccf2d2efefa01d0eb92cd6ecbb45ea0499fb1c Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Thu, 14 Jul 2016 07:17:03 +1000 Subject: cxl: Move cxl_afu_get / cxl_afu_put to base The Mellanox CX4 uses a model where the AFU is one physical function of the device, and is used by other peer physical functions of the same device. This will require those other devices to grab a reference on the AFU when they are initialised to make sure that it does not go away during their lifetime. Move the AFU refcount functions to base.c so they can be called from the PHB code. Signed-off-by: Ian Munsie Reviewed-by: Andrew Donnellan Reviewed-by: Frederic Barrat Signed-off-by: Michael Ellerman diff --git a/drivers/misc/cxl/base.c b/drivers/misc/cxl/base.c index 2330980..7557835 100644 --- a/drivers/misc/cxl/base.c +++ b/drivers/misc/cxl/base.c @@ -54,6 +54,19 @@ static inline void cxl_calls_put(struct cxl_calls *calls) { } #endif /* CONFIG_CXL_MODULE */ +/* AFU refcount management */ +struct cxl_afu *cxl_afu_get(struct cxl_afu *afu) +{ + return (get_device(&afu->dev) == NULL) ? 
NULL : afu; +} +EXPORT_SYMBOL_GPL(cxl_afu_get); + +void cxl_afu_put(struct cxl_afu *afu) +{ + put_device(&afu->dev); +} +EXPORT_SYMBOL_GPL(cxl_afu_put); + void cxl_slbia(struct mm_struct *mm) { struct cxl_calls *calls; diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h index 36b3237..d4aae6f 100644 --- a/drivers/misc/cxl/cxl.h +++ b/drivers/misc/cxl/cxl.h @@ -440,18 +440,6 @@ struct cxl_afu { bool enabled; }; -/* AFU refcount management */ -static inline struct cxl_afu *cxl_afu_get(struct cxl_afu *afu) -{ - - return (get_device(&afu->dev) == NULL) ? NULL : afu; -} - -static inline void cxl_afu_put(struct cxl_afu *afu) -{ - put_device(&afu->dev); -} - struct cxl_irq_name { struct list_head list; diff --git a/include/misc/cxl-base.h b/include/misc/cxl-base.h index 5ae9625..f53808f 100644 --- a/include/misc/cxl-base.h +++ b/include/misc/cxl-base.h @@ -36,11 +36,15 @@ static inline void cxl_ctx_put(void) atomic_dec(&cxl_use_count); } +struct cxl_afu *cxl_afu_get(struct cxl_afu *afu); +void cxl_afu_put(struct cxl_afu *afu); void cxl_slbia(struct mm_struct *mm); #else /* CONFIG_CXL_BASE */ static inline bool cxl_ctx_in_use(void) { return false; } +static inline struct cxl_afu *cxl_afu_get(struct cxl_afu *afu) { return NULL; } +static inline void cxl_afu_put(struct cxl_afu *afu) {} static inline void cxl_slbia(struct mm_struct *mm) {} #endif /* CONFIG_CXL_BASE */ -- cgit v0.10.2 From a19bd79e31769626d288cc016e21a31b6f47bf6f Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Thu, 14 Jul 2016 07:17:04 +1000 Subject: cxl: Allow a default context to be associated with an external pci_dev The cxl kernel API has a concept of a default context associated with each PCI device under the virtual PHB. The Mellanox CX4 will also use the cxl kernel API, but it does not use a virtual PHB - rather, the AFU appears as a physical function as a peer to the networking functions. 
In order to allow the kernel API to work with those networking functions, we will need to associate a default context with them as well. To this end, refactor the corresponding code to do this in vphb.c and export it so that it can be called from the PHB code. Signed-off-by: Ian Munsie Reviewed-by: Frederic Barrat Reviewed-by: Andrew Donnellan Signed-off-by: Michael Ellerman diff --git a/drivers/misc/cxl/Makefile b/drivers/misc/cxl/Makefile index 8a55c1a..56e9a47 100644 --- a/drivers/misc/cxl/Makefile +++ b/drivers/misc/cxl/Makefile @@ -3,7 +3,7 @@ ccflags-$(CONFIG_PPC_WERROR) += -Werror cxl-y += main.o file.o irq.o fault.o native.o cxl-y += context.o sysfs.o debugfs.o pci.o trace.o -cxl-y += vphb.o api.o +cxl-y += vphb.o phb.o api.o cxl-$(CONFIG_PPC_PSERIES) += flash.o guest.o of.o hcalls.o obj-$(CONFIG_CXL) += cxl.o obj-$(CONFIG_CXL_BASE) += base.o diff --git a/drivers/misc/cxl/base.c b/drivers/misc/cxl/base.c index 7557835..e1e80cb 100644 --- a/drivers/misc/cxl/base.c +++ b/drivers/misc/cxl/base.c @@ -106,6 +106,41 @@ int cxl_update_properties(struct device_node *dn, } EXPORT_SYMBOL_GPL(cxl_update_properties); +/* + * API calls into the driver that may be called from the PHB code and must be + * built in. 
+ */ +bool cxl_pci_associate_default_context(struct pci_dev *dev, struct cxl_afu *afu) +{ + bool ret; + struct cxl_calls *calls; + + calls = cxl_calls_get(); + if (!calls) + return false; + + ret = calls->cxl_pci_associate_default_context(dev, afu); + + cxl_calls_put(calls); + + return ret; +} +EXPORT_SYMBOL_GPL(cxl_pci_associate_default_context); + +void cxl_pci_disable_device(struct pci_dev *dev) +{ + struct cxl_calls *calls; + + calls = cxl_calls_get(); + if (!calls) + return; + + calls->cxl_pci_disable_device(dev); + + cxl_calls_put(calls); +} +EXPORT_SYMBOL_GPL(cxl_pci_disable_device); + static int __init cxl_base_init(void) { struct device_node *np; diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h index d4aae6f..b81f476 100644 --- a/drivers/misc/cxl/cxl.h +++ b/drivers/misc/cxl/cxl.h @@ -719,9 +719,15 @@ static inline u64 cxl_p2n_read(struct cxl_afu *afu, cxl_p2n_reg_t reg) ssize_t cxl_pci_afu_read_err_buffer(struct cxl_afu *afu, char *buf, loff_t off, size_t count); +/* Internal functions wrapped in cxl_base to allow PHB to call them */ +bool _cxl_pci_associate_default_context(struct pci_dev *dev, struct cxl_afu *afu); +void _cxl_pci_disable_device(struct pci_dev *dev); struct cxl_calls { void (*cxl_slbia)(struct mm_struct *mm); + bool (*cxl_pci_associate_default_context)(struct pci_dev *dev, struct cxl_afu *afu); + void (*cxl_pci_disable_device)(struct pci_dev *dev); + struct module *owner; }; int register_cxl_calls(struct cxl_calls *calls); diff --git a/drivers/misc/cxl/main.c b/drivers/misc/cxl/main.c index ae68c32..4e5474b 100644 --- a/drivers/misc/cxl/main.c +++ b/drivers/misc/cxl/main.c @@ -110,6 +110,8 @@ static inline void cxl_slbia_core(struct mm_struct *mm) static struct cxl_calls cxl_calls = { .cxl_slbia = cxl_slbia_core, + .cxl_pci_associate_default_context = _cxl_pci_associate_default_context, + .cxl_pci_disable_device = _cxl_pci_disable_device, .owner = THIS_MODULE, }; diff --git a/drivers/misc/cxl/phb.c b/drivers/misc/cxl/phb.c new 
file mode 100644 index 0000000..0935d44 --- /dev/null +++ b/drivers/misc/cxl/phb.c @@ -0,0 +1,44 @@ +/* + * Copyright 2014-2016 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include "cxl.h" + +bool _cxl_pci_associate_default_context(struct pci_dev *dev, struct cxl_afu *afu) +{ + struct cxl_context *ctx; + + /* + * Allocate a context to do cxl things to. This is used for interrupts + * in the peer model using a real phb, and if we eventually do DMA ops + * in the virtual phb, we'll need a default context to attach them to. + */ + ctx = cxl_dev_context_init(dev); + if (!ctx) + return false; + dev->dev.archdata.cxl_ctx = ctx; + + return (cxl_ops->afu_check_and_enable(afu) == 0); +} +/* exported via cxl_base */ + +void _cxl_pci_disable_device(struct pci_dev *dev) +{ + struct cxl_context *ctx = cxl_get_context(dev); + + if (ctx) { + if (ctx->status == STARTED) { + dev_err(&dev->dev, "Default context started\n"); + return; + } + dev->dev.archdata.cxl_ctx = NULL; + cxl_release_context(ctx); + } +} +/* exported via cxl_base */ diff --git a/drivers/misc/cxl/vphb.c b/drivers/misc/cxl/vphb.c index 012b6aa..c8a759f 100644 --- a/drivers/misc/cxl/vphb.c +++ b/drivers/misc/cxl/vphb.c @@ -44,7 +44,6 @@ static bool cxl_pci_enable_device_hook(struct pci_dev *dev) { struct pci_controller *phb; struct cxl_afu *afu; - struct cxl_context *ctx; phb = pci_bus_to_host(dev->bus); afu = (struct cxl_afu *)phb->private_data; @@ -57,30 +56,7 @@ static bool cxl_pci_enable_device_hook(struct pci_dev *dev) set_dma_ops(&dev->dev, &dma_direct_ops); set_dma_offset(&dev->dev, PAGE_OFFSET); - /* - * Allocate a context to do cxl things too. 
If we eventually do real - * DMA ops, we'll need a default context to attach them to - */ - ctx = cxl_dev_context_init(dev); - if (!ctx) - return false; - dev->dev.archdata.cxl_ctx = ctx; - - return (cxl_ops->afu_check_and_enable(afu) == 0); -} - -static void cxl_pci_disable_device(struct pci_dev *dev) -{ - struct cxl_context *ctx = cxl_get_context(dev); - - if (ctx) { - if (ctx->status == STARTED) { - dev_err(&dev->dev, "Default context started\n"); - return; - } - dev->dev.archdata.cxl_ctx = NULL; - cxl_release_context(ctx); - } + return _cxl_pci_associate_default_context(dev, afu); } static resource_size_t cxl_pci_window_alignment(struct pci_bus *bus, @@ -197,8 +173,8 @@ static struct pci_controller_ops cxl_pci_controller_ops = { .probe_mode = cxl_pci_probe_mode, .enable_device_hook = cxl_pci_enable_device_hook, - .disable_device = cxl_pci_disable_device, - .release_device = cxl_pci_disable_device, + .disable_device = _cxl_pci_disable_device, + .release_device = _cxl_pci_disable_device, .window_alignment = cxl_pci_window_alignment, .reset_secondary_bus = cxl_pci_reset_secondary_bus, .setup_msi_irqs = cxl_setup_msi_irqs, diff --git a/include/misc/cxl-base.h b/include/misc/cxl-base.h index f53808f..bb7e629 100644 --- a/include/misc/cxl-base.h +++ b/include/misc/cxl-base.h @@ -10,6 +10,8 @@ #ifndef _MISC_CXL_BASE_H #define _MISC_CXL_BASE_H +#include + #ifdef CONFIG_CXL_BASE #define CXL_IRQ_RANGES 4 @@ -39,6 +41,8 @@ static inline void cxl_ctx_put(void) struct cxl_afu *cxl_afu_get(struct cxl_afu *afu); void cxl_afu_put(struct cxl_afu *afu); void cxl_slbia(struct mm_struct *mm); +bool cxl_pci_associate_default_context(struct pci_dev *dev, struct cxl_afu *afu); +void cxl_pci_disable_device(struct pci_dev *dev); #else /* CONFIG_CXL_BASE */ @@ -46,6 +50,8 @@ static inline bool cxl_ctx_in_use(void) { return false; } static inline struct cxl_afu *cxl_afu_get(struct cxl_afu *afu) { return NULL; } static inline void cxl_afu_put(struct cxl_afu *afu) {} static inline void 
 cxl_slbia(struct mm_struct *mm) {} +static inline bool cxl_pci_associate_default_context(struct pci_dev *dev, struct cxl_afu *afu) { return false; } +static inline void cxl_pci_disable_device(struct pci_dev *dev) {} #endif /* CONFIG_CXL_BASE */ -- cgit v0.10.2 From e4f5fc001a6cb82bef910372457ca7754defa84d Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Thu, 14 Jul 2016 07:17:05 +1000 Subject: cxl: Do not create vPHB if there are no AFU configuration records The vPHB model of the cxl kernel API is a hierarchy where the AFU is represented by the vPHB, and its AFU configuration records are exposed as functions under that vPHB. If there are no AFU configuration records we will create a vPHB with nothing under it, which is a waste of resources and will opt us into EEH handling despite not having anything special to handle. This also does not make sense for cards using the peer model of the cxl kernel API, where the other functions of the device are exposed via additional peer physical functions rather than AFU configuration records. This model will also not work with the existing EEH handling in the cxl driver, as that is designed around the vPHB model. Skip creating the vPHB for AFUs without any AFU configuration records, and opt out of EEH handling for them. 
Signed-off-by: Ian Munsie Reviewed-by: Andrew Donnellan Signed-off-by: Michael Ellerman diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index deef9c7..dd7ff22 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -1572,6 +1572,9 @@ static pci_ers_result_t cxl_pci_error_detected(struct pci_dev *pdev, */ for (i = 0; i < adapter->slices; i++) { afu = adapter->afu[i]; + /* Only participate in EEH if we are on a virtual PHB */ + if (afu->phb == NULL) + return PCI_ERS_RESULT_NONE; cxl_vphb_error_detected(afu, state); } return PCI_ERS_RESULT_DISCONNECT; diff --git a/drivers/misc/cxl/vphb.c b/drivers/misc/cxl/vphb.c index c8a759f..8865e8d 100644 --- a/drivers/misc/cxl/vphb.c +++ b/drivers/misc/cxl/vphb.c @@ -188,6 +188,17 @@ int cxl_pci_vphb_add(struct cxl_afu *afu) struct device_node *vphb_dn; struct device *parent; + /* + * If there are no AFU configuration records we won't have anything to + * expose under the vPHB, so skip creating one, returning success since + * this is still a valid case. This will also opt us out of EEH + * handling since we won't have anything special to do if there are no + * kernel drivers attached to the vPHB, and EEH handling is not yet + * supported in the peer model. + */ + if (!afu->crs_num) + return 0; + /* The parent device is the adapter. Reuse the device node of * the adapter. * We don't seem to care what device node is used for the vPHB, -- cgit v0.10.2 From 4361b03430d685610e5feea3ec7846e8b9ae795f Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Thu, 14 Jul 2016 07:17:06 +1000 Subject: powerpc/powernv: Add support for the cxl kernel api on the real phb This adds support for the peer model of the cxl kernel api to the PowerNV PHB, in which physical function 0 represents the cxl function on the card (an XSL in the case of the CX4), which other physical functions will use for memory access and interrupt services. 
 It is referred to as the peer model as these functions are peers of one another, as opposed to the Virtual PHB model which forms a hierarchy. This patch exports APIs to enable the peer mode, check if a PCI device is attached to a PHB in this mode, and to set and get the peer AFU for this mode. The cxl driver will enable this mode for supported cards by calling pnv_cxl_enable_phb_kernel_api(). This will set a flag in the PHB to note that this mode is enabled, and switch out its controller_ops for the cxl version. The cxl version of the controller_ops struct implements its own versions of the enable_device_hook and release_device to handle refcounting on the peer AFU and to allocate a default context for the device. Once enabled, the cxl kernel API may not be disabled on a PHB. Currently there is no safe way to disable cxl mode short of a reboot, so until that changes there is no reason to support the disable path. Signed-off-by: Ian Munsie Reviewed-by: Andrew Donnellan Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/include/asm/pnv-pci.h b/arch/powerpc/include/asm/pnv-pci.h index 791db1b..c47097f 100644 --- a/arch/powerpc/include/asm/pnv-pci.h +++ b/arch/powerpc/include/asm/pnv-pci.h @@ -38,6 +38,13 @@ int pnv_cxl_alloc_hwirq_ranges(struct cxl_irq_ranges *irqs, struct pci_dev *dev, int num); void pnv_cxl_release_hwirq_ranges(struct cxl_irq_ranges *irqs, struct pci_dev *dev); + +/* Support for the cxl kernel api on the real PHB (instead of vPHB) */ +int pnv_cxl_enable_phb_kernel_api(struct pci_controller *hose, bool enable); +bool pnv_pci_on_cxl_phb(struct pci_dev *dev); +struct cxl_afu *pnv_cxl_phb_to_afu(struct pci_controller *hose); +void pnv_cxl_phb_set_peer_afu(struct pci_dev *dev, struct cxl_afu *afu); + #endif #endif diff --git a/arch/powerpc/platforms/powernv/pci-cxl.c b/arch/powerpc/platforms/powernv/pci-cxl.c index e0eeb00..831bbfb 100644 --- a/arch/powerpc/platforms/powernv/pci-cxl.c +++ b/arch/powerpc/platforms/powernv/pci-cxl.c @@ -7,8 +7,11 
@@ * 2 of the License, or (at your option) any later version. */ +#include +#include #include #include +#include #include "pci.h" @@ -161,3 +164,120 @@ int pnv_cxl_ioda_msi_setup(struct pci_dev *dev, unsigned int hwirq, return 0; } EXPORT_SYMBOL(pnv_cxl_ioda_msi_setup); + +/* + * Sets flags and switches the controller ops to enable the cxl kernel api. + * Originally the cxl kernel API operated on a virtual PHB, but certain cards + * such as the Mellanox CX4 use a peer model instead and for these cards the + * cxl kernel api will operate on the real PHB. + */ +int pnv_cxl_enable_phb_kernel_api(struct pci_controller *hose, bool enable) +{ + struct pnv_phb *phb = hose->private_data; + struct module *cxl_module; + + if (!enable) { + /* + * Once cxl mode is enabled on the PHB, there is currently no + * known safe method to disable it again, and trying risks a + * checkstop. If we can find a way to safely disable cxl mode + * in the future we can revisit this, but for now the only sane + * thing to do is to refuse to disable cxl mode: + */ + return -EPERM; + } + + /* + * Hold a reference to the cxl module since several PHB operations now + * depend on it, and it would be insane to allow it to be removed so + * long as we are in this mode (and since we can't safely disable this + * mode once enabled...). 
+ */ + mutex_lock(&module_mutex); + cxl_module = find_module("cxl"); + if (cxl_module) + __module_get(cxl_module); + mutex_unlock(&module_mutex); + if (!cxl_module) + return -ENODEV; + + phb->flags |= PNV_PHB_FLAG_CXL; + hose->controller_ops = pnv_cxl_cx4_ioda_controller_ops; + + return 0; +} +EXPORT_SYMBOL_GPL(pnv_cxl_enable_phb_kernel_api); + +bool pnv_pci_on_cxl_phb(struct pci_dev *dev) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pnv_phb *phb = hose->private_data; + + return !!(phb->flags & PNV_PHB_FLAG_CXL); +} +EXPORT_SYMBOL_GPL(pnv_pci_on_cxl_phb); + +struct cxl_afu *pnv_cxl_phb_to_afu(struct pci_controller *hose) +{ + struct pnv_phb *phb = hose->private_data; + + return (struct cxl_afu *)phb->cxl_afu; +} +EXPORT_SYMBOL_GPL(pnv_cxl_phb_to_afu); + +void pnv_cxl_phb_set_peer_afu(struct pci_dev *dev, struct cxl_afu *afu) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pnv_phb *phb = hose->private_data; + + phb->cxl_afu = afu; +} +EXPORT_SYMBOL_GPL(pnv_cxl_phb_set_peer_afu); + +/* + * In the peer cxl model, the XSL/PSL is physical function 0, and will be used + * by other functions on the device for memory access and interrupts. When the + * other functions are enabled we explicitly take a reference on the cxl + * function since they will use it, and allocate a default context associated + * with that function just like the vPHB model of the cxl kernel API. 
+ */ +bool pnv_cxl_enable_device_hook(struct pci_dev *dev) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pnv_phb *phb = hose->private_data; + struct cxl_afu *afu = phb->cxl_afu; + + if (!pnv_pci_enable_device_hook(dev)) + return false; + + + /* No special handling for the cxl function, which is always PF 0 */ + if (PCI_FUNC(dev->devfn) == 0) + return true; + + if (!afu) { + dev_WARN(&dev->dev, "Attempted to enable function > 0 on CXL PHB without a peer AFU\n"); + return false; + } + + dev_info(&dev->dev, "Enabling function on CXL enabled PHB with peer AFU\n"); + + /* Make sure the peer AFU can't go away while this device is active */ + cxl_afu_get(afu); + + return cxl_pci_associate_default_context(dev, afu); +} + +void pnv_cxl_disable_device(struct pci_dev *dev) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pnv_phb *phb = hose->private_data; + struct cxl_afu *afu = phb->cxl_afu; + + /* No special handling for cxl function: */ + if (PCI_FUNC(dev->devfn) == 0) + return; + + cxl_pci_disable_device(dev); + cxl_afu_put(afu); +} diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index e0d8103..104c040 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -3222,7 +3222,7 @@ static resource_size_t pnv_pci_iov_resource_alignment(struct pci_dev *pdev, /* Prevent enabling devices for which we couldn't properly * assign a PE */ -static bool pnv_pci_enable_device_hook(struct pci_dev *dev) +bool pnv_pci_enable_device_hook(struct pci_dev *dev) { struct pci_controller *hose = pci_bus_to_host(dev->bus); struct pnv_phb *phb = hose->private_data; @@ -3461,6 +3461,22 @@ static const struct pci_controller_ops pnv_npu_ioda_controller_ops = { .shutdown = pnv_pci_ioda_shutdown, }; +#ifdef CONFIG_CXL_BASE +const struct pci_controller_ops pnv_cxl_cx4_ioda_controller_ops = { + .dma_dev_setup = pnv_pci_dma_dev_setup, + .dma_bus_setup = 
pnv_pci_dma_bus_setup, + .enable_device_hook = pnv_cxl_enable_device_hook, + .disable_device = pnv_cxl_disable_device, + .release_device = pnv_pci_release_device, + .window_alignment = pnv_pci_window_alignment, + .setup_bridge = pnv_pci_setup_bridge, + .reset_secondary_bus = pnv_pci_reset_secondary_bus, + .dma_set_mask = pnv_pci_ioda_dma_set_mask, + .dma_get_required_mask = pnv_pci_ioda_dma_get_required_mask, + .shutdown = pnv_pci_ioda_shutdown, +}; +#endif + static void __init pnv_pci_init_ioda_phb(struct device_node *np, u64 hub_id, int ioda_type) { diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index 49c2997..20c9a6b 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -76,6 +76,7 @@ struct pnv_ioda_pe { }; #define PNV_PHB_FLAG_EEH (1 << 0) +#define PNV_PHB_FLAG_CXL (1 << 1) /* Real PHB supporting the cxl kernel API */ struct pnv_phb { struct pci_controller *hose; @@ -177,6 +178,9 @@ struct pnv_phb { struct OpalIoP7IOCErrorData hub_diag; } diag; +#ifdef CONFIG_CXL_BASE + struct cxl_afu *cxl_afu; +#endif }; extern struct pci_ops pnv_pci_ops; @@ -218,6 +222,7 @@ extern int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type); extern void pnv_teardown_msi_irqs(struct pci_dev *pdev); extern struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev); extern void pnv_set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq); +extern bool pnv_pci_enable_device_hook(struct pci_dev *dev); extern void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level, const char *fmt, ...); @@ -238,4 +243,13 @@ extern long pnv_npu_unset_window(struct pnv_ioda_pe *npe, int num); extern void pnv_npu_take_ownership(struct pnv_ioda_pe *npe); extern void pnv_npu_release_ownership(struct pnv_ioda_pe *npe); + +/* cxl functions */ +extern bool pnv_cxl_enable_device_hook(struct pci_dev *dev); +extern void pnv_cxl_disable_device(struct pci_dev *dev); + + +/* phb ops (cxl switches these when 
enabling the kernel api on the phb) */ +extern const struct pci_controller_ops pnv_cxl_cx4_ioda_controller_ops; + #endif /* __POWERNV_PCI_H */ -- cgit v0.10.2 From 317f5ef1b363417b6f1e93b90dfd2ffd6be6e867 Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Thu, 14 Jul 2016 07:17:07 +1000 Subject: cxl: Add support for using the kernel API with a real PHB This hooks up support for using the kernel API with a real PHB. After the AFU initialisation has completed it calls into the PHB code to pass it the AFU that will be used by other peer physical functions on the adapter. The cxl_pci_to_afu API is extended to work with peer PCI devices, retrieving the peer AFU from the PHB. This API may also now return an error if it is called on a PCI device that is not associated with either a cxl vPHB or a peer PCI device to an AFU, and this error is propagated down. Signed-off-by: Ian Munsie Reviewed-by: Andrew Donnellan Signed-off-by: Michael Ellerman diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c index 7707055..6a030bf 100644 --- a/drivers/misc/cxl/api.c +++ b/drivers/misc/cxl/api.c @@ -13,6 +13,7 @@ #include #include #include +#include #include "cxl.h" @@ -24,6 +25,8 @@ struct cxl_context *cxl_dev_context_init(struct pci_dev *dev) int rc; afu = cxl_pci_to_afu(dev); + if (IS_ERR(afu)) + return ERR_CAST(afu); ctx = cxl_context_alloc(); if (IS_ERR(ctx)) { @@ -438,6 +441,8 @@ EXPORT_SYMBOL_GPL(cxl_perst_reloads_same_image); ssize_t cxl_read_adapter_vpd(struct pci_dev *dev, void *buf, size_t count) { struct cxl_afu *afu = cxl_pci_to_afu(dev); + if (IS_ERR(afu)) + return -ENODEV; return cxl_ops->read_adapter_vpd(afu->adapter, buf, count); } diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index dd7ff22..cb5d172 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -1502,6 +1502,9 @@ static int cxl_probe(struct pci_dev *dev, const struct pci_device_id *id) dev_err(&dev->dev, "AFU %i failed to start: %i\n", slice, rc); } + if (pnv_pci_on_cxl_phb(dev) 
&& adapter->slices >= 1) + pnv_cxl_phb_set_peer_afu(dev, adapter->afu[0]); + return 0; } diff --git a/drivers/misc/cxl/vphb.c b/drivers/misc/cxl/vphb.c index 8865e8d..dee8def 100644 --- a/drivers/misc/cxl/vphb.c +++ b/drivers/misc/cxl/vphb.c @@ -9,6 +9,7 @@ #include #include +#include #include "cxl.h" static int cxl_dma_set_mask(struct pci_dev *pdev, u64 dma_mask) @@ -258,13 +259,18 @@ void cxl_pci_vphb_remove(struct cxl_afu *afu) pcibios_free_controller(phb); } +static bool _cxl_pci_is_vphb_device(struct pci_controller *phb) +{ + return (phb->ops == &cxl_pcie_pci_ops); +} + bool cxl_pci_is_vphb_device(struct pci_dev *dev) { struct pci_controller *phb; phb = pci_bus_to_host(dev->bus); - return (phb->ops == &cxl_pcie_pci_ops); + return _cxl_pci_is_vphb_device(phb); } struct cxl_afu *cxl_pci_to_afu(struct pci_dev *dev) @@ -273,7 +279,13 @@ struct cxl_afu *cxl_pci_to_afu(struct pci_dev *dev) phb = pci_bus_to_host(dev->bus); - return (struct cxl_afu *)phb->private_data; + if (_cxl_pci_is_vphb_device(phb)) + return (struct cxl_afu *)phb->private_data; + + if (pnv_pci_on_cxl_phb(dev)) + return pnv_cxl_phb_to_afu(phb); + + return ERR_PTR(-ENODEV); } EXPORT_SYMBOL_GPL(cxl_pci_to_afu); -- cgit v0.10.2 From 79384e4b71240abf50c375eea56060b0d79c242a Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Thu, 14 Jul 2016 07:17:08 +1000 Subject: cxl: Add kernel APIs to get & set the max irqs per context These APIs will be used by the Mellanox CX4 support. While they function standalone to configure existing behaviour, their primary purpose is to allow the Mellanox driver to inform the cxl driver of a hardware limitation, which will be used in a future patch. 
Signed-off-by: Ian Munsie Reviewed-by: Frederic Barrat Reviewed-by: Andrew Donnellan Signed-off-by: Michael Ellerman diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c index 6a030bf..1e2c0d9 100644 --- a/drivers/misc/cxl/api.c +++ b/drivers/misc/cxl/api.c @@ -447,3 +447,30 @@ ssize_t cxl_read_adapter_vpd(struct pci_dev *dev, void *buf, size_t count) return cxl_ops->read_adapter_vpd(afu->adapter, buf, count); } EXPORT_SYMBOL_GPL(cxl_read_adapter_vpd); + +int cxl_set_max_irqs_per_process(struct pci_dev *dev, int irqs) +{ + struct cxl_afu *afu = cxl_pci_to_afu(dev); + if (IS_ERR(afu)) + return -ENODEV; + + if (irqs > afu->adapter->user_irqs) + return -EINVAL; + + /* Limit user_irqs to prevent the user increasing this via sysfs */ + afu->adapter->user_irqs = irqs; + afu->irqs_max = irqs; + + return 0; +} +EXPORT_SYMBOL_GPL(cxl_set_max_irqs_per_process); + +int cxl_get_max_irqs_per_process(struct pci_dev *dev) +{ + struct cxl_afu *afu = cxl_pci_to_afu(dev); + if (IS_ERR(afu)) + return -ENODEV; + + return afu->irqs_max; +} +EXPORT_SYMBOL_GPL(cxl_get_max_irqs_per_process); diff --git a/include/misc/cxl.h b/include/misc/cxl.h index dd9eebb..fc07ed4 100644 --- a/include/misc/cxl.h +++ b/include/misc/cxl.h @@ -166,6 +166,16 @@ void cxl_psa_unmap(void __iomem *addr); /* Get the process element for this context */ int cxl_process_element(struct cxl_context *ctx); +/* + * Limit the number of interrupts that a single context can allocate via + * cxl_start_work. If using the api with a real phb, this may be used to + * request that additional default contexts be created when allocating + * interrupts via pci_enable_msix_range. These will be set to the same running + * state as the default context, and if that is running it will reuse the + * parameters previously passed to cxl_start_context for the default context. 
+ */ +int cxl_set_max_irqs_per_process(struct pci_dev *dev, int irqs); +int cxl_get_max_irqs_per_process(struct pci_dev *dev); /* * These calls allow drivers to create their own file descriptors and make them -- cgit v0.10.2 From cbce0917e2e47d4bf5aa3b5fd6b1247f33e1a126 Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Thu, 14 Jul 2016 07:17:09 +1000 Subject: cxl: Add preliminary workaround for CX4 interrupt limitation The Mellanox CX4 has a hardware limitation where only 4 bits of the AFU interrupt number can be passed to the XSL when sending an interrupt, limiting it to only 15 interrupts per context (AFU interrupt number 0 is invalid). In order to overcome this, we will allocate additional contexts linked to the default context as extra address space for the extra interrupts - this will be implemented in the next patch. This patch adds the preliminary support to allow this, by way of adding a linked list in the context structure that we use to keep track of the contexts dedicated to interrupts, and an API to simultaneously iterate over the related context structures, AFU interrupt numbers and hardware interrupt numbers. The point of using a single API to iterate these is to hide some of the details of the iteration from external code, and to reduce the number of APIs that need to be exported via base.c to allow built in code to call. 
Signed-off-by: Ian Munsie Reviewed-by: Frederic Barrat Reviewed-by: Andrew Donnellan Signed-off-by: Michael Ellerman diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c index 1e2c0d9..f02a859 100644 --- a/drivers/misc/cxl/api.c +++ b/drivers/misc/cxl/api.c @@ -97,6 +97,21 @@ static irq_hw_number_t cxl_find_afu_irq(struct cxl_context *ctx, int num) return 0; } +int _cxl_next_msi_hwirq(struct pci_dev *pdev, struct cxl_context **ctx, int *afu_irq) +{ + if (*ctx == NULL || *afu_irq == 0) { + *afu_irq = 1; + *ctx = cxl_get_context(pdev); + } else { + (*afu_irq)++; + if (*afu_irq > cxl_get_max_irqs_per_process(pdev)) { + *ctx = list_next_entry(*ctx, extra_irq_contexts); + *afu_irq = 1; + } + } + return cxl_find_afu_irq(*ctx, *afu_irq); +} +/* Exported via cxl_base */ int cxl_set_priv(struct cxl_context *ctx, void *priv) { diff --git a/drivers/misc/cxl/base.c b/drivers/misc/cxl/base.c index e1e80cb..fe90f89 100644 --- a/drivers/misc/cxl/base.c +++ b/drivers/misc/cxl/base.c @@ -141,6 +141,23 @@ void cxl_pci_disable_device(struct pci_dev *dev) } EXPORT_SYMBOL_GPL(cxl_pci_disable_device); +int cxl_next_msi_hwirq(struct pci_dev *pdev, struct cxl_context **ctx, int *afu_irq) +{ + int ret; + struct cxl_calls *calls; + + calls = cxl_calls_get(); + if (!calls) + return -EBUSY; + + ret = calls->cxl_next_msi_hwirq(pdev, ctx, afu_irq); + + cxl_calls_put(calls); + + return ret; +} +EXPORT_SYMBOL_GPL(cxl_next_msi_hwirq); + static int __init cxl_base_init(void) { struct device_node *np; diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c index edbb99e..2616cddb 100644 --- a/drivers/misc/cxl/context.c +++ b/drivers/misc/cxl/context.c @@ -68,6 +68,7 @@ int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master, ctx->pending_afu_err = false; INIT_LIST_HEAD(&ctx->irq_names); + INIT_LIST_HEAD(&ctx->extra_irq_contexts); /* * When we have to destroy all contexts in cxl_context_detach_all() we diff --git a/drivers/misc/cxl/cxl.h 
b/drivers/misc/cxl/cxl.h index b81f476..73b9a55 100644 --- a/drivers/misc/cxl/cxl.h +++ b/drivers/misc/cxl/cxl.h @@ -537,6 +537,14 @@ struct cxl_context { atomic_t afu_driver_events; struct rcu_head rcu; + + /* + * Only used when more interrupts are allocated via + * pci_enable_msix_range than are supported in the default context, to + * use additional contexts to overcome the limitation. i.e. Mellanox + * CX4 only: + */ + struct list_head extra_irq_contexts; }; struct cxl_service_layer_ops { @@ -722,11 +730,13 @@ ssize_t cxl_pci_afu_read_err_buffer(struct cxl_afu *afu, char *buf, /* Internal functions wrapped in cxl_base to allow PHB to call them */ bool _cxl_pci_associate_default_context(struct pci_dev *dev, struct cxl_afu *afu); void _cxl_pci_disable_device(struct pci_dev *dev); +int _cxl_next_msi_hwirq(struct pci_dev *pdev, struct cxl_context **ctx, int *afu_irq); struct cxl_calls { void (*cxl_slbia)(struct mm_struct *mm); bool (*cxl_pci_associate_default_context)(struct pci_dev *dev, struct cxl_afu *afu); void (*cxl_pci_disable_device)(struct pci_dev *dev); + int (*cxl_next_msi_hwirq)(struct pci_dev *pdev, struct cxl_context **ctx, int *afu_irq); struct module *owner; }; diff --git a/drivers/misc/cxl/main.c b/drivers/misc/cxl/main.c index 4e5474b..66fac71 100644 --- a/drivers/misc/cxl/main.c +++ b/drivers/misc/cxl/main.c @@ -112,6 +112,7 @@ static struct cxl_calls cxl_calls = { .cxl_slbia = cxl_slbia_core, .cxl_pci_associate_default_context = _cxl_pci_associate_default_context, .cxl_pci_disable_device = _cxl_pci_disable_device, + .cxl_next_msi_hwirq = _cxl_next_msi_hwirq, .owner = THIS_MODULE, }; diff --git a/include/misc/cxl.h b/include/misc/cxl.h index fc07ed4..6c52cbc 100644 --- a/include/misc/cxl.h +++ b/include/misc/cxl.h @@ -178,6 +178,15 @@ int cxl_set_max_irqs_per_process(struct pci_dev *dev, int irqs); int cxl_get_max_irqs_per_process(struct pci_dev *dev); /* + * Use to simultaneously iterate over hardware interrupt numbers, contexts and + * afu 
interrupt numbers allocated for the device via pci_enable_msix_range and + * is a useful convenience function when working with hardware that has + * limitations on the number of interrupts per process. *ctx and *afu_irq + * should be NULL and 0 to start the iteration. + */ +int cxl_next_msi_hwirq(struct pci_dev *pdev, struct cxl_context **ctx, int *afu_irq); + +/* * These calls allow drivers to create their own file descriptors and make them * identical to the cxl file descriptor user API. An example use case: * -- cgit v0.10.2 From a2f67d5ee8d950caaa7a6144cf0bfb256500b73e Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Thu, 14 Jul 2016 07:17:10 +1000 Subject: cxl: Add support for interrupts on the Mellanox CX4 The Mellanox CX4 in cxl mode uses a hybrid interrupt model, where interrupts are routed from the networking hardware to the XSL using the MSIX table, and from there will be transformed back into an MSIX interrupt using the cxl style interrupts (i.e. using IVTE entries and ranges to map a PE and AFU interrupt number to an MSIX address). We want to hide the implementation details of cxl interrupts as much as possible. To this end, we use a special version of the MSI setup & teardown routines in the PHB while in cxl mode to allocate the cxl interrupts and configure the IVTE entries in the process element. This function does not configure the MSIX table - the CX4 card uses a custom format in that table and it would not be appropriate to fill that out in generic code. The rest of the functionality is similar to the "Full MSI-X mode" described in the CAIA, and this could be easily extended to support other adapters that use that mode in the future. The interrupts will be associated with the default context. If the maximum number of interrupts per context has been limited (e.g. by the mlx5 driver), it will automatically allocate additional kernel contexts to associate extra interrupts as required. 
These contexts will be started using the same WED that was used to start the default context. Signed-off-by: Ian Munsie Reviewed-by: Andrew Donnellan Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/platforms/powernv/pci-cxl.c b/arch/powerpc/platforms/powernv/pci-cxl.c index 831bbfb..3f34207 100644 --- a/arch/powerpc/platforms/powernv/pci-cxl.c +++ b/arch/powerpc/platforms/powernv/pci-cxl.c @@ -8,6 +8,7 @@ */ #include +#include #include #include #include @@ -281,3 +282,86 @@ void pnv_cxl_disable_device(struct pci_dev *dev) cxl_pci_disable_device(dev); cxl_afu_put(afu); } + +/* + * This is a special version of pnv_setup_msi_irqs for cards in cxl mode. This + * function handles setting up the IVTE entries for the XSL to use. + * + * We are currently not filling out the MSIX table, since the only currently + * supported adapter (CX4) uses a custom MSIX table format in cxl mode and it + * is up to their driver to fill that out. In the future we may fill out the + * MSIX table (and change the IVTE entries to be an index to the MSIX table) + * for adapters implementing the Full MSI-X mode described in the CAIA. + */ +int pnv_cxl_cx4_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) +{ + struct pci_controller *hose = pci_bus_to_host(pdev->bus); + struct pnv_phb *phb = hose->private_data; + struct msi_desc *entry; + struct cxl_context *ctx = NULL; + unsigned int virq; + int hwirq; + int afu_irq = 0; + int rc; + + if (WARN_ON(!phb) || !phb->msi_bmp.bitmap) + return -ENODEV; + + if (pdev->no_64bit_msi && !phb->msi32_support) + return -ENODEV; + + rc = cxl_cx4_setup_msi_irqs(pdev, nvec, type); + if (rc) + return rc; + + for_each_pci_msi_entry(entry, pdev) { + if (!entry->msi_attrib.is_64 && !phb->msi32_support) { + pr_warn("%s: Supports only 64-bit MSIs\n", + pci_name(pdev)); + return -ENXIO; + } + + hwirq = cxl_next_msi_hwirq(pdev, &ctx, &afu_irq); + if (WARN_ON(hwirq <= 0)) + return (hwirq ? 
hwirq : -ENOMEM); + + virq = irq_create_mapping(NULL, hwirq); + if (virq == NO_IRQ) { + pr_warn("%s: Failed to map cxl mode MSI to linux irq\n", + pci_name(pdev)); + return -ENOMEM; + } + + rc = pnv_cxl_ioda_msi_setup(pdev, hwirq, virq); + if (rc) { + pr_warn("%s: Failed to setup cxl mode MSI\n", pci_name(pdev)); + irq_dispose_mapping(virq); + return rc; + } + + irq_set_msi_desc(virq, entry); + } + + return 0; +} + +void pnv_cxl_cx4_teardown_msi_irqs(struct pci_dev *pdev) +{ + struct pci_controller *hose = pci_bus_to_host(pdev->bus); + struct pnv_phb *phb = hose->private_data; + struct msi_desc *entry; + irq_hw_number_t hwirq; + + if (WARN_ON(!phb)) + return; + + for_each_pci_msi_entry(entry, pdev) { + if (entry->irq == NO_IRQ) + continue; + hwirq = virq_to_hw(entry->irq); + irq_set_msi_desc(entry->irq, NULL); + irq_dispose_mapping(entry->irq); + } + + cxl_cx4_teardown_msi_irqs(pdev); +} diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 104c040..530d4af 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -3465,6 +3465,10 @@ static const struct pci_controller_ops pnv_npu_ioda_controller_ops = { const struct pci_controller_ops pnv_cxl_cx4_ioda_controller_ops = { .dma_dev_setup = pnv_pci_dma_dev_setup, .dma_bus_setup = pnv_pci_dma_bus_setup, +#ifdef CONFIG_PCI_MSI + .setup_msi_irqs = pnv_cxl_cx4_setup_msi_irqs, + .teardown_msi_irqs = pnv_cxl_cx4_teardown_msi_irqs, +#endif .enable_device_hook = pnv_cxl_enable_device_hook, .disable_device = pnv_cxl_disable_device, .release_device = pnv_pci_release_device, diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index 20c9a6b..f0c276c 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -247,6 +247,8 @@ extern void pnv_npu_release_ownership(struct pnv_ioda_pe *npe); /* cxl functions */ extern bool pnv_cxl_enable_device_hook(struct pci_dev 
*dev); extern void pnv_cxl_disable_device(struct pci_dev *dev); +extern int pnv_cxl_cx4_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type); +extern void pnv_cxl_cx4_teardown_msi_irqs(struct pci_dev *pdev); /* phb ops (cxl switches these when enabling the kernel api on the phb) */ diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c index f02a859..f3d34b9 100644 --- a/drivers/misc/cxl/api.c +++ b/drivers/misc/cxl/api.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "cxl.h" @@ -489,3 +490,73 @@ int cxl_get_max_irqs_per_process(struct pci_dev *dev) return afu->irqs_max; } EXPORT_SYMBOL_GPL(cxl_get_max_irqs_per_process); + +/* + * This is a special interrupt allocation routine called from the PHB's MSI + * setup function. When capi interrupts are allocated in this manner they must + * still be associated with a running context, but since the MSI APIs have no + * way to specify this we use the default context associated with the device. + * + * The Mellanox CX4 has a hardware limitation that restricts the maximum AFU + * interrupt number, so in order to overcome this their driver informs us of + * the restriction by setting the maximum interrupts per context, and we + * allocate additional contexts as necessary so that we can keep the AFU + * interrupt number within the supported range. 
+ */ +int _cxl_cx4_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) +{ + struct cxl_context *ctx, *new_ctx, *default_ctx; + int remaining; + int rc; + + ctx = default_ctx = cxl_get_context(pdev); + if (WARN_ON(!default_ctx)) + return -ENODEV; + + remaining = nvec; + while (remaining > 0) { + rc = cxl_allocate_afu_irqs(ctx, min(remaining, ctx->afu->irqs_max)); + if (rc) { + pr_warn("%s: Failed to find enough free MSIs\n", pci_name(pdev)); + return rc; + } + remaining -= ctx->afu->irqs_max; + + if (ctx != default_ctx && default_ctx->status == STARTED) { + WARN_ON(cxl_start_context(ctx, + be64_to_cpu(default_ctx->elem->common.wed), + NULL)); + } + + if (remaining > 0) { + new_ctx = cxl_dev_context_init(pdev); + if (!new_ctx) { + pr_warn("%s: Failed to allocate enough contexts for MSIs\n", pci_name(pdev)); + return -ENOSPC; + } + list_add(&new_ctx->extra_irq_contexts, &ctx->extra_irq_contexts); + ctx = new_ctx; + } + } + + return 0; +} +/* Exported via cxl_base */ + +void _cxl_cx4_teardown_msi_irqs(struct pci_dev *pdev) +{ + struct cxl_context *ctx, *pos, *tmp; + + ctx = cxl_get_context(pdev); + if (WARN_ON(!ctx)) + return; + + cxl_free_afu_irqs(ctx); + list_for_each_entry_safe(pos, tmp, &ctx->extra_irq_contexts, extra_irq_contexts) { + cxl_stop_context(pos); + cxl_free_afu_irqs(pos); + list_del(&pos->extra_irq_contexts); + cxl_release_context(pos); + } +} +/* Exported via cxl_base */ diff --git a/drivers/misc/cxl/base.c b/drivers/misc/cxl/base.c index fe90f89..cd54ce6 100644 --- a/drivers/misc/cxl/base.c +++ b/drivers/misc/cxl/base.c @@ -158,6 +158,37 @@ int cxl_next_msi_hwirq(struct pci_dev *pdev, struct cxl_context **ctx, int *afu_ } EXPORT_SYMBOL_GPL(cxl_next_msi_hwirq); +int cxl_cx4_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) +{ + int ret; + struct cxl_calls *calls; + + calls = cxl_calls_get(); + if (!calls) + return false; + + ret = calls->cxl_cx4_setup_msi_irqs(pdev, nvec, type); + + cxl_calls_put(calls); + + return ret; +} 
+EXPORT_SYMBOL_GPL(cxl_cx4_setup_msi_irqs); + +void cxl_cx4_teardown_msi_irqs(struct pci_dev *pdev) +{ + struct cxl_calls *calls; + + calls = cxl_calls_get(); + if (!calls) + return; + + calls->cxl_cx4_teardown_msi_irqs(pdev); + + cxl_calls_put(calls); +} +EXPORT_SYMBOL_GPL(cxl_cx4_teardown_msi_irqs); + static int __init cxl_base_init(void) { struct device_node *np; diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h index 73b9a55..d50cdb1 100644 --- a/drivers/misc/cxl/cxl.h +++ b/drivers/misc/cxl/cxl.h @@ -731,12 +731,16 @@ ssize_t cxl_pci_afu_read_err_buffer(struct cxl_afu *afu, char *buf, bool _cxl_pci_associate_default_context(struct pci_dev *dev, struct cxl_afu *afu); void _cxl_pci_disable_device(struct pci_dev *dev); int _cxl_next_msi_hwirq(struct pci_dev *pdev, struct cxl_context **ctx, int *afu_irq); +int _cxl_cx4_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type); +void _cxl_cx4_teardown_msi_irqs(struct pci_dev *pdev); struct cxl_calls { void (*cxl_slbia)(struct mm_struct *mm); bool (*cxl_pci_associate_default_context)(struct pci_dev *dev, struct cxl_afu *afu); void (*cxl_pci_disable_device)(struct pci_dev *dev); int (*cxl_next_msi_hwirq)(struct pci_dev *pdev, struct cxl_context **ctx, int *afu_irq); + int (*cxl_cx4_setup_msi_irqs)(struct pci_dev *pdev, int nvec, int type); + void (*cxl_cx4_teardown_msi_irqs)(struct pci_dev *pdev); struct module *owner; }; diff --git a/drivers/misc/cxl/main.c b/drivers/misc/cxl/main.c index 66fac71..d9be23b2 100644 --- a/drivers/misc/cxl/main.c +++ b/drivers/misc/cxl/main.c @@ -113,6 +113,8 @@ static struct cxl_calls cxl_calls = { .cxl_pci_associate_default_context = _cxl_pci_associate_default_context, .cxl_pci_disable_device = _cxl_pci_disable_device, .cxl_next_msi_hwirq = _cxl_next_msi_hwirq, + .cxl_cx4_setup_msi_irqs = _cxl_cx4_setup_msi_irqs, + .cxl_cx4_teardown_msi_irqs = _cxl_cx4_teardown_msi_irqs, .owner = THIS_MODULE, }; diff --git a/include/misc/cxl-base.h b/include/misc/cxl-base.h index 
bb7e629..b2ebc91 100644 --- a/include/misc/cxl-base.h +++ b/include/misc/cxl-base.h @@ -43,6 +43,8 @@ void cxl_afu_put(struct cxl_afu *afu); void cxl_slbia(struct mm_struct *mm); bool cxl_pci_associate_default_context(struct pci_dev *dev, struct cxl_afu *afu); void cxl_pci_disable_device(struct pci_dev *dev); +int cxl_cx4_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type); +void cxl_cx4_teardown_msi_irqs(struct pci_dev *pdev); #else /* CONFIG_CXL_BASE */ @@ -52,6 +54,8 @@ static inline void cxl_afu_put(struct cxl_afu *afu) {} static inline void cxl_slbia(struct mm_struct *mm) {} static inline bool cxl_pci_associate_default_context(struct pci_dev *dev, struct cxl_afu *afu) { return false; } static inline void cxl_pci_disable_device(struct pci_dev *dev) {} +static inline int cxl_cx4_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) { return -ENODEV; } +static inline void cxl_cx4_teardown_msi_irqs(struct pci_dev *pdev) {} #endif /* CONFIG_CXL_BASE */ -- cgit v0.10.2 From f67a6722d650b864b020b19b3926e7152b55f1ff Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Thu, 14 Jul 2016 07:17:11 +1000 Subject: cxl: Workaround PE=0 hardware limitation in Mellanox CX4 The CX4 card cannot cope with a context with PE=0 due to a hardware limitation, resulting in: [ 34.166577] command failed, status limits exceeded(0x8), syndrome 0x5a7939 [ 34.166580] mlx5_core 0000:01:00.1: Failed allocating uar, aborting Since the kernel API allocates a default context very early during device init that will almost certainly get Process Element ID 0 there is no easy way for us to extend the API to allow the Mellanox to inform us of this limitation ahead of time. Instead, work around the issue by extending the XSL structure to include a minimum PE to allocate. Although the bug is not in the XSL, it is the easiest place to work around this limitation given that the CX4 is currently the only card that uses an XSL. 
Signed-off-by: Ian Munsie Reviewed-by: Andrew Donnellan Reviewed-by: Frederic Barrat Signed-off-by: Michael Ellerman diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c index 2616cddb..bdee9a0 100644 --- a/drivers/misc/cxl/context.c +++ b/drivers/misc/cxl/context.c @@ -90,7 +90,8 @@ int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master, */ mutex_lock(&afu->contexts_lock); idr_preload(GFP_KERNEL); - i = idr_alloc(&ctx->afu->contexts_idr, ctx, 0, + i = idr_alloc(&ctx->afu->contexts_idr, ctx, + ctx->afu->adapter->native->sl_ops->min_pe, ctx->afu->num_procs, GFP_NOWAIT); idr_preload_end(); mutex_unlock(&afu->contexts_lock); diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h index d50cdb1..de09053 100644 --- a/drivers/misc/cxl/cxl.h +++ b/drivers/misc/cxl/cxl.h @@ -561,6 +561,7 @@ struct cxl_service_layer_ops { u64 (*timebase_read)(struct cxl *adapter); int capi_mode; bool needs_reset_before_disable; + int min_pe; }; struct cxl_native { diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index cb5d172..efe202f 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -1321,6 +1321,7 @@ static const struct cxl_service_layer_ops xsl_ops = { .write_timebase_ctrl = write_timebase_ctrl_xsl, .timebase_read = timebase_read_xsl, .capi_mode = OPAL_PHB_CAPI_MODE_DMA, + .min_pe = 1, /* Workaround for Mellanox CX4 HW bug */ }; static void set_sl_ops(struct cxl *adapter, struct pci_dev *dev) -- cgit v0.10.2 From 89379f165a1be13aa9b4731a9095171142ee1c7b Mon Sep 17 00:00:00 2001 From: Andrew Donnellan Date: Thu, 14 Jul 2016 07:17:12 +1000 Subject: PCI/hotplug: pnv_php: export symbols and move struct types needed by cxl The cxl driver will use infrastructure from pnv_php to handle device tree updates when switching bi-modal CAPI cards into CAPI mode. 
To enable this, export pnv_php_find_slot() and pnv_php_set_slot_power_state(), and add corresponding declarations, as well as the definition of struct pnv_php_slot, to asm/pnv-pci.h. Cc: Gavin Shan Cc: linux-pci@vger.kernel.org Cc: Bjorn Helgaas Signed-off-by: Andrew Donnellan Signed-off-by: Ian Munsie Acked-by: Gavin Shan Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/include/asm/pnv-pci.h b/arch/powerpc/include/asm/pnv-pci.h index c47097f..0cbd813 100644 --- a/arch/powerpc/include/asm/pnv-pci.h +++ b/arch/powerpc/include/asm/pnv-pci.h @@ -11,6 +11,7 @@ #define _ASM_PNV_PCI_H #include +#include #include #include @@ -47,4 +48,31 @@ void pnv_cxl_phb_set_peer_afu(struct pci_dev *dev, struct cxl_afu *afu); #endif +struct pnv_php_slot { + struct hotplug_slot slot; + struct hotplug_slot_info slot_info; + uint64_t id; + char *name; + int slot_no; + struct kref kref; +#define PNV_PHP_STATE_INITIALIZED 0 +#define PNV_PHP_STATE_REGISTERED 1 +#define PNV_PHP_STATE_POPULATED 2 +#define PNV_PHP_STATE_OFFLINE 3 + int state; + struct device_node *dn; + struct pci_dev *pdev; + struct pci_bus *bus; + bool power_state_check; + void *fdt; + void *dt; + struct of_changeset ocs; + struct pnv_php_slot *parent; + struct list_head children; + struct list_head link; +}; +extern struct pnv_php_slot *pnv_php_find_slot(struct device_node *dn); +extern int pnv_php_set_slot_power_state(struct hotplug_slot *slot, + uint8_t state); + #endif diff --git a/drivers/pci/hotplug/pnv_php.c b/drivers/pci/hotplug/pnv_php.c index 6086db6..2d2f704 100644 --- a/drivers/pci/hotplug/pnv_php.c +++ b/drivers/pci/hotplug/pnv_php.c @@ -22,30 +22,6 @@ #define DRIVER_AUTHOR "Gavin Shan, IBM Corporation" #define DRIVER_DESC "PowerPC PowerNV PCI Hotplug Driver" -struct pnv_php_slot { - struct hotplug_slot slot; - struct hotplug_slot_info slot_info; - uint64_t id; - char *name; - int slot_no; - struct kref kref; -#define PNV_PHP_STATE_INITIALIZED 0 -#define PNV_PHP_STATE_REGISTERED 1 -#define 
PNV_PHP_STATE_POPULATED 2 -#define PNV_PHP_STATE_OFFLINE 3 - int state; - struct device_node *dn; - struct pci_dev *pdev; - struct pci_bus *bus; - bool power_state_check; - void *fdt; - void *dt; - struct of_changeset ocs; - struct pnv_php_slot *parent; - struct list_head children; - struct list_head link; -}; - static LIST_HEAD(pnv_php_slot_list); static DEFINE_SPINLOCK(pnv_php_lock); @@ -91,7 +67,7 @@ static struct pnv_php_slot *pnv_php_match(struct device_node *dn, return NULL; } -static struct pnv_php_slot *pnv_php_find_slot(struct device_node *dn) +struct pnv_php_slot *pnv_php_find_slot(struct device_node *dn) { struct pnv_php_slot *php_slot, *tmp; unsigned long flags; @@ -108,6 +84,7 @@ static struct pnv_php_slot *pnv_php_find_slot(struct device_node *dn) return NULL; } +EXPORT_SYMBOL_GPL(pnv_php_find_slot); /* * Remove pdn for all children of the indicated device node. @@ -316,8 +293,8 @@ out: return ret; } -static int pnv_php_set_slot_power_state(struct hotplug_slot *slot, - uint8_t state) +int pnv_php_set_slot_power_state(struct hotplug_slot *slot, + uint8_t state) { struct pnv_php_slot *php_slot = slot->private; struct opal_msg msg; @@ -347,6 +324,7 @@ static int pnv_php_set_slot_power_state(struct hotplug_slot *slot, return ret; } +EXPORT_SYMBOL_GPL(pnv_php_set_slot_power_state); static int pnv_php_get_power_state(struct hotplug_slot *slot, u8 *state) { -- cgit v0.10.2 From 5473a6bf635d35d5c1d12d0e132b51a861a5c973 Mon Sep 17 00:00:00 2001 From: Andrew Donnellan Date: Thu, 14 Jul 2016 07:17:13 +1000 Subject: PCI/hotplug: pnv_php: handle OPAL_PCI_SLOT_OFFLINE power state When calling pnv_php_set_slot_power_state() with state == OPAL_PCI_SLOT_OFFLINE, remove devices from the device tree as if we're dealing with OPAL_PCI_SLOT_POWER_OFF. 
Cc: Gavin Shan Cc: linux-pci@vger.kernel.org Cc: Bjorn Helgaas Signed-off-by: Andrew Donnellan Signed-off-by: Ian Munsie Acked-by: Gavin Shan Signed-off-by: Michael Ellerman diff --git a/drivers/pci/hotplug/pnv_php.c b/drivers/pci/hotplug/pnv_php.c index 2d2f704..e6245b0 100644 --- a/drivers/pci/hotplug/pnv_php.c +++ b/drivers/pci/hotplug/pnv_php.c @@ -317,7 +317,7 @@ int pnv_php_set_slot_power_state(struct hotplug_slot *slot, return ret; } - if (state == OPAL_PCI_SLOT_POWER_OFF) + if (state == OPAL_PCI_SLOT_POWER_OFF || state == OPAL_PCI_SLOT_OFFLINE) pnv_php_rmv_devtree(php_slot); else ret = pnv_php_add_devtree(php_slot); -- cgit v0.10.2 From b0b5e5918ad1babfd1d43d98c7281926a7b57b9f Mon Sep 17 00:00:00 2001 From: Andrew Donnellan Date: Thu, 14 Jul 2016 07:17:14 +1000 Subject: cxl: Add cxl_check_and_switch_mode() API to switch bi-modal cards Add a new API, cxl_check_and_switch_mode() to allow for switching of bi-modal CAPI cards, such as the Mellanox CX-4 network card. When a driver requests to switch a card to CAPI mode, use PCI hotplug infrastructure to remove all PCI devices underneath the slot. We then write an updated mode control register to the CAPI VSEC, hot reset the card, and reprobe the card. As the card may present a different set of PCI devices after the mode switch, use the infrastructure provided by the pnv_php driver and the OPAL PCI slot management facilities to ensure that: * the old devices are removed from both the OPAL and Linux device trees * the new devices are probed by OPAL and added to the OPAL device tree * the new devices are added to the Linux device tree and probed through the regular PCI device probe path As such, introduce a new option, CONFIG_CXL_BIMODAL, with a dependency on the pnv_php driver. Refactor existing code that touches the mode control register in the regular single mode case into a new function, setup_cxl_protocol_area(). 
Co-authored-by: Ian Munsie Cc: Gavin Shan Signed-off-by: Andrew Donnellan Signed-off-by: Ian Munsie Reviewed-by: Gavin Shan Signed-off-by: Michael Ellerman diff --git a/drivers/misc/cxl/Kconfig b/drivers/misc/cxl/Kconfig index 560412c..8d76770 100644 --- a/drivers/misc/cxl/Kconfig +++ b/drivers/misc/cxl/Kconfig @@ -38,3 +38,11 @@ config CXL CAPI adapters are found in POWER8 based systems. If unsure, say N. + +config CXL_BIMODAL + bool "Support for bi-modal CAPI cards" + depends on HOTPLUG_PCI_POWERNV = y && CXL || HOTPLUG_PCI_POWERNV = m && CXL = m + default y + help + Select this option to enable support for bi-modal CAPI cards, such as + the Mellanox CX-4. diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index efe202f..d152e2d 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -55,6 +55,8 @@ pci_read_config_byte(dev, vsec + 0xa, dest) #define CXL_WRITE_VSEC_MODE_CONTROL(dev, vsec, val) \ pci_write_config_byte(dev, vsec + 0xa, val) +#define CXL_WRITE_VSEC_MODE_CONTROL_BUS(bus, devfn, vsec, val) \ + pci_bus_write_config_byte(bus, devfn, vsec + 0xa, val) #define CXL_VSEC_PROTOCOL_MASK 0xe0 #define CXL_VSEC_PROTOCOL_1024TB 0x80 #define CXL_VSEC_PROTOCOL_512TB 0x40 @@ -614,36 +616,234 @@ static int setup_cxl_bars(struct pci_dev *dev) return 0; } -/* pciex node: ibm,opal-m64-window = <0x3d058 0x0 0x3d058 0x0 0x8 0x0>; */ -static int switch_card_to_cxl(struct pci_dev *dev) -{ +#ifdef CONFIG_CXL_BIMODAL + +struct cxl_switch_work { + struct pci_dev *dev; + struct work_struct work; int vsec; + int mode; +}; + +static void switch_card_to_cxl(struct work_struct *work) +{ + struct cxl_switch_work *switch_work = + container_of(work, struct cxl_switch_work, work); + struct pci_dev *dev = switch_work->dev; + struct pci_bus *bus = dev->bus; + struct pci_controller *hose = pci_bus_to_host(bus); + struct pci_dev *bridge; + struct pnv_php_slot *php_slot; + unsigned int devfn; u8 val; int rc; - dev_info(&dev->dev, "switch card to CXL\n"); + 
dev_info(&bus->dev, "cxl: Preparing for mode switch...\n"); + bridge = list_first_entry_or_null(&hose->bus->devices, struct pci_dev, + bus_list); + if (!bridge) { + dev_WARN(&bus->dev, "cxl: Couldn't find root port!\n"); + goto err_dev_put; + } - if (!(vsec = find_cxl_vsec(dev))) { - dev_err(&dev->dev, "ABORTING: CXL VSEC not found!\n"); + php_slot = pnv_php_find_slot(pci_device_to_OF_node(bridge)); + if (!php_slot) { + dev_err(&bus->dev, "cxl: Failed to find slot hotplug " + "information. You may need to upgrade " + "skiboot. Aborting.\n"); + goto err_dev_put; + } + + rc = CXL_READ_VSEC_MODE_CONTROL(dev, switch_work->vsec, &val); + if (rc) { + dev_err(&bus->dev, "cxl: Failed to read CAPI mode control: %i\n", rc); + goto err_dev_put; + } + devfn = dev->devfn; + + /* Release the reference obtained in cxl_check_and_switch_mode() */ + pci_dev_put(dev); + + dev_dbg(&bus->dev, "cxl: Removing PCI devices from kernel\n"); + pci_lock_rescan_remove(); + pci_hp_remove_devices(bridge->subordinate); + pci_unlock_rescan_remove(); + + /* Switch the CXL protocol on the card */ + if (switch_work->mode == CXL_BIMODE_CXL) { + dev_info(&bus->dev, "cxl: Switching card to CXL mode\n"); + val &= ~CXL_VSEC_PROTOCOL_MASK; + val |= CXL_VSEC_PROTOCOL_256TB | CXL_VSEC_PROTOCOL_ENABLE; + rc = pnv_cxl_enable_phb_kernel_api(hose, true); + if (rc) { + dev_err(&bus->dev, "cxl: Failed to enable kernel API" + " on real PHB, aborting\n"); + goto err_free_work; + } + } else { + dev_WARN(&bus->dev, "cxl: Switching card to PCI mode not supported!\n"); + goto err_free_work; + } + + rc = CXL_WRITE_VSEC_MODE_CONTROL_BUS(bus, devfn, switch_work->vsec, val); + if (rc) { + dev_err(&bus->dev, "cxl: Failed to configure CXL protocol: %i\n", rc); + goto err_free_work; + } + + /* + * The CAIA spec (v1.1, Section 10.6 Bi-modal Device Support) states + * we must wait 100ms after this mode switch before touching PCIe config + * space. 
+ */ + msleep(100); + + /* + * Hot reset to cause the card to come back in cxl mode. An + * OPAL_RESET_PCI_LINK would be sufficient, but currently lacks support + * in skiboot, so we use a hot reset instead. + * + * We call pci_set_pcie_reset_state() on the bridge, as a CAPI card is + * guaranteed to sit directly under the root port, and setting the reset + * state on a device directly under the root port is equivalent to doing + * it on the root port itself. + */ + dev_info(&bus->dev, "cxl: Configuration write complete, resetting card\n"); + pci_set_pcie_reset_state(bridge, pcie_hot_reset); + pci_set_pcie_reset_state(bridge, pcie_deassert_reset); + + dev_dbg(&bus->dev, "cxl: Offlining slot\n"); + rc = pnv_php_set_slot_power_state(&php_slot->slot, OPAL_PCI_SLOT_OFFLINE); + if (rc) { + dev_err(&bus->dev, "cxl: OPAL offlining call failed: %i\n", rc); + goto err_free_work; + } + + dev_dbg(&bus->dev, "cxl: Onlining and probing slot\n"); + rc = pnv_php_set_slot_power_state(&php_slot->slot, OPAL_PCI_SLOT_ONLINE); + if (rc) { + dev_err(&bus->dev, "cxl: OPAL onlining call failed: %i\n", rc); + goto err_free_work; + } + + pci_lock_rescan_remove(); + pci_hp_add_devices(bridge->subordinate); + pci_unlock_rescan_remove(); + + dev_info(&bus->dev, "cxl: CAPI mode switch completed\n"); + kfree(switch_work); + return; + +err_dev_put: + /* Release the reference obtained in cxl_check_and_switch_mode() */ + pci_dev_put(dev); +err_free_work: + kfree(switch_work); +} + +int cxl_check_and_switch_mode(struct pci_dev *dev, int mode, int vsec) +{ + struct cxl_switch_work *work; + u8 val; + int rc; + + if (!cpu_has_feature(CPU_FTR_HVMODE)) return -ENODEV; + + if (!vsec) { + vsec = find_cxl_vsec(dev); + if (!vsec) { + dev_info(&dev->dev, "CXL VSEC not found\n"); + return -ENODEV; + } } - if ((rc = CXL_READ_VSEC_MODE_CONTROL(dev, vsec, &val))) { - dev_err(&dev->dev, "failed to read current mode control: %i", rc); + rc = CXL_READ_VSEC_MODE_CONTROL(dev, vsec, &val); + if (rc) { +
dev_err(&dev->dev, "Failed to read current mode control: %i", rc); return rc; } - val &= ~CXL_VSEC_PROTOCOL_MASK; - val |= CXL_VSEC_PROTOCOL_256TB | CXL_VSEC_PROTOCOL_ENABLE; - if ((rc = CXL_WRITE_VSEC_MODE_CONTROL(dev, vsec, val))) { - dev_err(&dev->dev, "failed to enable CXL protocol: %i", rc); - return rc; + + if (mode == CXL_BIMODE_PCI) { + if (!(val & CXL_VSEC_PROTOCOL_ENABLE)) { + dev_info(&dev->dev, "Card is already in PCI mode\n"); + return 0; + } + /* + * TODO: Before it's safe to switch the card back to PCI mode + * we need to disable the CAPP and make sure any cachelines the + * card holds have been flushed out. Needs skiboot support. + */ + dev_WARN(&dev->dev, "CXL mode switch to PCI unsupported!\n"); + return -EIO; } + + if (val & CXL_VSEC_PROTOCOL_ENABLE) { + dev_info(&dev->dev, "Card is already in CXL mode\n"); + return 0; + } + + dev_info(&dev->dev, "Card is in PCI mode, scheduling kernel thread " + "to switch to CXL mode\n"); + + work = kmalloc(sizeof(struct cxl_switch_work), GFP_KERNEL); + if (!work) + return -ENOMEM; + + pci_dev_get(dev); + work->dev = dev; + work->vsec = vsec; + work->mode = mode; + INIT_WORK(&work->work, switch_card_to_cxl); + + schedule_work(&work->work); + /* - * The CAIA spec (v0.12 11.6 Bi-modal Device Support) states - * we must wait 100ms after this mode switch before touching - * PCIe config space. + * We return a failure now to abort the driver init. Once the + * link has been cycled and the card is in cxl mode we will + * come back (possibly using the generic cxl driver), but + * return success as the card should then be in cxl mode. + * + * TODO: What if the card comes back in PCI mode even after + * the switch? Don't want to spin endlessly. 
*/ - msleep(100); + return -EBUSY; +} +EXPORT_SYMBOL_GPL(cxl_check_and_switch_mode); + +#endif /* CONFIG_CXL_BIMODAL */ + +static int setup_cxl_protocol_area(struct pci_dev *dev) +{ + u8 val; + int rc; + int vsec = find_cxl_vsec(dev); + + if (!vsec) { + dev_info(&dev->dev, "CXL VSEC not found\n"); + return -ENODEV; + } + + rc = CXL_READ_VSEC_MODE_CONTROL(dev, vsec, &val); + if (rc) { + dev_err(&dev->dev, "Failed to read current mode control: %i\n", rc); + return rc; + } + + if (!(val & CXL_VSEC_PROTOCOL_ENABLE)) { + dev_err(&dev->dev, "Card not in CAPI mode!\n"); + return -EIO; + } + + if ((val & CXL_VSEC_PROTOCOL_MASK) != CXL_VSEC_PROTOCOL_256TB) { + val &= ~CXL_VSEC_PROTOCOL_MASK; + val |= CXL_VSEC_PROTOCOL_256TB; + rc = CXL_WRITE_VSEC_MODE_CONTROL(dev, vsec, val); + if (rc) { + dev_err(&dev->dev, "Failed to set CXL protocol area: %i\n", rc); + return rc; + } + } return 0; } @@ -1249,7 +1449,7 @@ static int cxl_configure_adapter(struct cxl *adapter, struct pci_dev *dev) if ((rc = setup_cxl_bars(dev))) return rc; - if ((rc = switch_card_to_cxl(dev))) + if ((rc = setup_cxl_protocol_area(dev))) return rc; if ((rc = cxl_update_image_control(adapter))) diff --git a/include/misc/cxl.h b/include/misc/cxl.h index 6c52cbc..480d50a 100644 --- a/include/misc/cxl.h +++ b/include/misc/cxl.h @@ -39,6 +39,31 @@ bool cxl_slot_is_supported(struct pci_dev *dev, int flags); +#define CXL_BIMODE_CXL 1 +#define CXL_BIMODE_PCI 2 + +/* + * Check the mode that the given bi-modal CXL adapter is currently in and + * change it if necessary. This does not apply to AFU drivers. + * + * If the mode matches the requested mode this function will return 0 - if the + * driver was expecting the generic CXL driver to have bound to the adapter and + * it gets this return value it should fail the probe function to give the CXL + * driver a chance to probe it. 
+ * + * If the mode does not match it will start a background task to unplug the + * device from Linux and switch its mode, and will return -EBUSY. At this + * point the calling driver should make sure it has released the device and + * fail its probe function. + * + * The offset of the CXL VSEC can be provided to this function. If 0 is passed, + * this function will search for a CXL VSEC with ID 0x1280 and return -ENODEV + * if it is not found. + */ +#ifdef CONFIG_CXL_BIMODAL +int cxl_check_and_switch_mode(struct pci_dev *dev, int mode, int vsec); +#endif + /* Get the AFU associated with a pci_dev */ struct cxl_afu *cxl_pci_to_afu(struct pci_dev *dev); -- cgit v0.10.2 From c011926fcbeb9565599f278148b91e536a07b68a Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Mon, 4 Apr 2016 11:30:01 +0200 Subject: powerpc/pmac/smp: Add missing FROZEN hotplug notifier transitions The FROZEN transitions are used when a CPU suspends/resumes. In case of a suspend/resume, only the up prepare (CPU_UP_PREPARE_FROZEN) is handled. The error handling transition CPU_UP_CANCELED_FROZEN as well as the CPU_ONLINE_FROZEN transition are not handled. Mask the switch case action argument with ~CPU_TASKS_FROZEN to handle all FROZEN tasks the same way as the corresponding non-frozen tasks.
Cc: Benjamin Herrenschmidt Cc: linuxppc-dev@lists.ozlabs.org Signed-off-by: Anna-Maria Gleixner Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c index 28a147c..834868b 100644 --- a/arch/powerpc/platforms/powermac/smp.c +++ b/arch/powerpc/platforms/powermac/smp.c @@ -857,9 +857,8 @@ static int smp_core99_cpu_notify(struct notifier_block *self, { int rc; - switch(action) { + switch(action & ~CPU_TASKS_FROZEN) { case CPU_UP_PREPARE: - case CPU_UP_PREPARE_FROZEN: /* Open i2c bus if it was used for tb sync */ if (pmac_tb_clock_chip_host) { rc = pmac_i2c_open(pmac_tb_clock_chip_host, 1); -- cgit v0.10.2 From f8750513b7001d5ae96313d4e19f782b56f1beb7 Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Tue, 12 Jul 2016 10:54:48 +1000 Subject: powerpc/kvm: Clarify __user annotations kvmppc_h_put_tce_indirect labels a u64 pointer as __user. It also labelled the u64 where get_user puts the result as __user. This isn't a pointer and so doesn't need to be labelled __user. Split the u64 value definition onto a new line to make it clear that it doesn't get the annotation. Signed-off-by: Daniel Axtens Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index 18cf6d1..c379ff5 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -242,7 +242,8 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu, struct kvmppc_spapr_tce_table *stt; long i, ret = H_SUCCESS, idx; unsigned long entry, ua = 0; - u64 __user *tces, tce; + u64 __user *tces; + u64 tce; stt = kvmppc_find_table(vcpu, liobn); if (!stt) -- cgit v0.10.2 From 62c2c5cf387beb4bbf45045c3041dc9cfb40e5df Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Tue, 12 Jul 2016 10:54:51 +1000 Subject: powerpc/sparse: Pass endianness to sparse Explicitly give sparse an endianness in the Makefile, so that it doesn't get confused. 
Normally we have #ifdef one and #else the other, so it doesn't usually matter, but we have been bitten by it before, and indeed this patch fixes a number of sparse errors. Suggested-by: Arnd Bergmann Signed-off-by: Daniel Axtens Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index b41f4c6..ca25454 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -181,6 +181,11 @@ KBUILD_CFLAGS += -pipe -Iarch/$(ARCH) $(CFLAGS-y) CPP = $(CC) -E $(KBUILD_CFLAGS) CHECKFLAGS += -m$(CONFIG_WORD_SIZE) -D__powerpc__ -D__powerpc$(CONFIG_WORD_SIZE)__ +ifdef CONFIG_CPU_BIG_ENDIAN +CHECKFLAGS += -D__BIG_ENDIAN__ +else +CHECKFLAGS += -D__LITTLE_ENDIAN__ +endif KBUILD_LDFLAGS_MODULE += arch/powerpc/lib/crtsavres.o -- cgit v0.10.2 From 95ec77c06e8e63fff50c497eca0668bf6da39813 Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Tue, 12 Jul 2016 10:54:52 +1000 Subject: powerpc: Make ppc_md.{halt, restart} __noreturn powernv marks its halt and restart calls as __noreturn. However, ppc_md does not have this annotation. Add the annotation to ppc_md, and then to every halt/restart function that is missing it. Additionally, I have verified that all of these functions do not return. Occasionally I have added a spin loop to be sure.
Signed-off-by: Daniel Axtens Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index 6bdcd0d..a9af1bd 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -111,8 +111,8 @@ struct machdep_calls { /* To setup PHBs when using automatic OF platform driver for PCI */ int (*pci_setup_phb)(struct pci_controller *host); - void (*restart)(char *cmd); - void (*halt)(void); + void __noreturn (*restart)(char *cmd); + void __noreturn (*halt)(void); void (*panic)(char *str); void (*cpu_die)(void); diff --git a/arch/powerpc/include/asm/mpc52xx.h b/arch/powerpc/include/asm/mpc52xx.h index 0acc7c7c..e94cede 100644 --- a/arch/powerpc/include/asm/mpc52xx.h +++ b/arch/powerpc/include/asm/mpc52xx.h @@ -275,7 +275,7 @@ extern int mpc5200_psc_ac97_gpio_reset(int psc_number); extern void mpc52xx_map_common_devices(void); extern int mpc52xx_set_psc_clkdiv(int psc_id, int clkdiv); extern unsigned int mpc52xx_get_xtal_freq(struct device_node *node); -extern void mpc52xx_restart(char *cmd); +extern void __noreturn mpc52xx_restart(char *cmd); /* mpc52xx_gpt.c */ struct mpc52xx_gpt_priv; diff --git a/arch/powerpc/include/asm/ppc4xx.h b/arch/powerpc/include/asm/ppc4xx.h index 033039a..610a511 100644 --- a/arch/powerpc/include/asm/ppc4xx.h +++ b/arch/powerpc/include/asm/ppc4xx.h @@ -13,6 +13,6 @@ #ifndef __ASM_POWERPC_PPC4xx_H__ #define __ASM_POWERPC_PPC4xx_H__ -extern void ppc4xx_reset_system(char *cmd); +extern void __noreturn ppc4xx_reset_system(char *cmd); #endif /* __ASM_POWERPC_PPC4xx_H__ */ diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index 51400ba..fa3e3c4 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -339,9 +339,9 @@ extern int rtas_service_present(const char *service); extern int rtas_call(int token, int, int, int *, ...); void rtas_call_unlocked(struct rtas_args *args, int token, int nargs, int nret, ...); 
-extern void rtas_restart(char *cmd); +extern void __noreturn rtas_restart(char *cmd); extern void rtas_power_off(void); -extern void rtas_halt(void); +extern void __noreturn rtas_halt(void); extern void rtas_os_term(char *str); extern int rtas_get_sensor(int sensor, int index, int *state); extern int rtas_get_sensor_fast(int sensor, int index, int *state); diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 286354f..6a3e5de 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -685,7 +685,7 @@ int rtas_set_indicator_fast(int indicator, int index, int new_value) return rc; } -void rtas_restart(char *cmd) +void __noreturn rtas_restart(char *cmd) { if (rtas_flash_term_hook) rtas_flash_term_hook(SYS_RESTART); @@ -704,7 +704,7 @@ void rtas_power_off(void) for (;;); } -void rtas_halt(void) +void __noreturn rtas_halt(void) { if (rtas_flash_term_hook) rtas_flash_term_hook(SYS_HALT); diff --git a/arch/powerpc/platforms/44x/ppc476.c b/arch/powerpc/platforms/44x/ppc476.c index c11ce65..003973f 100644 --- a/arch/powerpc/platforms/44x/ppc476.c +++ b/arch/powerpc/platforms/44x/ppc476.c @@ -68,7 +68,7 @@ DECLARE_PCI_FIXUP_HEADER(0x1033, 0x0035, quirk_ppc_currituck_usb_fixup); #define AVR_PWRCTL_RESET (0x02) static struct i2c_client *avr_i2c_client; -static void avr_halt_system(int pwrctl_flags) +static void __noreturn avr_halt_system(int pwrctl_flags) { /* Request the AVR to reset the system */ i2c_smbus_write_byte_data(avr_i2c_client, @@ -84,7 +84,7 @@ static void avr_power_off_system(void) avr_halt_system(AVR_PWRCTL_PWROFF); } -static void avr_reset_system(char *cmd) +static void __noreturn avr_reset_system(char *cmd) { avr_halt_system(AVR_PWRCTL_RESET); } diff --git a/arch/powerpc/platforms/512x/mpc512x.h b/arch/powerpc/platforms/512x/mpc512x.h index cc97f02..14ba49f 100644 --- a/arch/powerpc/platforms/512x/mpc512x.h +++ b/arch/powerpc/platforms/512x/mpc512x.h @@ -18,6 +18,6 @@ extern void __init mpc512x_setup_arch(void); extern 
int __init mpc5121_clk_init(void); extern const char *mpc512x_select_psc_compat(void); extern const char *mpc512x_select_reset_compat(void); -extern void mpc512x_restart(char *cmd); +extern void __noreturn mpc512x_restart(char *cmd); #endif /* __MPC512X_H__ */ diff --git a/arch/powerpc/platforms/512x/mpc512x_shared.c b/arch/powerpc/platforms/512x/mpc512x_shared.c index 452da23..6b4f4cb 100644 --- a/arch/powerpc/platforms/512x/mpc512x_shared.c +++ b/arch/powerpc/platforms/512x/mpc512x_shared.c @@ -47,7 +47,7 @@ static void __init mpc512x_restart_init(void) of_node_put(np); } -void mpc512x_restart(char *cmd) +void __noreturn mpc512x_restart(char *cmd) { if (reset_module_base) { /* Enable software reset "RSTE" */ diff --git a/arch/powerpc/platforms/52xx/mpc52xx_common.c b/arch/powerpc/platforms/52xx/mpc52xx_common.c index 2699382..565e3a8 100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_common.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_common.c @@ -243,8 +243,7 @@ EXPORT_SYMBOL(mpc52xx_get_xtal_freq); /** * mpc52xx_restart: ppc_md->restart hook for mpc5200 using the watchdog timer */ -void -mpc52xx_restart(char *cmd) +void __noreturn mpc52xx_restart(char *cmd) { local_irq_disable(); diff --git a/arch/powerpc/platforms/82xx/pq2.c b/arch/powerpc/platforms/82xx/pq2.c index fc8b2d6..c4f7029 100644 --- a/arch/powerpc/platforms/82xx/pq2.c +++ b/arch/powerpc/platforms/82xx/pq2.c @@ -22,7 +22,7 @@ #define RMR_CSRE 0x00000001 -void pq2_restart(char *cmd) +void __noreturn pq2_restart(char *cmd) { local_irq_disable(); setbits32(&cpm2_immr->im_clkrst.car_rmr, RMR_CSRE); diff --git a/arch/powerpc/platforms/82xx/pq2.h b/arch/powerpc/platforms/82xx/pq2.h index a41f84a..3080ce34 100644 --- a/arch/powerpc/platforms/82xx/pq2.h +++ b/arch/powerpc/platforms/82xx/pq2.h @@ -1,7 +1,7 @@ #ifndef _PQ2_H #define _PQ2_H -void pq2_restart(char *cmd); +void __noreturn pq2_restart(char *cmd); #ifdef CONFIG_PCI int pq2ads_pci_init_irq(void); diff --git a/arch/powerpc/platforms/83xx/misc.c 
b/arch/powerpc/platforms/83xx/misc.c index 7e923ca..8899aa9 100644 --- a/arch/powerpc/platforms/83xx/misc.c +++ b/arch/powerpc/platforms/83xx/misc.c @@ -35,7 +35,7 @@ static int __init mpc83xx_restart_init(void) arch_initcall(mpc83xx_restart_init); -void mpc83xx_restart(char *cmd) +void __noreturn mpc83xx_restart(char *cmd) { #define RST_OFFSET 0x00000900 #define RST_PROT_REG 0x00000018 diff --git a/arch/powerpc/platforms/83xx/mpc83xx.h b/arch/powerpc/platforms/83xx/mpc83xx.h index 0cf74d7..ad48419 100644 --- a/arch/powerpc/platforms/83xx/mpc83xx.h +++ b/arch/powerpc/platforms/83xx/mpc83xx.h @@ -65,7 +65,7 @@ * mpc83xx_* files. Mostly for use by mpc83xx_setup */ -extern void mpc83xx_restart(char *cmd); +extern void __noreturn mpc83xx_restart(char *cmd); extern long mpc83xx_time_init(void); extern int mpc837x_usb_cfg(void); extern int mpc834x_usb_cfg(void); diff --git a/arch/powerpc/platforms/85xx/ksi8560.c b/arch/powerpc/platforms/85xx/ksi8560.c index 3dc1bda..867a107 100644 --- a/arch/powerpc/platforms/85xx/ksi8560.c +++ b/arch/powerpc/platforms/85xx/ksi8560.c @@ -44,7 +44,7 @@ static void __iomem *cpld_base = NULL; -static void machine_restart(char *cmd) +static void __noreturn machine_restart(char *cmd) { if (cpld_base) out_8(cpld_base + KSI8560_CPLD_RCR1, KSI8560_CPLD_RCR1_CPUHR); diff --git a/arch/powerpc/platforms/85xx/mpc85xx_cds.c b/arch/powerpc/platforms/85xx/mpc85xx_cds.c index d7e87ff..5e0a0a2 100644 --- a/arch/powerpc/platforms/85xx/mpc85xx_cds.c +++ b/arch/powerpc/platforms/85xx/mpc85xx_cds.c @@ -83,7 +83,7 @@ static int mpc85xx_exclude_device(struct pci_controller *hose, return PCIBIOS_SUCCESSFUL; } -static void mpc85xx_cds_restart(char *cmd) +static void __noreturn mpc85xx_cds_restart(char *cmd) { struct pci_dev *dev; u_char tmp; diff --git a/arch/powerpc/platforms/8xx/m8xx_setup.c b/arch/powerpc/platforms/8xx/m8xx_setup.c index c289fc7..b1ab6e9 100644 --- a/arch/powerpc/platforms/8xx/m8xx_setup.c +++ b/arch/powerpc/platforms/8xx/m8xx_setup.c @@ 
-198,7 +198,7 @@ void mpc8xx_get_rtc_time(struct rtc_time *tm) return; } -void mpc8xx_restart(char *cmd) +void __noreturn mpc8xx_restart(char *cmd) { car8xx_t __iomem *clk_r = immr_map(im_clkrst); diff --git a/arch/powerpc/platforms/8xx/mpc8xx.h b/arch/powerpc/platforms/8xx/mpc8xx.h index 239a243..31cc2ec 100644 --- a/arch/powerpc/platforms/8xx/mpc8xx.h +++ b/arch/powerpc/platforms/8xx/mpc8xx.h @@ -11,7 +11,7 @@ #ifndef __MPC8xx_H #define __MPC8xx_H -extern void mpc8xx_restart(char *cmd); +extern void __noreturn mpc8xx_restart(char *cmd); extern void mpc8xx_calibrate_decr(void); extern int mpc8xx_set_rtc_time(struct rtc_time *tm); extern void mpc8xx_get_rtc_time(struct rtc_time *tm); diff --git a/arch/powerpc/platforms/amigaone/setup.c b/arch/powerpc/platforms/amigaone/setup.c index 2fe1204..3e12d87 100644 --- a/arch/powerpc/platforms/amigaone/setup.c +++ b/arch/powerpc/platforms/amigaone/setup.c @@ -123,7 +123,7 @@ static int __init request_isa_regions(void) } machine_device_initcall(amigaone, request_isa_regions); -void amigaone_restart(char *cmd) +void __noreturn amigaone_restart(char *cmd) { local_irq_disable(); diff --git a/arch/powerpc/platforms/chrp/setup.c b/arch/powerpc/platforms/chrp/setup.c index 987d1b8..c55002f 100644 --- a/arch/powerpc/platforms/chrp/setup.c +++ b/arch/powerpc/platforms/chrp/setup.c @@ -239,7 +239,7 @@ out: of_node_put(np); } -static void briq_restart(char *cmd) +static void __noreturn briq_restart(char *cmd) { local_irq_disable(); if (briq_SPOR) diff --git a/arch/powerpc/platforms/embedded6xx/c2k.c b/arch/powerpc/platforms/embedded6xx/c2k.c index ebd3963..7820662 100644 --- a/arch/powerpc/platforms/embedded6xx/c2k.c +++ b/arch/powerpc/platforms/embedded6xx/c2k.c @@ -99,7 +99,7 @@ static void c2k_reset_board(void) out_le32(mv64x60_gpp_reg_base + MV64x60_GPP_VALUE_SET, 0x00080004); } -static void c2k_restart(char *cmd) +static void __noreturn c2k_restart(char *cmd) { c2k_reset_board(); msleep(100); diff --git 
a/arch/powerpc/platforms/embedded6xx/gamecube.c b/arch/powerpc/platforms/embedded6xx/gamecube.c index fe0ed6e..b17705c 100644 --- a/arch/powerpc/platforms/embedded6xx/gamecube.c +++ b/arch/powerpc/platforms/embedded6xx/gamecube.c @@ -29,14 +29,14 @@ #include "usbgecko_udbg.h" -static void gamecube_spin(void) +static void __noreturn gamecube_spin(void) { /* spin until power button pressed */ for (;;) cpu_relax(); } -static void gamecube_restart(char *cmd) +static void __noreturn gamecube_restart(char *cmd) { local_irq_disable(); flipper_platform_reset(); @@ -49,7 +49,7 @@ static void gamecube_power_off(void) gamecube_spin(); } -static void gamecube_halt(void) +static void __noreturn gamecube_halt(void) { gamecube_restart(NULL); } diff --git a/arch/powerpc/platforms/embedded6xx/holly.c b/arch/powerpc/platforms/embedded6xx/holly.c index 8c305c7..8b6e761 100644 --- a/arch/powerpc/platforms/embedded6xx/holly.c +++ b/arch/powerpc/platforms/embedded6xx/holly.c @@ -193,7 +193,7 @@ void holly_show_cpuinfo(struct seq_file *m) seq_printf(m, "machine\t\t: PPC750 GX/CL\n"); } -void holly_restart(char *cmd) +void __noreturn holly_restart(char *cmd) { __be32 __iomem *ocn_bar1 = NULL; unsigned long bar; diff --git a/arch/powerpc/platforms/embedded6xx/linkstation.c b/arch/powerpc/platforms/embedded6xx/linkstation.c index 540eeb5..4c5089f 100644 --- a/arch/powerpc/platforms/embedded6xx/linkstation.c +++ b/arch/powerpc/platforms/embedded6xx/linkstation.c @@ -100,7 +100,7 @@ static void __init linkstation_init_IRQ(void) extern void avr_uart_configure(void); extern void avr_uart_send(const char); -static void linkstation_restart(char *cmd) +static void __noreturn linkstation_restart(char *cmd) { local_irq_disable(); @@ -113,7 +113,7 @@ static void linkstation_restart(char *cmd) avr_uart_send('G'); /* "kick" */ } -static void linkstation_power_off(void) +static void __noreturn linkstation_power_off(void) { local_irq_disable(); @@ -127,7 +127,7 @@ static void linkstation_power_off(void) 
/* NOTREACHED */ } -static void linkstation_halt(void) +static void __noreturn linkstation_halt(void) { linkstation_power_off(); /* NOTREACHED */ diff --git a/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c b/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c index df4ad95..3eda5df 100644 --- a/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c +++ b/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c @@ -146,7 +146,7 @@ void mpc7448_hpc2_show_cpuinfo(struct seq_file *m) seq_printf(m, "vendor\t\t: Freescale Semiconductor\n"); } -void mpc7448_hpc2_restart(char *cmd) +static void __noreturn mpc7448_hpc2_restart(char *cmd) { local_irq_disable(); diff --git a/arch/powerpc/platforms/embedded6xx/mvme5100.c b/arch/powerpc/platforms/embedded6xx/mvme5100.c index 8f65aa3..1382e1f 100644 --- a/arch/powerpc/platforms/embedded6xx/mvme5100.c +++ b/arch/powerpc/platforms/embedded6xx/mvme5100.c @@ -177,7 +177,7 @@ static void mvme5100_show_cpuinfo(struct seq_file *m) seq_puts(m, "Machine\t\t: MVME5100\n"); } -static void mvme5100_restart(char *cmd) +static void __noreturn mvme5100_restart(char *cmd) { local_irq_disable(); diff --git a/arch/powerpc/platforms/embedded6xx/storcenter.c b/arch/powerpc/platforms/embedded6xx/storcenter.c index d572833..4596cba 100644 --- a/arch/powerpc/platforms/embedded6xx/storcenter.c +++ b/arch/powerpc/platforms/embedded6xx/storcenter.c @@ -96,7 +96,7 @@ static void __init storcenter_init_IRQ(void) mpic_init(mpic); } -static void storcenter_restart(char *cmd) +static void __noreturn storcenter_restart(char *cmd) { local_irq_disable(); diff --git a/arch/powerpc/platforms/embedded6xx/wii.c b/arch/powerpc/platforms/embedded6xx/wii.c index 352592d..ebaecb8 100644 --- a/arch/powerpc/platforms/embedded6xx/wii.c +++ b/arch/powerpc/platforms/embedded6xx/wii.c @@ -112,7 +112,7 @@ unsigned long __init wii_mmu_mapin_mem2(unsigned long top) return delta + bl; } -static void wii_spin(void) +static void __noreturn wii_spin(void) { local_irq_disable(); for (;;) @@ 
-160,7 +160,7 @@ static void __init wii_setup_arch(void) } } -static void wii_restart(char *cmd) +static void __noreturn wii_restart(char *cmd) { local_irq_disable(); @@ -185,7 +185,7 @@ static void wii_power_off(void) wii_spin(); } -static void wii_halt(void) +static void __noreturn wii_halt(void) { if (ppc_md.restart) ppc_md.restart(NULL); diff --git a/arch/powerpc/platforms/maple/setup.c b/arch/powerpc/platforms/maple/setup.c index a837188..5f8f6f9 100644 --- a/arch/powerpc/platforms/maple/setup.c +++ b/arch/powerpc/platforms/maple/setup.c @@ -94,7 +94,7 @@ static unsigned long maple_find_nvram_base(void) return result; } -static void maple_restart(char *cmd) +static void __noreturn maple_restart(char *cmd) { unsigned int maple_nvram_base; const unsigned int *maple_nvram_offset, *maple_nvram_command; @@ -119,9 +119,10 @@ static void maple_restart(char *cmd) for (;;) ; fail: printk(KERN_EMERG "Maple: Manual Restart Required\n"); + for (;;) ; } -static void maple_power_off(void) +static void __noreturn maple_power_off(void) { unsigned int maple_nvram_base; const unsigned int *maple_nvram_offset, *maple_nvram_command; @@ -146,9 +147,10 @@ static void maple_power_off(void) for (;;) ; fail: printk(KERN_EMERG "Maple: Manual Power-Down Required\n"); + for (;;) ; } -static void maple_halt(void) +static void __noreturn maple_halt(void) { maple_power_off(); } diff --git a/arch/powerpc/platforms/pasemi/setup.c b/arch/powerpc/platforms/pasemi/setup.c index d71b2c7..7349644 100644 --- a/arch/powerpc/platforms/pasemi/setup.c +++ b/arch/powerpc/platforms/pasemi/setup.c @@ -62,7 +62,7 @@ static int num_mce_regs; static int nmi_virq = NO_IRQ; -static void pas_restart(char *cmd) +static void __noreturn pas_restart(char *cmd) { /* Need to put others cpu in hold loop so they're not sleeping */ smp_send_stop(); diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c index 8dd78f4..bd83b52c 100644 --- a/arch/powerpc/platforms/powermac/setup.c 
+++ b/arch/powerpc/platforms/powermac/setup.c @@ -383,7 +383,7 @@ void __init_refok note_bootable_part(dev_t dev, int part, int goodness) } #ifdef CONFIG_ADB_CUDA -static void cuda_restart(void) +static void __noreturn cuda_restart(void) { struct adb_request req; @@ -392,7 +392,7 @@ static void cuda_restart(void) cuda_poll(); } -static void cuda_shutdown(void) +static void __noreturn cuda_shutdown(void) { struct adb_request req; @@ -416,7 +416,7 @@ static void cuda_shutdown(void) #define smu_shutdown() #endif -static void pmac_restart(char *cmd) +static void __noreturn pmac_restart(char *cmd) { switch (sys_ctrler) { case SYS_CTRLER_CUDA: @@ -430,9 +430,10 @@ static void pmac_restart(char *cmd) break; default: ; } + while (1) ; } -static void pmac_power_off(void) +static void __noreturn pmac_power_off(void) { switch (sys_ctrler) { case SYS_CTRLER_CUDA: @@ -446,9 +447,10 @@ static void pmac_power_off(void) break; default: ; } + while (1) ; } -static void +static void __noreturn pmac_halt(void) { pmac_power_off(); diff --git a/arch/powerpc/platforms/ps3/setup.c b/arch/powerpc/platforms/ps3/setup.c index 799c858..486ecd0 100644 --- a/arch/powerpc/platforms/ps3/setup.c +++ b/arch/powerpc/platforms/ps3/setup.c @@ -80,7 +80,7 @@ static void ps3_power_save(void) lv1_pause(0); } -static void ps3_restart(char *cmd) +static void __noreturn ps3_restart(char *cmd) { DBG("%s:%d cmd '%s'\n", __func__, __LINE__, cmd); @@ -96,7 +96,7 @@ static void ps3_power_off(void) ps3_sys_manager_power_off(); /* never returns */ } -static void ps3_halt(void) +static void __noreturn ps3_halt(void) { DBG("%s:%d\n", __func__, __LINE__); diff --git a/arch/powerpc/sysdev/fsl_soc.c b/arch/powerpc/sysdev/fsl_soc.c index 99269c0..a09ca70 100644 --- a/arch/powerpc/sysdev/fsl_soc.c +++ b/arch/powerpc/sysdev/fsl_soc.c @@ -204,7 +204,7 @@ static int __init setup_rstcr(void) arch_initcall(setup_rstcr); -void fsl_rstcr_restart(char *cmd) +void __noreturn fsl_rstcr_restart(char *cmd) { local_irq_disable(); if 
(rstcr) @@ -228,10 +228,11 @@ EXPORT_SYMBOL(diu_ops); * to initiate a partition restart when we're running under the Freescale * hypervisor. */ -void fsl_hv_restart(char *cmd) +void __noreturn fsl_hv_restart(char *cmd) { pr_info("hv restart\n"); fh_partition_restart(-1); + while (1) ; } /* @@ -241,9 +242,10 @@ void fsl_hv_restart(char *cmd) * function pointers, to shut down the partition when we're running under * the Freescale hypervisor. */ -void fsl_hv_halt(void) +void __noreturn fsl_hv_halt(void) { pr_info("hv exit\n"); fh_partition_stop(-1); + while (1) ; } #endif diff --git a/arch/powerpc/sysdev/fsl_soc.h b/arch/powerpc/sysdev/fsl_soc.h index 4c5a19e..433566a 100644 --- a/arch/powerpc/sysdev/fsl_soc.h +++ b/arch/powerpc/sysdev/fsl_soc.h @@ -19,7 +19,7 @@ extern u32 fsl_get_sys_freq(void); struct spi_board_info; struct device_node; -extern void fsl_rstcr_restart(char *cmd); +extern void __noreturn fsl_rstcr_restart(char *cmd); /* The different ports that the DIU can be connected to */ enum fsl_diu_monitor_port { @@ -42,8 +42,8 @@ struct platform_diu_data_ops { extern struct platform_diu_data_ops diu_ops; -void fsl_hv_restart(char *cmd); -void fsl_hv_halt(void); +void __noreturn fsl_hv_restart(char *cmd); +void __noreturn fsl_hv_halt(void); #endif #endif -- cgit v0.10.2 From 6bcb80143e792becfd2b9cc6a339ce523e4e2219 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Wed, 6 Jul 2016 14:58:06 +1000 Subject: powerpc/tm: Fix stack pointer corruption in __tm_recheckpoint() At the start of __tm_recheckpoint() we save the kernel stack pointer (r1) in SPRG SCRATCH0 (SPRG2) so that we can restore it after the trecheckpoint. Unfortunately, the same SPRG is used in the SLB miss handler. If an SLB miss is taken between the save and restore of r1 to the SPRG, the SPRG is changed and hence r1 is also corrupted. 
We can end up with the following crash when we start using r1 again after the restore from the SPRG: Oops: Bad kernel stack pointer, sig: 6 [#1] SMP NR_CPUS=2048 NUMA pSeries CPU: 658 PID: 143777 Comm: htm_demo Tainted: G EL X 4.4.13-0-default #1 task: c0000b56993a7810 ti: c00000000cfec000 task.ti: c0000b56993bc000 NIP: c00000000004f188 LR: 00000000100040b8 CTR: 0000000010002570 REGS: c00000000cfefd40 TRAP: 0300 Tainted: G EL X (4.4.13-0-default) MSR: 8000000300001033 CR: 02000424 XER: 20000000 CFAR: c000000000008468 DAR: 00003ffd84e66880 DSISR: 40000000 SOFTE: 0 PACATMSCRATCH: 00003ffbc865e680 GPR00: fffffffcfabc4268 00003ffd84e667a0 00000000100d8c38 000000030544bb80 GPR04: 0000000000000002 00000000100cf200 0000000000000449 00000000100cf100 GPR08: 000000000000c350 0000000000002569 0000000000002569 00000000100d6c30 GPR12: 00000000100d6c28 c00000000e6a6b00 00003ffd84660000 0000000000000000 GPR16: 0000000000000003 0000000000000449 0000000010002570 0000010009684f20 GPR20: 0000000000800000 00003ffd84e5f110 00003ffd84e5f7a0 00000000100d0f40 GPR24: 0000000000000000 0000000000000000 0000000000000000 00003ffff0673f50 GPR28: 00003ffd84e5e960 00000000003d0f00 00003ffd84e667a0 00003ffd84e5e680 NIP [c00000000004f188] restore_gprs+0x110/0x17c LR [00000000100040b8] 0x100040b8 Call Trace: Instruction dump: f8a1fff0 e8e700a8 38a00000 7ca10164 e8a1fff8 e821fff0 7c0007dd 7c421378 7db142a6 7c3242a6 38800002 7c810164 e9e100e8 ea0100f0 ea2100f8 We hit this on large memory machines (> 2TB) but it can also be hit on smaller machines when 1TB segments are disabled. To hit this, you also need to be virtualised to ensure SLBs are periodically removed by the hypervisor. This patch moves the saving of r1 to the SPRG to the region where we are guaranteed not to take any further SLB misses.
Fixes: 98ae22e15b43 ("powerpc: Add helper functions for transactional memory context switching") Cc: stable@vger.kernel.org # v3.9+ Signed-off-by: Michael Neuling Acked-by: Cyril Bur Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index b7019b5..298afcf 100644 --- a/arch/powerpc/kernel/tm.S +++ b/arch/powerpc/kernel/tm.S @@ -338,8 +338,6 @@ _GLOBAL(__tm_recheckpoint) */ subi r7, r7, STACK_FRAME_OVERHEAD - SET_SCRATCH0(r1) - mfmsr r6 /* R4 = original MSR to indicate whether thread used FP/Vector etc. */ @@ -468,6 +466,7 @@ restore_gprs: * until we turn MSR RI back on. */ + SET_SCRATCH0(r1) ld r5, -8(r1) ld r1, -16(r1) -- cgit v0.10.2 From 236977609a81150c4561c33fa62b5c1f37f2b234 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 28 Jun 2016 15:02:46 +1000 Subject: powerpc/pseries: HVC early debug options should depend on HVC_CONSOLE The pseries HVC early debug options, CONFIG_PPC_EARLY_DEBUG_LPAR and CONFIG_PPC_EARLY_DEBUG_LPAR_HVSI both require code that is part of the hvc driver. If we turn them on but not CONFIG_HVC_CONSOLE then we get: arch/powerpc/kernel/built-in.o: In function `.udbg_early_init': arch/powerpc/kernel/built-in.o:(.debug_addr+0x9a00): undefined reference to `udbg_init_debug_lpar' Similarly for HVSI. So make them both depend on CONFIG_HVC_CONSOLE. Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index d3fcf7e..cfe08ea 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -149,14 +149,14 @@ config PPC_EARLY_DEBUG_BOOTX config PPC_EARLY_DEBUG_LPAR bool "LPAR HV Console" - depends on PPC_PSERIES + depends on PPC_PSERIES && HVC_CONSOLE help Select this to enable early debugging for a machine with a HVC console on vterm 0. 
config PPC_EARLY_DEBUG_LPAR_HVSI bool "LPAR HVSI Console" - depends on PPC_PSERIES + depends on PPC_PSERIES && HVC_CONSOLE help Select this to enable early debugging for a machine with a HVSI console on a specified vterm. -- cgit v0.10.2 From c2101c9039fe2a005f7b4138e028ed9a8468a48a Mon Sep 17 00:00:00 2001 From: Nathan Fontenot Date: Mon, 20 Jun 2016 09:00:39 -0500 Subject: powerpc/pseries: Move property cloning into its own routine Move property cloning code into its own routine Split the pieces of dlpar_clone_drconf_property() that create a copy of the property struct into its own routine. This allows for creating clones of more than just the ibm,dynamic-memory property used in memory hotplug. Signed-off-by: Nathan Fontenot Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 2ce1385..3dbc82b 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -69,13 +69,36 @@ unsigned long pseries_memory_block_size(void) return memblock_size; } -static void dlpar_free_drconf_property(struct property *prop) +static void dlpar_free_property(struct property *prop) { kfree(prop->name); kfree(prop->value); kfree(prop); } +static struct property *dlpar_clone_property(struct property *prop, + u32 prop_size) +{ + struct property *new_prop; + + new_prop = kzalloc(sizeof(*new_prop), GFP_KERNEL); + if (!new_prop) + return NULL; + + new_prop->name = kstrdup(prop->name, GFP_KERNEL); + new_prop->value = kzalloc(prop_size, GFP_KERNEL); + if (!new_prop->name || !new_prop->value) { + dlpar_free_property(new_prop); + return NULL; + } + + memcpy(new_prop->value, prop->value, prop->length); + new_prop->length = prop_size; + + of_property_set_flag(new_prop, OF_DYNAMIC); + return new_prop; +} + static struct property *dlpar_clone_drconf_property(struct device_node *dn) { struct property *prop, *new_prop; @@ -87,19 +110,10 @@ static struct 
property *dlpar_clone_drconf_property(struct device_node *dn) if (!prop) return NULL; - new_prop = kzalloc(sizeof(*new_prop), GFP_KERNEL); + new_prop = dlpar_clone_property(prop, prop->length); if (!new_prop) return NULL; - new_prop->name = kstrdup(prop->name, GFP_KERNEL); - new_prop->value = kmemdup(prop->value, prop->length, GFP_KERNEL); - if (!new_prop->name || !new_prop->value) { - dlpar_free_drconf_property(new_prop); - return NULL; - } - - new_prop->length = prop->length; - /* Convert the property to cpu endian-ness */ p = new_prop->value; *p = be32_to_cpu(*p); @@ -748,7 +762,7 @@ int dlpar_memory(struct pseries_hp_errorlog *hp_elog) break; } - dlpar_free_drconf_property(prop); + dlpar_free_property(prop); dlpar_memory_out: of_node_put(dn); -- cgit v0.10.2 From c05a5a40969e63d3490901acf60989979b0fcd27 Mon Sep 17 00:00:00 2001 From: Nathan Fontenot Date: Mon, 20 Jun 2016 09:01:38 -0500 Subject: powerpc/pseries: Dynamic add entires to associativity lookup array Dynamically add entries to the associativity lookup array The ibm,associativity-lookup-arrays property may only contain associativity arrays for LMBs present at boot time. When hotplug adding a LMB its associativity array may not be in the associativity lookup array, this patch adds the ability to add new entries to the associativity lookup array. 
Signed-off-by: Nathan Fontenot Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 3dbc82b..08e51f6 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -191,14 +191,74 @@ static int dlpar_update_device_tree_lmb(struct of_drconf_cell *lmb) return 0; } +static u32 find_aa_index(struct device_node *dr_node, + struct property *ala_prop, const u32 *lmb_assoc) +{ + u32 *assoc_arrays; + u32 aa_index; + int aa_arrays, aa_array_entries, aa_array_sz; + int i, index; + + /* + * The ibm,associativity-lookup-arrays property is defined to be + * a 32-bit value specifying the number of associativity arrays + * followed by a 32-bitvalue specifying the number of entries per + * array, followed by the associativity arrays. + */ + assoc_arrays = ala_prop->value; + + aa_arrays = be32_to_cpu(assoc_arrays[0]); + aa_array_entries = be32_to_cpu(assoc_arrays[1]); + aa_array_sz = aa_array_entries * sizeof(u32); + + aa_index = -1; + for (i = 0; i < aa_arrays; i++) { + index = (i * aa_array_entries) + 2; + + if (memcmp(&assoc_arrays[index], &lmb_assoc[1], aa_array_sz)) + continue; + + aa_index = i; + break; + } + + if (aa_index == -1) { + struct property *new_prop; + u32 new_prop_size; + + new_prop_size = ala_prop->length + aa_array_sz; + new_prop = dlpar_clone_property(ala_prop, new_prop_size); + if (!new_prop) + return -1; + + assoc_arrays = new_prop->value; + + /* increment the number of entries in the lookup array */ + assoc_arrays[0] = cpu_to_be32(aa_arrays + 1); + + /* copy the new associativity into the lookup array */ + index = aa_arrays * aa_array_entries + 2; + memcpy(&assoc_arrays[index], &lmb_assoc[1], aa_array_sz); + + of_update_property(dr_node, new_prop); + + /* + * The associativity lookup array index for this lmb is + * number of entries - 1 since we added its associativity + * to the end of the lookup array. 
+ */ + aa_index = be32_to_cpu(assoc_arrays[0]) - 1; + } + + return aa_index; +} + static u32 lookup_lmb_associativity_index(struct of_drconf_cell *lmb) { struct device_node *parent, *lmb_node, *dr_node; + struct property *ala_prop; const u32 *lmb_assoc; - const u32 *assoc_arrays; u32 aa_index; - int aa_arrays, aa_array_entries, aa_array_sz; - int i; parent = of_find_node_by_path("/"); if (!parent) @@ -222,34 +282,15 @@ static u32 lookup_lmb_associativity_index(struct of_drconf_cell *lmb) return -ENODEV; } - assoc_arrays = of_get_property(dr_node, - "ibm,associativity-lookup-arrays", - NULL); - of_node_put(dr_node); - if (!assoc_arrays) { + ala_prop = of_find_property(dr_node, "ibm,associativity-lookup-arrays", + NULL); + if (!ala_prop) { + of_node_put(dr_node); dlpar_free_cc_nodes(lmb_node); return -ENODEV; } - /* The ibm,associativity-lookup-arrays property is defined to be - * a 32-bit value specifying the number of associativity arrays - * followed by a 32-bitvalue specifying the number of entries per - * array, followed by the associativity arrays. - */ - aa_arrays = be32_to_cpu(assoc_arrays[0]); - aa_array_entries = be32_to_cpu(assoc_arrays[1]); - aa_array_sz = aa_array_entries * sizeof(u32); - - aa_index = -1; - for (i = 0; i < aa_arrays; i++) { - int indx = (i * aa_array_entries) + 2; - - if (memcmp(&assoc_arrays[indx], &lmb_assoc[1], aa_array_sz)) - continue; - - aa_index = i; - break; - } + aa_index = find_aa_index(dr_node, ala_prop, lmb_assoc); dlpar_free_cc_nodes(lmb_node); return aa_index; -- cgit v0.10.2 From ec999072442ad531cccbecbd6d5a569b0af6c8e3 Mon Sep 17 00:00:00 2001 From: Nathan Fontenot Date: Wed, 29 Jun 2016 12:19:14 -0500 Subject: powerpc/pseries: Auto-online hotplugged memory A recent update (commit id 31bc3858ea3) allows for automatically onlining memory that is added. 
This patch sets the config option CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE=y for pseries and updates the pseries memory hotplug code so that DLPAR added memory can be automatically onlined instead of explicitly onlining the memory. Signed-off-by: Nathan Fontenot Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig index 99dec9d..654aeff 100644 --- a/arch/powerpc/configs/pseries_defconfig +++ b/arch/powerpc/configs/pseries_defconfig @@ -53,6 +53,7 @@ CONFIG_KEXEC=y CONFIG_IRQ_ALL_CPUS=y CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y +CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE=y CONFIG_KSM=y CONFIG_TRANSPARENT_HUGEPAGE=y CONFIG_PPC_64K_PAGES=y diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 08e51f6..8baad18 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -590,7 +590,6 @@ static int dlpar_memory_remove_by_index(u32 drc_index, struct property *prop) static int dlpar_add_lmb_memory(struct of_drconf_cell *lmb) { - struct memory_block *mem_block; unsigned long block_sz; int nid, rc; @@ -611,19 +610,6 @@ static int dlpar_add_lmb_memory(struct of_drconf_cell *lmb) return rc; } - mem_block = lmb_to_memblock(lmb); - if (!mem_block) { - remove_memory(nid, lmb->base_addr, block_sz); - return -EINVAL; - } - - rc = device_online(&mem_block->dev); - put_device(&mem_block->dev); - if (rc) { - remove_memory(nid, lmb->base_addr, block_sz); - return rc; - } - lmb->flags |= DRCONF_MEM_ASSIGNED; return 0; } -- cgit v0.10.2 From fdb4f6e99ffacfa643ab2d2da99171b3084d4446 Mon Sep 17 00:00:00 2001 From: Nathan Fontenot Date: Wed, 29 Jun 2016 12:20:30 -0500 Subject: powerpc/pseries: Remove call to memblock_add() The call to memblock_add() is not needed; this is already done by add_memory().
This patch removes this call which shrinks dlpar_add_lmb_memory() enough that it can be merged into dlpar_add_lmb(). Signed-off-by: Nathan Fontenot Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 8baad18..43f7beb 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -588,36 +588,11 @@ static int dlpar_memory_remove_by_index(u32 drc_index, struct property *prop) #endif /* CONFIG_MEMORY_HOTREMOVE */ -static int dlpar_add_lmb_memory(struct of_drconf_cell *lmb) +static int dlpar_add_lmb(struct of_drconf_cell *lmb) { unsigned long block_sz; int nid, rc; - block_sz = memory_block_size_bytes(); - - /* Find the node id for this address */ - nid = memory_add_physaddr_to_nid(lmb->base_addr); - - /* Add the memory */ - rc = add_memory(nid, lmb->base_addr, block_sz); - if (rc) - return rc; - - /* Register this block of memory */ - rc = memblock_add(lmb->base_addr, block_sz); - if (rc) { - remove_memory(nid, lmb->base_addr, block_sz); - return rc; - } - - lmb->flags |= DRCONF_MEM_ASSIGNED; - return 0; -} - -static int dlpar_add_lmb(struct of_drconf_cell *lmb) -{ - int rc; - if (lmb->flags & DRCONF_MEM_ASSIGNED) return -EINVAL; @@ -633,10 +608,18 @@ static int dlpar_add_lmb(struct of_drconf_cell *lmb) return rc; } - rc = dlpar_add_lmb_memory(lmb); + block_sz = memory_block_size_bytes(); + + /* Find the node id for this address */ + nid = memory_add_physaddr_to_nid(lmb->base_addr); + + /* Add the memory */ + rc = add_memory(nid, lmb->base_addr, block_sz); if (rc) { dlpar_remove_device_tree_lmb(lmb); dlpar_release_drc(lmb->drc_index); + } else { + lmb->flags |= DRCONF_MEM_ASSIGNED; } return rc; -- cgit v0.10.2 From ec5619fdba66f091b5b987f980eac690593039d1 Mon Sep 17 00:00:00 2001 From: Stewart Smith Date: Mon, 23 May 2016 11:27:01 +1000 Subject: powerpc/lib: Clarify that adde is an instruction and we mean plural 
Signed-off-by: Stewart Smith Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/lib/checksum_64.S b/arch/powerpc/lib/checksum_64.S index 8e6e510..fdec6e6 100644 --- a/arch/powerpc/lib/checksum_64.S +++ b/arch/powerpc/lib/checksum_64.S @@ -74,9 +74,9 @@ _GLOBAL(__csum_partial) ld r11,24(r3) /* - * On POWER6 and POWER7 back to back addes take 2 cycles because of - * the XER dependency. This means the fastest this loop can go is - * 16 cycles per iteration. The scheduling of the loop below has + * On POWER6 and POWER7 back to back adde instructions take 2 cycles + * because of the XER dependency. This means the fastest this loop can + * go is 16 cycles per iteration. The scheduling of the loop below has * been shown to hit this on both POWER6 and POWER7. */ .align 5 @@ -275,9 +275,9 @@ source; ld r10,16(r3) source; ld r11,24(r3) /* - * On POWER6 and POWER7 back to back addes take 2 cycles because of - * the XER dependency. This means the fastest this loop can go is - * 16 cycles per iteration. The scheduling of the loop below has + * On POWER6 and POWER7 back to back adde instructions take 2 cycles + * because of the XER dependency. This means the fastest this loop can + * go is 16 cycles per iteration. The scheduling of the loop below has * been shown to hit this on both POWER6 and POWER7. */ .align 5 -- cgit v0.10.2 From bfd1b7ae5e0f6aa3f31d590936d580c6db099bab Mon Sep 17 00:00:00 2001 From: "Shreyas B. Prabhu" Date: Fri, 8 Jul 2016 11:50:43 +0530 Subject: powerpc/powernv: Use PNV_THREAD_WINKLE macro while requesting for winkle Signed-off-by: Shreyas B. 
Prabhu Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S index 470ceeb..705c867 100644 --- a/arch/powerpc/kernel/idle_power7.S +++ b/arch/powerpc/kernel/idle_power7.S @@ -252,7 +252,7 @@ _GLOBAL(power7_sleep) /* No return */ _GLOBAL(power7_winkle) - li r3,3 + li r3,PNV_THREAD_WINKLE li r4,1 b power7_powersave_common /* No return */ -- cgit v0.10.2 From 1706567117ba93cfa27f6fcc0846b1606e039cc5 Mon Sep 17 00:00:00 2001 From: "Shreyas B. Prabhu" Date: Fri, 8 Jul 2016 11:50:44 +0530 Subject: powerpc/kvm: make hypervisor state restore a function In the current code, when the thread wakes up in reset vector, some of the state restore code and check for whether a thread needs to branch to kvm is duplicated. Reorder the code such that this duplication is avoided. At a higher level this is what the change looks like- Before this patch - power7_wakeup_tb_loss: restore hypervisor state if (thread needed by kvm) goto kvm_start_guest restore nvgprs, cr, pc rfid to process context power7_wakeup_loss: restore nvgprs, cr, pc rfid to process context reset vector: if (waking from deep idle states) goto power7_wakeup_tb_loss else if (thread needed by kvm) goto kvm_start_guest goto power7_wakeup_loss After this patch - power7_wakeup_tb_loss: restore hypervisor state return power7_restore_hyp_resource(): if (waking from deep idle states) goto power7_wakeup_tb_loss return power7_wakeup_loss: restore nvgprs, cr, pc rfid to process context reset vector: power7_restore_hyp_resource() if (thread needed by kvm) goto kvm_start_guest goto power7_wakeup_loss Reviewed-by: Paul Mackerras Reviewed-by: Gautham R. Shenoy Signed-off-by: Shreyas B. 
Prabhu Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 8bcc1b4..612a65b 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -107,25 +107,9 @@ BEGIN_FTR_SECTION beq 9f cmpwi cr3,r13,2 - - /* - * Check if last bit of HSPGR0 is set. This indicates whether we are - * waking up from winkle. - */ GET_PACA(r13) - clrldi r5,r13,63 - clrrdi r13,r13,1 - cmpwi cr4,r5,1 - mtspr SPRN_HSPRG0,r13 + bl power7_restore_hyp_resource - lbz r0,PACA_THREAD_IDLE_STATE(r13) - cmpwi cr2,r0,PNV_THREAD_NAP - bgt cr2,8f /* Either sleep or Winkle */ - - /* Waking up from nap should not cause hypervisor state loss */ - bgt cr3,. - - /* Waking up from nap */ li r0,PNV_THREAD_RUNNING stb r0,PACA_THREAD_IDLE_STATE(r13) /* Clear thread state */ @@ -143,13 +127,9 @@ BEGIN_FTR_SECTION /* Return SRR1 from power7_nap() */ mfspr r3,SPRN_SRR1 - beq cr3,2f - b power7_wakeup_noloss -2: b power7_wakeup_loss - - /* Fast Sleep wakeup on PowerNV */ -8: GET_PACA(r13) - b power7_wakeup_tb_loss + blt cr3,2f + b power7_wakeup_loss +2: b power7_wakeup_noloss 9: END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S index 705c867..d5def06 100644 --- a/arch/powerpc/kernel/idle_power7.S +++ b/arch/powerpc/kernel/idle_power7.S @@ -276,6 +276,39 @@ ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66); \ 20: nop; +/* + * Called from reset vector. Check whether we have woken up with + * hypervisor state loss. If yes, restore hypervisor state and return + * back to reset vector. + * + * r13 - Contents of HSPRG0 + * cr3 - set to gt if waking up with partial/complete hypervisor state loss + */ +_GLOBAL(power7_restore_hyp_resource) + /* + * Check if last bit of HSPGR0 is set. This indicates whether we are + * waking up from winkle. 
+ */ + clrldi r5,r13,63 + clrrdi r13,r13,1 + cmpwi cr4,r5,1 + mtspr SPRN_HSPRG0,r13 + + lbz r0,PACA_THREAD_IDLE_STATE(r13) + cmpwi cr2,r0,PNV_THREAD_NAP + bgt cr2,power7_wakeup_tb_loss /* Either sleep or Winkle */ + + /* + * We fall through here if PACA_THREAD_IDLE_STATE shows we are waking + * up from nap. At this stage CR3 shouldn't contains 'gt' since that + * indicates we are waking with hypervisor state loss from nap. + */ + bgt cr3,. + + blr /* Return back to System Reset vector from where + power7_restore_hyp_resource was invoked */ + + _GLOBAL(power7_wakeup_tb_loss) ld r2,PACATOC(r13); ld r1,PACAR1(r13) @@ -284,11 +317,13 @@ _GLOBAL(power7_wakeup_tb_loss) * and they are restored before switching to the process context. Hence * until they are restored, they are free to be used. * - * Save SRR1 in a NVGPR as it might be clobbered in opal_call_realmode - * (called in CHECK_HMI_INTERRUPT). SRR1 is required to determine the - * wakeup reason if we branch to kvm_start_guest. + * Save SRR1 and LR in NVGPRs as they might be clobbered in + * opal_call_realmode (called in CHECK_HMI_INTERRUPT). SRR1 is required + * to determine the wakeup reason if we branch to kvm_start_guest. LR + * is required to return back to reset vector after hypervisor state + * restore is complete. */ - + mflr r17 mfspr r16,SPRN_SRR1 BEGIN_FTR_SECTION CHECK_HMI_INTERRUPT @@ -438,33 +473,10 @@ common_exit: hypervisor_state_restored: - li r5,PNV_THREAD_RUNNING - stb r5,PACA_THREAD_IDLE_STATE(r13) - mtspr SPRN_SRR1,r16 -#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE - li r0,KVM_HWTHREAD_IN_KERNEL - stb r0,HSTATE_HWTHREAD_STATE(r13) - /* Order setting hwthread_state vs. 
testing hwthread_req */ - sync - lbz r0,HSTATE_HWTHREAD_REQ(r13) - cmpwi r0,0 - beq 6f - b kvm_start_guest -6: -#endif - - REST_NVGPRS(r1) - REST_GPR(2, r1) - ld r3,_CCR(r1) - ld r4,_MSR(r1) - ld r5,_NIP(r1) - addi r1,r1,INT_FRAME_SIZE - mtcr r3 - mfspr r3,SPRN_SRR1 /* Return SRR1 */ - mtspr SPRN_SRR1,r4 - mtspr SPRN_SRR0,r5 - rfid + mtlr r17 + blr /* Return back to System Reset vector from where + power7_restore_hyp_resource was invoked */ fastsleep_workaround_at_exit: li r3,1 -- cgit v0.10.2 From 83289f909a72596d4902be3b3e1dffe48e6074af Mon Sep 17 00:00:00 2001 From: "Shreyas B. Prabhu" Date: Fri, 8 Jul 2016 11:50:45 +0530 Subject: powerpc/powernv: Rename idle_power7.S to idle_book3s.S idle_power7.S handles idle entry/exit for POWER7, POWER8 and in next patch for POWER9. Rename the file to a non-hardware specific name. Reviewed-by: Gautham R. Shenoy Signed-off-by: Shreyas B. Prabhu Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 2da380f..9e7bfc32 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -47,7 +47,7 @@ obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o idle_book3e.o obj-$(CONFIG_PPC64) += vdso64/ obj-$(CONFIG_ALTIVEC) += vecemu.o obj-$(CONFIG_PPC_970_NAP) += idle_power4.o -obj-$(CONFIG_PPC_P7_NAP) += idle_power7.o +obj-$(CONFIG_PPC_P7_NAP) += idle_book3s.o procfs-y := proc_powerpc.o obj-$(CONFIG_PROC_FS) += $(procfs-y) rtaspci-$(CONFIG_PPC64)-$(CONFIG_PCI) := rtas_pci.o diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S new file mode 100644 index 0000000..d5def06 --- /dev/null +++ b/arch/powerpc/kernel/idle_book3s.S @@ -0,0 +1,527 @@ +/* + * This file contains the power_save function for Power7 CPUs. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#undef DEBUG + +/* + * Use unused space in the interrupt stack to save and restore + * registers for winkle support. + */ +#define _SDR1 GPR3 +#define _RPR GPR4 +#define _SPURR GPR5 +#define _PURR GPR6 +#define _TSCR GPR7 +#define _DSCR GPR8 +#define _AMOR GPR9 +#define _WORT GPR10 +#define _WORC GPR11 + +/* Idle state entry routines */ + +#define IDLE_STATE_ENTER_SEQ(IDLE_INST) \ + /* Magic NAP/SLEEP/WINKLE mode enter sequence */ \ + std r0,0(r1); \ + ptesync; \ + ld r0,0(r1); \ +1: cmp cr0,r0,r0; \ + bne 1b; \ + IDLE_INST; \ + b . + + .text + +/* + * Used by threads when the lock bit of core_idle_state is set. + * Threads will spin in HMT_LOW until the lock bit is cleared. + * r14 - pointer to core_idle_state + * r15 - used to load contents of core_idle_state + */ + +core_idle_lock_held: + HMT_LOW +3: lwz r15,0(r14) + andi. r15,r15,PNV_CORE_IDLE_LOCK_BIT + bne 3b + HMT_MEDIUM + lwarx r15,0,r14 + blr + +/* + * Pass requested state in r3: + * r3 - PNV_THREAD_NAP/SLEEP/WINKLE + * + * To check IRQ_HAPPENED in r4 + * 0 - don't check + * 1 - check + */ +_GLOBAL(power7_powersave_common) + /* Use r3 to pass state nap/sleep/winkle */ + /* NAP is a state loss, we create a regs frame on the + * stack, fill it up with the state we care about and + * stick a pointer to it in PACAR1. We really only + * need to save PC, some CR bits and the NV GPRs, + * but for now an interrupt frame will do. 
+ */ + mflr r0 + std r0,16(r1) + stdu r1,-INT_FRAME_SIZE(r1) + std r0,_LINK(r1) + std r0,_NIP(r1) + + /* Hard disable interrupts */ + mfmsr r9 + rldicl r9,r9,48,1 + rotldi r9,r9,16 + mtmsrd r9,1 /* hard-disable interrupts */ + + /* Check if something happened while soft-disabled */ + lbz r0,PACAIRQHAPPENED(r13) + andi. r0,r0,~PACA_IRQ_HARD_DIS@l + beq 1f + cmpwi cr0,r4,0 + beq 1f + addi r1,r1,INT_FRAME_SIZE + ld r0,16(r1) + li r3,0 /* Return 0 (no nap) */ + mtlr r0 + blr + +1: /* We mark irqs hard disabled as this is the state we'll + * be in when returning and we need to tell arch_local_irq_restore() + * about it + */ + li r0,PACA_IRQ_HARD_DIS + stb r0,PACAIRQHAPPENED(r13) + + /* We haven't lost state ... yet */ + li r0,0 + stb r0,PACA_NAPSTATELOST(r13) + + /* Continue saving state */ + SAVE_GPR(2, r1) + SAVE_NVGPRS(r1) + mfcr r4 + std r4,_CCR(r1) + std r9,_MSR(r1) + std r1,PACAR1(r13) + + /* + * Go to real mode to do the nap, as required by the architecture. + * Also, we need to be in real mode before setting hwthread_state, + * because as soon as we do that, another thread can switch + * the MMU context to the guest. + */ + LOAD_REG_IMMEDIATE(r5, MSR_IDLE) + li r6, MSR_RI + andc r6, r9, r6 + LOAD_REG_ADDR(r7, power7_enter_nap_mode) + mtmsrd r6, 1 /* clear RI before setting SRR0/1 */ + mtspr SPRN_SRR0, r7 + mtspr SPRN_SRR1, r5 + rfid + + .globl power7_enter_nap_mode +power7_enter_nap_mode: +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + /* Tell KVM we're napping */ + li r4,KVM_HWTHREAD_IN_NAP + stb r4,HSTATE_HWTHREAD_STATE(r13) +#endif + stb r3,PACA_THREAD_IDLE_STATE(r13) + cmpwi cr3,r3,PNV_THREAD_SLEEP + bge cr3,2f + IDLE_STATE_ENTER_SEQ(PPC_NAP) + /* No return */ +2: + /* Sleep or winkle */ + lbz r7,PACA_THREAD_MASK(r13) + ld r14,PACA_CORE_IDLE_STATE_PTR(r13) +lwarx_loop1: + lwarx r15,0,r14 + + andi. r9,r15,PNV_CORE_IDLE_LOCK_BIT + bnel core_idle_lock_held + + andc r15,r15,r7 /* Clear thread bit */ + + andi. 
r15,r15,PNV_CORE_IDLE_THREAD_BITS + +/* + * If cr0 = 0, then current thread is the last thread of the core entering + * sleep. Last thread needs to execute the hardware bug workaround code if + * required by the platform. + * Make the workaround call unconditionally here. The below branch call is + * patched out when the idle states are discovered if the platform does not + * require it. + */ +.global pnv_fastsleep_workaround_at_entry +pnv_fastsleep_workaround_at_entry: + beq fastsleep_workaround_at_entry + + stwcx. r15,0,r14 + bne- lwarx_loop1 + isync + +common_enter: /* common code for all the threads entering sleep or winkle */ + bgt cr3,enter_winkle + IDLE_STATE_ENTER_SEQ(PPC_SLEEP) + +fastsleep_workaround_at_entry: + ori r15,r15,PNV_CORE_IDLE_LOCK_BIT + stwcx. r15,0,r14 + bne- lwarx_loop1 + isync + + /* Fast sleep workaround */ + li r3,1 + li r4,1 + li r0,OPAL_CONFIG_CPU_IDLE_STATE + bl opal_call_realmode + + /* Clear Lock bit */ + li r0,0 + lwsync + stw r0,0(r14) + b common_enter + +enter_winkle: + /* + * Note all register i.e per-core, per-subcore or per-thread is saved + * here since any thread in the core might wake up first + */ + mfspr r3,SPRN_SDR1 + std r3,_SDR1(r1) + mfspr r3,SPRN_RPR + std r3,_RPR(r1) + mfspr r3,SPRN_SPURR + std r3,_SPURR(r1) + mfspr r3,SPRN_PURR + std r3,_PURR(r1) + mfspr r3,SPRN_TSCR + std r3,_TSCR(r1) + mfspr r3,SPRN_DSCR + std r3,_DSCR(r1) + mfspr r3,SPRN_AMOR + std r3,_AMOR(r1) + mfspr r3,SPRN_WORT + std r3,_WORT(r1) + mfspr r3,SPRN_WORC + std r3,_WORC(r1) + IDLE_STATE_ENTER_SEQ(PPC_WINKLE) + +_GLOBAL(power7_idle) + /* Now check if user or arch enabled NAP mode */ + LOAD_REG_ADDRBASE(r3,powersave_nap) + lwz r4,ADDROFF(powersave_nap)(r3) + cmpwi 0,r4,0 + beqlr + li r3, 1 + /* fall through */ + +_GLOBAL(power7_nap) + mr r4,r3 + li r3,PNV_THREAD_NAP + b power7_powersave_common + /* No return */ + +_GLOBAL(power7_sleep) + li r3,PNV_THREAD_SLEEP + li r4,1 + b power7_powersave_common + /* No return */ + +_GLOBAL(power7_winkle) + li 
r3,PNV_THREAD_WINKLE + li r4,1 + b power7_powersave_common + /* No return */ + +#define CHECK_HMI_INTERRUPT \ + mfspr r0,SPRN_SRR1; \ +BEGIN_FTR_SECTION_NESTED(66); \ + rlwinm r0,r0,45-31,0xf; /* extract wake reason field (P8) */ \ +FTR_SECTION_ELSE_NESTED(66); \ + rlwinm r0,r0,45-31,0xe; /* P7 wake reason field is 3 bits */ \ +ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66); \ + cmpwi r0,0xa; /* Hypervisor maintenance ? */ \ + bne 20f; \ + /* Invoke opal call to handle hmi */ \ + ld r2,PACATOC(r13); \ + ld r1,PACAR1(r13); \ + std r3,ORIG_GPR3(r1); /* Save original r3 */ \ + li r0,OPAL_HANDLE_HMI; /* Pass opal token argument*/ \ + bl opal_call_realmode; \ + ld r3,ORIG_GPR3(r1); /* Restore original r3 */ \ +20: nop; + + +/* + * Called from reset vector. Check whether we have woken up with + * hypervisor state loss. If yes, restore hypervisor state and return + * back to reset vector. + * + * r13 - Contents of HSPRG0 + * cr3 - set to gt if waking up with partial/complete hypervisor state loss + */ +_GLOBAL(power7_restore_hyp_resource) + /* + * Check if last bit of HSPGR0 is set. This indicates whether we are + * waking up from winkle. + */ + clrldi r5,r13,63 + clrrdi r13,r13,1 + cmpwi cr4,r5,1 + mtspr SPRN_HSPRG0,r13 + + lbz r0,PACA_THREAD_IDLE_STATE(r13) + cmpwi cr2,r0,PNV_THREAD_NAP + bgt cr2,power7_wakeup_tb_loss /* Either sleep or Winkle */ + + /* + * We fall through here if PACA_THREAD_IDLE_STATE shows we are waking + * up from nap. At this stage CR3 shouldn't contains 'gt' since that + * indicates we are waking with hypervisor state loss from nap. + */ + bgt cr3,. + + blr /* Return back to System Reset vector from where + power7_restore_hyp_resource was invoked */ + + +_GLOBAL(power7_wakeup_tb_loss) + ld r2,PACATOC(r13); + ld r1,PACAR1(r13) + /* + * Before entering any idle state, the NVGPRs are saved in the stack + * and they are restored before switching to the process context. Hence + * until they are restored, they are free to be used. 
+ * + * Save SRR1 and LR in NVGPRs as they might be clobbered in + * opal_call_realmode (called in CHECK_HMI_INTERRUPT). SRR1 is required + * to determine the wakeup reason if we branch to kvm_start_guest. LR + * is required to return back to reset vector after hypervisor state + * restore is complete. + */ + mflr r17 + mfspr r16,SPRN_SRR1 +BEGIN_FTR_SECTION + CHECK_HMI_INTERRUPT +END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) + + lbz r7,PACA_THREAD_MASK(r13) + ld r14,PACA_CORE_IDLE_STATE_PTR(r13) +lwarx_loop2: + lwarx r15,0,r14 + andi. r9,r15,PNV_CORE_IDLE_LOCK_BIT + /* + * Lock bit is set in one of the 2 cases- + * a. In the sleep/winkle enter path, the last thread is executing + * fastsleep workaround code. + * b. In the wake up path, another thread is executing fastsleep + * workaround undo code or resyncing timebase or restoring context + * In either case loop until the lock bit is cleared. + */ + bnel core_idle_lock_held + + cmpwi cr2,r15,0 + lbz r4,PACA_SUBCORE_SIBLING_MASK(r13) + and r4,r4,r15 + cmpwi cr1,r4,0 /* Check if first in subcore */ + + /* + * At this stage + * cr1 - 0b0100 if first thread to wakeup in subcore + * cr2 - 0b0100 if first thread to wakeup in core + * cr3- 0b0010 if waking up from sleep or winkle + * cr4 - 0b0100 if waking up from winkle + */ + + or r15,r15,r7 /* Set thread bit */ + + beq cr1,first_thread_in_subcore + + /* Not first thread in subcore to wake up */ + stwcx. r15,0,r14 + bne- lwarx_loop2 + isync + b common_exit + +first_thread_in_subcore: + /* First thread in subcore to wakeup */ + ori r15,r15,PNV_CORE_IDLE_LOCK_BIT + stwcx. r15,0,r14 + bne- lwarx_loop2 + isync + + /* + * If waking up from sleep, subcore state is not lost. 
Hence + * skip subcore state restore + */ + bne cr4,subcore_state_restored + + /* Restore per-subcore state */ + ld r4,_SDR1(r1) + mtspr SPRN_SDR1,r4 + ld r4,_RPR(r1) + mtspr SPRN_RPR,r4 + ld r4,_AMOR(r1) + mtspr SPRN_AMOR,r4 + +subcore_state_restored: + /* + * Check if the thread is also the first thread in the core. If not, + * skip to clear_lock. + */ + bne cr2,clear_lock + +first_thread_in_core: + + /* + * First thread in the core waking up from fastsleep. It needs to + * call the fastsleep workaround code if the platform requires it. + * Call it unconditionally here. The below branch instruction will + * be patched out when the idle states are discovered if platform + * does not require workaround. + */ +.global pnv_fastsleep_workaround_at_exit +pnv_fastsleep_workaround_at_exit: + b fastsleep_workaround_at_exit + +timebase_resync: + /* Do timebase resync if we are waking up from sleep. Use cr3 value + * set in exceptions-64s.S */ + ble cr3,clear_lock + /* Time base re-sync */ + li r0,OPAL_RESYNC_TIMEBASE + bl opal_call_realmode; + /* TODO: Check r3 for failure */ + + /* + * If waking up from sleep, per core state is not lost, skip to + * clear_lock. + */ + bne cr4,clear_lock + + /* Restore per core state */ + ld r4,_TSCR(r1) + mtspr SPRN_TSCR,r4 + ld r4,_WORC(r1) + mtspr SPRN_WORC,r4 + +clear_lock: + andi. r15,r15,PNV_CORE_IDLE_THREAD_BITS + lwsync + stw r15,0(r14) + +common_exit: + /* + * Common to all threads. + * + * If waking up from sleep, hypervisor state is not lost. Hence + * skip hypervisor state restore. + */ + bne cr4,hypervisor_state_restored + + /* Waking up from winkle */ + + /* Restore per thread state */ + bl __restore_cpu_power8 + + /* Restore SLB from PACA */ + ld r8,PACA_SLBSHADOWPTR(r13) + + .rept SLB_NUM_BOLTED + li r3, SLBSHADOW_SAVEAREA + LDX_BE r5, r8, r3 + addi r3, r3, 8 + LDX_BE r6, r8, r3 + andis. 
r7,r5,SLB_ESID_V@h + beq 1f + slbmte r6,r5 +1: addi r8,r8,16 + .endr + + ld r4,_SPURR(r1) + mtspr SPRN_SPURR,r4 + ld r4,_PURR(r1) + mtspr SPRN_PURR,r4 + ld r4,_DSCR(r1) + mtspr SPRN_DSCR,r4 + ld r4,_WORT(r1) + mtspr SPRN_WORT,r4 + +hypervisor_state_restored: + + mtspr SPRN_SRR1,r16 + mtlr r17 + blr /* Return back to System Reset vector from where + power7_restore_hyp_resource was invoked */ + +fastsleep_workaround_at_exit: + li r3,1 + li r4,0 + li r0,OPAL_CONFIG_CPU_IDLE_STATE + bl opal_call_realmode + b timebase_resync + +/* + * R3 here contains the value that will be returned to the caller + * of power7_nap. + */ +_GLOBAL(power7_wakeup_loss) + ld r1,PACAR1(r13) +BEGIN_FTR_SECTION + CHECK_HMI_INTERRUPT +END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) + REST_NVGPRS(r1) + REST_GPR(2, r1) + ld r6,_CCR(r1) + ld r4,_MSR(r1) + ld r5,_NIP(r1) + addi r1,r1,INT_FRAME_SIZE + mtcr r6 + mtspr SPRN_SRR1,r4 + mtspr SPRN_SRR0,r5 + rfid + +/* + * R3 here contains the value that will be returned to the caller + * of power7_nap. + */ +_GLOBAL(power7_wakeup_noloss) + lbz r0,PACA_NAPSTATELOST(r13) + cmpwi r0,0 + bne power7_wakeup_loss +BEGIN_FTR_SECTION + CHECK_HMI_INTERRUPT +END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) + ld r1,PACAR1(r13) + ld r6,_CCR(r1) + ld r4,_MSR(r1) + ld r5,_NIP(r1) + addi r1,r1,INT_FRAME_SIZE + mtcr r6 + mtspr SPRN_SRR1,r4 + mtspr SPRN_SRR0,r5 + rfid diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S deleted file mode 100644 index d5def06..0000000 --- a/arch/powerpc/kernel/idle_power7.S +++ /dev/null @@ -1,527 +0,0 @@ -/* - * This file contains the power_save function for Power7 CPUs. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#undef DEBUG - -/* - * Use unused space in the interrupt stack to save and restore - * registers for winkle support. - */ -#define _SDR1 GPR3 -#define _RPR GPR4 -#define _SPURR GPR5 -#define _PURR GPR6 -#define _TSCR GPR7 -#define _DSCR GPR8 -#define _AMOR GPR9 -#define _WORT GPR10 -#define _WORC GPR11 - -/* Idle state entry routines */ - -#define IDLE_STATE_ENTER_SEQ(IDLE_INST) \ - /* Magic NAP/SLEEP/WINKLE mode enter sequence */ \ - std r0,0(r1); \ - ptesync; \ - ld r0,0(r1); \ -1: cmp cr0,r0,r0; \ - bne 1b; \ - IDLE_INST; \ - b . - - .text - -/* - * Used by threads when the lock bit of core_idle_state is set. - * Threads will spin in HMT_LOW until the lock bit is cleared. - * r14 - pointer to core_idle_state - * r15 - used to load contents of core_idle_state - */ - -core_idle_lock_held: - HMT_LOW -3: lwz r15,0(r14) - andi. r15,r15,PNV_CORE_IDLE_LOCK_BIT - bne 3b - HMT_MEDIUM - lwarx r15,0,r14 - blr - -/* - * Pass requested state in r3: - * r3 - PNV_THREAD_NAP/SLEEP/WINKLE - * - * To check IRQ_HAPPENED in r4 - * 0 - don't check - * 1 - check - */ -_GLOBAL(power7_powersave_common) - /* Use r3 to pass state nap/sleep/winkle */ - /* NAP is a state loss, we create a regs frame on the - * stack, fill it up with the state we care about and - * stick a pointer to it in PACAR1. We really only - * need to save PC, some CR bits and the NV GPRs, - * but for now an interrupt frame will do. - */ - mflr r0 - std r0,16(r1) - stdu r1,-INT_FRAME_SIZE(r1) - std r0,_LINK(r1) - std r0,_NIP(r1) - - /* Hard disable interrupts */ - mfmsr r9 - rldicl r9,r9,48,1 - rotldi r9,r9,16 - mtmsrd r9,1 /* hard-disable interrupts */ - - /* Check if something happened while soft-disabled */ - lbz r0,PACAIRQHAPPENED(r13) - andi. 
r0,r0,~PACA_IRQ_HARD_DIS@l - beq 1f - cmpwi cr0,r4,0 - beq 1f - addi r1,r1,INT_FRAME_SIZE - ld r0,16(r1) - li r3,0 /* Return 0 (no nap) */ - mtlr r0 - blr - -1: /* We mark irqs hard disabled as this is the state we'll - * be in when returning and we need to tell arch_local_irq_restore() - * about it - */ - li r0,PACA_IRQ_HARD_DIS - stb r0,PACAIRQHAPPENED(r13) - - /* We haven't lost state ... yet */ - li r0,0 - stb r0,PACA_NAPSTATELOST(r13) - - /* Continue saving state */ - SAVE_GPR(2, r1) - SAVE_NVGPRS(r1) - mfcr r4 - std r4,_CCR(r1) - std r9,_MSR(r1) - std r1,PACAR1(r13) - - /* - * Go to real mode to do the nap, as required by the architecture. - * Also, we need to be in real mode before setting hwthread_state, - * because as soon as we do that, another thread can switch - * the MMU context to the guest. - */ - LOAD_REG_IMMEDIATE(r5, MSR_IDLE) - li r6, MSR_RI - andc r6, r9, r6 - LOAD_REG_ADDR(r7, power7_enter_nap_mode) - mtmsrd r6, 1 /* clear RI before setting SRR0/1 */ - mtspr SPRN_SRR0, r7 - mtspr SPRN_SRR1, r5 - rfid - - .globl power7_enter_nap_mode -power7_enter_nap_mode: -#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE - /* Tell KVM we're napping */ - li r4,KVM_HWTHREAD_IN_NAP - stb r4,HSTATE_HWTHREAD_STATE(r13) -#endif - stb r3,PACA_THREAD_IDLE_STATE(r13) - cmpwi cr3,r3,PNV_THREAD_SLEEP - bge cr3,2f - IDLE_STATE_ENTER_SEQ(PPC_NAP) - /* No return */ -2: - /* Sleep or winkle */ - lbz r7,PACA_THREAD_MASK(r13) - ld r14,PACA_CORE_IDLE_STATE_PTR(r13) -lwarx_loop1: - lwarx r15,0,r14 - - andi. r9,r15,PNV_CORE_IDLE_LOCK_BIT - bnel core_idle_lock_held - - andc r15,r15,r7 /* Clear thread bit */ - - andi. r15,r15,PNV_CORE_IDLE_THREAD_BITS - -/* - * If cr0 = 0, then current thread is the last thread of the core entering - * sleep. Last thread needs to execute the hardware bug workaround code if - * required by the platform. - * Make the workaround call unconditionally here. 
The below branch call is - * patched out when the idle states are discovered if the platform does not - * require it. - */ -.global pnv_fastsleep_workaround_at_entry -pnv_fastsleep_workaround_at_entry: - beq fastsleep_workaround_at_entry - - stwcx. r15,0,r14 - bne- lwarx_loop1 - isync - -common_enter: /* common code for all the threads entering sleep or winkle */ - bgt cr3,enter_winkle - IDLE_STATE_ENTER_SEQ(PPC_SLEEP) - -fastsleep_workaround_at_entry: - ori r15,r15,PNV_CORE_IDLE_LOCK_BIT - stwcx. r15,0,r14 - bne- lwarx_loop1 - isync - - /* Fast sleep workaround */ - li r3,1 - li r4,1 - li r0,OPAL_CONFIG_CPU_IDLE_STATE - bl opal_call_realmode - - /* Clear Lock bit */ - li r0,0 - lwsync - stw r0,0(r14) - b common_enter - -enter_winkle: - /* - * Note all register i.e per-core, per-subcore or per-thread is saved - * here since any thread in the core might wake up first - */ - mfspr r3,SPRN_SDR1 - std r3,_SDR1(r1) - mfspr r3,SPRN_RPR - std r3,_RPR(r1) - mfspr r3,SPRN_SPURR - std r3,_SPURR(r1) - mfspr r3,SPRN_PURR - std r3,_PURR(r1) - mfspr r3,SPRN_TSCR - std r3,_TSCR(r1) - mfspr r3,SPRN_DSCR - std r3,_DSCR(r1) - mfspr r3,SPRN_AMOR - std r3,_AMOR(r1) - mfspr r3,SPRN_WORT - std r3,_WORT(r1) - mfspr r3,SPRN_WORC - std r3,_WORC(r1) - IDLE_STATE_ENTER_SEQ(PPC_WINKLE) - -_GLOBAL(power7_idle) - /* Now check if user or arch enabled NAP mode */ - LOAD_REG_ADDRBASE(r3,powersave_nap) - lwz r4,ADDROFF(powersave_nap)(r3) - cmpwi 0,r4,0 - beqlr - li r3, 1 - /* fall through */ - -_GLOBAL(power7_nap) - mr r4,r3 - li r3,PNV_THREAD_NAP - b power7_powersave_common - /* No return */ - -_GLOBAL(power7_sleep) - li r3,PNV_THREAD_SLEEP - li r4,1 - b power7_powersave_common - /* No return */ - -_GLOBAL(power7_winkle) - li r3,PNV_THREAD_WINKLE - li r4,1 - b power7_powersave_common - /* No return */ - -#define CHECK_HMI_INTERRUPT \ - mfspr r0,SPRN_SRR1; \ -BEGIN_FTR_SECTION_NESTED(66); \ - rlwinm r0,r0,45-31,0xf; /* extract wake reason field (P8) */ \ -FTR_SECTION_ELSE_NESTED(66); \ - rlwinm 
r0,r0,45-31,0xe; /* P7 wake reason field is 3 bits */ \ -ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66); \ - cmpwi r0,0xa; /* Hypervisor maintenance ? */ \ - bne 20f; \ - /* Invoke opal call to handle hmi */ \ - ld r2,PACATOC(r13); \ - ld r1,PACAR1(r13); \ - std r3,ORIG_GPR3(r1); /* Save original r3 */ \ - li r0,OPAL_HANDLE_HMI; /* Pass opal token argument*/ \ - bl opal_call_realmode; \ - ld r3,ORIG_GPR3(r1); /* Restore original r3 */ \ -20: nop; - - -/* - * Called from reset vector. Check whether we have woken up with - * hypervisor state loss. If yes, restore hypervisor state and return - * back to reset vector. - * - * r13 - Contents of HSPRG0 - * cr3 - set to gt if waking up with partial/complete hypervisor state loss - */ -_GLOBAL(power7_restore_hyp_resource) - /* - * Check if last bit of HSPGR0 is set. This indicates whether we are - * waking up from winkle. - */ - clrldi r5,r13,63 - clrrdi r13,r13,1 - cmpwi cr4,r5,1 - mtspr SPRN_HSPRG0,r13 - - lbz r0,PACA_THREAD_IDLE_STATE(r13) - cmpwi cr2,r0,PNV_THREAD_NAP - bgt cr2,power7_wakeup_tb_loss /* Either sleep or Winkle */ - - /* - * We fall through here if PACA_THREAD_IDLE_STATE shows we are waking - * up from nap. At this stage CR3 shouldn't contains 'gt' since that - * indicates we are waking with hypervisor state loss from nap. - */ - bgt cr3,. - - blr /* Return back to System Reset vector from where - power7_restore_hyp_resource was invoked */ - - -_GLOBAL(power7_wakeup_tb_loss) - ld r2,PACATOC(r13); - ld r1,PACAR1(r13) - /* - * Before entering any idle state, the NVGPRs are saved in the stack - * and they are restored before switching to the process context. Hence - * until they are restored, they are free to be used. - * - * Save SRR1 and LR in NVGPRs as they might be clobbered in - * opal_call_realmode (called in CHECK_HMI_INTERRUPT). SRR1 is required - * to determine the wakeup reason if we branch to kvm_start_guest. 
LR - * is required to return back to reset vector after hypervisor state - * restore is complete. - */ - mflr r17 - mfspr r16,SPRN_SRR1 -BEGIN_FTR_SECTION - CHECK_HMI_INTERRUPT -END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) - - lbz r7,PACA_THREAD_MASK(r13) - ld r14,PACA_CORE_IDLE_STATE_PTR(r13) -lwarx_loop2: - lwarx r15,0,r14 - andi. r9,r15,PNV_CORE_IDLE_LOCK_BIT - /* - * Lock bit is set in one of the 2 cases- - * a. In the sleep/winkle enter path, the last thread is executing - * fastsleep workaround code. - * b. In the wake up path, another thread is executing fastsleep - * workaround undo code or resyncing timebase or restoring context - * In either case loop until the lock bit is cleared. - */ - bnel core_idle_lock_held - - cmpwi cr2,r15,0 - lbz r4,PACA_SUBCORE_SIBLING_MASK(r13) - and r4,r4,r15 - cmpwi cr1,r4,0 /* Check if first in subcore */ - - /* - * At this stage - * cr1 - 0b0100 if first thread to wakeup in subcore - * cr2 - 0b0100 if first thread to wakeup in core - * cr3- 0b0010 if waking up from sleep or winkle - * cr4 - 0b0100 if waking up from winkle - */ - - or r15,r15,r7 /* Set thread bit */ - - beq cr1,first_thread_in_subcore - - /* Not first thread in subcore to wake up */ - stwcx. r15,0,r14 - bne- lwarx_loop2 - isync - b common_exit - -first_thread_in_subcore: - /* First thread in subcore to wakeup */ - ori r15,r15,PNV_CORE_IDLE_LOCK_BIT - stwcx. r15,0,r14 - bne- lwarx_loop2 - isync - - /* - * If waking up from sleep, subcore state is not lost. Hence - * skip subcore state restore - */ - bne cr4,subcore_state_restored - - /* Restore per-subcore state */ - ld r4,_SDR1(r1) - mtspr SPRN_SDR1,r4 - ld r4,_RPR(r1) - mtspr SPRN_RPR,r4 - ld r4,_AMOR(r1) - mtspr SPRN_AMOR,r4 - -subcore_state_restored: - /* - * Check if the thread is also the first thread in the core. If not, - * skip to clear_lock. - */ - bne cr2,clear_lock - -first_thread_in_core: - - /* - * First thread in the core waking up from fastsleep. 
It needs to - * call the fastsleep workaround code if the platform requires it. - * Call it unconditionally here. The below branch instruction will - * be patched out when the idle states are discovered if platform - * does not require workaround. - */ -.global pnv_fastsleep_workaround_at_exit -pnv_fastsleep_workaround_at_exit: - b fastsleep_workaround_at_exit - -timebase_resync: - /* Do timebase resync if we are waking up from sleep. Use cr3 value - * set in exceptions-64s.S */ - ble cr3,clear_lock - /* Time base re-sync */ - li r0,OPAL_RESYNC_TIMEBASE - bl opal_call_realmode; - /* TODO: Check r3 for failure */ - - /* - * If waking up from sleep, per core state is not lost, skip to - * clear_lock. - */ - bne cr4,clear_lock - - /* Restore per core state */ - ld r4,_TSCR(r1) - mtspr SPRN_TSCR,r4 - ld r4,_WORC(r1) - mtspr SPRN_WORC,r4 - -clear_lock: - andi. r15,r15,PNV_CORE_IDLE_THREAD_BITS - lwsync - stw r15,0(r14) - -common_exit: - /* - * Common to all threads. - * - * If waking up from sleep, hypervisor state is not lost. Hence - * skip hypervisor state restore. - */ - bne cr4,hypervisor_state_restored - - /* Waking up from winkle */ - - /* Restore per thread state */ - bl __restore_cpu_power8 - - /* Restore SLB from PACA */ - ld r8,PACA_SLBSHADOWPTR(r13) - - .rept SLB_NUM_BOLTED - li r3, SLBSHADOW_SAVEAREA - LDX_BE r5, r8, r3 - addi r3, r3, 8 - LDX_BE r6, r8, r3 - andis. 
r7,r5,SLB_ESID_V@h - beq 1f - slbmte r6,r5 -1: addi r8,r8,16 - .endr - - ld r4,_SPURR(r1) - mtspr SPRN_SPURR,r4 - ld r4,_PURR(r1) - mtspr SPRN_PURR,r4 - ld r4,_DSCR(r1) - mtspr SPRN_DSCR,r4 - ld r4,_WORT(r1) - mtspr SPRN_WORT,r4 - -hypervisor_state_restored: - - mtspr SPRN_SRR1,r16 - mtlr r17 - blr /* Return back to System Reset vector from where - power7_restore_hyp_resource was invoked */ - -fastsleep_workaround_at_exit: - li r3,1 - li r4,0 - li r0,OPAL_CONFIG_CPU_IDLE_STATE - bl opal_call_realmode - b timebase_resync - -/* - * R3 here contains the value that will be returned to the caller - * of power7_nap. - */ -_GLOBAL(power7_wakeup_loss) - ld r1,PACAR1(r13) -BEGIN_FTR_SECTION - CHECK_HMI_INTERRUPT -END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) - REST_NVGPRS(r1) - REST_GPR(2, r1) - ld r6,_CCR(r1) - ld r4,_MSR(r1) - ld r5,_NIP(r1) - addi r1,r1,INT_FRAME_SIZE - mtcr r6 - mtspr SPRN_SRR1,r4 - mtspr SPRN_SRR0,r5 - rfid - -/* - * R3 here contains the value that will be returned to the caller - * of power7_nap. - */ -_GLOBAL(power7_wakeup_noloss) - lbz r0,PACA_NAPSTATELOST(r13) - cmpwi r0,0 - bne power7_wakeup_loss -BEGIN_FTR_SECTION - CHECK_HMI_INTERRUPT -END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) - ld r1,PACAR1(r13) - ld r6,_CCR(r1) - ld r4,_MSR(r1) - ld r5,_NIP(r1) - addi r1,r1,INT_FRAME_SIZE - mtcr r6 - mtspr SPRN_SRR1,r4 - mtspr SPRN_SRR0,r5 - rfid -- cgit v0.10.2 From 5fa6b6bd7adf347f2989560e7a3b7f806be0187f Mon Sep 17 00:00:00 2001 From: "Shreyas B. Prabhu" Date: Fri, 8 Jul 2016 11:50:46 +0530 Subject: powerpc/powernv: Rename reusable idle functions to hardware agnostic names Functions like power7_wakeup_loss, power7_wakeup_noloss, power7_wakeup_tb_loss are used by POWER7 and POWER8 hardware. They can also be used by POWER9. Hence rename these functions to hardware agnostic names. Suggested-by: Gautham R. Shenoy Signed-off-by: Shreyas B.
Prabhu Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 612a65b..5c009c5 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -108,7 +108,7 @@ BEGIN_FTR_SECTION cmpwi cr3,r13,2 GET_PACA(r13) - bl power7_restore_hyp_resource + bl pnv_restore_hyp_resource li r0,PNV_THREAD_RUNNING stb r0,PACA_THREAD_IDLE_STATE(r13) /* Clear thread state */ @@ -128,8 +128,8 @@ BEGIN_FTR_SECTION /* Return SRR1 from power7_nap() */ mfspr r3,SPRN_SRR1 blt cr3,2f - b power7_wakeup_loss -2: b power7_wakeup_noloss + b pnv_wakeup_loss +2: b pnv_wakeup_noloss 9: END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) @@ -1269,7 +1269,7 @@ machine_check_handle_early: GET_PACA(r13) ld r1,PACAR1(r13) li r3,PNV_THREAD_NAP - b power7_enter_nap_mode + b pnv_enter_arch207_idle_mode 4: #endif /* diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index d5def06..34dbfc9 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -1,5 +1,6 @@ /* - * This file contains the power_save function for Power7 CPUs. + * This file contains idle entry/exit functions for POWER7 and + * POWER8 CPUs. 
* * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -75,7 +76,7 @@ core_idle_lock_held: * 0 - don't check * 1 - check */ -_GLOBAL(power7_powersave_common) +_GLOBAL(pnv_powersave_common) /* Use r3 to pass state nap/sleep/winkle */ /* NAP is a state loss, we create a regs frame on the * stack, fill it up with the state we care about and @@ -135,14 +136,14 @@ _GLOBAL(power7_powersave_common) LOAD_REG_IMMEDIATE(r5, MSR_IDLE) li r6, MSR_RI andc r6, r9, r6 - LOAD_REG_ADDR(r7, power7_enter_nap_mode) + LOAD_REG_ADDR(r7, pnv_enter_arch207_idle_mode) mtmsrd r6, 1 /* clear RI before setting SRR0/1 */ mtspr SPRN_SRR0, r7 mtspr SPRN_SRR1, r5 rfid - .globl power7_enter_nap_mode -power7_enter_nap_mode: + .globl pnv_enter_arch207_idle_mode +pnv_enter_arch207_idle_mode: #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE /* Tell KVM we're napping */ li r4,KVM_HWTHREAD_IN_NAP @@ -242,19 +243,19 @@ _GLOBAL(power7_idle) _GLOBAL(power7_nap) mr r4,r3 li r3,PNV_THREAD_NAP - b power7_powersave_common + b pnv_powersave_common /* No return */ _GLOBAL(power7_sleep) li r3,PNV_THREAD_SLEEP li r4,1 - b power7_powersave_common + b pnv_powersave_common /* No return */ _GLOBAL(power7_winkle) li r3,PNV_THREAD_WINKLE li r4,1 - b power7_powersave_common + b pnv_powersave_common /* No return */ #define CHECK_HMI_INTERRUPT \ @@ -284,7 +285,7 @@ ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66); \ * r13 - Contents of HSPRG0 * cr3 - set to gt if waking up with partial/complete hypervisor state loss */ -_GLOBAL(power7_restore_hyp_resource) +_GLOBAL(pnv_restore_hyp_resource) /* * Check if last bit of HSPGR0 is set. This indicates whether we are * waking up from winkle. 
@@ -296,7 +297,7 @@ _GLOBAL(power7_restore_hyp_resource) lbz r0,PACA_THREAD_IDLE_STATE(r13) cmpwi cr2,r0,PNV_THREAD_NAP - bgt cr2,power7_wakeup_tb_loss /* Either sleep or Winkle */ + bgt cr2,pnv_wakeup_tb_loss /* Either sleep or Winkle */ /* * We fall through here if PACA_THREAD_IDLE_STATE shows we are waking @@ -306,10 +307,10 @@ _GLOBAL(power7_restore_hyp_resource) bgt cr3,. blr /* Return back to System Reset vector from where - power7_restore_hyp_resource was invoked */ + pnv_restore_hyp_resource was invoked */ -_GLOBAL(power7_wakeup_tb_loss) +_GLOBAL(pnv_wakeup_tb_loss) ld r2,PACATOC(r13); ld r1,PACAR1(r13) /* @@ -476,7 +477,7 @@ hypervisor_state_restored: mtspr SPRN_SRR1,r16 mtlr r17 blr /* Return back to System Reset vector from where - power7_restore_hyp_resource was invoked */ + pnv_restore_hyp_resource was invoked */ fastsleep_workaround_at_exit: li r3,1 @@ -489,7 +490,7 @@ fastsleep_workaround_at_exit: * R3 here contains the value that will be returned to the caller * of power7_nap. */ -_GLOBAL(power7_wakeup_loss) +_GLOBAL(pnv_wakeup_loss) ld r1,PACAR1(r13) BEGIN_FTR_SECTION CHECK_HMI_INTERRUPT @@ -509,10 +510,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) * R3 here contains the value that will be returned to the caller * of power7_nap. */ -_GLOBAL(power7_wakeup_noloss) +_GLOBAL(pnv_wakeup_noloss) lbz r0,PACA_NAPSTATELOST(r13) cmpwi r0,0 - bne power7_wakeup_loss + bne pnv_wakeup_loss BEGIN_FTR_SECTION CHECK_HMI_INTERRUPT END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index e571ad2..86f0cae 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -392,7 +392,7 @@ kvm_no_guest: cmpwi r3, 0 bne 54f /* - * We jump to power7_wakeup_loss, which will return to the caller + * We jump to pnv_wakeup_loss, which will return to the caller * of power7_nap in the powernv cpu offline loop. 
The value we * put in r3 becomes the return value for power7_nap. */ @@ -401,7 +401,7 @@ kvm_no_guest: rlwimi r4, r3, 0, LPCR_PECE0 | LPCR_PECE1 mtspr SPRN_LPCR, r4 li r3, 0 - b power7_wakeup_loss + b pnv_wakeup_loss 53: HMT_LOW ld r5, HSTATE_KVM_VCORE(r13) -- cgit v0.10.2 From 4eae2c9ae54a5ef3ca32370e3b28a6f83c61401f Mon Sep 17 00:00:00 2001 From: "Shreyas B. Prabhu" Date: Fri, 8 Jul 2016 11:50:47 +0530 Subject: powerpc/powernv: Make pnv_powersave_common more generic pnv_powersave_common does common steps needed before entering idle state and eventually changes MSR to MSR_IDLE and does rfid to pnv_enter_arch207_idle_mode. Move the updation of HSTATE_HWTHREAD_STATE to pnv_powersave_common from pnv_enter_arch207_idle_mode and make it more generic by passing the rfid address as a function parameter. Reviewed-by: Gautham R. Shenoy Signed-off-by: Shreyas B. Prabhu Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index 34dbfc9..a8397e3 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -75,6 +75,8 @@ core_idle_lock_held: * To check IRQ_HAPPENED in r4 * 0 - don't check * 1 - check + * + * Address to 'rfid' to in r5 */ _GLOBAL(pnv_powersave_common) /* Use r3 to pass state nap/sleep/winkle */ @@ -127,28 +129,28 @@ _GLOBAL(pnv_powersave_common) std r9,_MSR(r1) std r1,PACAR1(r13) +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + /* Tell KVM we're entering idle */ + li r4,KVM_HWTHREAD_IN_NAP + stb r4,HSTATE_HWTHREAD_STATE(r13) +#endif + /* * Go to real mode to do the nap, as required by the architecture. * Also, we need to be in real mode before setting hwthread_state, * because as soon as we do that, another thread can switch * the MMU context to the guest. 
*/ - LOAD_REG_IMMEDIATE(r5, MSR_IDLE) + LOAD_REG_IMMEDIATE(r7, MSR_IDLE) li r6, MSR_RI andc r6, r9, r6 - LOAD_REG_ADDR(r7, pnv_enter_arch207_idle_mode) mtmsrd r6, 1 /* clear RI before setting SRR0/1 */ - mtspr SPRN_SRR0, r7 - mtspr SPRN_SRR1, r5 + mtspr SPRN_SRR0, r5 + mtspr SPRN_SRR1, r7 rfid .globl pnv_enter_arch207_idle_mode pnv_enter_arch207_idle_mode: -#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE - /* Tell KVM we're napping */ - li r4,KVM_HWTHREAD_IN_NAP - stb r4,HSTATE_HWTHREAD_STATE(r13) -#endif stb r3,PACA_THREAD_IDLE_STATE(r13) cmpwi cr3,r3,PNV_THREAD_SLEEP bge cr3,2f @@ -243,18 +245,21 @@ _GLOBAL(power7_idle) _GLOBAL(power7_nap) mr r4,r3 li r3,PNV_THREAD_NAP + LOAD_REG_ADDR(r5, pnv_enter_arch207_idle_mode) b pnv_powersave_common /* No return */ _GLOBAL(power7_sleep) li r3,PNV_THREAD_SLEEP li r4,1 + LOAD_REG_ADDR(r5, pnv_enter_arch207_idle_mode) b pnv_powersave_common /* No return */ _GLOBAL(power7_winkle) li r3,PNV_THREAD_WINKLE li r4,1 + LOAD_REG_ADDR(r5, pnv_enter_arch207_idle_mode) b pnv_powersave_common /* No return */ -- cgit v0.10.2 From 0dfffb48cecd8f84c6e649baee9bacd9be925734 Mon Sep 17 00:00:00 2001 From: "Shreyas B. Prabhu" Date: Fri, 8 Jul 2016 11:50:48 +0530 Subject: powerpc/powernv: abstraction for saving SPRs before entering deep idle states Create a function for saving SPRs before entering deep idle states. This function can be reused for POWER9 deep idle states. Reviewed-by: Gautham R. Shenoy Signed-off-by: Shreyas B. Prabhu Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index a8397e3..2f909a1 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -53,6 +53,36 @@ .text /* + * Used by threads before entering deep idle states. 
Saves SPRs + * in interrupt stack frame + */ +save_sprs_to_stack: + /* + * Note all register i.e per-core, per-subcore or per-thread is saved + * here since any thread in the core might wake up first + */ + mfspr r3,SPRN_SDR1 + std r3,_SDR1(r1) + mfspr r3,SPRN_RPR + std r3,_RPR(r1) + mfspr r3,SPRN_SPURR + std r3,_SPURR(r1) + mfspr r3,SPRN_PURR + std r3,_PURR(r1) + mfspr r3,SPRN_TSCR + std r3,_TSCR(r1) + mfspr r3,SPRN_DSCR + std r3,_DSCR(r1) + mfspr r3,SPRN_AMOR + std r3,_AMOR(r1) + mfspr r3,SPRN_WORT + std r3,_WORT(r1) + mfspr r3,SPRN_WORC + std r3,_WORC(r1) + + blr + +/* * Used by threads when the lock bit of core_idle_state is set. * Threads will spin in HMT_LOW until the lock bit is cleared. * r14 - pointer to core_idle_state @@ -209,28 +239,8 @@ fastsleep_workaround_at_entry: b common_enter enter_winkle: - /* - * Note all register i.e per-core, per-subcore or per-thread is saved - * here since any thread in the core might wake up first - */ - mfspr r3,SPRN_SDR1 - std r3,_SDR1(r1) - mfspr r3,SPRN_RPR - std r3,_RPR(r1) - mfspr r3,SPRN_SPURR - std r3,_SPURR(r1) - mfspr r3,SPRN_PURR - std r3,_PURR(r1) - mfspr r3,SPRN_TSCR - std r3,_TSCR(r1) - mfspr r3,SPRN_DSCR - std r3,_DSCR(r1) - mfspr r3,SPRN_AMOR - std r3,_AMOR(r1) - mfspr r3,SPRN_WORT - std r3,_WORT(r1) - mfspr r3,SPRN_WORC - std r3,_WORC(r1) + bl save_sprs_to_stack + IDLE_STATE_ENTER_SEQ(PPC_WINKLE) _GLOBAL(power7_idle) -- cgit v0.10.2 From bcef83a00dc44ee25ff4d6e078cf6432ddf74dec Mon Sep 17 00:00:00 2001 From: "Shreyas B. Prabhu" Date: Fri, 8 Jul 2016 11:50:49 +0530 Subject: powerpc/powernv: Add platform support for stop instruction POWER ISA v3 defines a new idle processor core mechanism. In summary, a) new instruction named stop is added. This instruction replaces instructions like nap, sleep, rvwinkle. b) new per thread SPR named Processor Stop Status and Control Register (PSSCR) is added which controls the behavior of stop instruction. 
PSSCR layout: ---------------------------------------------------------- | PLS | /// | SD | ESL | EC | PSLL | /// | TR | MTL | RL | ---------------------------------------------------------- 0 4 41 42 43 44 48 54 56 60 PSSCR key fields: Bits 0:3 - Power-Saving Level Status. This field indicates the lowest power-saving state the thread entered since stop instruction was last executed. Bit 42 - Enable State Loss 0 - No state is lost irrespective of other fields 1 - Allows state loss Bits 44:47 - Power-Saving Level Limit This limits the power-saving level that can be entered into. Bits 60:63 - Requested Level Used to specify which power-saving level must be entered on executing stop instruction This patch adds support for stop instruction and PSSCR handling. Reviewed-by: Gautham R. Shenoy Signed-off-by: Shreyas B. Prabhu Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/include/asm/cpuidle.h b/arch/powerpc/include/asm/cpuidle.h index d2f99ca..3d7fc06 100644 --- a/arch/powerpc/include/asm/cpuidle.h +++ b/arch/powerpc/include/asm/cpuidle.h @@ -13,6 +13,8 @@ #ifndef __ASSEMBLY__ extern u32 pnv_fastsleep_workaround_at_entry[]; extern u32 pnv_fastsleep_workaround_at_exit[]; + +extern u64 pnv_first_deep_stop_state; #endif #endif diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h index 72b6225..d318d43 100644 --- a/arch/powerpc/include/asm/kvm_book3s_asm.h +++ b/arch/powerpc/include/asm/kvm_book3s_asm.h @@ -162,7 +162,7 @@ struct kvmppc_book3s_shadow_vcpu { /* Values for kvm_state */ #define KVM_HWTHREAD_IN_KERNEL 0 -#define KVM_HWTHREAD_IN_NAP 1 +#define KVM_HWTHREAD_IN_IDLE 1 #define KVM_HWTHREAD_IN_KVM 2 #endif /* __ASM_KVM_BOOK3S_ASM_H__ */ diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h index 72b5f27..6de1e4e 100644 --- a/arch/powerpc/include/asm/opal-api.h +++ b/arch/powerpc/include/asm/opal-api.h @@ -166,13 +166,20 @@ /* Device tree flags */ -/* Flags set in power-mgmt nodes 
in device tree if - * respective idle states are supported in the platform. +/* + * Flags set in power-mgmt nodes in device tree describing + * idle states that are supported in the platform. */ + +#define OPAL_PM_TIMEBASE_STOP 0x00000002 +#define OPAL_PM_LOSE_HYP_CONTEXT 0x00002000 +#define OPAL_PM_LOSE_FULL_CONTEXT 0x00004000 #define OPAL_PM_NAP_ENABLED 0x00010000 #define OPAL_PM_SLEEP_ENABLED 0x00020000 #define OPAL_PM_WINKLE_ENABLED 0x00040000 #define OPAL_PM_SLEEP_ENABLED_ER1 0x00080000 /* with workaround */ +#define OPAL_PM_STOP_INST_FAST 0x00100000 +#define OPAL_PM_STOP_INST_DEEP 0x00200000 /* * OPAL_CONFIG_CPU_IDLE_STATE parameters diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index 9de9df1..81657a1 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -205,6 +205,8 @@ #define PPC_INST_SLEEP 0x4c0003a4 #define PPC_INST_WINKLE 0x4c0003e4 +#define PPC_INST_STOP 0x4c0002e4 + /* A2 specific instructions */ #define PPC_INST_ERATWE 0x7c0001a6 #define PPC_INST_ERATRE 0x7c000166 @@ -394,6 +396,8 @@ #define PPC_SLEEP stringify_in_c(.long PPC_INST_SLEEP) #define PPC_WINKLE stringify_in_c(.long PPC_INST_WINKLE) +#define PPC_STOP stringify_in_c(.long PPC_INST_STOP) + /* BHRB instructions */ #define PPC_CLRBHRB stringify_in_c(.long PPC_INST_CLRBHRB) #define PPC_MFBHRBE(r, n) stringify_in_c(.long PPC_INST_BHRBE | \ diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index b5925d5..68e3bf5 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -460,6 +460,8 @@ extern int powersave_nap; /* set if nap mode can be used in idle loop */ extern unsigned long power7_nap(int check_irq); extern unsigned long power7_sleep(void); extern unsigned long power7_winkle(void); +extern unsigned long power9_idle_stop(unsigned long stop_level); + extern void flush_instruction_cache(void); extern void hard_reset_now(void); 
extern void poweroff_now(void); diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index ac4be83..c0263a2 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -145,6 +145,15 @@ #define MSR_64BIT 0 #endif +/* Power Management - Processor Stop Status and Control Register Fields */ +#define PSSCR_RL_MASK 0x0000000F /* Requested Level */ +#define PSSCR_MTL_MASK 0x000000F0 /* Maximum Transition Level */ +#define PSSCR_TR_MASK 0x00000300 /* Transition State */ +#define PSSCR_PSLL_MASK 0x000F0000 /* Power-Saving Level Limit */ +#define PSSCR_EC 0x00100000 /* Exit Criterion */ +#define PSSCR_ESL 0x00200000 /* Enable State Loss */ +#define PSSCR_SD 0x00400000 /* Status Disable */ + /* Floating Point Status and Control Register (FPSCR) Fields */ #define FPSCR_FX 0x80000000 /* FPU exception summary */ #define FPSCR_FEX 0x40000000 /* FPU enabled exception summary */ @@ -291,6 +300,7 @@ #define SPRN_PMICR 0x354 /* Power Management Idle Control Reg */ #define SPRN_PMSR 0x355 /* Power Management Status Reg */ #define SPRN_PMMAR 0x356 /* Power Management Memory Activity Register */ +#define SPRN_PSSCR 0x357 /* Processor Stop Status and Control Register (ISA 3.0) */ #define SPRN_PMCR 0x374 /* Power Management Control Register */ /* HFSCR and FSCR bit numbers are the same */ diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index 2f909a1..1f564eb 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -1,6 +1,6 @@ /* - * This file contains idle entry/exit functions for POWER7 and - * POWER8 CPUs. + * This file contains idle entry/exit functions for POWER7, + * POWER8 and POWER9 CPUs. 
* * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -21,6 +21,7 @@ #include #include #include +#include #undef DEBUG @@ -37,6 +38,11 @@ #define _AMOR GPR9 #define _WORT GPR10 #define _WORC GPR11 +#define _PTCR GPR12 + +#define PSSCR_HV_TEMPLATE PSSCR_ESL | PSSCR_EC | \ + PSSCR_PSLL_MASK | PSSCR_TR_MASK | \ + PSSCR_MTL_MASK /* Idle state entry routines */ @@ -61,8 +67,17 @@ save_sprs_to_stack: * Note all register i.e per-core, per-subcore or per-thread is saved * here since any thread in the core might wake up first */ +BEGIN_FTR_SECTION + mfspr r3,SPRN_PTCR + std r3,_PTCR(r1) + /* + * Note - SDR1 is dropped in Power ISA v3. Hence not restoring + * SDR1 here + */ +FTR_SECTION_ELSE mfspr r3,SPRN_SDR1 std r3,_SDR1(r1) +ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300) mfspr r3,SPRN_RPR std r3,_RPR(r1) mfspr r3,SPRN_SPURR @@ -100,7 +115,8 @@ core_idle_lock_held: /* * Pass requested state in r3: - * r3 - PNV_THREAD_NAP/SLEEP/WINKLE + * r3 - PNV_THREAD_NAP/SLEEP/WINKLE in POWER8 + * - Requested STOP state in POWER9 * * To check IRQ_HAPPENED in r4 * 0 - don't check @@ -161,7 +177,7 @@ _GLOBAL(pnv_powersave_common) #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE /* Tell KVM we're entering idle */ - li r4,KVM_HWTHREAD_IN_NAP + li r4,KVM_HWTHREAD_IN_IDLE stb r4,HSTATE_HWTHREAD_STATE(r13) #endif @@ -243,6 +259,41 @@ enter_winkle: IDLE_STATE_ENTER_SEQ(PPC_WINKLE) +/* + * r3 - requested stop state + */ +power_enter_stop: +/* + * Check if the requested state is a deep idle state. + */ + LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state) + ld r4,ADDROFF(pnv_first_deep_stop_state)(r5) + cmpd r3,r4 + bge 2f + IDLE_STATE_ENTER_SEQ(PPC_STOP) +2: +/* + * Entering deep idle state. + * Clear thread bit in PACA_CORE_IDLE_STATE, save SPRs to + * stack and enter stop + */ + lbz r7,PACA_THREAD_MASK(r13) + ld r14,PACA_CORE_IDLE_STATE_PTR(r13) + +lwarx_loop_stop: + lwarx r15,0,r14 + andi. 
r9,r15,PNV_CORE_IDLE_LOCK_BIT + bnel core_idle_lock_held + andc r15,r15,r7 /* Clear thread bit */ + + stwcx. r15,0,r14 + bne- lwarx_loop_stop + isync + + bl save_sprs_to_stack + + IDLE_STATE_ENTER_SEQ(PPC_STOP) + _GLOBAL(power7_idle) /* Now check if user or arch enabled NAP mode */ LOAD_REG_ADDRBASE(r3,powersave_nap) @@ -293,6 +344,17 @@ ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66); \ /* + * r3 - requested stop state + */ +_GLOBAL(power9_idle_stop) + LOAD_REG_IMMEDIATE(r4, PSSCR_HV_TEMPLATE) + or r4,r4,r3 + mtspr SPRN_PSSCR, r4 + li r4, 1 + LOAD_REG_ADDR(r5,power_enter_stop) + b pnv_powersave_common + /* No return */ +/* * Called from reset vector. Check whether we have woken up with * hypervisor state loss. If yes, restore hypervisor state and return * back to reset vector. @@ -301,7 +363,33 @@ ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66); \ * cr3 - set to gt if waking up with partial/complete hypervisor state loss */ _GLOBAL(pnv_restore_hyp_resource) + ld r2,PACATOC(r13); +BEGIN_FTR_SECTION + /* + * POWER ISA 3. Use PSSCR to determine if we + * are waking up from deep idle state + */ + LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state) + ld r4,ADDROFF(pnv_first_deep_stop_state)(r5) + + mfspr r5,SPRN_PSSCR /* + * 0-3 bits correspond to Power-Saving Level Status + * which indicates the idle state we are waking up from + */ + rldicl r5,r5,4,60 + cmpd cr4,r5,r4 + bge cr4,pnv_wakeup_tb_loss + /* + * Waking up without hypervisor state loss. Return to + * reset vector + */ + blr + +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) + + /* + * POWER ISA 2.07 or less. * Check if last bit of HSPGR0 is set. This indicates whether we are * waking up from winkle. */ @@ -324,9 +412,17 @@ _GLOBAL(pnv_restore_hyp_resource) blr /* Return back to System Reset vector from where pnv_restore_hyp_resource was invoked */ - +/* + * Called if waking up from idle state which can cause either partial or + * complete hyp state loss. 
+ * In POWER8, called if waking up from fastsleep or winkle + * In POWER9, called if waking up from stop state >= pnv_first_deep_stop_state + * + * r13 - PACA + * cr3 - gt if waking up with partial/complete hypervisor state loss + * cr4 - eq if waking up from complete hypervisor state loss. + */ _GLOBAL(pnv_wakeup_tb_loss) - ld r2,PACATOC(r13); ld r1,PACAR1(r13) /* * Before entering any idle state, the NVGPRs are saved in the stack @@ -361,35 +457,35 @@ lwarx_loop2: bnel core_idle_lock_held cmpwi cr2,r15,0 - lbz r4,PACA_SUBCORE_SIBLING_MASK(r13) - and r4,r4,r15 - cmpwi cr1,r4,0 /* Check if first in subcore */ /* * At this stage - * cr1 - 0b0100 if first thread to wakeup in subcore - * cr2 - 0b0100 if first thread to wakeup in core - * cr3- 0b0010 if waking up from sleep or winkle - * cr4 - 0b0100 if waking up from winkle + * cr2 - eq if first thread to wakeup in core + * cr3- gt if waking up with partial/complete hypervisor state loss + * cr4 - eq if waking up from complete hypervisor state loss. */ - or r15,r15,r7 /* Set thread bit */ - - beq cr1,first_thread_in_subcore - - /* Not first thread in subcore to wake up */ - stwcx. r15,0,r14 - bne- lwarx_loop2 - isync - b common_exit - -first_thread_in_subcore: - /* First thread in subcore to wakeup */ ori r15,r15,PNV_CORE_IDLE_LOCK_BIT stwcx. r15,0,r14 bne- lwarx_loop2 isync +BEGIN_FTR_SECTION + lbz r4,PACA_SUBCORE_SIBLING_MASK(r13) + and r4,r4,r15 + cmpwi r4,0 /* Check if first in subcore */ + + or r15,r15,r7 /* Set thread bit */ + beq first_thread_in_subcore +END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) + + or r15,r15,r7 /* Set thread bit */ + beq cr2,first_thread_in_core + + /* Not first thread in core or subcore to wake up */ + b clear_lock + +first_thread_in_subcore: /* * If waking up from sleep, subcore state is not lost. 
Hence * skip subcore state restore @@ -399,6 +495,7 @@ first_thread_in_subcore: /* Restore per-subcore state */ ld r4,_SDR1(r1) mtspr SPRN_SDR1,r4 + ld r4,_RPR(r1) mtspr SPRN_RPR,r4 ld r4,_AMOR(r1) @@ -414,19 +511,23 @@ subcore_state_restored: first_thread_in_core: /* - * First thread in the core waking up from fastsleep. It needs to + * First thread in the core waking up from any state which can cause + * partial or complete hypervisor state loss. It needs to * call the fastsleep workaround code if the platform requires it. * Call it unconditionally here. The below branch instruction will - * be patched out when the idle states are discovered if platform - * does not require workaround. + * be patched out if the platform does not have fastsleep or does not + * require the workaround. Patching will be performed during the + * discovery of idle-states. */ .global pnv_fastsleep_workaround_at_exit pnv_fastsleep_workaround_at_exit: b fastsleep_workaround_at_exit timebase_resync: - /* Do timebase resync if we are waking up from sleep. Use cr3 value - * set in exceptions-64s.S */ + /* + * Use cr3 which indicates that we are waking up with at least partial + * hypervisor state loss to determine if TIMEBASE RESYNC is needed. + */ ble cr3,clear_lock /* Time base re-sync */ li r0,OPAL_RESYNC_TIMEBASE @@ -439,7 +540,18 @@ timebase_resync: */ bne cr4,clear_lock - /* Restore per core state */ + /* + * First thread in the core to wake up and it is waking up with + * complete hypervisor state loss. Restore per core hypervisor + * state.
+ */ +BEGIN_FTR_SECTION + ld r4,_PTCR(r1) + mtspr SPRN_PTCR,r4 + ld r4,_RPR(r1) + mtspr SPRN_RPR,r4 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) + ld r4,_TSCR(r1) mtspr SPRN_TSCR,r4 ld r4,_WORC(r1) @@ -461,9 +573,9 @@ common_exit: /* Waking up from winkle */ - /* Restore per thread state */ - bl __restore_cpu_power8 - +BEGIN_MMU_FTR_SECTION + b no_segments +END_MMU_FTR_SECTION_IFSET(MMU_FTR_RADIX) /* Restore SLB from PACA */ ld r8,PACA_SLBSHADOWPTR(r13) @@ -477,6 +589,9 @@ common_exit: slbmte r6,r5 1: addi r8,r8,16 .endr +no_segments: + + /* Restore per thread state */ ld r4,_SPURR(r1) mtspr SPRN_SPURR,r4 @@ -487,6 +602,16 @@ common_exit: ld r4,_WORT(r1) mtspr SPRN_WORT,r4 + /* Call cur_cpu_spec->cpu_restore() */ + LOAD_REG_ADDR(r4, cur_cpu_spec) + ld r4,0(r4) + ld r12,CPU_SPEC_RESTORE(r4) +#ifdef PPC64_ELF_ABI_v1 + ld r12,0(r12) +#endif + mtctr r12 + bctrl + hypervisor_state_restored: mtspr SPRN_SRR1,r16 diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c index 8a77f5c..8219e22 100644 --- a/arch/powerpc/platforms/powernv/idle.c +++ b/arch/powerpc/platforms/powernv/idle.c @@ -27,9 +27,12 @@ #include "powernv.h" #include "subcore.h" +/* Power ISA 3.0 allows for stop states 0x0 - 0xF */ +#define MAX_STOP_STATE 0xF + static u32 supported_cpuidle_states; -static int pnv_save_sprs_for_winkle(void) +static int pnv_save_sprs_for_deep_states(void) { int cpu; int rc; @@ -50,15 +53,19 @@ static int pnv_save_sprs_for_winkle(void) uint64_t pir = get_hard_smp_processor_id(cpu); uint64_t hsprg0_val = (uint64_t)&paca[cpu]; - /* - * HSPRG0 is used to store the cpu's pointer to paca. Hence last - * 3 bits are guaranteed to be 0. Program slw to restore HSPRG0 - * with 63rd bit set, so that when a thread wakes up at 0x100 we - * can use this bit to distinguish between fastsleep and - * deep winkle. - */ - hsprg0_val |= 1; - + if (!cpu_has_feature(CPU_FTR_ARCH_300)) { + /* + * HSPRG0 is used to store the cpu's pointer to paca. 
+ * Hence last 3 bits are guaranteed to be 0. Program + * slw to restore HSPRG0 with 63rd bit set, so that + * when a thread wakes up at 0x100 we can use this bit + * to distinguish between fastsleep and deep winkle. + * This is not necessary with stop/psscr since PLS + * field of psscr indicates which state we are waking + * up from. + */ + hsprg0_val |= 1; + } rc = opal_slw_set_reg(pir, SPRN_HSPRG0, hsprg0_val); if (rc != 0) return rc; @@ -130,8 +137,8 @@ static void pnv_alloc_idle_core_states(void) update_subcore_sibling_mask(); - if (supported_cpuidle_states & OPAL_PM_WINKLE_ENABLED) - pnv_save_sprs_for_winkle(); + if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT) + pnv_save_sprs_for_deep_states(); } u32 pnv_get_supported_cpuidle_states(void) @@ -230,43 +237,151 @@ static DEVICE_ATTR(fastsleep_workaround_applyonce, 0600, show_fastsleep_workaround_applyonce, store_fastsleep_workaround_applyonce); -static int __init pnv_init_idle_states(void) + +/* + * Used for ppc_md.power_save which needs a function with no parameters + */ +static void power9_idle(void) { - struct device_node *power_mgt; - int dt_idle_states; - u32 *flags; - int i; + /* Requesting stop state 0 */ + power9_idle_stop(0); +} +/* + * First deep stop state. Used to figure out when to save/restore + * hypervisor context. + */ +u64 pnv_first_deep_stop_state = MAX_STOP_STATE; - supported_cpuidle_states = 0; +/* + * Power ISA 3.0 idle initialization. + * + * POWER ISA 3.0 defines a new SPR Processor stop Status and Control + * Register (PSSCR) to control idle behavior. + * + * PSSCR layout: + * ---------------------------------------------------------- + * | PLS | /// | SD | ESL | EC | PSLL | /// | TR | MTL | RL | + * ---------------------------------------------------------- + * 0 4 41 42 43 44 48 54 56 60 + * + * PSSCR key fields: + * Bits 0:3 - Power-Saving Level Status (PLS). 
This field indicates the + * lowest power-saving state the thread entered since stop instruction was + * last executed. + * + * Bit 41 - Status Disable(SD) + * 0 - Shows PLS entries + * 1 - PLS entries are all 0 + * + * Bit 42 - Enable State Loss + * 0 - No state is lost irrespective of other fields + * 1 - Allows state loss + * + * Bit 43 - Exit Criterion + * 0 - Exit from power-save mode on any interrupt + * 1 - Exit from power-save mode controlled by LPCR's PECE bits + * + * Bits 44:47 - Power-Saving Level Limit + * This limits the power-saving level that can be entered into. + * + * Bits 60:63 - Requested Level + * Used to specify which power-saving level must be entered on executing + * stop instruction + * + * @np: /ibm,opal/power-mgt device node + * @flags: cpu-idle-state-flags array + * @dt_idle_states: Number of idle state entries + * Returns 0 on success + */ +static int __init pnv_arch300_idle_init(struct device_node *np, u32 *flags, + int dt_idle_states) +{ + u64 *psscr_val = NULL; + int rc = 0, i; - if (cpuidle_disable != IDLE_NO_OVERRIDE) + psscr_val = kcalloc(dt_idle_states, sizeof(*psscr_val), + GFP_KERNEL); + if (!psscr_val) { + rc = -1; goto out; - - if (!firmware_has_feature(FW_FEATURE_OPAL)) + } + if (of_property_read_u64_array(np, + "ibm,cpu-idle-state-psscr", + psscr_val, dt_idle_states)) { + pr_warn("cpuidle-powernv: missing ibm,cpu-idle-states-psscr in DT\n"); + rc = -1; goto out; + } - power_mgt = of_find_node_by_path("/ibm,opal/power-mgt"); - if (!power_mgt) { + /* + * Set pnv_first_deep_stop_state to the first stop level + * to cause hypervisor state loss + */ + pnv_first_deep_stop_state = MAX_STOP_STATE; + for (i = 0; i < dt_idle_states; i++) { + u64 psscr_rl = psscr_val[i] & PSSCR_RL_MASK; + + if ((flags[i] & OPAL_PM_LOSE_FULL_CONTEXT) && + (pnv_first_deep_stop_state > psscr_rl)) + pnv_first_deep_stop_state = psscr_rl; + } + +out: + kfree(psscr_val); + return rc; +} + +/* + * Probe device tree for supported idle states + */ +static void 
__init pnv_probe_idle_states(void) +{ + struct device_node *np; + int dt_idle_states; + u32 *flags = NULL; + int i; + + np = of_find_node_by_path("/ibm,opal/power-mgt"); + if (!np) { pr_warn("opal: PowerMgmt Node not found\n"); goto out; } - dt_idle_states = of_property_count_u32_elems(power_mgt, + dt_idle_states = of_property_count_u32_elems(np, "ibm,cpu-idle-state-flags"); if (dt_idle_states < 0) { pr_warn("cpuidle-powernv: no idle states found in the DT\n"); goto out; } - flags = kzalloc(sizeof(*flags) * dt_idle_states, GFP_KERNEL); - if (of_property_read_u32_array(power_mgt, + flags = kcalloc(dt_idle_states, sizeof(*flags), GFP_KERNEL); + + if (of_property_read_u32_array(np, "ibm,cpu-idle-state-flags", flags, dt_idle_states)) { pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-flags in DT\n"); - goto out_free; + goto out; + } + + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + if (pnv_arch300_idle_init(np, flags, dt_idle_states)) + goto out; } for (i = 0; i < dt_idle_states; i++) supported_cpuidle_states |= flags[i]; +out: + kfree(flags); +} +static int __init pnv_init_idle_states(void) +{ + + supported_cpuidle_states = 0; + + if (cpuidle_disable != IDLE_NO_OVERRIDE) + goto out; + + pnv_probe_idle_states(); + if (!(supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) { patch_instruction( (unsigned int *)pnv_fastsleep_workaround_at_entry, @@ -288,8 +403,9 @@ static int __init pnv_init_idle_states(void) if (supported_cpuidle_states & OPAL_PM_NAP_ENABLED) ppc_md.power_save = power7_idle; -out_free: - kfree(flags); + else if (supported_cpuidle_states & OPAL_PM_STOP_INST_FAST) + ppc_md.power_save = power9_idle; + out: return 0; } -- cgit v0.10.2 From 169f3fae0c6d0024e6a957df9977e04695bf52f2 Mon Sep 17 00:00:00 2001 From: "Shreyas B. Prabhu" Date: Fri, 8 Jul 2016 11:50:50 +0530 Subject: cpuidle/powernv: Use CPUIDLE_STATE_MAX instead of MAX_POWERNV_IDLE_STATES Use cpuidle's CPUIDLE_STATE_MAX macro instead of powernv specific MAX_POWERNV_IDLE_STATES. Cc: Rafael J. 
Wysocki Cc: Daniel Lezcano Cc: linux-pm@vger.kernel.org Acked-by: Daniel Lezcano Signed-off-by: Shreyas B. Prabhu Signed-off-by: Michael Ellerman diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c index e12dc30..3a763a8 100644 --- a/drivers/cpuidle/cpuidle-powernv.c +++ b/drivers/cpuidle/cpuidle-powernv.c @@ -20,8 +20,6 @@ #include #include -#define MAX_POWERNV_IDLE_STATES 8 - struct cpuidle_driver powernv_idle_driver = { .name = "powernv_idle", .owner = THIS_MODULE, @@ -96,7 +94,7 @@ static int fastsleep_loop(struct cpuidle_device *dev, /* * States for dedicated partition case. */ -static struct cpuidle_state powernv_states[MAX_POWERNV_IDLE_STATES] = { +static struct cpuidle_state powernv_states[CPUIDLE_STATE_MAX] = { { /* Snooze */ .name = "snooze", .desc = "snooze", -- cgit v0.10.2 From 957efcedeef4b02f2f7e5776d64448e5c40462b4 Mon Sep 17 00:00:00 2001 From: "Shreyas B. Prabhu" Date: Fri, 8 Jul 2016 11:50:51 +0530 Subject: cpuidle/powernv: cleanup cpuidle-powernv.c - Use stack instead of kzalloc'ed memory for variables while probing device tree for idle states. - Set cap for number of idle states that can be added to cpuidle_state_table - Minor change in way we check of_property_read_u32_array for error for sake of consistency - Drop unnecessary "&" while assigning function pointer Cc: Rafael J. Wysocki Cc: Daniel Lezcano Cc: linux-pm@vger.kernel.org Signed-off-by: Shreyas B. 
Prabhu Signed-off-by: Michael Ellerman diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c index 3a763a8..600bbe1 100644 --- a/drivers/cpuidle/cpuidle-powernv.c +++ b/drivers/cpuidle/cpuidle-powernv.c @@ -100,7 +100,7 @@ static struct cpuidle_state powernv_states[CPUIDLE_STATE_MAX] = { .desc = "snooze", .exit_latency = 0, .target_residency = 0, - .enter = &snooze_loop }, + .enter = snooze_loop }, }; static int powernv_cpuidle_add_cpu_notifier(struct notifier_block *n, @@ -166,7 +166,9 @@ static int powernv_add_idle_states(void) struct device_node *power_mgt; int nr_idle_states = 1; /* Snooze */ int dt_idle_states; - u32 *latency_ns, *residency_ns, *flags; + u32 latency_ns[CPUIDLE_STATE_MAX]; + u32 residency_ns[CPUIDLE_STATE_MAX]; + u32 flags[CPUIDLE_STATE_MAX]; int i, rc; /* Currently we have snooze statically defined */ @@ -184,22 +186,28 @@ static int powernv_add_idle_states(void) goto out; } - flags = kzalloc(sizeof(*flags) * dt_idle_states, GFP_KERNEL); + /* + * Since snooze is used as first idle state, max idle states allowed is + * CPUIDLE_STATE_MAX -1 + */ + if (dt_idle_states > CPUIDLE_STATE_MAX - 1) { + pr_warn("cpuidle-powernv: discovered idle states more than allowed"); + dt_idle_states = CPUIDLE_STATE_MAX - 1; + } + if (of_property_read_u32_array(power_mgt, "ibm,cpu-idle-state-flags", flags, dt_idle_states)) { pr_warn("cpuidle-powernv : missing ibm,cpu-idle-state-flags in DT\n"); - goto out_free_flags; + goto out; } - latency_ns = kzalloc(sizeof(*latency_ns) * dt_idle_states, GFP_KERNEL); - rc = of_property_read_u32_array(power_mgt, - "ibm,cpu-idle-state-latencies-ns", latency_ns, dt_idle_states); - if (rc) { + if (of_property_read_u32_array(power_mgt, + "ibm,cpu-idle-state-latencies-ns", latency_ns, + dt_idle_states)) { pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-latencies-ns in DT\n"); - goto out_free_latency; + goto out; } - residency_ns = kzalloc(sizeof(*residency_ns) * dt_idle_states, GFP_KERNEL); rc = 
of_property_read_u32_array(power_mgt, "ibm,cpu-idle-state-residency-ns", residency_ns, dt_idle_states); @@ -215,7 +223,7 @@ static int powernv_add_idle_states(void) strcpy(powernv_states[nr_idle_states].desc, "Nap"); powernv_states[nr_idle_states].flags = 0; powernv_states[nr_idle_states].target_residency = 100; - powernv_states[nr_idle_states].enter = &nap_loop; + powernv_states[nr_idle_states].enter = nap_loop; } /* @@ -230,7 +238,7 @@ static int powernv_add_idle_states(void) strcpy(powernv_states[nr_idle_states].desc, "FastSleep"); powernv_states[nr_idle_states].flags = CPUIDLE_FLAG_TIMER_STOP; powernv_states[nr_idle_states].target_residency = 300000; - powernv_states[nr_idle_states].enter = &fastsleep_loop; + powernv_states[nr_idle_states].enter = fastsleep_loop; } #endif powernv_states[nr_idle_states].exit_latency = @@ -243,12 +251,6 @@ static int powernv_add_idle_states(void) nr_idle_states++; } - - kfree(residency_ns); -out_free_latency: - kfree(latency_ns); -out_free_flags: - kfree(flags); out: return nr_idle_states; } -- cgit v0.10.2 From 3005c597ba46480b42e1fea3512c408f1830b816 Mon Sep 17 00:00:00 2001 From: "Shreyas B. Prabhu" Date: Fri, 8 Jul 2016 11:50:52 +0530 Subject: cpuidle/powernv: Add support for POWER ISA v3 idle states POWER ISA v3 defines a new idle processor core mechanism. In summary, a) new instruction named stop is added. b) new per thread SPR named PSSCR is added which controls the behavior of stop instruction. Supported idle states and value to be written to PSSCR register to enter any idle state is exposed via ibm,cpu-idle-state-names and ibm,cpu-idle-state-psscr respectively. To enter an idle state, platform provided power_stop() needs to be invoked with the appropriate PSSCR value. This patch adds support for this new mechanism in cpuidle powernv driver. Cc: Rafael J. 
Wysocki Cc: Daniel Lezcano Cc: Rob Herring Cc: Lorenzo Pieralisi Cc: linux-pm@vger.kernel.org Cc: Michael Ellerman Cc: Paul Mackerras Cc: linuxppc-dev@lists.ozlabs.org Reviewed-by: Gautham R. Shenoy Signed-off-by: Shreyas B. Prabhu Signed-off-by: Michael Ellerman diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c index 600bbe1..f7ca891 100644 --- a/drivers/cpuidle/cpuidle-powernv.c +++ b/drivers/cpuidle/cpuidle-powernv.c @@ -20,6 +20,8 @@ #include #include +#define POWERNV_THRESHOLD_LATENCY_NS 200000 + struct cpuidle_driver powernv_idle_driver = { .name = "powernv_idle", .owner = THIS_MODULE, @@ -27,6 +29,9 @@ struct cpuidle_driver powernv_idle_driver = { static int max_idle_state; static struct cpuidle_state *cpuidle_state_table; + +static u64 stop_psscr_table[CPUIDLE_STATE_MAX]; + static u64 snooze_timeout; static bool snooze_timeout_en; @@ -91,6 +96,17 @@ static int fastsleep_loop(struct cpuidle_device *dev, return index; } #endif + +static int stop_loop(struct cpuidle_device *dev, + struct cpuidle_driver *drv, + int index) +{ + ppc64_runlatch_off(); + power9_idle_stop(stop_psscr_table[index]); + ppc64_runlatch_on(); + return index; +} + /* * States for dedicated partition case. 
*/ @@ -169,6 +185,8 @@ static int powernv_add_idle_states(void) u32 latency_ns[CPUIDLE_STATE_MAX]; u32 residency_ns[CPUIDLE_STATE_MAX]; u32 flags[CPUIDLE_STATE_MAX]; + u64 psscr_val[CPUIDLE_STATE_MAX]; + const char *names[CPUIDLE_STATE_MAX]; int i, rc; /* Currently we have snooze statically defined */ @@ -207,11 +225,34 @@ static int powernv_add_idle_states(void) pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-latencies-ns in DT\n"); goto out; } + if (of_property_read_string_array(power_mgt, + "ibm,cpu-idle-state-names", names, dt_idle_states) < 0) { + pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-names in DT\n"); + goto out; + } + + /* + * If the idle states use stop instruction, probe for psscr values + * which are necessary to specify required stop level. + */ + if (flags[0] & (OPAL_PM_STOP_INST_FAST | OPAL_PM_STOP_INST_DEEP)) + if (of_property_read_u64_array(power_mgt, + "ibm,cpu-idle-state-psscr", psscr_val, dt_idle_states)) { + pr_warn("cpuidle-powernv: missing ibm,cpu-idle-states-psscr in DT\n"); + goto out; + } rc = of_property_read_u32_array(power_mgt, "ibm,cpu-idle-state-residency-ns", residency_ns, dt_idle_states); for (i = 0; i < dt_idle_states; i++) { + /* + * If an idle state has exit latency beyond + * POWERNV_THRESHOLD_LATENCY_NS then don't use it + * in cpu-idle. + */ + if (latency_ns[i] > POWERNV_THRESHOLD_LATENCY_NS) + continue; /* * Cpuidle accepts exit_latency and target_residency in us. 
@@ -224,6 +265,16 @@ static int powernv_add_idle_states(void) powernv_states[nr_idle_states].flags = 0; powernv_states[nr_idle_states].target_residency = 100; powernv_states[nr_idle_states].enter = nap_loop; + } else if ((flags[i] & OPAL_PM_STOP_INST_FAST) && + !(flags[i] & OPAL_PM_TIMEBASE_STOP)) { + strncpy(powernv_states[nr_idle_states].name, + names[i], CPUIDLE_NAME_LEN); + strncpy(powernv_states[nr_idle_states].desc, + names[i], CPUIDLE_NAME_LEN); + powernv_states[nr_idle_states].flags = 0; + + powernv_states[nr_idle_states].enter = stop_loop; + stop_psscr_table[nr_idle_states] = psscr_val[i]; } /* @@ -239,6 +290,16 @@ static int powernv_add_idle_states(void) powernv_states[nr_idle_states].flags = CPUIDLE_FLAG_TIMER_STOP; powernv_states[nr_idle_states].target_residency = 300000; powernv_states[nr_idle_states].enter = fastsleep_loop; + } else if ((flags[i] & OPAL_PM_STOP_INST_DEEP) && + (flags[i] & OPAL_PM_TIMEBASE_STOP)) { + strncpy(powernv_states[nr_idle_states].name, + names[i], CPUIDLE_NAME_LEN); + strncpy(powernv_states[nr_idle_states].desc, + names[i], CPUIDLE_NAME_LEN); + + powernv_states[nr_idle_states].flags = CPUIDLE_FLAG_TIMER_STOP; + powernv_states[nr_idle_states].enter = stop_loop; + stop_psscr_table[nr_idle_states] = psscr_val[i]; } #endif powernv_states[nr_idle_states].exit_latency = -- cgit v0.10.2 From c0691f9dd2066087524d2b4498c0c9331f26dcd5 Mon Sep 17 00:00:00 2001 From: "Shreyas B. Prabhu" Date: Fri, 8 Jul 2016 11:50:53 +0530 Subject: powerpc/powernv: Use deepest stop state when cpu is offlined If hardware supports stop state, use the deepest stop state when the cpu is offlined. Reviewed-by: Gautham R. Shenoy Signed-off-by: Shreyas B. 
Prabhu Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c index 8219e22..479c256 100644 --- a/arch/powerpc/platforms/powernv/idle.c +++ b/arch/powerpc/platforms/powernv/idle.c @@ -253,6 +253,11 @@ static void power9_idle(void) u64 pnv_first_deep_stop_state = MAX_STOP_STATE; /* + * Deepest stop idle state. Used when a cpu is offlined + */ +u64 pnv_deepest_stop_state; + +/* * Power ISA 3.0 idle initialization. * * POWER ISA 3.0 defines a new SPR Processor stop Status and Control @@ -314,8 +319,11 @@ static int __init pnv_arch300_idle_init(struct device_node *np, u32 *flags, } /* - * Set pnv_first_deep_stop_state to the first stop level - * to cause hypervisor state loss + * Set pnv_first_deep_stop_state and pnv_deepest_stop_state. + * pnv_first_deep_stop_state should be set to the first stop + * level to cause hypervisor state loss. + * pnv_deepest_stop_state should be set to the deepest stop + * stop state. */ pnv_first_deep_stop_state = MAX_STOP_STATE; for (i = 0; i < dt_idle_states; i++) { @@ -324,6 +332,9 @@ static int __init pnv_arch300_idle_init(struct device_node *np, u32 *flags, if ((flags[i] & OPAL_PM_LOSE_FULL_CONTEXT) && (pnv_first_deep_stop_state > psscr_rl)) pnv_first_deep_stop_state = psscr_rl; + + if (pnv_deepest_stop_state < psscr_rl) + pnv_deepest_stop_state = psscr_rl; } out: diff --git a/arch/powerpc/platforms/powernv/powernv.h b/arch/powerpc/platforms/powernv/powernv.h index 6dbc0a1..da7c843 100644 --- a/arch/powerpc/platforms/powernv/powernv.h +++ b/arch/powerpc/platforms/powernv/powernv.h @@ -18,6 +18,7 @@ static inline void pnv_pci_shutdown(void) { } #endif extern u32 pnv_get_supported_cpuidle_states(void); +extern u64 pnv_deepest_stop_state; extern void pnv_lpc_init(void); diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c index ad7b1a3..c789258 100644 --- a/arch/powerpc/platforms/powernv/smp.c +++ 
b/arch/powerpc/platforms/powernv/smp.c @@ -182,7 +182,9 @@ static void pnv_smp_cpu_kill_self(void) ppc64_runlatch_off(); - if (idle_states & OPAL_PM_WINKLE_ENABLED) + if (cpu_has_feature(CPU_FTR_ARCH_300)) + srr1 = power9_idle_stop(pnv_deepest_stop_state); + else if (idle_states & OPAL_PM_WINKLE_ENABLED) srr1 = power7_winkle(); else if ((idle_states & OPAL_PM_SLEEP_ENABLED) || (idle_states & OPAL_PM_SLEEP_ENABLED_ER1)) -- cgit v0.10.2 From 9fedd3f8804af87be608310aac707a64f831b344 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 8 Jul 2016 16:37:05 +1000 Subject: powerpc/powernv: Add XICS emulation APIs OPAL provides an emulated XICS interrupt controller to use as a fallback on newer processors that don't have a XICS. It's meant as a way to provide backward compatibility with future processors. Add the corresponding interfaces. Signed-off-by: Benjamin Herrenschmidt Acked-by: Stewart Smith Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h index 6de1e4e..b349621 100644 --- a/arch/powerpc/include/asm/opal-api.h +++ b/arch/powerpc/include/asm/opal-api.h @@ -162,7 +162,11 @@ #define OPAL_PCI_GET_PRESENCE_STATE 119 #define OPAL_PCI_GET_POWER_STATE 120 #define OPAL_PCI_SET_POWER_STATE 121 -#define OPAL_LAST 121 +#define OPAL_INT_GET_XIRR 122 +#define OPAL_INT_SET_CPPR 123 +#define OPAL_INT_EOI 124 +#define OPAL_INT_SET_MFRR 125 +#define OPAL_LAST 125 /* Device tree flags */ diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 52b9f4a..162ebe6 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -218,6 +218,11 @@ int64_t opal_pci_set_power_state(uint64_t async_token, uint64_t id, uint64_t data); int64_t opal_pci_poll2(uint64_t id, uint64_t data); +int64_t opal_int_get_xirr(uint32_t *out_xirr, bool just_poll); +int64_t opal_int_set_cppr(uint8_t cppr); +int64_t opal_int_eoi(uint32_t xirr); +int64_t opal_int_set_mfrr(uint32_t 
cpu, uint8_t mfrr); + /* Internal functions */ extern int early_init_dt_scan_opal(unsigned long node, const char *uname, int depth, void *data); diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index 7979d6d..c7764f9 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -307,3 +307,7 @@ OPAL_CALL(opal_get_device_tree, OPAL_GET_DEVICE_TREE); OPAL_CALL(opal_pci_get_presence_state, OPAL_PCI_GET_PRESENCE_STATE); OPAL_CALL(opal_pci_get_power_state, OPAL_PCI_GET_POWER_STATE); OPAL_CALL(opal_pci_set_power_state, OPAL_PCI_SET_POWER_STATE); +OPAL_CALL(opal_int_get_xirr, OPAL_INT_GET_XIRR); +OPAL_CALL(opal_int_set_cppr, OPAL_INT_SET_CPPR); +OPAL_CALL(opal_int_eoi, OPAL_INT_EOI); +OPAL_CALL(opal_int_set_mfrr, OPAL_INT_SET_MFRR); -- cgit v0.10.2 From b88d4bce2b883e7f357ecf8f0cae070b9732f82b Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Sat, 16 Jul 2016 17:58:25 -0500 Subject: powerpc/book64s: Move a few exception common handlers to make room This moves the CBE RAS and facility unavailable "common" handlers down to after the FWNMI page. This frees up some space in the very demanded spaces before the relocation-on vectors and before the FWNMI page. They are still within 64K of __start, so CONFIG_RELOCATABLE should still work. 
Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 5c009c5..3834031 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -742,11 +742,6 @@ kvmppc_skip_Hinterrupt: #else STD_EXCEPTION_COMMON(0x1700, altivec_assist, unknown_exception) #endif -#ifdef CONFIG_CBE_RAS - STD_EXCEPTION_COMMON(0x1200, cbe_system_error, cbe_system_error_exception) - STD_EXCEPTION_COMMON(0x1600, cbe_maintenance, cbe_maintenance_exception) - STD_EXCEPTION_COMMON(0x1800, cbe_thermal, cbe_thermal_exception) -#endif /* CONFIG_CBE_RAS */ /* * Relocation-on interrupts: A subset of the interrupts can be delivered @@ -1111,9 +1106,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) bl vsx_unavailable_exception b ret_from_except - STD_EXCEPTION_COMMON(0xf60, facility_unavailable, facility_unavailable_exception) - STD_EXCEPTION_COMMON(0xf80, hv_facility_unavailable, facility_unavailable_exception) - /* Equivalents to the above handlers for relocation-on interrupt vectors */ STD_RELON_EXCEPTION_HV_OOL(0xe40, emulation_assist) MASKABLE_RELON_EXCEPTION_HV_OOL(0xe80, h_doorbell) @@ -1150,6 +1142,15 @@ fwnmi_data_area: . 
= 0x8000 #endif /* defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) */ + STD_EXCEPTION_COMMON(0xf60, facility_unavailable, facility_unavailable_exception) + STD_EXCEPTION_COMMON(0xf80, hv_facility_unavailable, facility_unavailable_exception) + +#ifdef CONFIG_CBE_RAS + STD_EXCEPTION_COMMON(0x1200, cbe_system_error, cbe_system_error_exception) + STD_EXCEPTION_COMMON(0x1600, cbe_maintenance, cbe_maintenance_exception) + STD_EXCEPTION_COMMON(0x1800, cbe_thermal, cbe_thermal_exception) +#endif /* CONFIG_CBE_RAS */ + .globl hmi_exception_early hmi_exception_early: EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, 0xe60) -- cgit v0.10.2 From 9baaef0a22c82a6c5d85b7535240e778c06e5b20 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 8 Jul 2016 16:37:06 +1000 Subject: powerpc/irq: Add support for HV virtualization interrupts This will be delivering external interrupts from the XIVE to the Hypervisor. We treat it as a normal external interrupt for the lazy irq disable code (so it will be replayed as a 0x500) and route it to do_IRQ. 
Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 93ae809..c7d2773 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -403,6 +403,8 @@ label##_relon_hv: \ #define SOFTEN_VALUE_0xe82 PACA_IRQ_DBELL #define SOFTEN_VALUE_0xe60 PACA_IRQ_HMI #define SOFTEN_VALUE_0xe62 PACA_IRQ_HMI +#define SOFTEN_VALUE_0xea0 PACA_IRQ_EE +#define SOFTEN_VALUE_0xea2 PACA_IRQ_EE #define __SOFTEN_TEST(h, vec) \ lbz r10,PACASOFTIRQEN(r13); \ diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index c0263a2..6de6abe 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -363,6 +363,7 @@ #define LPCR_LPES1 0x00000004 /* LPAR Env selector 1 */ #define LPCR_LPES_SH 2 #define LPCR_RMI 0x00000002 /* real mode is cache inhibit */ +#define LPCR_HVICE 0x00000002 /* P9: HV interrupt enable */ #define LPCR_HDICE 0x00000001 /* Hyp Decr enable (HV,PR,EE) */ #define LPCR_UPRT 0x00400000 /* Use Process Table (ISA 3) */ #ifndef SPRN_LPID diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S index ec8a228..52ff3f0 100644 --- a/arch/powerpc/kernel/cpu_setup_power.S +++ b/arch/powerpc/kernel/cpu_setup_power.S @@ -99,6 +99,7 @@ _GLOBAL(__setup_cpu_power9) mtspr SPRN_LPID,r0 mfspr r3,SPRN_LPCR ori r3, r3, LPCR_PECEDH + ori r3, r3, LPCR_HVICE bl __init_LPCR bl __init_HFSCR bl __init_tlb_power9 @@ -118,6 +119,7 @@ _GLOBAL(__restore_cpu_power9) mtspr SPRN_LPID,r0 mfspr r3,SPRN_LPCR ori r3, r3, LPCR_PECEDH + ori r3, r3, LPCR_HVICE bl __init_LPCR bl __init_HFSCR bl __init_tlb_power9 diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 3834031..6200e49 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -331,6 +331,12 @@ hv_doorbell_trampoline: 
EXCEPTION_PROLOG_0(PACA_EXGEN) b h_doorbell_hv + . = 0xea0 +hv_virt_irq_trampoline: + SET_SCRATCH0(r13) + EXCEPTION_PROLOG_0(PACA_EXGEN) + b h_virt_irq_hv + /* We need to deal with the Altivec unavailable exception * here which is at 0xf20, thus in the middle of the * prolog code of the PerformanceMonitor one. A little @@ -581,6 +587,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) MASKABLE_EXCEPTION_HV_OOL(0xe82, h_doorbell) KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe82) + MASKABLE_EXCEPTION_HV_OOL(0xea2, h_virt_irq) + KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xea2) + /* moved from 0xf00 */ STD_EXCEPTION_PSERIES_OOL(0xf00, performance_monitor) KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xf00) @@ -660,6 +669,8 @@ _GLOBAL(__replay_interrupt) BEGIN_FTR_SECTION cmpwi r3,0xe80 beq h_doorbell_common + cmpwi r3,0xea0 + beq h_virt_irq_common FTR_SECTION_ELSE cmpwi r3,0xa00 beq doorbell_super_common @@ -734,6 +745,7 @@ kvmppc_skip_Hinterrupt: #else STD_EXCEPTION_COMMON_ASYNC(0xe80, h_doorbell, unknown_exception) #endif + STD_EXCEPTION_COMMON_ASYNC(0xea0, h_virt_irq, do_IRQ) STD_EXCEPTION_COMMON_ASYNC(0xf00, performance_monitor, performance_monitor_exception) STD_EXCEPTION_COMMON(0x1300, instruction_breakpoint, instruction_breakpoint_exception) STD_EXCEPTION_COMMON(0x1502, denorm, unknown_exception) @@ -852,6 +864,12 @@ h_doorbell_relon_trampoline: EXCEPTION_PROLOG_0(PACA_EXGEN) b h_doorbell_relon_hv + . = 0x4ea0 +h_virt_irq_relon_trampoline: + SET_SCRATCH0(r13) + EXCEPTION_PROLOG_0(PACA_EXGEN) + b h_virt_irq_relon_hv + . 
= 0x4f00 performance_monitor_relon_pseries_trampoline: SET_SCRATCH0(r13) @@ -1109,6 +1127,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) /* Equivalents to the above handlers for relocation-on interrupt vectors */ STD_RELON_EXCEPTION_HV_OOL(0xe40, emulation_assist) MASKABLE_RELON_EXCEPTION_HV_OOL(0xe80, h_doorbell) + MASKABLE_RELON_EXCEPTION_HV_OOL(0xea0, h_virt_irq) STD_RELON_EXCEPTION_PSERIES_OOL(0xf00, performance_monitor) STD_RELON_EXCEPTION_PSERIES_OOL(0xf20, altivec_unavailable) -- cgit v0.10.2 From 1d607bb3bd60f404d1ceb0d6ebceadf261068422 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 8 Jul 2016 16:37:07 +1000 Subject: powerpc/irq: Add mechanism to force a replay of interrupts Calling this function with interrupts soft-disabled will cause a replay of the external interrupt vector when they are re-enabled. This will be used by the OPAL XICS backend (and later by the native XIVE code) to handle EOI signaling that there are more interrupts to fetch from the hardware since the hardware won't issue another HW interrupt in that case. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index b59ac27..c7d82ff 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -130,6 +130,8 @@ static inline bool arch_irq_disabled_regs(struct pt_regs *regs) extern bool prep_irq_for_idle(void); +extern void force_external_irq_replay(void); + #else /* CONFIG_PPC64 */ #define SET_MSR_EE(x) mtmsr(x) diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 58217ae..ac910d9 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -342,6 +342,21 @@ bool prep_irq_for_idle(void) return true; } +/* + * Force a replay of the external interrupt handler on this CPU.
+ */ +void force_external_irq_replay(void) +{ + /* + * This must only be called with interrupts soft-disabled, + * the replay will happen when re-enabling. + */ + WARN_ON(!arch_irqs_disabled()); + + /* Indicate in the PACA that we have an interrupt to replay */ + local_paca->irq_happened |= PACA_IRQ_EE; +} + #endif /* CONFIG_PPC64 */ int arch_show_interrupts(struct seq_file *p, int prec) -- cgit v0.10.2 From d74361881f0dfe5f9dcac37d1b753a15a2345d8c Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 8 Jul 2016 16:37:08 +1000 Subject: powerpc/xics: Add ICP OPAL backend This adds a new XICS backend that uses OPAL calls, which can be used when we don't have native support for the platform interrupt controller. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/include/asm/xics.h b/arch/powerpc/include/asm/xics.h index 04ef3ae..f5f729c 100644 --- a/arch/powerpc/include/asm/xics.h +++ b/arch/powerpc/include/asm/xics.h @@ -42,6 +42,12 @@ extern int icp_hv_init(void); static inline int icp_hv_init(void) { return -ENODEV; } #endif +#ifdef CONFIG_PPC_POWERNV +extern int icp_opal_init(void); +#else +static inline int icp_opal_init(void) { return -ENODEV; } +#endif + /* ICP ops */ struct icp_ops { unsigned int (*get_irq)(void); diff --git a/arch/powerpc/sysdev/xics/Makefile b/arch/powerpc/sysdev/xics/Makefile index c606aa8..5d7f5a6 100644 --- a/arch/powerpc/sysdev/xics/Makefile +++ b/arch/powerpc/sysdev/xics/Makefile @@ -4,4 +4,4 @@ obj-y += xics-common.o obj-$(CONFIG_PPC_ICP_NATIVE) += icp-native.o obj-$(CONFIG_PPC_ICP_HV) += icp-hv.o obj-$(CONFIG_PPC_ICS_RTAS) += ics-rtas.o -obj-$(CONFIG_PPC_POWERNV) += ics-opal.o +obj-$(CONFIG_PPC_POWERNV) += ics-opal.o icp-opal.o diff --git a/arch/powerpc/sysdev/xics/icp-opal.c b/arch/powerpc/sysdev/xics/icp-opal.c new file mode 100644 index 0000000..57d72f1 --- /dev/null +++ b/arch/powerpc/sysdev/xics/icp-opal.c @@ -0,0 +1,144 @@ +/* + * Copyright 2016 IBM Corporation. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +static void icp_opal_teardown_cpu(void) +{ + int cpu = smp_processor_id(); + + /* Clear any pending IPI */ + opal_int_set_mfrr(cpu, 0xff); +} + +static void icp_opal_flush_ipi(void) +{ + /* + * We take the ipi irq but and never return so we need to EOI the IPI, + * but want to leave our priority 0. + * + * Should we check all the other interrupts too? + * Should we be flagging idle loop instead? + * Or creating some task to be scheduled? + */ + opal_int_eoi((0x00 << 24) | XICS_IPI); +} + +static unsigned int icp_opal_get_irq(void) +{ + unsigned int xirr; + unsigned int vec; + unsigned int irq; + int64_t rc; + + rc = opal_int_get_xirr(&xirr, false); + if (rc < 0) + return NO_IRQ; + xirr = be32_to_cpu(xirr); + vec = xirr & 0x00ffffff; + if (vec == XICS_IRQ_SPURIOUS) + return NO_IRQ; + + irq = irq_find_mapping(xics_host, vec); + if (likely(irq != NO_IRQ)) { + xics_push_cppr(vec); + return irq; + } + + /* We don't have a linux mapping, so have rtas mask it. */ + xics_mask_unknown_vec(vec); + + /* We might learn about it later, so EOI it */ + opal_int_eoi(xirr); + + return NO_IRQ; +} + +static void icp_opal_set_cpu_priority(unsigned char cppr) +{ + xics_set_base_cppr(cppr); + opal_int_set_cppr(cppr); + iosync(); +} + +static void icp_opal_eoi(struct irq_data *d) +{ + unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d); + int64_t rc; + + iosync(); + rc = opal_int_eoi((xics_pop_cppr() << 24) | hw_irq); + + /* + * EOI tells us whether there are more interrupts to fetch. 
+ * + * Some HW implementations might not be able to send us another + * external interrupt in that case, so we force a replay. + */ + if (rc > 0) + force_external_irq_replay(); +} + +#ifdef CONFIG_SMP + +static void icp_opal_cause_ipi(int cpu, unsigned long data) +{ + opal_int_set_mfrr(cpu, IPI_PRIORITY); +} + +static irqreturn_t icp_opal_ipi_action(int irq, void *dev_id) +{ + int cpu = smp_processor_id(); + + opal_int_set_mfrr(cpu, 0xff); + + return smp_ipi_demux(); +} + +#endif /* CONFIG_SMP */ + +static const struct icp_ops icp_opal_ops = { + .get_irq = icp_opal_get_irq, + .eoi = icp_opal_eoi, + .set_priority = icp_opal_set_cpu_priority, + .teardown_cpu = icp_opal_teardown_cpu, + .flush_ipi = icp_opal_flush_ipi, +#ifdef CONFIG_SMP + .ipi_action = icp_opal_ipi_action, + .cause_ipi = icp_opal_cause_ipi, +#endif +}; + +int icp_opal_init(void) +{ + struct device_node *np; + + np = of_find_compatible_node(NULL, NULL, "ibm,opal-intc"); + if (!np) + return -ENODEV; + + icp_ops = &icp_opal_ops; + + printk("XICS: Using OPAL ICP fallbacks\n"); + + return 0; +} + diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c index 47e43b7..a795a5f 100644 --- a/arch/powerpc/sysdev/xics/xics-common.c +++ b/arch/powerpc/sysdev/xics/xics-common.c @@ -404,8 +404,11 @@ void __init xics_init(void) /* Fist locate ICP */ if (firmware_has_feature(FW_FEATURE_LPAR)) rc = icp_hv_init(); - if (rc < 0) + if (rc < 0) { rc = icp_native_init(); + if (rc == -ENODEV) + rc = icp_opal_init(); + } if (rc < 0) { pr_warning("XICS: Cannot find a Presentation Controller !\n"); return; -- cgit v0.10.2 From fb111334e4d9b0365f76d30254fcea9032854867 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 8 Jul 2016 16:37:09 +1000 Subject: powerpc/powernv: Discover IODA3 PHBs We instantiate them as IODA2. We also change the MSI EOI hack to only kick on PHB3 since it will not be needed on any new implementation.
Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 530d4af..49de081 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2705,7 +2705,8 @@ void pnv_set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq) struct irq_data *idata; struct irq_chip *ichip; - if (phb->type != PNV_PHB_IODA2) + /* The MSI EOI OPAL call is only needed on PHB3 */ + if (phb->model != PNV_PHB_MODEL_PHB3) return; if (!phb->ioda.irq_chip_init) { diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 62c7637..4617ea2 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -932,6 +932,10 @@ void __init pnv_pci_init(void) for_each_compatible_node(np, NULL, "ibm,ioda2-phb") pnv_pci_init_ioda2_phb(np); + /* Look for ioda3 built-in PHB4's, we treat them as IODA2 */ + for_each_compatible_node(np, NULL, "ibm,ioda3-phb") + pnv_pci_init_ioda2_phb(np); + /* Look for NPU PHBs */ for_each_compatible_node(np, NULL, "ibm,ioda2-npu-phb") pnv_pci_init_npu_phb(np); -- cgit v0.10.2 From b7d6bf4fdd47b7a067e6caecb606e27fd09d1ae9 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 8 Jul 2016 16:37:10 +1000 Subject: powerpc/pseries/pci: Remove obsolete SW invalidate That was used by some old IBM internal bringup tools and is no longer relevant. 
Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 3e8865b..770a753 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -120,35 +120,6 @@ static void iommu_pseries_free_group(struct iommu_table_group *table_group, kfree(table_group); } -static void tce_invalidate_pSeries_sw(struct iommu_table *tbl, - __be64 *startp, __be64 *endp) -{ - u64 __iomem *invalidate = (u64 __iomem *)tbl->it_index; - unsigned long start, end, inc; - - start = __pa(startp); - end = __pa(endp); - inc = L1_CACHE_BYTES; /* invalidate a cacheline of TCEs at a time */ - - /* If this is non-zero, change the format. We shift the - * address and or in the magic from the device tree. */ - if (tbl->it_busno) { - start <<= 12; - end <<= 12; - inc <<= 12; - start |= tbl->it_busno; - end |= tbl->it_busno; - } - - end |= inc - 1; /* round up end to be different than start */ - - mb(); /* Make sure TCEs in memory are written */ - while (start <= end) { - out_be64(invalidate, start); - start += inc; - } -} - static int tce_build_pSeries(struct iommu_table *tbl, long index, long npages, unsigned long uaddr, enum dma_data_direction direction, @@ -173,9 +144,6 @@ static int tce_build_pSeries(struct iommu_table *tbl, long index, uaddr += TCE_PAGE_SIZE; tcep++; } - - if (tbl->it_type & TCE_PCI_SWINV_CREATE) - tce_invalidate_pSeries_sw(tbl, tces, tcep - 1); return 0; } @@ -188,9 +156,6 @@ static void tce_free_pSeries(struct iommu_table *tbl, long index, long npages) while (npages--) *(tcep++) = 0; - - if (tbl->it_type & TCE_PCI_SWINV_FREE) - tce_invalidate_pSeries_sw(tbl, tces, tcep - 1); } static unsigned long tce_get_pseries(struct iommu_table *tbl, long index) @@ -537,7 +502,7 @@ static void iommu_table_setparms(struct pci_controller *phb, struct iommu_table *tbl) { struct device_node *node; - const unsigned long *basep, *sw_inval; + const 
unsigned long *basep; const u32 *sizep; node = phb->dn; @@ -575,22 +540,6 @@ static void iommu_table_setparms(struct pci_controller *phb, tbl->it_index = 0; tbl->it_blocksize = 16; tbl->it_type = TCE_PCI; - - sw_inval = of_get_property(node, "linux,tce-sw-invalidate-info", NULL); - if (sw_inval) { - /* - * This property contains information on how to - * invalidate the TCE entry. The first property is - * the base MMIO address used to invalidate entries. - * The second property tells us the format of the TCE - * invalidate (whether it needs to be shifted) and - * some magic routing info to add to our invalidate - * command. - */ - tbl->it_index = (unsigned long) ioremap(sw_inval[0], 8); - tbl->it_busno = sw_inval[1]; /* overload this with magic */ - tbl->it_type = TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE; - } } /* -- cgit v0.10.2 From 69c592ed40d32b4b680fd46c1b059cfe8abeb755 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 8 Jul 2016 16:37:11 +1000 Subject: powerpc/opal: Add real mode call wrappers Replace the old generic opal_call_realmode() with proper per-call wrappers similar to the normal ones and convert callers. 
Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h index b349621..0e2e57b 100644 --- a/arch/powerpc/include/asm/opal-api.h +++ b/arch/powerpc/include/asm/opal-api.h @@ -166,7 +166,8 @@ #define OPAL_INT_SET_CPPR 123 #define OPAL_INT_EOI 124 #define OPAL_INT_SET_MFRR 125 -#define OPAL_LAST 125 +#define OPAL_PCI_TCE_KILL 126 +#define OPAL_LAST 126 /* Device tree flags */ @@ -919,6 +920,13 @@ enum { OPAL_REBOOT_PLATFORM_ERROR = 1, }; +/* Argument to OPAL_PCI_TCE_KILL */ +enum { + OPAL_PCI_TCE_KILL_PAGES, + OPAL_PCI_TCE_KILL_PE, + OPAL_PCI_TCE_KILL_ALL, +}; + #endif /* __ASSEMBLY__ */ #endif /* __OPAL_API_H */ diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 162ebe6..b656bb1 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -222,6 +222,12 @@ int64_t opal_int_get_xirr(uint32_t *out_xirr, bool just_poll); int64_t opal_int_set_cppr(uint8_t cppr); int64_t opal_int_eoi(uint32_t xirr); int64_t opal_int_set_mfrr(uint32_t cpu, uint8_t mfrr); +int64_t opal_pci_tce_kill(uint64_t phb_id, uint32_t kill_type, + uint32_t pe_num, uint32_t tce_size, + uint64_t dma_addr, uint32_t npages); +int64_t opal_rm_pci_tce_kill(uint64_t phb_id, uint32_t kill_type, + uint32_t pe_num, uint32_t tce_size, + uint64_t dma_addr, uint32_t npages); /* Internal functions */ extern int early_init_dt_scan_opal(unsigned long node, const char *uname, diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index 1f564eb..335eb6c 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -245,8 +245,7 @@ fastsleep_workaround_at_entry: /* Fast sleep workaround */ li r3,1 li r4,1 - li r0,OPAL_CONFIG_CPU_IDLE_STATE - bl opal_call_realmode + bl opal_rm_config_cpu_idle_state /* Clear Lock bit */ li r0,0 @@ -337,8 +336,7 @@ ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66); \ ld 
r2,PACATOC(r13); \ ld r1,PACAR1(r13); \ std r3,ORIG_GPR3(r1); /* Save original r3 */ \ - li r0,OPAL_HANDLE_HMI; /* Pass opal token argument*/ \ - bl opal_call_realmode; \ + bl opal_rm_handle_hmi; \ ld r3,ORIG_GPR3(r1); /* Restore original r3 */ \ 20: nop; @@ -430,7 +428,7 @@ _GLOBAL(pnv_wakeup_tb_loss) * until they are restored, they are free to be used. * * Save SRR1 and LR in NVGPRs as they might be clobbered in - * opal_call_realmode (called in CHECK_HMI_INTERRUPT). SRR1 is required + * opal_call() (called in CHECK_HMI_INTERRUPT). SRR1 is required * to determine the wakeup reason if we branch to kvm_start_guest. LR * is required to return back to reset vector after hypervisor state * restore is complete. @@ -530,10 +528,7 @@ timebase_resync: */ ble cr3,clear_lock /* Time base re-sync */ - li r0,OPAL_RESYNC_TIMEBASE - bl opal_call_realmode; - /* TODO: Check r3 for failure */ - + bl opal_rm_resync_timebase; /* * If waking up from sleep, per core state is not lost, skip to * clear_lock. @@ -622,8 +617,7 @@ hypervisor_state_restored: fastsleep_workaround_at_exit: li r3,1 li r4,0 - li r0,OPAL_CONFIG_CPU_IDLE_STATE - bl opal_call_realmode + bl opal_rm_config_cpu_idle_state b timebase_resync /* diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index c7764f9..cf928bb 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -59,7 +59,7 @@ END_FTR_SECTION(0, 1); \ #define OPAL_CALL(name, token) \ _GLOBAL_TOC(name); \ mflr r0; \ - std r0,16(r1); \ + std r0,PPC_LR_STKOFF(r1); \ li r0,token; \ OPAL_BRANCH(opal_tracepoint_entry) \ mfcr r12; \ @@ -92,7 +92,7 @@ opal_return: FIXUP_ENDIAN ld r2,PACATOC(r13); lwz r4,8(r1); - ld r5,16(r1); + ld r5,PPC_LR_STKOFF(r1); ld r6,PACASAVEDMSR(r13); mtspr SPRN_SRR0,r5; mtspr SPRN_SRR1,r6; @@ -157,43 +157,37 @@ opal_tracepoint_return: blr #endif -/* - * Make opal call in realmode. 
This is a generic function to be called - * from realmode. It handles endianness. - * - * r13 - paca pointer - * r1 - stack pointer - * r0 - opal token - */ -_GLOBAL(opal_call_realmode) - mflr r12 - std r12,PPC_LR_STKOFF(r1) - ld r2,PACATOC(r13) - /* Set opal return address */ - LOAD_REG_ADDR(r12,return_from_opal_call) - mtlr r12 - - mfmsr r12 -#ifdef __LITTLE_ENDIAN__ - /* Handle endian-ness */ - li r11,MSR_LE - andc r12,r12,r11 -#endif - mtspr SPRN_HSRR1,r12 - LOAD_REG_ADDR(r11,opal) - ld r12,8(r11) - ld r2,0(r11) - mtspr SPRN_HSRR0,r12 +#define OPAL_CALL_REAL(name, token) \ + _GLOBAL_TOC(name); \ + mflr r0; \ + std r0,PPC_LR_STKOFF(r1); \ + li r0,token; \ + mfcr r12; \ + stw r12,8(r1); \ + \ + /* Set opal return address */ \ + LOAD_REG_ADDR(r11, opal_return_realmode); \ + mtlr r11; \ + mfmsr r12; \ + li r11,MSR_LE; \ + andc r12,r12,r11; \ + mtspr SPRN_HSRR1,r12; \ + LOAD_REG_ADDR(r11,opal); \ + ld r12,8(r11); \ + ld r2,0(r11); \ + mtspr SPRN_HSRR0,r12; \ hrfid -return_from_opal_call: -#ifdef __LITTLE_ENDIAN__ +opal_return_realmode: FIXUP_ENDIAN -#endif + ld r2,PACATOC(r13); + lwz r11,8(r1); ld r12,PPC_LR_STKOFF(r1) + mtcr r11; mtlr r12 blr + OPAL_CALL(opal_invalid_call, OPAL_INVALID_CALL); OPAL_CALL(opal_console_write, OPAL_CONSOLE_WRITE); OPAL_CALL(opal_console_read, OPAL_CONSOLE_READ); @@ -271,6 +265,7 @@ OPAL_CALL(opal_validate_flash, OPAL_FLASH_VALIDATE); OPAL_CALL(opal_manage_flash, OPAL_FLASH_MANAGE); OPAL_CALL(opal_update_flash, OPAL_FLASH_UPDATE); OPAL_CALL(opal_resync_timebase, OPAL_RESYNC_TIMEBASE); +OPAL_CALL_REAL(opal_rm_resync_timebase, OPAL_RESYNC_TIMEBASE); OPAL_CALL(opal_check_token, OPAL_CHECK_TOKEN); OPAL_CALL(opal_dump_init, OPAL_DUMP_INIT); OPAL_CALL(opal_dump_info, OPAL_DUMP_INFO); @@ -286,7 +281,9 @@ OPAL_CALL(opal_sensor_read, OPAL_SENSOR_READ); OPAL_CALL(opal_get_param, OPAL_GET_PARAM); OPAL_CALL(opal_set_param, OPAL_SET_PARAM); OPAL_CALL(opal_handle_hmi, OPAL_HANDLE_HMI); +OPAL_CALL_REAL(opal_rm_handle_hmi, OPAL_HANDLE_HMI); 
OPAL_CALL(opal_config_cpu_idle_state, OPAL_CONFIG_CPU_IDLE_STATE); +OPAL_CALL_REAL(opal_rm_config_cpu_idle_state, OPAL_CONFIG_CPU_IDLE_STATE); OPAL_CALL(opal_slw_set_reg, OPAL_SLW_SET_REG); OPAL_CALL(opal_register_dump_region, OPAL_REGISTER_DUMP_REGION); OPAL_CALL(opal_unregister_dump_region, OPAL_UNREGISTER_DUMP_REGION); @@ -311,3 +308,5 @@ OPAL_CALL(opal_int_get_xirr, OPAL_INT_GET_XIRR); OPAL_CALL(opal_int_set_cppr, OPAL_INT_SET_CPPR); OPAL_CALL(opal_int_eoi, OPAL_INT_EOI); OPAL_CALL(opal_int_set_mfrr, OPAL_INT_SET_MFRR); +OPAL_CALL(opal_pci_tce_kill, OPAL_PCI_TCE_KILL); +OPAL_CALL_REAL(opal_rm_pci_tce_kill, OPAL_PCI_TCE_KILL); -- cgit v0.10.2 From a34ab7c328b969d9f841e7887f6be3cc45b5202e Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 8 Jul 2016 16:37:12 +1000 Subject: powerpc/powernv/pci: Rename TCE invalidation calls The TCE invalidation functions are fairly implementation specific, and while the IODA specs more/less describe the register, in practice various implementation workarounds may be required. So name the functions after the target PHB. Note today and for the foreseeable future, there's a 1:1 relationship between an IODA version and a PHB implementation. There exists another variant of IODA1 (Torrent) but we never supported it with OPAL and never will.
Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c index 0459e10..4383a5f 100644 --- a/arch/powerpc/platforms/powernv/npu-dma.c +++ b/arch/powerpc/platforms/powernv/npu-dma.c @@ -180,7 +180,7 @@ long pnv_npu_set_window(struct pnv_ioda_pe *npe, int num, pe_err(npe, "Failed to configure TCE table, err %lld\n", rc); return rc; } - pnv_pci_ioda2_tce_invalidate_entire(phb, false); + pnv_pci_phb3_tce_invalidate_entire(phb, false); /* Add the table to the list so its TCE cache will get invalidated */ pnv_pci_link_table_and_group(phb->hose->node, num, @@ -204,7 +204,7 @@ long pnv_npu_unset_window(struct pnv_ioda_pe *npe, int num) pe_err(npe, "Unmapping failed, ret = %lld\n", rc); return rc; } - pnv_pci_ioda2_tce_invalidate_entire(phb, false); + pnv_pci_phb3_tce_invalidate_entire(phb, false); pnv_pci_unlink_table_and_group(npe->table_group.tables[num], &npe->table_group); @@ -270,7 +270,7 @@ static int pnv_npu_dma_set_bypass(struct pnv_ioda_pe *npe) 0 /* bypass base */, top); if (rc == OPAL_SUCCESS) - pnv_pci_ioda2_tce_invalidate_entire(phb, false); + pnv_pci_phb3_tce_invalidate_entire(phb, false); return rc; } @@ -334,7 +334,7 @@ void pnv_npu_take_ownership(struct pnv_ioda_pe *npe) pe_err(npe, "Failed to disable bypass, err %lld\n", rc); return; } - pnv_pci_ioda2_tce_invalidate_entire(npe->phb, false); + pnv_pci_phb3_tce_invalidate_entire(npe->phb, false); } struct pnv_ioda_pe *pnv_pci_npu_setup_iommu(struct pnv_ioda_pe *npe) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 49de081..4817bd1 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1721,7 +1721,7 @@ static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, } } -static void pnv_pci_ioda1_tce_invalidate(struct iommu_table *tbl, +static void pnv_pci_p7ioc_tce_invalidate(struct iommu_table 
*tbl, unsigned long index, unsigned long npages, bool rm) { struct iommu_table_group_link *tgl = list_first_entry_or_null( @@ -1782,7 +1782,7 @@ static int pnv_ioda1_tce_build(struct iommu_table *tbl, long index, attrs); if (!ret && (tbl->it_type & TCE_PCI_SWINV_CREATE)) - pnv_pci_ioda1_tce_invalidate(tbl, index, npages, false); + pnv_pci_p7ioc_tce_invalidate(tbl, index, npages, false); return ret; } @@ -1795,7 +1795,7 @@ static int pnv_ioda1_tce_xchg(struct iommu_table *tbl, long index, if (!ret && (tbl->it_type & (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE))) - pnv_pci_ioda1_tce_invalidate(tbl, index, 1, false); + pnv_pci_p7ioc_tce_invalidate(tbl, index, 1, false); return ret; } @@ -1807,7 +1807,7 @@ static void pnv_ioda1_tce_free(struct iommu_table *tbl, long index, pnv_tce_free(tbl, index, npages); if (tbl->it_type & TCE_PCI_SWINV_FREE) - pnv_pci_ioda1_tce_invalidate(tbl, index, npages, false); + pnv_pci_p7ioc_tce_invalidate(tbl, index, npages, false); } static struct iommu_table_ops pnv_ioda1_iommu_ops = { @@ -1819,13 +1819,13 @@ static struct iommu_table_ops pnv_ioda1_iommu_ops = { .get = pnv_tce_get, }; -#define TCE_KILL_INVAL_ALL PPC_BIT(0) -#define TCE_KILL_INVAL_PE PPC_BIT(1) -#define TCE_KILL_INVAL_TCE PPC_BIT(2) +#define PHB3_TCE_KILL_INVAL_ALL PPC_BIT(0) +#define PHB3_TCE_KILL_INVAL_PE PPC_BIT(1) +#define PHB3_TCE_KILL_INVAL_ONE PPC_BIT(2) -void pnv_pci_ioda2_tce_invalidate_entire(struct pnv_phb *phb, bool rm) +void pnv_pci_phb3_tce_invalidate_entire(struct pnv_phb *phb, bool rm) { - const unsigned long val = TCE_KILL_INVAL_ALL; + const unsigned long val = PHB3_TCE_KILL_INVAL_ALL; mb(); /* Ensure previous TCE table stores are visible */ if (rm) @@ -1836,10 +1836,10 @@ void pnv_pci_ioda2_tce_invalidate_entire(struct pnv_phb *phb, bool rm) __raw_writeq(cpu_to_be64(val), phb->ioda.tce_inval_reg); } -static inline void pnv_pci_ioda2_tce_invalidate_pe(struct pnv_ioda_pe *pe) +static inline void pnv_pci_phb3_tce_invalidate_pe(struct pnv_ioda_pe *pe) { /* 01xb 
- invalidate TCEs that match the specified PE# */ - unsigned long val = TCE_KILL_INVAL_PE | (pe->pe_number & 0xFF); + unsigned long val = PHB3_TCE_KILL_INVAL_PE | (pe->pe_number & 0xFF); struct pnv_phb *phb = pe->phb; if (!phb->ioda.tce_inval_reg) @@ -1849,14 +1849,14 @@ static inline void pnv_pci_ioda2_tce_invalidate_pe(struct pnv_ioda_pe *pe) __raw_writeq(cpu_to_be64(val), phb->ioda.tce_inval_reg); } -static void pnv_pci_ioda2_do_tce_invalidate(unsigned pe_number, bool rm, +static void pnv_pci_phb3_tce_invalidate(unsigned pe_number, bool rm, __be64 __iomem *invalidate, unsigned shift, unsigned long index, unsigned long npages) { unsigned long start, end, inc; /* We'll invalidate DMA address in PE scope */ - start = TCE_KILL_INVAL_TCE; + start = PHB3_TCE_KILL_INVAL_ONE; start |= (pe_number & 0xFF); end = start; @@ -1893,10 +1893,10 @@ static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl, * per TCE entry so we have to invalidate * the entire cache for it. */ - pnv_pci_ioda2_tce_invalidate_entire(pe->phb, rm); + pnv_pci_phb3_tce_invalidate_entire(pe->phb, rm); continue; } - pnv_pci_ioda2_do_tce_invalidate(pe->pe_number, rm, + pnv_pci_phb3_tce_invalidate(pe->pe_number, rm, invalidate, tbl->it_page_shift, index, npages); } @@ -2172,7 +2172,7 @@ static long pnv_pci_ioda2_set_window(struct iommu_table_group *table_group, pnv_pci_link_table_and_group(phb->hose->node, num, tbl, &pe->table_group); - pnv_pci_ioda2_tce_invalidate_pe(pe); + pnv_pci_phb3_tce_invalidate_pe(pe); return 0; } @@ -2316,7 +2316,7 @@ static long pnv_pci_ioda2_unset_window(struct iommu_table_group *table_group, if (ret) pe_warn(pe, "Unmapping failed, ret = %ld\n", ret); else - pnv_pci_ioda2_tce_invalidate_pe(pe); + pnv_pci_phb3_tce_invalidate_pe(pe); pnv_pci_unlink_table_and_group(table_group->tables[num], table_group); @@ -3286,7 +3286,7 @@ static void pnv_pci_ioda1_release_pe_dma(struct pnv_ioda_pe *pe) if (rc != OPAL_SUCCESS) return; - pnv_pci_ioda1_tce_invalidate(tbl, tbl->it_offset, 
tbl->it_size, false); + pnv_pci_p7ioc_tce_invalidate(tbl, tbl->it_offset, tbl->it_size, false); if (pe->table_group.group) { iommu_group_put(pe->table_group.group); WARN_ON(pe->table_group.group); diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index f0c276c..1336fe2 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -211,8 +211,6 @@ extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl, extern void pnv_pci_init_ioda_hub(struct device_node *np); extern void pnv_pci_init_ioda2_phb(struct device_node *np); extern void pnv_pci_init_npu_phb(struct device_node *np); -extern void pnv_pci_ioda_tce_invalidate(struct iommu_table *tbl, - __be64 *startp, __be64 *endp, bool rm); extern void pnv_pci_reset_secondary_bus(struct pci_dev *dev); extern int pnv_eeh_phb_reset(struct pci_controller *hose, int option); @@ -235,7 +233,7 @@ extern void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level, /* Nvlink functions */ extern void pnv_npu_try_dma_set_bypass(struct pci_dev *gpdev, bool bypass); -extern void pnv_pci_ioda2_tce_invalidate_entire(struct pnv_phb *phb, bool rm); +extern void pnv_pci_phb3_tce_invalidate_entire(struct pnv_phb *phb, bool rm); extern struct pnv_ioda_pe *pnv_pci_npu_setup_iommu(struct pnv_ioda_pe *npe); extern long pnv_npu_set_window(struct pnv_ioda_pe *npe, int num, struct iommu_table *tbl); -- cgit v0.10.2 From 08acce1cab7c80f38169435c0a122369f07f12e8 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 8 Jul 2016 16:37:13 +1000 Subject: powerpc/powernv/pci: Remove SWINV constants and obsolete TCE code We have some obsolete code in pnv_pci_p7ioc_tce_invalidate() to handle some internal lab tools that have stopped being useful a long time ago. Remove that along with the definition and test for the TCE_PCI_SWINV_* flags whose value is basically always the same. 
Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/include/asm/tce.h b/arch/powerpc/include/asm/tce.h index 743f36b..12e3629 100644 --- a/arch/powerpc/include/asm/tce.h +++ b/arch/powerpc/include/asm/tce.h @@ -31,9 +31,6 @@ */ #define TCE_VB 0 #define TCE_PCI 1 -#define TCE_PCI_SWINV_CREATE 2 -#define TCE_PCI_SWINV_FREE 4 -#define TCE_PCI_SWINV_PAIR 8 /* TCE page size is 4096 bytes (1 << 12) */ diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 4817bd1..c9494ea 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1733,29 +1733,15 @@ static void pnv_pci_p7ioc_tce_invalidate(struct iommu_table *tbl, (__be64 __iomem *)pe->phb->ioda.tce_inval_reg_phys : pe->phb->ioda.tce_inval_reg; unsigned long start, end, inc; - const unsigned shift = tbl->it_page_shift; start = __pa(((__be64 *)tbl->it_base) + index - tbl->it_offset); end = __pa(((__be64 *)tbl->it_base) + index - tbl->it_offset + npages - 1); - /* BML uses this case for p6/p7/galaxy2: Shift addr and put in node */ - if (tbl->it_busno) { - start <<= shift; - end <<= shift; - inc = 128ull << shift; - start |= tbl->it_busno; - end |= tbl->it_busno; - } else if (tbl->it_type & TCE_PCI_SWINV_PAIR) { - /* p7ioc-style invalidation, 2 TCEs per write */ - start |= (1ull << 63); - end |= (1ull << 63); - inc = 16; - } else { - /* Default (older HW) */ - inc = 128; - } - + /* p7ioc-style invalidation, 2 TCEs per write */ + start |= (1ull << 63); + end |= (1ull << 63); + inc = 16; end |= inc - 1; /* round up end to be different than start */ mb(); /* Ensure above stores are visible */ @@ -1781,7 +1767,7 @@ static int pnv_ioda1_tce_build(struct iommu_table *tbl, long index, int ret = pnv_tce_build(tbl, index, npages, uaddr, direction, attrs); - if (!ret && (tbl->it_type & TCE_PCI_SWINV_CREATE)) + if (!ret) pnv_pci_p7ioc_tce_invalidate(tbl, index, npages, false); return ret; @@ 
-1793,8 +1779,7 @@ static int pnv_ioda1_tce_xchg(struct iommu_table *tbl, long index, { long ret = pnv_tce_xchg(tbl, index, hpa, direction); - if (!ret && (tbl->it_type & - (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE))) + if (!ret) pnv_pci_p7ioc_tce_invalidate(tbl, index, 1, false); return ret; @@ -1806,8 +1791,7 @@ static void pnv_ioda1_tce_free(struct iommu_table *tbl, long index, { pnv_tce_free(tbl, index, npages); - if (tbl->it_type & TCE_PCI_SWINV_FREE) - pnv_pci_p7ioc_tce_invalidate(tbl, index, npages, false); + pnv_pci_p7ioc_tce_invalidate(tbl, index, npages, false); } static struct iommu_table_ops pnv_ioda1_iommu_ops = { @@ -1910,7 +1894,7 @@ static int pnv_ioda2_tce_build(struct iommu_table *tbl, long index, int ret = pnv_tce_build(tbl, index, npages, uaddr, direction, attrs); - if (!ret && (tbl->it_type & TCE_PCI_SWINV_CREATE)) + if (!ret) pnv_pci_ioda2_tce_invalidate(tbl, index, npages, false); return ret; @@ -1922,8 +1906,7 @@ static int pnv_ioda2_tce_xchg(struct iommu_table *tbl, long index, { long ret = pnv_tce_xchg(tbl, index, hpa, direction); - if (!ret && (tbl->it_type & - (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE))) + if (!ret) pnv_pci_ioda2_tce_invalidate(tbl, index, 1, false); return ret; @@ -1935,8 +1918,7 @@ static void pnv_ioda2_tce_free(struct iommu_table *tbl, long index, { pnv_tce_free(tbl, index, npages); - if (tbl->it_type & TCE_PCI_SWINV_FREE) - pnv_pci_ioda2_tce_invalidate(tbl, index, npages, false); + pnv_pci_ioda2_tce_invalidate(tbl, index, npages, false); } static void pnv_ioda2_table_free(struct iommu_table *tbl) @@ -2105,12 +2087,6 @@ found: base * PNV_IODA1_DMA32_SEGSIZE, IOMMU_PAGE_SHIFT_4K); - /* OPAL variant of P7IOC SW invalidated TCEs */ - if (phb->ioda.tce_inval_reg) - tbl->it_type |= (TCE_PCI_SWINV_CREATE | - TCE_PCI_SWINV_FREE | - TCE_PCI_SWINV_PAIR); - tbl->it_ops = &pnv_ioda1_iommu_ops; pe->table_group.tce32_start = tbl->it_offset << tbl->it_page_shift; pe->table_group.tce32_size = tbl->it_size << tbl->it_page_shift; @@ 
-2233,8 +2209,6 @@ static long pnv_pci_ioda2_create_table(struct iommu_table_group *table_group, } tbl->it_ops = &pnv_ioda2_iommu_ops; - if (pe->phb->ioda.tce_inval_reg) - tbl->it_type |= (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE); *ptbl = tbl; @@ -2283,10 +2257,6 @@ static long pnv_pci_ioda2_setup_default_config(struct pnv_ioda_pe *pe) if (!pnv_iommu_bypass_disabled) pnv_pci_ioda2_set_bypass(pe, true); - /* OPAL variant of PHB3 invalidated TCEs */ - if (pe->phb->ioda.tce_inval_reg) - tbl->it_type |= (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE); - /* * Setting table base here only for carrying iommu_group * further down to let iommu_add_device() do the job. -- cgit v0.10.2 From fd141d1a99a300944dc42a7820afb8c1bafd4035 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 8 Jul 2016 16:37:14 +1000 Subject: powerpc/powernv/pci: Rework accessing the TCE invalidate register It's architected, always in a known place, so there is no need to keep a separate pointer to it, we use the existing "regs", and we complement it with a real mode variant. Signed-off-by: Benjamin Herrenschmidt # Conflicts: # arch/powerpc/platforms/powernv/pci-ioda.c # arch/powerpc/platforms/powernv/pci.h Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index c9494ea..6203b2f 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1721,6 +1721,13 @@ static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, } } +static inline __be64 __iomem *pnv_ioda_get_inval_reg(struct pnv_phb *phb, + bool real_mode) +{ + return real_mode ? 
(__be64 __iomem *)(phb->regs_phys + 0x210) : + (phb->regs + 0x210); +} + static void pnv_pci_p7ioc_tce_invalidate(struct iommu_table *tbl, unsigned long index, unsigned long npages, bool rm) { @@ -1729,9 +1736,7 @@ static void pnv_pci_p7ioc_tce_invalidate(struct iommu_table *tbl, next); struct pnv_ioda_pe *pe = container_of(tgl->table_group, struct pnv_ioda_pe, table_group); - __be64 __iomem *invalidate = rm ? - (__be64 __iomem *)pe->phb->ioda.tce_inval_reg_phys : - pe->phb->ioda.tce_inval_reg; + __be64 __iomem *invalidate = pnv_ioda_get_inval_reg(pe->phb, rm); unsigned long start, end, inc; start = __pa(((__be64 *)tbl->it_base) + index - tbl->it_offset); @@ -1809,39 +1814,36 @@ static struct iommu_table_ops pnv_ioda1_iommu_ops = { void pnv_pci_phb3_tce_invalidate_entire(struct pnv_phb *phb, bool rm) { + __be64 __iomem *invalidate = pnv_ioda_get_inval_reg(phb, rm); const unsigned long val = PHB3_TCE_KILL_INVAL_ALL; mb(); /* Ensure previous TCE table stores are visible */ if (rm) - __raw_rm_writeq(cpu_to_be64(val), - (__be64 __iomem *) - phb->ioda.tce_inval_reg_phys); + __raw_rm_writeq(cpu_to_be64(val), invalidate); else - __raw_writeq(cpu_to_be64(val), phb->ioda.tce_inval_reg); + __raw_writeq(cpu_to_be64(val), invalidate); } static inline void pnv_pci_phb3_tce_invalidate_pe(struct pnv_ioda_pe *pe) { /* 01xb - invalidate TCEs that match the specified PE# */ + __be64 __iomem *invalidate = pnv_ioda_get_inval_reg(pe->phb, false); unsigned long val = PHB3_TCE_KILL_INVAL_PE | (pe->pe_number & 0xFF); - struct pnv_phb *phb = pe->phb; - - if (!phb->ioda.tce_inval_reg) - return; mb(); /* Ensure above stores are visible */ - __raw_writeq(cpu_to_be64(val), phb->ioda.tce_inval_reg); + __raw_writeq(cpu_to_be64(val), invalidate); } -static void pnv_pci_phb3_tce_invalidate(unsigned pe_number, bool rm, - __be64 __iomem *invalidate, unsigned shift, - unsigned long index, unsigned long npages) +static void pnv_pci_phb3_tce_invalidate(struct pnv_ioda_pe *pe, bool rm, + unsigned shift, 
unsigned long index, + unsigned long npages) { + __be64 __iomem *invalidate = pnv_ioda_get_inval_reg(pe->phb, false); unsigned long start, end, inc; /* We'll invalidate DMA address in PE scope */ start = PHB3_TCE_KILL_INVAL_ONE; - start |= (pe_number & 0xFF); + start |= (pe->pe_number & 0xFF); end = start; /* Figure out the start, end and step */ @@ -1867,10 +1869,6 @@ static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl, list_for_each_entry_rcu(tgl, &tbl->it_group_list, next) { struct pnv_ioda_pe *pe = container_of(tgl->table_group, struct pnv_ioda_pe, table_group); - __be64 __iomem *invalidate = rm ? - (__be64 __iomem *)pe->phb->ioda.tce_inval_reg_phys : - pe->phb->ioda.tce_inval_reg; - if (pe->phb->type == PNV_PHB_NPU) { /* * The NVLink hardware does not support TCE kill @@ -1880,9 +1878,8 @@ static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl, pnv_pci_phb3_tce_invalidate_entire(pe->phb, rm); continue; } - pnv_pci_phb3_tce_invalidate(pe->pe_number, rm, - invalidate, tbl->it_page_shift, - index, npages); + pnv_pci_phb3_tce_invalidate(pe, rm, tbl->it_page_shift, + index, npages); } } @@ -2467,19 +2464,6 @@ static void pnv_pci_ioda_setup_iommu_api(void) static void pnv_pci_ioda_setup_iommu_api(void) { }; #endif -static void pnv_pci_ioda_setup_opal_tce_kill(struct pnv_phb *phb) -{ - const __be64 *swinvp; - - /* OPAL variant of PHB3 invalidated TCEs */ - swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL); - if (!swinvp) - return; - - phb->ioda.tce_inval_reg_phys = be64_to_cpup(swinvp); - phb->ioda.tce_inval_reg = ioremap(phb->ioda.tce_inval_reg_phys, 8); -} - static __be64 *pnv_pci_ioda2_table_do_alloc_pages(int nid, unsigned shift, unsigned levels, unsigned long limit, unsigned long *current_offset, unsigned long *total_allocated) @@ -3459,6 +3443,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, struct pnv_phb *phb; unsigned long size, m64map_off, m32map_off, pemap_off; unsigned long iomap_off = 0, 
dma32map_off = 0; + struct resource r; const __be64 *prop64; const __be32 *prop32; int len; @@ -3519,12 +3504,12 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, pci_process_bridge_OF_ranges(hose, np, !hose->global_number); /* Get registers */ - phb->regs = of_iomap(np, 0); - if (phb->regs == NULL) - pr_err(" Failed to map registers !\n"); - - /* Initialize TCE kill register */ - pnv_pci_ioda_setup_opal_tce_kill(phb); + if (!of_address_to_resource(np, 0, &r)) { + phb->regs_phys = r.start; + phb->regs = ioremap(r.start, resource_size(&r)); + if (phb->regs == NULL) + pr_err(" Failed to map registers !\n"); + } /* Initialize more IODA stuff */ phb->ioda.total_pe_num = 1; diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index 1336fe2..d088d4f 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -86,6 +86,7 @@ struct pnv_phb { u64 opal_id; int flags; void __iomem *regs; + u64 regs_phys; int initialized; spinlock_t lock; @@ -162,12 +163,6 @@ struct pnv_phb { /* Reverse map of PEs, indexed by {bus, devfn} */ unsigned int pe_rmap[0x10000]; - - /* TCE cache invalidate registers (physical and - * remapped) - */ - phys_addr_t tce_inval_reg_phys; - __be64 __iomem *tce_inval_reg; } ioda; /* PHB and hub status structure */ -- cgit v0.10.2 From f0228c413011b7e20de9198c4a7244f498dcda2a Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 8 Jul 2016 16:37:15 +1000 Subject: powerpc/powernv/pci: Fallback to OPAL for TCE invalidations If we don't find registers for the PHB or don't know the model specific invalidation method, use OPAL calls instead. 
Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 6203b2f..d7502f2 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1861,6 +1861,17 @@ static void pnv_pci_phb3_tce_invalidate(struct pnv_ioda_pe *pe, bool rm, } } +static inline void pnv_pci_ioda2_tce_invalidate_pe(struct pnv_ioda_pe *pe) +{ + struct pnv_phb *phb = pe->phb; + + if (phb->model == PNV_PHB_MODEL_PHB3 && phb->regs) + pnv_pci_phb3_tce_invalidate_pe(pe); + else + opal_pci_tce_kill(phb->opal_id, OPAL_PCI_TCE_KILL_PE, + pe->pe_number, 0, 0, 0); +} + static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl, unsigned long index, unsigned long npages, bool rm) { @@ -1869,17 +1880,31 @@ static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl, list_for_each_entry_rcu(tgl, &tbl->it_group_list, next) { struct pnv_ioda_pe *pe = container_of(tgl->table_group, struct pnv_ioda_pe, table_group); - if (pe->phb->type == PNV_PHB_NPU) { + struct pnv_phb *phb = pe->phb; + unsigned int shift = tbl->it_page_shift; + + if (phb->type == PNV_PHB_NPU) { /* * The NVLink hardware does not support TCE kill * per TCE entry so we have to invalidate * the entire cache for it. 
*/ - pnv_pci_phb3_tce_invalidate_entire(pe->phb, rm); + pnv_pci_phb3_tce_invalidate_entire(phb, rm); continue; } - pnv_pci_phb3_tce_invalidate(pe, rm, tbl->it_page_shift, - index, npages); + if (phb->model == PNV_PHB_MODEL_PHB3 && phb->regs) + pnv_pci_phb3_tce_invalidate(pe, rm, shift, + index, npages); + else if (rm) + opal_rm_pci_tce_kill(phb->opal_id, + OPAL_PCI_TCE_KILL_PAGES, + pe->pe_number, 1u << shift, + index << shift, npages); + else + opal_pci_tce_kill(phb->opal_id, + OPAL_PCI_TCE_KILL_PAGES, + pe->pe_number, 1u << shift, + index << shift, npages); } } -- cgit v0.10.2 From a1339faf72ac0d90797516ad0996cec18fe534b5 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 8 Jul 2016 16:37:16 +1000 Subject: powerpc/powernv/pci: Use the device-tree to get available range of M64's M64's are the configurable 64-bit windows that cover the 64-bit MMIO space. We used to hard code 16 windows. Newer chips might have a variable number and might need to reserve some as well (for example on PHB4/POWER9, M32 and M64 are actually unified and we use M64#0 to map the 32-bit space). So newer OPALs will provide a property we can use to know what range of windows is available. The property is named so that it can eventually support multiple ranges but we only use the first one for now. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index d7502f2..ed27f4c 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -191,9 +191,6 @@ static int pnv_ioda2_init_m64(struct pnv_phb *phb) goto fail; } - /* Mark the M64 BAR assigned */ - set_bit(phb->ioda.m64_bar_idx, &phb->ioda.m64_bar_alloc); - /* * Exclude the segments for reserved and root bus PE, which * are first or last two PEs. 
@@ -404,6 +401,7 @@ static void __init pnv_ioda_parse_m64_window(struct pnv_phb *phb) struct pci_controller *hose = phb->hose; struct device_node *dn = hose->dn; struct resource *res; + u32 m64_range[2], i; const u32 *r; u64 pci_addr; @@ -424,6 +422,30 @@ static void __init pnv_ioda_parse_m64_window(struct pnv_phb *phb) return; } + /* + * Find the available M64 BAR range and pickup the last one for + * covering the whole 64-bits space. We support only one range. + */ + if (of_property_read_u32_array(dn, "ibm,opal-available-m64-ranges", + m64_range, 2)) { + /* In absence of the property, assume 0..15 */ + m64_range[0] = 0; + m64_range[1] = 16; + } + /* We only support 64 bits in our allocator */ + if (m64_range[1] > 63) { + pr_warn("%s: Limiting M64 range to 63 (from %d) on PHB#%x\n", + __func__, m64_range[1], phb->hose->global_number); + m64_range[1] = 63; + } + /* Empty range, no m64 */ + if (m64_range[1] <= m64_range[0]) { + pr_warn("%s: M64 empty, disabling M64 usage on PHB#%x\n", + __func__, phb->hose->global_number); + return; + } + + /* Configure M64 informations */ res = &hose->mem_resources[1]; res->name = dn->full_name; res->start = of_translate_address(dn, r + 2); @@ -436,11 +458,28 @@ static void __init pnv_ioda_parse_m64_window(struct pnv_phb *phb) phb->ioda.m64_segsize = phb->ioda.m64_size / phb->ioda.total_pe_num; phb->ioda.m64_base = pci_addr; - pr_info(" MEM64 0x%016llx..0x%016llx -> 0x%016llx\n", - res->start, res->end, pci_addr); + /* This lines up nicely with the display from processing OF ranges */ + pr_info(" MEM 0x%016llx..0x%016llx -> 0x%016llx (M64 #%d..%d)\n", + res->start, res->end, pci_addr, m64_range[0], + m64_range[0] + m64_range[1] - 1); + + /* Mark all M64 used up by default */ + phb->ioda.m64_bar_alloc = (unsigned long)-1; /* Use last M64 BAR to cover M64 window */ - phb->ioda.m64_bar_idx = 15; + m64_range[1]--; + phb->ioda.m64_bar_idx = m64_range[0] + m64_range[1]; + + pr_info(" Using M64 #%d as default window\n", 
phb->ioda.m64_bar_idx); + + /* Mark remaining ones free */ + for (i = m64_range[0]; i < m64_range[1]; i++) + clear_bit(i, &phb->ioda.m64_bar_alloc); + + /* + * Setup init functions for M64 based on IODA version, IODA3 uses + * the IODA2 code. + */ if (phb->type == PNV_PHB_IODA1) phb->init_m64 = pnv_ioda1_init_m64; else -- cgit v0.10.2 From 08a45b320a0ec76866acca7db2fe2647387e5c21 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 8 Jul 2016 16:37:17 +1000 Subject: powerpc/powernv/pci: Check status of a PHB before using it If the firmware encounters an error (internal or HW) during initialization of a PHB, it might leave the device-node in the tree but mark it disabled using the "status" property. We should check it. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index ed27f4c..891fc4a 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -3516,6 +3516,9 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, void *aux; long rc; + if (!of_device_is_available(np)) + return; + pr_info("Initializing %s PHB (%s)\n", pnv_phb_names[ioda_type], of_node_full_name(np)); -- cgit v0.10.2 From 9a1a70ae1545c99ed685431c2ab6c4a2c58625de Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 8 Jul 2016 16:37:18 +1000 Subject: powerpc/pci: Don't try to allocate resources that will be reassigned When we know we will reassign all resources, trying (and failing) to allocate them initially is fairly pointless and leads to a lot of scary messages in the kernel log Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index c6ac4f0..f93942b 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -1418,8 +1418,10 @@ void __init pcibios_resource_survey(void) 
/* Allocate and assign resources */ list_for_each_entry(b, &pci_root_buses, node) pcibios_allocate_bus_resources(b); - pcibios_allocate_resources(0); - pcibios_allocate_resources(1); + if (!pci_has_flag(PCI_REASSIGN_ALL_RSRC)) { + pcibios_allocate_resources(0); + pcibios_allocate_resources(1); + } /* Before we start assigning unassigned resource, we try to reserve * the low IO area and the VGA memory area if they intersect the -- cgit v0.10.2 From 8cd6d3c23e226ec6cb8825e1aa6a391ebda71c72 Mon Sep 17 00:00:00 2001 From: Balbir Singh Date: Wed, 13 Jul 2016 15:05:20 +0530 Subject: powerpc/mm: Fix .long's in tlb-radix.c to more meaningful The .longs with the shifts are harder to read, use more meaningful names for the opcodes. PPC_TLBIE_5 is introduced for the 5 opcode variation of the instruction due to an existing op-code for the 2 opcode variant. Signed-off-by: Balbir Singh Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index 81657a1..1c18a43 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -190,6 +190,7 @@ #define PPC_INST_STSWX 0x7c00052a #define PPC_INST_STXVD2X 0x7c000798 #define PPC_INST_TLBIE 0x7c000264 +#define PPC_INST_TLBIEL 0x7c000224 #define PPC_INST_TLBILX 0x7c000024 #define PPC_INST_WAIT 0x7c00007c #define PPC_INST_TLBIVAX 0x7c000624 @@ -281,6 +282,9 @@ #define ___PPC_RB(b) (((b) & 0x1f) << 11) #define ___PPC_RS(s) (((s) & 0x1f) << 21) #define ___PPC_RT(t) ___PPC_RS(t) +#define ___PPC_R(r) (((r) & 0x1) << 16) +#define ___PPC_PRS(prs) (((prs) & 0x1) << 17) +#define ___PPC_RIC(ric) (((ric) & 0x3) << 18) #define __PPC_RA(a) ___PPC_RA(__REG_##a) #define __PPC_RA0(a) ___PPC_RA(__REGA0_##a) #define __PPC_RB(b) ___PPC_RB(__REG_##b) @@ -347,6 +351,16 @@ __PPC_WC(w)) #define PPC_TLBIE(lp,a) stringify_in_c(.long PPC_INST_TLBIE | \ ___PPC_RB(a) | ___PPC_RS(lp)) +#define PPC_TLBIE_5(rb,rs,ric,prs,r) \ + 
stringify_in_c(.long PPC_INST_TLBIE | \ + ___PPC_RB(rb) | ___PPC_RS(rs) | \ + ___PPC_RIC(ric) | ___PPC_PRS(prs) | \ + ___PPC_R(r)) +#define PPC_TLBIEL(rb,rs,ric,prs,r) \ + stringify_in_c(.long PPC_INST_TLBIEL | \ + ___PPC_RB(rb) | ___PPC_RS(rs) | \ + ___PPC_RIC(ric) | ___PPC_PRS(prs) | \ + ___PPC_R(r)) #define PPC_TLBSRX_DOT(a,b) stringify_in_c(.long PPC_INST_TLBSRX_DOT | \ __PPC_RA0(a) | __PPC_RB(b)) #define PPC_TLBIVAX(a,b) stringify_in_c(.long PPC_INST_TLBIVAX | \ diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c index ab2f60e..35690c4 100644 --- a/arch/powerpc/mm/tlb-radix.c +++ b/arch/powerpc/mm/tlb-radix.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -34,8 +35,7 @@ static inline void __tlbiel_pid(unsigned long pid, int set, r = 1; /* raidx format */ asm volatile("ptesync": : :"memory"); - asm volatile(".long 0x7c000224 | (%0 << 11) | (%1 << 16) |" - "(%2 << 17) | (%3 << 18) | (%4 << 21)" + asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1) : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); asm volatile("ptesync": : :"memory"); } @@ -63,8 +63,7 @@ static inline void _tlbie_pid(unsigned long pid, unsigned long ric) r = 1; /* raidx format */ asm volatile("ptesync": : :"memory"); - asm volatile(".long 0x7c000264 | (%0 << 11) | (%1 << 16) |" - "(%2 << 17) | (%3 << 18) | (%4 << 21)" + asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); asm volatile("eieio; tlbsync; ptesync": : :"memory"); } @@ -81,8 +80,7 @@ static inline void _tlbiel_va(unsigned long va, unsigned long pid, r = 1; /* raidx format */ asm volatile("ptesync": : :"memory"); - asm volatile(".long 0x7c000224 | (%0 << 11) | (%1 << 16) |" - "(%2 << 17) | (%3 << 18) | (%4 << 21)" + asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1) : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); asm volatile("ptesync": : :"memory"); } @@ -99,8 +97,7 @@ static inline void _tlbie_va(unsigned long va, unsigned 
long pid, r = 1; /* raidx format */ asm volatile("ptesync": : :"memory"); - asm volatile(".long 0x7c000264 | (%0 << 11) | (%1 << 16) |" - "(%2 << 17) | (%3 << 18) | (%4 << 21)" + asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); asm volatile("eieio; tlbsync; ptesync": : :"memory"); } -- cgit v0.10.2 From bf16cdf48a5369ba29614a0ade4ae5daf7a9e47c Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 13 Jul 2016 15:05:21 +0530 Subject: powerpc/mm/radix: Update LPCR HR bit as per ISA PowerISA 3.0 requires the MMU mode (radix vs. hash) of the hypervisor to be mirrored in the LPCR register, in addition to the partition table. This is done to avoid fetching from the table when deciding, among other things, how to perform transitions to HV mode on some interrupts. So let's set it up appropriately Signed-off-by: Aneesh Kumar K.V Acked-by: Balbir Singh Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 6de6abe..295a19a 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -366,6 +366,7 @@ #define LPCR_HVICE 0x00000002 /* P9: HV interrupt enable */ #define LPCR_HDICE 0x00000001 /* Hyp Decr enable (HV,PR,EE) */ #define LPCR_UPRT 0x00400000 /* Use Process Table (ISA 3) */ +#define LPCR_HR 0x00100000 #ifndef SPRN_LPID #define SPRN_LPID 0x13F /* Logical Partition Identifier */ #endif diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index 7931e14..4732fa3 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -342,7 +342,7 @@ void __init radix__early_init_mmu(void) radix_init_page_sizes(); if (!firmware_has_feature(FW_FEATURE_LPAR)) { lpcr = mfspr(SPRN_LPCR); - mtspr(SPRN_LPCR, lpcr | LPCR_UPRT); + mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR); radix_init_partition_table(); } @@ -357,7 +357,7 @@ void radix__early_init_mmu_secondary(void) */ if 
(!firmware_has_feature(FW_FEATURE_LPAR)) { lpcr = mfspr(SPRN_LPCR); - mtspr(SPRN_LPCR, lpcr | LPCR_UPRT); + mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR); mtspr(SPRN_PTCR, __pa(partition_tb) | (PATB_SIZE_SHIFT - 12)); -- cgit v0.10.2 From 66c570f545e056babdd9510595ce762dcedadd71 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 13 Jul 2016 15:05:22 +0530 Subject: powerpc/mm: use _raw variant of page table accessors This switch few of the page table accessor to use the __raw variant and does the cpu to big endian conversion of constants. This helps in generating better code. For ex: a pgd_none(pgd) check with and without fix is listed below Without fix: ------------ 2240: 20 00 61 eb ld r27,32(r1) /* PGD level */ typedef struct { __be64 pgd; } pgd_t; static inline unsigned long pgd_val(pgd_t x) { return be64_to_cpu(x.pgd); 2244: 22 00 66 78 rldicl r6,r3,32,32 2248: 3e 40 7d 54 rotlwi r29,r3,8 224c: 0e c0 7d 50 rlwimi r29,r3,24,0,7 2250: 3e 40 c5 54 rotlwi r5,r6,8 2254: 2e c4 7d 50 rlwimi r29,r3,24,16,23 2258: 0e c0 c5 50 rlwimi r5,r6,24,0,7 225c: 2e c4 c5 50 rlwimi r5,r6,24,16,23 2260: c6 07 bd 7b rldicr r29,r29,32,31 2264: 78 2b bd 7f or r29,r29,r5 if (pgd_none(pgd)) 2268: 00 00 bd 2f cmpdi cr7,r29,0 226c: 54 03 9e 41 beq cr7,25c0 <__get_user_pages_fast+0x500> With fix: --------- 2370: 20 00 61 eb ld r27,32(r1) if (pgd_none(pgd)) 2374: 00 00 bd 2f cmpdi cr7,r29,0 2378: a8 03 9e 41 beq cr7,2720 <__get_user_pages_fast+0x530> break; Signed-off-by: Aneesh Kumar K.V Acked-by: Balbir Singh Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/include/asm/book3s/64/pgtable-4k.h b/arch/powerpc/include/asm/book3s/64/pgtable-4k.h index 71e9abc..9db83b4 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable-4k.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable-4k.h @@ -11,7 +11,7 @@ static inline int pmd_huge(pmd_t pmd) * leaf pte for huge page */ if (radix_enabled()) - return !!(pmd_val(pmd) & _PAGE_PTE); + return !!(pmd_raw(pmd) & cpu_to_be64(_PAGE_PTE)); 
return 0; } @@ -21,7 +21,7 @@ static inline int pud_huge(pud_t pud) * leaf pte for huge page */ if (radix_enabled()) - return !!(pud_val(pud) & _PAGE_PTE); + return !!(pud_raw(pud) & cpu_to_be64(_PAGE_PTE)); return 0; } @@ -31,7 +31,7 @@ static inline int pgd_huge(pgd_t pgd) * leaf pte for huge page */ if (radix_enabled()) - return !!(pgd_val(pgd) & _PAGE_PTE); + return !!(pgd_raw(pgd) & cpu_to_be64(_PAGE_PTE)); return 0; } #define pgd_huge pgd_huge diff --git a/arch/powerpc/include/asm/book3s/64/pgtable-64k.h b/arch/powerpc/include/asm/book3s/64/pgtable-64k.h index cb2d0a5..0d2845b 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable-64k.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable-64k.h @@ -15,7 +15,7 @@ static inline int pmd_huge(pmd_t pmd) /* * leaf pte for huge page */ - return !!(pmd_val(pmd) & _PAGE_PTE); + return !!(pmd_raw(pmd) & cpu_to_be64(_PAGE_PTE)); } static inline int pud_huge(pud_t pud) @@ -23,7 +23,7 @@ static inline int pud_huge(pud_t pud) /* * leaf pte for huge page */ - return !!(pud_val(pud) & _PAGE_PTE); + return !!(pud_raw(pud) & cpu_to_be64(_PAGE_PTE)); } static inline int pgd_huge(pgd_t pgd) @@ -31,7 +31,7 @@ static inline int pgd_huge(pgd_t pgd) /* * leaf pte for huge page */ - return !!(pgd_val(pgd) & _PAGE_PTE); + return !!(pgd_raw(pgd) & cpu_to_be64(_PAGE_PTE)); } #define pgd_huge pgd_huge diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index ab84c89..263bf39 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -318,7 +318,7 @@ static inline int __ptep_test_and_clear_young(struct mm_struct *mm, { unsigned long old; - if ((pte_val(*ptep) & (_PAGE_ACCESSED | H_PAGE_HASHPTE)) == 0) + if ((pte_raw(*ptep) & cpu_to_be64(_PAGE_ACCESSED | H_PAGE_HASHPTE)) == 0) return 0; old = pte_update(mm, addr, ptep, _PAGE_ACCESSED, 0, 0); return (old & _PAGE_ACCESSED) != 0; @@ -336,8 +336,7 @@ static inline int 
__ptep_test_and_clear_young(struct mm_struct *mm, static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - - if ((pte_val(*ptep) & _PAGE_WRITE) == 0) + if ((pte_raw(*ptep) & cpu_to_be64(_PAGE_WRITE)) == 0) return; pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 0); @@ -346,7 +345,7 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - if ((pte_val(*ptep) & _PAGE_WRITE) == 0) + if ((pte_raw(*ptep) & cpu_to_be64(_PAGE_WRITE)) == 0) return; pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 1); @@ -365,17 +364,35 @@ static inline void pte_clear(struct mm_struct *mm, unsigned long addr, { pte_update(mm, addr, ptep, ~0UL, 0, 0); } -static inline int pte_write(pte_t pte) { return !!(pte_val(pte) & _PAGE_WRITE);} -static inline int pte_dirty(pte_t pte) { return !!(pte_val(pte) & _PAGE_DIRTY); } -static inline int pte_young(pte_t pte) { return !!(pte_val(pte) & _PAGE_ACCESSED); } -static inline int pte_special(pte_t pte) { return !!(pte_val(pte) & _PAGE_SPECIAL); } + +static inline int pte_write(pte_t pte) +{ + return !!(pte_raw(pte) & cpu_to_be64(_PAGE_WRITE)); +} + +static inline int pte_dirty(pte_t pte) +{ + return !!(pte_raw(pte) & cpu_to_be64(_PAGE_DIRTY)); +} + +static inline int pte_young(pte_t pte) +{ + return !!(pte_raw(pte) & cpu_to_be64(_PAGE_ACCESSED)); +} + +static inline int pte_special(pte_t pte) +{ + return !!(pte_raw(pte) & cpu_to_be64(_PAGE_SPECIAL)); +} + static inline pgprot_t pte_pgprot(pte_t pte) { return __pgprot(pte_val(pte) & PAGE_PROT_BITS); } #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY static inline bool pte_soft_dirty(pte_t pte) { - return !!(pte_val(pte) & _PAGE_SOFT_DIRTY); + return !!(pte_raw(pte) & cpu_to_be64(_PAGE_SOFT_DIRTY)); } + static inline pte_t pte_mksoft_dirty(pte_t pte) { return __pte(pte_val(pte) | _PAGE_SOFT_DIRTY); @@ -395,14 +412,14 @@ static inline pte_t 
pte_clear_soft_dirty(pte_t pte) */ static inline int pte_protnone(pte_t pte) { - return (pte_val(pte) & (_PAGE_PRESENT | _PAGE_PRIVILEGED)) == - (_PAGE_PRESENT | _PAGE_PRIVILEGED); + return (pte_raw(pte) & cpu_to_be64(_PAGE_PRESENT | _PAGE_PRIVILEGED)) == + cpu_to_be64(_PAGE_PRESENT | _PAGE_PRIVILEGED); } #endif /* CONFIG_NUMA_BALANCING */ static inline int pte_present(pte_t pte) { - return !!(pte_val(pte) & _PAGE_PRESENT); + return !!(pte_raw(pte) & cpu_to_be64(_PAGE_PRESENT)); } /* * Conversion functions: convert a page and protection to a page entry, @@ -474,7 +491,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) static inline bool pte_user(pte_t pte) { - return !(pte_val(pte) & _PAGE_PRIVILEGED); + return !(pte_raw(pte) & cpu_to_be64(_PAGE_PRIVILEGED)); } /* Encode and de-code a swap entry */ @@ -517,10 +534,12 @@ static inline pte_t pte_swp_mksoft_dirty(pte_t pte) { return __pte(pte_val(pte) | _PAGE_SWP_SOFT_DIRTY); } + static inline bool pte_swp_soft_dirty(pte_t pte) { - return !!(pte_val(pte) & _PAGE_SWP_SOFT_DIRTY); + return !!(pte_raw(pte) & cpu_to_be64(_PAGE_SWP_SOFT_DIRTY)); } + static inline pte_t pte_swp_clear_soft_dirty(pte_t pte) { return __pte(pte_val(pte) & ~_PAGE_SWP_SOFT_DIRTY); @@ -626,8 +645,16 @@ static inline void pmd_clear(pmd_t *pmdp) *pmdp = __pmd(0); } -#define pmd_none(pmd) (!pmd_val(pmd)) -#define pmd_present(pmd) (!pmd_none(pmd)) +static inline int pmd_none(pmd_t pmd) +{ + return !pmd_raw(pmd); +} + +static inline int pmd_present(pmd_t pmd) +{ + + return !pmd_none(pmd); +} static inline int pmd_bad(pmd_t pmd) { @@ -646,19 +673,26 @@ static inline void pud_clear(pud_t *pudp) *pudp = __pud(0); } -#define pud_none(pud) (!pud_val(pud)) -#define pud_present(pud) (pud_val(pud) != 0) +static inline int pud_none(pud_t pud) +{ + return !pud_raw(pud); +} + +static inline int pud_present(pud_t pud) +{ + return !pud_none(pud); +} extern struct page *pud_page(pud_t pud); extern struct page *pmd_page(pmd_t pmd); static inline pte_t 
pud_pte(pud_t pud) { - return __pte(pud_val(pud)); + return __pte_raw(pud_raw(pud)); } static inline pud_t pte_pud(pte_t pte) { - return __pud(pte_val(pte)); + return __pud_raw(pte_raw(pte)); } #define pud_write(pud) pte_write(pud_pte(pud)) @@ -681,17 +715,24 @@ static inline void pgd_clear(pgd_t *pgdp) *pgdp = __pgd(0); } -#define pgd_none(pgd) (!pgd_val(pgd)) -#define pgd_present(pgd) (!pgd_none(pgd)) +static inline int pgd_none(pgd_t pgd) +{ + return !pgd_raw(pgd); +} + +static inline int pgd_present(pgd_t pgd) +{ + return !pgd_none(pgd); +} static inline pte_t pgd_pte(pgd_t pgd) { - return __pte(pgd_val(pgd)); + return __pte_raw(pgd_raw(pgd)); } static inline pgd_t pte_pgd(pte_t pte) { - return __pgd(pte_val(pte)); + return __pgd_raw(pte_raw(pte)); } static inline int pgd_bad(pgd_t pgd) @@ -783,12 +824,12 @@ struct page *realmode_pfn_to_page(unsigned long pfn); static inline pte_t pmd_pte(pmd_t pmd) { - return __pte(pmd_val(pmd)); + return __pte_raw(pmd_raw(pmd)); } static inline pmd_t pte_pmd(pte_t pte) { - return __pmd(pte_val(pte)); + return __pmd_raw(pte_raw(pte)); } static inline pte_t *pmdp_ptep(pmd_t *pmd) @@ -849,7 +890,7 @@ pmd_hugepage_update(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, static inline int pmd_large(pmd_t pmd) { - return !!(pmd_val(pmd) & _PAGE_PTE); + return !!(pmd_raw(pmd) & cpu_to_be64(_PAGE_PTE)); } static inline pmd_t pmd_mknotpresent(pmd_t pmd) @@ -865,7 +906,7 @@ static inline int __pmdp_test_and_clear_young(struct mm_struct *mm, { unsigned long old; - if ((pmd_val(*pmdp) & (_PAGE_ACCESSED | H_PAGE_HASHPTE)) == 0) + if ((pmd_raw(*pmdp) & cpu_to_be64(_PAGE_ACCESSED | H_PAGE_HASHPTE)) == 0) return 0; old = pmd_hugepage_update(mm, addr, pmdp, _PAGE_ACCESSED, 0); return ((old & _PAGE_ACCESSED) != 0); @@ -876,7 +917,7 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp) { - if ((pmd_val(*pmdp) & _PAGE_WRITE) == 0) + if ((pmd_raw(*pmdp) & cpu_to_be64(_PAGE_WRITE)) == 0) return; 
pmd_hugepage_update(mm, addr, pmdp, _PAGE_WRITE, 0); diff --git a/arch/powerpc/include/asm/pgtable-be-types.h b/arch/powerpc/include/asm/pgtable-be-types.h index e2bf208..49c0a5a 100644 --- a/arch/powerpc/include/asm/pgtable-be-types.h +++ b/arch/powerpc/include/asm/pgtable-be-types.h @@ -6,6 +6,7 @@ /* PTE level */ typedef struct { __be64 pte; } pte_t; #define __pte(x) ((pte_t) { cpu_to_be64(x) }) +#define __pte_raw(x) ((pte_t) { (x) }) static inline unsigned long pte_val(pte_t x) { return be64_to_cpu(x.pte); @@ -20,6 +21,7 @@ static inline __be64 pte_raw(pte_t x) #ifdef CONFIG_PPC64 typedef struct { __be64 pmd; } pmd_t; #define __pmd(x) ((pmd_t) { cpu_to_be64(x) }) +#define __pmd_raw(x) ((pmd_t) { (x) }) static inline unsigned long pmd_val(pmd_t x) { return be64_to_cpu(x.pmd); @@ -37,21 +39,34 @@ static inline __be64 pmd_raw(pmd_t x) #if defined(CONFIG_PPC_BOOK3S_64) || !defined(CONFIG_PPC_64K_PAGES) typedef struct { __be64 pud; } pud_t; #define __pud(x) ((pud_t) { cpu_to_be64(x) }) +#define __pud_raw(x) ((pud_t) { (x) }) static inline unsigned long pud_val(pud_t x) { return be64_to_cpu(x.pud); } + +static inline __be64 pud_raw(pud_t x) +{ + return x.pud; +} + #endif /* CONFIG_PPC_BOOK3S_64 || !CONFIG_PPC_64K_PAGES */ #endif /* CONFIG_PPC64 */ /* PGD level */ typedef struct { __be64 pgd; } pgd_t; #define __pgd(x) ((pgd_t) { cpu_to_be64(x) }) +#define __pgd_raw(x) ((pgd_t) { (x) }) static inline unsigned long pgd_val(pgd_t x) { return be64_to_cpu(x.pgd); } +static inline __be64 pgd_raw(pgd_t x) +{ + return x.pgd; +} + /* Page protection bits */ typedef struct { unsigned long pgprot; } pgprot_t; #define pgprot_val(x) ((x).pgprot) -- cgit v0.10.2 From e21fc93b7020905c9f3672969ddd276aa152b9b7 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 13 Jul 2016 15:05:23 +0530 Subject: powerpc/mm: Compile out radix related functions if RADIX_MMU is disabled Currently we depend on mmu_has_feature to evalute to zero based on MMU_FTRS_POSSIBLE mask. 
In a later patch, we want to update radix_enabled() to runtime update the conditional operation to a jump instruction. This implies we cannot depend on MMU_FTRS_POSSIBLE mask. Instead define radix_enabled to return 0 if RADIX_MMU is not enabled. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h index 5854263..d4eda64 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu.h +++ b/arch/powerpc/include/asm/book3s/64/mmu.h @@ -23,7 +23,12 @@ struct mmu_psize_def { }; extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT]; +#ifdef CONFIG_PPC_RADIX_MMU #define radix_enabled() mmu_has_feature(MMU_FTR_RADIX) +#else +#define radix_enabled() (0) +#endif + #endif /* __ASSEMBLY__ */ -- cgit v0.10.2 From accfad7d0a85c5678eef76083972426032d64469 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 13 Jul 2016 15:05:24 +0530 Subject: powerpc/mm: Clear top 16 bits of va only on older cpus As per ISA, we need to do this only for architecture version 2.02 and earlier. This continued to work even for 2.07. But let's not do this for anything after 2.02. ISA 3.0 requires these top bits to be not cleared. 
Signed-off-by: Aneesh Kumar K.V Acked-by: Balbir Singh Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index e53ebeb..5447122 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -24,6 +24,11 @@ /* * This is individual features */ +/* + * We need to clear top 16bits of va (from the remaining 64 bits )in + * tlbie* instructions + */ +#define MMU_FTR_TLBIE_CROP_VA ASM_CONST(0x00008000) /* Enable use of high BAT registers */ #define MMU_FTR_USE_HIGH_BATS ASM_CONST(0x00010000) @@ -97,7 +102,7 @@ #define MMU_FTRS_DEFAULT_HPTE_ARCH_V2 \ MMU_FTR_HPTE_TABLE | MMU_FTR_PPCAS_ARCH_V2 #define MMU_FTRS_POWER4 MMU_FTRS_DEFAULT_HPTE_ARCH_V2 -#define MMU_FTRS_PPC970 MMU_FTRS_POWER4 +#define MMU_FTRS_PPC970 MMU_FTRS_POWER4 | MMU_FTR_TLBIE_CROP_VA #define MMU_FTRS_POWER5 MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE #define MMU_FTRS_POWER6 MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE #define MMU_FTRS_POWER7 MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE @@ -124,7 +129,7 @@ enum { MMU_FTR_USE_TLBRSRV | MMU_FTR_USE_PAIRED_MAS | MMU_FTR_NO_SLBIE_B | MMU_FTR_16M_PAGE | MMU_FTR_TLBIEL | MMU_FTR_LOCKLESS_TLBIE | MMU_FTR_CI_LARGE_PAGE | - MMU_FTR_1T_SEGMENT | + MMU_FTR_1T_SEGMENT | MMU_FTR_TLBIE_CROP_VA | #ifdef CONFIG_PPC_RADIX_MMU MMU_FTR_RADIX | #endif diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index eeeacf6..d81f826 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -137,7 +137,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_name = "POWER4 (gp)", .cpu_features = CPU_FTRS_POWER4, .cpu_user_features = COMMON_USER_POWER4, - .mmu_features = MMU_FTRS_POWER4, + .mmu_features = MMU_FTRS_POWER4 | MMU_FTR_TLBIE_CROP_VA, .icache_bsize = 128, .dcache_bsize = 128, .num_pmcs = 8, @@ -152,7 +152,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_name = "POWER4+ (gq)", .cpu_features = CPU_FTRS_POWER4, .cpu_user_features = 
COMMON_USER_POWER4, - .mmu_features = MMU_FTRS_POWER4, + .mmu_features = MMU_FTRS_POWER4 | MMU_FTR_TLBIE_CROP_VA, .icache_bsize = 128, .dcache_bsize = 128, .num_pmcs = 8, diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index b0e0fdb..70521ef 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -64,7 +64,8 @@ static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize) * Older versions of the architecture (2.02 and earler) require the * masking of the top 16 bits. */ - va &= ~(0xffffULL << 48); + if (mmu_has_feature(MMU_FTR_TLBIE_CROP_VA)) + va &= ~(0xffffULL << 48); switch (psize) { case MMU_PAGE_4K: @@ -113,7 +114,8 @@ static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize) * Older versions of the architecture (2.02 and earler) require the * masking of the top 16 bits. */ - va &= ~(0xffffULL << 48); + if (mmu_has_feature(MMU_FTR_TLBIE_CROP_VA)) + va &= ~(0xffffULL << 48); switch (psize) { case MMU_PAGE_4K: -- cgit v0.10.2 From 56547411a07b0aabf55ce8b841dfdb7daced1250 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 13 Jul 2016 15:05:25 +0530 Subject: powerpc/mm: Print information regarding the MMU mode This helps in easily identifying the MMU mode with which the kernel is operating. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 1e9117e5..4f0fd47 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -739,7 +739,7 @@ static void __init hash_init_partition_table(phys_addr_t hash_table, * For now UPRT is 0 for us. */ partition_tb->patb1 = 0; - DBG("Partition table %p\n", partition_tb); + pr_info("Partition table %p\n", partition_tb); /* * update partition table control register, * 64 K size.
@@ -947,6 +947,7 @@ void __init hash__early_init_mmu(void) */ htab_initialize(); + pr_info("Initializing hash mmu with SLB\n"); /* Initialize SLB management */ slb_initialize(); } diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index 4732fa3..8636bf1 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -182,7 +182,8 @@ static void __init radix_init_partition_table(void) partition_tb = early_alloc_pgtable(1UL << PATB_SIZE_SHIFT); partition_tb->patb0 = cpu_to_be64(rts_field | __pa(init_mm.pgd) | RADIX_PGD_INDEX_SIZE | PATB_HR); - printk("Partition table %p\n", partition_tb); + pr_info("Initializing Radix MMU\n"); + pr_info("Partition table %p\n", partition_tb); memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE); /* -- cgit v0.10.2 From 4b7a350480506bf292193e9c1db6fc19d57321ec Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 13 Jul 2016 15:05:26 +0530 Subject: powerpc/mm/hash: Update SDR1 size encoding as documented in ISA 3.0 ISA 3.0 document hash table size in bytes = 2^(HTABSIZE + 18) No functionality change by this patch. 
Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 4f0fd47..7d0955e 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -714,10 +714,9 @@ int remove_section_mapping(unsigned long start, unsigned long end) #endif /* CONFIG_MEMORY_HOTPLUG */ static void __init hash_init_partition_table(phys_addr_t hash_table, - unsigned long pteg_count) + unsigned long htab_size) { unsigned long ps_field; - unsigned long htab_size; unsigned long patb_size = 1UL << PATB_SIZE_SHIFT; /* @@ -725,7 +724,7 @@ static void __init hash_init_partition_table(phys_addr_t hash_table, * We can ignore that for lpid 0 */ ps_field = 0; - htab_size = __ilog2(pteg_count) - 11; + htab_size = __ilog2(htab_size) - 18; BUILD_BUG_ON_MSG((PATB_SIZE_SHIFT > 24), "Partition table size too large."); partition_tb = __va(memblock_alloc_base(patb_size, patb_size, @@ -811,7 +810,7 @@ static void __init htab_initialize(void) htab_address = __va(table); /* htab absolute addr + encoded htabsize */ - _SDR1 = table + __ilog2(pteg_count) - 11; + _SDR1 = table + __ilog2(htab_size_bytes) - 18; /* Initialize the HPT with no entries */ memset((void *)table, 0, htab_size_bytes); @@ -820,7 +819,7 @@ static void __init htab_initialize(void) /* Set SDR1 */ mtspr(SPRN_SDR1, _SDR1); else - hash_init_partition_table(table, pteg_count); + hash_init_partition_table(table, htab_size_bytes); } prot = pgprot_val(PAGE_KERNEL); -- cgit v0.10.2 From 09cf5bcb0c93550db87f738b6012d97dbf73beb7 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 13 Jul 2016 15:05:27 +0530 Subject: powerpc/mm/radix: Update PID switch sequence Update the PID switch as per ISA doc. slbia is needed in radix to invalidate any implementation specific lookaside information. We use the .long format due to build errors with the below compiler version. 
gcc (Ubuntu 5.3.1-14ubuntu2.1) 5.3.1 20160413 GNU assembler (GNU Binutils for Ubuntu) 2.26 CC arch/powerpc/mm//mmu_context_book3s64.o {standard input}: Assembler messages: {standard input}:506: Error: junk at end of line: `0x7' scripts/Makefile.build:291: recipe for target 'arch/powerpc/mm//mmu_context_book3s64.o' failed make[1]: *** [arch/powerpc/mm//mmu_context_book3s64.o] Error 1 Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index 1c18a43..5ecfb04 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -181,6 +181,7 @@ #define PPC_INST_MTSPR_DSCR_USER 0x7c0303a6 #define PPC_INST_MTSPR_DSCR_USER_MASK 0xfc1fffff #define PPC_INST_SLBFEE 0x7c0007a7 +#define PPC_INST_SLBIA 0x7c0003e4 #define PPC_INST_STRING 0x7c00042a #define PPC_INST_STRING_MASK 0xfc0007fe @@ -442,5 +443,7 @@ ___PPC_RA(a) | \ ___PPC_RB(b)) +#define PPC_SLBIA(IH) stringify_in_c(.long PPC_INST_SLBIA | \ + ((IH & 0x7) << 21)) #endif /* _ASM_POWERPC_PPC_OPCODE_H */ diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c index 19622222..b114f8b 100644 --- a/arch/powerpc/mm/mmu_context_book3s64.c +++ b/arch/powerpc/mm/mmu_context_book3s64.c @@ -181,7 +181,10 @@ void destroy_context(struct mm_struct *mm) #ifdef CONFIG_PPC_RADIX_MMU void radix__switch_mmu_context(struct mm_struct *prev, struct mm_struct *next) { - mtspr(SPRN_PID, next->context.id); asm volatile("isync": : :"memory"); + mtspr(SPRN_PID, next->context.id); + asm volatile("isync \n" + PPC_SLBIA(0x7) + : : :"memory"); } #endif -- cgit v0.10.2 From 83209bc86129f7800dfd0e3e211e9b96b397b4b7 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 13 Jul 2016 15:05:28 +0530 Subject: powerpc/mm/radix: Update machine call back to support new HCALL. This update the machine dep callback such that we can use the same callback to register process table. 
The interface is updated such that we can easily call H_REGISTER_PROC_TBL hcall. The HCALL itself is introduced in a later patch. No functionality change introduced by this patch. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index a9af1bd..e62e7d3 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -256,7 +256,8 @@ struct machdep_calls { #ifdef CONFIG_ARCH_RANDOM int (*get_random_seed)(unsigned long *v); #endif - int (*update_partition_table)(u64); + int (*register_process_table)(unsigned long base, unsigned long page_size, + unsigned long tbl_size); }; extern void e500_idle(void); diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index 70521ef..cb3b4c9 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -725,8 +725,14 @@ static void native_flush_hash_range(unsigned long number, int local) local_irq_restore(flags); } -static int native_update_partition_table(u64 patb1) +static int native_register_proc_table(unsigned long base, unsigned long page_size, + unsigned long table_size) { + unsigned long patb1 = base << 25; /* VSID */ + + patb1 |= (page_size << 5); /* sllp */ + patb1 |= table_size; + partition_tb->patb1 = cpu_to_be64(patb1); return 0; } @@ -743,5 +749,5 @@ void __init hpte_init_native(void) ppc_md.hugepage_invalidate = native_hugepage_invalidate; if (cpu_has_feature(CPU_FTR_ARCH_300)) - ppc_md.update_partition_table = native_update_partition_table; + ppc_md.register_process_table = native_register_proc_table; } diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index 8636bf1..4ea1094 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -21,8 +21,11 @@ #include -static int native_update_partition_table(u64 patb1) +static int native_register_process_table(unsigned long base, unsigned long 
pg_sz, + unsigned long table_size) { + unsigned long patb1 = base | table_size | PATB_GR; + partition_tb->patb1 = cpu_to_be64(patb1); return 0; } @@ -168,7 +171,7 @@ redo: * of process table here. But our linear mapping also enable us to use * physical address here. */ - ppc_md.update_partition_table(__pa(process_tb) | (PRTB_SIZE_SHIFT - 12) | PATB_GR); + ppc_md.register_process_table(__pa(process_tb), 0, PRTB_SIZE_SHIFT - 12); pr_info("Process table %p and radix root for kernel: %p\n", process_tb, init_mm.pgd); } @@ -195,7 +198,7 @@ static void __init radix_init_partition_table(void) void __init radix_init_native(void) { - ppc_md.update_partition_table = native_update_partition_table; + ppc_md.register_process_table = native_register_process_table; } static int __init get_idx_from_shift(unsigned int shift) -- cgit v0.10.2 From 912cc87a654d3d30f5da6220489a97a5beb7d313 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 13 Jul 2016 15:05:29 +0530 Subject: powerpc/mm/radix: Add LPID based tlb flush helpers We add a tlb flush variant, to flush LPID mappings. 
Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h index 3fa94fca..00703e7e 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h @@ -32,5 +32,7 @@ extern void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr); #define radix___flush_tlb_page(mm,addr,p,i) radix___local_flush_tlb_page(mm,addr,p,i) #define radix__flush_tlb_pwc(tlb, addr) radix__local_flush_tlb_pwc(tlb, addr) #endif - +extern void radix__flush_tlb_lpid_va(unsigned long lpid, unsigned long gpa, + unsigned long page_size); +extern void radix__flush_tlb_lpid(unsigned long lpid); #endif diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c index 35690c4..e1f2270 100644 --- a/arch/powerpc/mm/tlb-radix.c +++ b/arch/powerpc/mm/tlb-radix.c @@ -282,9 +282,61 @@ void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start, } EXPORT_SYMBOL(radix__flush_tlb_range); +static int radix_get_mmu_psize(int page_size) +{ + int psize; + + if (page_size == (1UL << mmu_psize_defs[mmu_virtual_psize].shift)) + psize = mmu_virtual_psize; + else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_2M].shift)) + psize = MMU_PAGE_2M; + else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_1G].shift)) + psize = MMU_PAGE_1G; + else + return -1; + return psize; +} void radix__tlb_flush(struct mmu_gather *tlb) { struct mm_struct *mm = tlb->mm; radix__flush_tlb_mm(mm); } + +void radix__flush_tlb_lpid_va(unsigned long lpid, unsigned long gpa, + unsigned long page_size) +{ + unsigned long rb,rs,prs,r; + unsigned long ap; + unsigned long ric = RIC_FLUSH_TLB; + + ap = mmu_get_ap(radix_get_mmu_psize(page_size)); + rb = gpa & ~(PPC_BITMASK(52, 63)); + rb |= ap << PPC_BITLSHIFT(58); + rs = lpid & ((1UL << 32) - 1); + prs = 0; /* partition scoped */ + r = 1; /* radix format */ + + asm volatile("ptesync":
: :"memory"); + asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) + : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); + asm volatile("eieio; tlbsync; ptesync": : :"memory"); +} +EXPORT_SYMBOL(radix__flush_tlb_lpid_va); + +void radix__flush_tlb_lpid(unsigned long lpid) +{ + unsigned long rb,rs,prs,r; + unsigned long ric = RIC_FLUSH_ALL; + + rb = 0x2 << PPC_BITLSHIFT(53); /* IS = 2 */ + rs = lpid & ((1UL << 32) - 1); + prs = 0; /* partition scoped */ + r = 1; /* raidx format */ + + asm volatile("ptesync": : :"memory"); + asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) + : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); + asm volatile("eieio; tlbsync; ptesync": : :"memory"); +} +EXPORT_SYMBOL(radix__flush_tlb_lpid); -- cgit v0.10.2 From b275bfb2696387be216db5b7372ee9dcf3f05b80 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 13 Jul 2016 15:05:31 +0530 Subject: powerpc/mm/radix: Add a kernel command line to disable radix This patch adds the kernel command line disable_radix which disable the radix MMU mode even if firmware indicates radix support via ibm,pa-features device tree node. This helps in testing different MMU mode easily. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 738bae4..bba7ef3 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -929,6 +929,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted. disable= [IPV6] See Documentation/networking/ipv6.txt. 
+ disable_radix [PPC] + Disable RADIX MMU mode on POWER9 + disable_cpu_apicid= [X86,APIC,SMP] Format: The number of initial APIC ID for the diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 48434be..7a01113 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -646,6 +646,14 @@ static void __init early_reserve_mem(void) #endif } +static bool disable_radix; +static int __init parse_disable_radix(char *p) +{ + disable_radix = true; + return 0; +} +early_param("disable_radix", parse_disable_radix); + void __init early_init_devtree(void *params) { phys_addr_t limit; @@ -735,6 +743,11 @@ void __init early_init_devtree(void *params) */ spinning_secondaries = boot_cpu_count - 1; #endif + /* + * now fixup radix MMU mode based on kernel command line + */ + if (disable_radix) + cur_cpu_spec->mmu_features &= ~MMU_FTR_RADIX; #ifdef CONFIG_PPC_POWERNV /* Scan and build the list of machine check recoverable ranges */ -- cgit v0.10.2 From a4b349540a26af9a544e2e858223140fb9ddc35c Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 15 Jul 2016 21:04:26 +1000 Subject: powerpc/mm: Cleanup LPCR defines This makes it easy to verify we are not overloading the bits. No functionality change by this patch. mpe: Cleanup more. Completely fixup whitespace, convert all UL values to ASM_CONST(), and replace all occurrences of 63-x with the actual shift. 
Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 295a19a..d7e9ab5 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -330,43 +330,43 @@ #define HFSCR_FP __MASK(FSCR_FP_LG) #define SPRN_TAR 0x32f /* Target Address Register */ #define SPRN_LPCR 0x13E /* LPAR Control Register */ -#define LPCR_VPM0 (1ul << (63-0)) -#define LPCR_VPM1 (1ul << (63-1)) -#define LPCR_ISL (1ul << (63-2)) -#define LPCR_VC_SH (63-2) -#define LPCR_DPFD_SH (63-11) -#define LPCR_DPFD (7ul << LPCR_DPFD_SH) -#define LPCR_VRMASD (0x1ful << (63-16)) -#define LPCR_VRMA_L (1ul << (63-12)) -#define LPCR_VRMA_LP0 (1ul << (63-15)) -#define LPCR_VRMA_LP1 (1ul << (63-16)) -#define LPCR_VRMASD_SH (63-16) -#define LPCR_RMLS 0x1C000000 /* impl dependent rmo limit sel */ -#define LPCR_RMLS_SH (63-37) -#define LPCR_ILE 0x02000000 /* !HV irqs set MSR:LE */ -#define LPCR_AIL 0x01800000 /* Alternate interrupt location */ -#define LPCR_AIL_0 0x00000000 /* MMU off exception offset 0x0 */ -#define LPCR_AIL_3 0x01800000 /* MMU on exception offset 0xc00...4xxx */ -#define LPCR_ONL 0x00040000 /* online - PURR/SPURR count */ -#define LPCR_LD 0x00020000 /* large decremeter */ -#define LPCR_PECE 0x0001f000 /* powersave exit cause enable */ -#define LPCR_PECEDP 0x00010000 /* directed priv dbells cause exit */ -#define LPCR_PECEDH 0x00008000 /* directed hyp dbells cause exit */ -#define LPCR_PECE0 0x00004000 /* ext. 
exceptions can cause exit */ -#define LPCR_PECE1 0x00002000 /* decrementer can cause exit */ -#define LPCR_PECE2 0x00001000 /* machine check etc can cause exit */ -#define LPCR_MER 0x00000800 /* Mediated External Exception */ -#define LPCR_MER_SH 11 -#define LPCR_TC 0x00000200 /* Translation control */ -#define LPCR_LPES 0x0000000c -#define LPCR_LPES0 0x00000008 /* LPAR Env selector 0 */ -#define LPCR_LPES1 0x00000004 /* LPAR Env selector 1 */ -#define LPCR_LPES_SH 2 -#define LPCR_RMI 0x00000002 /* real mode is cache inhibit */ -#define LPCR_HVICE 0x00000002 /* P9: HV interrupt enable */ -#define LPCR_HDICE 0x00000001 /* Hyp Decr enable (HV,PR,EE) */ -#define LPCR_UPRT 0x00400000 /* Use Process Table (ISA 3) */ -#define LPCR_HR 0x00100000 +#define LPCR_VPM0 ASM_CONST(0x8000000000000000) +#define LPCR_VPM1 ASM_CONST(0x4000000000000000) +#define LPCR_ISL ASM_CONST(0x2000000000000000) +#define LPCR_VC_SH 61 +#define LPCR_DPFD_SH 52 +#define LPCR_DPFD (ASM_CONST(7) << LPCR_DPFD_SH) +#define LPCR_VRMASD_SH 47 +#define LPCR_VRMASD (ASM_CONST(0x1f) << LPCR_VRMASD_SH) /* 5-bit field mask */ +#define LPCR_VRMA_L ASM_CONST(0x0008000000000000) +#define LPCR_VRMA_LP0 ASM_CONST(0x0001000000000000) +#define LPCR_VRMA_LP1 ASM_CONST(0x0000800000000000) +#define LPCR_RMLS 0x1C000000 /* Implementation dependent RMO limit sel */ +#define LPCR_RMLS_SH 26 +#define LPCR_ILE ASM_CONST(0x0000000002000000) /* !HV irqs set MSR:LE */ +#define LPCR_AIL ASM_CONST(0x0000000001800000) /* Alternate interrupt location */ +#define LPCR_AIL_0 ASM_CONST(0x0000000000000000) /* MMU off exception offset 0x0 */ +#define LPCR_AIL_3 ASM_CONST(0x0000000001800000) /* MMU on exception offset 0xc00...4xxx */ +#define LPCR_ONL ASM_CONST(0x0000000000040000) /* online - PURR/SPURR count */ +#define LPCR_LD ASM_CONST(0x0000000000020000) /* large decremeter */ +#define LPCR_PECE ASM_CONST(0x000000000001f000) /* powersave exit cause enable */ +#define LPCR_PECEDP ASM_CONST(0x0000000000010000) /* directed priv dbells cause exit */ +#define
LPCR_PECEDH ASM_CONST(0x0000000000008000) /* directed hyp dbells cause exit */ +#define LPCR_PECE0 ASM_CONST(0x0000000000004000) /* ext. exceptions can cause exit */ +#define LPCR_PECE1 ASM_CONST(0x0000000000002000) /* decrementer can cause exit */ +#define LPCR_PECE2 ASM_CONST(0x0000000000001000) /* machine check etc can cause exit */ +#define LPCR_MER ASM_CONST(0x0000000000000800) /* Mediated External Exception */ +#define LPCR_MER_SH 11 +#define LPCR_TC ASM_CONST(0x0000000000000200) /* Translation control */ +#define LPCR_LPES 0x0000000c +#define LPCR_LPES0 ASM_CONST(0x0000000000000008) /* LPAR Env selector 0 */ +#define LPCR_LPES1 ASM_CONST(0x0000000000000004) /* LPAR Env selector 1 */ +#define LPCR_LPES_SH 2 +#define LPCR_RMI ASM_CONST(0x0000000000000002) /* real mode is cache inhibit */ +#define LPCR_HVICE ASM_CONST(0x0000000000000002) /* P9: HV interrupt enable */ +#define LPCR_HDICE ASM_CONST(0x0000000000000001) /* Hyp Decr enable (HV,PR,EE) */ +#define LPCR_UPRT ASM_CONST(0x0000000000400000) /* Use Process Table (ISA 3) */ +#define LPCR_HR ASM_CONST(0x0000000000100000) #ifndef SPRN_LPID #define SPRN_LPID 0x13F /* Logical Partition Identifier */ #endif -- cgit v0.10.2 From c2ca9f6b4cc4c45eb598b24b8b06beee668052d5 Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Tue, 19 Jul 2016 12:33:35 +1000 Subject: powerpc/powernv: Fix pci-cxl.c build when CONFIG_MODULES=n pnv_cxl_enable_phb_kernel_api() grabs a reference to the cxl module to prevent it from being unloaded after the PHB has been switched to CX4 mode. This breaks the build when CONFIG_MODULES=n as module_mutex doesn't exist. However, if we don't have modules, we don't need to protect against the case of the cxl module being unloaded. As such, split the relevant code out into a function surrounded with #if IS_MODULE(CXL) so we don't try to compile it if cxl isn't being compiled as a module. 
Fixes: 5918dbc9b4ec ("powerpc/powernv: Add support for the cxl kernel api on the real phb") Reported-by: Michael Ellerman Signed-off-by: Ian Munsie Signed-off-by: Andrew Donnellan Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/platforms/powernv/pci-cxl.c b/arch/powerpc/platforms/powernv/pci-cxl.c index 3f34207..1349a09 100644 --- a/arch/powerpc/platforms/powernv/pci-cxl.c +++ b/arch/powerpc/platforms/powernv/pci-cxl.c @@ -166,6 +166,28 @@ int pnv_cxl_ioda_msi_setup(struct pci_dev *dev, unsigned int hwirq, } EXPORT_SYMBOL(pnv_cxl_ioda_msi_setup); +#if IS_MODULE(CONFIG_CXL) +static inline int get_cxl_module(void) +{ + struct module *cxl_module; + + mutex_lock(&module_mutex); + + cxl_module = find_module("cxl"); + if (cxl_module) + __module_get(cxl_module); + + mutex_unlock(&module_mutex); + + if (!cxl_module) + return -ENODEV; + + return 0; +} +#else +static inline int get_cxl_module(void) { return 0; } +#endif + /* * Sets flags and switches the controller ops to enable the cxl kernel api. * Originally the cxl kernel API operated on a virtual PHB, but certain cards @@ -175,7 +197,7 @@ EXPORT_SYMBOL(pnv_cxl_ioda_msi_setup); int pnv_cxl_enable_phb_kernel_api(struct pci_controller *hose, bool enable) { struct pnv_phb *phb = hose->private_data; - struct module *cxl_module; + int rc; if (!enable) { /* @@ -194,13 +216,9 @@ int pnv_cxl_enable_phb_kernel_api(struct pci_controller *hose, bool enable) * long as we are in this mode (and since we can't safely disable this * mode once enabled...). 
*/ - mutex_lock(&module_mutex); - cxl_module = find_module("cxl"); - if (cxl_module) - __module_get(cxl_module); - mutex_unlock(&module_mutex); - if (!cxl_module) - return -ENODEV; + rc = get_cxl_module(); + if (rc) + return rc; phb->flags |= PNV_PHB_FLAG_CXL; hose->controller_ops = pnv_cxl_cx4_ioda_controller_ops; -- cgit v0.10.2 From 1e44727a0b220f6ead12fefcff997354be0f0f02 Mon Sep 17 00:00:00 2001 From: Andrew Donnellan Date: Mon, 18 Jul 2016 14:52:57 +1000 Subject: cxl: remove dead Kconfig options Remove the CXL_KERNEL_API and CXL_EEH Kconfig options, as they were only needed to coordinate the merging of the cxlflash driver. Also remove the stub implementation of cxl_perst_reloads_same_image() in cxlflash which is only used if CXL_EEH isn't defined (i.e. never). Suggested-by: Ian Munsie Signed-off-by: Andrew Donnellan Acked-by: Ian Munsie Acked-by: Matthew R. Ochs Signed-off-by: Michael Ellerman diff --git a/drivers/misc/cxl/Kconfig b/drivers/misc/cxl/Kconfig index 8d76770..b75cf83 100644 --- a/drivers/misc/cxl/Kconfig +++ b/drivers/misc/cxl/Kconfig @@ -7,14 +7,6 @@ config CXL_BASE default n select PPC_COPRO_BASE -config CXL_KERNEL_API - bool - default n - -config CXL_EEH - bool - default n - config CXL_AFU_DRIVER_OPS bool default n @@ -23,8 +15,6 @@ config CXL tristate "Support for IBM Coherent Accelerators (CXL)" depends on PPC_POWERNV && PCI_MSI && EEH select CXL_BASE - select CXL_KERNEL_API - select CXL_EEH select CXL_AFU_DRIVER_OPS default m help diff --git a/drivers/scsi/cxlflash/main.h b/drivers/scsi/cxlflash/main.h index eb9d8f7..a893408 100644 --- a/drivers/scsi/cxlflash/main.h +++ b/drivers/scsi/cxlflash/main.h @@ -100,8 +100,4 @@ struct asyc_intr_info { #define SCAN_HOST 0x04 }; -#ifndef CONFIG_CXL_EEH -#define cxl_perst_reloads_same_image(_a, _b) do { } while (0) -#endif - #endif /* _CXLFLASH_MAIN_H */ -- cgit v0.10.2 From 8fbaa51d43ef2c6a72849ec34060910723a0365f Mon Sep 17 00:00:00 2001 From: Andrew Donnellan Date: Fri, 15 Jul 2016 17:20:36 +1000 
Subject: cxl: fix potential NULL dereference in free_adapter() If kzalloc() fails when allocating adapter->guest in cxl_guest_init_adapter(), we call free_adapter() before erroring out. free_adapter() in turn attempts to dereference adapter->guest, which in this case is NULL. In free_adapter(), skip the adapter->guest cleanup if adapter->guest is NULL. Fixes: 14baf4d9c739 ("cxl: Add guest-specific code") Reported-by: Dan Carpenter Signed-off-by: Andrew Donnellan Signed-off-by: Michael Ellerman diff --git a/drivers/misc/cxl/guest.c b/drivers/misc/cxl/guest.c index ee7148e..9aa58a7 100644 --- a/drivers/misc/cxl/guest.c +++ b/drivers/misc/cxl/guest.c @@ -1055,16 +1055,18 @@ static void free_adapter(struct cxl *adapter) struct irq_avail *cur; int i; - if (adapter->guest->irq_avail) { - for (i = 0; i < adapter->guest->irq_nranges; i++) { - cur = &adapter->guest->irq_avail[i]; - kfree(cur->bitmap); + if (adapter->guest) { + if (adapter->guest->irq_avail) { + for (i = 0; i < adapter->guest->irq_nranges; i++) { + cur = &adapter->guest->irq_avail[i]; + kfree(cur->bitmap); + } + kfree(adapter->guest->irq_avail); } - kfree(adapter->guest->irq_avail); + kfree(adapter->guest->status); + kfree(adapter->guest); } - kfree(adapter->guest->status); cxl_remove_adapter_nr(adapter); - kfree(adapter->guest); kfree(adapter); } -- cgit v0.10.2 From 9054619ef54a3a832863ae25d15ac410ae3df146 Mon Sep 17 00:00:00 2001 From: John Allen Date: Thu, 7 Jul 2016 10:00:34 -0500 Subject: powerpc/pseries: Add pseries hotplug workqueue In support of PAPR changes to add a new hotplug interrupt, introduce a hotplug workqueue to avoid processing hotplug events in interrupt context. We will also take advantage of the queue on PowerVM to ensure hotplug events initiated from different sources (HMC and PRRN events) are handled and serialized properly. 
Signed-off-by: John Allen Reviewed-by: Nathan Fontenot Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c index 2b93ae8..66a77d7 100644 --- a/arch/powerpc/platforms/pseries/dlpar.c +++ b/arch/powerpc/platforms/pseries/dlpar.c @@ -27,6 +27,15 @@ #include #include +struct workqueue_struct *pseries_hp_wq; + +struct pseries_hp_work { + struct work_struct work; + struct pseries_hp_errorlog *errlog; + struct completion *hp_completion; + int *rc; +}; + struct cc_workarea { __be32 drc_index; __be32 zero; @@ -368,10 +377,51 @@ static int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_elog) return rc; } +void pseries_hp_work_fn(struct work_struct *work) +{ + struct pseries_hp_work *hp_work = + container_of(work, struct pseries_hp_work, work); + + if (hp_work->rc) + *(hp_work->rc) = handle_dlpar_errorlog(hp_work->errlog); + else + handle_dlpar_errorlog(hp_work->errlog); + + if (hp_work->hp_completion) + complete(hp_work->hp_completion); + + kfree(hp_work->errlog); + kfree((void *)work); +} + +void queue_hotplug_event(struct pseries_hp_errorlog *hp_errlog, + struct completion *hotplug_done, int *rc) +{ + struct pseries_hp_work *work; + struct pseries_hp_errorlog *hp_errlog_copy; + + hp_errlog_copy = kmalloc(sizeof(struct pseries_hp_errorlog), + GFP_KERNEL); + memcpy(hp_errlog_copy, hp_errlog, sizeof(struct pseries_hp_errorlog)); + + work = kmalloc(sizeof(struct pseries_hp_work), GFP_KERNEL); + if (work) { + INIT_WORK((struct work_struct *)work, pseries_hp_work_fn); + work->errlog = hp_errlog_copy; + work->hp_completion = hotplug_done; + work->rc = rc; + queue_work(pseries_hp_wq, (struct work_struct *)work); + } else { + *rc = -ENOMEM; + complete(hotplug_done); + } +} + static ssize_t dlpar_store(struct class *class, struct class_attribute *attr, const char *buf, size_t count) { struct pseries_hp_errorlog *hp_elog; + struct completion hotplug_done; const char *arg; int rc; @@ -450,6 +500,8 @@ static 
CLASS_ATTR(dlpar, S_IWUSR, NULL, dlpar_store); static int __init pseries_dlpar_init(void) { + pseries_hp_wq = alloc_workqueue("pseries hotplug workqueue", + WQ_UNBOUND, 1); return sysfs_create_file(kernel_kobj, &class_attr_dlpar.attr); } machine_device_initcall(pseries, pseries_dlpar_init); -- cgit v0.10.2 From b7d9eb397b8764c1f1c53d504aa70f85ce0e212f Mon Sep 17 00:00:00 2001 From: John Allen Date: Thu, 7 Jul 2016 10:03:44 -0500 Subject: powerpc/pseries: Add support for hotplug interrupt source Add handler for new hotplug interrupt. For memory and CPU hotplug events, we will add the hotplug errorlog to the hotplug workqueue. Since PCI hotplug is not currently supported in the kernel, PCI hotplug events are written to the rtas_log_buf and are handled by rtas_errd. Signed-off-by: John Allen Reviewed-by: Nathan Fontenot Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h index ddb9aa5..bba3285 100644 --- a/arch/powerpc/platforms/pseries/pseries.h +++ b/arch/powerpc/platforms/pseries/pseries.h @@ -56,6 +56,8 @@ extern int dlpar_detach_node(struct device_node *); extern int dlpar_acquire_drc(u32 drc_index); extern int dlpar_release_drc(u32 drc_index); +void queue_hotplug_event(struct pseries_hp_errorlog *hp_errlog, + struct completion *hotplug_done, int *rc); #ifdef CONFIG_MEMORY_HOTPLUG int dlpar_memory(struct pseries_hp_errorlog *hp_elog); #else diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index 9a3e27b..904a677 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -43,6 +43,7 @@ static int ras_check_exception_token; /* EPOW events counter variable */ static int num_epow_events; +static irqreturn_t ras_hotplug_interrupt(int irq, void *dev_id); static irqreturn_t ras_epow_interrupt(int irq, void *dev_id); static irqreturn_t ras_error_interrupt(int irq, void *dev_id); @@ -65,6 +66,14 @@ static int __init
init_ras_IRQ(void) of_node_put(np); } + /* Hotplug Events */ + np = of_find_node_by_path("/event-sources/hot-plug-events"); + if (np != NULL) { + request_event_sources_irqs(np, ras_hotplug_interrupt, + "RAS_HOTPLUG"); + of_node_put(np); + } + /* EPOW Events */ np = of_find_node_by_path("/event-sources/epow-events"); if (np != NULL) { @@ -190,6 +199,36 @@ static void rtas_parse_epow_errlog(struct rtas_error_log *log) num_epow_events++; } +static irqreturn_t ras_hotplug_interrupt(int irq, void *dev_id) +{ + struct pseries_errorlog *pseries_log; + struct pseries_hp_errorlog *hp_elog; + + spin_lock(&ras_log_buf_lock); + + rtas_call(ras_check_exception_token, 6, 1, NULL, + RTAS_VECTOR_EXTERNAL_INTERRUPT, virq_to_hw(irq), + RTAS_HOTPLUG_EVENTS, 0, __pa(&ras_log_buf), + rtas_get_error_log_max()); + + pseries_log = get_pseries_errorlog((struct rtas_error_log *)ras_log_buf, + PSERIES_ELOG_SECT_ID_HOTPLUG); + hp_elog = (struct pseries_hp_errorlog *)pseries_log->data; + + /* + * Since PCI hotplug is not currently supported on pseries, put PCI + * hotplug events on the ras_log_buf to be handled by rtas_errd. + */ + if (hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_MEM || + hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_CPU) + queue_hotplug_event(hp_elog, NULL, NULL); + else + log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0); + + spin_unlock(&ras_log_buf_lock); + return IRQ_HANDLED; +} + /* Handle environmental and power warning (EPOW) interrupts. */ static irqreturn_t ras_epow_interrupt(int irq, void *dev_id) { -- cgit v0.10.2 From 1dc75956663661e6bfee7836ddd2def2f66745b9 Mon Sep 17 00:00:00 2001 From: John Allen Date: Thu, 7 Jul 2016 10:05:52 -0500 Subject: powerpc/pseries: Use kernel hotplug queue for PowerVM hotplug events The sysfs interface used to handle PowerVM hotplug events should use the hotplug queue as well. 
PRRN events will soon be placing many hotplug events on the queue at once and we will need ordinary hotplug events to use the queue as well in order to ensure these events will still be handled and that proper serialization is maintained during the PRRN event. Signed-off-by: John Allen Reviewed-by: Nathan Fontenot Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c index 66a77d7..4748124 100644 --- a/arch/powerpc/platforms/pseries/dlpar.c +++ b/arch/powerpc/platforms/pseries/dlpar.c @@ -489,7 +489,9 @@ static ssize_t dlpar_store(struct class *class, struct class_attribute *attr, goto dlpar_store_out; } - rc = handle_dlpar_errorlog(hp_elog); + init_completion(&hotplug_done); + queue_hotplug_event(hp_elog, &hotplug_done, &rc); + wait_for_completion(&hotplug_done); dlpar_store_out: kfree(hp_elog); -- cgit v0.10.2 From da4230714662278781d007fb2b2dcb5bcb9aa524 Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Wed, 13 Jul 2016 09:14:38 +0800 Subject: powerpc/32/booke: Fix the build error when CRASH_DUMP is enabled In the current code, RELOCATABLE is forcibly enabled when enabling CRASH_DUMP. But for ppc32, RELOCATABLE also depends on ADVANCED_OPTIONS and selects NONSTATIC_KERNEL. This will cause the following build error when CRASH_DUMP=y && ADVANCED_OPTIONS=n because the select of NONSTATIC_KERNEL doesn't take effect. arch/powerpc/include/asm/io.h: In function 'virt_to_phys': arch/powerpc/include/asm/page.h:113:26: error: 'virt_phys_offset' undeclared (first use in this function) #define VIRT_PHYS_OFFSET virt_phys_offset ^ There is no strong reason to make RELOCATABLE depend on ADVANCED_OPTIONS. So remove this dependency to fix this issue.
Signed-off-by: Kevin Hao Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index ee82f9a..dfe2151 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -952,7 +952,7 @@ config DYNAMIC_MEMSTART config RELOCATABLE bool "Build a relocatable kernel" - depends on ADVANCED_OPTIONS && FLATMEM && (44x || FSL_BOOKE) + depends on FLATMEM && (44x || FSL_BOOKE) select NONSTATIC_KERNEL help This builds a kernel image that is capable of running at the -- cgit v0.10.2 From 4c91bd6eeabb004f283db8a6854b134e2a2de1bc Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Wed, 13 Jul 2016 09:14:39 +0800 Subject: powerpc: Merge the RELOCATABLE config entries for ppc32 and ppc64 It makes no sense to keep two separate RELOCATABLE config entries for ppc32 and ppc64 respectively. Merge them into one and move it to a common place. Signed-off-by: Kevin Hao Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index dfe2151..f044782 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -458,6 +458,33 @@ config KEXEC interface is strongly in flux, so no good recommendation can be made. +config RELOCATABLE + bool "Build a relocatable kernel" + depends on (PPC64 && !COMPILE_TEST) || (FLATMEM && (44x || FSL_BOOKE)) + select NONSTATIC_KERNEL + help + This builds a kernel image that is capable of running at the + location the kernel is loaded at. For ppc32, there is no any + alignment restrictions, and this feature is a superset of + DYNAMIC_MEMSTART and hence overrides it. For ppc64, we should use + 16k-aligned base address. The kernel is linked as a + position-independent executable (PIE) and contains dynamic relocations + which are processed early in the bootup process. + + One use is for the kexec on panic case where the recovery kernel + must live at a different physical address than the primary + kernel. 
+ + Note: If CONFIG_RELOCATABLE=y, then the kernel runs from the address + it has been loaded at and the compile time physical addresses + CONFIG_PHYSICAL_START is ignored. However CONFIG_PHYSICAL_START + setting can still be useful to bootwrappers that need to know the + load address of the kernel (eg. u-boot/mkimage). + +config RELOCATABLE_PPC32 + def_bool y + depends on PPC32 && RELOCATABLE + config CRASH_DUMP bool "Build a kdump crash kernel" depends on PPC64 || 6xx || FSL_BOOKE || (44x && !SMP) @@ -950,29 +977,6 @@ config DYNAMIC_MEMSTART This option is overridden by CONFIG_RELOCATABLE -config RELOCATABLE - bool "Build a relocatable kernel" - depends on FLATMEM && (44x || FSL_BOOKE) - select NONSTATIC_KERNEL - help - This builds a kernel image that is capable of running at the - location the kernel is loaded at, without any alignment restrictions. - This feature is a superset of DYNAMIC_MEMSTART and hence overrides it. - - One use is for the kexec on panic case where the recovery kernel - must live at a different physical address than the primary - kernel. - - Note: If CONFIG_RELOCATABLE=y, then the kernel runs from the address - it has been loaded at and the compile time physical addresses - CONFIG_PHYSICAL_START is ignored. However CONFIG_PHYSICAL_START - setting can still be useful to bootwrappers that need to know the - load address of the kernel (eg. u-boot/mkimage). - -config RELOCATABLE_PPC32 - def_bool y - depends on PPC32 && RELOCATABLE - config PAGE_OFFSET_BOOL bool "Set custom page offset address" depends on ADVANCED_OPTIONS @@ -1058,21 +1062,6 @@ config PIN_TLB endmenu if PPC64 -config RELOCATABLE - bool "Build a relocatable kernel" - depends on !COMPILE_TEST - select NONSTATIC_KERNEL - help - This builds a kernel image that is capable of running anywhere - in the RMA (real memory area) at any 16k-aligned base address. 
- The kernel is linked as a position-independent executable (PIE) - and contains dynamic relocations which are processed early - in the bootup process. - - One use is for the kexec on panic case where the recovery kernel - must live at a different physical address than the primary - kernel. - # This value must have zeroes in the bottom 60 bits otherwise lots will break config PAGE_OFFSET hex -- cgit v0.10.2 From 27d1149667352772240655b65372a4294f992ea7 Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Wed, 13 Jul 2016 09:14:40 +0800 Subject: powerpc/32: Remove RELOCATABLE_PPC32 It is seldom used in the kernel code and can be easily replaced by either RELOCATABLE or PPC32. So there is no reason to keep a separate kernel option for this. Signed-off-by: Kevin Hao Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index f044782..9b6d36b 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -481,10 +481,6 @@ config RELOCATABLE setting can still be useful to bootwrappers that need to know the load address of the kernel (eg. u-boot/mkimage). -config RELOCATABLE_PPC32 - def_bool y - depends on PPC32 && RELOCATABLE - config CRASH_DUMP bool "Build a kdump crash kernel" depends on PPC64 || 6xx || FSL_BOOKE || (44x && !SMP) diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index 51db3a3..56398e7 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -96,7 +96,7 @@ extern unsigned int HPAGE_SHIFT; extern phys_addr_t memstart_addr; extern phys_addr_t kernstart_addr; -#ifdef CONFIG_RELOCATABLE_PPC32 +#if defined(CONFIG_RELOCATABLE) && defined(CONFIG_PPC32) extern long long virt_phys_offset; #endif @@ -139,9 +139,9 @@ extern long long virt_phys_offset; * determine MEMORY_START until then. However we can determine PHYSICAL_START * from information at hand (program counter, TLB lookup). 
* - * On BookE with RELOCATABLE (RELOCATABLE_PPC32) + * On BookE with RELOCATABLE && PPC32 * - * With RELOCATABLE_PPC32, we support loading the kernel at any physical + * With RELOCATABLE && PPC32, we support loading the kernel at any physical * address without any restriction on the page alignment. * * We find the runtime address of _stext and relocate ourselves based on diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 9e7bfc32..fe4c075 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -42,7 +42,6 @@ obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_ppc970.o cpu_setup_pa6t.o obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o obj-$(CONFIG_PPC_BOOK3S_64) += mce.o mce_power.o -obj64-$(CONFIG_RELOCATABLE) += reloc_64.o obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o idle_book3e.o obj-$(CONFIG_PPC64) += vdso64/ obj-$(CONFIG_ALTIVEC) += vecemu.o @@ -87,7 +86,7 @@ extra-$(CONFIG_FSL_BOOKE) := head_fsl_booke.o extra-$(CONFIG_8xx) := head_8xx.o extra-y += vmlinux.lds -obj-$(CONFIG_RELOCATABLE_PPC32) += reloc_32.o +obj-$(CONFIG_RELOCATABLE) += reloc_$(CONFIG_WORD_SIZE).o obj-$(CONFIG_PPC32) += entry_32.o setup_32.o obj-$(CONFIG_PPC64) += dma-iommu.o iommu.o diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 2dd91f7..b5fba68 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -165,7 +165,7 @@ SECTIONS . 
= ALIGN(8); .dynsym : AT(ADDR(.dynsym) - LOAD_OFFSET) { -#ifdef CONFIG_RELOCATABLE_PPC32 +#ifdef CONFIG_PPC32 __dynamic_symtab = .; #endif *(.dynsym) diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index e2d7ba1..448685f 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -64,7 +64,7 @@ EXPORT_SYMBOL(memstart_addr); phys_addr_t kernstart_addr; EXPORT_SYMBOL(kernstart_addr); -#ifdef CONFIG_RELOCATABLE_PPC32 +#ifdef CONFIG_RELOCATABLE /* Used in __va()/__pa() */ long long virt_phys_offset; EXPORT_SYMBOL(virt_phys_offset); -- cgit v0.10.2 From e2413a7dae52fab290b7a8d11ec8579657bab95b Mon Sep 17 00:00:00 2001 From: Tyrel Datwyler Date: Mon, 11 Jul 2016 17:16:27 -0500 Subject: PCI: rpaphp: Fix slot registration for multiple slots under a PHB The underlying slot hotplug registration code assumed multiple slots, but the actual implementation is broken for multiple slots. This went unnoticed for years due to the fact that PowerVM seems to only ever provide a single hotplug slot per PHB. Under qemu/kvm the hotplug slot model aligns more with x86 where multiple slots are presented under a single PHB. As seen in the following, each additional slot after the first fails to register due to each slot always being compared against the first child node of the PHB in the device tree. rpaphp: RPA HOT Plug PCI Controller Driver version: 0.1 rpaphp: Slot [Slot 0] registered rpaphp: pci_hp_register failed with error -16 rpaphp: pci_hp_register failed with error -16 rpaphp: pci_hp_register failed with error -16 rpaphp: pci_hp_register failed with error -16 The registration logic is fixed so that each slot is compared against the existing child devices of the PHB in the device tree to determine present slots vs empty slots.
rpaphp: RPA HOT Plug PCI Controller Driver version: 0.1 rpaphp: Slot [C0] registered rpaphp: Slot [C1] registered rpaphp: Slot [C2] registered rpaphp: Slot [C3] registered rpaphp: Slot [C4] registered Signed-off-by: Tyrel Datwyler Reviewed-by: Nathan Fontenot [mpe: Massage changelog] Signed-off-by: Michael Ellerman diff --git a/drivers/pci/hotplug/rpaphp_slot.c b/drivers/pci/hotplug/rpaphp_slot.c index 6937c72..388c4d8 100644 --- a/drivers/pci/hotplug/rpaphp_slot.c +++ b/drivers/pci/hotplug/rpaphp_slot.c @@ -117,8 +117,10 @@ EXPORT_SYMBOL_GPL(rpaphp_deregister_slot); int rpaphp_register_slot(struct slot *slot) { struct hotplug_slot *php_slot = slot->hotplug_slot; + struct device_node *child; + u32 my_index; int retval; - int slotno; + int slotno = -1; dbg("%s registering slot:path[%s] index[%x], name[%s] pdomain[%x] type[%d]\n", __func__, slot->dn->full_name, slot->index, slot->name, @@ -130,10 +132,15 @@ int rpaphp_register_slot(struct slot *slot) return -EAGAIN; } - if (slot->dn->child) - slotno = PCI_SLOT(PCI_DN(slot->dn->child)->devfn); - else - slotno = -1; + for_each_child_of_node(slot->dn, child) { + retval = of_property_read_u32(child, "ibm,my-drc-index", &my_index); + if (my_index == slot->index) { + slotno = PCI_SLOT(PCI_DN(child)->devfn); + of_node_put(child); + break; + } + } + retval = pci_hp_register(php_slot, slot->bus, slotno, slot->name); if (retval) { err("pci_hp_register failed with error %d\n", retval); -- cgit v0.10.2 From b9c13fe32faaa71c4e4f8a426d79f8c93495e9f9 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 8 Jul 2016 08:35:59 +1000 Subject: dt: Add of_device_compatible_match() This provides an equivalent of of_fdt_match() for non-flat trees. This is more practical than matching an array of of_device_id structs when converting a bunch of existing users of of_fdt_match(). 
Signed-off-by: Benjamin Herrenschmidt Acked-by: Rob Herring Signed-off-by: Michael Ellerman diff --git a/drivers/of/base.c b/drivers/of/base.c index ebf84e3..c382e1fc 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -493,6 +493,28 @@ int of_device_is_compatible(const struct device_node *device, } EXPORT_SYMBOL(of_device_is_compatible); +/** Checks if the device is compatible with any of the entries in + * a NULL terminated array of strings. Returns the best match + * score or 0. + */ +int of_device_compatible_match(struct device_node *device, + const char *const *compat) +{ + unsigned int tmp, score = 0; + + if (!compat) + return 0; + + while (*compat) { + tmp = of_device_is_compatible(device, *compat); + if (tmp > score) + score = tmp; + compat++; + } + + return score; +} + /** * of_machine_is_compatible - Test root of device tree for a given compatible value * @compat: compatible string to look for in root node's compatible property. diff --git a/include/linux/of.h b/include/linux/of.h index 74eb28c..33c184d 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -324,6 +324,8 @@ extern int of_property_read_string_helper(const struct device_node *np, const char **out_strs, size_t sz, int index); extern int of_device_is_compatible(const struct device_node *device, const char *); +extern int of_device_compatible_match(struct device_node *device, + const char *const *compat); extern bool of_device_is_available(const struct device_node *device); extern bool of_device_is_big_endian(const struct device_node *device); extern const void *of_get_property(const struct device_node *node, -- cgit v0.10.2 From 9402c684613163888714df0955fa1f17142b08bf Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 5 Jul 2016 15:03:41 +1000 Subject: powerpc: Factor do_feature_fixup calls 32 and 64-bit do a similar set of calls early on, we move it all to a single common function to make the boot code more readable. 
Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h index 9a67a38..57fec8a 100644 --- a/arch/powerpc/include/asm/feature-fixups.h +++ b/arch/powerpc/include/asm/feature-fixups.h @@ -184,4 +184,8 @@ label##3: \ FTR_ENTRY_OFFSET label##1b-label##3b; \ .popsection; +#ifndef __ASSEMBLY__ +void apply_feature_fixups(void); +#endif + #endif /* __ASM_POWERPC_FEATURE_FIXUPS_H */ diff --git a/arch/powerpc/include/asm/synch.h b/arch/powerpc/include/asm/synch.h index c508686..78efe8d 100644 --- a/arch/powerpc/include/asm/synch.h +++ b/arch/powerpc/include/asm/synch.h @@ -13,7 +13,6 @@ extern unsigned int __start___lwsync_fixup, __stop___lwsync_fixup; extern void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end); -extern void do_final_fixups(void); static inline void eieio(void) { diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 4abefb5..3f0aca2 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -70,7 +70,6 @@ int ucache_bsize; notrace unsigned long __init early_init(unsigned long dt_ptr) { unsigned long offset = reloc_offset(); - struct cpu_spec *spec; /* First zero the BSS -- use memset_io, some platforms don't have * caches on yet */ @@ -81,21 +80,9 @@ notrace unsigned long __init early_init(unsigned long dt_ptr) * Identify the CPU type and fix up code sections * that depend on which cpu we have. 
*/ - spec = identify_cpu(offset, mfspr(SPRN_PVR)); + identify_cpu(offset, mfspr(SPRN_PVR)); - do_feature_fixups(spec->cpu_features, - PTRRELOC(&__start___ftr_fixup), - PTRRELOC(&__stop___ftr_fixup)); - - do_feature_fixups(spec->mmu_features, - PTRRELOC(&__start___mmu_ftr_fixup), - PTRRELOC(&__stop___mmu_ftr_fixup)); - - do_lwsync_fixups(spec->cpu_features, - PTRRELOC(&__start___lwsync_fixup), - PTRRELOC(&__stop___lwsync_fixup)); - - do_final_fixups(); + apply_feature_fixups(); return KERNELBASE + offset; } diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 521846c..373ef9d 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -467,18 +467,7 @@ void __init setup_system(void) { DBG(" -> setup_system()\n"); - /* Apply the CPUs-specific and firmware specific fixups to kernel - * text (nop out sections not relevant to this CPU or this firmware) - */ - do_feature_fixups(cur_cpu_spec->cpu_features, - &__start___ftr_fixup, &__stop___ftr_fixup); - do_feature_fixups(cur_cpu_spec->mmu_features, - &__start___mmu_ftr_fixup, &__stop___mmu_ftr_fixup); - do_feature_fixups(powerpc_firmware_features, - &__start___fw_ftr_fixup, &__stop___fw_ftr_fixup); - do_lwsync_fixups(cur_cpu_spec->cpu_features, - &__start___lwsync_fixup, &__stop___lwsync_fixup); - do_final_fixups(); + apply_feature_fixups(); /* * Unflatten the device-tree passed by prom_init or kexec diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index 7ce3870..defb299 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -20,7 +20,8 @@ #include #include #include - +#include +#include struct fixup_entry { unsigned long mask; @@ -130,7 +131,7 @@ void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end) } } -void do_final_fixups(void) +static void do_final_fixups(void) { #if defined(CONFIG_PPC64) && defined(CONFIG_RELOCATABLE) int *src, *dest; @@ -151,6 +152,33 @@ void 
do_final_fixups(void) #endif } +void apply_feature_fixups(void) +{ + struct cpu_spec *spec = *PTRRELOC(&cur_cpu_spec); + + /* + * Apply the CPU-specific and firmware specific fixups to kernel text + * (nop out sections not relevant to this CPU or this firmware). + */ + do_feature_fixups(spec->cpu_features, + PTRRELOC(&__start___ftr_fixup), + PTRRELOC(&__stop___ftr_fixup)); + + do_feature_fixups(spec->mmu_features, + PTRRELOC(&__start___mmu_ftr_fixup), + PTRRELOC(&__stop___mmu_ftr_fixup)); + + do_lwsync_fixups(spec->cpu_features, + PTRRELOC(&__start___lwsync_fixup), + PTRRELOC(&__stop___lwsync_fixup)); + +#ifdef CONFIG_PPC64 + do_feature_fixups(powerpc_firmware_features, + &__start___fw_ftr_fixup, &__stop___fw_ftr_fixup); +#endif + do_final_fixups(); +} + #ifdef CONFIG_FTR_FIXUP_SELFTEST #define check(x) \ -- cgit v0.10.2 From c4bd6cb87c9e28a7d9f4a97db5a06cc538eb5e48 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 5 Jul 2016 15:03:42 +1000 Subject: powerpc: Move 64-bit feature fixup earlier Make it part of early_setup() as we really want the feature fixups to be applied before we turn on the MMU since they can have an impact on the various assembly path related to MMU management and interrupts. This makes 64-bit match what 32-bit does. 
Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 373ef9d..0a6d5f7 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -285,6 +285,9 @@ void __init early_setup(unsigned long dt_ptr) /* Initialize the hash table or TLB handling */ early_init_mmu(); + /* Apply all the dynamic patching */ + apply_feature_fixups(); + /* * At this point, we can let interrupts switch to virtual mode * (the MMU has been setup), so adjust the MSR in the PACA to @@ -467,8 +470,6 @@ void __init setup_system(void) { DBG(" -> setup_system()\n"); - apply_feature_fixups(); - /* * Unflatten the device-tree passed by prom_init or kexec */ -- cgit v0.10.2 From de4cf3de594f96f5a27f0e2346dd211beb126f88 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 5 Jul 2016 15:03:43 +1000 Subject: powerpc: Move 64-bit memory reserves to setup_arch() There is really no need to do them that early, early_setup() runs before MMU is on, we should do the strict minimum there to get the MMU going. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 0a6d5f7..155dbcc 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -295,16 +295,6 @@ void __init early_setup(unsigned long dt_ptr) */ cpu_ready_for_interrupts(); - /* Reserve large chunks of memory for use by CMA for KVM */ - kvm_cma_reserve(); - - /* - * Reserve any gigantic pages requested on the command line. - * memblock needs to have been initialized by the time this is - * called since this will reserve memory. 
- */ - reserve_hugetlb_gpages(); - DBG(" <- early_setup()\n"); #ifdef CONFIG_PPC_EARLY_DEBUG_BOOTX @@ -687,6 +677,17 @@ void __init setup_arch(char **cmdline_p) dcache_bsize = ppc64_caches.dline_size; icache_bsize = ppc64_caches.iline_size; + + /* Reserve large chunks of memory for use by CMA for KVM */ + kvm_cma_reserve(); + + /* + * Reserve any gigantic pages requested on the command line. + * memblock needs to have been initialized by the time this is + * called since this will reserve memory. + */ + reserve_hugetlb_gpages(); + if (ppc_md.panic) setup_panic(); @@ -711,7 +712,6 @@ void __init setup_arch(char **cmdline_p) #ifdef CONFIG_DUMMY_CONSOLE conswitchp = &dummy_con; #endif - if (ppc_md.setup_arch) ppc_md.setup_arch(); -- cgit v0.10.2 From c40785ad305b32e9b0b5fbc888f1f5d57f29bf44 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 5 Jul 2016 15:03:47 +1000 Subject: powerpc/dart: Use a cachable DART Instead of punching a hole in the linear mapping, just use normal cachable memory, and apply the flush sequence documented in the CPC925 (aka U3) user manual. This allows us to remove quite a bit of code related to the early allocation of the DART and the hole in the linear mapping. We can also get rid of the copy of the DART for suspend/resume as the original memory can just be saved/restored now, as long as we properly sync the caches.
Signed-off-by: Benjamin Herrenschmidt [mpe: Integrate dart_init() fix to return ENODEV when DART disabled] Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index 7b87bab..f49a72a 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -273,7 +273,6 @@ extern void iommu_init_early_pSeries(void); extern void iommu_init_early_dart(struct pci_controller_ops *controller_ops); extern void iommu_init_early_pasemi(void); -extern void alloc_dart_table(void); #if defined(CONFIG_PPC64) && defined(CONFIG_PM) static inline void iommu_save(void) { diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 7d0955e..859ecaa 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -87,10 +87,6 @@ * */ -#ifdef CONFIG_U3_DART -extern unsigned long dart_tablebase; -#endif /* CONFIG_U3_DART */ - static unsigned long _SDR1; struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT]; EXPORT_SYMBOL_GPL(mmu_psize_defs); @@ -846,34 +842,6 @@ static void __init htab_initialize(void) DBG("creating mapping for region: %lx..%lx (prot: %lx)\n", base, size, prot); -#ifdef CONFIG_U3_DART - /* Do not map the DART space. Fortunately, it will be aligned - * in such a way that it will not cross two memblock regions and - * will fit within a single 16Mb page. - * The DART space is assumed to be a full 16Mb region even if - * we only use 2Mb of that space. We will use more of it later - * for AGP GART. We have to use a full 16Mb large page. 
- */ - DBG("DART base: %lx\n", dart_tablebase); - - if (dart_tablebase != 0 && dart_tablebase >= base - && dart_tablebase < (base + size)) { - unsigned long dart_table_end = dart_tablebase + 16 * MB; - if (base != dart_tablebase) - BUG_ON(htab_bolt_mapping(base, dart_tablebase, - __pa(base), prot, - mmu_linear_psize, - mmu_kernel_ssize)); - if ((base + size) > dart_table_end) - BUG_ON(htab_bolt_mapping(dart_tablebase+16*MB, - base + size, - __pa(dart_table_end), - prot, - mmu_linear_psize, - mmu_kernel_ssize)); - continue; - } -#endif /* CONFIG_U3_DART */ BUG_ON(htab_bolt_mapping(base, base + size, __pa(base), prot, mmu_linear_psize, mmu_kernel_ssize)); } diff --git a/arch/powerpc/platforms/maple/setup.c b/arch/powerpc/platforms/maple/setup.c index 5f8f6f9..99b9b96 100644 --- a/arch/powerpc/platforms/maple/setup.c +++ b/arch/powerpc/platforms/maple/setup.c @@ -305,13 +305,6 @@ static int __init maple_probe(void) if (!of_flat_dt_is_compatible(root, "Momentum,Maple") && !of_flat_dt_is_compatible(root, "Momentum,Apache")) return 0; - /* - * On U3, the DART (iommu) must be allocated now since it - * has an impact on htab_initialize (due to the large page it - * occupies having to be broken up so the DART itself is not - * part of the cacheable linar mapping - */ - alloc_dart_table(); hpte_init_native(); pm_power_off = maple_power_off; diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c index bd83b52c..fc0b69f 100644 --- a/arch/powerpc/platforms/powermac/setup.c +++ b/arch/powerpc/platforms/powermac/setup.c @@ -607,14 +607,6 @@ static int __init pmac_probe(void) return 0; #ifdef CONFIG_PPC64 - /* - * On U3, the DART (iommu) must be allocated now since it - * has an impact on htab_initialize (due to the large page it - * occupies having to be broken up so the DART itself is not - * part of the cacheable linar mapping - */ - alloc_dart_table(); - hpte_init_native(); #endif diff --git a/arch/powerpc/sysdev/dart_iommu.c 
b/arch/powerpc/sysdev/dart_iommu.c index b734863..26904f4 100644 --- a/arch/powerpc/sysdev/dart_iommu.c +++ b/arch/powerpc/sysdev/dart_iommu.c @@ -48,16 +48,10 @@ #include "dart.h" -/* Physical base address and size of the DART table */ -unsigned long dart_tablebase; /* exported to htab_initialize */ +/* DART table address and size */ +static u32 *dart_tablebase; static unsigned long dart_tablesize; -/* Virtual base address of the DART table */ -static u32 *dart_vbase; -#ifdef CONFIG_PM -static u32 *dart_copy; -#endif - /* Mapped base address for the dart */ static unsigned int __iomem *dart; @@ -151,6 +145,34 @@ wait_more: spin_unlock_irqrestore(&invalidate_lock, flags); } +static void dart_cache_sync(unsigned int *base, unsigned int count) +{ + /* + * We add 1 to the number of entries to flush, following a + * comment in Darwin indicating that the memory controller + * can prefetch unmapped memory under some circumstances. + */ + unsigned long start = (unsigned long)base; + unsigned long end = start + (count + 1) * sizeof(unsigned int); + unsigned int tmp; + + /* Perform a standard cache flush */ + flush_inval_dcache_range(start, end); + + /* + * Perform the sequence described in the CPC925 manual to + * ensure all the data gets to a point the cache incoherent + * DART hardware will see. + */ + asm volatile(" sync;" + " isync;" + " dcbf 0,%1;" + " sync;" + " isync;" + " lwz %0,0(%1);" + " isync" : "=r" (tmp) : "r" (end) : "memory"); +} + static void dart_flush(struct iommu_table *tbl) { mb(); @@ -165,13 +187,13 @@ static int dart_build(struct iommu_table *tbl, long index, enum dma_data_direction direction, struct dma_attrs *attrs) { - unsigned int *dp; + unsigned int *dp, *orig_dp; unsigned int rpn; long l; DBG("dart: build at: %lx, %lx, addr: %x\n", index, npages, uaddr); - dp = ((unsigned int*)tbl->it_base) + index; + orig_dp = dp = ((unsigned int*)tbl->it_base) + index; /* On U3, all memory is contiguous, so we can move this * out of the loop. 
@@ -184,11 +206,7 @@ static int dart_build(struct iommu_table *tbl, long index, uaddr += DART_PAGE_SIZE; } - - /* make sure all updates have reached memory */ - mb(); - in_be32((unsigned __iomem *)dp); - mb(); + dart_cache_sync(orig_dp, npages); if (dart_is_u4) { rpn = index; @@ -203,7 +221,8 @@ static int dart_build(struct iommu_table *tbl, long index, static void dart_free(struct iommu_table *tbl, long index, long npages) { - unsigned int *dp; + unsigned int *dp, *orig_dp; + long orig_npages = npages; /* We don't worry about flushing the TLB cache. The only drawback of * not doing it is that we won't catch buggy device drivers doing @@ -212,34 +231,30 @@ static void dart_free(struct iommu_table *tbl, long index, long npages) DBG("dart: free at: %lx, %lx\n", index, npages); - dp = ((unsigned int *)tbl->it_base) + index; + orig_dp = dp = ((unsigned int *)tbl->it_base) + index; while (npages--) *(dp++) = dart_emptyval; -} + dart_cache_sync(orig_dp, orig_npages); +} -static int __init dart_init(struct device_node *dart_node) +static void allocate_dart(void) { - unsigned int i; - unsigned long tmp, base, size; - struct resource r; - - if (dart_tablebase == 0 || dart_tablesize == 0) { - printk(KERN_INFO "DART: table not allocated, using " - "direct DMA\n"); - return -ENODEV; - } + unsigned long tmp; - if (of_address_to_resource(dart_node, 0, &r)) - panic("DART: can't get register base ! "); + /* 512 pages (2MB) is max DART tablesize. */ + dart_tablesize = 1UL << 21; - /* Make sure nothing from the DART range remains in the CPU cache - * from a previous mapping that existed before the kernel took - * over + /* + * 16MB (1 << 24) alignment. We allocate a full 16Mb chuck since we + * will blow up an entire large page anyway in the kernel mapping. 
*/ - flush_dcache_phys_range(dart_tablebase, - dart_tablebase + dart_tablesize); + dart_tablebase = __va(memblock_alloc_base(1UL<<24, + 1UL<<24, 0x80000000L)); + + /* There is no point scanning the DART space for leaks*/ + kmemleak_no_scan((void *)dart_tablebase); /* Allocate a spare page to map all invalid DART pages. We need to do * that to work around what looks like a problem with the HT bridge @@ -249,20 +264,51 @@ static int __init dart_init(struct device_node *dart_node) dart_emptyval = DARTMAP_VALID | ((tmp >> DART_PAGE_SHIFT) & DARTMAP_RPNMASK); + printk(KERN_INFO "DART table allocated at: %p\n", dart_tablebase); +} + +static int __init dart_init(struct device_node *dart_node) +{ + unsigned int i; + unsigned long base, size; + struct resource r; + + /* IOMMU disabled by the user ? bail out */ + if (iommu_is_off) + return -ENODEV; + + /* + * Only use the DART if the machine has more than 1GB of RAM + * or if requested with iommu=on on cmdline. + * + * 1GB of RAM is picked as limit because some default devices + * (i.e. Airport Extreme) have 30 bit address range limits. + */ + + if (!iommu_force_on && memblock_end_of_DRAM() <= 0x40000000ull) + return -ENODEV; + + /* Get DART registers */ + if (of_address_to_resource(dart_node, 0, &r)) + panic("DART: can't get register base ! "); + /* Map in DART registers */ dart = ioremap(r.start, resource_size(&r)); if (dart == NULL) panic("DART: Cannot map registers!"); - /* Map in DART table */ - dart_vbase = ioremap(__pa(dart_tablebase), dart_tablesize); + /* Allocate the DART and dummy page */ + allocate_dart(); /* Fill initial table */ for (i = 0; i < dart_tablesize/4; i++) - dart_vbase[i] = dart_emptyval; + dart_tablebase[i] = dart_emptyval; + + /* Push to memory */ + dart_cache_sync(dart_tablebase, dart_tablesize / sizeof(u32)); /* Initialize DART with table base and enable it. 
*/ - base = dart_tablebase >> DART_PAGE_SHIFT; + base = ((unsigned long)dart_tablebase) >> DART_PAGE_SHIFT; size = dart_tablesize >> DART_PAGE_SHIFT; if (dart_is_u4) { size &= DART_SIZE_U4_SIZE_MASK; @@ -301,7 +347,7 @@ static void iommu_table_dart_setup(void) iommu_table_dart.it_page_shift = IOMMU_PAGE_SHIFT_4K; /* Initialize the common IOMMU code */ - iommu_table_dart.it_base = (unsigned long)dart_vbase; + iommu_table_dart.it_base = (unsigned long)dart_tablebase; iommu_table_dart.it_index = 0; iommu_table_dart.it_blocksize = 1; iommu_table_dart.it_ops = &iommu_dart_ops; @@ -404,75 +450,21 @@ void __init iommu_init_early_dart(struct pci_controller_ops *controller_ops) } #ifdef CONFIG_PM -static void iommu_dart_save(void) -{ - memcpy(dart_copy, dart_vbase, 2*1024*1024); -} - static void iommu_dart_restore(void) { - memcpy(dart_vbase, dart_copy, 2*1024*1024); + dart_cache_sync(dart_tablebase, dart_tablesize / sizeof(u32)); dart_tlb_invalidate_all(); } static int __init iommu_init_late_dart(void) { - unsigned long tbasepfn; - struct page *p; - - /* if no dart table exists then we won't need to save it - * and the area has also not been reserved */ if (!dart_tablebase) return 0; - tbasepfn = __pa(dart_tablebase) >> PAGE_SHIFT; - register_nosave_region_late(tbasepfn, - tbasepfn + ((1<<24) >> PAGE_SHIFT)); - - /* For suspend we need to copy the dart contents because - * it is not part of the regular mapping (see above) and - * thus not saved automatically. The memory for this copy - * must be allocated early because we need 2 MB. */ - p = alloc_pages(GFP_KERNEL, 21 - PAGE_SHIFT); - BUG_ON(!p); - dart_copy = page_address(p); - - ppc_md.iommu_save = iommu_dart_save; ppc_md.iommu_restore = iommu_dart_restore; return 0; } late_initcall(iommu_init_late_dart); -#endif - -void __init alloc_dart_table(void) -{ - /* Only reserve DART space if machine has more than 1GB of RAM - * or if requested with iommu=on on cmdline. 
- * - * 1GB of RAM is picked as limit because some default devices - * (i.e. Airport Extreme) have 30 bit address range limits. - */ - - if (iommu_is_off) - return; - - if (!iommu_force_on && memblock_end_of_DRAM() <= 0x40000000ull) - return; - - /* 512 pages (2MB) is max DART tablesize. */ - dart_tablesize = 1UL << 21; - /* 16MB (1 << 24) alignment. We allocate a full 16Mb chuck since we - * will blow up an entire large page anyway in the kernel mapping - */ - dart_tablebase = (unsigned long) - __va(memblock_alloc_base(1UL<<24, 1UL<<24, 0x80000000L)); - /* - * The DART space is later unmapped from the kernel linear mapping and - * accessing dart_tablebase during kmemleak scanning will fault. - */ - kmemleak_no_scan((void *)dart_tablebase); - - printk(KERN_INFO "DART table allocated at: %lx\n", dart_tablebase); -} +#endif /* CONFIG_PM */ -- cgit v0.10.2 From 3808a88985b4f5f5e947c364debce4441a380fb8 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 5 Jul 2016 15:03:48 +1000 Subject: powerpc: Move FW feature probing out of pseries probe() We move the function itself to pseries/firmware.c and call it along with almost all other flat device-tree parsers from early_init_devtree() Signed-off-by: Benjamin Herrenschmidt [mpe: Move #ifdefs into the header by providing pseries_probe_fw_features()] Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h index b062924..1e0b5a5 100644 --- a/arch/powerpc/include/asm/firmware.h +++ b/arch/powerpc/include/asm/firmware.h @@ -126,6 +126,12 @@ extern int fwnmi_active; extern unsigned int __start___fw_ftr_fixup, __stop___fw_ftr_fixup; +#ifdef CONFIG_PPC_PSERIES +void pseries_probe_fw_features(void); +#else +static inline void pseries_probe_fw_features(void) { }; +#endif + #endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ #endif /* __ASM_POWERPC_FIRMWARE_H */ diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 7a01113..2bd1784 100644 
--- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -57,6 +57,7 @@ #include #include #include +#include #include @@ -755,6 +756,9 @@ void __init early_init_devtree(void *params) #endif epapr_paravirt_early_init(); + /* Now try to figure out if we are running on LPAR and so on */ + pseries_probe_fw_features(); + DBG(" <- early_init_devtree()\n"); } diff --git a/arch/powerpc/platforms/pseries/firmware.c b/arch/powerpc/platforms/pseries/firmware.c index 8c80588..ea7f09b 100644 --- a/arch/powerpc/platforms/pseries/firmware.c +++ b/arch/powerpc/platforms/pseries/firmware.c @@ -22,6 +22,7 @@ */ +#include #include #include #include @@ -69,7 +70,8 @@ hypertas_fw_features_table[] = { * device-tree/ibm,hypertas-functions. Ultimately this functionality may * be moved into prom.c prom_init(). */ -void __init fw_hypertas_feature_init(const char *hypertas, unsigned long len) +static void __init fw_hypertas_feature_init(const char *hypertas, + unsigned long len) { const char *s; int i; @@ -113,7 +115,7 @@ vec5_fw_features_table[] = { {FW_FEATURE_PRRN, OV5_PRRN}, }; -void __init fw_vec5_feature_init(const char *vec5, unsigned long len) +static void __init fw_vec5_feature_init(const char *vec5, unsigned long len) { unsigned int index, feat; int i; @@ -131,3 +133,45 @@ void __init fw_vec5_feature_init(const char *vec5, unsigned long len) pr_debug(" <- fw_vec5_feature_init()\n"); } + +/* + * Called very early, MMU is off, device-tree isn't unflattened + */ +static int __init probe_fw_features(unsigned long node, const char *uname, int + depth, void *data) +{ + const char *prop; + int len; + static int hypertas_found; + static int vec5_found; + + if (depth != 1) + return 0; + + if (!strcmp(uname, "rtas") || !strcmp(uname, "rtas@0")) { + prop = of_get_flat_dt_prop(node, "ibm,hypertas-functions", + &len); + if (prop) { + powerpc_firmware_features |= FW_FEATURE_LPAR; + fw_hypertas_feature_init(prop, len); + } + + hypertas_found = 1; + } + + if (!strcmp(uname, "chosen")) 
{ + prop = of_get_flat_dt_prop(node, "ibm,architecture-vec-5", + &len); + if (prop) + fw_vec5_feature_init(prop, len); + + vec5_found = 1; + } + + return hypertas_found && vec5_found; +} + +void __init pseries_probe_fw_features(void) +{ + of_scan_flat_dt(probe_fw_features, NULL); +} diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h index bba3285..b1be7b7 100644 --- a/arch/powerpc/platforms/pseries/pseries.h +++ b/arch/powerpc/platforms/pseries/pseries.h @@ -20,11 +20,6 @@ extern void request_event_sources_irqs(struct device_node *np, #include -extern void __init fw_hypertas_feature_init(const char *hypertas, - unsigned long len); -extern void __init fw_vec5_feature_init(const char *hypertas, - unsigned long len); - struct pt_regs; extern int pSeries_system_reset_exception(struct pt_regs *regs); diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index e61e9b9..9cc9b88 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -659,45 +659,6 @@ static void pseries_power_off(void) for (;;); } -/* - * Called very early, MMU is off, device-tree isn't unflattened - */ - -static int __init pseries_probe_fw_features(unsigned long node, - const char *uname, int depth, - void *data) -{ - const char *prop; - int len; - static int hypertas_found; - static int vec5_found; - - if (depth != 1) - return 0; - - if (!strcmp(uname, "rtas") || !strcmp(uname, "rtas@0")) { - prop = of_get_flat_dt_prop(node, "ibm,hypertas-functions", - &len); - if (prop) { - powerpc_firmware_features |= FW_FEATURE_LPAR; - fw_hypertas_feature_init(prop, len); - } - - hypertas_found = 1; - } - - if (!strcmp(uname, "chosen")) { - prop = of_get_flat_dt_prop(node, "ibm,architecture-vec-5", - &len); - if (prop) - fw_vec5_feature_init(prop, len); - - vec5_found = 1; - } - - return hypertas_found && vec5_found; -} - static int __init pSeries_probe(void) { unsigned long root = 
of_get_flat_dt_root(); @@ -717,8 +678,6 @@ static int __init pSeries_probe(void) pr_debug("pSeries detected, looking for LPAR capability...\n"); - /* Now try to figure out if we are running on LPAR */ - of_scan_flat_dt(pseries_probe_fw_features, NULL); #ifdef __LITTLE_ENDIAN__ if (firmware_has_feature(FW_FEATURE_SET_MODE)) { -- cgit v0.10.2 From d3cbff1b5a90afe6cb201aa2187c9609e21f92ad Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 5 Jul 2016 15:03:49 +1000 Subject: powerpc: Put exception configuration in a common place The various calls to establish exception endianness and AIL are now done from a single point using already established CPU and FW feature bits to decide what to do. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 0bc9c28..708edeb 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -431,17 +431,6 @@ static inline unsigned long cmo_get_page_size(void) { return CMO_PageSize; } - -extern long pSeries_enable_reloc_on_exc(void); -extern long pSeries_disable_reloc_on_exc(void); - -extern long pseries_big_endian_exceptions(void); - -#else - -#define pSeries_enable_reloc_on_exc() do {} while (0) -#define pSeries_disable_reloc_on_exc() do {} while (0) - #endif /* CONFIG_PPC_PSERIES */ #endif /* __ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index b656bb1..ee05bd2 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -234,6 +234,7 @@ extern int early_init_dt_scan_opal(unsigned long node, const char *uname, int depth, void *data); extern int early_init_dt_scan_recoverable_ranges(unsigned long node, const char *uname, int depth, void *data); +extern void opal_configure_cores(void); extern int opal_get_chars(uint32_t vtermno, char *buf, int count); extern int opal_put_chars(uint32_t vtermno, const char *buf, int 
total_len); diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h index e9d384c..654d64c 100644 --- a/arch/powerpc/include/asm/setup.h +++ b/arch/powerpc/include/asm/setup.h @@ -26,6 +26,18 @@ void initmem_init(void); void setup_panic(void); #define ARCH_PANIC_TIMEOUT 180 +#ifdef CONFIG_PPC_PSERIES +extern void pseries_enable_reloc_on_exc(void); +extern void pseries_disable_reloc_on_exc(void); +extern void pseries_big_endian_exceptions(void); +extern void pseries_little_endian_exceptions(void); +#else +static inline void pseries_enable_reloc_on_exc(void) {} +static inline void pseries_disable_reloc_on_exc(void) {} +static inline void pseries_big_endian_exceptions(void) {} +static inline void pseries_little_endian_exceptions(void) {} +#endif /* CONFIG_PPC_PSERIES */ + #endif /* !__ASSEMBLY__ */ #endif /* _ASM_POWERPC_SETUP_H */ diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 155dbcc..4ffd090 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -69,6 +69,7 @@ #include #include #include +#include #ifdef DEBUG #define DBG(fmt...) udbg_printf(fmt) @@ -205,23 +206,50 @@ static void fixup_boot_paca(void) get_paca()->data_offset = 0; } -static void cpu_ready_for_interrupts(void) +static void configure_exceptions(void) { - /* Set IR and DR in PACA MSR */ - get_paca()->kernel_msr = MSR_KERNEL; - /* - * Enable AIL if supported, and we are in hypervisor mode. If we are - * not in hypervisor mode, we enable relocation-on interrupts later - * in pSeries_setup_arch() using the H_SET_MODE hcall. + * Setup the trampolines from the lowmem exception vectors + * to the kdump kernel when not using a relocatable kernel. 
*/ - if (cpu_has_feature(CPU_FTR_HVMODE) && - cpu_has_feature(CPU_FTR_ARCH_207S)) { - unsigned long lpcr = mfspr(SPRN_LPCR); - mtspr(SPRN_LPCR, lpcr | LPCR_AIL_3); + setup_kdump_trampoline(); + + /* Under a PAPR hypervisor, we need hypercalls */ + if (firmware_has_feature(FW_FEATURE_SET_MODE)) { + /* Enable AIL if possible */ + pseries_enable_reloc_on_exc(); + + /* + * Tell the hypervisor that we want our exceptions to + * be taken in little endian mode. + * + * We don't call this for big endian as our calling convention + * makes us always enter in BE, and the call may fail under + * some circumstances with kdump. + */ +#ifdef __LITTLE_ENDIAN__ + pseries_little_endian_exceptions(); +#endif + } else { + /* Set endian mode using OPAL */ + if (firmware_has_feature(FW_FEATURE_OPAL)) + opal_configure_cores(); + + /* Enable AIL if supported, and we are in hypervisor mode */ + if (cpu_has_feature(CPU_FTR_HVMODE) && + cpu_has_feature(CPU_FTR_ARCH_207S)) { + unsigned long lpcr = mfspr(SPRN_LPCR); + mtspr(SPRN_LPCR, lpcr | LPCR_AIL_3); + } } } +static void cpu_ready_for_interrupts(void) +{ + /* Set IR and DR in PACA MSR */ + get_paca()->kernel_msr = MSR_KERNEL; +} + /* * Early initialization entry point. This is called by head.S * with MMU translation disabled. We rely on the "feature" of @@ -277,10 +305,10 @@ void __init early_setup(unsigned long dt_ptr) probe_machine(); /* - * Setup the trampolines from the lowmem exception vectors - * to the kdump kernel when not using a relocatable kernel. + * Configure exception handlers. This include setting up trampolines + * if needed, setting exception endian mode, etc... 
*/ - setup_kdump_trampoline(); + configure_exceptions(); /* Initialize the hash table or TLB handling */ early_init_mmu(); diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 8e4f64f..c4f7d6b 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -35,7 +35,7 @@ #include #include #include -#include +#include #include #include #include @@ -1690,7 +1690,7 @@ static int kvmppc_core_init_vm_pr(struct kvm *kvm) if (firmware_has_feature(FW_FEATURE_SET_MODE)) { spin_lock(&kvm_global_user_count_lock); if (++kvm_global_user_count == 1) - pSeries_disable_reloc_on_exc(); + pseries_disable_reloc_on_exc(); spin_unlock(&kvm_global_user_count_lock); } return 0; @@ -1706,7 +1706,7 @@ static void kvmppc_core_destroy_vm_pr(struct kvm *kvm) spin_lock(&kvm_global_user_count_lock); BUG_ON(kvm_global_user_count == 0); if (--kvm_global_user_count == 0) - pSeries_enable_reloc_on_exc(); + pseries_enable_reloc_on_exc(); spin_unlock(&kvm_global_user_count_lock); } } diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index da40d6b..8b4fc68 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -57,7 +57,7 @@ static struct atomic_notifier_head opal_msg_notifier_head[OPAL_MSG_TYPE_MAX]; static uint32_t opal_heartbeat; static struct task_struct *kopald_tsk; -static void opal_reinit_cores(void) +void opal_configure_cores(void) { /* Do the actual re-init, This will clobber all FPRs, VRs, etc... 
* @@ -70,6 +70,10 @@ static void opal_reinit_cores(void) #else opal_reinit_cpus(OPAL_REINIT_CPUS_HILE_LE); #endif + + /* Restore some bits */ + if (cur_cpu_spec->cpu_restore) + cur_cpu_spec->cpu_restore(); } int __init early_init_dt_scan_opal(unsigned long node, @@ -106,13 +110,6 @@ int __init early_init_dt_scan_opal(unsigned long node, panic("OPAL != V3 detected, no longer supported.\n"); } - /* Reinit all cores with the right endian */ - opal_reinit_cores(); - - /* Restore some bits */ - if (cur_cpu_spec->cpu_restore) - cur_cpu_spec->cpu_restore(); - return 1; } diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 03ff986..03c732a 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -261,24 +261,8 @@ static void pSeries_lpar_hptab_clear(void) * This is also called on boot when a fadump happens. In that case we * must not change the exception endian mode. */ - if (firmware_has_feature(FW_FEATURE_SET_MODE) && !is_fadump_active()) { - long rc; - - rc = pseries_big_endian_exceptions(); - /* - * At this point it is unlikely panic() will get anything - * out to the user, but at least this will stop us from - * continuing on further and creating an even more - * difficult to debug situation. - * - * There is a known problem when kdump'ing, if cpus are offline - * the above call will fail. Rather than panicking again, keep - * going and hope the kdump kernel is also little endian, which - * it usually is. 
- */ - if (rc && !kdump_in_progress()) - panic("Could not enable big endian exceptions"); - } + if (firmware_has_feature(FW_FEATURE_SET_MODE) && !is_fadump_active()) + pseries_big_endian_exceptions(); #endif } diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 9cc9b88..ba7dc12 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -319,15 +319,23 @@ static void pseries_lpar_idle(void) * to ever be a problem in practice we can move this into a kernel thread to * finish off the process later in boot. */ -long pSeries_enable_reloc_on_exc(void) +void pseries_enable_reloc_on_exc(void) { long rc; unsigned int delay, total_delay = 0; while (1) { rc = enable_reloc_on_exceptions(); - if (!H_IS_LONG_BUSY(rc)) - return rc; + if (!H_IS_LONG_BUSY(rc)) { + if (rc == H_P2) { + pr_info("Relocation on exceptions not" + " supported\n"); + } else if (rc != H_SUCCESS) { + pr_warn("Unable to enable relocation" + " on exceptions: %ld\n", rc); + } + break; + } delay = get_longbusy_msecs(rc); total_delay += delay; @@ -335,66 +343,81 @@ long pSeries_enable_reloc_on_exc(void) pr_warn("Warning: Giving up waiting to enable " "relocation on exceptions (%u msec)!\n", total_delay); - return rc; + return; } mdelay(delay); } } -EXPORT_SYMBOL(pSeries_enable_reloc_on_exc); +EXPORT_SYMBOL(pseries_enable_reloc_on_exc); -long pSeries_disable_reloc_on_exc(void) +void pseries_disable_reloc_on_exc(void) { long rc; while (1) { rc = disable_reloc_on_exceptions(); if (!H_IS_LONG_BUSY(rc)) - return rc; + break; mdelay(get_longbusy_msecs(rc)); } + if (rc != H_SUCCESS) + pr_warning("Warning: Failed to disable relocation on " + "exceptions: %ld\n", rc); } -EXPORT_SYMBOL(pSeries_disable_reloc_on_exc); +EXPORT_SYMBOL(pseries_disable_reloc_on_exc); #ifdef CONFIG_KEXEC static void pSeries_machine_kexec(struct kimage *image) { - long rc; - - if (firmware_has_feature(FW_FEATURE_SET_MODE)) { - rc = 
pSeries_disable_reloc_on_exc(); - if (rc != H_SUCCESS) - pr_warning("Warning: Failed to disable relocation on " - "exceptions: %ld\n", rc); - } + if (firmware_has_feature(FW_FEATURE_SET_MODE)) + pseries_disable_reloc_on_exc(); default_machine_kexec(image); } #endif #ifdef __LITTLE_ENDIAN__ -long pseries_big_endian_exceptions(void) +void pseries_big_endian_exceptions(void) { long rc; while (1) { rc = enable_big_endian_exceptions(); if (!H_IS_LONG_BUSY(rc)) - return rc; + break; mdelay(get_longbusy_msecs(rc)); } + + /* + * At this point it is unlikely panic() will get anything + * out to the user, since this is called very late in kexec + * but at least this will stop us from continuing on further + * and creating an even more difficult to debug situation. + * + * There is a known problem when kdump'ing, if cpus are offline + * the above call will fail. Rather than panicking again, keep + * going and hope the kdump kernel is also little endian, which + * it usually is. + */ + if (rc && !kdump_in_progress()) + panic("Could not enable big endian exceptions"); } -static long pseries_little_endian_exceptions(void) +void pseries_little_endian_exceptions(void) { long rc; while (1) { rc = enable_little_endian_exceptions(); if (!H_IS_LONG_BUSY(rc)) - return rc; + break; mdelay(get_longbusy_msecs(rc)); } + if (rc) { + ppc_md.progress("H_SET_MODE LE exception fail", 0); + panic("Could not enable little endian exceptions"); + } } #endif @@ -464,18 +487,6 @@ static void __init pSeries_setup_arch(void) } ppc_md.pcibios_root_bridge_prepare = pseries_root_bridge_prepare; - - if (firmware_has_feature(FW_FEATURE_SET_MODE)) { - long rc; - - rc = pSeries_enable_reloc_on_exc(); - if (rc == H_P2) { - pr_info("Relocation on exceptions not supported\n"); - } else if (rc != H_SUCCESS) { - pr_warn("Unable to enable relocation on exceptions: " - "%ld\n", rc); - } - } } static int __init pSeries_init_panel(void) @@ -678,23 +689,6 @@ static int __init pSeries_probe(void) pr_debug("pSeries 
detected, looking for LPAR capability...\n"); - -#ifdef __LITTLE_ENDIAN__ - if (firmware_has_feature(FW_FEATURE_SET_MODE)) { - long rc; - /* - * Tell the hypervisor that we want our exceptions to - * be taken in little endian mode. If this fails we don't - * want to use BUG() because it will trigger an exception. - */ - rc = pseries_little_endian_exceptions(); - if (rc) { - ppc_md.progress("H_SET_MODE LE exception fail", 0); - panic("Could not enable little endian exceptions"); - } - } -#endif - if (firmware_has_feature(FW_FEATURE_LPAR)) hpte_init_lpar(); else -- cgit v0.10.2 From 91b6fad5cf16c5fcf0ab2a08fcdbd1483d871d43 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 5 Jul 2016 15:03:50 +1000 Subject: powerpc/pmac: Remove early allocation of the SMU command buffer The SMU command buffer needs to be allocated below 2G using memblock. In the past, this had to be done very early from the arch code as memblock wasn't available past that point. That is no longer the case though, smu_init() is called from setup_arch() when memblock is still functional these days. So move the allocation to the SMU driver itself. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/include/asm/smu.h b/arch/powerpc/include/asm/smu.h index 9dc2de5..09f98e8 100644 --- a/arch/powerpc/include/asm/smu.h +++ b/arch/powerpc/include/asm/smu.h @@ -471,13 +471,6 @@ extern int smu_get_rtc_time(struct rtc_time *time, int spinwait); extern int smu_set_rtc_time(struct rtc_time *time, int spinwait); /* - * SMU command buffer absolute address, exported by pmac_setup, - * this is allocated very early during boot. 
- */ -extern unsigned long smu_cmdbuf_abs; - - -/* * Kernel asynchronous i2c interface */ diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c index fc0b69f..4ad6168 100644 --- a/arch/powerpc/platforms/powermac/setup.c +++ b/arch/powerpc/platforms/powermac/setup.c @@ -52,7 +52,6 @@ #include #include #include -#include #include #include @@ -97,11 +96,6 @@ int sccdbg; sys_ctrler_t sys_ctrler = SYS_CTRLER_UNKNOWN; EXPORT_SYMBOL(sys_ctrler); -#ifdef CONFIG_PMAC_SMU -unsigned long smu_cmdbuf_abs; -EXPORT_SYMBOL(smu_cmdbuf_abs); -#endif - static void pmac_show_cpuinfo(struct seq_file *m) { struct device_node *np; @@ -325,7 +319,6 @@ static void __init pmac_setup_arch(void) defined(CONFIG_PPC64) pmac_nvram_init(); #endif - #ifdef CONFIG_PPC32 #ifdef CONFIG_BLK_DEV_INITRD if (initrd_start) @@ -617,15 +610,6 @@ static int __init pmac_probe(void) DMA_MODE_WRITE = 2; #endif /* CONFIG_PPC32 */ -#ifdef CONFIG_PMAC_SMU - /* - * SMU based G5s need some memory below 2Gb, at least the current - * driver needs that. We have to allocate it now. We allocate 4k - * (1 small page) for now. - */ - smu_cmdbuf_abs = memblock_alloc_base(4096, 4096, 0x80000000UL); -#endif /* CONFIG_PMAC_SMU */ - pm_power_off = pmac_power_off; return 1; diff --git a/drivers/macintosh/smu.c b/drivers/macintosh/smu.c index d531f80..d6f72c8 100644 --- a/drivers/macintosh/smu.c +++ b/drivers/macintosh/smu.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include @@ -99,6 +100,7 @@ static DEFINE_MUTEX(smu_mutex); static struct smu_device *smu; static DEFINE_MUTEX(smu_part_access); static int smu_irq_inited; +static unsigned long smu_cmdbuf_abs; static void smu_i2c_retry(unsigned long data); @@ -479,8 +481,13 @@ int __init smu_init (void) printk(KERN_INFO "SMU: Driver %s %s\n", VERSION, AUTHOR); + /* + * SMU based G5s need some memory below 2Gb. Thankfully this is + * called at a time where memblock is still available. 
+ */ + smu_cmdbuf_abs = memblock_alloc_base(4096, 4096, 0x80000000UL); if (smu_cmdbuf_abs == 0) { - printk(KERN_ERR "SMU: Command buffer not allocated !\n"); + printk(KERN_ERR "SMU: Command buffer allocation failed !\n"); ret = -EINVAL; goto fail_np; } -- cgit v0.10.2 From 166dd7d3fbf2df183926f0e4b4855f6cbd8da945 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 5 Jul 2016 15:03:51 +1000 Subject: powerpc/64: Move MMU backend selection out of platform code We move it into early_mmu_init() based on firmware features. For PS3, we have to move the setting of these into early_init_devtree(). Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/include/asm/ps3.h b/arch/powerpc/include/asm/ps3.h index a1bc7e7..a19f831 100644 --- a/arch/powerpc/include/asm/ps3.h +++ b/arch/powerpc/include/asm/ps3.h @@ -526,4 +526,6 @@ void ps3_sync_irq(int node); u32 ps3_get_hw_thread_id(int cpu); u64 ps3_get_spe_id(void *arg); +void ps3_early_mm_init(void); + #endif diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 2bd1784..bae3db7 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -759,6 +759,12 @@ void __init early_init_devtree(void *params) /* Now try to figure out if we are running on LPAR and so on */ pseries_probe_fw_features(); +#ifdef CONFIG_PPC_PS3 + /* Identify PS3 firmware */ + if (of_flat_dt_is_compatible(of_get_flat_dt_root(), "sony,ps3")) + powerpc_firmware_features |= FW_FEATURE_PS3_POSSIBLE; +#endif + DBG(" <- early_init_devtree()\n"); } diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 859ecaa..336fad6 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -58,6 +58,7 @@ #include #include #include +#include #ifdef DEBUG #define DBG(fmt...) 
udbg_printf(fmt) @@ -872,6 +873,11 @@ static void __init htab_initialize(void) #undef KB #undef MB +void __init __weak hpte_init_lpar(void) +{ + panic("FW_FEATURE_LPAR set but no LPAR support compiled\n"); +} + void __init hash__early_init_mmu(void) { /* @@ -908,6 +914,14 @@ void __init hash__early_init_mmu(void) pci_io_base = ISA_IO_BASE; #endif + /* Select appropriate backend */ + if (firmware_has_feature(FW_FEATURE_PS3_LV1)) + ps3_early_mm_init(); + else if (firmware_has_feature(FW_FEATURE_LPAR)) + hpte_init_lpar(); + else + hpte_init_native(); + /* Initialize the MMU Hash table and create the linear mapping * of memory. Has to be done before SLB initialization as this is * currently where the page size encoding is obtained. diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index 4ea1094..003ff48 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -345,6 +345,7 @@ void __init radix__early_init_mmu(void) radix_init_page_sizes(); if (!firmware_has_feature(FW_FEATURE_LPAR)) { + radix_init_native(); lpcr = mfspr(SPRN_LPCR); mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR); radix_init_partition_table(); diff --git a/arch/powerpc/platforms/cell/setup.c b/arch/powerpc/platforms/cell/setup.c index 36cff28..e342f78 100644 --- a/arch/powerpc/platforms/cell/setup.c +++ b/arch/powerpc/platforms/cell/setup.c @@ -261,7 +261,6 @@ static int __init cell_probe(void) !of_flat_dt_is_compatible(root, "IBM,CPBW-1.0")) return 0; - hpte_init_native(); pm_power_off = rtas_power_off; return 1; diff --git a/arch/powerpc/platforms/maple/setup.c b/arch/powerpc/platforms/maple/setup.c index 99b9b96..b1ecd99 100644 --- a/arch/powerpc/platforms/maple/setup.c +++ b/arch/powerpc/platforms/maple/setup.c @@ -306,7 +306,6 @@ static int __init maple_probe(void) !of_flat_dt_is_compatible(root, "Momentum,Apache")) return 0; - hpte_init_native(); pm_power_off = maple_power_off; return 1; diff --git a/arch/powerpc/platforms/pasemi/setup.c 
b/arch/powerpc/platforms/pasemi/setup.c index 7349644..924d01d 100644 --- a/arch/powerpc/platforms/pasemi/setup.c +++ b/arch/powerpc/platforms/pasemi/setup.c @@ -426,8 +426,6 @@ static int __init pas_probe(void) !of_flat_dt_is_compatible(root, "pasemi,pwrficient")) return 0; - hpte_init_native(); - alloc_iobmap_l2(); return 1; diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c index 4ad6168..795bf8c 100644 --- a/arch/powerpc/platforms/powermac/setup.c +++ b/arch/powerpc/platforms/powermac/setup.c @@ -599,10 +599,6 @@ static int __init pmac_probe(void) !of_flat_dt_is_compatible(root, "MacRISC")) return 0; -#ifdef CONFIG_PPC64 - hpte_init_native(); -#endif - #ifdef CONFIG_PPC32 /* isa_io_base gets set in pmac_pci_init */ ISA_DMA_THRESHOLD = ~0L; diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index 8492bbb..f70ea83 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -273,11 +273,6 @@ static int __init pnv_probe(void) if (!of_flat_dt_is_compatible(root, "ibm,powernv")) return 0; - if (IS_ENABLED(CONFIG_PPC_RADIX_MMU) && radix_enabled()) - radix_init_native(); - else if (IS_ENABLED(CONFIG_PPC_STD_MMU_64)) - hpte_init_native(); - if (firmware_has_feature(FW_FEATURE_OPAL)) pnv_setup_machdep_opal(); diff --git a/arch/powerpc/platforms/ps3/setup.c b/arch/powerpc/platforms/ps3/setup.c index 486ecd0..b7fdf88 100644 --- a/arch/powerpc/platforms/ps3/setup.c +++ b/arch/powerpc/platforms/ps3/setup.c @@ -226,9 +226,17 @@ static void __init ps3_progress(char *s, unsigned short hex) printk("*** %04x : %s\n", hex, s ? 
s : ""); } -static int __init ps3_probe(void) +void __init ps3_early_mm_init(void) { unsigned long htab_size; + + ps3_mm_init(); + ps3_mm_vas_create(&htab_size); + ps3_hpte_init(htab_size); +} + +static int __init ps3_probe(void) +{ unsigned long dt_root; DBG(" -> %s:%d\n", __func__, __LINE__); @@ -237,12 +245,7 @@ static int __init ps3_probe(void) if (!of_flat_dt_is_compatible(dt_root, "sony,ps3")) return 0; - powerpc_firmware_features |= FW_FEATURE_PS3_POSSIBLE; - ps3_os_area_save_params(); - ps3_mm_init(); - ps3_mm_vas_create(&htab_size); - ps3_hpte_init(htab_size); pm_power_off = ps3_power_off; DBG(" <- %s:%d\n", __func__, __LINE__); diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index ba7dc12..2407213 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -687,13 +687,6 @@ static int __init pSeries_probe(void) of_flat_dt_is_compatible(root, "IBM,CBEA")) return 0; - pr_debug("pSeries detected, looking for LPAR capability...\n"); - - if (firmware_has_feature(FW_FEATURE_LPAR)) - hpte_init_lpar(); - else - hpte_init_native(); - pm_power_off = pseries_power_off; pr_debug("Machine is%s LPAR !\n", -- cgit v0.10.2 From 388dc1c3f003539c82e0f1436019955df78aa9f9 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 5 Jul 2016 15:03:52 +1000 Subject: powerpc/pasemi: Remove IOBMAP allocation from platform probe() These days, memblocks is available later, so we can just allocate it as part of iob_init. 
Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/platforms/pasemi/iommu.c b/arch/powerpc/platforms/pasemi/iommu.c index c929644..43dd3fb 100644 --- a/arch/powerpc/platforms/pasemi/iommu.c +++ b/arch/powerpc/platforms/pasemi/iommu.c @@ -202,6 +202,11 @@ int __init iob_init(struct device_node *dn) pr_debug(" -> %s\n", __func__); + /* For 2G space, 8x64 pages (2^21 bytes) is max total l2 size */ + iob_l2_base = (u32 *)__va(memblock_alloc_base(1UL<<21, 1UL<<21, 0x80000000)); + + printk(KERN_INFO "IOBMAP L2 allocated at: %p\n", iob_l2_base); + /* Allocate a spare page to map all invalid IOTLB pages. */ tmp = memblock_alloc(IOBMAP_PAGE_SIZE, IOBMAP_PAGE_SIZE); if (!tmp) @@ -260,13 +265,3 @@ void __init iommu_init_early_pasemi(void) set_pci_dma_ops(&dma_iommu_ops); } -void __init alloc_iobmap_l2(void) -{ -#ifndef CONFIG_PPC_PASEMI_IOMMU - return; -#endif - /* For 2G space, 8x64 pages (2^21 bytes) is max total l2 size */ - iob_l2_base = (u32 *)__va(memblock_alloc_base(1UL<<21, 1UL<<21, 0x80000000)); - - printk(KERN_INFO "IOBMAP L2 allocated at: %p\n", iob_l2_base); -} diff --git a/arch/powerpc/platforms/pasemi/pasemi.h b/arch/powerpc/platforms/pasemi/pasemi.h index 11f230a..74cbcb3 100644 --- a/arch/powerpc/platforms/pasemi/pasemi.h +++ b/arch/powerpc/platforms/pasemi/pasemi.h @@ -8,7 +8,6 @@ extern void pas_pci_dma_dev_setup(struct pci_dev *dev); extern void __iomem *pasemi_pci_getcfgaddr(struct pci_dev *dev, int offset); -extern void __init alloc_iobmap_l2(void); extern void __init pasemi_map_registers(void); /* Power savings modes, implemented in asm */ diff --git a/arch/powerpc/platforms/pasemi/setup.c b/arch/powerpc/platforms/pasemi/setup.c index 924d01d..d120f7a 100644 --- a/arch/powerpc/platforms/pasemi/setup.c +++ b/arch/powerpc/platforms/pasemi/setup.c @@ -426,8 +426,6 @@ static int __init pas_probe(void) !of_flat_dt_is_compatible(root, "pasemi,pwrficient")) return 0; - alloc_iobmap_l2(); - return 1; } -- cgit v0.10.2 
From 5556ecf5e9fa1c7bcc50d0aa74aec28f30d9590a Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 5 Jul 2016 15:03:53 +1000 Subject: powerpc/mm/hash: Don't use machine_is() early during boot Use the device-tree instead as we'll be moving probe_machine() out of early_setup Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 336fad6..a9472ea 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include @@ -749,7 +750,7 @@ static void __init htab_initialize(void) unsigned long table; unsigned long pteg_count; unsigned long prot; - unsigned long base = 0, size = 0, limit; + unsigned long base = 0, size = 0; struct memblock_region *reg; DBG(" -> htab_initialize()\n"); @@ -775,7 +776,8 @@ static void __init htab_initialize(void) htab_hash_mask = pteg_count - 1; - if (firmware_has_feature(FW_FEATURE_LPAR)) { + if (firmware_has_feature(FW_FEATURE_LPAR) || + firmware_has_feature(FW_FEATURE_PS3_LV1)) { /* Using a hypervisor which owns the htab */ htab_address = NULL; _SDR1 = 0; @@ -790,16 +792,22 @@ static void __init htab_initialize(void) ppc_md.hpte_clear_all(); #endif } else { - /* Find storage for the HPT. Must be contiguous in - * the absolute address space. On cell we want it to be - * in the first 2 Gig so we can use it for IOMMU hacks. 
+ unsigned long limit = MEMBLOCK_ALLOC_ANYWHERE; + +#ifdef CONFIG_PPC_CELL + /* + * Cell may require the hash table down low when using the + * Axon IOMMU in order to fit the dynamic region over it, see + * comments in cell/iommu.c */ - if (machine_is(cell)) + if (fdt_subnode_offset(initial_boot_params, 0, "axon") > 0) { limit = 0x80000000; - else - limit = MEMBLOCK_ALLOC_ANYWHERE; + pr_info("Hash table forced below 2G for Axon IOMMU\n"); + } +#endif /* CONFIG_PPC_CELL */ - table = memblock_alloc_base(htab_size_bytes, htab_size_bytes, limit); + table = memblock_alloc_base(htab_size_bytes, htab_size_bytes, + limit); DBG("Hash table allocated at %lx, size: %lx\n", table, htab_size_bytes); -- cgit v0.10.2 From 2b4e3ad8f5790cae6e0356c5fc200588bd2c915c Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 5 Jul 2016 15:03:56 +1000 Subject: powerpc/mm/hash64: Don't test for machine type to detect HEA special case Instead, check for FW_FEATURE_SPLPAR. This should be roughly equivalent as all pseries machines that can have an HEA also support SPLPAR and no other machine type does. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index a9472ea..eab3074 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -527,7 +527,8 @@ static bool might_have_hea(void) * we will never see an HEA ethernet device. */ #ifdef CONFIG_IBMEBUS - return !cpu_has_feature(CPU_FTR_ARCH_207S); + return !cpu_has_feature(CPU_FTR_ARCH_207S) && + !firmware_has_feature(FW_FEATURE_SPLPAR); #else return false; #endif @@ -593,7 +594,7 @@ found: * would stop us accessing the HEA ethernet. So if we * have the chance of ever seeing one, stay at 4k.
- */ - if (!might_have_hea() || !machine_is(pseries)) + if (!might_have_hea()) mmu_io_psize = MMU_PAGE_64K; } else mmu_ci_restrictions = 1; -- cgit v0.10.2 From b521f576df6c49fcc06fbc06a349f7590f223a38 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 5 Jul 2016 15:03:57 +1000 Subject: powerpc/pmac: Remove spurious machine type test pmac_declare_of_platform_devices() is already a machine initcall, thus it won't be called on a non-powermac machine. Testing for chrp there is pointless. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c index 795bf8c..8fffe1c 100644 --- a/arch/powerpc/platforms/powermac/setup.c +++ b/arch/powerpc/platforms/powermac/setup.c @@ -484,9 +484,6 @@ static int __init pmac_declare_of_platform_devices(void) { struct device_node *np; - if (machine_is(chrp)) - return -1; - np = of_find_node_by_name(NULL, "valkyrie"); if (np) { of_platform_device_create(np, "valkyrie", NULL); -- cgit v0.10.2 From 7025776ed1ebdfa1959932e7a4662c2f88607df0 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 5 Jul 2016 15:03:58 +1000 Subject: powerpc/mm: Move hash table ops to a separate structure Moving probe_machine() to after mmu init will cause the ppc_md fields relative to the hash table management to be overwritten. Since we have essentially disconnected the machine type from the hash backend ops, finish the job by moving them to a different structure. The only callback that didn't quite fit is update_partition_table since this is not specific to hash, so I moved it to a standalone variable for now. We can revisit later if needed.
Signed-off-by: Benjamin Herrenschmidt [mpe: Fix ppc64e build failure in kexec] Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index 2c2d555..b0f4dff 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -124,6 +124,45 @@ #ifndef __ASSEMBLY__ +struct mmu_hash_ops { + void (*hpte_invalidate)(unsigned long slot, + unsigned long vpn, + int bpsize, int apsize, + int ssize, int local); + long (*hpte_updatepp)(unsigned long slot, + unsigned long newpp, + unsigned long vpn, + int bpsize, int apsize, + int ssize, unsigned long flags); + void (*hpte_updateboltedpp)(unsigned long newpp, + unsigned long ea, + int psize, int ssize); + long (*hpte_insert)(unsigned long hpte_group, + unsigned long vpn, + unsigned long prpn, + unsigned long rflags, + unsigned long vflags, + int psize, int apsize, + int ssize); + long (*hpte_remove)(unsigned long hpte_group); + int (*hpte_removebolted)(unsigned long ea, + int psize, int ssize); + void (*flush_hash_range)(unsigned long number, int local); + void (*hugepage_invalidate)(unsigned long vsid, + unsigned long addr, + unsigned char *hpte_slot_array, + int psize, int ssize, int local); + /* + * Special for kexec. + * To be called in real mode with interrupts disabled. No locks are + * taken as such, concurrent access on pre POWER5 hardware could result + * in a deadlock. + * The linear mapping is destroyed as well. 
+ */ + void (*hpte_clear_all)(void); +}; +extern struct mmu_hash_ops mmu_hash_ops; + struct hash_pte { __be64 v; __be64 r; diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index e62e7d3..5b2edf5 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -34,42 +34,6 @@ struct pci_host_bridge; struct machdep_calls { char *name; #ifdef CONFIG_PPC64 - void (*hpte_invalidate)(unsigned long slot, - unsigned long vpn, - int bpsize, int apsize, - int ssize, int local); - long (*hpte_updatepp)(unsigned long slot, - unsigned long newpp, - unsigned long vpn, - int bpsize, int apsize, - int ssize, unsigned long flags); - void (*hpte_updateboltedpp)(unsigned long newpp, - unsigned long ea, - int psize, int ssize); - long (*hpte_insert)(unsigned long hpte_group, - unsigned long vpn, - unsigned long prpn, - unsigned long rflags, - unsigned long vflags, - int psize, int apsize, - int ssize); - long (*hpte_remove)(unsigned long hpte_group); - int (*hpte_removebolted)(unsigned long ea, - int psize, int ssize); - void (*flush_hash_range)(unsigned long number, int local); - void (*hugepage_invalidate)(unsigned long vsid, - unsigned long addr, - unsigned char *hpte_slot_array, - int psize, int ssize, int local); - /* - * Special for kexec. - * To be called in real mode with interrupts disabled. No locks are - * taken as such, concurrent access on pre POWER5 hardware could result - * in a deadlock. - * The linear mapping is destroyed as well. 
- */ - void (*hpte_clear_all)(void); - void __iomem * (*ioremap)(phys_addr_t addr, unsigned long size, unsigned long flags, void *caller); void (*iounmap)(volatile void __iomem *token); diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c index 50bf551..4c780a3 100644 --- a/arch/powerpc/kernel/machine_kexec_64.c +++ b/arch/powerpc/kernel/machine_kexec_64.c @@ -55,7 +55,7 @@ int default_machine_kexec_prepare(struct kimage *image) const unsigned long *basep; const unsigned int *sizep; - if (!ppc_md.hpte_clear_all) + if (!mmu_hash_ops.hpte_clear_all) return -ENOENT; /* @@ -380,7 +380,12 @@ void default_machine_kexec(struct kimage *image) */ kexec_sequence(&kexec_stack, image->start, image, page_address(image->control_code_page), - ppc_md.hpte_clear_all); +#ifdef CONFIG_PPC_STD_MMU + mmu_hash_ops.hpte_clear_all +#else + NULL +#endif + ); /* NOTREACHED */ } diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index 7a85190..cb19515 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -667,7 +667,7 @@ _GLOBAL(kexec_sequence) mr r12,r27 #endif mtctr r12 - bctrl /* ppc_md.hpte_clear_all(void); */ + bctrl /* mmu_hash_ops.hpte_clear_all(void); */ #endif /* !CONFIG_PPC_BOOK3E */ /* diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c index 114edac..a587e8f 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_host.c +++ b/arch/powerpc/kvm/book3s_64_mmu_host.c @@ -34,9 +34,9 @@ void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte) { - ppc_md.hpte_invalidate(pte->slot, pte->host_vpn, - pte->pagesize, pte->pagesize, MMU_SEGSIZE_256M, - false); + mmu_hash_ops.hpte_invalidate(pte->slot, pte->host_vpn, + pte->pagesize, pte->pagesize, + MMU_SEGSIZE_256M, false); } /* We keep 512 gvsid->hvsid entries, mapping the guest ones to the array using @@ -169,13 +169,13 @@ map_again: /* In case we tried normal mapping already, let's nuke old entries */ 
if (attempt > 1) - if (ppc_md.hpte_remove(hpteg) < 0) { + if (mmu_hash_ops.hpte_remove(hpteg) < 0) { r = -1; goto out_unlock; } - ret = ppc_md.hpte_insert(hpteg, vpn, hpaddr, rflags, vflags, - hpsize, hpsize, MMU_SEGSIZE_256M); + ret = mmu_hash_ops.hpte_insert(hpteg, vpn, hpaddr, rflags, vflags, + hpsize, hpsize, MMU_SEGSIZE_256M); if (ret < 0) { /* If we couldn't map a primary PTE, try a secondary */ @@ -187,8 +187,10 @@ map_again: trace_kvm_book3s_64_mmu_map(rflags, hpteg, vpn, hpaddr, orig_pte); - /* The ppc_md code may give us a secondary entry even though we - asked for a primary. Fix up. */ + /* + * The mmu_hash_ops code may give us a secondary entry even + * though we asked for a primary. Fix up. + */ if ((ret & _PTEIDX_SECONDARY) && !(vflags & HPTE_V_SECONDARY)) { hash = ~hash; hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP); diff --git a/arch/powerpc/mm/hash64_4k.c b/arch/powerpc/mm/hash64_4k.c index 6333b27..42c702b 100644 --- a/arch/powerpc/mm/hash64_4k.c +++ b/arch/powerpc/mm/hash64_4k.c @@ -70,8 +70,8 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; slot += (old_pte & H_PAGE_F_GIX) >> H_PAGE_F_GIX_SHIFT; - if (ppc_md.hpte_updatepp(slot, rflags, vpn, MMU_PAGE_4K, - MMU_PAGE_4K, ssize, flags) == -1) + if (mmu_hash_ops.hpte_updatepp(slot, rflags, vpn, MMU_PAGE_4K, + MMU_PAGE_4K, ssize, flags) == -1) old_pte &= ~_PAGE_HPTEFLAGS; } @@ -84,21 +84,23 @@ repeat: hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; /* Insert into the hash table, primary slot */ - slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, 0, - MMU_PAGE_4K, MMU_PAGE_4K, ssize); + slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, 0, + MMU_PAGE_4K, MMU_PAGE_4K, ssize); /* * Primary is full, try the secondary */ if (unlikely(slot == -1)) { hpte_group = ((~hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; - slot = ppc_md.hpte_insert(hpte_group, vpn, pa, - rflags, 
HPTE_V_SECONDARY, - MMU_PAGE_4K, MMU_PAGE_4K, ssize); + slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, + rflags, + HPTE_V_SECONDARY, + MMU_PAGE_4K, + MMU_PAGE_4K, ssize); if (slot == -1) { if (mftb() & 0x1) hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; - ppc_md.hpte_remove(hpte_group); + mmu_hash_ops.hpte_remove(hpte_group); /* * FIXME!! Should be try the group from which we removed ? */ diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/hash64_64k.c index 16644e1..3bbbea0 100644 --- a/arch/powerpc/mm/hash64_64k.c +++ b/arch/powerpc/mm/hash64_64k.c @@ -133,9 +133,9 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; slot += hidx & _PTEIDX_GROUP_IX; - ret = ppc_md.hpte_updatepp(slot, rflags, vpn, - MMU_PAGE_4K, MMU_PAGE_4K, - ssize, flags); + ret = mmu_hash_ops.hpte_updatepp(slot, rflags, vpn, + MMU_PAGE_4K, MMU_PAGE_4K, + ssize, flags); /* *if we failed because typically the HPTE wasn't really here * we try an insertion. @@ -166,21 +166,22 @@ repeat: hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; /* Insert into the hash table, primary slot */ - slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, 0, - MMU_PAGE_4K, MMU_PAGE_4K, ssize); + slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, 0, + MMU_PAGE_4K, MMU_PAGE_4K, ssize); /* * Primary is full, try the secondary */ if (unlikely(slot == -1)) { hpte_group = ((~hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; - slot = ppc_md.hpte_insert(hpte_group, vpn, pa, - rflags, HPTE_V_SECONDARY, - MMU_PAGE_4K, MMU_PAGE_4K, ssize); + slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, + rflags, HPTE_V_SECONDARY, + MMU_PAGE_4K, MMU_PAGE_4K, + ssize); if (slot == -1) { if (mftb() & 0x1) hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; - ppc_md.hpte_remove(hpte_group); + mmu_hash_ops.hpte_remove(hpte_group); /* * FIXME!! 
Should be try the group from which we removed ? */ @@ -272,8 +273,9 @@ int __hash_page_64K(unsigned long ea, unsigned long access, slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; slot += (old_pte & H_PAGE_F_GIX) >> H_PAGE_F_GIX_SHIFT; - if (ppc_md.hpte_updatepp(slot, rflags, vpn, MMU_PAGE_64K, - MMU_PAGE_64K, ssize, flags) == -1) + if (mmu_hash_ops.hpte_updatepp(slot, rflags, vpn, MMU_PAGE_64K, + MMU_PAGE_64K, ssize, + flags) == -1) old_pte &= ~_PAGE_HPTEFLAGS; } @@ -286,21 +288,24 @@ repeat: hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; /* Insert into the hash table, primary slot */ - slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, 0, - MMU_PAGE_64K, MMU_PAGE_64K, ssize); + slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, 0, + MMU_PAGE_64K, MMU_PAGE_64K, + ssize); /* * Primary is full, try the secondary */ if (unlikely(slot == -1)) { hpte_group = ((~hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; - slot = ppc_md.hpte_insert(hpte_group, vpn, pa, - rflags, HPTE_V_SECONDARY, - MMU_PAGE_64K, MMU_PAGE_64K, ssize); + slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, + rflags, + HPTE_V_SECONDARY, + MMU_PAGE_64K, + MMU_PAGE_64K, ssize); if (slot == -1) { if (mftb() & 0x1) hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; - ppc_md.hpte_remove(hpte_group); + mmu_hash_ops.hpte_remove(hpte_group); /* * FIXME!! Should be try the group from which we removed ? 
*/ diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index cb3b4c9..88ce7d2 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -739,14 +739,14 @@ static int native_register_proc_table(unsigned long base, unsigned long page_siz void __init hpte_init_native(void) { - ppc_md.hpte_invalidate = native_hpte_invalidate; - ppc_md.hpte_updatepp = native_hpte_updatepp; - ppc_md.hpte_updateboltedpp = native_hpte_updateboltedpp; - ppc_md.hpte_insert = native_hpte_insert; - ppc_md.hpte_remove = native_hpte_remove; - ppc_md.hpte_clear_all = native_hpte_clear; - ppc_md.flush_hash_range = native_flush_hash_range; - ppc_md.hugepage_invalidate = native_hugepage_invalidate; + mmu_hash_ops.hpte_invalidate = native_hpte_invalidate; + mmu_hash_ops.hpte_updatepp = native_hpte_updatepp; + mmu_hash_ops.hpte_updateboltedpp = native_hpte_updateboltedpp; + mmu_hash_ops.hpte_insert = native_hpte_insert; + mmu_hash_ops.hpte_remove = native_hpte_remove; + mmu_hash_ops.hpte_clear_all = native_hpte_clear; + mmu_hash_ops.flush_hash_range = native_flush_hash_range; + mmu_hash_ops.hugepage_invalidate = native_hugepage_invalidate; if (cpu_has_feature(CPU_FTR_ARCH_300)) ppc_md.register_process_table = native_register_proc_table; diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index eab3074..3416324 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -118,6 +118,8 @@ static u8 *linear_map_hash_slots; static unsigned long linear_map_hash_count; static DEFINE_SPINLOCK(linear_map_hash_lock); #endif /* CONFIG_DEBUG_PAGEALLOC */ +struct mmu_hash_ops mmu_hash_ops; +EXPORT_SYMBOL(mmu_hash_ops); /* There are definitions of page sizes arrays to be used when none * is provided by the firmware. 
@@ -276,9 +278,10 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend, hash = hpt_hash(vpn, shift, ssize); hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP); - BUG_ON(!ppc_md.hpte_insert); - ret = ppc_md.hpte_insert(hpteg, vpn, paddr, tprot, - HPTE_V_BOLTED, psize, psize, ssize); + BUG_ON(!mmu_hash_ops.hpte_insert); + ret = mmu_hash_ops.hpte_insert(hpteg, vpn, paddr, tprot, + HPTE_V_BOLTED, psize, psize, + ssize); if (ret < 0) break; @@ -303,11 +306,11 @@ int htab_remove_mapping(unsigned long vstart, unsigned long vend, shift = mmu_psize_defs[psize].shift; step = 1 << shift; - if (!ppc_md.hpte_removebolted) + if (!mmu_hash_ops.hpte_removebolted) return -ENODEV; for (vaddr = vstart; vaddr < vend; vaddr += step) { - rc = ppc_md.hpte_removebolted(vaddr, psize, ssize); + rc = mmu_hash_ops.hpte_removebolted(vaddr, psize, ssize); if (rc == -ENOENT) { ret = -ENOENT; continue; @@ -789,8 +792,8 @@ static void __init htab_initialize(void) * Clear the htab if firmware assisted dump is active so * that we dont end up using old mappings. 
*/ - if (is_fadump_active() && ppc_md.hpte_clear_all) - ppc_md.hpte_clear_all(); + if (is_fadump_active() && mmu_hash_ops.hpte_clear_all) + mmu_hash_ops.hpte_clear_all(); #endif } else { unsigned long limit = MEMBLOCK_ALLOC_ANYWHERE; @@ -1480,7 +1483,8 @@ void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize, * We use same base page size and actual psize, because we don't * use these functions for hugepage */ - ppc_md.hpte_invalidate(slot, vpn, psize, psize, ssize, local); + mmu_hash_ops.hpte_invalidate(slot, vpn, psize, psize, + ssize, local); } pte_iterate_hashed_end(); #ifdef CONFIG_PPC_TRANSACTIONAL_MEM @@ -1521,9 +1525,9 @@ void flush_hash_hugepage(unsigned long vsid, unsigned long addr, if (!hpte_slot_array) return; - if (ppc_md.hugepage_invalidate) { - ppc_md.hugepage_invalidate(vsid, s_addr, hpte_slot_array, - psize, ssize, local); + if (mmu_hash_ops.hugepage_invalidate) { + mmu_hash_ops.hugepage_invalidate(vsid, s_addr, hpte_slot_array, + psize, ssize, local); goto tm_abort; } /* @@ -1550,8 +1554,8 @@ void flush_hash_hugepage(unsigned long vsid, unsigned long addr, slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; slot += hidx & _PTEIDX_GROUP_IX; - ppc_md.hpte_invalidate(slot, vpn, psize, - MMU_PAGE_16M, ssize, local); + mmu_hash_ops.hpte_invalidate(slot, vpn, psize, + MMU_PAGE_16M, ssize, local); } tm_abort: #ifdef CONFIG_PPC_TRANSACTIONAL_MEM @@ -1575,8 +1579,8 @@ tm_abort: void flush_hash_range(unsigned long number, int local) { - if (ppc_md.flush_hash_range) - ppc_md.flush_hash_range(number, local); + if (mmu_hash_ops.flush_hash_range) + mmu_hash_ops.flush_hash_range(number, local); else { int i; struct ppc64_tlb_batch *batch = @@ -1621,22 +1625,22 @@ repeat: HPTES_PER_GROUP) & ~0x7UL; /* Insert into the hash table, primary slot */ - slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, vflags, - psize, psize, ssize); + slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, vflags, + psize, psize, ssize); /* Primary is 
full, try the secondary */ if (unlikely(slot == -1)) { hpte_group = ((~hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; - slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, - vflags | HPTE_V_SECONDARY, - psize, psize, ssize); + slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, + vflags | HPTE_V_SECONDARY, + psize, psize, ssize); if (slot == -1) { if (mftb() & 0x1) hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP)&~0x7UL; - ppc_md.hpte_remove(hpte_group); + mmu_hash_ops.hpte_remove(hpte_group); goto repeat; } } @@ -1686,8 +1690,9 @@ static void kernel_unmap_linear_page(unsigned long vaddr, unsigned long lmi) hash = ~hash; slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; slot += hidx & _PTEIDX_GROUP_IX; - ppc_md.hpte_invalidate(slot, vpn, mmu_linear_psize, mmu_linear_psize, - mmu_kernel_ssize, 0); + mmu_hash_ops.hpte_invalidate(slot, vpn, mmu_linear_psize, + mmu_linear_psize, + mmu_kernel_ssize, 0); } void __kernel_map_pages(struct page *page, int numpages, int enable) diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c index ba3fc22..f20d16f 100644 --- a/arch/powerpc/mm/hugepage-hash64.c +++ b/arch/powerpc/mm/hugepage-hash64.c @@ -103,8 +103,8 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; slot += hidx & _PTEIDX_GROUP_IX; - ret = ppc_md.hpte_updatepp(slot, rflags, vpn, - psize, lpsize, ssize, flags); + ret = mmu_hash_ops.hpte_updatepp(slot, rflags, vpn, + psize, lpsize, ssize, flags); /* * We failed to update, try to insert a new entry. 
*/ @@ -131,23 +131,24 @@ repeat: hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; /* Insert into the hash table, primary slot */ - slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, 0, - psize, lpsize, ssize); + slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, 0, + psize, lpsize, ssize); /* * Primary is full, try the secondary */ if (unlikely(slot == -1)) { hpte_group = ((~hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; - slot = ppc_md.hpte_insert(hpte_group, vpn, pa, - rflags, HPTE_V_SECONDARY, - psize, lpsize, ssize); + slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, + rflags, + HPTE_V_SECONDARY, + psize, lpsize, ssize); if (slot == -1) { if (mftb() & 0x1) hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; - ppc_md.hpte_remove(hpte_group); + mmu_hash_ops.hpte_remove(hpte_group); goto repeat; } } diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c index 3058560..d5026f3 100644 --- a/arch/powerpc/mm/hugetlbpage-hash64.c +++ b/arch/powerpc/mm/hugetlbpage-hash64.c @@ -79,8 +79,8 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; slot += (old_pte & H_PAGE_F_GIX) >> H_PAGE_F_GIX_SHIFT; - if (ppc_md.hpte_updatepp(slot, rflags, vpn, mmu_psize, - mmu_psize, ssize, flags) == -1) + if (mmu_hash_ops.hpte_updatepp(slot, rflags, vpn, mmu_psize, + mmu_psize, ssize, flags) == -1) old_pte &= ~_PAGE_HPTEFLAGS; } diff --git a/arch/powerpc/platforms/ps3/htab.c b/arch/powerpc/platforms/ps3/htab.c index c9a3e67..cb3c503 100644 --- a/arch/powerpc/platforms/ps3/htab.c +++ b/arch/powerpc/platforms/ps3/htab.c @@ -195,12 +195,12 @@ static void ps3_hpte_clear(void) void __init ps3_hpte_init(unsigned long htab_size) { - ppc_md.hpte_invalidate = ps3_hpte_invalidate; - ppc_md.hpte_updatepp = ps3_hpte_updatepp; - ppc_md.hpte_updateboltedpp = ps3_hpte_updateboltedpp; - ppc_md.hpte_insert = ps3_hpte_insert; - 
ppc_md.hpte_remove = ps3_hpte_remove; - ppc_md.hpte_clear_all = ps3_hpte_clear; + mmu_hash_ops.hpte_invalidate = ps3_hpte_invalidate; + mmu_hash_ops.hpte_updatepp = ps3_hpte_updatepp; + mmu_hash_ops.hpte_updateboltedpp = ps3_hpte_updateboltedpp; + mmu_hash_ops.hpte_insert = ps3_hpte_insert; + mmu_hash_ops.hpte_remove = ps3_hpte_remove; + mmu_hash_ops.hpte_clear_all = ps3_hpte_clear; ppc64_pft_size = __ilog2(htab_size); } diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 03c732a..0e91388 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -591,15 +591,15 @@ __setup("bulk_remove=", disable_bulk_remove); void __init hpte_init_lpar(void) { - ppc_md.hpte_invalidate = pSeries_lpar_hpte_invalidate; - ppc_md.hpte_updatepp = pSeries_lpar_hpte_updatepp; - ppc_md.hpte_updateboltedpp = pSeries_lpar_hpte_updateboltedpp; - ppc_md.hpte_insert = pSeries_lpar_hpte_insert; - ppc_md.hpte_remove = pSeries_lpar_hpte_remove; - ppc_md.hpte_removebolted = pSeries_lpar_hpte_removebolted; - ppc_md.flush_hash_range = pSeries_lpar_flush_hash_range; - ppc_md.hpte_clear_all = pSeries_lpar_hptab_clear; - ppc_md.hugepage_invalidate = pSeries_lpar_hugepage_invalidate; + mmu_hash_ops.hpte_invalidate = pSeries_lpar_hpte_invalidate; + mmu_hash_ops.hpte_updatepp = pSeries_lpar_hpte_updatepp; + mmu_hash_ops.hpte_updateboltedpp = pSeries_lpar_hpte_updateboltedpp; + mmu_hash_ops.hpte_insert = pSeries_lpar_hpte_insert; + mmu_hash_ops.hpte_remove = pSeries_lpar_hpte_remove; + mmu_hash_ops.hpte_removebolted = pSeries_lpar_hpte_removebolted; + mmu_hash_ops.flush_hash_range = pSeries_lpar_flush_hash_range; + mmu_hash_ops.hpte_clear_all = pSeries_lpar_hptab_clear; + mmu_hash_ops.hugepage_invalidate = pSeries_lpar_hugepage_invalidate; } #ifdef CONFIG_PPC_SMLPAR -- cgit v0.10.2 From 84b62c72faa197a5c9b75ee93527add31695fb32 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 5 Jul 2016 15:03:59 +1000 Subject: 
powerpc: Ensure that ppc_md is empty before probing for machine type Anything in there will be overwritten, so it helps catch nasty bugs if we check that it's indeed full of NULLs before we do so. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 2a3564c..b8ee1c8 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -575,6 +575,7 @@ void probe_machine(void) { extern struct machdep_calls __machine_desc_start; extern struct machdep_calls __machine_desc_end; + unsigned int i; /* * Iterate all ppc_md structures until we find the proper @@ -582,6 +583,17 @@ void probe_machine(void) */ DBG("Probing machine type ...\n"); + /* + * Check ppc_md is empty, if not we have a bug, ie, we setup an + * entry before probe_machine() which will be overwritten + */ + for (i = 0; i < (sizeof(ppc_md) / sizeof(void *)); i++) { + if (((void **)&ppc_md)[i]) { + printk(KERN_ERR "Entry %d in ppc_md non empty before" + " machine probe !\n", i); + } + } + for (machine_id = &__machine_desc_start; machine_id < &__machine_desc_end; machine_id++) { -- cgit v0.10.2 From 406b0b6ae3fcd5c7946a68a9e43b470c79d292a2 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 5 Jul 2016 15:04:00 +1000 Subject: powerpc/64: Move 64-bit probe_machine() to later in the boot process We no longer need the machine type that early, so we can move probe_machine() to after the device-tree has been expanded. This will allow further consolidation.
Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 4ffd090..883d527 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -301,9 +301,6 @@ void __init early_setup(unsigned long dt_ptr) setup_paca(&paca[boot_cpuid]); fixup_boot_paca(); - /* Probe the machine type */ - probe_machine(); - /* * Configure exception handlers. This include setting up trampolines * if needed, setting exception endian mode, etc... @@ -511,6 +508,9 @@ void __init setup_system(void) */ check_for_initrd(); + /* Probe the machine type */ + probe_machine(); + /* * Do some platform specific early initializations, that includes * setting up the hash table pointers. It also sets up some interrupt-mapping diff --git a/arch/powerpc/platforms/cell/setup.c b/arch/powerpc/platforms/cell/setup.c index e342f78..d3543e6 100644 --- a/arch/powerpc/platforms/cell/setup.c +++ b/arch/powerpc/platforms/cell/setup.c @@ -255,10 +255,8 @@ static void __init cell_setup_arch(void) static int __init cell_probe(void) { - unsigned long root = of_get_flat_dt_root(); - - if (!of_flat_dt_is_compatible(root, "IBM,CBEA") && - !of_flat_dt_is_compatible(root, "IBM,CPBW-1.0")) + if (!of_machine_is_compatible("IBM,CBEA") && + !of_machine_is_compatible("IBM,CPBW-1.0")) return 0; pm_power_off = rtas_power_off; diff --git a/arch/powerpc/platforms/maple/setup.c b/arch/powerpc/platforms/maple/setup.c index b1ecd99..d3d44cb 100644 --- a/arch/powerpc/platforms/maple/setup.c +++ b/arch/powerpc/platforms/maple/setup.c @@ -300,10 +300,8 @@ static void __init maple_progress(char *s, unsigned short hex) */ static int __init maple_probe(void) { - unsigned long root = of_get_flat_dt_root(); - - if (!of_flat_dt_is_compatible(root, "Momentum,Maple") && - !of_flat_dt_is_compatible(root, "Momentum,Apache")) + if (!of_machine_is_compatible("Momentum,Maple") && + !of_machine_is_compatible("Momentum,Apache")) return 0; 
pm_power_off = maple_power_off; diff --git a/arch/powerpc/platforms/pasemi/setup.c b/arch/powerpc/platforms/pasemi/setup.c index d120f7a..ec810dd 100644 --- a/arch/powerpc/platforms/pasemi/setup.c +++ b/arch/powerpc/platforms/pasemi/setup.c @@ -420,10 +420,8 @@ machine_device_initcall(pasemi, pasemi_publish_devices); */ static int __init pas_probe(void) { - unsigned long root = of_get_flat_dt_root(); - - if (!of_flat_dt_is_compatible(root, "PA6T-1682M") && - !of_flat_dt_is_compatible(root, "pasemi,pwrficient")) + if (!of_machine_is_compatible("PA6T-1682M") && + !of_machine_is_compatible("pasemi,pwrficient")) return 0; return 1; diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c index 8fffe1c..0872f98 100644 --- a/arch/powerpc/platforms/powermac/setup.c +++ b/arch/powerpc/platforms/powermac/setup.c @@ -590,11 +590,17 @@ console_initcall(check_pmac_serial_console); */ static int __init pmac_probe(void) { +#ifdef CONFIG_PPC32 unsigned long root = of_get_flat_dt_root(); if (!of_flat_dt_is_compatible(root, "Power Macintosh") && !of_flat_dt_is_compatible(root, "MacRISC")) return 0; +#else + if (!of_machine_is_compatible("Power Macintosh") && + !of_machine_is_compatible("MacRISC")) + return 0; +#endif #ifdef CONFIG_PPC32 /* isa_io_base gets set in pmac_pci_init */ diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index f70ea83..8865efa 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -268,9 +268,7 @@ static void __init pnv_setup_machdep_opal(void) static int __init pnv_probe(void) { - unsigned long root = of_get_flat_dt_root(); - - if (!of_flat_dt_is_compatible(root, "ibm,powernv")) + if (!of_machine_is_compatible("ibm,powernv")) return 0; if (firmware_has_feature(FW_FEATURE_OPAL)) diff --git a/arch/powerpc/platforms/ps3/setup.c b/arch/powerpc/platforms/ps3/setup.c index b7fdf88..3a487e7 100644 --- a/arch/powerpc/platforms/ps3/setup.c 
+++ b/arch/powerpc/platforms/ps3/setup.c @@ -237,15 +237,13 @@ void __init ps3_early_mm_init(void) static int __init ps3_probe(void) { - unsigned long dt_root; - DBG(" -> %s:%d\n", __func__, __LINE__); - dt_root = of_get_flat_dt_root(); - if (!of_flat_dt_is_compatible(dt_root, "sony,ps3")) + if (!of_machine_is_compatible("sony,ps3")) return 0; ps3_os_area_save_params(); + pm_power_off = ps3_power_off; DBG(" <- %s:%d\n", __func__, __LINE__); diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 2407213..6988b9d 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -672,8 +672,7 @@ static void pseries_power_off(void) static int __init pSeries_probe(void) { - unsigned long root = of_get_flat_dt_root(); - const char *dtype = of_get_flat_dt_prop(root, "device_type", NULL); + const char *dtype = of_get_property(of_root, "device_type", NULL); if (dtype == NULL) return 0; @@ -683,8 +682,8 @@ static int __init pSeries_probe(void) /* Cell blades firmware claims to be chrp while it's not. Until this * is fixed, we need to avoid those here. */ - if (of_flat_dt_is_compatible(root, "IBM,CPBW-1.0") || - of_flat_dt_is_compatible(root, "IBM,CBEA")) + if (of_machine_is_compatible("IBM,CPBW-1.0") || + of_machine_is_compatible("IBM,CBEA")) return 0; pm_power_off = pseries_power_off; -- cgit v0.10.2 From 565713840445b7ccafb28dc1230d57d40bcb42a5 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 5 Jul 2016 15:04:05 +1000 Subject: powerpc: Move 32-bit probe() machine to later in the boot process This converts all the 32-bit platforms to use the expanded device-tree which is a pretty mechanical change. Unlike 64-bit, the 32-bit kernel didn't rely on platform initializations to setup the MMU since it sets it up entirely before probe_machine() so the move has comparatively less consequences though it's a bigger patch. 
Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 3f0aca2..e7bb4e7 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -113,23 +113,7 @@ notrace void __init machine_init(u64 dt_ptr) early_init_mmu(); - probe_machine(); - setup_kdump_trampoline(); - -#ifdef CONFIG_6xx - if (cpu_has_feature(CPU_FTR_CAN_DOZE) || - cpu_has_feature(CPU_FTR_CAN_NAP)) - ppc_md.power_save = ppc6xx_idle; -#endif - -#ifdef CONFIG_E500 - if (cpu_has_feature(CPU_FTR_CAN_DOZE) || - cpu_has_feature(CPU_FTR_CAN_NAP)) - ppc_md.power_save = e500_idle; -#endif - if (ppc_md.progress) - ppc_md.progress("id mach(): done", 0x200); } /* Checks "l2cr=xxxx" command-line option */ @@ -249,6 +233,21 @@ static void __init exc_lvl_early_init(void) #define exc_lvl_early_init() #endif +static void setup_power_save(void) +{ +#ifdef CONFIG_6xx + if (cpu_has_feature(CPU_FTR_CAN_DOZE) || + cpu_has_feature(CPU_FTR_CAN_NAP)) + ppc_md.power_save = ppc6xx_idle; +#endif + +#ifdef CONFIG_E500 + if (cpu_has_feature(CPU_FTR_CAN_DOZE) || + cpu_has_feature(CPU_FTR_CAN_NAP)) + ppc_md.power_save = e500_idle; +#endif +} + /* Warning, IO base is not yet inited */ void __init setup_arch(char **cmdline_p) { @@ -260,6 +259,10 @@ void __init setup_arch(char **cmdline_p) unflatten_device_tree(); check_for_initrd(); + probe_machine(); + + setup_power_save(); + if (ppc_md.init_early) ppc_md.init_early(); diff --git a/arch/powerpc/platforms/40x/ep405.c b/arch/powerpc/platforms/40x/ep405.c index ddc12a1..1c8aec6 100644 --- a/arch/powerpc/platforms/40x/ep405.c +++ b/arch/powerpc/platforms/40x/ep405.c @@ -105,9 +105,7 @@ static void __init ep405_setup_arch(void) static int __init ep405_probe(void) { - unsigned long root = of_get_flat_dt_root(); - - if (!of_flat_dt_is_compatible(root, "ep405")) + if (!of_machine_is_compatible("ep405")) return 0; return 1; diff --git 
a/arch/powerpc/platforms/40x/ppc40x_simple.c b/arch/powerpc/platforms/40x/ppc40x_simple.c index b0c4637..2a05000 100644 --- a/arch/powerpc/platforms/40x/ppc40x_simple.c +++ b/arch/powerpc/platforms/40x/ppc40x_simple.c @@ -63,7 +63,7 @@ static const char * const board[] __initconst = { static int __init ppc40x_probe(void) { - if (of_flat_dt_match(of_get_flat_dt_root(), board)) { + if (of_device_compatible_match(of_root, board)) { pci_set_flags(PCI_REASSIGN_ALL_RSRC); return 1; } diff --git a/arch/powerpc/platforms/40x/virtex.c b/arch/powerpc/platforms/40x/virtex.c index 9aa7ae2..91a08ea 100644 --- a/arch/powerpc/platforms/40x/virtex.c +++ b/arch/powerpc/platforms/40x/virtex.c @@ -37,9 +37,7 @@ machine_device_initcall(virtex, virtex_device_probe); static int __init virtex_probe(void) { - unsigned long root = of_get_flat_dt_root(); - - if (!of_flat_dt_is_compatible(root, "xlnx,virtex")) + if (!of_machine_is_compatible("xlnx,virtex")) return 0; return 1; diff --git a/arch/powerpc/platforms/40x/walnut.c b/arch/powerpc/platforms/40x/walnut.c index f7ac2d0..e579781 100644 --- a/arch/powerpc/platforms/40x/walnut.c +++ b/arch/powerpc/platforms/40x/walnut.c @@ -46,9 +46,7 @@ machine_device_initcall(walnut, walnut_device_probe); static int __init walnut_probe(void) { - unsigned long root = of_get_flat_dt_root(); - - if (!of_flat_dt_is_compatible(root, "ibm,walnut")) + if (!of_machine_is_compatible("ibm,walnut")) return 0; pci_set_flags(PCI_REASSIGN_ALL_RSRC); diff --git a/arch/powerpc/platforms/44x/canyonlands.c b/arch/powerpc/platforms/44x/canyonlands.c index 22ca543..157f4ce 100644 --- a/arch/powerpc/platforms/44x/canyonlands.c +++ b/arch/powerpc/platforms/44x/canyonlands.c @@ -53,11 +53,10 @@ machine_device_initcall(canyonlands, ppc460ex_device_probe); static int __init ppc460ex_probe(void) { - unsigned long root = of_get_flat_dt_root(); - if (of_flat_dt_is_compatible(root, "amcc,canyonlands")) { + if (of_machine_is_compatible("amcc,canyonlands")) { 
pci_set_flags(PCI_REASSIGN_ALL_RSRC); return 1; - } + } return 0; } diff --git a/arch/powerpc/platforms/44x/ebony.c b/arch/powerpc/platforms/44x/ebony.c index ae89322..1070225 100644 --- a/arch/powerpc/platforms/44x/ebony.c +++ b/arch/powerpc/platforms/44x/ebony.c @@ -49,9 +49,7 @@ machine_device_initcall(ebony, ebony_device_probe); */ static int __init ebony_probe(void) { - unsigned long root = of_get_flat_dt_root(); - - if (!of_flat_dt_is_compatible(root, "ibm,ebony")) + if (!of_machine_is_compatible("ibm,ebony")) return 0; pci_set_flags(PCI_REASSIGN_ALL_RSRC); diff --git a/arch/powerpc/platforms/44x/iss4xx.c b/arch/powerpc/platforms/44x/iss4xx.c index c7c6758..5f296dd 100644 --- a/arch/powerpc/platforms/44x/iss4xx.c +++ b/arch/powerpc/platforms/44x/iss4xx.c @@ -149,9 +149,7 @@ static void __init iss4xx_setup_arch(void) */ static int __init iss4xx_probe(void) { - unsigned long root = of_get_flat_dt_root(); - - if (!of_flat_dt_is_compatible(root, "ibm,iss-4xx")) + if (!of_machine_is_compatible("ibm,iss-4xx")) return 0; return 1; diff --git a/arch/powerpc/platforms/44x/ppc44x_simple.c b/arch/powerpc/platforms/44x/ppc44x_simple.c index 573c3d2..8d6e4da 100644 --- a/arch/powerpc/platforms/44x/ppc44x_simple.c +++ b/arch/powerpc/platforms/44x/ppc44x_simple.c @@ -67,11 +67,10 @@ static char *board[] __initdata = { static int __init ppc44x_probe(void) { - unsigned long root = of_get_flat_dt_root(); int i = 0; for (i = 0; i < ARRAY_SIZE(board); i++) { - if (of_flat_dt_is_compatible(root, board[i])) { + if (of_machine_is_compatible(board[i])) { pci_set_flags(PCI_REASSIGN_ALL_RSRC); return 1; } diff --git a/arch/powerpc/platforms/44x/ppc476.c b/arch/powerpc/platforms/44x/ppc476.c index 003973f..e55933f 100644 --- a/arch/powerpc/platforms/44x/ppc476.c +++ b/arch/powerpc/platforms/44x/ppc476.c @@ -275,12 +275,10 @@ static void ppc47x_pci_irq_fixup(struct pci_dev *dev) */ static int __init ppc47x_probe(void) { - unsigned long root = of_get_flat_dt_root(); - - if 
(of_flat_dt_is_compatible(root, "ibm,akebono")) + if (of_machine_is_compatible("ibm,akebono")) return 1; - if (of_flat_dt_is_compatible(root, "ibm,currituck")) { + if (of_machine_is_compatible("ibm,currituck")) { ppc_md.pci_irq_fixup = ppc47x_pci_irq_fixup; return 1; } diff --git a/arch/powerpc/platforms/44x/sam440ep.c b/arch/powerpc/platforms/44x/sam440ep.c index 3ee4a03..688ffea 100644 --- a/arch/powerpc/platforms/44x/sam440ep.c +++ b/arch/powerpc/platforms/44x/sam440ep.c @@ -46,9 +46,7 @@ machine_device_initcall(sam440ep, sam440ep_device_probe); static int __init sam440ep_probe(void) { - unsigned long root = of_get_flat_dt_root(); - - if (!of_flat_dt_is_compatible(root, "acube,sam440ep")) + if (!of_machine_is_compatible("acube,sam440ep")) return 0; pci_set_flags(PCI_REASSIGN_ALL_RSRC); diff --git a/arch/powerpc/platforms/44x/virtex.c b/arch/powerpc/platforms/44x/virtex.c index ad272c1..a7e0802 100644 --- a/arch/powerpc/platforms/44x/virtex.c +++ b/arch/powerpc/platforms/44x/virtex.c @@ -43,9 +43,7 @@ machine_device_initcall(virtex, virtex_device_probe); static int __init virtex_probe(void) { - unsigned long root = of_get_flat_dt_root(); - - if (!of_flat_dt_is_compatible(root, "xlnx,virtex440")) + if (!of_machine_is_compatible("xlnx,virtex440")) return 0; return 1; diff --git a/arch/powerpc/platforms/44x/warp.c b/arch/powerpc/platforms/44x/warp.c index 501333c..5ecce54 100644 --- a/arch/powerpc/platforms/44x/warp.c +++ b/arch/powerpc/platforms/44x/warp.c @@ -44,9 +44,7 @@ machine_device_initcall(warp, warp_device_probe); static int __init warp_probe(void) { - unsigned long root = of_get_flat_dt_root(); - - if (!of_flat_dt_is_compatible(root, "pika,warp")) + if (!of_machine_is_compatible("pika,warp")) return 0; /* For __dma_alloc_coherent */ diff --git a/arch/powerpc/platforms/512x/mpc5121_ads.c b/arch/powerpc/platforms/512x/mpc5121_ads.c index 3e90ece..4e03f04 100644 --- a/arch/powerpc/platforms/512x/mpc5121_ads.c +++ b/arch/powerpc/platforms/512x/mpc5121_ads.c 
@@ -57,9 +57,7 @@ static void __init mpc5121_ads_init_IRQ(void) */ static int __init mpc5121_ads_probe(void) { - unsigned long root = of_get_flat_dt_root(); - - return of_flat_dt_is_compatible(root, "fsl,mpc5121ads"); + return of_machine_is_compatible("fsl,mpc5121ads"); } define_machine(mpc5121_ads) { diff --git a/arch/powerpc/platforms/512x/mpc512x_generic.c b/arch/powerpc/platforms/512x/mpc512x_generic.c index ce71408..87eba17 100644 --- a/arch/powerpc/platforms/512x/mpc512x_generic.c +++ b/arch/powerpc/platforms/512x/mpc512x_generic.c @@ -38,7 +38,7 @@ static const char * const board[] __initconst = { */ static int __init mpc512x_generic_probe(void) { - return of_flat_dt_match(of_get_flat_dt_root(), board); + return of_device_compatible_match(of_root, board); } define_machine(mpc512x_generic) { diff --git a/arch/powerpc/platforms/512x/pdm360ng.c b/arch/powerpc/platforms/512x/pdm360ng.c index 116f2325..f9cad19 100644 --- a/arch/powerpc/platforms/512x/pdm360ng.c +++ b/arch/powerpc/platforms/512x/pdm360ng.c @@ -113,9 +113,7 @@ void __init pdm360ng_init(void) static int __init pdm360ng_probe(void) { - unsigned long root = of_get_flat_dt_root(); - - return of_flat_dt_is_compatible(root, "ifm,pdm360ng"); + return of_machine_is_compatible("ifm,pdm360ng"); } define_machine(pdm360ng) { diff --git a/arch/powerpc/platforms/52xx/efika.c b/arch/powerpc/platforms/52xx/efika.c index 6af651e..39b4982 100644 --- a/arch/powerpc/platforms/52xx/efika.c +++ b/arch/powerpc/platforms/52xx/efika.c @@ -200,8 +200,7 @@ static void __init efika_setup_arch(void) static int __init efika_probe(void) { - const char *model = of_get_flat_dt_prop(of_get_flat_dt_root(), - "model", NULL); + const char *model = of_get_property(of_root, "model", NULL); if (model == NULL) return 0; diff --git a/arch/powerpc/platforms/52xx/lite5200.c b/arch/powerpc/platforms/52xx/lite5200.c index 7492de3..c94c385 100644 --- a/arch/powerpc/platforms/52xx/lite5200.c +++ b/arch/powerpc/platforms/52xx/lite5200.c @@ -183,7 
+183,7 @@ static const char * const board[] __initconst = { */ static int __init lite5200_probe(void) { - return of_flat_dt_match(of_get_flat_dt_root(), board); + return of_device_compatible_match(of_root, board); } define_machine(lite5200) { diff --git a/arch/powerpc/platforms/52xx/media5200.c b/arch/powerpc/platforms/52xx/media5200.c index 8fb9548..a322704 100644 --- a/arch/powerpc/platforms/52xx/media5200.c +++ b/arch/powerpc/platforms/52xx/media5200.c @@ -242,7 +242,7 @@ static const char * const board[] __initconst = { */ static int __init media5200_probe(void) { - return of_flat_dt_match(of_get_flat_dt_root(), board); + return of_device_compatible_match(of_root, board); } define_machine(media5200_platform) { diff --git a/arch/powerpc/platforms/52xx/mpc5200_simple.c b/arch/powerpc/platforms/52xx/mpc5200_simple.c index 792a301..a80c627 100644 --- a/arch/powerpc/platforms/52xx/mpc5200_simple.c +++ b/arch/powerpc/platforms/52xx/mpc5200_simple.c @@ -70,7 +70,7 @@ static const char *board[] __initdata = { */ static int __init mpc5200_simple_probe(void) { - return of_flat_dt_match(of_get_flat_dt_root(), board); + return of_device_compatible_match(of_root, board); } define_machine(mpc5200_simple_platform) { diff --git a/arch/powerpc/platforms/82xx/ep8248e.c b/arch/powerpc/platforms/82xx/ep8248e.c index 6781bda..cdab847 100644 --- a/arch/powerpc/platforms/82xx/ep8248e.c +++ b/arch/powerpc/platforms/82xx/ep8248e.c @@ -309,8 +309,7 @@ machine_device_initcall(ep8248e, declare_of_platform_devices); */ static int __init ep8248e_probe(void) { - unsigned long root = of_get_flat_dt_root(); - return of_flat_dt_is_compatible(root, "fsl,ep8248e"); + return of_machine_is_compatible("fsl,ep8248e"); } define_machine(ep8248e) diff --git a/arch/powerpc/platforms/82xx/km82xx.c b/arch/powerpc/platforms/82xx/km82xx.c index 387b446..28860e4 100644 --- a/arch/powerpc/platforms/82xx/km82xx.c +++ b/arch/powerpc/platforms/82xx/km82xx.c @@ -198,8 +198,7 @@ machine_device_initcall(km82xx, 
declare_of_platform_devices); */ static int __init km82xx_probe(void) { - unsigned long root = of_get_flat_dt_root(); - return of_flat_dt_is_compatible(root, "keymile,km82xx"); + return of_machine_is_compatible("keymile,km82xx"); } define_machine(km82xx) diff --git a/arch/powerpc/platforms/82xx/mpc8272_ads.c b/arch/powerpc/platforms/82xx/mpc8272_ads.c index d24deac..d23c10a 100644 --- a/arch/powerpc/platforms/82xx/mpc8272_ads.c +++ b/arch/powerpc/platforms/82xx/mpc8272_ads.c @@ -201,8 +201,7 @@ machine_device_initcall(mpc8272_ads, declare_of_platform_devices); */ static int __init mpc8272_ads_probe(void) { - unsigned long root = of_get_flat_dt_root(); - return of_flat_dt_is_compatible(root, "fsl,mpc8272ads"); + return of_machine_is_compatible("fsl,mpc8272ads"); } define_machine(mpc8272_ads) diff --git a/arch/powerpc/platforms/82xx/pq2fads.c b/arch/powerpc/platforms/82xx/pq2fads.c index 3a5164a..6c654dc 100644 --- a/arch/powerpc/platforms/82xx/pq2fads.c +++ b/arch/powerpc/platforms/82xx/pq2fads.c @@ -164,8 +164,7 @@ static void __init pq2fads_setup_arch(void) */ static int __init pq2fads_probe(void) { - unsigned long root = of_get_flat_dt_root(); - return of_flat_dt_is_compatible(root, "fsl,pq2fads"); + return of_machine_is_compatible("fsl,pq2fads"); } static const struct of_device_id of_bus_ids[] __initconst = { diff --git a/arch/powerpc/platforms/83xx/asp834x.c b/arch/powerpc/platforms/83xx/asp834x.c index 464ea8e..17e5433 100644 --- a/arch/powerpc/platforms/83xx/asp834x.c +++ b/arch/powerpc/platforms/83xx/asp834x.c @@ -43,8 +43,7 @@ machine_device_initcall(asp834x, mpc83xx_declare_of_platform_devices); */ static int __init asp834x_probe(void) { - unsigned long root = of_get_flat_dt_root(); - return of_flat_dt_is_compatible(root, "analogue-and-micro,asp8347e"); + return of_machine_is_compatible("analogue-and-micro,asp8347e"); } define_machine(asp834x) { diff --git a/arch/powerpc/platforms/83xx/km83xx.c b/arch/powerpc/platforms/83xx/km83xx.c index 4bc6bbb..e7fbd63 
100644 --- a/arch/powerpc/platforms/83xx/km83xx.c +++ b/arch/powerpc/platforms/83xx/km83xx.c @@ -171,11 +171,10 @@ static char *board[] __initdata = { */ static int __init mpc83xx_km_probe(void) { - unsigned long node = of_get_flat_dt_root(); int i = 0; while (board[i]) { - if (of_flat_dt_is_compatible(node, board[i])) + if (of_machine_is_compatible(board[i])) break; i++; } diff --git a/arch/powerpc/platforms/83xx/mpc830x_rdb.c b/arch/powerpc/platforms/83xx/mpc830x_rdb.c index 4f2d9fe..040d5d0 100644 --- a/arch/powerpc/platforms/83xx/mpc830x_rdb.c +++ b/arch/powerpc/platforms/83xx/mpc830x_rdb.c @@ -46,7 +46,7 @@ static const char *board[] __initdata = { */ static int __init mpc830x_rdb_probe(void) { - return of_flat_dt_match(of_get_flat_dt_root(), board); + return of_device_compatible_match(of_root, board); } machine_device_initcall(mpc830x_rdb, mpc83xx_declare_of_platform_devices); diff --git a/arch/powerpc/platforms/83xx/mpc831x_rdb.c b/arch/powerpc/platforms/83xx/mpc831x_rdb.c index fa25977..40e0d83 100644 --- a/arch/powerpc/platforms/83xx/mpc831x_rdb.c +++ b/arch/powerpc/platforms/83xx/mpc831x_rdb.c @@ -46,7 +46,7 @@ static const char *board[] __initdata = { */ static int __init mpc831x_rdb_probe(void) { - return of_flat_dt_match(of_get_flat_dt_root(), board); + return of_device_compatible_match(of_root, board); } machine_device_initcall(mpc831x_rdb, mpc83xx_declare_of_platform_devices); diff --git a/arch/powerpc/platforms/83xx/mpc832x_mds.c b/arch/powerpc/platforms/83xx/mpc832x_mds.c index a973b2a..cdfa47c 100644 --- a/arch/powerpc/platforms/83xx/mpc832x_mds.c +++ b/arch/powerpc/platforms/83xx/mpc832x_mds.c @@ -102,9 +102,7 @@ machine_device_initcall(mpc832x_mds, mpc83xx_declare_of_platform_devices); */ static int __init mpc832x_sys_probe(void) { - unsigned long root = of_get_flat_dt_root(); - - return of_flat_dt_is_compatible(root, "MPC832xMDS"); + return of_machine_is_compatible("MPC832xMDS"); } define_machine(mpc832x_mds) { diff --git 
a/arch/powerpc/platforms/83xx/mpc832x_rdb.c b/arch/powerpc/platforms/83xx/mpc832x_rdb.c index ea2b87d..2ef03e7 100644 --- a/arch/powerpc/platforms/83xx/mpc832x_rdb.c +++ b/arch/powerpc/platforms/83xx/mpc832x_rdb.c @@ -220,9 +220,7 @@ machine_device_initcall(mpc832x_rdb, mpc83xx_declare_of_platform_devices); */ static int __init mpc832x_rdb_probe(void) { - unsigned long root = of_get_flat_dt_root(); - - return of_flat_dt_is_compatible(root, "MPC832xRDB"); + return of_machine_is_compatible("MPC832xRDB"); } define_machine(mpc832x_rdb) { diff --git a/arch/powerpc/platforms/83xx/mpc834x_itx.c b/arch/powerpc/platforms/83xx/mpc834x_itx.c index 80aea8c..8fd0c1e 100644 --- a/arch/powerpc/platforms/83xx/mpc834x_itx.c +++ b/arch/powerpc/platforms/83xx/mpc834x_itx.c @@ -70,9 +70,7 @@ static void __init mpc834x_itx_setup_arch(void) */ static int __init mpc834x_itx_probe(void) { - unsigned long root = of_get_flat_dt_root(); - - return of_flat_dt_is_compatible(root, "MPC834xMITX"); + return of_machine_is_compatible("MPC834xMITX"); } define_machine(mpc834x_itx) { diff --git a/arch/powerpc/platforms/83xx/mpc834x_mds.c b/arch/powerpc/platforms/83xx/mpc834x_mds.c index 553e793..eeaee61 100644 --- a/arch/powerpc/platforms/83xx/mpc834x_mds.c +++ b/arch/powerpc/platforms/83xx/mpc834x_mds.c @@ -91,9 +91,7 @@ machine_device_initcall(mpc834x_mds, mpc83xx_declare_of_platform_devices); */ static int __init mpc834x_mds_probe(void) { - unsigned long root = of_get_flat_dt_root(); - - return of_flat_dt_is_compatible(root, "MPC834xMDS"); + return of_machine_is_compatible("MPC834xMDS"); } define_machine(mpc834x_mds) { diff --git a/arch/powerpc/platforms/83xx/mpc836x_mds.c b/arch/powerpc/platforms/83xx/mpc836x_mds.c index dd70b85..dacf4c2 100644 --- a/arch/powerpc/platforms/83xx/mpc836x_mds.c +++ b/arch/powerpc/platforms/83xx/mpc836x_mds.c @@ -209,9 +209,7 @@ machine_arch_initcall(mpc836x_mds, mpc836x_usb_cfg); */ static int __init mpc836x_mds_probe(void) { - unsigned long root = 
of_get_flat_dt_root(); - - return of_flat_dt_is_compatible(root, "MPC836xMDS"); + return of_machine_is_compatible("MPC836xMDS"); } define_machine(mpc836x_mds) { diff --git a/arch/powerpc/platforms/83xx/mpc836x_rdk.c b/arch/powerpc/platforms/83xx/mpc836x_rdk.c index 4cd7153..cf67ac9 100644 --- a/arch/powerpc/platforms/83xx/mpc836x_rdk.c +++ b/arch/powerpc/platforms/83xx/mpc836x_rdk.c @@ -42,9 +42,7 @@ static void __init mpc836x_rdk_setup_arch(void) */ static int __init mpc836x_rdk_probe(void) { - unsigned long root = of_get_flat_dt_root(); - - return of_flat_dt_is_compatible(root, "fsl,mpc8360rdk"); + return of_machine_is_compatible("fsl,mpc8360rdk"); } define_machine(mpc836x_rdk) { diff --git a/arch/powerpc/platforms/83xx/mpc837x_mds.c b/arch/powerpc/platforms/83xx/mpc837x_mds.c index e53a60b..652b97d 100644 --- a/arch/powerpc/platforms/83xx/mpc837x_mds.c +++ b/arch/powerpc/platforms/83xx/mpc837x_mds.c @@ -93,9 +93,7 @@ machine_device_initcall(mpc837x_mds, mpc83xx_declare_of_platform_devices); */ static int __init mpc837x_mds_probe(void) { - unsigned long root = of_get_flat_dt_root(); - - return of_flat_dt_is_compatible(root, "fsl,mpc837xmds"); + return of_machine_is_compatible("fsl,mpc837xmds"); } define_machine(mpc837x_mds) { diff --git a/arch/powerpc/platforms/83xx/mpc837x_rdb.c b/arch/powerpc/platforms/83xx/mpc837x_rdb.c index 9813c81..667731d 100644 --- a/arch/powerpc/platforms/83xx/mpc837x_rdb.c +++ b/arch/powerpc/platforms/83xx/mpc837x_rdb.c @@ -73,7 +73,7 @@ static const char * const board[] __initconst = { */ static int __init mpc837x_rdb_probe(void) { - return of_flat_dt_match(of_get_flat_dt_root(), board); + return of_device_compatible_match(of_root, board); } define_machine(mpc837x_rdb) { diff --git a/arch/powerpc/platforms/83xx/sbc834x.c b/arch/powerpc/platforms/83xx/sbc834x.c index 26cb3e9..b867e88 100644 --- a/arch/powerpc/platforms/83xx/sbc834x.c +++ b/arch/powerpc/platforms/83xx/sbc834x.c @@ -60,9 +60,7 @@ machine_device_initcall(sbc834x, 
mpc83xx_declare_of_platform_devices); */ static int __init sbc834x_probe(void) { - unsigned long root = of_get_flat_dt_root(); - - return of_flat_dt_is_compatible(root, "SBC834xE"); + return of_machine_is_compatible("SBC834xE"); } define_machine(sbc834x) { diff --git a/arch/powerpc/platforms/85xx/bsc913x_qds.c b/arch/powerpc/platforms/85xx/bsc913x_qds.c index dcfafd6..07dd6ae 100644 --- a/arch/powerpc/platforms/85xx/bsc913x_qds.c +++ b/arch/powerpc/platforms/85xx/bsc913x_qds.c @@ -60,9 +60,7 @@ machine_arch_initcall(bsc9132_qds, mpc85xx_common_publish_devices); static int __init bsc9132_qds_probe(void) { - unsigned long root = of_get_flat_dt_root(); - - return of_flat_dt_is_compatible(root, "fsl,bsc9132qds"); + return of_machine_is_compatible("fsl,bsc9132qds"); } define_machine(bsc9132_qds) { diff --git a/arch/powerpc/platforms/85xx/bsc913x_rdb.c b/arch/powerpc/platforms/85xx/bsc913x_rdb.c index 9d57bed..e48f671 100644 --- a/arch/powerpc/platforms/85xx/bsc913x_rdb.c +++ b/arch/powerpc/platforms/85xx/bsc913x_rdb.c @@ -50,9 +50,7 @@ machine_device_initcall(bsc9131_rdb, mpc85xx_common_publish_devices); static int __init bsc9131_rdb_probe(void) { - unsigned long root = of_get_flat_dt_root(); - - return of_flat_dt_is_compatible(root, "fsl,bsc9131rdb"); + return of_machine_is_compatible("fsl,bsc9131rdb"); } define_machine(bsc9131_rdb) { diff --git a/arch/powerpc/platforms/85xx/c293pcie.c b/arch/powerpc/platforms/85xx/c293pcie.c index 61bc851..3b9e3f0 100644 --- a/arch/powerpc/platforms/85xx/c293pcie.c +++ b/arch/powerpc/platforms/85xx/c293pcie.c @@ -54,9 +54,7 @@ machine_arch_initcall(c293_pcie, mpc85xx_common_publish_devices); */ static int __init c293_pcie_probe(void) { - unsigned long root = of_get_flat_dt_root(); - - if (of_flat_dt_is_compatible(root, "fsl,C293PCIE")) + if (of_machine_is_compatible("fsl,C293PCIE")) return 1; return 0; } diff --git a/arch/powerpc/platforms/85xx/corenet_generic.c b/arch/powerpc/platforms/85xx/corenet_generic.c index a2b0bc8..3a6a84f 
100644 --- a/arch/powerpc/platforms/85xx/corenet_generic.c +++ b/arch/powerpc/platforms/85xx/corenet_generic.c @@ -170,20 +170,19 @@ static const char * const boards[] __initconst = { */ static int __init corenet_generic_probe(void) { - unsigned long root = of_get_flat_dt_root(); char hv_compat[24]; int i; #ifdef CONFIG_SMP extern struct smp_ops_t smp_85xx_ops; #endif - if (of_flat_dt_match(root, boards)) + if (of_device_compatible_match(of_root, boards)) return 1; /* Check if we're running under the Freescale hypervisor */ for (i = 0; boards[i]; i++) { snprintf(hv_compat, sizeof(hv_compat), "%s-hv", boards[i]); - if (of_flat_dt_is_compatible(root, hv_compat)) { + if (of_machine_is_compatible(hv_compat)) { ppc_md.init_IRQ = ehv_pic_init; ppc_md.get_irq = ehv_pic_get_irq; diff --git a/arch/powerpc/platforms/85xx/ge_imp3a.c b/arch/powerpc/platforms/85xx/ge_imp3a.c index 55eefef..14af36a 100644 --- a/arch/powerpc/platforms/85xx/ge_imp3a.c +++ b/arch/powerpc/platforms/85xx/ge_imp3a.c @@ -197,9 +197,7 @@ static void ge_imp3a_show_cpuinfo(struct seq_file *m) */ static int __init ge_imp3a_probe(void) { - unsigned long root = of_get_flat_dt_root(); - - return of_flat_dt_is_compatible(root, "ge,IMP3A"); + return of_machine_is_compatible("ge,IMP3A"); } machine_arch_initcall(ge_imp3a, mpc85xx_common_publish_devices); diff --git a/arch/powerpc/platforms/85xx/ksi8560.c b/arch/powerpc/platforms/85xx/ksi8560.c index 867a107..6ef8580 100644 --- a/arch/powerpc/platforms/85xx/ksi8560.c +++ b/arch/powerpc/platforms/85xx/ksi8560.c @@ -176,9 +176,7 @@ machine_device_initcall(ksi8560, mpc85xx_common_publish_devices); */ static int __init ksi8560_probe(void) { - unsigned long root = of_get_flat_dt_root(); - - return of_flat_dt_is_compatible(root, "emerson,KSI8560"); + return of_machine_is_compatible("emerson,KSI8560"); } define_machine(ksi8560) { diff --git a/arch/powerpc/platforms/85xx/mpc8536_ds.c b/arch/powerpc/platforms/85xx/mpc8536_ds.c index a378ba3..6ba687f 100644 --- 
a/arch/powerpc/platforms/85xx/mpc8536_ds.c +++ b/arch/powerpc/platforms/85xx/mpc8536_ds.c @@ -64,9 +64,7 @@ machine_arch_initcall(mpc8536_ds, swiotlb_setup_bus_notifier); */ static int __init mpc8536_ds_probe(void) { - unsigned long root = of_get_flat_dt_root(); - - return of_flat_dt_is_compatible(root, "fsl,mpc8536ds"); + return of_machine_is_compatible("fsl,mpc8536ds"); } define_machine(mpc8536_ds) { diff --git a/arch/powerpc/platforms/85xx/mpc85xx_ads.c b/arch/powerpc/platforms/85xx/mpc85xx_ads.c index de72a5f..8756715 100644 --- a/arch/powerpc/platforms/85xx/mpc85xx_ads.c +++ b/arch/powerpc/platforms/85xx/mpc85xx_ads.c @@ -160,9 +160,7 @@ machine_arch_initcall(mpc85xx_ads, mpc85xx_common_publish_devices); */ static int __init mpc85xx_ads_probe(void) { - unsigned long root = of_get_flat_dt_root(); - - return of_flat_dt_is_compatible(root, "MPC85xxADS"); + return of_machine_is_compatible("MPC85xxADS"); } define_machine(mpc85xx_ads) { diff --git a/arch/powerpc/platforms/85xx/mpc85xx_cds.c b/arch/powerpc/platforms/85xx/mpc85xx_cds.c index 5e0a0a2..62f171c 100644 --- a/arch/powerpc/platforms/85xx/mpc85xx_cds.c +++ b/arch/powerpc/platforms/85xx/mpc85xx_cds.c @@ -367,9 +367,7 @@ static void mpc85xx_cds_show_cpuinfo(struct seq_file *m) */ static int __init mpc85xx_cds_probe(void) { - unsigned long root = of_get_flat_dt_root(); - - return of_flat_dt_is_compatible(root, "MPC85xxCDS"); + return of_machine_is_compatible("MPC85xxCDS"); } machine_arch_initcall(mpc85xx_cds, mpc85xx_common_publish_devices); diff --git a/arch/powerpc/platforms/85xx/mpc85xx_ds.c b/arch/powerpc/platforms/85xx/mpc85xx_ds.c index 64a7e8c..6bc07d8 100644 --- a/arch/powerpc/platforms/85xx/mpc85xx_ds.c +++ b/arch/powerpc/platforms/85xx/mpc85xx_ds.c @@ -167,9 +167,7 @@ static void __init mpc85xx_ds_setup_arch(void) */ static int __init mpc8544_ds_probe(void) { - unsigned long root = of_get_flat_dt_root(); - - return !!of_flat_dt_is_compatible(root, "MPC8544DS"); + return 
!!of_machine_is_compatible("MPC8544DS"); } machine_arch_initcall(mpc8544_ds, mpc85xx_common_publish_devices); @@ -185,9 +183,7 @@ machine_arch_initcall(p2020_ds, swiotlb_setup_bus_notifier); */ static int __init mpc8572_ds_probe(void) { - unsigned long root = of_get_flat_dt_root(); - - return !!of_flat_dt_is_compatible(root, "fsl,MPC8572DS"); + return !!of_machine_is_compatible("fsl,MPC8572DS"); } /* @@ -195,9 +191,7 @@ static int __init mpc8572_ds_probe(void) */ static int __init p2020_ds_probe(void) { - unsigned long root = of_get_flat_dt_root(); - - return !!of_flat_dt_is_compatible(root, "fsl,P2020DS"); + return !!of_machine_is_compatible("fsl,P2020DS"); } define_machine(mpc8544_ds) { diff --git a/arch/powerpc/platforms/85xx/mpc85xx_mds.c b/arch/powerpc/platforms/85xx/mpc85xx_mds.c index f61cbe2..fa9cd71 100644 --- a/arch/powerpc/platforms/85xx/mpc85xx_mds.c +++ b/arch/powerpc/platforms/85xx/mpc85xx_mds.c @@ -376,9 +376,7 @@ static void __init mpc85xx_mds_pic_init(void) static int __init mpc85xx_mds_probe(void) { - unsigned long root = of_get_flat_dt_root(); - - return of_flat_dt_is_compatible(root, "MPC85xxMDS"); + return of_machine_is_compatible("MPC85xxMDS"); } define_machine(mpc8568_mds) { @@ -398,9 +396,7 @@ define_machine(mpc8568_mds) { static int __init mpc8569_mds_probe(void) { - unsigned long root = of_get_flat_dt_root(); - - return of_flat_dt_is_compatible(root, "fsl,MPC8569EMDS"); + return of_machine_is_compatible("fsl,MPC8569EMDS"); } define_machine(mpc8569_mds) { @@ -420,9 +416,7 @@ define_machine(mpc8569_mds) { static int __init p1021_mds_probe(void) { - unsigned long root = of_get_flat_dt_root(); - - return of_flat_dt_is_compatible(root, "fsl,P1021MDS"); + return of_machine_is_compatible("fsl,P1021MDS"); } diff --git a/arch/powerpc/platforms/85xx/mpc85xx_rdb.c b/arch/powerpc/platforms/85xx/mpc85xx_rdb.c index 761e504..c1499cb 100644 --- a/arch/powerpc/platforms/85xx/mpc85xx_rdb.c +++ b/arch/powerpc/platforms/85xx/mpc85xx_rdb.c @@ -147,80 +147,60 
@@ machine_arch_initcall(p1024_rdb, mpc85xx_common_publish_devices); */ static int __init p2020_rdb_probe(void) { - unsigned long root = of_get_flat_dt_root(); - - if (of_flat_dt_is_compatible(root, "fsl,P2020RDB")) + if (of_machine_is_compatible("fsl,P2020RDB")) return 1; return 0; } static int __init p1020_rdb_probe(void) { - unsigned long root = of_get_flat_dt_root(); - - if (of_flat_dt_is_compatible(root, "fsl,P1020RDB")) + if (of_machine_is_compatible("fsl,P1020RDB")) return 1; return 0; } static int __init p1020_rdb_pc_probe(void) { - unsigned long root = of_get_flat_dt_root(); - - return of_flat_dt_is_compatible(root, "fsl,P1020RDB-PC"); + return of_machine_is_compatible("fsl,P1020RDB-PC"); } static int __init p1020_rdb_pd_probe(void) { - unsigned long root = of_get_flat_dt_root(); - - return of_flat_dt_is_compatible(root, "fsl,P1020RDB-PD"); + return of_machine_is_compatible("fsl,P1020RDB-PD"); } static int __init p1021_rdb_pc_probe(void) { - unsigned long root = of_get_flat_dt_root(); - - if (of_flat_dt_is_compatible(root, "fsl,P1021RDB-PC")) + if (of_machine_is_compatible("fsl,P1021RDB-PC")) return 1; return 0; } static int __init p2020_rdb_pc_probe(void) { - unsigned long root = of_get_flat_dt_root(); - - if (of_flat_dt_is_compatible(root, "fsl,P2020RDB-PC")) + if (of_machine_is_compatible("fsl,P2020RDB-PC")) return 1; return 0; } static int __init p1025_rdb_probe(void) { - unsigned long root = of_get_flat_dt_root(); - - return of_flat_dt_is_compatible(root, "fsl,P1025RDB"); + return of_machine_is_compatible("fsl,P1025RDB"); } static int __init p1020_mbg_pc_probe(void) { - unsigned long root = of_get_flat_dt_root(); - - return of_flat_dt_is_compatible(root, "fsl,P1020MBG-PC"); + return of_machine_is_compatible("fsl,P1020MBG-PC"); } static int __init p1020_utm_pc_probe(void) { - unsigned long root = of_get_flat_dt_root(); - - return of_flat_dt_is_compatible(root, "fsl,P1020UTM-PC"); + return of_machine_is_compatible("fsl,P1020UTM-PC"); } static int __init 
p1024_rdb_probe(void) { - unsigned long root = of_get_flat_dt_root(); - - return of_flat_dt_is_compatible(root, "fsl,P1024RDB"); + return of_machine_is_compatible("fsl,P1024RDB"); } define_machine(p2020_rdb) { diff --git a/arch/powerpc/platforms/85xx/mvme2500.c b/arch/powerpc/platforms/85xx/mvme2500.c index 1233050..acc3d0d 100644 --- a/arch/powerpc/platforms/85xx/mvme2500.c +++ b/arch/powerpc/platforms/85xx/mvme2500.c @@ -53,9 +53,7 @@ machine_arch_initcall(mvme2500, mpc85xx_common_publish_devices); */ static int __init mvme2500_probe(void) { - unsigned long root = of_get_flat_dt_root(); - - return of_flat_dt_is_compatible(root, "artesyn,MVME2500"); + return of_machine_is_compatible("artesyn,MVME2500"); } define_machine(mvme2500) { diff --git a/arch/powerpc/platforms/85xx/p1010rdb.c b/arch/powerpc/platforms/85xx/p1010rdb.c index ad1a3d4..661d7b5 100644 --- a/arch/powerpc/platforms/85xx/p1010rdb.c +++ b/arch/powerpc/platforms/85xx/p1010rdb.c @@ -62,11 +62,9 @@ machine_arch_initcall(p1010_rdb, swiotlb_setup_bus_notifier); */ static int __init p1010_rdb_probe(void) { - unsigned long root = of_get_flat_dt_root(); - - if (of_flat_dt_is_compatible(root, "fsl,P1010RDB")) + if (of_machine_is_compatible("fsl,P1010RDB")) return 1; - if (of_flat_dt_is_compatible(root, "fsl,P1010RDB-PB")) + if (of_machine_is_compatible("fsl,P1010RDB-PB")) return 1; return 0; } diff --git a/arch/powerpc/platforms/85xx/p1022_ds.c b/arch/powerpc/platforms/85xx/p1022_ds.c index 371df82..63568d6 100644 --- a/arch/powerpc/platforms/85xx/p1022_ds.c +++ b/arch/powerpc/platforms/85xx/p1022_ds.c @@ -555,9 +555,7 @@ machine_arch_initcall(p1022_ds, swiotlb_setup_bus_notifier); */ static int __init p1022_ds_probe(void) { - unsigned long root = of_get_flat_dt_root(); - - return of_flat_dt_is_compatible(root, "fsl,p1022ds"); + return of_machine_is_compatible("fsl,p1022ds"); } define_machine(p1022_ds) { diff --git a/arch/powerpc/platforms/85xx/p1022_rdk.c b/arch/powerpc/platforms/85xx/p1022_rdk.c index 
5087bec..2f29436 100644 --- a/arch/powerpc/platforms/85xx/p1022_rdk.c +++ b/arch/powerpc/platforms/85xx/p1022_rdk.c @@ -135,9 +135,7 @@ machine_arch_initcall(p1022_rdk, swiotlb_setup_bus_notifier); */ static int __init p1022_rdk_probe(void) { - unsigned long root = of_get_flat_dt_root(); - - return of_flat_dt_is_compatible(root, "fsl,p1022rdk"); + return of_machine_is_compatible("fsl,p1022rdk"); } define_machine(p1022_rdk) { diff --git a/arch/powerpc/platforms/85xx/p1023_rdb.c b/arch/powerpc/platforms/85xx/p1023_rdb.c index d5b7509..40d8de5 100644 --- a/arch/powerpc/platforms/85xx/p1023_rdb.c +++ b/arch/powerpc/platforms/85xx/p1023_rdb.c @@ -100,9 +100,7 @@ static void __init mpc85xx_rdb_pic_init(void) static int __init p1023_rdb_probe(void) { - unsigned long root = of_get_flat_dt_root(); - - return of_flat_dt_is_compatible(root, "fsl,P1023RDB"); + return of_machine_is_compatible("fsl,P1023RDB"); } diff --git a/arch/powerpc/platforms/85xx/ppa8548.c b/arch/powerpc/platforms/85xx/ppa8548.c index 12019f1..2410167 100644 --- a/arch/powerpc/platforms/85xx/ppa8548.c +++ b/arch/powerpc/platforms/85xx/ppa8548.c @@ -81,9 +81,7 @@ machine_device_initcall(ppa8548, declare_of_platform_devices); */ static int __init ppa8548_probe(void) { - unsigned long root = of_get_flat_dt_root(); - - return of_flat_dt_is_compatible(root, "ppa8548"); + return of_machine_is_compatible("ppa8548"); } define_machine(ppa8548) { diff --git a/arch/powerpc/platforms/85xx/qemu_e500.c b/arch/powerpc/platforms/85xx/qemu_e500.c index 8ad2fe6..50d7458 100644 --- a/arch/powerpc/platforms/85xx/qemu_e500.c +++ b/arch/powerpc/platforms/85xx/qemu_e500.c @@ -62,9 +62,7 @@ static void __init qemu_e500_setup_arch(void) */ static int __init qemu_e500_probe(void) { - unsigned long root = of_get_flat_dt_root(); - - return !!of_flat_dt_is_compatible(root, "fsl,qemu-e500"); + return !!of_machine_is_compatible("fsl,qemu-e500"); } machine_arch_initcall(qemu_e500, mpc85xx_common_publish_devices); diff --git 
a/arch/powerpc/platforms/85xx/sbc8548.c b/arch/powerpc/platforms/85xx/sbc8548.c index b072146..62b6c45 100644 --- a/arch/powerpc/platforms/85xx/sbc8548.c +++ b/arch/powerpc/platforms/85xx/sbc8548.c @@ -120,9 +120,7 @@ machine_arch_initcall(sbc8548, mpc85xx_common_publish_devices); */ static int __init sbc8548_probe(void) { - unsigned long root = of_get_flat_dt_root(); - - return of_flat_dt_is_compatible(root, "SBC8548"); + return of_machine_is_compatible("SBC8548"); } define_machine(sbc8548) { diff --git a/arch/powerpc/platforms/85xx/socrates.c b/arch/powerpc/platforms/85xx/socrates.c index ae368e0..cd255ac 100644 --- a/arch/powerpc/platforms/85xx/socrates.c +++ b/arch/powerpc/platforms/85xx/socrates.c @@ -79,9 +79,7 @@ machine_arch_initcall(socrates, mpc85xx_common_publish_devices); */ static int __init socrates_probe(void) { - unsigned long root = of_get_flat_dt_root(); - - if (of_flat_dt_is_compatible(root, "abb,socrates")) + if (of_machine_is_compatible("abb,socrates")) return 1; return 0; diff --git a/arch/powerpc/platforms/85xx/stx_gp3.c b/arch/powerpc/platforms/85xx/stx_gp3.c index 6f4939b..91b824c 100644 --- a/arch/powerpc/platforms/85xx/stx_gp3.c +++ b/arch/powerpc/platforms/85xx/stx_gp3.c @@ -93,9 +93,7 @@ machine_arch_initcall(stx_gp3, mpc85xx_common_publish_devices); */ static int __init stx_gp3_probe(void) { - unsigned long root = of_get_flat_dt_root(); - - return of_flat_dt_is_compatible(root, "stx,gp3-8560"); + return of_machine_is_compatible("stx,gp3-8560"); } define_machine(stx_gp3) { diff --git a/arch/powerpc/platforms/85xx/tqm85xx.c b/arch/powerpc/platforms/85xx/tqm85xx.c index ec0b727..b7c5445 100644 --- a/arch/powerpc/platforms/85xx/tqm85xx.c +++ b/arch/powerpc/platforms/85xx/tqm85xx.c @@ -122,7 +122,7 @@ static const char * const board[] __initconst = { */ static int __init tqm85xx_probe(void) { - return of_flat_dt_match(of_get_flat_dt_root(), board); + return of_device_compatible_match(of_root, board); } define_machine(tqm85xx) { diff --git 
a/arch/powerpc/platforms/85xx/twr_p102x.c b/arch/powerpc/platforms/85xx/twr_p102x.c index 71bc255..1bc02a8 100644 --- a/arch/powerpc/platforms/85xx/twr_p102x.c +++ b/arch/powerpc/platforms/85xx/twr_p102x.c @@ -128,9 +128,7 @@ machine_arch_initcall(twr_p1025, mpc85xx_common_publish_devices); static int __init twr_p1025_probe(void) { - unsigned long root = of_get_flat_dt_root(); - - return of_flat_dt_is_compatible(root, "fsl,TWR-P1025"); + return of_machine_is_compatible("fsl,TWR-P1025"); } define_machine(twr_p1025) { diff --git a/arch/powerpc/platforms/85xx/xes_mpc85xx.c b/arch/powerpc/platforms/85xx/xes_mpc85xx.c index 1a9c108..cf0c70f 100644 --- a/arch/powerpc/platforms/85xx/xes_mpc85xx.c +++ b/arch/powerpc/platforms/85xx/xes_mpc85xx.c @@ -144,23 +144,17 @@ machine_arch_initcall(xes_mpc8540, mpc85xx_common_publish_devices); */ static int __init xes_mpc8572_probe(void) { - unsigned long root = of_get_flat_dt_root(); - - return of_flat_dt_is_compatible(root, "xes,MPC8572"); + return of_machine_is_compatible("xes,MPC8572"); } static int __init xes_mpc8548_probe(void) { - unsigned long root = of_get_flat_dt_root(); - - return of_flat_dt_is_compatible(root, "xes,MPC8548"); + return of_machine_is_compatible("xes,MPC8548"); } static int __init xes_mpc8540_probe(void) { - unsigned long root = of_get_flat_dt_root(); - - return of_flat_dt_is_compatible(root, "xes,MPC8540"); + return of_machine_is_compatible("xes,MPC8540"); } define_machine(xes_mpc8572) { diff --git a/arch/powerpc/platforms/86xx/gef_ppc9a.c b/arch/powerpc/platforms/86xx/gef_ppc9a.c index 8e63b75..ef684af 100644 --- a/arch/powerpc/platforms/86xx/gef_ppc9a.c +++ b/arch/powerpc/platforms/86xx/gef_ppc9a.c @@ -189,9 +189,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NEC, PCI_DEVICE_ID_NEC_USB, */ static int __init gef_ppc9a_probe(void) { - unsigned long root = of_get_flat_dt_root(); - - if (of_flat_dt_is_compatible(root, "gef,ppc9a")) + if (of_machine_is_compatible("gef,ppc9a")) return 1; return 0; diff --git 
a/arch/powerpc/platforms/86xx/gef_sbc310.c b/arch/powerpc/platforms/86xx/gef_sbc310.c index 0e0be94..67dd0c2 100644 --- a/arch/powerpc/platforms/86xx/gef_sbc310.c +++ b/arch/powerpc/platforms/86xx/gef_sbc310.c @@ -176,9 +176,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NEC, PCI_DEVICE_ID_NEC_USB, */ static int __init gef_sbc310_probe(void) { - unsigned long root = of_get_flat_dt_root(); - - if (of_flat_dt_is_compatible(root, "gef,sbc310")) + if (of_machine_is_compatible("gef,sbc310")) return 1; return 0; diff --git a/arch/powerpc/platforms/86xx/gef_sbc610.c b/arch/powerpc/platforms/86xx/gef_sbc610.c index e8292b4..8050269 100644 --- a/arch/powerpc/platforms/86xx/gef_sbc610.c +++ b/arch/powerpc/platforms/86xx/gef_sbc610.c @@ -166,9 +166,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NEC, PCI_DEVICE_ID_NEC_USB, */ static int __init gef_sbc610_probe(void) { - unsigned long root = of_get_flat_dt_root(); - - if (of_flat_dt_is_compatible(root, "gef,sbc610")) + if (of_machine_is_compatible("gef,sbc610")) return 1; return 0; diff --git a/arch/powerpc/platforms/86xx/mpc8610_hpcd.c b/arch/powerpc/platforms/86xx/mpc8610_hpcd.c index 957473e..fef0582 100644 --- a/arch/powerpc/platforms/86xx/mpc8610_hpcd.c +++ b/arch/powerpc/platforms/86xx/mpc8610_hpcd.c @@ -319,9 +319,7 @@ static void __init mpc86xx_hpcd_setup_arch(void) */ static int __init mpc86xx_hpcd_probe(void) { - unsigned long root = of_get_flat_dt_root(); - - if (of_flat_dt_is_compatible(root, "fsl,MPC8610HPCD")) + if (of_machine_is_compatible("fsl,MPC8610HPCD")) return 1; /* Looks good */ return 0; diff --git a/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c b/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c index e508481..5ae42a0 100644 --- a/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c +++ b/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c @@ -96,13 +96,11 @@ mpc86xx_hpcn_show_cpuinfo(struct seq_file *m) */ static int __init mpc86xx_hpcn_probe(void) { - unsigned long root = of_get_flat_dt_root(); - - if (of_flat_dt_is_compatible(root, 
"fsl,mpc8641hpcn")) + if (of_machine_is_compatible("fsl,mpc8641hpcn")) return 1; /* Looks good */ /* Be nice and don't give silent boot death. Delete this in 2.6.27 */ - if (of_flat_dt_is_compatible(root, "mpc86xx")) { + if (of_machine_is_compatible("mpc86xx")) { pr_warning("WARNING: your dts/dtb is old. You must update before the next kernel release\n"); return 1; } diff --git a/arch/powerpc/platforms/86xx/sbc8641d.c b/arch/powerpc/platforms/86xx/sbc8641d.c index 2a9cf27..52af573 100644 --- a/arch/powerpc/platforms/86xx/sbc8641d.c +++ b/arch/powerpc/platforms/86xx/sbc8641d.c @@ -67,9 +67,7 @@ sbc8641_show_cpuinfo(struct seq_file *m) */ static int __init sbc8641_probe(void) { - unsigned long root = of_get_flat_dt_root(); - - if (of_flat_dt_is_compatible(root, "wind,sbc8641")) + if (of_machine_is_compatible("wind,sbc8641")) return 1; /* Looks good */ return 0; diff --git a/arch/powerpc/platforms/8xx/adder875.c b/arch/powerpc/platforms/8xx/adder875.c index 61cae4c..333dece 100644 --- a/arch/powerpc/platforms/8xx/adder875.c +++ b/arch/powerpc/platforms/8xx/adder875.c @@ -88,8 +88,7 @@ static void __init adder875_setup(void) static int __init adder875_probe(void) { - unsigned long root = of_get_flat_dt_root(); - return of_flat_dt_is_compatible(root, "analogue-and-micro,adder875"); + return of_machine_is_compatible("analogue-and-micro,adder875"); } static const struct of_device_id of_bus_ids[] __initconst = { diff --git a/arch/powerpc/platforms/8xx/ep88xc.c b/arch/powerpc/platforms/8xx/ep88xc.c index 2bedeb7..cd0d90f 100644 --- a/arch/powerpc/platforms/8xx/ep88xc.c +++ b/arch/powerpc/platforms/8xx/ep88xc.c @@ -143,8 +143,7 @@ static void __init ep88xc_setup_arch(void) static int __init ep88xc_probe(void) { - unsigned long root = of_get_flat_dt_root(); - return of_flat_dt_is_compatible(root, "fsl,ep88xc"); + return of_machine_is_compatible("fsl,ep88xc"); } static const struct of_device_id of_bus_ids[] __initconst = { diff --git 
a/arch/powerpc/platforms/8xx/mpc86xads_setup.c b/arch/powerpc/platforms/8xx/mpc86xads_setup.c index 78180c5..8d02f5f 100644 --- a/arch/powerpc/platforms/8xx/mpc86xads_setup.c +++ b/arch/powerpc/platforms/8xx/mpc86xads_setup.c @@ -118,8 +118,7 @@ static void __init mpc86xads_setup_arch(void) static int __init mpc86xads_probe(void) { - unsigned long root = of_get_flat_dt_root(); - return of_flat_dt_is_compatible(root, "fsl,mpc866ads"); + return of_machine_is_compatible("fsl,mpc866ads"); } static const struct of_device_id of_bus_ids[] __initconst = { diff --git a/arch/powerpc/platforms/8xx/mpc885ads_setup.c b/arch/powerpc/platforms/8xx/mpc885ads_setup.c index 4d62bf9..e821a42 100644 --- a/arch/powerpc/platforms/8xx/mpc885ads_setup.c +++ b/arch/powerpc/platforms/8xx/mpc885ads_setup.c @@ -193,8 +193,7 @@ static void __init mpc885ads_setup_arch(void) static int __init mpc885ads_probe(void) { - unsigned long root = of_get_flat_dt_root(); - return of_flat_dt_is_compatible(root, "fsl,mpc885ads"); + return of_machine_is_compatible("fsl,mpc885ads"); } static const struct of_device_id of_bus_ids[] __initconst = { diff --git a/arch/powerpc/platforms/8xx/tqm8xx_setup.c b/arch/powerpc/platforms/8xx/tqm8xx_setup.c index bee47a2..4cea8b1 100644 --- a/arch/powerpc/platforms/8xx/tqm8xx_setup.c +++ b/arch/powerpc/platforms/8xx/tqm8xx_setup.c @@ -119,9 +119,7 @@ static void __init tqm8xx_setup_arch(void) static int __init tqm8xx_probe(void) { - unsigned long node = of_get_flat_dt_root(); - - return of_flat_dt_is_compatible(node, "tqc,tqm8xx"); + return of_machine_is_compatible("tqc,tqm8xx"); } static const struct of_device_id of_bus_ids[] __initconst = { diff --git a/arch/powerpc/platforms/amigaone/setup.c b/arch/powerpc/platforms/amigaone/setup.c index 3e12d87..45cb982 100644 --- a/arch/powerpc/platforms/amigaone/setup.c +++ b/arch/powerpc/platforms/amigaone/setup.c @@ -143,9 +143,7 @@ void __noreturn amigaone_restart(char *cmd) static int __init amigaone_probe(void) { - unsigned long 
root = of_get_flat_dt_root(); - - if (of_flat_dt_is_compatible(root, "eyetech,amigaone")) { + if (of_machine_is_compatible("eyetech,amigaone")) { /* * Coherent memory access cause complete system lockup! Thus * disable this CPU feature, even if the CPU needs it. diff --git a/arch/powerpc/platforms/embedded6xx/c2k.c b/arch/powerpc/platforms/embedded6xx/c2k.c index 7820662..7fef40e 100644 --- a/arch/powerpc/platforms/embedded6xx/c2k.c +++ b/arch/powerpc/platforms/embedded6xx/c2k.c @@ -123,9 +123,7 @@ void c2k_show_cpuinfo(struct seq_file *m) */ static int __init c2k_probe(void) { - unsigned long root = of_get_flat_dt_root(); - - if (!of_flat_dt_is_compatible(root, "GEFanuc,C2K")) + if (!of_machine_is_compatible("GEFanuc,C2K")) return 0; printk(KERN_INFO "Detected a GEFanuc C2K board\n"); diff --git a/arch/powerpc/platforms/embedded6xx/gamecube.c b/arch/powerpc/platforms/embedded6xx/gamecube.c index b17705c..f5a837f 100644 --- a/arch/powerpc/platforms/embedded6xx/gamecube.c +++ b/arch/powerpc/platforms/embedded6xx/gamecube.c @@ -61,10 +61,7 @@ static void __init gamecube_init_early(void) static int __init gamecube_probe(void) { - unsigned long dt_root; - - dt_root = of_get_flat_dt_root(); - if (!of_flat_dt_is_compatible(dt_root, "nintendo,gamecube")) + if (!of_machine_is_compatible("nintendo,gamecube")) return 0; pm_power_off = gamecube_power_off; diff --git a/arch/powerpc/platforms/embedded6xx/holly.c b/arch/powerpc/platforms/embedded6xx/holly.c index 8b6e761..dafba10 100644 --- a/arch/powerpc/platforms/embedded6xx/holly.c +++ b/arch/powerpc/platforms/embedded6xx/holly.c @@ -250,9 +250,7 @@ void holly_halt(void) */ static int __init holly_probe(void) { - unsigned long root = of_get_flat_dt_root(); - - if (!of_flat_dt_is_compatible(root, "ibm,holly")) + if (!of_machine_is_compatible("ibm,holly")) return 0; return 1; } diff --git a/arch/powerpc/platforms/embedded6xx/linkstation.c b/arch/powerpc/platforms/embedded6xx/linkstation.c index 4c5089f..f29cf29 100644 --- 
a/arch/powerpc/platforms/embedded6xx/linkstation.c +++ b/arch/powerpc/platforms/embedded6xx/linkstation.c @@ -141,11 +141,7 @@ static void linkstation_show_cpuinfo(struct seq_file *m) static int __init linkstation_probe(void) { - unsigned long root; - - root = of_get_flat_dt_root(); - - if (!of_flat_dt_is_compatible(root, "linkstation")) + if (!of_machine_is_compatible("linkstation")) return 0; pm_power_off = linkstation_power_off; diff --git a/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c b/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c index 3eda5df..80804f9 100644 --- a/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c +++ b/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c @@ -161,9 +161,7 @@ static void __noreturn mpc7448_hpc2_restart(char *cmd) */ static int __init mpc7448_hpc2_probe(void) { - unsigned long root = of_get_flat_dt_root(); - - if (!of_flat_dt_is_compatible(root, "mpc74xx")) + if (!of_machine_is_compatible("mpc74xx")) return 0; return 1; } diff --git a/arch/powerpc/platforms/embedded6xx/mvme5100.c b/arch/powerpc/platforms/embedded6xx/mvme5100.c index 1382e1f..ed7321d 100644 --- a/arch/powerpc/platforms/embedded6xx/mvme5100.c +++ b/arch/powerpc/platforms/embedded6xx/mvme5100.c @@ -194,9 +194,7 @@ static void __noreturn mvme5100_restart(char *cmd) */ static int __init mvme5100_probe(void) { - unsigned long root = of_get_flat_dt_root(); - - return of_flat_dt_is_compatible(root, "MVME5100"); + return of_machine_is_compatible("MVME5100"); } static int __init probe_of_platform_devices(void) diff --git a/arch/powerpc/platforms/embedded6xx/storcenter.c b/arch/powerpc/platforms/embedded6xx/storcenter.c index 4596cba..471a50b 100644 --- a/arch/powerpc/platforms/embedded6xx/storcenter.c +++ b/arch/powerpc/platforms/embedded6xx/storcenter.c @@ -109,9 +109,7 @@ static void __noreturn storcenter_restart(char *cmd) static int __init storcenter_probe(void) { - unsigned long root = of_get_flat_dt_root(); - - return of_flat_dt_is_compatible(root, 
"iomega,storcenter"); + return of_machine_is_compatible("iomega,storcenter"); } define_machine(storcenter){ diff --git a/arch/powerpc/platforms/embedded6xx/wii.c b/arch/powerpc/platforms/embedded6xx/wii.c index ebaecb8..fe3e769 100644 --- a/arch/powerpc/platforms/embedded6xx/wii.c +++ b/arch/powerpc/platforms/embedded6xx/wii.c @@ -205,10 +205,7 @@ static void __init wii_pic_probe(void) static int __init wii_probe(void) { - unsigned long dt_root; - - dt_root = of_get_flat_dt_root(); - if (!of_flat_dt_is_compatible(dt_root, "nintendo,wii")) + if (!of_machine_is_compatible("nintendo,wii")) return 0; pm_power_off = wii_power_off; diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c index 0872f98..60ca40a 100644 --- a/arch/powerpc/platforms/powermac/setup.c +++ b/arch/powerpc/platforms/powermac/setup.c @@ -590,17 +590,9 @@ console_initcall(check_pmac_serial_console); */ static int __init pmac_probe(void) { -#ifdef CONFIG_PPC32 - unsigned long root = of_get_flat_dt_root(); - - if (!of_flat_dt_is_compatible(root, "Power Macintosh") && - !of_flat_dt_is_compatible(root, "MacRISC")) - return 0; -#else if (!of_machine_is_compatible("Power Macintosh") && !of_machine_is_compatible("MacRISC")) return 0; -#endif #ifdef CONFIG_PPC32 /* isa_io_base gets set in pmac_pci_init */ -- cgit v0.10.2 From f2d576948d6cec16e4aae201d738c4f22039a551 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 5 Jul 2016 15:04:06 +1000 Subject: powerpc: Get rid of ppc_md.init_early() It is now called right after platform probe, so the probe function can just do the job. 
Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index 5b2edf5..76f5398 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -53,7 +53,6 @@ struct machdep_calls { int (*probe)(void); void (*setup_arch)(void); /* Optional, may be NULL */ - void (*init_early)(void); /* Optional, may be NULL. */ void (*show_cpuinfo)(struct seq_file *m); void (*show_percpuinfo)(struct seq_file *m, int i); diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index e7bb4e7..22347e87 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -263,9 +263,6 @@ void __init setup_arch(char **cmdline_p) setup_power_save(); - if (ppc_md.init_early) - ppc_md.init_early(); - find_legacy_serial_ports(); smp_setup_cpu_maps(); diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 883d527..8b9768a 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -511,14 +511,6 @@ void __init setup_system(void) /* Probe the machine type */ probe_machine(); - /* - * Do some platform specific early initializations, that includes - * setting up the hash table pointers. It also sets up some interrupt-mapping - * related options that will be used by finish_device_tree() - */ - if (ppc_md.init_early) - ppc_md.init_early(); - /* * We can discover serial ports now since the above did setup the * hash table management for us, thus ioremap works. 
We do that early diff --git a/arch/powerpc/platforms/512x/mpc5121_ads.c b/arch/powerpc/platforms/512x/mpc5121_ads.c index 4e03f04..f65d503 100644 --- a/arch/powerpc/platforms/512x/mpc5121_ads.c +++ b/arch/powerpc/platforms/512x/mpc5121_ads.c @@ -57,7 +57,12 @@ static void __init mpc5121_ads_init_IRQ(void) */ static int __init mpc5121_ads_probe(void) { - return of_machine_is_compatible("fsl,mpc5121ads"); + if (!of_machine_is_compatible("fsl,mpc5121ads")) + return 0; + + mpc512x_init_early(); + + return 1; } define_machine(mpc5121_ads) { @@ -65,7 +70,6 @@ define_machine(mpc5121_ads) { .probe = mpc5121_ads_probe, .setup_arch = mpc5121_ads_setup_arch, .init = mpc512x_init, - .init_early = mpc512x_init_early, .init_IRQ = mpc5121_ads_init_IRQ, .get_irq = ipic_get_irq, .calibrate_decr = generic_calibrate_decr, diff --git a/arch/powerpc/platforms/512x/mpc512x_generic.c b/arch/powerpc/platforms/512x/mpc512x_generic.c index 87eba17..bf884d3 100644 --- a/arch/powerpc/platforms/512x/mpc512x_generic.c +++ b/arch/powerpc/platforms/512x/mpc512x_generic.c @@ -38,14 +38,18 @@ static const char * const board[] __initconst = { */ static int __init mpc512x_generic_probe(void) { - return of_device_compatible_match(of_root, board); + if (!of_device_compatible_match(of_root, board)) + return 0; + + mpc512x_init_early(); + + return 1; } define_machine(mpc512x_generic) { .name = "MPC512x generic", .probe = mpc512x_generic_probe, .init = mpc512x_init, - .init_early = mpc512x_init_early, .setup_arch = mpc512x_setup_arch, .init_IRQ = mpc512x_init_IRQ, .get_irq = ipic_get_irq, diff --git a/arch/powerpc/platforms/512x/pdm360ng.c b/arch/powerpc/platforms/512x/pdm360ng.c index f9cad19..dc81f05 100644 --- a/arch/powerpc/platforms/512x/pdm360ng.c +++ b/arch/powerpc/platforms/512x/pdm360ng.c @@ -113,7 +113,12 @@ void __init pdm360ng_init(void) static int __init pdm360ng_probe(void) { - return of_machine_is_compatible("ifm,pdm360ng"); + if (!of_machine_is_compatible("ifm,pdm360ng")) + return 0; + + 
mpc512x_init_early(); + + return 1; } define_machine(pdm360ng) { @@ -121,7 +126,6 @@ define_machine(pdm360ng) { .probe = pdm360ng_probe, .setup_arch = mpc512x_setup_arch, .init = pdm360ng_init, - .init_early = mpc512x_init_early, .init_IRQ = mpc512x_init_IRQ, .get_irq = ipic_get_irq, .calibrate_decr = generic_calibrate_decr, diff --git a/arch/powerpc/platforms/chrp/setup.c b/arch/powerpc/platforms/chrp/setup.c index c55002f..bfb3006 100644 --- a/arch/powerpc/platforms/chrp/setup.c +++ b/arch/powerpc/platforms/chrp/setup.c @@ -253,7 +253,7 @@ static void __noreturn briq_restart(char *cmd) * But unfortunately, the firmware does not connect /chosen/{stdin,stdout} * the the built-in serial node. Instead, a /failsafe node is created. */ -static __init void chrp_init_early(void) +static __init void chrp_init(void) { struct device_node *node; const char *property; @@ -587,6 +587,8 @@ static int __init chrp_probe(void) pm_power_off = rtas_power_off; + chrp_init(); + return 1; } @@ -595,7 +597,6 @@ define_machine(chrp) { .probe = chrp_probe, .setup_arch = chrp_setup_arch, .init = chrp_init2, - .init_early = chrp_init_early, .show_cpuinfo = chrp_show_cpuinfo, .init_IRQ = chrp_init_IRQ, .restart = rtas_restart, diff --git a/arch/powerpc/platforms/embedded6xx/c2k.c b/arch/powerpc/platforms/embedded6xx/c2k.c index 7fef40e..d19e4e7 100644 --- a/arch/powerpc/platforms/embedded6xx/c2k.c +++ b/arch/powerpc/platforms/embedded6xx/c2k.c @@ -130,6 +130,9 @@ static int __init c2k_probe(void) _set_L2CR(0); _set_L2CR(L2CR_L2E | L2CR_L2PE | L2CR_L2I); + + mv64x60_init_early(); + return 1; } @@ -137,7 +140,6 @@ define_machine(c2k) { .name = "C2K", .probe = c2k_probe, .setup_arch = c2k_setup_arch, - .init_early = mv64x60_init_early, .show_cpuinfo = c2k_show_cpuinfo, .init_IRQ = mv64x60_init_irq, .get_irq = mv64x60_get_irq, diff --git a/arch/powerpc/platforms/embedded6xx/gamecube.c b/arch/powerpc/platforms/embedded6xx/gamecube.c index f5a837f..36789ce 100644 --- 
a/arch/powerpc/platforms/embedded6xx/gamecube.c +++ b/arch/powerpc/platforms/embedded6xx/gamecube.c @@ -54,11 +54,6 @@ static void __noreturn gamecube_halt(void) gamecube_restart(NULL); } -static void __init gamecube_init_early(void) -{ - ug_udbg_init(); -} - static int __init gamecube_probe(void) { if (!of_machine_is_compatible("nintendo,gamecube")) @@ -66,6 +61,8 @@ static int __init gamecube_probe(void) pm_power_off = gamecube_power_off; + ug_udbg_init(); + return 1; } @@ -77,7 +74,6 @@ static void gamecube_shutdown(void) define_machine(gamecube) { .name = "gamecube", .probe = gamecube_probe, - .init_early = gamecube_init_early, .restart = gamecube_restart, .halt = gamecube_halt, .init_IRQ = flipper_pic_probe, diff --git a/arch/powerpc/platforms/embedded6xx/wii.c b/arch/powerpc/platforms/embedded6xx/wii.c index fe3e769..3fd683e 100644 --- a/arch/powerpc/platforms/embedded6xx/wii.c +++ b/arch/powerpc/platforms/embedded6xx/wii.c @@ -192,11 +192,6 @@ static void __noreturn wii_halt(void) wii_spin(); } -static void __init wii_init_early(void) -{ - ug_udbg_init(); -} - static void __init wii_pic_probe(void) { flipper_pic_probe(); @@ -210,6 +205,8 @@ static int __init wii_probe(void) pm_power_off = wii_power_off; + ug_udbg_init(); + return 1; } @@ -222,7 +219,6 @@ static void wii_shutdown(void) define_machine(wii) { .name = "wii", .probe = wii_probe, - .init_early = wii_init_early, .setup_arch = wii_setup_arch, .restart = wii_restart, .halt = wii_halt, diff --git a/arch/powerpc/platforms/maple/setup.c b/arch/powerpc/platforms/maple/setup.c index d3d44cb..3c30c7a 100644 --- a/arch/powerpc/platforms/maple/setup.c +++ b/arch/powerpc/platforms/maple/setup.c @@ -198,18 +198,6 @@ void __init maple_setup_arch(void) mmio_nvram_init(); } -/* - * Early initialization. 
- */ -static void __init maple_init_early(void) -{ - DBG(" -> maple_init_early\n"); - - iommu_init_early_dart(&maple_pci_controller_ops); - - DBG(" <- maple_init_early\n"); -} - /* * This is almost identical to pSeries and CHRP. We need to make that * code generic at one point, with appropriate bits in the device-tree to @@ -306,6 +294,8 @@ static int __init maple_probe(void) pm_power_off = maple_power_off; + iommu_init_early_dart(&maple_pci_controller_ops); + return 1; } @@ -313,7 +303,6 @@ define_machine(maple) { .name = "Maple", .probe = maple_probe, .setup_arch = maple_setup_arch, - .init_early = maple_init_early, .init_IRQ = maple_init_IRQ, .pci_irq_fixup = maple_pci_irq_fixup, .pci_get_legacy_ide_irq = maple_pci_get_legacy_ide_irq, diff --git a/arch/powerpc/platforms/pasemi/setup.c b/arch/powerpc/platforms/pasemi/setup.c index ec810dd..e86c1bd 100644 --- a/arch/powerpc/platforms/pasemi/setup.c +++ b/arch/powerpc/platforms/pasemi/setup.c @@ -339,11 +339,6 @@ out: return !!(srr1 & 0x2); } -static void __init pas_init_early(void) -{ - iommu_init_early_pasemi(); -} - #ifdef CONFIG_PCMCIA static int pcmcia_notify(struct notifier_block *nb, unsigned long action, void *data) @@ -424,6 +419,8 @@ static int __init pas_probe(void) !of_machine_is_compatible("pasemi,pwrficient")) return 0; + iommu_init_early_pasemi(); + return 1; } @@ -431,7 +428,6 @@ define_machine(pasemi) { .name = "PA Semi PWRficient", .probe = pas_probe, .setup_arch = pas_setup_arch, - .init_early = pas_init_early, .init_IRQ = pas_init_IRQ, .get_irq = mpic_get_irq, .restart = pas_restart, diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c index 60ca40a..3de4a7c 100644 --- a/arch/powerpc/platforms/powermac/setup.c +++ b/arch/powerpc/platforms/powermac/setup.c @@ -452,7 +452,7 @@ pmac_halt(void) /* * Early initialization. 
*/ -static void __init pmac_init_early(void) +static void __init pmac_init(void) { /* Enable early btext debug if requested */ if (strstr(boot_command_line, "btextdbg")) { @@ -603,6 +603,8 @@ static int __init pmac_probe(void) pm_power_off = pmac_power_off; + pmac_init(); + return 1; } @@ -610,7 +612,6 @@ define_machine(powermac) { .name = "PowerMac", .probe = pmac_probe, .setup_arch = pmac_setup_arch, - .init_early = pmac_init_early, .show_cpuinfo = pmac_show_cpuinfo, .init_IRQ = pmac_pic_init, .get_irq = NULL, /* changed later */ diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index 8865efa..efe8b6b 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -58,7 +58,7 @@ static void __init pnv_setup_arch(void) /* XXX PMCS */ } -static void __init pnv_init_early(void) +static void __init pnv_init(void) { /* * Initialize the LPC bus now so that legacy serial @@ -276,6 +276,8 @@ static int __init pnv_probe(void) pr_debug("PowerNV detected !\n"); + pnv_init(); + return 1; } @@ -301,7 +303,6 @@ static unsigned long pnv_get_proc_freq(unsigned int cpu) define_machine(powernv) { .name = "PowerNV", .probe = pnv_probe, - .init_early = pnv_init_early, .setup_arch = pnv_setup_arch, .init_IRQ = pnv_init_IRQ, .show_cpuinfo = pnv_show_cpuinfo, diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 6988b9d..4ffcaa6 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -620,9 +620,9 @@ static void pSeries_cmo_feature_init(void) /* * Early initialization. 
Relocation is on but do not reference unbolted pages */ -static void __init pSeries_init_early(void) +static void __init pseries_init(void) { - pr_debug(" -> pSeries_init_early()\n"); + pr_debug(" -> pseries_init()\n"); #ifdef CONFIG_HVC_CONSOLE if (firmware_has_feature(FW_FEATURE_LPAR)) @@ -639,7 +639,7 @@ static void __init pSeries_init_early(void) pSeries_cmo_feature_init(); iommu_init_early_pSeries(); - pr_debug(" <- pSeries_init_early()\n"); + pr_debug(" <- pseries_init()\n"); } /** @@ -691,6 +691,8 @@ static int __init pSeries_probe(void) pr_debug("Machine is%s LPAR !\n", (powerpc_firmware_features & FW_FEATURE_LPAR) ? "" : " not"); + pseries_init(); + return 1; } @@ -709,7 +711,6 @@ define_machine(pseries) { .name = "pSeries", .probe = pSeries_probe, .setup_arch = pSeries_setup_arch, - .init_early = pSeries_init_early, .init_IRQ = pseries_init_irq, .show_cpuinfo = pSeries_show_cpuinfo, .log_error = pSeries_log_error, -- cgit v0.10.2 From bf1b61fb574bfe13ab71347389a2ab16f673d24f Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 5 Jul 2016 15:04:07 +1000 Subject: powerpc/64: Move the boot time info banner to a separate function Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 8b9768a..2395a88 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -476,6 +476,37 @@ static void __init initialize_cache_info(void) DBG(" <- initialize_cache_info()\n"); } +static __init void print_system_info(void) +{ + pr_info("-----------------------------------------------------\n"); + pr_info("ppc64_pft_size = 0x%llx\n", ppc64_pft_size); + pr_info("phys_mem_size = 0x%llx\n", memblock_phys_mem_size()); + + if (ppc64_caches.dline_size != 0x80) + pr_info("dcache_line_size = 0x%x\n", ppc64_caches.dline_size); + if (ppc64_caches.iline_size != 0x80) + pr_info("icache_line_size = 0x%x\n", ppc64_caches.iline_size); + + pr_info("cpu_features 
= 0x%016lx\n", cur_cpu_spec->cpu_features); + pr_info(" possible = 0x%016lx\n", CPU_FTRS_POSSIBLE); + pr_info(" always = 0x%016lx\n", CPU_FTRS_ALWAYS); + pr_info("cpu_user_features = 0x%08x 0x%08x\n", cur_cpu_spec->cpu_user_features, + cur_cpu_spec->cpu_user_features2); + pr_info("mmu_features = 0x%08x\n", cur_cpu_spec->mmu_features); + pr_info("firmware_features = 0x%016lx\n", powerpc_firmware_features); + +#ifdef CONFIG_PPC_STD_MMU_64 + if (htab_address) + pr_info("htab_address = 0x%p\n", htab_address); + + pr_info("htab_hash_mask = 0x%lx\n", htab_hash_mask); +#endif + + if (PHYSICAL_START > 0) + pr_info("physical_start = 0x%llx\n", + (unsigned long long)PHYSICAL_START); + pr_info("-----------------------------------------------------\n"); +} /* * Do some initial setup of the system. The parameters are those which @@ -543,37 +574,8 @@ void __init setup_system(void) smp_release_cpus(); #endif - pr_info("Starting Linux %s %s\n", init_utsname()->machine, - init_utsname()->version); - - pr_info("-----------------------------------------------------\n"); - pr_info("ppc64_pft_size = 0x%llx\n", ppc64_pft_size); - pr_info("phys_mem_size = 0x%llx\n", memblock_phys_mem_size()); - - if (ppc64_caches.dline_size != 0x80) - pr_info("dcache_line_size = 0x%x\n", ppc64_caches.dline_size); - if (ppc64_caches.iline_size != 0x80) - pr_info("icache_line_size = 0x%x\n", ppc64_caches.iline_size); - - pr_info("cpu_features = 0x%016lx\n", cur_cpu_spec->cpu_features); - pr_info(" possible = 0x%016lx\n", CPU_FTRS_POSSIBLE); - pr_info(" always = 0x%016lx\n", CPU_FTRS_ALWAYS); - pr_info("cpu_user_features = 0x%08x 0x%08x\n", cur_cpu_spec->cpu_user_features, - cur_cpu_spec->cpu_user_features2); - pr_info("mmu_features = 0x%08x\n", cur_cpu_spec->mmu_features); - pr_info("firmware_features = 0x%016lx\n", powerpc_firmware_features); - -#ifdef CONFIG_PPC_STD_MMU_64 - if (htab_address) - pr_info("htab_address = 0x%p\n", htab_address); - - pr_info("htab_hash_mask = 0x%lx\n", htab_hash_mask); 
-#endif - - if (PHYSICAL_START > 0) - pr_info("physical_start = 0x%llx\n", - (unsigned long long)PHYSICAL_START); - pr_info("-----------------------------------------------------\n"); + /* Print various info about the machine that has been gathered so far. */ + print_system_info(); DBG(" <- setup_system()\n"); } -- cgit v0.10.2 From 9df549afeab4ea968b6d83cf9d7a1e3c577a9846 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 5 Jul 2016 15:04:08 +1000 Subject: powerpc/64: Move setting of {i,d}cache_bsize to initialize_cache_info() Also remove the completely obsolete comment. We *do* look in the device-tree. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 2395a88..aca215d 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -473,6 +473,10 @@ static void __init initialize_cache_info(void) } } + /* For use by binfmt_elf */ + dcache_bsize = ppc64_caches.dline_size; + icache_bsize = ppc64_caches.iline_size; + DBG(" <- initialize_cache_info()\n"); } @@ -691,15 +695,6 @@ void __init setup_arch(char **cmdline_p) { *cmdline_p = boot_command_line; - /* - * Set cache line size based on type of cpu as a default. - * Systems with OF can look in the properties on the cpu node(s) - * for a possibly more accurate value. - */ - dcache_bsize = ppc64_caches.dline_size; - icache_bsize = ppc64_caches.iline_size; - - /* Reserve large chunks of memory for use by CMA for KVM */ kvm_cma_reserve(); -- cgit v0.10.2 From fa745a129cae93ca5d871ebac2a8f6c27ae3fbf2 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 5 Jul 2016 15:04:09 +1000 Subject: powerpc/64: Move the content of setup_system() to setup_arch() And kill setup_system().
Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 064cd93..f765b04 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -941,7 +941,7 @@ start_here_multiplatform: mtspr SPRN_SRR1,r4 RFI b . /* prevent speculative execution */ - + /* This is where all platforms converge execution */ start_here_common: @@ -951,9 +951,6 @@ start_here_common: /* Load the TOC (virtual address) */ ld r2,PACATOC(r13) - /* Do more system initializations in virtual mode */ - bl setup_system - /* Mark interrupts soft and hard disabled (they might be enabled * in the PACA when doing hotplug) */ diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index aca215d..61c3e6c4 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -512,78 +512,6 @@ static __init void print_system_info(void) pr_info("-----------------------------------------------------\n"); } -/* - * Do some initial setup of the system. The parameters are those which - * were passed in from the bootloader. - */ -void __init setup_system(void) -{ - DBG(" -> setup_system()\n"); - - /* - * Unflatten the device-tree passed by prom_init or kexec - */ - unflatten_device_tree(); - - /* - * Fill the ppc64_caches & systemcfg structures with informations - * retrieved from the device-tree. - */ - initialize_cache_info(); - -#ifdef CONFIG_PPC_RTAS - /* - * Initialize RTAS if available - */ - rtas_initialize(); -#endif /* CONFIG_PPC_RTAS */ - - /* - * Check if we have an initrd provided via the device-tree - */ - check_for_initrd(); - - /* Probe the machine type */ - probe_machine(); - - /* - * We can discover serial ports now since the above did setup the - * hash table management for us, thus ioremap works. 
We do that early - * so that further code can be debugged - */ - find_legacy_serial_ports(); - - /* - * Register early console - */ - register_early_udbg_console(); - - /* - * Initialize xmon - */ - xmon_setup(); - - smp_setup_cpu_maps(); - check_smt_enabled(); - setup_tlb_core_data(); - - /* - * Freescale Book3e parts spin in a loop provided by firmware, - * so smp_release_cpus() does nothing for them - */ -#if defined(CONFIG_SMP) - /* Release secondary cpus out of their spinloops at 0x60 now that - * we can map physical -> logical CPU ids - */ - smp_release_cpus(); -#endif - - /* Print various info about the machine that has been gathered so far. */ - print_system_info(); - - DBG(" <- setup_system()\n"); -} - /* This returns the limit below which memory accesses to the linear * mapping are guarnateed not to cause a TLB or SLB miss. This is * used to allocate interrupt or emergency stacks for which our @@ -695,6 +623,68 @@ void __init setup_arch(char **cmdline_p) { *cmdline_p = boot_command_line; + /* + * Unflatten the device-tree passed by prom_init or kexec + */ + unflatten_device_tree(); + + /* + * Fill the ppc64_caches & systemcfg structures with informations + * retrieved from the device-tree. + */ + initialize_cache_info(); + +#ifdef CONFIG_PPC_RTAS + /* + * Initialize RTAS if available + */ + rtas_initialize(); +#endif /* CONFIG_PPC_RTAS */ + + /* + * Check if we have an initrd provided via the device-tree + */ + check_for_initrd(); + + /* Probe the machine type */ + probe_machine(); + + /* + * We can discover serial ports now since the above did setup the + * hash table management for us, thus ioremap works. 
We do that early + * so that further code can be debugged + */ + find_legacy_serial_ports(); + + /* + * Register early console + */ + register_early_udbg_console(); + + /* + * Initialize xmon + */ + xmon_setup(); + + smp_setup_cpu_maps(); + check_smt_enabled(); + setup_tlb_core_data(); + + /* + * Freescale Book3e parts spin in a loop provided by firmware, + * so smp_release_cpus() does nothing for them + */ +#if defined(CONFIG_SMP) + /* + * Release secondary cpus out of their spinloops at 0x60 now that + * we can map physical -> logical CPU ids + */ + smp_release_cpus(); +#endif + + /* Print various info about the machine that has been gathered so far. */ + print_system_info(); + /* Reserve large chunks of memory for use by CMA for KVM */ kvm_cma_reserve(); -- cgit v0.10.2 From 8f212cb26fc74dcf7b8c5c3dbcf3c3741990c31d Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 5 Jul 2016 15:04:10 +1000 Subject: powerpc/32: Move cache info inits to a separate function Matches 64-bit. Also move the call to the same spot as ppc64 Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 22347e87..5457911 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -248,6 +248,21 @@ static void setup_power_save(void) #endif } +static __init void initialize_cache_info(void) +{ + /* + * Set cache line size based on type of cpu as a default. + * Systems with OF can look in the properties on the cpu node(s) + * for a possibly more accurate value. 
+ */ + dcache_bsize = cur_cpu_spec->dcache_bsize; + icache_bsize = cur_cpu_spec->icache_bsize; + ucache_bsize = 0; + if (cpu_has_feature(CPU_FTR_UNIFIED_ID_CACHE)) + ucache_bsize = icache_bsize = dcache_bsize; +} + + /* Warning, IO base is not yet inited */ void __init setup_arch(char **cmdline_p) { @@ -257,6 +272,7 @@ void __init setup_arch(char **cmdline_p) loops_per_jiffy = 500000000 / HZ; unflatten_device_tree(); + initialize_cache_info(); check_for_initrd(); probe_machine(); @@ -272,17 +288,6 @@ void __init setup_arch(char **cmdline_p) xmon_setup(); - /* - * Set cache line size based on type of cpu as a default. - * Systems with OF can look in the properties on the cpu node(s) - * for a possibly more accurate value. - */ - dcache_bsize = cur_cpu_spec->dcache_bsize; - icache_bsize = cur_cpu_spec->icache_bsize; - ucache_bsize = 0; - if (cpu_has_feature(CPU_FTR_UNIFIED_ID_CACHE)) - ucache_bsize = icache_bsize = dcache_bsize; - if (ppc_md.panic) setup_panic(); -- cgit v0.10.2 From e39afba3aa11f7088ddc00d37ab34a85d960a76e Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 5 Jul 2016 15:04:11 +1000 Subject: powerpc: Re-order the call to smp_setup_cpu_maps() It makes more sense to do it before initializing xmon() as xmon might use the info in there. We do want to register the console early though in case we want some functioning printk's in the cpu map setup.
Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 5457911..58674b6 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -281,11 +281,11 @@ void __init setup_arch(char **cmdline_p) find_legacy_serial_ports(); - smp_setup_cpu_maps(); - /* Register early console */ register_early_udbg_console(); + smp_setup_cpu_maps(); + xmon_setup(); if (ppc_md.panic) diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 61c3e6c4..3fd59bc 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -661,12 +661,13 @@ void __init setup_arch(char **cmdline_p) */ register_early_udbg_console(); + smp_setup_cpu_maps(); + /* * Initialize xmon */ xmon_setup(); - smp_setup_cpu_maps(); check_smt_enabled(); setup_tlb_core_data(); -- cgit v0.10.2 From f7b9ebb79e90b19bf6a2cb805a536258437fc3fa Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 5 Jul 2016 15:04:12 +1000 Subject: powerpc: Re-order setup_panic() Do it right after probe_machine() since it's about testing ppc_md, and put the test in the common code. 
Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index b8ee1c8..ca9255e 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -688,6 +688,8 @@ static struct notifier_block ppc_panic_block = { void __init setup_panic(void) { + if (!ppc_md.panic) + return; atomic_notifier_chain_register(&panic_notifier_list, &ppc_panic_block); } diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 58674b6..6247a3a 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -277,6 +277,8 @@ void __init setup_arch(char **cmdline_p) probe_machine(); + setup_panic(); + setup_power_save(); find_legacy_serial_ports(); @@ -288,9 +290,6 @@ void __init setup_arch(char **cmdline_p) xmon_setup(); - if (ppc_md.panic) - setup_panic(); - init_mm.start_code = (unsigned long)_stext; init_mm.end_code = (unsigned long) _etext; init_mm.end_data = (unsigned long) _edata; diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 3fd59bc..f55c25d 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -649,6 +649,8 @@ void __init setup_arch(char **cmdline_p) /* Probe the machine type */ probe_machine(); + setup_panic(); + /* * We can discover serial ports now since the above did setup the * hash table management for us, thus ioremap works. 
We do that early @@ -696,9 +698,6 @@ void __init setup_arch(char **cmdline_p) */ reserve_hugetlb_gpages(); - if (ppc_md.panic) - setup_panic(); - klp_init_thread_info(&init_thread_info); init_mm.start_code = (unsigned long)_stext; -- cgit v0.10.2 From 009776baa18448b223be73ac74912fef7e17b9e2 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 5 Jul 2016 15:07:50 +1000 Subject: powerpc/64: Make a few boot functions __init Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index f55c25d..fba96ad 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -198,7 +198,7 @@ early_param("smt-enabled", early_smt_enabled); #endif /* CONFIG_SMP */ /** Fix up paca fields required for the boot cpu */ -static void fixup_boot_paca(void) +static void __init fixup_boot_paca(void) { /* The boot cpu is started */ get_paca()->cpu_start = 1; @@ -206,7 +206,7 @@ static void fixup_boot_paca(void) get_paca()->data_offset = 0; } -static void configure_exceptions(void) +static void __init configure_exceptions(void) { /* * Setup the trampolines from the lowmem exception vectors @@ -517,7 +517,7 @@ static __init void print_system_info(void) * used to allocate interrupt or emergency stacks for which our * exception entry path doesn't deal with being interrupted. */ -static u64 safe_stack_limit(void) +static __init u64 safe_stack_limit(void) { #ifdef CONFIG_PPC_BOOK3E /* Freescale BookE bolts the entire linear mapping */ -- cgit v0.10.2 From b1923caa6e641f3d0a93b5d045aef67ded5aef67 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 5 Jul 2016 15:07:51 +1000 Subject: powerpc: Merge 32-bit and 64-bit setup_arch() There are few enough differences now. mpe: Add a/p/k/setup.h to contain the prototypes and empty versions of functions we need, rather than using weak functions.
Add a few other empty versions to avoid as many #ifdefs as possible in the code. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index fa3e3c4..9c23baa 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -351,7 +351,6 @@ extern bool rtas_indicator_present(int token, int *maxindex); extern int rtas_set_indicator(int indicator, int index, int new_value); extern int rtas_set_indicator_fast(int indicator, int index, int new_value); extern void rtas_progress(char *s, unsigned short hex); -extern void rtas_initialize(void); extern int rtas_suspend_cpu(struct rtas_suspend_me_data *data); extern int rtas_suspend_last_cpu(struct rtas_suspend_me_data *data); extern int rtas_online_cpus_mask(cpumask_var_t cpus); @@ -460,9 +459,11 @@ static inline int page_is_rtas_user_buf(unsigned long pfn) /* Not the best place to put pSeries_coalesce_init, will be fixed when we * move some of the rtas suspend-me stuff to pseries */ extern void pSeries_coalesce_init(void); +void rtas_initialize(void); #else static inline int page_is_rtas_user_buf(unsigned long pfn) { return 0;} static inline void pSeries_coalesce_init(void) { } +static inline void rtas_initialize(void) { }; #endif extern int call_rtas(const char *, int, int, unsigned long *, ...); diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h index e1afd4c..0d02c11 100644 --- a/arch/powerpc/include/asm/smp.h +++ b/arch/powerpc/include/asm/smp.h @@ -160,9 +160,6 @@ static inline void set_hard_smp_processor_id(int cpu, int phys) { paca[cpu].hw_cpu_id = phys; } - -extern void smp_release_cpus(void); - #else /* 32-bit */ #ifndef CONFIG_SMP @@ -179,6 +176,12 @@ static inline void set_hard_smp_processor_id(int cpu, int phys) #endif /* !CONFIG_SMP */ #endif /* !CONFIG_PPC64 */ +#if defined(CONFIG_PPC64) && (defined(CONFIG_SMP) || defined(CONFIG_KEXEC)) +extern void 
smp_release_cpus(void); +#else +static inline void smp_release_cpus(void) { }; +#endif + extern int smt_enabled_at_boot; extern void smp_mpic_probe(void); diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index ca9255e..714b4ba 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -61,6 +62,12 @@ #include #include #include +#include +#include +#include +#include + +#include "setup.h" #ifdef DEBUG #include @@ -758,3 +765,169 @@ void arch_setup_pdev_archdata(struct platform_device *pdev) pdev->dev.dma_mask = &pdev->archdata.dma_mask; set_dma_ops(&pdev->dev, &dma_direct_ops); } + +static __init void print_system_info(void) +{ + pr_info("-----------------------------------------------------\n"); +#ifdef CONFIG_PPC_STD_MMU_64 + pr_info("ppc64_pft_size = 0x%llx\n", ppc64_pft_size); +#endif +#ifdef CONFIG_PPC_STD_MMU_32 + pr_info("Hash_size = 0x%lx\n", Hash_size); +#endif + pr_info("phys_mem_size = 0x%llx\n", + (unsigned long long)memblock_phys_mem_size()); + + pr_info("dcache_bsize = 0x%x\n", dcache_bsize); + pr_info("icache_bsize = 0x%x\n", icache_bsize); + if (ucache_bsize != 0) + pr_info("ucache_bsize = 0x%x\n", ucache_bsize); + + pr_info("cpu_features = 0x%016lx\n", cur_cpu_spec->cpu_features); + pr_info(" possible = 0x%016lx\n", + (unsigned long)CPU_FTRS_POSSIBLE); + pr_info(" always = 0x%016lx\n", + (unsigned long)CPU_FTRS_ALWAYS); + pr_info("cpu_user_features = 0x%08x 0x%08x\n", + cur_cpu_spec->cpu_user_features, + cur_cpu_spec->cpu_user_features2); + pr_info("mmu_features = 0x%08x\n", cur_cpu_spec->mmu_features); +#ifdef CONFIG_PPC64 + pr_info("firmware_features = 0x%016lx\n", powerpc_firmware_features); +#endif + +#ifdef CONFIG_PPC_STD_MMU_64 + if (htab_address) + pr_info("htab_address = 0x%p\n", htab_address); + if (htab_hash_mask) + pr_info("htab_hash_mask = 0x%lx\n", htab_hash_mask); +#endif +#ifdef 
CONFIG_PPC_STD_MMU_32 + if (Hash) + pr_info("Hash = 0x%p\n", Hash); + if (Hash_mask) + pr_info("Hash_mask = 0x%lx\n", Hash_mask); +#endif + + if (PHYSICAL_START > 0) + pr_info("physical_start = 0x%llx\n", + (unsigned long long)PHYSICAL_START); + pr_info("-----------------------------------------------------\n"); +} + +/* + * Called into from start_kernel this initializes memblock, which is used + * to manage page allocation until mem_init is called. + */ +void __init setup_arch(char **cmdline_p) +{ + *cmdline_p = boot_command_line; + + /* Set a half-reasonable default so udelay does something sensible */ + loops_per_jiffy = 500000000 / HZ; + + /* Unflatten the device-tree passed by prom_init or kexec */ + unflatten_device_tree(); + + /* + * Initialize cache line/block info from device-tree (on ppc64) or + * just cputable (on ppc32). + */ + initialize_cache_info(); + + /* Initialize RTAS if available. */ + rtas_initialize(); + + /* Check if we have an initrd provided via the device-tree. */ + check_for_initrd(); + + /* Probe the machine type, establish ppc_md. */ + probe_machine(); + + /* Setup panic notifier if requested by the platform. */ + setup_panic(); + + /* + * Configure ppc_md.power_save (ppc32 only, 64-bit machines do + * it from their respective probe() function. + */ + setup_power_save(); + + /* Discover standard serial ports. */ + find_legacy_serial_ports(); + + /* Register early console with the printk subsystem. */ + register_early_udbg_console(); + + /* Setup the various CPU maps based on the device-tree. */ + smp_setup_cpu_maps(); + + /* Initialize xmon. */ + xmon_setup(); + + /* Check the SMT related command line arguments (ppc64). */ + check_smt_enabled(); + + /* On BookE, setup per-core TLB data structures. */ + setup_tlb_core_data(); + + /* + * Release secondary cpus out of their spinloops at 0x60 now that + * we can map physical -> logical CPU ids. 
+ * + * Freescale Book3e parts spin in a loop provided by firmware, + * so smp_release_cpus() does nothing for them. + */ +#ifdef CONFIG_SMP + smp_release_cpus(); +#endif + + /* Print various info about the machine that has been gathered so far. */ + print_system_info(); + + /* Reserve large chunks of memory for use by CMA for KVM. */ + kvm_cma_reserve(); + + /* + * Reserve any gigantic pages requested on the command line. + * memblock needs to have been initialized by the time this is + * called since this will reserve memory. + */ + reserve_hugetlb_gpages(); + + klp_init_thread_info(&init_thread_info); + + init_mm.start_code = (unsigned long)_stext; + init_mm.end_code = (unsigned long) _etext; + init_mm.end_data = (unsigned long) _edata; + init_mm.brk = klimit; +#ifdef CONFIG_PPC_64K_PAGES + init_mm.context.pte_frag = NULL; +#endif +#ifdef CONFIG_SPAPR_TCE_IOMMU + mm_iommu_init(&init_mm.context); +#endif + irqstack_early_init(); + exc_lvl_early_init(); + emergency_stack_init(); + + initmem_init(); + +#ifdef CONFIG_DUMMY_CONSOLE + conswitchp = &dummy_con; +#endif + if (ppc_md.setup_arch) + ppc_md.setup_arch(); + + paging_init(); + + /* Initialize the MMU context management stuff. */ + mmu_context_init(); + +#ifdef CONFIG_PPC64 + /* Interrupt code needs to be 64K-aligned. */ + if ((unsigned long)_stext & 0xffff) + panic("Kernelbase not 64K-aligned (0x%lx)!\n", + (unsigned long)_stext); +#endif +} diff --git a/arch/powerpc/kernel/setup.h b/arch/powerpc/kernel/setup.h new file mode 100644 index 0000000..cfba134 --- /dev/null +++ b/arch/powerpc/kernel/setup.h @@ -0,0 +1,58 @@ +/* + * Prototypes for functions that are shared between setup_(32|64|common).c + * + * Copyright 2016 Michael Ellerman, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#ifndef __ARCH_POWERPC_KERNEL_SETUP_H +#define __ARCH_POWERPC_KERNEL_SETUP_H + +void initialize_cache_info(void); +void irqstack_early_init(void); + +#ifdef CONFIG_PPC32 +void setup_power_save(void); +#else +static inline void setup_power_save(void) { }; +#endif + +#if defined(CONFIG_PPC64) && defined(CONFIG_SMP) +void check_smt_enabled(void); +#else +static inline void check_smt_enabled(void) { }; +#endif + +#if defined(CONFIG_PPC_BOOK3E) && defined(CONFIG_SMP) +void setup_tlb_core_data(void); +#else +static inline void setup_tlb_core_data(void) { }; +#endif + +#if defined(CONFIG_PPC_BOOK3E) || defined(CONFIG_BOOKE) || defined(CONFIG_40x) +void exc_lvl_early_init(void); +#else +static inline void exc_lvl_early_init(void) { }; +#endif + +#ifdef CONFIG_PPC64 +void emergency_stack_init(void); +#else +static inline void emergency_stack_init(void) { }; +#endif + +/* + * Having this in kvm_ppc.h makes include dependencies too + * tricky to solve for setup-common.c so have it here. + */ +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE +void kvm_cma_reserve(void); +#else +static inline void kvm_cma_reserve(void) { }; +#endif + +#endif /* __ARCH_POWERPC_KERNEL_SETUP_H */ diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 6247a3a..00f5775 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -36,7 +36,6 @@ #include #include #include -#include #include #define DBG(fmt...) 
@@ -191,7 +190,7 @@ int __init ppc_init(void) arch_initcall(ppc_init); -static void __init irqstack_early_init(void) +void __init irqstack_early_init(void) { unsigned int i; @@ -206,7 +205,7 @@ static void __init irqstack_early_init(void) } #if defined(CONFIG_BOOKE) || defined(CONFIG_40x) -static void __init exc_lvl_early_init(void) +void __init exc_lvl_early_init(void) { unsigned int i, hw_cpu; @@ -229,11 +228,9 @@ static void __init exc_lvl_early_init(void) #endif } } -#else -#define exc_lvl_early_init() #endif -static void setup_power_save(void) +void __init setup_power_save(void) { #ifdef CONFIG_6xx if (cpu_has_feature(CPU_FTR_CAN_DOZE) || @@ -248,7 +245,7 @@ static void setup_power_save(void) #endif } -static __init void initialize_cache_info(void) +__init void initialize_cache_info(void) { /* * Set cache line size based on type of cpu as a default. @@ -261,57 +258,3 @@ static __init void initialize_cache_info(void) if (cpu_has_feature(CPU_FTR_UNIFIED_ID_CACHE)) ucache_bsize = icache_bsize = dcache_bsize; } - - -/* Warning, IO base is not yet inited */ -void __init setup_arch(char **cmdline_p) -{ - *cmdline_p = boot_command_line; - - /* so udelay does something sensible, assume <= 1000 bogomips */ - loops_per_jiffy = 500000000 / HZ; - - unflatten_device_tree(); - initialize_cache_info(); - check_for_initrd(); - - probe_machine(); - - setup_panic(); - - setup_power_save(); - - find_legacy_serial_ports(); - - /* Register early console */ - register_early_udbg_console(); - - smp_setup_cpu_maps(); - - xmon_setup(); - - init_mm.start_code = (unsigned long)_stext; - init_mm.end_code = (unsigned long) _etext; - init_mm.end_data = (unsigned long) _edata; - init_mm.brk = klimit; - - exc_lvl_early_init(); - - irqstack_early_init(); - - initmem_init(); - if ( ppc_md.progress ) ppc_md.progress("setup_arch: initmem", 0x3eab); - -#ifdef CONFIG_DUMMY_CONSOLE - conswitchp = &dummy_con; -#endif - - if (ppc_md.setup_arch) - ppc_md.setup_arch(); - if ( ppc_md.progress ) 
ppc_md.progress("arch: exit", 0x3eab); - - paging_init(); - - /* Initialize the MMU context management stuff */ - mmu_context_init(); -} diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index fba96ad..d8216ae 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -35,7 +35,6 @@ #include #include #include -#include #include #include @@ -64,12 +63,10 @@ #include #include #include -#include #include -#include -#include #include #include +#include #ifdef DEBUG #define DBG(fmt...) udbg_printf(fmt) @@ -100,7 +97,7 @@ int icache_bsize; int ucache_bsize; #if defined(CONFIG_PPC_BOOK3E) && defined(CONFIG_SMP) -static void setup_tlb_core_data(void) +void __init setup_tlb_core_data(void) { int cpu; @@ -133,10 +130,6 @@ static void setup_tlb_core_data(void) } } } -#else -static void setup_tlb_core_data(void) -{ -} #endif #ifdef CONFIG_SMP @@ -144,7 +137,7 @@ static void setup_tlb_core_data(void) static char *smt_enabled_cmdline; /* Look for ibm,smt-enabled OF option */ -static void check_smt_enabled(void) +void __init check_smt_enabled(void) { struct device_node *dn; const char *smt_option; @@ -193,8 +186,6 @@ static int __init early_smt_enabled(char *p) } early_param("smt-enabled", early_smt_enabled); -#else -#define check_smt_enabled() #endif /* CONFIG_SMP */ /** Fix up paca fields required for the boot cpu */ @@ -408,7 +399,7 @@ void smp_release_cpus(void) * cache informations about the CPU that will be used by cache flush * routines and/or provided to userland */ -static void __init initialize_cache_info(void) +void __init initialize_cache_info(void) { struct device_node *np; unsigned long num_cpus = 0; @@ -480,38 +471,6 @@ static void __init initialize_cache_info(void) DBG(" <- initialize_cache_info()\n"); } -static __init void print_system_info(void) -{ - pr_info("-----------------------------------------------------\n"); - pr_info("ppc64_pft_size = 0x%llx\n", ppc64_pft_size); - pr_info("phys_mem_size = 0x%llx\n", 
memblock_phys_mem_size()); - - if (ppc64_caches.dline_size != 0x80) - pr_info("dcache_line_size = 0x%x\n", ppc64_caches.dline_size); - if (ppc64_caches.iline_size != 0x80) - pr_info("icache_line_size = 0x%x\n", ppc64_caches.iline_size); - - pr_info("cpu_features = 0x%016lx\n", cur_cpu_spec->cpu_features); - pr_info(" possible = 0x%016lx\n", CPU_FTRS_POSSIBLE); - pr_info(" always = 0x%016lx\n", CPU_FTRS_ALWAYS); - pr_info("cpu_user_features = 0x%08x 0x%08x\n", cur_cpu_spec->cpu_user_features, - cur_cpu_spec->cpu_user_features2); - pr_info("mmu_features = 0x%08x\n", cur_cpu_spec->mmu_features); - pr_info("firmware_features = 0x%016lx\n", powerpc_firmware_features); - -#ifdef CONFIG_PPC_STD_MMU_64 - if (htab_address) - pr_info("htab_address = 0x%p\n", htab_address); - - pr_info("htab_hash_mask = 0x%lx\n", htab_hash_mask); -#endif - - if (PHYSICAL_START > 0) - pr_info("physical_start = 0x%llx\n", - (unsigned long long)PHYSICAL_START); - pr_info("-----------------------------------------------------\n"); -} - /* This returns the limit below which memory accesses to the linear * mapping are guarnateed not to cause a TLB or SLB miss. This is * used to allocate interrupt or emergency stacks for which our @@ -533,7 +492,7 @@ static __init u64 safe_stack_limit(void) #endif } -static void __init irqstack_early_init(void) +void __init irqstack_early_init(void) { u64 limit = safe_stack_limit(); unsigned int i; @@ -553,7 +512,7 @@ static void __init irqstack_early_init(void) } #ifdef CONFIG_PPC_BOOK3E -static void __init exc_lvl_early_init(void) +void __init exc_lvl_early_init(void) { unsigned int i; unsigned long sp; @@ -575,8 +534,6 @@ static void __init exc_lvl_early_init(void) if (cpu_has_feature(CPU_FTR_DEBUG_LVL_EXC)) patch_exception(0x040, exc_debug_debug_book3e); } -#else -#define exc_lvl_early_init() #endif /* @@ -584,7 +541,7 @@ static void __init exc_lvl_early_init(void) * early in SMP boots before relocation is enabled. Exclusive emergency * stack for machine checks. 
*/ -static void __init emergency_stack_init(void) +void __init emergency_stack_init(void) { u64 limit; unsigned int i; @@ -615,124 +572,6 @@ static void __init emergency_stack_init(void) } } -/* - * Called into from start_kernel this initializes memblock, which is used - * to manage page allocation until mem_init is called. - */ -void __init setup_arch(char **cmdline_p) -{ - *cmdline_p = boot_command_line; - - /* - * Unflatten the device-tree passed by prom_init or kexec - */ - unflatten_device_tree(); - - /* - * Fill the ppc64_caches & systemcfg structures with informations - * retrieved from the device-tree. - */ - initialize_cache_info(); - -#ifdef CONFIG_PPC_RTAS - /* - * Initialize RTAS if available - */ - rtas_initialize(); -#endif /* CONFIG_PPC_RTAS */ - - /* - * Check if we have an initrd provided via the device-tree - */ - check_for_initrd(); - - /* Probe the machine type */ - probe_machine(); - - setup_panic(); - - /* - * We can discover serial ports now since the above did setup the - * hash table management for us, thus ioremap works. We do that early - * so that further code can be debugged - */ - find_legacy_serial_ports(); - - /* - * Register early console - */ - register_early_udbg_console(); - - smp_setup_cpu_maps(); - - /* - * Initialize xmon - */ - xmon_setup(); - - check_smt_enabled(); - setup_tlb_core_data(); - - /* - * Freescale Book3e parts spin in a loop provided by firmware, - * so smp_release_cpus() does nothing for them - */ -#if defined(CONFIG_SMP) - /* - * Release secondary cpus out of their spinloops at 0x60 now that - * we can map physical -> logical CPU ids - */ - smp_release_cpus(); -#endif - - /* Print various info about the machine that has been gathered so far. */ - print_system_info(); - - /* Reserve large chunks of memory for use by CMA for KVM */ - kvm_cma_reserve(); - - /* - * Reserve any gigantic pages requested on the command line. 
- * memblock needs to have been initialized by the time this is - * called since this will reserve memory. - */ - reserve_hugetlb_gpages(); - - klp_init_thread_info(&init_thread_info); - - init_mm.start_code = (unsigned long)_stext; - init_mm.end_code = (unsigned long) _etext; - init_mm.end_data = (unsigned long) _edata; - init_mm.brk = klimit; -#ifdef CONFIG_PPC_64K_PAGES - init_mm.context.pte_frag = NULL; -#endif -#ifdef CONFIG_SPAPR_TCE_IOMMU - mm_iommu_init(&init_mm.context); -#endif - irqstack_early_init(); - exc_lvl_early_init(); - emergency_stack_init(); - - initmem_init(); - -#ifdef CONFIG_DUMMY_CONSOLE - conswitchp = &dummy_con; -#endif - if (ppc_md.setup_arch) - ppc_md.setup_arch(); - - paging_init(); - - /* Initialize the MMU context management stuff */ - mmu_context_init(); - - /* Interrupt code needs to be 64K-aligned */ - if ((unsigned long)_stext & 0xffff) - panic("Kernelbase not 64K-aligned (0x%lx)!\n", - (unsigned long)_stext); -} - #ifdef CONFIG_SMP #define PCPU_DYN_SIZE () -- cgit v0.10.2 From 9d636109511a000882f8dff4eaafa874eec5ece8 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 19 Jul 2016 14:48:30 +1000 Subject: powerpc/ftrace: Separate the heuristics for checking call sites In __ftrace_make_nop() (the 64-bit version), we have code to deal with two ftrace ABIs. There is the original ABI, which looks mostly like a function call, and then the mprofile-kernel ABI which is just a branch. The code tries to handle both cases, by looking for the presence of a load to restore the TOC pointer (PPC_INST_LD_TOC). If we detect the TOC load, we assume the call site is for an mcount() call using the old ABI. That means we patch the mcount() call with a b +8, to branch over the TOC load. However if the kernel was built with mprofile-kernel, then there will never be a call site using the original ftrace ABI. If for some reason we do see a TOC load, then it's there for a good reason, and we should not jump over it. 
So split the code, using the existing CC_USING_MPROFILE_KERNEL. Kernels built with mprofile-kernel will only look for, and expect, the new ABI, and similarly for the original ABI. Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c index 7af6c4d..cc52d97 100644 --- a/arch/powerpc/kernel/ftrace.c +++ b/arch/powerpc/kernel/ftrace.c @@ -144,6 +144,21 @@ __ftrace_make_nop(struct module *mod, return -EINVAL; } +#ifdef CC_USING_MPROFILE_KERNEL + /* When using -mkernel_profile there is no load to jump over */ + pop = PPC_INST_NOP; + + if (probe_kernel_read(&op, (void *)(ip - 4), 4)) { + pr_err("Fetching instruction at %lx failed.\n", ip - 4); + return -EFAULT; + } + + /* We expect either a mflr r0, or a std r0, LRSAVE(r1) */ + if (op != PPC_INST_MFLR && op != PPC_INST_STD_LR) { + pr_err("Unexpected instruction %08x around bl _mcount\n", op); + return -EINVAL; + } +#else /* * Our original call site looks like: * @@ -170,24 +185,10 @@ __ftrace_make_nop(struct module *mod, } if (op != PPC_INST_LD_TOC) { - unsigned int inst; - - if (probe_kernel_read(&inst, (void *)(ip - 4), 4)) { - pr_err("Fetching instruction at %lx failed.\n", ip - 4); - return -EFAULT; - } - - /* We expect either a mlfr r0, or a std r0, LRSAVE(r1) */ - if (inst != PPC_INST_MFLR && inst != PPC_INST_STD_LR) { - pr_err("Unexpected instructions around bl _mcount\n" - "when enabling dynamic ftrace!\t" - "(%08x,bl,%08x)\n", inst, op); - return -EINVAL; - } - - /* When using -mkernel_profile there is no load to jump over */ - pop = PPC_INST_NOP; + pr_err("Expected %08x found %08x\n", PPC_INST_LD_TOC, op); + return -EINVAL; } +#endif /* CC_USING_MPROFILE_KERNEL */ if (patch_instruction((unsigned int *)ip, pop)) { pr_err("Patching NOP failed.\n"); -- cgit v0.10.2 From 31278b17a0dfed3014786b623fd07ee110b801da Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 19 Jul 2016 14:48:31 +1000 Subject: powerpc/modules: Never restore r2 for a mprofile-kernel style 
mcount() call In the module loader we process relocations, and for long jumps we generate trampolines (aka stubs). At the call site for one of these trampolines we usually need to generate a load instruction to restore the TOC pointer into r2. There is one exception, however: calls to mcount() using the mprofile-kernel ABI handle the TOC inside the stub, so for them we do not generate a TOC load. The bug is in how the code in restore_r2() decides if it needs to generate the TOC load. It does so by looking for a nop following the branch, and if it sees a nop, it replaces it with the load. In general the compiler has no reason to generate a nop following the mcount() call and so that check works OK. However if we combine a jump label at the start of a function, with an early return, such that GCC applies the shrink-wrapping optimisation, we can then end up with an mcount call followed immediately by a nop. However, that nop is not there for a TOC load; it is for the jump label. That confuses restore_r2() into replacing the jump label nop with a TOC load, which in turn confuses ftrace into replacing the mcount call with a b +8 (fixed in the previous commit). The end result is that we jump over the jump label, which, if it was supposed to return, means we incorrectly run the body of the function. We have seen this in practice with some yet-to-be-merged patches that use jump labels more extensively. The fix is relatively simple: in restore_r2() we check for an mprofile-kernel style mcount() call first, before looking for the presence of a nop. Fixes: 153086644fd1 ("powerpc/ftrace: Add support for -mprofile-kernel ftrace ABI") Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index f703f34..183368e 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -494,9 +494,10 @@ static bool is_early_mcount_callsite(u32 *instruction) restore r2.
*/ static int restore_r2(u32 *instruction, struct module *me) { + if (is_early_mcount_callsite(instruction - 1)) + return 1; + if (*instruction != PPC_INST_NOP) { - if (is_early_mcount_callsite(instruction - 1)) - return 1; pr_err("%s: Expect noop after relocate, got %08x\n", me->name, *instruction); return 0; -- cgit v0.10.2 From 0eab46be21449f1612791201aa029facb676bd31 Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Thu, 24 Mar 2016 02:07:57 -0400 Subject: powerpc/mm: Add memory barrier in __hugepte_alloc() __hugepte_alloc() uses kmem_cache_zalloc() to allocate a zeroed PTE and proceeds to use the newly allocated PTE. Add a memory barrier to make sure that the other CPUs see a properly initialized PTE. Based on a fix suggested by James Dykman. Reported-by: James Dykman Signed-off-by: Aneesh Kumar K.V Signed-off-by: Sukadev Bhattiprolu Tested-by: James Dykman Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 5aac1a3..6be21e0 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -81,6 +81,13 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, if (! new) return -ENOMEM; + /* + * Make sure other cpus find the hugepd set only after a + * properly initialized page table is visible to them. + * For more details look for comment in __pte_alloc(). + */ + smp_wmb(); + spin_lock(&mm->page_table_lock); #ifdef CONFIG_PPC_FSL_BOOK3E /* -- cgit v0.10.2 From 802a345183c0308aa64969cca62c23961bf86a44 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Wed, 20 Jul 2016 14:26:51 +1000 Subject: powerpc/powernv/ioda: Fix endianness when reading TCEs The iommu_table_ops::exchange() callback writes new TCE to the table and returns old value and permission mask. 
The old TCE value is correctly converted from BE to CPU endian; however, the permission mask was calculated from the BE value and therefore always returned DMA_NONE, which could cause a memory leak on LE systems using the VFIO SPAPR TCE IOMMU v1 driver. This fixes pnv_tce_xchg() to hold @oldtce in CPU endian. Fixes: 05c6cfb9dce0 ("powerpc/iommu/powernv: Release replaced TCE") Signed-off-by: Alexey Kardashevskiy Reviewed-by: David Gibson Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 4617ea2..6701dd5 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -737,8 +737,8 @@ int pnv_tce_xchg(struct iommu_table *tbl, long index, if (newtce & TCE_PCI_WRITE) newtce |= TCE_PCI_READ; - oldtce = xchg(pnv_tce(tbl, idx), cpu_to_be64(newtce)); - *hpa = be64_to_cpu(oldtce) & ~(TCE_PCI_READ | TCE_PCI_WRITE); + oldtce = be64_to_cpu(xchg(pnv_tce(tbl, idx), cpu_to_be64(newtce))); + *hpa = oldtce & ~(TCE_PCI_READ | TCE_PCI_WRITE); *direction = iommu_tce_direction(oldtce); return 0; -- cgit v0.10.2 From 4a1202765ddf4e5bb3143c0a859ee37f8fcf9b85 Mon Sep 17 00:00:00 2001 From: Alastair D'Silva Date: Tue, 19 Jul 2016 14:03:52 +1000 Subject: powerpc: Add module autoloading based on CPU features This patch provides the necessary infrastructure to allow drivers to be automatically loaded via udev. It implements the minimum required to be able to use module_cpu_feature_match() to trigger the GENERIC_CPU_AUTOPROBE mechanisms. The features exposed are a mirror of the cpu_user_features (converted to an offset from a mask). This decision was made to ensure that the behavior of features for module loading and userspace is consistent.
Signed-off-by: Alastair D'Silva [mpe: Only define the bits we currently need] Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 9b6d36b..a5e0b47 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -165,6 +165,7 @@ config PPC select ARCH_HAS_UBSAN_SANITIZE_ALL select ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT select HAVE_LIVEPATCH if HAVE_DYNAMIC_FTRACE_WITH_REGS + select GENERIC_CPU_AUTOPROBE config GENERIC_CSUM def_bool CPU_LITTLE_ENDIAN diff --git a/arch/powerpc/include/asm/cpufeature.h b/arch/powerpc/include/asm/cpufeature.h new file mode 100644 index 0000000..19e6290 --- /dev/null +++ b/arch/powerpc/include/asm/cpufeature.h @@ -0,0 +1,40 @@ +/* + * CPU feature definitions for module loading, used by + * module_cpu_feature_match(), see asm/cputable.h for powerpc CPU features. + * + * Copyright 2016 Alastair D'Silva, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef __ASM_POWERPC_CPUFEATURE_H +#define __ASM_POWERPC_CPUFEATURE_H + +#include + +/* Keep these in step with powerpc/include/asm/cputable.h */ +#define MAX_CPU_FEATURES (2 * 32) + +/* + * Currently we don't have a need for any of the feature bits defined in + * cpu_user_features. 
When we do, they should be defined such as: + * + * #define PPC_MODULE_FEATURE_32 (ilog2(PPC_FEATURE_32)) + */ + +#define PPC_MODULE_FEATURE_VEC_CRYPTO (32 + ilog2(PPC_FEATURE2_VEC_CRYPTO)) + +#define cpu_feature(x) (x) + +static inline bool cpu_have_feature(unsigned int num) +{ + if (num < 32) + return !!(cur_cpu_spec->cpu_user_features & 1UL << num); + else + return !!(cur_cpu_spec->cpu_user_features2 & 1UL << (num - 32)); +} + +#endif /* __ASM_POWERPC_CPUFEATURE_H */ -- cgit v0.10.2 From ccf5c442a1b82bf74105d72416e069607353cb82 Mon Sep 17 00:00:00 2001 From: Alastair D'Silva Date: Tue, 19 Jul 2016 14:03:53 +1000 Subject: crypto: vmx - Convert to CPU feature based module autoloading This patch utilises the GENERIC_CPU_AUTOPROBE infrastructure to automatically load the vmx_crypto module if the CPU supports it. Signed-off-by: Alastair D'Silva Acked-by: Herbert Xu Signed-off-by: Michael Ellerman diff --git a/drivers/crypto/vmx/Kconfig b/drivers/crypto/vmx/Kconfig index 89d8208..a83ead1 100644 --- a/drivers/crypto/vmx/Kconfig +++ b/drivers/crypto/vmx/Kconfig @@ -1,7 +1,7 @@ config CRYPTO_DEV_VMX_ENCRYPT tristate "Encryption acceleration support on P8 CPU" depends on CRYPTO_DEV_VMX - default y + default m help Support for VMX cryptographic acceleration instructions on Power8 CPU. This module supports acceleration for AES and GHASH in hardware. 
If you diff --git a/drivers/crypto/vmx/vmx.c b/drivers/crypto/vmx/vmx.c index e163d57..5a40f2f 100644 --- a/drivers/crypto/vmx/vmx.c +++ b/drivers/crypto/vmx/vmx.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -43,9 +44,6 @@ int __init p8_init(void) int ret = 0; struct crypto_alg **alg_it; - if (!(cur_cpu_spec->cpu_user_features2 & PPC_FEATURE2_VEC_CRYPTO)) - return -ENODEV; - for (alg_it = algs; *alg_it; alg_it++) { ret = crypto_register_alg(*alg_it); printk(KERN_INFO "crypto_register_alg '%s' = %d\n", @@ -78,7 +76,7 @@ void __exit p8_exit(void) crypto_unregister_shash(&p8_ghash_alg); } -module_init(p8_init); +module_cpu_feature_match(PPC_MODULE_FEATURE_VEC_CRYPTO, p8_init); module_exit(p8_exit); MODULE_AUTHOR("Marcelo Cerri"); -- cgit v0.10.2 From 7353644fa9df875aee778a802e3d28f1e3578512 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 25 Jul 2016 11:54:41 +1000 Subject: powerpc/mm: Fix build break when PPC_NATIVE=n The recent commit to rework the hash MMU setup broke the build when CONFIG_PPC_NATIVE=n. Fix it by adding an IS_ENABLED() check before calling hpte_init_native(). Removing the else clause opens the possibility that we don't set any ops, which would probably lead to a strange crash later. So add a check that we correctly initialised at least one member of the struct. 
Fixes: 166dd7d3fbf2 ("powerpc/64: Move MMU backend selection out of platform code") Reported-by: Stephen Rothwell Acked-by: Stephen Rothwell Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 3416324..381b589 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -931,9 +931,12 @@ void __init hash__early_init_mmu(void) ps3_early_mm_init(); else if (firmware_has_feature(FW_FEATURE_LPAR)) hpte_init_lpar(); - else + else if IS_ENABLED(CONFIG_PPC_NATIVE) hpte_init_native(); + if (!mmu_hash_ops.hpte_insert) + panic("hash__early_init_mmu: No MMU hash ops defined!\n"); + /* Initialize the MMU Hash table and create the linear mapping * of memory. Has to be done before SLB initialization as this is * currently where the page size encoding is obtained. -- cgit v0.10.2 From 6364e84e855ae9a0558ac873e3ff50ecb75bb40a Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 26 Jul 2016 10:33:03 +1000 Subject: powerpc/mm: Rename hpte_init_lpar() and move the fallback to a header hpte_init_lpar() is part of the pseries platform, so name it as such. Move the fallback implementation for when PSERIES=n into the header, dropping the weak implementation. The panic() is now handled by the calling code. 
Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index b0f4dff..450b017 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -391,8 +391,13 @@ int htab_remove_mapping(unsigned long vstart, unsigned long vend, extern void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages); extern void demote_segment_4k(struct mm_struct *mm, unsigned long addr); +#ifdef CONFIG_PPC_PSERIES +void hpte_init_pseries(void); +#else +static inline void hpte_init_pseries(void) { } +#endif + extern void hpte_init_native(void); -extern void hpte_init_lpar(void); extern void hpte_init_beat(void); extern void hpte_init_beat_v3(void); diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 381b589..1ff11c1 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -885,11 +885,6 @@ static void __init htab_initialize(void) #undef KB #undef MB -void __init __weak hpte_init_lpar(void) -{ - panic("FW_FEATURE_LPAR set but no LPAR support compiled\n"); -} - void __init hash__early_init_mmu(void) { /* @@ -930,7 +925,7 @@ void __init hash__early_init_mmu(void) if (firmware_has_feature(FW_FEATURE_PS3_LV1)) ps3_early_mm_init(); else if (firmware_has_feature(FW_FEATURE_LPAR)) - hpte_init_lpar(); + hpte_init_pseries(); else if IS_ENABLED(CONFIG_PPC_NATIVE) hpte_init_native(); diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 0e91388..86707e6 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -589,7 +589,7 @@ static int __init disable_bulk_remove(char *str) __setup("bulk_remove=", disable_bulk_remove); -void __init hpte_init_lpar(void) +void __init hpte_init_pseries(void) { mmu_hash_ops.hpte_invalidate = pSeries_lpar_hpte_invalidate; mmu_hash_ops.hpte_updatepp = pSeries_lpar_hpte_updatepp; -- cgit 
v0.10.2 From 1a1cee843c4a532f57083ffe8cbf4fcd3dcb381d Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 25 Jul 2016 11:35:08 +1000 Subject: powerpc/mm: Drop unused externs for hpte_init_beat[_v3]() We removed the BEAT support in 2015 in commit bf4981a00636 ("powerpc: Remove the celleb support"). These externs are unused since then. Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index 450b017..5eaf86a 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -398,8 +398,6 @@ static inline void hpte_init_pseries(void) { } #endif extern void hpte_init_native(void); -extern void hpte_init_beat(void); -extern void hpte_init_beat_v3(void); extern void slb_initialize(void); extern void slb_flush_and_rebolt(void); -- cgit v0.10.2 From dd57023747e33572b31867f890b0d99f55b5cc2f Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Fri, 20 May 2016 04:41:34 +1000 Subject: powerpc: Improve comment explaining why we modify VRSAVE The comment explaining why we modify VRSAVE is misleading, glibc does rely on the behaviour. Update the comment. Signed-off-by: Anton Blanchard Reviewed-by: Cyril Bur Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index 1c2e7a3..616a6d8 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -70,10 +70,11 @@ _GLOBAL(load_up_altivec) MTMSRD(r5) /* enable use of AltiVec now */ isync - /* Hack: if we get an altivec unavailable trap with VRSAVE - * set to all zeros, we assume this is a broken application - * that fails to set it properly, and thus we switch it to - * all 1's + /* + * While userspace in general ignores VRSAVE, glibc uses it as a boolean + * to optimise userspace context save/restore. Whenever we take an + * altivec unavailable exception we must set VRSAVE to something non + * zero. Set it to all 1s. 
See also the programming note in the ISA. */ mfspr r4,SPRN_VRSAVE cmpwi 0,r4,0 -- cgit v0.10.2 From a431b946db581d6a121d035a887d370cdc4b8dea Mon Sep 17 00:00:00 2001 From: Cyril Bur Date: Wed, 29 Jun 2016 21:41:51 +1000 Subject: selftests/powerpc: exec() with suspended transaction Perform an exec() class syscall with a suspended transaction. This is a test for the bug we fixed in 8e96a87c5431 ("powerpc/tm: Always reclaim in start_thread() for exec() class syscalls"). Signed-off-by: Cyril Bur [mpe: Fix build errors, use a single binary for the test] Signed-off-by: Michael Ellerman diff --git a/tools/testing/selftests/powerpc/tm/.gitignore b/tools/testing/selftests/powerpc/tm/.gitignore index bb942db..82c0a9c 100644 --- a/tools/testing/selftests/powerpc/tm/.gitignore +++ b/tools/testing/selftests/powerpc/tm/.gitignore @@ -6,3 +6,4 @@ tm-vmxcopy tm-fork tm-tar tm-tmspr +tm-exec diff --git a/tools/testing/selftests/powerpc/tm/Makefile b/tools/testing/selftests/powerpc/tm/Makefile index d0505db..9d301d7 100644 --- a/tools/testing/selftests/powerpc/tm/Makefile +++ b/tools/testing/selftests/powerpc/tm/Makefile @@ -1,11 +1,14 @@ -TEST_PROGS := tm-resched-dscr tm-syscall tm-signal-msr-resv tm-signal-stack tm-vmxcopy tm-fork tm-tar tm-tmspr +TEST_PROGS := tm-resched-dscr tm-syscall tm-signal-msr-resv tm-signal-stack \ + tm-vmxcopy tm-fork tm-tar tm-tmspr tm-exec tm-execed all: $(TEST_PROGS) $(TEST_PROGS): ../harness.c ../utils.c +CFLAGS += -mhtm + tm-syscall: tm-syscall-asm.S -tm-syscall: CFLAGS += -mhtm -I../../../../../usr/include +tm-syscall: CFLAGS += -I../../../../../usr/include tm-tmspr: CFLAGS += -pthread include ../../lib.mk diff --git a/tools/testing/selftests/powerpc/tm/tm-exec.c b/tools/testing/selftests/powerpc/tm/tm-exec.c new file mode 100644 index 0000000..3d27fa0 --- /dev/null +++ b/tools/testing/selftests/powerpc/tm/tm-exec.c @@ -0,0 +1,70 @@ +/* + * Copyright 2016, Cyril Bur, IBM Corp. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Syscalls can be performed provided the transactions are suspended. + * The exec() class of syscall is unique as a new process is loaded. + * + * It makes little sense for after an exec() call for the previously + * suspended transaction to still exist. + */ + +#define _GNU_SOURCE +#include <errno.h> +#include <inttypes.h> +#include <libgen.h> +#include <pthread.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "utils.h" +#include "tm.h" + +static char *path; + +static int test_exec(void) +{ + SKIP_IF(!have_htm()); + + asm __volatile__( + "tbegin.;" + "blt 1f; " + "tsuspend.;" + "1: ;" + : : : "memory"); + + execl(path, "tm-exec", "--child", NULL); + + /* Shouldn't get here */ + perror("execl() failed"); + return 1; +} + +static int after_exec(void) +{ + asm __volatile__( + "tbegin.;" + "blt 1f;" + "tsuspend.;" + "1: ;" + : : : "memory"); + + FAIL_IF(failure_is_nesting()); + return 0; +} + +int main(int argc, char *argv[]) +{ + path = argv[0]; + + if (argc > 1 && strcmp(argv[1], "--child") == 0) + return after_exec(); + + return test_harness(test_exec, "tm_exec"); +} diff --git a/tools/testing/selftests/powerpc/tm/tm-syscall.c b/tools/testing/selftests/powerpc/tm/tm-syscall.c index 60560cb..454b965 100644 --- a/tools/testing/selftests/powerpc/tm/tm-syscall.c +++ b/tools/testing/selftests/powerpc/tm/tm-syscall.c @@ -27,21 +27,6 @@ unsigned retries = 0; #define TEST_DURATION 10 /* seconds */ #define TM_RETRIES 100 -long failure_code(void) -{ - return __builtin_get_texasru() >> 24; -} - -bool failure_is_persistent(void) -{ - return (failure_code() & TM_CAUSE_PERSISTENT) == TM_CAUSE_PERSISTENT; -} - -bool failure_is_syscall(void) -{ - return (failure_code() & TM_CAUSE_SYSCALL) == TM_CAUSE_SYSCALL; -} - pid_t getppid_tm(bool suspend) { int i; diff
--git a/tools/testing/selftests/powerpc/tm/tm.h b/tools/testing/selftests/powerpc/tm/tm.h index 24144b2..60318ba 100644 --- a/tools/testing/selftests/powerpc/tm/tm.h +++ b/tools/testing/selftests/powerpc/tm/tm.h @@ -6,8 +6,9 @@ #ifndef _SELFTESTS_POWERPC_TM_TM_H #define _SELFTESTS_POWERPC_TM_TM_H -#include <stdbool.h> +#include <asm/tm.h> #include <asm/cputable.h> +#include <stdbool.h> #include "../utils.h" @@ -31,4 +32,24 @@ static inline bool have_htm_nosc(void) #endif } +static inline long failure_code(void) +{ + return __builtin_get_texasru() >> 24; +} + +static inline bool failure_is_persistent(void) +{ + return (failure_code() & TM_CAUSE_PERSISTENT) == TM_CAUSE_PERSISTENT; +} + +static inline bool failure_is_syscall(void) +{ + return (failure_code() & TM_CAUSE_SYSCALL) == TM_CAUSE_SYSCALL; +} + +static inline bool failure_is_nesting(void) +{ + return (__builtin_get_texasru() & 0x400000); +} + #endif /* _SELFTESTS_POWERPC_TM_TM_H */ -- cgit v0.10.2 From bbc3dfe8805de86874b1a1b1429a002e8670043e Mon Sep 17 00:00:00 2001 From: Sam Mendoza-Jonas Date: Mon, 11 Jul 2016 13:38:57 +1000 Subject: tty/hvc: Use IRQF_SHARED for OPAL hvc consoles Commit 2def86a7200c ("hvc: Convert to using interrupts instead of opal events") enabled the use of interrupts in the hvc_driver for OPAL platforms. However on machines with more than one hvc console, any console after the first will fail to register an interrupt handler in notifier_add_irq() since all consoles share the same IRQ number but do not set the IRQF_SHARED flag: genirq: Flags mismatch irq 31. 00000000 (hvc_console) vs. 00000000 (hvc_console) hvc_open: request_irq failed with rc -16. This error propagates up to hvc_open() and the console is closed, but OPAL will still generate interrupts that are not handled, leading to rcu_sched stall warnings. Set IRQF_SHARED when calling request_irq(), allowing additional consoles to start properly. This is only set for consoles handled by hvc_opal_probe(), leaving other types unaffected.
Signed-off-by: Samuel Mendoza-Jonas Signed-off-by: Michael Ellerman diff --git a/drivers/tty/hvc/hvc_console.h b/drivers/tty/hvc/hvc_console.h index 9131019..798c48d 100644 --- a/drivers/tty/hvc/hvc_console.h +++ b/drivers/tty/hvc/hvc_console.h @@ -60,6 +60,7 @@ struct hvc_struct { struct winsize ws; struct work_struct tty_resize; struct list_head next; + unsigned long flags; }; /* implemented by a low level driver */ diff --git a/drivers/tty/hvc/hvc_irq.c b/drivers/tty/hvc/hvc_irq.c index c9adb05..bc7a968 100644 --- a/drivers/tty/hvc/hvc_irq.c +++ b/drivers/tty/hvc/hvc_irq.c @@ -14,6 +14,11 @@ static irqreturn_t hvc_handle_interrupt(int irq, void *dev_instance) /* if hvc_poll request a repoll, then kick the hvcd thread */ if (hvc_poll(dev_instance)) hvc_kick(); + + /* + * We're safe to always return IRQ_HANDLED as the hvcd thread will + * iterate through each hvc_struct. + */ return IRQ_HANDLED; } @@ -28,8 +33,8 @@ int notifier_add_irq(struct hvc_struct *hp, int irq) hp->irq_requested = 0; return 0; } - rc = request_irq(irq, hvc_handle_interrupt, 0, - "hvc_console", hp); + rc = request_irq(irq, hvc_handle_interrupt, hp->flags, + "hvc_console", hp); if (!rc) hp->irq_requested = 1; return rc; diff --git a/drivers/tty/hvc/hvc_opal.c b/drivers/tty/hvc/hvc_opal.c index 47b54c6..b7cd0ae 100644 --- a/drivers/tty/hvc/hvc_opal.c +++ b/drivers/tty/hvc/hvc_opal.c @@ -224,6 +224,9 @@ static int hvc_opal_probe(struct platform_device *dev) hp = hvc_alloc(termno, irq, ops, MAX_VIO_PUT_CHARS); if (IS_ERR(hp)) return PTR_ERR(hp); + + /* hvc consoles on powernv may need to share a single irq */ + hp->flags = IRQF_SHARED; dev_set_drvdata(&dev->dev, hp); return 0; -- cgit v0.10.2 From 00dab8187e182da41122f66c207707b192509df4 Mon Sep 17 00:00:00 2001 From: Sam Mendoza-Jonas Date: Mon, 11 Jul 2016 13:38:58 +1000 Subject: tty/hvc: Use opal irqchip interface if available Update the hvc driver to use the OPAL irqchip if made available by the running firmware. 
If it is not present, the driver falls back to the existing OPAL event number. Signed-off-by: Samuel Mendoza-Jonas Signed-off-by: Michael Ellerman diff --git a/drivers/tty/hvc/hvc_opal.c b/drivers/tty/hvc/hvc_opal.c index b7cd0ae..5107993 100644 --- a/drivers/tty/hvc/hvc_opal.c +++ b/drivers/tty/hvc/hvc_opal.c @@ -214,7 +214,13 @@ static int hvc_opal_probe(struct platform_device *dev) dev->dev.of_node->full_name, boot ? " (boot console)" : ""); - irq = opal_event_request(ilog2(OPAL_EVENT_CONSOLE_INPUT)); + irq = irq_of_parse_and_map(dev->dev.of_node, 0); + if (!irq) { + pr_info("hvc%d: No interrupts property, using OPAL event\n", + termno); + irq = opal_event_request(ilog2(OPAL_EVENT_CONSOLE_INPUT)); + } + if (!irq) { pr_err("hvc_opal: Unable to map interrupt for device %s\n", dev->dev.of_node->full_name); -- cgit v0.10.2 From fbef66f0adcddf4475e19f3d09df22fb34e633f6 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Thu, 28 Jul 2016 12:35:02 +1000 Subject: powerpc/mm: Parenthesise IS_ENABLED() in if condition Currently IS_ENABLED() produces an expression surrounded by parentheses, which allows this code to compile, generating eg: else if (1 || 0) hpte_init_native(); However a change to the macro in the kbuild tree will break this in future by removing the parentheses. Fixes: 7353644fa9df ("powerpc/mm: Fix build break when PPC_NATIVE=n") Signed-off-by: Stephen Rothwell Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 1ff11c1..b78b5d2 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -926,7 +926,7 @@ void __init hash__early_init_mmu(void) ps3_early_mm_init(); else if (firmware_has_feature(FW_FEATURE_LPAR)) hpte_init_pseries(); - else if IS_ENABLED(CONFIG_PPC_NATIVE) + else if (IS_ENABLED(CONFIG_PPC_NATIVE)) hpte_init_native(); if (!mmu_hash_ops.hpte_insert) -- cgit v0.10.2