diff options
Diffstat (limited to 'arch/powerpc/platforms')
129 files changed, 5094 insertions, 6467 deletions
diff --git a/arch/powerpc/platforms/44x/currituck.c b/arch/powerpc/platforms/44x/currituck.c index ecd3890..7f1b71a 100644 --- a/arch/powerpc/platforms/44x/currituck.c +++ b/arch/powerpc/platforms/44x/currituck.c @@ -91,12 +91,12 @@ static void __init ppc47x_init_irq(void) } #ifdef CONFIG_SMP -static void __cpuinit smp_ppc47x_setup_cpu(int cpu) +static void smp_ppc47x_setup_cpu(int cpu) { mpic_setup_this_cpu(); } -static int __cpuinit smp_ppc47x_kick_cpu(int cpu) +static int smp_ppc47x_kick_cpu(int cpu) { struct device_node *cpunode = of_get_cpu_node(cpu, NULL); const u64 *spin_table_addr_prop; @@ -176,13 +176,48 @@ static int __init ppc47x_probe(void) return 1; } +static int board_rev = -1; +static int __init ppc47x_get_board_rev(void) +{ + u8 fpga_reg0; + void *fpga; + struct device_node *np; + + np = of_find_compatible_node(NULL, NULL, "ibm,currituck-fpga"); + if (!np) + goto fail; + + fpga = of_iomap(np, 0); + of_node_put(np); + if (!fpga) + goto fail; + + fpga_reg0 = ioread8(fpga); + board_rev = fpga_reg0 & 0x03; + pr_info("%s: Found board revision %d\n", __func__, board_rev); + iounmap(fpga); + return 0; + +fail: + pr_info("%s: Unable to find board revision\n", __func__); + return 0; +} +machine_arch_initcall(ppc47x, ppc47x_get_board_rev); + /* Use USB controller should have been hardware swizzled but it wasn't :( */ static void ppc47x_pci_irq_fixup(struct pci_dev *dev) { if (dev->vendor == 0x1033 && (dev->device == 0x0035 || dev->device == 0x00e0)) { - dev->irq = irq_create_mapping(NULL, 47); - pr_info("%s: Mapping irq 47 %d\n", __func__, dev->irq); + if (board_rev == 0) { + dev->irq = irq_create_mapping(NULL, 47); + pr_info("%s: Mapping irq %d\n", __func__, dev->irq); + } else if (board_rev == 2) { + dev->irq = irq_create_mapping(NULL, 49); + pr_info("%s: Mapping irq %d\n", __func__, dev->irq); + } else { + pr_alert("%s: Unknown board revision\n", __func__); + } } } diff --git a/arch/powerpc/platforms/44x/iss4xx.c b/arch/powerpc/platforms/44x/iss4xx.c index a28a862..4241bc8 100644 --- a/arch/powerpc/platforms/44x/iss4xx.c +++ b/arch/powerpc/platforms/44x/iss4xx.c @@ -81,12 +81,12 @@ static void __init iss4xx_init_irq(void) } #ifdef CONFIG_SMP -static void __cpuinit smp_iss4xx_setup_cpu(int cpu) +static void smp_iss4xx_setup_cpu(int cpu) { mpic_setup_this_cpu(); } -static int __cpuinit smp_iss4xx_kick_cpu(int cpu) +static int smp_iss4xx_kick_cpu(int cpu) { struct device_node *cpunode = of_get_cpu_node(cpu, NULL); const u64 *spin_table_addr_prop; diff --git a/arch/powerpc/platforms/44x/warp.c b/arch/powerpc/platforms/44x/warp.c index 4cfa499..534574a 100644 --- a/arch/powerpc/platforms/44x/warp.c +++ b/arch/powerpc/platforms/44x/warp.c @@ -16,7 +16,6 @@ #include <linux/interrupt.h> #include <linux/delay.h> #include <linux/of_gpio.h> -#include <linux/of_i2c.h> #include <linux/slab.h> #include <linux/export.h> diff --git a/arch/powerpc/platforms/512x/clock.c b/arch/powerpc/platforms/512x/clock.c index e504166..fd8a376 100644 --- a/arch/powerpc/platforms/512x/clock.c +++ b/arch/powerpc/platforms/512x/clock.c @@ -24,6 +24,7 @@ #include <linux/mutex.h> #include <linux/io.h> +#include <linux/of_address.h> #include <linux/of_platform.h> #include <asm/mpc5xxx.h> #include <asm/mpc5121.h> diff --git a/arch/powerpc/platforms/512x/mpc5121_ads.c b/arch/powerpc/platforms/512x/mpc5121_ads.c index 0a134e0..3e90ece 100644 --- a/arch/powerpc/platforms/512x/mpc5121_ads.c +++ b/arch/powerpc/platforms/512x/mpc5121_ads.c @@ -43,9 +43,7 @@ static void __init mpc5121_ads_setup_arch(void) mpc83xx_add_bridge(np); #endif -#if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE) - mpc512x_setup_diu(); -#endif + mpc512x_setup_arch(); } static void __init mpc5121_ads_init_IRQ(void) @@ -69,7 +67,7 @@ define_machine(mpc5121_ads) { .probe = mpc5121_ads_probe, .setup_arch = mpc5121_ads_setup_arch, .init = mpc512x_init, - .init_early = mpc512x_init_diu, + .init_early = mpc512x_init_early, .init_IRQ = mpc5121_ads_init_IRQ, .get_irq = ipic_get_irq, .calibrate_decr = generic_calibrate_decr, diff --git a/arch/powerpc/platforms/512x/mpc512x.h b/arch/powerpc/platforms/512x/mpc512x.h index 0a8e600..cc97f02 100644 --- a/arch/powerpc/platforms/512x/mpc512x.h +++ b/arch/powerpc/platforms/512x/mpc512x.h @@ -12,18 +12,12 @@ #ifndef __MPC512X_H__ #define __MPC512X_H__ extern void __init mpc512x_init_IRQ(void); +extern void __init mpc512x_init_early(void); extern void __init mpc512x_init(void); +extern void __init mpc512x_setup_arch(void); extern int __init mpc5121_clk_init(void); -void __init mpc512x_declare_of_platform_devices(void); extern const char *mpc512x_select_psc_compat(void); +extern const char *mpc512x_select_reset_compat(void); extern void mpc512x_restart(char *cmd); -#if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE) -void mpc512x_init_diu(void); -void mpc512x_setup_diu(void); -#else -#define mpc512x_init_diu NULL -#define mpc512x_setup_diu NULL -#endif - #endif /* __MPC512X_H__ */ diff --git a/arch/powerpc/platforms/512x/mpc512x_generic.c b/arch/powerpc/platforms/512x/mpc512x_generic.c index 5fb919b..ce71408 100644 --- a/arch/powerpc/platforms/512x/mpc512x_generic.c +++ b/arch/powerpc/platforms/512x/mpc512x_generic.c @@ -45,8 +45,8 @@ define_machine(mpc512x_generic) { .name = "MPC512x generic", .probe = mpc512x_generic_probe, .init = mpc512x_init, - .init_early = mpc512x_init_diu, - .setup_arch = mpc512x_setup_diu, + .init_early = mpc512x_init_early, + .setup_arch = mpc512x_setup_arch, .init_IRQ = mpc512x_init_IRQ, .get_irq = ipic_get_irq, .calibrate_decr = generic_calibrate_decr, diff --git a/arch/powerpc/platforms/512x/mpc512x_shared.c b/arch/powerpc/platforms/512x/mpc512x_shared.c index 6eb94ab..36b5652 100644 --- a/arch/powerpc/platforms/512x/mpc512x_shared.c +++ b/arch/powerpc/platforms/512x/mpc512x_shared.c @@ -35,8 +35,10 @@ static struct mpc512x_reset_module __iomem *reset_module_base; static void __init mpc512x_restart_init(void) { struct device_node *np; + const char *reset_compat; - np = of_find_compatible_node(NULL, NULL, "fsl,mpc5121-reset"); + reset_compat = mpc512x_select_reset_compat(); + np = of_find_compatible_node(NULL, NULL, reset_compat); if (!np) return; @@ -58,8 +60,6 @@ void mpc512x_restart(char *cmd) ; } -#if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE) - struct fsl_diu_shared_fb { u8 gamma[0x300]; /* 32-bit aligned! */ struct diu_ad ad0; /* 32-bit aligned! */ @@ -69,7 +69,7 @@ struct fsl_diu_shared_fb { }; #define DIU_DIV_MASK 0x000000ff -void mpc512x_set_pixel_clock(unsigned int pixclock) +static void mpc512x_set_pixel_clock(unsigned int pixclock) { unsigned long bestval, bestfreq, speed, busfreq; unsigned long minpixclock, maxpixclock, pixval; @@ -162,7 +162,7 @@ void mpc512x_set_pixel_clock(unsigned int pixclock) iounmap(ccm); } -enum fsl_diu_monitor_port +static enum fsl_diu_monitor_port mpc512x_valid_monitor_port(enum fsl_diu_monitor_port port) { return FSL_DIU_PORT_DVI; @@ -177,7 +177,7 @@ static inline void mpc512x_free_bootmem(struct page *page) free_reserved_page(page); } -void mpc512x_release_bootmem(void) +static void mpc512x_release_bootmem(void) { unsigned long addr = diu_shared_fb.fb_phys & PAGE_MASK; unsigned long size = diu_shared_fb.fb_len; @@ -203,7 +203,7 @@ void mpc512x_release_bootmem(void) * address range will be reserved in setup_arch() after bootmem * allocator is up. */ -void __init mpc512x_init_diu(void) +static void __init mpc512x_init_diu(void) { struct device_node *np; struct diu __iomem *diu_reg; @@ -272,7 +272,7 @@ out: iounmap(diu_reg); } -void __init mpc512x_setup_diu(void) +static void __init mpc512x_setup_diu(void) { int ret; @@ -301,8 +301,6 @@ void __init mpc512x_setup_diu(void) diu_ops.release_bootmem = mpc512x_release_bootmem; } -#endif - void __init mpc512x_init_IRQ(void) { struct device_node *np; @@ -335,7 +333,7 @@ static struct of_device_id __initdata of_bus_ids[] = { {}, }; -void __init mpc512x_declare_of_platform_devices(void) +static void __init mpc512x_declare_of_platform_devices(void) { if (of_platform_bus_probe(NULL, of_bus_ids, NULL)) printk(KERN_ERR __FILE__ ": " @@ -355,6 +353,17 @@ const char *mpc512x_select_psc_compat(void) return NULL; } +const char *mpc512x_select_reset_compat(void) +{ + if (of_machine_is_compatible("fsl,mpc5121")) + return "fsl,mpc5121-reset"; + + if (of_machine_is_compatible("fsl,mpc5125")) + return "fsl,mpc5125-reset"; + + return NULL; +} + static unsigned int __init get_fifo_size(struct device_node *np, char *prop_name) { @@ -374,7 +383,7 @@ static unsigned int __init get_fifo_size(struct device_node *np, ((u32)(_base) + sizeof(struct mpc52xx_psc))) /* Init PSC FIFO space for TX and RX slices */ -void __init mpc512x_psc_fifo_init(void) +static void __init mpc512x_psc_fifo_init(void) { struct device_node *np; void __iomem *psc; @@ -436,14 +445,26 @@ void __init mpc512x_psc_fifo_init(void) } } +void __init mpc512x_init_early(void) +{ + mpc512x_restart_init(); + if (IS_ENABLED(CONFIG_FB_FSL_DIU)) + mpc512x_init_diu(); +} + void __init mpc512x_init(void) { mpc5121_clk_init(); mpc512x_declare_of_platform_devices(); - mpc512x_restart_init(); mpc512x_psc_fifo_init(); } +void __init mpc512x_setup_arch(void) +{ + if (IS_ENABLED(CONFIG_FB_FSL_DIU)) + mpc512x_setup_diu(); +} + /** * mpc512x_cs_config - Setup chip select configuration * @cs: chip select number diff --git a/arch/powerpc/platforms/512x/pdm360ng.c b/arch/powerpc/platforms/512x/pdm360ng.c index 0575e85..116f2325 100644 --- a/arch/powerpc/platforms/512x/pdm360ng.c +++ b/arch/powerpc/platforms/512x/pdm360ng.c @@ -14,6 +14,8 @@ #include <linux/kernel.h> #include <linux/io.h> +#include <linux/of_address.h> +#include <linux/of_fdt.h> #include <linux/of_platform.h> #include <asm/machdep.h> @@ -119,9 +121,9 @@ static int __init pdm360ng_probe(void) define_machine(pdm360ng) { .name = "PDM360NG", .probe = pdm360ng_probe, - .setup_arch = mpc512x_setup_diu, + .setup_arch = mpc512x_setup_arch, .init = pdm360ng_init, - .init_early = mpc512x_init_diu, + .init_early = mpc512x_init_early, .init_IRQ = mpc512x_init_IRQ, .get_irq = ipic_get_irq, .calibrate_decr = generic_calibrate_decr, diff --git a/arch/powerpc/platforms/52xx/Kconfig b/arch/powerpc/platforms/52xx/Kconfig index 90f4496..af54174 100644 --- a/arch/powerpc/platforms/52xx/Kconfig +++ b/arch/powerpc/platforms/52xx/Kconfig @@ -57,5 +57,5 @@ config PPC_MPC5200_BUGFIX config PPC_MPC5200_LPBFIFO tristate "MPC5200 LocalPlus bus FIFO driver" - depends on PPC_MPC52xx + depends on PPC_MPC52xx && PPC_BESTCOMM select PPC_BESTCOMM_GEN_BD diff --git a/arch/powerpc/platforms/52xx/mpc52xx_pic.c b/arch/powerpc/platforms/52xx/mpc52xx_pic.c index b89ef65..2898b73 100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_pic.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_pic.c @@ -340,7 +340,7 @@ static int mpc52xx_irqhost_map(struct irq_domain *h, unsigned int virq, { int l1irq; int l2irq; - struct irq_chip *irqchip; + struct irq_chip *uninitialized_var(irqchip); void *hndlr; int type; u32 reg; diff --git a/arch/powerpc/platforms/82xx/mpc8272_ads.c b/arch/powerpc/platforms/82xx/mpc8272_ads.c index 30394b4..6a14cf5 100644 --- a/arch/powerpc/platforms/82xx/mpc8272_ads.c +++ b/arch/powerpc/platforms/82xx/mpc8272_ads.c @@ -16,6 +16,8 @@ #include <linux/init.h> #include <linux/interrupt.h> #include <linux/fsl_devices.h> +#include <linux/of_address.h> +#include <linux/of_fdt.h> #include <linux/of_platform.h> #include <linux/io.h> diff --git a/arch/powerpc/platforms/82xx/pq2fads.c b/arch/powerpc/platforms/82xx/pq2fads.c index e1dceee..e5f82ec 100644 --- a/arch/powerpc/platforms/82xx/pq2fads.c +++ b/arch/powerpc/platforms/82xx/pq2fads.c @@ -15,6 +15,8 @@ #include <linux/init.h> #include <linux/interrupt.h> #include <linux/fsl_devices.h> +#include <linux/of_address.h> +#include <linux/of_fdt.h> #include <linux/of_platform.h> #include <asm/io.h> diff --git a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c index 624cb51..fd71cfd 100644 --- a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c +++ b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c @@ -204,7 +204,6 @@ static int mcu_remove(struct i2c_client *client) ret = mcu_gpiochip_remove(mcu); if (ret) return ret; - i2c_set_clientdata(client, NULL); kfree(mcu); return 0; } @@ -231,17 +230,7 @@ static struct i2c_driver mcu_driver = { .id_table = mcu_ids, }; -static int __init mcu_init(void) -{ - return i2c_add_driver(&mcu_driver); -} -module_init(mcu_init); - -static void __exit mcu_exit(void) -{ - i2c_del_driver(&mcu_driver); -} -module_exit(mcu_exit); +module_i2c_driver(mcu_driver); MODULE_DESCRIPTION("Power Management and GPIO expander driver for " "MPC8349E-mITX-compatible MCU"); diff --git a/arch/powerpc/platforms/83xx/suspend.c b/arch/powerpc/platforms/83xx/suspend.c index 1d769a2..3d9716c 100644 --- a/arch/powerpc/platforms/83xx/suspend.c +++ b/arch/powerpc/platforms/83xx/suspend.c @@ -20,6 +20,8 @@ #include <linux/freezer.h> #include <linux/suspend.h> #include <linux/fsl_devices.h> +#include <linux/of_address.h> +#include <linux/of_irq.h> #include <linux/of_platform.h> #include <linux/export.h> diff --git a/arch/powerpc/platforms/85xx/Kconfig b/arch/powerpc/platforms/85xx/Kconfig index 60afff1..25d7580 100644 --- a/arch/powerpc/platforms/85xx/Kconfig +++ b/arch/powerpc/platforms/85xx/Kconfig @@ -42,7 +42,6 @@ config BSC9132_QDS config C293_PCIE bool "Freescale C293PCIE" select DEFAULT_UIMAGE - select SWIOTLB help This option enables support for the C293PCIE board @@ -132,10 +131,10 @@ config P1023_RDB This option enables support for the P1023 RDB board config P1023_RDS - bool "Freescale P1023 RDS" + bool "Freescale P1023 RDS/RDB" select DEFAULT_UIMAGE help - This option enables support for the P1023 RDS board + This option enables support for the P1023 RDS and RDB boards config TWR_P102x bool "Freescale TWR-P102x" @@ -238,96 +237,16 @@ config GE_IMP3A This board is a 3U CompactPCI Single Board Computer with a Freescale P2020 processor. -config P2041_RDB - bool "Freescale P2041 RDB" - select DEFAULT_UIMAGE - select PPC_E500MC - select PHYS_64BIT - select SWIOTLB - select ARCH_REQUIRE_GPIOLIB - select GPIO_MPC8XXX - select HAS_RAPIDIO - select PPC_EPAPR_HV_PIC - select HAS_FSL_QBMAN - help - This option enables support for the P2041 RDB board - -config P3041_DS - bool "Freescale P3041 DS" - select DEFAULT_UIMAGE - select PPC_E500MC - select PHYS_64BIT - select SWIOTLB - select ARCH_REQUIRE_GPIOLIB - select GPIO_MPC8XXX - select HAS_RAPIDIO - select PPC_EPAPR_HV_PIC - select HAS_FSL_QBMAN - select MDIO_BUS_MUX if FSL_DPAA_ETH - select MDIO_BUS_MUX_MMIOREG if FSL_DPAA_ETH - help - This option enables support for the P3041 DS board - -config P4080_DS - bool "Freescale P4080 DS" - select DEFAULT_UIMAGE - select PPC_E500MC - select PHYS_64BIT - select SWIOTLB - select ARCH_REQUIRE_GPIOLIB - select GPIO_MPC8XXX - select HAS_RAPIDIO - select PPC_EPAPR_HV_PIC - select HAS_FSL_QBMAN - select MDIO_BUS_MUX if FSL_DPAA_ETH - select MDIO_BUS_MUX_GPIO - help - This option enables support for the P4080 DS board - config SGY_CTS1000 tristate "Servergy CTS-1000 support" select GPIOLIB select OF_GPIO - depends on P4080_DS + depends on CORENET_GENERIC help Enable this to support functionality in Servergy's CTS-1000 systems. endif # PPC32 -config P5020_DS - bool "Freescale P5020 DS" - select DEFAULT_UIMAGE - select E500 - select PPC_E500MC - select PHYS_64BIT - select SWIOTLB - select ARCH_REQUIRE_GPIOLIB - select GPIO_MPC8XXX - select HAS_RAPIDIO - select PPC_EPAPR_HV_PIC - select HAS_FSL_QBMAN - select MDIO_BUS_MUX if FSL_DPAA_ETH - select MDIO_BUS_MUX_MMIOREG if FSL_DPAA_ETH - help - This option enables support for the P5020 DS board - -config P5040_DS - bool "Freescale P5040 DS" - select DEFAULT_UIMAGE - select E500 - select PPC_E500MC - select PHYS_64BIT - select SWIOTLB - select ARCH_REQUIRE_GPIOLIB - select GPIO_MPC8XXX - select HAS_RAPIDIO - select PPC_EPAPR_HV_PIC - select HAS_FSL_QBMAN - select MDIO_BUS_MUX if FSL_DPAA_ETH - select MDIO_BUS_MUX_MMIOREG if FSL_DPAA_ETH - help - This option enables support for the P5040 DS board - config PPC_QEMU_E500 bool "QEMU generic e500 platform" select DEFAULT_UIMAGE @@ -343,10 +262,8 @@ config PPC_QEMU_E500 unset based on the emulated CPU (or actual host CPU in the case of KVM). -if PPC64 - -config T4240_QDS - bool "Freescale T4240 QDS" +config CORENET_GENERIC + bool "Freescale CoreNet Generic" select DEFAULT_UIMAGE select E500 select PPC_E500MC @@ -360,28 +277,14 @@ config T4240_QDS select MDIO_BUS_MUX if FSL_DPAA_ETH select MDIO_BUS_MUX_MMIOREG if FSL_DPAA_ETH help - This option enables support for the T4240 QDS board - -config B4_QDS - bool "Freescale B4 QDS" - select DEFAULT_UIMAGE - select E500 - select PPC_E500MC - select PHYS_64BIT - select SWIOTLB - select GPIOLIB - select ARCH_REQUIRE_GPIOLIB - select HAS_RAPIDIO - select PPC_EPAPR_HV_PIC - select MDIO_BUS_MUX if FSL_DPAA_ETH - select MDIO_BUS_MUX_MMIOREG if FSL_DPAA_ETH - help - This option enables support for the B4 QDS board - The B4 application development system B4 QDS is a complete - debugging environment intended for engineers developing - applications for the B4. + This option enables support for the FSL CoreNet based boards. + For 32bit kernel, the following boards are supported: + P2041 RDB, P3041 DS and P4080 DS + For 64bit kernel, the following boards are supported: + T4240 QDS and B4 QDS + The following boards are supported for both 32bit and 64bit kernel: + P5020 DS and P5040 DS -endif endif # FSL_SOC_BOOKE config TQM85xx diff --git a/arch/powerpc/platforms/85xx/Makefile b/arch/powerpc/platforms/85xx/Makefile index 05ed088..f385b5c 100644 --- a/arch/powerpc/platforms/85xx/Makefile +++ b/arch/powerpc/platforms/85xx/Makefile @@ -24,13 +24,7 @@ obj-$(CONFIG_P1022_DS) += p1022_ds.o obj-$(CONFIG_P1022_RDK) += p1022_rdk.o obj-$(CONFIG_P1023_RDS) += p1023_rds.o obj-$(CONFIG_TWR_P102x) += twr_p102x.o -obj-$(CONFIG_P2041_RDB) += p2041_rdb.o corenet_ds.o -obj-$(CONFIG_P3041_DS) += p3041_ds.o corenet_ds.o -obj-$(CONFIG_P4080_DS) += p4080_ds.o corenet_ds.o -obj-$(CONFIG_P5020_DS) += p5020_ds.o corenet_ds.o -obj-$(CONFIG_P5040_DS) += p5040_ds.o corenet_ds.o -obj-$(CONFIG_T4240_QDS) += t4240_qds.o corenet_ds.o -obj-$(CONFIG_B4_QDS) += b4_qds.o corenet_ds.o +obj-$(CONFIG_CORENET_GENERIC) += corenet_generic.o obj-$(CONFIG_STX_GP3) += stx_gp3.o obj-$(CONFIG_TQM85xx) += tqm85xx.o obj-$(CONFIG_SBC8548) += sbc8548.o diff --git a/arch/powerpc/platforms/85xx/b4_qds.c b/arch/powerpc/platforms/85xx/b4_qds.c deleted file mode 100644 index d4b6a2c..0000000 --- a/arch/powerpc/platforms/85xx/b4_qds.c +++ /dev/null @@ -1,92 +0,0 @@ -/* - * B4 QDS Setup - * Should apply for QDS platform of B4860 and it's personalities. - * viz B4860/B4420/B4220QDS - * - * Copyright 2012 Freescale Semiconductor Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - */ - -#include <linux/kernel.h> -#include <linux/pci.h> -#include <linux/kdev_t.h> -#include <linux/delay.h> -#include <linux/interrupt.h> -#include <linux/phy.h> - -#include <asm/time.h> -#include <asm/machdep.h> -#include <asm/pci-bridge.h> -#include <mm/mmu_decl.h> -#include <asm/prom.h> -#include <asm/udbg.h> -#include <asm/mpic.h> - -#include <linux/of_platform.h> -#include <sysdev/fsl_soc.h> -#include <sysdev/fsl_pci.h> -#include <asm/ehv_pic.h> - -#include "corenet_ds.h" - -/* - * Called very early, device-tree isn't unflattened - */ -static int __init b4_qds_probe(void) -{ - unsigned long root = of_get_flat_dt_root(); - - if ((of_flat_dt_is_compatible(root, "fsl,B4860QDS")) || - (of_flat_dt_is_compatible(root, "fsl,B4420QDS")) || - (of_flat_dt_is_compatible(root, "fsl,B4220QDS"))) - return 1; - - /* Check if we're running under the Freescale hypervisor */ - if ((of_flat_dt_is_compatible(root, "fsl,B4860QDS-hv")) || - (of_flat_dt_is_compatible(root, "fsl,B4420QDS-hv")) || - (of_flat_dt_is_compatible(root, "fsl,B4220QDS-hv"))) { - ppc_md.init_IRQ = ehv_pic_init; - ppc_md.get_irq = ehv_pic_get_irq; - ppc_md.restart = fsl_hv_restart; - ppc_md.power_off = fsl_hv_halt; - ppc_md.halt = fsl_hv_halt; - return 1; - } - - return 0; -} - -define_machine(b4_qds) { - .name = "B4 QDS", - .probe = b4_qds_probe, - .setup_arch = corenet_ds_setup_arch, - .init_IRQ = corenet_ds_pic_init, -#ifdef CONFIG_PCI - .pcibios_fixup_bus = fsl_pcibios_fixup_bus, -#endif -/* coreint doesn't play nice with lazy EE, use legacy mpic for now */ -#ifdef CONFIG_PPC64 - .get_irq = mpic_get_irq, -#else - .get_irq = mpic_get_coreint_irq, -#endif - .restart = fsl_rstcr_restart, - .calibrate_decr = generic_calibrate_decr, - .progress = udbg_progress, -#ifdef CONFIG_PPC64 - .power_save = book3e_idle, -#else - .power_save = e500_idle, -#endif - .init_early = corenet_ds_init_early, -}; - -machine_arch_initcall(b4_qds, corenet_ds_publish_devices); - -#ifdef CONFIG_SWIOTLB -machine_arch_initcall(b4_qds, swiotlb_setup_bus_notifier); -#endif diff --git a/arch/powerpc/platforms/85xx/c293pcie.c b/arch/powerpc/platforms/85xx/c293pcie.c index 75dda12..213d5b8 100644 --- a/arch/powerpc/platforms/85xx/c293pcie.c +++ b/arch/powerpc/platforms/85xx/c293pcie.c @@ -11,16 +11,10 @@ #include <linux/stddef.h> #include <linux/kernel.h> -#include <linux/pci.h> -#include <linux/delay.h> -#include <linux/interrupt.h> +#include <linux/of_fdt.h> #include <linux/of_platform.h> -#include <asm/time.h> #include <asm/machdep.h> -#include <asm/pci-bridge.h> -#include <mm/mmu_decl.h> -#include <asm/prom.h> #include <asm/udbg.h> #include <asm/mpic.h> diff --git a/arch/powerpc/platforms/85xx/common.c b/arch/powerpc/platforms/85xx/common.c index f63c10d..d694407 100644 --- a/arch/powerpc/platforms/85xx/common.c +++ b/arch/powerpc/platforms/85xx/common.c @@ -5,6 +5,8 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ + +#include <linux/of_irq.h> #include <linux/of_platform.h> #include <sysdev/cpm2_pic.h> diff --git a/arch/powerpc/platforms/85xx/corenet_ds.h b/arch/powerpc/platforms/85xx/corenet_ds.h deleted file mode 100644 index a5b63c6..0000000 --- a/arch/powerpc/platforms/85xx/corenet_ds.h +++ /dev/null @@ -1,20 +0,0 @@ -/* - * Corenet based SoC DS Setup - * - * Copyright 2009 Freescale Semiconductor Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - */ - -#ifndef CORENET_DS_H -#define CORENET_DS_H - -extern void __init corenet_ds_pic_init(void); -extern void __init corenet_ds_setup_arch(void); -extern int __init corenet_ds_publish_devices(void); -extern void __init corenet_ds_init_early(void); - -#endif diff --git a/arch/powerpc/platforms/85xx/corenet_ds.c b/arch/powerpc/platforms/85xx/corenet_generic.c index 004ffb4..30a5c43 100644 --- a/arch/powerpc/platforms/85xx/corenet_ds.c +++ b/arch/powerpc/platforms/85xx/corenet_generic.c @@ -5,8 +5,8 @@ * * Copyright 2009-2011 Freescale Semiconductor Inc. * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the * Free Software Foundation; either version 2 of the License, or (at your * option) any later version. */ @@ -25,6 +25,7 @@ #include <asm/prom.h> #include <asm/udbg.h> #include <asm/mpic.h> +#include <asm/ehv_pic.h> #include <linux/of_platform.h> #include <sysdev/fsl_soc.h> @@ -33,7 +34,7 @@ #include <linux/fsl_usdpaa.h> -void __init corenet_ds_pic_init(void) +void __init corenet_gen_pic_init(void) { struct mpic *mpic; unsigned int flags = MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU | @@ -51,16 +52,10 @@ void __init corenet_ds_pic_init(void) /* * Setup the architecture */ -void __init corenet_ds_setup_arch(void) +void __init corenet_gen_setup_arch(void) { mpc85xx_smp_init(); -#if defined(CONFIG_PCI) && defined(CONFIG_PPC64) - pci_devs_phb_init(); -#endif - - fsl_pci_assign_primary(); - swiotlb_detect_4g(); pr_info("%s board from Freescale Semiconductor\n", ppc_md.name); @@ -68,7 +63,7 @@ void __init corenet_ds_setup_arch(void) static const struct of_device_id of_device_ids[] = { { - .compatible = "simple-bus", + .compatible = "simple-bus" }, { .compatible = "fsl,dpaa", @@ -110,11 +105,60 @@ static const struct of_device_id of_device_ids[] = { {} }; -int __init corenet_ds_publish_devices(void) +int __init corenet_gen_publish_devices(void) { return of_platform_bus_probe(NULL, of_device_ids, NULL); } +static const char * const boards[] __initconst = { + "fsl,P2041RDB", + "fsl,P3041DS", + "fsl,P4080DS", + "fsl,P5020DS", + "fsl,P5040DS", + "fsl,T4240QDS", + "fsl,B4860QDS", + "fsl,B4420QDS", + "fsl,B4220QDS", + NULL +}; + +static const char * const hv_boards[] __initconst = { + "fsl,P2041RDB-hv", + "fsl,P3041DS-hv", + "fsl,P4080DS-hv", + "fsl,P5020DS-hv", + "fsl,P5040DS-hv", + "fsl,T4240QDS-hv", + "fsl,B4860QDS-hv", + "fsl,B4420QDS-hv", + "fsl,B4220QDS-hv", + NULL +}; + +/* + * Called very early, device-tree isn't unflattened + */ +static int __init corenet_generic_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + + if (of_flat_dt_match(root, boards)) + return 1; + + /* Check if we're running under the Freescale hypervisor */ + if (of_flat_dt_match(root, hv_boards)) { + ppc_md.init_IRQ = ehv_pic_init; + ppc_md.get_irq = ehv_pic_get_irq; + ppc_md.restart = fsl_hv_restart; + ppc_md.power_off = fsl_hv_halt; + ppc_md.halt = fsl_hv_halt; + return 1; + } + + return 0; +} + /* Early setup is required for large chunks of contiguous (and coarsely-aligned) * memory. The following shoe-horns Q/Bman "init_early" calls into the * platform setup to let them parse their CCSR nodes early on. */ @@ -128,7 +172,7 @@ void __init bman_init_early(void); void __init pme2_init_early(void); #endif -__init void corenet_ds_init_early(void) +static __init void corenet_ds_init_early(void) { #ifdef CONFIG_FSL_QMAN_CONFIG qman_init_early(); @@ -143,3 +187,29 @@ __init void corenet_ds_init_early(void) fsl_usdpaa_init_early(); #endif } + +define_machine(corenet_generic) { + .name = "CoreNet Generic", + .probe = corenet_generic_probe, + .setup_arch = corenet_gen_setup_arch, + .init_IRQ = corenet_gen_pic_init, +#ifdef CONFIG_PCI + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, +#endif + .get_irq = mpic_get_coreint_irq, + .restart = fsl_rstcr_restart, + .calibrate_decr = generic_calibrate_decr, + .progress = udbg_progress, +#ifdef CONFIG_PPC64 + .power_save = book3e_idle, +#else + .power_save = e500_idle, +#endif + .init_early = corenet_ds_init_early, +}; + +machine_arch_initcall(corenet_generic, corenet_gen_publish_devices); + +#ifdef CONFIG_SWIOTLB +machine_arch_initcall(corenet_generic, swiotlb_setup_bus_notifier); +#endif diff --git a/arch/powerpc/platforms/85xx/p1010rdb.c b/arch/powerpc/platforms/85xx/p1010rdb.c index 0252961..d6a3dd3 100644 --- a/arch/powerpc/platforms/85xx/p1010rdb.c +++ b/arch/powerpc/platforms/85xx/p1010rdb.c @@ -66,6 +66,8 @@ static int __init p1010_rdb_probe(void) if (of_flat_dt_is_compatible(root, "fsl,P1010RDB")) return 1; + if (of_flat_dt_is_compatible(root, "fsl,P1010RDB-PB")) + return 1; return 0; } diff --git a/arch/powerpc/platforms/85xx/p2041_rdb.c b/arch/powerpc/platforms/85xx/p2041_rdb.c deleted file mode 100644 index 9ee8758..0000000 --- a/arch/powerpc/platforms/85xx/p2041_rdb.c +++ /dev/null @@ -1,77 +0,0 @@ -/* - * P2041 RDB Setup - * - * Copyright 2011 Freescale Semiconductor Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - */ - -#include <linux/kernel.h> -#include <linux/pci.h> -#include <linux/kdev_t.h> -#include <linux/delay.h> -#include <linux/interrupt.h> -#include <linux/phy.h> - -#include <asm/time.h> -#include <asm/machdep.h> -#include <asm/pci-bridge.h> -#include <mm/mmu_decl.h> -#include <asm/prom.h> -#include <asm/udbg.h> -#include <asm/mpic.h> - -#include <linux/of_platform.h> -#include <sysdev/fsl_soc.h> -#include <sysdev/fsl_pci.h> -#include <asm/ehv_pic.h> - -#include "corenet_ds.h" - -/* - * Called very early, device-tree isn't unflattened - */ -static int __init p2041_rdb_probe(void) -{ - unsigned long root = of_get_flat_dt_root(); - - if (of_flat_dt_is_compatible(root, "fsl,P2041RDB")) - return 1; - - /* Check if we're running under the Freescale hypervisor */ - if (of_flat_dt_is_compatible(root, "fsl,P2041RDB-hv")) { - ppc_md.init_IRQ = ehv_pic_init; - ppc_md.get_irq = ehv_pic_get_irq; - ppc_md.restart = fsl_hv_restart; - ppc_md.power_off = fsl_hv_halt; - ppc_md.halt = fsl_hv_halt; - return 1; - } - - return 0; -} - -define_machine(p2041_rdb) { - .name = "P2041 RDB", - .probe = p2041_rdb_probe, - .setup_arch = corenet_ds_setup_arch, - .init_IRQ = corenet_ds_pic_init, -#ifdef CONFIG_PCI - .pcibios_fixup_bus = fsl_pcibios_fixup_bus, -#endif - .get_irq = mpic_get_coreint_irq, - .restart = fsl_rstcr_restart, - .calibrate_decr = generic_calibrate_decr, - .progress = udbg_progress, - .power_save = e500_idle, - .init_early = corenet_ds_init_early, -}; - -machine_arch_initcall(p2041_rdb, corenet_ds_publish_devices); - -#ifdef CONFIG_SWIOTLB -machine_arch_initcall(p2041_rdb, swiotlb_setup_bus_notifier); -#endif diff --git a/arch/powerpc/platforms/85xx/p3041_ds.c b/arch/powerpc/platforms/85xx/p3041_ds.c deleted file mode 100644 index b8af7fb..0000000 --- a/arch/powerpc/platforms/85xx/p3041_ds.c +++ /dev/null @@ -1,79 +0,0 @@ -/* - * P3041 DS Setup - * - * Maintained by Kumar Gala (see MAINTAINERS for contact information) - * - * Copyright 2009-2010 Freescale Semiconductor Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - */ - -#include <linux/kernel.h> -#include <linux/pci.h> -#include <linux/kdev_t.h> -#include <linux/delay.h> -#include <linux/interrupt.h> -#include <linux/phy.h> - -#include <asm/time.h> -#include <asm/machdep.h> -#include <asm/pci-bridge.h> -#include <mm/mmu_decl.h> -#include <asm/prom.h> -#include <asm/udbg.h> -#include <asm/mpic.h> - -#include <linux/of_platform.h> -#include <sysdev/fsl_soc.h> -#include <sysdev/fsl_pci.h> -#include <asm/ehv_pic.h> - -#include "corenet_ds.h" - -/* - * Called very early, device-tree isn't unflattened - */ -static int __init p3041_ds_probe(void) -{ - unsigned long root = of_get_flat_dt_root(); - - if (of_flat_dt_is_compatible(root, "fsl,P3041DS")) - return 1; - - /* Check if we're running under the Freescale hypervisor */ - if (of_flat_dt_is_compatible(root, "fsl,P3041DS-hv")) { - ppc_md.init_IRQ = ehv_pic_init; - ppc_md.get_irq = ehv_pic_get_irq; - ppc_md.restart = fsl_hv_restart; - ppc_md.power_off = fsl_hv_halt; - ppc_md.halt = fsl_hv_halt; - return 1; - } - - return 0; -} - -define_machine(p3041_ds) { - .name = "P3041 DS", - .probe = p3041_ds_probe, - .setup_arch = corenet_ds_setup_arch, - .init_IRQ = corenet_ds_pic_init, -#ifdef CONFIG_PCI - .pcibios_fixup_bus = fsl_pcibios_fixup_bus, -#endif - .get_irq = mpic_get_coreint_irq, - .restart = fsl_rstcr_restart, - .calibrate_decr = generic_calibrate_decr, - .progress = udbg_progress, - .power_save = e500_idle, - .init_early = corenet_ds_init_early, -}; - -machine_arch_initcall(p3041_ds, corenet_ds_publish_devices); - -#ifdef CONFIG_SWIOTLB -machine_arch_initcall(p3041_ds, swiotlb_setup_bus_notifier); -#endif diff --git a/arch/powerpc/platforms/85xx/p4080_ds.c b/arch/powerpc/platforms/85xx/p4080_ds.c deleted file mode 100644 index d83b8d0..0000000 --- a/arch/powerpc/platforms/85xx/p4080_ds.c +++ /dev/null @@ -1,77 +0,0 @@ -/* - * P4080 DS Setup - * - * Maintained by Kumar Gala (see MAINTAINERS for contact information) - * - * Copyright 2009 Freescale Semiconductor Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - */ - -#include <linux/kernel.h> -#include <linux/pci.h> -#include <linux/kdev_t.h> -#include <linux/delay.h> -#include <linux/interrupt.h> - -#include <asm/time.h> -#include <asm/machdep.h> -#include <asm/pci-bridge.h> -#include <mm/mmu_decl.h> -#include <asm/prom.h> -#include <asm/udbg.h> -#include <asm/mpic.h> - -#include <linux/of_platform.h> -#include <sysdev/fsl_soc.h> -#include <sysdev/fsl_pci.h> -#include <asm/ehv_pic.h> - -#include "corenet_ds.h" - -/* - * Called very early, device-tree isn't unflattened - */ -static int __init p4080_ds_probe(void) -{ - unsigned long root = of_get_flat_dt_root(); - - if (of_flat_dt_is_compatible(root, "fsl,P4080DS")) - return 1; - - /* Check if we're running under the Freescale hypervisor */ - if (of_flat_dt_is_compatible(root, "fsl,P4080DS-hv")) { - ppc_md.init_IRQ = ehv_pic_init; - ppc_md.get_irq = ehv_pic_get_irq; - ppc_md.restart = fsl_hv_restart; - ppc_md.power_off = fsl_hv_halt; - ppc_md.halt = fsl_hv_halt; - return 1; - } - - return 0; -} - -define_machine(p4080_ds) { - .name = "P4080 DS", - .probe = p4080_ds_probe, - .setup_arch = corenet_ds_setup_arch, - .init_IRQ = corenet_ds_pic_init, -#ifdef CONFIG_PCI - .pcibios_fixup_bus = fsl_pcibios_fixup_bus, -#endif - .get_irq = mpic_get_coreint_irq, - .restart = fsl_rstcr_restart, - .calibrate_decr = generic_calibrate_decr, - .progress = udbg_progress, - .power_save = e500_idle, - .init_early = corenet_ds_init_early, -}; - -machine_arch_initcall(p4080_ds, corenet_ds_publish_devices); -#ifdef CONFIG_SWIOTLB -machine_arch_initcall(p4080_ds, swiotlb_setup_bus_notifier); -#endif diff --git a/arch/powerpc/platforms/85xx/p5020_ds.c b/arch/powerpc/platforms/85xx/p5020_ds.c deleted file mode 100644 index 6e1ed9f..0000000 --- a/arch/powerpc/platforms/85xx/p5020_ds.c +++ /dev/null @@ -1,88 +0,0 @@ -/* - * P5020 DS Setup - * - * Maintained by Kumar Gala (see MAINTAINERS for contact information) - * - * Copyright 2009-2010 Freescale Semiconductor Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - */ - -#include <linux/kernel.h> -#include <linux/pci.h> -#include <linux/kdev_t.h> -#include <linux/delay.h> -#include <linux/interrupt.h> -#include <linux/phy.h> - -#include <asm/time.h> -#include <asm/machdep.h> -#include <asm/pci-bridge.h> -#include <mm/mmu_decl.h> -#include <asm/prom.h> -#include <asm/udbg.h> -#include <asm/mpic.h> - -#include <linux/of_platform.h> -#include <sysdev/fsl_soc.h> -#include <sysdev/fsl_pci.h> -#include <asm/ehv_pic.h> - -#include "corenet_ds.h" - -/* - * Called very early, device-tree isn't unflattened - */ -static int __init p5020_ds_probe(void) -{ - unsigned long root = of_get_flat_dt_root(); - - if (of_flat_dt_is_compatible(root, "fsl,P5020DS")) - return 1; - - /* Check if we're running under the Freescale hypervisor */ - if (of_flat_dt_is_compatible(root, "fsl,P5020DS-hv")) { - ppc_md.init_IRQ = ehv_pic_init; - ppc_md.get_irq = ehv_pic_get_irq; - ppc_md.restart = fsl_hv_restart; - ppc_md.power_off = fsl_hv_halt; - ppc_md.halt = fsl_hv_halt; - return 1; - } - - return 0; -} - -define_machine(p5020_ds) { - .name = "P5020 DS", - .probe = p5020_ds_probe, - .setup_arch = corenet_ds_setup_arch, - .init_IRQ = corenet_ds_pic_init, -#ifdef CONFIG_PCI - .pcibios_fixup_bus = fsl_pcibios_fixup_bus, -#endif -/* coreint doesn't play nice with lazy EE, use legacy mpic for now */ -#ifdef CONFIG_PPC64 - .get_irq = mpic_get_irq, -#else - .get_irq = mpic_get_coreint_irq, -#endif - .restart = fsl_rstcr_restart, - .calibrate_decr = generic_calibrate_decr, - .progress = udbg_progress, -#ifdef CONFIG_PPC64 - .power_save = book3e_idle, -#else - .power_save = e500_idle, -#endif - .init_early = corenet_ds_init_early, -}; - -machine_arch_initcall(p5020_ds, corenet_ds_publish_devices); - -#ifdef CONFIG_SWIOTLB -machine_arch_initcall(p5020_ds, swiotlb_setup_bus_notifier); -#endif diff --git a/arch/powerpc/platforms/85xx/p5040_ds.c b/arch/powerpc/platforms/85xx/p5040_ds.c deleted file mode 100644 index 095806a..0000000 --- a/arch/powerpc/platforms/85xx/p5040_ds.c +++ /dev/null @@ -1,79 +0,0 @@ -/* - * P5040 DS Setup - * - * Copyright 2009-2010 Freescale Semiconductor Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - */ - -#include <linux/kernel.h> -#include <linux/pci.h> - -#include <asm/machdep.h> -#include <asm/udbg.h> -#include <asm/mpic.h> - -#include <linux/of_fdt.h> - -#include <sysdev/fsl_soc.h> -#include <sysdev/fsl_pci.h> -#include <asm/ehv_pic.h> - -#include "corenet_ds.h" - -/* - * Called very early, device-tree isn't unflattened - */ -static int __init p5040_ds_probe(void) -{ - unsigned long root = of_get_flat_dt_root(); - - if (of_flat_dt_is_compatible(root, "fsl,P5040DS")) - return 1; - - /* Check if we're running under the Freescale hypervisor */ - if (of_flat_dt_is_compatible(root, "fsl,P5040DS-hv")) { - ppc_md.init_IRQ = ehv_pic_init; - ppc_md.get_irq = ehv_pic_get_irq; - ppc_md.restart = fsl_hv_restart; - ppc_md.power_off = fsl_hv_halt; - ppc_md.halt = fsl_hv_halt; - return 1; - } - - return 0; -} - -define_machine(p5040_ds) { - .name = "P5040 DS", - .probe = p5040_ds_probe, - .setup_arch = corenet_ds_setup_arch, - .init_IRQ = corenet_ds_pic_init, -#ifdef CONFIG_PCI - .pcibios_fixup_bus = fsl_pcibios_fixup_bus, -#endif -/* coreint doesn't play nice with lazy EE, use legacy mpic for now */ -#ifdef CONFIG_PPC64 - .get_irq = mpic_get_irq, -#else - .get_irq = mpic_get_coreint_irq, -#endif - .restart = fsl_rstcr_restart, - .calibrate_decr = generic_calibrate_decr, - .progress = udbg_progress, -#ifdef CONFIG_PPC64 - .power_save = book3e_idle, -#else - .power_save = e500_idle, -#endif - .init_early = corenet_ds_init_early, -}; - -machine_arch_initcall(p5040_ds, corenet_ds_publish_devices); - -#ifdef CONFIG_SWIOTLB -machine_arch_initcall(p5040_ds, swiotlb_setup_bus_notifier); -#endif diff --git a/arch/powerpc/platforms/85xx/ppa8548.c b/arch/powerpc/platforms/85xx/ppa8548.c index 6a7704b..3daff7c 100644 --- a/arch/powerpc/platforms/85xx/ppa8548.c +++ b/arch/powerpc/platforms/85xx/ppa8548.c @@ -19,6 +19,7 @@ #include <linux/init.h> #include <linux/reboot.h> #include <linux/seq_file.h> +#include <linux/of_fdt.h> #include <linux/of_platform.h> #include <asm/machdep.h> diff --git a/arch/powerpc/platforms/85xx/sgy_cts1000.c b/arch/powerpc/platforms/85xx/sgy_cts1000.c index 7179726..b9197ce 100644 --- a/arch/powerpc/platforms/85xx/sgy_cts1000.c +++ b/arch/powerpc/platforms/85xx/sgy_cts1000.c @@ -16,6 +16,7 @@ #include <linux/module.h> #include <linux/init.h> #include <linux/of_gpio.h> +#include <linux/of_irq.h> #include <linux/workqueue.h> #include <linux/reboot.h> #include <linux/interrupt.h> diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c index 43cc5c9..1f50419 100644 --- a/arch/powerpc/platforms/85xx/smp.c +++ b/arch/powerpc/platforms/85xx/smp.c @@ -15,6 +15,7 @@ #include <linux/init.h> #include <linux/delay.h> #include <linux/of.h> +#include <linux/of_address.h> #include <linux/kexec.h> #include <linux/highmem.h> #include <linux/cpu.h> @@ -144,11 +145,13 @@ static void __cpuinit mpc85xx_give_timebase(void) { u64 prev; - asm volatile("mftb %0" : "=r" (timebase)); + asm volatile("mfspr %0, %1" : "=r" (timebase) : + "i" (SPRN_TBRL)); do { prev = timebase; - asm volatile("mftb %0" : "=r" (timebase)); + asm volatile("mfspr %0, %1" : "=r" (timebase) : + "i" (SPRN_TBRL)); } while (prev != timebase); } #else @@ -242,8 +245,7 @@ void platform_cpu_die(unsigned int cpu) } } #else -/* for e500v1 and e500v2 */ -static void __cpuinit smp_85xx_mach_cpu_die(void) +static void smp_85xx_mach_cpu_die(void) { unsigned int cpu = smp_processor_id(); u32 tmp; @@ -286,7 +288,7 @@ static inline u32 read_spin_table_addr_l(void *spin_table) return in_be32(&((struct epapr_spin_table *)spin_table)->addr_l); } -static int __cpuinit smp_85xx_kick_cpu(int nr) +static int smp_85xx_kick_cpu(int nr) { unsigned long flags; const u64 *cpu_rel_addr; @@ -563,7 +565,7 @@ static void mpc85xx_smp_machine_kexec(struct kimage *image) } #endif /* CONFIG_KEXEC */ -static void __cpuinit smp_85xx_setup_cpu(int cpu_nr) +static void smp_85xx_setup_cpu(int cpu_nr) { if (smp_85xx_ops.probe == smp_mpic_probe) mpic_setup_this_cpu(); diff --git a/arch/powerpc/platforms/85xx/socrates_fpga_pic.c b/arch/powerpc/platforms/85xx/socrates_fpga_pic.c index 3bbbf74..55a9682 100644 --- a/arch/powerpc/platforms/85xx/socrates_fpga_pic.c +++ b/arch/powerpc/platforms/85xx/socrates_fpga_pic.c @@ -9,6 +9,8 @@ */ #include <linux/irq.h> +#include <linux/of_address.h> +#include <linux/of_irq.h> #include <linux/of_platform.h> #include <linux/io.h> diff --git a/arch/powerpc/platforms/85xx/t4240_qds.c b/arch/powerpc/platforms/85xx/t4240_qds.c deleted file mode 100644 index c68baaa..0000000 --- a/arch/powerpc/platforms/85xx/t4240_qds.c +++ /dev/null @@ -1,88 +0,0 @@ -/* - * T4240 QDS Setup - * - * Maintained by Kumar Gala (see MAINTAINERS for contact information) - * - * Copyright 2012 Freescale Semiconductor Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - */ - -#include <linux/kernel.h> -#include <linux/pci.h> -#include <linux/kdev_t.h> -#include <linux/delay.h> -#include <linux/interrupt.h> -#include <linux/phy.h> - -#include <asm/time.h> -#include <asm/machdep.h> -#include <asm/pci-bridge.h> -#include <mm/mmu_decl.h> -#include <asm/prom.h> -#include <asm/udbg.h> -#include <asm/mpic.h> - -#include <linux/of_platform.h> -#include <sysdev/fsl_soc.h> -#include <sysdev/fsl_pci.h> -#include <asm/ehv_pic.h> - -#include "corenet_ds.h" - -/* - * Called very early, device-tree isn't unflattened - */ -static int __init t4240_qds_probe(void) -{ - unsigned long root = of_get_flat_dt_root(); - - if (of_flat_dt_is_compatible(root, "fsl,T4240QDS")) - return 1; - - /* Check if we're running under the Freescale hypervisor */ - if (of_flat_dt_is_compatible(root, "fsl,T4240QDS-hv")) { - ppc_md.init_IRQ = ehv_pic_init; - ppc_md.get_irq = ehv_pic_get_irq; - ppc_md.restart = fsl_hv_restart; - ppc_md.power_off = fsl_hv_halt; - ppc_md.halt = fsl_hv_halt; - return 1; - } - - return 0; -} - -define_machine(t4240_qds) { - .name = "T4240 QDS", - .probe = t4240_qds_probe, - .setup_arch = corenet_ds_setup_arch, - .init_IRQ = corenet_ds_pic_init, -#ifdef CONFIG_PCI - .pcibios_fixup_bus = fsl_pcibios_fixup_bus, -#endif -/* coreint doesn't play nice with lazy EE, use legacy mpic for now */ -#ifdef CONFIG_PPC64 - .get_irq = mpic_get_irq, -#else - .get_irq = mpic_get_coreint_irq, -#endif - .restart = fsl_rstcr_restart, - .calibrate_decr = generic_calibrate_decr, - .progress = udbg_progress, -#ifdef CONFIG_PPC64 - .power_save = book3e_idle, -#else - .power_save = e500_idle, -#endif - .init_early = corenet_ds_init_early, -}; - -machine_arch_initcall(t4240_qds, corenet_ds_publish_devices); - -#ifdef CONFIG_SWIOTLB -machine_arch_initcall(t4240_qds, swiotlb_setup_bus_notifier); -#endif diff --git a/arch/powerpc/platforms/86xx/pic.c b/arch/powerpc/platforms/86xx/pic.c index 9982f57..d5b98c0 100644 --- a/arch/powerpc/platforms/86xx/pic.c +++ b/arch/powerpc/platforms/86xx/pic.c @@ -10,6 +10,7 @@ #include <linux/stddef.h> #include <linux/kernel.h> #include <linux/interrupt.h> +#include <linux/of_irq.h> #include <linux/of_platform.h> #include <asm/mpic.h> diff --git a/arch/powerpc/platforms/8xx/ep88xc.c b/arch/powerpc/platforms/8xx/ep88xc.c index 7d9ac60..e621666 100644 --- a/arch/powerpc/platforms/8xx/ep88xc.c +++ b/arch/powerpc/platforms/8xx/ep88xc.c @@ -10,6 +10,8 @@ */ #include <linux/init.h> +#include <linux/of_address.h> +#include <linux/of_fdt.h> #include <linux/of_platform.h> #include <asm/machdep.h> diff --git a/arch/powerpc/platforms/8xx/m8xx_setup.c b/arch/powerpc/platforms/8xx/m8xx_setup.c index 1e12108..587a282 100644 --- a/arch/powerpc/platforms/8xx/m8xx_setup.c +++ b/arch/powerpc/platforms/8xx/m8xx_setup.c @@ -43,6 +43,7 @@ static irqreturn_t timebase_interrupt(int irq, void *dev) static struct irqaction tbint_irqaction = { .handler = timebase_interrupt, + .flags = IRQF_NO_THREAD, .name = "tbint", }; @@ -218,19 +219,12 @@ void mpc8xx_restart(char *cmd) static void cpm_cascade(unsigned int irq, struct irq_desc *desc) { - struct irq_chip *chip; - int cascade_irq; - - if ((cascade_irq = cpm_get_irq()) >= 0) { - struct irq_desc *cdesc = irq_to_desc(cascade_irq); + struct irq_chip *chip = irq_desc_get_chip(desc); + int cascade_irq = cpm_get_irq(); + if (cascade_irq >= 0) generic_handle_irq(cascade_irq); - chip = irq_desc_get_chip(cdesc); - chip->irq_eoi(&cdesc->irq_data); - } - - chip = irq_desc_get_chip(desc); chip->irq_eoi(&desc->irq_data); } diff --git a/arch/powerpc/platforms/8xx/mpc86xads_setup.c b/arch/powerpc/platforms/8xx/mpc86xads_setup.c index 866feff..6308464 100644 --- a/arch/powerpc/platforms/8xx/mpc86xads_setup.c +++ b/arch/powerpc/platforms/8xx/mpc86xads_setup.c @@ -15,6 +15,8 @@ */ #include <linux/init.h> +#include <linux/of_address.h> +#include <linux/of_fdt.h> #include <linux/of_platform.h> #include <asm/io.h> diff --git a/arch/powerpc/platforms/8xx/mpc885ads_setup.c b/arch/powerpc/platforms/8xx/mpc885ads_setup.c index 5d98398..c126258 100644 --- a/arch/powerpc/platforms/8xx/mpc885ads_setup.c +++ b/arch/powerpc/platforms/8xx/mpc885ads_setup.c @@ -25,6 +25,8 @@ #include <linux/fs_uart_pd.h> #include <linux/fsl_devices.h> #include <linux/mii.h> +#include <linux/of_address.h> +#include <linux/of_fdt.h> #include <linux/of_platform.h> #include <asm/delay.h> diff --git a/arch/powerpc/platforms/8xx/tqm8xx_setup.c b/arch/powerpc/platforms/8xx/tqm8xx_setup.c index 8d21ab7..251aba8 100644 --- a/arch/powerpc/platforms/8xx/tqm8xx_setup.c +++ b/arch/powerpc/platforms/8xx/tqm8xx_setup.c @@ -28,6 +28,7 @@ #include <linux/fs_uart_pd.h> #include <linux/fsl_devices.h> #include <linux/mii.h> +#include <linux/of_fdt.h> #include <linux/of_platform.h> #include <asm/delay.h> @@ -48,7 +49,7 @@ struct cpm_pin { int port, pin, flags; }; -static struct __initdata cpm_pin tqm8xx_pins[] = { +static struct cpm_pin tqm8xx_pins[] __initdata = { /* SMC1 */ {CPM_PORTB, 24, CPM_PIN_INPUT}, /* RX */ {CPM_PORTB, 25, CPM_PIN_INPUT | CPM_PIN_SECONDARY}, /* TX */ @@ -63,7 +64,7 @@ static struct __initdata cpm_pin tqm8xx_pins[] = { {CPM_PORTC, 11, CPM_PIN_INPUT | CPM_PIN_SECONDARY | CPM_PIN_GPIO}, }; -static struct __initdata cpm_pin tqm8xx_fec_pins[] = { +static struct cpm_pin tqm8xx_fec_pins[] __initdata = { /* MII */ {CPM_PORTD, 3, CPM_PIN_OUTPUT}, {CPM_PORTD, 4, CPM_PIN_OUTPUT}, diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig index a1c5686..bf9c6d4 100644 --- a/arch/powerpc/platforms/Kconfig +++ b/arch/powerpc/platforms/Kconfig @@ -92,19 +92,19 @@ config MPIC_TIMER default n help The MPIC global timer is a hardware timer inside the - Freescale PIC comply to Open-PIC standard. When the - timer is timeout of the specified interval, the hardware - timer generates an interrupt. The driver currently is - only tested on fsl chip, but it can potentially support - other global timers complying to Open-PIC standard. + Freescale PIC complying with OpenPIC standard. When the + specified interval times out, the hardware timer generates + an interrupt. The driver currently is only tested on fsl + chip, but it can potentially support other global timers + complying with the OpenPIC standard. config FSL_MPIC_TIMER_WAKEUP tristate "Freescale MPIC global timer wakeup driver" - depends on FSL_SOC && MPIC_TIMER + depends on FSL_SOC && MPIC_TIMER && PM default n help - This is only for freescale powerpc platform. The driver - provides a way to wake up the system by MPIC timer, + The driver provides a way to wake up the system by MPIC + timer. e.g. "echo 5 > /sys/devices/system/mpic/timer_wakeup" config PPC_EPAPR_HV_PIC @@ -185,6 +185,11 @@ config IBMEBUS help Bus device driver for GX bus based adapters. +config EEH + bool + depends on (PPC_POWERNV || PPC_PSERIES) && PCI + default y + config PPC_MPC106 bool default n @@ -197,65 +202,18 @@ config PPC_P7_NAP bool default n -config PPC_INDIRECT_IO - bool - select GENERIC_IOMAP - config PPC_INDIRECT_PIO bool - select PPC_INDIRECT_IO + select GENERIC_IOMAP config PPC_INDIRECT_MMIO bool - select PPC_INDIRECT_IO config PPC_IO_WORKAROUNDS bool source "drivers/cpufreq/Kconfig" -menu "CPU Frequency drivers" - depends on CPU_FREQ - -config CPU_FREQ_PMAC - bool "Support for Apple PowerBooks" - depends on ADB_PMU && PPC32 - select CPU_FREQ_TABLE - help - This adds support for frequency switching on Apple PowerBooks, - this currently includes some models of iBook & Titanium - PowerBook. - -config CPU_FREQ_PMAC64 - bool "Support for some Apple G5s" - depends on PPC_PMAC && PPC64 - select CPU_FREQ_TABLE - help - This adds support for frequency switching on Apple iMac G5, - and some of the more recent desktop G5 machines as well. - -config MPC85xx_CPUFREQ - bool "Support for Freescale MPC85xx CPU freq" - depends on PPC_85xx && PPC32 && !PPC_E500MC - default y - select CPU_FREQ_TABLE - help - This adds support for dynamic frequency switching on - Freescale MPC85xx by cpufreq interface. MPC8536 and P1022 - have a JOG feature, which provides a dynamic mechanism - to lower or raise the CPU core clock at runtime. - -config PPC_PASEMI_CPUFREQ - bool "Support for PA Semi PWRficient" - depends on PPC_PASEMI - default y - select CPU_FREQ_TABLE - help - This adds the support for frequency switching on PA Semi - PWRficient processors. - -endmenu - menu "CPUIdle driver" source "drivers/cpuidle/Kconfig" diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 57e32a3..226f871 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -71,6 +71,7 @@ config PPC_BOOK3S_64 select PPC_FPU select PPC_HAVE_PMU_SUPPORT select SYS_SUPPORTS_HUGETLBFS + select HAVE_ARCH_TRANSPARENT_HUGEPAGE if PPC_64K_PAGES config PPC_BOOK3E_64 bool "Embedded processors" @@ -92,21 +93,35 @@ choice config GENERIC_CPU bool "Generic" + depends on !CPU_LITTLE_ENDIAN config CELL_CPU bool "Cell Broadband Engine" + depends on PPC_BOOK3S_64 && !CPU_LITTLE_ENDIAN config POWER4_CPU bool "POWER4" + depends on PPC_BOOK3S_64 && !CPU_LITTLE_ENDIAN config POWER5_CPU bool "POWER5" + depends on PPC_BOOK3S_64 && !CPU_LITTLE_ENDIAN config POWER6_CPU bool "POWER6" + depends on PPC_BOOK3S_64 && !CPU_LITTLE_ENDIAN config POWER7_CPU bool "POWER7" + depends on PPC_BOOK3S_64 + +config E5500_CPU + bool "Freescale e5500" + depends on E500 + +config E6500_CPU + bool "Freescale e6500" + depends on E500 endchoice @@ -464,3 +479,28 @@ config PPC_DOORBELL default n endmenu + +choice + prompt "Endianness selection" + default CPU_BIG_ENDIAN + help + This option selects whether a big endian or little endian kernel will + be built. + +config CPU_BIG_ENDIAN + bool "Build big endian kernel" + help + Build a big endian kernel. + + If unsure, select this option. + +config CPU_LITTLE_ENDIAN + bool "Build little endian kernel" + help + Build a little endian kernel. + + Note that if cross compiling a little endian kernel, + CROSS_COMPILE must point to a toolchain capable of targeting + little endian powerpc. + +endchoice diff --git a/arch/powerpc/platforms/cell/beat_htab.c b/arch/powerpc/platforms/cell/beat_htab.c index 246e1d8..c34ee4e 100644 --- a/arch/powerpc/platforms/cell/beat_htab.c +++ b/arch/powerpc/platforms/cell/beat_htab.c @@ -185,7 +185,8 @@ static void beat_lpar_hptab_clear(void) static long beat_lpar_hpte_updatepp(unsigned long slot, unsigned long newpp, unsigned long vpn, - int psize, int ssize, int local) + int psize, int apsize, + int ssize, int local) { unsigned long lpar_rc; u64 dummy0, dummy1; @@ -274,7 +275,8 @@ static void beat_lpar_hpte_updateboltedpp(unsigned long newpp, } static void beat_lpar_hpte_invalidate(unsigned long slot, unsigned long vpn, - int psize, int ssize, int local) + int psize, int apsize, + int ssize, int local) { unsigned long want_v; unsigned long lpar_rc; @@ -364,9 +366,10 @@ static long beat_lpar_hpte_insert_v3(unsigned long hpte_group, * already zero. For now I am paranoid. */ static long beat_lpar_hpte_updatepp_v3(unsigned long slot, - unsigned long newpp, - unsigned long vpn, - int psize, int ssize, int local) + unsigned long newpp, + unsigned long vpn, + int psize, int apsize, + int ssize, int local) { unsigned long lpar_rc; unsigned long want_v; @@ -394,7 +397,8 @@ static long beat_lpar_hpte_updatepp_v3(unsigned long slot, } static void beat_lpar_hpte_invalidate_v3(unsigned long slot, unsigned long vpn, - int psize, int ssize, int local) + int psize, int apsize, + int ssize, int local) { unsigned long want_v; unsigned long lpar_rc; diff --git a/arch/powerpc/platforms/cell/beat_interrupt.c b/arch/powerpc/platforms/cell/beat_interrupt.c index 8c6dc42..9e5dfbc 100644 --- a/arch/powerpc/platforms/cell/beat_interrupt.c +++ b/arch/powerpc/platforms/cell/beat_interrupt.c @@ -239,7 +239,7 @@ void __init beatic_init_IRQ(void) ppc_md.get_irq = beatic_get_irq; /* Allocate an irq host */ - beatic_host = irq_domain_add_nomap(NULL, 0, &beatic_pic_host_ops, NULL); + beatic_host = irq_domain_add_nomap(NULL, ~0, &beatic_pic_host_ops, NULL); BUG_ON(beatic_host == NULL); irq_set_default_host(beatic_host); } diff --git a/arch/powerpc/platforms/cell/celleb_scc_pciex.c b/arch/powerpc/platforms/cell/celleb_scc_pciex.c index 14be2bd..4278acf 100644 --- a/arch/powerpc/platforms/cell/celleb_scc_pciex.c +++ b/arch/powerpc/platforms/cell/celleb_scc_pciex.c @@ -486,7 +486,6 @@ static __init int celleb_setup_pciex(struct device_node *node, struct pci_controller *phb) { struct resource r; - struct of_irq oirq; int virq; /* SMMIO registers; used inside this file */ @@ -507,12 +506,11 @@ static __init int celleb_setup_pciex(struct device_node *node, phb->ops = &scc_pciex_pci_ops; /* internal interrupt handler */ - if (of_irq_map_one(node, 1, &oirq)) { + virq = irq_of_parse_and_map(node, 1); + if (!virq) { pr_err("PCIEXC:Failed to map irq\n"); goto error; } - virq = irq_create_of_mapping(oirq.controller, oirq.specifier, - oirq.size); if (request_irq(virq, pciex_handle_internal_irq, 0, "pciex", (void *)phb)) { pr_err("PCIEXC:Failed to request irq\n"); diff --git a/arch/powerpc/platforms/cell/celleb_scc_sio.c b/arch/powerpc/platforms/cell/celleb_scc_sio.c index 9c339ec..c8eb571 100644 --- a/arch/powerpc/platforms/cell/celleb_scc_sio.c +++ b/arch/powerpc/platforms/cell/celleb_scc_sio.c @@ -45,7 +45,7 @@ static int __init txx9_serial_init(void) struct device_node *node; int i; struct uart_port req; - struct of_irq irq; + struct of_phandle_args irq; struct resource res; for_each_compatible_node(node, "serial", "toshiba,sio-scc") { @@ -53,7 +53,7 @@ static int __init txx9_serial_init(void) if (!(txx9_serial_bitmap & (1<<i))) continue; - if (of_irq_map_one(node, i, &irq)) + if (of_irq_parse_one(node, i, &irq)) continue; if (of_address_to_resource(node, txx9_scc_tab[i].index, &res)) @@ -66,8 +66,7 @@ static int __init txx9_serial_init(void) #ifdef CONFIG_SERIAL_TXX9_CONSOLE req.membase = ioremap(req.mapbase, 0x24); #endif - req.irq = irq_create_of_mapping(irq.controller, - irq.specifier, irq.size); + req.irq = irq_create_of_mapping(&irq); req.flags |= UPF_IOREMAP | UPF_BUGGY_UART /*HAVE_CTS_LINE*/; req.uartclk = 83300000; diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c index 946306b..b535606 100644 --- a/arch/powerpc/platforms/cell/iommu.c +++ b/arch/powerpc/platforms/cell/iommu.c @@ -697,7 +697,7 @@ static int __init cell_iommu_get_window(struct device_node *np, unsigned long *base, unsigned long *size) { - const void *dma_window; + const __be32 *dma_window; unsigned long index; /* Use ibm,dma-window if available, else, hard code ! */ diff --git a/arch/powerpc/platforms/cell/spider-pic.c b/arch/powerpc/platforms/cell/spider-pic.c index 8e29944..1f72f4a 100644 --- a/arch/powerpc/platforms/cell/spider-pic.c +++ b/arch/powerpc/platforms/cell/spider-pic.c @@ -235,12 +235,9 @@ static unsigned int __init spider_find_cascade_and_node(struct spider_pic *pic) /* First, we check whether we have a real "interrupts" in the device * tree in case the device-tree is ever fixed */ - struct of_irq oirq; - if (of_irq_map_one(pic->host->of_node, 0, &oirq) == 0) { - virq = irq_create_of_mapping(oirq.controller, oirq.specifier, - oirq.size); + virq = irq_of_parse_and_map(pic->host->of_node, 0); + if (virq) return virq; - } /* Now do the horrible hacks */ tmp = of_get_property(pic->host->of_node, "#interrupt-cells", NULL); diff --git a/arch/powerpc/platforms/cell/spu_manage.c b/arch/powerpc/platforms/cell/spu_manage.c index 2bb6977..c3327f3 100644 --- a/arch/powerpc/platforms/cell/spu_manage.c +++ b/arch/powerpc/platforms/cell/spu_manage.c @@ -177,21 +177,20 @@ out: static int __init spu_map_interrupts(struct spu *spu, struct device_node *np) { - struct of_irq oirq; + struct of_phandle_args oirq; int ret; int i; for (i=0; i < 3; i++) { - ret = of_irq_map_one(np, i, &oirq); + ret = of_irq_parse_one(np, i, &oirq); if (ret) { pr_debug("spu_new: failed to get irq %d\n", i); goto err; } ret = -EINVAL; - pr_debug(" irq %d no 0x%x on %s\n", i, oirq.specifier[0], - oirq.controller->full_name); - spu->irqs[i] = irq_create_of_mapping(oirq.controller, - oirq.specifier, oirq.size); + pr_debug(" irq %d no 0x%x on %s\n", i, oirq.args[0], + oirq.np->full_name); + spu->irqs[i] = irq_create_of_mapping(&oirq); if (spu->irqs[i] == NO_IRQ) { pr_debug("spu_new: failed to map it !\n"); goto err; @@ -200,7 +199,7 @@ static int __init spu_map_interrupts(struct spu *spu, struct device_node *np) return 0; err: - pr_debug("failed to map irq %x for spu %s\n", *oirq.specifier, + pr_debug("failed to map irq %x for spu %s\n", *oirq.args, spu->name); for (; i >= 0; i--) { if (spu->irqs[i] != NO_IRQ) diff --git a/arch/powerpc/platforms/cell/spu_syscalls.c b/arch/powerpc/platforms/cell/spu_syscalls.c index db4e638..3844f13 100644 --- a/arch/powerpc/platforms/cell/spu_syscalls.c +++ b/arch/powerpc/platforms/cell/spu_syscalls.c @@ -25,6 +25,7 @@ #include <linux/module.h> #include <linux/syscalls.h> #include <linux/rcupdate.h> +#include <linux/binfmts.h> #include <asm/spu.h> @@ -126,7 +127,7 @@ int elf_coredump_extra_notes_size(void) return ret; } -int elf_coredump_extra_notes_write(struct file *file, loff_t *foffset) +int elf_coredump_extra_notes_write(struct coredump_params *cprm) { struct spufs_calls *calls; int ret; @@ -135,7 +136,7 @@ int elf_coredump_extra_notes_write(struct file *file, loff_t *foffset) if (!calls) return 0; - ret = calls->coredump_extra_notes_write(file, foffset); + ret = calls->coredump_extra_notes_write(cprm); spufs_calls_put(calls); diff --git a/arch/powerpc/platforms/cell/spufs/coredump.c b/arch/powerpc/platforms/cell/spufs/coredump.c index c9500ea..be6212d 100644 --- a/arch/powerpc/platforms/cell/spufs/coredump.c +++ b/arch/powerpc/platforms/cell/spufs/coredump.c @@ -27,6 +27,8 @@ #include <linux/gfp.h> #include <linux/list.h> #include <linux/syscalls.h> +#include <linux/coredump.h> +#include <linux/binfmts.h> #include <asm/uaccess.h> @@ -48,44 +50,6 @@ static ssize_t do_coredump_read(int num, struct spu_context *ctx, void *buffer, return ++ret; /* count trailing NULL */ } -/* - * These are the only things you should do on a core-file: use only these - * functions to write out all the necessary info. - */ -static int spufs_dump_write(struct file *file, const void *addr, int nr, loff_t *foffset) -{ - unsigned long limit = rlimit(RLIMIT_CORE); - ssize_t written; - - if (*foffset + nr > limit) - return -EIO; - - written = file->f_op->write(file, addr, nr, &file->f_pos); - *foffset += written; - - if (written != nr) - return -EIO; - - return 0; -} - -static int spufs_dump_align(struct file *file, char *buf, loff_t new_off, - loff_t *foffset) -{ - int rc, size; - - size = min((loff_t)PAGE_SIZE, new_off - *foffset); - memset(buf, 0, size); - - rc = 0; - while (rc == 0 && new_off > *foffset) { - size = min((loff_t)PAGE_SIZE, new_off - *foffset); - rc = spufs_dump_write(file, buf, size, foffset); - } - - return rc; -} - static int spufs_ctx_note_size(struct spu_context *ctx, int dfd) { int i, sz, total = 0; @@ -165,10 +129,10 @@ int spufs_coredump_extra_notes_size(void) } static int spufs_arch_write_note(struct spu_context *ctx, int i, - struct file *file, int dfd, loff_t *foffset) + struct coredump_params *cprm, int dfd) { loff_t pos = 0; - int sz, rc, nread, total = 0; + int sz, rc, total = 0; const int bufsz = PAGE_SIZE; char *name; char fullname[80], *buf; @@ -186,42 +150,39 @@ static int spufs_arch_write_note(struct spu_context *ctx, int i, en.n_descsz = sz; en.n_type = NT_SPU; - rc = spufs_dump_write(file, &en, sizeof(en), foffset); - if (rc) - goto out; + if (!dump_emit(cprm, &en, sizeof(en))) + goto Eio; - rc = spufs_dump_write(file, fullname, en.n_namesz, foffset); - if (rc) - goto out; + if (!dump_emit(cprm, fullname, en.n_namesz)) + goto Eio; - rc = spufs_dump_align(file, buf, roundup(*foffset, 4), foffset); - if (rc) - goto out; + if (!dump_align(cprm, 4)) + goto Eio; do { - nread = do_coredump_read(i, ctx, buf, bufsz, &pos); - if (nread > 0) { - rc = spufs_dump_write(file, buf, nread, foffset); - if (rc) - goto out; - total += nread; + rc = do_coredump_read(i, ctx, buf, bufsz, &pos); + if (rc > 0) { + if (!dump_emit(cprm, buf, rc)) + goto Eio; + total += rc; } - } while (nread == bufsz && total < sz); + } while (rc == bufsz && total < sz); - if (nread < 0) { - rc = nread; + if (rc < 0) goto out; - } - - rc = spufs_dump_align(file, buf, roundup(*foffset - total + sz, 4), - foffset); + if (!dump_skip(cprm, + roundup(cprm->written - total + sz, 4) - cprm->written)) + goto Eio; out: free_page((unsigned long)buf); return rc; +Eio: + free_page((unsigned long)buf); + return -EIO; } -int spufs_coredump_extra_notes_write(struct file *file, loff_t *foffset) +int spufs_coredump_extra_notes_write(struct coredump_params *cprm) { struct spu_context *ctx; int fd, j, rc; @@ -233,7 +194,7 @@ int spufs_coredump_extra_notes_write(struct file *file, loff_t *foffset) return rc; for (j = 0; spufs_coredump_read[j].name != NULL; j++) { - rc = spufs_arch_write_note(ctx, j, file, fd, foffset); + rc = spufs_arch_write_note(ctx, j, cprm, fd); if (rc) { spu_release_saved(ctx); return rc; diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 35f77a4..87ba7cf 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -238,7 +238,7 @@ const struct file_operations spufs_context_fops = { .release = spufs_dir_close, .llseek = dcache_dir_lseek, .read = generic_read_dir, - .readdir = dcache_readdir, + .iterate = dcache_readdir, .fsync = noop_fsync, }; EXPORT_SYMBOL_GPL(spufs_context_fops); @@ -620,12 +620,16 @@ spufs_parse_options(struct super_block *sb, char *options, struct inode *root) case Opt_uid: if (match_int(&args[0], &option)) return 0; - root->i_uid = option; + root->i_uid = make_kuid(current_user_ns(), option); + if (!uid_valid(root->i_uid)) + return 0; break; case Opt_gid: if (match_int(&args[0], &option)) return 0; - root->i_gid = option; + root->i_gid = make_kgid(current_user_ns(), option); + if (!gid_valid(root->i_gid)) + return 0; break; case Opt_mode: if (match_octal(&args[0], &option)) diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h index 67852ad..0ba3c95 100644 --- a/arch/powerpc/platforms/cell/spufs/spufs.h +++ b/arch/powerpc/platforms/cell/spufs/spufs.h @@ -247,12 +247,13 @@ extern const struct spufs_tree_descr spufs_dir_debug_contents[]; /* system call implementation */ extern struct spufs_calls spufs_calls; +struct coredump_params; long spufs_run_spu(struct spu_context *ctx, u32 *npc, u32 *status); long spufs_create(struct path *nd, struct dentry *dentry, unsigned int flags, umode_t mode, struct file *filp); /* ELF coredump callbacks for writing SPU ELF notes */ extern int spufs_coredump_extra_notes_size(void); -extern int spufs_coredump_extra_notes_write(struct file *file, loff_t *foffset); +extern int spufs_coredump_extra_notes_write(struct coredump_params *cprm); extern const struct file_operations spufs_context_fops; diff --git a/arch/powerpc/platforms/chrp/nvram.c b/arch/powerpc/platforms/chrp/nvram.c index d3ceff0..9ef8cc3 100644 --- a/arch/powerpc/platforms/chrp/nvram.c +++ b/arch/powerpc/platforms/chrp/nvram.c @@ -66,7 +66,7 @@ static void chrp_nvram_write(int addr, unsigned char val) void __init chrp_nvram_init(void) { struct device_node *nvram; - const unsigned int *nbytes_p; + const __be32 *nbytes_p; unsigned int proplen; nvram = of_find_node_by_type(NULL, "nvram"); @@ -79,7 +79,7 @@ void __init chrp_nvram_init(void) return; } - nvram_size = *nbytes_p; + nvram_size = be32_to_cpup(nbytes_p); printk(KERN_INFO "CHRP nvram contains %u bytes\n", nvram_size); of_node_put(nvram); diff --git a/arch/powerpc/platforms/embedded6xx/flipper-pic.c b/arch/powerpc/platforms/embedded6xx/flipper-pic.c index 53d6eee0..4cde8e7 100644 --- a/arch/powerpc/platforms/embedded6xx/flipper-pic.c +++ b/arch/powerpc/platforms/embedded6xx/flipper-pic.c @@ -18,6 +18,7 @@ #include <linux/init.h> #include <linux/irq.h> #include <linux/of.h> +#include <linux/of_address.h> #include <asm/io.h> #include "flipper-pic.h" diff --git a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c index 3006b51..6c03034 100644 --- a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c +++ b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c @@ -18,6 +18,8 @@ #include <linux/init.h> #include <linux/irq.h> #include <linux/of.h> +#include <linux/of_address.h> +#include <linux/of_irq.h> #include <asm/io.h> #include "hlwd-pic.h" @@ -181,6 +183,7 @@ struct irq_domain *hlwd_pic_init(struct device_node *np) &hlwd_irq_domain_ops, io_base); if (!irq_domain) { pr_err("failed to allocate irq_domain\n"); + iounmap(io_base); return NULL; } diff --git a/arch/powerpc/platforms/fsl_uli1575.c b/arch/powerpc/platforms/fsl_uli1575.c index 92ac9b5..b97f6f3 100644 --- a/arch/powerpc/platforms/fsl_uli1575.c +++ b/arch/powerpc/platforms/fsl_uli1575.c @@ -321,8 +321,7 @@ static void hpcd_final_uli5288(struct pci_dev *dev) { struct pci_controller *hose = pci_bus_to_host(dev->bus); struct device_node *hosenode = hose ? hose->dn : NULL; - struct of_irq oirq; - int virq, pin = 2; + struct of_phandle_args oirq; u32 laddr[3]; if (!machine_is(mpc86xx_hpcd)) @@ -331,12 +330,13 @@ static void hpcd_final_uli5288(struct pci_dev *dev) if (!hosenode) return; + oirq.np = hosenode; + oirq.args[0] = 2; + oirq.args_count = 1; laddr[0] = (hose->first_busno << 16) | (PCI_DEVFN(31, 0) << 8); laddr[1] = laddr[2] = 0; - of_irq_map_raw(hosenode, &pin, 1, laddr, &oirq); - virq = irq_create_of_mapping(oirq.controller, oirq.specifier, - oirq.size); - dev->irq = virq; + of_irq_parse_raw(laddr, &oirq); + dev->irq = irq_create_of_mapping(&oirq); } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AL, 0x1575, hpcd_quirk_uli1575); diff --git a/arch/powerpc/platforms/pasemi/Makefile b/arch/powerpc/platforms/pasemi/Makefile index ce6d789..8e8d4ca 100644 --- a/arch/powerpc/platforms/pasemi/Makefile +++ b/arch/powerpc/platforms/pasemi/Makefile @@ -1,3 +1,2 @@ obj-y += setup.o pci.o time.o idle.o powersave.o iommu.o dma_lib.o misc.o obj-$(CONFIG_PPC_PASEMI_MDIO) += gpio_mdio.o -obj-$(CONFIG_PPC_PASEMI_CPUFREQ) += cpufreq.o diff --git a/arch/powerpc/platforms/pasemi/cpufreq.c b/arch/powerpc/platforms/pasemi/cpufreq.c deleted file mode 100644 index be1e795..0000000 --- a/arch/powerpc/platforms/pasemi/cpufreq.c +++ /dev/null @@ -1,330 +0,0 @@ -/* - * Copyright (C) 2007 PA Semi, Inc - * - * Authors: Egor Martovetsky <egor@pasemi.com> - * Olof Johansson <olof@lixom.net> - * - * Maintained by: Olof Johansson <olof@lixom.net> - * - * Based on arch/powerpc/platforms/cell/cbe_cpufreq.c: - * (C) Copyright IBM Deutschland Entwicklung GmbH 2005 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -#include <linux/cpufreq.h> -#include <linux/timer.h> -#include <linux/module.h> - -#include <asm/hw_irq.h> -#include <asm/io.h> -#include <asm/prom.h> -#include <asm/time.h> -#include <asm/smp.h> - -#define SDCASR_REG 0x0100 -#define SDCASR_REG_STRIDE 0x1000 -#define SDCPWR_CFGA0_REG 0x0100 -#define SDCPWR_PWST0_REG 0x0000 -#define SDCPWR_GIZTIME_REG 0x0440 - -/* SDCPWR_GIZTIME_REG fields */ -#define SDCPWR_GIZTIME_GR 0x80000000 -#define SDCPWR_GIZTIME_LONGLOCK 0x000000ff - -/* Offset of ASR registers from SDC base */ -#define SDCASR_OFFSET 0x120000 - -static void __iomem *sdcpwr_mapbase; -static void __iomem *sdcasr_mapbase; - -static DEFINE_MUTEX(pas_switch_mutex); - -/* Current astate, is used when waking up from power savings on - * one core, in case the other core has switched states during - * the idle time. - */ -static int current_astate; - -/* We support 5(A0-A4) power states excluding turbo(A5-A6) modes */ -static struct cpufreq_frequency_table pas_freqs[] = { - {0, 0}, - {1, 0}, - {2, 0}, - {3, 0}, - {4, 0}, - {0, CPUFREQ_TABLE_END}, -}; - -static struct freq_attr *pas_cpu_freqs_attr[] = { - &cpufreq_freq_attr_scaling_available_freqs, - NULL, -}; - -/* - * hardware specific functions - */ - -static int get_astate_freq(int astate) -{ - u32 ret; - ret = in_le32(sdcpwr_mapbase + SDCPWR_CFGA0_REG + (astate * 0x10)); - - return ret & 0x3f; -} - -static int get_cur_astate(int cpu) -{ - u32 ret; - - ret = in_le32(sdcpwr_mapbase + SDCPWR_PWST0_REG); - ret = (ret >> (cpu * 4)) & 0x7; - - return ret; -} - -static int get_gizmo_latency(void) -{ - u32 giztime, ret; - - giztime = in_le32(sdcpwr_mapbase + SDCPWR_GIZTIME_REG); - - /* just provide the upper bound */ - if (giztime & SDCPWR_GIZTIME_GR) - ret = (giztime & SDCPWR_GIZTIME_LONGLOCK) * 128000; - else - ret = (giztime & SDCPWR_GIZTIME_LONGLOCK) * 1000; - - return ret; -} - -static void set_astate(int cpu, unsigned int astate) -{ - unsigned long flags; - - /* Return if called before init has run */ - if (unlikely(!sdcasr_mapbase)) - return; - - local_irq_save(flags); - - out_le32(sdcasr_mapbase + SDCASR_REG + SDCASR_REG_STRIDE*cpu, astate); - - local_irq_restore(flags); -} - -int check_astate(void) -{ - return get_cur_astate(hard_smp_processor_id()); -} - -void restore_astate(int cpu) -{ - set_astate(cpu, current_astate); -} - -/* - * cpufreq functions - */ - -static int pas_cpufreq_cpu_init(struct cpufreq_policy *policy) -{ - const u32 *max_freqp; - u32 max_freq; - int i, cur_astate; - struct resource res; - struct device_node *cpu, *dn; - int err = -ENODEV; - - cpu = of_get_cpu_node(policy->cpu, NULL); - - if (!cpu) - goto out; - - dn = of_find_compatible_node(NULL, NULL, "1682m-sdc"); - if (!dn) - dn = of_find_compatible_node(NULL, NULL, - "pasemi,pwrficient-sdc"); - if (!dn) - goto out; - err = of_address_to_resource(dn, 0, &res); - of_node_put(dn); - if (err) - goto out; - sdcasr_mapbase = ioremap(res.start + SDCASR_OFFSET, 0x2000); - if (!sdcasr_mapbase) { - err = -EINVAL; - goto out; - } - - dn = of_find_compatible_node(NULL, NULL, "1682m-gizmo"); - if (!dn) - dn = of_find_compatible_node(NULL, NULL, - "pasemi,pwrficient-gizmo"); - if (!dn) { - err = -ENODEV; - goto out_unmap_sdcasr; - } - err = of_address_to_resource(dn, 0, &res); - of_node_put(dn); - if (err) - goto out_unmap_sdcasr; - sdcpwr_mapbase = ioremap(res.start, 0x1000); - if (!sdcpwr_mapbase) { - err = -EINVAL; - goto out_unmap_sdcasr; - } - - pr_debug("init cpufreq on CPU %d\n", policy->cpu); - - max_freqp = of_get_property(cpu, "clock-frequency", NULL); - if (!max_freqp) { - err = -EINVAL; - goto out_unmap_sdcpwr; - } - - /* we need the freq in kHz */ - max_freq = *max_freqp / 1000; - - pr_debug("max clock-frequency is at %u kHz\n", max_freq); - pr_debug("initializing frequency table\n"); - - /* initialize frequency table */ - for (i=0; pas_freqs[i].frequency!=CPUFREQ_TABLE_END; i++) { - pas_freqs[i].frequency = get_astate_freq(pas_freqs[i].index) * 100000; - pr_debug("%d: %d\n", i, pas_freqs[i].frequency); - } - - policy->cpuinfo.transition_latency = get_gizmo_latency(); - - cur_astate = get_cur_astate(policy->cpu); - pr_debug("current astate is at %d\n",cur_astate); - - policy->cur = pas_freqs[cur_astate].frequency; - cpumask_copy(policy->cpus, cpu_online_mask); - - ppc_proc_freq = policy->cur * 1000ul; - - cpufreq_frequency_table_get_attr(pas_freqs, policy->cpu); - - /* this ensures that policy->cpuinfo_min and policy->cpuinfo_max - * are set correctly - */ - return cpufreq_frequency_table_cpuinfo(policy, pas_freqs); - -out_unmap_sdcpwr: - iounmap(sdcpwr_mapbase); - -out_unmap_sdcasr: - iounmap(sdcasr_mapbase); -out: - return err; -} - -static int pas_cpufreq_cpu_exit(struct cpufreq_policy *policy) -{ - /* - * We don't support CPU hotplug. Don't unmap after the system - * has already made it to a running state. - */ - if (system_state != SYSTEM_BOOTING) - return 0; - - if (sdcasr_mapbase) - iounmap(sdcasr_mapbase); - if (sdcpwr_mapbase) - iounmap(sdcpwr_mapbase); - - cpufreq_frequency_table_put_attr(policy->cpu); - return 0; -} - -static int pas_cpufreq_verify(struct cpufreq_policy *policy) -{ - return cpufreq_frequency_table_verify(policy, pas_freqs); -} - -static int pas_cpufreq_target(struct cpufreq_policy *policy, - unsigned int target_freq, - unsigned int relation) -{ - struct cpufreq_freqs freqs; - int pas_astate_new; - int i; - - cpufreq_frequency_table_target(policy, - pas_freqs, - target_freq, - relation, - &pas_astate_new); - - freqs.old = policy->cur; - freqs.new = pas_freqs[pas_astate_new].frequency; - - mutex_lock(&pas_switch_mutex); - cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE); - - pr_debug("setting frequency for cpu %d to %d kHz, 1/%d of max frequency\n", - policy->cpu, - pas_freqs[pas_astate_new].frequency, - pas_freqs[pas_astate_new].index); - - current_astate = pas_astate_new; - - for_each_online_cpu(i) - set_astate(i, pas_astate_new); - - cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE); - mutex_unlock(&pas_switch_mutex); - - ppc_proc_freq = freqs.new * 1000ul; - return 0; -} - -static struct cpufreq_driver pas_cpufreq_driver = { - .name = "pas-cpufreq", - .owner = THIS_MODULE, - .flags = CPUFREQ_CONST_LOOPS, - .init = pas_cpufreq_cpu_init, - .exit = pas_cpufreq_cpu_exit, - .verify = pas_cpufreq_verify, - .target = pas_cpufreq_target, - .attr = pas_cpu_freqs_attr, -}; - -/* - * module init and destoy - */ - -static int __init pas_cpufreq_init(void) -{ - if (!of_machine_is_compatible("PA6T-1682M") && - !of_machine_is_compatible("pasemi,pwrficient")) - return -ENODEV; - - return cpufreq_register_driver(&pas_cpufreq_driver); -} - -static void __exit pas_cpufreq_exit(void) -{ - cpufreq_unregister_driver(&pas_cpufreq_driver); -} - -module_init(pas_cpufreq_init); -module_exit(pas_cpufreq_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Egor Martovetsky <egor@pasemi.com>, Olof Johansson <olof@lixom.net>"); diff --git a/arch/powerpc/platforms/pasemi/gpio_mdio.c b/arch/powerpc/platforms/pasemi/gpio_mdio.c index 0237ab7..15adee5 100644 --- a/arch/powerpc/platforms/pasemi/gpio_mdio.c +++ b/arch/powerpc/platforms/pasemi/gpio_mdio.c @@ -30,6 +30,7 @@ #include <linux/ioport.h> #include <linux/interrupt.h> #include <linux/phy.h> +#include <linux/of_address.h> #include <linux/of_mdio.h> #include <linux/of_platform.h> diff --git a/arch/powerpc/platforms/powermac/Makefile b/arch/powerpc/platforms/powermac/Makefile index ea47df6..52c6ce1 100644 --- a/arch/powerpc/platforms/powermac/Makefile +++ b/arch/powerpc/platforms/powermac/Makefile @@ -9,8 +9,6 @@ obj-y += pic.o setup.o time.o feature.o pci.o \ sleep.o low_i2c.o cache.o pfunc_core.o \ pfunc_base.o udbg_scc.o udbg_adb.o obj-$(CONFIG_PMAC_BACKLIGHT) += backlight.o -obj-$(CONFIG_CPU_FREQ_PMAC) += cpufreq_32.o -obj-$(CONFIG_CPU_FREQ_PMAC64) += cpufreq_64.o # CONFIG_NVRAM is an arch. independent tristate symbol, for pmac32 we really # need this to be a bool. Cheat here and pretend CONFIG_NVRAM=m is really # CONFIG_NVRAM=y diff --git a/arch/powerpc/platforms/powermac/cpufreq_32.c b/arch/powerpc/platforms/powermac/cpufreq_32.c deleted file mode 100644 index 3104fad..0000000 --- a/arch/powerpc/platforms/powermac/cpufreq_32.c +++ /dev/null @@ -1,721 +0,0 @@ -/* - * Copyright (C) 2002 - 2005 Benjamin Herrenschmidt <benh@kernel.crashing.org> - * Copyright (C) 2004 John Steele Scott <toojays@toojays.net> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * TODO: Need a big cleanup here. Basically, we need to have different - * cpufreq_driver structures for the different type of HW instead of the - * current mess. We also need to better deal with the detection of the - * type of machine. - * - */ - -#include <linux/module.h> -#include <linux/types.h> -#include <linux/errno.h> -#include <linux/kernel.h> -#include <linux/delay.h> -#include <linux/sched.h> -#include <linux/adb.h> -#include <linux/pmu.h> -#include <linux/cpufreq.h> -#include <linux/init.h> -#include <linux/device.h> -#include <linux/hardirq.h> -#include <asm/prom.h> -#include <asm/machdep.h> -#include <asm/irq.h> -#include <asm/pmac_feature.h> -#include <asm/mmu_context.h> -#include <asm/sections.h> -#include <asm/cputable.h> -#include <asm/time.h> -#include <asm/mpic.h> -#include <asm/keylargo.h> -#include <asm/switch_to.h> - -/* WARNING !!! This will cause calibrate_delay() to be called, - * but this is an __init function ! So you MUST go edit - * init/main.c to make it non-init before enabling DEBUG_FREQ - */ -#undef DEBUG_FREQ - -extern void low_choose_7447a_dfs(int dfs); -extern void low_choose_750fx_pll(int pll); -extern void low_sleep_handler(void); - -/* - * Currently, PowerMac cpufreq supports only high & low frequencies - * that are set by the firmware - */ -static unsigned int low_freq; -static unsigned int hi_freq; -static unsigned int cur_freq; -static unsigned int sleep_freq; -static unsigned long transition_latency; - -/* - * Different models uses different mechanisms to switch the frequency - */ -static int (*set_speed_proc)(int low_speed); -static unsigned int (*get_speed_proc)(void); - -/* - * Some definitions used by the various speedprocs - */ -static u32 voltage_gpio; -static u32 frequency_gpio; -static u32 slew_done_gpio; -static int no_schedule; -static int has_cpu_l2lve; -static int is_pmu_based; - -/* There are only two frequency states for each processor. Values - * are in kHz for the time being. - */ -#define CPUFREQ_HIGH 0 -#define CPUFREQ_LOW 1 - -static struct cpufreq_frequency_table pmac_cpu_freqs[] = { - {CPUFREQ_HIGH, 0}, - {CPUFREQ_LOW, 0}, - {0, CPUFREQ_TABLE_END}, -}; - -static struct freq_attr* pmac_cpu_freqs_attr[] = { - &cpufreq_freq_attr_scaling_available_freqs, - NULL, -}; - -static inline void local_delay(unsigned long ms) -{ - if (no_schedule) - mdelay(ms); - else - msleep(ms); -} - -#ifdef DEBUG_FREQ -static inline void debug_calc_bogomips(void) -{ - /* This will cause a recalc of bogomips and display the - * result. We backup/restore the value to avoid affecting the - * core cpufreq framework's own calculation. - */ - unsigned long save_lpj = loops_per_jiffy; - calibrate_delay(); - loops_per_jiffy = save_lpj; -} -#endif /* DEBUG_FREQ */ - -/* Switch CPU speed under 750FX CPU control - */ -static int cpu_750fx_cpu_speed(int low_speed) -{ - u32 hid2; - - if (low_speed == 0) { - /* ramping up, set voltage first */ - pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, voltage_gpio, 0x05); - /* Make sure we sleep for at least 1ms */ - local_delay(10); - - /* tweak L2 for high voltage */ - if (has_cpu_l2lve) { - hid2 = mfspr(SPRN_HID2); - hid2 &= ~0x2000; - mtspr(SPRN_HID2, hid2); - } - } -#ifdef CONFIG_6xx - low_choose_750fx_pll(low_speed); -#endif - if (low_speed == 1) { - /* tweak L2 for low voltage */ - if (has_cpu_l2lve) { - hid2 = mfspr(SPRN_HID2); - hid2 |= 0x2000; - mtspr(SPRN_HID2, hid2); - } - - /* ramping down, set voltage last */ - pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, voltage_gpio, 0x04); - local_delay(10); - } - - return 0; -} - -static unsigned int cpu_750fx_get_cpu_speed(void) -{ - if (mfspr(SPRN_HID1) & HID1_PS) - return low_freq; - else - return hi_freq; -} - -/* Switch CPU speed using DFS */ -static int dfs_set_cpu_speed(int low_speed) -{ - if (low_speed == 0) { - /* ramping up, set voltage first */ - pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, voltage_gpio, 0x05); - /* Make sure we sleep for at least 1ms */ - local_delay(1); - } - - /* set frequency */ -#ifdef CONFIG_6xx - low_choose_7447a_dfs(low_speed); -#endif - udelay(100); - - if (low_speed == 1) { - /* ramping down, set voltage last */ - pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, voltage_gpio, 0x04); - local_delay(1); - } - - return 0; -} - -static unsigned int dfs_get_cpu_speed(void) -{ - if (mfspr(SPRN_HID1) & HID1_DFS) - return low_freq; - else - return hi_freq; -} - - -/* Switch CPU speed using slewing GPIOs - */ -static int gpios_set_cpu_speed(int low_speed) -{ - int gpio, timeout = 0; - - /* If ramping up, set voltage first */ - if (low_speed == 0) { - pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, voltage_gpio, 0x05); - /* Delay is way too big but it's ok, we schedule */ - local_delay(10); - } - - /* Set frequency */ - gpio = pmac_call_feature(PMAC_FTR_READ_GPIO, NULL, frequency_gpio, 0); - if (low_speed == ((gpio & 0x01) == 0)) - goto skip; - - pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, frequency_gpio, - low_speed ? 0x04 : 0x05); - udelay(200); - do { - if (++timeout > 100) - break; - local_delay(1); - gpio = pmac_call_feature(PMAC_FTR_READ_GPIO, NULL, slew_done_gpio, 0); - } while((gpio & 0x02) == 0); - skip: - /* If ramping down, set voltage last */ - if (low_speed == 1) { - pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, voltage_gpio, 0x04); - /* Delay is way too big but it's ok, we schedule */ - local_delay(10); - } - -#ifdef DEBUG_FREQ - debug_calc_bogomips(); -#endif - - return 0; -} - -/* Switch CPU speed under PMU control - */ -static int pmu_set_cpu_speed(int low_speed) -{ - struct adb_request req; - unsigned long save_l2cr; - unsigned long save_l3cr; - unsigned int pic_prio; - unsigned long flags; - - preempt_disable(); - -#ifdef DEBUG_FREQ - printk(KERN_DEBUG "HID1, before: %x\n", mfspr(SPRN_HID1)); -#endif - pmu_suspend(); - - /* Disable all interrupt sources on openpic */ - pic_prio = mpic_cpu_get_priority(); - mpic_cpu_set_priority(0xf); - - /* Make sure the decrementer won't interrupt us */ - asm volatile("mtdec %0" : : "r" (0x7fffffff)); - /* Make sure any pending DEC interrupt occurring while we did - * the above didn't re-enable the DEC */ - mb(); - asm volatile("mtdec %0" : : "r" (0x7fffffff)); - - /* We can now disable MSR_EE */ - local_irq_save(flags); - - /* Giveup the FPU & vec */ - enable_kernel_fp(); - -#ifdef CONFIG_ALTIVEC - if (cpu_has_feature(CPU_FTR_ALTIVEC)) - enable_kernel_altivec(); -#endif /* CONFIG_ALTIVEC */ - - /* Save & disable L2 and L3 caches */ - save_l3cr = _get_L3CR(); /* (returns -1 if not available) */ - save_l2cr = _get_L2CR(); /* (returns -1 if not available) */ - - /* Send the new speed command. My assumption is that this command - * will cause PLL_CFG[0..3] to be changed next time CPU goes to sleep - */ - pmu_request(&req, NULL, 6, PMU_CPU_SPEED, 'W', 'O', 'O', 'F', low_speed); - while (!req.complete) - pmu_poll(); - - /* Prepare the northbridge for the speed transition */ - pmac_call_feature(PMAC_FTR_SLEEP_STATE,NULL,1,1); - - /* Call low level code to backup CPU state and recover from - * hardware reset - */ - low_sleep_handler(); - - /* Restore the northbridge */ - pmac_call_feature(PMAC_FTR_SLEEP_STATE,NULL,1,0); - - /* Restore L2 cache */ - if (save_l2cr != 0xffffffff && (save_l2cr & L2CR_L2E) != 0) - _set_L2CR(save_l2cr); - /* Restore L3 cache */ - if (save_l3cr != 0xffffffff && (save_l3cr & L3CR_L3E) != 0) - _set_L3CR(save_l3cr); - - /* Restore userland MMU context */ - switch_mmu_context(NULL, current->active_mm); - -#ifdef DEBUG_FREQ - printk(KERN_DEBUG "HID1, after: %x\n", mfspr(SPRN_HID1)); -#endif - - /* Restore low level PMU operations */ - pmu_unlock(); - - /* - * Restore decrementer; we'll take a decrementer interrupt - * as soon as interrupts are re-enabled and the generic - * clockevents code will reprogram it with the right value. - */ - set_dec(1); - - /* Restore interrupts */ - mpic_cpu_set_priority(pic_prio); - - /* Let interrupts flow again ... */ - local_irq_restore(flags); - -#ifdef DEBUG_FREQ - debug_calc_bogomips(); -#endif - - pmu_resume(); - - preempt_enable(); - - return 0; -} - -static int do_set_cpu_speed(struct cpufreq_policy *policy, int speed_mode, - int notify) -{ - struct cpufreq_freqs freqs; - unsigned long l3cr; - static unsigned long prev_l3cr; - - freqs.old = cur_freq; - freqs.new = (speed_mode == CPUFREQ_HIGH) ? hi_freq : low_freq; - - if (freqs.old == freqs.new) - return 0; - - if (notify) - cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE); - if (speed_mode == CPUFREQ_LOW && - cpu_has_feature(CPU_FTR_L3CR)) { - l3cr = _get_L3CR(); - if (l3cr & L3CR_L3E) { - prev_l3cr = l3cr; - _set_L3CR(0); - } - } - set_speed_proc(speed_mode == CPUFREQ_LOW); - if (speed_mode == CPUFREQ_HIGH && - cpu_has_feature(CPU_FTR_L3CR)) { - l3cr = _get_L3CR(); - if ((prev_l3cr & L3CR_L3E) && l3cr != prev_l3cr) - _set_L3CR(prev_l3cr); - } - if (notify) - cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE); - cur_freq = (speed_mode == CPUFREQ_HIGH) ? hi_freq : low_freq; - - return 0; -} - -static unsigned int pmac_cpufreq_get_speed(unsigned int cpu) -{ - return cur_freq; -} - -static int pmac_cpufreq_verify(struct cpufreq_policy *policy) -{ - return cpufreq_frequency_table_verify(policy, pmac_cpu_freqs); -} - -static int pmac_cpufreq_target( struct cpufreq_policy *policy, - unsigned int target_freq, - unsigned int relation) -{ - unsigned int newstate = 0; - int rc; - - if (cpufreq_frequency_table_target(policy, pmac_cpu_freqs, - target_freq, relation, &newstate)) - return -EINVAL; - - rc = do_set_cpu_speed(policy, newstate, 1); - - ppc_proc_freq = cur_freq * 1000ul; - return rc; -} - -static int pmac_cpufreq_cpu_init(struct cpufreq_policy *policy) -{ - if (policy->cpu != 0) - return -ENODEV; - - policy->cpuinfo.transition_latency = transition_latency; - policy->cur = cur_freq; - - cpufreq_frequency_table_get_attr(pmac_cpu_freqs, policy->cpu); - return cpufreq_frequency_table_cpuinfo(policy, pmac_cpu_freqs); -} - -static u32 read_gpio(struct device_node *np) -{ - const u32 *reg = of_get_property(np, "reg", NULL); - u32 offset; - - if (reg == NULL) - return 0; - /* That works for all keylargos but shall be fixed properly - * some day... The problem is that it seems we can't rely - * on the "reg" property of the GPIO nodes, they are either - * relative to the base of KeyLargo or to the base of the - * GPIO space, and the device-tree doesn't help. - */ - offset = *reg; - if (offset < KEYLARGO_GPIO_LEVELS0) - offset += KEYLARGO_GPIO_LEVELS0; - return offset; -} - -static int pmac_cpufreq_suspend(struct cpufreq_policy *policy) -{ - /* Ok, this could be made a bit smarter, but let's be robust for now. We - * always force a speed change to high speed before sleep, to make sure - * we have appropriate voltage and/or bus speed for the wakeup process, - * and to make sure our loops_per_jiffies are "good enough", that is will - * not cause too short delays if we sleep in low speed and wake in high - * speed.. - */ - no_schedule = 1; - sleep_freq = cur_freq; - if (cur_freq == low_freq && !is_pmu_based) - do_set_cpu_speed(policy, CPUFREQ_HIGH, 0); - return 0; -} - -static int pmac_cpufreq_resume(struct cpufreq_policy *policy) -{ - /* If we resume, first check if we have a get() function */ - if (get_speed_proc) - cur_freq = get_speed_proc(); - else - cur_freq = 0; - - /* We don't, hrm... we don't really know our speed here, best - * is that we force a switch to whatever it was, which is - * probably high speed due to our suspend() routine - */ - do_set_cpu_speed(policy, sleep_freq == low_freq ? - CPUFREQ_LOW : CPUFREQ_HIGH, 0); - - ppc_proc_freq = cur_freq * 1000ul; - - no_schedule = 0; - return 0; -} - -static struct cpufreq_driver pmac_cpufreq_driver = { - .verify = pmac_cpufreq_verify, - .target = pmac_cpufreq_target, - .get = pmac_cpufreq_get_speed, - .init = pmac_cpufreq_cpu_init, - .suspend = pmac_cpufreq_suspend, - .resume = pmac_cpufreq_resume, - .flags = CPUFREQ_PM_NO_WARN, - .attr = pmac_cpu_freqs_attr, - .name = "powermac", - .owner = THIS_MODULE, -}; - - -static int pmac_cpufreq_init_MacRISC3(struct device_node *cpunode) -{ - struct device_node *volt_gpio_np = of_find_node_by_name(NULL, - "voltage-gpio"); - struct device_node *freq_gpio_np = of_find_node_by_name(NULL, - "frequency-gpio"); - struct device_node *slew_done_gpio_np = of_find_node_by_name(NULL, - "slewing-done"); - const u32 *value; - - /* - * Check to see if it's GPIO driven or PMU only - * - * The way we extract the GPIO address is slightly hackish, but it - * works well enough for now. We need to abstract the whole GPIO - * stuff sooner or later anyway - */ - - if (volt_gpio_np) - voltage_gpio = read_gpio(volt_gpio_np); - if (freq_gpio_np) - frequency_gpio = read_gpio(freq_gpio_np); - if (slew_done_gpio_np) - slew_done_gpio = read_gpio(slew_done_gpio_np); - - /* If we use the frequency GPIOs, calculate the min/max speeds based - * on the bus frequencies - */ - if (frequency_gpio && slew_done_gpio) { - int lenp, rc; - const u32 *freqs, *ratio; - - freqs = of_get_property(cpunode, "bus-frequencies", &lenp); - lenp /= sizeof(u32); - if (freqs == NULL || lenp != 2) { - printk(KERN_ERR "cpufreq: bus-frequencies incorrect or missing\n"); - return 1; - } - ratio = of_get_property(cpunode, "processor-to-bus-ratio*2", - NULL); - if (ratio == NULL) { - printk(KERN_ERR "cpufreq: processor-to-bus-ratio*2 missing\n"); - return 1; - } - - /* Get the min/max bus frequencies */ - low_freq = min(freqs[0], freqs[1]); - hi_freq = max(freqs[0], freqs[1]); - - /* Grrrr.. It _seems_ that the device-tree is lying on the low bus - * frequency, it claims it to be around 84Mhz on some models while - * it appears to be approx. 101Mhz on all. Let's hack around here... - * fortunately, we don't need to be too precise - */ - if (low_freq < 98000000) - low_freq = 101000000; - - /* Convert those to CPU core clocks */ - low_freq = (low_freq * (*ratio)) / 2000; - hi_freq = (hi_freq * (*ratio)) / 2000; - - /* Now we get the frequencies, we read the GPIO to see what is out current - * speed - */ - rc = pmac_call_feature(PMAC_FTR_READ_GPIO, NULL, frequency_gpio, 0); - cur_freq = (rc & 0x01) ? hi_freq : low_freq; - - set_speed_proc = gpios_set_cpu_speed; - return 1; - } - - /* If we use the PMU, look for the min & max frequencies in the - * device-tree - */ - value = of_get_property(cpunode, "min-clock-frequency", NULL); - if (!value) - return 1; - low_freq = (*value) / 1000; - /* The PowerBook G4 12" (PowerBook6,1) has an error in the device-tree - * here */ - if (low_freq < 100000) - low_freq *= 10; - - value = of_get_property(cpunode, "max-clock-frequency", NULL); - if (!value) - return 1; - hi_freq = (*value) / 1000; - set_speed_proc = pmu_set_cpu_speed; - is_pmu_based = 1; - - return 0; -} - -static int pmac_cpufreq_init_7447A(struct device_node *cpunode) -{ - struct device_node *volt_gpio_np; - - if (of_get_property(cpunode, "dynamic-power-step", NULL) == NULL) - return 1; - - volt_gpio_np = of_find_node_by_name(NULL, "cpu-vcore-select"); - if (volt_gpio_np) - voltage_gpio = read_gpio(volt_gpio_np); - if (!voltage_gpio){ - printk(KERN_ERR "cpufreq: missing cpu-vcore-select gpio\n"); - return 1; - } - - /* OF only reports the high frequency */ - hi_freq = cur_freq; - low_freq = cur_freq/2; - - /* Read actual frequency from CPU */ - cur_freq = dfs_get_cpu_speed(); - set_speed_proc = dfs_set_cpu_speed; - get_speed_proc = dfs_get_cpu_speed; - - return 0; -} - -static int pmac_cpufreq_init_750FX(struct device_node *cpunode) -{ - struct device_node *volt_gpio_np; - u32 pvr; - const u32 *value; - - if (of_get_property(cpunode, "dynamic-power-step", NULL) == NULL) - return 1; - - hi_freq = cur_freq; - value = of_get_property(cpunode, "reduced-clock-frequency", NULL); - if (!value) - return 1; - low_freq = (*value) / 1000; - - volt_gpio_np = of_find_node_by_name(NULL, "cpu-vcore-select"); - if (volt_gpio_np) - voltage_gpio = read_gpio(volt_gpio_np); - - pvr = mfspr(SPRN_PVR); - has_cpu_l2lve = !((pvr & 0xf00) == 0x100); - - set_speed_proc = cpu_750fx_cpu_speed; - get_speed_proc = cpu_750fx_get_cpu_speed; - cur_freq = cpu_750fx_get_cpu_speed(); - - return 0; -} - -/* Currently, we support the following machines: - * - * - Titanium PowerBook 1Ghz (PMU based, 667Mhz & 1Ghz) - * - Titanium PowerBook 800 (PMU based, 667Mhz & 800Mhz) - * - Titanium PowerBook 400 (PMU based, 300Mhz & 400Mhz) - * - Titanium PowerBook 500 (PMU based, 300Mhz & 500Mhz) - * - iBook2 500/600 (PMU based, 400Mhz & 500/600Mhz) - * - iBook2 700 (CPU based, 400Mhz & 700Mhz, support low voltage) - * - Recent MacRISC3 laptops - * - All new machines with 7447A CPUs - */ -static int __init pmac_cpufreq_setup(void) -{ - struct device_node *cpunode; - const u32 *value; - - if (strstr(cmd_line, "nocpufreq")) - return 0; - - /* Assume only one CPU */ - cpunode = of_find_node_by_type(NULL, "cpu"); - if (!cpunode) - goto out; - - /* Get current cpu clock freq */ - value = of_get_property(cpunode, "clock-frequency", NULL); - if (!value) - goto out; - cur_freq = (*value) / 1000; - transition_latency = CPUFREQ_ETERNAL; - - /* Check for 7447A based MacRISC3 */ - if (of_machine_is_compatible("MacRISC3") && - of_get_property(cpunode, "dynamic-power-step", NULL) && - PVR_VER(mfspr(SPRN_PVR)) == 0x8003) { - pmac_cpufreq_init_7447A(cpunode); - transition_latency = 8000000; - /* Check for other MacRISC3 machines */ - } else if (of_machine_is_compatible("PowerBook3,4") || - of_machine_is_compatible("PowerBook3,5") || - of_machine_is_compatible("MacRISC3")) { - pmac_cpufreq_init_MacRISC3(cpunode); - /* Else check for iBook2 500/600 */ - } else if (of_machine_is_compatible("PowerBook4,1")) { - hi_freq = cur_freq; - low_freq = 400000; - set_speed_proc = pmu_set_cpu_speed; - is_pmu_based = 1; - } - /* Else check for TiPb 550 */ - else if (of_machine_is_compatible("PowerBook3,3") && cur_freq == 550000) { - hi_freq = cur_freq; - low_freq = 500000; - set_speed_proc = pmu_set_cpu_speed; - is_pmu_based = 1; - } - /* Else check for TiPb 400 & 500 */ - else if (of_machine_is_compatible("PowerBook3,2")) { - /* We only know about the 400 MHz and the 500Mhz model - * they both have 300 MHz as low frequency - */ - if (cur_freq < 350000 || cur_freq > 550000) - goto out; - hi_freq = cur_freq; - low_freq = 300000; - set_speed_proc = pmu_set_cpu_speed; - is_pmu_based = 1; - } - /* Else check for 750FX */ - else if (PVR_VER(mfspr(SPRN_PVR)) == 0x7000) - pmac_cpufreq_init_750FX(cpunode); -out: - of_node_put(cpunode); - if (set_speed_proc == NULL) - return -ENODEV; - - pmac_cpu_freqs[CPUFREQ_LOW].frequency = low_freq; - pmac_cpu_freqs[CPUFREQ_HIGH].frequency = hi_freq; - ppc_proc_freq = cur_freq * 1000ul; - - printk(KERN_INFO "Registering PowerMac CPU frequency driver\n"); - printk(KERN_INFO "Low: %d Mhz, High: %d Mhz, Boot: %d Mhz\n", - low_freq/1000, hi_freq/1000, cur_freq/1000); - - return cpufreq_register_driver(&pmac_cpufreq_driver); -} - -module_init(pmac_cpufreq_setup); - diff --git a/arch/powerpc/platforms/powermac/cpufreq_64.c b/arch/powerpc/platforms/powermac/cpufreq_64.c deleted file mode 100644 index 7ba4234..0000000 --- a/arch/powerpc/platforms/powermac/cpufreq_64.c +++ /dev/null @@ -1,746 +0,0 @@ -/* - * Copyright (C) 2002 - 2005 Benjamin Herrenschmidt <benh@kernel.crashing.org> - * and Markus Demleitner <msdemlei@cl.uni-heidelberg.de> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This driver adds basic cpufreq support for SMU & 970FX based G5 Macs, - * that is iMac G5 and latest single CPU desktop. - */ - -#undef DEBUG - -#include <linux/module.h> -#include <linux/types.h> -#include <linux/errno.h> -#include <linux/kernel.h> -#include <linux/delay.h> -#include <linux/sched.h> -#include <linux/cpufreq.h> -#include <linux/init.h> -#include <linux/completion.h> -#include <linux/mutex.h> -#include <asm/prom.h> -#include <asm/machdep.h> -#include <asm/irq.h> -#include <asm/sections.h> -#include <asm/cputable.h> -#include <asm/time.h> -#include <asm/smu.h> -#include <asm/pmac_pfunc.h> - -#define DBG(fmt...) pr_debug(fmt) - -/* see 970FX user manual */ - -#define SCOM_PCR 0x0aa001 /* PCR scom addr */ - -#define PCR_HILO_SELECT 0x80000000U /* 1 = PCR, 0 = PCRH */ -#define PCR_SPEED_FULL 0x00000000U /* 1:1 speed value */ -#define PCR_SPEED_HALF 0x00020000U /* 1:2 speed value */ -#define PCR_SPEED_QUARTER 0x00040000U /* 1:4 speed value */ -#define PCR_SPEED_MASK 0x000e0000U /* speed mask */ -#define PCR_SPEED_SHIFT 17 -#define PCR_FREQ_REQ_VALID 0x00010000U /* freq request valid */ -#define PCR_VOLT_REQ_VALID 0x00008000U /* volt request valid */ -#define PCR_TARGET_TIME_MASK 0x00006000U /* target time */ -#define PCR_STATLAT_MASK 0x00001f00U /* STATLAT value */ -#define PCR_SNOOPLAT_MASK 0x000000f0U /* SNOOPLAT value */ -#define PCR_SNOOPACC_MASK 0x0000000fU /* SNOOPACC value */ - -#define SCOM_PSR 0x408001 /* PSR scom addr */ -/* warning: PSR is a 64 bits register */ -#define PSR_CMD_RECEIVED 0x2000000000000000U /* command received */ -#define PSR_CMD_COMPLETED 0x1000000000000000U /* command completed */ -#define PSR_CUR_SPEED_MASK 0x0300000000000000U /* current speed */ -#define PSR_CUR_SPEED_SHIFT (56) - -/* - * The G5 only supports two frequencies (Quarter speed is not supported) - */ -#define CPUFREQ_HIGH 0 -#define CPUFREQ_LOW 1 - -static struct cpufreq_frequency_table g5_cpu_freqs[] = { - {CPUFREQ_HIGH, 0}, - {CPUFREQ_LOW, 0}, - {0, CPUFREQ_TABLE_END}, -}; - -static struct freq_attr* g5_cpu_freqs_attr[] = { - &cpufreq_freq_attr_scaling_available_freqs, - NULL, -}; - -/* Power mode data is an array of the 32 bits PCR values to use for - * the various frequencies, retrieved from the device-tree - */ -static int g5_pmode_cur; - -static void (*g5_switch_volt)(int speed_mode); -static int (*g5_switch_freq)(int speed_mode); -static int (*g5_query_freq)(void); - -static DEFINE_MUTEX(g5_switch_mutex); - -static unsigned long transition_latency; - -#ifdef CONFIG_PMAC_SMU - -static const u32 *g5_pmode_data; -static int g5_pmode_max; - -static struct smu_sdbp_fvt *g5_fvt_table; /* table of op. points */ -static int g5_fvt_count; /* number of op. points */ -static int g5_fvt_cur; /* current op. point */ - -/* - * SMU based voltage switching for Neo2 platforms - */ - -static void g5_smu_switch_volt(int speed_mode) -{ - struct smu_simple_cmd cmd; - - DECLARE_COMPLETION_ONSTACK(comp); - smu_queue_simple(&cmd, SMU_CMD_POWER_COMMAND, 8, smu_done_complete, - &comp, 'V', 'S', 'L', 'E', 'W', - 0xff, g5_fvt_cur+1, speed_mode); - wait_for_completion(&comp); -} - -/* - * Platform function based voltage/vdnap switching for Neo2 - */ - -static struct pmf_function *pfunc_set_vdnap0; -static struct pmf_function *pfunc_vdnap0_complete; - -static void g5_vdnap_switch_volt(int speed_mode) -{ - struct pmf_args args; - u32 slew, done = 0; - unsigned long timeout; - - slew = (speed_mode == CPUFREQ_LOW) ? 1 : 0; - args.count = 1; - args.u[0].p = &slew; - - pmf_call_one(pfunc_set_vdnap0, &args); - - /* It's an irq GPIO so we should be able to just block here, - * I'll do that later after I've properly tested the IRQ code for - * platform functions - */ - timeout = jiffies + HZ/10; - while(!time_after(jiffies, timeout)) { - args.count = 1; - args.u[0].p = &done; - pmf_call_one(pfunc_vdnap0_complete, &args); - if (done) - break; - msleep(1); - } - if (done == 0) - printk(KERN_WARNING "cpufreq: Timeout in clock slewing !\n"); -} - - -/* - * SCOM based frequency switching for 970FX rev3 - */ -static int g5_scom_switch_freq(int speed_mode) -{ - unsigned long flags; - int to; - - /* If frequency is going up, first ramp up the voltage */ - if (speed_mode < g5_pmode_cur) - g5_switch_volt(speed_mode); - - local_irq_save(flags); - - /* Clear PCR high */ - scom970_write(SCOM_PCR, 0); - /* Clear PCR low */ - scom970_write(SCOM_PCR, PCR_HILO_SELECT | 0); - /* Set PCR low */ - scom970_write(SCOM_PCR, PCR_HILO_SELECT | - g5_pmode_data[speed_mode]); - - /* Wait for completion */ - for (to = 0; to < 10; to++) { - unsigned long psr = scom970_read(SCOM_PSR); - - if ((psr & PSR_CMD_RECEIVED) == 0 && - (((psr >> PSR_CUR_SPEED_SHIFT) ^ - (g5_pmode_data[speed_mode] >> PCR_SPEED_SHIFT)) & 0x3) - == 0) - break; - if (psr & PSR_CMD_COMPLETED) - break; - udelay(100); - } - - local_irq_restore(flags); - - /* If frequency is going down, last ramp the voltage */ - if (speed_mode > g5_pmode_cur) - g5_switch_volt(speed_mode); - - g5_pmode_cur = speed_mode; - ppc_proc_freq = g5_cpu_freqs[speed_mode].frequency * 1000ul; - - return 0; -} - -static int g5_scom_query_freq(void) -{ - unsigned long psr = scom970_read(SCOM_PSR); - int i; - - for (i = 0; i <= g5_pmode_max; i++) - if ((((psr >> PSR_CUR_SPEED_SHIFT) ^ - (g5_pmode_data[i] >> PCR_SPEED_SHIFT)) & 0x3) == 0) - break; - return i; -} - -/* - * Fake voltage switching for platforms with missing support - */ - -static void g5_dummy_switch_volt(int speed_mode) -{ -} - -#endif /* CONFIG_PMAC_SMU */ - -/* - * Platform function based voltage switching for PowerMac7,2 & 7,3 - */ - -static struct pmf_function *pfunc_cpu0_volt_high; -static struct pmf_function *pfunc_cpu0_volt_low; -static struct pmf_function *pfunc_cpu1_volt_high; -static struct pmf_function *pfunc_cpu1_volt_low; - -static void g5_pfunc_switch_volt(int speed_mode) -{ - if (speed_mode == CPUFREQ_HIGH) { - if (pfunc_cpu0_volt_high) - pmf_call_one(pfunc_cpu0_volt_high, NULL); - if (pfunc_cpu1_volt_high) - pmf_call_one(pfunc_cpu1_volt_high, NULL); - } else { - if (pfunc_cpu0_volt_low) - pmf_call_one(pfunc_cpu0_volt_low, NULL); - if (pfunc_cpu1_volt_low) - pmf_call_one(pfunc_cpu1_volt_low, NULL); - } - msleep(10); /* should be faster , to fix */ -} - -/* - * Platform function based frequency switching for PowerMac7,2 & 7,3 - */ - -static struct pmf_function *pfunc_cpu_setfreq_high; -static struct pmf_function *pfunc_cpu_setfreq_low; -static struct pmf_function *pfunc_cpu_getfreq; -static struct pmf_function *pfunc_slewing_done; - -static int g5_pfunc_switch_freq(int speed_mode) -{ - struct pmf_args args; - u32 done = 0; - unsigned long timeout; - int rc; - - DBG("g5_pfunc_switch_freq(%d)\n", speed_mode); - - /* If frequency is going up, first ramp up the voltage */ - if (speed_mode < g5_pmode_cur) - g5_switch_volt(speed_mode); - - /* Do it */ - if (speed_mode == CPUFREQ_HIGH) - rc = pmf_call_one(pfunc_cpu_setfreq_high, NULL); - else - rc = pmf_call_one(pfunc_cpu_setfreq_low, NULL); - - if (rc) - printk(KERN_WARNING "cpufreq: pfunc switch error %d\n", rc); - - /* It's an irq GPIO so we should be able to just block here, - * I'll do that later after I've properly tested the IRQ code for - * platform functions - */ - timeout = jiffies + HZ/10; - while(!time_after(jiffies, timeout)) { - args.count = 1; - args.u[0].p = &done; - pmf_call_one(pfunc_slewing_done, &args); - if (done) - break; - msleep(1); - } - if (done == 0) - printk(KERN_WARNING "cpufreq: Timeout in clock slewing !\n"); - - /* If frequency is going down, last ramp the voltage */ - if (speed_mode > g5_pmode_cur) - g5_switch_volt(speed_mode); - - g5_pmode_cur = speed_mode; - ppc_proc_freq = g5_cpu_freqs[speed_mode].frequency * 1000ul; - - return 0; -} - -static int g5_pfunc_query_freq(void) -{ - struct pmf_args args; - u32 val = 0; - - args.count = 1; - args.u[0].p = &val; - pmf_call_one(pfunc_cpu_getfreq, &args); - return val ? CPUFREQ_HIGH : CPUFREQ_LOW; -} - - -/* - * Common interface to the cpufreq core - */ - -static int g5_cpufreq_verify(struct cpufreq_policy *policy) -{ - return cpufreq_frequency_table_verify(policy, g5_cpu_freqs); -} - -static int g5_cpufreq_target(struct cpufreq_policy *policy, - unsigned int target_freq, unsigned int relation) -{ - unsigned int newstate = 0; - struct cpufreq_freqs freqs; - int rc; - - if (cpufreq_frequency_table_target(policy, g5_cpu_freqs, - target_freq, relation, &newstate)) - return -EINVAL; - - if (g5_pmode_cur == newstate) - return 0; - - mutex_lock(&g5_switch_mutex); - - freqs.old = g5_cpu_freqs[g5_pmode_cur].frequency; - freqs.new = g5_cpu_freqs[newstate].frequency; - - cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE); - rc = g5_switch_freq(newstate); - cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE); - - mutex_unlock(&g5_switch_mutex); - - return rc; -} - -static unsigned int g5_cpufreq_get_speed(unsigned int cpu) -{ - return g5_cpu_freqs[g5_pmode_cur].frequency; -} - -static int g5_cpufreq_cpu_init(struct cpufreq_policy *policy) -{ - policy->cpuinfo.transition_latency = transition_latency; - policy->cur = g5_cpu_freqs[g5_query_freq()].frequency; - /* secondary CPUs are tied to the primary one by the - * cpufreq core if in the secondary policy we tell it that - * it actually must be one policy together with all others. */ - cpumask_copy(policy->cpus, cpu_online_mask); - cpufreq_frequency_table_get_attr(g5_cpu_freqs, policy->cpu); - - return cpufreq_frequency_table_cpuinfo(policy, - g5_cpu_freqs); -} - - -static struct cpufreq_driver g5_cpufreq_driver = { - .name = "powermac", - .owner = THIS_MODULE, - .flags = CPUFREQ_CONST_LOOPS, - .init = g5_cpufreq_cpu_init, - .verify = g5_cpufreq_verify, - .target = g5_cpufreq_target, - .get = g5_cpufreq_get_speed, - .attr = g5_cpu_freqs_attr, -}; - - -#ifdef CONFIG_PMAC_SMU - -static int __init g5_neo2_cpufreq_init(struct device_node *cpus) -{ - struct device_node *cpunode; - unsigned int psize, ssize; - unsigned long max_freq; - char *freq_method, *volt_method; - const u32 *valp; - u32 pvr_hi; - int use_volts_vdnap = 0; - int use_volts_smu = 0; - int rc = -ENODEV; - - /* Check supported platforms */ - if (of_machine_is_compatible("PowerMac8,1") || - of_machine_is_compatible("PowerMac8,2") || - of_machine_is_compatible("PowerMac9,1")) - use_volts_smu = 1; - else if (of_machine_is_compatible("PowerMac11,2")) - use_volts_vdnap = 1; - else - return -ENODEV; - - /* Get first CPU node */ - for (cpunode = NULL; - (cpunode = of_get_next_child(cpus, cpunode)) != NULL;) { - const u32 *reg = of_get_property(cpunode, "reg", NULL); - if (reg == NULL || (*reg) != 0) - continue; - if (!strcmp(cpunode->type, "cpu")) - break; - } - if (cpunode == NULL) { - printk(KERN_ERR "cpufreq: Can't find any CPU 0 node\n"); - return -ENODEV; - } - - /* Check 970FX for now */ - valp = of_get_property(cpunode, "cpu-version", NULL); - if (!valp) { - DBG("No cpu-version property !\n"); - goto bail_noprops; - } - pvr_hi = (*valp) >> 16; - if (pvr_hi != 0x3c && pvr_hi != 0x44) { - printk(KERN_ERR "cpufreq: Unsupported CPU version\n"); - goto bail_noprops; - } - - /* Look for the powertune data in the device-tree */ - g5_pmode_data = of_get_property(cpunode, "power-mode-data",&psize); - if (!g5_pmode_data) { - DBG("No power-mode-data !\n"); - goto bail_noprops; - } - g5_pmode_max = psize / sizeof(u32) - 1; - - if (use_volts_smu) { - const struct smu_sdbp_header *shdr; - - /* Look for the FVT table */ - shdr = smu_get_sdb_partition(SMU_SDB_FVT_ID, NULL); - if (!shdr) - goto bail_noprops; - g5_fvt_table = (struct smu_sdbp_fvt *)&shdr[1]; - ssize = (shdr->len * sizeof(u32)) - - sizeof(struct smu_sdbp_header); - g5_fvt_count = ssize / sizeof(struct smu_sdbp_fvt); - g5_fvt_cur = 0; - - /* Sanity checking */ - if (g5_fvt_count < 1 || g5_pmode_max < 1) - goto bail_noprops; - - g5_switch_volt = g5_smu_switch_volt; - volt_method = "SMU"; - } else if (use_volts_vdnap) { - struct device_node *root; - - root = of_find_node_by_path("/"); - if (root == NULL) { - printk(KERN_ERR "cpufreq: Can't find root of " - "device tree\n"); - goto bail_noprops; - } - pfunc_set_vdnap0 = pmf_find_function(root, "set-vdnap0"); - pfunc_vdnap0_complete = - pmf_find_function(root, "slewing-done"); - if (pfunc_set_vdnap0 == NULL || - pfunc_vdnap0_complete == NULL) { - printk(KERN_ERR "cpufreq: Can't find required " - "platform function\n"); - goto bail_noprops; - } - - g5_switch_volt = g5_vdnap_switch_volt; - volt_method = "GPIO"; - } else { - g5_switch_volt = g5_dummy_switch_volt; - volt_method = "none"; - } - - /* - * From what I see, clock-frequency is always the maximal frequency. - * The current driver can not slew sysclk yet, so we really only deal - * with powertune steps for now. We also only implement full freq and - * half freq in this version. So far, I haven't yet seen a machine - * supporting anything else. - */ - valp = of_get_property(cpunode, "clock-frequency", NULL); - if (!valp) - return -ENODEV; - max_freq = (*valp)/1000; - g5_cpu_freqs[0].frequency = max_freq; - g5_cpu_freqs[1].frequency = max_freq/2; - - /* Set callbacks */ - transition_latency = 12000; - g5_switch_freq = g5_scom_switch_freq; - g5_query_freq = g5_scom_query_freq; - freq_method = "SCOM"; - - /* Force apply current frequency to make sure everything is in - * sync (voltage is right for example). Firmware may leave us with - * a strange setting ... - */ - g5_switch_volt(CPUFREQ_HIGH); - msleep(10); - g5_pmode_cur = -1; - g5_switch_freq(g5_query_freq()); - - printk(KERN_INFO "Registering G5 CPU frequency driver\n"); - printk(KERN_INFO "Frequency method: %s, Voltage method: %s\n", - freq_method, volt_method); - printk(KERN_INFO "Low: %d Mhz, High: %d Mhz, Cur: %d MHz\n", - g5_cpu_freqs[1].frequency/1000, - g5_cpu_freqs[0].frequency/1000, - g5_cpu_freqs[g5_pmode_cur].frequency/1000); - - rc = cpufreq_register_driver(&g5_cpufreq_driver); - - /* We keep the CPU node on hold... hopefully, Apple G5 don't have - * hotplug CPU with a dynamic device-tree ... - */ - return rc; - - bail_noprops: - of_node_put(cpunode); - - return rc; -} - -#endif /* CONFIG_PMAC_SMU */ - - -static int __init g5_pm72_cpufreq_init(struct device_node *cpus) -{ - struct device_node *cpuid = NULL, *hwclock = NULL, *cpunode = NULL; - const u8 *eeprom = NULL; - const u32 *valp; - u64 max_freq, min_freq, ih, il; - int has_volt = 1, rc = 0; - - DBG("cpufreq: Initializing for PowerMac7,2, PowerMac7,3 and" - " RackMac3,1...\n"); - - /* Get first CPU node */ - for (cpunode = NULL; - (cpunode = of_get_next_child(cpus, cpunode)) != NULL;) { - if (!strcmp(cpunode->type, "cpu")) - break; - } - if (cpunode == NULL) { - printk(KERN_ERR "cpufreq: Can't find any CPU node\n"); - return -ENODEV; - } - - /* Lookup the cpuid eeprom node */ - cpuid = of_find_node_by_path("/u3@0,f8000000/i2c@f8001000/cpuid@a0"); - if (cpuid != NULL) - eeprom = of_get_property(cpuid, "cpuid", NULL); - if (eeprom == NULL) { - printk(KERN_ERR "cpufreq: Can't find cpuid EEPROM !\n"); - rc = -ENODEV; - goto bail; - } - - /* Lookup the i2c hwclock */ - for (hwclock = NULL; - (hwclock = of_find_node_by_name(hwclock, "i2c-hwclock")) != NULL;){ - const char *loc = of_get_property(hwclock, - "hwctrl-location", NULL); - if (loc == NULL) - continue; - if (strcmp(loc, "CPU CLOCK")) - continue; - if (!of_get_property(hwclock, "platform-get-frequency", NULL)) - continue; - break; - } - if (hwclock == NULL) { - printk(KERN_ERR "cpufreq: Can't find i2c clock chip !\n"); - rc = -ENODEV; - goto bail; - } - - DBG("cpufreq: i2c clock chip found: %s\n", hwclock->full_name); - - /* Now get all the platform functions */ - pfunc_cpu_getfreq = - pmf_find_function(hwclock, "get-frequency"); - pfunc_cpu_setfreq_high = - pmf_find_function(hwclock, "set-frequency-high"); - pfunc_cpu_setfreq_low = - pmf_find_function(hwclock, "set-frequency-low"); - pfunc_slewing_done = - pmf_find_function(hwclock, "slewing-done"); - pfunc_cpu0_volt_high = - pmf_find_function(hwclock, "set-voltage-high-0"); - pfunc_cpu0_volt_low = - pmf_find_function(hwclock, "set-voltage-low-0"); - pfunc_cpu1_volt_high = - pmf_find_function(hwclock, "set-voltage-high-1"); - pfunc_cpu1_volt_low = - pmf_find_function(hwclock, "set-voltage-low-1"); - - /* Check we have minimum requirements */ - if (pfunc_cpu_getfreq == NULL || pfunc_cpu_setfreq_high == NULL || - pfunc_cpu_setfreq_low == NULL || pfunc_slewing_done == NULL) { - printk(KERN_ERR "cpufreq: Can't find platform functions !\n"); - rc = -ENODEV; - goto bail; - } - - /* Check that we have complete sets */ - if (pfunc_cpu0_volt_high == NULL || pfunc_cpu0_volt_low == NULL) { - pmf_put_function(pfunc_cpu0_volt_high); - pmf_put_function(pfunc_cpu0_volt_low); - pfunc_cpu0_volt_high = pfunc_cpu0_volt_low = NULL; - has_volt = 0; - } - if (!has_volt || - pfunc_cpu1_volt_high == NULL || pfunc_cpu1_volt_low == NULL) { - pmf_put_function(pfunc_cpu1_volt_high); - pmf_put_function(pfunc_cpu1_volt_low); - pfunc_cpu1_volt_high = pfunc_cpu1_volt_low = NULL; - } - - /* Note: The device tree also contains a "platform-set-values" - * function for which I haven't quite figured out the usage. It - * might have to be called on init and/or wakeup, I'm not too sure - * but things seem to work fine without it so far ... - */ - - /* Get max frequency from device-tree */ - valp = of_get_property(cpunode, "clock-frequency", NULL); - if (!valp) { - printk(KERN_ERR "cpufreq: Can't find CPU frequency !\n"); - rc = -ENODEV; - goto bail; - } - - max_freq = (*valp)/1000; - - /* Now calculate reduced frequency by using the cpuid input freq - * ratio. This requires 64 bits math unless we are willing to lose - * some precision - */ - ih = *((u32 *)(eeprom + 0x10)); - il = *((u32 *)(eeprom + 0x20)); - - /* Check for machines with no useful settings */ - if (il == ih) { - printk(KERN_WARNING "cpufreq: No low frequency mode available" - " on this model !\n"); - rc = -ENODEV; - goto bail; - } - - min_freq = 0; - if (ih != 0 && il != 0) - min_freq = (max_freq * il) / ih; - - /* Sanity check */ - if (min_freq >= max_freq || min_freq < 1000) { - printk(KERN_ERR "cpufreq: Can't calculate low frequency !\n"); - rc = -ENXIO; - goto bail; - } - g5_cpu_freqs[0].frequency = max_freq; - g5_cpu_freqs[1].frequency = min_freq; - - /* Set callbacks */ - transition_latency = CPUFREQ_ETERNAL; - g5_switch_volt = g5_pfunc_switch_volt; - g5_switch_freq = g5_pfunc_switch_freq; - g5_query_freq = g5_pfunc_query_freq; - - /* Force apply current frequency to make sure everything is in - * sync (voltage is right for example). Firmware may leave us with - * a strange setting ... - */ - g5_switch_volt(CPUFREQ_HIGH); - msleep(10); - g5_pmode_cur = -1; - g5_switch_freq(g5_query_freq()); - - printk(KERN_INFO "Registering G5 CPU frequency driver\n"); - printk(KERN_INFO "Frequency method: i2c/pfunc, " - "Voltage method: %s\n", has_volt ? "i2c/pfunc" : "none"); - printk(KERN_INFO "Low: %d Mhz, High: %d Mhz, Cur: %d MHz\n", - g5_cpu_freqs[1].frequency/1000, - g5_cpu_freqs[0].frequency/1000, - g5_cpu_freqs[g5_pmode_cur].frequency/1000); - - rc = cpufreq_register_driver(&g5_cpufreq_driver); - bail: - if (rc != 0) { - pmf_put_function(pfunc_cpu_getfreq); - pmf_put_function(pfunc_cpu_setfreq_high); - pmf_put_function(pfunc_cpu_setfreq_low); - pmf_put_function(pfunc_slewing_done); - pmf_put_function(pfunc_cpu0_volt_high); - pmf_put_function(pfunc_cpu0_volt_low); - pmf_put_function(pfunc_cpu1_volt_high); - pmf_put_function(pfunc_cpu1_volt_low); - } - of_node_put(hwclock); - of_node_put(cpuid); - of_node_put(cpunode); - - return rc; -} - -static int __init g5_cpufreq_init(void) -{ - struct device_node *cpus; - int rc = 0; - - cpus = of_find_node_by_path("/cpus"); - if (cpus == NULL) { - DBG("No /cpus node !\n"); - return -ENODEV; - } - - if (of_machine_is_compatible("PowerMac7,2") || - of_machine_is_compatible("PowerMac7,3") || - of_machine_is_compatible("RackMac3,1")) - rc = g5_pm72_cpufreq_init(cpus); -#ifdef CONFIG_PMAC_SMU - else - rc = g5_neo2_cpufreq_init(cpus); -#endif /* CONFIG_PMAC_SMU */ - - of_node_put(cpus); - return rc; -} - -module_init(g5_cpufreq_init); - - -MODULE_LICENSE("GPL"); diff --git a/arch/powerpc/platforms/powermac/low_i2c.c b/arch/powerpc/platforms/powermac/low_i2c.c index fc536f2..7553b6a 100644 --- a/arch/powerpc/platforms/powermac/low_i2c.c +++ b/arch/powerpc/platforms/powermac/low_i2c.c @@ -452,7 +452,7 @@ static int kw_i2c_xfer(struct pmac_i2c_bus *bus, u8 addrdir, int subsize, */ if (use_irq) { /* Clear completion */ - INIT_COMPLETION(host->complete); + reinit_completion(&host->complete); /* Ack stale interrupts */ kw_write_reg(reg_isr, kw_read_reg(reg_isr)); /* Arm timeout */ @@ -717,7 +717,7 @@ static int pmu_i2c_xfer(struct pmac_i2c_bus *bus, u8 addrdir, int subsize, return -EINVAL; } - INIT_COMPLETION(comp); + reinit_completion(&comp); req->data[0] = PMU_I2C_CMD; req->reply[0] = 0xff; req->nbytes = sizeof(struct pmu_i2c_hdr) + 1; @@ -748,7 +748,7 @@ static int pmu_i2c_xfer(struct pmac_i2c_bus *bus, u8 addrdir, int subsize, hdr->bus = PMU_I2C_BUS_STATUS; - INIT_COMPLETION(comp); + reinit_completion(&comp); req->data[0] = PMU_I2C_CMD; req->reply[0] = 0xff; req->nbytes = 2; diff --git a/arch/powerpc/platforms/powermac/pfunc_base.c b/arch/powerpc/platforms/powermac/pfunc_base.c index f5e3cda..e49d07f 100644 --- a/arch/powerpc/platforms/powermac/pfunc_base.c +++ b/arch/powerpc/platforms/powermac/pfunc_base.c @@ -4,6 +4,7 @@ #include <linux/kernel.h> #include <linux/interrupt.h> #include <linux/spinlock.h> +#include <linux/of_irq.h> #include <asm/pmac_feature.h> #include <asm/pmac_pfunc.h> diff --git a/arch/powerpc/platforms/powermac/pic.c b/arch/powerpc/platforms/powermac/pic.c index 31036b5..4c24bf6 100644 --- a/arch/powerpc/platforms/powermac/pic.c +++ b/arch/powerpc/platforms/powermac/pic.c @@ -393,8 +393,8 @@ static void __init pmac_pic_probe_oldstyle(void) #endif } -int of_irq_map_oldworld(struct device_node *device, int index, - struct of_irq *out_irq) +int of_irq_parse_oldworld(struct device_node *device, int index, + struct of_phandle_args *out_irq) { const u32 *ints = NULL; int intlen; @@ -422,9 +422,9 @@ int of_irq_map_oldworld(struct device_node *device, int index, if (index >= intlen) return -EINVAL; - out_irq->controller = NULL; - out_irq->specifier[0] = ints[index]; - out_irq->size = 1; + out_irq->np = NULL; + out_irq->args[0] = ints[index]; + out_irq->args_count = 1; return 0; } diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c index bdb738a..5cbd4d6 100644 --- a/arch/powerpc/platforms/powermac/smp.c +++ b/arch/powerpc/platforms/powermac/smp.c @@ -192,7 +192,7 @@ static int psurge_secondary_ipi_init(void) { int rc = -ENOMEM; - psurge_host = irq_domain_add_nomap(NULL, 0, &psurge_host_ops, NULL); + psurge_host = irq_domain_add_nomap(NULL, ~0, &psurge_host_ops, NULL); if (psurge_host) psurge_secondary_virq = irq_create_direct_mapping(psurge_host); @@ -885,7 +885,7 @@ static int smp_core99_cpu_notify(struct notifier_block *self, return NOTIFY_OK; } -static struct notifier_block __cpuinitdata smp_core99_cpu_nb = { +static struct notifier_block smp_core99_cpu_nb = { .notifier_call = smp_core99_cpu_notify, }; #endif /* CONFIG_HOTPLUG_CPU */ diff --git a/arch/powerpc/platforms/powernv/Kconfig b/arch/powerpc/platforms/powernv/Kconfig index c24684c..9fced3f 100644 --- a/arch/powerpc/platforms/powernv/Kconfig +++ b/arch/powerpc/platforms/powernv/Kconfig @@ -7,6 +7,10 @@ config PPC_POWERNV select PPC_P7_NAP select PPC_PCI_CHOICE if EMBEDDED select EPAPR_BOOT + select PPC_INDIRECT_PIO + select PPC_UDBG_16550 + select PPC_SCOM + select ARCH_RANDOM default y config POWERNV_MSI diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile index bcc3cb4..873fa13 100644 --- a/arch/powerpc/platforms/powernv/Makefile +++ b/arch/powerpc/platforms/powernv/Makefile @@ -1,5 +1,8 @@ obj-y += setup.o opal-takeover.o opal-wrappers.o opal.o -obj-y += opal-rtc.o opal-nvram.o +obj-y += opal-rtc.o opal-nvram.o opal-lpc.o opal-flash.o +obj-y += rng.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_PCI) += pci.o pci-p5ioc2.o pci-ioda.o +obj-$(CONFIG_EEH) += eeh-ioda.o eeh-powernv.o +obj-$(CONFIG_PPC_SCOM) += opal-xscom.o diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c new file mode 100644 index 0000000..02245ce --- /dev/null +++ b/arch/powerpc/platforms/powernv/eeh-ioda.c @@ -0,0 +1,1017 @@ +/* + * The file intends to implement the functions needed by EEH, which is + * built on IODA compliant chip. Actually, lots of functions related + * to EEH would be built based on the OPAL APIs. + * + * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2013. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/bootmem.h> +#include <linux/debugfs.h> +#include <linux/delay.h> +#include <linux/init.h> +#include <linux/io.h> +#include <linux/irq.h> +#include <linux/kernel.h> +#include <linux/msi.h> +#include <linux/notifier.h> +#include <linux/pci.h> +#include <linux/string.h> + +#include <asm/eeh.h> +#include <asm/eeh_event.h> +#include <asm/io.h> +#include <asm/iommu.h> +#include <asm/msi_bitmap.h> +#include <asm/opal.h> +#include <asm/pci-bridge.h> +#include <asm/ppc-pci.h> +#include <asm/tce.h> + +#include "powernv.h" +#include "pci.h" + +static char *hub_diag = NULL; +static int ioda_eeh_nb_init = 0; + +static int ioda_eeh_event(struct notifier_block *nb, + unsigned long events, void *change) +{ + uint64_t changed_evts = (uint64_t)change; + + /* We simply send special EEH event */ + if ((changed_evts & OPAL_EVENT_PCI_ERROR) && + (events & OPAL_EVENT_PCI_ERROR)) + eeh_send_failure_event(NULL); + + return 0; +} + +static struct notifier_block ioda_eeh_nb = { + .notifier_call = ioda_eeh_event, + .next = NULL, + .priority = 0 +}; + +#ifdef CONFIG_DEBUG_FS +static int ioda_eeh_dbgfs_set(void *data, int offset, u64 val) +{ + struct pci_controller *hose = data; + struct pnv_phb *phb = hose->private_data; + + out_be64(phb->regs + offset, val); + return 0; +} + +static int ioda_eeh_dbgfs_get(void *data, int offset, u64 *val) +{ + struct pci_controller *hose = data; + struct pnv_phb *phb = hose->private_data; + + *val = in_be64(phb->regs + offset); + return 0; +} + +static int ioda_eeh_outb_dbgfs_set(void *data, u64 val) +{ + return ioda_eeh_dbgfs_set(data, 0xD10, val); +} + +static int ioda_eeh_outb_dbgfs_get(void *data, u64 *val) +{ + return ioda_eeh_dbgfs_get(data, 0xD10, val); +} + +static int ioda_eeh_inbA_dbgfs_set(void *data, u64 val) +{ + return ioda_eeh_dbgfs_set(data, 0xD90, val); +} + +static int ioda_eeh_inbA_dbgfs_get(void *data, u64 *val) +{ + return ioda_eeh_dbgfs_get(data, 0xD90, val); +} + +static int ioda_eeh_inbB_dbgfs_set(void *data, u64 val) +{ + return ioda_eeh_dbgfs_set(data, 0xE10, val); +} + +static int ioda_eeh_inbB_dbgfs_get(void *data, u64 *val) +{ + return ioda_eeh_dbgfs_get(data, 0xE10, val); +} + +DEFINE_SIMPLE_ATTRIBUTE(ioda_eeh_outb_dbgfs_ops, ioda_eeh_outb_dbgfs_get, + ioda_eeh_outb_dbgfs_set, "0x%llx\n"); +DEFINE_SIMPLE_ATTRIBUTE(ioda_eeh_inbA_dbgfs_ops, ioda_eeh_inbA_dbgfs_get, + ioda_eeh_inbA_dbgfs_set, "0x%llx\n"); +DEFINE_SIMPLE_ATTRIBUTE(ioda_eeh_inbB_dbgfs_ops, ioda_eeh_inbB_dbgfs_get, + ioda_eeh_inbB_dbgfs_set, "0x%llx\n"); +#endif /* CONFIG_DEBUG_FS */ + +/** + * ioda_eeh_post_init - Chip dependent post initialization + * @hose: PCI controller + * + * The function will be called after eeh PEs and devices + * have been built. That means the EEH is ready to supply + * service with I/O cache. + */ +static int ioda_eeh_post_init(struct pci_controller *hose) +{ + struct pnv_phb *phb = hose->private_data; + int ret; + + /* Register OPAL event notifier */ + if (!ioda_eeh_nb_init) { + ret = opal_notifier_register(&ioda_eeh_nb); + if (ret) { + pr_err("%s: Can't register OPAL event notifier (%d)\n", + __func__, ret); + return ret; + } + + ioda_eeh_nb_init = 1; + } + + /* We needn't HUB diag-data on PHB3 */ + if (phb->type == PNV_PHB_IODA1 && !hub_diag) { + hub_diag = (char *)__get_free_page(GFP_KERNEL | __GFP_ZERO); + if (!hub_diag) { + pr_err("%s: Out of memory !\n", __func__); + return -ENOMEM; + } + } + +#ifdef CONFIG_DEBUG_FS + if (phb->dbgfs) { + debugfs_create_file("err_injct_outbound", 0600, + phb->dbgfs, hose, + &ioda_eeh_outb_dbgfs_ops); + debugfs_create_file("err_injct_inboundA", 0600, + phb->dbgfs, hose, + &ioda_eeh_inbA_dbgfs_ops); + debugfs_create_file("err_injct_inboundB", 0600, + phb->dbgfs, hose, + &ioda_eeh_inbB_dbgfs_ops); + } +#endif + + phb->eeh_state |= PNV_EEH_STATE_ENABLED; + + return 0; +} + +/** + * ioda_eeh_set_option - Set EEH operation or I/O setting + * @pe: EEH PE + * @option: options + * + * Enable or disable EEH option for the indicated PE. The + * function also can be used to enable I/O or DMA for the + * PE. + */ +static int ioda_eeh_set_option(struct eeh_pe *pe, int option) +{ + s64 ret; + u32 pe_no; + struct pci_controller *hose = pe->phb; + struct pnv_phb *phb = hose->private_data; + + /* Check on PE number */ + if (pe->addr < 0 || pe->addr >= phb->ioda.total_pe) { + pr_err("%s: PE address %x out of range [0, %x] " + "on PHB#%x\n", + __func__, pe->addr, phb->ioda.total_pe, + hose->global_number); + return -EINVAL; + } + + pe_no = pe->addr; + switch (option) { + case EEH_OPT_DISABLE: + ret = -EEXIST; + break; + case EEH_OPT_ENABLE: + ret = 0; + break; + case EEH_OPT_THAW_MMIO: + ret = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no, + OPAL_EEH_ACTION_CLEAR_FREEZE_MMIO); + if (ret) { + pr_warning("%s: Failed to enable MMIO for " + "PHB#%x-PE#%x, err=%lld\n", + __func__, hose->global_number, pe_no, ret); + return -EIO; + } + + break; + case EEH_OPT_THAW_DMA: + ret = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no, + OPAL_EEH_ACTION_CLEAR_FREEZE_DMA); + if (ret) { + pr_warning("%s: Failed to enable DMA for " + "PHB#%x-PE#%x, err=%lld\n", + __func__, hose->global_number, pe_no, ret); + return -EIO; + } + + break; + default: + pr_warning("%s: Invalid option %d\n", __func__, option); + return -EINVAL; + } + + return ret; +} + +/** + * ioda_eeh_get_state - Retrieve the state of PE + * @pe: EEH PE + * + * The PE's state should be retrieved from the PEEV, PEST + * IODA tables. Since the OPAL has exported the function + * to do it, it'd better to use that. + */ +static int ioda_eeh_get_state(struct eeh_pe *pe) +{ + s64 ret = 0; + u8 fstate; + u16 pcierr; + u32 pe_no; + int result; + struct pci_controller *hose = pe->phb; + struct pnv_phb *phb = hose->private_data; + + /* + * Sanity check on PE address. The PHB PE address should + * be zero. + */ + if (pe->addr < 0 || pe->addr >= phb->ioda.total_pe) { + pr_err("%s: PE address %x out of range [0, %x] " + "on PHB#%x\n", + __func__, pe->addr, phb->ioda.total_pe, + hose->global_number); + return EEH_STATE_NOT_SUPPORT; + } + + /* Retrieve PE status through OPAL */ + pe_no = pe->addr; + ret = opal_pci_eeh_freeze_status(phb->opal_id, pe_no, + &fstate, &pcierr, NULL); + if (ret) { + pr_err("%s: Failed to get EEH status on " + "PHB#%x-PE#%x\n, err=%lld\n", + __func__, hose->global_number, pe_no, ret); + return EEH_STATE_NOT_SUPPORT; + } + + /* Check PHB status */ + if (pe->type & EEH_PE_PHB) { + result = 0; + result &= ~EEH_STATE_RESET_ACTIVE; + + if (pcierr != OPAL_EEH_PHB_ERROR) { + result |= EEH_STATE_MMIO_ACTIVE; + result |= EEH_STATE_DMA_ACTIVE; + result |= EEH_STATE_MMIO_ENABLED; + result |= EEH_STATE_DMA_ENABLED; + } + + return result; + } + + /* Parse result out */ + result = 0; + switch (fstate) { + case OPAL_EEH_STOPPED_NOT_FROZEN: + result &= ~EEH_STATE_RESET_ACTIVE; + result |= EEH_STATE_MMIO_ACTIVE; + result |= EEH_STATE_DMA_ACTIVE; + result |= EEH_STATE_MMIO_ENABLED; + result |= EEH_STATE_DMA_ENABLED; + break; + case OPAL_EEH_STOPPED_MMIO_FREEZE: + result &= ~EEH_STATE_RESET_ACTIVE; + result |= EEH_STATE_DMA_ACTIVE; + result |= EEH_STATE_DMA_ENABLED; + break; + case OPAL_EEH_STOPPED_DMA_FREEZE: + result &= ~EEH_STATE_RESET_ACTIVE; + result |= EEH_STATE_MMIO_ACTIVE; + result |= EEH_STATE_MMIO_ENABLED; + break; + case OPAL_EEH_STOPPED_MMIO_DMA_FREEZE: + result &= ~EEH_STATE_RESET_ACTIVE; + break; + case OPAL_EEH_STOPPED_RESET: + result |= EEH_STATE_RESET_ACTIVE; + break; + case OPAL_EEH_STOPPED_TEMP_UNAVAIL: + result |= EEH_STATE_UNAVAILABLE; + break; + case OPAL_EEH_STOPPED_PERM_UNAVAIL: + result |= EEH_STATE_NOT_SUPPORT; + break; + default: + pr_warning("%s: Unexpected EEH status 0x%x " + "on PHB#%x-PE#%x\n", + __func__, fstate, hose->global_number, pe_no); + } + + return result; +} + +static int ioda_eeh_pe_clear(struct eeh_pe *pe) +{ + struct pci_controller *hose; + struct pnv_phb *phb; + u32 pe_no; + u8 fstate; + u16 pcierr; + s64 ret; + + pe_no = pe->addr; + hose = pe->phb; + phb = pe->phb->private_data; + + /* Clear the EEH error on the PE */ + ret = opal_pci_eeh_freeze_clear(phb->opal_id, + pe_no, OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); + if (ret) { + pr_err("%s: Failed to clear EEH error for " + "PHB#%x-PE#%x, err=%lld\n", + __func__, hose->global_number, pe_no, ret); + return -EIO; + } + + /* + * Read the PE state back and verify that the frozen + * state has been removed. + */ + ret = opal_pci_eeh_freeze_status(phb->opal_id, pe_no, + &fstate, &pcierr, NULL); + if (ret) { + pr_err("%s: Failed to get EEH status on " + "PHB#%x-PE#%x\n, err=%lld\n", + __func__, hose->global_number, pe_no, ret); + return -EIO; + } + + if (fstate != OPAL_EEH_STOPPED_NOT_FROZEN) { + pr_err("%s: Frozen state not cleared on " + "PHB#%x-PE#%x, sts=%x\n", + __func__, hose->global_number, pe_no, fstate); + return -EIO; + } + + return 0; +} + +static s64 ioda_eeh_phb_poll(struct pnv_phb *phb) +{ + s64 rc = OPAL_HARDWARE; + + while (1) { + rc = opal_pci_poll(phb->opal_id); + if (rc <= 0) + break; + + msleep(rc); + } + + return rc; +} + +static int ioda_eeh_phb_reset(struct pci_controller *hose, int option) +{ + struct pnv_phb *phb = hose->private_data; + s64 rc = OPAL_HARDWARE; + + pr_debug("%s: Reset PHB#%x, option=%d\n", + __func__, hose->global_number, option); + + /* Issue PHB complete reset request */ + if (option == EEH_RESET_FUNDAMENTAL || + option == EEH_RESET_HOT) + rc = opal_pci_reset(phb->opal_id, + OPAL_PHB_COMPLETE, + OPAL_ASSERT_RESET); + else if (option == EEH_RESET_DEACTIVATE) + rc = opal_pci_reset(phb->opal_id, + OPAL_PHB_COMPLETE, + OPAL_DEASSERT_RESET); + if (rc < 0) + goto out; + + /* + * Poll state of the PHB until the request is done + * successfully. + */ + rc = ioda_eeh_phb_poll(phb); +out: + if (rc != OPAL_SUCCESS) + return -EIO; + + return 0; +} + +static int ioda_eeh_root_reset(struct pci_controller *hose, int option) +{ + struct pnv_phb *phb = hose->private_data; + s64 rc = OPAL_SUCCESS; + + pr_debug("%s: Reset PHB#%x, option=%d\n", + __func__, hose->global_number, option); + + /* + * During the reset deassert time, we needn't care + * the reset scope because the firmware does nothing + * for fundamental or hot reset during deassert phase. + */ + if (option == EEH_RESET_FUNDAMENTAL) + rc = opal_pci_reset(phb->opal_id, + OPAL_PCI_FUNDAMENTAL_RESET, + OPAL_ASSERT_RESET); + else if (option == EEH_RESET_HOT) + rc = opal_pci_reset(phb->opal_id, + OPAL_PCI_HOT_RESET, + OPAL_ASSERT_RESET); + else if (option == EEH_RESET_DEACTIVATE) + rc = opal_pci_reset(phb->opal_id, + OPAL_PCI_HOT_RESET, + OPAL_DEASSERT_RESET); + if (rc < 0) + goto out; + + /* Poll state of the PHB until the request is done */ + rc = ioda_eeh_phb_poll(phb); +out: + if (rc != OPAL_SUCCESS) + return -EIO; + + return 0; +} + +static int ioda_eeh_bridge_reset(struct pci_controller *hose, + struct pci_dev *dev, int option) +{ + u16 ctrl; + + pr_debug("%s: Reset device %04x:%02x:%02x.%01x with option %d\n", + __func__, hose->global_number, dev->bus->number, + PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn), option); + + switch (option) { + case EEH_RESET_FUNDAMENTAL: + case EEH_RESET_HOT: + pci_read_config_word(dev, PCI_BRIDGE_CONTROL, &ctrl); + ctrl |= PCI_BRIDGE_CTL_BUS_RESET; + pci_write_config_word(dev, PCI_BRIDGE_CONTROL, ctrl); + break; + case EEH_RESET_DEACTIVATE: + pci_read_config_word(dev, PCI_BRIDGE_CONTROL, &ctrl); + ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET; + pci_write_config_word(dev, PCI_BRIDGE_CONTROL, ctrl); + break; + } + + return 0; +} + +/** + * ioda_eeh_reset - Reset the indicated PE + * @pe: EEH PE + * @option: reset option + * + * Do reset on the indicated PE. For PCI bus sensitive PE, + * we need to reset the parent p2p bridge. The PHB has to + * be reinitialized if the p2p bridge is root bridge. For + * PCI device sensitive PE, we will try to reset the device + * through FLR. For now, we don't have OPAL APIs to do HARD + * reset yet, so all reset would be SOFT (HOT) reset. + */ +static int ioda_eeh_reset(struct eeh_pe *pe, int option) +{ + struct pci_controller *hose = pe->phb; + struct eeh_dev *edev; + struct pci_dev *dev; + int ret; + + /* + * Anyway, we have to clear the problematic state for the + * corresponding PE. However, we needn't do it if the PE + * is PHB associated. That means the PHB is having fatal + * errors and it needs reset. Further more, the AIB interface + * isn't reliable any more. + */ + if (!(pe->type & EEH_PE_PHB) && + (option == EEH_RESET_HOT || + option == EEH_RESET_FUNDAMENTAL)) { + ret = ioda_eeh_pe_clear(pe); + if (ret) + return -EIO; + } + + /* + * The rules applied to reset, either fundamental or hot reset: + * + * We always reset the direct upstream bridge of the PE. If the + * direct upstream bridge isn't root bridge, we always take hot + * reset no matter what option (fundamental or hot) is. Otherwise, + * we should do the reset according to the required option. + */ + if (pe->type & EEH_PE_PHB) { + ret = ioda_eeh_phb_reset(hose, option); + } else { + if (pe->type & EEH_PE_DEVICE) { + /* + * If it's device PE, we didn't refer to the parent + * PCI bus yet. So we have to figure it out indirectly. + */ + edev = list_first_entry(&pe->edevs, + struct eeh_dev, list); + dev = eeh_dev_to_pci_dev(edev); + dev = dev->bus->self; + } else { + /* + * If it's bus PE, the parent PCI bus is already there + * and just pick it up. + */ + dev = pe->bus->self; + } + + /* + * Do reset based on the fact that the direct upstream bridge + * is root bridge (port) or not. + */ + if (dev->bus->number == 0) + ret = ioda_eeh_root_reset(hose, option); + else + ret = ioda_eeh_bridge_reset(hose, dev, option); + } + + return ret; +} + +/** + * ioda_eeh_get_log - Retrieve error log + * @pe: EEH PE + * @severity: Severity level of the log + * @drv_log: buffer to store the log + * @len: space of the log buffer + * + * The function is used to retrieve error log from P7IOC. + */ +static int ioda_eeh_get_log(struct eeh_pe *pe, int severity, + char *drv_log, unsigned long len) +{ + s64 ret; + unsigned long flags; + struct pci_controller *hose = pe->phb; + struct pnv_phb *phb = hose->private_data; + + spin_lock_irqsave(&phb->lock, flags); + + ret = opal_pci_get_phb_diag_data2(phb->opal_id, + phb->diag.blob, PNV_PCI_DIAG_BUF_SIZE); + if (ret) { + spin_unlock_irqrestore(&phb->lock, flags); + pr_warning("%s: Can't get log for PHB#%x-PE#%x (%lld)\n", + __func__, hose->global_number, pe->addr, ret); + return -EIO; + } + + /* + * FIXME: We probably need log the error in somewhere. + * Lets make it up in future. + */ + /* pr_info("%s", phb->diag.blob); */ + + spin_unlock_irqrestore(&phb->lock, flags); + + return 0; +} + +/** + * ioda_eeh_configure_bridge - Configure the PCI bridges for the indicated PE + * @pe: EEH PE + * + * For particular PE, it might have included PCI bridges. In order + * to make the PE work properly, those PCI bridges should be configured + * correctly. However, we need do nothing on P7IOC since the reset + * function will do everything that should be covered by the function. + */ +static int ioda_eeh_configure_bridge(struct eeh_pe *pe) +{ + return 0; +} + +static void ioda_eeh_hub_diag_common(struct OpalIoP7IOCErrorData *data) +{ + /* GEM */ + pr_info(" GEM XFIR: %016llx\n", data->gemXfir); + pr_info(" GEM RFIR: %016llx\n", data->gemRfir); + pr_info(" GEM RIRQFIR: %016llx\n", data->gemRirqfir); + pr_info(" GEM Mask: %016llx\n", data->gemMask); + pr_info(" GEM RWOF: %016llx\n", data->gemRwof); + + /* LEM */ + pr_info(" LEM FIR: %016llx\n", data->lemFir); + pr_info(" LEM Error Mask: %016llx\n", data->lemErrMask); + pr_info(" LEM Action 0: %016llx\n", data->lemAction0); + pr_info(" LEM Action 1: %016llx\n", data->lemAction1); + pr_info(" LEM WOF: %016llx\n", data->lemWof); +} + +static void ioda_eeh_hub_diag(struct pci_controller *hose) +{ + struct pnv_phb *phb = hose->private_data; + struct OpalIoP7IOCErrorData *data; + long rc; + + data = (struct OpalIoP7IOCErrorData *)ioda_eeh_hub_diag; + rc = opal_pci_get_hub_diag_data(phb->hub_id, data, PAGE_SIZE); + if (rc != OPAL_SUCCESS) { + pr_warning("%s: Failed to get HUB#%llx diag-data (%ld)\n", + __func__, phb->hub_id, rc); + return; + } + + switch (data->type) { + case OPAL_P7IOC_DIAG_TYPE_RGC: + pr_info("P7IOC diag-data for RGC\n\n"); + ioda_eeh_hub_diag_common(data); + pr_info(" RGC Status: %016llx\n", data->rgc.rgcStatus); + pr_info(" RGC LDCP: %016llx\n", data->rgc.rgcLdcp); + break; + case OPAL_P7IOC_DIAG_TYPE_BI: + pr_info("P7IOC diag-data for BI %s\n\n", + data->bi.biDownbound ? "Downbound" : "Upbound"); + ioda_eeh_hub_diag_common(data); + pr_info(" BI LDCP 0: %016llx\n", data->bi.biLdcp0); + pr_info(" BI LDCP 1: %016llx\n", data->bi.biLdcp1); + pr_info(" BI LDCP 2: %016llx\n", data->bi.biLdcp2); + pr_info(" BI Fence Status: %016llx\n", data->bi.biFenceStatus); + break; + case OPAL_P7IOC_DIAG_TYPE_CI: + pr_info("P7IOC diag-data for CI Port %d\\nn", + data->ci.ciPort); + ioda_eeh_hub_diag_common(data); + pr_info(" CI Port Status: %016llx\n", data->ci.ciPortStatus); + pr_info(" CI Port LDCP: %016llx\n", data->ci.ciPortLdcp); + break; + case OPAL_P7IOC_DIAG_TYPE_MISC: + pr_info("P7IOC diag-data for MISC\n\n"); + ioda_eeh_hub_diag_common(data); + break; + case OPAL_P7IOC_DIAG_TYPE_I2C: + pr_info("P7IOC diag-data for I2C\n\n"); + ioda_eeh_hub_diag_common(data); + break; + default: + pr_warning("%s: Invalid type of HUB#%llx diag-data (%d)\n", + __func__, phb->hub_id, data->type); + } +} + +static void ioda_eeh_p7ioc_phb_diag(struct pci_controller *hose, + struct OpalIoPhbErrorCommon *common) +{ + struct OpalIoP7IOCPhbErrorData *data; + int i; + + data = (struct OpalIoP7IOCPhbErrorData *)common; + + pr_info("P7IOC PHB#%x Diag-data (Version: %d)\n\n", + hose->global_number, common->version); + + pr_info(" brdgCtl: %08x\n", data->brdgCtl); + + pr_info(" portStatusReg: %08x\n", data->portStatusReg); + pr_info(" rootCmplxStatus: %08x\n", data->rootCmplxStatus); + pr_info(" busAgentStatus: %08x\n", data->busAgentStatus); + + pr_info(" deviceStatus: %08x\n", data->deviceStatus); + pr_info(" slotStatus: %08x\n", data->slotStatus); + pr_info(" linkStatus: %08x\n", data->linkStatus); + pr_info(" devCmdStatus: %08x\n", data->devCmdStatus); + pr_info(" devSecStatus: %08x\n", data->devSecStatus); + + pr_info(" rootErrorStatus: %08x\n", data->rootErrorStatus); + pr_info(" uncorrErrorStatus: %08x\n", data->uncorrErrorStatus); + pr_info(" corrErrorStatus: %08x\n", data->corrErrorStatus); + pr_info(" tlpHdr1: %08x\n", data->tlpHdr1); + pr_info(" tlpHdr2: %08x\n", data->tlpHdr2); + pr_info(" tlpHdr3: %08x\n", data->tlpHdr3); + pr_info(" tlpHdr4: %08x\n", data->tlpHdr4); + pr_info(" sourceId: %08x\n", data->sourceId); + + pr_info(" errorClass: %016llx\n", data->errorClass); + pr_info(" correlator: %016llx\n", data->correlator); + pr_info(" p7iocPlssr: %016llx\n", data->p7iocPlssr); + pr_info(" p7iocCsr: %016llx\n", data->p7iocCsr); + pr_info(" lemFir: %016llx\n", data->lemFir); + pr_info(" lemErrorMask: %016llx\n", data->lemErrorMask); + pr_info(" lemWOF: %016llx\n", data->lemWOF); + pr_info(" phbErrorStatus: %016llx\n", data->phbErrorStatus); + pr_info(" phbFirstErrorStatus: %016llx\n", data->phbFirstErrorStatus); + pr_info(" phbErrorLog0: %016llx\n", data->phbErrorLog0); + pr_info(" phbErrorLog1: %016llx\n", data->phbErrorLog1); + pr_info(" mmioErrorStatus: %016llx\n", data->mmioErrorStatus); + pr_info(" mmioFirstErrorStatus: %016llx\n", data->mmioFirstErrorStatus); + pr_info(" mmioErrorLog0: %016llx\n", data->mmioErrorLog0); + pr_info(" mmioErrorLog1: %016llx\n", data->mmioErrorLog1); + pr_info(" dma0ErrorStatus: %016llx\n", data->dma0ErrorStatus); + pr_info(" dma0FirstErrorStatus: %016llx\n", data->dma0FirstErrorStatus); + pr_info(" dma0ErrorLog0: %016llx\n", data->dma0ErrorLog0); + pr_info(" dma0ErrorLog1: %016llx\n", data->dma0ErrorLog1); + pr_info(" dma1ErrorStatus: %016llx\n", data->dma1ErrorStatus); + pr_info(" dma1FirstErrorStatus: %016llx\n", data->dma1FirstErrorStatus); + pr_info(" dma1ErrorLog0: %016llx\n", data->dma1ErrorLog0); + pr_info(" dma1ErrorLog1: %016llx\n", data->dma1ErrorLog1); + + for (i = 0; i < OPAL_P7IOC_NUM_PEST_REGS; i++) { + if ((data->pestA[i] >> 63) == 0 && + (data->pestB[i] >> 63) == 0) + continue; + + pr_info(" PE[%3d] PESTA: %016llx\n", i, data->pestA[i]); + pr_info(" PESTB: %016llx\n", data->pestB[i]); + } +} + +static void ioda_eeh_phb3_phb_diag(struct pci_controller *hose, + struct OpalIoPhbErrorCommon *common) +{ + struct OpalIoPhb3ErrorData *data; + int i; + + data = (struct OpalIoPhb3ErrorData*)common; + pr_info("PHB3 PHB#%x Diag-data (Version: %d)\n\n", + hose->global_number, common->version); + + pr_info(" brdgCtl: %08x\n", data->brdgCtl); + + pr_info(" portStatusReg: %08x\n", data->portStatusReg); + pr_info(" rootCmplxStatus: %08x\n", data->rootCmplxStatus); + pr_info(" busAgentStatus: %08x\n", data->busAgentStatus); + + pr_info(" deviceStatus: %08x\n", data->deviceStatus); + pr_info(" slotStatus: %08x\n", data->slotStatus); + pr_info(" linkStatus: %08x\n", data->linkStatus); + pr_info(" devCmdStatus: %08x\n", data->devCmdStatus); + pr_info(" devSecStatus: %08x\n", data->devSecStatus); + + pr_info(" rootErrorStatus: %08x\n", data->rootErrorStatus); + pr_info(" uncorrErrorStatus: %08x\n", data->uncorrErrorStatus); + pr_info(" corrErrorStatus: %08x\n", data->corrErrorStatus); + pr_info(" tlpHdr1: %08x\n", data->tlpHdr1); + pr_info(" tlpHdr2: %08x\n", data->tlpHdr2); + pr_info(" tlpHdr3: %08x\n", data->tlpHdr3); + pr_info(" tlpHdr4: %08x\n", data->tlpHdr4); + pr_info(" sourceId: %08x\n", data->sourceId); + pr_info(" errorClass: %016llx\n", data->errorClass); + pr_info(" correlator: %016llx\n", data->correlator); + pr_info(" nFir: %016llx\n", data->nFir); + pr_info(" nFirMask: %016llx\n", data->nFirMask); + pr_info(" nFirWOF: %016llx\n", data->nFirWOF); + pr_info(" PhbPlssr: %016llx\n", data->phbPlssr); + pr_info(" PhbCsr: %016llx\n", data->phbCsr); + pr_info(" lemFir: %016llx\n", data->lemFir); + pr_info(" lemErrorMask: %016llx\n", data->lemErrorMask); + pr_info(" lemWOF: %016llx\n", data->lemWOF); + pr_info(" phbErrorStatus: %016llx\n", data->phbErrorStatus); + pr_info(" phbFirstErrorStatus: %016llx\n", data->phbFirstErrorStatus); + pr_info(" phbErrorLog0: %016llx\n", data->phbErrorLog0); + pr_info(" phbErrorLog1: %016llx\n", data->phbErrorLog1); + pr_info(" mmioErrorStatus: %016llx\n", data->mmioErrorStatus); + pr_info(" mmioFirstErrorStatus: %016llx\n", data->mmioFirstErrorStatus); + pr_info(" mmioErrorLog0: %016llx\n", data->mmioErrorLog0); + pr_info(" mmioErrorLog1: %016llx\n", data->mmioErrorLog1); + pr_info(" dma0ErrorStatus: %016llx\n", data->dma0ErrorStatus); + pr_info(" dma0FirstErrorStatus: %016llx\n", data->dma0FirstErrorStatus); + pr_info(" dma0ErrorLog0: %016llx\n", data->dma0ErrorLog0); + pr_info(" dma0ErrorLog1: %016llx\n", data->dma0ErrorLog1); + pr_info(" dma1ErrorStatus: %016llx\n", data->dma1ErrorStatus); + pr_info(" dma1FirstErrorStatus: %016llx\n", data->dma1FirstErrorStatus); + pr_info(" dma1ErrorLog0: %016llx\n", data->dma1ErrorLog0); + pr_info(" dma1ErrorLog1: %016llx\n", data->dma1ErrorLog1); + + for (i = 0; i < OPAL_PHB3_NUM_PEST_REGS; i++) { + if ((data->pestA[i] >> 63) == 0 && + (data->pestB[i] >> 63) == 0) + continue; + + pr_info(" PE[%3d] PESTA: %016llx\n", i, data->pestA[i]); + pr_info(" PESTB: %016llx\n", data->pestB[i]); + } +} + +static void ioda_eeh_phb_diag(struct pci_controller *hose) +{ + struct pnv_phb *phb = hose->private_data; + struct OpalIoPhbErrorCommon *common; + long rc; + + common = (struct OpalIoPhbErrorCommon *)phb->diag.blob; + rc = opal_pci_get_phb_diag_data2(phb->opal_id, common, PAGE_SIZE); + if (rc != OPAL_SUCCESS) { + pr_warning("%s: Failed to get diag-data for PHB#%x (%ld)\n", + __func__, hose->global_number, rc); + return; + } + + switch (common->ioType) { + case OPAL_PHB_ERROR_DATA_TYPE_P7IOC: + ioda_eeh_p7ioc_phb_diag(hose, common); + break; + case OPAL_PHB_ERROR_DATA_TYPE_PHB3: + ioda_eeh_phb3_phb_diag(hose, common); + break; + default: + pr_warning("%s: Unrecognized I/O chip %d\n", + __func__, common->ioType); + } +} + +static int ioda_eeh_get_phb_pe(struct pci_controller *hose, + struct eeh_pe **pe) +{ + struct eeh_pe *phb_pe; + + phb_pe = eeh_phb_pe_get(hose); + if (!phb_pe) { + pr_warning("%s Can't find PE for PHB#%d\n", + __func__, hose->global_number); + return -EEXIST; + } + + *pe = phb_pe; + return 0; +} + +static int ioda_eeh_get_pe(struct pci_controller *hose, + u16 pe_no, struct eeh_pe **pe) +{ + struct eeh_pe *phb_pe, *dev_pe; + struct eeh_dev dev; + + /* Find the PHB PE */ + if (ioda_eeh_get_phb_pe(hose, &phb_pe)) + return -EEXIST; + + /* Find the PE according to PE# */ + memset(&dev, 0, sizeof(struct eeh_dev)); + dev.phb = hose; + dev.pe_config_addr = pe_no; + dev_pe = eeh_pe_get(&dev); + if (!dev_pe) { + pr_warning("%s: Can't find PE for PHB#%x - PE#%x\n", + __func__, hose->global_number, pe_no); + return -EEXIST; + } + + *pe = dev_pe; + return 0; +} + +/** + * ioda_eeh_next_error - Retrieve next error for EEH core to handle + * @pe: The affected PE + * + * The function is expected to be called by EEH core while it gets + * special EEH event (without binding PE). The function calls to + * OPAL APIs for next error to handle. The informational error is + * handled internally by platform. However, the dead IOC, dead PHB, + * fenced PHB and frozen PE should be handled by EEH core eventually. + */ +static int ioda_eeh_next_error(struct eeh_pe **pe) +{ + struct pci_controller *hose, *tmp; + struct pnv_phb *phb; + u64 frozen_pe_no; + u16 err_type, severity; + long rc; + int ret = 1; + + /* + * While running here, it's safe to purge the event queue. + * And we should keep the cached OPAL notifier event sychronized + * between the kernel and firmware. + */ + eeh_remove_event(NULL); + opal_notifier_update_evt(OPAL_EVENT_PCI_ERROR, 0x0ul); + + list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { + /* + * If the subordinate PCI buses of the PHB has been + * removed, we needn't take care of it any more. + */ + phb = hose->private_data; + if (phb->eeh_state & PNV_EEH_STATE_REMOVED) + continue; + + rc = opal_pci_next_error(phb->opal_id, + &frozen_pe_no, &err_type, &severity); + + /* If OPAL API returns error, we needn't proceed */ + if (rc != OPAL_SUCCESS) { + pr_devel("%s: Invalid return value on " + "PHB#%x (0x%lx) from opal_pci_next_error", + __func__, hose->global_number, rc); + continue; + } + + /* If the PHB doesn't have error, stop processing */ + if (err_type == OPAL_EEH_NO_ERROR || + severity == OPAL_EEH_SEV_NO_ERROR) { + pr_devel("%s: No error found on PHB#%x\n", + __func__, hose->global_number); + continue; + } + + /* + * Processing the error. We're expecting the error with + * highest priority reported upon multiple errors on the + * specific PHB. + */ + pr_devel("%s: Error (%d, %d, %llu) on PHB#%x\n", + __func__, err_type, severity, + frozen_pe_no, hose->global_number); + switch (err_type) { + case OPAL_EEH_IOC_ERROR: + if (severity == OPAL_EEH_SEV_IOC_DEAD) { + list_for_each_entry_safe(hose, tmp, + &hose_list, list_node) { + phb = hose->private_data; + phb->eeh_state |= PNV_EEH_STATE_REMOVED; + } + + pr_err("EEH: dead IOC detected\n"); + ret = 4; + goto out; + } else if (severity == OPAL_EEH_SEV_INF) { + pr_info("EEH: IOC informative error " + "detected\n"); + ioda_eeh_hub_diag(hose); + } + + break; + case OPAL_EEH_PHB_ERROR: + if (severity == OPAL_EEH_SEV_PHB_DEAD) { + if (ioda_eeh_get_phb_pe(hose, pe)) + break; + + pr_err("EEH: dead PHB#%x detected\n", + hose->global_number); + phb->eeh_state |= PNV_EEH_STATE_REMOVED; + ret = 3; + goto out; + } else if (severity == OPAL_EEH_SEV_PHB_FENCED) { + if (ioda_eeh_get_phb_pe(hose, pe)) + break; + + pr_err("EEH: fenced PHB#%x detected\n", + hose->global_number); + ret = 2; + goto out; + } else if (severity == OPAL_EEH_SEV_INF) { + pr_info("EEH: PHB#%x informative error " + "detected\n", + hose->global_number); + ioda_eeh_phb_diag(hose); + } + + break; + case OPAL_EEH_PE_ERROR: + if (ioda_eeh_get_pe(hose, frozen_pe_no, pe)) + break; + + pr_err("EEH: Frozen PE#%x on PHB#%x detected\n", + (*pe)->addr, (*pe)->phb->global_number); + ret = 1; + goto out; + } + } + + ret = 0; +out: + return ret; +} + +struct pnv_eeh_ops ioda_eeh_ops = { + .post_init = ioda_eeh_post_init, + .set_option = ioda_eeh_set_option, + .get_state = ioda_eeh_get_state, + .reset = ioda_eeh_reset, + .get_log = ioda_eeh_get_log, + .configure_bridge = ioda_eeh_configure_bridge, + .next_error = ioda_eeh_next_error +}; diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c new file mode 100644 index 0000000..73b9814 --- /dev/null +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -0,0 +1,387 @@ +/* + * The file intends to implement the platform dependent EEH operations on + * powernv platform. Actually, the powernv was created in order to fully + * hypervisor support. + * + * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2013. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/atomic.h> +#include <linux/delay.h> +#include <linux/export.h> +#include <linux/init.h> +#include <linux/list.h> +#include <linux/msi.h> +#include <linux/of.h> +#include <linux/pci.h> +#include <linux/proc_fs.h> +#include <linux/rbtree.h> +#include <linux/sched.h> +#include <linux/seq_file.h> +#include <linux/spinlock.h> + +#include <asm/eeh.h> +#include <asm/eeh_event.h> +#include <asm/firmware.h> +#include <asm/io.h> +#include <asm/iommu.h> +#include <asm/machdep.h> +#include <asm/msi_bitmap.h> +#include <asm/opal.h> +#include <asm/ppc-pci.h> + +#include "powernv.h" +#include "pci.h" + +/** + * powernv_eeh_init - EEH platform dependent initialization + * + * EEH platform dependent initialization on powernv + */ +static int powernv_eeh_init(void) +{ + /* We require OPALv3 */ + if (!firmware_has_feature(FW_FEATURE_OPALv3)) { + pr_warning("%s: OPALv3 is required !\n", __func__); + return -EINVAL; + } + + /* Set EEH probe mode */ + eeh_probe_mode_set(EEH_PROBE_MODE_DEV); + + return 0; +} + +/** + * powernv_eeh_post_init - EEH platform dependent post initialization + * + * EEH platform dependent post initialization on powernv. When + * the function is called, the EEH PEs and devices should have + * been built. If the I/O cache staff has been built, EEH is + * ready to supply service. + */ +static int powernv_eeh_post_init(void) +{ + struct pci_controller *hose; + struct pnv_phb *phb; + int ret = 0; + + list_for_each_entry(hose, &hose_list, list_node) { + phb = hose->private_data; + + if (phb->eeh_ops && phb->eeh_ops->post_init) { + ret = phb->eeh_ops->post_init(hose); + if (ret) + break; + } + } + + return ret; +} + +/** + * powernv_eeh_dev_probe - Do probe on PCI device + * @dev: PCI device + * @flag: unused + * + * When EEH module is installed during system boot, all PCI devices + * are checked one by one to see if it supports EEH. The function + * is introduced for the purpose. By default, EEH has been enabled + * on all PCI devices. That's to say, we only need do necessary + * initialization on the corresponding eeh device and create PE + * accordingly. + * + * It's notable that's unsafe to retrieve the EEH device through + * the corresponding PCI device. During the PCI device hotplug, which + * was possiblly triggered by EEH core, the binding between EEH device + * and the PCI device isn't built yet. + */ +static int powernv_eeh_dev_probe(struct pci_dev *dev, void *flag) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pnv_phb *phb = hose->private_data; + struct device_node *dn = pci_device_to_OF_node(dev); + struct eeh_dev *edev = of_node_to_eeh_dev(dn); + + /* + * When probing the root bridge, which doesn't have any + * subordinate PCI devices. We don't have OF node for + * the root bridge. So it's not reasonable to continue + * the probing. + */ + if (!dn || !edev || edev->pe) + return 0; + + /* Skip for PCI-ISA bridge */ + if ((dev->class >> 8) == PCI_CLASS_BRIDGE_ISA) + return 0; + + /* Initialize eeh device */ + edev->class_code = dev->class; + edev->mode &= 0xFFFFFF00; + if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) + edev->mode |= EEH_DEV_BRIDGE; + if (pci_is_pcie(dev)) { + edev->pcie_cap = pci_pcie_cap(dev); + + if (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT) + edev->mode |= EEH_DEV_ROOT_PORT; + else if (pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM) + edev->mode |= EEH_DEV_DS_PORT; + } + + edev->config_addr = ((dev->bus->number << 8) | dev->devfn); + edev->pe_config_addr = phb->bdfn_to_pe(phb, dev->bus, dev->devfn & 0xff); + + /* Create PE */ + eeh_add_to_parent_pe(edev); + + /* + * Enable EEH explicitly so that we will do EEH check + * while accessing I/O stuff + */ + eeh_subsystem_enabled = 1; + + /* Save memory bars */ + eeh_save_bars(edev); + + return 0; +} + +/** + * powernv_eeh_set_option - Initialize EEH or MMIO/DMA reenable + * @pe: EEH PE + * @option: operation to be issued + * + * The function is used to control the EEH functionality globally. + * Currently, following options are support according to PAPR: + * Enable EEH, Disable EEH, Enable MMIO and Enable DMA + */ +static int powernv_eeh_set_option(struct eeh_pe *pe, int option) +{ + struct pci_controller *hose = pe->phb; + struct pnv_phb *phb = hose->private_data; + int ret = -EEXIST; + + /* + * What we need do is pass it down for hardware + * implementation to handle it. + */ + if (phb->eeh_ops && phb->eeh_ops->set_option) + ret = phb->eeh_ops->set_option(pe, option); + + return ret; +} + +/** + * powernv_eeh_get_pe_addr - Retrieve PE address + * @pe: EEH PE + * + * Retrieve the PE address according to the given tranditional + * PCI BDF (Bus/Device/Function) address. + */ +static int powernv_eeh_get_pe_addr(struct eeh_pe *pe) +{ + return pe->addr; +} + +/** + * powernv_eeh_get_state - Retrieve PE state + * @pe: EEH PE + * @delay: delay while PE state is temporarily unavailable + * + * Retrieve the state of the specified PE. For IODA-compitable + * platform, it should be retrieved from IODA table. Therefore, + * we prefer passing down to hardware implementation to handle + * it. + */ +static int powernv_eeh_get_state(struct eeh_pe *pe, int *delay) +{ + struct pci_controller *hose = pe->phb; + struct pnv_phb *phb = hose->private_data; + int ret = EEH_STATE_NOT_SUPPORT; + + if (phb->eeh_ops && phb->eeh_ops->get_state) { + ret = phb->eeh_ops->get_state(pe); + + /* + * If the PE state is temporarily unavailable, + * to inform the EEH core delay for default + * period (1 second) + */ + if (delay) { + *delay = 0; + if (ret & EEH_STATE_UNAVAILABLE) + *delay = 1000; + } + } + + return ret; +} + +/** + * powernv_eeh_reset - Reset the specified PE + * @pe: EEH PE + * @option: reset option + * + * Reset the specified PE + */ +static int powernv_eeh_reset(struct eeh_pe *pe, int option) +{ + struct pci_controller *hose = pe->phb; + struct pnv_phb *phb = hose->private_data; + int ret = -EEXIST; + + if (phb->eeh_ops && phb->eeh_ops->reset) + ret = phb->eeh_ops->reset(pe, option); + + return ret; +} + +/** + * powernv_eeh_wait_state - Wait for PE state + * @pe: EEH PE + * @max_wait: maximal period in microsecond + * + * Wait for the state of associated PE. It might take some time + * to retrieve the PE's state. + */ +static int powernv_eeh_wait_state(struct eeh_pe *pe, int max_wait) +{ + int ret; + int mwait; + + while (1) { + ret = powernv_eeh_get_state(pe, &mwait); + + /* + * If the PE's state is temporarily unavailable, + * we have to wait for the specified time. Otherwise, + * the PE's state will be returned immediately. + */ + if (ret != EEH_STATE_UNAVAILABLE) + return ret; + + max_wait -= mwait; + if (max_wait <= 0) { + pr_warning("%s: Timeout getting PE#%x's state (%d)\n", + __func__, pe->addr, max_wait); + return EEH_STATE_NOT_SUPPORT; + } + + msleep(mwait); + } + + return EEH_STATE_NOT_SUPPORT; +} + +/** + * powernv_eeh_get_log - Retrieve error log + * @pe: EEH PE + * @severity: temporary or permanent error log + * @drv_log: driver log to be combined with retrieved error log + * @len: length of driver log + * + * Retrieve the temporary or permanent error from the PE. + */ +static int powernv_eeh_get_log(struct eeh_pe *pe, int severity, + char *drv_log, unsigned long len) +{ + struct pci_controller *hose = pe->phb; + struct pnv_phb *phb = hose->private_data; + int ret = -EEXIST; + + if (phb->eeh_ops && phb->eeh_ops->get_log) + ret = phb->eeh_ops->get_log(pe, severity, drv_log, len); + + return ret; +} + +/** + * powernv_eeh_configure_bridge - Configure PCI bridges in the indicated PE + * @pe: EEH PE + * + * The function will be called to reconfigure the bridges included + * in the specified PE so that the mulfunctional PE would be recovered + * again. + */ +static int powernv_eeh_configure_bridge(struct eeh_pe *pe) +{ + struct pci_controller *hose = pe->phb; + struct pnv_phb *phb = hose->private_data; + int ret = 0; + + if (phb->eeh_ops && phb->eeh_ops->configure_bridge) + ret = phb->eeh_ops->configure_bridge(pe); + + return ret; +} + +/** + * powernv_eeh_next_error - Retrieve next EEH error to handle + * @pe: Affected PE + * + * Using OPAL API, to retrieve next EEH error for EEH core to handle + */ +static int powernv_eeh_next_error(struct eeh_pe **pe) +{ + struct pci_controller *hose; + struct pnv_phb *phb = NULL; + + list_for_each_entry(hose, &hose_list, list_node) { + phb = hose->private_data; + break; + } + + if (phb && phb->eeh_ops->next_error) + return phb->eeh_ops->next_error(pe); + + return -EEXIST; +} + +static struct eeh_ops powernv_eeh_ops = { + .name = "powernv", + .init = powernv_eeh_init, + .post_init = powernv_eeh_post_init, + .of_probe = NULL, + .dev_probe = powernv_eeh_dev_probe, + .set_option = powernv_eeh_set_option, + .get_pe_addr = powernv_eeh_get_pe_addr, + .get_state = powernv_eeh_get_state, + .reset = powernv_eeh_reset, + .wait_state = powernv_eeh_wait_state, + .get_log = powernv_eeh_get_log, + .configure_bridge = powernv_eeh_configure_bridge, + .read_config = pnv_pci_cfg_read, + .write_config = pnv_pci_cfg_write, + .next_error = powernv_eeh_next_error +}; + +/** + * eeh_powernv_init - Register platform dependent EEH operations + * + * EEH initialization on powernv platform. This function should be + * called before any EEH related functions. + */ +static int __init eeh_powernv_init(void) +{ + int ret = -EINVAL; + + if (!machine_is(powernv)) + return ret; + + ret = eeh_ops_register(&powernv_eeh_ops); + if (!ret) + pr_info("EEH: PowerNV platform initialized\n"); + else + pr_info("EEH: Failed to initialize PowerNV platform (%d)\n", ret); + + return ret; +} + +early_initcall(eeh_powernv_init); diff --git a/arch/powerpc/platforms/powernv/opal-flash.c b/arch/powerpc/platforms/powernv/opal-flash.c new file mode 100644 index 0000000..6ffa6b1 --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal-flash.c @@ -0,0 +1,667 @@ +/* + * PowerNV OPAL Firmware Update Interface + * + * Copyright 2013 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#define DEBUG + +#include <linux/kernel.h> +#include <linux/reboot.h> +#include <linux/init.h> +#include <linux/kobject.h> +#include <linux/sysfs.h> +#include <linux/slab.h> +#include <linux/mm.h> +#include <linux/vmalloc.h> +#include <linux/pagemap.h> + +#include <asm/opal.h> + +/* FLASH status codes */ +#define FLASH_NO_OP -1099 /* No operation initiated by user */ +#define FLASH_NO_AUTH -9002 /* Not a service authority partition */ + +/* Validate image status values */ +#define VALIDATE_IMG_READY -1001 /* Image ready for validation */ +#define VALIDATE_IMG_INCOMPLETE -1002 /* User copied < VALIDATE_BUF_SIZE */ + +/* Manage image status values */ +#define MANAGE_ACTIVE_ERR -9001 /* Cannot overwrite active img */ + +/* Flash image status values */ +#define FLASH_IMG_READY 0 /* Img ready for flash on reboot */ +#define FLASH_INVALID_IMG -1003 /* Flash image shorter than expected */ +#define FLASH_IMG_NULL_DATA -1004 /* Bad data in sg list entry */ +#define FLASH_IMG_BAD_LEN -1005 /* Bad length in sg list entry */ + +/* Manage operation tokens */ +#define FLASH_REJECT_TMP_SIDE 0 /* Reject temporary fw image */ +#define FLASH_COMMIT_TMP_SIDE 1 /* Commit temporary fw image */ + +/* Update tokens */ +#define FLASH_UPDATE_CANCEL 0 /* Cancel update request */ +#define FLASH_UPDATE_INIT 1 /* Initiate update */ + +/* Validate image update result tokens */ +#define VALIDATE_TMP_UPDATE 0 /* T side will be updated */ +#define VALIDATE_FLASH_AUTH 1 /* Partition does not have authority */ +#define VALIDATE_INVALID_IMG 2 /* Candidate image is not valid */ +#define VALIDATE_CUR_UNKNOWN 3 /* Current fixpack level is unknown */ +/* + * Current T side will be committed to P side before being replace with new + * image, and the new image is downlevel from current image + */ +#define VALIDATE_TMP_COMMIT_DL 4 +/* + * Current T side will be committed to P side before being replaced with new + * image + */ +#define VALIDATE_TMP_COMMIT 5 +/* + * T side will be updated with a downlevel image + */ +#define VALIDATE_TMP_UPDATE_DL 6 +/* + * The candidate image's release date is later than the system's firmware + * service entitlement date - service warranty period has expired + */ +#define VALIDATE_OUT_OF_WRNTY 7 + +/* Validate buffer size */ +#define VALIDATE_BUF_SIZE 4096 + +/* XXX: Assume candidate image size is <= 256MB */ +#define MAX_IMAGE_SIZE 0x10000000 + +/* Flash sg list version */ +#define SG_LIST_VERSION (1UL) + +/* Image status */ +enum { + IMAGE_INVALID, + IMAGE_LOADING, + IMAGE_READY, +}; + +/* Candidate image data */ +struct image_data_t { + int status; + void *data; + uint32_t size; +}; + +/* Candidate image header */ +struct image_header_t { + uint16_t magic; + uint16_t version; + uint32_t size; +}; + +/* Scatter/gather entry */ +struct opal_sg_entry { + void *data; + long length; +}; + +/* We calculate number of entries based on PAGE_SIZE */ +#define SG_ENTRIES_PER_NODE ((PAGE_SIZE - 16) / sizeof(struct opal_sg_entry)) + +/* + * This struct is very similar but not identical to that + * needed by the opal flash update. All we need to do for + * opal is rewrite num_entries into a version/length and + * translate the pointers to absolute. + */ +struct opal_sg_list { + unsigned long num_entries; + struct opal_sg_list *next; + struct opal_sg_entry entry[SG_ENTRIES_PER_NODE]; +}; + +struct validate_flash_t { + int status; /* Return status */ + void *buf; /* Candiate image buffer */ + uint32_t buf_size; /* Image size */ + uint32_t result; /* Update results token */ +}; + +struct manage_flash_t { + int status; /* Return status */ +}; + +struct update_flash_t { + int status; /* Return status */ +}; + +static struct image_header_t image_header; +static struct image_data_t image_data; +static struct validate_flash_t validate_flash_data; +static struct manage_flash_t manage_flash_data; +static struct update_flash_t update_flash_data; + +static DEFINE_MUTEX(image_data_mutex); + +/* + * Validate candidate image + */ +static inline void opal_flash_validate(void) +{ + struct validate_flash_t *args_buf = &validate_flash_data; + + args_buf->status = opal_validate_flash(__pa(args_buf->buf), + &(args_buf->buf_size), + &(args_buf->result)); +} + +/* + * Validate output format: + * validate result token + * current image version details + * new image version details + */ +static ssize_t validate_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct validate_flash_t *args_buf = &validate_flash_data; + int len; + + /* Candidate image is not validated */ + if (args_buf->status < VALIDATE_TMP_UPDATE) { + len = sprintf(buf, "%d\n", args_buf->status); + goto out; + } + + /* Result token */ + len = sprintf(buf, "%d\n", args_buf->result); + + /* Current and candidate image version details */ + if ((args_buf->result != VALIDATE_TMP_UPDATE) && + (args_buf->result < VALIDATE_CUR_UNKNOWN)) + goto out; + + if (args_buf->buf_size > (VALIDATE_BUF_SIZE - len)) { + memcpy(buf + len, args_buf->buf, VALIDATE_BUF_SIZE - len); + len = VALIDATE_BUF_SIZE; + } else { + memcpy(buf + len, args_buf->buf, args_buf->buf_size); + len += args_buf->buf_size; + } +out: + /* Set status to default */ + args_buf->status = FLASH_NO_OP; + return len; +} + +/* + * Validate candidate firmware image + * + * Note: + * We are only interested in first 4K bytes of the + * candidate image. + */ +static ssize_t validate_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct validate_flash_t *args_buf = &validate_flash_data; + + if (buf[0] != '1') + return -EINVAL; + + mutex_lock(&image_data_mutex); + + if (image_data.status != IMAGE_READY || + image_data.size < VALIDATE_BUF_SIZE) { + args_buf->result = VALIDATE_INVALID_IMG; + args_buf->status = VALIDATE_IMG_INCOMPLETE; + goto out; + } + + /* Copy first 4k bytes of candidate image */ + memcpy(args_buf->buf, image_data.data, VALIDATE_BUF_SIZE); + + args_buf->status = VALIDATE_IMG_READY; + args_buf->buf_size = VALIDATE_BUF_SIZE; + + /* Validate candidate image */ + opal_flash_validate(); + +out: + mutex_unlock(&image_data_mutex); + return count; +} + +/* + * Manage flash routine + */ +static inline void opal_flash_manage(uint8_t op) +{ + struct manage_flash_t *const args_buf = &manage_flash_data; + + args_buf->status = opal_manage_flash(op); +} + +/* + * Show manage flash status + */ +static ssize_t manage_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct manage_flash_t *const args_buf = &manage_flash_data; + int rc; + + rc = sprintf(buf, "%d\n", args_buf->status); + /* Set status to default*/ + args_buf->status = FLASH_NO_OP; + return rc; +} + +/* + * Manage operations: + * 0 - Reject + * 1 - Commit + */ +static ssize_t manage_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + uint8_t op; + switch (buf[0]) { + case '0': + op = FLASH_REJECT_TMP_SIDE; + break; + case '1': + op = FLASH_COMMIT_TMP_SIDE; + break; + default: + return -EINVAL; + } + + /* commit/reject temporary image */ + opal_flash_manage(op); + return count; +} + +/* + * Free sg list + */ +static void free_sg_list(struct opal_sg_list *list) +{ + struct opal_sg_list *sg1; + while (list) { + sg1 = list->next; + kfree(list); + list = sg1; + } + list = NULL; +} + +/* + * Build candidate image scatter gather list + * + * list format: + * ----------------------------------- + * | VER (8) | Entry length in bytes | + * ----------------------------------- + * | Pointer to next entry | + * ----------------------------------- + * | Address of memory area 1 | + * ----------------------------------- + * | Length of memory area 1 | + * ----------------------------------- + * | ......... | + * ----------------------------------- + * | ......... | + * ----------------------------------- + * | Address of memory area N | + * ----------------------------------- + * | Length of memory area N | + * ----------------------------------- + */ +static struct opal_sg_list *image_data_to_sglist(void) +{ + struct opal_sg_list *sg1, *list = NULL; + void *addr; + int size; + + addr = image_data.data; + size = image_data.size; + + sg1 = kzalloc((sizeof(struct opal_sg_list)), GFP_KERNEL); + if (!sg1) + return NULL; + + list = sg1; + sg1->num_entries = 0; + while (size > 0) { + /* Translate virtual address to physical address */ + sg1->entry[sg1->num_entries].data = + (void *)(vmalloc_to_pfn(addr) << PAGE_SHIFT); + + if (size > PAGE_SIZE) + sg1->entry[sg1->num_entries].length = PAGE_SIZE; + else + sg1->entry[sg1->num_entries].length = size; + + sg1->num_entries++; + if (sg1->num_entries >= SG_ENTRIES_PER_NODE) { + sg1->next = kzalloc((sizeof(struct opal_sg_list)), + GFP_KERNEL); + if (!sg1->next) { + pr_err("%s : Failed to allocate memory\n", + __func__); + goto nomem; + } + + sg1 = sg1->next; + sg1->num_entries = 0; + } + addr += PAGE_SIZE; + size -= PAGE_SIZE; + } + return list; +nomem: + free_sg_list(list); + return NULL; +} + +/* + * OPAL update flash + */ +static int opal_flash_update(int op) +{ + struct opal_sg_list *sg, *list, *next; + unsigned long addr; + int64_t rc = OPAL_PARAMETER; + + if (op == FLASH_UPDATE_CANCEL) { + pr_alert("FLASH: Image update cancelled\n"); + addr = '\0'; + goto flash; + } + + list = image_data_to_sglist(); + if (!list) + goto invalid_img; + + /* First entry address */ + addr = __pa(list); + + /* Translate sg list address to absolute */ + for (sg = list; sg; sg = next) { + next = sg->next; + /* Don't translate NULL pointer for last entry */ + if (sg->next) + sg->next = (struct opal_sg_list *)__pa(sg->next); + else + sg->next = NULL; + + /* Make num_entries into the version/length field */ + sg->num_entries = (SG_LIST_VERSION << 56) | + (sg->num_entries * sizeof(struct opal_sg_entry) + 16); + } + + pr_alert("FLASH: Image is %u bytes\n", image_data.size); + pr_alert("FLASH: Image update requested\n"); + pr_alert("FLASH: Image will be updated during system reboot\n"); + pr_alert("FLASH: This will take several minutes. Do not power off!\n"); + +flash: + rc = opal_update_flash(addr); + +invalid_img: + return rc; +} + +/* + * Show candidate image status + */ +static ssize_t update_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct update_flash_t *const args_buf = &update_flash_data; + return sprintf(buf, "%d\n", args_buf->status); +} + +/* + * Set update image flag + * 1 - Flash new image + * 0 - Cancel flash request + */ +static ssize_t update_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct update_flash_t *const args_buf = &update_flash_data; + int rc = count; + + mutex_lock(&image_data_mutex); + + switch (buf[0]) { + case '0': + if (args_buf->status == FLASH_IMG_READY) + opal_flash_update(FLASH_UPDATE_CANCEL); + args_buf->status = FLASH_NO_OP; + break; + case '1': + /* Image is loaded? */ + if (image_data.status == IMAGE_READY) + args_buf->status = + opal_flash_update(FLASH_UPDATE_INIT); + else + args_buf->status = FLASH_INVALID_IMG; + break; + default: + rc = -EINVAL; + } + + mutex_unlock(&image_data_mutex); + return rc; +} + +/* + * Free image buffer + */ +static void free_image_buf(void) +{ + void *addr; + int size; + + addr = image_data.data; + size = PAGE_ALIGN(image_data.size); + while (size > 0) { + ClearPageReserved(vmalloc_to_page(addr)); + addr += PAGE_SIZE; + size -= PAGE_SIZE; + } + vfree(image_data.data); + image_data.data = NULL; + image_data.status = IMAGE_INVALID; +} + +/* + * Allocate image buffer. + */ +static int alloc_image_buf(char *buffer, size_t count) +{ + void *addr; + int size; + + if (count < sizeof(struct image_header_t)) { + pr_warn("FLASH: Invalid candidate image\n"); + return -EINVAL; + } + + memcpy(&image_header, (void *)buffer, sizeof(struct image_header_t)); + image_data.size = be32_to_cpu(image_header.size); + pr_debug("FLASH: Candiate image size = %u\n", image_data.size); + + if (image_data.size > MAX_IMAGE_SIZE) { + pr_warn("FLASH: Too large image\n"); + return -EINVAL; + } + if (image_data.size < VALIDATE_BUF_SIZE) { + pr_warn("FLASH: Image is shorter than expected\n"); + return -EINVAL; + } + + image_data.data = vzalloc(PAGE_ALIGN(image_data.size)); + if (!image_data.data) { + pr_err("%s : Failed to allocate memory\n", __func__); + return -ENOMEM; + } + + /* Pin memory */ + addr = image_data.data; + size = PAGE_ALIGN(image_data.size); + while (size > 0) { + SetPageReserved(vmalloc_to_page(addr)); + addr += PAGE_SIZE; + size -= PAGE_SIZE; + } + + image_data.status = IMAGE_LOADING; + return 0; +} + +/* + * Copy candidate image + * + * Parse candidate image header to get total image size + * and pre-allocate required memory. + */ +static ssize_t image_data_write(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buffer, loff_t pos, size_t count) +{ + int rc; + + mutex_lock(&image_data_mutex); + + /* New image ? */ + if (pos == 0) { + /* Free memory, if already allocated */ + if (image_data.data) + free_image_buf(); + + /* Cancel outstanding image update request */ + if (update_flash_data.status == FLASH_IMG_READY) + opal_flash_update(FLASH_UPDATE_CANCEL); + + /* Allocate memory */ + rc = alloc_image_buf(buffer, count); + if (rc) + goto out; + } + + if (image_data.status != IMAGE_LOADING) { + rc = -ENOMEM; + goto out; + } + + if ((pos + count) > image_data.size) { + rc = -EINVAL; + goto out; + } + + memcpy(image_data.data + pos, (void *)buffer, count); + rc = count; + + /* Set image status */ + if ((pos + count) == image_data.size) { + pr_debug("FLASH: Candidate image loaded....\n"); + image_data.status = IMAGE_READY; + } + +out: + mutex_unlock(&image_data_mutex); + return rc; +} + +/* + * sysfs interface : + * OPAL uses below sysfs files for code update. + * We create these files under /sys/firmware/opal. + * + * image : Interface to load candidate firmware image + * validate_flash : Validate firmware image + * manage_flash : Commit/Reject firmware image + * update_flash : Flash new firmware image + * + */ +static struct bin_attribute image_data_attr = { + .attr = {.name = "image", .mode = 0200}, + .size = MAX_IMAGE_SIZE, /* Limit image size */ + .write = image_data_write, +}; + +static struct kobj_attribute validate_attribute = + __ATTR(validate_flash, 0600, validate_show, validate_store); + +static struct kobj_attribute manage_attribute = + __ATTR(manage_flash, 0600, manage_show, manage_store); + +static struct kobj_attribute update_attribute = + __ATTR(update_flash, 0600, update_show, update_store); + +static struct attribute *image_op_attrs[] = { + &validate_attribute.attr, + &manage_attribute.attr, + &update_attribute.attr, + NULL /* need to NULL terminate the list of attributes */ +}; + +static struct attribute_group image_op_attr_group = { + .attrs = image_op_attrs, +}; + +void __init opal_flash_init(void) +{ + int ret; + + /* Allocate validate image buffer */ + validate_flash_data.buf = kzalloc(VALIDATE_BUF_SIZE, GFP_KERNEL); + if (!validate_flash_data.buf) { + pr_err("%s : Failed to allocate memory\n", __func__); + return; + } + + /* Make sure /sys/firmware/opal directory is created */ + if (!opal_kobj) { + pr_warn("FLASH: opal kobject is not available\n"); + goto nokobj; + } + + /* Create the sysfs files */ + ret = sysfs_create_group(opal_kobj, &image_op_attr_group); + if (ret) { + pr_warn("FLASH: Failed to create sysfs files\n"); + goto nokobj; + } + + ret = sysfs_create_bin_file(opal_kobj, &image_data_attr); + if (ret) { + pr_warn("FLASH: Failed to create sysfs files\n"); + goto nosysfs_file; + } + + /* Set default status */ + validate_flash_data.status = FLASH_NO_OP; + manage_flash_data.status = FLASH_NO_OP; + update_flash_data.status = FLASH_NO_OP; + image_data.status = IMAGE_INVALID; + return; + +nosysfs_file: + sysfs_remove_group(opal_kobj, &image_op_attr_group); + +nokobj: + kfree(validate_flash_data.buf); + return; +} diff --git a/arch/powerpc/platforms/powernv/opal-lpc.c b/arch/powerpc/platforms/powernv/opal-lpc.c new file mode 100644 index 0000000..e7e59e4 --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal-lpc.c @@ -0,0 +1,204 @@ +/* + * PowerNV LPC bus handling. + * + * Copyright 2013 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/kernel.h> +#include <linux/of.h> +#include <linux/bug.h> + +#include <asm/machdep.h> +#include <asm/firmware.h> +#include <asm/xics.h> +#include <asm/opal.h> +#include <asm/prom.h> + +static int opal_lpc_chip_id = -1; + +static u8 opal_lpc_inb(unsigned long port) +{ + int64_t rc; + uint32_t data; + + if (opal_lpc_chip_id < 0 || port > 0xffff) + return 0xff; + rc = opal_lpc_read(opal_lpc_chip_id, OPAL_LPC_IO, port, &data, 1); + return rc ? 0xff : data; +} + +static __le16 __opal_lpc_inw(unsigned long port) +{ + int64_t rc; + uint32_t data; + + if (opal_lpc_chip_id < 0 || port > 0xfffe) + return 0xffff; + if (port & 1) + return (__le16)opal_lpc_inb(port) << 8 | opal_lpc_inb(port + 1); + rc = opal_lpc_read(opal_lpc_chip_id, OPAL_LPC_IO, port, &data, 2); + return rc ? 0xffff : data; +} +static u16 opal_lpc_inw(unsigned long port) +{ + return le16_to_cpu(__opal_lpc_inw(port)); +} + +static __le32 __opal_lpc_inl(unsigned long port) +{ + int64_t rc; + uint32_t data; + + if (opal_lpc_chip_id < 0 || port > 0xfffc) + return 0xffffffff; + if (port & 3) + return (__le32)opal_lpc_inb(port ) << 24 | + (__le32)opal_lpc_inb(port + 1) << 16 | + (__le32)opal_lpc_inb(port + 2) << 8 | + opal_lpc_inb(port + 3); + rc = opal_lpc_read(opal_lpc_chip_id, OPAL_LPC_IO, port, &data, 4); + return rc ? 0xffffffff : data; +} + +static u32 opal_lpc_inl(unsigned long port) +{ + return le32_to_cpu(__opal_lpc_inl(port)); +} + +static void opal_lpc_outb(u8 val, unsigned long port) +{ + if (opal_lpc_chip_id < 0 || port > 0xffff) + return; + opal_lpc_write(opal_lpc_chip_id, OPAL_LPC_IO, port, val, 1); +} + +static void __opal_lpc_outw(__le16 val, unsigned long port) +{ + if (opal_lpc_chip_id < 0 || port > 0xfffe) + return; + if (port & 1) { + opal_lpc_outb(val >> 8, port); + opal_lpc_outb(val , port + 1); + return; + } + opal_lpc_write(opal_lpc_chip_id, OPAL_LPC_IO, port, val, 2); +} + +static void opal_lpc_outw(u16 val, unsigned long port) +{ + __opal_lpc_outw(cpu_to_le16(val), port); +} + +static void __opal_lpc_outl(__le32 val, unsigned long port) +{ + if (opal_lpc_chip_id < 0 || port > 0xfffc) + return; + if (port & 3) { + opal_lpc_outb(val >> 24, port); + opal_lpc_outb(val >> 16, port + 1); + opal_lpc_outb(val >> 8, port + 2); + opal_lpc_outb(val , port + 3); + return; + } + opal_lpc_write(opal_lpc_chip_id, OPAL_LPC_IO, port, val, 4); +} + +static void opal_lpc_outl(u32 val, unsigned long port) +{ + __opal_lpc_outl(cpu_to_le32(val), port); +} + +static void opal_lpc_insb(unsigned long p, void *b, unsigned long c) +{ + u8 *ptr = b; + + while(c--) + *(ptr++) = opal_lpc_inb(p); +} + +static void opal_lpc_insw(unsigned long p, void *b, unsigned long c) +{ + __le16 *ptr = b; + + while(c--) + *(ptr++) = __opal_lpc_inw(p); +} + +static void opal_lpc_insl(unsigned long p, void *b, unsigned long c) +{ + __le32 *ptr = b; + + while(c--) + *(ptr++) = __opal_lpc_inl(p); +} + +static void opal_lpc_outsb(unsigned long p, const void *b, unsigned long c) +{ + const u8 *ptr = b; + + while(c--) + opal_lpc_outb(*(ptr++), p); +} + +static void opal_lpc_outsw(unsigned long p, const void *b, unsigned long c) +{ + const __le16 *ptr = b; + + while(c--) + __opal_lpc_outw(*(ptr++), p); +} + +static void opal_lpc_outsl(unsigned long p, const void *b, unsigned long c) +{ + const __le32 *ptr = b; + + while(c--) + __opal_lpc_outl(*(ptr++), p); +} + +static const struct ppc_pci_io opal_lpc_io = { + .inb = opal_lpc_inb, + .inw = opal_lpc_inw, + .inl = opal_lpc_inl, + .outb = opal_lpc_outb, + .outw = opal_lpc_outw, + .outl = opal_lpc_outl, + .insb = opal_lpc_insb, + .insw = opal_lpc_insw, + .insl = opal_lpc_insl, + .outsb = opal_lpc_outsb, + .outsw = opal_lpc_outsw, + .outsl = opal_lpc_outsl, +}; + +void opal_lpc_init(void) +{ + struct device_node *np; + + /* + * Look for a Power8 LPC bus tagged as "primary", + * we currently support only one though the OPAL APIs + * support any number. + */ + for_each_compatible_node(np, NULL, "ibm,power8-lpc") { + if (!of_device_is_available(np)) + continue; + if (!of_get_property(np, "primary", NULL)) + continue; + opal_lpc_chip_id = of_get_ibm_chip_id(np); + break; + } + if (opal_lpc_chip_id < 0) + return; + + /* Setup special IO ops */ + ppc_pci_io = opal_lpc_io; + isa_io_special = true; + + pr_info("OPAL: Power8 LPC bus found, chip ID %d\n", opal_lpc_chip_id); +} diff --git a/arch/powerpc/platforms/powernv/opal-nvram.c b/arch/powerpc/platforms/powernv/opal-nvram.c index 3f83e1a..acd9f7e 100644 --- a/arch/powerpc/platforms/powernv/opal-nvram.c +++ b/arch/powerpc/platforms/powernv/opal-nvram.c @@ -65,7 +65,7 @@ static ssize_t opal_nvram_write(char *buf, size_t count, loff_t *index) void __init opal_nvram_init(void) { struct device_node *np; - const u32 *nbytes_p; + const __be32 *nbytes_p; np = of_find_compatible_node(NULL, NULL, "ibm,opal-nvram"); if (np == NULL) @@ -76,7 +76,7 @@ void __init opal_nvram_init(void) of_node_put(np); return; } - nvram_size = *nbytes_p; + nvram_size = be32_to_cpup(nbytes_p); printk(KERN_INFO "OPAL nvram setup, %u bytes\n", nvram_size); of_node_put(np); diff --git a/arch/powerpc/platforms/powernv/opal-rtc.c b/arch/powerpc/platforms/powernv/opal-rtc.c index 2aa7641..7d07c7e 100644 --- a/arch/powerpc/platforms/powernv/opal-rtc.c +++ b/arch/powerpc/platforms/powernv/opal-rtc.c @@ -37,10 +37,12 @@ unsigned long __init opal_get_boot_time(void) struct rtc_time tm; u32 y_m_d; u64 h_m_s_ms; + __be32 __y_m_d; + __be64 __h_m_s_ms; long rc = OPAL_BUSY; while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { - rc = opal_rtc_read(&y_m_d, &h_m_s_ms); + rc = opal_rtc_read(&__y_m_d, &__h_m_s_ms); if (rc == OPAL_BUSY_EVENT) opal_poll_events(NULL); else @@ -48,6 +50,8 @@ unsigned long __init opal_get_boot_time(void) } if (rc != OPAL_SUCCESS) return 0; + y_m_d = be32_to_cpu(__y_m_d); + h_m_s_ms = be64_to_cpu(__h_m_s_ms); opal_to_tm(y_m_d, h_m_s_ms, &tm); return mktime(tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec); @@ -58,9 +62,11 @@ void opal_get_rtc_time(struct rtc_time *tm) long rc = OPAL_BUSY; u32 y_m_d; u64 h_m_s_ms; + __be32 __y_m_d; + __be64 __h_m_s_ms; while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { - rc = opal_rtc_read(&y_m_d, &h_m_s_ms); + rc = opal_rtc_read(&__y_m_d, &__h_m_s_ms); if (rc == OPAL_BUSY_EVENT) opal_poll_events(NULL); else @@ -68,6 +74,8 @@ void opal_get_rtc_time(struct rtc_time *tm) } if (rc != OPAL_SUCCESS) return; + y_m_d = be32_to_cpu(__y_m_d); + h_m_s_ms = be64_to_cpu(__h_m_s_ms); opal_to_tm(y_m_d, h_m_s_ms, tm); } diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index 6fabe92..e780650 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -24,7 +24,7 @@ mflr r0; \ mfcr r12; \ std r0,16(r1); \ - std r12,8(r1); \ + stw r12,8(r1); \ std r1,PACAR1(r13); \ li r0,0; \ mfmsr r12; \ @@ -34,7 +34,7 @@ mtmsrd r12,1; \ LOAD_REG_ADDR(r0,.opal_return); \ mtlr r0; \ - li r0,MSR_DR|MSR_IR; \ + li r0,MSR_DR|MSR_IR|MSR_LE;\ andc r12,r12,r0; \ li r0,token; \ mtspr SPRN_HSRR1,r12; \ @@ -45,8 +45,15 @@ hrfid _STATIC(opal_return) + /* + * Fixup endian on OPAL return... we should be able to simplify + * this by instead converting the below trampoline to a set of + * bytes (always BE) since MSR:LE will end up fixed up as a side + * effect of the rfid. + */ + FIXUP_ENDIAN ld r2,PACATOC(r13); - ld r4,8(r1); + lwz r4,8(r1); ld r5,16(r1); ld r6,PACASAVEDMSR(r13); mtspr SPRN_SRR0,r5; @@ -107,4 +114,15 @@ OPAL_CALL(opal_pci_mask_pe_error, OPAL_PCI_MASK_PE_ERROR); OPAL_CALL(opal_set_slot_led_status, OPAL_SET_SLOT_LED_STATUS); OPAL_CALL(opal_get_epow_status, OPAL_GET_EPOW_STATUS); OPAL_CALL(opal_set_system_attention_led, OPAL_SET_SYSTEM_ATTENTION_LED); +OPAL_CALL(opal_pci_next_error, OPAL_PCI_NEXT_ERROR); +OPAL_CALL(opal_pci_poll, OPAL_PCI_POLL); OPAL_CALL(opal_pci_msi_eoi, OPAL_PCI_MSI_EOI); +OPAL_CALL(opal_pci_get_phb_diag_data2, OPAL_PCI_GET_PHB_DIAG_DATA2); +OPAL_CALL(opal_xscom_read, OPAL_XSCOM_READ); +OPAL_CALL(opal_xscom_write, OPAL_XSCOM_WRITE); +OPAL_CALL(opal_lpc_read, OPAL_LPC_READ); +OPAL_CALL(opal_lpc_write, OPAL_LPC_WRITE); +OPAL_CALL(opal_return_cpu, OPAL_RETURN_CPU); +OPAL_CALL(opal_validate_flash, OPAL_FLASH_VALIDATE); +OPAL_CALL(opal_manage_flash, OPAL_FLASH_MANAGE); +OPAL_CALL(opal_update_flash, OPAL_FLASH_UPDATE); diff --git a/arch/powerpc/platforms/powernv/opal-xscom.c b/arch/powerpc/platforms/powernv/opal-xscom.c new file mode 100644 index 0000000..4d99a8f --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal-xscom.c @@ -0,0 +1,128 @@ +/* + * PowerNV LPC bus handling. + * + * Copyright 2013 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/kernel.h> +#include <linux/of.h> +#include <linux/bug.h> +#include <linux/gfp.h> +#include <linux/slab.h> + +#include <asm/machdep.h> +#include <asm/firmware.h> +#include <asm/opal.h> +#include <asm/scom.h> + +/* + * We could probably fit that inside the scom_map_t + * which is a void* after all but it's really too ugly + * so let's kmalloc it for now + */ +struct opal_scom_map { + uint32_t chip; + uint64_t addr; +}; + +static scom_map_t opal_scom_map(struct device_node *dev, u64 reg, u64 count) +{ + struct opal_scom_map *m; + const __be32 *gcid; + + if (!of_get_property(dev, "scom-controller", NULL)) { + pr_err("%s: device %s is not a SCOM controller\n", + __func__, dev->full_name); + return SCOM_MAP_INVALID; + } + gcid = of_get_property(dev, "ibm,chip-id", NULL); + if (!gcid) { + pr_err("%s: device %s has no ibm,chip-id\n", + __func__, dev->full_name); + return SCOM_MAP_INVALID; + } + m = kmalloc(sizeof(struct opal_scom_map), GFP_KERNEL); + if (!m) + return NULL; + m->chip = be32_to_cpup(gcid); + m->addr = reg; + + return (scom_map_t)m; +} + +static void opal_scom_unmap(scom_map_t map) +{ + kfree(map); +} + +static int opal_xscom_err_xlate(int64_t rc) +{ + switch(rc) { + case 0: + return 0; + /* Add more translations if necessary */ + default: + return -EIO; + } +} + +static u64 opal_scom_unmangle(u64 reg) +{ + /* + * XSCOM indirect addresses have the top bit set. Additionally + * the reset of the top 3 nibbles is always 0. + * + * Because the debugfs interface uses signed offsets and shifts + * the address left by 3, we basically cannot use the top 4 bits + * of the 64-bit address, and thus cannot use the indirect bit. + * + * To deal with that, we support the indirect bit being in bit + * 4 (IBM notation) instead of bit 0 in this API, we do the + * conversion here. To leave room for further xscom address + * expansion, we only clear out the top byte + * + */ + if (reg & (1ull << 59)) + reg = (reg & ~(0xffull << 56)) | (1ull << 63); + return reg; +} + +static int opal_scom_read(scom_map_t map, u64 reg, u64 *value) +{ + struct opal_scom_map *m = map; + int64_t rc; + + reg = opal_scom_unmangle(reg); + rc = opal_xscom_read(m->chip, m->addr + reg, (uint64_t *)__pa(value)); + return opal_xscom_err_xlate(rc); +} + +static int opal_scom_write(scom_map_t map, u64 reg, u64 value) +{ + struct opal_scom_map *m = map; + int64_t rc; + + reg = opal_scom_unmangle(reg); + rc = opal_xscom_write(m->chip, m->addr + reg, value); + return opal_xscom_err_xlate(rc); +} + +static const struct scom_controller opal_scom_controller = { + .map = opal_scom_map, + .unmap = opal_scom_unmap, + .read = opal_scom_read, + .write = opal_scom_write +}; + +static int opal_xscom_init(void) +{ + if (firmware_has_feature(FW_FEATURE_OPALv3)) + scom_init(&opal_scom_controller); + return 0; +} +arch_initcall(opal_xscom_init); diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 628c564..1c798cd 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -13,14 +13,20 @@ #include <linux/types.h> #include <linux/of.h> +#include <linux/of_fdt.h> #include <linux/of_platform.h> #include <linux/interrupt.h> +#include <linux/notifier.h> #include <linux/slab.h> +#include <linux/kobject.h> #include <asm/opal.h> #include <asm/firmware.h> #include "powernv.h" +/* /sys/firmware/opal */ +struct kobject *opal_kobj; + struct opal { u64 base; u64 entry; @@ -31,6 +37,10 @@ static DEFINE_SPINLOCK(opal_write_lock); extern u64 opal_mc_secondary_handler[]; static unsigned int *opal_irqs; static unsigned int opal_irq_count; +static ATOMIC_NOTIFIER_HEAD(opal_notifier_head); +static DEFINE_SPINLOCK(opal_notifier_lock); +static uint64_t last_notified_mask = 0x0ul; +static atomic_t opal_notifier_hold = ATOMIC_INIT(0); int __init early_init_dt_scan_opal(unsigned long node, const char *uname, int depth, void *data) @@ -72,6 +82,7 @@ int __init early_init_dt_scan_opal(unsigned long node, static int __init opal_register_exception_handlers(void) { +#ifdef __BIG_ENDIAN__ u64 glue; if (!(powerpc_firmware_features & FW_FEATURE_OPAL)) @@ -89,35 +100,99 @@ static int __init opal_register_exception_handlers(void) 0, glue); glue += 128; opal_register_exception_handler(OPAL_SOFTPATCH_HANDLER, 0, glue); +#endif return 0; } early_initcall(opal_register_exception_handlers); +int opal_notifier_register(struct notifier_block *nb) +{ + if (!nb) { + pr_warning("%s: Invalid argument (%p)\n", + __func__, nb); + return -EINVAL; + } + + atomic_notifier_chain_register(&opal_notifier_head, nb); + return 0; +} + +static void opal_do_notifier(uint64_t events) +{ + unsigned long flags; + uint64_t changed_mask; + + if (atomic_read(&opal_notifier_hold)) + return; + + spin_lock_irqsave(&opal_notifier_lock, flags); + changed_mask = last_notified_mask ^ events; + last_notified_mask = events; + spin_unlock_irqrestore(&opal_notifier_lock, flags); + + /* + * We feed with the event bits and changed bits for + * enough information to the callback. + */ + atomic_notifier_call_chain(&opal_notifier_head, + events, (void *)changed_mask); +} + +void opal_notifier_update_evt(uint64_t evt_mask, + uint64_t evt_val) +{ + unsigned long flags; + + spin_lock_irqsave(&opal_notifier_lock, flags); + last_notified_mask &= ~evt_mask; + last_notified_mask |= evt_val; + spin_unlock_irqrestore(&opal_notifier_lock, flags); +} + +void opal_notifier_enable(void) +{ + int64_t rc; + uint64_t evt = 0; + + atomic_set(&opal_notifier_hold, 0); + + /* Process pending events */ + rc = opal_poll_events(&evt); + if (rc == OPAL_SUCCESS && evt) + opal_do_notifier(evt); +} + +void opal_notifier_disable(void) +{ + atomic_set(&opal_notifier_hold, 1); +} + int opal_get_chars(uint32_t vtermno, char *buf, int count) { - s64 len, rc; - u64 evt; + s64 rc; + __be64 evt, len; if (!opal.entry) return -ENODEV; opal_poll_events(&evt); - if ((evt & OPAL_EVENT_CONSOLE_INPUT) == 0) + if ((be64_to_cpu(evt) & OPAL_EVENT_CONSOLE_INPUT) == 0) return 0; - len = count; - rc = opal_console_read(vtermno, &len, buf); + len = cpu_to_be64(count); + rc = opal_console_read(vtermno, &len, buf); if (rc == OPAL_SUCCESS) - return len; + return be64_to_cpu(len); return 0; } int opal_put_chars(uint32_t vtermno, const char *data, int total_len) { int written = 0; + __be64 olen; s64 len, rc; unsigned long flags; - u64 evt; + __be64 evt; if (!opal.entry) return -ENODEV; @@ -132,13 +207,14 @@ int opal_put_chars(uint32_t vtermno, const char *data, int total_len) */ spin_lock_irqsave(&opal_write_lock, flags); if (firmware_has_feature(FW_FEATURE_OPALv2)) { - rc = opal_console_write_buffer_space(vtermno, &len); + rc = opal_console_write_buffer_space(vtermno, &olen); + len = be64_to_cpu(olen); if (rc || len < total_len) { spin_unlock_irqrestore(&opal_write_lock, flags); /* Closed -> drop characters */ if (rc) return total_len; - opal_poll_events(&evt); + opal_poll_events(NULL); return -EAGAIN; } } @@ -149,8 +225,9 @@ int opal_put_chars(uint32_t vtermno, const char *data, int total_len) rc = OPAL_BUSY; while(total_len > 0 && (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT || rc == OPAL_SUCCESS)) { - len = total_len; - rc = opal_console_write(vtermno, &len, data); + olen = cpu_to_be64(total_len); + rc = opal_console_write(vtermno, &olen, data); + len = be64_to_cpu(olen); /* Closed or other error drop */ if (rc != OPAL_SUCCESS && rc != OPAL_BUSY && @@ -170,7 +247,8 @@ int opal_put_chars(uint32_t vtermno, const char *data, int total_len) */ do opal_poll_events(&evt); - while(rc == OPAL_SUCCESS && (evt & OPAL_EVENT_CONSOLE_OUTPUT)); + while(rc == OPAL_SUCCESS && + (be64_to_cpu(evt) & OPAL_EVENT_CONSOLE_OUTPUT)); } spin_unlock_irqrestore(&opal_write_lock, flags); return written; @@ -293,19 +371,30 @@ int opal_machine_check(struct pt_regs *regs) static irqreturn_t opal_interrupt(int irq, void *data) { - uint64_t events; + __be64 events; opal_handle_interrupt(virq_to_hw(irq), &events); - /* XXX TODO: Do something with the events */ + opal_do_notifier(events); return IRQ_HANDLED; } +static int opal_sysfs_init(void) +{ + opal_kobj = kobject_create_and_add("opal", firmware_kobj); + if (!opal_kobj) { + pr_warn("kobject_create_and_add opal failed\n"); + return -ENOMEM; + } + + return 0; +} + static int __init opal_init(void) { struct device_node *np, *consoles; - const u32 *irqs; + const __be32 *irqs; int rc, i, irqlen; opal_node = of_find_node_by_path("/ibm,opal"); @@ -313,18 +402,20 @@ static int __init opal_init(void) pr_warn("opal: Node not found\n"); return -ENODEV; } + + /* Register OPAL consoles if any ports */ if (firmware_has_feature(FW_FEATURE_OPALv2)) consoles = of_find_node_by_path("/ibm,opal/consoles"); else consoles = of_node_get(opal_node); - - /* Register serial ports */ - for_each_child_of_node(consoles, np) { - if (strcmp(np->name, "serial")) - continue; - of_platform_device_create(np, NULL, NULL); + if (consoles) { + for_each_child_of_node(consoles, np) { + if (strcmp(np->name, "serial")) + continue; + of_platform_device_create(np, NULL, NULL); + } + of_node_put(consoles); } - of_node_put(consoles); /* Find all OPAL interrupts and request them */ irqs = of_get_property(opal_node, "opal-interrupts", &irqlen); @@ -345,6 +436,14 @@ static int __init opal_init(void) " (0x%x)\n", rc, irq, hwirq); opal_irqs[i] = irq; } + + /* Create "opal" kobject under /sys/firmware */ + rc = opal_sysfs_init(); + if (rc == 0) { + /* Setup code update interface */ + opal_flash_init(); + } + return 0; } subsys_initcall(opal_init); @@ -355,7 +454,7 @@ void opal_shutdown(void) for (i = 0; i < opal_irq_count; i++) { if (opal_irqs[i]) - free_irq(opal_irqs[i], 0); + free_irq(opal_irqs[i], NULL); opal_irqs[i] = 0; } } diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 9c9d15e..084cdfa 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -13,6 +13,7 @@ #include <linux/kernel.h> #include <linux/pci.h> +#include <linux/debugfs.h> #include <linux/delay.h> #include <linux/string.h> #include <linux/init.h> @@ -32,6 +33,7 @@ #include <asm/iommu.h> #include <asm/tce.h> #include <asm/xics.h> +#include <asm/debug.h> #include "powernv.h" #include "pci.h" @@ -68,6 +70,16 @@ define_pe_printk_level(pe_err, KERN_ERR); define_pe_printk_level(pe_warn, KERN_WARNING); define_pe_printk_level(pe_info, KERN_INFO); +/* + * stdcix is only supposed to be used in hypervisor real mode as per + * the architecture spec + */ +static inline void __raw_rm_writeq(u64 val, volatile void __iomem *paddr) +{ + __asm__ __volatile__("stdcix %0,0,%1" + : : "r" (val), "r" (paddr) : "memory"); +} + static int pnv_ioda_alloc_pe(struct pnv_phb *phb) { unsigned long pe; @@ -151,13 +163,23 @@ static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) rid_end = pe->rid + 1; } - /* Associate PE in PELT */ + /* + * Associate PE in PELT. We need add the PE into the + * corresponding PELT-V as well. Otherwise, the error + * originated from the PE might contribute to other + * PEs. + */ rc = opal_pci_set_pe(phb->opal_id, pe->pe_number, pe->rid, bcomp, dcomp, fcomp, OPAL_MAP_PE); if (rc) { pe_err(pe, "OPAL error %ld trying to setup PELT table\n", rc); return -ENXIO; } + + rc = opal_pci_set_peltv(phb->opal_id, pe->pe_number, + pe->pe_number, OPAL_ADD_PE_TO_DOMAIN); + if (rc) + pe_warn(pe, "OPAL error %d adding self to PELTV\n", rc); opal_pci_eeh_freeze_clear(phb->opal_id, pe->pe_number, OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); @@ -441,10 +463,24 @@ static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev set_iommu_table_base(&pdev->dev, &pe->tce32_table); } -static void pnv_pci_ioda1_tce_invalidate(struct iommu_table *tbl, - u64 *startp, u64 *endp) +static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus) { - u64 __iomem *invalidate = (u64 __iomem *)tbl->it_index; + struct pci_dev *dev; + + list_for_each_entry(dev, &bus->devices, bus_list) { + set_iommu_table_base(&dev->dev, &pe->tce32_table); + if (dev->subordinate) + pnv_ioda_setup_bus_dma(pe, dev->subordinate); + } +} + +static void pnv_pci_ioda1_tce_invalidate(struct pnv_ioda_pe *pe, + struct iommu_table *tbl, + __be64 *startp, __be64 *endp, bool rm) +{ + __be64 __iomem *invalidate = rm ? + (__be64 __iomem *)pe->tce_inval_reg_phys : + (__be64 __iomem *)tbl->it_index; unsigned long start, end, inc; start = __pa(startp); @@ -471,7 +507,10 @@ static void pnv_pci_ioda1_tce_invalidate(struct iommu_table *tbl, mb(); /* Ensure above stores are visible */ while (start <= end) { - __raw_writeq(start, invalidate); + if (rm) + __raw_rm_writeq(cpu_to_be64(start), invalidate); + else + __raw_writeq(cpu_to_be64(start), invalidate); start += inc; } @@ -483,10 +522,12 @@ static void pnv_pci_ioda1_tce_invalidate(struct iommu_table *tbl, static void pnv_pci_ioda2_tce_invalidate(struct pnv_ioda_pe *pe, struct iommu_table *tbl, - u64 *startp, u64 *endp) + __be64 *startp, __be64 *endp, bool rm) { unsigned long start, end, inc; - u64 __iomem *invalidate = (u64 __iomem *)tbl->it_index; + __be64 __iomem *invalidate = rm ? + (__be64 __iomem *)pe->tce_inval_reg_phys : + (__be64 __iomem *)tbl->it_index; /* We'll invalidate DMA address in PE scope */ start = 0x2ul << 60; @@ -502,22 +543,25 @@ static void pnv_pci_ioda2_tce_invalidate(struct pnv_ioda_pe *pe, mb(); while (start <= end) { - __raw_writeq(start, invalidate); + if (rm) + __raw_rm_writeq(cpu_to_be64(start), invalidate); + else + __raw_writeq(cpu_to_be64(start), invalidate); start += inc; } } void pnv_pci_ioda_tce_invalidate(struct iommu_table *tbl, - u64 *startp, u64 *endp) + __be64 *startp, __be64 *endp, bool rm) { struct pnv_ioda_pe *pe = container_of(tbl, struct pnv_ioda_pe, tce32_table); struct pnv_phb *phb = pe->phb; if (phb->type == PNV_PHB_IODA1) - pnv_pci_ioda1_tce_invalidate(tbl, startp, endp); + pnv_pci_ioda1_tce_invalidate(pe, tbl, startp, endp, rm); else - pnv_pci_ioda2_tce_invalidate(pe, tbl, startp, endp); + pnv_pci_ioda2_tce_invalidate(pe, tbl, startp, endp, rm); } static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, @@ -590,11 +634,19 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb, * bus number, print that out instead. */ tbl->it_busno = 0; - tbl->it_index = (unsigned long)ioremap(be64_to_cpup(swinvp), 8); + pe->tce_inval_reg_phys = be64_to_cpup(swinvp); + tbl->it_index = (unsigned long)ioremap(pe->tce_inval_reg_phys, + 8); tbl->it_type = TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE | TCE_PCI_SWINV_PAIR; } iommu_init_table(tbl, phb->hose->node); + iommu_register_group(tbl, pci_domain_nr(pe->pbus), pe->pe_number); + + if (pe->pdev) + set_iommu_table_base(&pe->pdev->dev, tbl); + else + pnv_ioda_setup_bus_dma(pe, pe->pbus); return; fail: @@ -662,11 +714,18 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, * bus number, print that out instead. */ tbl->it_busno = 0; - tbl->it_index = (unsigned long)ioremap(be64_to_cpup(swinvp), 8); + pe->tce_inval_reg_phys = be64_to_cpup(swinvp); + tbl->it_index = (unsigned long)ioremap(pe->tce_inval_reg_phys, + 8); tbl->it_type = TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE; } iommu_init_table(tbl, phb->hose->node); + if (pe->pdev) + set_iommu_table_base(&pe->pdev->dev, tbl); + else + pnv_ioda_setup_bus_dma(pe, pe->pbus); + return; fail: if (pe->tce32_seg >= 0) @@ -762,8 +821,7 @@ static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev, struct irq_data *idata; struct irq_chip *ichip; unsigned int xive_num = hwirq - phb->msi_base; - uint64_t addr64; - uint32_t addr32, data; + __be32 data; int rc; /* No PE assigned ? bail out ... no MSI for you ! */ @@ -787,6 +845,8 @@ static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev, } if (is_64) { + __be64 addr64; + rc = opal_get_msi_64(phb->opal_id, pe->mve_number, xive_num, 1, &addr64, &data); if (rc) { @@ -794,9 +854,11 @@ static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev, pci_name(dev), rc); return -EIO; } - msg->address_hi = addr64 >> 32; - msg->address_lo = addr64 & 0xfffffffful; + msg->address_hi = be64_to_cpu(addr64) >> 32; + msg->address_lo = be64_to_cpu(addr64) & 0xfffffffful; } else { + __be32 addr32; + rc = opal_get_msi_32(phb->opal_id, pe->mve_number, xive_num, 1, &addr32, &data); if (rc) { @@ -805,9 +867,9 @@ static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev, return -EIO; } msg->address_hi = 0; - msg->address_lo = addr32; + msg->address_lo = be32_to_cpu(addr32); } - msg->data = data; + msg->data = be32_to_cpu(data); /* * Change the IRQ chip for the MSI interrupts on PHB3. @@ -968,11 +1030,38 @@ static void pnv_pci_ioda_setup_DMA(void) } } +static void pnv_pci_ioda_create_dbgfs(void) +{ +#ifdef CONFIG_DEBUG_FS + struct pci_controller *hose, *tmp; + struct pnv_phb *phb; + char name[16]; + + list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { + phb = hose->private_data; + + sprintf(name, "PCI%04x", hose->global_number); + phb->dbgfs = debugfs_create_dir(name, powerpc_debugfs_root); + if (!phb->dbgfs) + pr_warning("%s: Error on creating debugfs on PHB#%x\n", + __func__, hose->global_number); + } +#endif /* CONFIG_DEBUG_FS */ +} + static void pnv_pci_ioda_fixup(void) { pnv_pci_ioda_setup_PEs(); pnv_pci_ioda_setup_seg(); pnv_pci_ioda_setup_DMA(); + + pnv_pci_ioda_create_dbgfs(); + +#ifdef CONFIG_EEH + eeh_probe_mode_set(EEH_PROBE_MODE_DEV); + eeh_addr_cache_build(); + eeh_init(); +#endif } /* @@ -1049,19 +1138,20 @@ static void pnv_pci_ioda_shutdown(struct pnv_phb *phb) OPAL_ASSERT_RESET); } -void __init pnv_pci_init_ioda_phb(struct device_node *np, int ioda_type) +void __init pnv_pci_init_ioda_phb(struct device_node *np, + u64 hub_id, int ioda_type) { struct pci_controller *hose; - static int primary = 1; struct pnv_phb *phb; unsigned long size, m32map_off, iomap_off, pemap_off; - const u64 *prop64; - const u32 *prop32; + const __be64 *prop64; + const __be32 *prop32; + int len; u64 phb_id; void *aux; long rc; - pr_info(" Initializing IODA%d OPAL PHB %s\n", ioda_type, np->full_name); + pr_info("Initializing IODA%d OPAL PHB %s\n", ioda_type, np->full_name); prop64 = of_get_property(np, "ibm,opal-phbid", NULL); if (!prop64) { @@ -1072,21 +1162,33 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np, int ioda_type) pr_debug(" PHB-ID : 0x%016llx\n", phb_id); phb = alloc_bootmem(sizeof(struct pnv_phb)); - if (phb) { - memset(phb, 0, sizeof(struct pnv_phb)); - phb->hose = hose = pcibios_alloc_controller(np); + if (!phb) { + pr_err(" Out of memory !\n"); + return; } - if (!phb || !phb->hose) { - pr_err("PCI: Failed to allocate PCI controller for %s\n", + + /* Allocate PCI controller */ + memset(phb, 0, sizeof(struct pnv_phb)); + phb->hose = hose = pcibios_alloc_controller(np); + if (!phb->hose) { + pr_err(" Can't allocate PCI controller for %s\n", np->full_name); + free_bootmem((unsigned long)phb, sizeof(struct pnv_phb)); return; } spin_lock_init(&phb->lock); - /* XXX Use device-tree */ - hose->first_busno = 0; - hose->last_busno = 0xff; + prop32 = of_get_property(np, "bus-range", &len); + if (prop32 && len == 8) { + hose->first_busno = be32_to_cpu(prop32[0]); + hose->last_busno = be32_to_cpu(prop32[1]); + } else { + pr_warn(" Broken <bus-range> on %s\n", np->full_name); + hose->first_busno = 0; + hose->last_busno = 0xff; + } hose->private_data = phb; + phb->hub_id = hub_id; phb->opal_id = phb_id; phb->type = ioda_type; @@ -1099,8 +1201,7 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np, int ioda_type) phb->model = PNV_PHB_MODEL_UNKNOWN; /* Parse 32-bit and IO ranges (if any) */ - pci_process_bridge_OF_ranges(phb->hose, np, primary); - primary = 0; + pci_process_bridge_OF_ranges(hose, np, !hose->global_number); /* Get registers */ phb->regs = of_iomap(np, 0); @@ -1108,12 +1209,13 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np, int ioda_type) pr_err(" Failed to map registers !\n"); /* Initialize more IODA stuff */ + phb->ioda.total_pe = 1; prop32 = of_get_property(np, "ibm,opal-num-pes", NULL); - if (!prop32) - phb->ioda.total_pe = 1; - else - phb->ioda.total_pe = *prop32; - + if (prop32) + phb->ioda.total_pe = be32_to_cpup(prop32); + prop32 = of_get_property(np, "ibm,opal-reserved-pe", NULL); + if (prop32) + phb->ioda.reserved_pe = be32_to_cpup(prop32); phb->ioda.m32_size = resource_size(&hose->mem_resources[0]); /* FW Has already off top 64k of M32 space (MSI space) */ phb->ioda.m32_size += 0x10000; @@ -1124,24 +1226,25 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np, int ioda_type) phb->ioda.io_segsize = phb->ioda.io_size / phb->ioda.total_pe; phb->ioda.io_pci_base = 0; /* XXX calculate this ? */ - /* Allocate aux data & arrays - * - * XXX TODO: Don't allocate io segmap on PHB3 - */ + /* Allocate aux data & arrays. We don't have IO ports on PHB3 */ size = _ALIGN_UP(phb->ioda.total_pe / 8, sizeof(unsigned long)); m32map_off = size; size += phb->ioda.total_pe * sizeof(phb->ioda.m32_segmap[0]); iomap_off = size; - size += phb->ioda.total_pe * sizeof(phb->ioda.io_segmap[0]); + if (phb->type == PNV_PHB_IODA1) { + iomap_off = size; + size += phb->ioda.total_pe * sizeof(phb->ioda.io_segmap[0]); + } pemap_off = size; size += phb->ioda.total_pe * sizeof(struct pnv_ioda_pe); aux = alloc_bootmem(size); memset(aux, 0, size); phb->ioda.pe_alloc = aux; phb->ioda.m32_segmap = aux + m32map_off; - phb->ioda.io_segmap = aux + iomap_off; + if (phb->type == PNV_PHB_IODA1) + phb->ioda.io_segmap = aux + iomap_off; phb->ioda.pe_array = aux + pemap_off; - set_bit(0, phb->ioda.pe_alloc); + set_bit(phb->ioda.reserved_pe, phb->ioda.pe_alloc); INIT_LIST_HEAD(&phb->ioda.pe_dma_list); INIT_LIST_HEAD(&phb->ioda.pe_list); @@ -1166,12 +1269,17 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np, int ioda_type) segment_size); #endif - pr_info(" %d PE's M32: 0x%x [segment=0x%x] IO: 0x%x [segment=0x%x]\n", + pr_info(" %d (%d) PE's M32: 0x%x [segment=0x%x]" + " IO: 0x%x [segment=0x%x]\n", phb->ioda.total_pe, + phb->ioda.reserved_pe, phb->ioda.m32_size, phb->ioda.m32_segsize, phb->ioda.io_size, phb->ioda.io_segsize); phb->hose->ops = &pnv_pci_ops; +#ifdef CONFIG_EEH + phb->eeh_ops = &ioda_eeh_ops; +#endif /* Setup RID -> PE mapping function */ phb->bdfn_to_pe = pnv_ioda_bdfn_to_pe; @@ -1201,24 +1309,17 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np, int ioda_type) rc = opal_pci_reset(phb_id, OPAL_PCI_IODA_TABLE_RESET, OPAL_ASSERT_RESET); if (rc) pr_warning(" OPAL Error %ld performing IODA table reset !\n", rc); - - /* - * On IODA1 map everything to PE#0, on IODA2 we assume the IODA reset - * has cleared the RTT which has the same effect - */ - if (ioda_type == PNV_PHB_IODA1) - opal_pci_set_pe(phb_id, 0, 0, 7, 1, 1 , OPAL_MAP_PE); } -void pnv_pci_init_ioda2_phb(struct device_node *np) +void __init pnv_pci_init_ioda2_phb(struct device_node *np) { - pnv_pci_init_ioda_phb(np, PNV_PHB_IODA2); + pnv_pci_init_ioda_phb(np, 0, PNV_PHB_IODA2); } void __init pnv_pci_init_ioda_hub(struct device_node *np) { struct device_node *phbn; - const u64 *prop64; + const __be64 *prop64; u64 hub_id; pr_info("Probing IODA IO-Hub %s\n", np->full_name); @@ -1235,6 +1336,6 @@ void __init pnv_pci_init_ioda_hub(struct device_node *np) for_each_child_of_node(np, phbn) { /* Look for IODA1 PHBs */ if (of_device_is_compatible(phbn, "ibm,ioda-phb")) - pnv_pci_init_ioda_phb(phbn, PNV_PHB_IODA1); + pnv_pci_init_ioda_phb(phbn, hub_id, PNV_PHB_IODA1); } } diff --git a/arch/powerpc/platforms/powernv/pci-p5ioc2.c b/arch/powerpc/platforms/powernv/pci-p5ioc2.c index 92b37a0..f8b4bd8 100644 --- a/arch/powerpc/platforms/powernv/pci-p5ioc2.c +++ b/arch/powerpc/platforms/powernv/pci-p5ioc2.c @@ -86,17 +86,20 @@ static void pnv_pci_init_p5ioc2_msis(struct pnv_phb *phb) { } static void pnv_pci_p5ioc2_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev) { - if (phb->p5ioc2.iommu_table.it_map == NULL) + if (phb->p5ioc2.iommu_table.it_map == NULL) { iommu_init_table(&phb->p5ioc2.iommu_table, phb->hose->node); + iommu_register_group(&phb->p5ioc2.iommu_table, + pci_domain_nr(phb->hose->bus), phb->opal_id); + } set_iommu_table_base(&pdev->dev, &phb->p5ioc2.iommu_table); } -static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np, +static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np, u64 hub_id, void *tce_mem, u64 tce_size) { struct pnv_phb *phb; - const u64 *prop64; + const __be64 *prop64; u64 phb_id; int64_t rc; static int primary = 1; @@ -133,6 +136,7 @@ static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np, phb->hose->first_busno = 0; phb->hose->last_busno = 0xff; phb->hose->private_data = phb; + phb->hub_id = hub_id; phb->opal_id = phb_id; phb->type = PNV_PHB_P5IOC2; phb->model = PNV_PHB_MODEL_P5IOC2; @@ -174,7 +178,7 @@ static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np, void __init pnv_pci_init_p5ioc2_hub(struct device_node *np) { struct device_node *phbn; - const u64 *prop64; + const __be64 *prop64; u64 hub_id; void *tce_mem; uint64_t tce_per_phb; @@ -226,7 +230,8 @@ void __init pnv_pci_init_p5ioc2_hub(struct device_node *np) for_each_child_of_node(np, phbn) { if (of_device_is_compatible(phbn, "ibm,p5ioc2-pcix") || of_device_is_compatible(phbn, "ibm,p5ioc2-pciex")) { - pnv_pci_init_p5ioc2_phb(phbn, tce_mem, tce_per_phb); + pnv_pci_init_p5ioc2_phb(phbn, hub_id, + tce_mem, tce_per_phb); tce_mem += tce_per_phb; } } diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 277343c..4eb33a9 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -20,6 +20,7 @@ #include <linux/irq.h> #include <linux/io.h> #include <linux/msi.h> +#include <linux/iommu.h> #include <asm/sections.h> #include <asm/io.h> @@ -32,6 +33,8 @@ #include <asm/iommu.h> #include <asm/tce.h> #include <asm/firmware.h> +#include <asm/eeh_event.h> +#include <asm/eeh.h> #include "powernv.h" #include "pci.h" @@ -202,7 +205,8 @@ static void pnv_pci_handle_eeh_config(struct pnv_phb *phb, u32 pe_no) spin_lock_irqsave(&phb->lock, flags); - rc = opal_pci_get_phb_diag_data(phb->opal_id, phb->diag.blob, PNV_PCI_DIAG_BUF_SIZE); + rc = opal_pci_get_phb_diag_data2(phb->opal_id, phb->diag.blob, + PNV_PCI_DIAG_BUF_SIZE); has_diag = (rc == OPAL_SUCCESS); rc = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no, @@ -227,43 +231,54 @@ static void pnv_pci_handle_eeh_config(struct pnv_phb *phb, u32 pe_no) spin_unlock_irqrestore(&phb->lock, flags); } -static void pnv_pci_config_check_eeh(struct pnv_phb *phb, struct pci_bus *bus, - u32 bdfn) +static void pnv_pci_config_check_eeh(struct pnv_phb *phb, + struct device_node *dn) { s64 rc; u8 fstate; - u16 pcierr; + __be16 pcierr; u32 pe_no; - /* Get PE# if we support IODA */ - pe_no = phb->bdfn_to_pe ? phb->bdfn_to_pe(phb, bus, bdfn & 0xff) : 0; + /* + * Get the PE#. During the PCI probe stage, we might not + * setup that yet. So all ER errors should be mapped to + * reserved PE. + */ + pe_no = PCI_DN(dn)->pe_number; + if (pe_no == IODA_INVALID_PE) { + if (phb->type == PNV_PHB_P5IOC2) + pe_no = 0; + else + pe_no = phb->ioda.reserved_pe; + } /* Read freeze status */ rc = opal_pci_eeh_freeze_status(phb->opal_id, pe_no, &fstate, &pcierr, NULL); if (rc) { - pr_warning("PCI %d: Failed to read EEH status for PE#%d," - " err %lld\n", phb->hose->global_number, pe_no, rc); + pr_warning("%s: Can't read EEH status (PE#%d) for " + "%s, err %lld\n", + __func__, pe_no, dn->full_name, rc); return; } - cfg_dbg(" -> EEH check, bdfn=%04x PE%d fstate=%x\n", - bdfn, pe_no, fstate); + cfg_dbg(" -> EEH check, bdfn=%04x PE#%d fstate=%x\n", + (PCI_DN(dn)->busno << 8) | (PCI_DN(dn)->devfn), + pe_no, fstate); if (fstate != 0) pnv_pci_handle_eeh_config(phb, pe_no); } -static int pnv_pci_read_config(struct pci_bus *bus, - unsigned int devfn, - int where, int size, u32 *val) +int pnv_pci_cfg_read(struct device_node *dn, + int where, int size, u32 *val) { - struct pci_controller *hose = pci_bus_to_host(bus); - struct pnv_phb *phb = hose->private_data; - u32 bdfn = (((uint64_t)bus->number) << 8) | devfn; + struct pci_dn *pdn = PCI_DN(dn); + struct pnv_phb *phb = pdn->phb->private_data; + u32 bdfn = (pdn->busno << 8) | pdn->devfn; +#ifdef CONFIG_EEH + struct eeh_pe *phb_pe = NULL; +#endif s64 rc; - if (hose == NULL) - return PCIBIOS_DEVICE_NOT_FOUND; - switch (size) { case 1: { u8 v8; @@ -272,43 +287,58 @@ static int pnv_pci_read_config(struct pci_bus *bus, break; } case 2: { - u16 v16; + __be16 v16; rc = opal_pci_config_read_half_word(phb->opal_id, bdfn, where, &v16); - *val = (rc == OPAL_SUCCESS) ? v16 : 0xffff; + *val = (rc == OPAL_SUCCESS) ? be16_to_cpu(v16) : 0xffff; break; } case 4: { - u32 v32; + __be32 v32; rc = opal_pci_config_read_word(phb->opal_id, bdfn, where, &v32); - *val = (rc == OPAL_SUCCESS) ? v32 : 0xffffffff; + *val = (rc == OPAL_SUCCESS) ? be32_to_cpu(v32) : 0xffffffff; break; } default: return PCIBIOS_FUNC_NOT_SUPPORTED; } - cfg_dbg("pnv_pci_read_config bus: %x devfn: %x +%x/%x -> %08x\n", - bus->number, devfn, where, size, *val); - - /* Check if the PHB got frozen due to an error (no response) */ - pnv_pci_config_check_eeh(phb, bus, bdfn); + cfg_dbg("%s: bus: %x devfn: %x +%x/%x -> %08x\n", + __func__, pdn->busno, pdn->devfn, where, size, *val); + + /* + * Check if the specified PE has been put into frozen + * state. On the other hand, we needn't do that while + * the PHB has been put into frozen state because of + * PHB-fatal errors. + */ +#ifdef CONFIG_EEH + phb_pe = eeh_phb_pe_get(pdn->phb); + if (phb_pe && (phb_pe->state & EEH_PE_ISOLATED)) + return PCIBIOS_SUCCESSFUL; + + if (phb->eeh_state & PNV_EEH_STATE_ENABLED) { + if (*val == EEH_IO_ERROR_VALUE(size) && + eeh_dev_check_failure(of_node_to_eeh_dev(dn))) + return PCIBIOS_DEVICE_NOT_FOUND; + } else { + pnv_pci_config_check_eeh(phb, dn); + } +#else + pnv_pci_config_check_eeh(phb, dn); +#endif return PCIBIOS_SUCCESSFUL; } -static int pnv_pci_write_config(struct pci_bus *bus, - unsigned int devfn, - int where, int size, u32 val) +int pnv_pci_cfg_write(struct device_node *dn, + int where, int size, u32 val) { - struct pci_controller *hose = pci_bus_to_host(bus); - struct pnv_phb *phb = hose->private_data; - u32 bdfn = (((uint64_t)bus->number) << 8) | devfn; - - if (hose == NULL) - return PCIBIOS_DEVICE_NOT_FOUND; + struct pci_dn *pdn = PCI_DN(dn); + struct pnv_phb *phb = pdn->phb->private_data; + u32 bdfn = (pdn->busno << 8) | pdn->devfn; - cfg_dbg("pnv_pci_write_config bus: %x devfn: %x +%x/%x -> %08x\n", - bus->number, devfn, where, size, val); + cfg_dbg("%s: bus: %x devfn: %x +%x/%x -> %08x\n", + pdn->busno, pdn->devfn, where, size, val); switch (size) { case 1: opal_pci_config_write_byte(phb->opal_id, bdfn, where, val); @@ -322,23 +352,63 @@ static int pnv_pci_write_config(struct pci_bus *bus, default: return PCIBIOS_FUNC_NOT_SUPPORTED; } + /* Check if the PHB got frozen due to an error (no response) */ - pnv_pci_config_check_eeh(phb, bus, bdfn); +#ifdef CONFIG_EEH + if (!(phb->eeh_state & PNV_EEH_STATE_ENABLED)) + pnv_pci_config_check_eeh(phb, dn); +#else + pnv_pci_config_check_eeh(phb, dn); +#endif return PCIBIOS_SUCCESSFUL; } +static int pnv_pci_read_config(struct pci_bus *bus, + unsigned int devfn, + int where, int size, u32 *val) +{ + struct device_node *dn, *busdn = pci_bus_to_OF_node(bus); + struct pci_dn *pdn; + + for (dn = busdn->child; dn; dn = dn->sibling) { + pdn = PCI_DN(dn); + if (pdn && pdn->devfn == devfn) + return pnv_pci_cfg_read(dn, where, size, val); + } + + *val = 0xFFFFFFFF; + return PCIBIOS_DEVICE_NOT_FOUND; + +} + +static int pnv_pci_write_config(struct pci_bus *bus, + unsigned int devfn, + int where, int size, u32 val) +{ + struct device_node *dn, *busdn = pci_bus_to_OF_node(bus); + struct pci_dn *pdn; + + for (dn = busdn->child; dn; dn = dn->sibling) { + pdn = PCI_DN(dn); + if (pdn && pdn->devfn == devfn) + return pnv_pci_cfg_write(dn, where, size, val); + } + + return PCIBIOS_DEVICE_NOT_FOUND; +} + struct pci_ops pnv_pci_ops = { - .read = pnv_pci_read_config, + .read = pnv_pci_read_config, .write = pnv_pci_write_config, }; static int pnv_tce_build(struct iommu_table *tbl, long index, long npages, unsigned long uaddr, enum dma_data_direction direction, - struct dma_attrs *attrs) + struct dma_attrs *attrs, bool rm) { u64 proto_tce; - u64 *tcep, *tces; + __be64 *tcep, *tces; u64 rpn; proto_tce = TCE_PCI_READ; // Read allowed @@ -346,33 +416,48 @@ static int pnv_tce_build(struct iommu_table *tbl, long index, long npages, if (direction != DMA_TO_DEVICE) proto_tce |= TCE_PCI_WRITE; - tces = tcep = ((u64 *)tbl->it_base) + index - tbl->it_offset; + tces = tcep = ((__be64 *)tbl->it_base) + index - tbl->it_offset; rpn = __pa(uaddr) >> TCE_SHIFT; while (npages--) - *(tcep++) = proto_tce | (rpn++ << TCE_RPN_SHIFT); + *(tcep++) = cpu_to_be64(proto_tce | (rpn++ << TCE_RPN_SHIFT)); /* Some implementations won't cache invalid TCEs and thus may not * need that flush. We'll probably turn it_type into a bit mask * of flags if that becomes the case */ if (tbl->it_type & TCE_PCI_SWINV_CREATE) - pnv_pci_ioda_tce_invalidate(tbl, tces, tcep - 1); + pnv_pci_ioda_tce_invalidate(tbl, tces, tcep - 1, rm); return 0; } -static void pnv_tce_free(struct iommu_table *tbl, long index, long npages) +static int pnv_tce_build_vm(struct iommu_table *tbl, long index, long npages, + unsigned long uaddr, + enum dma_data_direction direction, + struct dma_attrs *attrs) { - u64 *tcep, *tces; + return pnv_tce_build(tbl, index, npages, uaddr, direction, attrs, + false); +} - tces = tcep = ((u64 *)tbl->it_base) + index - tbl->it_offset; +static void pnv_tce_free(struct iommu_table *tbl, long index, long npages, + bool rm) +{ + __be64 *tcep, *tces; + + tces = tcep = ((__be64 *)tbl->it_base) + index - tbl->it_offset; while (npages--) - *(tcep++) = 0; + *(tcep++) = cpu_to_be64(0); if (tbl->it_type & TCE_PCI_SWINV_FREE) - pnv_pci_ioda_tce_invalidate(tbl, tces, tcep - 1); + pnv_pci_ioda_tce_invalidate(tbl, tces, tcep - 1, rm); +} + +static void pnv_tce_free_vm(struct iommu_table *tbl, long index, long npages) +{ + pnv_tce_free(tbl, index, npages, false); } static unsigned long pnv_tce_get(struct iommu_table *tbl, long index) @@ -380,6 +465,19 @@ static unsigned long pnv_tce_get(struct iommu_table *tbl, long index) return ((u64 *)tbl->it_base)[index - tbl->it_offset]; } +static int pnv_tce_build_rm(struct iommu_table *tbl, long index, long npages, + unsigned long uaddr, + enum dma_data_direction direction, + struct dma_attrs *attrs) +{ + return pnv_tce_build(tbl, index, npages, uaddr, direction, attrs, true); +} + +static void pnv_tce_free_rm(struct iommu_table *tbl, long index, long npages) +{ + pnv_tce_free(tbl, index, npages, true); +} + void pnv_pci_setup_iommu_table(struct iommu_table *tbl, void *tce_mem, u64 tce_size, u64 dma_offset) @@ -412,13 +510,14 @@ static struct iommu_table *pnv_pci_setup_bml_iommu(struct pci_controller *hose) pnv_pci_setup_iommu_table(tbl, __va(be64_to_cpup(basep)), be32_to_cpup(sizep), 0); iommu_init_table(tbl, hose->node); + iommu_register_group(tbl, pci_domain_nr(hose->bus), 0); /* Deal with SW invalidated TCEs when needed (BML way) */ swinvp = of_get_property(hose->dn, "linux,tce-sw-invalidate-info", NULL); if (swinvp) { - tbl->it_busno = swinvp[1]; - tbl->it_index = (unsigned long)ioremap(swinvp[0], 8); + tbl->it_busno = be64_to_cpu(swinvp[1]); + tbl->it_index = (unsigned long)ioremap(be64_to_cpup(swinvp), 8); tbl->it_type = TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE; } return tbl; @@ -543,8 +642,10 @@ void __init pnv_pci_init(void) /* Configure IOMMU DMA hooks */ ppc_md.pci_dma_dev_setup = pnv_pci_dma_dev_setup; - ppc_md.tce_build = pnv_tce_build; - ppc_md.tce_free = pnv_tce_free; + ppc_md.tce_build = pnv_tce_build_vm; + ppc_md.tce_free = pnv_tce_free_vm; + ppc_md.tce_build_rm = pnv_tce_build_rm; + ppc_md.tce_free_rm = pnv_tce_free_rm; ppc_md.tce_get = pnv_tce_get; ppc_md.pci_probe_mode = pnv_pci_probe_mode; set_pci_dma_ops(&dma_iommu_ops); diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index 25d76c4..911c24e 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -17,7 +17,7 @@ enum pnv_phb_model { PNV_PHB_MODEL_PHB3, }; -#define PNV_PCI_DIAG_BUF_SIZE 4096 +#define PNV_PCI_DIAG_BUF_SIZE 8192 #define PNV_IODA_PE_DEV (1 << 0) /* PE has single PCI device */ #define PNV_IODA_PE_BUS (1 << 1) /* PE has primary PCI bus */ #define PNV_IODA_PE_BUS_ALL (1 << 2) /* PE has subordinate buses */ @@ -52,6 +52,7 @@ struct pnv_ioda_pe { int tce32_seg; int tce32_segcount; struct iommu_table tce32_table; + phys_addr_t tce_inval_reg_phys; /* XXX TODO: Add support for additional 64-bit iommus */ @@ -66,15 +67,43 @@ struct pnv_ioda_pe { struct list_head list; }; +/* IOC dependent EEH operations */ +#ifdef CONFIG_EEH +struct pnv_eeh_ops { + int (*post_init)(struct pci_controller *hose); + int (*set_option)(struct eeh_pe *pe, int option); + int (*get_state)(struct eeh_pe *pe); + int (*reset)(struct eeh_pe *pe, int option); + int (*get_log)(struct eeh_pe *pe, int severity, + char *drv_log, unsigned long len); + int (*configure_bridge)(struct eeh_pe *pe); + int (*next_error)(struct eeh_pe **pe); +}; + +#define PNV_EEH_STATE_ENABLED (1 << 0) /* EEH enabled */ +#define PNV_EEH_STATE_REMOVED (1 << 1) /* PHB removed */ + +#endif /* CONFIG_EEH */ + struct pnv_phb { struct pci_controller *hose; enum pnv_phb_type type; enum pnv_phb_model model; + u64 hub_id; u64 opal_id; void __iomem *regs; int initialized; spinlock_t lock; +#ifdef CONFIG_EEH + struct pnv_eeh_ops *eeh_ops; + int eeh_state; +#endif + +#ifdef CONFIG_DEBUG_FS + struct dentry *dbgfs; +#endif + #ifdef CONFIG_PCI_MSI unsigned int msi_base; unsigned int msi32_support; @@ -96,6 +125,7 @@ struct pnv_phb { struct { /* Global bridge info */ unsigned int total_pe; + unsigned int reserved_pe; unsigned int m32_size; unsigned int m32_segsize; unsigned int m32_pci_base; @@ -150,7 +180,14 @@ struct pnv_phb { }; extern struct pci_ops pnv_pci_ops; +#ifdef CONFIG_EEH +extern struct pnv_eeh_ops ioda_eeh_ops; +#endif +int pnv_pci_cfg_read(struct device_node *dn, + int where, int size, u32 *val); +int pnv_pci_cfg_write(struct device_node *dn, + int where, int size, u32 val); extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl, void *tce_mem, u64 tce_size, u64 dma_offset); @@ -158,6 +195,6 @@ extern void pnv_pci_init_p5ioc2_hub(struct device_node *np); extern void pnv_pci_init_ioda_hub(struct device_node *np); extern void pnv_pci_init_ioda2_phb(struct device_node *np); extern void pnv_pci_ioda_tce_invalidate(struct iommu_table *tbl, - u64 *startp, u64 *endp); + __be64 *startp, __be64 *endp, bool rm); #endif /* __POWERNV_PCI_H */ diff --git a/arch/powerpc/platforms/powernv/powernv.h b/arch/powerpc/platforms/powernv/powernv.h index a1c6f83..de6819b 100644 --- a/arch/powerpc/platforms/powernv/powernv.h +++ b/arch/powerpc/platforms/powernv/powernv.h @@ -15,4 +15,6 @@ static inline void pnv_pci_init(void) { } static inline void pnv_pci_shutdown(void) { } #endif +extern void pnv_lpc_init(void); + #endif /* _POWERNV_H */ diff --git a/arch/powerpc/platforms/powernv/rng.c b/arch/powerpc/platforms/powernv/rng.c new file mode 100644 index 0000000..1cb160d --- /dev/null +++ b/arch/powerpc/platforms/powernv/rng.c @@ -0,0 +1,126 @@ +/* + * Copyright 2013, Michael Ellerman, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#define pr_fmt(fmt) "powernv-rng: " fmt + +#include <linux/kernel.h> +#include <linux/of.h> +#include <linux/of_address.h> +#include <linux/of_platform.h> +#include <linux/slab.h> +#include <linux/smp.h> +#include <asm/archrandom.h> +#include <asm/io.h> +#include <asm/prom.h> +#include <asm/machdep.h> +#include <asm/smp.h> + + +struct powernv_rng { + void __iomem *regs; + unsigned long mask; +}; + +static DEFINE_PER_CPU(struct powernv_rng *, powernv_rng); + + +static unsigned long rng_whiten(struct powernv_rng *rng, unsigned long val) +{ + unsigned long parity; + + /* Calculate the parity of the value */ + asm ("popcntd %0,%1" : "=r" (parity) : "r" (val)); + + /* xor our value with the previous mask */ + val ^= rng->mask; + + /* update the mask based on the parity of this value */ + rng->mask = (rng->mask << 1) | (parity & 1); + + return val; +} + +int powernv_get_random_long(unsigned long *v) +{ + struct powernv_rng *rng; + + rng = get_cpu_var(powernv_rng); + + *v = rng_whiten(rng, in_be64(rng->regs)); + + put_cpu_var(rng); + + return 1; +} +EXPORT_SYMBOL_GPL(powernv_get_random_long); + +static __init void rng_init_per_cpu(struct powernv_rng *rng, + struct device_node *dn) +{ + int chip_id, cpu; + + chip_id = of_get_ibm_chip_id(dn); + if (chip_id == -1) + pr_warn("No ibm,chip-id found for %s.\n", dn->full_name); + + for_each_possible_cpu(cpu) { + if (per_cpu(powernv_rng, cpu) == NULL || + cpu_to_chip_id(cpu) == chip_id) { + per_cpu(powernv_rng, cpu) = rng; + } + } +} + +static __init int rng_create(struct device_node *dn) +{ + struct powernv_rng *rng; + unsigned long val; + + rng = kzalloc(sizeof(*rng), GFP_KERNEL); + if (!rng) + return -ENOMEM; + + rng->regs = of_iomap(dn, 0); + if (!rng->regs) { + kfree(rng); + return -ENXIO; + } + + val = in_be64(rng->regs); + rng->mask = val; + + rng_init_per_cpu(rng, dn); + + pr_info_once("Registering arch random hook.\n"); + + ppc_md.get_random_long = powernv_get_random_long; + + return 0; +} + +static __init int rng_init(void) +{ + struct device_node *dn; + int rc; + + for_each_compatible_node(dn, NULL, "ibm,power-rng") { + rc = rng_create(dn); + if (rc) { + pr_err("Failed creating rng for %s (%d).\n", + dn->full_name, rc); + continue; + } + + /* Create devices for hwrng driver */ + of_platform_device_create(dn, NULL, NULL); + } + + return 0; +} +subsys_initcall(rng_init); diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index d4459bf..19884b2 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -23,6 +23,7 @@ #include <linux/irq.h> #include <linux/seq_file.h> #include <linux/of.h> +#include <linux/of_fdt.h> #include <linux/interrupt.h> #include <linux/bug.h> @@ -31,6 +32,7 @@ #include <asm/xics.h> #include <asm/rtas.h> #include <asm/opal.h> +#include <asm/kexec.h> #include "powernv.h" @@ -54,6 +56,12 @@ static void __init pnv_setup_arch(void) static void __init pnv_init_early(void) { + /* + * Initialize the LPC bus now so that legacy serial + * ports can be found on it + */ + opal_lpc_init(); + #ifdef CONFIG_HVC_OPAL if (firmware_has_feature(FW_FEATURE_OPAL)) hvc_opal_init_early(); @@ -93,6 +101,8 @@ static void __noreturn pnv_restart(char *cmd) { long rc = OPAL_BUSY; + opal_notifier_disable(); + while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { rc = opal_cec_reboot(); if (rc == OPAL_BUSY_EVENT) @@ -108,6 +118,8 @@ static void __noreturn pnv_power_off(void) { long rc = OPAL_BUSY; + opal_notifier_disable(); + while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { rc = opal_cec_power_down(0); if (rc == OPAL_BUSY_EVENT) @@ -143,6 +155,16 @@ static void pnv_shutdown(void) static void pnv_kexec_cpu_down(int crash_shutdown, int secondary) { xics_kexec_teardown_cpu(secondary); + + /* Return secondary CPUs to firmware on OPAL v3 */ + if (firmware_has_feature(FW_FEATURE_OPALv3) && secondary) { + mb(); + get_paca()->kexec_state = KEXEC_STATE_REAL_MODE; + mb(); + + /* Return the CPU to OPAL */ + opal_return_cpu(); + } } #endif /* CONFIG_KEXEC */ diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c index 8cbaa59..908672b 100644 --- a/arch/powerpc/platforms/powernv/smp.c +++ b/arch/powerpc/platforms/powernv/smp.c @@ -40,7 +40,7 @@ #define DBG(fmt...) #endif -static void __cpuinit pnv_smp_setup_cpu(int cpu) +static void pnv_smp_setup_cpu(int cpu) { if (cpu != boot_cpuid) xics_setup_cpu(); diff --git a/arch/powerpc/platforms/ps3/htab.c b/arch/powerpc/platforms/ps3/htab.c index 177a2f7..3e270e3 100644 --- a/arch/powerpc/platforms/ps3/htab.c +++ b/arch/powerpc/platforms/ps3/htab.c @@ -109,7 +109,8 @@ static long ps3_hpte_remove(unsigned long hpte_group) } static long ps3_hpte_updatepp(unsigned long slot, unsigned long newpp, - unsigned long vpn, int psize, int ssize, int local) + unsigned long vpn, int psize, int apsize, + int ssize, int local) { int result; u64 hpte_v, want_v, hpte_rs; @@ -162,7 +163,7 @@ static void ps3_hpte_updateboltedpp(unsigned long newpp, unsigned long ea, } static void ps3_hpte_invalidate(unsigned long slot, unsigned long vpn, - int psize, int ssize, int local) + int psize, int apsize, int ssize, int local) { unsigned long flags; int result; diff --git a/arch/powerpc/platforms/ps3/time.c b/arch/powerpc/platforms/ps3/time.c index cba1e6b..ce73ce8 100644 --- a/arch/powerpc/platforms/ps3/time.c +++ b/arch/powerpc/platforms/ps3/time.c @@ -90,7 +90,7 @@ static int __init ps3_rtc_init(void) pdev = platform_device_register_simple("rtc-ps3", -1, NULL, 0); - return PTR_RET(pdev); + return PTR_ERR_OR_ZERO(pdev); } module_init(ps3_rtc_init); diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig index 4459eff..62b4f80 100644 --- a/arch/powerpc/platforms/pseries/Kconfig +++ b/arch/powerpc/platforms/pseries/Kconfig @@ -19,7 +19,6 @@ config PPC_PSERIES select ZLIB_DEFLATE select PPC_DOORBELL select HAVE_CONTEXT_TRACKING - select HOTPLUG if SMP select HOTPLUG_CPU if SMP default y @@ -33,11 +32,6 @@ config PPC_SPLPAR processors, that is, which share physical processors between two or more partitions. -config EEH - bool - depends on PPC_PSERIES && PCI - default y - config PSERIES_MSI bool depends on PCI_MSI && EEH diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile index 53866e5..fbccac9 100644 --- a/arch/powerpc/platforms/pseries/Makefile +++ b/arch/powerpc/platforms/pseries/Makefile @@ -3,12 +3,10 @@ ccflags-$(CONFIG_PPC_PSERIES_DEBUG) += -DDEBUG obj-y := lpar.o hvCall.o nvram.o reconfig.o \ setup.o iommu.o event_sources.o ras.o \ - firmware.o power.o dlpar.o mobility.o + firmware.o power.o dlpar.o mobility.o rng.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_SCANLOG) += scanlog.o -obj-$(CONFIG_EEH) += eeh.o eeh_pe.o eeh_dev.o eeh_cache.o \ - eeh_driver.o eeh_event.o eeh_sysfs.o \ - eeh_pseries.o +obj-$(CONFIG_EEH) += eeh_pseries.o obj-$(CONFIG_KEXEC) += kexec.o obj-$(CONFIG_PCI) += pci.o pci_dlpar.o obj-$(CONFIG_PSERIES_MSI) += msi.o @@ -24,6 +22,7 @@ obj-$(CONFIG_CMM) += cmm.o obj-$(CONFIG_DTL) += dtl.o obj-$(CONFIG_IO_EVENT_IRQ) += io_event_irq.o obj-$(CONFIG_PSERIES_IDLE) += processor_idle.o +obj-$(CONFIG_LPARCFG) += lparcfg.o ifeq ($(CONFIG_PPC_PSERIES),y) obj-$(CONFIG_SUSPEND) += suspend.o diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c index c638535..1e561be 100644 --- a/arch/powerpc/platforms/pseries/cmm.c +++ b/arch/powerpc/platforms/pseries/cmm.c @@ -40,8 +40,7 @@ #include <asm/pgalloc.h> #include <asm/uaccess.h> #include <linux/memory.h> - -#include "plpar_wrappers.h" +#include <asm/plpar_wrappers.h> #define CMM_DRIVER_VERSION "1.0.0" #define CMM_DEFAULT_DELAY 1 diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c index a1a7b9a..a8fe5aa 100644 --- a/arch/powerpc/platforms/pseries/dlpar.c +++ b/arch/powerpc/platforms/pseries/dlpar.c @@ -63,26 +63,32 @@ static struct property *dlpar_parse_cc_property(struct cc_workarea *ccwa) return prop; } -static struct device_node *dlpar_parse_cc_node(struct cc_workarea *ccwa) +static struct device_node *dlpar_parse_cc_node(struct cc_workarea *ccwa, + const char *path) { struct device_node *dn; char *name; + /* If parent node path is "/" advance path to NULL terminator to + * prevent double leading slashs in full_name. + */ + if (!path[1]) + path++; + dn = kzalloc(sizeof(*dn), GFP_KERNEL); if (!dn) return NULL; - /* The configure connector reported name does not contain a - * preceding '/', so we allocate a buffer large enough to - * prepend this to the full_name. - */ name = (char *)ccwa + ccwa->name_offset; - dn->full_name = kasprintf(GFP_KERNEL, "/%s", name); + dn->full_name = kasprintf(GFP_KERNEL, "%s/%s", path, name); if (!dn->full_name) { kfree(dn); return NULL; } + of_node_set_flag(dn, OF_DYNAMIC); + kref_init(&dn->kref); + return dn; } @@ -120,7 +126,8 @@ void dlpar_free_cc_nodes(struct device_node *dn) #define CALL_AGAIN -2 #define ERR_CFG_USE -9003 -struct device_node *dlpar_configure_connector(u32 drc_index) +struct device_node *dlpar_configure_connector(u32 drc_index, + struct device_node *parent) { struct device_node *dn; struct device_node *first_dn = NULL; @@ -129,6 +136,7 @@ struct device_node *dlpar_configure_connector(u32 drc_index) struct property *last_property = NULL; struct cc_workarea *ccwa; char *data_buf; + const char *parent_path = parent->full_name; int cc_token; int rc = -1; @@ -162,7 +170,7 @@ struct device_node *dlpar_configure_connector(u32 drc_index) break; case NEXT_SIBLING: - dn = dlpar_parse_cc_node(ccwa); + dn = dlpar_parse_cc_node(ccwa, parent_path); if (!dn) goto cc_error; @@ -172,13 +180,17 @@ struct device_node *dlpar_configure_connector(u32 drc_index) break; case NEXT_CHILD: - dn = dlpar_parse_cc_node(ccwa); + if (first_dn) + parent_path = last_dn->full_name; + + dn = dlpar_parse_cc_node(ccwa, parent_path); if (!dn) goto cc_error; - if (!first_dn) + if (!first_dn) { + dn->parent = parent; first_dn = dn; - else { + } else { dn->parent = last_dn; if (last_dn) last_dn->child = dn; @@ -202,6 +214,7 @@ struct device_node *dlpar_configure_connector(u32 drc_index) case PREV_PARENT: last_dn = last_dn->parent; + parent_path = last_dn->parent->full_name; break; case CALL_AGAIN: @@ -256,8 +269,6 @@ int dlpar_attach_node(struct device_node *dn) { int rc; - of_node_set_flag(dn, OF_DYNAMIC); - kref_init(&dn->kref); dn->parent = derive_parent(dn->full_name); if (!dn->parent) return -ENOMEM; @@ -275,8 +286,15 @@ int dlpar_attach_node(struct device_node *dn) int dlpar_detach_node(struct device_node *dn) { + struct device_node *child; int rc; + child = of_get_next_child(dn, NULL); + while (child) { + dlpar_detach_node(child); + child = of_get_next_child(dn, child); + } + rc = of_detach_node(dn); if (rc) return rc; @@ -382,57 +400,42 @@ out: static ssize_t dlpar_cpu_probe(const char *buf, size_t count) { - struct device_node *dn; + struct device_node *dn, *parent; unsigned long drc_index; - char *cpu_name; int rc; - cpu_hotplug_driver_lock(); rc = strict_strtoul(buf, 0, &drc_index); - if (rc) { - rc = -EINVAL; - goto out; - } + if (rc) + return -EINVAL; - dn = dlpar_configure_connector(drc_index); - if (!dn) { - rc = -EINVAL; - goto out; - } + parent = of_find_node_by_path("/cpus"); + if (!parent) + return -ENODEV; - /* configure-connector reports cpus as living in the base - * directory of the device tree. CPUs actually live in the - * cpus directory so we need to fixup the full_name. - */ - cpu_name = kasprintf(GFP_KERNEL, "/cpus%s", dn->full_name); - if (!cpu_name) { - dlpar_free_cc_nodes(dn); - rc = -ENOMEM; - goto out; - } + dn = dlpar_configure_connector(drc_index, parent); + if (!dn) + return -EINVAL; - kfree(dn->full_name); - dn->full_name = cpu_name; + of_node_put(parent); rc = dlpar_acquire_drc(drc_index); if (rc) { dlpar_free_cc_nodes(dn); - rc = -EINVAL; - goto out; + return -EINVAL; } rc = dlpar_attach_node(dn); if (rc) { dlpar_release_drc(drc_index); dlpar_free_cc_nodes(dn); - goto out; + return rc; } rc = dlpar_online_cpu(dn); -out: - cpu_hotplug_driver_unlock(); + if (rc) + return rc; - return rc ? rc : count; + return count; } static int dlpar_offline_cpu(struct device_node *dn) @@ -505,30 +508,27 @@ static ssize_t dlpar_cpu_release(const char *buf, size_t count) return -EINVAL; } - cpu_hotplug_driver_lock(); rc = dlpar_offline_cpu(dn); if (rc) { of_node_put(dn); - rc = -EINVAL; - goto out; + return -EINVAL; } rc = dlpar_release_drc(*drc_index); if (rc) { of_node_put(dn); - goto out; + return rc; } rc = dlpar_detach_node(dn); if (rc) { dlpar_acquire_drc(*drc_index); - goto out; + return rc; } of_node_put(dn); -out: - cpu_hotplug_driver_unlock(); - return rc ? rc : count; + + return count; } static int __init pseries_dlpar_init(void) diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c index 0cc0ac0..5db66f1 100644 --- a/arch/powerpc/platforms/pseries/dtl.c +++ b/arch/powerpc/platforms/pseries/dtl.c @@ -29,8 +29,7 @@ #include <asm/firmware.h> #include <asm/lppaca.h> #include <asm/debug.h> - -#include "plpar_wrappers.h" +#include <asm/plpar_wrappers.h> struct dtl { struct dtl_entry *buf; @@ -87,7 +86,7 @@ static void consume_dtle(struct dtl_entry *dtle, u64 index) barrier(); /* check for hypervisor ring buffer overflow, ignore this entry if so */ - if (index + N_DISPATCH_LOG < vpa->dtl_idx) + if (index + N_DISPATCH_LOG < be64_to_cpu(vpa->dtl_idx)) return; ++wp; diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c deleted file mode 100644 index 6b73d6c..0000000 --- a/arch/powerpc/platforms/pseries/eeh.c +++ /dev/null @@ -1,942 +0,0 @@ -/* - * Copyright IBM Corporation 2001, 2005, 2006 - * Copyright Dave Engebretsen & Todd Inglett 2001 - * Copyright Linas Vepstas 2005, 2006 - * Copyright 2001-2012 IBM Corporation. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Please address comments and feedback to Linas Vepstas <linas@austin.ibm.com> - */ - -#include <linux/delay.h> -#include <linux/sched.h> -#include <linux/init.h> -#include <linux/list.h> -#include <linux/pci.h> -#include <linux/proc_fs.h> -#include <linux/rbtree.h> -#include <linux/seq_file.h> -#include <linux/spinlock.h> -#include <linux/export.h> -#include <linux/of.h> - -#include <linux/atomic.h> -#include <asm/eeh.h> -#include <asm/eeh_event.h> -#include <asm/io.h> -#include <asm/machdep.h> -#include <asm/ppc-pci.h> -#include <asm/rtas.h> - - -/** Overview: - * EEH, or "Extended Error Handling" is a PCI bridge technology for - * dealing with PCI bus errors that can't be dealt with within the - * usual PCI framework, except by check-stopping the CPU. Systems - * that are designed for high-availability/reliability cannot afford - * to crash due to a "mere" PCI error, thus the need for EEH. - * An EEH-capable bridge operates by converting a detected error - * into a "slot freeze", taking the PCI adapter off-line, making - * the slot behave, from the OS'es point of view, as if the slot - * were "empty": all reads return 0xff's and all writes are silently - * ignored. EEH slot isolation events can be triggered by parity - * errors on the address or data busses (e.g. during posted writes), - * which in turn might be caused by low voltage on the bus, dust, - * vibration, humidity, radioactivity or plain-old failed hardware. - * - * Note, however, that one of the leading causes of EEH slot - * freeze events are buggy device drivers, buggy device microcode, - * or buggy device hardware. This is because any attempt by the - * device to bus-master data to a memory address that is not - * assigned to the device will trigger a slot freeze. (The idea - * is to prevent devices-gone-wild from corrupting system memory). - * Buggy hardware/drivers will have a miserable time co-existing - * with EEH. - * - * Ideally, a PCI device driver, when suspecting that an isolation - * event has occurred (e.g. by reading 0xff's), will then ask EEH - * whether this is the case, and then take appropriate steps to - * reset the PCI slot, the PCI device, and then resume operations. - * However, until that day, the checking is done here, with the - * eeh_check_failure() routine embedded in the MMIO macros. If - * the slot is found to be isolated, an "EEH Event" is synthesized - * and sent out for processing. - */ - -/* If a device driver keeps reading an MMIO register in an interrupt - * handler after a slot isolation event, it might be broken. - * This sets the threshold for how many read attempts we allow - * before printing an error message. - */ -#define EEH_MAX_FAILS 2100000 - -/* Time to wait for a PCI slot to report status, in milliseconds */ -#define PCI_BUS_RESET_WAIT_MSEC (60*1000) - -/* Platform dependent EEH operations */ -struct eeh_ops *eeh_ops = NULL; - -int eeh_subsystem_enabled; -EXPORT_SYMBOL(eeh_subsystem_enabled); - -/* - * EEH probe mode support. The intention is to support multiple - * platforms for EEH. Some platforms like pSeries do PCI emunation - * based on device tree. However, other platforms like powernv probe - * PCI devices from hardware. The flag is used to distinguish that. - * In addition, struct eeh_ops::probe would be invoked for particular - * OF node or PCI device so that the corresponding PE would be created - * there. - */ -int eeh_probe_mode; - -/* Global EEH mutex */ -DEFINE_MUTEX(eeh_mutex); - -/* Lock to avoid races due to multiple reports of an error */ -static DEFINE_RAW_SPINLOCK(confirm_error_lock); - -/* Buffer for reporting pci register dumps. Its here in BSS, and - * not dynamically alloced, so that it ends up in RMO where RTAS - * can access it. - */ -#define EEH_PCI_REGS_LOG_LEN 4096 -static unsigned char pci_regs_buf[EEH_PCI_REGS_LOG_LEN]; - -/* - * The struct is used to maintain the EEH global statistic - * information. Besides, the EEH global statistics will be - * exported to user space through procfs - */ -struct eeh_stats { - u64 no_device; /* PCI device not found */ - u64 no_dn; /* OF node not found */ - u64 no_cfg_addr; /* Config address not found */ - u64 ignored_check; /* EEH check skipped */ - u64 total_mmio_ffs; /* Total EEH checks */ - u64 false_positives; /* Unnecessary EEH checks */ - u64 slot_resets; /* PE reset */ -}; - -static struct eeh_stats eeh_stats; - -#define IS_BRIDGE(class_code) (((class_code)<<16) == PCI_BASE_CLASS_BRIDGE) - -/** - * eeh_gather_pci_data - Copy assorted PCI config space registers to buff - * @edev: device to report data for - * @buf: point to buffer in which to log - * @len: amount of room in buffer - * - * This routine captures assorted PCI configuration space data, - * and puts them into a buffer for RTAS error logging. - */ -static size_t eeh_gather_pci_data(struct eeh_dev *edev, char * buf, size_t len) -{ - struct device_node *dn = eeh_dev_to_of_node(edev); - struct pci_dev *dev = eeh_dev_to_pci_dev(edev); - u32 cfg; - int cap, i; - int n = 0; - - n += scnprintf(buf+n, len-n, "%s\n", dn->full_name); - printk(KERN_WARNING "EEH: of node=%s\n", dn->full_name); - - eeh_ops->read_config(dn, PCI_VENDOR_ID, 4, &cfg); - n += scnprintf(buf+n, len-n, "dev/vend:%08x\n", cfg); - printk(KERN_WARNING "EEH: PCI device/vendor: %08x\n", cfg); - - eeh_ops->read_config(dn, PCI_COMMAND, 4, &cfg); - n += scnprintf(buf+n, len-n, "cmd/stat:%x\n", cfg); - printk(KERN_WARNING "EEH: PCI cmd/status register: %08x\n", cfg); - - if (!dev) { - printk(KERN_WARNING "EEH: no PCI device for this of node\n"); - return n; - } - - /* Gather bridge-specific registers */ - if (dev->class >> 16 == PCI_BASE_CLASS_BRIDGE) { - eeh_ops->read_config(dn, PCI_SEC_STATUS, 2, &cfg); - n += scnprintf(buf+n, len-n, "sec stat:%x\n", cfg); - printk(KERN_WARNING "EEH: Bridge secondary status: %04x\n", cfg); - - eeh_ops->read_config(dn, PCI_BRIDGE_CONTROL, 2, &cfg); - n += scnprintf(buf+n, len-n, "brdg ctl:%x\n", cfg); - printk(KERN_WARNING "EEH: Bridge control: %04x\n", cfg); - } - - /* Dump out the PCI-X command and status regs */ - cap = pci_find_capability(dev, PCI_CAP_ID_PCIX); - if (cap) { - eeh_ops->read_config(dn, cap, 4, &cfg); - n += scnprintf(buf+n, len-n, "pcix-cmd:%x\n", cfg); - printk(KERN_WARNING "EEH: PCI-X cmd: %08x\n", cfg); - - eeh_ops->read_config(dn, cap+4, 4, &cfg); - n += scnprintf(buf+n, len-n, "pcix-stat:%x\n", cfg); - printk(KERN_WARNING "EEH: PCI-X status: %08x\n", cfg); - } - - /* If PCI-E capable, dump PCI-E cap 10, and the AER */ - cap = pci_find_capability(dev, PCI_CAP_ID_EXP); - if (cap) { - n += scnprintf(buf+n, len-n, "pci-e cap10:\n"); - printk(KERN_WARNING - "EEH: PCI-E capabilities and status follow:\n"); - - for (i=0; i<=8; i++) { - eeh_ops->read_config(dn, cap+4*i, 4, &cfg); - n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg); - printk(KERN_WARNING "EEH: PCI-E %02x: %08x\n", i, cfg); - } - - cap = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR); - if (cap) { - n += scnprintf(buf+n, len-n, "pci-e AER:\n"); - printk(KERN_WARNING - "EEH: PCI-E AER capability register set follows:\n"); - - for (i=0; i<14; i++) { - eeh_ops->read_config(dn, cap+4*i, 4, &cfg); - n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg); - printk(KERN_WARNING "EEH: PCI-E AER %02x: %08x\n", i, cfg); - } - } - } - - return n; -} - -/** - * eeh_slot_error_detail - Generate combined log including driver log and error log - * @pe: EEH PE - * @severity: temporary or permanent error log - * - * This routine should be called to generate the combined log, which - * is comprised of driver log and error log. The driver log is figured - * out from the config space of the corresponding PCI device, while - * the error log is fetched through platform dependent function call. - */ -void eeh_slot_error_detail(struct eeh_pe *pe, int severity) -{ - size_t loglen = 0; - struct eeh_dev *edev; - - eeh_pci_enable(pe, EEH_OPT_THAW_MMIO); - eeh_ops->configure_bridge(pe); - eeh_pe_restore_bars(pe); - - pci_regs_buf[0] = 0; - eeh_pe_for_each_dev(pe, edev) { - loglen += eeh_gather_pci_data(edev, pci_regs_buf, - EEH_PCI_REGS_LOG_LEN); - } - - eeh_ops->get_log(pe, severity, pci_regs_buf, loglen); -} - -/** - * eeh_token_to_phys - Convert EEH address token to phys address - * @token: I/O token, should be address in the form 0xA.... - * - * This routine should be called to convert virtual I/O address - * to physical one. - */ -static inline unsigned long eeh_token_to_phys(unsigned long token) -{ - pte_t *ptep; - unsigned long pa; - - ptep = find_linux_pte(init_mm.pgd, token); - if (!ptep) - return token; - pa = pte_pfn(*ptep) << PAGE_SHIFT; - - return pa | (token & (PAGE_SIZE-1)); -} - -/** - * eeh_dev_check_failure - Check if all 1's data is due to EEH slot freeze - * @edev: eeh device - * - * Check for an EEH failure for the given device node. Call this - * routine if the result of a read was all 0xff's and you want to - * find out if this is due to an EEH slot freeze. This routine - * will query firmware for the EEH status. - * - * Returns 0 if there has not been an EEH error; otherwise returns - * a non-zero value and queues up a slot isolation event notification. - * - * It is safe to call this routine in an interrupt context. - */ -int eeh_dev_check_failure(struct eeh_dev *edev) -{ - int ret; - unsigned long flags; - struct device_node *dn; - struct pci_dev *dev; - struct eeh_pe *pe; - int rc = 0; - const char *location; - - eeh_stats.total_mmio_ffs++; - - if (!eeh_subsystem_enabled) - return 0; - - if (!edev) { - eeh_stats.no_dn++; - return 0; - } - dn = eeh_dev_to_of_node(edev); - dev = eeh_dev_to_pci_dev(edev); - pe = edev->pe; - - /* Access to IO BARs might get this far and still not want checking. */ - if (!pe) { - eeh_stats.ignored_check++; - pr_debug("EEH: Ignored check for %s %s\n", - eeh_pci_name(dev), dn->full_name); - return 0; - } - - if (!pe->addr && !pe->config_addr) { - eeh_stats.no_cfg_addr++; - return 0; - } - - /* If we already have a pending isolation event for this - * slot, we know it's bad already, we don't need to check. - * Do this checking under a lock; as multiple PCI devices - * in one slot might report errors simultaneously, and we - * only want one error recovery routine running. - */ - raw_spin_lock_irqsave(&confirm_error_lock, flags); - rc = 1; - if (pe->state & EEH_PE_ISOLATED) { - pe->check_count++; - if (pe->check_count % EEH_MAX_FAILS == 0) { - location = of_get_property(dn, "ibm,loc-code", NULL); - printk(KERN_ERR "EEH: %d reads ignored for recovering device at " - "location=%s driver=%s pci addr=%s\n", - pe->check_count, location, - eeh_driver_name(dev), eeh_pci_name(dev)); - printk(KERN_ERR "EEH: Might be infinite loop in %s driver\n", - eeh_driver_name(dev)); - dump_stack(); - } - goto dn_unlock; - } - - /* - * Now test for an EEH failure. This is VERY expensive. - * Note that the eeh_config_addr may be a parent device - * in the case of a device behind a bridge, or it may be - * function zero of a multi-function device. - * In any case they must share a common PHB. - */ - ret = eeh_ops->get_state(pe, NULL); - - /* Note that config-io to empty slots may fail; - * they are empty when they don't have children. - * We will punt with the following conditions: Failure to get - * PE's state, EEH not support and Permanently unavailable - * state, PE is in good state. - */ - if ((ret < 0) || - (ret == EEH_STATE_NOT_SUPPORT) || - (ret & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) == - (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) { - eeh_stats.false_positives++; - pe->false_positives++; - rc = 0; - goto dn_unlock; - } - - eeh_stats.slot_resets++; - - /* Avoid repeated reports of this failure, including problems - * with other functions on this device, and functions under - * bridges. - */ - eeh_pe_state_mark(pe, EEH_PE_ISOLATED); - raw_spin_unlock_irqrestore(&confirm_error_lock, flags); - - eeh_send_failure_event(pe); - - /* Most EEH events are due to device driver bugs. Having - * a stack trace will help the device-driver authors figure - * out what happened. So print that out. - */ - WARN(1, "EEH: failure detected\n"); - return 1; - -dn_unlock: - raw_spin_unlock_irqrestore(&confirm_error_lock, flags); - return rc; -} - -EXPORT_SYMBOL_GPL(eeh_dev_check_failure); - -/** - * eeh_check_failure - Check if all 1's data is due to EEH slot freeze - * @token: I/O token, should be address in the form 0xA.... - * @val: value, should be all 1's (XXX why do we need this arg??) - * - * Check for an EEH failure at the given token address. Call this - * routine if the result of a read was all 0xff's and you want to - * find out if this is due to an EEH slot freeze event. This routine - * will query firmware for the EEH status. - * - * Note this routine is safe to call in an interrupt context. - */ -unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned long val) -{ - unsigned long addr; - struct eeh_dev *edev; - - /* Finding the phys addr + pci device; this is pretty quick. */ - addr = eeh_token_to_phys((unsigned long __force) token); - edev = eeh_addr_cache_get_dev(addr); - if (!edev) { - eeh_stats.no_device++; - return val; - } - - eeh_dev_check_failure(edev); - - pci_dev_put(eeh_dev_to_pci_dev(edev)); - return val; -} - -EXPORT_SYMBOL(eeh_check_failure); - - -/** - * eeh_pci_enable - Enable MMIO or DMA transfers for this slot - * @pe: EEH PE - * - * This routine should be called to reenable frozen MMIO or DMA - * so that it would work correctly again. It's useful while doing - * recovery or log collection on the indicated device. - */ -int eeh_pci_enable(struct eeh_pe *pe, int function) -{ - int rc; - - rc = eeh_ops->set_option(pe, function); - if (rc) - pr_warning("%s: Unexpected state change %d on PHB#%d-PE#%x, err=%d\n", - __func__, function, pe->phb->global_number, pe->addr, rc); - - rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); - if (rc > 0 && (rc & EEH_STATE_MMIO_ENABLED) && - (function == EEH_OPT_THAW_MMIO)) - return 0; - - return rc; -} - -/** - * pcibios_set_pcie_slot_reset - Set PCI-E reset state - * @dev: pci device struct - * @state: reset state to enter - * - * Return value: - * 0 if success - */ -int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state) -{ - struct eeh_dev *edev = pci_dev_to_eeh_dev(dev); - struct eeh_pe *pe = edev->pe; - - if (!pe) { - pr_err("%s: No PE found on PCI device %s\n", - __func__, pci_name(dev)); - return -EINVAL; - } - - switch (state) { - case pcie_deassert_reset: - eeh_ops->reset(pe, EEH_RESET_DEACTIVATE); - break; - case pcie_hot_reset: - eeh_ops->reset(pe, EEH_RESET_HOT); - break; - case pcie_warm_reset: - eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL); - break; - default: - return -EINVAL; - }; - - return 0; -} - -/** - * eeh_set_pe_freset - Check the required reset for the indicated device - * @data: EEH device - * @flag: return value - * - * Each device might have its preferred reset type: fundamental or - * hot reset. The routine is used to collected the information for - * the indicated device and its children so that the bunch of the - * devices could be reset properly. - */ -static void *eeh_set_dev_freset(void *data, void *flag) -{ - struct pci_dev *dev; - unsigned int *freset = (unsigned int *)flag; - struct eeh_dev *edev = (struct eeh_dev *)data; - - dev = eeh_dev_to_pci_dev(edev); - if (dev) - *freset |= dev->needs_freset; - - return NULL; -} - -/** - * eeh_reset_pe_once - Assert the pci #RST line for 1/4 second - * @pe: EEH PE - * - * Assert the PCI #RST line for 1/4 second. - */ -static void eeh_reset_pe_once(struct eeh_pe *pe) -{ - unsigned int freset = 0; - - /* Determine type of EEH reset required for - * Partitionable Endpoint, a hot-reset (1) - * or a fundamental reset (3). - * A fundamental reset required by any device under - * Partitionable Endpoint trumps hot-reset. - */ - eeh_pe_dev_traverse(pe, eeh_set_dev_freset, &freset); - - if (freset) - eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL); - else - eeh_ops->reset(pe, EEH_RESET_HOT); - - /* The PCI bus requires that the reset be held high for at least - * a 100 milliseconds. We wait a bit longer 'just in case'. - */ -#define PCI_BUS_RST_HOLD_TIME_MSEC 250 - msleep(PCI_BUS_RST_HOLD_TIME_MSEC); - - /* We might get hit with another EEH freeze as soon as the - * pci slot reset line is dropped. Make sure we don't miss - * these, and clear the flag now. - */ - eeh_pe_state_clear(pe, EEH_PE_ISOLATED); - - eeh_ops->reset(pe, EEH_RESET_DEACTIVATE); - - /* After a PCI slot has been reset, the PCI Express spec requires - * a 1.5 second idle time for the bus to stabilize, before starting - * up traffic. - */ -#define PCI_BUS_SETTLE_TIME_MSEC 1800 - msleep(PCI_BUS_SETTLE_TIME_MSEC); -} - -/** - * eeh_reset_pe - Reset the indicated PE - * @pe: EEH PE - * - * This routine should be called to reset indicated device, including - * PE. A PE might include multiple PCI devices and sometimes PCI bridges - * might be involved as well. - */ -int eeh_reset_pe(struct eeh_pe *pe) -{ - int i, rc; - - /* Take three shots at resetting the bus */ - for (i=0; i<3; i++) { - eeh_reset_pe_once(pe); - - rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); - if (rc == (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) - return 0; - - if (rc < 0) { - pr_err("%s: Unrecoverable slot failure on PHB#%d-PE#%x", - __func__, pe->phb->global_number, pe->addr); - return -1; - } - pr_err("EEH: bus reset %d failed on PHB#%d-PE#%x, rc=%d\n", - i+1, pe->phb->global_number, pe->addr, rc); - } - - return -1; -} - -/** - * eeh_save_bars - Save device bars - * @edev: PCI device associated EEH device - * - * Save the values of the device bars. Unlike the restore - * routine, this routine is *not* recursive. This is because - * PCI devices are added individually; but, for the restore, - * an entire slot is reset at a time. - */ -void eeh_save_bars(struct eeh_dev *edev) -{ - int i; - struct device_node *dn; - - if (!edev) - return; - dn = eeh_dev_to_of_node(edev); - - for (i = 0; i < 16; i++) - eeh_ops->read_config(dn, i * 4, 4, &edev->config_space[i]); -} - -/** - * eeh_ops_register - Register platform dependent EEH operations - * @ops: platform dependent EEH operations - * - * Register the platform dependent EEH operation callback - * functions. The platform should call this function before - * any other EEH operations. - */ -int __init eeh_ops_register(struct eeh_ops *ops) -{ - if (!ops->name) { - pr_warning("%s: Invalid EEH ops name for %p\n", - __func__, ops); - return -EINVAL; - } - - if (eeh_ops && eeh_ops != ops) { - pr_warning("%s: EEH ops of platform %s already existing (%s)\n", - __func__, eeh_ops->name, ops->name); - return -EEXIST; - } - - eeh_ops = ops; - - return 0; -} - -/** - * eeh_ops_unregister - Unreigster platform dependent EEH operations - * @name: name of EEH platform operations - * - * Unregister the platform dependent EEH operation callback - * functions. - */ -int __exit eeh_ops_unregister(const char *name) -{ - if (!name || !strlen(name)) { - pr_warning("%s: Invalid EEH ops name\n", - __func__); - return -EINVAL; - } - - if (eeh_ops && !strcmp(eeh_ops->name, name)) { - eeh_ops = NULL; - return 0; - } - - return -EEXIST; -} - -/** - * eeh_init - EEH initialization - * - * Initialize EEH by trying to enable it for all of the adapters in the system. - * As a side effect we can determine here if eeh is supported at all. - * Note that we leave EEH on so failed config cycles won't cause a machine - * check. If a user turns off EEH for a particular adapter they are really - * telling Linux to ignore errors. Some hardware (e.g. POWER5) won't - * grant access to a slot if EEH isn't enabled, and so we always enable - * EEH for all slots/all devices. - * - * The eeh-force-off option disables EEH checking globally, for all slots. - * Even if force-off is set, the EEH hardware is still enabled, so that - * newer systems can boot. - */ -static int __init eeh_init(void) -{ - struct pci_controller *hose, *tmp; - struct device_node *phb; - int ret; - - /* call platform initialization function */ - if (!eeh_ops) { - pr_warning("%s: Platform EEH operation not found\n", - __func__); - return -EEXIST; - } else if ((ret = eeh_ops->init())) { - pr_warning("%s: Failed to call platform init function (%d)\n", - __func__, ret); - return ret; - } - - raw_spin_lock_init(&confirm_error_lock); - - /* Enable EEH for all adapters */ - if (eeh_probe_mode_devtree()) { - list_for_each_entry_safe(hose, tmp, - &hose_list, list_node) { - phb = hose->dn; - traverse_pci_devices(phb, eeh_ops->of_probe, NULL); - } - } - - if (eeh_subsystem_enabled) - pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n"); - else - pr_warning("EEH: No capable adapters found\n"); - - return ret; -} - -core_initcall_sync(eeh_init); - -/** - * eeh_add_device_early - Enable EEH for the indicated device_node - * @dn: device node for which to set up EEH - * - * This routine must be used to perform EEH initialization for PCI - * devices that were added after system boot (e.g. hotplug, dlpar). - * This routine must be called before any i/o is performed to the - * adapter (inluding any config-space i/o). - * Whether this actually enables EEH or not for this device depends - * on the CEC architecture, type of the device, on earlier boot - * command-line arguments & etc. - */ -static void eeh_add_device_early(struct device_node *dn) -{ - struct pci_controller *phb; - - if (!of_node_to_eeh_dev(dn)) - return; - phb = of_node_to_eeh_dev(dn)->phb; - - /* USB Bus children of PCI devices will not have BUID's */ - if (NULL == phb || 0 == phb->buid) - return; - - /* FIXME: hotplug support on POWERNV */ - eeh_ops->of_probe(dn, NULL); -} - -/** - * eeh_add_device_tree_early - Enable EEH for the indicated device - * @dn: device node - * - * This routine must be used to perform EEH initialization for the - * indicated PCI device that was added after system boot (e.g. - * hotplug, dlpar). - */ -void eeh_add_device_tree_early(struct device_node *dn) -{ - struct device_node *sib; - - for_each_child_of_node(dn, sib) - eeh_add_device_tree_early(sib); - eeh_add_device_early(dn); -} -EXPORT_SYMBOL_GPL(eeh_add_device_tree_early); - -/** - * eeh_add_device_late - Perform EEH initialization for the indicated pci device - * @dev: pci device for which to set up EEH - * - * This routine must be used to complete EEH initialization for PCI - * devices that were added after system boot (e.g. hotplug, dlpar). - */ -static void eeh_add_device_late(struct pci_dev *dev) -{ - struct device_node *dn; - struct eeh_dev *edev; - - if (!dev || !eeh_subsystem_enabled) - return; - - pr_debug("EEH: Adding device %s\n", pci_name(dev)); - - dn = pci_device_to_OF_node(dev); - edev = of_node_to_eeh_dev(dn); - if (edev->pdev == dev) { - pr_debug("EEH: Already referenced !\n"); - return; - } - WARN_ON(edev->pdev); - - pci_dev_get(dev); - edev->pdev = dev; - dev->dev.archdata.edev = edev; - - eeh_addr_cache_insert_dev(dev); -} - -/** - * eeh_add_device_tree_late - Perform EEH initialization for the indicated PCI bus - * @bus: PCI bus - * - * This routine must be used to perform EEH initialization for PCI - * devices which are attached to the indicated PCI bus. The PCI bus - * is added after system boot through hotplug or dlpar. - */ -void eeh_add_device_tree_late(struct pci_bus *bus) -{ - struct pci_dev *dev; - - list_for_each_entry(dev, &bus->devices, bus_list) { - eeh_add_device_late(dev); - if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) { - struct pci_bus *subbus = dev->subordinate; - if (subbus) - eeh_add_device_tree_late(subbus); - } - } -} -EXPORT_SYMBOL_GPL(eeh_add_device_tree_late); - -/** - * eeh_add_sysfs_files - Add EEH sysfs files for the indicated PCI bus - * @bus: PCI bus - * - * This routine must be used to add EEH sysfs files for PCI - * devices which are attached to the indicated PCI bus. The PCI bus - * is added after system boot through hotplug or dlpar. - */ -void eeh_add_sysfs_files(struct pci_bus *bus) -{ - struct pci_dev *dev; - - list_for_each_entry(dev, &bus->devices, bus_list) { - eeh_sysfs_add_device(dev); - if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) { - struct pci_bus *subbus = dev->subordinate; - if (subbus) - eeh_add_sysfs_files(subbus); - } - } -} -EXPORT_SYMBOL_GPL(eeh_add_sysfs_files); - -/** - * eeh_remove_device - Undo EEH setup for the indicated pci device - * @dev: pci device to be removed - * @purge_pe: remove the PE or not - * - * This routine should be called when a device is removed from - * a running system (e.g. by hotplug or dlpar). It unregisters - * the PCI device from the EEH subsystem. I/O errors affecting - * this device will no longer be detected after this call; thus, - * i/o errors affecting this slot may leave this device unusable. - */ -static void eeh_remove_device(struct pci_dev *dev, int purge_pe) -{ - struct eeh_dev *edev; - - if (!dev || !eeh_subsystem_enabled) - return; - edev = pci_dev_to_eeh_dev(dev); - - /* Unregister the device with the EEH/PCI address search system */ - pr_debug("EEH: Removing device %s\n", pci_name(dev)); - - if (!edev || !edev->pdev) { - pr_debug("EEH: Not referenced !\n"); - return; - } - edev->pdev = NULL; - dev->dev.archdata.edev = NULL; - pci_dev_put(dev); - - eeh_rmv_from_parent_pe(edev, purge_pe); - eeh_addr_cache_rmv_dev(dev); - eeh_sysfs_remove_device(dev); -} - -/** - * eeh_remove_bus_device - Undo EEH setup for the indicated PCI device - * @dev: PCI device - * @purge_pe: remove the corresponding PE or not - * - * This routine must be called when a device is removed from the - * running system through hotplug or dlpar. The corresponding - * PCI address cache will be removed. - */ -void eeh_remove_bus_device(struct pci_dev *dev, int purge_pe) -{ - struct pci_bus *bus = dev->subordinate; - struct pci_dev *child, *tmp; - - eeh_remove_device(dev, purge_pe); - - if (bus && dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) { - list_for_each_entry_safe(child, tmp, &bus->devices, bus_list) - eeh_remove_bus_device(child, purge_pe); - } -} -EXPORT_SYMBOL_GPL(eeh_remove_bus_device); - -static int proc_eeh_show(struct seq_file *m, void *v) -{ - if (0 == eeh_subsystem_enabled) { - seq_printf(m, "EEH Subsystem is globally disabled\n"); - seq_printf(m, "eeh_total_mmio_ffs=%llu\n", eeh_stats.total_mmio_ffs); - } else { - seq_printf(m, "EEH Subsystem is enabled\n"); - seq_printf(m, - "no device=%llu\n" - "no device node=%llu\n" - "no config address=%llu\n" - "check not wanted=%llu\n" - "eeh_total_mmio_ffs=%llu\n" - "eeh_false_positives=%llu\n" - "eeh_slot_resets=%llu\n", - eeh_stats.no_device, - eeh_stats.no_dn, - eeh_stats.no_cfg_addr, - eeh_stats.ignored_check, - eeh_stats.total_mmio_ffs, - eeh_stats.false_positives, - eeh_stats.slot_resets); - } - - return 0; -} - -static int proc_eeh_open(struct inode *inode, struct file *file) -{ - return single_open(file, proc_eeh_show, NULL); -} - -static const struct file_operations proc_eeh_operations = { - .open = proc_eeh_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int __init eeh_init_proc(void) -{ - if (machine_is(pseries)) - proc_create("powerpc/eeh", 0, NULL, &proc_eeh_operations); - return 0; -} -__initcall(eeh_init_proc); diff --git a/arch/powerpc/platforms/pseries/eeh_cache.c b/arch/powerpc/platforms/pseries/eeh_cache.c deleted file mode 100644 index 5ce3ba7..0000000 --- a/arch/powerpc/platforms/pseries/eeh_cache.c +++ /dev/null @@ -1,319 +0,0 @@ -/* - * PCI address cache; allows the lookup of PCI devices based on I/O address - * - * Copyright IBM Corporation 2004 - * Copyright Linas Vepstas <linas@austin.ibm.com> 2004 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include <linux/list.h> -#include <linux/pci.h> -#include <linux/rbtree.h> -#include <linux/slab.h> -#include <linux/spinlock.h> -#include <linux/atomic.h> -#include <asm/pci-bridge.h> -#include <asm/ppc-pci.h> - - -/** - * The pci address cache subsystem. This subsystem places - * PCI device address resources into a red-black tree, sorted - * according to the address range, so that given only an i/o - * address, the corresponding PCI device can be **quickly** - * found. It is safe to perform an address lookup in an interrupt - * context; this ability is an important feature. - * - * Currently, the only customer of this code is the EEH subsystem; - * thus, this code has been somewhat tailored to suit EEH better. - * In particular, the cache does *not* hold the addresses of devices - * for which EEH is not enabled. - * - * (Implementation Note: The RB tree seems to be better/faster - * than any hash algo I could think of for this problem, even - * with the penalty of slow pointer chases for d-cache misses). - */ -struct pci_io_addr_range { - struct rb_node rb_node; - unsigned long addr_lo; - unsigned long addr_hi; - struct eeh_dev *edev; - struct pci_dev *pcidev; - unsigned int flags; -}; - -static struct pci_io_addr_cache { - struct rb_root rb_root; - spinlock_t piar_lock; -} pci_io_addr_cache_root; - -static inline struct eeh_dev *__eeh_addr_cache_get_device(unsigned long addr) -{ - struct rb_node *n = pci_io_addr_cache_root.rb_root.rb_node; - - while (n) { - struct pci_io_addr_range *piar; - piar = rb_entry(n, struct pci_io_addr_range, rb_node); - - if (addr < piar->addr_lo) { - n = n->rb_left; - } else { - if (addr > piar->addr_hi) { - n = n->rb_right; - } else { - pci_dev_get(piar->pcidev); - return piar->edev; - } - } - } - - return NULL; -} - -/** - * eeh_addr_cache_get_dev - Get device, given only address - * @addr: mmio (PIO) phys address or i/o port number - * - * Given an mmio phys address, or a port number, find a pci device - * that implements this address. Be sure to pci_dev_put the device - * when finished. I/O port numbers are assumed to be offset - * from zero (that is, they do *not* have pci_io_addr added in). - * It is safe to call this function within an interrupt. - */ -struct eeh_dev *eeh_addr_cache_get_dev(unsigned long addr) -{ - struct eeh_dev *edev; - unsigned long flags; - - spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags); - edev = __eeh_addr_cache_get_device(addr); - spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags); - return edev; -} - -#ifdef DEBUG -/* - * Handy-dandy debug print routine, does nothing more - * than print out the contents of our addr cache. - */ -static void eeh_addr_cache_print(struct pci_io_addr_cache *cache) -{ - struct rb_node *n; - int cnt = 0; - - n = rb_first(&cache->rb_root); - while (n) { - struct pci_io_addr_range *piar; - piar = rb_entry(n, struct pci_io_addr_range, rb_node); - pr_debug("PCI: %s addr range %d [%lx-%lx]: %s\n", - (piar->flags & IORESOURCE_IO) ? "i/o" : "mem", cnt, - piar->addr_lo, piar->addr_hi, pci_name(piar->pcidev)); - cnt++; - n = rb_next(n); - } -} -#endif - -/* Insert address range into the rb tree. */ -static struct pci_io_addr_range * -eeh_addr_cache_insert(struct pci_dev *dev, unsigned long alo, - unsigned long ahi, unsigned int flags) -{ - struct rb_node **p = &pci_io_addr_cache_root.rb_root.rb_node; - struct rb_node *parent = NULL; - struct pci_io_addr_range *piar; - - /* Walk tree, find a place to insert into tree */ - while (*p) { - parent = *p; - piar = rb_entry(parent, struct pci_io_addr_range, rb_node); - if (ahi < piar->addr_lo) { - p = &parent->rb_left; - } else if (alo > piar->addr_hi) { - p = &parent->rb_right; - } else { - if (dev != piar->pcidev || - alo != piar->addr_lo || ahi != piar->addr_hi) { - pr_warning("PIAR: overlapping address range\n"); - } - return piar; - } - } - piar = kzalloc(sizeof(struct pci_io_addr_range), GFP_ATOMIC); - if (!piar) - return NULL; - - pci_dev_get(dev); - piar->addr_lo = alo; - piar->addr_hi = ahi; - piar->edev = pci_dev_to_eeh_dev(dev); - piar->pcidev = dev; - piar->flags = flags; - -#ifdef DEBUG - pr_debug("PIAR: insert range=[%lx:%lx] dev=%s\n", - alo, ahi, pci_name(dev)); -#endif - - rb_link_node(&piar->rb_node, parent, p); - rb_insert_color(&piar->rb_node, &pci_io_addr_cache_root.rb_root); - - return piar; -} - -static void __eeh_addr_cache_insert_dev(struct pci_dev *dev) -{ - struct device_node *dn; - struct eeh_dev *edev; - int i; - - dn = pci_device_to_OF_node(dev); - if (!dn) { - pr_warning("PCI: no pci dn found for dev=%s\n", pci_name(dev)); - return; - } - - edev = of_node_to_eeh_dev(dn); - if (!edev) { - pr_warning("PCI: no EEH dev found for dn=%s\n", - dn->full_name); - return; - } - - /* Skip any devices for which EEH is not enabled. */ - if (!edev->pe) { -#ifdef DEBUG - pr_info("PCI: skip building address cache for=%s - %s\n", - pci_name(dev), dn->full_name); -#endif - return; - } - - /* Walk resources on this device, poke them into the tree */ - for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { - unsigned long start = pci_resource_start(dev,i); - unsigned long end = pci_resource_end(dev,i); - unsigned int flags = pci_resource_flags(dev,i); - - /* We are interested only bus addresses, not dma or other stuff */ - if (0 == (flags & (IORESOURCE_IO | IORESOURCE_MEM))) - continue; - if (start == 0 || ~start == 0 || end == 0 || ~end == 0) - continue; - eeh_addr_cache_insert(dev, start, end, flags); - } -} - -/** - * eeh_addr_cache_insert_dev - Add a device to the address cache - * @dev: PCI device whose I/O addresses we are interested in. - * - * In order to support the fast lookup of devices based on addresses, - * we maintain a cache of devices that can be quickly searched. - * This routine adds a device to that cache. - */ -void eeh_addr_cache_insert_dev(struct pci_dev *dev) -{ - unsigned long flags; - - /* Ignore PCI bridges */ - if ((dev->class >> 16) == PCI_BASE_CLASS_BRIDGE) - return; - - spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags); - __eeh_addr_cache_insert_dev(dev); - spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags); -} - -static inline void __eeh_addr_cache_rmv_dev(struct pci_dev *dev) -{ - struct rb_node *n; - -restart: - n = rb_first(&pci_io_addr_cache_root.rb_root); - while (n) { - struct pci_io_addr_range *piar; - piar = rb_entry(n, struct pci_io_addr_range, rb_node); - - if (piar->pcidev == dev) { - rb_erase(n, &pci_io_addr_cache_root.rb_root); - pci_dev_put(piar->pcidev); - kfree(piar); - goto restart; - } - n = rb_next(n); - } -} - -/** - * eeh_addr_cache_rmv_dev - remove pci device from addr cache - * @dev: device to remove - * - * Remove a device from the addr-cache tree. - * This is potentially expensive, since it will walk - * the tree multiple times (once per resource). - * But so what; device removal doesn't need to be that fast. - */ -void eeh_addr_cache_rmv_dev(struct pci_dev *dev) -{ - unsigned long flags; - - spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags); - __eeh_addr_cache_rmv_dev(dev); - spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags); -} - -/** - * eeh_addr_cache_build - Build a cache of I/O addresses - * - * Build a cache of pci i/o addresses. This cache will be used to - * find the pci device that corresponds to a given address. - * This routine scans all pci busses to build the cache. - * Must be run late in boot process, after the pci controllers - * have been scanned for devices (after all device resources are known). - */ -void __init eeh_addr_cache_build(void) -{ - struct device_node *dn; - struct eeh_dev *edev; - struct pci_dev *dev = NULL; - - spin_lock_init(&pci_io_addr_cache_root.piar_lock); - - for_each_pci_dev(dev) { - dn = pci_device_to_OF_node(dev); - if (!dn) - continue; - - edev = of_node_to_eeh_dev(dn); - if (!edev) - continue; - - pci_dev_get(dev); /* matching put is in eeh_remove_device() */ - dev->dev.archdata.edev = edev; - edev->pdev = dev; - - eeh_addr_cache_insert_dev(dev); - - eeh_sysfs_add_device(dev); - } - -#ifdef DEBUG - /* Verify tree built up above, echo back the list of addrs. */ - eeh_addr_cache_print(&pci_io_addr_cache_root); -#endif -} - diff --git a/arch/powerpc/platforms/pseries/eeh_dev.c b/arch/powerpc/platforms/pseries/eeh_dev.c deleted file mode 100644 index 1efa28f..0000000 --- a/arch/powerpc/platforms/pseries/eeh_dev.c +++ /dev/null @@ -1,112 +0,0 @@ -/* - * The file intends to implement dynamic creation of EEH device, which will - * be bound with OF node and PCI device simutaneously. The EEH devices would - * be foundamental information for EEH core components to work proerly. Besides, - * We have to support multiple situations where dynamic creation of EEH device - * is required: - * - * 1) Before PCI emunation starts, we need create EEH devices according to the - * PCI sensitive OF nodes. - * 2) When PCI emunation is done, we need do the binding between PCI device and - * the associated EEH device. - * 3) DR (Dynamic Reconfiguration) would create PCI sensitive OF node. EEH device - * will be created while PCI sensitive OF node is detected from DR. - * 4) PCI hotplug needs redoing the binding between PCI device and EEH device. If - * PHB is newly inserted, we also need create EEH devices accordingly. - * - * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2012. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include <linux/export.h> -#include <linux/gfp.h> -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/pci.h> -#include <linux/string.h> - -#include <asm/pci-bridge.h> -#include <asm/ppc-pci.h> - -/** - * eeh_dev_init - Create EEH device according to OF node - * @dn: device node - * @data: PHB - * - * It will create EEH device according to the given OF node. The function - * might be called by PCI emunation, DR, PHB hotplug. - */ -void *eeh_dev_init(struct device_node *dn, void *data) -{ - struct pci_controller *phb = data; - struct eeh_dev *edev; - - /* Allocate EEH device */ - edev = kzalloc(sizeof(*edev), GFP_KERNEL); - if (!edev) { - pr_warning("%s: out of memory\n", __func__); - return NULL; - } - - /* Associate EEH device with OF node */ - PCI_DN(dn)->edev = edev; - edev->dn = dn; - edev->phb = phb; - INIT_LIST_HEAD(&edev->list); - - return NULL; -} - -/** - * eeh_dev_phb_init_dynamic - Create EEH devices for devices included in PHB - * @phb: PHB - * - * Scan the PHB OF node and its child association, then create the - * EEH devices accordingly - */ -void eeh_dev_phb_init_dynamic(struct pci_controller *phb) -{ - struct device_node *dn = phb->dn; - - /* EEH PE for PHB */ - eeh_phb_pe_create(phb); - - /* EEH device for PHB */ - eeh_dev_init(dn, phb); - - /* EEH devices for children OF nodes */ - traverse_pci_devices(dn, eeh_dev_init, phb); -} - -/** - * eeh_dev_phb_init - Create EEH devices for devices included in existing PHBs - * - * Scan all the existing PHBs and create EEH devices for their OF - * nodes and their children OF nodes - */ -static int __init eeh_dev_phb_init(void) -{ - struct pci_controller *phb, *tmp; - - list_for_each_entry_safe(phb, tmp, &hose_list, list_node) - eeh_dev_phb_init_dynamic(phb); - - pr_info("EEH: devices created\n"); - - return 0; -} - -core_initcall(eeh_dev_phb_init); diff --git a/arch/powerpc/platforms/pseries/eeh_driver.c b/arch/powerpc/platforms/pseries/eeh_driver.c deleted file mode 100644 index a3fefb6..0000000 --- a/arch/powerpc/platforms/pseries/eeh_driver.c +++ /dev/null @@ -1,552 +0,0 @@ -/* - * PCI Error Recovery Driver for RPA-compliant PPC64 platform. - * Copyright IBM Corp. 2004 2005 - * Copyright Linas Vepstas <linas@linas.org> 2004, 2005 - * - * All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or (at - * your option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Send comments and feedback to Linas Vepstas <linas@austin.ibm.com> - */ -#include <linux/delay.h> -#include <linux/interrupt.h> -#include <linux/irq.h> -#include <linux/module.h> -#include <linux/pci.h> -#include <asm/eeh.h> -#include <asm/eeh_event.h> -#include <asm/ppc-pci.h> -#include <asm/pci-bridge.h> -#include <asm/prom.h> -#include <asm/rtas.h> - -/** - * eeh_pcid_name - Retrieve name of PCI device driver - * @pdev: PCI device - * - * This routine is used to retrieve the name of PCI device driver - * if that's valid. - */ -static inline const char *eeh_pcid_name(struct pci_dev *pdev) -{ - if (pdev && pdev->dev.driver) - return pdev->dev.driver->name; - return ""; -} - -/** - * eeh_pcid_get - Get the PCI device driver - * @pdev: PCI device - * - * The function is used to retrieve the PCI device driver for - * the indicated PCI device. Besides, we will increase the reference - * of the PCI device driver to prevent that being unloaded on - * the fly. Otherwise, kernel crash would be seen. - */ -static inline struct pci_driver *eeh_pcid_get(struct pci_dev *pdev) -{ - if (!pdev || !pdev->driver) - return NULL; - - if (!try_module_get(pdev->driver->driver.owner)) - return NULL; - - return pdev->driver; -} - -/** - * eeh_pcid_put - Dereference on the PCI device driver - * @pdev: PCI device - * - * The function is called to do dereference on the PCI device - * driver of the indicated PCI device. - */ -static inline void eeh_pcid_put(struct pci_dev *pdev) -{ - if (!pdev || !pdev->driver) - return; - - module_put(pdev->driver->driver.owner); -} - -#if 0 -static void print_device_node_tree(struct pci_dn *pdn, int dent) -{ - int i; - struct device_node *pc; - - if (!pdn) - return; - for (i = 0; i < dent; i++) - printk(" "); - printk("dn=%s mode=%x \tcfg_addr=%x pe_addr=%x \tfull=%s\n", - pdn->node->name, pdn->eeh_mode, pdn->eeh_config_addr, - pdn->eeh_pe_config_addr, pdn->node->full_name); - dent += 3; - pc = pdn->node->child; - while (pc) { - print_device_node_tree(PCI_DN(pc), dent); - pc = pc->sibling; - } -} -#endif - -/** - * eeh_disable_irq - Disable interrupt for the recovering device - * @dev: PCI device - * - * This routine must be called when reporting temporary or permanent - * error to the particular PCI device to disable interrupt of that - * device. If the device has enabled MSI or MSI-X interrupt, we needn't - * do real work because EEH should freeze DMA transfers for those PCI - * devices encountering EEH errors, which includes MSI or MSI-X. - */ -static void eeh_disable_irq(struct pci_dev *dev) -{ - struct eeh_dev *edev = pci_dev_to_eeh_dev(dev); - - /* Don't disable MSI and MSI-X interrupts. They are - * effectively disabled by the DMA Stopped state - * when an EEH error occurs. - */ - if (dev->msi_enabled || dev->msix_enabled) - return; - - if (!irq_has_action(dev->irq)) - return; - - edev->mode |= EEH_DEV_IRQ_DISABLED; - disable_irq_nosync(dev->irq); -} - -/** - * eeh_enable_irq - Enable interrupt for the recovering device - * @dev: PCI device - * - * This routine must be called to enable interrupt while failed - * device could be resumed. - */ -static void eeh_enable_irq(struct pci_dev *dev) -{ - struct eeh_dev *edev = pci_dev_to_eeh_dev(dev); - - if ((edev->mode) & EEH_DEV_IRQ_DISABLED) { - edev->mode &= ~EEH_DEV_IRQ_DISABLED; - enable_irq(dev->irq); - } -} - -/** - * eeh_report_error - Report pci error to each device driver - * @data: eeh device - * @userdata: return value - * - * Report an EEH error to each device driver, collect up and - * merge the device driver responses. Cumulative response - * passed back in "userdata". - */ -static void *eeh_report_error(void *data, void *userdata) -{ - struct eeh_dev *edev = (struct eeh_dev *)data; - struct pci_dev *dev = eeh_dev_to_pci_dev(edev); - enum pci_ers_result rc, *res = userdata; - struct pci_driver *driver; - - /* We might not have the associated PCI device, - * then we should continue for next one. - */ - if (!dev) return NULL; - dev->error_state = pci_channel_io_frozen; - - driver = eeh_pcid_get(dev); - if (!driver) return NULL; - - eeh_disable_irq(dev); - - if (!driver->err_handler || - !driver->err_handler->error_detected) { - eeh_pcid_put(dev); - return NULL; - } - - rc = driver->err_handler->error_detected(dev, pci_channel_io_frozen); - - /* A driver that needs a reset trumps all others */ - if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc; - if (*res == PCI_ERS_RESULT_NONE) *res = rc; - - eeh_pcid_put(dev); - return NULL; -} - -/** - * eeh_report_mmio_enabled - Tell drivers that MMIO has been enabled - * @data: eeh device - * @userdata: return value - * - * Tells each device driver that IO ports, MMIO and config space I/O - * are now enabled. Collects up and merges the device driver responses. - * Cumulative response passed back in "userdata". - */ -static void *eeh_report_mmio_enabled(void *data, void *userdata) -{ - struct eeh_dev *edev = (struct eeh_dev *)data; - struct pci_dev *dev = eeh_dev_to_pci_dev(edev); - enum pci_ers_result rc, *res = userdata; - struct pci_driver *driver; - - driver = eeh_pcid_get(dev); - if (!driver) return NULL; - - if (!driver->err_handler || - !driver->err_handler->mmio_enabled) { - eeh_pcid_put(dev); - return NULL; - } - - rc = driver->err_handler->mmio_enabled(dev); - - /* A driver that needs a reset trumps all others */ - if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc; - if (*res == PCI_ERS_RESULT_NONE) *res = rc; - - eeh_pcid_put(dev); - return NULL; -} - -/** - * eeh_report_reset - Tell device that slot has been reset - * @data: eeh device - * @userdata: return value - * - * This routine must be called while EEH tries to reset particular - * PCI device so that the associated PCI device driver could take - * some actions, usually to save data the driver needs so that the - * driver can work again while the device is recovered. - */ -static void *eeh_report_reset(void *data, void *userdata) -{ - struct eeh_dev *edev = (struct eeh_dev *)data; - struct pci_dev *dev = eeh_dev_to_pci_dev(edev); - enum pci_ers_result rc, *res = userdata; - struct pci_driver *driver; - - if (!dev) return NULL; - dev->error_state = pci_channel_io_normal; - - driver = eeh_pcid_get(dev); - if (!driver) return NULL; - - eeh_enable_irq(dev); - - if (!driver->err_handler || - !driver->err_handler->slot_reset) { - eeh_pcid_put(dev); - return NULL; - } - - rc = driver->err_handler->slot_reset(dev); - if ((*res == PCI_ERS_RESULT_NONE) || - (*res == PCI_ERS_RESULT_RECOVERED)) *res = rc; - if (*res == PCI_ERS_RESULT_DISCONNECT && - rc == PCI_ERS_RESULT_NEED_RESET) *res = rc; - - eeh_pcid_put(dev); - return NULL; -} - -/** - * eeh_report_resume - Tell device to resume normal operations - * @data: eeh device - * @userdata: return value - * - * This routine must be called to notify the device driver that it - * could resume so that the device driver can do some initialization - * to make the recovered device work again. - */ -static void *eeh_report_resume(void *data, void *userdata) -{ - struct eeh_dev *edev = (struct eeh_dev *)data; - struct pci_dev *dev = eeh_dev_to_pci_dev(edev); - struct pci_driver *driver; - - if (!dev) return NULL; - dev->error_state = pci_channel_io_normal; - - driver = eeh_pcid_get(dev); - if (!driver) return NULL; - - eeh_enable_irq(dev); - - if (!driver->err_handler || - !driver->err_handler->resume) { - eeh_pcid_put(dev); - return NULL; - } - - driver->err_handler->resume(dev); - - eeh_pcid_put(dev); - return NULL; -} - -/** - * eeh_report_failure - Tell device driver that device is dead. - * @data: eeh device - * @userdata: return value - * - * This informs the device driver that the device is permanently - * dead, and that no further recovery attempts will be made on it. - */ -static void *eeh_report_failure(void *data, void *userdata) -{ - struct eeh_dev *edev = (struct eeh_dev *)data; - struct pci_dev *dev = eeh_dev_to_pci_dev(edev); - struct pci_driver *driver; - - if (!dev) return NULL; - dev->error_state = pci_channel_io_perm_failure; - - driver = eeh_pcid_get(dev); - if (!driver) return NULL; - - eeh_disable_irq(dev); - - if (!driver->err_handler || - !driver->err_handler->error_detected) { - eeh_pcid_put(dev); - return NULL; - } - - driver->err_handler->error_detected(dev, pci_channel_io_perm_failure); - - eeh_pcid_put(dev); - return NULL; -} - -/** - * eeh_reset_device - Perform actual reset of a pci slot - * @pe: EEH PE - * @bus: PCI bus corresponding to the isolcated slot - * - * This routine must be called to do reset on the indicated PE. - * During the reset, udev might be invoked because those affected - * PCI devices will be removed and then added. - */ -static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus) -{ - int cnt, rc; - - /* pcibios will clear the counter; save the value */ - cnt = pe->freeze_count; - - /* - * We don't remove the corresponding PE instances because - * we need the information afterwords. The attached EEH - * devices are expected to be attached soon when calling - * into pcibios_add_pci_devices(). - */ - if (bus) - __pcibios_remove_pci_devices(bus, 0); - - /* Reset the pci controller. (Asserts RST#; resets config space). - * Reconfigure bridges and devices. Don't try to bring the system - * up if the reset failed for some reason. - */ - rc = eeh_reset_pe(pe); - if (rc) - return rc; - - /* Restore PE */ - eeh_ops->configure_bridge(pe); - eeh_pe_restore_bars(pe); - - /* Give the system 5 seconds to finish running the user-space - * hotplug shutdown scripts, e.g. ifdown for ethernet. Yes, - * this is a hack, but if we don't do this, and try to bring - * the device up before the scripts have taken it down, - * potentially weird things happen. - */ - if (bus) { - ssleep(5); - pcibios_add_pci_devices(bus); - } - pe->freeze_count = cnt; - - return 0; -} - -/* The longest amount of time to wait for a pci device - * to come back on line, in seconds. - */ -#define MAX_WAIT_FOR_RECOVERY 150 - -/** - * eeh_handle_event - Reset a PCI device after hard lockup. - * @pe: EEH PE - * - * While PHB detects address or data parity errors on particular PCI - * slot, the associated PE will be frozen. Besides, DMA's occurring - * to wild addresses (which usually happen due to bugs in device - * drivers or in PCI adapter firmware) can cause EEH error. #SERR, - * #PERR or other misc PCI-related errors also can trigger EEH errors. - * - * Recovery process consists of unplugging the device driver (which - * generated hotplug events to userspace), then issuing a PCI #RST to - * the device, then reconfiguring the PCI config space for all bridges - * & devices under this slot, and then finally restarting the device - * drivers (which cause a second set of hotplug events to go out to - * userspace). - */ -void eeh_handle_event(struct eeh_pe *pe) -{ - struct pci_bus *frozen_bus; - int rc = 0; - enum pci_ers_result result = PCI_ERS_RESULT_NONE; - - frozen_bus = eeh_pe_bus_get(pe); - if (!frozen_bus) { - pr_err("%s: Cannot find PCI bus for PHB#%d-PE#%x\n", - __func__, pe->phb->global_number, pe->addr); - return; - } - - pe->freeze_count++; - if (pe->freeze_count > EEH_MAX_ALLOWED_FREEZES) - goto excess_failures; - pr_warning("EEH: This PCI device has failed %d times in the last hour\n", - pe->freeze_count); - - /* Walk the various device drivers attached to this slot through - * a reset sequence, giving each an opportunity to do what it needs - * to accomplish the reset. Each child gets a report of the - * status ... if any child can't handle the reset, then the entire - * slot is dlpar removed and added. - */ - eeh_pe_dev_traverse(pe, eeh_report_error, &result); - - /* Get the current PCI slot state. This can take a long time, - * sometimes over 3 seconds for certain systems. - */ - rc = eeh_ops->wait_state(pe, MAX_WAIT_FOR_RECOVERY*1000); - if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) { - printk(KERN_WARNING "EEH: Permanent failure\n"); - goto hard_fail; - } - - /* Since rtas may enable MMIO when posting the error log, - * don't post the error log until after all dev drivers - * have been informed. - */ - eeh_slot_error_detail(pe, EEH_LOG_TEMP); - - /* If all device drivers were EEH-unaware, then shut - * down all of the device drivers, and hope they - * go down willingly, without panicing the system. - */ - if (result == PCI_ERS_RESULT_NONE) { - rc = eeh_reset_device(pe, frozen_bus); - if (rc) { - printk(KERN_WARNING "EEH: Unable to reset, rc=%d\n", rc); - goto hard_fail; - } - } - - /* If all devices reported they can proceed, then re-enable MMIO */ - if (result == PCI_ERS_RESULT_CAN_RECOVER) { - rc = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO); - - if (rc < 0) - goto hard_fail; - if (rc) { - result = PCI_ERS_RESULT_NEED_RESET; - } else { - result = PCI_ERS_RESULT_NONE; - eeh_pe_dev_traverse(pe, eeh_report_mmio_enabled, &result); - } - } - - /* If all devices reported they can proceed, then re-enable DMA */ - if (result == PCI_ERS_RESULT_CAN_RECOVER) { - rc = eeh_pci_enable(pe, EEH_OPT_THAW_DMA); - - if (rc < 0) - goto hard_fail; - if (rc) - result = PCI_ERS_RESULT_NEED_RESET; - else - result = PCI_ERS_RESULT_RECOVERED; - } - - /* If any device has a hard failure, then shut off everything. */ - if (result == PCI_ERS_RESULT_DISCONNECT) { - printk(KERN_WARNING "EEH: Device driver gave up\n"); - goto hard_fail; - } - - /* If any device called out for a reset, then reset the slot */ - if (result == PCI_ERS_RESULT_NEED_RESET) { - rc = eeh_reset_device(pe, NULL); - if (rc) { - printk(KERN_WARNING "EEH: Cannot reset, rc=%d\n", rc); - goto hard_fail; - } - result = PCI_ERS_RESULT_NONE; - eeh_pe_dev_traverse(pe, eeh_report_reset, &result); - } - - /* All devices should claim they have recovered by now. */ - if ((result != PCI_ERS_RESULT_RECOVERED) && - (result != PCI_ERS_RESULT_NONE)) { - printk(KERN_WARNING "EEH: Not recovered\n"); - goto hard_fail; - } - - /* Tell all device drivers that they can resume operations */ - eeh_pe_dev_traverse(pe, eeh_report_resume, NULL); - - return; - -excess_failures: - /* - * About 90% of all real-life EEH failures in the field - * are due to poorly seated PCI cards. Only 10% or so are - * due to actual, failed cards. - */ - pr_err("EEH: PHB#%d-PE#%x has failed %d times in the\n" - "last hour and has been permanently disabled.\n" - "Please try reseating or replacing it.\n", - pe->phb->global_number, pe->addr, - pe->freeze_count); - goto perm_error; - -hard_fail: - pr_err("EEH: Unable to recover from failure from PHB#%d-PE#%x.\n" - "Please try reseating or replacing it\n", - pe->phb->global_number, pe->addr); - -perm_error: - eeh_slot_error_detail(pe, EEH_LOG_PERM); - - /* Notify all devices that they're about to go down. */ - eeh_pe_dev_traverse(pe, eeh_report_failure, NULL); - - /* Shut down the device drivers for good. */ - if (frozen_bus) - pcibios_remove_pci_devices(frozen_bus); -} - diff --git a/arch/powerpc/platforms/pseries/eeh_event.c b/arch/powerpc/platforms/pseries/eeh_event.c deleted file mode 100644 index 185bedd..0000000 --- a/arch/powerpc/platforms/pseries/eeh_event.c +++ /dev/null @@ -1,142 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Copyright (c) 2005 Linas Vepstas <linas@linas.org> - */ - -#include <linux/delay.h> -#include <linux/list.h> -#include <linux/mutex.h> -#include <linux/sched.h> -#include <linux/pci.h> -#include <linux/slab.h> -#include <linux/workqueue.h> -#include <linux/kthread.h> -#include <asm/eeh_event.h> -#include <asm/ppc-pci.h> - -/** Overview: - * EEH error states may be detected within exception handlers; - * however, the recovery processing needs to occur asynchronously - * in a normal kernel context and not an interrupt context. - * This pair of routines creates an event and queues it onto a - * work-queue, where a worker thread can drive recovery. - */ - -/* EEH event workqueue setup. */ -static DEFINE_SPINLOCK(eeh_eventlist_lock); -LIST_HEAD(eeh_eventlist); -static void eeh_thread_launcher(struct work_struct *); -DECLARE_WORK(eeh_event_wq, eeh_thread_launcher); - -/* Serialize reset sequences for a given pci device */ -DEFINE_MUTEX(eeh_event_mutex); - -/** - * eeh_event_handler - Dispatch EEH events. - * @dummy - unused - * - * The detection of a frozen slot can occur inside an interrupt, - * where it can be hard to do anything about it. The goal of this - * routine is to pull these detection events out of the context - * of the interrupt handler, and re-dispatch them for processing - * at a later time in a normal context. - */ -static int eeh_event_handler(void * dummy) -{ - unsigned long flags; - struct eeh_event *event; - struct eeh_pe *pe; - - spin_lock_irqsave(&eeh_eventlist_lock, flags); - event = NULL; - - /* Unqueue the event, get ready to process. */ - if (!list_empty(&eeh_eventlist)) { - event = list_entry(eeh_eventlist.next, struct eeh_event, list); - list_del(&event->list); - } - spin_unlock_irqrestore(&eeh_eventlist_lock, flags); - - if (event == NULL) - return 0; - - /* Serialize processing of EEH events */ - mutex_lock(&eeh_event_mutex); - pe = event->pe; - eeh_pe_state_mark(pe, EEH_PE_RECOVERING); - pr_info("EEH: Detected PCI bus error on PHB#%d-PE#%x\n", - pe->phb->global_number, pe->addr); - - set_current_state(TASK_INTERRUPTIBLE); /* Don't add to load average */ - eeh_handle_event(pe); - eeh_pe_state_clear(pe, EEH_PE_RECOVERING); - - kfree(event); - mutex_unlock(&eeh_event_mutex); - - /* If there are no new errors after an hour, clear the counter. */ - if (pe && pe->freeze_count > 0) { - msleep_interruptible(3600*1000); - if (pe->freeze_count > 0) - pe->freeze_count--; - - } - - return 0; -} - -/** - * eeh_thread_launcher - Start kernel thread to handle EEH events - * @dummy - unused - * - * This routine is called to start the kernel thread for processing - * EEH event. - */ -static void eeh_thread_launcher(struct work_struct *dummy) -{ - if (IS_ERR(kthread_run(eeh_event_handler, NULL, "eehd"))) - printk(KERN_ERR "Failed to start EEH daemon\n"); -} - -/** - * eeh_send_failure_event - Generate a PCI error event - * @pe: EEH PE - * - * This routine can be called within an interrupt context; - * the actual event will be delivered in a normal context - * (from a workqueue). - */ -int eeh_send_failure_event(struct eeh_pe *pe) -{ - unsigned long flags; - struct eeh_event *event; - - event = kzalloc(sizeof(*event), GFP_ATOMIC); - if (!event) { - pr_err("EEH: out of memory, event not handled\n"); - return -ENOMEM; - } - event->pe = pe; - - /* We may or may not be called in an interrupt context */ - spin_lock_irqsave(&eeh_eventlist_lock, flags); - list_add(&event->list, &eeh_eventlist); - spin_unlock_irqrestore(&eeh_eventlist_lock, flags); - - schedule_work(&eeh_event_wq); - - return 0; -} diff --git a/arch/powerpc/platforms/pseries/eeh_pe.c b/arch/powerpc/platforms/pseries/eeh_pe.c deleted file mode 100644 index 9d4a9e8..0000000 --- a/arch/powerpc/platforms/pseries/eeh_pe.c +++ /dev/null @@ -1,653 +0,0 @@ -/* - * The file intends to implement PE based on the information from - * platforms. Basically, there have 3 types of PEs: PHB/Bus/Device. - * All the PEs should be organized as hierarchy tree. The first level - * of the tree will be associated to existing PHBs since the particular - * PE is only meaningful in one PHB domain. - * - * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2012. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include <linux/export.h> -#include <linux/gfp.h> -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/pci.h> -#include <linux/string.h> - -#include <asm/pci-bridge.h> -#include <asm/ppc-pci.h> - -static LIST_HEAD(eeh_phb_pe); - -/** - * eeh_pe_alloc - Allocate PE - * @phb: PCI controller - * @type: PE type - * - * Allocate PE instance dynamically. - */ -static struct eeh_pe *eeh_pe_alloc(struct pci_controller *phb, int type) -{ - struct eeh_pe *pe; - - /* Allocate PHB PE */ - pe = kzalloc(sizeof(struct eeh_pe), GFP_KERNEL); - if (!pe) return NULL; - - /* Initialize PHB PE */ - pe->type = type; - pe->phb = phb; - INIT_LIST_HEAD(&pe->child_list); - INIT_LIST_HEAD(&pe->child); - INIT_LIST_HEAD(&pe->edevs); - - return pe; -} - -/** - * eeh_phb_pe_create - Create PHB PE - * @phb: PCI controller - * - * The function should be called while the PHB is detected during - * system boot or PCI hotplug in order to create PHB PE. - */ -int eeh_phb_pe_create(struct pci_controller *phb) -{ - struct eeh_pe *pe; - - /* Allocate PHB PE */ - pe = eeh_pe_alloc(phb, EEH_PE_PHB); - if (!pe) { - pr_err("%s: out of memory!\n", __func__); - return -ENOMEM; - } - - /* Put it into the list */ - eeh_lock(); - list_add_tail(&pe->child, &eeh_phb_pe); - eeh_unlock(); - - pr_debug("EEH: Add PE for PHB#%d\n", phb->global_number); - - return 0; -} - -/** - * eeh_phb_pe_get - Retrieve PHB PE based on the given PHB - * @phb: PCI controller - * - * The overall PEs form hierarchy tree. The first layer of the - * hierarchy tree is composed of PHB PEs. The function is used - * to retrieve the corresponding PHB PE according to the given PHB. - */ -static struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb) -{ - struct eeh_pe *pe; - - list_for_each_entry(pe, &eeh_phb_pe, child) { - /* - * Actually, we needn't check the type since - * the PE for PHB has been determined when that - * was created. - */ - if ((pe->type & EEH_PE_PHB) && pe->phb == phb) - return pe; - } - - return NULL; -} - -/** - * eeh_pe_next - Retrieve the next PE in the tree - * @pe: current PE - * @root: root PE - * - * The function is used to retrieve the next PE in the - * hierarchy PE tree. - */ -static struct eeh_pe *eeh_pe_next(struct eeh_pe *pe, - struct eeh_pe *root) -{ - struct list_head *next = pe->child_list.next; - - if (next == &pe->child_list) { - while (1) { - if (pe == root) - return NULL; - next = pe->child.next; - if (next != &pe->parent->child_list) - break; - pe = pe->parent; - } - } - - return list_entry(next, struct eeh_pe, child); -} - -/** - * eeh_pe_traverse - Traverse PEs in the specified PHB - * @root: root PE - * @fn: callback - * @flag: extra parameter to callback - * - * The function is used to traverse the specified PE and its - * child PEs. The traversing is to be terminated once the - * callback returns something other than NULL, or no more PEs - * to be traversed. - */ -static void *eeh_pe_traverse(struct eeh_pe *root, - eeh_traverse_func fn, void *flag) -{ - struct eeh_pe *pe; - void *ret; - - for (pe = root; pe; pe = eeh_pe_next(pe, root)) { - ret = fn(pe, flag); - if (ret) return ret; - } - - return NULL; -} - -/** - * eeh_pe_dev_traverse - Traverse the devices from the PE - * @root: EEH PE - * @fn: function callback - * @flag: extra parameter to callback - * - * The function is used to traverse the devices of the specified - * PE and its child PEs. - */ -void *eeh_pe_dev_traverse(struct eeh_pe *root, - eeh_traverse_func fn, void *flag) -{ - struct eeh_pe *pe; - struct eeh_dev *edev; - void *ret; - - if (!root) { - pr_warning("%s: Invalid PE %p\n", __func__, root); - return NULL; - } - - eeh_lock(); - - /* Traverse root PE */ - for (pe = root; pe; pe = eeh_pe_next(pe, root)) { - eeh_pe_for_each_dev(pe, edev) { - ret = fn(edev, flag); - if (ret) { - eeh_unlock(); - return ret; - } - } - } - - eeh_unlock(); - - return NULL; -} - -/** - * __eeh_pe_get - Check the PE address - * @data: EEH PE - * @flag: EEH device - * - * For one particular PE, it can be identified by PE address - * or tranditional BDF address. BDF address is composed of - * Bus/Device/Function number. The extra data referred by flag - * indicates which type of address should be used. - */ -static void *__eeh_pe_get(void *data, void *flag) -{ - struct eeh_pe *pe = (struct eeh_pe *)data; - struct eeh_dev *edev = (struct eeh_dev *)flag; - - /* Unexpected PHB PE */ - if (pe->type & EEH_PE_PHB) - return NULL; - - /* We prefer PE address */ - if (edev->pe_config_addr && - (edev->pe_config_addr == pe->addr)) - return pe; - - /* Try BDF address */ - if (edev->pe_config_addr && - (edev->config_addr == pe->config_addr)) - return pe; - - return NULL; -} - -/** - * eeh_pe_get - Search PE based on the given address - * @edev: EEH device - * - * Search the corresponding PE based on the specified address which - * is included in the eeh device. The function is used to check if - * the associated PE has been created against the PE address. It's - * notable that the PE address has 2 format: traditional PE address - * which is composed of PCI bus/device/function number, or unified - * PE address. - */ -static struct eeh_pe *eeh_pe_get(struct eeh_dev *edev) -{ - struct eeh_pe *root = eeh_phb_pe_get(edev->phb); - struct eeh_pe *pe; - - pe = eeh_pe_traverse(root, __eeh_pe_get, edev); - - return pe; -} - -/** - * eeh_pe_get_parent - Retrieve the parent PE - * @edev: EEH device - * - * The whole PEs existing in the system are organized as hierarchy - * tree. The function is used to retrieve the parent PE according - * to the parent EEH device. - */ -static struct eeh_pe *eeh_pe_get_parent(struct eeh_dev *edev) -{ - struct device_node *dn; - struct eeh_dev *parent; - - /* - * It might have the case for the indirect parent - * EEH device already having associated PE, but - * the direct parent EEH device doesn't have yet. - */ - dn = edev->dn->parent; - while (dn) { - /* We're poking out of PCI territory */ - if (!PCI_DN(dn)) return NULL; - - parent = of_node_to_eeh_dev(dn); - /* We're poking out of PCI territory */ - if (!parent) return NULL; - - if (parent->pe) - return parent->pe; - - dn = dn->parent; - } - - return NULL; -} - -/** - * eeh_add_to_parent_pe - Add EEH device to parent PE - * @edev: EEH device - * - * Add EEH device to the parent PE. If the parent PE already - * exists, the PE type will be changed to EEH_PE_BUS. Otherwise, - * we have to create new PE to hold the EEH device and the new - * PE will be linked to its parent PE as well. - */ -int eeh_add_to_parent_pe(struct eeh_dev *edev) -{ - struct eeh_pe *pe, *parent; - - eeh_lock(); - - /* - * Search the PE has been existing or not according - * to the PE address. If that has been existing, the - * PE should be composed of PCI bus and its subordinate - * components. - */ - pe = eeh_pe_get(edev); - if (pe && !(pe->type & EEH_PE_INVALID)) { - if (!edev->pe_config_addr) { - eeh_unlock(); - pr_err("%s: PE with addr 0x%x already exists\n", - __func__, edev->config_addr); - return -EEXIST; - } - - /* Mark the PE as type of PCI bus */ - pe->type = EEH_PE_BUS; - edev->pe = pe; - - /* Put the edev to PE */ - list_add_tail(&edev->list, &pe->edevs); - eeh_unlock(); - pr_debug("EEH: Add %s to Bus PE#%x\n", - edev->dn->full_name, pe->addr); - - return 0; - } else if (pe && (pe->type & EEH_PE_INVALID)) { - list_add_tail(&edev->list, &pe->edevs); - edev->pe = pe; - /* - * We're running to here because of PCI hotplug caused by - * EEH recovery. We need clear EEH_PE_INVALID until the top. - */ - parent = pe; - while (parent) { - if (!(parent->type & EEH_PE_INVALID)) - break; - parent->type &= ~EEH_PE_INVALID; - parent = parent->parent; - } - eeh_unlock(); - pr_debug("EEH: Add %s to Device PE#%x, Parent PE#%x\n", - edev->dn->full_name, pe->addr, pe->parent->addr); - - return 0; - } - - /* Create a new EEH PE */ - pe = eeh_pe_alloc(edev->phb, EEH_PE_DEVICE); - if (!pe) { - eeh_unlock(); - pr_err("%s: out of memory!\n", __func__); - return -ENOMEM; - } - pe->addr = edev->pe_config_addr; - pe->config_addr = edev->config_addr; - - /* - * Put the new EEH PE into hierarchy tree. If the parent - * can't be found, the newly created PE will be attached - * to PHB directly. Otherwise, we have to associate the - * PE with its parent. - */ - parent = eeh_pe_get_parent(edev); - if (!parent) { - parent = eeh_phb_pe_get(edev->phb); - if (!parent) { - eeh_unlock(); - pr_err("%s: No PHB PE is found (PHB Domain=%d)\n", - __func__, edev->phb->global_number); - edev->pe = NULL; - kfree(pe); - return -EEXIST; - } - } - pe->parent = parent; - - /* - * Put the newly created PE into the child list and - * link the EEH device accordingly. - */ - list_add_tail(&pe->child, &parent->child_list); - list_add_tail(&edev->list, &pe->edevs); - edev->pe = pe; - eeh_unlock(); - pr_debug("EEH: Add %s to Device PE#%x, Parent PE#%x\n", - edev->dn->full_name, pe->addr, pe->parent->addr); - - return 0; -} - -/** - * eeh_rmv_from_parent_pe - Remove one EEH device from the associated PE - * @edev: EEH device - * @purge_pe: remove PE or not - * - * The PE hierarchy tree might be changed when doing PCI hotplug. - * Also, the PCI devices or buses could be removed from the system - * during EEH recovery. So we have to call the function remove the - * corresponding PE accordingly if necessary. - */ -int eeh_rmv_from_parent_pe(struct eeh_dev *edev, int purge_pe) -{ - struct eeh_pe *pe, *parent, *child; - int cnt; - - if (!edev->pe) { - pr_warning("%s: No PE found for EEH device %s\n", - __func__, edev->dn->full_name); - return -EEXIST; - } - - eeh_lock(); - - /* Remove the EEH device */ - pe = edev->pe; - edev->pe = NULL; - list_del(&edev->list); - - /* - * Check if the parent PE includes any EEH devices. - * If not, we should delete that. Also, we should - * delete the parent PE if it doesn't have associated - * child PEs and EEH devices. - */ - while (1) { - parent = pe->parent; - if (pe->type & EEH_PE_PHB) - break; - - if (purge_pe) { - if (list_empty(&pe->edevs) && - list_empty(&pe->child_list)) { - list_del(&pe->child); - kfree(pe); - } else { - break; - } - } else { - if (list_empty(&pe->edevs)) { - cnt = 0; - list_for_each_entry(child, &pe->child_list, child) { - if (!(child->type & EEH_PE_INVALID)) { - cnt++; - break; - } - } - - if (!cnt) - pe->type |= EEH_PE_INVALID; - else - break; - } - } - - pe = parent; - } - - eeh_unlock(); - - return 0; -} - -/** - * __eeh_pe_state_mark - Mark the state for the PE - * @data: EEH PE - * @flag: state - * - * The function is used to mark the indicated state for the given - * PE. Also, the associated PCI devices will be put into IO frozen - * state as well. - */ -static void *__eeh_pe_state_mark(void *data, void *flag) -{ - struct eeh_pe *pe = (struct eeh_pe *)data; - int state = *((int *)flag); - struct eeh_dev *tmp; - struct pci_dev *pdev; - - /* - * Mark the PE with the indicated state. Also, - * the associated PCI device will be put into - * I/O frozen state to avoid I/O accesses from - * the PCI device driver. - */ - pe->state |= state; - eeh_pe_for_each_dev(pe, tmp) { - pdev = eeh_dev_to_pci_dev(tmp); - if (pdev) - pdev->error_state = pci_channel_io_frozen; - } - - return NULL; -} - -/** - * eeh_pe_state_mark - Mark specified state for PE and its associated device - * @pe: EEH PE - * - * EEH error affects the current PE and its child PEs. The function - * is used to mark appropriate state for the affected PEs and the - * associated devices. - */ -void eeh_pe_state_mark(struct eeh_pe *pe, int state) -{ - eeh_lock(); - eeh_pe_traverse(pe, __eeh_pe_state_mark, &state); - eeh_unlock(); -} - -/** - * __eeh_pe_state_clear - Clear state for the PE - * @data: EEH PE - * @flag: state - * - * The function is used to clear the indicated state from the - * given PE. Besides, we also clear the check count of the PE - * as well. - */ -static void *__eeh_pe_state_clear(void *data, void *flag) -{ - struct eeh_pe *pe = (struct eeh_pe *)data; - int state = *((int *)flag); - - pe->state &= ~state; - pe->check_count = 0; - - return NULL; -} - -/** - * eeh_pe_state_clear - Clear state for the PE and its children - * @pe: PE - * @state: state to be cleared - * - * When the PE and its children has been recovered from error, - * we need clear the error state for that. The function is used - * for the purpose. - */ -void eeh_pe_state_clear(struct eeh_pe *pe, int state) -{ - eeh_lock(); - eeh_pe_traverse(pe, __eeh_pe_state_clear, &state); - eeh_unlock(); -} - -/** - * eeh_restore_one_device_bars - Restore the Base Address Registers for one device - * @data: EEH device - * @flag: Unused - * - * Loads the PCI configuration space base address registers, - * the expansion ROM base address, the latency timer, and etc. - * from the saved values in the device node. - */ -static void *eeh_restore_one_device_bars(void *data, void *flag) -{ - int i; - u32 cmd; - struct eeh_dev *edev = (struct eeh_dev *)data; - struct device_node *dn = eeh_dev_to_of_node(edev); - - for (i = 4; i < 10; i++) - eeh_ops->write_config(dn, i*4, 4, edev->config_space[i]); - /* 12 == Expansion ROM Address */ - eeh_ops->write_config(dn, 12*4, 4, edev->config_space[12]); - -#define BYTE_SWAP(OFF) (8*((OFF)/4)+3-(OFF)) -#define SAVED_BYTE(OFF) (((u8 *)(edev->config_space))[BYTE_SWAP(OFF)]) - - eeh_ops->write_config(dn, PCI_CACHE_LINE_SIZE, 1, - SAVED_BYTE(PCI_CACHE_LINE_SIZE)); - eeh_ops->write_config(dn, PCI_LATENCY_TIMER, 1, - SAVED_BYTE(PCI_LATENCY_TIMER)); - - /* max latency, min grant, interrupt pin and line */ - eeh_ops->write_config(dn, 15*4, 4, edev->config_space[15]); - - /* - * Restore PERR & SERR bits, some devices require it, - * don't touch the other command bits - */ - eeh_ops->read_config(dn, PCI_COMMAND, 4, &cmd); - if (edev->config_space[1] & PCI_COMMAND_PARITY) - cmd |= PCI_COMMAND_PARITY; - else - cmd &= ~PCI_COMMAND_PARITY; - if (edev->config_space[1] & PCI_COMMAND_SERR) - cmd |= PCI_COMMAND_SERR; - else - cmd &= ~PCI_COMMAND_SERR; - eeh_ops->write_config(dn, PCI_COMMAND, 4, cmd); - - return NULL; -} - -/** - * eeh_pe_restore_bars - Restore the PCI config space info - * @pe: EEH PE - * - * This routine performs a recursive walk to the children - * of this device as well. - */ -void eeh_pe_restore_bars(struct eeh_pe *pe) -{ - /* - * We needn't take the EEH lock since eeh_pe_dev_traverse() - * will take that. - */ - eeh_pe_dev_traverse(pe, eeh_restore_one_device_bars, NULL); -} - -/** - * eeh_pe_bus_get - Retrieve PCI bus according to the given PE - * @pe: EEH PE - * - * Retrieve the PCI bus according to the given PE. Basically, - * there're 3 types of PEs: PHB/Bus/Device. For PHB PE, the - * primary PCI bus will be retrieved. The parent bus will be - * returned for BUS PE. However, we don't have associated PCI - * bus for DEVICE PE. - */ -struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe) -{ - struct pci_bus *bus = NULL; - struct eeh_dev *edev; - struct pci_dev *pdev; - - eeh_lock(); - - if (pe->type & EEH_PE_PHB) { - bus = pe->phb->bus; - } else if (pe->type & EEH_PE_BUS || - pe->type & EEH_PE_DEVICE) { - edev = list_first_entry(&pe->edevs, struct eeh_dev, list); - pdev = eeh_dev_to_pci_dev(edev); - if (pdev) - bus = pdev->bus; - } - - eeh_unlock(); - - return bus; -} diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index b456b15..ccb633e 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -133,6 +133,48 @@ static int pseries_eeh_init(void) return 0; } +static int pseries_eeh_cap_start(struct device_node *dn) +{ + struct pci_dn *pdn = PCI_DN(dn); + u32 status; + + if (!pdn) + return 0; + + rtas_read_config(pdn, PCI_STATUS, 2, &status); + if (!(status & PCI_STATUS_CAP_LIST)) + return 0; + + return PCI_CAPABILITY_LIST; +} + + +static int pseries_eeh_find_cap(struct device_node *dn, int cap) +{ + struct pci_dn *pdn = PCI_DN(dn); + int pos = pseries_eeh_cap_start(dn); + int cnt = 48; /* Maximal number of capabilities */ + u32 id; + + if (!pos) + return 0; + + while (cnt--) { + rtas_read_config(pdn, pos, 1, &pos); + if (pos < 0x40) + break; + pos &= ~3; + rtas_read_config(pdn, pos + PCI_CAP_LIST_ID, 1, &id); + if (id == 0xff) + break; + if (id == cap) + return pos; + pos += PCI_CAP_LIST_NEXT; + } + + return 0; +} + /** * pseries_eeh_of_probe - EEH probe on the given device * @dn: OF node @@ -146,30 +188,52 @@ static void *pseries_eeh_of_probe(struct device_node *dn, void *flag) { struct eeh_dev *edev; struct eeh_pe pe; - const u32 *class_code, *vendor_id, *device_id; - const u32 *regs; + struct pci_dn *pdn = PCI_DN(dn); + const __be32 *classp, *vendorp, *devicep; + u32 class_code; + const __be32 *regs; + u32 pcie_flags; int enable = 0; int ret; /* Retrieve OF node and eeh device */ edev = of_node_to_eeh_dev(dn); - if (!of_device_is_available(dn)) + if (edev->pe || !of_device_is_available(dn)) return NULL; /* Retrieve class/vendor/device IDs */ - class_code = of_get_property(dn, "class-code", NULL); - vendor_id = of_get_property(dn, "vendor-id", NULL); - device_id = of_get_property(dn, "device-id", NULL); + classp = of_get_property(dn, "class-code", NULL); + vendorp = of_get_property(dn, "vendor-id", NULL); + devicep = of_get_property(dn, "device-id", NULL); /* Skip for bad OF node or PCI-ISA bridge */ - if (!class_code || !vendor_id || !device_id) + if (!classp || !vendorp || !devicep) return NULL; if (dn->type && !strcmp(dn->type, "isa")) return NULL; - /* Update class code and mode of eeh device */ - edev->class_code = *class_code; - edev->mode = 0; + class_code = of_read_number(classp, 1); + + /* + * Update class code and mode of eeh device. We need + * correctly reflects that current device is root port + * or PCIe switch downstream port. + */ + edev->class_code = class_code; + edev->pcie_cap = pseries_eeh_find_cap(dn, PCI_CAP_ID_EXP); + edev->mode &= 0xFFFFFF00; + if ((edev->class_code >> 8) == PCI_CLASS_BRIDGE_PCI) { + edev->mode |= EEH_DEV_BRIDGE; + if (edev->pcie_cap) { + rtas_read_config(pdn, edev->pcie_cap + PCI_EXP_FLAGS, + 2, &pcie_flags); + pcie_flags = (pcie_flags & PCI_EXP_FLAGS_TYPE) >> 4; + if (pcie_flags == PCI_EXP_TYPE_ROOT_PORT) + edev->mode |= EEH_DEV_ROOT_PORT; + else if (pcie_flags == PCI_EXP_TYPE_DOWNSTREAM) + edev->mode |= EEH_DEV_DS_PORT; + } + } /* Retrieve the device address */ regs = of_get_property(dn, "reg", NULL); @@ -182,12 +246,12 @@ static void *pseries_eeh_of_probe(struct device_node *dn, void *flag) /* Initialize the fake PE */ memset(&pe, 0, sizeof(struct eeh_pe)); pe.phb = edev->phb; - pe.config_addr = regs[0]; + pe.config_addr = of_read_number(regs, 1); /* Enable EEH on the device */ ret = eeh_ops->set_option(&pe, EEH_OPT_ENABLE); if (!ret) { - edev->config_addr = regs[0]; + edev->config_addr = of_read_number(regs, 1); /* Retrieve PE address */ edev->pe_config_addr = eeh_ops->get_pe_addr(&pe); pe.addr = edev->pe_config_addr; diff --git a/arch/powerpc/platforms/pseries/eeh_sysfs.c b/arch/powerpc/platforms/pseries/eeh_sysfs.c deleted file mode 100644 index d377083..0000000 --- a/arch/powerpc/platforms/pseries/eeh_sysfs.c +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Sysfs entries for PCI Error Recovery for PAPR-compliant platform. - * Copyright IBM Corporation 2007 - * Copyright Linas Vepstas <linas@austin.ibm.com> 2007 - * - * All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or (at - * your option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Send comments and feedback to Linas Vepstas <linas@austin.ibm.com> - */ -#include <linux/pci.h> -#include <linux/stat.h> -#include <asm/ppc-pci.h> -#include <asm/pci-bridge.h> - -/** - * EEH_SHOW_ATTR -- Create sysfs entry for eeh statistic - * @_name: name of file in sysfs directory - * @_memb: name of member in struct pci_dn to access - * @_format: printf format for display - * - * All of the attributes look very similar, so just - * auto-gen a cut-n-paste routine to display them. - */ -#define EEH_SHOW_ATTR(_name,_memb,_format) \ -static ssize_t eeh_show_##_name(struct device *dev, \ - struct device_attribute *attr, char *buf) \ -{ \ - struct pci_dev *pdev = to_pci_dev(dev); \ - struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev); \ - \ - if (!edev) \ - return 0; \ - \ - return sprintf(buf, _format "\n", edev->_memb); \ -} \ -static DEVICE_ATTR(_name, S_IRUGO, eeh_show_##_name, NULL); - -EEH_SHOW_ATTR(eeh_mode, mode, "0x%x"); -EEH_SHOW_ATTR(eeh_config_addr, config_addr, "0x%x"); -EEH_SHOW_ATTR(eeh_pe_config_addr, pe_config_addr, "0x%x"); - -void eeh_sysfs_add_device(struct pci_dev *pdev) -{ - int rc=0; - - rc += device_create_file(&pdev->dev, &dev_attr_eeh_mode); - rc += device_create_file(&pdev->dev, &dev_attr_eeh_config_addr); - rc += device_create_file(&pdev->dev, &dev_attr_eeh_pe_config_addr); - - if (rc) - printk(KERN_WARNING "EEH: Unable to create sysfs entries\n"); -} - -void eeh_sysfs_remove_device(struct pci_dev *pdev) -{ - device_remove_file(&pdev->dev, &dev_attr_eeh_mode); - device_remove_file(&pdev->dev, &dev_attr_eeh_config_addr); - device_remove_file(&pdev->dev, &dev_attr_eeh_pe_config_addr); -} - diff --git a/arch/powerpc/platforms/pseries/event_sources.c b/arch/powerpc/platforms/pseries/event_sources.c index 2605c31..18380e8 100644 --- a/arch/powerpc/platforms/pseries/event_sources.c +++ b/arch/powerpc/platforms/pseries/event_sources.c @@ -25,7 +25,7 @@ void request_event_sources_irqs(struct device_node *np, const char *name) { int i, index, count = 0; - struct of_irq oirq; + struct of_phandle_args oirq; const u32 *opicprop; unsigned int opicplen; unsigned int virqs[16]; @@ -55,13 +55,11 @@ void request_event_sources_irqs(struct device_node *np, /* Else use normal interrupt tree parsing */ else { /* First try to do a proper OF tree parsing */ - for (index = 0; of_irq_map_one(np, index, &oirq) == 0; + for (index = 0; of_irq_parse_one(np, index, &oirq) == 0; index++) { if (count > 15) break; - virqs[count] = irq_create_of_mapping(oirq.controller, - oirq.specifier, - oirq.size); + virqs[count] = irq_create_of_mapping(&oirq); if (virqs[count] == NO_IRQ) { pr_err("event-sources: Unable to allocate " "interrupt number for %s\n", diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index 217ca5c..82789e7 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -30,7 +30,8 @@ #include <asm/machdep.h> #include <asm/vdso_datapage.h> #include <asm/xics.h> -#include "plpar_wrappers.h" +#include <asm/plpar_wrappers.h> + #include "offline_states.h" /* This version can't take the spinlock, because it never returns */ @@ -123,7 +124,7 @@ static void pseries_mach_cpu_die(void) cede_latency_hint = 2; get_lppaca()->idle = 1; - if (!get_lppaca()->shared_proc) + if (!lppaca_shared_proc(get_lppaca())) get_lppaca()->donate_dedicated_cpu = 1; while (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) { @@ -137,7 +138,7 @@ static void pseries_mach_cpu_die(void) local_irq_disable(); - if (!get_lppaca()->shared_proc) + if (!lppaca_shared_proc(get_lppaca())) get_lppaca()->donate_dedicated_cpu = 0; get_lppaca()->idle = 0; diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 9a432de..9590dbb 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -10,12 +10,14 @@ */ #include <linux/of.h> +#include <linux/of_address.h> #include <linux/memblock.h> #include <linux/vmalloc.h> #include <linux/memory.h> #include <asm/firmware.h> #include <asm/machdep.h> +#include <asm/prom.h> #include <asm/sparsemem.h> static unsigned long get_memblock_size(void) diff --git a/arch/powerpc/platforms/pseries/hvconsole.c b/arch/powerpc/platforms/pseries/hvconsole.c index b344f94..849b29b 100644 --- a/arch/powerpc/platforms/pseries/hvconsole.c +++ b/arch/powerpc/platforms/pseries/hvconsole.c @@ -28,7 +28,7 @@ #include <linux/errno.h> #include <asm/hvcall.h> #include <asm/hvconsole.h> -#include "plpar_wrappers.h" +#include <asm/plpar_wrappers.h> /** * hvc_get_chars - retrieve characters from firmware for denoted vterm adatper @@ -40,10 +40,16 @@ */ int hvc_get_chars(uint32_t vtermno, char *buf, int count) { - unsigned long got; + long ret; + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; + unsigned long *lbuf = (unsigned long *)buf; + + ret = plpar_hcall(H_GET_TERM_CHAR, retbuf, vtermno); + lbuf[0] = be64_to_cpu(retbuf[1]); + lbuf[1] = be64_to_cpu(retbuf[2]); - if (plpar_get_term_char(vtermno, &got, buf) == H_SUCCESS) - return got; + if (ret == H_SUCCESS) + return retbuf[0]; return 0; } @@ -69,8 +75,9 @@ int hvc_put_chars(uint32_t vtermno, const char *buf, int count) if (count > MAX_VIO_PUT_CHARS) count = MAX_VIO_PUT_CHARS; - ret = plpar_hcall_norets(H_PUT_TERM_CHAR, vtermno, count, lbuf[0], - lbuf[1]); + ret = plpar_hcall_norets(H_PUT_TERM_CHAR, vtermno, count, + cpu_to_be64(lbuf[0]), + cpu_to_be64(lbuf[1])); if (ret == H_SUCCESS) return count; if (ret == H_BUSY) diff --git a/arch/powerpc/platforms/pseries/io_event_irq.c b/arch/powerpc/platforms/pseries/io_event_irq.c index ef9d9d8..5ea88d1 100644 --- a/arch/powerpc/platforms/pseries/io_event_irq.c +++ b/arch/powerpc/platforms/pseries/io_event_irq.c @@ -115,7 +115,7 @@ static struct pseries_io_event * ioei_find_event(struct rtas_error_log *elog) * by scope or event type alone. For example, Torrent ISR route change * event is reported with scope 0x00 (Not Applicatable) rather than * 0x3B (Torrent-hub). It is better to let the clients to identify - * who owns the the event. + * who owns the event. */ static irqreturn_t ioei_interrupt(int irq, void *dev_id) diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 86ae364..f253361 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -48,12 +48,11 @@ #include <asm/ppc-pci.h> #include <asm/udbg.h> #include <asm/mmzone.h> - -#include "plpar_wrappers.h" +#include <asm/plpar_wrappers.h> static void tce_invalidate_pSeries_sw(struct iommu_table *tbl, - u64 *startp, u64 *endp) + __be64 *startp, __be64 *endp) { u64 __iomem *invalidate = (u64 __iomem *)tbl->it_index; unsigned long start, end, inc; @@ -87,7 +86,7 @@ static int tce_build_pSeries(struct iommu_table *tbl, long index, struct dma_attrs *attrs) { u64 proto_tce; - u64 *tcep, *tces; + __be64 *tcep, *tces; u64 rpn; proto_tce = TCE_PCI_READ; // Read allowed @@ -95,12 +94,12 @@ static int tce_build_pSeries(struct iommu_table *tbl, long index, if (direction != DMA_TO_DEVICE) proto_tce |= TCE_PCI_WRITE; - tces = tcep = ((u64 *)tbl->it_base) + index; + tces = tcep = ((__be64 *)tbl->it_base) + index; while (npages--) { /* can't move this out since we might cross MEMBLOCK boundary */ rpn = __pa(uaddr) >> TCE_SHIFT; - *tcep = proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT; + *tcep = cpu_to_be64(proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT); uaddr += TCE_PAGE_SIZE; tcep++; @@ -114,9 +113,9 @@ static int tce_build_pSeries(struct iommu_table *tbl, long index, static void tce_free_pSeries(struct iommu_table *tbl, long index, long npages) { - u64 *tcep, *tces; + __be64 *tcep, *tces; - tces = tcep = ((u64 *)tbl->it_base) + index; + tces = tcep = ((__be64 *)tbl->it_base) + index; while (npages--) *(tcep++) = 0; @@ -127,11 +126,11 @@ static void tce_free_pSeries(struct iommu_table *tbl, long index, long npages) static unsigned long tce_get_pseries(struct iommu_table *tbl, long index) { - u64 *tcep; + __be64 *tcep; - tcep = ((u64 *)tbl->it_base) + index; + tcep = ((__be64 *)tbl->it_base) + index; - return *tcep; + return be64_to_cpu(*tcep); } static void tce_free_pSeriesLP(struct iommu_table*, long, long); @@ -178,7 +177,7 @@ static int tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum, return ret; } -static DEFINE_PER_CPU(u64 *, tce_page); +static DEFINE_PER_CPU(__be64 *, tce_page); static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages, unsigned long uaddr, @@ -187,7 +186,7 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, { u64 rc = 0; u64 proto_tce; - u64 *tcep; + __be64 *tcep; u64 rpn; long l, limit; long tcenum_start = tcenum, npages_start = npages; @@ -207,7 +206,7 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, * from iommu_alloc{,_sg}() */ if (!tcep) { - tcep = (u64 *)__get_free_page(GFP_ATOMIC); + tcep = (__be64 *)__get_free_page(GFP_ATOMIC); /* If allocation fails, fall back to the loop implementation */ if (!tcep) { local_irq_restore(flags); @@ -231,7 +230,7 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, limit = min_t(long, npages, 4096/TCE_ENTRY_SIZE); for (l = 0; l < limit; l++) { - tcep[l] = proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT; + tcep[l] = cpu_to_be64(proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT); rpn++; } @@ -330,16 +329,16 @@ struct direct_window { /* Dynamic DMA Window support */ struct ddw_query_response { - u32 windows_available; - u32 largest_available_block; - u32 page_size; - u32 migration_capable; + __be32 windows_available; + __be32 largest_available_block; + __be32 page_size; + __be32 migration_capable; }; struct ddw_create_response { - u32 liobn; - u32 addr_hi; - u32 addr_lo; + __be32 liobn; + __be32 addr_hi; + __be32 addr_lo; }; static LIST_HEAD(direct_window_list); @@ -393,7 +392,8 @@ static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn, unsigned long num_pfn, const void *arg) { const struct dynamic_dma_window_prop *maprange = arg; - u64 *tcep, tce_size, num_tce, dma_offset, next, proto_tce, liobn; + u64 tce_size, num_tce, dma_offset, next, proto_tce, liobn; + __be64 *tcep; u32 tce_shift; u64 rc = 0; long l, limit; @@ -402,7 +402,7 @@ static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn, tcep = __get_cpu_var(tce_page); if (!tcep) { - tcep = (u64 *)__get_free_page(GFP_ATOMIC); + tcep = (__be64 *)__get_free_page(GFP_ATOMIC); if (!tcep) { local_irq_enable(); return -ENOMEM; @@ -436,7 +436,7 @@ static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn, dma_offset = next + be64_to_cpu(maprange->dma_base); for (l = 0; l < limit; l++) { - tcep[l] = proto_tce | next; + tcep[l] = cpu_to_be64(proto_tce | next); next += tce_size; } @@ -530,7 +530,7 @@ static void iommu_table_setparms(struct pci_controller *phb, static void iommu_table_setparms_lpar(struct pci_controller *phb, struct device_node *dn, struct iommu_table *tbl, - const void *dma_window) + const __be32 *dma_window) { unsigned long offset, size; @@ -614,6 +614,7 @@ static void pci_dma_bus_setup_pSeries(struct pci_bus *bus) iommu_table_setparms(pci->phb, dn, tbl); pci->iommu_table = iommu_init_table(tbl, pci->phb->node); + iommu_register_group(tbl, pci_domain_nr(bus), 0); /* Divide the rest (1.75GB) among the children */ pci->phb->dma_window_size = 0x80000000ul; @@ -629,7 +630,7 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus) struct iommu_table *tbl; struct device_node *dn, *pdn; struct pci_dn *ppci; - const void *dma_window = NULL; + const __be32 *dma_window = NULL; dn = pci_bus_to_OF_node(bus); @@ -658,6 +659,7 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus) ppci->phb->node); iommu_table_setparms_lpar(ppci->phb, pdn, tbl, dma_window); ppci->iommu_table = iommu_init_table(tbl, ppci->phb->node); + iommu_register_group(tbl, pci_domain_nr(bus), 0); pr_debug(" created table: %p\n", ppci->iommu_table); } } @@ -684,6 +686,7 @@ static void pci_dma_dev_setup_pSeries(struct pci_dev *dev) phb->node); iommu_table_setparms(phb, dn, tbl); PCI_DN(dn)->iommu_table = iommu_init_table(tbl, phb->node); + iommu_register_group(tbl, pci_domain_nr(phb->bus), 0); set_iommu_table_base(&dev->dev, PCI_DN(dn)->iommu_table); return; } @@ -778,7 +781,7 @@ static u64 find_existing_ddw(struct device_node *pdn) list_for_each_entry(window, &direct_window_list, list) { if (window->device == pdn) { direct64 = window->prop; - dma_addr = direct64->dma_base; + dma_addr = be64_to_cpu(direct64->dma_base); break; } } @@ -1043,11 +1046,11 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) dev_dbg(&dev->dev, "no free dynamic windows"); goto out_restore_window; } - if (query.page_size & 4) { + if (be32_to_cpu(query.page_size) & 4) { page_shift = 24; /* 16MB */ - } else if (query.page_size & 2) { + } else if (be32_to_cpu(query.page_size) & 2) { page_shift = 16; /* 64kB */ - } else if (query.page_size & 1) { + } else if (be32_to_cpu(query.page_size) & 1) { page_shift = 12; /* 4kB */ } else { dev_dbg(&dev->dev, "no supported direct page size in mask %x", @@ -1057,7 +1060,7 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) /* verify the window * number of ptes will map the partition */ /* check largest block * page size > max memory hotplug addr */ max_addr = memory_hotplug_max(); - if (query.largest_available_block < (max_addr >> page_shift)) { + if (be32_to_cpu(query.largest_available_block) < (max_addr >> page_shift)) { dev_dbg(&dev->dev, "can't map partiton max 0x%llx with %u " "%llu-sized pages\n", max_addr, query.largest_available_block, 1ULL << page_shift); @@ -1083,7 +1086,7 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn) if (ret != 0) goto out_free_prop; - ddwprop->liobn = cpu_to_be32(create.liobn); + ddwprop->liobn = create.liobn; ddwprop->dma_base = cpu_to_be64(of_read_number(&create.addr_hi, 2)); ddwprop->tce_shift = cpu_to_be32(page_shift); ddwprop->window_shift = cpu_to_be32(len); @@ -1149,7 +1152,7 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) { struct device_node *pdn, *dn; struct iommu_table *tbl; - const void *dma_window = NULL; + const __be32 *dma_window = NULL; struct pci_dn *pci; pr_debug("pci_dma_dev_setup_pSeriesLP: %s\n", pci_name(dev)); @@ -1184,6 +1187,7 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) pci->phb->node); iommu_table_setparms_lpar(pci->phb, pdn, tbl, dma_window); pci->iommu_table = iommu_init_table(tbl, pci->phb->node); + iommu_register_group(tbl, pci_domain_nr(pci->phb->bus), 0); pr_debug(" created table: %p\n", pci->iommu_table); } else { pr_debug(" found DMA window, table: %p\n", pci->iommu_table); @@ -1197,7 +1201,7 @@ static int dma_set_mask_pSeriesLP(struct device *dev, u64 dma_mask) bool ddw_enabled = false; struct device_node *pdn, *dn; struct pci_dev *pdev; - const void *dma_window = NULL; + const __be32 *dma_window = NULL; u64 dma_offset; if (!dev->dma_mask) diff --git a/arch/powerpc/platforms/pseries/kexec.c b/arch/powerpc/platforms/pseries/kexec.c index 7d94bdc..13fa95b3 100644 --- a/arch/powerpc/platforms/pseries/kexec.c +++ b/arch/powerpc/platforms/pseries/kexec.c @@ -17,9 +17,9 @@ #include <asm/mpic.h> #include <asm/xics.h> #include <asm/smp.h> +#include <asm/plpar_wrappers.h> #include "pseries.h" -#include "plpar_wrappers.h" static void pseries_kexec_cpu_down(int crash_shutdown, int secondary) { diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 6d62072..4fca3de 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -41,10 +41,17 @@ #include <asm/smp.h> #include <asm/trace.h> #include <asm/firmware.h> +#include <asm/plpar_wrappers.h> -#include "plpar_wrappers.h" #include "pseries.h" +/* Flag bits for H_BULK_REMOVE */ +#define HBR_REQUEST 0x4000000000000000UL +#define HBR_RESPONSE 0x8000000000000000UL +#define HBR_END 0xc000000000000000UL +#define HBR_AVPN 0x0200000000000000UL +#define HBR_ANDCOND 0x0100000000000000UL + /* in hvCall.S */ EXPORT_SYMBOL(plpar_hcall); @@ -61,9 +68,18 @@ void vpa_init(int cpu) struct paca_struct *pp; struct dtl_entry *dtl; + /* + * The spec says it "may be problematic" if CPU x registers the VPA of + * CPU y. We should never do that, but wail if we ever do. + */ + WARN_ON(cpu != smp_processor_id()); + if (cpu_has_feature(CPU_FTR_ALTIVEC)) lppaca_of(cpu).vmxregs_in_use = 1; + if (cpu_has_feature(CPU_FTR_ARCH_207S)) + lppaca_of(cpu).ebb_regs_in_use = 1; + addr = __pa(&lppaca_of(cpu)); ret = register_vpa(hwcpu, addr); @@ -96,7 +112,7 @@ void vpa_init(int cpu) lppaca_of(cpu).dtl_idx = 0; /* hypervisor reads buffer length from this field */ - dtl->enqueue_to_dispatch_time = DISPATCH_LOG_BYTES; + dtl->enqueue_to_dispatch_time = cpu_to_be32(DISPATCH_LOG_BYTES); ret = register_dtl(hwcpu, __pa(dtl)); if (ret) pr_err("WARNING: DTL registration of cpu %d (hw %d) " @@ -136,7 +152,7 @@ static long pSeries_lpar_hpte_insert(unsigned long hpte_group, flags = 0; /* Make pHyp happy */ - if ((rflags & _PAGE_NO_CACHE) & !(rflags & _PAGE_WRITETHRU)) + if ((rflags & _PAGE_NO_CACHE) && !(rflags & _PAGE_WRITETHRU)) hpte_r &= ~_PAGE_COHERENT; if (firmware_has_feature(FW_FEATURE_XCMO) && !(hpte_r & HPTE_R_N)) flags |= H_COALESCE_CAND; @@ -229,6 +245,23 @@ static void pSeries_lpar_hptab_clear(void) &(ptes[j].pteh), &(ptes[j].ptel)); } } + +#ifdef __LITTLE_ENDIAN__ + /* Reset exceptions to big endian */ + if (firmware_has_feature(FW_FEATURE_SET_MODE)) { + long rc; + + rc = pseries_big_endian_exceptions(); + /* + * At this point it is unlikely panic() will get anything + * out to the user, but at least this will stop us from + * continuing on further and creating an even more + * difficult to debug situation. + */ + if (rc) + panic("Could not enable big endian exceptions"); + } +#endif } /* @@ -240,7 +273,8 @@ static void pSeries_lpar_hptab_clear(void) static long pSeries_lpar_hpte_updatepp(unsigned long slot, unsigned long newpp, unsigned long vpn, - int psize, int ssize, int local) + int psize, int apsize, + int ssize, int local) { unsigned long lpar_rc; unsigned long flags = (newpp & 7) | H_AVPN; @@ -328,7 +362,8 @@ static void pSeries_lpar_hpte_updateboltedpp(unsigned long newpp, } static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long vpn, - int psize, int ssize, int local) + int psize, int apsize, + int ssize, int local) { unsigned long want_v; unsigned long lpar_rc; @@ -345,6 +380,113 @@ static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long vpn, BUG_ON(lpar_rc != H_SUCCESS); } +/* + * Limit iterations holding pSeries_lpar_tlbie_lock to 3. We also need + * to make sure that we avoid bouncing the hypervisor tlbie lock. + */ +#define PPC64_HUGE_HPTE_BATCH 12 + +static void __pSeries_lpar_hugepage_invalidate(unsigned long *slot, + unsigned long *vpn, int count, + int psize, int ssize) +{ + unsigned long param[8]; + int i = 0, pix = 0, rc; + unsigned long flags = 0; + int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); + + if (lock_tlbie) + spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags); + + for (i = 0; i < count; i++) { + + if (!firmware_has_feature(FW_FEATURE_BULK_REMOVE)) { + pSeries_lpar_hpte_invalidate(slot[i], vpn[i], psize, 0, + ssize, 0); + } else { + param[pix] = HBR_REQUEST | HBR_AVPN | slot[i]; + param[pix+1] = hpte_encode_avpn(vpn[i], psize, ssize); + pix += 2; + if (pix == 8) { + rc = plpar_hcall9(H_BULK_REMOVE, param, + param[0], param[1], param[2], + param[3], param[4], param[5], + param[6], param[7]); + BUG_ON(rc != H_SUCCESS); + pix = 0; + } + } + } + if (pix) { + param[pix] = HBR_END; + rc = plpar_hcall9(H_BULK_REMOVE, param, param[0], param[1], + param[2], param[3], param[4], param[5], + param[6], param[7]); + BUG_ON(rc != H_SUCCESS); + } + + if (lock_tlbie) + spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags); +} + +static void pSeries_lpar_hugepage_invalidate(struct mm_struct *mm, + unsigned char *hpte_slot_array, + unsigned long addr, int psize) +{ + int ssize = 0, i, index = 0; + unsigned long s_addr = addr; + unsigned int max_hpte_count, valid; + unsigned long vpn_array[PPC64_HUGE_HPTE_BATCH]; + unsigned long slot_array[PPC64_HUGE_HPTE_BATCH]; + unsigned long shift, hidx, vpn = 0, vsid, hash, slot; + + shift = mmu_psize_defs[psize].shift; + max_hpte_count = 1U << (PMD_SHIFT - shift); + + for (i = 0; i < max_hpte_count; i++) { + valid = hpte_valid(hpte_slot_array, i); + if (!valid) + continue; + hidx = hpte_hash_index(hpte_slot_array, i); + + /* get the vpn */ + addr = s_addr + (i * (1ul << shift)); + if (!is_kernel_addr(addr)) { + ssize = user_segment_size(addr); + vsid = get_vsid(mm->context.id, addr, ssize); + WARN_ON(vsid == 0); + } else { + vsid = get_kernel_vsid(addr, mmu_kernel_ssize); + ssize = mmu_kernel_ssize; + } + + vpn = hpt_vpn(addr, vsid, ssize); + hash = hpt_hash(vpn, shift, ssize); + if (hidx & _PTEIDX_SECONDARY) + hash = ~hash; + + slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; + slot += hidx & _PTEIDX_GROUP_IX; + + slot_array[index] = slot; + vpn_array[index] = vpn; + if (index == PPC64_HUGE_HPTE_BATCH - 1) { + /* + * Now do a bluk invalidate + */ + __pSeries_lpar_hugepage_invalidate(slot_array, + vpn_array, + PPC64_HUGE_HPTE_BATCH, + psize, ssize); + index = 0; + } else + index++; + } + if (index) + __pSeries_lpar_hugepage_invalidate(slot_array, vpn_array, + index, psize, ssize); +} + static void pSeries_lpar_hpte_removebolted(unsigned long ea, int psize, int ssize) { @@ -356,17 +498,12 @@ static void pSeries_lpar_hpte_removebolted(unsigned long ea, slot = pSeries_lpar_hpte_find(vpn, psize, ssize); BUG_ON(slot == -1); - - pSeries_lpar_hpte_invalidate(slot, vpn, psize, ssize, 0); + /* + * lpar doesn't use the passed actual page size + */ + pSeries_lpar_hpte_invalidate(slot, vpn, psize, 0, ssize, 0); } -/* Flag bits for H_BULK_REMOVE */ -#define HBR_REQUEST 0x4000000000000000UL -#define HBR_RESPONSE 0x8000000000000000UL -#define HBR_END 0xc000000000000000UL -#define HBR_AVPN 0x0200000000000000UL -#define HBR_ANDCOND 0x0100000000000000UL - /* * Take a spinlock around flushes to avoid bouncing the hypervisor tlbie * lock. @@ -400,8 +537,11 @@ static void pSeries_lpar_flush_hash_range(unsigned long number, int local) slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; slot += hidx & _PTEIDX_GROUP_IX; if (!firmware_has_feature(FW_FEATURE_BULK_REMOVE)) { + /* + * lpar doesn't use the passed actual page size + */ pSeries_lpar_hpte_invalidate(slot, vpn, psize, - ssize, local); + 0, ssize, local); } else { param[pix] = HBR_REQUEST | HBR_AVPN | slot; param[pix+1] = hpte_encode_avpn(vpn, psize, @@ -452,6 +592,7 @@ void __init hpte_init_lpar(void) ppc_md.hpte_removebolted = pSeries_lpar_hpte_removebolted; ppc_md.flush_hash_range = pSeries_lpar_flush_hash_range; ppc_md.hpte_clear_all = pSeries_lpar_hptab_clear; + ppc_md.hugepage_invalidate = pSeries_lpar_hugepage_invalidate; } #ifdef CONFIG_PPC_SMLPAR @@ -606,7 +747,7 @@ int h_get_mpp(struct hvcall_mpp_data *mpp_data) mpp_data->mem_weight = (retbuf[3] >> 7 * 8) & 0xff; mpp_data->unallocated_mem_weight = (retbuf[3] >> 6 * 8) & 0xff; - mpp_data->unallocated_entitlement = retbuf[3] & 0xffffffffffff; + mpp_data->unallocated_entitlement = retbuf[3] & 0xffffffffffffUL; mpp_data->pool_size = retbuf[4]; mpp_data->loan_request = retbuf[5]; diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c new file mode 100644 index 0000000..e738007 --- /dev/null +++ b/arch/powerpc/platforms/pseries/lparcfg.c @@ -0,0 +1,710 @@ +/* + * PowerPC64 LPAR Configuration Information Driver + * + * Dave Engebretsen engebret@us.ibm.com + * Copyright (c) 2003 Dave Engebretsen + * Will Schmidt willschm@us.ibm.com + * SPLPAR updates, Copyright (c) 2003 Will Schmidt IBM Corporation. + * seq_file updates, Copyright (c) 2004 Will Schmidt IBM Corporation. + * Nathan Lynch nathanl@austin.ibm.com + * Added lparcfg_write, Copyright (C) 2004 Nathan Lynch IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * This driver creates a proc file at /proc/ppc64/lparcfg which contains + * keyword - value pairs that specify the configuration of the partition. + */ + +#include <linux/module.h> +#include <linux/types.h> +#include <linux/errno.h> +#include <linux/proc_fs.h> +#include <linux/init.h> +#include <linux/seq_file.h> +#include <linux/slab.h> +#include <asm/uaccess.h> +#include <asm/lppaca.h> +#include <asm/hvcall.h> +#include <asm/firmware.h> +#include <asm/rtas.h> +#include <asm/time.h> +#include <asm/prom.h> +#include <asm/vdso_datapage.h> +#include <asm/vio.h> +#include <asm/mmu.h> +#include <asm/machdep.h> + + +/* + * This isn't a module but we expose that to userspace + * via /proc so leave the definitions here + */ +#define MODULE_VERS "1.9" +#define MODULE_NAME "lparcfg" + +/* #define LPARCFG_DEBUG */ + +/* + * Track sum of all purrs across all processors. This is used to further + * calculate usage values by different applications + */ +static unsigned long get_purr(void) +{ + unsigned long sum_purr = 0; + int cpu; + + for_each_possible_cpu(cpu) { + struct cpu_usage *cu; + + cu = &per_cpu(cpu_usage_array, cpu); + sum_purr += cu->current_tb; + } + return sum_purr; +} + +/* + * Methods used to fetch LPAR data when running on a pSeries platform. + */ + +struct hvcall_ppp_data { + u64 entitlement; + u64 unallocated_entitlement; + u16 group_num; + u16 pool_num; + u8 capped; + u8 weight; + u8 unallocated_weight; + u16 active_procs_in_pool; + u16 active_system_procs; + u16 phys_platform_procs; + u32 max_proc_cap_avail; + u32 entitled_proc_cap_avail; +}; + +/* + * H_GET_PPP hcall returns info in 4 parms. + * entitled_capacity,unallocated_capacity, + * aggregation, resource_capability). + * + * R4 = Entitled Processor Capacity Percentage. + * R5 = Unallocated Processor Capacity Percentage. + * R6 (AABBCCDDEEFFGGHH). + * XXXX - reserved (0) + * XXXX - reserved (0) + * XXXX - Group Number + * XXXX - Pool Number. + * R7 (IIJJKKLLMMNNOOPP). + * XX - reserved. (0) + * XX - bit 0-6 reserved (0). bit 7 is Capped indicator. + * XX - variable processor Capacity Weight + * XX - Unallocated Variable Processor Capacity Weight. + * XXXX - Active processors in Physical Processor Pool. + * XXXX - Processors active on platform. + * R8 (QQQQRRRRRRSSSSSS). if ibm,partition-performance-parameters-level >= 1 + * XXXX - Physical platform procs allocated to virtualization. + * XXXXXX - Max procs capacity % available to the partitions pool. + * XXXXXX - Entitled procs capacity % available to the + * partitions pool. + */ +static unsigned int h_get_ppp(struct hvcall_ppp_data *ppp_data) +{ + unsigned long rc; + unsigned long retbuf[PLPAR_HCALL9_BUFSIZE]; + + rc = plpar_hcall9(H_GET_PPP, retbuf); + + ppp_data->entitlement = retbuf[0]; + ppp_data->unallocated_entitlement = retbuf[1]; + + ppp_data->group_num = (retbuf[2] >> 2 * 8) & 0xffff; + ppp_data->pool_num = retbuf[2] & 0xffff; + + ppp_data->capped = (retbuf[3] >> 6 * 8) & 0x01; + ppp_data->weight = (retbuf[3] >> 5 * 8) & 0xff; + ppp_data->unallocated_weight = (retbuf[3] >> 4 * 8) & 0xff; + ppp_data->active_procs_in_pool = (retbuf[3] >> 2 * 8) & 0xffff; + ppp_data->active_system_procs = retbuf[3] & 0xffff; + + ppp_data->phys_platform_procs = retbuf[4] >> 6 * 8; + ppp_data->max_proc_cap_avail = (retbuf[4] >> 3 * 8) & 0xffffff; + ppp_data->entitled_proc_cap_avail = retbuf[4] & 0xffffff; + + return rc; +} + +static unsigned h_pic(unsigned long *pool_idle_time, + unsigned long *num_procs) +{ + unsigned long rc; + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; + + rc = plpar_hcall(H_PIC, retbuf); + + *pool_idle_time = retbuf[0]; + *num_procs = retbuf[1]; + + return rc; +} + +/* + * parse_ppp_data + * Parse out the data returned from h_get_ppp and h_pic + */ +static void parse_ppp_data(struct seq_file *m) +{ + struct hvcall_ppp_data ppp_data; + struct device_node *root; + const int *perf_level; + int rc; + + rc = h_get_ppp(&ppp_data); + if (rc) + return; + + seq_printf(m, "partition_entitled_capacity=%lld\n", + ppp_data.entitlement); + seq_printf(m, "group=%d\n", ppp_data.group_num); + seq_printf(m, "system_active_processors=%d\n", + ppp_data.active_system_procs); + + /* pool related entries are appropriate for shared configs */ + if (lppaca_shared_proc(get_lppaca())) { + unsigned long pool_idle_time, pool_procs; + + seq_printf(m, "pool=%d\n", ppp_data.pool_num); + + /* report pool_capacity in percentage */ + seq_printf(m, "pool_capacity=%d\n", + ppp_data.active_procs_in_pool * 100); + + h_pic(&pool_idle_time, &pool_procs); + seq_printf(m, "pool_idle_time=%ld\n", pool_idle_time); + seq_printf(m, "pool_num_procs=%ld\n", pool_procs); + } + + seq_printf(m, "unallocated_capacity_weight=%d\n", + ppp_data.unallocated_weight); + seq_printf(m, "capacity_weight=%d\n", ppp_data.weight); + seq_printf(m, "capped=%d\n", ppp_data.capped); + seq_printf(m, "unallocated_capacity=%lld\n", + ppp_data.unallocated_entitlement); + + /* The last bits of information returned from h_get_ppp are only + * valid if the ibm,partition-performance-parameters-level + * property is >= 1. + */ + root = of_find_node_by_path("/"); + if (root) { + perf_level = of_get_property(root, + "ibm,partition-performance-parameters-level", + NULL); + if (perf_level && (*perf_level >= 1)) { + seq_printf(m, + "physical_procs_allocated_to_virtualization=%d\n", + ppp_data.phys_platform_procs); + seq_printf(m, "max_proc_capacity_available=%d\n", + ppp_data.max_proc_cap_avail); + seq_printf(m, "entitled_proc_capacity_available=%d\n", + ppp_data.entitled_proc_cap_avail); + } + + of_node_put(root); + } +} + +/** + * parse_mpp_data + * Parse out data returned from h_get_mpp + */ +static void parse_mpp_data(struct seq_file *m) +{ + struct hvcall_mpp_data mpp_data; + int rc; + + rc = h_get_mpp(&mpp_data); + if (rc) + return; + + seq_printf(m, "entitled_memory=%ld\n", mpp_data.entitled_mem); + + if (mpp_data.mapped_mem != -1) + seq_printf(m, "mapped_entitled_memory=%ld\n", + mpp_data.mapped_mem); + + seq_printf(m, "entitled_memory_group_number=%d\n", mpp_data.group_num); + seq_printf(m, "entitled_memory_pool_number=%d\n", mpp_data.pool_num); + + seq_printf(m, "entitled_memory_weight=%d\n", mpp_data.mem_weight); + seq_printf(m, "unallocated_entitled_memory_weight=%d\n", + mpp_data.unallocated_mem_weight); + seq_printf(m, "unallocated_io_mapping_entitlement=%ld\n", + mpp_data.unallocated_entitlement); + + if (mpp_data.pool_size != -1) + seq_printf(m, "entitled_memory_pool_size=%ld bytes\n", + mpp_data.pool_size); + + seq_printf(m, "entitled_memory_loan_request=%ld\n", + mpp_data.loan_request); + + seq_printf(m, "backing_memory=%ld bytes\n", mpp_data.backing_mem); +} + +/** + * parse_mpp_x_data + * Parse out data returned from h_get_mpp_x + */ +static void parse_mpp_x_data(struct seq_file *m) +{ + struct hvcall_mpp_x_data mpp_x_data; + + if (!firmware_has_feature(FW_FEATURE_XCMO)) + return; + if (h_get_mpp_x(&mpp_x_data)) + return; + + seq_printf(m, "coalesced_bytes=%ld\n", mpp_x_data.coalesced_bytes); + + if (mpp_x_data.pool_coalesced_bytes) + seq_printf(m, "pool_coalesced_bytes=%ld\n", + mpp_x_data.pool_coalesced_bytes); + if (mpp_x_data.pool_purr_cycles) + seq_printf(m, "coalesce_pool_purr=%ld\n", mpp_x_data.pool_purr_cycles); + if (mpp_x_data.pool_spurr_cycles) + seq_printf(m, "coalesce_pool_spurr=%ld\n", mpp_x_data.pool_spurr_cycles); +} + +#define SPLPAR_CHARACTERISTICS_TOKEN 20 +#define SPLPAR_MAXLENGTH 1026*(sizeof(char)) + +/* + * parse_system_parameter_string() + * Retrieve the potential_processors, max_entitled_capacity and friends + * through the get-system-parameter rtas call. Replace keyword strings as + * necessary. + */ +static void parse_system_parameter_string(struct seq_file *m) +{ + int call_status; + + unsigned char *local_buffer = kmalloc(SPLPAR_MAXLENGTH, GFP_KERNEL); + if (!local_buffer) { + printk(KERN_ERR "%s %s kmalloc failure at line %d\n", + __FILE__, __func__, __LINE__); + return; + } + + spin_lock(&rtas_data_buf_lock); + memset(rtas_data_buf, 0, SPLPAR_MAXLENGTH); + call_status = rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1, + NULL, + SPLPAR_CHARACTERISTICS_TOKEN, + __pa(rtas_data_buf), + RTAS_DATA_BUF_SIZE); + memcpy(local_buffer, rtas_data_buf, SPLPAR_MAXLENGTH); + local_buffer[SPLPAR_MAXLENGTH - 1] = '\0'; + spin_unlock(&rtas_data_buf_lock); + + if (call_status != 0) { + printk(KERN_INFO + "%s %s Error calling get-system-parameter (0x%x)\n", + __FILE__, __func__, call_status); + } else { + int splpar_strlen; + int idx, w_idx; + char *workbuffer = kzalloc(SPLPAR_MAXLENGTH, GFP_KERNEL); + if (!workbuffer) { + printk(KERN_ERR "%s %s kmalloc failure at line %d\n", + __FILE__, __func__, __LINE__); + kfree(local_buffer); + return; + } +#ifdef LPARCFG_DEBUG + printk(KERN_INFO "success calling get-system-parameter\n"); +#endif + splpar_strlen = local_buffer[0] * 256 + local_buffer[1]; + local_buffer += 2; /* step over strlen value */ + + w_idx = 0; + idx = 0; + while ((*local_buffer) && (idx < splpar_strlen)) { + workbuffer[w_idx++] = local_buffer[idx++]; + if ((local_buffer[idx] == ',') + || (local_buffer[idx] == '\0')) { + workbuffer[w_idx] = '\0'; + if (w_idx) { + /* avoid the empty string */ + seq_printf(m, "%s\n", workbuffer); + } + memset(workbuffer, 0, SPLPAR_MAXLENGTH); + idx++; /* skip the comma */ + w_idx = 0; + } else if (local_buffer[idx] == '=') { + /* code here to replace workbuffer contents + with different keyword strings */ + if (0 == strcmp(workbuffer, "MaxEntCap")) { + strcpy(workbuffer, + "partition_max_entitled_capacity"); + w_idx = strlen(workbuffer); + } + if (0 == strcmp(workbuffer, "MaxPlatProcs")) { + strcpy(workbuffer, + "system_potential_processors"); + w_idx = strlen(workbuffer); + } + } + } + kfree(workbuffer); + local_buffer -= 2; /* back up over strlen value */ + } + kfree(local_buffer); +} + +/* Return the number of processors in the system. + * This function reads through the device tree and counts + * the virtual processors, this does not include threads. + */ +static int lparcfg_count_active_processors(void) +{ + struct device_node *cpus_dn = NULL; + int count = 0; + + while ((cpus_dn = of_find_node_by_type(cpus_dn, "cpu"))) { +#ifdef LPARCFG_DEBUG + printk(KERN_ERR "cpus_dn %p\n", cpus_dn); +#endif + count++; + } + return count; +} + +static void pseries_cmo_data(struct seq_file *m) +{ + int cpu; + unsigned long cmo_faults = 0; + unsigned long cmo_fault_time = 0; + + seq_printf(m, "cmo_enabled=%d\n", firmware_has_feature(FW_FEATURE_CMO)); + + if (!firmware_has_feature(FW_FEATURE_CMO)) + return; + + for_each_possible_cpu(cpu) { + cmo_faults += be64_to_cpu(lppaca_of(cpu).cmo_faults); + cmo_fault_time += be64_to_cpu(lppaca_of(cpu).cmo_fault_time); + } + + seq_printf(m, "cmo_faults=%lu\n", cmo_faults); + seq_printf(m, "cmo_fault_time_usec=%lu\n", + cmo_fault_time / tb_ticks_per_usec); + seq_printf(m, "cmo_primary_psp=%d\n", cmo_get_primary_psp()); + seq_printf(m, "cmo_secondary_psp=%d\n", cmo_get_secondary_psp()); + seq_printf(m, "cmo_page_size=%lu\n", cmo_get_page_size()); +} + +static void splpar_dispatch_data(struct seq_file *m) +{ + int cpu; + unsigned long dispatches = 0; + unsigned long dispatch_dispersions = 0; + + for_each_possible_cpu(cpu) { + dispatches += be32_to_cpu(lppaca_of(cpu).yield_count); + dispatch_dispersions += + be32_to_cpu(lppaca_of(cpu).dispersion_count); + } + + seq_printf(m, "dispatches=%lu\n", dispatches); + seq_printf(m, "dispatch_dispersions=%lu\n", dispatch_dispersions); +} + +static void parse_em_data(struct seq_file *m) +{ + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; + + if (firmware_has_feature(FW_FEATURE_LPAR) && + plpar_hcall(H_GET_EM_PARMS, retbuf) == H_SUCCESS) + seq_printf(m, "power_mode_data=%016lx\n", retbuf[0]); +} + +static int pseries_lparcfg_data(struct seq_file *m, void *v) +{ + int partition_potential_processors; + int partition_active_processors; + struct device_node *rtas_node; + const int *lrdrp = NULL; + + rtas_node = of_find_node_by_path("/rtas"); + if (rtas_node) + lrdrp = of_get_property(rtas_node, "ibm,lrdr-capacity", NULL); + + if (lrdrp == NULL) { + partition_potential_processors = vdso_data->processorCount; + } else { + partition_potential_processors = *(lrdrp + 4); + } + of_node_put(rtas_node); + + partition_active_processors = lparcfg_count_active_processors(); + + if (firmware_has_feature(FW_FEATURE_SPLPAR)) { + /* this call handles the ibm,get-system-parameter contents */ + parse_system_parameter_string(m); + parse_ppp_data(m); + parse_mpp_data(m); + parse_mpp_x_data(m); + pseries_cmo_data(m); + splpar_dispatch_data(m); + + seq_printf(m, "purr=%ld\n", get_purr()); + } else { /* non SPLPAR case */ + + seq_printf(m, "system_active_processors=%d\n", + partition_potential_processors); + + seq_printf(m, "system_potential_processors=%d\n", + partition_potential_processors); + + seq_printf(m, "partition_max_entitled_capacity=%d\n", + partition_potential_processors * 100); + + seq_printf(m, "partition_entitled_capacity=%d\n", + partition_active_processors * 100); + } + + seq_printf(m, "partition_active_processors=%d\n", + partition_active_processors); + + seq_printf(m, "partition_potential_processors=%d\n", + partition_potential_processors); + + seq_printf(m, "shared_processor_mode=%d\n", + lppaca_shared_proc(get_lppaca())); + + seq_printf(m, "slb_size=%d\n", mmu_slb_size); + + parse_em_data(m); + + return 0; +} + +static ssize_t update_ppp(u64 *entitlement, u8 *weight) +{ + struct hvcall_ppp_data ppp_data; + u8 new_weight; + u64 new_entitled; + ssize_t retval; + + /* Get our current parameters */ + retval = h_get_ppp(&ppp_data); + if (retval) + return retval; + + if (entitlement) { + new_weight = ppp_data.weight; + new_entitled = *entitlement; + } else if (weight) { + new_weight = *weight; + new_entitled = ppp_data.entitlement; + } else + return -EINVAL; + + pr_debug("%s: current_entitled = %llu, current_weight = %u\n", + __func__, ppp_data.entitlement, ppp_data.weight); + + pr_debug("%s: new_entitled = %llu, new_weight = %u\n", + __func__, new_entitled, new_weight); + + retval = plpar_hcall_norets(H_SET_PPP, new_entitled, new_weight); + return retval; +} + +/** + * update_mpp + * + * Update the memory entitlement and weight for the partition. Caller must + * specify either a new entitlement or weight, not both, to be updated + * since the h_set_mpp call takes both entitlement and weight as parameters. + */ +static ssize_t update_mpp(u64 *entitlement, u8 *weight) +{ + struct hvcall_mpp_data mpp_data; + u64 new_entitled; + u8 new_weight; + ssize_t rc; + + if (entitlement) { + /* Check with vio to ensure the new memory entitlement + * can be handled. + */ + rc = vio_cmo_entitlement_update(*entitlement); + if (rc) + return rc; + } + + rc = h_get_mpp(&mpp_data); + if (rc) + return rc; + + if (entitlement) { + new_weight = mpp_data.mem_weight; + new_entitled = *entitlement; + } else if (weight) { + new_weight = *weight; + new_entitled = mpp_data.entitled_mem; + } else + return -EINVAL; + + pr_debug("%s: current_entitled = %lu, current_weight = %u\n", + __func__, mpp_data.entitled_mem, mpp_data.mem_weight); + + pr_debug("%s: new_entitled = %llu, new_weight = %u\n", + __func__, new_entitled, new_weight); + + rc = plpar_hcall_norets(H_SET_MPP, new_entitled, new_weight); + return rc; +} + +/* + * Interface for changing system parameters (variable capacity weight + * and entitled capacity). Format of input is "param_name=value"; + * anything after value is ignored. Valid parameters at this time are + * "partition_entitled_capacity" and "capacity_weight". We use + * H_SET_PPP to alter parameters. + * + * This function should be invoked only on systems with + * FW_FEATURE_SPLPAR. + */ +static ssize_t lparcfg_write(struct file *file, const char __user * buf, + size_t count, loff_t * off) +{ + int kbuf_sz = 64; + char kbuf[kbuf_sz]; + char *tmp; + u64 new_entitled, *new_entitled_ptr = &new_entitled; + u8 new_weight, *new_weight_ptr = &new_weight; + ssize_t retval; + + if (!firmware_has_feature(FW_FEATURE_SPLPAR)) + return -EINVAL; + + if (count > kbuf_sz) + return -EINVAL; + + if (copy_from_user(kbuf, buf, count)) + return -EFAULT; + + kbuf[count - 1] = '\0'; + tmp = strchr(kbuf, '='); + if (!tmp) + return -EINVAL; + + *tmp++ = '\0'; + + if (!strcmp(kbuf, "partition_entitled_capacity")) { + char *endp; + *new_entitled_ptr = (u64) simple_strtoul(tmp, &endp, 10); + if (endp == tmp) + return -EINVAL; + + retval = update_ppp(new_entitled_ptr, NULL); + } else if (!strcmp(kbuf, "capacity_weight")) { + char *endp; + *new_weight_ptr = (u8) simple_strtoul(tmp, &endp, 10); + if (endp == tmp) + return -EINVAL; + + retval = update_ppp(NULL, new_weight_ptr); + } else if (!strcmp(kbuf, "entitled_memory")) { + char *endp; + *new_entitled_ptr = (u64) simple_strtoul(tmp, &endp, 10); + if (endp == tmp) + return -EINVAL; + + retval = update_mpp(new_entitled_ptr, NULL); + } else if (!strcmp(kbuf, "entitled_memory_weight")) { + char *endp; + *new_weight_ptr = (u8) simple_strtoul(tmp, &endp, 10); + if (endp == tmp) + return -EINVAL; + + retval = update_mpp(NULL, new_weight_ptr); + } else + return -EINVAL; + + if (retval == H_SUCCESS || retval == H_CONSTRAINED) { + retval = count; + } else if (retval == H_BUSY) { + retval = -EBUSY; + } else if (retval == H_HARDWARE) { + retval = -EIO; + } else if (retval == H_PARAMETER) { + retval = -EINVAL; + } + + return retval; +} + +static int lparcfg_data(struct seq_file *m, void *v) +{ + struct device_node *rootdn; + const char *model = ""; + const char *system_id = ""; + const char *tmp; + const unsigned int *lp_index_ptr; + unsigned int lp_index = 0; + + seq_printf(m, "%s %s\n", MODULE_NAME, MODULE_VERS); + + rootdn = of_find_node_by_path("/"); + if (rootdn) { + tmp = of_get_property(rootdn, "model", NULL); + if (tmp) + model = tmp; + tmp = of_get_property(rootdn, "system-id", NULL); + if (tmp) + system_id = tmp; + lp_index_ptr = of_get_property(rootdn, "ibm,partition-no", + NULL); + if (lp_index_ptr) + lp_index = *lp_index_ptr; + of_node_put(rootdn); + } + seq_printf(m, "serial_number=%s\n", system_id); + seq_printf(m, "system_type=%s\n", model); + seq_printf(m, "partition_id=%d\n", (int)lp_index); + + return pseries_lparcfg_data(m, v); +} + +static int lparcfg_open(struct inode *inode, struct file *file) +{ + return single_open(file, lparcfg_data, NULL); +} + +static const struct file_operations lparcfg_fops = { + .read = seq_read, + .write = lparcfg_write, + .open = lparcfg_open, + .release = single_release, + .llseek = seq_lseek, +}; + +static int __init lparcfg_init(void) +{ + umode_t mode = S_IRUSR | S_IRGRP | S_IROTH; + + /* Allow writing if we have FW_FEATURE_SPLPAR */ + if (firmware_has_feature(FW_FEATURE_SPLPAR)) + mode |= S_IWUSR; + + if (!proc_create("powerpc/lparcfg", mode, NULL, &lparcfg_fops)) { + printk(KERN_ERR "Failed to create powerpc/lparcfg\n"); + return -EIO; + } + return 0; +} +machine_device_initcall(pseries, lparcfg_init); diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index 3d01eee..cde4e0a 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -28,7 +28,7 @@ struct update_props_workarea { u32 state; u64 reserved; u32 nprops; -}; +} __packed; #define NODE_ACTION_MASK 0xff000000 #define NODE_COUNT_MASK 0x00ffffff @@ -62,6 +62,7 @@ static int delete_dt_node(u32 phandle) return -ENOENT; dlpar_detach_node(dn); + of_node_put(dn); return 0; } @@ -119,7 +120,7 @@ static int update_dt_property(struct device_node *dn, struct property **prop, if (!more) { of_update_property(dn, new_prop); - new_prop = NULL; + *prop = NULL; } return 0; @@ -130,7 +131,7 @@ static int update_dt_node(u32 phandle, s32 scope) struct update_props_workarea *upwa; struct device_node *dn; struct property *prop = NULL; - int i, rc; + int i, rc, rtas_rc; char *prop_data; char *rtas_buf; int update_properties_token; @@ -154,25 +155,26 @@ static int update_dt_node(u32 phandle, s32 scope) upwa->phandle = phandle; do { - rc = mobility_rtas_call(update_properties_token, rtas_buf, + rtas_rc = mobility_rtas_call(update_properties_token, rtas_buf, scope); - if (rc < 0) + if (rtas_rc < 0) break; prop_data = rtas_buf + sizeof(*upwa); - /* The first element of the buffer is the path of the node - * being updated in the form of a 8 byte string length - * followed by the string. Skip past this to get to the - * properties being updated. + /* On the first call to ibm,update-properties for a node the + * the first property value descriptor contains an empty + * property name, the property value length encoded as u32, + * and the property value is the node path being updated. */ - vd = *prop_data++; - prop_data += vd; + if (*prop_data == 0) { + prop_data++; + vd = *(u32 *)prop_data; + prop_data += vd + sizeof(vd); + upwa->nprops--; + } - /* The path we skipped over is counted as one of the elements - * returned so start counting at one. - */ - for (i = 1; i < upwa->nprops; i++) { + for (i = 0; i < upwa->nprops; i++) { char *prop_name; prop_name = prop_data; @@ -202,7 +204,7 @@ static int update_dt_node(u32 phandle, s32 scope) prop_data += vd; } } - } while (rc == 1); + } while (rtas_rc == 1); of_node_put(dn); kfree(rtas_buf); @@ -215,17 +217,14 @@ static int add_dt_node(u32 parent_phandle, u32 drc_index) struct device_node *parent_dn; int rc; - dn = dlpar_configure_connector(drc_index); - if (!dn) + parent_dn = of_find_node_by_phandle(parent_phandle); + if (!parent_dn) return -ENOENT; - parent_dn = of_find_node_by_phandle(parent_phandle); - if (!parent_dn) { - dlpar_free_cc_nodes(dn); + dn = dlpar_configure_connector(drc_index, parent_dn); + if (!dn) return -ENOENT; - } - dn->parent = parent_dn; rc = dlpar_attach_node(dn); if (rc) dlpar_free_cc_nodes(dn); diff --git a/arch/powerpc/platforms/pseries/nvram.c b/arch/powerpc/platforms/pseries/nvram.c index 8733a86..7bfaf58 100644 --- a/arch/powerpc/platforms/pseries/nvram.c +++ b/arch/powerpc/platforms/pseries/nvram.c @@ -18,6 +18,7 @@ #include <linux/spinlock.h> #include <linux/slab.h> #include <linux/kmsg_dump.h> +#include <linux/pstore.h> #include <linux/ctype.h> #include <linux/zlib.h> #include <asm/uaccess.h> @@ -29,6 +30,13 @@ /* Max bytes to read/write in one go */ #define NVRW_CNT 0x20 +/* + * Set oops header version to distinguish between old and new format header. + * lnx,oops-log partition max size is 4000, header version > 4000 will + * help in identifying new header. + */ +#define OOPS_HDR_VERSION 5000 + static unsigned int nvram_size; static int nvram_fetch, nvram_store; static char nvram_buf[NVRW_CNT]; /* assume this is in the first 4GB */ @@ -45,20 +53,23 @@ struct nvram_os_partition { int min_size; /* minimum acceptable size (0 means req_size) */ long size; /* size of data portion (excluding err_log_info) */ long index; /* offset of data portion of partition */ + bool os_partition; /* partition initialized by OS, not FW */ }; static struct nvram_os_partition rtas_log_partition = { .name = "ibm,rtas-log", .req_size = 2079, .min_size = 1055, - .index = -1 + .index = -1, + .os_partition = true }; static struct nvram_os_partition oops_log_partition = { .name = "lnx,oops-log", .req_size = 4000, .min_size = 2000, - .index = -1 + .index = -1, + .os_partition = true }; static const char *pseries_nvram_os_partitions[] = { @@ -67,6 +78,12 @@ static const char *pseries_nvram_os_partitions[] = { NULL }; +struct oops_log_info { + u16 version; + u16 report_length; + u64 timestamp; +} __attribute__((packed)); + static void oops_to_nvram(struct kmsg_dumper *dumper, enum kmsg_dump_reason reason); @@ -83,28 +100,28 @@ static unsigned long last_unread_rtas_event; /* timestamp */ * big_oops_buf[] holds the uncompressed text we're capturing. * - * oops_buf[] holds the compressed text, preceded by a prefix. - * The prefix is just a u16 holding the length of the compressed* text. - * (*Or uncompressed, if compression fails.) oops_buf[] gets written - * to NVRAM. + * oops_buf[] holds the compressed text, preceded by a oops header. + * oops header has u16 holding the version of oops header (to differentiate + * between old and new format header) followed by u16 holding the length of + * the compressed* text (*Or uncompressed, if compression fails.) and u64 + * holding the timestamp. oops_buf[] gets written to NVRAM. * - * oops_len points to the prefix. oops_data points to the compressed text. + * oops_log_info points to the header. oops_data points to the compressed text. * * +- oops_buf - * | +- oops_data - * v v - * +------------+-----------------------------------------------+ - * | length | text | - * | (2 bytes) | (oops_data_sz bytes) | - * +------------+-----------------------------------------------+ + * | +- oops_data + * v v + * +-----------+-----------+-----------+------------------------+ + * | version | length | timestamp | text | + * | (2 bytes) | (2 bytes) | (8 bytes) | (oops_data_sz bytes) | + * +-----------+-----------+-----------+------------------------+ * ^ - * +- oops_len + * +- oops_log_info * * We preallocate these buffers during init to avoid kmalloc during oops/panic. */ static size_t big_oops_buf_sz; static char *big_oops_buf, *oops_buf; -static u16 *oops_len; static char *oops_data; static size_t oops_data_sz; @@ -114,6 +131,30 @@ static size_t oops_data_sz; #define MEM_LEVEL 4 static struct z_stream_s stream; +#ifdef CONFIG_PSTORE +static struct nvram_os_partition of_config_partition = { + .name = "of-config", + .index = -1, + .os_partition = false +}; + +static struct nvram_os_partition common_partition = { + .name = "common", + .index = -1, + .os_partition = false +}; + +static enum pstore_type_id nvram_type_ids[] = { + PSTORE_TYPE_DMESG, + PSTORE_TYPE_PPC_RTAS, + PSTORE_TYPE_PPC_OF, + PSTORE_TYPE_PPC_COMMON, + -1 +}; +static int read_type; +static unsigned long last_rtas_event; +#endif + static ssize_t pSeries_nvram_read(char *buf, size_t count, loff_t *index) { unsigned int i; @@ -275,48 +316,72 @@ int nvram_write_error_log(char * buff, int length, { int rc = nvram_write_os_partition(&rtas_log_partition, buff, length, err_type, error_log_cnt); - if (!rc) + if (!rc) { last_unread_rtas_event = get_seconds(); +#ifdef CONFIG_PSTORE + last_rtas_event = get_seconds(); +#endif + } + return rc; } -/* nvram_read_error_log +/* nvram_read_partition * - * Reads nvram for error log for at most 'length' + * Reads nvram partition for at most 'length' */ -int nvram_read_error_log(char * buff, int length, - unsigned int * err_type, unsigned int * error_log_cnt) +int nvram_read_partition(struct nvram_os_partition *part, char *buff, + int length, unsigned int *err_type, + unsigned int *error_log_cnt) { int rc; loff_t tmp_index; struct err_log_info info; - if (rtas_log_partition.index == -1) + if (part->index == -1) return -1; - if (length > rtas_log_partition.size) - length = rtas_log_partition.size; + if (length > part->size) + length = part->size; - tmp_index = rtas_log_partition.index; + tmp_index = part->index; - rc = ppc_md.nvram_read((char *)&info, sizeof(struct err_log_info), &tmp_index); - if (rc <= 0) { - printk(KERN_ERR "nvram_read_error_log: Failed nvram_read (%d)\n", rc); - return rc; + if (part->os_partition) { + rc = ppc_md.nvram_read((char *)&info, + sizeof(struct err_log_info), + &tmp_index); + if (rc <= 0) { + pr_err("%s: Failed nvram_read (%d)\n", __FUNCTION__, + rc); + return rc; + } } rc = ppc_md.nvram_read(buff, length, &tmp_index); if (rc <= 0) { - printk(KERN_ERR "nvram_read_error_log: Failed nvram_read (%d)\n", rc); + pr_err("%s: Failed nvram_read (%d)\n", __FUNCTION__, rc); return rc; } - *error_log_cnt = info.seq_num; - *err_type = info.error_type; + if (part->os_partition) { + *error_log_cnt = info.seq_num; + *err_type = info.error_type; + } return 0; } +/* nvram_read_error_log + * + * Reads nvram for error log for at most 'length' + */ +int nvram_read_error_log(char *buff, int length, + unsigned int *err_type, unsigned int *error_log_cnt) +{ + return nvram_read_partition(&rtas_log_partition, buff, length, + err_type, error_log_cnt); +} + /* This doesn't actually zero anything, but it sets the event_logged * word to tell that this event is safely in syslog. */ @@ -364,9 +429,6 @@ static int __init pseries_nvram_init_os_partition(struct nvram_os_partition loff_t p; int size; - /* Scan nvram for partitions */ - nvram_scan_partitions(); - /* Look for ours */ p = nvram_find_partition(part->name, NVRAM_SIG_OS, &size); @@ -405,6 +467,267 @@ static int __init pseries_nvram_init_os_partition(struct nvram_os_partition return 0; } +/* + * Are we using the ibm,rtas-log for oops/panic reports? And if so, + * would logging this oops/panic overwrite an RTAS event that rtas_errd + * hasn't had a chance to read and process? Return 1 if so, else 0. + * + * We assume that if rtas_errd hasn't read the RTAS event in + * NVRAM_RTAS_READ_TIMEOUT seconds, it's probably not going to. + */ +static int clobbering_unread_rtas_event(void) +{ + return (oops_log_partition.index == rtas_log_partition.index + && last_unread_rtas_event + && get_seconds() - last_unread_rtas_event <= + NVRAM_RTAS_READ_TIMEOUT); +} + +/* Derived from logfs_compress() */ +static int nvram_compress(const void *in, void *out, size_t inlen, + size_t outlen) +{ + int err, ret; + + ret = -EIO; + err = zlib_deflateInit2(&stream, COMPR_LEVEL, Z_DEFLATED, WINDOW_BITS, + MEM_LEVEL, Z_DEFAULT_STRATEGY); + if (err != Z_OK) + goto error; + + stream.next_in = in; + stream.avail_in = inlen; + stream.total_in = 0; + stream.next_out = out; + stream.avail_out = outlen; + stream.total_out = 0; + + err = zlib_deflate(&stream, Z_FINISH); + if (err != Z_STREAM_END) + goto error; + + err = zlib_deflateEnd(&stream); + if (err != Z_OK) + goto error; + + if (stream.total_out >= stream.total_in) + goto error; + + ret = stream.total_out; +error: + return ret; +} + +/* Compress the text from big_oops_buf into oops_buf. */ +static int zip_oops(size_t text_len) +{ + struct oops_log_info *oops_hdr = (struct oops_log_info *)oops_buf; + int zipped_len = nvram_compress(big_oops_buf, oops_data, text_len, + oops_data_sz); + if (zipped_len < 0) { + pr_err("nvram: compression failed; returned %d\n", zipped_len); + pr_err("nvram: logging uncompressed oops/panic report\n"); + return -1; + } + oops_hdr->version = OOPS_HDR_VERSION; + oops_hdr->report_length = (u16) zipped_len; + oops_hdr->timestamp = get_seconds(); + return 0; +} + +#ifdef CONFIG_PSTORE +static int nvram_pstore_open(struct pstore_info *psi) +{ + /* Reset the iterator to start reading partitions again */ + read_type = -1; + return 0; +} + +/** + * nvram_pstore_write - pstore write callback for nvram + * @type: Type of message logged + * @reason: reason behind dump (oops/panic) + * @id: identifier to indicate the write performed + * @part: pstore writes data to registered buffer in parts, + * part number will indicate the same. + * @count: Indicates oops count + * @compressed: Flag to indicate the log is compressed + * @size: number of bytes written to the registered buffer + * @psi: registered pstore_info structure + * + * Called by pstore_dump() when an oops or panic report is logged in the + * printk buffer. + * Returns 0 on successful write. + */ +static int nvram_pstore_write(enum pstore_type_id type, + enum kmsg_dump_reason reason, + u64 *id, unsigned int part, int count, + bool compressed, size_t size, + struct pstore_info *psi) +{ + int rc; + unsigned int err_type = ERR_TYPE_KERNEL_PANIC; + struct oops_log_info *oops_hdr = (struct oops_log_info *) oops_buf; + + /* part 1 has the recent messages from printk buffer */ + if (part > 1 || type != PSTORE_TYPE_DMESG || + clobbering_unread_rtas_event()) + return -1; + + oops_hdr->version = OOPS_HDR_VERSION; + oops_hdr->report_length = (u16) size; + oops_hdr->timestamp = get_seconds(); + + if (compressed) + err_type = ERR_TYPE_KERNEL_PANIC_GZ; + + rc = nvram_write_os_partition(&oops_log_partition, oops_buf, + (int) (sizeof(*oops_hdr) + size), err_type, count); + + if (rc != 0) + return rc; + + *id = part; + return 0; +} + +/* + * Reads the oops/panic report, rtas, of-config and common partition. + * Returns the length of the data we read from each partition. + * Returns 0 if we've been called before. + */ +static ssize_t nvram_pstore_read(u64 *id, enum pstore_type_id *type, + int *count, struct timespec *time, char **buf, + bool *compressed, struct pstore_info *psi) +{ + struct oops_log_info *oops_hdr; + unsigned int err_type, id_no, size = 0; + struct nvram_os_partition *part = NULL; + char *buff = NULL; + int sig = 0; + loff_t p; + + read_type++; + + switch (nvram_type_ids[read_type]) { + case PSTORE_TYPE_DMESG: + part = &oops_log_partition; + *type = PSTORE_TYPE_DMESG; + break; + case PSTORE_TYPE_PPC_RTAS: + part = &rtas_log_partition; + *type = PSTORE_TYPE_PPC_RTAS; + time->tv_sec = last_rtas_event; + time->tv_nsec = 0; + break; + case PSTORE_TYPE_PPC_OF: + sig = NVRAM_SIG_OF; + part = &of_config_partition; + *type = PSTORE_TYPE_PPC_OF; + *id = PSTORE_TYPE_PPC_OF; + time->tv_sec = 0; + time->tv_nsec = 0; + break; + case PSTORE_TYPE_PPC_COMMON: + sig = NVRAM_SIG_SYS; + part = &common_partition; + *type = PSTORE_TYPE_PPC_COMMON; + *id = PSTORE_TYPE_PPC_COMMON; + time->tv_sec = 0; + time->tv_nsec = 0; + break; + default: + return 0; + } + + if (!part->os_partition) { + p = nvram_find_partition(part->name, sig, &size); + if (p <= 0) { + pr_err("nvram: Failed to find partition %s, " + "err %d\n", part->name, (int)p); + return 0; + } + part->index = p; + part->size = size; + } + + buff = kmalloc(part->size, GFP_KERNEL); + + if (!buff) + return -ENOMEM; + + if (nvram_read_partition(part, buff, part->size, &err_type, &id_no)) { + kfree(buff); + return 0; + } + + *count = 0; + + if (part->os_partition) + *id = id_no; + + if (nvram_type_ids[read_type] == PSTORE_TYPE_DMESG) { + size_t length, hdr_size; + + oops_hdr = (struct oops_log_info *)buff; + if (oops_hdr->version < OOPS_HDR_VERSION) { + /* Old format oops header had 2-byte record size */ + hdr_size = sizeof(u16); + length = oops_hdr->version; + time->tv_sec = 0; + time->tv_nsec = 0; + } else { + hdr_size = sizeof(*oops_hdr); + length = oops_hdr->report_length; + time->tv_sec = oops_hdr->timestamp; + time->tv_nsec = 0; + } + *buf = kmalloc(length, GFP_KERNEL); + if (*buf == NULL) + return -ENOMEM; + memcpy(*buf, buff + hdr_size, length); + kfree(buff); + + if (err_type == ERR_TYPE_KERNEL_PANIC_GZ) + *compressed = true; + else + *compressed = false; + return length; + } + + *buf = buff; + return part->size; +} + +static struct pstore_info nvram_pstore_info = { + .owner = THIS_MODULE, + .name = "nvram", + .open = nvram_pstore_open, + .read = nvram_pstore_read, + .write = nvram_pstore_write, +}; + +static int nvram_pstore_init(void) +{ + int rc = 0; + + nvram_pstore_info.buf = oops_data; + nvram_pstore_info.bufsize = oops_data_sz; + + rc = pstore_register(&nvram_pstore_info); + if (rc != 0) + pr_err("nvram: pstore_register() failed, defaults to " + "kmsg_dump; returned %d\n", rc); + + return rc; +} +#else +static int nvram_pstore_init(void) +{ + return -1; +} +#endif + static void __init nvram_init_oops_partition(int rtas_partition_exists) { int rc; @@ -425,9 +748,13 @@ static void __init nvram_init_oops_partition(int rtas_partition_exists) oops_log_partition.name); return; } - oops_len = (u16*) oops_buf; - oops_data = oops_buf + sizeof(u16); - oops_data_sz = oops_log_partition.size - sizeof(u16); + oops_data = oops_buf + sizeof(struct oops_log_info); + oops_data_sz = oops_log_partition.size - sizeof(struct oops_log_info); + + rc = nvram_pstore_init(); + + if (!rc) + return; /* * Figure compression (preceded by elimination of each line's <n> @@ -437,8 +764,8 @@ static void __init nvram_init_oops_partition(int rtas_partition_exists) big_oops_buf_sz = (oops_data_sz * 100) / 45; big_oops_buf = kmalloc(big_oops_buf_sz, GFP_KERNEL); if (big_oops_buf) { - stream.workspace = kmalloc(zlib_deflate_workspacesize( - WINDOW_BITS, MEM_LEVEL), GFP_KERNEL); + stream.workspace = kmalloc(zlib_deflate_workspacesize( + WINDOW_BITS, MEM_LEVEL), GFP_KERNEL); if (!stream.workspace) { pr_err("nvram: No memory for compression workspace; " "skipping compression of %s partition data\n", @@ -465,6 +792,9 @@ static int __init pseries_nvram_init_log_partitions(void) { int rc; + /* Scan nvram for partitions */ + nvram_scan_partitions(); + rc = pseries_nvram_init_os_partition(&rtas_log_partition); nvram_init_oops_partition(rc == 0); return 0; @@ -474,7 +804,7 @@ machine_arch_initcall(pseries, pseries_nvram_init_log_partitions); int __init pSeries_nvram_init(void) { struct device_node *nvram; - const unsigned int *nbytes_p; + const __be32 *nbytes_p; unsigned int proplen; nvram = of_find_node_by_type(NULL, "nvram"); @@ -487,7 +817,7 @@ int __init pSeries_nvram_init(void) return -EIO; } - nvram_size = *nbytes_p; + nvram_size = be32_to_cpup(nbytes_p); nvram_fetch = rtas_token("nvram-fetch"); nvram_store = rtas_token("nvram-store"); @@ -501,70 +831,6 @@ int __init pSeries_nvram_init(void) return 0; } -/* - * Are we using the ibm,rtas-log for oops/panic reports? And if so, - * would logging this oops/panic overwrite an RTAS event that rtas_errd - * hasn't had a chance to read and process? Return 1 if so, else 0. - * - * We assume that if rtas_errd hasn't read the RTAS event in - * NVRAM_RTAS_READ_TIMEOUT seconds, it's probably not going to. - */ -static int clobbering_unread_rtas_event(void) -{ - return (oops_log_partition.index == rtas_log_partition.index - && last_unread_rtas_event - && get_seconds() - last_unread_rtas_event <= - NVRAM_RTAS_READ_TIMEOUT); -} - -/* Derived from logfs_compress() */ -static int nvram_compress(const void *in, void *out, size_t inlen, - size_t outlen) -{ - int err, ret; - - ret = -EIO; - err = zlib_deflateInit2(&stream, COMPR_LEVEL, Z_DEFLATED, WINDOW_BITS, - MEM_LEVEL, Z_DEFAULT_STRATEGY); - if (err != Z_OK) - goto error; - - stream.next_in = in; - stream.avail_in = inlen; - stream.total_in = 0; - stream.next_out = out; - stream.avail_out = outlen; - stream.total_out = 0; - - err = zlib_deflate(&stream, Z_FINISH); - if (err != Z_STREAM_END) - goto error; - - err = zlib_deflateEnd(&stream); - if (err != Z_OK) - goto error; - - if (stream.total_out >= stream.total_in) - goto error; - - ret = stream.total_out; -error: - return ret; -} - -/* Compress the text from big_oops_buf into oops_buf. */ -static int zip_oops(size_t text_len) -{ - int zipped_len = nvram_compress(big_oops_buf, oops_data, text_len, - oops_data_sz); - if (zipped_len < 0) { - pr_err("nvram: compression failed; returned %d\n", zipped_len); - pr_err("nvram: logging uncompressed oops/panic report\n"); - return -1; - } - *oops_len = (u16) zipped_len; - return 0; -} /* * This is our kmsg_dump callback, called after an oops or panic report @@ -576,6 +842,7 @@ static int zip_oops(size_t text_len) static void oops_to_nvram(struct kmsg_dumper *dumper, enum kmsg_dump_reason reason) { + struct oops_log_info *oops_hdr = (struct oops_log_info *)oops_buf; static unsigned int oops_count = 0; static bool panicking = false; static DEFINE_SPINLOCK(lock); @@ -619,14 +886,17 @@ static void oops_to_nvram(struct kmsg_dumper *dumper, } if (rc != 0) { kmsg_dump_rewind(dumper); - kmsg_dump_get_buffer(dumper, true, + kmsg_dump_get_buffer(dumper, false, oops_data, oops_data_sz, &text_len); err_type = ERR_TYPE_KERNEL_PANIC; - *oops_len = (u16) text_len; + oops_hdr->version = OOPS_HDR_VERSION; + oops_hdr->report_length = (u16) text_len; + oops_hdr->timestamp = get_seconds(); } (void) nvram_write_os_partition(&oops_log_partition, oops_buf, - (int) (sizeof(*oops_len) + *oops_len), err_type, ++oops_count); + (int) (sizeof(*oops_hdr) + oops_hdr->report_length), err_type, + ++oops_count); spin_unlock_irqrestore(&lock, flags); } diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c index c91b22b..efe6137 100644 --- a/arch/powerpc/platforms/pseries/pci_dlpar.c +++ b/arch/powerpc/platforms/pseries/pci_dlpar.c @@ -64,91 +64,6 @@ pcibios_find_pci_bus(struct device_node *dn) } EXPORT_SYMBOL_GPL(pcibios_find_pci_bus); -/** - * __pcibios_remove_pci_devices - remove all devices under this bus - * @bus: the indicated PCI bus - * @purge_pe: destroy the PE on removal of PCI devices - * - * Remove all of the PCI devices under this bus both from the - * linux pci device tree, and from the powerpc EEH address cache. - * By default, the corresponding PE will be destroied during the - * normal PCI hotplug path. For PCI hotplug during EEH recovery, - * the corresponding PE won't be destroied and deallocated. - */ -void __pcibios_remove_pci_devices(struct pci_bus *bus, int purge_pe) -{ - struct pci_dev *dev, *tmp; - struct pci_bus *child_bus; - - /* First go down child busses */ - list_for_each_entry(child_bus, &bus->children, node) - __pcibios_remove_pci_devices(child_bus, purge_pe); - - pr_debug("PCI: Removing devices on bus %04x:%02x\n", - pci_domain_nr(bus), bus->number); - list_for_each_entry_safe(dev, tmp, &bus->devices, bus_list) { - pr_debug(" * Removing %s...\n", pci_name(dev)); - eeh_remove_bus_device(dev, purge_pe); - pci_stop_and_remove_bus_device(dev); - } -} - -/** - * pcibios_remove_pci_devices - remove all devices under this bus - * - * Remove all of the PCI devices under this bus both from the - * linux pci device tree, and from the powerpc EEH address cache. - */ -void pcibios_remove_pci_devices(struct pci_bus *bus) -{ - __pcibios_remove_pci_devices(bus, 1); -} -EXPORT_SYMBOL_GPL(pcibios_remove_pci_devices); - -/** - * pcibios_add_pci_devices - adds new pci devices to bus - * - * This routine will find and fixup new pci devices under - * the indicated bus. This routine presumes that there - * might already be some devices under this bridge, so - * it carefully tries to add only new devices. (And that - * is how this routine differs from other, similar pcibios - * routines.) - */ -void pcibios_add_pci_devices(struct pci_bus * bus) -{ - int slotno, num, mode, pass, max; - struct pci_dev *dev; - struct device_node *dn = pci_bus_to_OF_node(bus); - - eeh_add_device_tree_early(dn); - - mode = PCI_PROBE_NORMAL; - if (ppc_md.pci_probe_mode) - mode = ppc_md.pci_probe_mode(bus); - - if (mode == PCI_PROBE_DEVTREE) { - /* use ofdt-based probe */ - of_rescan_bus(dn, bus); - } else if (mode == PCI_PROBE_NORMAL) { - /* use legacy probe */ - slotno = PCI_SLOT(PCI_DN(dn->child)->devfn); - num = pci_scan_slot(bus, PCI_DEVFN(slotno, 0)); - if (!num) - return; - pcibios_setup_bus_devices(bus); - max = bus->busn_res.start; - for (pass=0; pass < 2; pass++) - list_for_each_entry(dev, &bus->devices, bus_list) { - if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE || - dev->hdr_type == PCI_HEADER_TYPE_CARDBUS) - max = pci_scan_bridge(bus, dev, max, pass); - } - } - pcibios_finish_adding_to_bus(bus); -} -EXPORT_SYMBOL_GPL(pcibios_add_pci_devices); - struct pci_controller *init_phb_dynamic(struct device_node *dn) { struct pci_controller *phb; diff --git a/arch/powerpc/platforms/pseries/plpar_wrappers.h b/arch/powerpc/platforms/pseries/plpar_wrappers.h deleted file mode 100644 index f35787b..0000000 --- a/arch/powerpc/platforms/pseries/plpar_wrappers.h +++ /dev/null @@ -1,324 +0,0 @@ -#ifndef _PSERIES_PLPAR_WRAPPERS_H -#define _PSERIES_PLPAR_WRAPPERS_H - -#include <linux/string.h> -#include <linux/irqflags.h> - -#include <asm/hvcall.h> -#include <asm/paca.h> -#include <asm/page.h> - -/* Get state of physical CPU from query_cpu_stopped */ -int smp_query_cpu_stopped(unsigned int pcpu); -#define QCSS_STOPPED 0 -#define QCSS_STOPPING 1 -#define QCSS_NOT_STOPPED 2 -#define QCSS_HARDWARE_ERROR -1 -#define QCSS_HARDWARE_BUSY -2 - -static inline long poll_pending(void) -{ - return plpar_hcall_norets(H_POLL_PENDING); -} - -static inline u8 get_cede_latency_hint(void) -{ - return get_lppaca()->cede_latency_hint; -} - -static inline void set_cede_latency_hint(u8 latency_hint) -{ - get_lppaca()->cede_latency_hint = latency_hint; -} - -static inline long cede_processor(void) -{ - return plpar_hcall_norets(H_CEDE); -} - -static inline long extended_cede_processor(unsigned long latency_hint) -{ - long rc; - u8 old_latency_hint = get_cede_latency_hint(); - - set_cede_latency_hint(latency_hint); - - rc = cede_processor(); -#ifdef CONFIG_TRACE_IRQFLAGS - /* Ensure that H_CEDE returns with IRQs on */ - if (WARN_ON(!(mfmsr() & MSR_EE))) - __hard_irq_enable(); -#endif - - set_cede_latency_hint(old_latency_hint); - - return rc; -} - -static inline long vpa_call(unsigned long flags, unsigned long cpu, - unsigned long vpa) -{ - flags = flags << H_VPA_FUNC_SHIFT; - - return plpar_hcall_norets(H_REGISTER_VPA, flags, cpu, vpa); -} - -static inline long unregister_vpa(unsigned long cpu) -{ - return vpa_call(H_VPA_DEREG_VPA, cpu, 0); -} - -static inline long register_vpa(unsigned long cpu, unsigned long vpa) -{ - return vpa_call(H_VPA_REG_VPA, cpu, vpa); -} - -static inline long unregister_slb_shadow(unsigned long cpu) -{ - return vpa_call(H_VPA_DEREG_SLB, cpu, 0); -} - -static inline long register_slb_shadow(unsigned long cpu, unsigned long vpa) -{ - return vpa_call(H_VPA_REG_SLB, cpu, vpa); -} - -static inline long unregister_dtl(unsigned long cpu) -{ - return vpa_call(H_VPA_DEREG_DTL, cpu, 0); -} - -static inline long register_dtl(unsigned long cpu, unsigned long vpa) -{ - return vpa_call(H_VPA_REG_DTL, cpu, vpa); -} - -static inline long plpar_page_set_loaned(unsigned long vpa) -{ - unsigned long cmo_page_sz = cmo_get_page_size(); - long rc = 0; - int i; - - for (i = 0; !rc && i < PAGE_SIZE; i += cmo_page_sz) - rc = plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_LOANED, vpa + i, 0); - - for (i -= cmo_page_sz; rc && i != 0; i -= cmo_page_sz) - plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_ACTIVE, - vpa + i - cmo_page_sz, 0); - - return rc; -} - -static inline long plpar_page_set_active(unsigned long vpa) -{ - unsigned long cmo_page_sz = cmo_get_page_size(); - long rc = 0; - int i; - - for (i = 0; !rc && i < PAGE_SIZE; i += cmo_page_sz) - rc = plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_ACTIVE, vpa + i, 0); - - for (i -= cmo_page_sz; rc && i != 0; i -= cmo_page_sz) - plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_LOANED, - vpa + i - cmo_page_sz, 0); - - return rc; -} - -extern void vpa_init(int cpu); - -static inline long plpar_pte_enter(unsigned long flags, - unsigned long hpte_group, unsigned long hpte_v, - unsigned long hpte_r, unsigned long *slot) -{ - long rc; - unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; - - rc = plpar_hcall(H_ENTER, retbuf, flags, hpte_group, hpte_v, hpte_r); - - *slot = retbuf[0]; - - return rc; -} - -static inline long plpar_pte_remove(unsigned long flags, unsigned long ptex, - unsigned long avpn, unsigned long *old_pteh_ret, - unsigned long *old_ptel_ret) -{ - long rc; - unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; - - rc = plpar_hcall(H_REMOVE, retbuf, flags, ptex, avpn); - - *old_pteh_ret = retbuf[0]; - *old_ptel_ret = retbuf[1]; - - return rc; -} - -/* plpar_pte_remove_raw can be called in real mode. It calls plpar_hcall_raw */ -static inline long plpar_pte_remove_raw(unsigned long flags, unsigned long ptex, - unsigned long avpn, unsigned long *old_pteh_ret, - unsigned long *old_ptel_ret) -{ - long rc; - unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; - - rc = plpar_hcall_raw(H_REMOVE, retbuf, flags, ptex, avpn); - - *old_pteh_ret = retbuf[0]; - *old_ptel_ret = retbuf[1]; - - return rc; -} - -static inline long plpar_pte_read(unsigned long flags, unsigned long ptex, - unsigned long *old_pteh_ret, unsigned long *old_ptel_ret) -{ - long rc; - unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; - - rc = plpar_hcall(H_READ, retbuf, flags, ptex); - - *old_pteh_ret = retbuf[0]; - *old_ptel_ret = retbuf[1]; - - return rc; -} - -/* plpar_pte_read_raw can be called in real mode. It calls plpar_hcall_raw */ -static inline long plpar_pte_read_raw(unsigned long flags, unsigned long ptex, - unsigned long *old_pteh_ret, unsigned long *old_ptel_ret) -{ - long rc; - unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; - - rc = plpar_hcall_raw(H_READ, retbuf, flags, ptex); - - *old_pteh_ret = retbuf[0]; - *old_ptel_ret = retbuf[1]; - - return rc; -} - -/* - * plpar_pte_read_4_raw can be called in real mode. - * ptes must be 8*sizeof(unsigned long) - */ -static inline long plpar_pte_read_4_raw(unsigned long flags, unsigned long ptex, - unsigned long *ptes) - -{ - long rc; - unsigned long retbuf[PLPAR_HCALL9_BUFSIZE]; - - rc = plpar_hcall9_raw(H_READ, retbuf, flags | H_READ_4, ptex); - - memcpy(ptes, retbuf, 8*sizeof(unsigned long)); - - return rc; -} - -static inline long plpar_pte_protect(unsigned long flags, unsigned long ptex, - unsigned long avpn) -{ - return plpar_hcall_norets(H_PROTECT, flags, ptex, avpn); -} - -static inline long plpar_tce_get(unsigned long liobn, unsigned long ioba, - unsigned long *tce_ret) -{ - long rc; - unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; - - rc = plpar_hcall(H_GET_TCE, retbuf, liobn, ioba); - - *tce_ret = retbuf[0]; - - return rc; -} - -static inline long plpar_tce_put(unsigned long liobn, unsigned long ioba, - unsigned long tceval) -{ - return plpar_hcall_norets(H_PUT_TCE, liobn, ioba, tceval); -} - -static inline long plpar_tce_put_indirect(unsigned long liobn, - unsigned long ioba, unsigned long page, unsigned long count) -{ - return plpar_hcall_norets(H_PUT_TCE_INDIRECT, liobn, ioba, page, count); -} - -static inline long plpar_tce_stuff(unsigned long liobn, unsigned long ioba, - unsigned long tceval, unsigned long count) -{ - return plpar_hcall_norets(H_STUFF_TCE, liobn, ioba, tceval, count); -} - -static inline long plpar_get_term_char(unsigned long termno, - unsigned long *len_ret, char *buf_ret) -{ - long rc; - unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; - unsigned long *lbuf = (unsigned long *)buf_ret; /* TODO: alignment? */ - - rc = plpar_hcall(H_GET_TERM_CHAR, retbuf, termno); - - *len_ret = retbuf[0]; - lbuf[0] = retbuf[1]; - lbuf[1] = retbuf[2]; - - return rc; -} - -static inline long plpar_put_term_char(unsigned long termno, unsigned long len, - const char *buffer) -{ - unsigned long *lbuf = (unsigned long *)buffer; /* TODO: alignment? */ - return plpar_hcall_norets(H_PUT_TERM_CHAR, termno, len, lbuf[0], - lbuf[1]); -} - -/* Set various resource mode parameters */ -static inline long plpar_set_mode(unsigned long mflags, unsigned long resource, - unsigned long value1, unsigned long value2) -{ - return plpar_hcall_norets(H_SET_MODE, mflags, resource, value1, value2); -} - -/* - * Enable relocation on exceptions on this partition - * - * Note: this call has a partition wide scope and can take a while to complete. - * If it returns H_LONG_BUSY_* it should be retried periodically until it - * returns H_SUCCESS. - */ -static inline long enable_reloc_on_exceptions(void) -{ - /* mflags = 3: Exceptions at 0xC000000000004000 */ - return plpar_set_mode(3, 3, 0, 0); -} - -/* - * Disable relocation on exceptions on this partition - * - * Note: this call has a partition wide scope and can take a while to complete. - * If it returns H_LONG_BUSY_* it should be retried periodically until it - * returns H_SUCCESS. - */ -static inline long disable_reloc_on_exceptions(void) { - return plpar_set_mode(0, 3, 0, 0); -} - -static inline long plapr_set_ciabr(unsigned long ciabr) -{ - return plpar_set_mode(0, 1, ciabr, 0); -} - -static inline long plapr_set_watchpoint0(unsigned long dawr0, unsigned long dawrx0) -{ - return plpar_set_mode(0, 2, dawr0, dawrx0); -} - -#endif /* _PSERIES_PLPAR_WRAPPERS_H */ diff --git a/arch/powerpc/platforms/pseries/processor_idle.c b/arch/powerpc/platforms/pseries/processor_idle.c index 4644efa0..a166e38 100644 --- a/arch/powerpc/platforms/pseries/processor_idle.c +++ b/arch/powerpc/platforms/pseries/processor_idle.c @@ -18,9 +18,7 @@ #include <asm/machdep.h> #include <asm/firmware.h> #include <asm/runlatch.h> - -#include "plpar_wrappers.h" -#include "pseries.h" +#include <asm/plpar_wrappers.h> struct cpuidle_driver pseries_idle_driver = { .name = "pseries_idle", @@ -45,7 +43,11 @@ static inline void idle_loop_prolog(unsigned long *in_purr) static inline void idle_loop_epilog(unsigned long in_purr) { - get_lppaca()->wait_state_cycles += mfspr(SPRN_PURR) - in_purr; + u64 wait_cycles; + + wait_cycles = be64_to_cpu(get_lppaca()->wait_state_cycles); + wait_cycles += mfspr(SPRN_PURR) - in_purr; + get_lppaca()->wait_state_cycles = cpu_to_be64(wait_cycles); get_lppaca()->idle = 0; } @@ -308,7 +310,7 @@ static int pseries_idle_probe(void) return -EPERM; } - if (get_lppaca()->shared_proc) + if (lppaca_shared_proc(get_lppaca())) cpuidle_state_table = shared_states; else cpuidle_state_table = dedicated_states; diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h index c2a3a25..9921953 100644 --- a/arch/powerpc/platforms/pseries/pseries.h +++ b/arch/powerpc/platforms/pseries/pseries.h @@ -56,13 +56,10 @@ extern void hvc_vio_init_early(void); /* Dynamic logical Partitioning/Mobility */ extern void dlpar_free_cc_nodes(struct device_node *); extern void dlpar_free_cc_property(struct property *); -extern struct device_node *dlpar_configure_connector(u32); +extern struct device_node *dlpar_configure_connector(u32, struct device_node *); extern int dlpar_attach_node(struct device_node *); extern int dlpar_detach_node(struct device_node *); -/* Snooze Delay, pseries_idle */ -DECLARE_PER_CPU(long, smt_snooze_delay); - /* PCI root bridge prepare function override for pseries */ struct pci_host_bridge; int pseries_root_bridge_prepare(struct pci_host_bridge *bridge); diff --git a/arch/powerpc/platforms/pseries/pseries_energy.c b/arch/powerpc/platforms/pseries/pseries_energy.c index a91e6da..9276779 100644 --- a/arch/powerpc/platforms/pseries/pseries_energy.c +++ b/arch/powerpc/platforms/pseries/pseries_energy.c @@ -108,8 +108,8 @@ err: * energy consumption. */ -#define FLAGS_MODE1 0x004E200000080E01 -#define FLAGS_MODE2 0x004E200000080401 +#define FLAGS_MODE1 0x004E200000080E01UL +#define FLAGS_MODE2 0x004E200000080401UL #define FLAGS_ACTIVATE 0x100 static ssize_t get_best_energy_list(char *page, int activate) diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index c4dfccd..721c058 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -83,7 +83,7 @@ static void handle_system_shutdown(char event_modifier) switch (event_modifier) { case EPOW_SHUTDOWN_NORMAL: pr_emerg("Firmware initiated power off"); - orderly_poweroff(1); + orderly_poweroff(true); break; case EPOW_SHUTDOWN_ON_UPS: @@ -95,13 +95,13 @@ static void handle_system_shutdown(char event_modifier) pr_emerg("Loss of system critical functions reported by " "firmware"); pr_emerg("Check RTAS error log for details"); - orderly_poweroff(1); + orderly_poweroff(true); break; case EPOW_SHUTDOWN_AMBIENT_TEMPERATURE_TOO_HIGH: pr_emerg("Ambient temperature too high reported by firmware"); pr_emerg("Check RTAS error log for details"); - orderly_poweroff(1); + orderly_poweroff(true); break; default: @@ -162,7 +162,7 @@ void rtas_parse_epow_errlog(struct rtas_error_log *log) case EPOW_SYSTEM_HALT: pr_emerg("Firmware initiated power off"); - orderly_poweroff(1); + orderly_poweroff(true); break; case EPOW_MAIN_ENCLOSURE: @@ -287,6 +287,9 @@ static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs) unsigned long *savep; struct rtas_error_log *h, *errhdr = NULL; + /* Mask top two bits */ + regs->gpr[3] &= ~(0x3UL << 62); + if (!VALID_FWNMI_BUFFER(regs->gpr[3])) { printk(KERN_ERR "FWNMI: corrupt r3 0x%016lx\n", regs->gpr[3]); return NULL; diff --git a/arch/powerpc/platforms/pseries/rng.c b/arch/powerpc/platforms/pseries/rng.c new file mode 100644 index 0000000..72a1027 --- /dev/null +++ b/arch/powerpc/platforms/pseries/rng.c @@ -0,0 +1,45 @@ +/* + * Copyright 2013, Michael Ellerman, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#define pr_fmt(fmt) "pseries-rng: " fmt + +#include <linux/kernel.h> +#include <linux/of.h> +#include <asm/archrandom.h> +#include <asm/machdep.h> +#include <asm/plpar_wrappers.h> + + +static int pseries_get_random_long(unsigned long *v) +{ + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; + + if (plpar_hcall(H_RANDOM, retbuf) == H_SUCCESS) { + *v = retbuf[0]; + return 1; + } + + return 0; +} + +static __init int rng_init(void) +{ + struct device_node *dn; + + dn = of_find_compatible_node(NULL, NULL, "ibm,random"); + if (!dn) + return -ENODEV; + + pr_info("Registering arch random hook.\n"); + + ppc_md.get_random_long = pseries_get_random_long; + + return 0; +} +subsys_initcall(rng_init); diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index c11c823..c1f1908 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -66,8 +66,8 @@ #include <asm/firmware.h> #include <asm/eeh.h> #include <asm/reg.h> +#include <asm/plpar_wrappers.h> -#include "plpar_wrappers.h" #include "pseries.h" int CMO_PrPSP = -1; @@ -183,7 +183,7 @@ static void __init pseries_mpic_init_IRQ(void) np = of_find_node_by_path("/"); naddr = of_n_addr_cells(np); opprop = of_get_property(np, "platform-open-pic", &opplen); - if (opprop != 0) { + if (opprop != NULL) { openpic_addr = of_read_number(opprop, naddr); printk(KERN_DEBUG "OpenPIC addr: %lx\n", openpic_addr); } @@ -323,7 +323,7 @@ static int alloc_dispatch_logs(void) get_paca()->lppaca_ptr->dtl_idx = 0; /* hypervisor reads buffer length from this field */ - dtl->enqueue_to_dispatch_time = DISPATCH_LOG_BYTES; + dtl->enqueue_to_dispatch_time = cpu_to_be32(DISPATCH_LOG_BYTES); ret = register_dtl(hard_smp_processor_id(), __pa(dtl)); if (ret) pr_err("WARNING: DTL registration of cpu %d (hw %d) failed " @@ -354,7 +354,7 @@ static int alloc_dispatch_log_kmem_cache(void) } early_initcall(alloc_dispatch_log_kmem_cache); -static void pSeries_idle(void) +static void pseries_lpar_idle(void) { /* This would call on the cpuidle framework, and the back-end pseries * driver to go to idle states @@ -362,10 +362,22 @@ static void pSeries_idle(void) if (cpuidle_idle_call()) { /* On error, execute default handler * to go into low thread priority and possibly - * low power mode. + * low power mode by cedeing processor to hypervisor */ - HMT_low(); - HMT_very_low(); + + /* Indicate to hypervisor that we are idle. */ + get_lppaca()->idle = 1; + + /* + * Yield the processor to the hypervisor. We return if + * an external interrupt occurs (which are driven prior + * to returning here) or if a prod occurs from another + * processor. When returning here, external interrupts + * are enabled. + */ + cede_processor(); + + get_lppaca()->idle = 0; } } @@ -430,6 +442,32 @@ static void pSeries_machine_kexec(struct kimage *image) } #endif +#ifdef __LITTLE_ENDIAN__ +long pseries_big_endian_exceptions(void) +{ + long rc; + + while (1) { + rc = enable_big_endian_exceptions(); + if (!H_IS_LONG_BUSY(rc)) + return rc; + mdelay(get_longbusy_msecs(rc)); + } +} + +static long pseries_little_endian_exceptions(void) +{ + long rc; + + while (1) { + rc = enable_little_endian_exceptions(); + if (!H_IS_LONG_BUSY(rc)) + return rc; + mdelay(get_longbusy_msecs(rc)); + } +} +#endif + static void __init pSeries_setup_arch(void) { panic_timeout = 10; @@ -456,15 +494,14 @@ static void __init pSeries_setup_arch(void) pSeries_nvram_init(); - if (firmware_has_feature(FW_FEATURE_SPLPAR)) { + if (firmware_has_feature(FW_FEATURE_LPAR)) { vpa_init(boot_cpuid); - ppc_md.power_save = pSeries_idle; - } - - if (firmware_has_feature(FW_FEATURE_LPAR)) + ppc_md.power_save = pseries_lpar_idle; ppc_md.enable_pmcs = pseries_lpar_enable_pmcs; - else + } else { + /* No special idle routine */ ppc_md.enable_pmcs = power4_enable_pmcs; + } ppc_md.pcibios_root_bridge_prepare = pseries_root_bridge_prepare; @@ -687,6 +724,22 @@ static int __init pSeries_probe(void) /* Now try to figure out if we are running on LPAR */ of_scan_flat_dt(pseries_probe_fw_features, NULL); +#ifdef __LITTLE_ENDIAN__ + if (firmware_has_feature(FW_FEATURE_SET_MODE)) { + long rc; + /* + * Tell the hypervisor that we want our exceptions to + * be taken in little endian mode. If this fails we don't + * want to use BUG() because it will trigger an exception. + */ + rc = pseries_little_endian_exceptions(); + if (rc) { + ppc_md.progress("H_SET_MODE LE exception fail", 0); + panic("Could not enable little endian exceptions"); + } + } +#endif + if (firmware_has_feature(FW_FEATURE_LPAR)) hpte_init_lpar(); else diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c index ca2d1f6..24f58cb 100644 --- a/arch/powerpc/platforms/pseries/smp.c +++ b/arch/powerpc/platforms/pseries/smp.c @@ -43,8 +43,8 @@ #include <asm/cputhreads.h> #include <asm/xics.h> #include <asm/dbell.h> +#include <asm/plpar_wrappers.h> -#include "plpar_wrappers.h" #include "pseries.h" #include "offline_states.h" @@ -233,18 +233,24 @@ static void __init smp_init_pseries(void) alloc_bootmem_cpumask_var(&of_spin_mask); - /* Mark threads which are still spinning in hold loops. */ - if (cpu_has_feature(CPU_FTR_SMT)) { - for_each_present_cpu(i) { - if (cpu_thread_in_core(i) == 0) - cpumask_set_cpu(i, of_spin_mask); - } - } else { - cpumask_copy(of_spin_mask, cpu_present_mask); + /* + * Mark threads which are still spinning in hold loops + * + * We know prom_init will not have started them if RTAS supports + * query-cpu-stopped-state. + */ + if (rtas_token("query-cpu-stopped-state") == RTAS_UNKNOWN_SERVICE) { + if (cpu_has_feature(CPU_FTR_SMT)) { + for_each_present_cpu(i) { + if (cpu_thread_in_core(i) == 0) + cpumask_set_cpu(i, of_spin_mask); + } + } else + cpumask_copy(of_spin_mask, cpu_present_mask); + + cpumask_clear_cpu(boot_cpuid, of_spin_mask); } - cpumask_clear_cpu(boot_cpuid, of_spin_mask); - /* Non-lpar has additional take/give timebase */ if (rtas_token("freeze-time-base") != RTAS_UNKNOWN_SERVICE) { smp_ops->give_timebase = rtas_give_timebase; diff --git a/arch/powerpc/platforms/pseries/suspend.c b/arch/powerpc/platforms/pseries/suspend.c index 5f997e7..16a2552 100644 --- a/arch/powerpc/platforms/pseries/suspend.c +++ b/arch/powerpc/platforms/pseries/suspend.c @@ -106,7 +106,7 @@ static int pseries_prepare_late(void) atomic_set(&suspend_data.done, 0); atomic_set(&suspend_data.error, 0); suspend_data.complete = &suspend_work; - INIT_COMPLETION(suspend_work); + reinit_completion(&suspend_work); return 0; } diff --git a/arch/powerpc/platforms/wsp/chroma.c b/arch/powerpc/platforms/wsp/chroma.c index 8ef53bc..aaa46b3 100644 --- a/arch/powerpc/platforms/wsp/chroma.c +++ b/arch/powerpc/platforms/wsp/chroma.c @@ -15,6 +15,7 @@ #include <linux/of.h> #include <linux/smp.h> #include <linux/time.h> +#include <linux/of_fdt.h> #include <asm/machdep.h> #include <asm/udbg.h> diff --git a/arch/powerpc/platforms/wsp/h8.c b/arch/powerpc/platforms/wsp/h8.c index d18e6cc..a3c87f3 100644 --- a/arch/powerpc/platforms/wsp/h8.c +++ b/arch/powerpc/platforms/wsp/h8.c @@ -10,6 +10,7 @@ #include <linux/kernel.h> #include <linux/of.h> #include <linux/io.h> +#include <linux/of_address.h> #include "wsp.h" diff --git a/arch/powerpc/platforms/wsp/ics.c b/arch/powerpc/platforms/wsp/ics.c index 2d3b1dd..9cd92e6 100644 --- a/arch/powerpc/platforms/wsp/ics.c +++ b/arch/powerpc/platforms/wsp/ics.c @@ -18,6 +18,8 @@ #include <linux/smp.h> #include <linux/spinlock.h> #include <linux/types.h> +#include <linux/of_address.h> +#include <linux/of_irq.h> #include <asm/io.h> #include <asm/irq.h> diff --git a/arch/powerpc/platforms/wsp/opb_pic.c b/arch/powerpc/platforms/wsp/opb_pic.c index cb565bf..3f67298 100644 --- a/arch/powerpc/platforms/wsp/opb_pic.c +++ b/arch/powerpc/platforms/wsp/opb_pic.c @@ -15,6 +15,8 @@ #include <linux/of.h> #include <linux/slab.h> #include <linux/time.h> +#include <linux/of_address.h> +#include <linux/of_irq.h> #include <asm/reg_a2.h> #include <asm/irq.h> diff --git a/arch/powerpc/platforms/wsp/psr2.c b/arch/powerpc/platforms/wsp/psr2.c index 508ec82..a87b414 100644 --- a/arch/powerpc/platforms/wsp/psr2.c +++ b/arch/powerpc/platforms/wsp/psr2.c @@ -15,6 +15,7 @@ #include <linux/of.h> #include <linux/smp.h> #include <linux/time.h> +#include <linux/of_fdt.h> #include <asm/machdep.h> #include <asm/udbg.h> diff --git a/arch/powerpc/platforms/wsp/scom_smp.c b/arch/powerpc/platforms/wsp/scom_smp.c index b56b70a..268bc89 100644 --- a/arch/powerpc/platforms/wsp/scom_smp.c +++ b/arch/powerpc/platforms/wsp/scom_smp.c @@ -116,7 +116,14 @@ static int a2_scom_ram(scom_map_t scom, int thread, u32 insn, int extmask) scom_write(scom, SCOM_RAMIC, cmd); - while (!((val = scom_read(scom, SCOM_RAMC)) & mask)) { + for (;;) { + if (scom_read(scom, SCOM_RAMC, &val) != 0) { + pr_err("SCOM error on instruction 0x%08x, thread %d\n", + insn, thread); + return -1; + } + if (val & mask) + break; pr_devel("Waiting on RAMC = 0x%llx\n", val); if (++n == 3) { pr_err("RAMC timeout on instruction 0x%08x, thread %d\n", @@ -151,9 +158,7 @@ static int a2_scom_getgpr(scom_map_t scom, int thread, int gpr, int alt, if (rc) return rc; - *out_gpr = scom_read(scom, SCOM_RAMD); - - return 0; + return scom_read(scom, SCOM_RAMD, out_gpr); } static int a2_scom_getspr(scom_map_t scom, int thread, int spr, u64 *out_spr) @@ -353,7 +358,10 @@ int a2_scom_startup_cpu(unsigned int lcpu, int thr_idx, struct device_node *np) pr_devel("Bringing up CPU%d using SCOM...\n", lcpu); - pccr0 = scom_read(scom, SCOM_PCCR0); + if (scom_read(scom, SCOM_PCCR0, &pccr0) != 0) { + printk(KERN_ERR "XSCOM failure readng PCCR0 on CPU%d\n", lcpu); + return -1; + } scom_write(scom, SCOM_PCCR0, pccr0 | SCOM_PCCR0_ENABLE_DEBUG | SCOM_PCCR0_ENABLE_RAM); diff --git a/arch/powerpc/platforms/wsp/scom_wsp.c b/arch/powerpc/platforms/wsp/scom_wsp.c index 4052e22..6538b4d 100644 --- a/arch/powerpc/platforms/wsp/scom_wsp.c +++ b/arch/powerpc/platforms/wsp/scom_wsp.c @@ -14,6 +14,7 @@ #include <linux/of.h> #include <linux/spinlock.h> #include <linux/types.h> +#include <linux/of_address.h> #include <asm/cputhreads.h> #include <asm/reg_a2.h> @@ -50,18 +51,22 @@ static void wsp_scom_unmap(scom_map_t map) iounmap((void *)map); } -static u64 wsp_scom_read(scom_map_t map, u32 reg) +static int wsp_scom_read(scom_map_t map, u64 reg, u64 *value) { u64 __iomem *addr = (u64 __iomem *)map; - return in_be64(addr + reg); + *value = in_be64(addr + reg); + + return 0; } -static void wsp_scom_write(scom_map_t map, u32 reg, u64 value) +static int wsp_scom_write(scom_map_t map, u64 reg, u64 value) { u64 __iomem *addr = (u64 __iomem *)map; - return out_be64(addr + reg, value); + out_be64(addr + reg, value); + + return 0; } static const struct scom_controller wsp_scom_controller = { diff --git a/arch/powerpc/platforms/wsp/wsp.c b/arch/powerpc/platforms/wsp/wsp.c index d25cc96..58cd1f0 100644 --- a/arch/powerpc/platforms/wsp/wsp.c +++ b/arch/powerpc/platforms/wsp/wsp.c @@ -13,6 +13,7 @@ #include <linux/smp.h> #include <linux/delay.h> #include <linux/time.h> +#include <linux/of_address.h> #include <asm/scom.h> @@ -89,6 +90,7 @@ void wsp_halt(void) struct device_node *dn; struct device_node *mine; struct device_node *me; + int rc; me = of_get_cpu_node(smp_processor_id(), NULL); mine = scom_find_parent(me); @@ -101,15 +103,15 @@ void wsp_halt(void) /* read-modify-write it so the HW probe does not get * confused */ - val = scom_read(m, 0); - val |= 1; - scom_write(m, 0, val); + rc = scom_read(m, 0, &val); + if (rc == 0) + scom_write(m, 0, val | 1); scom_unmap(m); } m = scom_map(mine, 0, 1); - val = scom_read(m, 0); - val |= 1; - scom_write(m, 0, val); + rc = scom_read(m, 0, &val); + if (rc == 0) + scom_write(m, 0, val | 1); /* should never return */ scom_unmap(m); } |