diff options
author | Scott Wood <scottwood@freescale.com> | 2015-02-13 22:12:06 (GMT) |
---|---|---|
committer | Scott Wood <scottwood@freescale.com> | 2015-02-13 22:19:22 (GMT) |
commit | 6faa2909871d8937cb2f79a10e1b21ffe193fac1 (patch) | |
tree | f558a94f1553814cc122ab8d9e04c0ebad5262a5 /arch | |
parent | fcb2fb84301c673ee15ca04e7a2fc965712d49a0 (diff) | |
download | linux-fsl-qoriq-6faa2909871d8937cb2f79a10e1b21ffe193fac1.tar.xz |
Reset to 3.12.37
Diffstat (limited to 'arch')
460 files changed, 4631 insertions, 2941 deletions
diff --git a/arch/Kconfig b/arch/Kconfig index 77e7e80..af2cc6e 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -6,7 +6,6 @@ config OPROFILE tristate "OProfile system profiling" depends on PROFILING depends on HAVE_OPROFILE - depends on !PREEMPT_RT_FULL select RING_BUFFER select RING_BUFFER_ALLOW_SWAP help diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c index ee01270..98838a0 100644 --- a/arch/alpha/mm/fault.c +++ b/arch/alpha/mm/fault.c @@ -107,7 +107,7 @@ do_page_fault(unsigned long address, unsigned long mmcsr, /* If we're in an interrupt context, or have no user context, we must not take the fault. */ - if (!mm || pagefault_disabled()) + if (!mm || in_atomic()) goto no_context; #ifdef CONFIG_ALPHA_LARGE_VMALLOC diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h index e4abdaa..b7d4dab 100644 --- a/arch/arc/include/asm/cache.h +++ b/arch/arc/include/asm/cache.h @@ -61,4 +61,31 @@ extern void read_decode_cache_bcr(void); #endif /* !__ASSEMBLY__ */ +/* Instruction cache related Auxiliary registers */ +#define ARC_REG_IC_BCR 0x77 /* Build Config reg */ +#define ARC_REG_IC_IVIC 0x10 +#define ARC_REG_IC_CTRL 0x11 +#define ARC_REG_IC_IVIL 0x19 +#if defined(CONFIG_ARC_MMU_V3) || defined (CONFIG_ARC_MMU_V4) +#define ARC_REG_IC_PTAG 0x1E +#endif + +/* Bit val in IC_CTRL */ +#define IC_CTRL_CACHE_DISABLE 0x1 + +/* Data cache related Auxiliary registers */ +#define ARC_REG_DC_BCR 0x72 /* Build Config reg */ +#define ARC_REG_DC_IVDC 0x47 +#define ARC_REG_DC_CTRL 0x48 +#define ARC_REG_DC_IVDL 0x4A +#define ARC_REG_DC_FLSH 0x4B +#define ARC_REG_DC_FLDL 0x4C +#if defined(CONFIG_ARC_MMU_V3) || defined (CONFIG_ARC_MMU_V4) +#define ARC_REG_DC_PTAG 0x5C +#endif + +/* Bit val in DC_CTRL */ +#define DC_CTRL_INV_MODE_FLUSH 0x40 +#define DC_CTRL_FLUSH_STATUS 0x100 + #endif /* _ASM_CACHE_H */ diff --git a/arch/arc/include/uapi/asm/ptrace.h b/arch/arc/include/uapi/asm/ptrace.h index 2618cc1..76a7739 100644 --- a/arch/arc/include/uapi/asm/ptrace.h +++ b/arch/arc/include/uapi/asm/ptrace.h @@ -11,6 +11,7 @@ #ifndef _UAPI__ASM_ARC_PTRACE_H #define _UAPI__ASM_ARC_PTRACE_H +#define PTRACE_GET_THREAD_AREA 25 #ifndef __ASSEMBLY__ /* diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S index b908dde..15588b0 100644 --- a/arch/arc/kernel/entry.S +++ b/arch/arc/kernel/entry.S @@ -610,11 +610,13 @@ resume_user_mode_begin: resume_kernel_mode: -#ifdef CONFIG_PREEMPT - - ; This is a must for preempt_schedule_irq() + ; Disable Interrupts from this point on + ; CONFIG_PREEMPT: This is a must for preempt_schedule_irq() + ; !CONFIG_PREEMPT: To ensure restore_regs is intr safe IRQ_DISABLE r9 +#ifdef CONFIG_PREEMPT + ; Can't preempt if preemption disabled GET_CURR_THR_INFO_FROM_SP r10 ld r8, [r10, THREAD_INFO_PREEMPT_COUNT] diff --git a/arch/arc/kernel/head.S b/arch/arc/kernel/head.S index 0f944f0..a2bca37 100644 --- a/arch/arc/kernel/head.S +++ b/arch/arc/kernel/head.S @@ -12,10 +12,42 @@ * to skip certain things during boot on simulator */ +#include <linux/linkage.h> #include <asm/asm-offsets.h> #include <asm/entry.h> -#include <linux/linkage.h> #include <asm/arcregs.h> +#include <asm/cache.h> + +.macro CPU_EARLY_SETUP + + ; Setting up Vectror Table (in case exception happens in early boot + sr @_int_vec_base_lds, [AUX_INTR_VEC_BASE] + + ; Disable I-cache/D-cache if kernel so configured + lr r5, [ARC_REG_IC_BCR] + breq r5, 0, 1f ; I$ doesn't exist + lr r5, [ARC_REG_IC_CTRL] +#ifdef CONFIG_ARC_HAS_ICACHE + bclr r5, r5, 0 ; 0 - Enable, 1 is Disable +#else + bset r5, r5, 0 ; I$ exists, but is not used +#endif + sr r5, [ARC_REG_IC_CTRL] + +1: + lr r5, [ARC_REG_DC_BCR] + breq r5, 0, 1f ; D$ doesn't exist + lr r5, [ARC_REG_DC_CTRL] + bclr r5, r5, 6 ; Invalidate (discard w/o wback) +#ifdef CONFIG_ARC_HAS_DCACHE + bclr r5, r5, 0 ; Enable (+Inv) +#else + bset r5, r5, 0 ; Disable (+Inv) +#endif + sr r5, [ARC_REG_DC_CTRL] + +1: +.endm .cpu A7 @@ -24,13 +56,13 @@ .globl stext stext: ;------------------------------------------------------------------- - ; Don't clobber r0-r4 yet. It might have bootloader provided info + ; Don't clobber r0-r2 yet. It might have bootloader provided info ;------------------------------------------------------------------- - sr @_int_vec_base_lds, [AUX_INTR_VEC_BASE] + CPU_EARLY_SETUP #ifdef CONFIG_SMP - ; Only Boot (Master) proceeds. Others wait in platform dependent way + ; Ensure Boot (Master) proceeds. Others wait in platform dependent way ; IDENTITY Reg [ 3 2 1 0 ] ; (cpu-id) ^^^ => Zero for UP ARC700 ; => #Core-ID if SMP (Master 0) @@ -39,7 +71,8 @@ stext: ; need to make sure only boot cpu takes this path. GET_CPU_ID r5 cmp r5, 0 - jnz arc_platform_smp_wait_to_boot + mov.ne r0, r5 + jne arc_platform_smp_wait_to_boot #endif ; Clear BSS before updating any globals ; XXX: use ZOL here @@ -101,7 +134,7 @@ stext: first_lines_of_secondary: - sr @_int_vec_base_lds, [AUX_INTR_VEC_BASE] + CPU_EARLY_SETUP ; setup per-cpu idle task as "current" on this CPU ld r0, [@secondary_idle_tsk] diff --git a/arch/arc/kernel/ptrace.c b/arch/arc/kernel/ptrace.c index 5d76706..13b3ffb 100644 --- a/arch/arc/kernel/ptrace.c +++ b/arch/arc/kernel/ptrace.c @@ -146,6 +146,10 @@ long arch_ptrace(struct task_struct *child, long request, pr_debug("REQ=%ld: ADDR =0x%lx, DATA=0x%lx)\n", request, addr, data); switch (request) { + case PTRACE_GET_THREAD_AREA: + ret = put_user(task_thread_info(child)->thr_ptr, + (unsigned long __user *)data); + break; default: ret = ptrace_request(child, request, addr, data); break; diff --git a/arch/arc/mm/cache_arc700.c b/arch/arc/mm/cache_arc700.c index 5a1259c..7801034 100644 --- a/arch/arc/mm/cache_arc700.c +++ b/arch/arc/mm/cache_arc700.c @@ -73,37 +73,9 @@ #include <asm/cachectl.h> #include <asm/setup.h> -/* Instruction cache related Auxiliary registers */ -#define ARC_REG_IC_BCR 0x77 /* Build Config reg */ -#define ARC_REG_IC_IVIC 0x10 -#define ARC_REG_IC_CTRL 0x11 -#define ARC_REG_IC_IVIL 0x19 -#if (CONFIG_ARC_MMU_VER > 2) -#define ARC_REG_IC_PTAG 0x1E -#endif - -/* Bit val in IC_CTRL */ -#define IC_CTRL_CACHE_DISABLE 0x1 - -/* Data cache related Auxiliary registers */ -#define ARC_REG_DC_BCR 0x72 /* Build Config reg */ -#define ARC_REG_DC_IVDC 0x47 -#define ARC_REG_DC_CTRL 0x48 -#define ARC_REG_DC_IVDL 0x4A -#define ARC_REG_DC_FLSH 0x4B -#define ARC_REG_DC_FLDL 0x4C -#if (CONFIG_ARC_MMU_VER > 2) -#define ARC_REG_DC_PTAG 0x5C -#endif - -/* Bit val in DC_CTRL */ -#define DC_CTRL_INV_MODE_FLUSH 0x40 -#define DC_CTRL_FLUSH_STATUS 0x100 - -char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len) +char *arc_cache_mumbojumbo(int c, char *buf, int len) { int n = 0; - unsigned int c = smp_processor_id(); #define PR_CACHE(p, enb, str) \ { \ @@ -169,72 +141,43 @@ void read_decode_cache_bcr(void) */ void arc_cache_init(void) { - unsigned int cpu = smp_processor_id(); - struct cpuinfo_arc_cache *ic = &cpuinfo_arc700[cpu].icache; - struct cpuinfo_arc_cache *dc = &cpuinfo_arc700[cpu].dcache; - unsigned int dcache_does_alias, temp; + unsigned int __maybe_unused cpu = smp_processor_id(); + struct cpuinfo_arc_cache __maybe_unused *ic, __maybe_unused *dc; char str[256]; printk(arc_cache_mumbojumbo(0, str, sizeof(str))); - if (!ic->ver) - goto chk_dc; - #ifdef CONFIG_ARC_HAS_ICACHE - /* 1. Confirm some of I-cache params which Linux assumes */ - if (ic->line_len != ARC_ICACHE_LINE_LEN) - panic("Cache H/W doesn't match kernel Config"); - - if (ic->ver != CONFIG_ARC_MMU_VER) - panic("Cache ver doesn't match MMU ver\n"); -#endif - - /* Enable/disable I-Cache */ - temp = read_aux_reg(ARC_REG_IC_CTRL); - -#ifdef CONFIG_ARC_HAS_ICACHE - temp &= ~IC_CTRL_CACHE_DISABLE; -#else - temp |= IC_CTRL_CACHE_DISABLE; + ic = &cpuinfo_arc700[cpu].icache; + if (ic->ver) { + if (ic->line_len != ARC_ICACHE_LINE_LEN) + panic("ICache line [%d] != kernel Config [%d]", + ic->line_len, ARC_ICACHE_LINE_LEN); + + if (ic->ver != CONFIG_ARC_MMU_VER) + panic("Cache ver [%d] doesn't match MMU ver [%d]\n", + ic->ver, CONFIG_ARC_MMU_VER); + } #endif - write_aux_reg(ARC_REG_IC_CTRL, temp); - -chk_dc: - if (!dc->ver) - return; - #ifdef CONFIG_ARC_HAS_DCACHE - if (dc->line_len != ARC_DCACHE_LINE_LEN) - panic("Cache H/W doesn't match kernel Config"); + dc = &cpuinfo_arc700[cpu].dcache; + if (dc->ver) { + unsigned int dcache_does_alias; - /* check for D-Cache aliasing */ - dcache_does_alias = (dc->sz / dc->assoc) > PAGE_SIZE; + if (dc->line_len != ARC_DCACHE_LINE_LEN) + panic("DCache line [%d] != kernel Config [%d]", + dc->line_len, ARC_DCACHE_LINE_LEN); - if (dcache_does_alias && !cache_is_vipt_aliasing()) - panic("Enable CONFIG_ARC_CACHE_VIPT_ALIASING\n"); - else if (!dcache_does_alias && cache_is_vipt_aliasing()) - panic("Don't need CONFIG_ARC_CACHE_VIPT_ALIASING\n"); -#endif + /* check for D-Cache aliasing */ + dcache_does_alias = (dc->sz / dc->assoc) > PAGE_SIZE; - /* Set the default Invalidate Mode to "simpy discard dirty lines" - * as this is more frequent then flush before invalidate - * Ofcourse we toggle this default behviour when desired - */ - temp = read_aux_reg(ARC_REG_DC_CTRL); - temp &= ~DC_CTRL_INV_MODE_FLUSH; - -#ifdef CONFIG_ARC_HAS_DCACHE - /* Enable D-Cache: Clear Bit 0 */ - write_aux_reg(ARC_REG_DC_CTRL, temp & ~IC_CTRL_CACHE_DISABLE); -#else - /* Flush D cache */ - write_aux_reg(ARC_REG_DC_FLSH, 0x1); - /* Disable D cache */ - write_aux_reg(ARC_REG_DC_CTRL, temp | IC_CTRL_CACHE_DISABLE); + if (dcache_does_alias && !cache_is_vipt_aliasing()) + panic("Enable CONFIG_ARC_CACHE_VIPT_ALIASING\n"); + else if (!dcache_does_alias && cache_is_vipt_aliasing()) + panic("Don't need CONFIG_ARC_CACHE_VIPT_ALIASING\n"); + } #endif - - return; } #define OP_INV 0x1 diff --git a/arch/arc/plat-arcfpga/Kconfig b/arch/arc/plat-arcfpga/Kconfig index 295cefe..33058aa 100644 --- a/arch/arc/plat-arcfpga/Kconfig +++ b/arch/arc/plat-arcfpga/Kconfig @@ -33,7 +33,6 @@ config ISS_SMP_EXTN bool "ARC SMP Extensions (ISS Models only)" default n depends on SMP - select ARC_HAS_COH_RTSC help SMP Extensions to ARC700, in a "simulation only" Model, supported in ARC ISS (Instruction Set Simulator). diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 2ec9220..99e1ce9 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -5,6 +5,7 @@ config ARM select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAVE_CUSTOM_GPIO_H + select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_WANT_IPC_PARSE_VERSION select BUILDTIME_EXTABLE_SORT if MMU select CLONE_BACKWARDS @@ -51,7 +52,6 @@ config ARM select HAVE_MOD_ARCH_SPECIFIC if ARM_UNWIND select HAVE_OPROFILE if (HAVE_PERF_EVENTS) select HAVE_PERF_EVENTS - select HAVE_PREEMPT_LAZY select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_SYSCALL_TRACEPOINTS select HAVE_UID16 @@ -893,7 +893,7 @@ config ARCH_MULTI_V5 bool "ARMv5 based platforms (ARM926T, XSCALE, PJ1, ...)" depends on !ARCH_MULTI_V6_V7 select ARCH_MULTI_V4_V5 - select CPU_ARM926T if (!CPU_ARM946E || CPU_ARM1020 || \ + select CPU_ARM926T if !(CPU_ARM946E || CPU_ARM1020 || \ CPU_ARM1020E || CPU_ARM1022 || CPU_ARM1026 || \ CPU_XSCALE || CPU_XSC3 || CPU_MOHAWK || CPU_FEROCEON) diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index 75189f1..de5143e 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -399,8 +399,7 @@ dtb_check_done: add sp, sp, r6 #endif - tst r4, #1 - bleq cache_clean_flush + bl cache_clean_flush adr r0, BSYM(restart) add r0, r0, r6 @@ -1053,6 +1052,8 @@ cache_clean_flush: b call_cache_fn __armv4_mpu_cache_flush: + tst r4, #1 + movne pc, lr mov r2, #1 mov r3, #0 mcr p15, 0, ip, c7, c6, 0 @ invalidate D cache @@ -1070,6 +1071,8 @@ __armv4_mpu_cache_flush: mov pc, lr __fa526_cache_flush: + tst r4, #1 + movne pc, lr mov r1, #0 mcr p15, 0, r1, c7, c14, 0 @ clean and invalidate D cache mcr p15, 0, r1, c7, c5, 0 @ flush I cache @@ -1078,13 +1081,16 @@ __fa526_cache_flush: __armv6_mmu_cache_flush: mov r1, #0 - mcr p15, 0, r1, c7, c14, 0 @ clean+invalidate D + tst r4, #1 + mcreq p15, 0, r1, c7, c14, 0 @ clean+invalidate D mcr p15, 0, r1, c7, c5, 0 @ invalidate I+BTB - mcr p15, 0, r1, c7, c15, 0 @ clean+invalidate unified + mcreq p15, 0, r1, c7, c15, 0 @ clean+invalidate unified mcr p15, 0, r1, c7, c10, 4 @ drain WB mov pc, lr __armv7_mmu_cache_flush: + tst r4, #1 + bne iflush mrc p15, 0, r10, c0, c1, 5 @ read ID_MMFR1 tst r10, #0xf << 16 @ hierarchical cache (ARMv7) mov r10, #0 @@ -1145,6 +1151,8 @@ iflush: mov pc, lr __armv5tej_mmu_cache_flush: + tst r4, #1 + movne pc, lr 1: mrc p15, 0, r15, c7, c14, 3 @ test,clean,invalidate D cache bne 1b mcr p15, 0, r0, c7, c5, 0 @ flush I cache @@ -1152,6 +1160,8 @@ __armv5tej_mmu_cache_flush: mov pc, lr __armv4_mmu_cache_flush: + tst r4, #1 + movne pc, lr mov r2, #64*1024 @ default: 32K dcache size (*2) mov r11, #32 @ default: 32 byte line size mrc p15, 0, r3, c0, c0, 1 @ read cache type @@ -1185,6 +1195,8 @@ no_cache_id: __armv3_mmu_cache_flush: __armv3_mpu_cache_flush: + tst r4, #1 + movne pc, lr mov r1, #0 mcr p15, 0, r1, c7, c0, 0 @ invalidate whole cache v3 mov pc, lr diff --git a/arch/arm/boot/dts/am335x-bone-common.dtsi b/arch/arm/boot/dts/am335x-bone-common.dtsi index 2f66ded..e6e952e 100644 --- a/arch/arm/boot/dts/am335x-bone-common.dtsi +++ b/arch/arm/boot/dts/am335x-bone-common.dtsi @@ -120,7 +120,7 @@ musb: usb@47400000 { status = "okay"; - control@44e10000 { + control@44e10620 { status = "okay"; }; @@ -141,7 +141,7 @@ dr_mode = "host"; }; - dma-controller@07402000 { + dma-controller@47402000 { status = "okay"; }; }; diff --git a/arch/arm/boot/dts/am335x-evm.dts b/arch/arm/boot/dts/am335x-evm.dts index e8ec875..b2f4769 100644 --- a/arch/arm/boot/dts/am335x-evm.dts +++ b/arch/arm/boot/dts/am335x-evm.dts @@ -174,7 +174,7 @@ musb: usb@47400000 { status = "okay"; - control@44e10000 { + control@44e10620 { status = "okay"; }; @@ -195,7 +195,7 @@ dr_mode = "host"; }; - dma-controller@07402000 { + dma-controller@47402000 { status = "okay"; }; }; diff --git a/arch/arm/boot/dts/am335x-evmsk.dts b/arch/arm/boot/dts/am335x-evmsk.dts index 4f339fa..521d92a 100644 --- a/arch/arm/boot/dts/am335x-evmsk.dts +++ b/arch/arm/boot/dts/am335x-evmsk.dts @@ -211,7 +211,7 @@ musb: usb@47400000 { status = "okay"; - control@44e10000 { + control@44e10620 { status = "okay"; }; diff --git a/arch/arm/boot/dts/am33xx.dtsi b/arch/arm/boot/dts/am33xx.dtsi index f9c5da9..e9b6775 100644 --- a/arch/arm/boot/dts/am33xx.dtsi +++ b/arch/arm/boot/dts/am33xx.dtsi @@ -346,7 +346,7 @@ ti,hwmods = "usb_otg_hs"; status = "disabled"; - ctrl_mod: control@44e10000 { + ctrl_mod: control@44e10620 { compatible = "ti,am335x-usb-ctrl-module"; reg = <0x44e10620 0x10 0x44e10648 0x4>; @@ -449,7 +449,7 @@ "tx14", "tx15"; }; - cppi41dma: dma-controller@07402000 { + cppi41dma: dma-controller@47402000 { compatible = "ti,am3359-cppi41"; reg = <0x47400000 0x1000 0x47402000 0x1000 diff --git a/arch/arm/boot/dts/armada-370-xp.dtsi b/arch/arm/boot/dts/armada-370-xp.dtsi index 364a63d..beae26c 100644 --- a/arch/arm/boot/dts/armada-370-xp.dtsi +++ b/arch/arm/boot/dts/armada-370-xp.dtsi @@ -156,6 +156,7 @@ #size-cells = <0>; compatible = "marvell,orion-mdio"; reg = <0x72004 0x4>; + clocks = <&gateclk 4>; }; eth0: ethernet@70000 { diff --git a/arch/arm/boot/dts/armada-xp-db.dts b/arch/arm/boot/dts/armada-xp-db.dts index bcf6d79..8c2fe44 100644 --- a/arch/arm/boot/dts/armada-xp-db.dts +++ b/arch/arm/boot/dts/armada-xp-db.dts @@ -40,7 +40,7 @@ /* Device Bus parameters are required */ /* Read parameters */ - devbus,bus-width = <8>; + devbus,bus-width = <16>; devbus,turn-off-ps = <60000>; devbus,badr-skew-ps = <0>; devbus,acc-first-ps = <124000>; diff --git a/arch/arm/boot/dts/armada-xp-gp.dts b/arch/arm/boot/dts/armada-xp-gp.dts index 2298e4a..e325e62 100644 --- a/arch/arm/boot/dts/armada-xp-gp.dts +++ b/arch/arm/boot/dts/armada-xp-gp.dts @@ -49,7 +49,7 @@ /* Device Bus parameters are required */ /* Read parameters */ - devbus,bus-width = <8>; + devbus,bus-width = <16>; devbus,turn-off-ps = <60000>; devbus,badr-skew-ps = <0>; devbus,acc-first-ps = <124000>; diff --git a/arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts b/arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts index 5695afc..d6cce8a 100644 --- a/arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts +++ b/arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts @@ -37,7 +37,7 @@ /* Device Bus parameters are required */ /* Read parameters */ - devbus,bus-width = <8>; + devbus,bus-width = <16>; devbus,turn-off-ps = <60000>; devbus,badr-skew-ps = <0>; devbus,acc-first-ps = <124000>; diff --git a/arch/arm/boot/dts/at91sam9263.dtsi b/arch/arm/boot/dts/at91sam9263.dtsi index d5bd65f..55bb7f3 100644 --- a/arch/arm/boot/dts/at91sam9263.dtsi +++ b/arch/arm/boot/dts/at91sam9263.dtsi @@ -506,6 +506,7 @@ compatible = "atmel,hsmci"; reg = <0xfff80000 0x600>; interrupts = <10 IRQ_TYPE_LEVEL_HIGH 0>; + pinctrl-names = "default"; #address-cells = <1>; #size-cells = <0>; status = "disabled"; @@ -515,6 +516,7 @@ compatible = "atmel,hsmci"; reg = <0xfff84000 0x600>; interrupts = <11 IRQ_TYPE_LEVEL_HIGH 0>; + pinctrl-names = "default"; #address-cells = <1>; #size-cells = <0>; status = "disabled"; diff --git a/arch/arm/boot/dts/exynos5250-arndale.dts b/arch/arm/boot/dts/exynos5250-arndale.dts index cee55fa..47fa5ab 100644 --- a/arch/arm/boot/dts/exynos5250-arndale.dts +++ b/arch/arm/boot/dts/exynos5250-arndale.dts @@ -286,6 +286,7 @@ regulator-name = "vdd_g3d"; regulator-min-microvolt = <1000000>; regulator-max-microvolt = <1000000>; + regulator-always-on; regulator-boot-on; op_mode = <1>; }; diff --git a/arch/arm/boot/dts/imx25.dtsi b/arch/arm/boot/dts/imx25.dtsi index 737ed5d..cf3300a 100644 --- a/arch/arm/boot/dts/imx25.dtsi +++ b/arch/arm/boot/dts/imx25.dtsi @@ -30,6 +30,7 @@ spi2 = &spi3; usb0 = &usbotg; usb1 = &usbhost1; + ethernet0 = &fec; }; cpus { @@ -157,7 +158,7 @@ #size-cells = <0>; compatible = "fsl,imx25-cspi", "fsl,imx35-cspi"; reg = <0x43fa4000 0x4000>; - clocks = <&clks 62>, <&clks 62>; + clocks = <&clks 78>, <&clks 78>; clock-names = "ipg", "per"; interrupts = <14>; status = "disabled"; @@ -351,7 +352,7 @@ compatible = "fsl,imx25-pwm", "fsl,imx27-pwm"; #pwm-cells = <2>; reg = <0x53fa0000 0x4000>; - clocks = <&clks 106>, <&clks 36>; + clocks = <&clks 106>, <&clks 52>; clock-names = "ipg", "per"; interrupts = <36>; }; @@ -370,7 +371,7 @@ compatible = "fsl,imx25-pwm", "fsl,imx27-pwm"; #pwm-cells = <2>; reg = <0x53fa8000 0x4000>; - clocks = <&clks 107>, <&clks 36>; + clocks = <&clks 107>, <&clks 52>; clock-names = "ipg", "per"; interrupts = <41>; }; @@ -411,7 +412,7 @@ pwm4: pwm@53fc8000 { compatible = "fsl,imx25-pwm", "fsl,imx27-pwm"; reg = <0x53fc8000 0x4000>; - clocks = <&clks 108>, <&clks 36>; + clocks = <&clks 108>, <&clks 52>; clock-names = "ipg", "per"; interrupts = <42>; }; @@ -457,7 +458,7 @@ compatible = "fsl,imx25-pwm", "fsl,imx27-pwm"; #pwm-cells = <2>; reg = <0x53fe0000 0x4000>; - clocks = <&clks 105>, <&clks 36>; + clocks = <&clks 105>, <&clks 52>; clock-names = "ipg", "per"; interrupts = <26>; }; diff --git a/arch/arm/boot/dts/imx27.dtsi b/arch/arm/boot/dts/imx27.dtsi index b7a1c6d..c07aea4 100644 --- a/arch/arm/boot/dts/imx27.dtsi +++ b/arch/arm/boot/dts/imx27.dtsi @@ -30,6 +30,7 @@ spi0 = &cspi1; spi1 = &cspi2; spi2 = &cspi3; + ethernet0 = &fec; }; aitc: aitc-interrupt-controller@e0000000 { diff --git a/arch/arm/boot/dts/imx51.dtsi b/arch/arm/boot/dts/imx51.dtsi index 54cee65..6d2a534 100644 --- a/arch/arm/boot/dts/imx51.dtsi +++ b/arch/arm/boot/dts/imx51.dtsi @@ -27,6 +27,7 @@ spi0 = &ecspi1; spi1 = &ecspi2; spi2 = &cspi; + ethernet0 = &fec; }; tzic: tz-interrupt-controller@e0000000 { diff --git a/arch/arm/boot/dts/imx53.dtsi b/arch/arm/boot/dts/imx53.dtsi index 4307e80..50eda50 100644 --- a/arch/arm/boot/dts/imx53.dtsi +++ b/arch/arm/boot/dts/imx53.dtsi @@ -33,6 +33,7 @@ spi0 = &ecspi1; spi1 = &ecspi2; spi2 = &cspi; + ethernet0 = &fec; }; cpus { @@ -87,7 +88,7 @@ ipu: ipu@18000000 { #crtc-cells = <1>; compatible = "fsl,imx53-ipu"; - reg = <0x18000000 0x080000000>; + reg = <0x18000000 0x08000000>; interrupts = <11 10>; clocks = <&clks 59>, <&clks 110>, <&clks 61>; clock-names = "bus", "di0", "di1"; diff --git a/arch/arm/boot/dts/kirkwood-mv88f6281gtw-ge.dts b/arch/arm/boot/dts/kirkwood-mv88f6281gtw-ge.dts index 6317e1d..e650e35 100644 --- a/arch/arm/boot/dts/kirkwood-mv88f6281gtw-ge.dts +++ b/arch/arm/boot/dts/kirkwood-mv88f6281gtw-ge.dts @@ -30,6 +30,16 @@ bootargs = "console=ttyS0,115200n8 earlyprintk"; }; + mbus { + pcie-controller { + status = "okay"; + + pcie@1,0 { + status = "okay"; + }; + }; + }; + ocp@f1000000 { pinctrl@10000 { pmx_usb_led: pmx-usb-led { @@ -73,14 +83,6 @@ ehci@50000 { status = "okay"; }; - - pcie-controller { - status = "okay"; - - pcie@1,0 { - status = "okay"; - }; - }; }; gpio-leds { diff --git a/arch/arm/boot/dts/kirkwood-nsa310-common.dtsi b/arch/arm/boot/dts/kirkwood-nsa310-common.dtsi index 06267a9..7c3f4bc 100644 --- a/arch/arm/boot/dts/kirkwood-nsa310-common.dtsi +++ b/arch/arm/boot/dts/kirkwood-nsa310-common.dtsi @@ -4,6 +4,16 @@ / { model = "ZyXEL NSA310"; + mbus { + pcie-controller { + status = "okay"; + + pcie@1,0 { + status = "okay"; + }; + }; + }; + ocp@f1000000 { pinctrl: pinctrl@10000 { @@ -69,14 +79,6 @@ reg = <0x5040000 0x2fc0000>; }; }; - - pcie-controller { - status = "okay"; - - pcie@1,0 { - status = "okay"; - }; - }; }; gpio_poweroff { diff --git a/arch/arm/boot/dts/ste-ccu8540.dts b/arch/arm/boot/dts/ste-ccu8540.dts index 7f3baf5..32dd55e 100644 --- a/arch/arm/boot/dts/ste-ccu8540.dts +++ b/arch/arm/boot/dts/ste-ccu8540.dts @@ -18,6 +18,7 @@ compatible = "st-ericsson,ccu8540", "st-ericsson,u8540"; memory@0 { + device_type = "memory"; reg = <0x20000000 0x1f000000>, <0xc0000000 0x3f000000>; }; diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig index fe88105..f8bca69 100644 --- a/arch/arm/configs/multi_v7_defconfig +++ b/arch/arm/configs/multi_v7_defconfig @@ -116,6 +116,7 @@ CONFIG_FB_SIMPLE=y CONFIG_USB=y CONFIG_USB_XHCI_HCD=y CONFIG_USB_EHCI_HCD=y +CONFIG_USB_EHCI_EXYNOS=y CONFIG_USB_EHCI_TEGRA=y CONFIG_USB_EHCI_HCD_PLATFORM=y CONFIG_USB_ISP1760_HCD=y diff --git a/arch/arm/crypto/aes_glue.c b/arch/arm/crypto/aes_glue.c index 59f7877..e73ec2a 100644 --- a/arch/arm/crypto/aes_glue.c +++ b/arch/arm/crypto/aes_glue.c @@ -103,6 +103,6 @@ module_exit(aes_fini); MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm (ASM)"); MODULE_LICENSE("GPL"); -MODULE_ALIAS("aes"); -MODULE_ALIAS("aes-asm"); +MODULE_ALIAS_CRYPTO("aes"); +MODULE_ALIAS_CRYPTO("aes-asm"); MODULE_AUTHOR("David McCullough <ucdevel@gmail.com>"); diff --git a/arch/arm/crypto/sha1_glue.c b/arch/arm/crypto/sha1_glue.c index 76cd976..ace4cd6 100644 --- a/arch/arm/crypto/sha1_glue.c +++ b/arch/arm/crypto/sha1_glue.c @@ -175,5 +175,5 @@ module_exit(sha1_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm (ARM)"); -MODULE_ALIAS("sha1"); +MODULE_ALIAS_CRYPTO("sha1"); MODULE_AUTHOR("David McCullough <ucdevel@gmail.com>"); diff --git a/arch/arm/include/asm/div64.h b/arch/arm/include/asm/div64.h index 191ada6..662c7bd 100644 --- a/arch/arm/include/asm/div64.h +++ b/arch/arm/include/asm/div64.h @@ -156,7 +156,7 @@ /* Select the best insn combination to perform the */ \ /* actual __m * __n / (__p << 64) operation. */ \ if (!__c) { \ - asm ( "umull %Q0, %R0, %1, %Q2\n\t" \ + asm ( "umull %Q0, %R0, %Q1, %Q2\n\t" \ "mov %Q0, #0" \ : "=&r" (__res) \ : "r" (__m), "r" (__n) \ diff --git a/arch/arm/include/asm/futex.h b/arch/arm/include/asm/futex.h index e42cf59..2aff798 100644 --- a/arch/arm/include/asm/futex.h +++ b/arch/arm/include/asm/futex.h @@ -3,11 +3,6 @@ #ifdef __KERNEL__ -#if defined(CONFIG_CPU_USE_DOMAINS) && defined(CONFIG_SMP) -/* ARM doesn't provide unprivileged exclusive memory accessors */ -#include <asm-generic/futex.h> -#else - #include <linux/futex.h> #include <linux/uaccess.h> #include <asm/errno.h> @@ -164,6 +159,5 @@ futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr) return ret; } -#endif /* !(CPU_USE_DOMAINS && SMP) */ #endif /* __KERNEL__ */ #endif /* _ASM_ARM_FUTEX_H */ diff --git a/arch/arm/include/asm/pgtable-2level.h b/arch/arm/include/asm/pgtable-2level.h index f97ee02..c98c9c8 100644 --- a/arch/arm/include/asm/pgtable-2level.h +++ b/arch/arm/include/asm/pgtable-2level.h @@ -140,6 +140,7 @@ #define L_PTE_MT_DEV_NONSHARED (_AT(pteval_t, 0x0c) << 2) /* 1100 */ #define L_PTE_MT_DEV_WC (_AT(pteval_t, 0x09) << 2) /* 1001 */ #define L_PTE_MT_DEV_CACHED (_AT(pteval_t, 0x0b) << 2) /* 1011 */ +#define L_PTE_MT_VECTORS (_AT(pteval_t, 0x0f) << 2) /* 1111 */ #define L_PTE_MT_MASK (_AT(pteval_t, 0x0f) << 2) #ifndef __ASSEMBLY__ diff --git a/arch/arm/include/asm/switch_to.h b/arch/arm/include/asm/switch_to.h index f3e3d80..c99e259 100644 --- a/arch/arm/include/asm/switch_to.h +++ b/arch/arm/include/asm/switch_to.h @@ -3,13 +3,6 @@ #include <linux/thread_info.h> -#if defined CONFIG_PREEMPT_RT_FULL && defined CONFIG_HIGHMEM -void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p); -#else -static inline void -switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) { } -#endif - /* * For v7 SMP cores running a preemptible kernel we may be pre-empted * during a TLB maintenance operation, so execute an inner-shareable dsb @@ -29,7 +22,6 @@ extern struct task_struct *__switch_to(struct task_struct *, struct thread_info #define switch_to(prev,next,last) \ do { \ - switch_kmaps(prev, next); \ last = __switch_to(prev,task_thread_info(prev), task_thread_info(next)); \ } while (0) diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h index 33cb511..2b8114f 100644 --- a/arch/arm/include/asm/thread_info.h +++ b/arch/arm/include/asm/thread_info.h @@ -43,16 +43,6 @@ struct cpu_context_save { __u32 extra[2]; /* Xscale 'acc' register, etc */ }; -struct arm_restart_block { - union { - /* For user cache flushing */ - struct { - unsigned long start; - unsigned long end; - } cache; - }; -}; - /* * low level task data that entry.S needs immediate access to. * __switch_to() assumes cpu_context follows immediately after cpu_domain. @@ -60,7 +50,6 @@ struct arm_restart_block { struct thread_info { unsigned long flags; /* low level flags */ int preempt_count; /* 0 => preemptable, <0 => bug */ - int preempt_lazy_count; /* 0 => preemptable, <0 => bug */ mm_segment_t addr_limit; /* address limit */ struct task_struct *task; /* main task structure */ struct exec_domain *exec_domain; /* execution domain */ @@ -79,7 +68,6 @@ struct thread_info { unsigned long thumbee_state; /* ThumbEE Handler Base register */ #endif struct restart_block restart_block; - struct arm_restart_block arm_restart_block; }; #define INIT_THREAD_INFO(tsk) \ @@ -160,7 +148,6 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *, #define TIF_SIGPENDING 0 #define TIF_NEED_RESCHED 1 #define TIF_NOTIFY_RESUME 2 /* callback before returning to user */ -#define TIF_NEED_RESCHED_LAZY 3 #define TIF_SYSCALL_TRACE 8 #define TIF_SYSCALL_AUDIT 9 #define TIF_SYSCALL_TRACEPOINT 10 @@ -173,7 +160,6 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *, #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) -#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY) #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) diff --git a/arch/arm/include/asm/tls.h b/arch/arm/include/asm/tls.h index 83259b8..36172ad 100644 --- a/arch/arm/include/asm/tls.h +++ b/arch/arm/include/asm/tls.h @@ -1,6 +1,9 @@ #ifndef __ASMARM_TLS_H #define __ASMARM_TLS_H +#include <linux/compiler.h> +#include <asm/thread_info.h> + #ifdef __ASSEMBLY__ #include <asm/asm-offsets.h> .macro switch_tls_none, base, tp, tpuser, tmp1, tmp2 @@ -50,6 +53,47 @@ #endif #ifndef __ASSEMBLY__ + +static inline void set_tls(unsigned long val) +{ + struct thread_info *thread; + + thread = current_thread_info(); + + thread->tp_value[0] = val; + + /* + * This code runs with preemption enabled and therefore must + * be reentrant with respect to switch_tls. + * + * We need to ensure ordering between the shadow state and the + * hardware state, so that we don't corrupt the hardware state + * with a stale shadow state during context switch. + * + * If we're preempted here, switch_tls will load TPIDRURO from + * thread_info upon resuming execution and the following mcr + * is merely redundant. + */ + barrier(); + + if (!tls_emu) { + if (has_tls_reg) { + asm("mcr p15, 0, %0, c13, c0, 3" + : : "r" (val)); + } else { + /* + * User space must never try to access this + * directly. Expect your app to break + * eventually if you do so. The user helper + * at 0xffff0fe0 must be used instead. (see + * entry-armv.S for details) + */ + *((unsigned int *)0xffff0ff0) = val; + } + + } +} + static inline unsigned long get_tpuser(void) { unsigned long reg = 0; @@ -59,5 +103,23 @@ static inline unsigned long get_tpuser(void) return reg; } + +static inline void set_tpuser(unsigned long val) +{ + /* Since TPIDRURW is fully context-switched (unlike TPIDRURO), + * we need not update thread_info. + */ + if (has_tls_reg && !tls_emu) { + asm("mcr p15, 0, %0, c13, c0, 2" + : : "r" (val)); + } +} + +static inline void flush_tls(void) +{ + set_tls(0); + set_tpuser(0); +} + #endif #endif /* __ASMARM_TLS_H */ diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index 72abdc5..7f3f3cc 100644 --- a/arch/arm/include/asm/uaccess.h +++ b/arch/arm/include/asm/uaccess.h @@ -171,8 +171,9 @@ extern int __put_user_8(void *, unsigned long long); #define __put_user_check(x,p) \ ({ \ unsigned long __limit = current_thread_info()->addr_limit - 1; \ + const typeof(*(p)) __user *__tmp_p = (p); \ register const typeof(*(p)) __r2 asm("r2") = (x); \ - register const typeof(*(p)) __user *__p asm("r0") = (p);\ + register const typeof(*(p)) __user *__p asm("r0") = __tmp_p; \ register unsigned long __l asm("r1") = __limit; \ register int __e asm("r0"); \ switch (sizeof(*(__p))) { \ diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h index 141baa3..cbd6197 100644 --- a/arch/arm/include/asm/unistd.h +++ b/arch/arm/include/asm/unistd.h @@ -48,6 +48,5 @@ */ #define __IGNORE_fadvise64_64 #define __IGNORE_migrate_pages -#define __IGNORE_kcmp #endif /* __ASM_ARM_UNISTD_H */ diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c index 12e46dd..ded0417 100644 --- a/arch/arm/kernel/asm-offsets.c +++ b/arch/arm/kernel/asm-offsets.c @@ -54,7 +54,6 @@ int main(void) BLANK(); DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count)); - DEFINE(TI_PREEMPT_LAZY, offsetof(struct thread_info, preempt_lazy_count)); DEFINE(TI_ADDR_LIMIT, offsetof(struct thread_info, addr_limit)); DEFINE(TI_TASK, offsetof(struct thread_info, task)); DEFINE(TI_EXEC_DOMAIN, offsetof(struct thread_info, exec_domain)); diff --git a/arch/arm/kernel/crash_dump.c b/arch/arm/kernel/crash_dump.c index 90c50d4..5d1286d 100644 --- a/arch/arm/kernel/crash_dump.c +++ b/arch/arm/kernel/crash_dump.c @@ -39,7 +39,7 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf, if (!csize) return 0; - vaddr = ioremap(pfn << PAGE_SHIFT, PAGE_SIZE); + vaddr = ioremap(__pfn_to_phys(pfn), PAGE_SIZE); if (!vaddr) return -ENOMEM; diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 8c5e809..ec3e5cf 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -205,18 +205,11 @@ __irq_svc: #ifdef CONFIG_PREEMPT get_thread_info tsk ldr r8, [tsk, #TI_PREEMPT] @ get preempt count - teq r8, #0 @ if preempt count != 0 - bne 1f @ return from exeption ldr r0, [tsk, #TI_FLAGS] @ get flags - tst r0, #_TIF_NEED_RESCHED @ if NEED_RESCHED is set - blne svc_preempt @ preempt! - - ldr r8, [tsk, #TI_PREEMPT_LAZY] @ get preempt lazy count - teq r8, #0 @ if preempt lazy count != 0 + teq r8, #0 @ if preempt count != 0 movne r0, #0 @ force flags to 0 - tst r0, #_TIF_NEED_RESCHED_LAZY + tst r0, #_TIF_NEED_RESCHED blne svc_preempt -1: #endif svc_exit r5, irq = 1 @ return from exception @@ -231,8 +224,6 @@ svc_preempt: 1: bl preempt_schedule_irq @ irq en/disable is done inside ldr r0, [tsk, #TI_FLAGS] @ get new tasks TI_FLAGS tst r0, #_TIF_NEED_RESCHED - bne 1b - tst r0, #_TIF_NEED_RESCHED_LAZY moveq pc, r8 @ go again b 1b #endif diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S index 39f89fb..88c6bab 100644 --- a/arch/arm/kernel/entry-header.S +++ b/arch/arm/kernel/entry-header.S @@ -132,6 +132,10 @@ orrne r5, V7M_xPSR_FRAMEPTRALIGN biceq r5, V7M_xPSR_FRAMEPTRALIGN + @ ensure bit 0 is cleared in the PC, otherwise behaviour is + @ unpredictable + bic r4, #1 + @ write basic exception frame stmdb r2!, {r1, r3-r5} ldmia sp, {r1, r3-r5} diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c index 9723d17..1e782bd 100644 --- a/arch/arm/kernel/irq.c +++ b/arch/arm/kernel/irq.c @@ -163,7 +163,7 @@ static bool migrate_one_irq(struct irq_desc *desc) c = irq_data_get_irq_chip(d); if (!c->irq_set_affinity) pr_debug("IRQ%u: unable to set affinity\n", d->irq); - else if (c->irq_set_affinity(d, affinity, true) == IRQ_SET_MASK_OK && ret) + else if (c->irq_set_affinity(d, affinity, false) == IRQ_SET_MASK_OK && ret) cpumask_copy(d->affinity, affinity); return ret; diff --git a/arch/arm/kernel/kprobes-common.c b/arch/arm/kernel/kprobes-common.c index 18a7628..380c20f 100644 --- a/arch/arm/kernel/kprobes-common.c +++ b/arch/arm/kernel/kprobes-common.c @@ -14,6 +14,7 @@ #include <linux/kernel.h> #include <linux/kprobes.h> #include <asm/system_info.h> +#include <asm/opcodes.h> #include "kprobes.h" @@ -305,7 +306,8 @@ kprobe_decode_ldmstm(kprobe_opcode_t insn, struct arch_specific_insn *asi) if (handler) { /* We can emulate the instruction in (possibly) modified form */ - asi->insn[0] = (insn & 0xfff00000) | (rn << 16) | reglist; + asi->insn[0] = __opcode_to_mem_arm((insn & 0xfff00000) | + (rn << 16) | reglist); asi->insn_handler = handler; return INSN_GOOD; } @@ -334,13 +336,14 @@ prepare_emulated_insn(kprobe_opcode_t insn, struct arch_specific_insn *asi, #ifdef CONFIG_THUMB2_KERNEL if (thumb) { u16 *thumb_insn = (u16 *)asi->insn; - thumb_insn[1] = 0x4770; /* Thumb bx lr */ - thumb_insn[2] = 0x4770; /* Thumb bx lr */ + /* Thumb bx lr */ + thumb_insn[1] = __opcode_to_mem_thumb16(0x4770); + thumb_insn[2] = __opcode_to_mem_thumb16(0x4770); return insn; } - asi->insn[1] = 0xe12fff1e; /* ARM bx lr */ + asi->insn[1] = __opcode_to_mem_arm(0xe12fff1e); /* ARM bx lr */ #else - asi->insn[1] = 0xe1a0f00e; /* mov pc, lr */ + asi->insn[1] = __opcode_to_mem_arm(0xe1a0f00e); /* mov pc, lr */ #endif /* Make an ARM instruction unconditional */ if (insn < 0xe0000000) @@ -360,12 +363,12 @@ set_emulated_insn(kprobe_opcode_t insn, struct arch_specific_insn *asi, if (thumb) { u16 *ip = (u16 *)asi->insn; if (is_wide_instruction(insn)) - *ip++ = insn >> 16; - *ip++ = insn; + *ip++ = __opcode_to_mem_thumb16(insn >> 16); + *ip++ = __opcode_to_mem_thumb16(insn); return; } #endif - asi->insn[0] = insn; + asi->insn[0] = __opcode_to_mem_arm(insn); } /* diff --git a/arch/arm/kernel/kprobes-thumb.c b/arch/arm/kernel/kprobes-thumb.c index 6123daf..241222c 100644 --- a/arch/arm/kernel/kprobes-thumb.c +++ b/arch/arm/kernel/kprobes-thumb.c @@ -11,6 +11,7 @@ #include <linux/kernel.h> #include <linux/kprobes.h> #include <linux/module.h> +#include <asm/opcodes.h> #include "kprobes.h" @@ -163,9 +164,9 @@ t32_decode_ldmstm(kprobe_opcode_t insn, struct arch_specific_insn *asi) enum kprobe_insn ret = kprobe_decode_ldmstm(insn, asi); /* Fixup modified instruction to have halfwords in correct order...*/ - insn = asi->insn[0]; - ((u16 *)asi->insn)[0] = insn >> 16; - ((u16 *)asi->insn)[1] = insn & 0xffff; + insn = __mem_to_opcode_arm(asi->insn[0]); + ((u16 *)asi->insn)[0] = __opcode_to_mem_thumb16(insn >> 16); + ((u16 *)asi->insn)[1] = __opcode_to_mem_thumb16(insn & 0xffff); return ret; } @@ -1153,7 +1154,7 @@ t16_decode_hiregs(kprobe_opcode_t insn, struct arch_specific_insn *asi) { insn &= ~0x00ff; insn |= 0x001; /* Set Rdn = R1 and Rm = R0 */ - ((u16 *)asi->insn)[0] = insn; + ((u16 *)asi->insn)[0] = __opcode_to_mem_thumb16(insn); asi->insn_handler = t16_emulate_hiregs; return INSN_GOOD; } @@ -1182,8 +1183,10 @@ t16_decode_push(kprobe_opcode_t insn, struct arch_specific_insn *asi) * and call it with R9=SP and LR in the register list represented * by R8. */ - ((u16 *)asi->insn)[0] = 0xe929; /* 1st half STMDB R9!,{} */ - ((u16 *)asi->insn)[1] = insn & 0x1ff; /* 2nd half (register list) */ + /* 1st half STMDB R9!,{} */ + ((u16 *)asi->insn)[0] = __opcode_to_mem_thumb16(0xe929); + /* 2nd half (register list) */ + ((u16 *)asi->insn)[1] = __opcode_to_mem_thumb16(insn & 0x1ff); asi->insn_handler = t16_emulate_push; return INSN_GOOD; } @@ -1232,8 +1235,10 @@ t16_decode_pop(kprobe_opcode_t insn, struct arch_specific_insn *asi) * and call it with R9=SP and PC in the register list represented * by R8. */ - ((u16 *)asi->insn)[0] = 0xe8b9; /* 1st half LDMIA R9!,{} */ - ((u16 *)asi->insn)[1] = insn & 0x1ff; /* 2nd half (register list) */ + /* 1st half LDMIA R9!,{} */ + ((u16 *)asi->insn)[0] = __opcode_to_mem_thumb16(0xe8b9); + /* 2nd half (register list) */ + ((u16 *)asi->insn)[1] = __opcode_to_mem_thumb16(insn & 0x1ff); asi->insn_handler = insn & 0x100 ? t16_emulate_pop_pc : t16_emulate_pop_nopc; return INSN_GOOD; diff --git a/arch/arm/kernel/kprobes.c b/arch/arm/kernel/kprobes.c index 170e9f3..1c6ece5 100644 --- a/arch/arm/kernel/kprobes.c +++ b/arch/arm/kernel/kprobes.c @@ -26,6 +26,7 @@ #include <linux/stop_machine.h> #include <linux/stringify.h> #include <asm/traps.h> +#include <asm/opcodes.h> #include <asm/cacheflush.h> #include "kprobes.h" @@ -62,10 +63,10 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) #ifdef CONFIG_THUMB2_KERNEL thumb = true; addr &= ~1; /* Bit 0 would normally be set to indicate Thumb code */ - insn = ((u16 *)addr)[0]; + insn = __mem_to_opcode_thumb16(((u16 *)addr)[0]); if (is_wide_instruction(insn)) { - insn <<= 16; - insn |= ((u16 *)addr)[1]; + u16 inst2 = __mem_to_opcode_thumb16(((u16 *)addr)[1]); + insn = __opcode_thumb32_compose(insn, inst2); decode_insn = thumb32_kprobe_decode_insn; } else decode_insn = thumb16_kprobe_decode_insn; @@ -73,7 +74,7 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) thumb = false; if (addr & 0x3) return -EINVAL; - insn = *p->addr; + insn = __mem_to_opcode_arm(*p->addr); decode_insn = arm_kprobe_decode_insn; #endif diff --git a/arch/arm/kernel/machine_kexec.c b/arch/arm/kernel/machine_kexec.c index 57221e3..8cf0996 100644 --- a/arch/arm/kernel/machine_kexec.c +++ b/arch/arm/kernel/machine_kexec.c @@ -14,11 +14,12 @@ #include <asm/pgalloc.h> #include <asm/mmu_context.h> #include <asm/cacheflush.h> +#include <asm/fncpy.h> #include <asm/mach-types.h> #include <asm/smp_plat.h> #include <asm/system_misc.h> -extern const unsigned char relocate_new_kernel[]; +extern void relocate_new_kernel(void); extern const unsigned int relocate_new_kernel_size; extern unsigned long kexec_start_address; @@ -142,6 +143,8 @@ void machine_kexec(struct kimage *image) { unsigned long page_list; unsigned long reboot_code_buffer_phys; + unsigned long reboot_entry = (unsigned long)relocate_new_kernel; + unsigned long reboot_entry_phys; void *reboot_code_buffer; /* @@ -168,16 +171,23 @@ void machine_kexec(struct kimage *image) /* copy our kernel relocation code to the control code page */ - memcpy(reboot_code_buffer, - relocate_new_kernel, relocate_new_kernel_size); + reboot_entry = fncpy(reboot_code_buffer, + reboot_entry, + relocate_new_kernel_size); + reboot_entry_phys = (unsigned long)reboot_entry + + (reboot_code_buffer_phys - (unsigned long)reboot_code_buffer); - - flush_icache_range((unsigned long) reboot_code_buffer, - (unsigned long) reboot_code_buffer + KEXEC_CONTROL_PAGE_SIZE); printk(KERN_INFO "Bye!\n"); if (kexec_reinit) kexec_reinit(); - soft_restart(reboot_code_buffer_phys); + soft_restart(reboot_entry_phys); +} + +void arch_crash_save_vmcoreinfo(void) +{ +#ifdef CONFIG_ARM_LPAE + VMCOREINFO_CONFIG(ARM_LPAE); +#endif } diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index e186ee1..de5cd76 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -303,11 +303,18 @@ static irqreturn_t armpmu_dispatch_irq(int irq, void *dev) struct arm_pmu *armpmu = (struct arm_pmu *) dev; struct platform_device *plat_device = armpmu->plat_device; struct arm_pmu_platdata *plat = dev_get_platdata(&plat_device->dev); + int ret; + u64 start_clock, finish_clock; + start_clock = sched_clock(); if (plat && plat->handle_irq) - return plat->handle_irq(irq, dev, armpmu->handle_irq); + ret = plat->handle_irq(irq, dev, armpmu->handle_irq); else - return armpmu->handle_irq(irq, dev); + ret = armpmu->handle_irq(irq, dev); + finish_clock = sched_clock(); + + perf_sample_event_took(finish_clock - start_clock); + return ret; } static void diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index 83af229..5f6e650 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -334,6 +334,8 @@ void flush_thread(void) memset(&tsk->thread.debug, 0, sizeof(struct debug_info)); memset(&thread->fpstate, 0, sizeof(union fp_state)); + flush_tls(); + thread_notify(THREAD_NOTIFY_FLUSH, thread); } @@ -432,30 +434,6 @@ unsigned long arch_randomize_brk(struct mm_struct *mm) } #ifdef CONFIG_MMU -/* - * CONFIG_SPLIT_PTLOCK_CPUS results in a page->ptl lock. If the lock is not - * initialized by pgtable_page_ctor() then a coredump of the vector page will - * fail. - */ -static int __init vectors_user_mapping_init_page(void) -{ - struct page *page; - unsigned long addr = 0xffff0000; - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - - pgd = pgd_offset_k(addr); - pud = pud_offset(pgd, addr); - pmd = pmd_offset(pud, addr); - page = pmd_page(*(pmd)); - - pgtable_page_ctor(page); - - return 0; -} -late_initcall(vectors_user_mapping_init_page); - #ifdef CONFIG_KUSER_HELPERS /* * The vectors page is always readable from user space for the diff --git a/arch/arm/kernel/relocate_kernel.S b/arch/arm/kernel/relocate_kernel.S index d0cdedf..9585896 100644 --- a/arch/arm/kernel/relocate_kernel.S +++ b/arch/arm/kernel/relocate_kernel.S @@ -2,10 +2,12 @@ * relocate_kernel.S - put the kernel image in place to boot */ +#include <linux/linkage.h> #include <asm/kexec.h> - .globl relocate_new_kernel -relocate_new_kernel: + .align 3 /* not needed for this code, but keeps fncpy() happy */ + +ENTRY(relocate_new_kernel) ldr r0,kexec_indirection_page ldr r1,kexec_start_address @@ -79,6 +81,8 @@ kexec_mach_type: kexec_boot_atags: .long 0x0 +ENDPROC(relocate_new_kernel) + relocate_new_kernel_end: .globl relocate_new_kernel_size diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 2a767d2..6ebeaf4 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -1016,6 +1016,15 @@ static int c_show(struct seq_file *m, void *v) seq_printf(m, "model name\t: %s rev %d (%s)\n", cpu_name, cpuid & 15, elf_platform); +#if defined(CONFIG_SMP) + seq_printf(m, "BogoMIPS\t: %lu.%02lu\n", + per_cpu(cpu_data, i).loops_per_jiffy / (500000UL/HZ), + (per_cpu(cpu_data, i).loops_per_jiffy / (5000UL/HZ)) % 100); +#else + seq_printf(m, "BogoMIPS\t: %lu.%02lu\n", + loops_per_jiffy / (500000/HZ), + (loops_per_jiffy / (5000/HZ)) % 100); +#endif /* dump out the processor features */ seq_puts(m, "Features\t: "); diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index d1b0bfd..ab33042 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -589,8 +589,7 @@ asmlinkage int do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall) { do { - if (likely(thread_flags & (_TIF_NEED_RESCHED | - _TIF_NEED_RESCHED_LAZY))) { + if (likely(thread_flags & _TIF_NEED_RESCHED)) { schedule(); } else { if (unlikely(!user_mode(regs))) diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 72024ea..bd1b9e6 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -383,8 +383,17 @@ asmlinkage void secondary_start_kernel(void) void __init smp_cpus_done(unsigned int max_cpus) { - printk(KERN_INFO "SMP: Total of %d processors activated.\n", - num_online_cpus()); + int cpu; + unsigned long bogosum = 0; + + for_each_online_cpu(cpu) + bogosum += per_cpu(cpu_data, cpu).loops_per_jiffy; + + printk(KERN_INFO "SMP: Total of %d processors activated " + "(%lu.%02lu BogoMIPS).\n", + num_online_cpus(), + bogosum / (500000/HZ), + (bogosum / (5000/HZ)) % 100); hyp_mode_check(); } diff --git a/arch/arm/kernel/stacktrace.c b/arch/arm/kernel/stacktrace.c index af4e8c8..6582c4a 100644 --- a/arch/arm/kernel/stacktrace.c +++ b/arch/arm/kernel/stacktrace.c @@ -83,13 +83,16 @@ static int save_trace(struct stackframe *frame, void *d) return trace->nr_entries >= trace->max_entries; } -void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) +/* This must be noinline to so that our skip calculation works correctly */ +static noinline void __save_stack_trace(struct task_struct *tsk, + struct stack_trace *trace, unsigned int nosched) { struct stack_trace_data data; struct stackframe frame; data.trace = trace; data.skip = trace->skip; + data.no_sched_functions = nosched; if (tsk != current) { #ifdef CONFIG_SMP @@ -102,7 +105,6 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) trace->entries[trace->nr_entries++] = ULONG_MAX; return; #else - data.no_sched_functions = 1; frame.fp = thread_saved_fp(tsk); frame.sp = thread_saved_sp(tsk); frame.lr = 0; /* recovered from the stack */ @@ -111,11 +113,12 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) } else { register unsigned long current_sp asm ("sp"); - data.no_sched_functions = 0; + /* We don't want this function nor the caller */ + data.skip += 2; frame.fp = (unsigned long)__builtin_frame_address(0); frame.sp = current_sp; frame.lr = (unsigned long)__builtin_return_address(0); - frame.pc = (unsigned long)save_stack_trace_tsk; + frame.pc = (unsigned long)__save_stack_trace; } walk_stackframe(&frame, save_trace, &data); @@ -123,9 +126,14 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) trace->entries[trace->nr_entries++] = ULONG_MAX; } +void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) +{ + __save_stack_trace(tsk, trace, 1); +} + void save_stack_trace(struct stack_trace *trace) { - save_stack_trace_tsk(current, trace); + __save_stack_trace(current, trace, 0); } EXPORT_SYMBOL_GPL(save_stack_trace); #endif diff --git a/arch/arm/kernel/thumbee.c b/arch/arm/kernel/thumbee.c index 7b8403b..80f0d69 100644 --- a/arch/arm/kernel/thumbee.c +++ b/arch/arm/kernel/thumbee.c @@ -45,7 +45,7 @@ static int thumbee_notifier(struct notifier_block *self, unsigned long cmd, void switch (cmd) { case THREAD_NOTIFY_FLUSH: - thread->thumbee_state = 0; + teehbr_write(0); break; case THREAD_NOTIFY_SWITCH: current_thread_info()->thumbee_state = teehbr_read(); diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 1f735aa..8e6cd76 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -503,8 +503,6 @@ static int bad_syscall(int n, struct pt_regs *regs) return regs->ARM_r0; } -static long do_cache_op_restart(struct restart_block *); - static inline int __do_cache_op(unsigned long start, unsigned long end) { @@ -513,24 +511,8 @@ __do_cache_op(unsigned long start, unsigned long end) do { unsigned long chunk = min(PAGE_SIZE, end - start); - if (signal_pending(current)) { - struct thread_info *ti = current_thread_info(); - - ti->restart_block = (struct restart_block) { - .fn = do_cache_op_restart, - }; - - ti->arm_restart_block = (struct arm_restart_block) { - { - .cache = { - .start = start, - .end = end, - }, - }, - }; - - return -ERESTART_RESTARTBLOCK; - } + if (fatal_signal_pending(current)) + return 0; ret = flush_cache_user_range(start, start + chunk); if (ret) @@ -543,15 +525,6 @@ __do_cache_op(unsigned long start, unsigned long end) return 0; } -static long do_cache_op_restart(struct restart_block *unused) -{ - struct arm_restart_block *restart_block; - - restart_block = ¤t_thread_info()->arm_restart_block; - return __do_cache_op(restart_block->cache.start, - restart_block->cache.end); -} - static inline int do_cache_op(unsigned long start, unsigned long end, int flags) { @@ -571,7 +544,6 @@ do_cache_op(unsigned long start, unsigned long end, int flags) #define NR(x) ((__ARM_NR_##x) - __ARM_NR_BASE) asmlinkage int arm_syscall(int no, struct pt_regs *regs) { - struct thread_info *thread = current_thread_info(); siginfo_t info; if ((no >> 16) != (__ARM_NR_BASE>> 16)) @@ -622,21 +594,7 @@ asmlinkage int arm_syscall(int no, struct pt_regs *regs) return regs->ARM_r0; case NR(set_tls): - thread->tp_value[0] = regs->ARM_r0; - if (tls_emu) - return 0; - if (has_tls_reg) { - asm ("mcr p15, 0, %0, c13, c0, 3" - : : "r" (regs->ARM_r0)); - } else { - /* - * User space must never try to access this directly. - * Expect your app to break eventually if you do so. - * The user helper at 0xffff0fe0 must be used instead. - * (see entry-armv.S for details) - */ - *((unsigned int *)0xffff0ff0) = regs->ARM_r0; - } + set_tls(regs->ARM_r0); return 0; #ifdef CONFIG_NEEDS_SYSCALL_FOR_CMPXCHG diff --git a/arch/arm/kernel/unwind.c b/arch/arm/kernel/unwind.c index bbafc67..00df012 100644 --- a/arch/arm/kernel/unwind.c +++ b/arch/arm/kernel/unwind.c @@ -87,7 +87,7 @@ extern const struct unwind_idx __start_unwind_idx[]; static const struct unwind_idx *__origin_unwind_idx; extern const struct unwind_idx __stop_unwind_idx[]; -static DEFINE_RAW_SPINLOCK(unwind_lock); +static DEFINE_SPINLOCK(unwind_lock); static LIST_HEAD(unwind_tables); /* Convert a prel31 symbol to an absolute address */ @@ -195,7 +195,7 @@ static const struct unwind_idx *unwind_find_idx(unsigned long addr) /* module unwind tables */ struct unwind_table *table; - raw_spin_lock_irqsave(&unwind_lock, flags); + spin_lock_irqsave(&unwind_lock, flags); list_for_each_entry(table, &unwind_tables, list) { if (addr >= table->begin_addr && addr < table->end_addr) { @@ -207,7 +207,7 @@ static const struct unwind_idx *unwind_find_idx(unsigned long addr) break; } } - raw_spin_unlock_irqrestore(&unwind_lock, flags); + spin_unlock_irqrestore(&unwind_lock, flags); } pr_debug("%s: idx = %p\n", __func__, idx); @@ -469,9 +469,9 @@ struct unwind_table *unwind_table_add(unsigned long start, unsigned long size, tab->begin_addr = text_addr; tab->end_addr = text_addr + text_size; - raw_spin_lock_irqsave(&unwind_lock, flags); + spin_lock_irqsave(&unwind_lock, flags); list_add_tail(&tab->list, &unwind_tables); - raw_spin_unlock_irqrestore(&unwind_lock, flags); + spin_unlock_irqrestore(&unwind_lock, flags); return tab; } @@ -483,9 +483,9 @@ void unwind_table_del(struct unwind_table *tab) if (!tab) return; - raw_spin_lock_irqsave(&unwind_lock, flags); + spin_lock_irqsave(&unwind_lock, flags); list_del(&tab->list); - raw_spin_unlock_irqrestore(&unwind_lock, flags); + spin_unlock_irqrestore(&unwind_lock, flags); kfree(tab); } diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index cb79a5d..fe59e4a 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -41,6 +41,8 @@ static unsigned long hyp_idmap_start; static unsigned long hyp_idmap_end; static phys_addr_t hyp_idmap_vector; +#define pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t)) + static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) { /* @@ -172,14 +174,14 @@ void free_boot_hyp_pgd(void) if (boot_hyp_pgd) { unmap_range(NULL, boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE); unmap_range(NULL, boot_hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE); - kfree(boot_hyp_pgd); + free_pages((unsigned long)boot_hyp_pgd, pgd_order); boot_hyp_pgd = NULL; } if (hyp_pgd) unmap_range(NULL, hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE); - kfree(init_bounce_page); + free_page((unsigned long)init_bounce_page); init_bounce_page = NULL; mutex_unlock(&kvm_hyp_pgd_mutex); @@ -209,7 +211,7 @@ void free_hyp_pgds(void) for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE) unmap_range(NULL, hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE); - kfree(hyp_pgd); + free_pages((unsigned long)hyp_pgd, pgd_order); hyp_pgd = NULL; } @@ -781,7 +783,7 @@ int kvm_mmu_init(void) size_t len = __hyp_idmap_text_end - __hyp_idmap_text_start; phys_addr_t phys_base; - init_bounce_page = kmalloc(PAGE_SIZE, GFP_KERNEL); + init_bounce_page = (void *)__get_free_page(GFP_KERNEL); if (!init_bounce_page) { kvm_err("Couldn't allocate HYP init bounce page\n"); err = -ENOMEM; @@ -807,8 +809,9 @@ int kvm_mmu_init(void) (unsigned long)phys_base); } - hyp_pgd = kzalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL); - boot_hyp_pgd = kzalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL); + hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, pgd_order); + boot_hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, pgd_order); + if (!hyp_pgd || !boot_hyp_pgd) { kvm_err("Hyp mode PGD not allocated\n"); err = -ENOMEM; diff --git a/arch/arm/mach-at91/at91rm9200_time.c b/arch/arm/mach-at91/at91rm9200_time.c index 35f7b26..f607deb 100644 --- a/arch/arm/mach-at91/at91rm9200_time.c +++ b/arch/arm/mach-at91/at91rm9200_time.c @@ -134,7 +134,6 @@ clkevt32k_mode(enum clock_event_mode mode, struct clock_event_device *dev) break; case CLOCK_EVT_MODE_SHUTDOWN: case CLOCK_EVT_MODE_UNUSED: - remove_irq(NR_IRQS_LEGACY + AT91_ID_SYS, &at91rm9200_timer_irq); case CLOCK_EVT_MODE_RESUME: irqmask = 0; break; diff --git a/arch/arm/mach-at91/at91sam926x_time.c b/arch/arm/mach-at91/at91sam926x_time.c index 1c4c487..bb39232 100644 --- a/arch/arm/mach-at91/at91sam926x_time.c +++ b/arch/arm/mach-at91/at91sam926x_time.c @@ -77,7 +77,7 @@ static struct clocksource pit_clk = { .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; -static struct irqaction at91sam926x_pit_irq; + /* * Clockevent device: interrupts every 1/HZ (== pit_cycles * MCK/16) */ @@ -86,8 +86,6 @@ pit_clkevt_mode(enum clock_event_mode mode, struct clock_event_device *dev) { switch (mode) { case CLOCK_EVT_MODE_PERIODIC: - /* Set up irq handler */ - setup_irq(at91sam926x_pit_irq.irq, &at91sam926x_pit_irq); /* update clocksource counter */ pit_cnt += pit_cycle * PIT_PICNT(pit_read(AT91_PIT_PIVR)); pit_write(AT91_PIT_MR, (pit_cycle - 1) | AT91_PIT_PITEN @@ -100,7 +98,6 @@ pit_clkevt_mode(enum clock_event_mode mode, struct clock_event_device *dev) case CLOCK_EVT_MODE_UNUSED: /* disable irq, leaving the clocksource active */ pit_write(AT91_PIT_MR, (pit_cycle - 1) | AT91_PIT_PITEN); - remove_irq(at91sam926x_pit_irq.irq, &at91sam926x_pit_irq); break; case CLOCK_EVT_MODE_RESUME: break; diff --git a/arch/arm/mach-at91/clock.c b/arch/arm/mach-at91/clock.c index 6b2630a..0778e54 100644 --- a/arch/arm/mach-at91/clock.c +++ b/arch/arm/mach-at91/clock.c @@ -963,6 +963,7 @@ static int __init at91_clock_reset(void) } at91_pmc_write(AT91_PMC_SCDR, scdr); + at91_pmc_write(AT91_PMC_PCDR, pcdr); if (cpu_is_sama5d3()) at91_pmc_write(AT91_PMC_PCDR1, pcdr1); diff --git a/arch/arm/mach-at91/sysirq_mask.c b/arch/arm/mach-at91/sysirq_mask.c index 2ba694f..f8bc351 100644 --- a/arch/arm/mach-at91/sysirq_mask.c +++ b/arch/arm/mach-at91/sysirq_mask.c @@ -25,24 +25,28 @@ #include "generic.h" -#define AT91_RTC_IDR 0x24 /* Interrupt Disable Register */ -#define AT91_RTC_IMR 0x28 /* Interrupt Mask Register */ +#define AT91_RTC_IDR 0x24 /* Interrupt Disable Register */ +#define AT91_RTC_IMR 0x28 /* Interrupt Mask Register */ +#define AT91_RTC_IRQ_MASK 0x1f /* Available IRQs mask */ void __init at91_sysirq_mask_rtc(u32 rtc_base) { void __iomem *base; - u32 mask; base = ioremap(rtc_base, 64); if (!base) return; - mask = readl_relaxed(base + AT91_RTC_IMR); - if (mask) { - pr_info("AT91: Disabling rtc irq\n"); - writel_relaxed(mask, base + AT91_RTC_IDR); - (void)readl_relaxed(base + AT91_RTC_IMR); /* flush */ - } + /* + * sam9x5 SoCs have the following errata: + * "RTC: Interrupt Mask Register cannot be used + * Interrupt Mask Register read always returns 0." + * + * Hence we're not relying on IMR values to disable + * interrupts. + */ + writel_relaxed(AT91_RTC_IRQ_MASK, base + AT91_RTC_IDR); + (void)readl_relaxed(base + AT91_RTC_IMR); /* flush */ iounmap(base); } diff --git a/arch/arm/mach-exynos/platsmp.c b/arch/arm/mach-exynos/platsmp.c index f56f767..58b43e6 100644 --- a/arch/arm/mach-exynos/platsmp.c +++ b/arch/arm/mach-exynos/platsmp.c @@ -73,7 +73,7 @@ static void __iomem *scu_base_addr(void) return (void __iomem *)(S5P_VA_SCU); } -static DEFINE_RAW_SPINLOCK(boot_lock); +static DEFINE_SPINLOCK(boot_lock); static void exynos_secondary_init(unsigned int cpu) { @@ -86,8 +86,8 @@ static void exynos_secondary_init(unsigned int cpu) /* * Synchronise with the boot thread. */ - raw_spin_lock(&boot_lock); - raw_spin_unlock(&boot_lock); + spin_lock(&boot_lock); + spin_unlock(&boot_lock); } static int exynos_boot_secondary(unsigned int cpu, struct task_struct *idle) @@ -99,7 +99,7 @@ static int exynos_boot_secondary(unsigned int cpu, struct task_struct *idle) * Set synchronisation state between this boot processor * and the secondary one */ - raw_spin_lock(&boot_lock); + spin_lock(&boot_lock); /* * The secondary processor is waiting to be released from @@ -128,7 +128,7 @@ static int exynos_boot_secondary(unsigned int cpu, struct task_struct *idle) if (timeout == 0) { printk(KERN_ERR "cpu1 power enable failed"); - raw_spin_unlock(&boot_lock); + spin_unlock(&boot_lock); return -ETIMEDOUT; } } @@ -167,7 +167,7 @@ static int exynos_boot_secondary(unsigned int cpu, struct task_struct *idle) * now the secondary core is starting up let it run its * calibrations, then wait for it to finish */ - raw_spin_unlock(&boot_lock); + spin_unlock(&boot_lock); return pen_release != -1 ? -ENOSYS : 0; } diff --git a/arch/arm/mach-imx/clk-imx6q.c b/arch/arm/mach-imx/clk-imx6q.c index ef85ac4..2eed3cf 100644 --- a/arch/arm/mach-imx/clk-imx6q.c +++ b/arch/arm/mach-imx/clk-imx6q.c @@ -304,8 +304,8 @@ static void __init imx6q_clocks_init(struct device_node *ccm_node) post_div_table[1].div = 1; post_div_table[2].div = 1; video_div_table[1].div = 1; - video_div_table[2].div = 1; - }; + video_div_table[3].div = 1; + } /* type name parent_name base div_mask */ clk[pll1_sys] = imx_clk_pllv3(IMX_PLLV3_SYS, "pll1_sys", "osc", base, 0x7f); diff --git a/arch/arm/mach-imx/devices/platform-ipu-core.c b/arch/arm/mach-imx/devices/platform-ipu-core.c index fc4dd7c..6bd7c3f 100644 --- a/arch/arm/mach-imx/devices/platform-ipu-core.c +++ b/arch/arm/mach-imx/devices/platform-ipu-core.c @@ -77,7 +77,7 @@ struct platform_device *__init imx_alloc_mx3_camera( pdev = platform_device_alloc("mx3-camera", 0); if (!pdev) - goto err; + return ERR_PTR(-ENOMEM); pdev->dev.dma_mask = kmalloc(sizeof(*pdev->dev.dma_mask), GFP_KERNEL); if (!pdev->dev.dma_mask) diff --git a/arch/arm/mach-msm/platsmp.c b/arch/arm/mach-msm/platsmp.c index fc09a04..3f06edc 100644 --- a/arch/arm/mach-msm/platsmp.c +++ b/arch/arm/mach-msm/platsmp.c @@ -30,7 +30,7 @@ extern void msm_secondary_startup(void); -static DEFINE_RAW_SPINLOCK(boot_lock); +static DEFINE_SPINLOCK(boot_lock); static inline int get_core_count(void) { @@ -50,8 +50,8 @@ static void msm_secondary_init(unsigned int cpu) /* * Synchronise with the boot thread. */ - raw_spin_lock(&boot_lock); - raw_spin_unlock(&boot_lock); + spin_lock(&boot_lock); + spin_unlock(&boot_lock); } static void prepare_cold_cpu(unsigned int cpu) @@ -88,7 +88,7 @@ static int msm_boot_secondary(unsigned int cpu, struct task_struct *idle) * set synchronisation state between this boot processor * and the secondary one */ - raw_spin_lock(&boot_lock); + spin_lock(&boot_lock); /* * The secondary processor is waiting to be released from @@ -122,7 +122,7 @@ static int msm_boot_secondary(unsigned int cpu, struct task_struct *idle) * now the secondary core is starting up let it run its * calibrations, then wait for it to finish */ - raw_spin_unlock(&boot_lock); + spin_unlock(&boot_lock); return pen_release != -1 ? -ENOSYS : 0; } diff --git a/arch/arm/mach-mvebu/coherency.c b/arch/arm/mach-mvebu/coherency.c index 58adf2f..7e0529b 100644 --- a/arch/arm/mach-mvebu/coherency.c +++ b/arch/arm/mach-mvebu/coherency.c @@ -124,6 +124,29 @@ int __init coherency_init(void) { struct device_node *np; + /* + * The coherency fabric is needed: + * - For coherency between processors on Armada XP, so only + * when SMP is enabled. + * - For coherency between the processor and I/O devices, but + * this coherency requires many pre-requisites (write + * allocate cache policy, shareable pages, SMP bit set) that + * are only meant in SMP situations. + * + * Note that this means that on Armada 370, there is currently + * no way to use hardware I/O coherency, because even when + * CONFIG_SMP is enabled, is_smp() returns false due to the + * Armada 370 being a single-core processor. To lift this + * limitation, we would have to find a way to make the cache + * policy set to write-allocate (on all Armada SoCs), and to + * set the shareable attribute in page tables (on all Armada + * SoCs except the Armada 370). Unfortunately, such decisions + * are taken very early in the kernel boot process, at a point + * where we don't know yet on which SoC we are running. + */ + if (!is_smp()) + return 0; + np = of_find_matching_node(NULL, of_coherency_table); if (np) { struct resource res; @@ -150,6 +173,9 @@ static int __init coherency_late_init(void) { struct device_node *np; + if (!is_smp()) + return 0; + np = of_find_matching_node(NULL, of_coherency_table); if (np) { bus_register_notifier(&platform_bus_type, diff --git a/arch/arm/mach-omap1/board-h2.c b/arch/arm/mach-omap1/board-h2.c index fd90caf..db57072 100644 --- a/arch/arm/mach-omap1/board-h2.c +++ b/arch/arm/mach-omap1/board-h2.c @@ -343,7 +343,7 @@ static struct omap_usb_config h2_usb_config __initdata = { /* usb1 has a Mini-AB port and external isp1301 transceiver */ .otg = 2, -#ifdef CONFIG_USB_GADGET_OMAP +#if IS_ENABLED(CONFIG_USB_OMAP) .hmc_mode = 19, /* 0:host(off) 1:dev|otg 2:disabled */ /* .hmc_mode = 21,*/ /* 0:host(off) 1:dev(loopback) 2:host(loopback) */ #elif defined(CONFIG_USB_OHCI_HCD) || defined(CONFIG_USB_OHCI_HCD_MODULE) diff --git a/arch/arm/mach-omap1/board-h3.c b/arch/arm/mach-omap1/board-h3.c index 816ecd1..bfed4f9 100644 --- a/arch/arm/mach-omap1/board-h3.c +++ b/arch/arm/mach-omap1/board-h3.c @@ -366,7 +366,7 @@ static struct omap_usb_config h3_usb_config __initdata = { /* usb1 has a Mini-AB port and external isp1301 transceiver */ .otg = 2, -#ifdef CONFIG_USB_GADGET_OMAP +#if IS_ENABLED(CONFIG_USB_OMAP) .hmc_mode = 19, /* 0:host(off) 1:dev|otg 2:disabled */ #elif defined(CONFIG_USB_OHCI_HCD) || defined(CONFIG_USB_OHCI_HCD_MODULE) /* NONSTANDARD CABLE NEEDED (B-to-Mini-B) */ diff --git a/arch/arm/mach-omap1/board-innovator.c b/arch/arm/mach-omap1/board-innovator.c index bd5f02e..c49ce83 100644 --- a/arch/arm/mach-omap1/board-innovator.c +++ b/arch/arm/mach-omap1/board-innovator.c @@ -312,7 +312,7 @@ static struct omap_usb_config h2_usb_config __initdata = { /* usb1 has a Mini-AB port and external isp1301 transceiver */ .otg = 2, -#ifdef CONFIG_USB_GADGET_OMAP +#if IS_ENABLED(CONFIG_USB_OMAP) .hmc_mode = 19, /* 0:host(off) 1:dev|otg 2:disabled */ /* .hmc_mode = 21,*/ /* 0:host(off) 1:dev(loopback) 2:host(loopback) */ #elif defined(CONFIG_USB_OHCI_HCD) || defined(CONFIG_USB_OHCI_HCD_MODULE) diff --git a/arch/arm/mach-omap1/board-osk.c b/arch/arm/mach-omap1/board-osk.c index a7ce692..006fbb5 100644 --- a/arch/arm/mach-omap1/board-osk.c +++ b/arch/arm/mach-omap1/board-osk.c @@ -280,7 +280,7 @@ static struct omap_usb_config osk_usb_config __initdata = { * be used, with a NONSTANDARD gender-bending cable/dongle, as * a peripheral. */ -#ifdef CONFIG_USB_GADGET_OMAP +#if IS_ENABLED(CONFIG_USB_OMAP) .register_dev = 1, .hmc_mode = 0, #else diff --git a/arch/arm/mach-omap2/cclock3xxx_data.c b/arch/arm/mach-omap2/cclock3xxx_data.c index 334b767..9128b25 100644 --- a/arch/arm/mach-omap2/cclock3xxx_data.c +++ b/arch/arm/mach-omap2/cclock3xxx_data.c @@ -418,7 +418,8 @@ static struct clk_hw_omap dpll4_m5x2_ck_hw = { .clkdm_name = "dpll4_clkdm", }; -DEFINE_STRUCT_CLK(dpll4_m5x2_ck, dpll4_m5x2_ck_parent_names, dpll4_m5x2_ck_ops); +DEFINE_STRUCT_CLK_FLAGS(dpll4_m5x2_ck, dpll4_m5x2_ck_parent_names, + dpll4_m5x2_ck_ops, CLK_SET_RATE_PARENT); static struct clk dpll4_m5x2_ck_3630 = { .name = "dpll4_m5x2_ck", diff --git a/arch/arm/mach-omap2/control.c b/arch/arm/mach-omap2/control.c index 31e0dfe..62a392b 100644 --- a/arch/arm/mach-omap2/control.c +++ b/arch/arm/mach-omap2/control.c @@ -324,7 +324,8 @@ void omap3_save_scratchpad_contents(void) scratchpad_contents.public_restore_ptr = virt_to_phys(omap3_restore_3630); else if (omap_rev() != OMAP3430_REV_ES3_0 && - omap_rev() != OMAP3430_REV_ES3_1) + omap_rev() != OMAP3430_REV_ES3_1 && + omap_rev() != OMAP3430_REV_ES3_1_2) scratchpad_contents.public_restore_ptr = virt_to_phys(omap3_restore); else diff --git a/arch/arm/mach-omap2/irq.c b/arch/arm/mach-omap2/irq.c index e022a86..6037a9a 100644 --- a/arch/arm/mach-omap2/irq.c +++ b/arch/arm/mach-omap2/irq.c @@ -222,6 +222,7 @@ void __init ti81xx_init_irq(void) static inline void omap_intc_handle_irq(void __iomem *base_addr, struct pt_regs *regs) { u32 irqnr; + int handled_irq = 0; do { irqnr = readl_relaxed(base_addr + 0x98); @@ -249,8 +250,15 @@ out: if (irqnr) { irqnr = irq_find_mapping(domain, irqnr); handle_IRQ(irqnr, regs); + handled_irq = 1; } } while (irqnr); + + /* If an irq is masked or deasserted while active, we will + * keep ending up here with no irq handled. So remove it from + * the INTC with an ack.*/ + if (!handled_irq) + omap_ack_irq(NULL); } asmlinkage void __exception_irq_entry omap2_intc_handle_irq(struct pt_regs *regs) diff --git a/arch/arm/mach-omap2/mux.c b/arch/arm/mach-omap2/mux.c index f82cf87..94c2f6d 100644 --- a/arch/arm/mach-omap2/mux.c +++ b/arch/arm/mach-omap2/mux.c @@ -183,8 +183,10 @@ static int __init _omap_mux_get_by_name(struct omap_mux_partition *partition, m0_entry = mux->muxnames[0]; /* First check for full name in mode0.muxmode format */ - if (mode0_len && strncmp(muxname, m0_entry, mode0_len)) - continue; + if (mode0_len) + if (strncmp(muxname, m0_entry, mode0_len) || + (strlen(m0_entry) != mode0_len)) + continue; /* Then check for muxmode only */ for (i = 0; i < OMAP_MUX_NR_MODES; i++) { diff --git a/arch/arm/mach-omap2/omap-smp.c b/arch/arm/mach-omap2/omap-smp.c index 5969da3..8912110 100644 --- a/arch/arm/mach-omap2/omap-smp.c +++ b/arch/arm/mach-omap2/omap-smp.c @@ -44,7 +44,7 @@ u16 pm44xx_errata; /* SCU base address */ static void __iomem *scu_base; -static DEFINE_RAW_SPINLOCK(boot_lock); +static DEFINE_SPINLOCK(boot_lock); void __iomem *omap4_get_scu_base(void) { @@ -68,8 +68,8 @@ static void omap4_secondary_init(unsigned int cpu) /* * Synchronise with the boot thread. */ - raw_spin_lock(&boot_lock); - raw_spin_unlock(&boot_lock); + spin_lock(&boot_lock); + spin_unlock(&boot_lock); } static int omap4_boot_secondary(unsigned int cpu, struct task_struct *idle) @@ -83,7 +83,7 @@ static int omap4_boot_secondary(unsigned int cpu, struct task_struct *idle) * Set synchronisation state between this boot processor * and the secondary one */ - raw_spin_lock(&boot_lock); + spin_lock(&boot_lock); /* * Update the AuxCoreBoot0 with boot state for secondary core. @@ -160,7 +160,7 @@ static int omap4_boot_secondary(unsigned int cpu, struct task_struct *idle) * Now the secondary core is starting up let it run its * calibrations, then wait for it to finish */ - raw_spin_unlock(&boot_lock); + spin_unlock(&boot_lock); return 0; } diff --git a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c index 60f2344..857e76c 100644 --- a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c +++ b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c @@ -1968,7 +1968,7 @@ static struct omap_hwmod_irq_info omap3xxx_usb_host_hs_irqs[] = { static struct omap_hwmod omap3xxx_usb_host_hs_hwmod = { .name = "usb_host_hs", .class = &omap3xxx_usb_host_hs_hwmod_class, - .clkdm_name = "l3_init_clkdm", + .clkdm_name = "usbhost_clkdm", .mpu_irqs = omap3xxx_usb_host_hs_irqs, .main_clk = "usbhost_48m_fck", .prcm = { @@ -2053,7 +2053,7 @@ static struct omap_hwmod_irq_info omap3xxx_usb_tll_hs_irqs[] = { static struct omap_hwmod omap3xxx_usb_tll_hs_hwmod = { .name = "usb_tll_hs", .class = &omap3xxx_usb_tll_hs_hwmod_class, - .clkdm_name = "l3_init_clkdm", + .clkdm_name = "core_l4_clkdm", .mpu_irqs = omap3xxx_usb_tll_hs_irqs, .main_clk = "usbtll_fck", .prcm = { diff --git a/arch/arm/mach-omap2/omap_hwmod_54xx_data.c b/arch/arm/mach-omap2/omap_hwmod_54xx_data.c index cde4155..6bbb1e6 100644 --- a/arch/arm/mach-omap2/omap_hwmod_54xx_data.c +++ b/arch/arm/mach-omap2/omap_hwmod_54xx_data.c @@ -895,7 +895,7 @@ static struct omap_hwmod omap54xx_mcpdm_hwmod = { * current exception. */ - .flags = HWMOD_EXT_OPT_MAIN_CLK, + .flags = HWMOD_EXT_OPT_MAIN_CLK | HWMOD_SWSUP_SIDLE, .main_clk = "pad_clks_ck", .prcm = { .omap4 = { diff --git a/arch/arm/mach-omap2/pm.h b/arch/arm/mach-omap2/pm.h index 7bdd22a..d4d0fce 100644 --- a/arch/arm/mach-omap2/pm.h +++ b/arch/arm/mach-omap2/pm.h @@ -103,7 +103,7 @@ static inline void enable_omap3630_toggle_l2_on_restore(void) { } #define PM_OMAP4_ROM_SMP_BOOT_ERRATUM_GICD (1 << 0) -#if defined(CONFIG_ARCH_OMAP4) +#if defined(CONFIG_PM) && defined(CONFIG_ARCH_OMAP4) extern u16 pm44xx_errata; #define IS_PM44XX_ERRATUM(id) (pm44xx_errata & (id)) #else diff --git a/arch/arm/mach-omap2/pm44xx.c b/arch/arm/mach-omap2/pm44xx.c index 82f06989..6be33ce 100644 --- a/arch/arm/mach-omap2/pm44xx.c +++ b/arch/arm/mach-omap2/pm44xx.c @@ -146,26 +146,6 @@ static inline int omap4_init_static_deps(void) struct clockdomain *ducati_clkdm, *l3_2_clkdm; int ret = 0; - if (omap_rev() == OMAP4430_REV_ES1_0) { - WARN(1, "Power Management not supported on OMAP4430 ES1.0\n"); - return -ENODEV; - } - - pr_err("Power Management for TI OMAP4.\n"); - /* - * OMAP4 chip PM currently works only with certain (newer) - * versions of bootloaders. This is due to missing code in the - * kernel to properly reset and initialize some devices. - * http://www.spinics.net/lists/arm-kernel/msg218641.html - */ - pr_warn("OMAP4 PM: u-boot >= v2012.07 is required for full PM support\n"); - - ret = pwrdm_for_each(pwrdms_setup, NULL); - if (ret) { - pr_err("Failed to setup powerdomains\n"); - return ret; - } - /* * The dynamic dependency between MPUSS -> MEMIF and * MPUSS -> L4_PER/L3_* and DUCATI -> L3_* doesn't work as @@ -216,6 +196,15 @@ int __init omap4_pm_init(void) pr_info("Power Management for TI OMAP4+ devices.\n"); + /* + * OMAP4 chip PM currently works only with certain (newer) + * versions of bootloaders. This is due to missing code in the + * kernel to properly reset and initialize some devices. + * http://www.spinics.net/lists/arm-kernel/msg218641.html + */ + if (cpu_is_omap44xx()) + pr_warn("OMAP4 PM: u-boot >= v2012.07 is required for full PM support\n"); + ret = pwrdm_for_each(pwrdms_setup, NULL); if (ret) { pr_err("Failed to setup powerdomains.\n"); diff --git a/arch/arm/mach-omap2/soc.h b/arch/arm/mach-omap2/soc.h index 4588df1..78f44f3 100644 --- a/arch/arm/mach-omap2/soc.h +++ b/arch/arm/mach-omap2/soc.h @@ -245,6 +245,8 @@ IS_AM_SUBCLASS(437x, 0x437) #define soc_is_omap54xx() 0 #define soc_is_omap543x() 0 #define soc_is_dra7xx() 0 +#define soc_is_dra74x() 0 +#define soc_is_dra72x() 0 #if defined(MULTI_OMAP2) # if defined(CONFIG_ARCH_OMAP2) @@ -393,7 +395,11 @@ IS_OMAP_TYPE(3430, 0x3430) #if defined(CONFIG_SOC_DRA7XX) #undef soc_is_dra7xx +#undef soc_is_dra74x +#undef soc_is_dra72x #define soc_is_dra7xx() (of_machine_is_compatible("ti,dra7")) +#define soc_is_dra74x() (of_machine_is_compatible("ti,dra74")) +#define soc_is_dra72x() (of_machine_is_compatible("ti,dra72")) #endif /* Various silicon revisions for omap2 */ diff --git a/arch/arm/mach-omap2/timer.c b/arch/arm/mach-omap2/timer.c index ead48fa..bf83df1 100644 --- a/arch/arm/mach-omap2/timer.c +++ b/arch/arm/mach-omap2/timer.c @@ -503,11 +503,11 @@ static void __init realtime_counter_init(void) rate = clk_get_rate(sys_clk); /* Numerator/denumerator values refer TRM Realtime Counter section */ switch (rate) { - case 1200000: + case 12000000: num = 64; den = 125; break; - case 1300000: + case 13000000: num = 768; den = 1625; break; @@ -515,11 +515,11 @@ static void __init realtime_counter_init(void) num = 8; den = 25; break; - case 2600000: + case 26000000: num = 384; den = 1625; break; - case 2700000: + case 27000000: num = 256; den = 1125; break; diff --git a/arch/arm/mach-orion5x/common.h b/arch/arm/mach-orion5x/common.h index f565f99..7548db2 100644 --- a/arch/arm/mach-orion5x/common.h +++ b/arch/arm/mach-orion5x/common.h @@ -21,7 +21,7 @@ struct mv_sata_platform_data; #define ORION_MBUS_DEVBUS_BOOT_ATTR 0x0f #define ORION_MBUS_DEVBUS_TARGET(cs) 0x01 #define ORION_MBUS_DEVBUS_ATTR(cs) (~(1 << cs)) -#define ORION_MBUS_SRAM_TARGET 0x00 +#define ORION_MBUS_SRAM_TARGET 0x09 #define ORION_MBUS_SRAM_ATTR 0x00 /* diff --git a/arch/arm/mach-prima2/platsmp.c b/arch/arm/mach-prima2/platsmp.c index 42837dc4..3dbcb1a 100644 --- a/arch/arm/mach-prima2/platsmp.c +++ b/arch/arm/mach-prima2/platsmp.c @@ -23,7 +23,7 @@ static void __iomem *scu_base; static void __iomem *rsc_base; -static DEFINE_RAW_SPINLOCK(boot_lock); +static DEFINE_SPINLOCK(boot_lock); static struct map_desc scu_io_desc __initdata = { .length = SZ_4K, @@ -56,8 +56,8 @@ static void sirfsoc_secondary_init(unsigned int cpu) /* * Synchronise with the boot thread. */ - raw_spin_lock(&boot_lock); - raw_spin_unlock(&boot_lock); + spin_lock(&boot_lock); + spin_unlock(&boot_lock); } static struct of_device_id rsc_ids[] = { @@ -95,7 +95,7 @@ static int sirfsoc_boot_secondary(unsigned int cpu, struct task_struct *idle) /* make sure write buffer is drained */ mb(); - raw_spin_lock(&boot_lock); + spin_lock(&boot_lock); /* * The secondary processor is waiting to be released from @@ -128,7 +128,7 @@ static int sirfsoc_boot_secondary(unsigned int cpu, struct task_struct *idle) * now the secondary core is starting up let it run its * calibrations, then wait for it to finish */ - raw_spin_unlock(&boot_lock); + spin_unlock(&boot_lock); return pen_release != -1 ? -ENOSYS : 0; } diff --git a/arch/arm/mach-shmobile/setup-sh73a0.c b/arch/arm/mach-shmobile/setup-sh73a0.c index 22de174..a10565d 100644 --- a/arch/arm/mach-shmobile/setup-sh73a0.c +++ b/arch/arm/mach-shmobile/setup-sh73a0.c @@ -746,6 +746,7 @@ static struct platform_device ipmmu_device = { static struct renesas_intc_irqpin_config irqpin0_platform_data = { .irq_base = irq_pin(0), /* IRQ0 -> IRQ7 */ + .control_parent = true, }; static struct resource irqpin0_resources[] = { @@ -807,6 +808,7 @@ static struct platform_device irqpin1_device = { static struct renesas_intc_irqpin_config irqpin2_platform_data = { .irq_base = irq_pin(16), /* IRQ16 -> IRQ23 */ + .control_parent = true, }; static struct resource irqpin2_resources[] = { @@ -837,6 +839,7 @@ static struct platform_device irqpin2_device = { static struct renesas_intc_irqpin_config irqpin3_platform_data = { .irq_base = irq_pin(24), /* IRQ24 -> IRQ31 */ + .control_parent = true, }; static struct resource irqpin3_resources[] = { diff --git a/arch/arm/mach-spear/platsmp.c b/arch/arm/mach-spear/platsmp.c index 33dc270..5c4a198 100644 --- a/arch/arm/mach-spear/platsmp.c +++ b/arch/arm/mach-spear/platsmp.c @@ -20,7 +20,7 @@ #include <mach/spear.h> #include "generic.h" -static DEFINE_RAW_SPINLOCK(boot_lock); +static DEFINE_SPINLOCK(boot_lock); static void __iomem *scu_base = IOMEM(VA_SCU_BASE); @@ -36,8 +36,8 @@ static void spear13xx_secondary_init(unsigned int cpu) /* * Synchronise with the boot thread. */ - raw_spin_lock(&boot_lock); - raw_spin_unlock(&boot_lock); + spin_lock(&boot_lock); + spin_unlock(&boot_lock); } static int spear13xx_boot_secondary(unsigned int cpu, struct task_struct *idle) @@ -48,7 +48,7 @@ static int spear13xx_boot_secondary(unsigned int cpu, struct task_struct *idle) * set synchronisation state between this boot processor * and the secondary one */ - raw_spin_lock(&boot_lock); + spin_lock(&boot_lock); /* * The secondary processor is waiting to be released from @@ -75,7 +75,7 @@ static int spear13xx_boot_secondary(unsigned int cpu, struct task_struct *idle) * now the secondary core is starting up let it run its * calibrations, then wait for it to finish */ - raw_spin_unlock(&boot_lock); + spin_unlock(&boot_lock); return pen_release != -1 ? -ENOSYS : 0; } diff --git a/arch/arm/mach-sti/platsmp.c b/arch/arm/mach-sti/platsmp.c index c05b764..dce50d9 100644 --- a/arch/arm/mach-sti/platsmp.c +++ b/arch/arm/mach-sti/platsmp.c @@ -35,7 +35,7 @@ static void write_pen_release(int val) outer_clean_range(__pa(&pen_release), __pa(&pen_release + 1)); } -static DEFINE_RAW_SPINLOCK(boot_lock); +static DEFINE_SPINLOCK(boot_lock); void sti_secondary_init(unsigned int cpu) { @@ -50,8 +50,8 @@ void sti_secondary_init(unsigned int cpu) /* * Synchronise with the boot thread. */ - raw_spin_lock(&boot_lock); - raw_spin_unlock(&boot_lock); + spin_lock(&boot_lock); + spin_unlock(&boot_lock); } int sti_boot_secondary(unsigned int cpu, struct task_struct *idle) @@ -62,7 +62,7 @@ int sti_boot_secondary(unsigned int cpu, struct task_struct *idle) * set synchronisation state between this boot processor * and the secondary one */ - raw_spin_lock(&boot_lock); + spin_lock(&boot_lock); /* * The secondary processor is waiting to be released from @@ -93,7 +93,7 @@ int sti_boot_secondary(unsigned int cpu, struct task_struct *idle) * now the secondary core is starting up let it run its * calibrations, then wait for it to finish */ - raw_spin_unlock(&boot_lock); + spin_unlock(&boot_lock); return pen_release != -1 ? -ENOSYS : 0; } diff --git a/arch/arm/mach-ux500/platsmp.c b/arch/arm/mach-ux500/platsmp.c index eeb5916..1f296e7 100644 --- a/arch/arm/mach-ux500/platsmp.c +++ b/arch/arm/mach-ux500/platsmp.c @@ -52,7 +52,7 @@ static void __iomem *scu_base_addr(void) return NULL; } -static DEFINE_RAW_SPINLOCK(boot_lock); +static DEFINE_SPINLOCK(boot_lock); static void ux500_secondary_init(unsigned int cpu) { @@ -65,8 +65,8 @@ static void ux500_secondary_init(unsigned int cpu) /* * Synchronise with the boot thread. */ - raw_spin_lock(&boot_lock); - raw_spin_unlock(&boot_lock); + spin_lock(&boot_lock); + spin_unlock(&boot_lock); } static int ux500_boot_secondary(unsigned int cpu, struct task_struct *idle) @@ -77,7 +77,7 @@ static int ux500_boot_secondary(unsigned int cpu, struct task_struct *idle) * set synchronisation state between this boot processor * and the secondary one */ - raw_spin_lock(&boot_lock); + spin_lock(&boot_lock); /* * The secondary processor is waiting to be released from @@ -98,7 +98,7 @@ static int ux500_boot_secondary(unsigned int cpu, struct task_struct *idle) * now the secondary core is starting up let it run its * calibrations, then wait for it to finish */ - raw_spin_unlock(&boot_lock); + spin_unlock(&boot_lock); return pen_release != -1 ? -ENOSYS : 0; } diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index cd2c88e..426f531 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -446,7 +446,6 @@ config CPU_32v5 config CPU_32v6 bool - select CPU_USE_DOMAINS if CPU_V6 && MMU select TLS_REG_EMUL if !CPU_32v6K && !MMU config CPU_32v6K @@ -671,7 +670,7 @@ config ARM_VIRT_EXT config SWP_EMULATE bool "Emulate SWP/SWPB instructions" - depends on !CPU_USE_DOMAINS && CPU_V7 + depends on CPU_V7 default y if SMP select HAVE_PROC_CPU if PROC_FS help @@ -799,6 +798,7 @@ config NEED_KUSER_HELPERS config KUSER_HELPERS bool "Enable kuser helpers in vector page" if !NEED_KUSER_HELPERS + depends on MMU default y help Warning: disabling this option may break user programs. diff --git a/arch/arm/mm/abort-ev6.S b/arch/arm/mm/abort-ev6.S index 8074199..5d777a5 100644 --- a/arch/arm/mm/abort-ev6.S +++ b/arch/arm/mm/abort-ev6.S @@ -17,12 +17,6 @@ */ .align 5 ENTRY(v6_early_abort) -#ifdef CONFIG_CPU_V6 - sub r1, sp, #4 @ Get unused stack location - strex r0, r1, [r1] @ Clear the exclusive monitor -#elif defined(CONFIG_CPU_32v6K) - clrex -#endif mrc p15, 0, r1, c5, c0, 0 @ get FSR mrc p15, 0, r0, c6, c0, 0 @ get FAR /* diff --git a/arch/arm/mm/abort-ev7.S b/arch/arm/mm/abort-ev7.S index 7033752..4812ad0 100644 --- a/arch/arm/mm/abort-ev7.S +++ b/arch/arm/mm/abort-ev7.S @@ -13,12 +13,6 @@ */ .align 5 ENTRY(v7_early_abort) - /* - * The effect of data aborts on on the exclusive access monitor are - * UNPREDICTABLE. Do a CLREX to clear the state - */ - clrex - mrc p15, 0, r1, c5, c0, 0 @ get FSR mrc p15, 0, r0, c6, c0, 0 @ get FAR diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c index 6f4585b..1fe0bf5 100644 --- a/arch/arm/mm/alignment.c +++ b/arch/arm/mm/alignment.c @@ -39,6 +39,7 @@ * This code is not portable to processors with late data abort handling. */ #define CODING_BITS(i) (i & 0x0e000000) +#define COND_BITS(i) (i & 0xf0000000) #define LDST_I_BIT(i) (i & (1 << 26)) /* Immediate constant */ #define LDST_P_BIT(i) (i & (1 << 24)) /* Preindex */ @@ -812,6 +813,8 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs) break; case 0x04000000: /* ldr or str immediate */ + if (COND_BITS(instr) == 0xf0000000) /* NEON VLDn, VSTn */ + goto bad; offset.un = OFFSET_BITS(instr); handler = do_alignment_ldrstr; break; diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index b40d4ba..eb8830a 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -277,7 +277,7 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) * If we're in an interrupt or have no user * context, we must not take the fault.. */ - if (!mm || pagefault_disabled()) + if (in_atomic() || !mm) goto no_context; if (user_mode(regs)) diff --git a/arch/arm/mm/highmem.c b/arch/arm/mm/highmem.c index bd41dd8..21b9e1b 100644 --- a/arch/arm/mm/highmem.c +++ b/arch/arm/mm/highmem.c @@ -38,7 +38,6 @@ EXPORT_SYMBOL(kunmap); void *kmap_atomic(struct page *page) { - pte_t pte = mk_pte(page, kmap_prot); unsigned int idx; unsigned long vaddr; void *kmap; @@ -77,10 +76,7 @@ void *kmap_atomic(struct page *page) * in place, so the contained TLB flush ensures the TLB is updated * with the new mapping. */ -#ifdef CONFIG_PREEMPT_RT_FULL - current->kmap_pte[type] = pte; -#endif - set_top_pte(vaddr, pte); + set_top_pte(vaddr, mk_pte(page, kmap_prot)); return (void *)vaddr; } @@ -97,15 +93,12 @@ void __kunmap_atomic(void *kvaddr) if (cache_is_vivt()) __cpuc_flush_dcache_area((void *)vaddr, PAGE_SIZE); -#ifdef CONFIG_PREEMPT_RT_FULL - current->kmap_pte[type] = __pte(0); -#endif #ifdef CONFIG_DEBUG_HIGHMEM BUG_ON(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx)); + set_top_pte(vaddr, __pte(0)); #else (void) idx; /* to kill a warning */ #endif - set_top_pte(vaddr, __pte(0)); kmap_atomic_idx_pop(); } else if (vaddr >= PKMAP_ADDR(0) && vaddr < PKMAP_ADDR(LAST_PKMAP)) { /* this address was obtained through kmap_high_get() */ @@ -117,7 +110,6 @@ EXPORT_SYMBOL(__kunmap_atomic); void *kmap_atomic_pfn(unsigned long pfn) { - pte_t pte = pfn_pte(pfn, kmap_prot); unsigned long vaddr; int idx, type; @@ -129,10 +121,7 @@ void *kmap_atomic_pfn(unsigned long pfn) #ifdef CONFIG_DEBUG_HIGHMEM BUG_ON(!pte_none(get_top_pte(vaddr))); #endif -#ifdef CONFIG_PREEMPT_RT_FULL - current->kmap_pte[type] = pte; -#endif - set_top_pte(vaddr, pte); + set_top_pte(vaddr, pfn_pte(pfn, kmap_prot)); return (void *)vaddr; } @@ -146,29 +135,3 @@ struct page *kmap_atomic_to_page(const void *ptr) return pte_page(get_top_pte(vaddr)); } - -#if defined CONFIG_PREEMPT_RT_FULL -void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) -{ - int i; - - /* - * Clear @prev's kmap_atomic mappings - */ - for (i = 0; i < prev_p->kmap_idx; i++) { - int idx = i + KM_TYPE_NR * smp_processor_id(); - - set_top_pte(__fix_to_virt(FIX_KMAP_BEGIN + idx), __pte(0)); - } - /* - * Restore @next_p's kmap_atomic mappings - */ - for (i = 0; i < next_p->kmap_idx; i++) { - int idx = i + KM_TYPE_NR * smp_processor_id(); - - if (!pte_none(next_p->kmap_pte[i])) - set_top_pte(__fix_to_virt(FIX_KMAP_BEGIN + idx), - next_p->kmap_pte[i]); - } -} -#endif diff --git a/arch/arm/mm/hugetlbpage.c b/arch/arm/mm/hugetlbpage.c index 54ee616..66781bf 100644 --- a/arch/arm/mm/hugetlbpage.c +++ b/arch/arm/mm/hugetlbpage.c @@ -56,8 +56,3 @@ int pmd_huge(pmd_t pmd) { return pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT); } - -int pmd_huge_support(void) -{ - return 1; -} diff --git a/arch/arm/mm/idmap.c b/arch/arm/mm/idmap.c index 83cb3ac..c61d237 100644 --- a/arch/arm/mm/idmap.c +++ b/arch/arm/mm/idmap.c @@ -24,6 +24,13 @@ static void idmap_add_pmd(pud_t *pud, unsigned long addr, unsigned long end, pr_warning("Failed to allocate identity pmd.\n"); return; } + /* + * Copy the original PMD to ensure that the PMD entries for + * the kernel image are preserved. + */ + if (!pud_none(*pud)) + memcpy(pmd, pmd_offset(pud, 0), + PTRS_PER_PMD * sizeof(pmd_t)); pud_populate(&init_mm, pud, pmd); pmd += pmd_index(addr); } else diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 0222ba7..b7c987d 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -464,6 +464,16 @@ static void __init build_mem_type_table(void) s2_device_pgprot = mem_types[MT_DEVICE].prot_pte_s2; /* + * We don't use domains on ARMv6 (since this causes problems with + * v6/v7 kernels), so we must use a separate memory type for user + * r/o, kernel r/w to map the vectors page. + */ +#ifndef CONFIG_ARM_LPAE + if (cpu_arch == CPU_ARCH_ARMv6) + vecs_pgprot |= L_PTE_MT_VECTORS; +#endif + + /* * ARMv6 and above have extended page tables. */ if (cpu_arch >= CPU_ARCH_ARMv6 && (cr & CR_XP)) { diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S index e3c48a3..ee1d805 100644 --- a/arch/arm/mm/proc-macros.S +++ b/arch/arm/mm/proc-macros.S @@ -112,13 +112,9 @@ * 100x 1 0 1 r/o no acc * 10x0 1 0 1 r/o no acc * 1011 0 0 1 r/w no acc - * 110x 0 1 0 r/w r/o - * 11x0 0 1 0 r/w r/o - * 1111 0 1 1 r/w r/w - * - * If !CONFIG_CPU_USE_DOMAINS, the following permissions are changed: * 110x 1 1 1 r/o r/o * 11x0 1 1 1 r/o r/o + * 1111 0 1 1 r/w r/w */ .macro armv6_mt_table pfx \pfx\()_mt_table: @@ -137,7 +133,7 @@ .long PTE_EXT_TEX(2) @ L_PTE_MT_DEV_NONSHARED .long 0x00 @ unused .long 0x00 @ unused - .long 0x00 @ unused + .long PTE_CACHEABLE | PTE_BUFFERABLE | PTE_EXT_APX @ L_PTE_MT_VECTORS .endm .macro armv6_set_pte_ext pfx @@ -158,24 +154,21 @@ tst r1, #L_PTE_USER orrne r3, r3, #PTE_EXT_AP1 -#ifdef CONFIG_CPU_USE_DOMAINS - @ allow kernel read/write access to read-only user pages tstne r3, #PTE_EXT_APX - bicne r3, r3, #PTE_EXT_APX | PTE_EXT_AP0 -#endif + + @ user read-only -> kernel read-only + bicne r3, r3, #PTE_EXT_AP0 tst r1, #L_PTE_XN orrne r3, r3, #PTE_EXT_XN - orr r3, r3, r2 + eor r3, r3, r2 tst r1, #L_PTE_YOUNG tstne r1, #L_PTE_PRESENT moveq r3, #0 -#ifndef CONFIG_CPU_USE_DOMAINS tstne r1, #L_PTE_NONE movne r3, #0 -#endif str r3, [r0] mcr p15, 0, r0, c7, c10, 1 @ flush_pte diff --git a/arch/arm/mm/proc-v7-2level.S b/arch/arm/mm/proc-v7-2level.S index bdd3be4..1f52915 100644 --- a/arch/arm/mm/proc-v7-2level.S +++ b/arch/arm/mm/proc-v7-2level.S @@ -90,21 +90,14 @@ ENTRY(cpu_v7_set_pte_ext) tst r1, #L_PTE_USER orrne r3, r3, #PTE_EXT_AP1 -#ifdef CONFIG_CPU_USE_DOMAINS - @ allow kernel read/write access to read-only user pages - tstne r3, #PTE_EXT_APX - bicne r3, r3, #PTE_EXT_APX | PTE_EXT_AP0 -#endif tst r1, #L_PTE_XN orrne r3, r3, #PTE_EXT_XN tst r1, #L_PTE_YOUNG tstne r1, #L_PTE_VALID -#ifndef CONFIG_CPU_USE_DOMAINS eorne r1, r1, #L_PTE_NONE tstne r1, #L_PTE_NONE -#endif moveq r3, #0 ARM( str r3, [r0, #2048]! ) diff --git a/arch/arm/mm/proc-v7-3level.S b/arch/arm/mm/proc-v7-3level.S index 01a719e..22e3ad6 100644 --- a/arch/arm/mm/proc-v7-3level.S +++ b/arch/arm/mm/proc-v7-3level.S @@ -64,6 +64,14 @@ ENTRY(cpu_v7_switch_mm) mov pc, lr ENDPROC(cpu_v7_switch_mm) +#ifdef __ARMEB__ +#define rl r3 +#define rh r2 +#else +#define rl r2 +#define rh r3 +#endif + /* * cpu_v7_set_pte_ext(ptep, pte) * @@ -73,13 +81,13 @@ ENDPROC(cpu_v7_switch_mm) */ ENTRY(cpu_v7_set_pte_ext) #ifdef CONFIG_MMU - tst r2, #L_PTE_VALID + tst rl, #L_PTE_VALID beq 1f - tst r3, #1 << (57 - 32) @ L_PTE_NONE - bicne r2, #L_PTE_VALID + tst rh, #1 << (57 - 32) @ L_PTE_NONE + bicne rl, #L_PTE_VALID bne 1f - tst r3, #1 << (55 - 32) @ L_PTE_DIRTY - orreq r2, #L_PTE_RDONLY + tst rh, #1 << (55 - 32) @ L_PTE_DIRTY + orreq rl, #L_PTE_RDONLY 1: strd r2, r3, [r0] ALT_SMP(W(nop)) ALT_UP (mcr p15, 0, r0, c7, c10, 1) @ flush_pte diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index ced046d..2e38158 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -204,7 +204,6 @@ __v7_pj4b_setup: /* Auxiliary Debug Modes Control 1 Register */ #define PJ4B_STATIC_BP (1 << 2) /* Enable Static BP */ #define PJ4B_INTER_PARITY (1 << 8) /* Disable Internal Parity Handling */ -#define PJ4B_BCK_OFF_STREX (1 << 5) /* Enable the back off of STREX instr */ #define PJ4B_CLEAN_LINE (1 << 16) /* Disable data transfer for clean line */ /* Auxiliary Debug Modes Control 2 Register */ @@ -227,7 +226,6 @@ __v7_pj4b_setup: /* Auxiliary Debug Modes Control 1 Register */ mrc p15, 1, r0, c15, c1, 1 orr r0, r0, #PJ4B_CLEAN_LINE - orr r0, r0, #PJ4B_BCK_OFF_STREX orr r0, r0, #PJ4B_INTER_PARITY bic r0, r0, #PJ4B_STATIC_BP mcr p15, 1, r0, c15, c1, 1 diff --git a/arch/arm/mm/proc-xscale.S b/arch/arm/mm/proc-xscale.S index d19b1cf..b34b95f 100644 --- a/arch/arm/mm/proc-xscale.S +++ b/arch/arm/mm/proc-xscale.S @@ -535,7 +535,7 @@ ENTRY(cpu_xscale_do_suspend) mrc p15, 0, r5, c15, c1, 0 @ CP access reg mrc p15, 0, r6, c13, c0, 0 @ PID mrc p15, 0, r7, c3, c0, 0 @ domain ID - mrc p15, 0, r8, c1, c1, 0 @ auxiliary control reg + mrc p15, 0, r8, c1, c0, 1 @ auxiliary control reg mrc p15, 0, r9, c1, c0, 0 @ control reg bic r4, r4, #2 @ clear frequency change bit stmia r0, {r4 - r9} @ store cp regs @@ -552,7 +552,7 @@ ENTRY(cpu_xscale_do_resume) mcr p15, 0, r6, c13, c0, 0 @ PID mcr p15, 0, r7, c3, c0, 0 @ domain ID mcr p15, 0, r1, c2, c0, 0 @ translation table base addr - mcr p15, 0, r8, c1, c1, 0 @ auxiliary control reg + mcr p15, 0, r8, c1, c0, 1 @ auxiliary control reg mov r0, r9 @ control register b cpu_resume_mmu ENDPROC(cpu_xscale_do_resume) diff --git a/arch/arm/plat-versatile/platsmp.c b/arch/arm/plat-versatile/platsmp.c index b2e0858..39895d8 100644 --- a/arch/arm/plat-versatile/platsmp.c +++ b/arch/arm/plat-versatile/platsmp.c @@ -31,7 +31,7 @@ static void write_pen_release(int val) outer_clean_range(__pa(&pen_release), __pa(&pen_release + 1)); } -static DEFINE_RAW_SPINLOCK(boot_lock); +static DEFINE_SPINLOCK(boot_lock); void versatile_secondary_init(unsigned int cpu) { @@ -44,8 +44,8 @@ void versatile_secondary_init(unsigned int cpu) /* * Synchronise with the boot thread. */ - raw_spin_lock(&boot_lock); - raw_spin_unlock(&boot_lock); + spin_lock(&boot_lock); + spin_unlock(&boot_lock); } int versatile_boot_secondary(unsigned int cpu, struct task_struct *idle) @@ -56,7 +56,7 @@ int versatile_boot_secondary(unsigned int cpu, struct task_struct *idle) * Set synchronisation state between this boot processor * and the secondary one */ - raw_spin_lock(&boot_lock); + spin_lock(&boot_lock); /* * This is really belt and braces; we hold unintended secondary @@ -86,7 +86,7 @@ int versatile_boot_secondary(unsigned int cpu, struct task_struct *idle) * now the secondary core is starting up let it run its * calibrations, then wait for it to finish */ - raw_spin_unlock(&boot_lock); + spin_unlock(&boot_lock); return pen_release != -1 ? -ENOSYS : 0; } diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index c044548..fe70eae 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1,6 +1,7 @@ config ARM64 def_bool y select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE + select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_WANT_OPTIONAL_GPIOLIB select ARCH_WANT_COMPAT_IPC_PARSE_VERSION select ARCH_WANT_FRAME_POINTERS diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h index 899af80..c30a548 100644 --- a/arch/arm64/include/asm/compat.h +++ b/arch/arm64/include/asm/compat.h @@ -33,8 +33,8 @@ typedef s32 compat_ssize_t; typedef s32 compat_time_t; typedef s32 compat_clock_t; typedef s32 compat_pid_t; -typedef u32 __compat_uid_t; -typedef u32 __compat_gid_t; +typedef u16 __compat_uid_t; +typedef u16 __compat_gid_t; typedef u16 __compat_uid16_t; typedef u16 __compat_gid16_t; typedef u32 __compat_uid32_t; diff --git a/arch/arm64/include/asm/hw_breakpoint.h b/arch/arm64/include/asm/hw_breakpoint.h index d064047..52b484b 100644 --- a/arch/arm64/include/asm/hw_breakpoint.h +++ b/arch/arm64/include/asm/hw_breakpoint.h @@ -79,7 +79,6 @@ static inline void decode_ctrl_reg(u32 reg, */ #define ARM_MAX_BRP 16 #define ARM_MAX_WRP 16 -#define ARM_MAX_HBP_SLOTS (ARM_MAX_BRP + ARM_MAX_WRP) /* Virtual debug register bases. */ #define AARCH64_DBG_REG_BVR 0 diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 20925bc..e1134d0 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -51,6 +51,8 @@ #define TASK_SIZE_32 UL(0x100000000) #define TASK_SIZE (test_thread_flag(TIF_32BIT) ? \ TASK_SIZE_32 : TASK_SIZE_64) +#define TASK_SIZE_OF(tsk) (test_tsk_thread_flag(tsk, TIF_32BIT) ? \ + TASK_SIZE_32 : TASK_SIZE_64) #else #define TASK_SIZE TASK_SIZE_64 #endif /* CONFIG_COMPAT */ diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 82d95a7..05fe332 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -168,7 +168,7 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) { if (pte_valid_user(pte)) { - if (pte_exec(pte)) + if (!pte_special(pte) && pte_exec(pte)) __sync_icache_dcache(pte, addr); if (!pte_dirty(pte)) pte = pte_wrprotect(pte); @@ -253,11 +253,11 @@ static inline int has_transparent_hugepage(void) * Mark the prot value as uncacheable and unbufferable. */ #define pgprot_noncached(prot) \ - __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_DEVICE_nGnRnE)) + __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_DEVICE_nGnRnE) | PTE_PXN | PTE_UXN) #define pgprot_writecombine(prot) \ - __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_NORMAL_NC)) + __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_NORMAL_NC) | PTE_PXN | PTE_UXN) #define pgprot_dmacoherent(prot) \ - __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_NORMAL_NC)) + __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_NORMAL_NC) | PTE_PXN | PTE_UXN) #define __HAVE_PHYS_MEM_ACCESS_PROT struct file; extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 3881fd1..028a1b9 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -275,7 +275,6 @@ el1_sp_pc: * Stack or PC alignment exception handling */ mrs x0, far_el1 - mov x1, x25 mov x2, sp b do_sp_pc_abort el1_undef: diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 7ae8a1f..7af6183 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -183,9 +183,27 @@ void exit_thread(void) { } +static void tls_thread_flush(void) +{ + asm ("msr tpidr_el0, xzr"); + + if (is_compat_task()) { + current->thread.tp_value = 0; + + /* + * We need to ensure ordering between the shadow state and the + * hardware state, so that we don't corrupt the hardware state + * with a stale shadow state during context switch. + */ + barrier(); + asm ("msr tpidrro_el0, xzr"); + } +} + void flush_thread(void) { fpsimd_flush_thread(); + tls_thread_flush(); flush_ptrace_hw_breakpoint(current); } diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index c484d56..ee79a1a 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -81,7 +81,8 @@ static void ptrace_hbptriggered(struct perf_event *bp, break; } } - for (i = ARM_MAX_BRP; i < ARM_MAX_HBP_SLOTS && !bp; ++i) { + + for (i = 0; i < ARM_MAX_WRP; ++i) { if (current->thread.debug.hbp_watch[i] == bp) { info.si_errno = -((i << 1) + 1); break; @@ -823,6 +824,7 @@ static int compat_ptrace_write_user(struct task_struct *tsk, compat_ulong_t off, compat_ulong_t val) { int ret; + mm_segment_t old_fs = get_fs(); if (off & 3 || off >= COMPAT_USER_SZ) return -EIO; @@ -830,10 +832,13 @@ static int compat_ptrace_write_user(struct task_struct *tsk, compat_ulong_t off, if (off >= sizeof(compat_elf_gregset_t)) return 0; + set_fs(KERNEL_DS); ret = copy_regset_from_user(tsk, &user_aarch32_view, REGSET_COMPAT_GPR, off, sizeof(compat_ulong_t), &val); + set_fs(old_fs); + return ret; } diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c index 26e9c4e..7803992 100644 --- a/arch/arm64/kernel/sys_compat.c +++ b/arch/arm64/kernel/sys_compat.c @@ -79,6 +79,12 @@ long compat_arm_syscall(struct pt_regs *regs) case __ARM_NR_compat_set_tls: current->thread.tp_value = regs->regs[0]; + + /* + * Protect against register corruption from context switch. + * See comment in tls_thread_flush. + */ + barrier(); asm ("msr tpidrro_el0, %0" : : "r" (regs->regs[0])); return 0; diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S index 6e0ed93..c17967f 100644 --- a/arch/arm64/lib/clear_user.S +++ b/arch/arm64/lib/clear_user.S @@ -46,7 +46,7 @@ USER(9f, strh wzr, [x0], #2 ) sub x1, x1, #2 4: adds x1, x1, #1 b.mi 5f - strb wzr, [x0] +USER(9f, strb wzr, [x0] ) 5: mov x0, #0 ret ENDPROC(__clear_user) diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c index e4193e3..0d64089 100644 --- a/arch/arm64/mm/flush.c +++ b/arch/arm64/mm/flush.c @@ -79,7 +79,8 @@ void __sync_icache_dcache(pte_t pte, unsigned long addr) return; if (!test_and_set_bit(PG_dcache_clean, &page->flags)) { - __flush_dcache_area(page_address(page), PAGE_SIZE); + __flush_dcache_area(page_address(page), + PAGE_SIZE << compound_order(page)); __flush_icache_all(); } else if (icache_is_aivivt()) { __flush_icache_all(); diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 5e9aec3..023747b 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -51,12 +51,11 @@ int pmd_huge(pmd_t pmd) int pud_huge(pud_t pud) { +#ifndef __PAGETABLE_PMD_FOLDED return !(pud_val(pud) & PUD_TABLE_BIT); -} - -int pmd_huge_support(void) -{ - return 1; +#else + return 0; +#endif } static __init int setup_hugepagesz(char *opt) diff --git a/arch/avr32/mm/fault.c b/arch/avr32/mm/fault.c index 25920d2..0eca933 100644 --- a/arch/avr32/mm/fault.c +++ b/arch/avr32/mm/fault.c @@ -81,7 +81,7 @@ asmlinkage void do_page_fault(unsigned long ecr, struct pt_regs *regs) * If we're in an interrupt or have no user context, we must * not take the fault... */ - if (!mm || regs->sr & SYSREG_BIT(GM) || pagefault_disabled()) + if (in_atomic() || !mm || regs->sr & SYSREG_BIT(GM)) goto no_context; local_irq_enable(); diff --git a/arch/cris/mm/fault.c b/arch/cris/mm/fault.c index 281e859..1790f22 100644 --- a/arch/cris/mm/fault.c +++ b/arch/cris/mm/fault.c @@ -113,7 +113,7 @@ do_page_fault(unsigned long address, struct pt_regs *regs, * user context, we must not take the fault. */ - if (!mm || pagefault_disabled()) + if (in_atomic() || !mm) goto no_context; if (user_mode(regs)) diff --git a/arch/frv/mm/fault.c b/arch/frv/mm/fault.c index 8d9fc16..9a66372 100644 --- a/arch/frv/mm/fault.c +++ b/arch/frv/mm/fault.c @@ -78,7 +78,7 @@ asmlinkage void do_page_fault(int datammu, unsigned long esr0, unsigned long ear * If we're in an interrupt or have no user * context, we must not take the fault.. */ - if (!mm || pagefault_disabled()) + if (in_atomic() || !mm) goto no_context; if (user_mode(__frame)) diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c index 164db10..7225dad 100644 --- a/arch/ia64/mm/fault.c +++ b/arch/ia64/mm/fault.c @@ -96,7 +96,7 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re /* * If we're in an interrupt or have no user context, we must not take the fault.. */ - if (!mm || pagefault_disabled()) + if (in_atomic() || !mm) goto no_context; #ifdef CONFIG_VIRTUAL_MEM_MAP diff --git a/arch/ia64/mm/hugetlbpage.c b/arch/ia64/mm/hugetlbpage.c index 68232db..76069c1 100644 --- a/arch/ia64/mm/hugetlbpage.c +++ b/arch/ia64/mm/hugetlbpage.c @@ -114,11 +114,6 @@ int pud_huge(pud_t pud) return 0; } -int pmd_huge_support(void) -{ - return 0; -} - struct page * follow_huge_pmd(struct mm_struct *mm, unsigned long address, pmd_t *pmd, int write) { diff --git a/arch/m32r/mm/fault.c b/arch/m32r/mm/fault.c index ccb6797..e9c6a80 100644 --- a/arch/m32r/mm/fault.c +++ b/arch/m32r/mm/fault.c @@ -114,7 +114,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code, * If we're in an interrupt or have no user context or are running in an * atomic region then we must not take the fault.. */ - if (!mm || pagefault_disabled()) + if (in_atomic() || !mm) goto bad_area_nosemaphore; if (error_code & ACE_USERMODE) diff --git a/arch/m68k/mm/fault.c b/arch/m68k/mm/fault.c index 76bee37..eb1d61f 100644 --- a/arch/m68k/mm/fault.c +++ b/arch/m68k/mm/fault.c @@ -85,7 +85,7 @@ int do_page_fault(struct pt_regs *regs, unsigned long address, * If we're in an interrupt or have no user * context, we must not take the fault.. */ - if (!mm || pagefault_disabled()) + if (in_atomic() || !mm) goto no_context; if (user_mode(regs)) diff --git a/arch/m68k/mm/hwtest.c b/arch/m68k/mm/hwtest.c index 2c7dde3..2a5259f 100644 --- a/arch/m68k/mm/hwtest.c +++ b/arch/m68k/mm/hwtest.c @@ -28,9 +28,11 @@ int hwreg_present( volatile void *regp ) { int ret = 0; + unsigned long flags; long save_sp, save_vbr; long tmp_vectors[3]; + local_irq_save(flags); __asm__ __volatile__ ( "movec %/vbr,%2\n\t" "movel #Lberr1,%4@(8)\n\t" @@ -46,6 +48,7 @@ int hwreg_present( volatile void *regp ) : "=&d" (ret), "=&r" (save_sp), "=&r" (save_vbr) : "a" (regp), "a" (tmp_vectors) ); + local_irq_restore(flags); return( ret ); } @@ -58,9 +61,11 @@ EXPORT_SYMBOL(hwreg_present); int hwreg_write( volatile void *regp, unsigned short val ) { int ret; + unsigned long flags; long save_sp, save_vbr; long tmp_vectors[3]; + local_irq_save(flags); __asm__ __volatile__ ( "movec %/vbr,%2\n\t" "movel #Lberr2,%4@(8)\n\t" @@ -78,6 +83,7 @@ int hwreg_write( volatile void *regp, unsigned short val ) : "=&d" (ret), "=&r" (save_sp), "=&r" (save_vbr) : "a" (regp), "a" (tmp_vectors), "g" (val) ); + local_irq_restore(flags); return( ret ); } diff --git a/arch/metag/include/asm/barrier.h b/arch/metag/include/asm/barrier.h index c90bfc6..e355a4c 100644 --- a/arch/metag/include/asm/barrier.h +++ b/arch/metag/include/asm/barrier.h @@ -15,6 +15,7 @@ static inline void wr_fence(void) volatile int *flushptr = (volatile int *) LINSYSEVENT_WR_FENCE; barrier(); *flushptr = 0; + barrier(); } #else /* CONFIG_METAG_META21 */ @@ -35,6 +36,7 @@ static inline void wr_fence(void) *flushptr = 0; *flushptr = 0; *flushptr = 0; + barrier(); } #endif /* !CONFIG_METAG_META21 */ @@ -68,6 +70,7 @@ static inline void fence(void) volatile int *flushptr = (volatile int *) LINSYSEVENT_WR_ATOMIC_UNLOCK; barrier(); *flushptr = 0; + barrier(); } #define smp_mb() fence() #define smp_rmb() fence() diff --git a/arch/metag/include/asm/processor.h b/arch/metag/include/asm/processor.h index f16477d..3be8581 100644 --- a/arch/metag/include/asm/processor.h +++ b/arch/metag/include/asm/processor.h @@ -22,6 +22,8 @@ /* Add an extra page of padding at the top of the stack for the guard page. */ #define STACK_TOP (TASK_SIZE - PAGE_SIZE) #define STACK_TOP_MAX STACK_TOP +/* Maximum virtual space for stack */ +#define STACK_SIZE_MAX (1 << 28) /* 256 MB */ /* This decides where the kernel will search for a free chunk of vm * space during mmap's. diff --git a/arch/metag/mm/hugetlbpage.c b/arch/metag/mm/hugetlbpage.c index 0424315..3c52fa6 100644 --- a/arch/metag/mm/hugetlbpage.c +++ b/arch/metag/mm/hugetlbpage.c @@ -110,11 +110,6 @@ int pud_huge(pud_t pud) return 0; } -int pmd_huge_support(void) -{ - return 1; -} - struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, pmd_t *pmd, int write) { diff --git a/arch/microblaze/mm/fault.c b/arch/microblaze/mm/fault.c index 13d6b07..fa4cf52 100644 --- a/arch/microblaze/mm/fault.c +++ b/arch/microblaze/mm/fault.c @@ -107,7 +107,7 @@ void do_page_fault(struct pt_regs *regs, unsigned long address, if ((error_code & 0x13) == 0x13 || (error_code & 0x11) == 0x11) is_write = 0; - if (unlikely(!mm || pagefault_disabled())) { + if (unlikely(in_atomic() || !mm)) { if (kernel_mode(regs)) goto bad_area_nosemaphore; diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index f3981c2..f75ab4a 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -2078,7 +2078,7 @@ config CPU_R4400_WORKAROUNDS # config HIGHMEM bool "High Memory Support" - depends on 32BIT && CPU_SUPPORTS_HIGHMEM && SYS_SUPPORTS_HIGHMEM && !PREEMPT_RT_FULL + depends on 32BIT && CPU_SUPPORTS_HIGHMEM && SYS_SUPPORTS_HIGHMEM config CPU_SUPPORTS_HIGHMEM bool diff --git a/arch/mips/boot/compressed/decompress.c b/arch/mips/boot/compressed/decompress.c index 2c95730..d498a1f 100644 --- a/arch/mips/boot/compressed/decompress.c +++ b/arch/mips/boot/compressed/decompress.c @@ -13,6 +13,7 @@ #include <linux/types.h> #include <linux/kernel.h> +#include <linux/string.h> #include <asm/addrspace.h> diff --git a/arch/mips/cavium-octeon/octeon-irq.c b/arch/mips/cavium-octeon/octeon-irq.c index 25fbfae..ab7dc01 100644 --- a/arch/mips/cavium-octeon/octeon-irq.c +++ b/arch/mips/cavium-octeon/octeon-irq.c @@ -635,7 +635,7 @@ static void octeon_irq_cpu_offline_ciu(struct irq_data *data) cpumask_clear(&new_affinity); cpumask_set_cpu(cpumask_first(cpu_online_mask), &new_affinity); } - __irq_set_affinity_locked(data, &new_affinity); + irq_set_affinity_locked(data, &new_affinity, false); } static int octeon_irq_ciu_set_affinity(struct irq_data *data, diff --git a/arch/mips/cavium-octeon/setup.c b/arch/mips/cavium-octeon/setup.c index b212ae1..8a00799 100644 --- a/arch/mips/cavium-octeon/setup.c +++ b/arch/mips/cavium-octeon/setup.c @@ -458,6 +458,18 @@ static void octeon_halt(void) octeon_kill_core(NULL); } +static char __read_mostly octeon_system_type[80]; + +static int __init init_octeon_system_type(void) +{ + snprintf(octeon_system_type, sizeof(octeon_system_type), "%s (%s)", + cvmx_board_type_to_string(octeon_bootinfo->board_type), + octeon_model_get_string(read_c0_prid())); + + return 0; +} +early_initcall(init_octeon_system_type); + /** * Return a string representing the system type * @@ -465,11 +477,7 @@ static void octeon_halt(void) */ const char *octeon_board_type_string(void) { - static char name[80]; - sprintf(name, "%s (%s)", - cvmx_board_type_to_string(octeon_bootinfo->board_type), - octeon_model_get_string(read_c0_prid())); - return name; + return octeon_system_type; } const char *get_system_type(void) diff --git a/arch/mips/include/asm/ftrace.h b/arch/mips/include/asm/ftrace.h index ce35c9a..370ae7c 100644 --- a/arch/mips/include/asm/ftrace.h +++ b/arch/mips/include/asm/ftrace.h @@ -24,7 +24,7 @@ do { \ asm volatile ( \ "1: " load " %[" STR(dst) "], 0(%[" STR(src) "])\n"\ " li %[" STR(error) "], 0\n" \ - "2:\n" \ + "2: .insn\n" \ \ ".section .fixup, \"ax\"\n" \ "3: li %[" STR(error) "], 1\n" \ @@ -46,7 +46,7 @@ do { \ asm volatile ( \ "1: " store " %[" STR(src) "], 0(%[" STR(dst) "])\n"\ " li %[" STR(error) "], 0\n" \ - "2:\n" \ + "2: .insn\n" \ \ ".section .fixup, \"ax\"\n" \ "3: li %[" STR(error) "], 1\n" \ diff --git a/arch/mips/include/asm/thread_info.h b/arch/mips/include/asm/thread_info.h index 61215a3..897cd58 100644 --- a/arch/mips/include/asm/thread_info.h +++ b/arch/mips/include/asm/thread_info.h @@ -134,7 +134,7 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_LOAD_WATCH (1<<TIF_LOAD_WATCH) #define _TIF_WORK_SYSCALL_ENTRY (_TIF_NOHZ | _TIF_SYSCALL_TRACE | \ - _TIF_SYSCALL_AUDIT) + _TIF_SYSCALL_AUDIT | _TIF_SECCOMP) /* work to do in syscall_trace_leave() */ #define _TIF_WORK_SYSCALL_EXIT (_TIF_NOHZ | _TIF_SYSCALL_TRACE | \ diff --git a/arch/mips/kernel/irq-gic.c b/arch/mips/kernel/irq-gic.c index 5b5ddb2..78f1843 100644 --- a/arch/mips/kernel/irq-gic.c +++ b/arch/mips/kernel/irq-gic.c @@ -255,11 +255,13 @@ static void __init gic_setup_intr(unsigned int intr, unsigned int cpu, /* Setup Intr to Pin mapping */ if (pin & GIC_MAP_TO_NMI_MSK) { + int i; + GICWRITE(GIC_REG_ADDR(SHARED, GIC_SH_MAP_TO_PIN(intr)), pin); /* FIXME: hack to route NMI to all cpu's */ - for (cpu = 0; cpu < NR_CPUS; cpu += 32) { + for (i = 0; i < NR_CPUS; i += 32) { GICWRITE(GIC_REG_ADDR(SHARED, - GIC_SH_MAP_TO_VPE_REG_OFF(intr, cpu)), + GIC_SH_MAP_TO_VPE_REG_OFF(intr, i)), 0xffffffff); } } else { diff --git a/arch/mips/kernel/irq-msc01.c b/arch/mips/kernel/irq-msc01.c index fab40f7..ac9facc 100644 --- a/arch/mips/kernel/irq-msc01.c +++ b/arch/mips/kernel/irq-msc01.c @@ -131,7 +131,7 @@ void __init init_msc_irqs(unsigned long icubase, unsigned int irqbase, msc_irqma board_bind_eic_interrupt = &msc_bind_eic_interrupt; - for (; nirq >= 0; nirq--, imp++) { + for (; nirq > 0; nirq--, imp++) { int n = imp->im_irq; switch (imp->im_type) { diff --git a/arch/mips/kernel/mcount.S b/arch/mips/kernel/mcount.S index 539b629..8f89ff4 100644 --- a/arch/mips/kernel/mcount.S +++ b/arch/mips/kernel/mcount.S @@ -123,7 +123,11 @@ NESTED(_mcount, PT_SIZE, ra) nop #endif b ftrace_stub +#ifdef CONFIG_32BIT + addiu sp, sp, 8 +#else nop +#endif static_trace: MCOUNT_SAVE_REGS @@ -133,6 +137,9 @@ static_trace: move a1, AT /* arg2: parent's return address */ MCOUNT_RESTORE_REGS +#ifdef CONFIG_32BIT + addiu sp, sp, 8 +#endif .globl ftrace_stub ftrace_stub: RETURN_BACK @@ -177,6 +184,11 @@ NESTED(ftrace_graph_caller, PT_SIZE, ra) jal prepare_ftrace_return nop MCOUNT_RESTORE_REGS +#ifndef CONFIG_DYNAMIC_FTRACE +#ifdef CONFIG_32BIT + addiu sp, sp, 8 +#endif +#endif RETURN_BACK END(ftrace_graph_caller) diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index 8ae1ebe..5404cab 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -162,6 +162,7 @@ int ptrace_setfpregs(struct task_struct *child, __u32 __user *data) __get_user(fregs[i], i + (__u64 __user *) data); __get_user(child->thread.fpu.fcr31, data + 64); + child->thread.fpu.fcr31 &= ~FPU_CSR_ALL_X; /* FIR may not be written. */ @@ -452,7 +453,7 @@ long arch_ptrace(struct task_struct *child, long request, break; #endif case FPC_CSR: - child->thread.fpu.fcr31 = data; + child->thread.fpu.fcr31 = data & ~FPU_CSR_ALL_X; break; case DSP_BASE ... DSP_BASE + 5: { dspreg_t *dregs; diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c index a4ae7ad..2f285ab 100644 --- a/arch/mips/kernel/signal.c +++ b/arch/mips/kernel/signal.c @@ -573,7 +573,6 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) { local_irq_enable(); - preempt_check_resched(); user_exit(); diff --git a/arch/mips/kernel/unaligned.c b/arch/mips/kernel/unaligned.c index c369a5d..b897dde 100644 --- a/arch/mips/kernel/unaligned.c +++ b/arch/mips/kernel/unaligned.c @@ -605,7 +605,6 @@ static void emulate_load_store_insn(struct pt_regs *regs, case sdc1_op: die_if_kernel("Unaligned FP access in kernel code", regs); BUG_ON(!used_math()); - BUG_ON(!is_fpu_owner()); lose_fpu(1); /* Save FPU state for the emulator. */ res = fpu_emulator_cop1Handler(regs, ¤t->thread.fpu, 1, diff --git a/arch/mips/kvm/kvm_mips.c b/arch/mips/kvm/kvm_mips.c index a7b0445..3f3e5b2 100644 --- a/arch/mips/kvm/kvm_mips.c +++ b/arch/mips/kvm/kvm_mips.c @@ -149,9 +149,7 @@ void kvm_mips_free_vcpus(struct kvm *kvm) if (kvm->arch.guest_pmap[i] != KVM_INVALID_PAGE) kvm_mips_release_pfn_clean(kvm->arch.guest_pmap[i]); } - - if (kvm->arch.guest_pmap) - kfree(kvm->arch.guest_pmap); + kfree(kvm->arch.guest_pmap); kvm_for_each_vcpu(i, vcpu, kvm) { kvm_arch_vcpu_free(vcpu); @@ -303,7 +301,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) if (cpu_has_veic || cpu_has_vint) { size = 0x200 + VECTORSPACING * 64; } else { - size = 0x200; + size = 0x4000; } /* Save Linux EBASE */ @@ -388,12 +386,9 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) kvm_mips_dump_stats(vcpu); - if (vcpu->arch.guest_ebase) - kfree(vcpu->arch.guest_ebase); - - if (vcpu->arch.kseg0_commpage) - kfree(vcpu->arch.kseg0_commpage); - + kfree(vcpu->arch.guest_ebase); + kfree(vcpu->arch.kseg0_commpage); + kfree(vcpu); } void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) diff --git a/arch/mips/kvm/kvm_mips_emul.c b/arch/mips/kvm/kvm_mips_emul.c index 4b6274b..e75ef82 100644 --- a/arch/mips/kvm/kvm_mips_emul.c +++ b/arch/mips/kvm/kvm_mips_emul.c @@ -1571,17 +1571,17 @@ kvm_mips_handle_ri(unsigned long cause, uint32_t *opc, arch->gprs[rt] = kvm_read_c0_guest_userlocal(cop0); #else /* UserLocal not implemented */ - er = kvm_mips_emulate_ri_exc(cause, opc, run, vcpu); + er = EMULATE_FAIL; #endif break; default: - printk("RDHWR not supported\n"); + kvm_debug("RDHWR %#x not supported @ %p\n", rd, opc); er = EMULATE_FAIL; break; } } else { - printk("Emulate RI not supported @ %p: %#x\n", opc, inst); + kvm_debug("Emulate RI not supported @ %p: %#x\n", opc, inst); er = EMULATE_FAIL; } @@ -1590,6 +1590,7 @@ kvm_mips_handle_ri(unsigned long cause, uint32_t *opc, */ if (er == EMULATE_FAIL) { vcpu->arch.pc = curr_pc; + er = kvm_mips_emulate_ri_exc(cause, opc, run, vcpu); } return er; } diff --git a/arch/mips/lantiq/dts/easy50712.dts b/arch/mips/lantiq/dts/easy50712.dts index fac1f5b..143b8a3 100644 --- a/arch/mips/lantiq/dts/easy50712.dts +++ b/arch/mips/lantiq/dts/easy50712.dts @@ -8,6 +8,7 @@ }; memory@0 { + device_type = "memory"; reg = <0x0 0x2000000>; }; diff --git a/arch/mips/loongson/common/Makefile b/arch/mips/loongson/common/Makefile index 9e4484c..9005a8d6 100644 --- a/arch/mips/loongson/common/Makefile +++ b/arch/mips/loongson/common/Makefile @@ -11,7 +11,8 @@ obj-$(CONFIG_PCI) += pci.o # Serial port support # obj-$(CONFIG_EARLY_PRINTK) += early_printk.o -obj-$(CONFIG_SERIAL_8250) += serial.o +loongson-serial-$(CONFIG_SERIAL_8250) := serial.o +obj-y += $(loongson-serial-m) $(loongson-serial-y) obj-$(CONFIG_LOONGSON_UART_BASE) += uart_base.o obj-$(CONFIG_LOONGSON_MC146818) += rtc.o diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c index 6a492c0..becc42b 100644 --- a/arch/mips/mm/fault.c +++ b/arch/mips/mm/fault.c @@ -89,7 +89,7 @@ static void __kprobes __do_page_fault(struct pt_regs *regs, unsigned long write, * If we're in an interrupt or have no user * context, we must not take the fault.. */ - if (!mm || pagefault_disabled()) + if (in_atomic() || !mm) goto bad_area_nosemaphore; if (user_mode(regs)) diff --git a/arch/mips/mm/hugetlbpage.c b/arch/mips/mm/hugetlbpage.c index 01fda44..a7fee0d 100644 --- a/arch/mips/mm/hugetlbpage.c +++ b/arch/mips/mm/hugetlbpage.c @@ -85,11 +85,6 @@ int pud_huge(pud_t pud) return (pud_val(pud) & _PAGE_HUGE) != 0; } -int pmd_huge_support(void) -{ - return 1; -} - struct page * follow_huge_pmd(struct mm_struct *mm, unsigned long address, pmd_t *pmd, int write) diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c index 9bb3a93..a39b415 100644 --- a/arch/mips/mm/tlbex.c +++ b/arch/mips/mm/tlbex.c @@ -1095,6 +1095,7 @@ static void build_update_entries(u32 **p, unsigned int tmp, unsigned int ptep) struct mips_huge_tlb_info { int huge_pte; int restore_scratch; + bool need_reload_pte; }; static struct mips_huge_tlb_info @@ -1109,6 +1110,7 @@ build_fast_tlb_refill_handler (u32 **p, struct uasm_label **l, rv.huge_pte = scratch; rv.restore_scratch = 0; + rv.need_reload_pte = false; if (check_for_high_segbits) { UASM_i_MFC0(p, tmp, C0_BADVADDR); @@ -1297,6 +1299,7 @@ static void build_r4000_tlb_refill_handler(void) } else { htlb_info.huge_pte = K0; htlb_info.restore_scratch = 0; + htlb_info.need_reload_pte = true; vmalloc_mode = refill_noscratch; /* * create the plain linear handler @@ -1333,6 +1336,8 @@ static void build_r4000_tlb_refill_handler(void) } #ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT uasm_l_tlb_huge_update(&l, p); + if (htlb_info.need_reload_pte) + UASM_i_LW(&p, htlb_info.huge_pte, 0, K1); build_huge_update_entries(&p, htlb_info.huge_pte, K1); build_huge_tlb_write_entry(&p, &l, &r, K0, tlb_random, htlb_info.restore_scratch); diff --git a/arch/mips/oprofile/backtrace.c b/arch/mips/oprofile/backtrace.c index 6854ed5..83a1dfd 100644 --- a/arch/mips/oprofile/backtrace.c +++ b/arch/mips/oprofile/backtrace.c @@ -92,7 +92,7 @@ static inline int unwind_user_frame(struct stackframe *old_frame, /* This marks the end of the previous function, which means we overran. */ break; - stack_size = (unsigned) stack_adjustment; + stack_size = (unsigned long) stack_adjustment; } else if (is_ra_save_ins(&ip)) { int ra_slot = ip.i_format.simmediate; if (ra_slot < 0) diff --git a/arch/mips/power/hibernate.S b/arch/mips/power/hibernate.S index 7e0277a..32a7c82 100644 --- a/arch/mips/power/hibernate.S +++ b/arch/mips/power/hibernate.S @@ -43,6 +43,7 @@ LEAF(swsusp_arch_resume) bne t1, t3, 1b PTR_L t0, PBE_NEXT(t0) bnez t0, 0b + jal local_flush_tlb_all /* Avoid TLB mismatch after kernel resume */ PTR_LA t0, saved_regs PTR_L ra, PT_R31(t0) PTR_L sp, PT_R29(t0) diff --git a/arch/mips/ralink/dts/mt7620a_eval.dts b/arch/mips/ralink/dts/mt7620a_eval.dts index 35eb874..709f581 100644 --- a/arch/mips/ralink/dts/mt7620a_eval.dts +++ b/arch/mips/ralink/dts/mt7620a_eval.dts @@ -7,6 +7,7 @@ model = "Ralink MT7620A evaluation board"; memory@0 { + device_type = "memory"; reg = <0x0 0x2000000>; }; diff --git a/arch/mips/ralink/dts/rt2880_eval.dts b/arch/mips/ralink/dts/rt2880_eval.dts index 322d700..0a685db 100644 --- a/arch/mips/ralink/dts/rt2880_eval.dts +++ b/arch/mips/ralink/dts/rt2880_eval.dts @@ -7,6 +7,7 @@ model = "Ralink RT2880 evaluation board"; memory@0 { + device_type = "memory"; reg = <0x8000000 0x2000000>; }; diff --git a/arch/mips/ralink/dts/rt3052_eval.dts b/arch/mips/ralink/dts/rt3052_eval.dts index 0ac73ea..ec9e9a0 100644 --- a/arch/mips/ralink/dts/rt3052_eval.dts +++ b/arch/mips/ralink/dts/rt3052_eval.dts @@ -7,6 +7,7 @@ model = "Ralink RT3052 evaluation board"; memory@0 { + device_type = "memory"; reg = <0x0 0x2000000>; }; diff --git a/arch/mips/ralink/dts/rt3883_eval.dts b/arch/mips/ralink/dts/rt3883_eval.dts index 2fa6b33..e8df21a 100644 --- a/arch/mips/ralink/dts/rt3883_eval.dts +++ b/arch/mips/ralink/dts/rt3883_eval.dts @@ -7,6 +7,7 @@ model = "Ralink RT3883 evaluation board"; memory@0 { + device_type = "memory"; reg = <0x0 0x2000000>; }; diff --git a/arch/mn10300/mm/fault.c b/arch/mn10300/mm/fault.c index 8bd4425..3516cbd 100644 --- a/arch/mn10300/mm/fault.c +++ b/arch/mn10300/mm/fault.c @@ -168,7 +168,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long fault_code, * If we're in an interrupt or have no user * context, we must not take the fault.. */ - if (!mm || pagefault_disabled()) + if (in_atomic() || !mm) goto no_context; if ((fault_code & MMUFCR_xFC_ACCESS) == MMUFCR_xFC_ACCESS_USR) diff --git a/arch/openrisc/kernel/entry.S b/arch/openrisc/kernel/entry.S index d8a455e..fec8bf9 100644 --- a/arch/openrisc/kernel/entry.S +++ b/arch/openrisc/kernel/entry.S @@ -853,37 +853,44 @@ UNHANDLED_EXCEPTION(_vector_0x1f00,0x1f00) /* ========================================================[ return ] === */ +_resume_userspace: + DISABLE_INTERRUPTS(r3,r4) + l.lwz r4,TI_FLAGS(r10) + l.andi r13,r4,_TIF_WORK_MASK + l.sfeqi r13,0 + l.bf _restore_all + l.nop + _work_pending: - /* - * if (current_thread_info->flags & _TIF_NEED_RESCHED) - * schedule(); - */ - l.lwz r5,TI_FLAGS(r10) - l.andi r3,r5,_TIF_NEED_RESCHED - l.sfnei r3,0 - l.bnf _work_notifysig + l.lwz r5,PT_ORIG_GPR11(r1) + l.sfltsi r5,0 + l.bnf 1f l.nop - l.jal schedule + l.andi r5,r5,0 +1: + l.jal do_work_pending + l.ori r3,r1,0 /* pt_regs */ + + l.sfeqi r11,0 + l.bf _restore_all l.nop - l.j _resume_userspace + l.sfltsi r11,0 + l.bnf 1f l.nop - -/* Handle pending signals and notify-resume requests. - * do_notify_resume must be passed the latest pushed pt_regs, not - * necessarily the "userspace" ones. Also, pt_regs->syscallno - * must be set so that the syscall restart functionality works. - */ -_work_notifysig: - l.jal do_notify_resume - l.ori r3,r1,0 /* pt_regs */ - -_resume_userspace: - DISABLE_INTERRUPTS(r3,r4) - l.lwz r3,TI_FLAGS(r10) - l.andi r3,r3,_TIF_WORK_MASK - l.sfnei r3,0 - l.bf _work_pending + l.and r11,r11,r0 + l.ori r11,r11,__NR_restart_syscall + l.j _syscall_check_trace_enter l.nop +1: + l.lwz r11,PT_ORIG_GPR11(r1) + /* Restore arg registers */ + l.lwz r3,PT_GPR3(r1) + l.lwz r4,PT_GPR4(r1) + l.lwz r5,PT_GPR5(r1) + l.lwz r6,PT_GPR6(r1) + l.lwz r7,PT_GPR7(r1) + l.j _syscall_check_trace_enter + l.lwz r8,PT_GPR8(r1) _restore_all: RESTORE_ALL diff --git a/arch/openrisc/kernel/signal.c b/arch/openrisc/kernel/signal.c index ae167f7..c277ec8 100644 --- a/arch/openrisc/kernel/signal.c +++ b/arch/openrisc/kernel/signal.c @@ -28,24 +28,24 @@ #include <linux/tracehook.h> #include <asm/processor.h> +#include <asm/syscall.h> #include <asm/ucontext.h> #include <asm/uaccess.h> #define DEBUG_SIG 0 struct rt_sigframe { - struct siginfo *pinfo; - void *puc; struct siginfo info; struct ucontext uc; unsigned char retcode[16]; /* trampoline code */ }; -static int restore_sigcontext(struct pt_regs *regs, struct sigcontext *sc) +static int restore_sigcontext(struct pt_regs *regs, + struct sigcontext __user *sc) { - unsigned int err = 0; + int err = 0; - /* Alwys make any pending restarted system call return -EINTR */ + /* Always make any pending restarted system calls return -EINTR */ current_thread_info()->restart_block.fn = do_no_restart_syscall; /* @@ -53,25 +53,21 @@ static int restore_sigcontext(struct pt_regs *regs, struct sigcontext *sc) * (sc is already checked for VERIFY_READ since the sigframe was * checked in sys_sigreturn previously) */ - if (__copy_from_user(regs, sc->regs.gpr, 32 * sizeof(unsigned long))) - goto badframe; - if (__copy_from_user(®s->pc, &sc->regs.pc, sizeof(unsigned long))) - goto badframe; - if (__copy_from_user(®s->sr, &sc->regs.sr, sizeof(unsigned long))) - goto badframe; + err |= __copy_from_user(regs, sc->regs.gpr, 32 * sizeof(unsigned long)); + err |= __copy_from_user(®s->pc, &sc->regs.pc, sizeof(unsigned long)); + err |= __copy_from_user(®s->sr, &sc->regs.sr, sizeof(unsigned long)); /* make sure the SM-bit is cleared so user-mode cannot fool us */ regs->sr &= ~SPR_SR_SM; + regs->orig_gpr11 = -1; /* Avoid syscall restart checks */ + /* TODO: the other ports use regs->orig_XX to disable syscall checks * after this completes, but we don't use that mechanism. maybe we can * use it now ? */ return err; - -badframe: - return 1; } asmlinkage long _sys_rt_sigreturn(struct pt_regs *regs) @@ -111,21 +107,18 @@ badframe: * Set up a signal frame. */ -static int setup_sigcontext(struct sigcontext *sc, struct pt_regs *regs, - unsigned long mask) +static int setup_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc) { int err = 0; /* copy the regs */ - + /* There should be no need to save callee-saved registers here... + * ...but we save them anyway. Revisit this + */ err |= __copy_to_user(sc->regs.gpr, regs, 32 * sizeof(unsigned long)); err |= __copy_to_user(&sc->regs.pc, ®s->pc, sizeof(unsigned long)); err |= __copy_to_user(&sc->regs.sr, ®s->sr, sizeof(unsigned long)); - /* then some other stuff */ - - err |= __put_user(mask, &sc->oldmask); - return err; } @@ -181,24 +174,18 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, int err = 0; frame = get_sigframe(ka, regs, sizeof(*frame)); - if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) goto give_sigsegv; - err |= __put_user(&frame->info, &frame->pinfo); - err |= __put_user(&frame->uc, &frame->puc); - + /* Create siginfo. */ if (ka->sa.sa_flags & SA_SIGINFO) err |= copy_siginfo_to_user(&frame->info, info); - if (err) - goto give_sigsegv; - /* Clear all the bits of the ucontext we don't use. */ - err |= __clear_user(&frame->uc, offsetof(struct ucontext, uc_mcontext)); + /* Create the ucontext. */ err |= __put_user(0, &frame->uc.uc_flags); err |= __put_user(NULL, &frame->uc.uc_link); err |= __save_altstack(&frame->uc.uc_stack, regs->sp); - err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, set->sig[0]); + err |= setup_sigcontext(regs, &frame->uc.uc_mcontext); err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); @@ -207,9 +194,12 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, /* trampoline - the desired return ip is the retcode itself */ return_ip = (unsigned long)&frame->retcode; - /* This is l.ori r11,r0,__NR_sigreturn, l.sys 1 */ - err |= __put_user(0xa960, (short *)(frame->retcode + 0)); - err |= __put_user(__NR_rt_sigreturn, (short *)(frame->retcode + 2)); + /* This is: + l.ori r11,r0,__NR_sigreturn + l.sys 1 + */ + err |= __put_user(0xa960, (short *)(frame->retcode + 0)); + err |= __put_user(__NR_rt_sigreturn, (short *)(frame->retcode + 2)); err |= __put_user(0x20000001, (unsigned long *)(frame->retcode + 4)); err |= __put_user(0x15000000, (unsigned long *)(frame->retcode + 8)); @@ -262,82 +252,106 @@ handle_signal(unsigned long sig, * mode below. */ -void do_signal(struct pt_regs *regs) +int do_signal(struct pt_regs *regs, int syscall) { siginfo_t info; int signr; struct k_sigaction ka; - - /* - * We want the common case to go fast, which - * is why we may in certain cases get here from - * kernel mode. Just return without doing anything - * if so. - */ - if (!user_mode(regs)) - return; - - signr = get_signal_to_deliver(&info, &ka, regs, NULL); - - /* If we are coming out of a syscall then we need - * to check if the syscall was interrupted and wants to be - * restarted after handling the signal. If so, the original - * syscall number is put back into r11 and the PC rewound to - * point at the l.sys instruction that resulted in the - * original syscall. Syscall results other than the four - * below mean that the syscall executed to completion and no - * restart is necessary. - */ - if (regs->orig_gpr11) { - int restart = 0; - - switch (regs->gpr[11]) { + unsigned long continue_addr = 0; + unsigned long restart_addr = 0; + unsigned long retval = 0; + int restart = 0; + + if (syscall) { + continue_addr = regs->pc; + restart_addr = continue_addr - 4; + retval = regs->gpr[11]; + + /* + * Setup syscall restart here so that a debugger will + * see the already changed PC. + */ + switch (retval) { case -ERESTART_RESTARTBLOCK: + restart = -2; + /* Fall through */ case -ERESTARTNOHAND: - /* Restart if there is no signal handler */ - restart = (signr <= 0); - break; case -ERESTARTSYS: - /* Restart if there no signal handler or - * SA_RESTART flag is set */ - restart = (signr <= 0 || (ka.sa.sa_flags & SA_RESTART)); - break; case -ERESTARTNOINTR: - /* Always restart */ - restart = 1; + restart++; + regs->gpr[11] = regs->orig_gpr11; + regs->pc = restart_addr; break; } + } - if (restart) { - if (regs->gpr[11] == -ERESTART_RESTARTBLOCK) - regs->gpr[11] = __NR_restart_syscall; - else - regs->gpr[11] = regs->orig_gpr11; - regs->pc -= 4; - } else { - regs->gpr[11] = -EINTR; + /* + * Get the signal to deliver. When running under ptrace, at this + * point the debugger may change all our registers ... + */ + signr = get_signal_to_deliver(&info, &ka, regs, NULL); + /* + * Depending on the signal settings we may need to revert the + * decision to restart the system call. But skip this if a + * debugger has chosen to restart at a different PC. + */ + if (signr > 0) { + if (unlikely(restart) && regs->pc == restart_addr) { + if (retval == -ERESTARTNOHAND || + retval == -ERESTART_RESTARTBLOCK + || (retval == -ERESTARTSYS + && !(ka.sa.sa_flags & SA_RESTART))) { + /* No automatic restart */ + regs->gpr[11] = -EINTR; + regs->pc = continue_addr; + } } - } - if (signr <= 0) { - /* no signal to deliver so we just put the saved sigmask - * back */ - restore_saved_sigmask(); - } else { /* signr > 0 */ - /* Whee! Actually deliver the signal. */ handle_signal(signr, &info, &ka, regs); + } else { + /* no handler */ + restore_saved_sigmask(); + /* + * Restore pt_regs PC as syscall restart will be handled by + * kernel without return to userspace + */ + if (unlikely(restart) && regs->pc == restart_addr) { + regs->pc = continue_addr; + return restart; + } } - return; + return 0; } -asmlinkage void do_notify_resume(struct pt_regs *regs) +asmlinkage int +do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall) { - if (current_thread_info()->flags & _TIF_SIGPENDING) - do_signal(regs); - - if (current_thread_info()->flags & _TIF_NOTIFY_RESUME) { - clear_thread_flag(TIF_NOTIFY_RESUME); - tracehook_notify_resume(regs); - } + do { + if (likely(thread_flags & _TIF_NEED_RESCHED)) { + schedule(); + } else { + if (unlikely(!user_mode(regs))) + return 0; + local_irq_enable(); + if (thread_flags & _TIF_SIGPENDING) { + int restart = do_signal(regs, syscall); + if (unlikely(restart)) { + /* + * Restart without handlers. + * Deal with it without leaving + * the kernel space. + */ + return restart; + } + syscall = 0; + } else { + clear_thread_flag(TIF_NOTIFY_RESUME); + tracehook_notify_resume(regs); + } + } + local_irq_disable(); + thread_flags = current_thread_info()->flags; + } while (thread_flags & _TIF_WORK_MASK); + return 0; } diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile index e02f665..ceadda9 100644 --- a/arch/parisc/Makefile +++ b/arch/parisc/Makefile @@ -48,7 +48,12 @@ cflags-y := -pipe # These flags should be implied by an hppa-linux configuration, but they # are not in gcc 3.2. -cflags-y += -mno-space-regs -mfast-indirect-calls +cflags-y += -mno-space-regs + +# -mfast-indirect-calls is only relevant for 32-bit kernels. +ifndef CONFIG_64BIT +cflags-y += -mfast-indirect-calls +endif # Currently we save and restore fpregs on all kernel entry/interruption paths. # If that gets optimized, we might need to disable the use of fpregs in the diff --git a/arch/parisc/include/asm/processor.h b/arch/parisc/include/asm/processor.h index cc2290a..c6ee865 100644 --- a/arch/parisc/include/asm/processor.h +++ b/arch/parisc/include/asm/processor.h @@ -53,6 +53,8 @@ #define STACK_TOP TASK_SIZE #define STACK_TOP_MAX DEFAULT_TASK_SIZE +#define STACK_SIZE_MAX (1 << 30) /* 1 GB */ + #endif #ifndef __ASSEMBLY__ diff --git a/arch/parisc/include/uapi/asm/shmbuf.h b/arch/parisc/include/uapi/asm/shmbuf.h index 0a3eada..f395cde 100644 --- a/arch/parisc/include/uapi/asm/shmbuf.h +++ b/arch/parisc/include/uapi/asm/shmbuf.h @@ -36,23 +36,16 @@ struct shmid64_ds { unsigned int __unused2; }; -#ifdef CONFIG_64BIT -/* The 'unsigned int' (formerly 'unsigned long') data types below will - * ensure that a 32-bit app calling shmctl(*,IPC_INFO,*) will work on - * a wide kernel, but if some of these values are meant to contain pointers - * they may need to be 'long long' instead. -PB XXX FIXME - */ -#endif struct shminfo64 { - unsigned int shmmax; - unsigned int shmmin; - unsigned int shmmni; - unsigned int shmseg; - unsigned int shmall; - unsigned int __unused1; - unsigned int __unused2; - unsigned int __unused3; - unsigned int __unused4; + unsigned long shmmax; + unsigned long shmmin; + unsigned long shmmni; + unsigned long shmseg; + unsigned long shmall; + unsigned long __unused1; + unsigned long __unused2; + unsigned long __unused3; + unsigned long __unused4; }; #endif /* _PARISC_SHMBUF_H */ diff --git a/arch/parisc/include/uapi/asm/signal.h b/arch/parisc/include/uapi/asm/signal.h index a2fa2971..f5645d6 100644 --- a/arch/parisc/include/uapi/asm/signal.h +++ b/arch/parisc/include/uapi/asm/signal.h @@ -69,8 +69,6 @@ #define SA_NOMASK SA_NODEFER #define SA_ONESHOT SA_RESETHAND -#define SA_RESTORER 0x04000000 /* obsolete -- ignored */ - #define MINSIGSTKSZ 2048 #define SIGSTKSZ 8192 diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S index 10a0c2a..b24732d 100644 --- a/arch/parisc/kernel/syscall_table.S +++ b/arch/parisc/kernel/syscall_table.S @@ -286,11 +286,11 @@ ENTRY_COMP(msgsnd) ENTRY_COMP(msgrcv) ENTRY_SAME(msgget) /* 190 */ - ENTRY_SAME(msgctl) - ENTRY_SAME(shmat) + ENTRY_COMP(msgctl) + ENTRY_COMP(shmat) ENTRY_SAME(shmdt) ENTRY_SAME(shmget) - ENTRY_SAME(shmctl) /* 195 */ + ENTRY_COMP(shmctl) /* 195 */ ENTRY_SAME(ni_syscall) /* streams1 */ ENTRY_SAME(ni_syscall) /* streams2 */ ENTRY_SAME(lstat64) @@ -323,7 +323,7 @@ ENTRY_SAME(epoll_ctl) /* 225 */ ENTRY_SAME(epoll_wait) ENTRY_SAME(remap_file_pages) - ENTRY_SAME(semtimedop) + ENTRY_COMP(semtimedop) ENTRY_COMP(mq_open) ENTRY_SAME(mq_unlink) /* 230 */ ENTRY_COMP(mq_timedsend) diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c index f16f57b..0293588 100644 --- a/arch/parisc/mm/fault.c +++ b/arch/parisc/mm/fault.c @@ -177,7 +177,7 @@ void do_page_fault(struct pt_regs *regs, unsigned long code, int fault; unsigned int flags; - if (pagefault_disabled()) + if (in_atomic()) goto no_context; tsk = current; diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 9ca41f7..2e0ddfa 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -60,11 +60,10 @@ config LOCKDEP_SUPPORT config RWSEM_GENERIC_SPINLOCK bool - default y if PREEMPT_RT_FULL config RWSEM_XCHGADD_ALGORITHM bool - default y if !PREEMPT_RT_FULL + default y config GENERIC_LOCKBREAK bool @@ -132,7 +131,6 @@ config PPC select GENERIC_CLOCKEVENTS select GENERIC_STRNCPY_FROM_USER select GENERIC_STRNLEN_USER - select HAVE_PREEMPT_LAZY select HAVE_MOD_ARCH_SPECIFIC select MODULES_USE_ELF_RELA select CLONE_BACKWARDS @@ -140,6 +138,7 @@ config PPC select OLD_SIGSUSPEND select OLD_SIGACTION if PPC32 select HAVE_DEBUG_STACKOVERFLOW + select ARCH_SUPPORTS_ATOMIC_RMW config EARLY_PRINTK bool @@ -287,7 +286,7 @@ menu "Kernel options" config HIGHMEM bool "High memory support" - depends on PPC32 && !PREEMPT_RT_FULL + depends on PPC32 source kernel/Kconfig.hz source kernel/Kconfig.preempt @@ -397,7 +396,7 @@ config KEXEC config CRASH_DUMP bool "Build a kdump crash kernel" depends on PPC64 || 6xx || FSL_BOOKE || (44x && !SMP) - select RELOCATABLE if PPC64 || 44x + select RELOCATABLE if (PPC64 && !COMPILE_TEST) || 44x select DYNAMIC_MEMSTART if FSL_BOOKE help Build a kernel suitable for use as a kdump capture kernel. @@ -987,6 +986,7 @@ endmenu if PPC64 config RELOCATABLE bool "Build a relocatable kernel" + depends on !COMPILE_TEST select NONSTATIC_KERNEL help This builds a kernel image that is capable of running anywhere diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 51cfb78..994337b 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -114,7 +114,9 @@ endif CFLAGS-$(CONFIG_TUNE_CELL) += $(call cc-option,-mtune=cell) -KBUILD_CPPFLAGS += -Iarch/$(ARCH) +asinstr := $(call as-instr,lis 9$(comma)foo@high,-DHAVE_AS_ATHIGH=1) + +KBUILD_CPPFLAGS += -Iarch/$(ARCH) $(asinstr) KBUILD_AFLAGS += -Iarch/$(ARCH) KBUILD_CFLAGS += -msoft-float -pipe -Iarch/$(ARCH) $(CFLAGS-y) CPP = $(CC) -E $(KBUILD_CFLAGS) diff --git a/arch/powerpc/crypto/sha1.c b/arch/powerpc/crypto/sha1.c index f9e8b94..b51da91 100644 --- a/arch/powerpc/crypto/sha1.c +++ b/arch/powerpc/crypto/sha1.c @@ -154,4 +154,5 @@ module_exit(sha1_powerpc_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm"); -MODULE_ALIAS("sha1-powerpc"); +MODULE_ALIAS_CRYPTO("sha1"); +MODULE_ALIAS_CRYPTO("sha1-powerpc"); diff --git a/arch/powerpc/include/asm/compat.h b/arch/powerpc/include/asm/compat.h index a613d2c..b142b8e 100644 --- a/arch/powerpc/include/asm/compat.h +++ b/arch/powerpc/include/asm/compat.h @@ -8,7 +8,11 @@ #include <linux/sched.h> #define COMPAT_USER_HZ 100 +#ifdef __BIG_ENDIAN__ #define COMPAT_UTS_MACHINE "ppc\0\0" +#else +#define COMPAT_UTS_MACHINE "ppcle\0\0" +#endif typedef u32 compat_size_t; typedef s32 compat_ssize_t; diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index 8b48090..3a52b9b 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -57,10 +57,10 @@ struct machdep_calls { void (*hpte_removebolted)(unsigned long ea, int psize, int ssize); void (*flush_hash_range)(unsigned long number, int local); - void (*hugepage_invalidate)(struct mm_struct *mm, + void (*hugepage_invalidate)(unsigned long vsid, + unsigned long addr, unsigned char *hpte_slot_array, - unsigned long addr, int psize); - + int psize, int ssize); /* special for kexec, to be called in real mode, linear mapping is * destroyed as well */ void (*hpte_clear_all)(void); diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h index 3fd2f1b..cefc7b4 100644 --- a/arch/powerpc/include/asm/perf_event_server.h +++ b/arch/powerpc/include/asm/perf_event_server.h @@ -60,8 +60,7 @@ struct power_pmu { #define PPMU_SIAR_VALID 0x00000010 /* Processor has SIAR Valid bit */ #define PPMU_HAS_SSLOT 0x00000020 /* Has sampled slot in MMCRA */ #define PPMU_HAS_SIER 0x00000040 /* Has SIER */ -#define PPMU_BHRB 0x00000080 /* has BHRB feature enabled */ -#define PPMU_EBB 0x00000100 /* supports event based branch */ +#define PPMU_ARCH_207S 0x00000080 /* PMC is architecture v2.07S */ /* * Values for flags to get_alternatives() diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h index 46db094..832a39d 100644 --- a/arch/powerpc/include/asm/pgtable-ppc64.h +++ b/arch/powerpc/include/asm/pgtable-ppc64.h @@ -409,7 +409,7 @@ static inline char *get_hpte_slot_array(pmd_t *pmdp) } extern void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr, - pmd_t *pmdp); + pmd_t *pmdp, unsigned long old_pmd); #ifdef CONFIG_TRANSPARENT_HUGEPAGE extern pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot); extern pmd_t mk_pmd(struct page *page, pgprot_t pgprot); diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index 0d2d0f0..e6d03c7 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -378,11 +378,16 @@ n: * ld rY,ADDROFF(name)(rX) */ #ifdef __powerpc64__ +#ifdef HAVE_AS_ATHIGH +#define __AS_ATHIGH high +#else +#define __AS_ATHIGH h +#endif #define LOAD_REG_IMMEDIATE(reg,expr) \ lis reg,(expr)@highest; \ ori reg,reg,(expr)@higher; \ rldicr reg,reg,32,31; \ - oris reg,reg,(expr)@h; \ + oris reg,reg,(expr)@__AS_ATHIGH; \ ori reg,reg,(expr)@l; #define LOAD_REG_ADDR(reg,name) \ diff --git a/arch/powerpc/include/asm/pte-hash64-64k.h b/arch/powerpc/include/asm/pte-hash64-64k.h index d836d94..9ecede1 100644 --- a/arch/powerpc/include/asm/pte-hash64-64k.h +++ b/arch/powerpc/include/asm/pte-hash64-64k.h @@ -46,11 +46,31 @@ * in order to deal with 64K made of 4K HW pages. Thus we override the * generic accessors and iterators here */ -#define __real_pte(e,p) ((real_pte_t) { \ - (e), (pte_val(e) & _PAGE_COMBO) ? \ - (pte_val(*((p) + PTRS_PER_PTE))) : 0 }) -#define __rpte_to_hidx(r,index) ((pte_val((r).pte) & _PAGE_COMBO) ? \ - (((r).hidx >> ((index)<<2)) & 0xf) : ((pte_val((r).pte) >> 12) & 0xf)) +#define __real_pte __real_pte +static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep) +{ + real_pte_t rpte; + + rpte.pte = pte; + rpte.hidx = 0; + if (pte_val(pte) & _PAGE_COMBO) { + /* + * Make sure we order the hidx load against the _PAGE_COMBO + * check. The store side ordering is done in __hash_page_4K + */ + smp_rmb(); + rpte.hidx = pte_val(*((ptep) + PTRS_PER_PTE)); + } + return rpte; +} + +static inline unsigned long __rpte_to_hidx(real_pte_t rpte, unsigned long index) +{ + if ((pte_val(rpte.pte) & _PAGE_COMBO)) + return (rpte.hidx >> (index<<2)) & 0xf; + return (pte_val(rpte.pte) >> 12) & 0xf; +} + #define __rpte_to_pte(r) ((r).pte) #define __rpte_sub_valid(rpte, index) \ (pte_val(rpte.pte) & (_PAGE_HPTE_SUB0 >> (index))) diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 7ca729c..390e098 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -116,6 +116,7 @@ /* Server variant */ #define MSR_ (MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_ISF |MSR_HV) +#define MSR_IDLE (MSR_ME | MSR_SF | MSR_HV) #define MSR_KERNEL (MSR_ | MSR_64BIT) #define MSR_USER32 (MSR_ | MSR_PR | MSR_EE) #define MSR_USER64 (MSR_USER32 | MSR_64BIT) @@ -208,6 +209,7 @@ #define SPRN_ACOP 0x1F /* Available Coprocessor Register */ #define SPRN_TFIAR 0x81 /* Transaction Failure Inst Addr */ #define SPRN_TEXASR 0x82 /* Transaction EXception & Summary */ +#define TEXASR_FS __MASK(63-36) /* Transaction Failure Summary */ #define SPRN_TEXASRU 0x83 /* '' '' '' Upper 32 */ #define SPRN_TFHAR 0x80 /* Transaction Failure Handler Addr */ #define SPRN_CTRLF 0x088 diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h index 5f54a74..826be86 100644 --- a/arch/powerpc/include/asm/spinlock.h +++ b/arch/powerpc/include/asm/spinlock.h @@ -28,8 +28,6 @@ #include <asm/synch.h> #include <asm/ppc-opcode.h> -#define arch_spin_is_locked(x) ((x)->slock != 0) - #ifdef CONFIG_PPC64 /* use 0x800000yy when locked, where yy == CPU number */ #ifdef __BIG_ENDIAN__ @@ -54,6 +52,12 @@ #define SYNC_IO #endif +static inline int arch_spin_is_locked(arch_spinlock_t *lock) +{ + smp_mb(); + return lock->slock != 0; +} + /* * This returns the old value in the lock, so we succeeded * in getting the lock if the return value is 0. diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h index 2be5618..06d63a3 100644 --- a/arch/powerpc/include/asm/switch_to.h +++ b/arch/powerpc/include/asm/switch_to.h @@ -85,6 +85,8 @@ static inline void clear_task_ebb(struct task_struct *t) { #ifdef CONFIG_PPC_BOOK3S_64 /* EBB perf events are not inherited, so clear all EBB state. */ + t->thread.ebbrr = 0; + t->thread.ebbhr = 0; t->thread.bescr = 0; t->thread.mmcr2 = 0; t->thread.mmcr0 = 0; diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h index 43523fe..05fcdd8 100644 --- a/arch/powerpc/include/asm/systbl.h +++ b/arch/powerpc/include/asm/systbl.h @@ -190,7 +190,7 @@ SYSCALL_SPU(getcwd) SYSCALL_SPU(capget) SYSCALL_SPU(capset) COMPAT_SYS(sigaltstack) -COMPAT_SYS_SPU(sendfile) +SYSX_SPU(sys_sendfile64,compat_sys_sendfile,sys_sendfile) SYSCALL(ni_syscall) SYSCALL(ni_syscall) PPC_SYS(vfork) diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index f50711f..ba7b197 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -43,8 +43,6 @@ struct thread_info { int cpu; /* cpu we're on */ int preempt_count; /* 0 => preemptable, <0 => BUG */ - int preempt_lazy_count; /* 0 => preemptable, - <0 => BUG */ struct restart_block restart_block; unsigned long local_flags; /* private flags for thread */ @@ -92,7 +90,8 @@ static inline struct thread_info *current_thread_info(void) #define TIF_SYSCALL_TRACE 0 /* syscall trace active */ #define TIF_SIGPENDING 1 /* signal pending */ #define TIF_NEED_RESCHED 2 /* rescheduling necessary */ -#define TIF_NEED_RESCHED_LAZY 3 /* lazy rescheduling necessary */ +#define TIF_POLLING_NRFLAG 3 /* true if poll_idle() is polling + TIF_NEED_RESCHED */ #define TIF_32BIT 4 /* 32 bit binary */ #define TIF_PERFMON_WORK 5 /* work for pfm_handle_work() */ #define TIF_PERFMON_CTXSW 6 /* perfmon needs ctxsw calls */ @@ -108,8 +107,6 @@ static inline struct thread_info *current_thread_info(void) #define TIF_EMULATE_STACK_STORE 16 /* Is an instruction emulation for stack store? */ #define TIF_MEMDIE 17 /* is terminating due to OOM killer */ -#define TIF_POLLING_NRFLAG 18 /* true if poll_idle() is polling - TIF_NEED_RESCHED */ /* as above, but as bit values */ #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE) @@ -129,16 +126,13 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_SYSCALL_TRACEPOINT (1<<TIF_SYSCALL_TRACEPOINT) #define _TIF_EMULATE_STACK_STORE (1<<TIF_EMULATE_STACK_STORE) #define _TIF_NOHZ (1<<TIF_NOHZ) -#define _TIF_NEED_RESCHED_LAZY (1<<TIF_NEED_RESCHED_LAZY) #define _TIF_SYSCALL_T_OR_A (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT | \ _TIF_NOHZ) #define _TIF_USER_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \ - _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ - _TIF_NEED_RESCHED_LAZY) + _TIF_NOTIFY_RESUME | _TIF_UPROBE) #define _TIF_PERSYSCALL_MASK (_TIF_RESTOREALL|_TIF_NOERROR) -#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY) /* Bits in local_flags */ /* Don't move TLF_NAPPING without adjusting the code in entry_32.S */ diff --git a/arch/powerpc/include/uapi/asm/cputable.h b/arch/powerpc/include/uapi/asm/cputable.h index 5b76579..de2c0e4 100644 --- a/arch/powerpc/include/uapi/asm/cputable.h +++ b/arch/powerpc/include/uapi/asm/cputable.h @@ -41,5 +41,6 @@ #define PPC_FEATURE2_EBB 0x10000000 #define PPC_FEATURE2_ISEL 0x08000000 #define PPC_FEATURE2_TAR 0x04000000 +#define PPC_FEATURE2_VEC_CRYPTO 0x02000000 #endif /* _UAPI__ASM_POWERPC_CPUTABLE_H */ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 55210c0..502c7a4 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -165,7 +165,6 @@ int main(void) DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); DEFINE(TI_LOCAL_FLAGS, offsetof(struct thread_info, local_flags)); DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count)); - DEFINE(TI_PREEMPT_LAZY, offsetof(struct thread_info, preempt_lazy_count)); DEFINE(TI_TASK, offsetof(struct thread_info, task)); DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 597d954..c5d3d02 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -105,7 +105,8 @@ extern void __restore_cpu_e6500(void); PPC_FEATURE_PSERIES_PERFMON_COMPAT) #define COMMON_USER2_POWER8 (PPC_FEATURE2_ARCH_2_07 | \ PPC_FEATURE2_HTM_COMP | PPC_FEATURE2_DSCR | \ - PPC_FEATURE2_ISEL | PPC_FEATURE2_TAR) + PPC_FEATURE2_ISEL | PPC_FEATURE2_TAR | \ + PPC_FEATURE2_VEC_CRYPTO) #define COMMON_USER_PA6T (COMMON_USER_PPC64 | PPC_FEATURE_PA6T |\ PPC_FEATURE_TRUE_LE | \ PPC_FEATURE_HAS_ALTIVEC_COMP) diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 081f926..22b45a4 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -890,14 +890,7 @@ resume_kernel: cmpwi 0,r0,0 /* if non-zero, just restore regs and return */ bne restore andi. r8,r8,_TIF_NEED_RESCHED - bne+ 1f - lwz r0,TI_PREEMPT_LAZY(r9) - cmpwi 0,r0,0 /* if non-zero, just restore regs and return */ - bne restore - lwz r0,TI_FLAGS(r9) - andi. r0,r0,_TIF_NEED_RESCHED_LAZY beq+ restore -1: lwz r3,_MSR(r1) andi. r0,r3,MSR_EE /* interrupts off? */ beq restore /* don't schedule if so */ @@ -908,11 +901,11 @@ resume_kernel: */ bl trace_hardirqs_off #endif -2: bl preempt_schedule_irq +1: bl preempt_schedule_irq CURRENT_THREAD_INFO(r9, r1) lwz r3,TI_FLAGS(r9) - andi. r0,r3,_TIF_NEED_RESCHED_MASK - bne- 2b + andi. r0,r3,_TIF_NEED_RESCHED + bne- 1b #ifdef CONFIG_TRACE_IRQFLAGS /* And now, to properly rebalance the above, we tell lockdep they * are being turned back on, which will happen when we return @@ -1233,7 +1226,7 @@ global_dbcr0: #endif /* !(CONFIG_4xx || CONFIG_BOOKE) */ do_work: /* r10 contains MSR_KERNEL here */ - andi. r0,r9,_TIF_NEED_RESCHED_MASK + andi. r0,r9,_TIF_NEED_RESCHED beq do_user_signal do_resched: /* r10 contains MSR_KERNEL here */ @@ -1254,7 +1247,7 @@ recheck: MTMSRD(r10) /* disable interrupts */ CURRENT_THREAD_INFO(r9, r1) lwz r9,TI_FLAGS(r9) - andi. r0,r9,_TIF_NEED_RESCHED_MASK + andi. r0,r9,_TIF_NEED_RESCHED bne- do_resched andi. r0,r9,_TIF_USER_WORK_MASK beq restore_user diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index e6bfe8e..7be3717 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -659,7 +659,7 @@ _GLOBAL(ret_from_except_lite) #else beq restore #endif -1: andi. r0,r4,_TIF_NEED_RESCHED_MASK +1: andi. r0,r4,_TIF_NEED_RESCHED beq 2f bl .restore_interrupts SCHEDULE_USER @@ -709,18 +709,10 @@ resume_kernel: #ifdef CONFIG_PREEMPT /* Check if we need to preempt */ - lwz r8,TI_PREEMPT(r9) - cmpwi 0,r8,0 /* if non-zero, just restore regs and return */ - bne restore andi. r0,r4,_TIF_NEED_RESCHED - bne+ check_count - - andi. r0,r4,_TIF_NEED_RESCHED_LAZY beq+ restore - lwz r8,TI_PREEMPT_LAZY(r9) - /* Check that preempt_count() == 0 and interrupts are enabled */ -check_count: + lwz r8,TI_PREEMPT(r9) cmpwi cr1,r8,0 ld r0,SOFTE(r1) cmpdi r0,0 @@ -737,7 +729,7 @@ check_count: /* Re-test flags and eventually loop */ CURRENT_THREAD_INFO(r9, r1) ld r4,TI_FLAGS(r9) - andi. r0,r4,_TIF_NEED_RESCHED_MASK + andi. r0,r4,_TIF_NEED_RESCHED bne 1b /* diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S index e11863f..df93072 100644 --- a/arch/powerpc/kernel/idle_power7.S +++ b/arch/powerpc/kernel/idle_power7.S @@ -84,6 +84,22 @@ _GLOBAL(power7_nap) std r9,_MSR(r1) std r1,PACAR1(r13) + /* + * Go to real mode to do the nap, as required by the architecture. + * Also, we need to be in real mode before setting hwthread_state, + * because as soon as we do that, another thread can switch + * the MMU context to the guest. + */ + LOAD_REG_IMMEDIATE(r5, MSR_IDLE) + li r6, MSR_RI + andc r6, r9, r6 + LOAD_REG_ADDR(r7, power7_enter_nap_mode) + mtmsrd r6, 1 /* clear RI before setting SRR0/1 */ + mtspr SPRN_SRR0, r7 + mtspr SPRN_SRR1, r5 + rfid + +power7_enter_nap_mode: #ifdef CONFIG_KVM_BOOK3S_64_HV /* Tell KVM we're napping */ li r4,KVM_HWTHREAD_IN_NAP diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index f3ed55a..c7cb8c2 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -594,7 +594,6 @@ void irq_ctx_init(void) } } -#ifndef CONFIG_PREEMPT_RT_FULL static inline void do_softirq_onstack(void) { struct thread_info *curtp, *irqtp; @@ -627,7 +626,6 @@ void do_softirq(void) local_irq_restore(flags); } -#endif irq_hw_number_t virq_to_hw(unsigned int virq) { diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c index 22e88dd..a531358 100644 --- a/arch/powerpc/kernel/legacy_serial.c +++ b/arch/powerpc/kernel/legacy_serial.c @@ -48,6 +48,9 @@ static struct __initdata of_device_id legacy_serial_parents[] = { static unsigned int legacy_serial_count; static int legacy_serial_console = -1; +static const upf_t legacy_port_flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST | + UPF_SHARE_IRQ | UPF_FIXED_PORT; + static unsigned int tsi_serial_in(struct uart_port *p, int offset) { unsigned int tmp; @@ -153,8 +156,6 @@ static int __init add_legacy_soc_port(struct device_node *np, { u64 addr; const __be32 *addrp; - upf_t flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST | UPF_SHARE_IRQ - | UPF_FIXED_PORT; struct device_node *tsi = of_get_parent(np); /* We only support ports that have a clock frequency properly @@ -185,9 +186,11 @@ static int __init add_legacy_soc_port(struct device_node *np, * IO port value. It will be fixed up later along with the irq */ if (tsi && !strcmp(tsi->type, "tsi-bridge")) - return add_legacy_port(np, -1, UPIO_TSI, addr, addr, NO_IRQ, flags, 0); + return add_legacy_port(np, -1, UPIO_TSI, addr, addr, + NO_IRQ, legacy_port_flags, 0); else - return add_legacy_port(np, -1, UPIO_MEM, addr, addr, NO_IRQ, flags, 0); + return add_legacy_port(np, -1, UPIO_MEM, addr, addr, + NO_IRQ, legacy_port_flags, 0); } static int __init add_legacy_isa_port(struct device_node *np, @@ -233,7 +236,7 @@ static int __init add_legacy_isa_port(struct device_node *np, /* Add port, irq will be dealt with later */ return add_legacy_port(np, index, UPIO_PORT, be32_to_cpu(reg[1]), - taddr, NO_IRQ, UPF_BOOT_AUTOCONF, 0); + taddr, NO_IRQ, legacy_port_flags, 0); } @@ -306,7 +309,7 @@ static int __init add_legacy_pci_port(struct device_node *np, * IO port value. It will be fixed up later along with the irq */ return add_legacy_port(np, index, iotype, base, addr, NO_IRQ, - UPF_BOOT_AUTOCONF, np != pci_dev); + legacy_port_flags, np != pci_dev); } #endif diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c index 611acdf..263e445 100644 --- a/arch/powerpc/kernel/machine_kexec_64.c +++ b/arch/powerpc/kernel/machine_kexec_64.c @@ -237,7 +237,7 @@ static void wake_offline_cpus(void) if (!cpu_online(cpu)) { printk(KERN_INFO "kexec: Waking offline cpu %d.\n", cpu); - cpu_up(cpu); + WARN_ON(cpu_up(cpu)); } } } diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index 84cbc94..ace3413 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -40,7 +40,6 @@ * We store the saved ksp_limit in the unused part * of the STACK_FRAME_OVERHEAD */ -#ifndef CONFIG_PREEMPT_RT_FULL _GLOBAL(call_do_softirq) mflr r0 stw r0,4(r1) @@ -57,7 +56,6 @@ _GLOBAL(call_do_softirq) stw r10,THREAD+KSP_LIMIT(r2) mtlr r0 blr -#endif /* * void call_do_irq(struct pt_regs *regs, struct thread_info *irqtp); diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index f93987e..e59caf8 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -29,7 +29,6 @@ .text -#ifndef CONFIG_PREEMPT_RT_FULL _GLOBAL(call_do_softirq) mflr r0 std r0,16(r1) @@ -40,7 +39,6 @@ _GLOBAL(call_do_softirq) ld r0,16(r1) mtlr r0 blr -#endif _GLOBAL(call_do_irq) mflr r0 diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index aa75b2b..f110610 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -525,6 +525,31 @@ out_and_saveregs: tm_save_sprs(thr); } +extern void __tm_recheckpoint(struct thread_struct *thread, + unsigned long orig_msr); + +void tm_recheckpoint(struct thread_struct *thread, + unsigned long orig_msr) +{ + unsigned long flags; + + /* We really can't be interrupted here as the TEXASR registers can't + * change and later in the trecheckpoint code, we have a userspace R1. + * So let's hard disable over this region. + */ + local_irq_save(flags); + hard_irq_disable(); + + /* The TM SPRs are restored here, so that TEXASR.FS can be set + * before the trecheckpoint and no explosion occurs. + */ + tm_restore_sprs(thread); + + __tm_recheckpoint(thread, orig_msr); + + local_irq_restore(flags); +} + static inline void tm_recheckpoint_new_task(struct task_struct *new) { unsigned long msr; @@ -543,13 +568,10 @@ static inline void tm_recheckpoint_new_task(struct task_struct *new) if (!new->thread.regs) return; - /* The TM SPRs are restored here, so that TEXASR.FS can be set - * before the trecheckpoint and no explosion occurs. - */ - tm_restore_sprs(&new->thread); - - if (!MSR_TM_ACTIVE(new->thread.regs->msr)) + if (!MSR_TM_ACTIVE(new->thread.regs->msr)){ + tm_restore_sprs(&new->thread); return; + } msr = new->thread.tm_orig_msr; /* Recheckpoint to restore original checkpointed register state. */ TM_DEBUG("*** tm_recheckpoint of pid %d " diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 3d261c0..5b86d6a 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -458,9 +458,17 @@ void __init smp_setup_cpu_maps(void) } for (j = 0; j < nthreads && cpu < nr_cpu_ids; j++) { + bool avail; + DBG(" thread %d -> cpu %d (hard id %d)\n", j, cpu, be32_to_cpu(intserv[j])); - set_cpu_present(cpu, true); + + avail = of_device_is_available(dn); + if (!avail) + avail = !of_property_match_string(dn, + "enable-method", "spin-table"); + + set_cpu_present(cpu, avail); set_hard_smp_processor_id(cpu, be32_to_cpu(intserv[j])); set_cpu_possible(cpu, true); cpu++; diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index fea2dba..50606e4 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -872,6 +872,8 @@ static long restore_tm_user_regs(struct pt_regs *regs, * transactional versions should be loaded. */ tm_enable(); + /* Make sure the transaction is marked as failed */ + current->thread.tm_texasr |= TEXASR_FS; /* This loads the checkpointed FP/VEC state, if used */ tm_recheckpoint(¤t->thread, msr); /* Get the top half of the MSR */ diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 36b1d1d..4456779 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -519,6 +519,8 @@ static long restore_tm_sigcontexts(struct pt_regs *regs, } #endif tm_enable(); + /* Make sure the transaction is marked as failed */ + current->thread.tm_texasr |= TEXASR_FS; /* This loads the checkpointed FP/VEC state, if used */ tm_recheckpoint(¤t->thread, msr); diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 5ac241b..62e7f22 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -423,7 +423,7 @@ unsigned long profile_pc(struct pt_regs *regs) EXPORT_SYMBOL(profile_pc); #endif -#if defined(CONFIG_IRQ_WORK) && !defined(CONFIG_PREEMPT_RT_FULL) +#ifdef CONFIG_IRQ_WORK /* * 64-bit uses a byte in the PACA, 32-bit uses a per-cpu variable... @@ -512,7 +512,7 @@ void timer_interrupt(struct pt_regs * regs) __get_cpu_var(irq_stat).timer_irqs++; -#if defined(CONFIG_PPC32) && defined(CONFIG_PMAC) +#if defined(CONFIG_PPC32) && defined(CONFIG_PPC_PMAC) if (atomic_read(&ppc_n_lost_interrupts) != 0) do_IRQ(regs); #endif diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index cd809ea..8b05185 100644 --- a/arch/powerpc/kernel/tm.S +++ b/arch/powerpc/kernel/tm.S @@ -306,7 +306,7 @@ dont_backup_fp: * Call with IRQs off, stacks get all out of sync for * some periods in here! */ -_GLOBAL(tm_recheckpoint) +_GLOBAL(__tm_recheckpoint) mfcr r5 mflr r0 std r5, 8(r1) diff --git a/arch/powerpc/kernel/udbg_16550.c b/arch/powerpc/kernel/udbg_16550.c index 75702e2..f7089fc 100644 --- a/arch/powerpc/kernel/udbg_16550.c +++ b/arch/powerpc/kernel/udbg_16550.c @@ -69,8 +69,12 @@ static void udbg_uart_putc(char c) static int udbg_uart_getc_poll(void) { - if (!udbg_uart_in || !(udbg_uart_in(UART_LSR) & LSR_DR)) + if (!udbg_uart_in) + return -1; + + if (!(udbg_uart_in(UART_LSR) & LSR_DR)) return udbg_uart_in(UART_RBR); + return -1; } diff --git a/arch/powerpc/kernel/vdso32/getcpu.S b/arch/powerpc/kernel/vdso32/getcpu.S index 47afd08..fe7e97a 100644 --- a/arch/powerpc/kernel/vdso32/getcpu.S +++ b/arch/powerpc/kernel/vdso32/getcpu.S @@ -30,8 +30,8 @@ V_FUNCTION_BEGIN(__kernel_getcpu) .cfi_startproc mfspr r5,SPRN_USPRG3 - cmpdi cr0,r3,0 - cmpdi cr1,r4,0 + cmpwi cr0,r3,0 + cmpwi cr1,r4,0 clrlwi r6,r5,16 rlwinm r7,r5,16,31-15,31-0 beq cr0,1f diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index e1ab62e..211974a 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -82,7 +82,7 @@ void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu) /* CPU points to the first thread of the core */ if (cpu != me && cpu >= 0 && cpu < nr_cpu_ids) { -#ifdef CONFIG_KVM_XICS +#ifdef CONFIG_PPC_ICP_NATIVE int real_cpu = cpu + vcpu->arch.ptid; if (paca[real_cpu].kvm_hstate.xics_phys) xics_wake_cpu(real_cpu); @@ -1092,9 +1092,7 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu) smp_wmb(); #if defined(CONFIG_PPC_ICP_NATIVE) && defined(CONFIG_SMP) if (vcpu->arch.ptid) { -#ifdef CONFIG_KVM_XICS xics_wake_cpu(cpu); -#endif ++vc->n_woken; } #endif diff --git a/arch/powerpc/lib/crtsavres.S b/arch/powerpc/lib/crtsavres.S index b2c68ce..a5b30c7 100644 --- a/arch/powerpc/lib/crtsavres.S +++ b/arch/powerpc/lib/crtsavres.S @@ -231,6 +231,87 @@ _GLOBAL(_rest32gpr_31_x) mr 1,11 blr +#ifdef CONFIG_ALTIVEC +/* Called with r0 pointing just beyond the end of the vector save area. */ + +_GLOBAL(_savevr_20) + li r11,-192 + stvx vr20,r11,r0 +_GLOBAL(_savevr_21) + li r11,-176 + stvx vr21,r11,r0 +_GLOBAL(_savevr_22) + li r11,-160 + stvx vr22,r11,r0 +_GLOBAL(_savevr_23) + li r11,-144 + stvx vr23,r11,r0 +_GLOBAL(_savevr_24) + li r11,-128 + stvx vr24,r11,r0 +_GLOBAL(_savevr_25) + li r11,-112 + stvx vr25,r11,r0 +_GLOBAL(_savevr_26) + li r11,-96 + stvx vr26,r11,r0 +_GLOBAL(_savevr_27) + li r11,-80 + stvx vr27,r11,r0 +_GLOBAL(_savevr_28) + li r11,-64 + stvx vr28,r11,r0 +_GLOBAL(_savevr_29) + li r11,-48 + stvx vr29,r11,r0 +_GLOBAL(_savevr_30) + li r11,-32 + stvx vr30,r11,r0 +_GLOBAL(_savevr_31) + li r11,-16 + stvx vr31,r11,r0 + blr + +_GLOBAL(_restvr_20) + li r11,-192 + lvx vr20,r11,r0 +_GLOBAL(_restvr_21) + li r11,-176 + lvx vr21,r11,r0 +_GLOBAL(_restvr_22) + li r11,-160 + lvx vr22,r11,r0 +_GLOBAL(_restvr_23) + li r11,-144 + lvx vr23,r11,r0 +_GLOBAL(_restvr_24) + li r11,-128 + lvx vr24,r11,r0 +_GLOBAL(_restvr_25) + li r11,-112 + lvx vr25,r11,r0 +_GLOBAL(_restvr_26) + li r11,-96 + lvx vr26,r11,r0 +_GLOBAL(_restvr_27) + li r11,-80 + lvx vr27,r11,r0 +_GLOBAL(_restvr_28) + li r11,-64 + lvx vr28,r11,r0 +_GLOBAL(_restvr_29) + li r11,-48 + lvx vr29,r11,r0 +_GLOBAL(_restvr_30) + li r11,-32 + lvx vr30,r11,r0 +_GLOBAL(_restvr_31) + li r11,-16 + lvx vr31,r11,r0 + blr + +#endif /* CONFIG_ALTIVEC */ + #else /* CONFIG_PPC64 */ .section ".text.save.restore","ax",@progbits @@ -356,6 +437,111 @@ _restgpr0_31: mtlr r0 blr +#ifdef CONFIG_ALTIVEC +/* Called with r0 pointing just beyond the end of the vector save area. */ + +.globl _savevr_20 +_savevr_20: + li r12,-192 + stvx vr20,r12,r0 +.globl _savevr_21 +_savevr_21: + li r12,-176 + stvx vr21,r12,r0 +.globl _savevr_22 +_savevr_22: + li r12,-160 + stvx vr22,r12,r0 +.globl _savevr_23 +_savevr_23: + li r12,-144 + stvx vr23,r12,r0 +.globl _savevr_24 +_savevr_24: + li r12,-128 + stvx vr24,r12,r0 +.globl _savevr_25 +_savevr_25: + li r12,-112 + stvx vr25,r12,r0 +.globl _savevr_26 +_savevr_26: + li r12,-96 + stvx vr26,r12,r0 +.globl _savevr_27 +_savevr_27: + li r12,-80 + stvx vr27,r12,r0 +.globl _savevr_28 +_savevr_28: + li r12,-64 + stvx vr28,r12,r0 +.globl _savevr_29 +_savevr_29: + li r12,-48 + stvx vr29,r12,r0 +.globl _savevr_30 +_savevr_30: + li r12,-32 + stvx vr30,r12,r0 +.globl _savevr_31 +_savevr_31: + li r12,-16 + stvx vr31,r12,r0 + blr + +.globl _restvr_20 +_restvr_20: + li r12,-192 + lvx vr20,r12,r0 +.globl _restvr_21 +_restvr_21: + li r12,-176 + lvx vr21,r12,r0 +.globl _restvr_22 +_restvr_22: + li r12,-160 + lvx vr22,r12,r0 +.globl _restvr_23 +_restvr_23: + li r12,-144 + lvx vr23,r12,r0 +.globl _restvr_24 +_restvr_24: + li r12,-128 + lvx vr24,r12,r0 +.globl _restvr_25 +_restvr_25: + li r12,-112 + lvx vr25,r12,r0 +.globl _restvr_26 +_restvr_26: + li r12,-96 + lvx vr26,r12,r0 +.globl _restvr_27 +_restvr_27: + li r12,-80 + lvx vr27,r12,r0 +.globl _restvr_28 +_restvr_28: + li r12,-64 + lvx vr28,r12,r0 +.globl _restvr_29 +_restvr_29: + li r12,-48 + lvx vr29,r12,r0 +.globl _restvr_30 +_restvr_30: + li r12,-32 + lvx vr30,r12,r0 +.globl _restvr_31 +_restvr_31: + li r12,-16 + lvx vr31,r12,r0 + blr + +#endif /* CONFIG_ALTIVEC */ + #endif /* CONFIG_PPC64 */ #endif diff --git a/arch/powerpc/lib/locks.c b/arch/powerpc/lib/locks.c index 0c9c8d7..170a034 100644 --- a/arch/powerpc/lib/locks.c +++ b/arch/powerpc/lib/locks.c @@ -70,12 +70,16 @@ void __rw_yield(arch_rwlock_t *rw) void arch_spin_unlock_wait(arch_spinlock_t *lock) { + smp_mb(); + while (lock->slock) { HMT_low(); if (SHARED_PROCESSOR) __spin_yield(lock); } HMT_medium(); + + smp_mb(); } EXPORT_SYMBOL(arch_spin_unlock_wait); diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index b1faa15..aec4dbf 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -1397,7 +1397,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) regs->gpr[rd] = byterev_4(val); goto ldst_done; -#ifdef CONFIG_PPC_CPU +#ifdef CONFIG_PPC_FPU case 535: /* lfsx */ case 567: /* lfsux */ if (!(regs->msr & MSR_FP)) diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 45aa26e..51ab9e7 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -261,7 +261,7 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, if (!arch_irq_disabled_regs(regs)) local_irq_enable(); - if (in_atomic() || mm == NULL || pagefault_disabled()) { + if (in_atomic() || mm == NULL) { if (!user_mode(regs)) { rc = SIGSEGV; goto bail; diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index c33d939..9ca9c16 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -413,18 +413,18 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long vpn, local_irq_restore(flags); } -static void native_hugepage_invalidate(struct mm_struct *mm, +static void native_hugepage_invalidate(unsigned long vsid, + unsigned long addr, unsigned char *hpte_slot_array, - unsigned long addr, int psize) + int psize, int ssize) { - int ssize = 0, i; - int lock_tlbie; + int i; struct hash_pte *hptep; int actual_psize = MMU_PAGE_16M; unsigned int max_hpte_count, valid; unsigned long flags, s_addr = addr; unsigned long hpte_v, want_v, shift; - unsigned long hidx, vpn = 0, vsid, hash, slot; + unsigned long hidx, vpn = 0, hash, slot; shift = mmu_psize_defs[psize].shift; max_hpte_count = 1U << (PMD_SHIFT - shift); @@ -438,15 +438,6 @@ static void native_hugepage_invalidate(struct mm_struct *mm, /* get the vpn */ addr = s_addr + (i * (1ul << shift)); - if (!is_kernel_addr(addr)) { - ssize = user_segment_size(addr); - vsid = get_vsid(mm->context.id, addr, ssize); - WARN_ON(vsid == 0); - } else { - vsid = get_kernel_vsid(addr, mmu_kernel_ssize); - ssize = mmu_kernel_ssize; - } - vpn = hpt_vpn(addr, vsid, ssize); hash = hpt_hash(vpn, shift, ssize); if (hidx & _PTEIDX_SECONDARY) @@ -466,22 +457,13 @@ static void native_hugepage_invalidate(struct mm_struct *mm, else /* Invalidate the hpte. NOTE: this also unlocks it */ hptep->v = 0; + /* + * We need to do tlb invalidate for all the address, tlbie + * instruction compares entry_VA in tlb with the VA specified + * here + */ + tlbie(vpn, psize, actual_psize, ssize, 0); } - /* - * Since this is a hugepage, we just need a single tlbie. - * use the last vpn. - */ - lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); - if (lock_tlbie) - raw_spin_lock(&native_tlbie_lock); - - asm volatile("ptesync":::"memory"); - __tlbie(vpn, psize, actual_psize, ssize); - asm volatile("eieio; tlbsync; ptesync":::"memory"); - - if (lock_tlbie) - raw_spin_unlock(&native_tlbie_lock); - local_irq_restore(flags); } diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index bde8b55..503a5d0 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -947,6 +947,22 @@ void hash_failure_debug(unsigned long ea, unsigned long access, trap, vsid, ssize, psize, lpsize, pte); } +static void check_paca_psize(unsigned long ea, struct mm_struct *mm, + int psize, bool user_region) +{ + if (user_region) { + if (psize != get_paca_psize(ea)) { + get_paca()->context = mm->context; + slb_flush_and_rebolt(); + } + } else if (get_paca()->vmalloc_sllp != + mmu_psize_defs[mmu_vmalloc_psize].sllp) { + get_paca()->vmalloc_sllp = + mmu_psize_defs[mmu_vmalloc_psize].sllp; + slb_vmalloc_update(); + } +} + /* Result code is: * 0 - handled * 1 - normal page fault @@ -1068,6 +1084,8 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) WARN_ON(1); } #endif + check_paca_psize(ea, mm, psize, user_region); + goto bail; } @@ -1108,17 +1126,8 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) #endif } } - if (user_region) { - if (psize != get_paca_psize(ea)) { - get_paca()->context = mm->context; - slb_flush_and_rebolt(); - } - } else if (get_paca()->vmalloc_sllp != - mmu_psize_defs[mmu_vmalloc_psize].sllp) { - get_paca()->vmalloc_sllp = - mmu_psize_defs[mmu_vmalloc_psize].sllp; - slb_vmalloc_update(); - } + + check_paca_psize(ea, mm, psize, user_region); #endif /* CONFIG_PPC_64K_PAGES */ #ifdef CONFIG_PPC_HAS_HASH_64K diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c index 34de9e0..7d86c86 100644 --- a/arch/powerpc/mm/hugepage-hash64.c +++ b/arch/powerpc/mm/hugepage-hash64.c @@ -18,6 +18,57 @@ #include <linux/mm.h> #include <asm/machdep.h> +static void invalidate_old_hpte(unsigned long vsid, unsigned long addr, + pmd_t *pmdp, unsigned int psize, int ssize) +{ + int i, max_hpte_count, valid; + unsigned long s_addr; + unsigned char *hpte_slot_array; + unsigned long hidx, shift, vpn, hash, slot; + + s_addr = addr & HPAGE_PMD_MASK; + hpte_slot_array = get_hpte_slot_array(pmdp); + /* + * IF we try to do a HUGE PTE update after a withdraw is done. + * we will find the below NULL. This happens when we do + * split_huge_page_pmd + */ + if (!hpte_slot_array) + return; + + if (ppc_md.hugepage_invalidate) + return ppc_md.hugepage_invalidate(vsid, s_addr, hpte_slot_array, + psize, ssize); + /* + * No bluk hpte removal support, invalidate each entry + */ + shift = mmu_psize_defs[psize].shift; + max_hpte_count = HPAGE_PMD_SIZE >> shift; + for (i = 0; i < max_hpte_count; i++) { + /* + * 8 bits per each hpte entries + * 000| [ secondary group (one bit) | hidx (3 bits) | valid bit] + */ + valid = hpte_valid(hpte_slot_array, i); + if (!valid) + continue; + hidx = hpte_hash_index(hpte_slot_array, i); + + /* get the vpn */ + addr = s_addr + (i * (1ul << shift)); + vpn = hpt_vpn(addr, vsid, ssize); + hash = hpt_hash(vpn, shift, ssize); + if (hidx & _PTEIDX_SECONDARY) + hash = ~hash; + + slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; + slot += hidx & _PTEIDX_GROUP_IX; + ppc_md.hpte_invalidate(slot, vpn, psize, + MMU_PAGE_16M, ssize, 0); + } +} + + int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, pmd_t *pmdp, unsigned long trap, int local, int ssize, unsigned int psize) @@ -33,7 +84,9 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, * atomically mark the linux large page PMD busy and dirty */ do { - old_pmd = pmd_val(*pmdp); + pmd_t pmd = ACCESS_ONCE(*pmdp); + + old_pmd = pmd_val(pmd); /* If PMD busy, retry the access */ if (unlikely(old_pmd & _PAGE_BUSY)) return 0; @@ -85,6 +138,15 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, vpn = hpt_vpn(ea, vsid, ssize); hash = hpt_hash(vpn, shift, ssize); hpte_slot_array = get_hpte_slot_array(pmdp); + if (psize == MMU_PAGE_4K) { + /* + * invalidate the old hpte entry if we have that mapped via 64K + * base page size. This is because demote_segment won't flush + * hash page table entries. + */ + if ((old_pmd & _PAGE_HASHPTE) && !(old_pmd & _PAGE_COMBO)) + invalidate_old_hpte(vsid, ea, pmdp, MMU_PAGE_64K, ssize); + } valid = hpte_valid(hpte_slot_array, index); if (valid) { @@ -107,11 +169,8 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, * safely update this here. */ valid = 0; - new_pmd &= ~_PAGE_HPTEFLAGS; hpte_slot_array[index] = 0; - } else - /* clear the busy bits and set the hash pte bits */ - new_pmd = (new_pmd & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE; + } } if (!valid) { @@ -119,15 +178,13 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, /* insert new entry */ pa = pmd_pfn(__pmd(old_pmd)) << PAGE_SHIFT; -repeat: - hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; - - /* clear the busy bits and set the hash pte bits */ - new_pmd = (new_pmd & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE; + new_pmd |= _PAGE_HASHPTE; /* Add in WIMG bits */ rflags |= (new_pmd & (_PAGE_WRITETHRU | _PAGE_NO_CACHE | _PAGE_COHERENT | _PAGE_GUARDED)); +repeat: + hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; /* Insert into the hash table, primary slot */ slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, 0, @@ -168,8 +225,17 @@ repeat: mark_hpte_slot_valid(hpte_slot_array, index, slot); } /* - * No need to use ldarx/stdcx here + * Mark the pte with _PAGE_COMBO, if we are trying to hash it with + * base page size 4k. + */ + if (psize == MMU_PAGE_4K) + new_pmd |= _PAGE_COMBO; + /* + * The hpte valid is stored in the pgtable whose address is in the + * second half of the PMD. Order this against clearing of the busy bit in + * huge pmd. */ + smp_wmb(); *pmdp = __pmd(new_pmd & ~_PAGE_BUSY); return 0; } diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index d67db4b..834ca8e 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -86,11 +86,6 @@ int pgd_huge(pgd_t pgd) */ return ((pgd_val(pgd) & 0x3) != 0x0); } - -int pmd_huge_support(void) -{ - return 1; -} #else int pmd_huge(pmd_t pmd) { @@ -106,11 +101,6 @@ int pgd_huge(pgd_t pgd) { return 0; } - -int pmd_huge_support(void) -{ - return 0; -} #endif pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 4788ea2..e91079b 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -232,6 +232,7 @@ int __node_distance(int a, int b) return distance; } +EXPORT_SYMBOL(__node_distance); static void initialize_distance_lookup_table(int nid, const __be32 *associativity) @@ -588,8 +589,8 @@ static int cpu_numa_callback(struct notifier_block *nfb, unsigned long action, case CPU_UP_CANCELED: case CPU_UP_CANCELED_FROZEN: unmap_cpu_from_node(lcpu); - break; ret = NOTIFY_OK; + break; #endif } return ret; diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index 536eec72..c9379a2 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -524,7 +524,7 @@ unsigned long pmd_hugepage_update(struct mm_struct *mm, unsigned long addr, *pmdp = __pmd(old & ~clr); #endif if (old & _PAGE_HASHPTE) - hpte_do_hugepage_flush(mm, addr, pmdp); + hpte_do_hugepage_flush(mm, addr, pmdp, old); return old; } @@ -631,7 +631,7 @@ void pmdp_splitting_flush(struct vm_area_struct *vma, if (!(old & _PAGE_SPLITTING)) { /* We need to flush the hpte */ if (old & _PAGE_HASHPTE) - hpte_do_hugepage_flush(vma->vm_mm, address, pmdp); + hpte_do_hugepage_flush(vma->vm_mm, address, pmdp, old); } } @@ -704,7 +704,7 @@ void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, * neesd to be flushed. */ void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr, - pmd_t *pmdp) + pmd_t *pmdp, unsigned long old_pmd) { int ssize, i; unsigned long s_addr; @@ -726,12 +726,29 @@ void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr, if (!hpte_slot_array) return; - /* get the base page size */ + /* get the base page size,vsid and segment size */ +#ifdef CONFIG_DEBUG_VM psize = get_slice_psize(mm, s_addr); + BUG_ON(psize == MMU_PAGE_16M); +#endif + if (old_pmd & _PAGE_COMBO) + psize = MMU_PAGE_4K; + else + psize = MMU_PAGE_64K; + + if (!is_kernel_addr(s_addr)) { + ssize = user_segment_size(s_addr); + vsid = get_vsid(mm->context.id, s_addr, ssize); + WARN_ON(vsid == 0); + } else { + vsid = get_kernel_vsid(s_addr, mmu_kernel_ssize); + ssize = mmu_kernel_ssize; + } if (ppc_md.hugepage_invalidate) - return ppc_md.hugepage_invalidate(mm, hpte_slot_array, - s_addr, psize); + return ppc_md.hugepage_invalidate(vsid, s_addr, + hpte_slot_array, + psize, ssize); /* * No bluk hpte removal support, invalidate each entry */ @@ -749,15 +766,6 @@ void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr, /* get the vpn */ addr = s_addr + (i * (1ul << shift)); - if (!is_kernel_addr(addr)) { - ssize = user_segment_size(addr); - vsid = get_vsid(mm->context.id, addr, ssize); - WARN_ON(vsid == 0); - } else { - vsid = get_kernel_vsid(addr, mmu_kernel_ssize); - ssize = mmu_kernel_ssize; - } - vpn = hpt_vpn(addr, vsid, ssize); hash = hpt_hash(vpn, shift, ssize); if (hidx & _PTEIDX_SECONDARY) diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c index 36e44b4..c66e445 100644 --- a/arch/powerpc/mm/tlb_hash64.c +++ b/arch/powerpc/mm/tlb_hash64.c @@ -217,7 +217,7 @@ void __flush_hash_table_range(struct mm_struct *mm, unsigned long start, if (!(pte & _PAGE_HASHPTE)) continue; if (unlikely(hugepage_shift && pmd_trans_huge(*(pmd_t *)pte))) - hpte_do_hugepage_flush(mm, start, (pmd_t *)pte); + hpte_do_hugepage_flush(mm, start, (pmd_t *)ptep, pte); else hpte_need_flush(mm, start, ptep, pte, 0); } diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 29b89e8..57a8ff9 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -483,7 +483,7 @@ static bool is_ebb_event(struct perf_event *event) * check that the PMU supports EBB, meaning those that don't can still * use bit 63 of the event code for something else if they wish. */ - return (ppmu->flags & PPMU_EBB) && + return (ppmu->flags & PPMU_ARCH_207S) && ((event->attr.config >> PERF_EVENT_CONFIG_EBB_SHIFT) & 1); } @@ -851,7 +851,22 @@ static void power_pmu_read(struct perf_event *event) } while (local64_cmpxchg(&event->hw.prev_count, prev, val) != prev); local64_add(delta, &event->count); - local64_sub(delta, &event->hw.period_left); + + /* + * A number of places program the PMC with (0x80000000 - period_left). + * We never want period_left to be less than 1 because we will program + * the PMC with a value >= 0x800000000 and an edge detected PMC will + * roll around to 0 before taking an exception. We have seen this + * on POWER8. + * + * To fix this, clamp the minimum value of period_left to 1. + */ + do { + prev = local64_read(&event->hw.period_left); + val = prev - delta; + if (val < 1) + val = 1; + } while (local64_cmpxchg(&event->hw.period_left, prev, val) != prev); } /* @@ -1149,6 +1164,9 @@ static void power_pmu_enable(struct pmu *pmu) mb(); write_mmcr0(cpuhw, mmcr0); + if (ppmu->flags & PPMU_ARCH_207S) + mtspr(SPRN_MMCR2, 0); + /* * Enable instruction sampling if necessary */ @@ -1547,7 +1565,7 @@ static int power_pmu_event_init(struct perf_event *event) if (has_branch_stack(event)) { /* PMU has BHRB enabled */ - if (!(ppmu->flags & PPMU_BHRB)) + if (!(ppmu->flags & PPMU_ARCH_207S)) return -EOPNOTSUPP; } diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c index a3f7abd..79b7e20 100644 --- a/arch/powerpc/perf/power8-pmu.c +++ b/arch/powerpc/perf/power8-pmu.c @@ -608,7 +608,7 @@ static struct power_pmu power8_pmu = { .get_constraint = power8_get_constraint, .get_alternatives = power8_get_alternatives, .disable_pmc = power8_disable_pmc, - .flags = PPMU_HAS_SSLOT | PPMU_HAS_SIER | PPMU_BHRB | PPMU_EBB, + .flags = PPMU_HAS_SSLOT | PPMU_HAS_SIER | PPMU_ARCH_207S, .n_generic = ARRAY_SIZE(power8_generic_events), .generic_events = power8_generic_events, .attr_groups = power8_pmu_attr_groups, diff --git a/arch/powerpc/platforms/52xx/mpc52xx_pic.c b/arch/powerpc/platforms/52xx/mpc52xx_pic.c index 2898b73..b69221b 100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_pic.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_pic.c @@ -340,7 +340,7 @@ static int mpc52xx_irqhost_map(struct irq_domain *h, unsigned int virq, { int l1irq; int l2irq; - struct irq_chip *uninitialized_var(irqchip); + struct irq_chip *irqchip; void *hndlr; int type; u32 reg; @@ -373,8 +373,9 @@ static int mpc52xx_irqhost_map(struct irq_domain *h, unsigned int virq, case MPC52xx_IRQ_L1_PERP: irqchip = &mpc52xx_periph_irqchip; break; case MPC52xx_IRQ_L1_SDMA: irqchip = &mpc52xx_sdma_irqchip; break; case MPC52xx_IRQ_L1_CRIT: + default: pr_warn("%s: Critical IRQ #%d is unsupported! Nopping it.\n", - __func__, l2irq); + __func__, l1irq); irq_set_chip(virq, &no_irq_chip); return 0; } diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 87ba7cf..65d633f 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -164,7 +164,7 @@ static void spufs_prune_dir(struct dentry *dir) struct dentry *dentry, *tmp; mutex_lock(&dir->d_inode->i_mutex); - list_for_each_entry_safe(dentry, tmp, &dir->d_subdirs, d_u.d_child) { + list_for_each_entry_safe(dentry, tmp, &dir->d_subdirs, d_child) { spin_lock(&dentry->d_lock); if (!(d_unhashed(dentry)) && dentry->d_inode) { dget_dlock(dentry); diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c index 227c7fe..b910833 100644 --- a/arch/powerpc/platforms/powernv/eeh-ioda.c +++ b/arch/powerpc/platforms/powernv/eeh-ioda.c @@ -493,7 +493,8 @@ static int ioda_eeh_reset(struct eeh_pe *pe, int option) ret = ioda_eeh_phb_reset(hose, option); } else { bus = eeh_pe_bus_get(pe); - if (pci_is_root_bus(bus)) + if (pci_is_root_bus(bus) || + pci_is_root_bus(bus->parent)) ret = ioda_eeh_root_reset(hose, option); else ret = ioda_eeh_bridge_reset(hose, bus->self, option); diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 930e1fe..f077513 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -792,7 +792,6 @@ static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev, unsigned int is_64, struct msi_msg *msg) { struct pnv_ioda_pe *pe = pnv_ioda_get_pe(dev); - struct pci_dn *pdn = pci_get_pdn(dev); struct irq_data *idata; struct irq_chip *ichip; unsigned int xive_num = hwirq - phb->msi_base; @@ -809,7 +808,7 @@ static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev, return -ENXIO; /* Force 32-bit MSI on some broken devices */ - if (pdn && pdn->force_32bit_msi) + if (dev->no_64bit_msi) is_64 = 0; /* Assign XIVE to PE */ diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index a28d3b5..7dcf862 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -1,3 +1,4 @@ + /* * Support PCI/PCIe on PowerNV platforms * @@ -50,9 +51,8 @@ static int pnv_msi_check_device(struct pci_dev* pdev, int nvec, int type) { struct pci_controller *hose = pci_bus_to_host(pdev->bus); struct pnv_phb *phb = hose->private_data; - struct pci_dn *pdn = pci_get_pdn(pdev); - if (pdn && pdn->force_32bit_msi && !phb->msi32_support) + if (pdev->no_64bit_msi && !phb->msi32_support) return -ENODEV; return (phb && phb->msi_bmp.bitmap) ? 0 : -ENODEV; diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c index 7cfdaae..679df55 100644 --- a/arch/powerpc/platforms/pseries/dlpar.c +++ b/arch/powerpc/platforms/pseries/dlpar.c @@ -380,7 +380,7 @@ static int dlpar_online_cpu(struct device_node *dn) BUG_ON(get_cpu_current_state(cpu) != CPU_STATE_OFFLINE); cpu_maps_update_done(); - rc = cpu_up(cpu); + rc = device_online(get_cpu_device(cpu)); if (rc) goto out; cpu_maps_update_begin(); @@ -471,7 +471,7 @@ static int dlpar_offline_cpu(struct device_node *dn) if (get_cpu_current_state(cpu) == CPU_STATE_ONLINE) { set_preferred_offline_state(cpu, CPU_STATE_OFFLINE); cpu_maps_update_done(); - rc = cpu_down(cpu); + rc = device_offline(get_cpu_device(cpu)); if (rc) goto out; cpu_maps_update_begin(); diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index 7fbc25b..7444870 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -461,6 +461,7 @@ static int pseries_eeh_get_state(struct eeh_pe *pe, int *state) } else { result = EEH_STATE_NOT_SUPPORT; } + break; default: result = EEH_STATE_NOT_SUPPORT; } diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index 0ea99e3..2d6fe89 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -88,13 +88,14 @@ void set_default_offline_state(int cpu) static void rtas_stop_self(void) { - struct rtas_args args = { - .token = cpu_to_be32(rtas_stop_self_token), + static struct rtas_args args = { .nargs = 0, .nret = 1, .rets = &args.args[0], }; + args.token = cpu_to_be32(rtas_stop_self_token); + local_irq_disable(); BUG_ON(rtas_stop_self_token == RTAS_UNKNOWN_SERVICE); diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 9a432de..bebe64e 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -158,7 +158,7 @@ static int pseries_remove_memory(struct device_node *np) static inline int pseries_remove_memblock(unsigned long base, unsigned int memblock_size) { - return -EOPNOTSUPP; + return 0; } static inline int pseries_remove_memory(struct device_node *np) { diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 0307901..261c509 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -731,13 +731,13 @@ static inline void __remove_ddw(struct device_node *np, const u32 *ddw_avail, u6 np->full_name, ret, ddw_avail[2], liobn); } -static void remove_ddw(struct device_node *np) +static void remove_ddw(struct device_node *np, bool remove_prop) { struct dynamic_dma_window_prop *dwp; struct property *win64; const u32 *ddw_avail; u64 liobn; - int len, ret; + int len, ret = 0; ddw_avail = of_get_property(np, "ibm,ddw-applicable", &len); win64 = of_find_property(np, DIRECT64_PROPNAME, NULL); @@ -763,7 +763,8 @@ static void remove_ddw(struct device_node *np) __remove_ddw(np, ddw_avail, liobn); delprop: - ret = of_remove_property(np, win64); + if (remove_prop) + ret = of_remove_property(np, win64); if (ret) pr_warning("%s: failed to remove direct window property: %d\n", np->full_name, ret); @@ -835,7 +836,7 @@ static int find_existing_ddw_windows(void) * can clear the table or find the holes. To that end, * first, remove any existing DDW configuration. */ - remove_ddw(pdn); + remove_ddw(pdn, true); /* * Second, if we are running on a new enough level of @@ -1125,7 +1126,7 @@ out_free_window: kfree(window); out_clear_window: - remove_ddw(pdn); + remove_ddw(pdn, true); out_free_prop: kfree(win64->name); @@ -1337,7 +1338,14 @@ static int iommu_reconfig_notifier(struct notifier_block *nb, unsigned long acti switch (action) { case OF_RECONFIG_DETACH_NODE: - remove_ddw(np); + /* + * Removing the property will invoke the reconfig + * notifier again, which causes dead-lock on the + * read-write semaphore of the notifier chain. So + * we have to remove the property when releasing + * the device node. + */ + remove_ddw(np, false); if (pci && pci->iommu_table) iommu_free_table(pci->iommu_table, np->full_name); diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 356bc75..691a479 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -412,16 +412,17 @@ static void __pSeries_lpar_hugepage_invalidate(unsigned long *slot, spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags); } -static void pSeries_lpar_hugepage_invalidate(struct mm_struct *mm, - unsigned char *hpte_slot_array, - unsigned long addr, int psize) +static void pSeries_lpar_hugepage_invalidate(unsigned long vsid, + unsigned long addr, + unsigned char *hpte_slot_array, + int psize, int ssize) { - int ssize = 0, i, index = 0; + int i, index = 0; unsigned long s_addr = addr; unsigned int max_hpte_count, valid; unsigned long vpn_array[PPC64_HUGE_HPTE_BATCH]; unsigned long slot_array[PPC64_HUGE_HPTE_BATCH]; - unsigned long shift, hidx, vpn = 0, vsid, hash, slot; + unsigned long shift, hidx, vpn = 0, hash, slot; shift = mmu_psize_defs[psize].shift; max_hpte_count = 1U << (PMD_SHIFT - shift); @@ -434,15 +435,6 @@ static void pSeries_lpar_hugepage_invalidate(struct mm_struct *mm, /* get the vpn */ addr = s_addr + (i * (1ul << shift)); - if (!is_kernel_addr(addr)) { - ssize = user_segment_size(addr); - vsid = get_vsid(mm->context.id, addr, ssize); - WARN_ON(vsid == 0); - } else { - vsid = get_kernel_vsid(addr, mmu_kernel_ssize); - ssize = mmu_kernel_ssize; - } - vpn = hpt_vpn(addr, vsid, ssize); hash = hpt_hash(vpn, shift, ssize); if (hidx & _PTEIDX_SECONDARY) diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c index 6d2f0ab..3b350fb 100644 --- a/arch/powerpc/platforms/pseries/msi.c +++ b/arch/powerpc/platforms/pseries/msi.c @@ -426,7 +426,7 @@ static int rtas_setup_msi_irqs(struct pci_dev *pdev, int nvec_in, int type) */ again: if (type == PCI_CAP_ID_MSI) { - if (pdn->force_32bit_msi) { + if (pdev->no_64bit_msi) { rc = rtas_change_msi(pdn, RTAS_CHANGE_32MSI_FN, nvec); if (rc < 0) { /* diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index af9d346..4898203 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -288,10 +288,10 @@ static inline void disable_surveillance(void) args.token = rtas_token("set-indicator"); if (args.token == RTAS_UNKNOWN_SERVICE) return; - args.nargs = 3; - args.nret = 1; + args.nargs = cpu_to_be32(3); + args.nret = cpu_to_be32(1); args.rets = &args.args[3]; - args.args[0] = SURVEILLANCE_TOKEN; + args.args[0] = cpu_to_be32(SURVEILLANCE_TOKEN); args.args[1] = 0; args.args[2] = 0; enter_rtas(__pa(&args)); diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 6671e8d..faa97bd49 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -93,6 +93,7 @@ config S390 select ARCH_INLINE_WRITE_UNLOCK_IRQ select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE select ARCH_SAVE_PAGE_KEYS if HIBERNATION + select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_USE_CMPXCHG_LOCKREF select ARCH_WANT_IPC_PARSE_VERSION select BUILDTIME_EXTABLE_SORT diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c index 2a245b5..f8d9cb1 100644 --- a/arch/s390/crypto/aes_s390.c +++ b/arch/s390/crypto/aes_s390.c @@ -967,7 +967,7 @@ static void __exit aes_s390_fini(void) module_init(aes_s390_init); module_exit(aes_s390_fini); -MODULE_ALIAS("aes-all"); +MODULE_ALIAS_CRYPTO("aes-all"); MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm"); MODULE_LICENSE("GPL"); diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c index 2d96e68..a3e24d4 100644 --- a/arch/s390/crypto/des_s390.c +++ b/arch/s390/crypto/des_s390.c @@ -616,8 +616,8 @@ static void __exit des_s390_exit(void) module_init(des_s390_init); module_exit(des_s390_exit); -MODULE_ALIAS("des"); -MODULE_ALIAS("des3_ede"); +MODULE_ALIAS_CRYPTO("des"); +MODULE_ALIAS_CRYPTO("des3_ede"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("DES & Triple DES EDE Cipher Algorithms"); diff --git a/arch/s390/crypto/ghash_s390.c b/arch/s390/crypto/ghash_s390.c index d43485d..7940dc9 100644 --- a/arch/s390/crypto/ghash_s390.c +++ b/arch/s390/crypto/ghash_s390.c @@ -160,7 +160,7 @@ static void __exit ghash_mod_exit(void) module_init(ghash_mod_init); module_exit(ghash_mod_exit); -MODULE_ALIAS("ghash"); +MODULE_ALIAS_CRYPTO("ghash"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("GHASH Message Digest Algorithm, s390 implementation"); diff --git a/arch/s390/crypto/sha1_s390.c b/arch/s390/crypto/sha1_s390.c index a1b3a9d..5b2bee3 100644 --- a/arch/s390/crypto/sha1_s390.c +++ b/arch/s390/crypto/sha1_s390.c @@ -103,6 +103,6 @@ static void __exit sha1_s390_fini(void) module_init(sha1_s390_init); module_exit(sha1_s390_fini); -MODULE_ALIAS("sha1"); +MODULE_ALIAS_CRYPTO("sha1"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm"); diff --git a/arch/s390/crypto/sha256_s390.c b/arch/s390/crypto/sha256_s390.c index 9b85380..b74ff15 100644 --- a/arch/s390/crypto/sha256_s390.c +++ b/arch/s390/crypto/sha256_s390.c @@ -143,7 +143,7 @@ static void __exit sha256_s390_fini(void) module_init(sha256_s390_init); module_exit(sha256_s390_fini); -MODULE_ALIAS("sha256"); -MODULE_ALIAS("sha224"); +MODULE_ALIAS_CRYPTO("sha256"); +MODULE_ALIAS_CRYPTO("sha224"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("SHA256 and SHA224 Secure Hash Algorithm"); diff --git a/arch/s390/crypto/sha512_s390.c b/arch/s390/crypto/sha512_s390.c index 32a8138..0c36989 100644 --- a/arch/s390/crypto/sha512_s390.c +++ b/arch/s390/crypto/sha512_s390.c @@ -86,7 +86,7 @@ static struct shash_alg sha512_alg = { } }; -MODULE_ALIAS("sha512"); +MODULE_ALIAS_CRYPTO("sha512"); static int sha384_init(struct shash_desc *desc) { @@ -126,7 +126,7 @@ static struct shash_alg sha384_alg = { } }; -MODULE_ALIAS("sha384"); +MODULE_ALIAS_CRYPTO("sha384"); static int __init init(void) { diff --git a/arch/s390/include/asm/ccwdev.h b/arch/s390/include/asm/ccwdev.h index f201af8..31b5ca8 100644 --- a/arch/s390/include/asm/ccwdev.h +++ b/arch/s390/include/asm/ccwdev.h @@ -219,7 +219,7 @@ extern void ccw_device_get_id(struct ccw_device *, struct ccw_dev_id *); #define to_ccwdev(n) container_of(n, struct ccw_device, dev) #define to_ccwdrv(n) container_of(n, struct ccw_driver, driver) -extern struct ccw_device *ccw_device_probe_console(void); +extern struct ccw_device *ccw_device_probe_console(struct ccw_driver *); extern void ccw_device_wait_idle(struct ccw_device *); extern int ccw_device_force_console(struct ccw_device *); diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index bbf8141..2bed4f0 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -142,9 +142,9 @@ struct _lowcore { __u8 pad_0x02fc[0x0300-0x02fc]; /* 0x02fc */ /* Interrupt response block */ - __u8 irb[64]; /* 0x0300 */ + __u8 irb[96]; /* 0x0300 */ - __u8 pad_0x0340[0x0e00-0x0340]; /* 0x0340 */ + __u8 pad_0x0360[0x0e00-0x0360]; /* 0x0360 */ /* * 0xe00 contains the address of the IPL Parameter Information @@ -288,12 +288,13 @@ struct _lowcore { __u8 pad_0x03a0[0x0400-0x03a0]; /* 0x03a0 */ /* Interrupt response block. */ - __u8 irb[64]; /* 0x0400 */ + __u8 irb[96]; /* 0x0400 */ + __u8 pad_0x0460[0x0480-0x0460]; /* 0x0460 */ /* Per cpu primary space access list */ - __u32 paste[16]; /* 0x0440 */ + __u32 paste[16]; /* 0x0480 */ - __u8 pad_0x0480[0x0e00-0x0480]; /* 0x0480 */ + __u8 pad_0x04c0[0x0e00-0x04c0]; /* 0x04c0 */ /* * 0xe00 contains the address of the IPL Parameter Information diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index 1f1b8c7..0ebb699 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -249,7 +249,7 @@ asmlinkage long sys32_setgroups16(int gidsetsize, u16 __user *grouplist) struct group_info *group_info; int retval; - if (!capable(CAP_SETGID)) + if (!may_setgroups()) return -EPERM; if ((unsigned)gidsetsize > NGROUPS_MAX) return -EINVAL; diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 9556905..d4c5e6b 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -322,7 +322,9 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data) * psw and gprs are stored on the stack */ if (addr == (addr_t) &dummy->regs.psw.mask && - ((data & ~PSW_MASK_USER) != psw_user_bits || + (((data^psw_user_bits) & ~PSW_MASK_USER) || + (((data^psw_user_bits) & PSW_MASK_ASC) && + ((data|psw_user_bits) & PSW_MASK_ASC) == PSW_MASK_ASC) || ((data & PSW_MASK_EA) && !(data & PSW_MASK_BA)))) /* Invalid psw mask. */ return -EINVAL; @@ -655,7 +657,10 @@ static int __poke_user_compat(struct task_struct *child, */ if (addr == (addr_t) &dummy32->regs.psw.mask) { /* Build a 64 bit psw mask from 31 bit mask. */ - if ((tmp & ~PSW32_MASK_USER) != psw32_user_bits) + if (((tmp^psw32_user_bits) & ~PSW32_MASK_USER) || + (((tmp^psw32_user_bits) & PSW32_MASK_ASC) && + ((tmp|psw32_user_bits) & PSW32_MASK_ASC) + == PSW32_MASK_ASC)) /* Invalid psw mask. */ return -EINVAL; regs->psw.mask = (regs->psw.mask & ~PSW_MASK_USER) | diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 7f1f7ac..2df491b 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -71,6 +71,7 @@ static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu, return 0; if (vcpu->arch.sie_block->gcr[0] & 0x2000ul) return 1; + return 0; case KVM_S390_INT_EMERGENCY: if (psw_extint_disabled(vcpu)) return 0; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 776dafe..48bb1c1 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -788,7 +788,8 @@ rerun_vcpu: if (rc) break; if (kvm_is_ucontrol(vcpu->kvm)) - rc = -EOPNOTSUPP; + /* Don't exit for host interrupts. */ + rc = vcpu->arch.sie_block->icptcode ? -EOPNOTSUPP : 0; else rc = kvm_handle_sie_intercept(vcpu); } while (!signal_pending(current) && !rc); diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 6b0efce..fc66792 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -296,8 +296,7 @@ static inline int do_exception(struct pt_regs *regs, int access) * user context. */ fault = VM_FAULT_BADCONTEXT; - if (unlikely(!user_space_fault(trans_exc_code) || - !mm || pagefault_disabled())) + if (unlikely(!user_space_fault(trans_exc_code) || in_atomic() || !mm)) goto out; address = trans_exc_code & __FAIL_ADDR_MASK; @@ -443,8 +442,7 @@ void __kprobes do_asce_exception(struct pt_regs *regs) clear_tsk_thread_flag(current, TIF_PER_TRAP); trans_exc_code = regs->int_parm_long; - if (unlikely(!user_space_fault(trans_exc_code) || !mm || - pagefault_disabled())) + if (unlikely(!user_space_fault(trans_exc_code) || in_atomic() || !mm)) goto no_context; down_read(&mm->mmap_sem); diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index d261c62..248445f 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -223,11 +223,6 @@ int pud_huge(pud_t pud) return 0; } -int pmd_huge_support(void) -{ - return 1; -} - struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, pmd_t *pmdp, int write) { diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index de8cbc3..5664be4 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -788,11 +788,21 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep; down_read(&mm->mmap_sem); +retry: ptep = get_locked_pte(current->mm, addr, &ptl); if (unlikely(!ptep)) { up_read(&mm->mmap_sem); return -EFAULT; } + if (!(pte_val(*ptep) & _PAGE_INVALID) && + (pte_val(*ptep) & _PAGE_PROTECT)) { + pte_unmap_unlock(*ptep, ptl); + if (fixup_user_fault(current, mm, addr, FAULT_FLAG_WRITE)) { + up_read(&mm->mmap_sem); + return -EFAULT; + } + goto retry; + } new = old = pgste_get_lock(ptep); pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT | diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 96a4b15..906fba6 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -276,7 +276,6 @@ static void bpf_jit_noleaks(struct bpf_jit *jit, struct sock_filter *filter) case BPF_S_LD_W_IND: case BPF_S_LD_H_IND: case BPF_S_LD_B_IND: - case BPF_S_LDX_B_MSH: case BPF_S_LD_IMM: case BPF_S_LD_MEM: case BPF_S_MISC_TXA: diff --git a/arch/score/mm/fault.c b/arch/score/mm/fault.c index 35d339b..52238983 100644 --- a/arch/score/mm/fault.c +++ b/arch/score/mm/fault.c @@ -73,7 +73,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long write, * If we're in an interrupt or have no user * context, we must not take the fault.. */ - if (!mm || pagefault_disabled()) + if (in_atomic() || !mm) goto bad_area_nosemaphore; if (user_mode(regs)) diff --git a/arch/sh/kernel/dumpstack.c b/arch/sh/kernel/dumpstack.c index b959f55..8dfe645 100644 --- a/arch/sh/kernel/dumpstack.c +++ b/arch/sh/kernel/dumpstack.c @@ -115,7 +115,7 @@ static int print_trace_stack(void *data, char *name) */ static void print_trace_address(void *data, unsigned long addr, int reliable) { - printk(data); + printk("%s", (char *)data); printk_address(addr, reliable); } diff --git a/arch/sh/kernel/irq.c b/arch/sh/kernel/irq.c index ae4b141..063af10 100644 --- a/arch/sh/kernel/irq.c +++ b/arch/sh/kernel/irq.c @@ -149,7 +149,6 @@ void irq_ctx_exit(int cpu) hardirq_ctx[cpu] = NULL; } -#ifndef CONFIG_PREEMPT_RT_FULL asmlinkage void do_softirq(void) { unsigned long flags; @@ -192,7 +191,6 @@ asmlinkage void do_softirq(void) local_irq_restore(flags); } -#endif #else static inline void handle_one_irq(unsigned int irq) { diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c index 6589138..541dc61 100644 --- a/arch/sh/mm/fault.c +++ b/arch/sh/mm/fault.c @@ -438,7 +438,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, * If we're in an interrupt, have no user context or are running * in an atomic region then we must not take the fault: */ - if (unlikely(!mm || pagefault_disabled())) { + if (unlikely(in_atomic() || !mm)) { bad_area_nosemaphore(regs, error_code, address); return; } diff --git a/arch/sh/mm/hugetlbpage.c b/arch/sh/mm/hugetlbpage.c index 0d676a4..d776234 100644 --- a/arch/sh/mm/hugetlbpage.c +++ b/arch/sh/mm/hugetlbpage.c @@ -83,11 +83,6 @@ int pud_huge(pud_t pud) return 0; } -int pmd_huge_support(void) -{ - return 0; -} - struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, pmd_t *pmd, int write) { diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index ffc749e..11068ae 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -26,7 +26,6 @@ config SPARC select HAVE_DMA_ATTRS select HAVE_DMA_API_DEBUG select HAVE_ARCH_JUMP_LABEL if SPARC64 - select IRQ_FORCED_THREADING select GENERIC_IRQ_SHOW select ARCH_WANT_IPC_PARSE_VERSION select USE_GENERIC_SMP_HELPERS if SMP @@ -66,6 +65,7 @@ config SPARC64 select HAVE_FTRACE_MCOUNT_RECORD select HAVE_SYSCALL_TRACEPOINTS select HAVE_DEBUG_KMEMLEAK + select SPARSE_IRQ select RTC_DRV_CMOS select RTC_DRV_BQ4802 select RTC_DRV_SUN4V @@ -76,6 +76,7 @@ config SPARC64 select ARCH_HAVE_NMI_SAFE_CMPXCHG select HAVE_C_RECORDMCOUNT select NO_BOOTMEM + select ARCH_SUPPORTS_ATOMIC_RMW config ARCH_DEFCONFIG string @@ -178,10 +179,12 @@ config NR_CPUS source kernel/Kconfig.hz config RWSEM_GENERIC_SPINLOCK - def_bool PREEMPT_RT_FULL + bool + default y if SPARC32 config RWSEM_XCHGADD_ALGORITHM - def_bool !RWSEM_GENERIC_SPINLOCK && !PREEMPT_RT_FULL + bool + default y if SPARC64 config GENERIC_HWEIGHT bool @@ -522,10 +525,6 @@ menu "Executable file formats" source "fs/Kconfig.binfmt" -config EARLY_PRINTK - bool - default y - config COMPAT bool depends on SPARC64 diff --git a/arch/sparc/crypto/aes_glue.c b/arch/sparc/crypto/aes_glue.c index 503e6d9..ded4cee3 100644 --- a/arch/sparc/crypto/aes_glue.c +++ b/arch/sparc/crypto/aes_glue.c @@ -499,6 +499,6 @@ module_exit(aes_sparc64_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("AES Secure Hash Algorithm, sparc64 aes opcode accelerated"); -MODULE_ALIAS("aes"); +MODULE_ALIAS_CRYPTO("aes"); #include "crop_devid.c" diff --git a/arch/sparc/crypto/camellia_glue.c b/arch/sparc/crypto/camellia_glue.c index 888f6260..641f55c 100644 --- a/arch/sparc/crypto/camellia_glue.c +++ b/arch/sparc/crypto/camellia_glue.c @@ -322,6 +322,6 @@ module_exit(camellia_sparc64_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Camellia Cipher Algorithm, sparc64 camellia opcode accelerated"); -MODULE_ALIAS("aes"); +MODULE_ALIAS_CRYPTO("aes"); #include "crop_devid.c" diff --git a/arch/sparc/crypto/crc32c_glue.c b/arch/sparc/crypto/crc32c_glue.c index 5162fad..d1064e4 100644 --- a/arch/sparc/crypto/crc32c_glue.c +++ b/arch/sparc/crypto/crc32c_glue.c @@ -176,6 +176,6 @@ module_exit(crc32c_sparc64_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("CRC32c (Castagnoli), sparc64 crc32c opcode accelerated"); -MODULE_ALIAS("crc32c"); +MODULE_ALIAS_CRYPTO("crc32c"); #include "crop_devid.c" diff --git a/arch/sparc/crypto/des_glue.c b/arch/sparc/crypto/des_glue.c index 3065bc6..d115009 100644 --- a/arch/sparc/crypto/des_glue.c +++ b/arch/sparc/crypto/des_glue.c @@ -532,6 +532,6 @@ module_exit(des_sparc64_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("DES & Triple DES EDE Cipher Algorithms, sparc64 des opcode accelerated"); -MODULE_ALIAS("des"); +MODULE_ALIAS_CRYPTO("des"); #include "crop_devid.c" diff --git a/arch/sparc/crypto/md5_glue.c b/arch/sparc/crypto/md5_glue.c index 09a9ea1..64c7ff5 100644 --- a/arch/sparc/crypto/md5_glue.c +++ b/arch/sparc/crypto/md5_glue.c @@ -185,6 +185,6 @@ module_exit(md5_sparc64_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("MD5 Secure Hash Algorithm, sparc64 md5 opcode accelerated"); -MODULE_ALIAS("md5"); +MODULE_ALIAS_CRYPTO("md5"); #include "crop_devid.c" diff --git a/arch/sparc/crypto/sha1_glue.c b/arch/sparc/crypto/sha1_glue.c index 6cd5f29..1b3e47a 100644 --- a/arch/sparc/crypto/sha1_glue.c +++ b/arch/sparc/crypto/sha1_glue.c @@ -180,6 +180,6 @@ module_exit(sha1_sparc64_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm, sparc64 sha1 opcode accelerated"); -MODULE_ALIAS("sha1"); +MODULE_ALIAS_CRYPTO("sha1"); #include "crop_devid.c" diff --git a/arch/sparc/crypto/sha256_glue.c b/arch/sparc/crypto/sha256_glue.c index 04f555a..41f27cc 100644 --- a/arch/sparc/crypto/sha256_glue.c +++ b/arch/sparc/crypto/sha256_glue.c @@ -237,7 +237,7 @@ module_exit(sha256_sparc64_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("SHA-224 and SHA-256 Secure Hash Algorithm, sparc64 sha256 opcode accelerated"); -MODULE_ALIAS("sha224"); -MODULE_ALIAS("sha256"); +MODULE_ALIAS_CRYPTO("sha224"); +MODULE_ALIAS_CRYPTO("sha256"); #include "crop_devid.c" diff --git a/arch/sparc/crypto/sha512_glue.c b/arch/sparc/crypto/sha512_glue.c index f04d199..9fff885 100644 --- a/arch/sparc/crypto/sha512_glue.c +++ b/arch/sparc/crypto/sha512_glue.c @@ -222,7 +222,7 @@ module_exit(sha512_sparc64_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("SHA-384 and SHA-512 Secure Hash Algorithm, sparc64 sha512 opcode accelerated"); -MODULE_ALIAS("sha384"); -MODULE_ALIAS("sha512"); +MODULE_ALIAS_CRYPTO("sha384"); +MODULE_ALIAS_CRYPTO("sha512"); #include "crop_devid.c" diff --git a/arch/sparc/include/asm/atomic_32.h b/arch/sparc/include/asm/atomic_32.h index 905832a..a0ed182 100644 --- a/arch/sparc/include/asm/atomic_32.h +++ b/arch/sparc/include/asm/atomic_32.h @@ -21,7 +21,7 @@ extern int __atomic_add_return(int, atomic_t *); extern int atomic_cmpxchg(atomic_t *, int, int); -#define atomic_xchg(v, new) (xchg(&((v)->counter), new)) +extern int atomic_xchg(atomic_t *, int); extern int __atomic_add_unless(atomic_t *, int, int); extern void atomic_set(atomic_t *, int); diff --git a/arch/sparc/include/asm/cmpxchg_32.h b/arch/sparc/include/asm/cmpxchg_32.h index 1fae1a0..ae0f9a7 100644 --- a/arch/sparc/include/asm/cmpxchg_32.h +++ b/arch/sparc/include/asm/cmpxchg_32.h @@ -11,22 +11,14 @@ #ifndef __ARCH_SPARC_CMPXCHG__ #define __ARCH_SPARC_CMPXCHG__ -static inline unsigned long xchg_u32(__volatile__ unsigned long *m, unsigned long val) -{ - __asm__ __volatile__("swap [%2], %0" - : "=&r" (val) - : "0" (val), "r" (m) - : "memory"); - return val; -} - +extern unsigned long __xchg_u32(volatile u32 *m, u32 new); extern void __xchg_called_with_bad_pointer(void); static inline unsigned long __xchg(unsigned long x, __volatile__ void * ptr, int size) { switch (size) { case 4: - return xchg_u32(ptr, x); + return __xchg_u32(ptr, x); } __xchg_called_with_bad_pointer(); return x; diff --git a/arch/sparc/include/asm/hypervisor.h b/arch/sparc/include/asm/hypervisor.h index ca121f0..17be9d6 100644 --- a/arch/sparc/include/asm/hypervisor.h +++ b/arch/sparc/include/asm/hypervisor.h @@ -2944,6 +2944,16 @@ extern unsigned long sun4v_vt_set_perfreg(unsigned long reg_num, unsigned long reg_val); #endif +#define HV_FAST_T5_GET_PERFREG 0x1a8 +#define HV_FAST_T5_SET_PERFREG 0x1a9 + +#ifndef __ASSEMBLY__ +unsigned long sun4v_t5_get_perfreg(unsigned long reg_num, + unsigned long *reg_val); +unsigned long sun4v_t5_set_perfreg(unsigned long reg_num, + unsigned long reg_val); +#endif + /* Function numbers for HV_CORE_TRAP. */ #define HV_CORE_SET_VER 0x00 #define HV_CORE_PUTCHAR 0x01 @@ -2975,6 +2985,7 @@ extern unsigned long sun4v_vt_set_perfreg(unsigned long reg_num, #define HV_GRP_VF_CPU 0x0205 #define HV_GRP_KT_CPU 0x0209 #define HV_GRP_VT_CPU 0x020c +#define HV_GRP_T5_CPU 0x0211 #define HV_GRP_DIAG 0x0300 #ifndef __ASSEMBLY__ diff --git a/arch/sparc/include/asm/irq_64.h b/arch/sparc/include/asm/irq_64.h index abf6afe..78c9f2d 100644 --- a/arch/sparc/include/asm/irq_64.h +++ b/arch/sparc/include/asm/irq_64.h @@ -37,7 +37,7 @@ * * ino_bucket->irq allocation is made during {sun4v_,}build_irq(). */ -#define NR_IRQS 255 +#define NR_IRQS (2048) extern void irq_install_pre_handler(int irq, void (*func)(unsigned int, void *, void *), @@ -57,11 +57,8 @@ extern unsigned int sun4u_build_msi(u32 portid, unsigned int *irq_p, unsigned long iclr_base); extern void sun4u_destroy_msi(unsigned int irq); -extern unsigned char irq_alloc(unsigned int dev_handle, - unsigned int dev_ino); -#ifdef CONFIG_PCI_MSI +extern unsigned int irq_alloc(unsigned int dev_handle, unsigned int dev_ino); extern void irq_free(unsigned int irq); -#endif extern void __init init_IRQ(void); extern void fixup_irqs(void); diff --git a/arch/sparc/include/asm/ldc.h b/arch/sparc/include/asm/ldc.h index bdb524a..8732ed3 100644 --- a/arch/sparc/include/asm/ldc.h +++ b/arch/sparc/include/asm/ldc.h @@ -53,13 +53,14 @@ struct ldc_channel; /* Allocate state for a channel. */ extern struct ldc_channel *ldc_alloc(unsigned long id, const struct ldc_channel_config *cfgp, - void *event_arg); + void *event_arg, + const char *name); /* Shut down and free state for a channel. */ extern void ldc_free(struct ldc_channel *lp); /* Register TX and RX queues of the link with the hypervisor. */ -extern int ldc_bind(struct ldc_channel *lp, const char *name); +extern int ldc_bind(struct ldc_channel *lp); /* For non-RAW protocols we need to complete a handshake before * communication can proceed. ldc_connect() does that, if the diff --git a/arch/sparc/include/asm/mmu_64.h b/arch/sparc/include/asm/mmu_64.h index e945ddb..f668797 100644 --- a/arch/sparc/include/asm/mmu_64.h +++ b/arch/sparc/include/asm/mmu_64.h @@ -90,10 +90,9 @@ struct tsb_config { #endif typedef struct { - raw_spinlock_t lock; + spinlock_t lock; unsigned long sparc64_ctx_val; unsigned long huge_pte_count; - struct page *pgtable_page; struct tsb_config tsb_block[MM_NUM_TSBS]; struct hv_tsb_descr tsb_descr[MM_NUM_TSBS]; } mm_context_t; diff --git a/arch/sparc/include/asm/mmu_context_64.h b/arch/sparc/include/asm/mmu_context_64.h index 44e393b..3d528f0 100644 --- a/arch/sparc/include/asm/mmu_context_64.h +++ b/arch/sparc/include/asm/mmu_context_64.h @@ -13,7 +13,7 @@ static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { } -extern raw_spinlock_t ctx_alloc_lock; +extern spinlock_t ctx_alloc_lock; extern unsigned long tlb_context_cache; extern unsigned long mmu_context_bmap[]; @@ -77,7 +77,7 @@ static inline void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, str if (unlikely(mm == &init_mm)) return; - raw_spin_lock_irqsave(&mm->context.lock, flags); + spin_lock_irqsave(&mm->context.lock, flags); ctx_valid = CTX_VALID(mm->context); if (!ctx_valid) get_new_mmu_context(mm); @@ -125,7 +125,7 @@ static inline void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, str __flush_tlb_mm(CTX_HWBITS(mm->context), SECONDARY_CONTEXT); } - raw_spin_unlock_irqrestore(&mm->context.lock, flags); + spin_unlock_irqrestore(&mm->context.lock, flags); } #define deactivate_mm(tsk,mm) do { } while (0) @@ -136,7 +136,7 @@ static inline void activate_mm(struct mm_struct *active_mm, struct mm_struct *mm unsigned long flags; int cpu; - raw_spin_lock_irqsave(&mm->context.lock, flags); + spin_lock_irqsave(&mm->context.lock, flags); if (!CTX_VALID(mm->context)) get_new_mmu_context(mm); cpu = smp_processor_id(); @@ -146,7 +146,7 @@ static inline void activate_mm(struct mm_struct *active_mm, struct mm_struct *mm load_secondary_context(mm); __flush_tlb_mm(CTX_HWBITS(mm->context), SECONDARY_CONTEXT); tsb_context_switch(mm); - raw_spin_unlock_irqrestore(&mm->context.lock, flags); + spin_unlock_irqrestore(&mm->context.lock, flags); } #endif /* !(__ASSEMBLY__) */ diff --git a/arch/sparc/include/asm/oplib_64.h b/arch/sparc/include/asm/oplib_64.h index a12dbe3..e48fdf4 100644 --- a/arch/sparc/include/asm/oplib_64.h +++ b/arch/sparc/include/asm/oplib_64.h @@ -62,7 +62,8 @@ struct linux_mem_p1275 { /* You must call prom_init() before using any of the library services, * preferably as early as possible. Pass it the romvec pointer. */ -extern void prom_init(void *cif_handler, void *cif_stack); +extern void prom_init(void *cif_handler); +extern void prom_init_report(void); /* Boot argument acquisition, returns the boot command line string. */ extern char *prom_getbootargs(void); diff --git a/arch/sparc/include/asm/page_64.h b/arch/sparc/include/asm/page_64.h index e155388..b18e602 100644 --- a/arch/sparc/include/asm/page_64.h +++ b/arch/sparc/include/asm/page_64.h @@ -15,7 +15,10 @@ #define DCACHE_ALIASING_POSSIBLE #endif -#define HPAGE_SHIFT 22 +#define HPAGE_SHIFT 23 +#define REAL_HPAGE_SHIFT 22 + +#define REAL_HPAGE_SIZE (_AC(1,UL) << REAL_HPAGE_SHIFT) #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) #define HPAGE_SIZE (_AC(1,UL) << HPAGE_SHIFT) @@ -53,19 +56,22 @@ extern void copy_user_page(void *to, void *from, unsigned long vaddr, struct pag /* These are used to make use of C type-checking.. */ typedef struct { unsigned long pte; } pte_t; typedef struct { unsigned long iopte; } iopte_t; -typedef struct { unsigned int pmd; } pmd_t; -typedef struct { unsigned int pgd; } pgd_t; +typedef struct { unsigned long pmd; } pmd_t; +typedef struct { unsigned long pud; } pud_t; +typedef struct { unsigned long pgd; } pgd_t; typedef struct { unsigned long pgprot; } pgprot_t; #define pte_val(x) ((x).pte) #define iopte_val(x) ((x).iopte) #define pmd_val(x) ((x).pmd) +#define pud_val(x) ((x).pud) #define pgd_val(x) ((x).pgd) #define pgprot_val(x) ((x).pgprot) #define __pte(x) ((pte_t) { (x) } ) #define __iopte(x) ((iopte_t) { (x) } ) #define __pmd(x) ((pmd_t) { (x) } ) +#define __pud(x) ((pud_t) { (x) } ) #define __pgd(x) ((pgd_t) { (x) } ) #define __pgprot(x) ((pgprot_t) { (x) } ) @@ -73,19 +79,22 @@ typedef struct { unsigned long pgprot; } pgprot_t; /* .. while these make it easier on the compiler */ typedef unsigned long pte_t; typedef unsigned long iopte_t; -typedef unsigned int pmd_t; -typedef unsigned int pgd_t; +typedef unsigned long pmd_t; +typedef unsigned long pud_t; +typedef unsigned long pgd_t; typedef unsigned long pgprot_t; #define pte_val(x) (x) #define iopte_val(x) (x) #define pmd_val(x) (x) +#define pud_val(x) (x) #define pgd_val(x) (x) #define pgprot_val(x) (x) #define __pte(x) (x) #define __iopte(x) (x) #define __pmd(x) (x) +#define __pud(x) (x) #define __pgd(x) (x) #define __pgprot(x) (x) @@ -93,18 +102,33 @@ typedef unsigned long pgprot_t; typedef pte_t *pgtable_t; +extern unsigned long sparc64_va_hole_top; +extern unsigned long sparc64_va_hole_bottom; + +/* The next two defines specify the actual exclusion region we + * enforce, wherein we use a 4GB red zone on each side of the VA hole. + */ +#define VA_EXCLUDE_START (sparc64_va_hole_bottom - (1UL << 32UL)) +#define VA_EXCLUDE_END (sparc64_va_hole_top + (1UL << 32UL)) + #define TASK_UNMAPPED_BASE (test_thread_flag(TIF_32BIT) ? \ - (_AC(0x0000000070000000,UL)) : \ - (_AC(0xfffff80000000000,UL) + (1UL << 32UL))) + _AC(0x0000000070000000,UL) : \ + VA_EXCLUDE_END) #include <asm-generic/memory_model.h> +extern unsigned long PAGE_OFFSET; + #endif /* !(__ASSEMBLY__) */ -/* We used to stick this into a hard-coded global register (%g4) - * but that does not make sense anymore. +/* The maximum number of physical memory address bits we support. The + * largest value we can support is whatever "KPGD_SHIFT + KPTE_BITS" + * evaluates to. */ -#define PAGE_OFFSET _AC(0xFFFFF80000000000,UL) +#define MAX_PHYS_ADDRESS_BITS 53 + +#define ILOG2_4MB 22 +#define ILOG2_256MB 28 #ifndef __ASSEMBLY__ diff --git a/arch/sparc/include/asm/pgalloc_64.h b/arch/sparc/include/asm/pgalloc_64.h index bcfe063..2c8d41f 100644 --- a/arch/sparc/include/asm/pgalloc_64.h +++ b/arch/sparc/include/asm/pgalloc_64.h @@ -15,6 +15,13 @@ extern struct kmem_cache *pgtable_cache; +static inline void __pgd_populate(pgd_t *pgd, pud_t *pud) +{ + pgd_set(pgd, pud); +} + +#define pgd_populate(MM, PGD, PUD) __pgd_populate(PGD, PUD) + static inline pgd_t *pgd_alloc(struct mm_struct *mm) { return kmem_cache_alloc(pgtable_cache, GFP_KERNEL); @@ -25,7 +32,23 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) kmem_cache_free(pgtable_cache, pgd); } -#define pud_populate(MM, PUD, PMD) pud_set(PUD, PMD) +static inline void __pud_populate(pud_t *pud, pmd_t *pmd) +{ + pud_set(pud, pmd); +} + +#define pud_populate(MM, PUD, PMD) __pud_populate(PUD, PMD) + +static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) +{ + return kmem_cache_alloc(pgtable_cache, + GFP_KERNEL|__GFP_REPEAT); +} + +static inline void pud_free(struct mm_struct *mm, pud_t *pud) +{ + kmem_cache_free(pgtable_cache, pud); +} static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) { @@ -91,4 +114,7 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pte_t *pte, #define __pmd_free_tlb(tlb, pmd, addr) \ pgtable_free_tlb(tlb, pmd, false) +#define __pud_free_tlb(tlb, pud, addr) \ + pgtable_free_tlb(tlb, pud, false) + #endif /* _SPARC64_PGALLOC_H */ diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index 90f289f..e8dfabf 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -20,11 +20,10 @@ #include <asm/page.h> #include <asm/processor.h> -#include <asm-generic/pgtable-nopud.h> - /* The kernel image occupies 0x4000000 to 0x6000000 (4MB --> 96MB). * The page copy blockops can use 0x6000000 to 0x8000000. - * The TSB is mapped in the 0x8000000 to 0xa000000 range. + * The 8K TSB is mapped in the 0x8000000 to 0x8400000 range. + * The 4M TSB is mapped in the 0x8400000 to 0x8800000 range. * The PROM resides in an area spanning 0xf0000000 to 0x100000000. * The vmalloc area spans 0x100000000 to 0x200000000. * Since modules need to be in the lowest 32-bits of the address space, @@ -33,33 +32,43 @@ * 0x400000000. */ #define TLBTEMP_BASE _AC(0x0000000006000000,UL) -#define TSBMAP_BASE _AC(0x0000000008000000,UL) +#define TSBMAP_8K_BASE _AC(0x0000000008000000,UL) +#define TSBMAP_4M_BASE _AC(0x0000000008400000,UL) #define MODULES_VADDR _AC(0x0000000010000000,UL) #define MODULES_LEN _AC(0x00000000e0000000,UL) #define MODULES_END _AC(0x00000000f0000000,UL) #define LOW_OBP_ADDRESS _AC(0x00000000f0000000,UL) #define HI_OBP_ADDRESS _AC(0x0000000100000000,UL) #define VMALLOC_START _AC(0x0000000100000000,UL) -#define VMALLOC_END _AC(0x0000010000000000,UL) -#define VMEMMAP_BASE _AC(0x0000010000000000,UL) - -#define vmemmap ((struct page *)VMEMMAP_BASE) +#define VMEMMAP_BASE VMALLOC_END /* PMD_SHIFT determines the size of the area a second-level page * table can map */ -#define PMD_SHIFT (PAGE_SHIFT + (PAGE_SHIFT-4)) +#define PMD_SHIFT (PAGE_SHIFT + (PAGE_SHIFT-3)) #define PMD_SIZE (_AC(1,UL) << PMD_SHIFT) #define PMD_MASK (~(PMD_SIZE-1)) -#define PMD_BITS (PAGE_SHIFT - 2) +#define PMD_BITS (PAGE_SHIFT - 3) -/* PGDIR_SHIFT determines what a third-level page table entry can map */ -#define PGDIR_SHIFT (PAGE_SHIFT + (PAGE_SHIFT-4) + PMD_BITS) +/* PUD_SHIFT determines the size of the area a third-level page + * table can map + */ +#define PUD_SHIFT (PMD_SHIFT + PMD_BITS) +#define PUD_SIZE (_AC(1,UL) << PUD_SHIFT) +#define PUD_MASK (~(PUD_SIZE-1)) +#define PUD_BITS (PAGE_SHIFT - 3) + +/* PGDIR_SHIFT determines what a fourth-level page table entry can map */ +#define PGDIR_SHIFT (PUD_SHIFT + PUD_BITS) #define PGDIR_SIZE (_AC(1,UL) << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE-1)) -#define PGDIR_BITS (PAGE_SHIFT - 2) +#define PGDIR_BITS (PAGE_SHIFT - 3) -#if (PGDIR_SHIFT + PGDIR_BITS) != 44 +#if (MAX_PHYS_ADDRESS_BITS > PGDIR_SHIFT + PGDIR_BITS) +#error MAX_PHYS_ADDRESS_BITS exceeds what kernel page tables can support +#endif + +#if (PGDIR_SHIFT + PGDIR_BITS) != 53 #error Page table parameters do not cover virtual address space properly. #endif @@ -67,44 +76,34 @@ #error PMD_SHIFT must equal HPAGE_SHIFT for transparent huge pages. #endif -/* PMDs point to PTE tables which are 4K aligned. */ -#define PMD_PADDR _AC(0xfffffffe,UL) -#define PMD_PADDR_SHIFT _AC(11,UL) - -#define PMD_ISHUGE _AC(0x00000001,UL) +#ifndef __ASSEMBLY__ -/* This is the PMD layout when PMD_ISHUGE is set. With 4MB huge - * pages, this frees up a bunch of bits in the layout that we can - * use for the protection settings and software metadata. - */ -#define PMD_HUGE_PADDR _AC(0xfffff800,UL) -#define PMD_HUGE_PROTBITS _AC(0x000007ff,UL) -#define PMD_HUGE_PRESENT _AC(0x00000400,UL) -#define PMD_HUGE_WRITE _AC(0x00000200,UL) -#define PMD_HUGE_DIRTY _AC(0x00000100,UL) -#define PMD_HUGE_ACCESSED _AC(0x00000080,UL) -#define PMD_HUGE_EXEC _AC(0x00000040,UL) -#define PMD_HUGE_SPLITTING _AC(0x00000020,UL) - -/* PGDs point to PMD tables which are 8K aligned. */ -#define PGD_PADDR _AC(0xfffffffc,UL) -#define PGD_PADDR_SHIFT _AC(11,UL) +extern unsigned long VMALLOC_END; -#ifndef __ASSEMBLY__ +#define vmemmap ((struct page *)VMEMMAP_BASE) #include <linux/sched.h> +bool kern_addr_valid(unsigned long addr); + /* Entries per page directory level. */ -#define PTRS_PER_PTE (1UL << (PAGE_SHIFT-4)) +#define PTRS_PER_PTE (1UL << (PAGE_SHIFT-3)) #define PTRS_PER_PMD (1UL << PMD_BITS) +#define PTRS_PER_PUD (1UL << PUD_BITS) #define PTRS_PER_PGD (1UL << PGDIR_BITS) /* Kernel has a separate 44bit address space. */ #define FIRST_USER_ADDRESS 0 -#define pte_ERROR(e) __builtin_trap() -#define pmd_ERROR(e) __builtin_trap() -#define pgd_ERROR(e) __builtin_trap() +#define pmd_ERROR(e) \ + pr_err("%s:%d: bad pmd %p(%016lx) seen at (%pS)\n", \ + __FILE__, __LINE__, &(e), pmd_val(e), __builtin_return_address(0)) +#define pud_ERROR(e) \ + pr_err("%s:%d: bad pud %p(%016lx) seen at (%pS)\n", \ + __FILE__, __LINE__, &(e), pud_val(e), __builtin_return_address(0)) +#define pgd_ERROR(e) \ + pr_err("%s:%d: bad pgd %p(%016lx) seen at (%pS)\n", \ + __FILE__, __LINE__, &(e), pgd_val(e), __builtin_return_address(0)) #endif /* !(__ASSEMBLY__) */ @@ -112,6 +111,8 @@ #define _PAGE_VALID _AC(0x8000000000000000,UL) /* Valid TTE */ #define _PAGE_R _AC(0x8000000000000000,UL) /* Keep ref bit uptodate*/ #define _PAGE_SPECIAL _AC(0x0200000000000000,UL) /* Special page */ +#define _PAGE_PMD_HUGE _AC(0x0100000000000000,UL) /* Huge page */ +#define _PAGE_PUD_HUGE _PAGE_PMD_HUGE /* Advertise support for _PAGE_SPECIAL */ #define __HAVE_ARCH_PTE_SPECIAL @@ -125,6 +126,7 @@ #define _PAGE_IE_4U _AC(0x0800000000000000,UL) /* Invert Endianness */ #define _PAGE_SOFT2_4U _AC(0x07FC000000000000,UL) /* Software bits, set 2 */ #define _PAGE_SPECIAL_4U _AC(0x0200000000000000,UL) /* Special page */ +#define _PAGE_PMD_HUGE_4U _AC(0x0100000000000000,UL) /* Huge page */ #define _PAGE_RES1_4U _AC(0x0002000000000000,UL) /* Reserved */ #define _PAGE_SZ32MB_4U _AC(0x0001000000000000,UL) /* (Panther) 32MB page */ #define _PAGE_SZ256MB_4U _AC(0x2001000000000000,UL) /* (Panther) 256MB page */ @@ -155,6 +157,7 @@ #define _PAGE_READ_4V _AC(0x0800000000000000,UL) /* Readable SW Bit */ #define _PAGE_WRITE_4V _AC(0x0400000000000000,UL) /* Writable SW Bit */ #define _PAGE_SPECIAL_4V _AC(0x0200000000000000,UL) /* Special page */ +#define _PAGE_PMD_HUGE_4V _AC(0x0100000000000000,UL) /* Huge page */ #define _PAGE_PADDR_4V _AC(0x00FFFFFFFFFFE000,UL) /* paddr[55:13] */ #define _PAGE_IE_4V _AC(0x0000000000001000,UL) /* Invert Endianness */ #define _PAGE_E_4V _AC(0x0000000000000800,UL) /* side-Effect */ @@ -180,6 +183,10 @@ #define _PAGE_SZBITS_4U _PAGE_SZ8K_4U #define _PAGE_SZBITS_4V _PAGE_SZ8K_4V +#if REAL_HPAGE_SHIFT != 22 +#error REAL_HPAGE_SHIFT and _PAGE_SZHUGE_foo must match up +#endif + #define _PAGE_SZHUGE_4U _PAGE_SZ4MB_4U #define _PAGE_SZHUGE_4V _PAGE_SZ4MB_4V @@ -239,16 +246,13 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t prot) #define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot)) #ifdef CONFIG_TRANSPARENT_HUGEPAGE -extern pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot); -#define mk_pmd(page, pgprot) pfn_pmd(page_to_pfn(page), (pgprot)) - -extern pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot); - -static inline pmd_t pmd_mkhuge(pmd_t pmd) +static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot) { - /* Do nothing, mk_pmd() does this part. */ - return pmd; + pte_t pte = pfn_pte(page_nr, pgprot); + + return __pmd(pte_val(pte)); } +#define mk_pmd(page, pgprot) pfn_pmd(page_to_pfn(page), (pgprot)) #endif /* This one can be done with two shifts. */ @@ -277,8 +281,8 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t prot) { unsigned long mask, tmp; - /* SUN4U: 0x600307ffffffecb8 (negated == 0x9ffcf80000001347) - * SUN4V: 0x30ffffffffffee17 (negated == 0xcf000000000011e8) + /* SUN4U: 0x630107ffffffec38 (negated == 0x9cfef800000013c7) + * SUN4V: 0x33ffffffffffee07 (negated == 0xcc000000000011f8) * * Even if we use negation tricks the result is still a 6 * instruction sequence, so don't try to play fancy and just @@ -308,15 +312,26 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t prot) " .previous\n" : "=r" (mask), "=r" (tmp) : "i" (_PAGE_PADDR_4U | _PAGE_MODIFIED_4U | _PAGE_ACCESSED_4U | - _PAGE_CP_4U | _PAGE_CV_4U | _PAGE_E_4U | _PAGE_PRESENT_4U | - _PAGE_SPECIAL), + _PAGE_CP_4U | _PAGE_CV_4U | _PAGE_E_4U | + _PAGE_SPECIAL | _PAGE_PMD_HUGE | _PAGE_SZALL_4U), "i" (_PAGE_PADDR_4V | _PAGE_MODIFIED_4V | _PAGE_ACCESSED_4V | - _PAGE_CP_4V | _PAGE_CV_4V | _PAGE_E_4V | _PAGE_PRESENT_4V | - _PAGE_SPECIAL)); + _PAGE_CP_4V | _PAGE_CV_4V | _PAGE_E_4V | + _PAGE_SPECIAL | _PAGE_PMD_HUGE | _PAGE_SZALL_4V)); return __pte((pte_val(pte) & mask) | (pgprot_val(prot) & ~mask)); } +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) +{ + pte_t pte = __pte(pmd_val(pmd)); + + pte = pte_modify(pte, newprot); + + return __pmd(pte_val(pte)); +} +#endif + static inline pte_t pgoff_to_pte(unsigned long off) { off <<= PAGE_SHIFT; @@ -357,7 +372,7 @@ static inline pgprot_t pgprot_noncached(pgprot_t prot) */ #define pgprot_noncached pgprot_noncached -#ifdef CONFIG_HUGETLB_PAGE +#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) static inline pte_t pte_mkhuge(pte_t pte) { unsigned long mask; @@ -375,6 +390,17 @@ static inline pte_t pte_mkhuge(pte_t pte) return __pte(pte_val(pte) | mask); } +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +static inline pmd_t pmd_mkhuge(pmd_t pmd) +{ + pte_t pte = __pte(pmd_val(pmd)); + + pte = pte_mkhuge(pte); + pte_val(pte) |= _PAGE_PMD_HUGE; + + return __pmd(pte_val(pte)); +} +#endif #endif static inline pte_t pte_mkdirty(pte_t pte) @@ -626,95 +652,136 @@ static inline unsigned long pte_special(pte_t pte) return pte_val(pte) & _PAGE_SPECIAL; } -static inline int pmd_large(pmd_t pmd) +static inline unsigned long pmd_large(pmd_t pmd) { - return (pmd_val(pmd) & (PMD_ISHUGE | PMD_HUGE_PRESENT)) == - (PMD_ISHUGE | PMD_HUGE_PRESENT); + pte_t pte = __pte(pmd_val(pmd)); + + return pte_val(pte) & _PAGE_PMD_HUGE; } -#ifdef CONFIG_TRANSPARENT_HUGEPAGE -static inline int pmd_young(pmd_t pmd) +static inline unsigned long pmd_pfn(pmd_t pmd) { - return pmd_val(pmd) & PMD_HUGE_ACCESSED; + pte_t pte = __pte(pmd_val(pmd)); + + return pte_pfn(pte); } -static inline int pmd_write(pmd_t pmd) +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +static inline unsigned long pmd_young(pmd_t pmd) { - return pmd_val(pmd) & PMD_HUGE_WRITE; + pte_t pte = __pte(pmd_val(pmd)); + + return pte_young(pte); } -static inline unsigned long pmd_pfn(pmd_t pmd) +static inline unsigned long pmd_write(pmd_t pmd) { - unsigned long val = pmd_val(pmd) & PMD_HUGE_PADDR; + pte_t pte = __pte(pmd_val(pmd)); - return val >> (PAGE_SHIFT - PMD_PADDR_SHIFT); + return pte_write(pte); } -static inline int pmd_trans_splitting(pmd_t pmd) +static inline unsigned long pmd_trans_huge(pmd_t pmd) { - return (pmd_val(pmd) & (PMD_ISHUGE|PMD_HUGE_SPLITTING)) == - (PMD_ISHUGE|PMD_HUGE_SPLITTING); + pte_t pte = __pte(pmd_val(pmd)); + + return pte_val(pte) & _PAGE_PMD_HUGE; } -static inline int pmd_trans_huge(pmd_t pmd) +static inline unsigned long pmd_trans_splitting(pmd_t pmd) { - return pmd_val(pmd) & PMD_ISHUGE; + pte_t pte = __pte(pmd_val(pmd)); + + return pmd_trans_huge(pmd) && pte_special(pte); } #define has_transparent_hugepage() 1 static inline pmd_t pmd_mkold(pmd_t pmd) { - pmd_val(pmd) &= ~PMD_HUGE_ACCESSED; - return pmd; + pte_t pte = __pte(pmd_val(pmd)); + + pte = pte_mkold(pte); + + return __pmd(pte_val(pte)); } static inline pmd_t pmd_wrprotect(pmd_t pmd) { - pmd_val(pmd) &= ~PMD_HUGE_WRITE; - return pmd; + pte_t pte = __pte(pmd_val(pmd)); + + pte = pte_wrprotect(pte); + + return __pmd(pte_val(pte)); } static inline pmd_t pmd_mkdirty(pmd_t pmd) { - pmd_val(pmd) |= PMD_HUGE_DIRTY; - return pmd; + pte_t pte = __pte(pmd_val(pmd)); + + pte = pte_mkdirty(pte); + + return __pmd(pte_val(pte)); } static inline pmd_t pmd_mkyoung(pmd_t pmd) { - pmd_val(pmd) |= PMD_HUGE_ACCESSED; - return pmd; + pte_t pte = __pte(pmd_val(pmd)); + + pte = pte_mkyoung(pte); + + return __pmd(pte_val(pte)); } static inline pmd_t pmd_mkwrite(pmd_t pmd) { - pmd_val(pmd) |= PMD_HUGE_WRITE; - return pmd; -} + pte_t pte = __pte(pmd_val(pmd)); -static inline pmd_t pmd_mknotpresent(pmd_t pmd) -{ - pmd_val(pmd) &= ~PMD_HUGE_PRESENT; - return pmd; + pte = pte_mkwrite(pte); + + return __pmd(pte_val(pte)); } static inline pmd_t pmd_mksplitting(pmd_t pmd) { - pmd_val(pmd) |= PMD_HUGE_SPLITTING; - return pmd; + pte_t pte = __pte(pmd_val(pmd)); + + pte = pte_mkspecial(pte); + + return __pmd(pte_val(pte)); } -extern pgprot_t pmd_pgprot(pmd_t entry); +static inline pgprot_t pmd_pgprot(pmd_t entry) +{ + unsigned long val = pmd_val(entry); + + return __pgprot(val); +} #endif static inline int pmd_present(pmd_t pmd) { - return pmd_val(pmd) != 0U; + return pmd_val(pmd) != 0UL; } #define pmd_none(pmd) (!pmd_val(pmd)) +/* pmd_bad() is only called on non-trans-huge PMDs. Our encoding is + * very simple, it's just the physical address. PTE tables are of + * size PAGE_SIZE so make sure the sub-PAGE_SIZE bits are clear and + * the top bits outside of the range of any physical address size we + * support are clear as well. We also validate the physical itself. + */ +#define pmd_bad(pmd) (pmd_val(pmd) & ~PAGE_MASK) + +#define pud_none(pud) (!pud_val(pud)) + +#define pud_bad(pud) (pud_val(pud) & ~PAGE_MASK) + +#define pgd_none(pgd) (!pgd_val(pgd)) + +#define pgd_bad(pgd) (pgd_val(pgd) & ~PAGE_MASK) + #ifdef CONFIG_TRANSPARENT_HUGEPAGE extern void set_pmd_at(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t pmd); @@ -728,37 +795,54 @@ static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, static inline void pmd_set(struct mm_struct *mm, pmd_t *pmdp, pte_t *ptep) { - unsigned long val = __pa((unsigned long) (ptep)) >> PMD_PADDR_SHIFT; + unsigned long val = __pa((unsigned long) (ptep)); pmd_val(*pmdp) = val; } #define pud_set(pudp, pmdp) \ - (pud_val(*(pudp)) = (__pa((unsigned long) (pmdp)) >> PGD_PADDR_SHIFT)) + (pud_val(*(pudp)) = (__pa((unsigned long) (pmdp)))) static inline unsigned long __pmd_page(pmd_t pmd) { - unsigned long paddr = (unsigned long) pmd_val(pmd); -#ifdef CONFIG_TRANSPARENT_HUGEPAGE - if (pmd_val(pmd) & PMD_ISHUGE) - paddr &= PMD_HUGE_PADDR; -#endif - paddr <<= PMD_PADDR_SHIFT; - return ((unsigned long) __va(paddr)); + pte_t pte = __pte(pmd_val(pmd)); + unsigned long pfn; + + pfn = pte_pfn(pte); + + return ((unsigned long) __va(pfn << PAGE_SHIFT)); } #define pmd_page(pmd) virt_to_page((void *)__pmd_page(pmd)) #define pud_page_vaddr(pud) \ - ((unsigned long) __va((((unsigned long)pud_val(pud))<<PGD_PADDR_SHIFT))) + ((unsigned long) __va(pud_val(pud))) #define pud_page(pud) virt_to_page((void *)pud_page_vaddr(pud)) -#define pmd_bad(pmd) (0) -#define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0U) -#define pud_none(pud) (!pud_val(pud)) -#define pud_bad(pud) (0) +#define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0UL) #define pud_present(pud) (pud_val(pud) != 0U) -#define pud_clear(pudp) (pud_val(*(pudp)) = 0U) +#define pud_clear(pudp) (pud_val(*(pudp)) = 0UL) +#define pgd_page_vaddr(pgd) \ + ((unsigned long) __va(pgd_val(pgd))) +#define pgd_present(pgd) (pgd_val(pgd) != 0U) +#define pgd_clear(pgdp) (pgd_val(*(pgd)) = 0UL) + +static inline unsigned long pud_large(pud_t pud) +{ + pte_t pte = __pte(pud_val(pud)); + + return pte_val(pte) & _PAGE_PMD_HUGE; +} + +static inline unsigned long pud_pfn(pud_t pud) +{ + pte_t pte = __pte(pud_val(pud)); + + return pte_pfn(pte); +} /* Same in both SUN4V and SUN4U. */ #define pte_none(pte) (!pte_val(pte)) +#define pgd_set(pgdp, pudp) \ + (pgd_val(*(pgdp)) = (__pa((unsigned long) (pudp)))) + /* to find an entry in a page-table-directory. */ #define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1)) #define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address)) @@ -766,6 +850,11 @@ static inline unsigned long __pmd_page(pmd_t pmd) /* to find an entry in a kernel page-table-directory */ #define pgd_offset_k(address) pgd_offset(&init_mm, address) +/* Find an entry in the third-level page table.. */ +#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)) +#define pud_offset(pgdp, address) \ + ((pud_t *) pgd_page_vaddr(*(pgdp)) + pud_index(address)) + /* Find an entry in the second-level page table.. */ #define pmd_offset(pudp, address) \ ((pmd_t *) pud_page_vaddr(*(pudp)) + \ @@ -789,7 +878,7 @@ static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm, pmd_t *pmdp) { pmd_t pmd = *pmdp; - set_pmd_at(mm, addr, pmdp, __pmd(0U)); + set_pmd_at(mm, addr, pmdp, __pmd(0UL)); return pmd; } @@ -837,8 +926,7 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr, }) #endif -extern pgd_t swapper_pg_dir[2048]; -extern pmd_t swapper_low_pmd_dir[2048]; +extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; extern void paging_init(void); extern unsigned long find_ecache_flush_span(unsigned long size); @@ -852,6 +940,10 @@ extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t *); extern void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmd); +#define __HAVE_ARCH_PMDP_INVALIDATE +extern void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, + pmd_t *pmdp); + #define __HAVE_ARCH_PGTABLE_DEPOSIT extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, pgtable_t pgtable); @@ -878,18 +970,6 @@ extern unsigned long pte_file(pte_t); extern pte_t pgoff_to_pte(unsigned long); #define PTE_FILE_MAX_BITS (64UL - PAGE_SHIFT - 1UL) -extern unsigned long sparc64_valid_addr_bitmap[]; - -/* Needs to be defined here and not in linux/mm.h, as it is arch dependent */ -static inline bool kern_addr_valid(unsigned long addr) -{ - unsigned long paddr = __pa(addr); - - if ((paddr >> 41UL) != 0UL) - return false; - return test_bit(paddr >> 22, sparc64_valid_addr_bitmap); -} - extern int page_in_phys_avail(unsigned long paddr); /* diff --git a/arch/sparc/include/asm/setup.h b/arch/sparc/include/asm/setup.h index 5e35e05..acd6146 100644 --- a/arch/sparc/include/asm/setup.h +++ b/arch/sparc/include/asm/setup.h @@ -24,6 +24,10 @@ static inline int con_is_present(void) } #endif +#ifdef CONFIG_SPARC64 +extern void __init start_early_boot(void); +#endif + extern void sun_do_break(void); extern int stop_a_enabled; extern int scons_pwroff; diff --git a/arch/sparc/include/asm/sparsemem.h b/arch/sparc/include/asm/sparsemem.h index b99d4e4..e5e1752 100644 --- a/arch/sparc/include/asm/sparsemem.h +++ b/arch/sparc/include/asm/sparsemem.h @@ -3,9 +3,11 @@ #ifdef __KERNEL__ +#include <asm/page.h> + #define SECTION_SIZE_BITS 30 -#define MAX_PHYSADDR_BITS 42 -#define MAX_PHYSMEM_BITS 42 +#define MAX_PHYSADDR_BITS MAX_PHYS_ADDRESS_BITS +#define MAX_PHYSMEM_BITS MAX_PHYS_ADDRESS_BITS #endif /* !(__KERNEL__) */ diff --git a/arch/sparc/include/asm/spitfire.h b/arch/sparc/include/asm/spitfire.h index 6b67e50..69424d4 100644 --- a/arch/sparc/include/asm/spitfire.h +++ b/arch/sparc/include/asm/spitfire.h @@ -45,6 +45,8 @@ #define SUN4V_CHIP_NIAGARA3 0x03 #define SUN4V_CHIP_NIAGARA4 0x04 #define SUN4V_CHIP_NIAGARA5 0x05 +#define SUN4V_CHIP_SPARC_M6 0x06 +#define SUN4V_CHIP_SPARC_M7 0x07 #define SUN4V_CHIP_SPARC64X 0x8a #define SUN4V_CHIP_UNKNOWN 0xff diff --git a/arch/sparc/include/asm/thread_info_64.h b/arch/sparc/include/asm/thread_info_64.h index d5e5042..6cda09d 100644 --- a/arch/sparc/include/asm/thread_info_64.h +++ b/arch/sparc/include/asm/thread_info_64.h @@ -63,7 +63,8 @@ struct thread_info { struct pt_regs *kern_una_regs; unsigned int kern_una_insn; - unsigned long fpregs[0] __attribute__ ((aligned(64))); + unsigned long fpregs[(7 * 256) / sizeof(unsigned long)] + __attribute__ ((aligned(64))); }; #endif /* !(__ASSEMBLY__) */ @@ -102,6 +103,7 @@ struct thread_info { #define FAULT_CODE_ITLB 0x04 /* Miss happened in I-TLB */ #define FAULT_CODE_WINFIXUP 0x08 /* Miss happened during spill/fill */ #define FAULT_CODE_BLKCOMMIT 0x10 /* Use blk-commit ASI in copy_page */ +#define FAULT_CODE_BAD_RA 0x20 /* Bad RA for sun4v */ #if PAGE_SHIFT == 13 #define THREAD_SIZE (2*PAGE_SIZE) diff --git a/arch/sparc/include/asm/tlbflush_64.h b/arch/sparc/include/asm/tlbflush_64.h index f0d6a97..1a4bb97 100644 --- a/arch/sparc/include/asm/tlbflush_64.h +++ b/arch/sparc/include/asm/tlbflush_64.h @@ -35,6 +35,8 @@ static inline void flush_tlb_range(struct vm_area_struct *vma, { } +void flush_tlb_kernel_range(unsigned long start, unsigned long end); + #define __HAVE_ARCH_ENTER_LAZY_MMU_MODE extern void flush_tlb_pending(void); @@ -49,11 +51,6 @@ extern void __flush_tlb_kernel_range(unsigned long start, unsigned long end); #ifndef CONFIG_SMP -#define flush_tlb_kernel_range(start,end) \ -do { flush_tsb_kernel_range(start,end); \ - __flush_tlb_kernel_range(start,end); \ -} while (0) - static inline void global_flush_tlb_page(struct mm_struct *mm, unsigned long vaddr) { __flush_tlb_page(CTX_HWBITS(mm->context), vaddr); @@ -64,11 +61,6 @@ static inline void global_flush_tlb_page(struct mm_struct *mm, unsigned long vad extern void smp_flush_tlb_kernel_range(unsigned long start, unsigned long end); extern void smp_flush_tlb_page(struct mm_struct *mm, unsigned long vaddr); -#define flush_tlb_kernel_range(start, end) \ -do { flush_tsb_kernel_range(start,end); \ - smp_flush_tlb_kernel_range(start, end); \ -} while (0) - #define global_flush_tlb_page(mm, vaddr) \ smp_flush_tlb_page(mm, vaddr) diff --git a/arch/sparc/include/asm/tsb.h b/arch/sparc/include/asm/tsb.h index e696432..ecb49cf 100644 --- a/arch/sparc/include/asm/tsb.h +++ b/arch/sparc/include/asm/tsb.h @@ -133,107 +133,89 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end; sub TSB, 0x8, TSB; \ TSB_STORE(TSB, TAG); - /* Do a kernel page table walk. Leaves physical PTE pointer in - * REG1. Jumps to FAIL_LABEL on early page table walk termination. - * VADDR will not be clobbered, but REG2 will. + /* Do a kernel page table walk. Leaves valid PTE value in + * REG1. Jumps to FAIL_LABEL on early page table walk + * termination. VADDR will not be clobbered, but REG2 will. + * + * There are two masks we must apply to propagate bits from + * the virtual address into the PTE physical address field + * when dealing with huge pages. This is because the page + * table boundaries do not match the huge page size(s) the + * hardware supports. + * + * In these cases we propagate the bits that are below the + * page table level where we saw the huge page mapping, but + * are still within the relevant physical bits for the huge + * page size in question. So for PMD mappings (which fall on + * bit 23, for 8MB per PMD) we must propagate bit 22 for a + * 4MB huge page. For huge PUDs (which fall on bit 33, for + * 8GB per PUD), we have to accomodate 256MB and 2GB huge + * pages. So for those we propagate bits 32 to 28. */ #define KERN_PGTABLE_WALK(VADDR, REG1, REG2, FAIL_LABEL) \ sethi %hi(swapper_pg_dir), REG1; \ or REG1, %lo(swapper_pg_dir), REG1; \ sllx VADDR, 64 - (PGDIR_SHIFT + PGDIR_BITS), REG2; \ srlx REG2, 64 - PAGE_SHIFT, REG2; \ - andn REG2, 0x3, REG2; \ - lduw [REG1 + REG2], REG1; \ + andn REG2, 0x7, REG2; \ + ldx [REG1 + REG2], REG1; \ brz,pn REG1, FAIL_LABEL; \ - sllx VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \ + sllx VADDR, 64 - (PUD_SHIFT + PUD_BITS), REG2; \ srlx REG2, 64 - PAGE_SHIFT, REG2; \ - sllx REG1, PGD_PADDR_SHIFT, REG1; \ - andn REG2, 0x3, REG2; \ - lduwa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \ + andn REG2, 0x7, REG2; \ + ldxa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \ brz,pn REG1, FAIL_LABEL; \ - sllx VADDR, 64 - PMD_SHIFT, REG2; \ - srlx REG2, 64 - (PAGE_SHIFT - 1), REG2; \ - sllx REG1, PMD_PADDR_SHIFT, REG1; \ + sethi %uhi(_PAGE_PUD_HUGE), REG2; \ + brz,pn REG1, FAIL_LABEL; \ + sllx REG2, 32, REG2; \ + andcc REG1, REG2, %g0; \ + sethi %hi(0xf8000000), REG2; \ + bne,pt %xcc, 697f; \ + sllx REG2, 1, REG2; \ + sllx VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \ + srlx REG2, 64 - PAGE_SHIFT, REG2; \ andn REG2, 0x7, REG2; \ - add REG1, REG2, REG1; - - /* These macros exists only to make the PMD translator below - * easier to read. It hides the ELF section switch for the - * sun4v code patching. - */ -#define OR_PTE_BIT_1INSN(REG, NAME) \ -661: or REG, _PAGE_##NAME##_4U, REG; \ - .section .sun4v_1insn_patch, "ax"; \ - .word 661b; \ - or REG, _PAGE_##NAME##_4V, REG; \ - .previous; - -#define OR_PTE_BIT_2INSN(REG, TMP, NAME) \ -661: sethi %hi(_PAGE_##NAME##_4U), TMP; \ - or REG, TMP, REG; \ - .section .sun4v_2insn_patch, "ax"; \ - .word 661b; \ - mov -1, TMP; \ - or REG, _PAGE_##NAME##_4V, REG; \ - .previous; - - /* Load into REG the PTE value for VALID, CACHE, and SZHUGE. */ -#define BUILD_PTE_VALID_SZHUGE_CACHE(REG) \ -661: sethi %uhi(_PAGE_VALID|_PAGE_SZHUGE_4U), REG; \ - .section .sun4v_1insn_patch, "ax"; \ - .word 661b; \ - sethi %uhi(_PAGE_VALID), REG; \ - .previous; \ - sllx REG, 32, REG; \ -661: or REG, _PAGE_CP_4U|_PAGE_CV_4U, REG; \ - .section .sun4v_1insn_patch, "ax"; \ - .word 661b; \ - or REG, _PAGE_CP_4V|_PAGE_CV_4V|_PAGE_SZHUGE_4V, REG; \ - .previous; + ldxa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \ + sethi %uhi(_PAGE_PMD_HUGE), REG2; \ + brz,pn REG1, FAIL_LABEL; \ + sllx REG2, 32, REG2; \ + andcc REG1, REG2, %g0; \ + be,pn %xcc, 698f; \ + sethi %hi(0x400000), REG2; \ +697: brgez,pn REG1, FAIL_LABEL; \ + andn REG1, REG2, REG1; \ + and VADDR, REG2, REG2; \ + ba,pt %xcc, 699f; \ + or REG1, REG2, REG1; \ +698: sllx VADDR, 64 - PMD_SHIFT, REG2; \ + srlx REG2, 64 - PAGE_SHIFT, REG2; \ + andn REG2, 0x7, REG2; \ + ldxa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \ + brgez,pn REG1, FAIL_LABEL; \ + nop; \ +699: /* PMD has been loaded into REG1, interpret the value, seeing * if it is a HUGE PMD or a normal one. If it is not valid * then jump to FAIL_LABEL. If it is a HUGE PMD, and it * translates to a valid PTE, branch to PTE_LABEL. * - * We translate the PMD by hand, one bit at a time, - * constructing the huge PTE. - * - * So we construct the PTE in REG2 as follows: - * - * 1) Extract the PMD PFN from REG1 and place it into REG2. - * - * 2) Translate PMD protection bits in REG1 into REG2, one bit - * at a time using andcc tests on REG1 and OR's into REG2. - * - * Only two bits to be concerned with here, EXEC and WRITE. - * Now REG1 is freed up and we can use it as a temporary. - * - * 3) Construct the VALID, CACHE, and page size PTE bits in - * REG1, OR with REG2 to form final PTE. + * We have to propagate the 4MB bit of the virtual address + * because we are fabricating 8MB pages using 4MB hw pages. */ #ifdef CONFIG_TRANSPARENT_HUGEPAGE #define USER_PGTABLE_CHECK_PMD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, PTE_LABEL) \ - brz,pn REG1, FAIL_LABEL; \ - andcc REG1, PMD_ISHUGE, %g0; \ - be,pt %xcc, 700f; \ - and REG1, PMD_HUGE_PRESENT|PMD_HUGE_ACCESSED, REG2; \ - cmp REG2, PMD_HUGE_PRESENT|PMD_HUGE_ACCESSED; \ - bne,pn %xcc, FAIL_LABEL; \ - andn REG1, PMD_HUGE_PROTBITS, REG2; \ - sllx REG2, PMD_PADDR_SHIFT, REG2; \ - /* REG2 now holds PFN << PAGE_SHIFT */ \ - andcc REG1, PMD_HUGE_WRITE, %g0; \ - bne,a,pt %xcc, 1f; \ - OR_PTE_BIT_1INSN(REG2, W); \ -1: andcc REG1, PMD_HUGE_EXEC, %g0; \ - be,pt %xcc, 1f; \ - nop; \ - OR_PTE_BIT_2INSN(REG2, REG1, EXEC); \ - /* REG1 can now be clobbered, build final PTE */ \ -1: BUILD_PTE_VALID_SZHUGE_CACHE(REG1); \ - ba,pt %xcc, PTE_LABEL; \ - or REG1, REG2, REG1; \ + brz,pn REG1, FAIL_LABEL; \ + sethi %uhi(_PAGE_PMD_HUGE), REG2; \ + sllx REG2, 32, REG2; \ + andcc REG1, REG2, %g0; \ + be,pt %xcc, 700f; \ + sethi %hi(4 * 1024 * 1024), REG2; \ + brgez,pn REG1, FAIL_LABEL; \ + andn REG1, REG2, REG1; \ + and VADDR, REG2, REG2; \ + brlz,pt REG1, PTE_LABEL; \ + or REG1, REG2, REG1; \ 700: #else #define USER_PGTABLE_CHECK_PMD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, PTE_LABEL) \ @@ -253,18 +235,21 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end; #define USER_PGTABLE_WALK_TL1(VADDR, PHYS_PGD, REG1, REG2, FAIL_LABEL) \ sllx VADDR, 64 - (PGDIR_SHIFT + PGDIR_BITS), REG2; \ srlx REG2, 64 - PAGE_SHIFT, REG2; \ - andn REG2, 0x3, REG2; \ - lduwa [PHYS_PGD + REG2] ASI_PHYS_USE_EC, REG1; \ + andn REG2, 0x7, REG2; \ + ldxa [PHYS_PGD + REG2] ASI_PHYS_USE_EC, REG1; \ + brz,pn REG1, FAIL_LABEL; \ + sllx VADDR, 64 - (PUD_SHIFT + PUD_BITS), REG2; \ + srlx REG2, 64 - PAGE_SHIFT, REG2; \ + andn REG2, 0x7, REG2; \ + ldxa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \ brz,pn REG1, FAIL_LABEL; \ sllx VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \ srlx REG2, 64 - PAGE_SHIFT, REG2; \ - sllx REG1, PGD_PADDR_SHIFT, REG1; \ - andn REG2, 0x3, REG2; \ - lduwa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \ + andn REG2, 0x7, REG2; \ + ldxa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \ USER_PGTABLE_CHECK_PMD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, 800f) \ sllx VADDR, 64 - PMD_SHIFT, REG2; \ - srlx REG2, 64 - (PAGE_SHIFT - 1), REG2; \ - sllx REG1, PMD_PADDR_SHIFT, REG1; \ + srlx REG2, 64 - PAGE_SHIFT, REG2; \ andn REG2, 0x7, REG2; \ add REG1, REG2, REG1; \ ldxa [REG1] ASI_PHYS_USE_EC, REG1; \ @@ -306,8 +291,6 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end; (KERNEL_TSB_SIZE_BYTES / 16) #define KERNEL_TSB4M_NENTRIES 4096 -#define KTSB_PHYS_SHIFT 15 - /* Do a kernel TSB lookup at tl>0 on VADDR+TAG, branch to OK_LABEL * on TSB hit. REG1, REG2, REG3, and REG4 are used as temporaries * and the found TTE will be left in REG1. REG3 and REG4 must @@ -316,17 +299,15 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end; * VADDR and TAG will be preserved and not clobbered by this macro. */ #define KERN_TSB_LOOKUP_TL1(VADDR, TAG, REG1, REG2, REG3, REG4, OK_LABEL) \ -661: sethi %hi(swapper_tsb), REG1; \ - or REG1, %lo(swapper_tsb), REG1; \ +661: sethi %uhi(swapper_tsb), REG1; \ + sethi %hi(swapper_tsb), REG2; \ + or REG1, %ulo(swapper_tsb), REG1; \ + or REG2, %lo(swapper_tsb), REG2; \ .section .swapper_tsb_phys_patch, "ax"; \ .word 661b; \ .previous; \ -661: nop; \ - .section .tsb_ldquad_phys_patch, "ax"; \ - .word 661b; \ - sllx REG1, KTSB_PHYS_SHIFT, REG1; \ - sllx REG1, KTSB_PHYS_SHIFT, REG1; \ - .previous; \ + sllx REG1, 32, REG1; \ + or REG1, REG2, REG1; \ srlx VADDR, PAGE_SHIFT, REG2; \ and REG2, (KERNEL_TSB_NENTRIES - 1), REG2; \ sllx REG2, 4, REG2; \ @@ -341,17 +322,15 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end; * we can make use of that for the index computation. */ #define KERN_TSB4M_LOOKUP_TL1(TAG, REG1, REG2, REG3, REG4, OK_LABEL) \ -661: sethi %hi(swapper_4m_tsb), REG1; \ - or REG1, %lo(swapper_4m_tsb), REG1; \ +661: sethi %uhi(swapper_4m_tsb), REG1; \ + sethi %hi(swapper_4m_tsb), REG2; \ + or REG1, %ulo(swapper_4m_tsb), REG1; \ + or REG2, %lo(swapper_4m_tsb), REG2; \ .section .swapper_4m_tsb_phys_patch, "ax"; \ .word 661b; \ .previous; \ -661: nop; \ - .section .tsb_ldquad_phys_patch, "ax"; \ - .word 661b; \ - sllx REG1, KTSB_PHYS_SHIFT, REG1; \ - sllx REG1, KTSB_PHYS_SHIFT, REG1; \ - .previous; \ + sllx REG1, 32, REG1; \ + or REG1, REG2, REG1; \ and TAG, (KERNEL_TSB4M_NENTRIES - 1), REG2; \ sllx REG2, 4, REG2; \ add REG1, REG2, REG2; \ diff --git a/arch/sparc/include/asm/uaccess_64.h b/arch/sparc/include/asm/uaccess_64.h index e562d3c..ad7e178 100644 --- a/arch/sparc/include/asm/uaccess_64.h +++ b/arch/sparc/include/asm/uaccess_64.h @@ -262,8 +262,8 @@ extern unsigned long __must_check __clear_user(void __user *, unsigned long); extern __must_check long strlen_user(const char __user *str); extern __must_check long strnlen_user(const char __user *str, long n); -#define __copy_to_user_inatomic ___copy_to_user -#define __copy_from_user_inatomic ___copy_from_user +#define __copy_to_user_inatomic __copy_to_user +#define __copy_from_user_inatomic __copy_from_user struct pt_regs; extern unsigned long compute_effective_address(struct pt_regs *, diff --git a/arch/sparc/include/asm/vio.h b/arch/sparc/include/asm/vio.h index 432afa8..55841c1 100644 --- a/arch/sparc/include/asm/vio.h +++ b/arch/sparc/include/asm/vio.h @@ -118,12 +118,18 @@ struct vio_disk_attr_info { u8 vdisk_type; #define VD_DISK_TYPE_SLICE 0x01 /* Slice in block device */ #define VD_DISK_TYPE_DISK 0x02 /* Entire block device */ - u16 resv1; + u8 vdisk_mtype; /* v1.1 */ +#define VD_MEDIA_TYPE_FIXED 0x01 /* Fixed device */ +#define VD_MEDIA_TYPE_CD 0x02 /* CD Device */ +#define VD_MEDIA_TYPE_DVD 0x03 /* DVD Device */ + u8 resv1; u32 vdisk_block_size; u64 operations; - u64 vdisk_size; + u64 vdisk_size; /* v1.1 */ u64 max_xfer_size; - u64 resv2[2]; + u32 phys_block_size; /* v1.2 */ + u32 resv2; + u64 resv3[1]; }; struct vio_disk_desc { @@ -259,7 +265,7 @@ static inline u32 vio_dring_avail(struct vio_dring_state *dr, unsigned int ring_size) { return (dr->pending - - ((dr->prod - dr->cons) & (ring_size - 1))); + ((dr->prod - dr->cons) & (ring_size - 1)) - 1); } #define VIO_MAX_TYPE_LEN 32 diff --git a/arch/sparc/include/asm/visasm.h b/arch/sparc/include/asm/visasm.h index 39ca301..11fdf0e 100644 --- a/arch/sparc/include/asm/visasm.h +++ b/arch/sparc/include/asm/visasm.h @@ -39,6 +39,14 @@ 297: wr %o5, FPRS_FEF, %fprs; \ 298: +#define VISEntryHalfFast(fail_label) \ + rd %fprs, %o5; \ + andcc %o5, FPRS_FEF, %g0; \ + be,pt %icc, 297f; \ + nop; \ + ba,a,pt %xcc, fail_label; \ +297: wr %o5, FPRS_FEF, %fprs; + #define VISExitHalf \ wr %o5, 0, %fprs; diff --git a/arch/sparc/include/uapi/asm/swab.h b/arch/sparc/include/uapi/asm/swab.h index a34ad07..4c7c12d 100644 --- a/arch/sparc/include/uapi/asm/swab.h +++ b/arch/sparc/include/uapi/asm/swab.h @@ -9,9 +9,9 @@ static inline __u16 __arch_swab16p(const __u16 *addr) { __u16 ret; - __asm__ __volatile__ ("lduha [%1] %2, %0" + __asm__ __volatile__ ("lduha [%2] %3, %0" : "=r" (ret) - : "r" (addr), "i" (ASI_PL)); + : "m" (*addr), "r" (addr), "i" (ASI_PL)); return ret; } #define __arch_swab16p __arch_swab16p @@ -20,9 +20,9 @@ static inline __u32 __arch_swab32p(const __u32 *addr) { __u32 ret; - __asm__ __volatile__ ("lduwa [%1] %2, %0" + __asm__ __volatile__ ("lduwa [%2] %3, %0" : "=r" (ret) - : "r" (addr), "i" (ASI_PL)); + : "m" (*addr), "r" (addr), "i" (ASI_PL)); return ret; } #define __arch_swab32p __arch_swab32p @@ -31,9 +31,9 @@ static inline __u64 __arch_swab64p(const __u64 *addr) { __u64 ret; - __asm__ __volatile__ ("ldxa [%1] %2, %0" + __asm__ __volatile__ ("ldxa [%2] %3, %0" : "=r" (ret) - : "r" (addr), "i" (ASI_PL)); + : "m" (*addr), "r" (addr), "i" (ASI_PL)); return ret; } #define __arch_swab64p __arch_swab64p diff --git a/arch/sparc/kernel/cpu.c b/arch/sparc/kernel/cpu.c index 5c51258..52e10de 100644 --- a/arch/sparc/kernel/cpu.c +++ b/arch/sparc/kernel/cpu.c @@ -493,6 +493,18 @@ static void __init sun4v_cpu_probe(void) sparc_pmu_type = "niagara5"; break; + case SUN4V_CHIP_SPARC_M6: + sparc_cpu_type = "SPARC-M6"; + sparc_fpu_type = "SPARC-M6 integrated FPU"; + sparc_pmu_type = "sparc-m6"; + break; + + case SUN4V_CHIP_SPARC_M7: + sparc_cpu_type = "SPARC-M7"; + sparc_fpu_type = "SPARC-M7 integrated FPU"; + sparc_pmu_type = "sparc-m7"; + break; + case SUN4V_CHIP_SPARC64X: sparc_cpu_type = "SPARC64-X"; sparc_fpu_type = "SPARC64-X integrated FPU"; diff --git a/arch/sparc/kernel/cpumap.c b/arch/sparc/kernel/cpumap.c index cb5d272..b031c9c 100644 --- a/arch/sparc/kernel/cpumap.c +++ b/arch/sparc/kernel/cpumap.c @@ -327,6 +327,8 @@ static int iterate_cpu(struct cpuinfo_tree *t, unsigned int root_index) case SUN4V_CHIP_NIAGARA3: case SUN4V_CHIP_NIAGARA4: case SUN4V_CHIP_NIAGARA5: + case SUN4V_CHIP_SPARC_M6: + case SUN4V_CHIP_SPARC_M7: case SUN4V_CHIP_SPARC64X: rover_inc_table = niagara_iterate_method; break; diff --git a/arch/sparc/kernel/ds.c b/arch/sparc/kernel/ds.c index dff60ab..f87a55d 100644 --- a/arch/sparc/kernel/ds.c +++ b/arch/sparc/kernel/ds.c @@ -1200,14 +1200,14 @@ static int ds_probe(struct vio_dev *vdev, const struct vio_device_id *id) ds_cfg.tx_irq = vdev->tx_irq; ds_cfg.rx_irq = vdev->rx_irq; - lp = ldc_alloc(vdev->channel_id, &ds_cfg, dp); + lp = ldc_alloc(vdev->channel_id, &ds_cfg, dp, "DS"); if (IS_ERR(lp)) { err = PTR_ERR(lp); goto out_free_ds_states; } dp->lp = lp; - err = ldc_bind(lp, "DS"); + err = ldc_bind(lp); if (err) goto out_free_ldc; diff --git a/arch/sparc/kernel/dtlb_prot.S b/arch/sparc/kernel/dtlb_prot.S index b2c2c5b..d668ca1 100644 --- a/arch/sparc/kernel/dtlb_prot.S +++ b/arch/sparc/kernel/dtlb_prot.S @@ -24,11 +24,11 @@ mov TLB_TAG_ACCESS, %g4 ! For reload of vaddr /* PROT ** ICACHE line 2: More real fault processing */ + ldxa [%g4] ASI_DMMU, %g5 ! Put tagaccess in %g5 bgu,pn %xcc, winfix_trampoline ! Yes, perform winfixup - ldxa [%g4] ASI_DMMU, %g5 ! Put tagaccess in %g5 - ba,pt %xcc, sparc64_realfault_common ! Nope, normal fault mov FAULT_CODE_DTLB | FAULT_CODE_WRITE, %g4 - nop + ba,pt %xcc, sparc64_realfault_common ! Nope, normal fault + nop nop nop nop diff --git a/arch/sparc/kernel/entry.h b/arch/sparc/kernel/entry.h index 9c179fb..3ad726c 100644 --- a/arch/sparc/kernel/entry.h +++ b/arch/sparc/kernel/entry.h @@ -66,13 +66,10 @@ struct pause_patch_entry { extern struct pause_patch_entry __pause_3insn_patch, __pause_3insn_patch_end; -extern void __init per_cpu_patch(void); extern void sun4v_patch_1insn_range(struct sun4v_1insn_patch_entry *, struct sun4v_1insn_patch_entry *); extern void sun4v_patch_2insn_range(struct sun4v_2insn_patch_entry *, struct sun4v_2insn_patch_entry *); -extern void __init sun4v_patch(void); -extern void __init boot_cpu_id_too_large(int cpu); extern unsigned int dcache_parity_tl1_occurred; extern unsigned int icache_parity_tl1_occurred; diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S index 26b706a..3d61fca 100644 --- a/arch/sparc/kernel/head_64.S +++ b/arch/sparc/kernel/head_64.S @@ -282,8 +282,8 @@ sun4v_chip_type: stx %l2, [%l4 + 0x0] ldx [%sp + 2047 + 128 + 0x50], %l3 ! physaddr low /* 4MB align */ - srlx %l3, 22, %l3 - sllx %l3, 22, %l3 + srlx %l3, ILOG2_4MB, %l3 + sllx %l3, ILOG2_4MB, %l3 stx %l3, [%l4 + 0x8] /* Leave service as-is, "call-method" */ @@ -427,6 +427,12 @@ sun4v_chip_type: cmp %g2, '5' be,pt %xcc, 5f mov SUN4V_CHIP_NIAGARA5, %g4 + cmp %g2, '6' + be,pt %xcc, 5f + mov SUN4V_CHIP_SPARC_M6, %g4 + cmp %g2, '7' + be,pt %xcc, 5f + mov SUN4V_CHIP_SPARC_M7, %g4 ba,pt %xcc, 49f nop @@ -585,6 +591,12 @@ niagara_tlb_fixup: cmp %g1, SUN4V_CHIP_NIAGARA5 be,pt %xcc, niagara4_patch nop + cmp %g1, SUN4V_CHIP_SPARC_M6 + be,pt %xcc, niagara4_patch + nop + cmp %g1, SUN4V_CHIP_SPARC_M7 + be,pt %xcc, niagara4_patch + nop call generic_patch_copyops nop @@ -660,14 +672,12 @@ tlb_fixup_done: sethi %hi(init_thread_union), %g6 or %g6, %lo(init_thread_union), %g6 ldx [%g6 + TI_TASK], %g4 - mov %sp, %l6 wr %g0, ASI_P, %asi mov 1, %g1 sllx %g1, THREAD_SHIFT, %g1 sub %g1, (STACKFRAME_SZ + STACK_BIAS), %g1 add %g6, %g1, %sp - mov 0, %fp /* Set per-cpu pointer initially to zero, this makes * the boot-cpu use the in-kernel-image per-cpu areas @@ -694,44 +704,14 @@ tlb_fixup_done: nop #endif - mov %l6, %o1 ! OpenPROM stack call prom_init mov %l7, %o0 ! OpenPROM cif handler - /* Initialize current_thread_info()->cpu as early as possible. - * In order to do that accurately we have to patch up the get_cpuid() - * assembler sequences. And that, in turn, requires that we know - * if we are on a Starfire box or not. While we're here, patch up - * the sun4v sequences as well. + /* To create a one-register-window buffer between the kernel's + * initial stack and the last stack frame we use from the firmware, + * do the rest of the boot from a C helper function. */ - call check_if_starfire - nop - call per_cpu_patch - nop - call sun4v_patch - nop - -#ifdef CONFIG_SMP - call hard_smp_processor_id - nop - cmp %o0, NR_CPUS - blu,pt %xcc, 1f - nop - call boot_cpu_id_too_large - nop - /* Not reached... */ - -1: -#else - mov 0, %o0 -#endif - sth %o0, [%g6 + TI_CPU] - - call prom_init_report - nop - - /* Off we go.... */ - call start_kernel + call start_early_boot nop /* Not reached... */ diff --git a/arch/sparc/kernel/hvapi.c b/arch/sparc/kernel/hvapi.c index c0a2de0..5c55145 100644 --- a/arch/sparc/kernel/hvapi.c +++ b/arch/sparc/kernel/hvapi.c @@ -46,6 +46,7 @@ static struct api_info api_table[] = { { .group = HV_GRP_VF_CPU, }, { .group = HV_GRP_KT_CPU, }, { .group = HV_GRP_VT_CPU, }, + { .group = HV_GRP_T5_CPU, }, { .group = HV_GRP_DIAG, .flags = FLAG_PRE_API }, }; diff --git a/arch/sparc/kernel/hvcalls.S b/arch/sparc/kernel/hvcalls.S index f3ab509..caedf83 100644 --- a/arch/sparc/kernel/hvcalls.S +++ b/arch/sparc/kernel/hvcalls.S @@ -821,3 +821,19 @@ ENTRY(sun4v_vt_set_perfreg) retl nop ENDPROC(sun4v_vt_set_perfreg) + +ENTRY(sun4v_t5_get_perfreg) + mov %o1, %o4 + mov HV_FAST_T5_GET_PERFREG, %o5 + ta HV_FAST_TRAP + stx %o1, [%o4] + retl + nop +ENDPROC(sun4v_t5_get_perfreg) + +ENTRY(sun4v_t5_set_perfreg) + mov HV_FAST_T5_SET_PERFREG, %o5 + ta HV_FAST_TRAP + retl + nop +ENDPROC(sun4v_t5_set_perfreg) diff --git a/arch/sparc/kernel/hvtramp.S b/arch/sparc/kernel/hvtramp.S index 4eb1a5a..4ad8138 100644 --- a/arch/sparc/kernel/hvtramp.S +++ b/arch/sparc/kernel/hvtramp.S @@ -110,7 +110,6 @@ hv_cpu_startup: sllx %g5, THREAD_SHIFT, %g5 sub %g5, (STACKFRAME_SZ + STACK_BIAS), %g5 add %g6, %g5, %sp - mov 0, %fp call init_irqwork_curcpu nop diff --git a/arch/sparc/kernel/ioport.c b/arch/sparc/kernel/ioport.c index 2096468..6cacf2d 100644 --- a/arch/sparc/kernel/ioport.c +++ b/arch/sparc/kernel/ioport.c @@ -278,7 +278,8 @@ static void *sbus_alloc_coherent(struct device *dev, size_t len, } order = get_order(len_total); - if ((va = __get_free_pages(GFP_KERNEL|__GFP_COMP, order)) == 0) + va = __get_free_pages(gfp, order); + if (va == 0) goto err_nopages; if ((res = kzalloc(sizeof(struct resource), GFP_KERNEL)) == NULL) @@ -443,7 +444,7 @@ static void *pci32_alloc_coherent(struct device *dev, size_t len, } order = get_order(len_total); - va = (void *) __get_free_pages(GFP_KERNEL, order); + va = (void *) __get_free_pages(gfp, order); if (va == NULL) { printk("pci_alloc_consistent: no %ld pages\n", len_total>>PAGE_SHIFT); goto err_nopages; diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c index d74fa7f..7c22f1c 100644 --- a/arch/sparc/kernel/irq_64.c +++ b/arch/sparc/kernel/irq_64.c @@ -47,8 +47,6 @@ #include "cpumap.h" #include "kstack.h" -#define NUM_IVECS (IMAP_INR + 1) - struct ino_bucket *ivector_table; unsigned long ivector_table_pa; @@ -107,55 +105,196 @@ static void bucket_set_irq(unsigned long bucket_pa, unsigned int irq) #define irq_work_pa(__cpu) &(trap_block[(__cpu)].irq_worklist_pa) -static struct { - unsigned int dev_handle; - unsigned int dev_ino; - unsigned int in_use; -} irq_table[NR_IRQS]; -static DEFINE_SPINLOCK(irq_alloc_lock); +static unsigned long hvirq_major __initdata; +static int __init early_hvirq_major(char *p) +{ + int rc = kstrtoul(p, 10, &hvirq_major); + + return rc; +} +early_param("hvirq", early_hvirq_major); -unsigned char irq_alloc(unsigned int dev_handle, unsigned int dev_ino) +static int hv_irq_version; + +/* Major version 2.0 of HV_GRP_INTR added support for the VIRQ cookie + * based interfaces, but: + * + * 1) Several OSs, Solaris and Linux included, use them even when only + * negotiating version 1.0 (or failing to negotiate at all). So the + * hypervisor has a workaround that provides the VIRQ interfaces even + * when only verion 1.0 of the API is in use. + * + * 2) Second, and more importantly, with major version 2.0 these VIRQ + * interfaces only were actually hooked up for LDC interrupts, even + * though the Hypervisor specification clearly stated: + * + * The new interrupt API functions will be available to a guest + * when it negotiates version 2.0 in the interrupt API group 0x2. When + * a guest negotiates version 2.0, all interrupt sources will only + * support using the cookie interface, and any attempt to use the + * version 1.0 interrupt APIs numbered 0xa0 to 0xa6 will result in the + * ENOTSUPPORTED error being returned. + * + * with an emphasis on "all interrupt sources". + * + * To correct this, major version 3.0 was created which does actually + * support VIRQs for all interrupt sources (not just LDC devices). So + * if we want to move completely over the cookie based VIRQs we must + * negotiate major version 3.0 or later of HV_GRP_INTR. + */ +static bool sun4v_cookie_only_virqs(void) { - unsigned long flags; - unsigned char ent; + if (hv_irq_version >= 3) + return true; + return false; +} - BUILD_BUG_ON(NR_IRQS >= 256); +static void __init irq_init_hv(void) +{ + unsigned long hv_error, major, minor = 0; + + if (tlb_type != hypervisor) + return; + + if (hvirq_major) + major = hvirq_major; + else + major = 3; - spin_lock_irqsave(&irq_alloc_lock, flags); + hv_error = sun4v_hvapi_register(HV_GRP_INTR, major, &minor); + if (!hv_error) + hv_irq_version = major; + else + hv_irq_version = 1; - for (ent = 1; ent < NR_IRQS; ent++) { - if (!irq_table[ent].in_use) + pr_info("SUN4V: Using IRQ API major %d, cookie only virqs %s\n", + hv_irq_version, + sun4v_cookie_only_virqs() ? "enabled" : "disabled"); +} + +/* This function is for the timer interrupt.*/ +int __init arch_probe_nr_irqs(void) +{ + return 1; +} + +#define DEFAULT_NUM_IVECS (0xfffU) +static unsigned int nr_ivec = DEFAULT_NUM_IVECS; +#define NUM_IVECS (nr_ivec) + +static unsigned int __init size_nr_ivec(void) +{ + if (tlb_type == hypervisor) { + switch (sun4v_chip_type) { + /* Athena's devhandle|devino is large.*/ + case SUN4V_CHIP_SPARC64X: + nr_ivec = 0xffff; break; + } } - if (ent >= NR_IRQS) { - printk(KERN_ERR "IRQ: Out of virtual IRQs.\n"); - ent = 0; - } else { - irq_table[ent].dev_handle = dev_handle; - irq_table[ent].dev_ino = dev_ino; - irq_table[ent].in_use = 1; - } + return nr_ivec; +} + +struct irq_handler_data { + union { + struct { + unsigned int dev_handle; + unsigned int dev_ino; + }; + unsigned long sysino; + }; + struct ino_bucket bucket; + unsigned long iclr; + unsigned long imap; +}; + +static inline unsigned int irq_data_to_handle(struct irq_data *data) +{ + struct irq_handler_data *ihd = data->handler_data; + + return ihd->dev_handle; +} + +static inline unsigned int irq_data_to_ino(struct irq_data *data) +{ + struct irq_handler_data *ihd = data->handler_data; + + return ihd->dev_ino; +} - spin_unlock_irqrestore(&irq_alloc_lock, flags); +static inline unsigned long irq_data_to_sysino(struct irq_data *data) +{ + struct irq_handler_data *ihd = data->handler_data; - return ent; + return ihd->sysino; } -#ifdef CONFIG_PCI_MSI void irq_free(unsigned int irq) { - unsigned long flags; + void *data = irq_get_handler_data(irq); - if (irq >= NR_IRQS) - return; + kfree(data); + irq_set_handler_data(irq, NULL); + irq_free_descs(irq, 1); +} - spin_lock_irqsave(&irq_alloc_lock, flags); +unsigned int irq_alloc(unsigned int dev_handle, unsigned int dev_ino) +{ + int irq; - irq_table[irq].in_use = 0; + irq = __irq_alloc_descs(-1, 1, 1, numa_node_id(), NULL); + if (irq <= 0) + goto out; - spin_unlock_irqrestore(&irq_alloc_lock, flags); + return irq; +out: + return 0; +} + +static unsigned int cookie_exists(u32 devhandle, unsigned int devino) +{ + unsigned long hv_err, cookie; + struct ino_bucket *bucket; + unsigned int irq = 0U; + + hv_err = sun4v_vintr_get_cookie(devhandle, devino, &cookie); + if (hv_err) { + pr_err("HV get cookie failed hv_err = %ld\n", hv_err); + goto out; + } + + if (cookie & ((1UL << 63UL))) { + cookie = ~cookie; + bucket = (struct ino_bucket *) __va(cookie); + irq = bucket->__irq; + } +out: + return irq; +} + +static unsigned int sysino_exists(u32 devhandle, unsigned int devino) +{ + unsigned long sysino = sun4v_devino_to_sysino(devhandle, devino); + struct ino_bucket *bucket; + unsigned int irq; + + bucket = &ivector_table[sysino]; + irq = bucket_get_irq(__pa(bucket)); + + return irq; +} + +void ack_bad_irq(unsigned int irq) +{ + pr_crit("BAD IRQ ack %d\n", irq); +} + +void irq_install_pre_handler(int irq, + void (*func)(unsigned int, void *, void *), + void *arg1, void *arg2) +{ + pr_warn("IRQ pre handler NOT supported.\n"); } -#endif /* * /proc/interrupts printing: @@ -206,15 +345,6 @@ static unsigned int sun4u_compute_tid(unsigned long imap, unsigned long cpuid) return tid; } -struct irq_handler_data { - unsigned long iclr; - unsigned long imap; - - void (*pre_handler)(unsigned int, void *, void *); - void *arg1; - void *arg2; -}; - #ifdef CONFIG_SMP static int irq_choose_cpu(unsigned int irq, const struct cpumask *affinity) { @@ -316,8 +446,8 @@ static void sun4u_irq_eoi(struct irq_data *data) static void sun4v_irq_enable(struct irq_data *data) { - unsigned int ino = irq_table[data->irq].dev_ino; unsigned long cpuid = irq_choose_cpu(data->irq, data->affinity); + unsigned int ino = irq_data_to_sysino(data); int err; err = sun4v_intr_settarget(ino, cpuid); @@ -337,8 +467,8 @@ static void sun4v_irq_enable(struct irq_data *data) static int sun4v_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) { - unsigned int ino = irq_table[data->irq].dev_ino; unsigned long cpuid = irq_choose_cpu(data->irq, mask); + unsigned int ino = irq_data_to_sysino(data); int err; err = sun4v_intr_settarget(ino, cpuid); @@ -351,7 +481,7 @@ static int sun4v_set_affinity(struct irq_data *data, static void sun4v_irq_disable(struct irq_data *data) { - unsigned int ino = irq_table[data->irq].dev_ino; + unsigned int ino = irq_data_to_sysino(data); int err; err = sun4v_intr_setenabled(ino, HV_INTR_DISABLED); @@ -362,7 +492,7 @@ static void sun4v_irq_disable(struct irq_data *data) static void sun4v_irq_eoi(struct irq_data *data) { - unsigned int ino = irq_table[data->irq].dev_ino; + unsigned int ino = irq_data_to_sysino(data); int err; err = sun4v_intr_setstate(ino, HV_INTR_STATE_IDLE); @@ -373,14 +503,13 @@ static void sun4v_irq_eoi(struct irq_data *data) static void sun4v_virq_enable(struct irq_data *data) { - unsigned long cpuid, dev_handle, dev_ino; + unsigned long dev_handle = irq_data_to_handle(data); + unsigned long dev_ino = irq_data_to_ino(data); + unsigned long cpuid; int err; cpuid = irq_choose_cpu(data->irq, data->affinity); - dev_handle = irq_table[data->irq].dev_handle; - dev_ino = irq_table[data->irq].dev_ino; - err = sun4v_vintr_set_target(dev_handle, dev_ino, cpuid); if (err != HV_EOK) printk(KERN_ERR "sun4v_vintr_set_target(%lx,%lx,%lu): " @@ -403,14 +532,13 @@ static void sun4v_virq_enable(struct irq_data *data) static int sun4v_virt_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) { - unsigned long cpuid, dev_handle, dev_ino; + unsigned long dev_handle = irq_data_to_handle(data); + unsigned long dev_ino = irq_data_to_ino(data); + unsigned long cpuid; int err; cpuid = irq_choose_cpu(data->irq, mask); - dev_handle = irq_table[data->irq].dev_handle; - dev_ino = irq_table[data->irq].dev_ino; - err = sun4v_vintr_set_target(dev_handle, dev_ino, cpuid); if (err != HV_EOK) printk(KERN_ERR "sun4v_vintr_set_target(%lx,%lx,%lu): " @@ -422,11 +550,10 @@ static int sun4v_virt_set_affinity(struct irq_data *data, static void sun4v_virq_disable(struct irq_data *data) { - unsigned long dev_handle, dev_ino; + unsigned long dev_handle = irq_data_to_handle(data); + unsigned long dev_ino = irq_data_to_ino(data); int err; - dev_handle = irq_table[data->irq].dev_handle; - dev_ino = irq_table[data->irq].dev_ino; err = sun4v_vintr_set_valid(dev_handle, dev_ino, HV_INTR_DISABLED); @@ -438,12 +565,10 @@ static void sun4v_virq_disable(struct irq_data *data) static void sun4v_virq_eoi(struct irq_data *data) { - unsigned long dev_handle, dev_ino; + unsigned long dev_handle = irq_data_to_handle(data); + unsigned long dev_ino = irq_data_to_ino(data); int err; - dev_handle = irq_table[data->irq].dev_handle; - dev_ino = irq_table[data->irq].dev_ino; - err = sun4v_vintr_set_state(dev_handle, dev_ino, HV_INTR_STATE_IDLE); if (err != HV_EOK) @@ -479,31 +604,10 @@ static struct irq_chip sun4v_virq = { .flags = IRQCHIP_EOI_IF_HANDLED, }; -static void pre_flow_handler(struct irq_data *d) -{ - struct irq_handler_data *handler_data = irq_data_get_irq_handler_data(d); - unsigned int ino = irq_table[d->irq].dev_ino; - - handler_data->pre_handler(ino, handler_data->arg1, handler_data->arg2); -} - -void irq_install_pre_handler(int irq, - void (*func)(unsigned int, void *, void *), - void *arg1, void *arg2) -{ - struct irq_handler_data *handler_data = irq_get_handler_data(irq); - - handler_data->pre_handler = func; - handler_data->arg1 = arg1; - handler_data->arg2 = arg2; - - __irq_set_preflow_handler(irq, pre_flow_handler); -} - unsigned int build_irq(int inofixup, unsigned long iclr, unsigned long imap) { - struct ino_bucket *bucket; struct irq_handler_data *handler_data; + struct ino_bucket *bucket; unsigned int irq; int ino; @@ -537,119 +641,166 @@ out: return irq; } -static unsigned int sun4v_build_common(unsigned long sysino, - struct irq_chip *chip) +static unsigned int sun4v_build_common(u32 devhandle, unsigned int devino, + void (*handler_data_init)(struct irq_handler_data *data, + u32 devhandle, unsigned int devino), + struct irq_chip *chip) { - struct ino_bucket *bucket; - struct irq_handler_data *handler_data; + struct irq_handler_data *data; unsigned int irq; - BUG_ON(tlb_type != hypervisor); + irq = irq_alloc(devhandle, devino); + if (!irq) + goto out; - bucket = &ivector_table[sysino]; - irq = bucket_get_irq(__pa(bucket)); - if (!irq) { - irq = irq_alloc(0, sysino); - bucket_set_irq(__pa(bucket), irq); - irq_set_chip_and_handler_name(irq, chip, handle_fasteoi_irq, - "IVEC"); + data = kzalloc(sizeof(struct irq_handler_data), GFP_ATOMIC); + if (unlikely(!data)) { + pr_err("IRQ handler data allocation failed.\n"); + irq_free(irq); + irq = 0; + goto out; } - handler_data = irq_get_handler_data(irq); - if (unlikely(handler_data)) - goto out; + irq_set_handler_data(irq, data); + handler_data_init(data, devhandle, devino); + irq_set_chip_and_handler_name(irq, chip, handle_fasteoi_irq, "IVEC"); + data->imap = ~0UL; + data->iclr = ~0UL; +out: + return irq; +} - handler_data = kzalloc(sizeof(struct irq_handler_data), GFP_ATOMIC); - if (unlikely(!handler_data)) { - prom_printf("IRQ: kzalloc(irq_handler_data) failed.\n"); - prom_halt(); - } - irq_set_handler_data(irq, handler_data); +static unsigned long cookie_assign(unsigned int irq, u32 devhandle, + unsigned int devino) +{ + struct irq_handler_data *ihd = irq_get_handler_data(irq); + unsigned long hv_error, cookie; - /* Catch accidental accesses to these things. IMAP/ICLR handling - * is done by hypervisor calls on sun4v platforms, not by direct - * register accesses. + /* handler_irq needs to find the irq. cookie is seen signed in + * sun4v_dev_mondo and treated as a non ivector_table delivery. */ - handler_data->imap = ~0UL; - handler_data->iclr = ~0UL; + ihd->bucket.__irq = irq; + cookie = ~__pa(&ihd->bucket); -out: - return irq; + hv_error = sun4v_vintr_set_cookie(devhandle, devino, cookie); + if (hv_error) + pr_err("HV vintr set cookie failed = %ld\n", hv_error); + + return hv_error; } -unsigned int sun4v_build_irq(u32 devhandle, unsigned int devino) +static void cookie_handler_data(struct irq_handler_data *data, + u32 devhandle, unsigned int devino) { - unsigned long sysino = sun4v_devino_to_sysino(devhandle, devino); + data->dev_handle = devhandle; + data->dev_ino = devino; +} + +static unsigned int cookie_build_irq(u32 devhandle, unsigned int devino, + struct irq_chip *chip) +{ + unsigned long hv_error; + unsigned int irq; + + irq = sun4v_build_common(devhandle, devino, cookie_handler_data, chip); + + hv_error = cookie_assign(irq, devhandle, devino); + if (hv_error) { + irq_free(irq); + irq = 0; + } - return sun4v_build_common(sysino, &sun4v_irq); + return irq; } -unsigned int sun4v_build_virq(u32 devhandle, unsigned int devino) +static unsigned int sun4v_build_cookie(u32 devhandle, unsigned int devino) { - struct irq_handler_data *handler_data; - unsigned long hv_err, cookie; - struct ino_bucket *bucket; unsigned int irq; - bucket = kzalloc(sizeof(struct ino_bucket), GFP_ATOMIC); - if (unlikely(!bucket)) - return 0; + irq = cookie_exists(devhandle, devino); + if (irq) + goto out; - /* The only reference we store to the IRQ bucket is - * by physical address which kmemleak can't see, tell - * it that this object explicitly is not a leak and - * should be scanned. - */ - kmemleak_not_leak(bucket); + irq = cookie_build_irq(devhandle, devino, &sun4v_virq); - __flush_dcache_range((unsigned long) bucket, - ((unsigned long) bucket + - sizeof(struct ino_bucket))); +out: + return irq; +} - irq = irq_alloc(devhandle, devino); +static void sysino_set_bucket(unsigned int irq) +{ + struct irq_handler_data *ihd = irq_get_handler_data(irq); + struct ino_bucket *bucket; + unsigned long sysino; + + sysino = sun4v_devino_to_sysino(ihd->dev_handle, ihd->dev_ino); + BUG_ON(sysino >= nr_ivec); + bucket = &ivector_table[sysino]; bucket_set_irq(__pa(bucket), irq); +} - irq_set_chip_and_handler_name(irq, &sun4v_virq, handle_fasteoi_irq, - "IVEC"); +static void sysino_handler_data(struct irq_handler_data *data, + u32 devhandle, unsigned int devino) +{ + unsigned long sysino; - handler_data = kzalloc(sizeof(struct irq_handler_data), GFP_ATOMIC); - if (unlikely(!handler_data)) - return 0; + sysino = sun4v_devino_to_sysino(devhandle, devino); + data->sysino = sysino; +} - /* In order to make the LDC channel startup sequence easier, - * especially wrt. locking, we do not let request_irq() enable - * the interrupt. - */ - irq_set_status_flags(irq, IRQ_NOAUTOEN); - irq_set_handler_data(irq, handler_data); +static unsigned int sysino_build_irq(u32 devhandle, unsigned int devino, + struct irq_chip *chip) +{ + unsigned int irq; - /* Catch accidental accesses to these things. IMAP/ICLR handling - * is done by hypervisor calls on sun4v platforms, not by direct - * register accesses. - */ - handler_data->imap = ~0UL; - handler_data->iclr = ~0UL; + irq = sun4v_build_common(devhandle, devino, sysino_handler_data, chip); + if (!irq) + goto out; - cookie = ~__pa(bucket); - hv_err = sun4v_vintr_set_cookie(devhandle, devino, cookie); - if (hv_err) { - prom_printf("IRQ: Fatal, cannot set cookie for [%x:%x] " - "err=%lu\n", devhandle, devino, hv_err); - prom_halt(); - } + sysino_set_bucket(irq); +out: + return irq; +} +static int sun4v_build_sysino(u32 devhandle, unsigned int devino) +{ + int irq; + + irq = sysino_exists(devhandle, devino); + if (irq) + goto out; + + irq = sysino_build_irq(devhandle, devino, &sun4v_irq); +out: return irq; } -void ack_bad_irq(unsigned int irq) +unsigned int sun4v_build_irq(u32 devhandle, unsigned int devino) { - unsigned int ino = irq_table[irq].dev_ino; + unsigned int irq; + + if (sun4v_cookie_only_virqs()) + irq = sun4v_build_cookie(devhandle, devino); + else + irq = sun4v_build_sysino(devhandle, devino); - if (!ino) - ino = 0xdeadbeef; + return irq; +} + +unsigned int sun4v_build_virq(u32 devhandle, unsigned int devino) +{ + int irq; - printk(KERN_CRIT "Unexpected IRQ from ino[%x] irq[%u]\n", - ino, irq); + irq = cookie_build_irq(devhandle, devino, &sun4v_virq); + if (!irq) + goto out; + + /* This is borrowed from the original function. + */ + irq_set_status_flags(irq, IRQ_NOAUTOEN); + +out: + return irq; } void *hardirq_stack[NR_CPUS]; @@ -698,7 +849,6 @@ void __irq_entry handler_irq(int pil, struct pt_regs *regs) set_irq_regs(old_regs); } -#ifndef CONFIG_PREEMPT_RT_FULL void do_softirq(void) { unsigned long flags; @@ -724,7 +874,6 @@ void do_softirq(void) local_irq_restore(flags); } -#endif #ifdef CONFIG_HOTPLUG_CPU void fixup_irqs(void) @@ -733,9 +882,12 @@ void fixup_irqs(void) for (irq = 0; irq < NR_IRQS; irq++) { struct irq_desc *desc = irq_to_desc(irq); - struct irq_data *data = irq_desc_get_irq_data(desc); + struct irq_data *data; unsigned long flags; + if (!desc) + continue; + data = irq_desc_get_irq_data(desc); raw_spin_lock_irqsave(&desc->lock, flags); if (desc->action && !irqd_is_per_cpu(data)) { if (data->chip->irq_set_affinity) @@ -935,16 +1087,22 @@ static struct irqaction timer_irq_action = { .name = "timer", }; -/* Only invoked on boot processor. */ -void __init init_IRQ(void) +static void __init irq_ivector_init(void) { - unsigned long size; + unsigned long size, order; + unsigned int ivecs; - map_prom_timers(); - kill_prom_timer(); + /* If we are doing cookie only VIRQs then we do not need the ivector + * table to process interrupts. + */ + if (sun4v_cookie_only_virqs()) + return; - size = sizeof(struct ino_bucket) * NUM_IVECS; - ivector_table = kzalloc(size, GFP_KERNEL); + ivecs = size_nr_ivec(); + size = sizeof(struct ino_bucket) * ivecs; + order = get_order(size); + ivector_table = (struct ino_bucket *) + __get_free_pages(GFP_KERNEL | __GFP_ZERO, order); if (!ivector_table) { prom_printf("Fatal error, cannot allocate ivector_table\n"); prom_halt(); @@ -953,6 +1111,15 @@ void __init init_IRQ(void) ((unsigned long) ivector_table) + size); ivector_table_pa = __pa(ivector_table); +} + +/* Only invoked on boot processor.*/ +void __init init_IRQ(void) +{ + irq_init_hv(); + irq_ivector_init(); + map_prom_timers(); + kill_prom_timer(); if (tlb_type == hypervisor) sun4v_init_mondo_queues(); diff --git a/arch/sparc/kernel/ktlb.S b/arch/sparc/kernel/ktlb.S index fde5a41..ef0d8e9 100644 --- a/arch/sparc/kernel/ktlb.S +++ b/arch/sparc/kernel/ktlb.S @@ -47,14 +47,6 @@ kvmap_itlb_vmalloc_addr: KERN_PGTABLE_WALK(%g4, %g5, %g2, kvmap_itlb_longpath) TSB_LOCK_TAG(%g1, %g2, %g7) - - /* Load and check PTE. */ - ldxa [%g5] ASI_PHYS_USE_EC, %g5 - mov 1, %g7 - sllx %g7, TSB_TAG_INVALID_BIT, %g7 - brgez,a,pn %g5, kvmap_itlb_longpath - TSB_STORE(%g1, %g7) - TSB_WRITE(%g1, %g5, %g6) /* fallthrough to TLB load */ @@ -118,6 +110,12 @@ kvmap_dtlb_obp: ba,pt %xcc, kvmap_dtlb_load nop +kvmap_linear_early: + sethi %hi(kern_linear_pte_xor), %g7 + ldx [%g7 + %lo(kern_linear_pte_xor)], %g2 + ba,pt %xcc, kvmap_dtlb_tsb4m_load + xor %g2, %g4, %g5 + .align 32 kvmap_dtlb_tsb4m_load: TSB_LOCK_TAG(%g1, %g2, %g7) @@ -146,85 +144,17 @@ kvmap_dtlb_4v: /* Correct TAG_TARGET is already in %g6, check 4mb TSB. */ KERN_TSB4M_LOOKUP_TL1(%g6, %g5, %g1, %g2, %g3, kvmap_dtlb_load) #endif - /* TSB entry address left in %g1, lookup linear PTE. - * Must preserve %g1 and %g6 (TAG). - */ -kvmap_dtlb_tsb4m_miss: - /* Clear the PAGE_OFFSET top virtual bits, shift - * down to get PFN, and make sure PFN is in range. - */ - sllx %g4, 21, %g5 - - /* Check to see if we know about valid memory at the 4MB - * chunk this physical address will reside within. - */ - srlx %g5, 21 + 41, %g2 - brnz,pn %g2, kvmap_dtlb_longpath - nop - - /* This unconditional branch and delay-slot nop gets patched - * by the sethi sequence once the bitmap is properly setup. + /* Linear mapping TSB lookup failed. Fallthrough to kernel + * page table based lookup. */ - .globl valid_addr_bitmap_insn -valid_addr_bitmap_insn: - ba,pt %xcc, 2f - nop - .subsection 2 - .globl valid_addr_bitmap_patch -valid_addr_bitmap_patch: - sethi %hi(sparc64_valid_addr_bitmap), %g7 - or %g7, %lo(sparc64_valid_addr_bitmap), %g7 - .previous - - srlx %g5, 21 + 22, %g2 - srlx %g2, 6, %g5 - and %g2, 63, %g2 - sllx %g5, 3, %g5 - ldx [%g7 + %g5], %g5 - mov 1, %g7 - sllx %g7, %g2, %g7 - andcc %g5, %g7, %g0 - be,pn %xcc, kvmap_dtlb_longpath - -2: sethi %hi(kpte_linear_bitmap), %g2 - - /* Get the 256MB physical address index. */ - sllx %g4, 21, %g5 - or %g2, %lo(kpte_linear_bitmap), %g2 - srlx %g5, 21 + 28, %g5 - and %g5, (32 - 1), %g7 - - /* Divide by 32 to get the offset into the bitmask. */ - srlx %g5, 5, %g5 - add %g7, %g7, %g7 - sllx %g5, 3, %g5 - - /* kern_linear_pte_xor[(mask >> shift) & 3)] */ - ldx [%g2 + %g5], %g2 - srlx %g2, %g7, %g7 - sethi %hi(kern_linear_pte_xor), %g5 - and %g7, 3, %g7 - or %g5, %lo(kern_linear_pte_xor), %g5 - sllx %g7, 3, %g7 - ldx [%g5 + %g7], %g2 - .globl kvmap_linear_patch kvmap_linear_patch: - ba,pt %xcc, kvmap_dtlb_tsb4m_load - xor %g2, %g4, %g5 + ba,a,pt %xcc, kvmap_linear_early kvmap_dtlb_vmalloc_addr: KERN_PGTABLE_WALK(%g4, %g5, %g2, kvmap_dtlb_longpath) TSB_LOCK_TAG(%g1, %g2, %g7) - - /* Load and check PTE. */ - ldxa [%g5] ASI_PHYS_USE_EC, %g5 - mov 1, %g7 - sllx %g7, TSB_TAG_INVALID_BIT, %g7 - brgez,a,pn %g5, kvmap_dtlb_longpath - TSB_STORE(%g1, %g7) - TSB_WRITE(%g1, %g5, %g6) /* fallthrough to TLB load */ @@ -256,13 +186,8 @@ kvmap_dtlb_load: #ifdef CONFIG_SPARSEMEM_VMEMMAP kvmap_vmemmap: - sub %g4, %g5, %g5 - srlx %g5, 22, %g5 - sethi %hi(vmemmap_table), %g1 - sllx %g5, 3, %g5 - or %g1, %lo(vmemmap_table), %g1 - ba,pt %xcc, kvmap_dtlb_load - ldx [%g1 + %g5], %g5 + KERN_PGTABLE_WALK(%g4, %g5, %g2, kvmap_dtlb_longpath) + ba,a,pt %xcc, kvmap_dtlb_load #endif kvmap_dtlb_nonlinear: @@ -274,8 +199,8 @@ kvmap_dtlb_nonlinear: #ifdef CONFIG_SPARSEMEM_VMEMMAP /* Do not use the TSB for vmemmap. */ - mov (VMEMMAP_BASE >> 40), %g5 - sllx %g5, 40, %g5 + sethi %hi(VMEMMAP_BASE), %g5 + ldx [%g5 + %lo(VMEMMAP_BASE)], %g5 cmp %g4,%g5 bgeu,pn %xcc, kvmap_vmemmap nop @@ -287,8 +212,8 @@ kvmap_dtlb_tsbmiss: sethi %hi(MODULES_VADDR), %g5 cmp %g4, %g5 blu,pn %xcc, kvmap_dtlb_longpath - mov (VMALLOC_END >> 40), %g5 - sllx %g5, 40, %g5 + sethi %hi(VMALLOC_END), %g5 + ldx [%g5 + %lo(VMALLOC_END)], %g5 cmp %g4, %g5 bgeu,pn %xcc, kvmap_dtlb_longpath nop diff --git a/arch/sparc/kernel/ldc.c b/arch/sparc/kernel/ldc.c index e01d75d..27bb554 100644 --- a/arch/sparc/kernel/ldc.c +++ b/arch/sparc/kernel/ldc.c @@ -1078,7 +1078,8 @@ static void ldc_iommu_release(struct ldc_channel *lp) struct ldc_channel *ldc_alloc(unsigned long id, const struct ldc_channel_config *cfgp, - void *event_arg) + void *event_arg, + const char *name) { struct ldc_channel *lp; const struct ldc_mode_ops *mops; @@ -1093,6 +1094,8 @@ struct ldc_channel *ldc_alloc(unsigned long id, err = -EINVAL; if (!cfgp) goto out_err; + if (!name) + goto out_err; switch (cfgp->mode) { case LDC_MODE_RAW: @@ -1185,6 +1188,21 @@ struct ldc_channel *ldc_alloc(unsigned long id, INIT_HLIST_HEAD(&lp->mh_list); + snprintf(lp->rx_irq_name, LDC_IRQ_NAME_MAX, "%s RX", name); + snprintf(lp->tx_irq_name, LDC_IRQ_NAME_MAX, "%s TX", name); + + err = request_irq(lp->cfg.rx_irq, ldc_rx, 0, + lp->rx_irq_name, lp); + if (err) + goto out_free_txq; + + err = request_irq(lp->cfg.tx_irq, ldc_tx, 0, + lp->tx_irq_name, lp); + if (err) { + free_irq(lp->cfg.rx_irq, lp); + goto out_free_txq; + } + return lp; out_free_txq: @@ -1237,31 +1255,14 @@ EXPORT_SYMBOL(ldc_free); * state. This does not initiate a handshake, ldc_connect() does * that. */ -int ldc_bind(struct ldc_channel *lp, const char *name) +int ldc_bind(struct ldc_channel *lp) { unsigned long hv_err, flags; int err = -EINVAL; - if (!name || - (lp->state != LDC_STATE_INIT)) + if (lp->state != LDC_STATE_INIT) return -EINVAL; - snprintf(lp->rx_irq_name, LDC_IRQ_NAME_MAX, "%s RX", name); - snprintf(lp->tx_irq_name, LDC_IRQ_NAME_MAX, "%s TX", name); - - err = request_irq(lp->cfg.rx_irq, ldc_rx, 0, - lp->rx_irq_name, lp); - if (err) - return err; - - err = request_irq(lp->cfg.tx_irq, ldc_tx, 0, - lp->tx_irq_name, lp); - if (err) { - free_irq(lp->cfg.rx_irq, lp); - return err; - } - - spin_lock_irqsave(&lp->lock, flags); enable_irq(lp->cfg.rx_irq); @@ -1336,7 +1337,7 @@ int ldc_connect(struct ldc_channel *lp) if (!(lp->flags & LDC_FLAG_ALLOCED_QUEUES) || !(lp->flags & LDC_FLAG_REGISTERED_QUEUES) || lp->hs_state != LDC_HS_OPEN) - err = -EINVAL; + err = ((lp->hs_state > LDC_HS_OPEN) ? 0 : -EINVAL); else err = start_handshake(lp); diff --git a/arch/sparc/kernel/nmi.c b/arch/sparc/kernel/nmi.c index 6479256..fce8ab1 100644 --- a/arch/sparc/kernel/nmi.c +++ b/arch/sparc/kernel/nmi.c @@ -141,7 +141,6 @@ static inline unsigned int get_nmi_count(int cpu) static __init void nmi_cpu_busy(void *data) { - local_irq_enable_in_hardirq(); while (endflag == 0) mb(); } diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c index bc4d3f5..cb02145 100644 --- a/arch/sparc/kernel/pci.c +++ b/arch/sparc/kernel/pci.c @@ -398,8 +398,8 @@ static void apb_fake_ranges(struct pci_dev *dev, apb_calc_first_last(map, &first, &last); res = bus->resource[1]; res->flags = IORESOURCE_MEM; - region.start = (first << 21); - region.end = (last << 21) + ((1 << 21) - 1); + region.start = (first << 29); + region.end = (last << 29) + ((1 << 29) - 1); pcibios_bus_to_resource(dev, res, ®ion); } diff --git a/arch/sparc/kernel/pci_schizo.c b/arch/sparc/kernel/pci_schizo.c index 8f76f23..f9c6813 100644 --- a/arch/sparc/kernel/pci_schizo.c +++ b/arch/sparc/kernel/pci_schizo.c @@ -581,7 +581,7 @@ static irqreturn_t schizo_pcierr_intr_other(struct pci_pbm_info *pbm) { unsigned long csr_reg, csr, csr_error_bits; irqreturn_t ret = IRQ_NONE; - u16 stat; + u32 stat; csr_reg = pbm->pbm_regs + SCHIZO_PCI_CTRL; csr = upa_readq(csr_reg); @@ -617,7 +617,7 @@ static irqreturn_t schizo_pcierr_intr_other(struct pci_pbm_info *pbm) pbm->name); ret = IRQ_HANDLED; } - pci_read_config_word(pbm->pci_bus->self, PCI_STATUS, &stat); + pbm->pci_ops->read(pbm->pci_bus, 0, PCI_STATUS, 2, &stat); if (stat & (PCI_STATUS_PARITY | PCI_STATUS_SIG_TARGET_ABORT | PCI_STATUS_REC_TARGET_ABORT | @@ -625,7 +625,7 @@ static irqreturn_t schizo_pcierr_intr_other(struct pci_pbm_info *pbm) PCI_STATUS_SIG_SYSTEM_ERROR)) { printk("%s: PCI bus error, PCI_STATUS[%04x]\n", pbm->name, stat); - pci_write_config_word(pbm->pci_bus->self, PCI_STATUS, 0xffff); + pbm->pci_ops->write(pbm->pci_bus, 0, PCI_STATUS, 2, 0xffff); ret = IRQ_HANDLED; } return ret; diff --git a/arch/sparc/kernel/pcr.c b/arch/sparc/kernel/pcr.c index dbb51a6..7e967c8 100644 --- a/arch/sparc/kernel/pcr.c +++ b/arch/sparc/kernel/pcr.c @@ -43,12 +43,10 @@ void __irq_entry deferred_pcr_work_irq(int irq, struct pt_regs *regs) set_irq_regs(old_regs); } -#ifndef CONFIG_PREEMPT_RT_FULL void arch_irq_work_raise(void) { set_softint(1 << PIL_DEFERRED_PCR_WORK); } -#endif const struct pcr_ops *pcr_ops; EXPORT_SYMBOL_GPL(pcr_ops); @@ -193,12 +191,41 @@ static const struct pcr_ops n4_pcr_ops = { .pcr_nmi_disable = PCR_N4_PICNPT, }; +static u64 n5_pcr_read(unsigned long reg_num) +{ + unsigned long val; + + (void) sun4v_t5_get_perfreg(reg_num, &val); + + return val; +} + +static void n5_pcr_write(unsigned long reg_num, u64 val) +{ + (void) sun4v_t5_set_perfreg(reg_num, val); +} + +static const struct pcr_ops n5_pcr_ops = { + .read_pcr = n5_pcr_read, + .write_pcr = n5_pcr_write, + .read_pic = n4_pic_read, + .write_pic = n4_pic_write, + .nmi_picl_value = n4_picl_value, + .pcr_nmi_enable = (PCR_N4_PICNPT | PCR_N4_STRACE | + PCR_N4_UTRACE | PCR_N4_TOE | + (26 << PCR_N4_SL_SHIFT)), + .pcr_nmi_disable = PCR_N4_PICNPT, +}; + + static unsigned long perf_hsvc_group; static unsigned long perf_hsvc_major; static unsigned long perf_hsvc_minor; static int __init register_perf_hsvc(void) { + unsigned long hverror; + if (tlb_type == hypervisor) { switch (sun4v_chip_type) { case SUN4V_CHIP_NIAGARA1: @@ -217,6 +244,10 @@ static int __init register_perf_hsvc(void) perf_hsvc_group = HV_GRP_VT_CPU; break; + case SUN4V_CHIP_NIAGARA5: + perf_hsvc_group = HV_GRP_T5_CPU; + break; + default: return -ENODEV; } @@ -224,10 +255,12 @@ static int __init register_perf_hsvc(void) perf_hsvc_major = 1; perf_hsvc_minor = 0; - if (sun4v_hvapi_register(perf_hsvc_group, - perf_hsvc_major, - &perf_hsvc_minor)) { - printk("perfmon: Could not register hvapi.\n"); + hverror = sun4v_hvapi_register(perf_hsvc_group, + perf_hsvc_major, + &perf_hsvc_minor); + if (hverror) { + pr_err("perfmon: Could not register hvapi(0x%lx).\n", + hverror); return -ENODEV; } } @@ -256,6 +289,10 @@ static int __init setup_sun4v_pcr_ops(void) pcr_ops = &n4_pcr_ops; break; + case SUN4V_CHIP_NIAGARA5: + pcr_ops = &n5_pcr_ops; + break; + default: ret = -ENODEV; break; diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index b5c38fa..617b9fe 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -1662,7 +1662,8 @@ static bool __init supported_pmu(void) sparc_pmu = &niagara2_pmu; return true; } - if (!strcmp(sparc_pmu_type, "niagara4")) { + if (!strcmp(sparc_pmu_type, "niagara4") || + !strcmp(sparc_pmu_type, "niagara5")) { sparc_pmu = &niagara4_pmu; return true; } @@ -1671,9 +1672,12 @@ static bool __init supported_pmu(void) int __init init_hw_perf_events(void) { + int err; + pr_info("Performance events: "); - if (!supported_pmu()) { + err = pcr_arch_init(); + if (err || !supported_pmu()) { pr_cont("No support for PMU type '%s'\n", sparc_pmu_type); return 0; } @@ -1685,7 +1689,7 @@ int __init init_hw_perf_events(void) return 0; } -early_initcall(init_hw_perf_events); +pure_initcall(init_hw_perf_events); void perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index b9cc976..fa49b80 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -305,6 +305,9 @@ static void __global_pmu_self(int this_cpu) struct global_pmu_snapshot *pp; int i, num; + if (!pcr_ops) + return; + pp = &global_cpu_snapshot[this_cpu].pmu; num = 1; diff --git a/arch/sparc/kernel/setup_32.c b/arch/sparc/kernel/setup_32.c index 0884ccd..1434526 100644 --- a/arch/sparc/kernel/setup_32.c +++ b/arch/sparc/kernel/setup_32.c @@ -309,7 +309,6 @@ void __init setup_arch(char **cmdline_p) boot_flags_init(*cmdline_p); - early_console = &prom_early_console; register_console(&prom_early_console); printk("ARCH: "); diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c index 4306d44..61a5198 100644 --- a/arch/sparc/kernel/setup_64.c +++ b/arch/sparc/kernel/setup_64.c @@ -30,6 +30,7 @@ #include <linux/cpu.h> #include <linux/initrd.h> #include <linux/module.h> +#include <linux/start_kernel.h> #include <asm/io.h> #include <asm/processor.h> @@ -174,7 +175,7 @@ char reboot_command[COMMAND_LINE_SIZE]; static struct pt_regs fake_swapper_regs = { { 0, }, 0, 0, 0, 0 }; -void __init per_cpu_patch(void) +static void __init per_cpu_patch(void) { struct cpuid_patch_entry *p; unsigned long ver; @@ -266,7 +267,7 @@ void sun4v_patch_2insn_range(struct sun4v_2insn_patch_entry *start, } } -void __init sun4v_patch(void) +static void __init sun4v_patch(void) { extern void sun4v_hvapi_init(void); @@ -335,14 +336,25 @@ static void __init pause_patch(void) } } -#ifdef CONFIG_SMP -void __init boot_cpu_id_too_large(int cpu) +void __init start_early_boot(void) { - prom_printf("Serious problem, boot cpu id (%d) >= NR_CPUS (%d)\n", - cpu, NR_CPUS); - prom_halt(); + int cpu; + + check_if_starfire(); + per_cpu_patch(); + sun4v_patch(); + + cpu = hard_smp_processor_id(); + if (cpu >= NR_CPUS) { + prom_printf("Serious problem, boot cpu id (%d) >= NR_CPUS (%d)\n", + cpu, NR_CPUS); + prom_halt(); + } + current_thread_info()->cpu = cpu; + + prom_init_report(); + start_kernel(); } -#endif /* On Ultra, we support all of the v8 capabilities. */ unsigned long sparc64_elf_hwcap = (HWCAP_SPARC_FLUSH | HWCAP_SPARC_STBAR | @@ -500,12 +512,16 @@ static void __init init_sparc64_elf_hwcap(void) sun4v_chip_type == SUN4V_CHIP_NIAGARA3 || sun4v_chip_type == SUN4V_CHIP_NIAGARA4 || sun4v_chip_type == SUN4V_CHIP_NIAGARA5 || + sun4v_chip_type == SUN4V_CHIP_SPARC_M6 || + sun4v_chip_type == SUN4V_CHIP_SPARC_M7 || sun4v_chip_type == SUN4V_CHIP_SPARC64X) cap |= HWCAP_SPARC_BLKINIT; if (sun4v_chip_type == SUN4V_CHIP_NIAGARA2 || sun4v_chip_type == SUN4V_CHIP_NIAGARA3 || sun4v_chip_type == SUN4V_CHIP_NIAGARA4 || sun4v_chip_type == SUN4V_CHIP_NIAGARA5 || + sun4v_chip_type == SUN4V_CHIP_SPARC_M6 || + sun4v_chip_type == SUN4V_CHIP_SPARC_M7 || sun4v_chip_type == SUN4V_CHIP_SPARC64X) cap |= HWCAP_SPARC_N2; } @@ -533,6 +549,8 @@ static void __init init_sparc64_elf_hwcap(void) sun4v_chip_type == SUN4V_CHIP_NIAGARA3 || sun4v_chip_type == SUN4V_CHIP_NIAGARA4 || sun4v_chip_type == SUN4V_CHIP_NIAGARA5 || + sun4v_chip_type == SUN4V_CHIP_SPARC_M6 || + sun4v_chip_type == SUN4V_CHIP_SPARC_M7 || sun4v_chip_type == SUN4V_CHIP_SPARC64X) cap |= (AV_SPARC_VIS | AV_SPARC_VIS2 | AV_SPARC_ASI_BLK_INIT | @@ -540,6 +558,8 @@ static void __init init_sparc64_elf_hwcap(void) if (sun4v_chip_type == SUN4V_CHIP_NIAGARA3 || sun4v_chip_type == SUN4V_CHIP_NIAGARA4 || sun4v_chip_type == SUN4V_CHIP_NIAGARA5 || + sun4v_chip_type == SUN4V_CHIP_SPARC_M6 || + sun4v_chip_type == SUN4V_CHIP_SPARC_M7 || sun4v_chip_type == SUN4V_CHIP_SPARC64X) cap |= (AV_SPARC_VIS3 | AV_SPARC_HPC | AV_SPARC_FMAF); @@ -555,12 +575,6 @@ static void __init init_sparc64_elf_hwcap(void) pause_patch(); } -static inline void register_prom_console(void) -{ - early_console = &prom_early_console; - register_console(&prom_early_console); -} - void __init setup_arch(char **cmdline_p) { /* Initialize PROM console and command line. */ @@ -572,7 +586,7 @@ void __init setup_arch(char **cmdline_p) #ifdef CONFIG_EARLYFB if (btext_find_display()) #endif - register_prom_console(); + register_console(&prom_early_console); if (tlb_type == hypervisor) printk("ARCH: SUN4V\n"); diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index 8c68424..226ff1a 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -150,7 +150,7 @@ void cpu_panic(void) #define NUM_ROUNDS 64 /* magic value */ #define NUM_ITERS 5 /* likewise */ -static DEFINE_SPINLOCK(itc_sync_lock); +static DEFINE_RAW_SPINLOCK(itc_sync_lock); static unsigned long go[SLAVE + 1]; #define DEBUG_TICK_SYNC 0 @@ -258,7 +258,7 @@ static void smp_synchronize_one_tick(int cpu) go[MASTER] = 0; membar_safe("#StoreLoad"); - spin_lock_irqsave(&itc_sync_lock, flags); + raw_spin_lock_irqsave(&itc_sync_lock, flags); { for (i = 0; i < NUM_ROUNDS*NUM_ITERS; i++) { while (!go[MASTER]) @@ -269,7 +269,7 @@ static void smp_synchronize_one_tick(int cpu) membar_safe("#StoreLoad"); } } - spin_unlock_irqrestore(&itc_sync_lock, flags); + raw_spin_unlock_irqrestore(&itc_sync_lock, flags); } #if defined(CONFIG_SUN_LDOMS) && defined(CONFIG_HOTPLUG_CPU) @@ -822,13 +822,17 @@ void arch_send_call_function_single_ipi(int cpu) void __irq_entry smp_call_function_client(int irq, struct pt_regs *regs) { clear_softint(1 << irq); + irq_enter(); generic_smp_call_function_interrupt(); + irq_exit(); } void __irq_entry smp_call_function_single_client(int irq, struct pt_regs *regs) { clear_softint(1 << irq); + irq_enter(); generic_smp_call_function_single_interrupt(); + irq_exit(); } static void tsb_sync(void *info) @@ -976,12 +980,12 @@ void __irq_entry smp_new_mmu_context_version_client(int irq, struct pt_regs *reg if (unlikely(!mm || (mm == &init_mm))) return; - raw_spin_lock_irqsave(&mm->context.lock, flags); + spin_lock_irqsave(&mm->context.lock, flags); if (unlikely(!CTX_VALID(mm->context))) get_new_mmu_context(mm); - raw_spin_unlock_irqrestore(&mm->context.lock, flags); + spin_unlock_irqrestore(&mm->context.lock, flags); load_secondary_context(mm); __flush_tlb_mm(CTX_HWBITS(mm->context), @@ -1394,7 +1398,6 @@ void __cpu_die(unsigned int cpu) void __init smp_cpus_done(unsigned int max_cpus) { - pcr_arch_init(); } void smp_send_reschedule(int cpu) @@ -1474,6 +1477,13 @@ static void __init pcpu_populate_pte(unsigned long addr) pud_t *pud; pmd_t *pmd; + if (pgd_none(*pgd)) { + pud_t *new; + + new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE); + pgd_populate(&init_mm, pgd, new); + } + pud = pud_offset(pgd, addr); if (pud_none(*pud)) { pmd_t *new; diff --git a/arch/sparc/kernel/sun4v_tlb_miss.S b/arch/sparc/kernel/sun4v_tlb_miss.S index bde867f..6179e19 100644 --- a/arch/sparc/kernel/sun4v_tlb_miss.S +++ b/arch/sparc/kernel/sun4v_tlb_miss.S @@ -182,7 +182,7 @@ sun4v_tsb_miss_common: cmp %g5, -1 be,pt %xcc, 80f nop - COMPUTE_TSB_PTR(%g5, %g4, HPAGE_SHIFT, %g2, %g7) + COMPUTE_TSB_PTR(%g5, %g4, REAL_HPAGE_SHIFT, %g2, %g7) /* That clobbered %g2, reload it. */ ldxa [%g0] ASI_SCRATCHPAD, %g2 @@ -195,6 +195,11 @@ sun4v_tsb_miss_common: ldx [%g2 + TRAP_PER_CPU_PGD_PADDR], %g7 sun4v_itlb_error: + rdpr %tl, %g1 + cmp %g1, 1 + ble,pt %icc, sun4v_bad_ra + or %g0, FAULT_CODE_BAD_RA | FAULT_CODE_ITLB, %g1 + sethi %hi(sun4v_err_itlb_vaddr), %g1 stx %g4, [%g1 + %lo(sun4v_err_itlb_vaddr)] sethi %hi(sun4v_err_itlb_ctx), %g1 @@ -206,15 +211,10 @@ sun4v_itlb_error: sethi %hi(sun4v_err_itlb_error), %g1 stx %o0, [%g1 + %lo(sun4v_err_itlb_error)] + sethi %hi(1f), %g7 rdpr %tl, %g4 - cmp %g4, 1 - ble,pt %icc, 1f - sethi %hi(2f), %g7 ba,pt %xcc, etraptl1 - or %g7, %lo(2f), %g7 - -1: ba,pt %xcc, etrap -2: or %g7, %lo(2b), %g7 +1: or %g7, %lo(1f), %g7 mov %l4, %o1 call sun4v_itlb_error_report add %sp, PTREGS_OFF, %o0 @@ -222,6 +222,11 @@ sun4v_itlb_error: /* NOTREACHED */ sun4v_dtlb_error: + rdpr %tl, %g1 + cmp %g1, 1 + ble,pt %icc, sun4v_bad_ra + or %g0, FAULT_CODE_BAD_RA | FAULT_CODE_DTLB, %g1 + sethi %hi(sun4v_err_dtlb_vaddr), %g1 stx %g4, [%g1 + %lo(sun4v_err_dtlb_vaddr)] sethi %hi(sun4v_err_dtlb_ctx), %g1 @@ -233,21 +238,23 @@ sun4v_dtlb_error: sethi %hi(sun4v_err_dtlb_error), %g1 stx %o0, [%g1 + %lo(sun4v_err_dtlb_error)] + sethi %hi(1f), %g7 rdpr %tl, %g4 - cmp %g4, 1 - ble,pt %icc, 1f - sethi %hi(2f), %g7 ba,pt %xcc, etraptl1 - or %g7, %lo(2f), %g7 - -1: ba,pt %xcc, etrap -2: or %g7, %lo(2b), %g7 +1: or %g7, %lo(1f), %g7 mov %l4, %o1 call sun4v_dtlb_error_report add %sp, PTREGS_OFF, %o0 /* NOTREACHED */ +sun4v_bad_ra: + or %g0, %g4, %g5 + ba,pt %xcc, sparc64_realfault_common + or %g1, %g0, %g4 + + /* NOTREACHED */ + /* Instruction Access Exception, tl0. */ sun4v_iacc: ldxa [%g0] ASI_SCRATCHPAD, %g2 diff --git a/arch/sparc/kernel/sys32.S b/arch/sparc/kernel/sys32.S index f7c72b6..d066eb1 100644 --- a/arch/sparc/kernel/sys32.S +++ b/arch/sparc/kernel/sys32.S @@ -44,7 +44,7 @@ SIGN1(sys32_timer_settime, compat_sys_timer_settime, %o1) SIGN1(sys32_io_submit, compat_sys_io_submit, %o1) SIGN1(sys32_mq_open, compat_sys_mq_open, %o1) SIGN1(sys32_select, compat_sys_select, %o0) -SIGN3(sys32_futex, compat_sys_futex, %o1, %o2, %o5) +SIGN1(sys32_futex, compat_sys_futex, %o1) SIGN1(sys32_recvfrom, compat_sys_recvfrom, %o0) SIGN1(sys32_recvmsg, compat_sys_recvmsg, %o0) SIGN1(sys32_sendmsg, compat_sys_sendmsg, %o0) diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c index 51561b8..d05eb9c 100644 --- a/arch/sparc/kernel/sys_sparc_64.c +++ b/arch/sparc/kernel/sys_sparc_64.c @@ -39,9 +39,6 @@ asmlinkage unsigned long sys_getpagesize(void) return PAGE_SIZE; } -#define VA_EXCLUDE_START (0x0000080000000000UL - (1UL << 32UL)) -#define VA_EXCLUDE_END (0xfffff80000000000UL + (1UL << 32UL)) - /* Does addr --> addr+len fall within 4GB of the VA-space hole or * overflow past the end of the 64-bit address space? */ diff --git a/arch/sparc/kernel/trampoline_64.S b/arch/sparc/kernel/trampoline_64.S index ad4bde3..092a39d 100644 --- a/arch/sparc/kernel/trampoline_64.S +++ b/arch/sparc/kernel/trampoline_64.S @@ -110,10 +110,13 @@ startup_continue: brnz,pn %g1, 1b nop - sethi %hi(p1275buf), %g2 - or %g2, %lo(p1275buf), %g2 - ldx [%g2 + 0x10], %l2 - add %l2, -(192 + 128), %sp + /* Get onto temporary stack which will be in the locked + * kernel image. + */ + sethi %hi(tramp_stack), %g1 + or %g1, %lo(tramp_stack), %g1 + add %g1, TRAMP_STACK_SIZE, %g1 + sub %g1, STACKFRAME_SZ + STACK_BIAS + 256, %sp flushw /* Setup the loop variables: @@ -395,7 +398,6 @@ after_lock_tlb: sllx %g5, THREAD_SHIFT, %g5 sub %g5, (STACKFRAME_SZ + STACK_BIAS), %g5 add %g6, %g5, %sp - mov 0, %fp rdpr %pstate, %o1 or %o1, PSTATE_IE, %o1 diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c index b3f833a..1a33850 100644 --- a/arch/sparc/kernel/traps_64.c +++ b/arch/sparc/kernel/traps_64.c @@ -2092,6 +2092,11 @@ void sun4v_nonresum_overflow(struct pt_regs *regs) atomic_inc(&sun4v_nonresum_oflow_cnt); } +static void sun4v_tlb_error(struct pt_regs *regs) +{ + die_if_kernel("TLB/TSB error", regs); +} + unsigned long sun4v_err_itlb_vaddr; unsigned long sun4v_err_itlb_ctx; unsigned long sun4v_err_itlb_pte; @@ -2099,8 +2104,7 @@ unsigned long sun4v_err_itlb_error; void sun4v_itlb_error_report(struct pt_regs *regs, int tl) { - if (tl > 1) - dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); + dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); printk(KERN_EMERG "SUN4V-ITLB: Error at TPC[%lx], tl %d\n", regs->tpc, tl); @@ -2113,7 +2117,7 @@ void sun4v_itlb_error_report(struct pt_regs *regs, int tl) sun4v_err_itlb_vaddr, sun4v_err_itlb_ctx, sun4v_err_itlb_pte, sun4v_err_itlb_error); - prom_halt(); + sun4v_tlb_error(regs); } unsigned long sun4v_err_dtlb_vaddr; @@ -2123,8 +2127,7 @@ unsigned long sun4v_err_dtlb_error; void sun4v_dtlb_error_report(struct pt_regs *regs, int tl) { - if (tl > 1) - dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); + dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); printk(KERN_EMERG "SUN4V-DTLB: Error at TPC[%lx], tl %d\n", regs->tpc, tl); @@ -2137,7 +2140,7 @@ void sun4v_dtlb_error_report(struct pt_regs *regs, int tl) sun4v_err_dtlb_vaddr, sun4v_err_dtlb_ctx, sun4v_err_dtlb_pte, sun4v_err_dtlb_error); - prom_halt(); + sun4v_tlb_error(regs); } void hypervisor_tlbop_error(unsigned long err, unsigned long op) diff --git a/arch/sparc/kernel/tsb.S b/arch/sparc/kernel/tsb.S index a313e4a..be98685 100644 --- a/arch/sparc/kernel/tsb.S +++ b/arch/sparc/kernel/tsb.S @@ -75,7 +75,7 @@ tsb_miss_page_table_walk: mov 512, %g7 andn %g5, 0x7, %g5 sllx %g7, %g6, %g7 - srlx %g4, HPAGE_SHIFT, %g6 + srlx %g4, REAL_HPAGE_SHIFT, %g6 sub %g7, 1, %g7 and %g6, %g7, %g6 sllx %g6, 4, %g6 @@ -162,10 +162,10 @@ tsb_miss_page_table_walk_sun4v_fastpath: nop .previous - rdpr %tl, %g3 - cmp %g3, 1 + rdpr %tl, %g7 + cmp %g7, 1 bne,pn %xcc, winfix_trampoline - nop + mov %g3, %g4 ba,pt %xcc, etrap rd %pc, %g7 call hugetlb_setup diff --git a/arch/sparc/kernel/unaligned_64.c b/arch/sparc/kernel/unaligned_64.c index 8201c25e..4db8898 100644 --- a/arch/sparc/kernel/unaligned_64.c +++ b/arch/sparc/kernel/unaligned_64.c @@ -163,17 +163,23 @@ static unsigned long *fetch_reg_addr(unsigned int reg, struct pt_regs *regs) unsigned long compute_effective_address(struct pt_regs *regs, unsigned int insn, unsigned int rd) { + int from_kernel = (regs->tstate & TSTATE_PRIV) != 0; unsigned int rs1 = (insn >> 14) & 0x1f; unsigned int rs2 = insn & 0x1f; - int from_kernel = (regs->tstate & TSTATE_PRIV) != 0; + unsigned long addr; if (insn & 0x2000) { maybe_flush_windows(rs1, 0, rd, from_kernel); - return (fetch_reg(rs1, regs) + sign_extend_imm13(insn)); + addr = (fetch_reg(rs1, regs) + sign_extend_imm13(insn)); } else { maybe_flush_windows(rs1, rs2, rd, from_kernel); - return (fetch_reg(rs1, regs) + fetch_reg(rs2, regs)); + addr = (fetch_reg(rs1, regs) + fetch_reg(rs2, regs)); } + + if (!from_kernel && test_thread_flag(TIF_32BIT)) + addr &= 0xffffffff; + + return addr; } /* This is just to make gcc think die_if_kernel does return... */ diff --git a/arch/sparc/kernel/viohs.c b/arch/sparc/kernel/viohs.c index f8e7dd5..9c5fbd0 100644 --- a/arch/sparc/kernel/viohs.c +++ b/arch/sparc/kernel/viohs.c @@ -714,7 +714,7 @@ int vio_ldc_alloc(struct vio_driver_state *vio, cfg.tx_irq = vio->vdev->tx_irq; cfg.rx_irq = vio->vdev->rx_irq; - lp = ldc_alloc(vio->vdev->channel_id, &cfg, event_arg); + lp = ldc_alloc(vio->vdev->channel_id, &cfg, event_arg, vio->name); if (IS_ERR(lp)) return PTR_ERR(lp); @@ -746,7 +746,7 @@ void vio_port_up(struct vio_driver_state *vio) err = 0; if (state == LDC_STATE_INIT) { - err = ldc_bind(vio->lp, vio->name); + err = ldc_bind(vio->lp); if (err) printk(KERN_WARNING "%s: Port %lu bind failed, " "err=%d\n", diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S index 0bacceb..0924305 100644 --- a/arch/sparc/kernel/vmlinux.lds.S +++ b/arch/sparc/kernel/vmlinux.lds.S @@ -35,8 +35,9 @@ jiffies = jiffies_64; SECTIONS { - /* swapper_low_pmd_dir is sparc64 only */ - swapper_low_pmd_dir = 0x0000000000402000; +#ifdef CONFIG_SPARC64 + swapper_pg_dir = 0x0000000000402000; +#endif . = INITIAL_ADDRESS; .text TEXTSTART : { diff --git a/arch/sparc/lib/NG2memcpy.S b/arch/sparc/lib/NG2memcpy.S index 2c20ad6..30eee6e 100644 --- a/arch/sparc/lib/NG2memcpy.S +++ b/arch/sparc/lib/NG2memcpy.S @@ -236,6 +236,7 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ */ VISEntryHalf + membar #Sync alignaddr %o1, %g0, %g0 add %o1, (64 - 1), %o4 diff --git a/arch/sparc/lib/NG4memcpy.S b/arch/sparc/lib/NG4memcpy.S index 9cf2ee0..140527a 100644 --- a/arch/sparc/lib/NG4memcpy.S +++ b/arch/sparc/lib/NG4memcpy.S @@ -41,6 +41,10 @@ #endif #endif +#if !defined(EX_LD) && !defined(EX_ST) +#define NON_USER_COPY +#endif + #ifndef EX_LD #define EX_LD(x) x #endif @@ -197,9 +201,13 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ mov EX_RETVAL(%o3), %o0 .Llarge_src_unaligned: +#ifdef NON_USER_COPY + VISEntryHalfFast(.Lmedium_vis_entry_fail) +#else + VISEntryHalf +#endif andn %o2, 0x3f, %o4 sub %o2, %o4, %o2 - VISEntryHalf alignaddr %o1, %g0, %g1 add %o1, %o4, %o1 EX_LD(LOAD(ldd, %g1 + 0x00, %f0)) @@ -240,6 +248,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ nop ba,a,pt %icc, .Lmedium_unaligned +#ifdef NON_USER_COPY +.Lmedium_vis_entry_fail: + or %o0, %o1, %g2 +#endif .Lmedium: LOAD(prefetch, %o1 + 0x40, #n_reads_strong) andcc %g2, 0x7, %g0 diff --git a/arch/sparc/lib/atomic32.c b/arch/sparc/lib/atomic32.c index 1d32b54..8f2f94d 100644 --- a/arch/sparc/lib/atomic32.c +++ b/arch/sparc/lib/atomic32.c @@ -40,6 +40,19 @@ int __atomic_add_return(int i, atomic_t *v) } EXPORT_SYMBOL(__atomic_add_return); +int atomic_xchg(atomic_t *v, int new) +{ + int ret; + unsigned long flags; + + spin_lock_irqsave(ATOMIC_HASH(v), flags); + ret = v->counter; + v->counter = new; + spin_unlock_irqrestore(ATOMIC_HASH(v), flags); + return ret; +} +EXPORT_SYMBOL(atomic_xchg); + int atomic_cmpxchg(atomic_t *v, int old, int new) { int ret; @@ -132,3 +145,17 @@ unsigned long __cmpxchg_u32(volatile u32 *ptr, u32 old, u32 new) return (unsigned long)prev; } EXPORT_SYMBOL(__cmpxchg_u32); + +unsigned long __xchg_u32(volatile u32 *ptr, u32 new) +{ + unsigned long flags; + u32 prev; + + spin_lock_irqsave(ATOMIC_HASH(ptr), flags); + prev = *ptr; + *ptr = new; + spin_unlock_irqrestore(ATOMIC_HASH(ptr), flags); + + return (unsigned long)prev; +} +EXPORT_SYMBOL(__xchg_u32); diff --git a/arch/sparc/lib/clear_page.S b/arch/sparc/lib/clear_page.S index 77e531f..46272df 100644 --- a/arch/sparc/lib/clear_page.S +++ b/arch/sparc/lib/clear_page.S @@ -37,10 +37,10 @@ _clear_page: /* %o0=dest */ .globl clear_user_page clear_user_page: /* %o0=dest, %o1=vaddr */ lduw [%g6 + TI_PRE_COUNT], %o2 - sethi %uhi(PAGE_OFFSET), %g2 + sethi %hi(PAGE_OFFSET), %g2 sethi %hi(PAGE_SIZE), %o4 - sllx %g2, 32, %g2 + ldx [%g2 + %lo(PAGE_OFFSET)], %g2 sethi %hi(PAGE_KERNEL_LOCKED), %g3 ldx [%g3 + %lo(PAGE_KERNEL_LOCKED)], %g3 diff --git a/arch/sparc/lib/copy_page.S b/arch/sparc/lib/copy_page.S index 4d2df32..dd16c61 100644 --- a/arch/sparc/lib/copy_page.S +++ b/arch/sparc/lib/copy_page.S @@ -46,10 +46,10 @@ .type copy_user_page,#function copy_user_page: /* %o0=dest, %o1=src, %o2=vaddr */ lduw [%g6 + TI_PRE_COUNT], %o4 - sethi %uhi(PAGE_OFFSET), %g2 + sethi %hi(PAGE_OFFSET), %g2 sethi %hi(PAGE_SIZE), %o3 - sllx %g2, 32, %g2 + ldx [%g2 + %lo(PAGE_OFFSET)], %g2 sethi %hi(PAGE_KERNEL_LOCKED), %g3 ldx [%g3 + %lo(PAGE_KERNEL_LOCKED)], %g3 diff --git a/arch/sparc/lib/memset.S b/arch/sparc/lib/memset.S index 99c017b..f75e690 100644 --- a/arch/sparc/lib/memset.S +++ b/arch/sparc/lib/memset.S @@ -3,8 +3,9 @@ * Copyright (C) 1996,1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz) * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) * - * Returns 0, if ok, and number of bytes not yet set if exception - * occurs and we were called as clear_user. + * Calls to memset returns initial %o0. Calls to bzero returns 0, if ok, and + * number of bytes not yet set if exception occurs and we were called as + * clear_user. */ #include <asm/ptrace.h> @@ -65,6 +66,8 @@ __bzero_begin: .globl __memset_start, __memset_end __memset_start: memset: + mov %o0, %g1 + mov 1, %g4 and %o1, 0xff, %g3 sll %g3, 8, %g2 or %g3, %g2, %g3 @@ -89,6 +92,7 @@ memset: sub %o0, %o2, %o0 __bzero: + clr %g4 mov %g0, %g3 1: cmp %o1, 7 @@ -151,8 +155,8 @@ __bzero: bne,a 8f EX(stb %g3, [%o0], and %o1, 1) 8: - retl - clr %o0 + b 0f + nop 7: be 13b orcc %o1, 0, %g0 @@ -164,6 +168,12 @@ __bzero: bne 8b EX(stb %g3, [%o0 - 1], add %o1, 1) 0: + andcc %g4, 1, %g0 + be 5f + nop + retl + mov %g1, %o0 +5: retl clr %o0 __memset_end: diff --git a/arch/sparc/math-emu/math_32.c b/arch/sparc/math-emu/math_32.c index aa4d55b..5ce8f2f 100644 --- a/arch/sparc/math-emu/math_32.c +++ b/arch/sparc/math-emu/math_32.c @@ -499,7 +499,7 @@ static int do_one_mathemu(u32 insn, unsigned long *pfsr, unsigned long *fregs) case 0: fsr = *pfsr; if (IR == -1) IR = 2; /* fcc is always fcc0 */ - fsr &= ~0xc00; fsr |= (IR << 10); break; + fsr &= ~0xc00; fsr |= (IR << 10); *pfsr = fsr; break; case 1: rd->s = IR; break; diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c index 2eaca28..59dbd46 100644 --- a/arch/sparc/mm/fault_32.c +++ b/arch/sparc/mm/fault_32.c @@ -199,7 +199,7 @@ asmlinkage void do_sparc_fault(struct pt_regs *regs, int text_fault, int write, * If we're in an interrupt or have no user * context, we must not take the fault.. */ - if (!mm || pagefault_disabled()) + if (in_atomic() || !mm) goto no_context; perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c index a1d35e2..603e462 100644 --- a/arch/sparc/mm/fault_64.c +++ b/arch/sparc/mm/fault_64.c @@ -95,38 +95,51 @@ static unsigned int get_user_insn(unsigned long tpc) pte_t *ptep, pte; unsigned long pa; u32 insn = 0; - unsigned long pstate; - if (pgd_none(*pgdp)) - goto outret; + if (pgd_none(*pgdp) || unlikely(pgd_bad(*pgdp))) + goto out; pudp = pud_offset(pgdp, tpc); - if (pud_none(*pudp)) - goto outret; - pmdp = pmd_offset(pudp, tpc); - if (pmd_none(*pmdp)) - goto outret; + if (pud_none(*pudp) || unlikely(pud_bad(*pudp))) + goto out; /* This disables preemption for us as well. */ - __asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate)); - __asm__ __volatile__("wrpr %0, %1, %%pstate" - : : "r" (pstate), "i" (PSTATE_IE)); - ptep = pte_offset_map(pmdp, tpc); - pte = *ptep; - if (!pte_present(pte)) - goto out; + local_irq_disable(); + + pmdp = pmd_offset(pudp, tpc); + if (pmd_none(*pmdp) || unlikely(pmd_bad(*pmdp))) + goto out_irq_enable; - pa = (pte_pfn(pte) << PAGE_SHIFT); - pa += (tpc & ~PAGE_MASK); +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + if (pmd_trans_huge(*pmdp)) { + if (pmd_trans_splitting(*pmdp)) + goto out_irq_enable; - /* Use phys bypass so we don't pollute dtlb/dcache. */ - __asm__ __volatile__("lduwa [%1] %2, %0" - : "=r" (insn) - : "r" (pa), "i" (ASI_PHYS_USE_EC)); + pa = pmd_pfn(*pmdp) << PAGE_SHIFT; + pa += tpc & ~HPAGE_MASK; + /* Use phys bypass so we don't pollute dtlb/dcache. */ + __asm__ __volatile__("lduwa [%1] %2, %0" + : "=r" (insn) + : "r" (pa), "i" (ASI_PHYS_USE_EC)); + } else +#endif + { + ptep = pte_offset_map(pmdp, tpc); + pte = *ptep; + if (pte_present(pte)) { + pa = (pte_pfn(pte) << PAGE_SHIFT); + pa += (tpc & ~PAGE_MASK); + + /* Use phys bypass so we don't pollute dtlb/dcache. */ + __asm__ __volatile__("lduwa [%1] %2, %0" + : "=r" (insn) + : "r" (pa), "i" (ASI_PHYS_USE_EC)); + } + pte_unmap(ptep); + } +out_irq_enable: + local_irq_enable(); out: - pte_unmap(ptep); - __asm__ __volatile__("wrpr %0, 0x0, %%pstate" : : "r" (pstate)); -outret: return insn; } @@ -152,7 +165,8 @@ show_signal_msg(struct pt_regs *regs, int sig, int code, } static void do_fault_siginfo(int code, int sig, struct pt_regs *regs, - unsigned int insn, int fault_code) + unsigned long fault_addr, unsigned int insn, + int fault_code) { unsigned long addr; siginfo_t info; @@ -160,10 +174,18 @@ static void do_fault_siginfo(int code, int sig, struct pt_regs *regs, info.si_code = code; info.si_signo = sig; info.si_errno = 0; - if (fault_code & FAULT_CODE_ITLB) + if (fault_code & FAULT_CODE_ITLB) { addr = regs->tpc; - else - addr = compute_effective_address(regs, insn, 0); + } else { + /* If we were able to probe the faulting instruction, use it + * to compute a precise fault address. Otherwise use the fault + * time provided address which may only have page granularity. + */ + if (insn) + addr = compute_effective_address(regs, insn, 0); + else + addr = fault_addr; + } info.si_addr = (void __user *) addr; info.si_trapno = 0; @@ -238,7 +260,7 @@ static void __kprobes do_kernel_fault(struct pt_regs *regs, int si_code, /* The si_code was set to make clear whether * this was a SEGV_MAPERR or SEGV_ACCERR fault. */ - do_fault_siginfo(si_code, SIGSEGV, regs, insn, fault_code); + do_fault_siginfo(si_code, SIGSEGV, regs, address, insn, fault_code); return; } @@ -258,18 +280,6 @@ static void noinline __kprobes bogus_32bit_fault_tpc(struct pt_regs *regs) show_regs(regs); } -static void noinline __kprobes bogus_32bit_fault_address(struct pt_regs *regs, - unsigned long addr) -{ - static int times; - - if (times++ < 10) - printk(KERN_ERR "FAULT[%s:%d]: 32-bit process " - "reports 64-bit fault address [%lx]\n", - current->comm, current->pid, addr); - show_regs(regs); -} - asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs) { struct mm_struct *mm = current->mm; @@ -298,10 +308,8 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs) goto intr_or_no_mm; } } - if (unlikely((address >> 32) != 0)) { - bogus_32bit_fault_address(regs, address); + if (unlikely((address >> 32) != 0)) goto intr_or_no_mm; - } } if (regs->tstate & TSTATE_PRIV) { @@ -322,7 +330,7 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs) * If we're in an interrupt or have no user * context, we must not take the fault.. */ - if (!mm || pagefault_disabled()) + if (in_atomic() || !mm) goto intr_or_no_mm; perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); @@ -338,6 +346,9 @@ retry: down_read(&mm->mmap_sem); } + if (fault_code & FAULT_CODE_BAD_RA) + goto do_sigbus; + vma = find_vma(mm, address); if (!vma) goto bad_area; @@ -521,7 +532,7 @@ do_sigbus: * Send a sigbus, regardless of whether we were in kernel * or user mode. */ - do_fault_siginfo(BUS_ADRERR, SIGBUS, regs, insn, fault_code); + do_fault_siginfo(BUS_ADRERR, SIGBUS, regs, address, insn, fault_code); /* Kernel mode? Handle exceptions or die */ if (regs->tstate & TSTATE_PRIV) diff --git a/arch/sparc/mm/gup.c b/arch/sparc/mm/gup.c index 01ee23d..ae6ce38 100644 --- a/arch/sparc/mm/gup.c +++ b/arch/sparc/mm/gup.c @@ -71,13 +71,12 @@ static int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr, int *nr) { struct page *head, *page, *tail; - u32 mask; int refs; - mask = PMD_HUGE_PRESENT; - if (write) - mask |= PMD_HUGE_WRITE; - if ((pmd_val(pmd) & mask) != mask) + if (!(pmd_val(pmd) & _PAGE_VALID)) + return 0; + + if (write && !pmd_write(pmd)) return 0; refs = 0; @@ -161,6 +160,36 @@ static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end, return 1; } +int __get_user_pages_fast(unsigned long start, int nr_pages, int write, + struct page **pages) +{ + struct mm_struct *mm = current->mm; + unsigned long addr, len, end; + unsigned long next, flags; + pgd_t *pgdp; + int nr = 0; + + start &= PAGE_MASK; + addr = start; + len = (unsigned long) nr_pages << PAGE_SHIFT; + end = start + len; + + local_irq_save(flags); + pgdp = pgd_offset(mm, addr); + do { + pgd_t pgd = *pgdp; + + next = pgd_addr_end(addr, end); + if (pgd_none(pgd)) + break; + if (!gup_pud_range(pgd, addr, next, write, pages, &nr)) + break; + } while (pgdp++, addr = next, addr != end); + local_irq_restore(flags); + + return nr; +} + int get_user_pages_fast(unsigned long start, int nr_pages, int write, struct page **pages) { diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c index 9639964..8545f62 100644 --- a/arch/sparc/mm/hugetlbpage.c +++ b/arch/sparc/mm/hugetlbpage.c @@ -21,8 +21,6 @@ /* Slightly simplified from the non-hugepage variant because by * definition we don't have to worry about any page coloring stuff */ -#define VA_EXCLUDE_START (0x0000080000000000UL - (1UL << 32UL)) -#define VA_EXCLUDE_END (0xfffff80000000000UL + (1UL << 32UL)) static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *filp, unsigned long addr, @@ -234,11 +232,6 @@ int pud_huge(pud_t pud) return 0; } -int pmd_huge_support(void) -{ - return 0; -} - struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, pmd_t *pmd, int write) { diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index ec995b0..4438e94 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -73,7 +73,6 @@ unsigned long kern_linear_pte_xor[4] __read_mostly; * 'cpu' properties, but we need to have this table setup before the * MDESC is initialized. */ -unsigned long kpte_linear_bitmap[KPTE_BITMAP_BYTES / sizeof(unsigned long)]; #ifndef CONFIG_DEBUG_PAGEALLOC /* A special kernel TSB for 4MB, 256MB, 2GB and 16GB linear mappings. @@ -82,10 +81,11 @@ unsigned long kpte_linear_bitmap[KPTE_BITMAP_BYTES / sizeof(unsigned long)]; */ extern struct tsb swapper_4m_tsb[KERNEL_TSB4M_NENTRIES]; #endif +extern struct tsb swapper_tsb[KERNEL_TSB_NENTRIES]; static unsigned long cpu_pgsz_mask; -#define MAX_BANKS 32 +#define MAX_BANKS 1024 static struct linux_prom64_registers pavail[MAX_BANKS]; static int pavail_ents; @@ -163,10 +163,6 @@ static void __init read_obp_memory(const char *property, cmp_p64, NULL); } -unsigned long sparc64_valid_addr_bitmap[VALID_ADDR_BITMAP_BYTES / - sizeof(unsigned long)]; -EXPORT_SYMBOL(sparc64_valid_addr_bitmap); - /* Kernel physical address base and size in bytes. */ unsigned long kern_base __read_mostly; unsigned long kern_size __read_mostly; @@ -350,18 +346,22 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t * mm = vma->vm_mm; - raw_spin_lock_irqsave(&mm->context.lock, flags); + /* Don't insert a non-valid PTE into the TSB, we'll deadlock. */ + if (!pte_accessible(mm, pte)) + return; + + spin_lock_irqsave(&mm->context.lock, flags); #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) if (mm->context.huge_pte_count && is_hugetlb_pte(pte)) - __update_mmu_tsb_insert(mm, MM_TSB_HUGE, HPAGE_SHIFT, + __update_mmu_tsb_insert(mm, MM_TSB_HUGE, REAL_HPAGE_SHIFT, address, pte_val(pte)); else #endif __update_mmu_tsb_insert(mm, MM_TSB_BASE, PAGE_SHIFT, address, pte_val(pte)); - raw_spin_unlock_irqrestore(&mm->context.lock, flags); + spin_unlock_irqrestore(&mm->context.lock, flags); } void flush_dcache_page(struct page *page) @@ -588,7 +588,7 @@ static void __init remap_kernel(void) int i, tlb_ent = sparc64_highest_locked_tlbent(); tte_vaddr = (unsigned long) KERNBASE; - phys_page = (prom_boot_mapping_phys_low >> 22UL) << 22UL; + phys_page = (prom_boot_mapping_phys_low >> ILOG2_4MB) << ILOG2_4MB; tte_data = kern_large_tte(phys_page); kern_locked_tte_data = tte_data; @@ -661,7 +661,7 @@ void __flush_dcache_range(unsigned long start, unsigned long end) EXPORT_SYMBOL(__flush_dcache_range); /* get_new_mmu_context() uses "cache + 1". */ -DEFINE_RAW_SPINLOCK(ctx_alloc_lock); +DEFINE_SPINLOCK(ctx_alloc_lock); unsigned long tlb_context_cache = CTX_FIRST_VERSION - 1; #define MAX_CTX_NR (1UL << CTX_NR_BITS) #define CTX_BMAP_SLOTS BITS_TO_LONGS(MAX_CTX_NR) @@ -683,7 +683,7 @@ void get_new_mmu_context(struct mm_struct *mm) unsigned long orig_pgsz_bits; int new_version; - raw_spin_lock(&ctx_alloc_lock); + spin_lock(&ctx_alloc_lock); orig_pgsz_bits = (mm->context.sparc64_ctx_val & CTX_PGSZ_MASK); ctx = (tlb_context_cache + 1) & CTX_NR_MASK; new_ctx = find_next_zero_bit(mmu_context_bmap, 1 << CTX_NR_BITS, ctx); @@ -719,7 +719,7 @@ void get_new_mmu_context(struct mm_struct *mm) out: tlb_context_cache = new_ctx; mm->context.sparc64_ctx_val = new_ctx | orig_pgsz_bits; - raw_spin_unlock(&ctx_alloc_lock); + spin_unlock(&ctx_alloc_lock); if (unlikely(new_version)) smp_new_mmu_context_version(); @@ -834,7 +834,10 @@ static int find_node(unsigned long addr) if ((addr & p->mask) == p->val) return i; } - return -1; + /* The following condition has been observed on LDOM guests.*/ + WARN_ONCE(1, "find_node: A physical address doesn't match a NUMA node" + " rule. Some physical memory will be owned by node 0."); + return 0; } static u64 memblock_nid_range(u64 start, u64 end, int *nid) @@ -1355,9 +1358,144 @@ static unsigned long __init bootmem_init(unsigned long phys_base) static struct linux_prom64_registers pall[MAX_BANKS] __initdata; static int pall_ents __initdata; -#ifdef CONFIG_DEBUG_PAGEALLOC +static unsigned long max_phys_bits = 40; + +bool kern_addr_valid(unsigned long addr) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + + if ((long)addr < 0L) { + unsigned long pa = __pa(addr); + + if ((addr >> max_phys_bits) != 0UL) + return false; + + return pfn_valid(pa >> PAGE_SHIFT); + } + + if (addr >= (unsigned long) KERNBASE && + addr < (unsigned long)&_end) + return true; + + pgd = pgd_offset_k(addr); + if (pgd_none(*pgd)) + return 0; + + pud = pud_offset(pgd, addr); + if (pud_none(*pud)) + return 0; + + if (pud_large(*pud)) + return pfn_valid(pud_pfn(*pud)); + + pmd = pmd_offset(pud, addr); + if (pmd_none(*pmd)) + return 0; + + if (pmd_large(*pmd)) + return pfn_valid(pmd_pfn(*pmd)); + + pte = pte_offset_kernel(pmd, addr); + if (pte_none(*pte)) + return 0; + + return pfn_valid(pte_pfn(*pte)); +} +EXPORT_SYMBOL(kern_addr_valid); + +static unsigned long __ref kernel_map_hugepud(unsigned long vstart, + unsigned long vend, + pud_t *pud) +{ + const unsigned long mask16gb = (1UL << 34) - 1UL; + u64 pte_val = vstart; + + /* Each PUD is 8GB */ + if ((vstart & mask16gb) || + (vend - vstart <= mask16gb)) { + pte_val ^= kern_linear_pte_xor[2]; + pud_val(*pud) = pte_val | _PAGE_PUD_HUGE; + + return vstart + PUD_SIZE; + } + + pte_val ^= kern_linear_pte_xor[3]; + pte_val |= _PAGE_PUD_HUGE; + + vend = vstart + mask16gb + 1UL; + while (vstart < vend) { + pud_val(*pud) = pte_val; + + pte_val += PUD_SIZE; + vstart += PUD_SIZE; + pud++; + } + return vstart; +} + +static bool kernel_can_map_hugepud(unsigned long vstart, unsigned long vend, + bool guard) +{ + if (guard && !(vstart & ~PUD_MASK) && (vend - vstart) >= PUD_SIZE) + return true; + + return false; +} + +static unsigned long __ref kernel_map_hugepmd(unsigned long vstart, + unsigned long vend, + pmd_t *pmd) +{ + const unsigned long mask256mb = (1UL << 28) - 1UL; + const unsigned long mask2gb = (1UL << 31) - 1UL; + u64 pte_val = vstart; + + /* Each PMD is 8MB */ + if ((vstart & mask256mb) || + (vend - vstart <= mask256mb)) { + pte_val ^= kern_linear_pte_xor[0]; + pmd_val(*pmd) = pte_val | _PAGE_PMD_HUGE; + + return vstart + PMD_SIZE; + } + + if ((vstart & mask2gb) || + (vend - vstart <= mask2gb)) { + pte_val ^= kern_linear_pte_xor[1]; + pte_val |= _PAGE_PMD_HUGE; + vend = vstart + mask256mb + 1UL; + } else { + pte_val ^= kern_linear_pte_xor[2]; + pte_val |= _PAGE_PMD_HUGE; + vend = vstart + mask2gb + 1UL; + } + + while (vstart < vend) { + pmd_val(*pmd) = pte_val; + + pte_val += PMD_SIZE; + vstart += PMD_SIZE; + pmd++; + } + + return vstart; +} + +static bool kernel_can_map_hugepmd(unsigned long vstart, unsigned long vend, + bool guard) +{ + if (guard && !(vstart & ~PMD_MASK) && (vend - vstart) >= PMD_SIZE) + return true; + + return false; +} + static unsigned long __ref kernel_map_range(unsigned long pstart, - unsigned long pend, pgprot_t prot) + unsigned long pend, pgprot_t prot, + bool use_huge) { unsigned long vstart = PAGE_OFFSET + pstart; unsigned long vend = PAGE_OFFSET + pend; @@ -1376,19 +1514,34 @@ static unsigned long __ref kernel_map_range(unsigned long pstart, pmd_t *pmd; pte_t *pte; + if (pgd_none(*pgd)) { + pud_t *new; + + new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE); + alloc_bytes += PAGE_SIZE; + pgd_populate(&init_mm, pgd, new); + } pud = pud_offset(pgd, vstart); if (pud_none(*pud)) { pmd_t *new; + if (kernel_can_map_hugepud(vstart, vend, use_huge)) { + vstart = kernel_map_hugepud(vstart, vend, pud); + continue; + } new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE); alloc_bytes += PAGE_SIZE; pud_populate(&init_mm, pud, new); } pmd = pmd_offset(pud, vstart); - if (!pmd_present(*pmd)) { + if (pmd_none(*pmd)) { pte_t *new; + if (kernel_can_map_hugepmd(vstart, vend, use_huge)) { + vstart = kernel_map_hugepmd(vstart, vend, pmd); + continue; + } new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE); alloc_bytes += PAGE_SIZE; pmd_populate_kernel(&init_mm, pmd, new); @@ -1411,100 +1564,34 @@ static unsigned long __ref kernel_map_range(unsigned long pstart, return alloc_bytes; } -extern unsigned int kvmap_linear_patch[1]; -#endif /* CONFIG_DEBUG_PAGEALLOC */ - -static void __init kpte_set_val(unsigned long index, unsigned long val) -{ - unsigned long *ptr = kpte_linear_bitmap; - - val <<= ((index % (BITS_PER_LONG / 2)) * 2); - ptr += (index / (BITS_PER_LONG / 2)); - - *ptr |= val; -} - -static const unsigned long kpte_shift_min = 28; /* 256MB */ -static const unsigned long kpte_shift_max = 34; /* 16GB */ -static const unsigned long kpte_shift_incr = 3; - -static unsigned long kpte_mark_using_shift(unsigned long start, unsigned long end, - unsigned long shift) +static void __init flush_all_kernel_tsbs(void) { - unsigned long size = (1UL << shift); - unsigned long mask = (size - 1UL); - unsigned long remains = end - start; - unsigned long val; - - if (remains < size || (start & mask)) - return start; - - /* VAL maps: - * - * shift 28 --> kern_linear_pte_xor index 1 - * shift 31 --> kern_linear_pte_xor index 2 - * shift 34 --> kern_linear_pte_xor index 3 - */ - val = ((shift - kpte_shift_min) / kpte_shift_incr) + 1; - - remains &= ~mask; - if (shift != kpte_shift_max) - remains = size; - - while (remains) { - unsigned long index = start >> kpte_shift_min; + int i; - kpte_set_val(index, val); + for (i = 0; i < KERNEL_TSB_NENTRIES; i++) { + struct tsb *ent = &swapper_tsb[i]; - start += 1UL << kpte_shift_min; - remains -= 1UL << kpte_shift_min; + ent->tag = (1UL << TSB_TAG_INVALID_BIT); } +#ifndef CONFIG_DEBUG_PAGEALLOC + for (i = 0; i < KERNEL_TSB4M_NENTRIES; i++) { + struct tsb *ent = &swapper_4m_tsb[i]; - return start; -} - -static void __init mark_kpte_bitmap(unsigned long start, unsigned long end) -{ - unsigned long smallest_size, smallest_mask; - unsigned long s; - - smallest_size = (1UL << kpte_shift_min); - smallest_mask = (smallest_size - 1UL); - - while (start < end) { - unsigned long orig_start = start; - - for (s = kpte_shift_max; s >= kpte_shift_min; s -= kpte_shift_incr) { - start = kpte_mark_using_shift(start, end, s); - - if (start != orig_start) - break; - } - - if (start == orig_start) - start = (start + smallest_size) & ~smallest_mask; + ent->tag = (1UL << TSB_TAG_INVALID_BIT); } +#endif } -static void __init init_kpte_bitmap(void) -{ - unsigned long i; - - for (i = 0; i < pall_ents; i++) { - unsigned long phys_start, phys_end; - - phys_start = pall[i].phys_addr; - phys_end = phys_start + pall[i].reg_size; - - mark_kpte_bitmap(phys_start, phys_end); - } -} +extern unsigned int kvmap_linear_patch[1]; static void __init kernel_physical_mapping_init(void) { -#ifdef CONFIG_DEBUG_PAGEALLOC unsigned long i, mem_alloced = 0UL; + bool use_huge = true; +#ifdef CONFIG_DEBUG_PAGEALLOC + use_huge = false; +#endif for (i = 0; i < pall_ents; i++) { unsigned long phys_start, phys_end; @@ -1512,7 +1599,7 @@ static void __init kernel_physical_mapping_init(void) phys_end = phys_start + pall[i].reg_size; mem_alloced += kernel_map_range(phys_start, phys_end, - PAGE_KERNEL); + PAGE_KERNEL, use_huge); } printk("Allocated %ld bytes for kernel page tables.\n", @@ -1521,8 +1608,9 @@ static void __init kernel_physical_mapping_init(void) kvmap_linear_patch[0] = 0x01000000; /* nop */ flushi(&kvmap_linear_patch[0]); + flush_all_kernel_tsbs(); + __flush_tlb_all(); -#endif } #ifdef CONFIG_DEBUG_PAGEALLOC @@ -1532,7 +1620,7 @@ void kernel_map_pages(struct page *page, int numpages, int enable) unsigned long phys_end = phys_start + (numpages * PAGE_SIZE); kernel_map_range(phys_start, phys_end, - (enable ? PAGE_KERNEL : __pgprot(0))); + (enable ? PAGE_KERNEL : __pgprot(0)), false); flush_tsb_kernel_range(PAGE_OFFSET + phys_start, PAGE_OFFSET + phys_end); @@ -1557,6 +1645,80 @@ unsigned long __init find_ecache_flush_span(unsigned long size) return ~0UL; } +unsigned long PAGE_OFFSET; +EXPORT_SYMBOL(PAGE_OFFSET); + +unsigned long VMALLOC_END = 0x0000010000000000UL; +EXPORT_SYMBOL(VMALLOC_END); + +unsigned long sparc64_va_hole_top = 0xfffff80000000000UL; +unsigned long sparc64_va_hole_bottom = 0x0000080000000000UL; + +static void __init setup_page_offset(void) +{ + if (tlb_type == cheetah || tlb_type == cheetah_plus) { + /* Cheetah/Panther support a full 64-bit virtual + * address, so we can use all that our page tables + * support. + */ + sparc64_va_hole_top = 0xfff0000000000000UL; + sparc64_va_hole_bottom = 0x0010000000000000UL; + + max_phys_bits = 42; + } else if (tlb_type == hypervisor) { + switch (sun4v_chip_type) { + case SUN4V_CHIP_NIAGARA1: + case SUN4V_CHIP_NIAGARA2: + /* T1 and T2 support 48-bit virtual addresses. */ + sparc64_va_hole_top = 0xffff800000000000UL; + sparc64_va_hole_bottom = 0x0000800000000000UL; + + max_phys_bits = 39; + break; + case SUN4V_CHIP_NIAGARA3: + /* T3 supports 48-bit virtual addresses. */ + sparc64_va_hole_top = 0xffff800000000000UL; + sparc64_va_hole_bottom = 0x0000800000000000UL; + + max_phys_bits = 43; + break; + case SUN4V_CHIP_NIAGARA4: + case SUN4V_CHIP_NIAGARA5: + case SUN4V_CHIP_SPARC64X: + case SUN4V_CHIP_SPARC_M6: + /* T4 and later support 52-bit virtual addresses. */ + sparc64_va_hole_top = 0xfff8000000000000UL; + sparc64_va_hole_bottom = 0x0008000000000000UL; + max_phys_bits = 47; + break; + case SUN4V_CHIP_SPARC_M7: + default: + /* M7 and later support 52-bit virtual addresses. */ + sparc64_va_hole_top = 0xfff8000000000000UL; + sparc64_va_hole_bottom = 0x0008000000000000UL; + max_phys_bits = 49; + break; + } + } + + if (max_phys_bits > MAX_PHYS_ADDRESS_BITS) { + prom_printf("MAX_PHYS_ADDRESS_BITS is too small, need %lu\n", + max_phys_bits); + prom_halt(); + } + + PAGE_OFFSET = sparc64_va_hole_top; + VMALLOC_END = ((sparc64_va_hole_bottom >> 1) + + (sparc64_va_hole_bottom >> 2)); + + pr_info("MM: PAGE_OFFSET is 0x%016lx (max_phys_bits == %lu)\n", + PAGE_OFFSET, max_phys_bits); + pr_info("MM: VMALLOC [0x%016lx --> 0x%016lx]\n", + VMALLOC_START, VMALLOC_END); + pr_info("MM: VMEMMAP [0x%016lx --> 0x%016lx]\n", + VMEMMAP_BASE, VMEMMAP_BASE << 1); +} + static void __init tsb_phys_patch(void) { struct tsb_ldquad_phys_patch_entry *pquad; @@ -1599,21 +1761,42 @@ static void __init tsb_phys_patch(void) #define NUM_KTSB_DESCR 1 #endif static struct hv_tsb_descr ktsb_descr[NUM_KTSB_DESCR]; -extern struct tsb swapper_tsb[KERNEL_TSB_NENTRIES]; + +/* The swapper TSBs are loaded with a base sequence of: + * + * sethi %uhi(SYMBOL), REG1 + * sethi %hi(SYMBOL), REG2 + * or REG1, %ulo(SYMBOL), REG1 + * or REG2, %lo(SYMBOL), REG2 + * sllx REG1, 32, REG1 + * or REG1, REG2, REG1 + * + * When we use physical addressing for the TSB accesses, we patch the + * first four instructions in the above sequence. + */ static void patch_one_ktsb_phys(unsigned int *start, unsigned int *end, unsigned long pa) { - pa >>= KTSB_PHYS_SHIFT; + unsigned long high_bits, low_bits; + + high_bits = (pa >> 32) & 0xffffffff; + low_bits = (pa >> 0) & 0xffffffff; while (start < end) { unsigned int *ia = (unsigned int *)(unsigned long)*start; - ia[0] = (ia[0] & ~0x3fffff) | (pa >> 10); + ia[0] = (ia[0] & ~0x3fffff) | (high_bits >> 10); __asm__ __volatile__("flush %0" : : "r" (ia)); - ia[1] = (ia[1] & ~0x3ff) | (pa & 0x3ff); + ia[1] = (ia[1] & ~0x3fffff) | (low_bits >> 10); __asm__ __volatile__("flush %0" : : "r" (ia + 1)); + ia[2] = (ia[2] & ~0x1fff) | (high_bits & 0x3ff); + __asm__ __volatile__("flush %0" : : "r" (ia + 2)); + + ia[3] = (ia[3] & ~0x1fff) | (low_bits & 0x3ff); + __asm__ __volatile__("flush %0" : : "r" (ia + 3)); + start++; } } @@ -1722,7 +1905,7 @@ static void __init sun4v_linear_pte_xor_finalize(void) #ifndef CONFIG_DEBUG_PAGEALLOC if (cpu_pgsz_mask & HV_PGSZ_MASK_256MB) { kern_linear_pte_xor[1] = (_PAGE_VALID | _PAGE_SZ256MB_4V) ^ - 0xfffff80000000000UL; + PAGE_OFFSET; kern_linear_pte_xor[1] |= (_PAGE_CP_4V | _PAGE_CV_4V | _PAGE_P_4V | _PAGE_W_4V); } else { @@ -1731,7 +1914,7 @@ static void __init sun4v_linear_pte_xor_finalize(void) if (cpu_pgsz_mask & HV_PGSZ_MASK_2GB) { kern_linear_pte_xor[2] = (_PAGE_VALID | _PAGE_SZ2GB_4V) ^ - 0xfffff80000000000UL; + PAGE_OFFSET; kern_linear_pte_xor[2] |= (_PAGE_CP_4V | _PAGE_CV_4V | _PAGE_P_4V | _PAGE_W_4V); } else { @@ -1740,7 +1923,7 @@ static void __init sun4v_linear_pte_xor_finalize(void) if (cpu_pgsz_mask & HV_PGSZ_MASK_16GB) { kern_linear_pte_xor[3] = (_PAGE_VALID | _PAGE_SZ16GB_4V) ^ - 0xfffff80000000000UL; + PAGE_OFFSET; kern_linear_pte_xor[3] |= (_PAGE_CP_4V | _PAGE_CV_4V | _PAGE_P_4V | _PAGE_W_4V); } else { @@ -1752,7 +1935,6 @@ static void __init sun4v_linear_pte_xor_finalize(void) /* paging_init() sets up the page tables */ static unsigned long last_valid_pfn; -pgd_t swapper_pg_dir[2048]; static void sun4u_pgprot_init(void); static void sun4v_pgprot_init(void); @@ -1763,6 +1945,8 @@ void __init paging_init(void) unsigned long real_end, i; int node; + setup_page_offset(); + /* These build time checkes make sure that the dcache_dirty_cpu() * page->flags usage will work. * @@ -1788,7 +1972,7 @@ void __init paging_init(void) BUILD_BUG_ON(NR_CPUS > 4096); - kern_base = (prom_boot_mapping_phys_low >> 22UL) << 22UL; + kern_base = (prom_boot_mapping_phys_low >> ILOG2_4MB) << ILOG2_4MB; kern_size = (unsigned long)&_end - (unsigned long)KERNBASE; /* Invalidate both kernel TSBs. */ @@ -1844,7 +2028,7 @@ void __init paging_init(void) shift = kern_base + PAGE_OFFSET - ((unsigned long)KERNBASE); real_end = (unsigned long)_end; - num_kernel_image_mappings = DIV_ROUND_UP(real_end - KERNBASE, 1 << 22); + num_kernel_image_mappings = DIV_ROUND_UP(real_end - KERNBASE, 1 << ILOG2_4MB); printk("Kernel: Using %d locked TLB entries for main kernel image.\n", num_kernel_image_mappings); @@ -1853,16 +2037,10 @@ void __init paging_init(void) */ init_mm.pgd += ((shift) / (sizeof(pgd_t))); - memset(swapper_low_pmd_dir, 0, sizeof(swapper_low_pmd_dir)); + memset(swapper_pg_dir, 0, sizeof(swapper_pg_dir)); - /* Now can init the kernel/bad page tables. */ - pud_set(pud_offset(&swapper_pg_dir[0], 0), - swapper_low_pmd_dir + (shift / sizeof(pgd_t))); - inherit_prom_mappings(); - init_kpte_bitmap(); - /* Ok, we can use our TLB miss and window trap handlers safely. */ setup_tba(); @@ -1969,70 +2147,6 @@ int page_in_phys_avail(unsigned long paddr) return 0; } -static struct linux_prom64_registers pavail_rescan[MAX_BANKS] __initdata; -static int pavail_rescan_ents __initdata; - -/* Certain OBP calls, such as fetching "available" properties, can - * claim physical memory. So, along with initializing the valid - * address bitmap, what we do here is refetch the physical available - * memory list again, and make sure it provides at least as much - * memory as 'pavail' does. - */ -static void __init setup_valid_addr_bitmap_from_pavail(unsigned long *bitmap) -{ - int i; - - read_obp_memory("available", &pavail_rescan[0], &pavail_rescan_ents); - - for (i = 0; i < pavail_ents; i++) { - unsigned long old_start, old_end; - - old_start = pavail[i].phys_addr; - old_end = old_start + pavail[i].reg_size; - while (old_start < old_end) { - int n; - - for (n = 0; n < pavail_rescan_ents; n++) { - unsigned long new_start, new_end; - - new_start = pavail_rescan[n].phys_addr; - new_end = new_start + - pavail_rescan[n].reg_size; - - if (new_start <= old_start && - new_end >= (old_start + PAGE_SIZE)) { - set_bit(old_start >> 22, bitmap); - goto do_next_page; - } - } - - prom_printf("mem_init: Lost memory in pavail\n"); - prom_printf("mem_init: OLD start[%lx] size[%lx]\n", - pavail[i].phys_addr, - pavail[i].reg_size); - prom_printf("mem_init: NEW start[%lx] size[%lx]\n", - pavail_rescan[i].phys_addr, - pavail_rescan[i].reg_size); - prom_printf("mem_init: Cannot continue, aborting.\n"); - prom_halt(); - - do_next_page: - old_start += PAGE_SIZE; - } - } -} - -static void __init patch_tlb_miss_handler_bitmap(void) -{ - extern unsigned int valid_addr_bitmap_insn[]; - extern unsigned int valid_addr_bitmap_patch[]; - - valid_addr_bitmap_insn[1] = valid_addr_bitmap_patch[1]; - mb(); - valid_addr_bitmap_insn[0] = valid_addr_bitmap_patch[0]; - flushi(&valid_addr_bitmap_insn[0]); -} - static void __init register_page_bootmem_info(void) { #ifdef CONFIG_NEED_MULTIPLE_NODES @@ -2045,18 +2159,6 @@ static void __init register_page_bootmem_info(void) } void __init mem_init(void) { - unsigned long addr, last; - - addr = PAGE_OFFSET + kern_base; - last = PAGE_ALIGN(kern_size) + addr; - while (addr < last) { - set_bit(__pa(addr) >> 22, sparc64_valid_addr_bitmap); - addr += PAGE_SIZE; - } - - setup_valid_addr_bitmap_from_pavail(sparc64_valid_addr_bitmap); - patch_tlb_miss_handler_bitmap(); - high_memory = __va(last_valid_pfn << PAGE_SHIFT); register_page_bootmem_info(); @@ -2146,18 +2248,9 @@ unsigned long _PAGE_CACHE __read_mostly; EXPORT_SYMBOL(_PAGE_CACHE); #ifdef CONFIG_SPARSEMEM_VMEMMAP -unsigned long vmemmap_table[VMEMMAP_SIZE]; - -static long __meminitdata addr_start, addr_end; -static int __meminitdata node_start; - int __meminit vmemmap_populate(unsigned long vstart, unsigned long vend, int node) { - unsigned long phys_start = (vstart - VMEMMAP_BASE); - unsigned long phys_end = (vend - VMEMMAP_BASE); - unsigned long addr = phys_start & VMEMMAP_CHUNK_MASK; - unsigned long end = VMEMMAP_ALIGN(phys_end); unsigned long pte_base; pte_base = (_PAGE_VALID | _PAGE_SZ4MB_4U | @@ -2168,47 +2261,52 @@ int __meminit vmemmap_populate(unsigned long vstart, unsigned long vend, _PAGE_CP_4V | _PAGE_CV_4V | _PAGE_P_4V | _PAGE_W_4V); - for (; addr < end; addr += VMEMMAP_CHUNK) { - unsigned long *vmem_pp = - vmemmap_table + (addr >> VMEMMAP_CHUNK_SHIFT); - void *block; + pte_base |= _PAGE_PMD_HUGE; - if (!(*vmem_pp & _PAGE_VALID)) { - block = vmemmap_alloc_block(1UL << 22, node); - if (!block) + vstart = vstart & PMD_MASK; + vend = ALIGN(vend, PMD_SIZE); + for (; vstart < vend; vstart += PMD_SIZE) { + pgd_t *pgd = pgd_offset_k(vstart); + unsigned long pte; + pud_t *pud; + pmd_t *pmd; + + if (pgd_none(*pgd)) { + pud_t *new = vmemmap_alloc_block(PAGE_SIZE, node); + + if (!new) return -ENOMEM; + pgd_populate(&init_mm, pgd, new); + } - *vmem_pp = pte_base | __pa(block); + pud = pud_offset(pgd, vstart); + if (pud_none(*pud)) { + pmd_t *new = vmemmap_alloc_block(PAGE_SIZE, node); - /* check to see if we have contiguous blocks */ - if (addr_end != addr || node_start != node) { - if (addr_start) - printk(KERN_DEBUG " [%lx-%lx] on node %d\n", - addr_start, addr_end-1, node_start); - addr_start = addr; - node_start = node; - } - addr_end = addr + VMEMMAP_CHUNK; + if (!new) + return -ENOMEM; + pud_populate(&init_mm, pud, new); } - } - return 0; -} -void __meminit vmemmap_populate_print_last(void) -{ - if (addr_start) { - printk(KERN_DEBUG " [%lx-%lx] on node %d\n", - addr_start, addr_end-1, node_start); - addr_start = 0; - addr_end = 0; - node_start = 0; + pmd = pmd_offset(pud, vstart); + + pte = pmd_val(*pmd); + if (!(pte & _PAGE_VALID)) { + void *block = vmemmap_alloc_block(PMD_SIZE, node); + + if (!block) + return -ENOMEM; + + pmd_val(*pmd) = pte_base | __pa(block); + } } + + return 0; } void vmemmap_free(unsigned long start, unsigned long end) { } - #endif /* CONFIG_SPARSEMEM_VMEMMAP */ static void prot_init_common(unsigned long page_none, @@ -2261,10 +2359,10 @@ static void __init sun4u_pgprot_init(void) __ACCESS_BITS_4U | _PAGE_E_4U); #ifdef CONFIG_DEBUG_PAGEALLOC - kern_linear_pte_xor[0] = _PAGE_VALID ^ 0xfffff80000000000UL; + kern_linear_pte_xor[0] = _PAGE_VALID ^ PAGE_OFFSET; #else kern_linear_pte_xor[0] = (_PAGE_VALID | _PAGE_SZ4MB_4U) ^ - 0xfffff80000000000UL; + PAGE_OFFSET; #endif kern_linear_pte_xor[0] |= (_PAGE_CP_4U | _PAGE_CV_4U | _PAGE_P_4U | _PAGE_W_4U); @@ -2308,10 +2406,10 @@ static void __init sun4v_pgprot_init(void) _PAGE_CACHE = _PAGE_CACHE_4V; #ifdef CONFIG_DEBUG_PAGEALLOC - kern_linear_pte_xor[0] = _PAGE_VALID ^ 0xfffff80000000000UL; + kern_linear_pte_xor[0] = _PAGE_VALID ^ PAGE_OFFSET; #else kern_linear_pte_xor[0] = (_PAGE_VALID | _PAGE_SZ4MB_4V) ^ - 0xfffff80000000000UL; + PAGE_OFFSET; #endif kern_linear_pte_xor[0] |= (_PAGE_CP_4V | _PAGE_CV_4V | _PAGE_P_4V | _PAGE_W_4V); @@ -2455,53 +2553,13 @@ void __flush_tlb_all(void) : : "r" (pstate)); } -static pte_t *get_from_cache(struct mm_struct *mm) -{ - struct page *page; - pte_t *ret; - - spin_lock(&mm->page_table_lock); - page = mm->context.pgtable_page; - ret = NULL; - if (page) { - void *p = page_address(page); - - mm->context.pgtable_page = NULL; - - ret = (pte_t *) (p + (PAGE_SIZE / 2)); - } - spin_unlock(&mm->page_table_lock); - - return ret; -} - -static struct page *__alloc_for_cache(struct mm_struct *mm) -{ - struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK | - __GFP_REPEAT | __GFP_ZERO); - - if (page) { - spin_lock(&mm->page_table_lock); - if (!mm->context.pgtable_page) { - atomic_set(&page->_count, 2); - mm->context.pgtable_page = page; - } - spin_unlock(&mm->page_table_lock); - } - return page; -} - pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - struct page *page; - pte_t *pte; - - pte = get_from_cache(mm); - if (pte) - return pte; + struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK | + __GFP_REPEAT | __GFP_ZERO); + pte_t *pte = NULL; - page = __alloc_for_cache(mm); if (page) pte = (pte_t *) page_address(page); @@ -2511,14 +2569,10 @@ pte_t *pte_alloc_one_kernel(struct mm_struct *mm, pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) { - struct page *page; - pte_t *pte; - - pte = get_from_cache(mm); - if (pte) - return pte; + struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK | + __GFP_REPEAT | __GFP_ZERO); + pte_t *pte = NULL; - page = __alloc_for_cache(mm); if (page) { pgtable_page_ctor(page); pte = (pte_t *) page_address(page); @@ -2529,18 +2583,15 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, void pte_free_kernel(struct mm_struct *mm, pte_t *pte) { - struct page *page = virt_to_page(pte); - if (put_page_testzero(page)) - free_hot_cold_page(page, 0); + free_page((unsigned long)pte); } static void __pte_free(pgtable_t pte) { struct page *page = virt_to_page(pte); - if (put_page_testzero(page)) { - pgtable_page_dtor(page); - free_hot_cold_page(page, 0); - } + + pgtable_page_dtor(page); + __free_page(page); } void pte_free(struct mm_struct *mm, pgtable_t pte) @@ -2557,124 +2608,27 @@ void pgtable_free(void *table, bool is_page) } #ifdef CONFIG_TRANSPARENT_HUGEPAGE -static pmd_t pmd_set_protbits(pmd_t pmd, pgprot_t pgprot, bool for_modify) -{ - if (pgprot_val(pgprot) & _PAGE_VALID) - pmd_val(pmd) |= PMD_HUGE_PRESENT; - if (tlb_type == hypervisor) { - if (pgprot_val(pgprot) & _PAGE_WRITE_4V) - pmd_val(pmd) |= PMD_HUGE_WRITE; - if (pgprot_val(pgprot) & _PAGE_EXEC_4V) - pmd_val(pmd) |= PMD_HUGE_EXEC; - - if (!for_modify) { - if (pgprot_val(pgprot) & _PAGE_ACCESSED_4V) - pmd_val(pmd) |= PMD_HUGE_ACCESSED; - if (pgprot_val(pgprot) & _PAGE_MODIFIED_4V) - pmd_val(pmd) |= PMD_HUGE_DIRTY; - } - } else { - if (pgprot_val(pgprot) & _PAGE_WRITE_4U) - pmd_val(pmd) |= PMD_HUGE_WRITE; - if (pgprot_val(pgprot) & _PAGE_EXEC_4U) - pmd_val(pmd) |= PMD_HUGE_EXEC; - - if (!for_modify) { - if (pgprot_val(pgprot) & _PAGE_ACCESSED_4U) - pmd_val(pmd) |= PMD_HUGE_ACCESSED; - if (pgprot_val(pgprot) & _PAGE_MODIFIED_4U) - pmd_val(pmd) |= PMD_HUGE_DIRTY; - } - } - - return pmd; -} - -pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot) -{ - pmd_t pmd; - - pmd_val(pmd) = (page_nr << ((PAGE_SHIFT - PMD_PADDR_SHIFT))); - pmd_val(pmd) |= PMD_ISHUGE; - pmd = pmd_set_protbits(pmd, pgprot, false); - return pmd; -} - -pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) -{ - pmd_val(pmd) &= ~(PMD_HUGE_PRESENT | - PMD_HUGE_WRITE | - PMD_HUGE_EXEC); - pmd = pmd_set_protbits(pmd, newprot, true); - return pmd; -} - -pgprot_t pmd_pgprot(pmd_t entry) -{ - unsigned long pte = 0; - - if (pmd_val(entry) & PMD_HUGE_PRESENT) - pte |= _PAGE_VALID; - - if (tlb_type == hypervisor) { - if (pmd_val(entry) & PMD_HUGE_PRESENT) - pte |= _PAGE_PRESENT_4V; - if (pmd_val(entry) & PMD_HUGE_EXEC) - pte |= _PAGE_EXEC_4V; - if (pmd_val(entry) & PMD_HUGE_WRITE) - pte |= _PAGE_W_4V; - if (pmd_val(entry) & PMD_HUGE_ACCESSED) - pte |= _PAGE_ACCESSED_4V; - if (pmd_val(entry) & PMD_HUGE_DIRTY) - pte |= _PAGE_MODIFIED_4V; - pte |= _PAGE_CP_4V|_PAGE_CV_4V; - } else { - if (pmd_val(entry) & PMD_HUGE_PRESENT) - pte |= _PAGE_PRESENT_4U; - if (pmd_val(entry) & PMD_HUGE_EXEC) - pte |= _PAGE_EXEC_4U; - if (pmd_val(entry) & PMD_HUGE_WRITE) - pte |= _PAGE_W_4U; - if (pmd_val(entry) & PMD_HUGE_ACCESSED) - pte |= _PAGE_ACCESSED_4U; - if (pmd_val(entry) & PMD_HUGE_DIRTY) - pte |= _PAGE_MODIFIED_4U; - pte |= _PAGE_CP_4U|_PAGE_CV_4U; - } - - return __pgprot(pte); -} - void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmd) { unsigned long pte, flags; struct mm_struct *mm; pmd_t entry = *pmd; - pgprot_t prot; if (!pmd_large(entry) || !pmd_young(entry)) return; - pte = (pmd_val(entry) & ~PMD_HUGE_PROTBITS); - pte <<= PMD_PADDR_SHIFT; - pte |= _PAGE_VALID; - - prot = pmd_pgprot(entry); + pte = pmd_val(entry); - if (tlb_type == hypervisor) - pgprot_val(prot) |= _PAGE_SZHUGE_4V; - else - pgprot_val(prot) |= _PAGE_SZHUGE_4U; - - pte |= pgprot_val(prot); + /* We are fabricating 8MB pages using 4MB real hw pages. */ + pte |= (addr & (1UL << REAL_HPAGE_SHIFT)); mm = vma->vm_mm; spin_lock_irqsave(&mm->context.lock, flags); if (mm->context.tsb_block[MM_TSB_HUGE].tsb != NULL) - __update_mmu_tsb_insert(mm, MM_TSB_HUGE, HPAGE_SHIFT, + __update_mmu_tsb_insert(mm, MM_TSB_HUGE, REAL_HPAGE_SHIFT, addr, pte); spin_unlock_irqrestore(&mm->context.lock, flags); @@ -2721,7 +2675,7 @@ void hugetlb_setup(struct pt_regs *regs) if (tlb_type == cheetah_plus) { unsigned long ctx; - raw_spin_lock(&ctx_alloc_lock); + spin_lock(&ctx_alloc_lock); ctx = mm->context.sparc64_ctx_val; ctx &= ~CTX_PGSZ_MASK; ctx |= CTX_PGSZ_BASE << CTX_PGSZ0_SHIFT; @@ -2742,7 +2696,30 @@ void hugetlb_setup(struct pt_regs *regs) mm->context.sparc64_ctx_val = ctx; on_each_cpu(context_reload, mm, 0); } - raw_spin_unlock(&ctx_alloc_lock); + spin_unlock(&ctx_alloc_lock); } } #endif + +#ifdef CONFIG_SMP +#define do_flush_tlb_kernel_range smp_flush_tlb_kernel_range +#else +#define do_flush_tlb_kernel_range __flush_tlb_kernel_range +#endif + +void flush_tlb_kernel_range(unsigned long start, unsigned long end) +{ + if (start < HI_OBP_ADDRESS && end > LOW_OBP_ADDRESS) { + if (start < LOW_OBP_ADDRESS) { + flush_tsb_kernel_range(start, LOW_OBP_ADDRESS); + do_flush_tlb_kernel_range(start, LOW_OBP_ADDRESS); + } + if (end > HI_OBP_ADDRESS) { + flush_tsb_kernel_range(HI_OBP_ADDRESS, end); + do_flush_tlb_kernel_range(HI_OBP_ADDRESS, end); + } + } else { + flush_tsb_kernel_range(start, end); + do_flush_tlb_kernel_range(start, end); + } +} diff --git a/arch/sparc/mm/init_64.h b/arch/sparc/mm/init_64.h index 0661aa6..ac49119 100644 --- a/arch/sparc/mm/init_64.h +++ b/arch/sparc/mm/init_64.h @@ -1,20 +1,15 @@ #ifndef _SPARC64_MM_INIT_H #define _SPARC64_MM_INIT_H +#include <asm/page.h> + /* Most of the symbols in this file are defined in init.c and * marked non-static so that assembler code can get at them. */ -#define MAX_PHYS_ADDRESS (1UL << 41UL) -#define KPTE_BITMAP_CHUNK_SZ (256UL * 1024UL * 1024UL) -#define KPTE_BITMAP_BYTES \ - ((MAX_PHYS_ADDRESS / KPTE_BITMAP_CHUNK_SZ) / 4) -#define VALID_ADDR_BITMAP_CHUNK_SZ (4UL * 1024UL * 1024UL) -#define VALID_ADDR_BITMAP_BYTES \ - ((MAX_PHYS_ADDRESS / VALID_ADDR_BITMAP_CHUNK_SZ) / 8) +#define MAX_PHYS_ADDRESS (1UL << MAX_PHYS_ADDRESS_BITS) extern unsigned long kern_linear_pte_xor[4]; -extern unsigned long kpte_linear_bitmap[KPTE_BITMAP_BYTES / sizeof(unsigned long)]; extern unsigned int sparc64_highest_unlocked_tlb_ent; extern unsigned long sparc64_kern_pri_context; extern unsigned long sparc64_kern_pri_nuc_bits; @@ -36,15 +31,4 @@ extern unsigned long kern_locked_tte_data; extern void prom_world(int enter); -#ifdef CONFIG_SPARSEMEM_VMEMMAP -#define VMEMMAP_CHUNK_SHIFT 22 -#define VMEMMAP_CHUNK (1UL << VMEMMAP_CHUNK_SHIFT) -#define VMEMMAP_CHUNK_MASK ~(VMEMMAP_CHUNK - 1UL) -#define VMEMMAP_ALIGN(x) (((x)+VMEMMAP_CHUNK-1UL)&VMEMMAP_CHUNK_MASK) - -#define VMEMMAP_SIZE ((((1UL << MAX_PHYSADDR_BITS) >> PAGE_SHIFT) * \ - sizeof(struct page)) >> VMEMMAP_CHUNK_SHIFT) -extern unsigned long vmemmap_table[VMEMMAP_SIZE]; -#endif - #endif /* _SPARC64_MM_INIT_H */ diff --git a/arch/sparc/mm/tlb.c b/arch/sparc/mm/tlb.c index 7a91f28..c24d0aa 100644 --- a/arch/sparc/mm/tlb.c +++ b/arch/sparc/mm/tlb.c @@ -135,7 +135,7 @@ no_cache_flush: #ifdef CONFIG_TRANSPARENT_HUGEPAGE static void tlb_batch_pmd_scan(struct mm_struct *mm, unsigned long vaddr, - pmd_t pmd, bool exec) + pmd_t pmd) { unsigned long end; pte_t *pte; @@ -143,8 +143,11 @@ static void tlb_batch_pmd_scan(struct mm_struct *mm, unsigned long vaddr, pte = pte_offset_map(&pmd, vaddr); end = vaddr + HPAGE_SIZE; while (vaddr < end) { - if (pte_val(*pte) & _PAGE_VALID) + if (pte_val(*pte) & _PAGE_VALID) { + bool exec = pte_exec(*pte); + tlb_batch_add_one(mm, vaddr, exec); + } pte++; vaddr += PAGE_SIZE; } @@ -161,8 +164,8 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr, if (mm == &init_mm) return; - if ((pmd_val(pmd) ^ pmd_val(orig)) & PMD_ISHUGE) { - if (pmd_val(pmd) & PMD_ISHUGE) + if ((pmd_val(pmd) ^ pmd_val(orig)) & _PAGE_PMD_HUGE) { + if (pmd_val(pmd) & _PAGE_PMD_HUGE) mm->context.huge_pte_count++; else mm->context.huge_pte_count--; @@ -178,16 +181,30 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr, } if (!pmd_none(orig)) { - bool exec = ((pmd_val(orig) & PMD_HUGE_EXEC) != 0); - addr &= HPAGE_MASK; - if (pmd_val(orig) & PMD_ISHUGE) + if (pmd_trans_huge(orig)) { + pte_t orig_pte = __pte(pmd_val(orig)); + bool exec = pte_exec(orig_pte); + tlb_batch_add_one(mm, addr, exec); - else - tlb_batch_pmd_scan(mm, addr, orig, exec); + tlb_batch_add_one(mm, addr + REAL_HPAGE_SIZE, exec); + } else { + tlb_batch_pmd_scan(mm, addr, orig); + } } } +void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, + pmd_t *pmdp) +{ + pmd_t entry = *pmdp; + + pmd_val(entry) &= ~_PAGE_VALID; + + set_pmd_at(vma->vm_mm, address, pmdp, entry); + flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); +} + void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, pgtable_t pgtable) { diff --git a/arch/sparc/mm/tsb.c b/arch/sparc/mm/tsb.c index 9eb10b4..10a69f4 100644 --- a/arch/sparc/mm/tsb.c +++ b/arch/sparc/mm/tsb.c @@ -73,7 +73,7 @@ void flush_tsb_user(struct tlb_batch *tb) struct mm_struct *mm = tb->mm; unsigned long nentries, base, flags; - raw_spin_lock_irqsave(&mm->context.lock, flags); + spin_lock_irqsave(&mm->context.lock, flags); base = (unsigned long) mm->context.tsb_block[MM_TSB_BASE].tsb; nentries = mm->context.tsb_block[MM_TSB_BASE].tsb_nentries; @@ -87,17 +87,17 @@ void flush_tsb_user(struct tlb_batch *tb) nentries = mm->context.tsb_block[MM_TSB_HUGE].tsb_nentries; if (tlb_type == cheetah_plus || tlb_type == hypervisor) base = __pa(base); - __flush_tsb_one(tb, HPAGE_SHIFT, base, nentries); + __flush_tsb_one(tb, REAL_HPAGE_SHIFT, base, nentries); } #endif - raw_spin_unlock_irqrestore(&mm->context.lock, flags); + spin_unlock_irqrestore(&mm->context.lock, flags); } void flush_tsb_user_page(struct mm_struct *mm, unsigned long vaddr) { unsigned long nentries, base, flags; - raw_spin_lock_irqsave(&mm->context.lock, flags); + spin_lock_irqsave(&mm->context.lock, flags); base = (unsigned long) mm->context.tsb_block[MM_TSB_BASE].tsb; nentries = mm->context.tsb_block[MM_TSB_BASE].tsb_nentries; @@ -111,10 +111,10 @@ void flush_tsb_user_page(struct mm_struct *mm, unsigned long vaddr) nentries = mm->context.tsb_block[MM_TSB_HUGE].tsb_nentries; if (tlb_type == cheetah_plus || tlb_type == hypervisor) base = __pa(base); - __flush_tsb_one_entry(base, vaddr, HPAGE_SHIFT, nentries); + __flush_tsb_one_entry(base, vaddr, REAL_HPAGE_SHIFT, nentries); } #endif - raw_spin_unlock_irqrestore(&mm->context.lock, flags); + spin_unlock_irqrestore(&mm->context.lock, flags); } #define HV_PGSZ_IDX_BASE HV_PGSZ_IDX_8K @@ -133,7 +133,19 @@ static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_idx, unsign mm->context.tsb_block[tsb_idx].tsb_nentries = tsb_bytes / sizeof(struct tsb); - base = TSBMAP_BASE; + switch (tsb_idx) { + case MM_TSB_BASE: + base = TSBMAP_8K_BASE; + break; +#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) + case MM_TSB_HUGE: + base = TSBMAP_4M_BASE; + break; +#endif + default: + BUG(); + } + tte = pgprot_val(PAGE_KERNEL_LOCKED); tsb_paddr = __pa(mm->context.tsb_block[tsb_idx].tsb); BUG_ON(tsb_paddr & (tsb_bytes - 1UL)); @@ -392,7 +404,7 @@ retry_tsb_alloc: * the lock and ask all other cpus running this address space * to run tsb_context_switch() to see the new TSB table. */ - raw_spin_lock_irqsave(&mm->context.lock, flags); + spin_lock_irqsave(&mm->context.lock, flags); old_tsb = mm->context.tsb_block[tsb_index].tsb; old_cache_index = @@ -407,7 +419,7 @@ retry_tsb_alloc: */ if (unlikely(old_tsb && (rss < mm->context.tsb_block[tsb_index].tsb_rss_limit))) { - raw_spin_unlock_irqrestore(&mm->context.lock, flags); + spin_unlock_irqrestore(&mm->context.lock, flags); kmem_cache_free(tsb_caches[new_cache_index], new_tsb); return; @@ -433,7 +445,7 @@ retry_tsb_alloc: mm->context.tsb_block[tsb_index].tsb = new_tsb; setup_tsb_params(mm, tsb_index, new_size); - raw_spin_unlock_irqrestore(&mm->context.lock, flags); + spin_unlock_irqrestore(&mm->context.lock, flags); /* If old_tsb is NULL, we're being invoked for the first time * from init_new_context(). @@ -459,7 +471,7 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) #endif unsigned int i; - raw_spin_lock_init(&mm->context.lock); + spin_lock_init(&mm->context.lock); mm->context.sparc64_ctx_val = 0UL; @@ -472,8 +484,6 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) mm->context.huge_pte_count = 0; #endif - mm->context.pgtable_page = NULL; - /* copy_mm() copies over the parent's mm_struct before calling * us, so we need to zero out the TSB pointer or else tsb_grow() * will be confused and think there is an older TSB to free up. @@ -512,23 +522,16 @@ static void tsb_destroy_one(struct tsb_config *tp) void destroy_context(struct mm_struct *mm) { unsigned long flags, i; - struct page *page; for (i = 0; i < MM_NUM_TSBS; i++) tsb_destroy_one(&mm->context.tsb_block[i]); - page = mm->context.pgtable_page; - if (page && put_page_testzero(page)) { - pgtable_page_dtor(page); - free_hot_cold_page(page, 0); - } - - raw_spin_lock_irqsave(&ctx_alloc_lock, flags); + spin_lock_irqsave(&ctx_alloc_lock, flags); if (CTX_VALID(mm->context)) { unsigned long nr = CTX_NRBITS(mm->context); mmu_context_bmap[nr>>6] &= ~(1UL << (nr & 63)); } - raw_spin_unlock_irqrestore(&ctx_alloc_lock, flags); + spin_unlock_irqrestore(&ctx_alloc_lock, flags); } diff --git a/arch/sparc/mm/ultra.S b/arch/sparc/mm/ultra.S index 432aa0c..b4f4733 100644 --- a/arch/sparc/mm/ultra.S +++ b/arch/sparc/mm/ultra.S @@ -153,10 +153,10 @@ __spitfire_flush_tlb_mm_slow: .globl __flush_icache_page __flush_icache_page: /* %o0 = phys_page */ srlx %o0, PAGE_SHIFT, %o0 - sethi %uhi(PAGE_OFFSET), %g1 + sethi %hi(PAGE_OFFSET), %g1 sllx %o0, PAGE_SHIFT, %o0 sethi %hi(PAGE_SIZE), %g2 - sllx %g1, 32, %g1 + ldx [%g1 + %lo(PAGE_OFFSET)], %g1 add %o0, %g1, %o0 1: subcc %g2, 32, %g2 bne,pt %icc, 1b @@ -178,8 +178,8 @@ __flush_icache_page: /* %o0 = phys_page */ .align 64 .globl __flush_dcache_page __flush_dcache_page: /* %o0=kaddr, %o1=flush_icache */ - sethi %uhi(PAGE_OFFSET), %g1 - sllx %g1, 32, %g1 + sethi %hi(PAGE_OFFSET), %g1 + ldx [%g1 + %lo(PAGE_OFFSET)], %g1 sub %o0, %g1, %o0 ! physical address srlx %o0, 11, %o0 ! make D-cache TAG sethi %hi(1 << 14), %o2 ! D-cache size @@ -287,8 +287,8 @@ __cheetah_flush_tlb_pending: /* 27 insns */ #ifdef DCACHE_ALIASING_POSSIBLE __cheetah_flush_dcache_page: /* 11 insns */ - sethi %uhi(PAGE_OFFSET), %g1 - sllx %g1, 32, %g1 + sethi %hi(PAGE_OFFSET), %g1 + ldx [%g1 + %lo(PAGE_OFFSET)], %g1 sub %o0, %g1, %o0 sethi %hi(PAGE_SIZE), %o4 1: subcc %o4, (1 << 5), %o4 diff --git a/arch/sparc/net/bpf_jit_comp.c b/arch/sparc/net/bpf_jit_comp.c index 01fe994..44d258d 100644 --- a/arch/sparc/net/bpf_jit_comp.c +++ b/arch/sparc/net/bpf_jit_comp.c @@ -83,9 +83,9 @@ static void bpf_flush_icache(void *start_, void *end_) #define BNE (F2(0, 2) | CONDNE) #ifdef CONFIG_SPARC64 -#define BNE_PTR (F2(0, 1) | CONDNE | (2 << 20)) +#define BE_PTR (F2(0, 1) | CONDE | (2 << 20)) #else -#define BNE_PTR BNE +#define BE_PTR BE #endif #define SETHI(K, REG) \ @@ -600,7 +600,7 @@ void bpf_jit_compile(struct sk_filter *fp) case BPF_S_ANC_IFINDEX: emit_skb_loadptr(dev, r_A); emit_cmpi(r_A, 0); - emit_branch(BNE_PTR, cleanup_addr + 4); + emit_branch(BE_PTR, cleanup_addr + 4); emit_nop(); emit_load32(r_A, struct net_device, ifindex, r_A); break; @@ -613,7 +613,7 @@ void bpf_jit_compile(struct sk_filter *fp) case BPF_S_ANC_HATYPE: emit_skb_loadptr(dev, r_A); emit_cmpi(r_A, 0); - emit_branch(BNE_PTR, cleanup_addr + 4); + emit_branch(BE_PTR, cleanup_addr + 4); emit_nop(); emit_load16(r_A, struct net_device, type, r_A); break; diff --git a/arch/sparc/power/hibernate_asm.S b/arch/sparc/power/hibernate_asm.S index 7994216..d7d9017 100644 --- a/arch/sparc/power/hibernate_asm.S +++ b/arch/sparc/power/hibernate_asm.S @@ -54,8 +54,8 @@ ENTRY(swsusp_arch_resume) nop /* Write PAGE_OFFSET to %g7 */ - sethi %uhi(PAGE_OFFSET), %g7 - sllx %g7, 32, %g7 + sethi %hi(PAGE_OFFSET), %g7 + ldx [%g7 + %lo(PAGE_OFFSET)], %g7 setuw (PAGE_SIZE-8), %g3 diff --git a/arch/sparc/prom/bootstr_64.c b/arch/sparc/prom/bootstr_64.c index ab9ccc6..7149e77 100644 --- a/arch/sparc/prom/bootstr_64.c +++ b/arch/sparc/prom/bootstr_64.c @@ -14,7 +14,10 @@ * the .bss section or it will break things. */ -#define BARG_LEN 256 +/* We limit BARG_LEN to 1024 because this is the size of the + * 'barg_out' command line buffer in the SILO bootloader. + */ +#define BARG_LEN 1024 struct { int bootstr_len; int bootstr_valid; diff --git a/arch/sparc/prom/cif.S b/arch/sparc/prom/cif.S index 9c86b4b..8050f38 100644 --- a/arch/sparc/prom/cif.S +++ b/arch/sparc/prom/cif.S @@ -11,11 +11,10 @@ .text .globl prom_cif_direct prom_cif_direct: + save %sp, -192, %sp sethi %hi(p1275buf), %o1 or %o1, %lo(p1275buf), %o1 - ldx [%o1 + 0x0010], %o2 ! prom_cif_stack - save %o2, -192, %sp - ldx [%i1 + 0x0008], %l2 ! prom_cif_handler + ldx [%o1 + 0x0008], %l2 ! prom_cif_handler mov %g4, %l0 mov %g5, %l1 mov %g6, %l3 diff --git a/arch/sparc/prom/init_64.c b/arch/sparc/prom/init_64.c index d95db75..110b0d7 100644 --- a/arch/sparc/prom/init_64.c +++ b/arch/sparc/prom/init_64.c @@ -26,13 +26,13 @@ phandle prom_chosen_node; * It gets passed the pointer to the PROM vector. */ -extern void prom_cif_init(void *, void *); +extern void prom_cif_init(void *); -void __init prom_init(void *cif_handler, void *cif_stack) +void __init prom_init(void *cif_handler) { phandle node; - prom_cif_init(cif_handler, cif_stack); + prom_cif_init(cif_handler); prom_chosen_node = prom_finddevice(prom_chosen_path); if (!prom_chosen_node || (s32)prom_chosen_node == -1) diff --git a/arch/sparc/prom/p1275.c b/arch/sparc/prom/p1275.c index 04a4540..fda23e6 100644 --- a/arch/sparc/prom/p1275.c +++ b/arch/sparc/prom/p1275.c @@ -10,6 +10,7 @@ #include <linux/smp.h> #include <linux/string.h> #include <linux/spinlock.h> +#include <linux/irqflags.h> #include <asm/openprom.h> #include <asm/oplib.h> @@ -20,7 +21,6 @@ struct { long prom_callback; /* 0x00 */ void (*prom_cif_handler)(long *); /* 0x08 */ - unsigned long prom_cif_stack; /* 0x10 */ } p1275buf; extern void prom_world(int); @@ -37,8 +37,8 @@ void p1275_cmd_direct(unsigned long *args) { unsigned long flags; - raw_local_save_flags(flags); - raw_local_irq_restore((unsigned long)PIL_NMI); + local_save_flags(flags); + local_irq_restore((unsigned long)PIL_NMI); raw_spin_lock(&prom_entry_lock); prom_world(1); @@ -46,11 +46,10 @@ void p1275_cmd_direct(unsigned long *args) prom_world(0); raw_spin_unlock(&prom_entry_lock); - raw_local_irq_restore(flags); + local_irq_restore(flags); } void prom_cif_init(void *cif_handler, void *cif_stack) { p1275buf.prom_cif_handler = (void (*)(long *))cif_handler; - p1275buf.prom_cif_stack = (unsigned long)cif_stack; } diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c index 40f30ac..6c05712 100644 --- a/arch/tile/mm/fault.c +++ b/arch/tile/mm/fault.c @@ -357,7 +357,7 @@ static int handle_page_fault(struct pt_regs *regs, * If we're in an interrupt, have no user context or are running in an * atomic region then we must not take the fault. */ - if (!mm || pagefault_disabled()) { + if (in_atomic() || !mm) { vma = NULL; /* happy compiler */ goto bad_area_nosemaphore; } diff --git a/arch/tile/mm/homecache.c b/arch/tile/mm/homecache.c index 004ba56..33294fd 100644 --- a/arch/tile/mm/homecache.c +++ b/arch/tile/mm/homecache.c @@ -417,7 +417,7 @@ void __homecache_free_pages(struct page *page, unsigned int order) if (put_page_testzero(page)) { homecache_change_page_home(page, order, PAGE_HOME_HASH); if (order == 0) { - free_hot_cold_page(page, 0); + free_hot_cold_page(page, false); } else { init_page_count(page); __free_pages(page, order); diff --git a/arch/tile/mm/hugetlbpage.c b/arch/tile/mm/hugetlbpage.c index 0cb3bba..e514899 100644 --- a/arch/tile/mm/hugetlbpage.c +++ b/arch/tile/mm/hugetlbpage.c @@ -166,11 +166,6 @@ int pud_huge(pud_t pud) return !!(pud_val(pud) & _PAGE_HUGE_PAGE); } -int pmd_huge_support(void) -{ - return 1; -} - struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, pmd_t *pmd, int write) { diff --git a/arch/um/Kconfig.common b/arch/um/Kconfig.common index 8ddea1f..0228a6a 100644 --- a/arch/um/Kconfig.common +++ b/arch/um/Kconfig.common @@ -7,6 +7,7 @@ config UML bool default y select HAVE_UID16 + select HAVE_FUTEX_CMPXCHG if FUTEX select GENERIC_IRQ_SHOW select GENERIC_CPU_DEVICES select GENERIC_IO diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index 3716e69..e8ab93c 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -1277,7 +1277,7 @@ static void do_ubd_request(struct request_queue *q) while(1){ struct ubd *dev = q->queuedata; - if(dev->end_sg == 0){ + if(dev->request == NULL){ struct request *req = blk_fetch_request(q); if(req == NULL) return; @@ -1299,7 +1299,8 @@ static void do_ubd_request(struct request_queue *q) return; } prepare_flush_request(req, io_req); - submit_request(io_req, dev); + if (submit_request(io_req, dev) == false) + return; } while(dev->start_sg < dev->end_sg){ diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c index 100a278..5c3aef7 100644 --- a/arch/um/kernel/trap.c +++ b/arch/um/kernel/trap.c @@ -38,7 +38,7 @@ int handle_page_fault(unsigned long address, unsigned long ip, * If the fault was during atomic operation, don't take the fault, just * fail. */ - if (pagefault_disabled()) + if (in_atomic()) goto out_nosemaphore; if (is_user) diff --git a/arch/unicore32/include/asm/mmu_context.h b/arch/unicore32/include/asm/mmu_context.h index fb5e4c6..ef470a7 100644 --- a/arch/unicore32/include/asm/mmu_context.h +++ b/arch/unicore32/include/asm/mmu_context.h @@ -14,6 +14,8 @@ #include <linux/compiler.h> #include <linux/sched.h> +#include <linux/mm.h> +#include <linux/vmacache.h> #include <linux/io.h> #include <asm/cacheflush.h> @@ -73,7 +75,7 @@ do { \ else \ mm->mmap = NULL; \ rb_erase(&high_vma->vm_rb, &mm->mm_rb); \ - mm->mmap_cache = NULL; \ + vmacache_invalidate(mm); \ mm->map_count--; \ remove_vma(high_vma); \ } \ diff --git a/arch/unicore32/mm/alignment.c b/arch/unicore32/mm/alignment.c index de7dc5f..24e8360 100644 --- a/arch/unicore32/mm/alignment.c +++ b/arch/unicore32/mm/alignment.c @@ -21,6 +21,7 @@ #include <linux/sched.h> #include <linux/uaccess.h> +#include <asm/pgtable.h> #include <asm/tlbflush.h> #include <asm/unaligned.h> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index edbb857..5b5c6ea 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -21,7 +21,6 @@ config X86_64 ### Arch settings config X86 def_bool y - select HAVE_PREEMPT_LAZY select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS select HAVE_AOUT if X86_32 select HAVE_UNSTABLE_SCHED_CLOCK @@ -124,6 +123,7 @@ config X86 select COMPAT_OLD_SIGACTION if IA32_EMULATION select RTC_LIB select HAVE_DEBUG_STACKOVERFLOW + select ARCH_SUPPORTS_ATOMIC_RMW config INSTRUCTION_DECODER def_bool y @@ -180,11 +180,8 @@ config ARCH_MAY_HAVE_PC_FDC def_bool y depends on ISA_DMA_API -config RWSEM_GENERIC_SPINLOCK - def_bool PREEMPT_RT_FULL - config RWSEM_XCHGADD_ALGORITHM - def_bool !RWSEM_GENERIC_SPINLOCK && !PREEMPT_RT_FULL + def_bool y config GENERIC_CALIBRATE_DELAY def_bool y @@ -474,7 +471,7 @@ config X86_MDFLD select MFD_INTEL_MSIC ---help--- Medfield is Intel's Low Power Intel Architecture (LPIA) based Moblin - Internet Device(MID) platform. + Internet Device(MID) platform. Unlike standard x86 PCs, Medfield does not have many legacy devices nor standard legacy replacement devices/features. e.g. Medfield does not contain i8259, i8254, HPET, legacy BIOS, most of the io ports. @@ -821,7 +818,7 @@ config IOMMU_HELPER config MAXSMP bool "Enable Maximum number of SMP Processors and NUMA Nodes" depends on X86_64 && SMP && DEBUG_KERNEL - select CPUMASK_OFFSTACK if !PREEMPT_RT_FULL + select CPUMASK_OFFSTACK ---help--- Enable maximum number of CPUS and NUMA Nodes for this architecture. If unsure, say N. @@ -864,7 +861,7 @@ source "kernel/Kconfig.preempt" config X86_UP_APIC bool "Local APIC support on uniprocessors" - depends on X86_32 && !SMP && !X86_32_NON_STANDARD && !PCI_MSI + depends on X86_32 && !SMP && !X86_32_NON_STANDARD ---help--- A local APIC (Advanced Programmable Interrupt Controller) is an integrated interrupt controller in the CPU. If you have a single-CPU @@ -875,6 +872,10 @@ config X86_UP_APIC performance counters), and the NMI watchdog which detects hard lockups. +config X86_UP_APIC_MSI + def_bool y + select X86_UP_APIC if X86_32 && !SMP && !X86_32_NON_STANDARD && PCI_MSI + config X86_UP_IOAPIC bool "IO-APIC support on uniprocessors" depends on X86_UP_APIC @@ -976,10 +977,27 @@ config VM86 default y depends on X86_32 ---help--- - This option is required by programs like DOSEMU to run 16-bit legacy - code on X86 processors. It also may be needed by software like - XFree86 to initialize some video cards via BIOS. Disabling this - option saves about 6k. + This option is required by programs like DOSEMU to run + 16-bit real mode legacy code on x86 processors. It also may + be needed by software like XFree86 to initialize some video + cards via BIOS. Disabling this option saves about 6K. + +config X86_16BIT + bool "Enable support for 16-bit segments" if EXPERT + default y + ---help--- + This option is required by programs like Wine to run 16-bit + protected mode legacy code on x86 processors. Disabling + this option saves about 300 bytes on i386, or around 6K text + plus 16K runtime memory on x86-64, + +config X86_ESPFIX32 + def_bool y + depends on X86_16BIT && X86_32 + +config X86_ESPFIX64 + def_bool y + depends on X86_16BIT && X86_64 config TOSHIBA tristate "Toshiba Laptop support" @@ -1594,6 +1612,7 @@ config EFI config EFI_STUB bool "EFI stub support" depends on EFI + select RELOCATABLE ---help--- This kernel feature allows a bzImage to be loaded directly by EFI firmware without the use of a bootloader. @@ -1885,6 +1904,10 @@ config USE_PERCPU_NUMA_NODE_ID def_bool y depends on NUMA +config ARCH_ENABLE_HUGEPAGE_MIGRATION + def_bool y + depends on X86_64 && HUGETLB_PAGE && MIGRATION + menu "Power management and ACPI options" config ARCH_HIBERNATION_HEADER diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index b7388a4..9b883a8 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -865,6 +865,9 @@ fail: * Because the x86 boot code expects to be passed a boot_params we * need to create one ourselves (usually the bootloader would create * one for us). + * + * The caller is responsible for filling out ->code32_start in the + * returned boot_params. */ struct boot_params *make_boot_params(void *handle, efi_system_table_t *_table) { @@ -921,8 +924,6 @@ struct boot_params *make_boot_params(void *handle, efi_system_table_t *_table) hdr->vid_mode = 0xffff; hdr->boot_flag = 0xAA55; - hdr->code32_start = (__u64)(unsigned long)image->image_base; - hdr->type_of_loader = 0x21; /* Convert unicode cmdline to ascii */ diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index 5d6f689..b1bd969 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -50,6 +50,13 @@ ENTRY(efi_pe_entry) pushl %eax pushl %esi pushl %ecx + + call reloc +reloc: + popl %ecx + subl reloc, %ecx + movl %ecx, BP_code32_start(%eax) + sub $0x4, %esp ENTRY(efi_stub_entry) @@ -63,12 +70,7 @@ ENTRY(efi_stub_entry) hlt jmp 1b 2: - call 3f -3: - popl %eax - subl $3b, %eax - subl BP_pref_address(%esi), %eax - add BP_code32_start(%esi), %eax + movl BP_code32_start(%esi), %eax leal preferred_addr(%eax), %eax jmp *%eax diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index c337422..a558403 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -215,6 +215,8 @@ ENTRY(efi_pe_entry) cmpq $0,%rax je 1f mov %rax, %rdx + leaq startup_32(%rip), %rax + movl %eax, BP_code32_start(%rdx) popq %rsi popq %rdi @@ -228,12 +230,7 @@ ENTRY(efi_stub_entry) hlt jmp 1b 2: - call 3f -3: - popq %rax - subq $3b, %rax - subq BP_pref_address(%rsi), %rax - add BP_code32_start(%esi), %eax + movl BP_code32_start(%esi), %eax leaq preferred_addr(%rax), %rax jmp *%rax diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 434f077..1b05afd 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -401,6 +401,8 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap, unsigned char *output, unsigned long output_len) { + unsigned char *output_orig = output; + real_mode = rmode; sanitize_boot_params(real_mode); @@ -439,7 +441,12 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap, debug_putstr("\nDecompressing Linux... "); decompress(input_data, input_len, NULL, NULL, output, NULL, error); parse_elf(output); - handle_relocations(output, output_len); + /* + * 32-bit always performs relocations. 64-bit relocations are only + * needed if kASLR has chosen a different load address. + */ + if (!IS_ENABLED(CONFIG_X86_64) || output != output_orig) + handle_relocations(output, output_len); debug_putstr("done.\nBooting the kernel.\n"); return; } diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index 9ec06a1..4257124 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -91,10 +91,9 @@ bs_die: .section ".bsdata", "a" bugger_off_msg: - .ascii "Direct floppy boot is not supported. " - .ascii "Use a boot loader program instead.\r\n" + .ascii "Use a boot loader.\r\n" .ascii "\n" - .ascii "Remove disk and press any key to reboot ...\r\n" + .ascii "Remove disk and press any key to reboot...\r\n" .byte 0 #ifdef CONFIG_EFI_STUB @@ -108,7 +107,7 @@ coff_header: #else .word 0x8664 # x86-64 #endif - .word 3 # nr_sections + .word 4 # nr_sections .long 0 # TimeDateStamp .long 0 # PointerToSymbolTable .long 1 # NumberOfSymbols @@ -250,6 +249,25 @@ section_table: .word 0 # NumberOfLineNumbers .long 0x60500020 # Characteristics (section flags) + # + # The offset & size fields are filled in by build.c. + # + .ascii ".bss" + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .long 0 + .long 0x0 + .long 0 # Size of initialized data + # on disk + .long 0x0 + .long 0 # PointerToRelocations + .long 0 # PointerToLineNumbers + .word 0 # NumberOfRelocations + .word 0 # NumberOfLineNumbers + .long 0xc8000080 # Characteristics (section flags) + #endif /* CONFIG_EFI_STUB */ # Kernel attributes; used by setup. This is part 1 of the diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c index c941d6a..687dd28 100644 --- a/arch/x86/boot/tools/build.c +++ b/arch/x86/boot/tools/build.c @@ -141,7 +141,7 @@ static void usage(void) #ifdef CONFIG_EFI_STUB -static void update_pecoff_section_header(char *section_name, u32 offset, u32 size) +static void update_pecoff_section_header_fields(char *section_name, u32 vma, u32 size, u32 datasz, u32 offset) { unsigned int pe_header; unsigned short num_sections; @@ -162,10 +162,10 @@ static void update_pecoff_section_header(char *section_name, u32 offset, u32 siz put_unaligned_le32(size, section + 0x8); /* section header vma field */ - put_unaligned_le32(offset, section + 0xc); + put_unaligned_le32(vma, section + 0xc); /* section header 'size of initialised data' field */ - put_unaligned_le32(size, section + 0x10); + put_unaligned_le32(datasz, section + 0x10); /* section header 'file offset' field */ put_unaligned_le32(offset, section + 0x14); @@ -177,6 +177,11 @@ static void update_pecoff_section_header(char *section_name, u32 offset, u32 siz } } +static void update_pecoff_section_header(char *section_name, u32 offset, u32 size) +{ + update_pecoff_section_header_fields(section_name, offset, size, size, offset); +} + static void update_pecoff_setup_and_reloc(unsigned int size) { u32 setup_offset = 0x200; @@ -201,9 +206,6 @@ static void update_pecoff_text(unsigned int text_start, unsigned int file_sz) pe_header = get_unaligned_le32(&buf[0x3c]); - /* Size of image */ - put_unaligned_le32(file_sz, &buf[pe_header + 0x50]); - /* * Size of code: Subtract the size of the first sector (512 bytes) * which includes the header. @@ -218,6 +220,22 @@ static void update_pecoff_text(unsigned int text_start, unsigned int file_sz) update_pecoff_section_header(".text", text_start, text_sz); } +static void update_pecoff_bss(unsigned int file_sz, unsigned int init_sz) +{ + unsigned int pe_header; + unsigned int bss_sz = init_sz - file_sz; + + pe_header = get_unaligned_le32(&buf[0x3c]); + + /* Size of uninitialized data */ + put_unaligned_le32(bss_sz, &buf[pe_header + 0x24]); + + /* Size of image */ + put_unaligned_le32(init_sz, &buf[pe_header + 0x50]); + + update_pecoff_section_header_fields(".bss", file_sz, bss_sz, 0, 0); +} + #endif /* CONFIG_EFI_STUB */ @@ -269,6 +287,9 @@ int main(int argc, char ** argv) int fd; void *kernel; u32 crc = 0xffffffffUL; +#ifdef CONFIG_EFI_STUB + unsigned int init_sz; +#endif /* Defaults for old kernel */ #ifdef CONFIG_X86_32 @@ -339,7 +360,9 @@ int main(int argc, char ** argv) put_unaligned_le32(sys_size, &buf[0x1f4]); #ifdef CONFIG_EFI_STUB - update_pecoff_text(setup_sectors * 512, sz + i + ((sys_size * 16) - sz)); + update_pecoff_text(setup_sectors * 512, i + (sys_size * 16)); + init_sz = get_unaligned_le32(&buf[0x260]); + update_pecoff_bss(i + (sys_size * 16), init_sz); #ifdef CONFIG_X86_64 /* Yes, this is really how we defined it :( */ efi_stub_entry -= 0x200; diff --git a/arch/x86/crypto/aes_glue.c b/arch/x86/crypto/aes_glue.c index aafe8ce..e26984f 100644 --- a/arch/x86/crypto/aes_glue.c +++ b/arch/x86/crypto/aes_glue.c @@ -66,5 +66,5 @@ module_exit(aes_fini); MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm, asm optimized"); MODULE_LICENSE("GPL"); -MODULE_ALIAS("aes"); -MODULE_ALIAS("aes-asm"); +MODULE_ALIAS_CRYPTO("aes"); +MODULE_ALIAS_CRYPTO("aes-asm"); diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 3fbe870..f89e749 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -252,14 +252,14 @@ static int ecb_encrypt(struct blkcipher_desc *desc, err = blkcipher_walk_virt(desc, &walk); desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + kernel_fpu_begin(); while ((nbytes = walk.nbytes)) { - kernel_fpu_begin(); aesni_ecb_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr, - nbytes & AES_BLOCK_MASK); - kernel_fpu_end(); + nbytes & AES_BLOCK_MASK); nbytes &= AES_BLOCK_SIZE - 1; err = blkcipher_walk_done(desc, &walk, nbytes); } + kernel_fpu_end(); return err; } @@ -276,14 +276,14 @@ static int ecb_decrypt(struct blkcipher_desc *desc, err = blkcipher_walk_virt(desc, &walk); desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + kernel_fpu_begin(); while ((nbytes = walk.nbytes)) { - kernel_fpu_begin(); aesni_ecb_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr, nbytes & AES_BLOCK_MASK); - kernel_fpu_end(); nbytes &= AES_BLOCK_SIZE - 1; err = blkcipher_walk_done(desc, &walk, nbytes); } + kernel_fpu_end(); return err; } @@ -300,14 +300,14 @@ static int cbc_encrypt(struct blkcipher_desc *desc, err = blkcipher_walk_virt(desc, &walk); desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + kernel_fpu_begin(); while ((nbytes = walk.nbytes)) { - kernel_fpu_begin(); aesni_cbc_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr, nbytes & AES_BLOCK_MASK, walk.iv); - kernel_fpu_end(); nbytes &= AES_BLOCK_SIZE - 1; err = blkcipher_walk_done(desc, &walk, nbytes); } + kernel_fpu_end(); return err; } @@ -324,14 +324,14 @@ static int cbc_decrypt(struct blkcipher_desc *desc, err = blkcipher_walk_virt(desc, &walk); desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + kernel_fpu_begin(); while ((nbytes = walk.nbytes)) { - kernel_fpu_begin(); aesni_cbc_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr, nbytes & AES_BLOCK_MASK, walk.iv); - kernel_fpu_end(); nbytes &= AES_BLOCK_SIZE - 1; err = blkcipher_walk_done(desc, &walk, nbytes); } + kernel_fpu_end(); return err; } @@ -364,20 +364,18 @@ static int ctr_crypt(struct blkcipher_desc *desc, err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE); desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + kernel_fpu_begin(); while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) { - kernel_fpu_begin(); aesni_ctr_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr, nbytes & AES_BLOCK_MASK, walk.iv); - kernel_fpu_end(); nbytes &= AES_BLOCK_SIZE - 1; err = blkcipher_walk_done(desc, &walk, nbytes); } if (walk.nbytes) { - kernel_fpu_begin(); ctr_crypt_final(ctx, &walk); - kernel_fpu_end(); err = blkcipher_walk_done(desc, &walk, 0); } + kernel_fpu_end(); return err; } @@ -1375,4 +1373,4 @@ module_exit(aesni_exit); MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm, Intel AES-NI instructions optimized"); MODULE_LICENSE("GPL"); -MODULE_ALIAS("aes"); +MODULE_ALIAS_CRYPTO("aes"); diff --git a/arch/x86/crypto/blowfish_glue.c b/arch/x86/crypto/blowfish_glue.c index 50ec333..1477cfc 100644 --- a/arch/x86/crypto/blowfish_glue.c +++ b/arch/x86/crypto/blowfish_glue.c @@ -481,5 +481,5 @@ module_exit(fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Blowfish Cipher Algorithm, asm optimized"); -MODULE_ALIAS("blowfish"); -MODULE_ALIAS("blowfish-asm"); +MODULE_ALIAS_CRYPTO("blowfish"); +MODULE_ALIAS_CRYPTO("blowfish-asm"); diff --git a/arch/x86/crypto/camellia_aesni_avx2_glue.c b/arch/x86/crypto/camellia_aesni_avx2_glue.c index 414fe5d..da710fc 100644 --- a/arch/x86/crypto/camellia_aesni_avx2_glue.c +++ b/arch/x86/crypto/camellia_aesni_avx2_glue.c @@ -582,5 +582,5 @@ module_exit(camellia_aesni_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Camellia Cipher Algorithm, AES-NI/AVX2 optimized"); -MODULE_ALIAS("camellia"); -MODULE_ALIAS("camellia-asm"); +MODULE_ALIAS_CRYPTO("camellia"); +MODULE_ALIAS_CRYPTO("camellia-asm"); diff --git a/arch/x86/crypto/camellia_aesni_avx_glue.c b/arch/x86/crypto/camellia_aesni_avx_glue.c index 37fd0c0..883e1af 100644 --- a/arch/x86/crypto/camellia_aesni_avx_glue.c +++ b/arch/x86/crypto/camellia_aesni_avx_glue.c @@ -574,5 +574,5 @@ module_exit(camellia_aesni_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Camellia Cipher Algorithm, AES-NI/AVX optimized"); -MODULE_ALIAS("camellia"); -MODULE_ALIAS("camellia-asm"); +MODULE_ALIAS_CRYPTO("camellia"); +MODULE_ALIAS_CRYPTO("camellia-asm"); diff --git a/arch/x86/crypto/camellia_glue.c b/arch/x86/crypto/camellia_glue.c index c171dcb..5c8b626 100644 --- a/arch/x86/crypto/camellia_glue.c +++ b/arch/x86/crypto/camellia_glue.c @@ -1725,5 +1725,5 @@ module_exit(fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Camellia Cipher Algorithm, asm optimized"); -MODULE_ALIAS("camellia"); -MODULE_ALIAS("camellia-asm"); +MODULE_ALIAS_CRYPTO("camellia"); +MODULE_ALIAS_CRYPTO("camellia-asm"); diff --git a/arch/x86/crypto/cast5_avx_glue.c b/arch/x86/crypto/cast5_avx_glue.c index 2d48e83..d416069 100644 --- a/arch/x86/crypto/cast5_avx_glue.c +++ b/arch/x86/crypto/cast5_avx_glue.c @@ -60,7 +60,7 @@ static inline void cast5_fpu_end(bool fpu_enabled) static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, bool enc) { - bool fpu_enabled; + bool fpu_enabled = false; struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); const unsigned int bsize = CAST5_BLOCK_SIZE; unsigned int nbytes; @@ -76,7 +76,7 @@ static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, u8 *wsrc = walk->src.virt.addr; u8 *wdst = walk->dst.virt.addr; - fpu_enabled = cast5_fpu_begin(false, nbytes); + fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes); /* Process multi-block batch */ if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) { @@ -104,9 +104,10 @@ static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, } while (nbytes >= bsize); done: - cast5_fpu_end(fpu_enabled); err = blkcipher_walk_done(desc, walk, nbytes); } + + cast5_fpu_end(fpu_enabled); return err; } @@ -230,7 +231,7 @@ done: static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) { - bool fpu_enabled; + bool fpu_enabled = false; struct blkcipher_walk walk; int err; @@ -239,11 +240,12 @@ static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; while ((nbytes = walk.nbytes)) { - fpu_enabled = cast5_fpu_begin(false, nbytes); + fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes); nbytes = __cbc_decrypt(desc, &walk); - cast5_fpu_end(fpu_enabled); err = blkcipher_walk_done(desc, &walk, nbytes); } + + cast5_fpu_end(fpu_enabled); return err; } @@ -313,7 +315,7 @@ done: static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) { - bool fpu_enabled; + bool fpu_enabled = false; struct blkcipher_walk walk; int err; @@ -322,12 +324,13 @@ static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) { - fpu_enabled = cast5_fpu_begin(false, nbytes); + fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes); nbytes = __ctr_crypt(desc, &walk); - cast5_fpu_end(fpu_enabled); err = blkcipher_walk_done(desc, &walk, nbytes); } + cast5_fpu_end(fpu_enabled); + if (walk.nbytes) { ctr_crypt_final(desc, &walk); err = blkcipher_walk_done(desc, &walk, 0); @@ -491,4 +494,4 @@ module_exit(cast5_exit); MODULE_DESCRIPTION("Cast5 Cipher Algorithm, AVX optimized"); MODULE_LICENSE("GPL"); -MODULE_ALIAS("cast5"); +MODULE_ALIAS_CRYPTO("cast5"); diff --git a/arch/x86/crypto/cast6_avx_glue.c b/arch/x86/crypto/cast6_avx_glue.c index 8d0dfb8..c197562 100644 --- a/arch/x86/crypto/cast6_avx_glue.c +++ b/arch/x86/crypto/cast6_avx_glue.c @@ -611,4 +611,4 @@ module_exit(cast6_exit); MODULE_DESCRIPTION("Cast6 Cipher Algorithm, AVX optimized"); MODULE_LICENSE("GPL"); -MODULE_ALIAS("cast6"); +MODULE_ALIAS_CRYPTO("cast6"); diff --git a/arch/x86/crypto/crc32-pclmul_glue.c b/arch/x86/crypto/crc32-pclmul_glue.c index 9d014a7..1937fc1 100644 --- a/arch/x86/crypto/crc32-pclmul_glue.c +++ b/arch/x86/crypto/crc32-pclmul_glue.c @@ -197,5 +197,5 @@ module_exit(crc32_pclmul_mod_fini); MODULE_AUTHOR("Alexander Boyko <alexander_boyko@xyratex.com>"); MODULE_LICENSE("GPL"); -MODULE_ALIAS("crc32"); -MODULE_ALIAS("crc32-pclmul"); +MODULE_ALIAS_CRYPTO("crc32"); +MODULE_ALIAS_CRYPTO("crc32-pclmul"); diff --git a/arch/x86/crypto/crc32c-intel_glue.c b/arch/x86/crypto/crc32c-intel_glue.c index 6812ad9..28640c3 100644 --- a/arch/x86/crypto/crc32c-intel_glue.c +++ b/arch/x86/crypto/crc32c-intel_glue.c @@ -280,5 +280,5 @@ MODULE_AUTHOR("Austin Zhang <austin.zhang@intel.com>, Kent Liu <kent.liu@intel.c MODULE_DESCRIPTION("CRC32c (Castagnoli) optimization using Intel Hardware."); MODULE_LICENSE("GPL"); -MODULE_ALIAS("crc32c"); -MODULE_ALIAS("crc32c-intel"); +MODULE_ALIAS_CRYPTO("crc32c"); +MODULE_ALIAS_CRYPTO("crc32c-intel"); diff --git a/arch/x86/crypto/crct10dif-pclmul_glue.c b/arch/x86/crypto/crct10dif-pclmul_glue.c index 7845d7f..b6c67bf 100644 --- a/arch/x86/crypto/crct10dif-pclmul_glue.c +++ b/arch/x86/crypto/crct10dif-pclmul_glue.c @@ -147,5 +147,5 @@ MODULE_AUTHOR("Tim Chen <tim.c.chen@linux.intel.com>"); MODULE_DESCRIPTION("T10 DIF CRC calculation accelerated with PCLMULQDQ."); MODULE_LICENSE("GPL"); -MODULE_ALIAS("crct10dif"); -MODULE_ALIAS("crct10dif-pclmul"); +MODULE_ALIAS_CRYPTO("crct10dif"); +MODULE_ALIAS_CRYPTO("crct10dif-pclmul"); diff --git a/arch/x86/crypto/fpu.c b/arch/x86/crypto/fpu.c index 98d7a18..f368ba2 100644 --- a/arch/x86/crypto/fpu.c +++ b/arch/x86/crypto/fpu.c @@ -17,6 +17,7 @@ #include <linux/kernel.h> #include <linux/module.h> #include <linux/slab.h> +#include <linux/crypto.h> #include <asm/i387.h> struct crypto_fpu_ctx { @@ -159,3 +160,5 @@ void __exit crypto_fpu_exit(void) { crypto_unregister_template(&crypto_fpu_tmpl); } + +MODULE_ALIAS_CRYPTO("fpu"); diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c index d785cf2..a8d6f69 100644 --- a/arch/x86/crypto/ghash-clmulni-intel_glue.c +++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c @@ -341,4 +341,4 @@ module_exit(ghash_pclmulqdqni_mod_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("GHASH Message Digest Algorithm, " "acclerated by PCLMULQDQ-NI"); -MODULE_ALIAS("ghash"); +MODULE_ALIAS_CRYPTO("ghash"); diff --git a/arch/x86/crypto/glue_helper.c b/arch/x86/crypto/glue_helper.c index 4a2bd21..432f1d76 100644 --- a/arch/x86/crypto/glue_helper.c +++ b/arch/x86/crypto/glue_helper.c @@ -39,7 +39,7 @@ static int __glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx, void *ctx = crypto_blkcipher_ctx(desc->tfm); const unsigned int bsize = 128 / 8; unsigned int nbytes, i, func_bytes; - bool fpu_enabled; + bool fpu_enabled = false; int err; err = blkcipher_walk_virt(desc, walk); @@ -49,7 +49,7 @@ static int __glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx, u8 *wdst = walk->dst.virt.addr; fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, - desc, false, nbytes); + desc, fpu_enabled, nbytes); for (i = 0; i < gctx->num_funcs; i++) { func_bytes = bsize * gctx->funcs[i].num_blocks; @@ -71,10 +71,10 @@ static int __glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx, } done: - glue_fpu_end(fpu_enabled); err = blkcipher_walk_done(desc, walk, nbytes); } + glue_fpu_end(fpu_enabled); return err; } @@ -194,7 +194,7 @@ int glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx, struct scatterlist *src, unsigned int nbytes) { const unsigned int bsize = 128 / 8; - bool fpu_enabled; + bool fpu_enabled = false; struct blkcipher_walk walk; int err; @@ -203,12 +203,12 @@ int glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx, while ((nbytes = walk.nbytes)) { fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, - desc, false, nbytes); + desc, fpu_enabled, nbytes); nbytes = __glue_cbc_decrypt_128bit(gctx, desc, &walk); - glue_fpu_end(fpu_enabled); err = blkcipher_walk_done(desc, &walk, nbytes); } + glue_fpu_end(fpu_enabled); return err; } EXPORT_SYMBOL_GPL(glue_cbc_decrypt_128bit); @@ -278,7 +278,7 @@ int glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx, struct scatterlist *src, unsigned int nbytes) { const unsigned int bsize = 128 / 8; - bool fpu_enabled; + bool fpu_enabled = false; struct blkcipher_walk walk; int err; @@ -287,12 +287,13 @@ int glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx, while ((nbytes = walk.nbytes) >= bsize) { fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, - desc, false, nbytes); + desc, fpu_enabled, nbytes); nbytes = __glue_ctr_crypt_128bit(gctx, desc, &walk); - glue_fpu_end(fpu_enabled); err = blkcipher_walk_done(desc, &walk, nbytes); } + glue_fpu_end(fpu_enabled); + if (walk.nbytes) { glue_ctr_crypt_final_128bit( gctx->funcs[gctx->num_funcs - 1].fn_u.ctr, desc, &walk); @@ -347,7 +348,7 @@ int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx, void *tweak_ctx, void *crypt_ctx) { const unsigned int bsize = 128 / 8; - bool fpu_enabled; + bool fpu_enabled = false; struct blkcipher_walk walk; int err; @@ -360,21 +361,21 @@ int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx, /* set minimum length to bsize, for tweak_fn */ fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, - desc, false, + desc, fpu_enabled, nbytes < bsize ? bsize : nbytes); + /* calculate first value of T */ tweak_fn(tweak_ctx, walk.iv, walk.iv); - glue_fpu_end(fpu_enabled); while (nbytes) { - fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, - desc, false, nbytes); nbytes = __glue_xts_crypt_128bit(gctx, crypt_ctx, desc, &walk); - glue_fpu_end(fpu_enabled); err = blkcipher_walk_done(desc, &walk, nbytes); nbytes = walk.nbytes; } + + glue_fpu_end(fpu_enabled); + return err; } EXPORT_SYMBOL_GPL(glue_xts_crypt_128bit); diff --git a/arch/x86/crypto/salsa20_glue.c b/arch/x86/crypto/salsa20_glue.c index 5e8e677..399a29d 100644 --- a/arch/x86/crypto/salsa20_glue.c +++ b/arch/x86/crypto/salsa20_glue.c @@ -119,5 +119,5 @@ module_exit(fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION ("Salsa20 stream cipher algorithm (optimized assembly version)"); -MODULE_ALIAS("salsa20"); -MODULE_ALIAS("salsa20-asm"); +MODULE_ALIAS_CRYPTO("salsa20"); +MODULE_ALIAS_CRYPTO("salsa20-asm"); diff --git a/arch/x86/crypto/serpent_avx2_glue.c b/arch/x86/crypto/serpent_avx2_glue.c index 23aabc6..cb57caf 100644 --- a/arch/x86/crypto/serpent_avx2_glue.c +++ b/arch/x86/crypto/serpent_avx2_glue.c @@ -558,5 +558,5 @@ module_exit(fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Serpent Cipher Algorithm, AVX2 optimized"); -MODULE_ALIAS("serpent"); -MODULE_ALIAS("serpent-asm"); +MODULE_ALIAS_CRYPTO("serpent"); +MODULE_ALIAS_CRYPTO("serpent-asm"); diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c index 9ae83cf..0a86e8b 100644 --- a/arch/x86/crypto/serpent_avx_glue.c +++ b/arch/x86/crypto/serpent_avx_glue.c @@ -617,4 +617,4 @@ module_exit(serpent_exit); MODULE_DESCRIPTION("Serpent Cipher Algorithm, AVX optimized"); MODULE_LICENSE("GPL"); -MODULE_ALIAS("serpent"); +MODULE_ALIAS_CRYPTO("serpent"); diff --git a/arch/x86/crypto/serpent_sse2_glue.c b/arch/x86/crypto/serpent_sse2_glue.c index 97a356e..279f389 100644 --- a/arch/x86/crypto/serpent_sse2_glue.c +++ b/arch/x86/crypto/serpent_sse2_glue.c @@ -618,4 +618,4 @@ module_exit(serpent_sse2_exit); MODULE_DESCRIPTION("Serpent Cipher Algorithm, SSE2 optimized"); MODULE_LICENSE("GPL"); -MODULE_ALIAS("serpent"); +MODULE_ALIAS_CRYPTO("serpent"); diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c index 4a11a9d..29e1060 100644 --- a/arch/x86/crypto/sha1_ssse3_glue.c +++ b/arch/x86/crypto/sha1_ssse3_glue.c @@ -237,4 +237,4 @@ module_exit(sha1_ssse3_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm, Supplemental SSE3 accelerated"); -MODULE_ALIAS("sha1"); +MODULE_ALIAS_CRYPTO("sha1"); diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c index e52947f..4dc100d 100644 --- a/arch/x86/crypto/sha256_ssse3_glue.c +++ b/arch/x86/crypto/sha256_ssse3_glue.c @@ -318,5 +318,5 @@ module_exit(sha256_ssse3_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("SHA256 Secure Hash Algorithm, Supplemental SSE3 accelerated"); -MODULE_ALIAS("sha256"); -MODULE_ALIAS("sha384"); +MODULE_ALIAS_CRYPTO("sha256"); +MODULE_ALIAS_CRYPTO("sha224"); diff --git a/arch/x86/crypto/sha512_ssse3_glue.c b/arch/x86/crypto/sha512_ssse3_glue.c index f30cd10..26a5898 100644 --- a/arch/x86/crypto/sha512_ssse3_glue.c +++ b/arch/x86/crypto/sha512_ssse3_glue.c @@ -141,7 +141,7 @@ static int sha512_ssse3_final(struct shash_desc *desc, u8 *out) /* save number of bits */ bits[1] = cpu_to_be64(sctx->count[0] << 3); - bits[0] = cpu_to_be64(sctx->count[1] << 3) | sctx->count[0] >> 61; + bits[0] = cpu_to_be64(sctx->count[1] << 3 | sctx->count[0] >> 61); /* Pad out to 112 mod 128 and append length */ index = sctx->count[0] & 0x7f; @@ -326,5 +326,5 @@ module_exit(sha512_ssse3_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("SHA512 Secure Hash Algorithm, Supplemental SSE3 accelerated"); -MODULE_ALIAS("sha512"); -MODULE_ALIAS("sha384"); +MODULE_ALIAS_CRYPTO("sha512"); +MODULE_ALIAS_CRYPTO("sha384"); diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c index a62ba54..c8c12c1 100644 --- a/arch/x86/crypto/twofish_avx_glue.c +++ b/arch/x86/crypto/twofish_avx_glue.c @@ -579,4 +579,4 @@ module_exit(twofish_exit); MODULE_DESCRIPTION("Twofish Cipher Algorithm, AVX optimized"); MODULE_LICENSE("GPL"); -MODULE_ALIAS("twofish"); +MODULE_ALIAS_CRYPTO("twofish"); diff --git a/arch/x86/crypto/twofish_glue.c b/arch/x86/crypto/twofish_glue.c index 0a52023..77e06c2 100644 --- a/arch/x86/crypto/twofish_glue.c +++ b/arch/x86/crypto/twofish_glue.c @@ -96,5 +96,5 @@ module_exit(fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION ("Twofish Cipher Algorithm, asm optimized"); -MODULE_ALIAS("twofish"); -MODULE_ALIAS("twofish-asm"); +MODULE_ALIAS_CRYPTO("twofish"); +MODULE_ALIAS_CRYPTO("twofish-asm"); diff --git a/arch/x86/crypto/twofish_glue_3way.c b/arch/x86/crypto/twofish_glue_3way.c index 13e63b3..56d8a08 100644 --- a/arch/x86/crypto/twofish_glue_3way.c +++ b/arch/x86/crypto/twofish_glue_3way.c @@ -495,5 +495,5 @@ module_exit(fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Twofish Cipher Algorithm, 3-way parallel asm optimized"); -MODULE_ALIAS("twofish"); -MODULE_ALIAS("twofish-asm"); +MODULE_ALIAS_CRYPTO("twofish"); +MODULE_ALIAS_CRYPTO("twofish-asm"); diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 4299eb0..92a2e93 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -151,6 +151,16 @@ ENTRY(ia32_sysenter_target) 1: movl (%rbp),%ebp _ASM_EXTABLE(1b,ia32_badarg) ASM_CLAC + + /* + * Sysenter doesn't filter flags, so we need to clear NT + * ourselves. To save a few cycles, we can check whether + * NT was set instead of doing an unconditional popfq. + */ + testl $X86_EFLAGS_NT,EFLAGS-ARGOFFSET(%rsp) + jnz sysenter_fix_flags +sysenter_flags_fixed: + orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) CFI_REMEMBER_STATE @@ -184,6 +194,8 @@ sysexit_from_sys_call: TRACE_IRQS_ON ENABLE_INTERRUPTS_SYSEXIT32 + CFI_RESTORE_STATE + #ifdef CONFIG_AUDITSYSCALL .macro auditsys_entry_common movl %esi,%r9d /* 6th arg: 4th syscall arg */ @@ -226,7 +238,6 @@ sysexit_from_sys_call: .endm sysenter_auditsys: - CFI_RESTORE_STATE auditsys_entry_common movl %ebp,%r9d /* reload 6th syscall arg */ jmp sysenter_dispatch @@ -235,6 +246,11 @@ sysexit_audit: auditsys_exit sysexit_from_sys_call #endif +sysenter_fix_flags: + pushq_cfi $(X86_EFLAGS_IF|X86_EFLAGS_FIXED) + popfq_cfi + jmp sysenter_flags_fixed + sysenter_tracesys: #ifdef CONFIG_AUDITSYSCALL testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 89270b4..c2f19a8 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -203,6 +203,7 @@ #define X86_FEATURE_DECODEASSISTS (8*32+12) /* AMD Decode Assists support */ #define X86_FEATURE_PAUSEFILTER (8*32+13) /* AMD filtered pause intercept */ #define X86_FEATURE_PFTHRESHOLD (8*32+14) /* AMD pause filter threshold */ +#define X86_FEATURE_VMMCALL (8*32+15) /* Prefer vmmcall to vmcall */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */ diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index b90e5df..f6aaf7d 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -251,7 +251,8 @@ static inline void native_load_tls(struct thread_struct *t, unsigned int cpu) gdt[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i]; } -#define _LDT_empty(info) \ +/* This intentionally ignores lm, since 32-bit apps don't have that field. */ +#define LDT_empty(info) \ ((info)->base_addr == 0 && \ (info)->limit == 0 && \ (info)->contents == 0 && \ @@ -261,11 +262,18 @@ static inline void native_load_tls(struct thread_struct *t, unsigned int cpu) (info)->seg_not_present == 1 && \ (info)->useable == 0) -#ifdef CONFIG_X86_64 -#define LDT_empty(info) (_LDT_empty(info) && ((info)->lm == 0)) -#else -#define LDT_empty(info) (_LDT_empty(info)) -#endif +/* Lots of programs expect an all-zero user_desc to mean "no segment at all". */ +static inline bool LDT_zero(const struct user_desc *info) +{ + return (info->base_addr == 0 && + info->limit == 0 && + info->contents == 0 && + info->read_exec_only == 0 && + info->seg_32bit == 0 && + info->limit_in_pages == 0 && + info->seg_not_present == 0 && + info->useable == 0); +} static inline void clear_LDT(void) { diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index 9c999c1..01f15b2 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -155,8 +155,9 @@ do { \ #define elf_check_arch(x) \ ((x)->e_machine == EM_X86_64) -#define compat_elf_check_arch(x) \ - (elf_check_arch_ia32(x) || (x)->e_machine == EM_X86_64) +#define compat_elf_check_arch(x) \ + (elf_check_arch_ia32(x) || \ + (IS_ENABLED(CONFIG_X86_X32_ABI) && (x)->e_machine == EM_X86_64)) #if __USER32_DS != __USER_DS # error "The following code assumes __USER32_DS == __USER_DS" diff --git a/arch/x86/include/asm/espfix.h b/arch/x86/include/asm/espfix.h new file mode 100644 index 0000000..99efebb --- /dev/null +++ b/arch/x86/include/asm/espfix.h @@ -0,0 +1,16 @@ +#ifndef _ASM_X86_ESPFIX_H +#define _ASM_X86_ESPFIX_H + +#ifdef CONFIG_X86_64 + +#include <asm/percpu.h> + +DECLARE_PER_CPU_READ_MOSTLY(unsigned long, espfix_stack); +DECLARE_PER_CPU_READ_MOSTLY(unsigned long, espfix_waddr); + +extern void init_espfix_bsp(void); +extern void init_espfix_ap(void); + +#endif /* CONFIG_X86_64 */ + +#endif /* _ASM_X86_ESPFIX_H */ diff --git a/arch/x86/include/asm/hugetlb.h b/arch/x86/include/asm/hugetlb.h index a809121..68c0539 100644 --- a/arch/x86/include/asm/hugetlb.h +++ b/arch/x86/include/asm/hugetlb.h @@ -52,6 +52,7 @@ static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { + ptep_clear_flush(vma, addr, ptep); } static inline int huge_pte_none(pte_t pte) diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index 0ea10f27..cb6cfcd 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h @@ -25,6 +25,7 @@ extern void irq_ctx_init(int cpu); #ifdef CONFIG_HOTPLUG_CPU #include <linux/cpumask.h> +extern int check_irq_vectors_for_cpu_disable(void); extern void fixup_irqs(void); extern void irq_force_complete_move(int); #endif diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index bba3cf8..0a8b519 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -129,7 +129,7 @@ static inline notrace unsigned long arch_local_irq_save(void) #define PARAVIRT_ADJUST_EXCEPTION_FRAME /* */ -#define INTERRUPT_RETURN iretq +#define INTERRUPT_RETURN jmp native_iret #define USERGS_SYSRET64 \ swapgs; \ sysretq; diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index c76ff74..7cb77dd 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -455,7 +455,7 @@ struct kvm_vcpu_arch { bool nmi_injected; /* Trying to inject an NMI this entry */ struct mtrr_state_type mtrr_state; - u32 pat; + u64 pat; int switch_db_regs; unsigned long db[KVM_NR_DB_REGS]; @@ -473,6 +473,7 @@ struct kvm_vcpu_arch { u64 mmio_gva; unsigned access; gfn_t mmio_gfn; + u64 mmio_gen; struct kvm_pmu pmu; @@ -973,6 +974,20 @@ static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code) kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); } +static inline u64 get_canonical(u64 la) +{ + return ((int64_t)la << 16) >> 16; +} + +static inline bool is_noncanonical_address(u64 la) +{ +#ifdef CONFIG_X86_64 + return get_canonical(la) != la; +#else + return false; +#endif +} + #define TSS_IOPB_BASE_OFFSET 0x66 #define TSS_BASE_SIZE 0x68 #define TSS_IOPB_SIZE (65536 / 8) @@ -1031,7 +1046,7 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v); void kvm_vcpu_reset(struct kvm_vcpu *vcpu); void kvm_define_shared_msr(unsigned index, u32 msr); -void kvm_set_shared_msr(unsigned index, u64 val, u64 mask); +int kvm_set_shared_msr(unsigned index, u64 val, u64 mask); bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip); diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index c7678e4..e62cf89 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -2,6 +2,7 @@ #define _ASM_X86_KVM_PARA_H #include <asm/processor.h> +#include <asm/alternative.h> #include <uapi/asm/kvm_para.h> extern void kvmclock_init(void); @@ -16,10 +17,15 @@ static inline bool kvm_check_and_clear_guest_paused(void) } #endif /* CONFIG_KVM_GUEST */ -/* This instruction is vmcall. On non-VT architectures, it will generate a - * trap that we will then rewrite to the appropriate instruction. +#ifdef CONFIG_DEBUG_RODATA +#define KVM_HYPERCALL \ + ALTERNATIVE(".byte 0x0f,0x01,0xc1", ".byte 0x0f,0x01,0xd9", X86_FEATURE_VMMCALL) +#else +/* On AMD processors, vmcall will generate a trap that we will + * then rewrite to the appropriate instruction. */ #define KVM_HYPERCALL ".byte 0x0f,0x01,0xc1" +#endif /* For KVM hypercalls, a three-byte sequence of either the vmcall or the vmmcall * instruction. The hypervisor may replace it with something else but only the diff --git a/arch/x86/include/asm/page_32_types.h b/arch/x86/include/asm/page_32_types.h index f48b17d..3a52ee0 100644 --- a/arch/x86/include/asm/page_32_types.h +++ b/arch/x86/include/asm/page_32_types.h @@ -20,7 +20,6 @@ #define THREAD_SIZE_ORDER 1 #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) -#define STACKFAULT_STACK 0 #define DOUBLEFAULT_STACK 1 #define NMI_STACK 0 #define DEBUG_STACK 0 diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index 43dcd80..d1d2972 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -14,12 +14,11 @@ #define IRQ_STACK_ORDER 2 #define IRQ_STACK_SIZE (PAGE_SIZE << IRQ_STACK_ORDER) -#define STACKFAULT_STACK 1 -#define DOUBLEFAULT_STACK 2 -#define NMI_STACK 3 -#define DEBUG_STACK 4 -#define MCE_STACK 5 -#define N_EXCEPTION_STACKS 5 /* hw limit: 7 */ +#define DOUBLEFAULT_STACK 1 +#define NMI_STACK 2 +#define DEBUG_STACK 3 +#define MCE_STACK 4 +#define N_EXCEPTION_STACKS 4 /* hw limit: 7 */ #define PUD_PAGE_SIZE (_AC(1, UL) << PUD_SHIFT) #define PUD_PAGE_MASK (~(PUD_PAGE_SIZE-1)) diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 2d88344..b1609f2 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -61,6 +61,8 @@ typedef struct { pteval_t pte; } pte_t; #define MODULES_VADDR _AC(0xffffffffa0000000, UL) #define MODULES_END _AC(0xffffffffff000000, UL) #define MODULES_LEN (MODULES_END - MODULES_VADDR) +#define ESPFIX_PGD_ENTRY _AC(-2, UL) +#define ESPFIX_BASE_ADDR (ESPFIX_PGD_ENTRY << PGDIR_SHIFT) #define EARLY_DYNAMIC_PAGE_TABLES 64 diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 942a086..68e9f00 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -232,6 +232,22 @@ static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, #define ARCH_HAS_USER_SINGLE_STEP_INFO +/* + * When hitting ptrace_stop(), we cannot return using SYSRET because + * that does not restore the full CPU state, only a minimal set. The + * ptracer can change arbitrary register values, which is usually okay + * because the usual ptrace stops run off the signal delivery path which + * forces IRET; however, ptrace_event() stops happen in arbitrary places + * in the kernel and don't force IRET path. + * + * So force IRET path after a ptrace stop. + */ +#define arch_ptrace_stop_needed(code, info) \ +({ \ + set_thread_flag(TIF_NOTIFY_RESUME); \ + false; \ +}) + struct user_desc; extern int do_get_thread_area(struct task_struct *p, int idx, struct user_desc __user *info); diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index 3475554..ad1d8ec 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -64,6 +64,8 @@ static inline void x86_ce4100_early_setup(void) { } #ifndef _SETUP +#include <asm/espfix.h> + /* * This is set up by the setup-routine at boot-time */ diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h index 6ec0792..35e67a4 100644 --- a/arch/x86/include/asm/signal.h +++ b/arch/x86/include/asm/signal.h @@ -23,19 +23,6 @@ typedef struct { unsigned long sig[_NSIG_WORDS]; } sigset_t; -/* - * Because some traps use the IST stack, we must keep preemption - * disabled while calling do_trap(), but do_trap() may call - * force_sig_info() which will grab the signal spin_locks for the - * task, which in PREEMPT_RT_FULL are mutexes. By defining - * ARCH_RT_DELAYS_SIGNAL_SEND the force_sig_info() will set - * TIF_NOTIFY_RESUME and set up the signal to be sent on exit of the - * trap. - */ -#if defined(CONFIG_PREEMPT_RT_FULL) && defined(CONFIG_X86_64) -#define ARCH_RT_DELAYS_SIGNAL_SEND -#endif - #ifndef CONFIG_COMPAT typedef sigset_t compat_sigset_t; #endif diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h index 64fb5cb..6a99859 100644 --- a/arch/x86/include/asm/stackprotector.h +++ b/arch/x86/include/asm/stackprotector.h @@ -57,7 +57,7 @@ */ static __always_inline void boot_init_stack_canary(void) { - u64 uninitialized_var(canary); + u64 canary; u64 tsc; #ifdef CONFIG_X86_64 @@ -68,16 +68,8 @@ static __always_inline void boot_init_stack_canary(void) * of randomness. The TSC only matters for very early init, * there it already has some randomness on most systems. Later * on during the bootup the random pool has true entropy too. - * - * For preempt-rt we need to weaken the randomness a bit, as - * we can't call into the random generator from atomic context - * due to locking constraints. We just leave canary - * uninitialized and use the TSC based randomness on top of - * it. */ -#ifndef CONFIG_PREEMPT_RT_FULL get_random_bytes(&canary, sizeof(canary)); -#endif tsc = __native_read_tsc(); canary += tsc + (tsc << 32UL); diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index f08e527..bbbac92 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -30,8 +30,6 @@ struct thread_info { __u32 cpu; /* current CPU */ int preempt_count; /* 0 => preemptable, <0 => BUG */ - int preempt_lazy_count; /* 0 => lazy preemptable, - <0 => BUG */ mm_segment_t addr_limit; struct restart_block restart_block; void __user *sysenter_return; @@ -83,7 +81,6 @@ struct thread_info { #define TIF_SYSCALL_EMU 6 /* syscall emulation active */ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SECCOMP 8 /* secure computing */ -#define TIF_NEED_RESCHED_LAZY 9 /* lazy rescheduling necessary */ #define TIF_MCE_NOTIFY 10 /* notify userspace of an MCE */ #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */ #define TIF_UPROBE 12 /* breakpointed or singlestepping */ @@ -108,7 +105,6 @@ struct thread_info { #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) -#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY) #define _TIF_MCE_NOTIFY (1 << TIF_MCE_NOTIFY) #define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY) #define _TIF_UPROBE (1 << TIF_UPROBE) @@ -149,7 +145,7 @@ struct thread_info { /* Only used for 64 bit */ #define _TIF_DO_NOTIFY_MASK \ (_TIF_SIGPENDING | _TIF_MCE_NOTIFY | _TIF_NOTIFY_RESUME | \ - _TIF_USER_RETURN_NOTIFY) + _TIF_USER_RETURN_NOTIFY | _TIF_UPROBE) /* flags to check in __switch_to() */ #define _TIF_WORK_CTXSW \ @@ -158,8 +154,6 @@ struct thread_info { #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) -#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY) - #define PREEMPT_ACTIVE 0x10000000 #ifdef CONFIG_X86_32 diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index e6d90ba..04905bf 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -62,7 +62,7 @@ static inline void __flush_tlb_all(void) static inline void __flush_tlb_one(unsigned long addr) { - count_vm_event(NR_TLB_LOCAL_FLUSH_ONE); + count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE); __flush_tlb_single(addr); } @@ -93,13 +93,13 @@ static inline void __flush_tlb_one(unsigned long addr) */ static inline void __flush_tlb_up(void) { - count_vm_event(NR_TLB_LOCAL_FLUSH_ALL); + count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); __flush_tlb(); } static inline void flush_tlb_all(void) { - count_vm_event(NR_TLB_LOCAL_FLUSH_ALL); + count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); __flush_tlb_all(); } diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h index 2a46ca7..2874be9 100644 --- a/arch/x86/include/asm/vsyscall.h +++ b/arch/x86/include/asm/vsyscall.h @@ -34,7 +34,7 @@ static inline unsigned int __getcpu(void) native_read_tscp(&p); } else { /* Load per CPU data from GDT */ - asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG)); + asm volatile ("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG)); } return p; diff --git a/arch/x86/include/uapi/asm/ldt.h b/arch/x86/include/uapi/asm/ldt.h index 46727eb..6e1aaf7 100644 --- a/arch/x86/include/uapi/asm/ldt.h +++ b/arch/x86/include/uapi/asm/ldt.h @@ -28,6 +28,13 @@ struct user_desc { unsigned int seg_not_present:1; unsigned int useable:1; #ifdef __x86_64__ + /* + * Because this bit is not present in 32-bit user code, user + * programs can pass uninitialized values here. Therefore, in + * any context in which a user_desc comes from a 32-bit program, + * the kernel must act as though lm == 0, regardless of the + * actual value. + */ unsigned int lm:1; #endif }; diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h index 0e79420..990a2fe 100644 --- a/arch/x86/include/uapi/asm/vmx.h +++ b/arch/x86/include/uapi/asm/vmx.h @@ -67,6 +67,7 @@ #define EXIT_REASON_EPT_MISCONFIG 49 #define EXIT_REASON_INVEPT 50 #define EXIT_REASON_PREEMPTION_TIMER 52 +#define EXIT_REASON_INVVPID 53 #define EXIT_REASON_WBINVD 54 #define EXIT_REASON_XSETBV 55 #define EXIT_REASON_APIC_WRITE 56 @@ -114,6 +115,7 @@ { EXIT_REASON_EOI_INDUCED, "EOI_INDUCED" }, \ { EXIT_REASON_INVALID_STATE, "INVALID_STATE" }, \ { EXIT_REASON_INVD, "INVD" }, \ + { EXIT_REASON_INVVPID, "INVVPID" }, \ { EXIT_REASON_INVPCID, "INVPCID" } #endif /* _UAPIVMX_H */ diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index a5408b9..32f1140 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -29,6 +29,7 @@ obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o obj-y += syscall_$(BITS).o obj-$(CONFIG_X86_64) += vsyscall_64.o obj-$(CONFIG_X86_64) += vsyscall_emu_64.o +obj-$(CONFIG_X86_ESPFIX64) += espfix_64.o obj-y += bootflag.o e820.o obj-y += pci-dma.o quirks.o topology.o kdebugfs.o obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index a7eb82d..7170f17 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1282,7 +1282,7 @@ void setup_local_APIC(void) unsigned int value, queued; int i, j, acked = 0; unsigned long long tsc = 0, ntsc; - long long max_loops = cpu_khz; + long long max_loops = cpu_khz ? cpu_khz : 1000000; if (cpu_has_tsc) rdtscll(tsc); @@ -1379,7 +1379,7 @@ void setup_local_APIC(void) break; } if (queued) { - if (cpu_has_tsc) { + if (cpu_has_tsc && cpu_khz) { rdtscll(ntsc); max_loops = (cpu_khz << 10) - (ntsc - tsc); } else diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 5816e6a..e63a5bd 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2396,8 +2396,7 @@ static bool io_apic_level_ack_pending(struct irq_cfg *cfg) static inline bool ioapic_irqd_mask(struct irq_data *data, struct irq_cfg *cfg) { /* If we are moving the irq we need to mask it */ - if (unlikely(irqd_is_setaffinity_pending(data) && - !irqd_irq_inprogress(data))) { + if (unlikely(irqd_is_setaffinity_pending(data))) { mask_ioapic(cfg); return true; } diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index a36d9cf..2861082 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -33,7 +33,6 @@ void common(void) { OFFSET(TI_status, thread_info, status); OFFSET(TI_addr_limit, thread_info, addr_limit); OFFSET(TI_preempt_count, thread_info, preempt_count); - OFFSET(TI_preempt_lazy_count, thread_info, preempt_lazy_count); BLANK(); OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx); diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 28233b9..ee51e67 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -509,6 +509,13 @@ static void early_init_amd(struct cpuinfo_x86 *c) } #endif + /* + * This is only needed to tell the kernel whether to use VMCALL + * and VMMCALL. VMMCALL is never executed except under virt, so + * we can set it unconditionally. + */ + set_cpu_cap(c, X86_FEATURE_VMMCALL); + /* F16h erratum 793, CVE-2013-6885 */ if (c->x86 == 0x16 && c->x86_model <= 0xf) { u64 val; diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 3533e2c..00cc6f7 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -144,6 +144,8 @@ EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); static int __init x86_xsave_setup(char *s) { + if (strlen(s)) + return 0; setup_clear_cpu_cap(X86_FEATURE_XSAVE); setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); setup_clear_cpu_cap(X86_FEATURE_AVX); @@ -1135,7 +1137,7 @@ void syscall_init(void) /* Flags to clear on syscall */ wrmsrl(MSR_SYSCALL_MASK, X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF| - X86_EFLAGS_IOPL|X86_EFLAGS_AC); + X86_EFLAGS_IOPL|X86_EFLAGS_AC|X86_EFLAGS_NT); } /* diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 87c0be5..f4a9985 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -154,6 +154,21 @@ static void early_init_intel(struct cpuinfo_x86 *c) setup_clear_cpu_cap(X86_FEATURE_ERMS); } } + + /* + * Intel Quark Core DevMan_001.pdf section 6.4.11 + * "The operating system also is required to invalidate (i.e., flush) + * the TLB when any changes are made to any of the page table entries. + * The operating system must reload CR3 to cause the TLB to be flushed" + * + * As a result cpu_has_pge() in arch/x86/include/asm/tlbflush.h should + * be false so that __flush_tlb_all() causes CR3 insted of CR4.PGE + * to be modified + */ + if (c->x86 == 5 && c->x86_model == 9) { + pr_info("Disabling PGE capability bit\n"); + setup_clear_cpu_cap(X86_FEATURE_PGE); + } } #ifdef CONFIG_X86_32 @@ -369,6 +384,13 @@ static void init_intel(struct cpuinfo_x86 *c) detect_extended_topology(c); l2 = init_intel_cacheinfo(c); + + /* Detect legacy cache sizes if init_intel_cacheinfo did not */ + if (l2 == 0) { + cpu_detect_cache_sizes(c); + l2 = c->x86_cache_size; + } + if (c->cpuid_level > 9) { unsigned eax = cpuid_eax(10); /* Check for version and the number of counters */ @@ -483,6 +505,13 @@ static unsigned int intel_size_cache(struct cpuinfo_x86 *c, unsigned int size) */ if ((c->x86 == 6) && (c->x86_model == 11) && (size == 0)) size = 256; + + /* + * Intel Quark SoC X1000 contains a 4-way set associative + * 16K cache with a 16 byte cache line and 256 lines per tag + */ + if ((c->x86 == 5) && (c->x86_model == 9)) + size = 16; return size; } #endif @@ -688,7 +717,8 @@ static const struct cpu_dev intel_cpu_dev = { [3] = "OverDrive PODP5V83", [4] = "Pentium MMX", [7] = "Mobile Pentium 75 - 200", - [8] = "Mobile Pentium MMX" + [8] = "Mobile Pentium MMX", + [9] = "Quark SoC X1000", } }, { .vendor = X86_VENDOR_INTEL, .family = 6, .model_names = diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 3a7ab0b..b3218cd 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -18,7 +18,6 @@ #include <linux/rcupdate.h> #include <linux/kobject.h> #include <linux/uaccess.h> -#include <linux/kthread.h> #include <linux/kdebug.h> #include <linux/kernel.h> #include <linux/percpu.h> @@ -42,7 +41,6 @@ #include <linux/debugfs.h> #include <linux/irq_work.h> #include <linux/export.h> -#include <linux/jiffies.h> #include <asm/processor.h> #include <asm/mce.h> @@ -1270,7 +1268,7 @@ void mce_log_therm_throt_event(__u64 status) static unsigned long check_interval = 5 * 60; /* 5 minutes */ static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */ -static DEFINE_PER_CPU(struct hrtimer, mce_timer); +static DEFINE_PER_CPU(struct timer_list, mce_timer); static unsigned long mce_adjust_timer_default(unsigned long interval) { @@ -1280,10 +1278,13 @@ static unsigned long mce_adjust_timer_default(unsigned long interval) static unsigned long (*mce_adjust_timer)(unsigned long interval) = mce_adjust_timer_default; -static enum hrtimer_restart mce_timer_fn(struct hrtimer *timer) +static void mce_timer_fn(unsigned long data) { + struct timer_list *t = &__get_cpu_var(mce_timer); unsigned long iv; + WARN_ON(smp_processor_id() != data); + if (mce_available(__this_cpu_ptr(&cpu_info))) { machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_poll_banks)); @@ -1304,11 +1305,9 @@ static enum hrtimer_restart mce_timer_fn(struct hrtimer *timer) __this_cpu_write(mce_next_interval, iv); /* Might have become 0 after CMCI storm subsided */ if (iv) { - hrtimer_forward_now(timer, ns_to_ktime( - jiffies_to_usecs(iv) * 1000ULL)); - return HRTIMER_RESTART; + t->expires = jiffies + iv; + add_timer_on(t, smp_processor_id()); } - return HRTIMER_NORESTART; } /* @@ -1316,37 +1315,28 @@ static enum hrtimer_restart mce_timer_fn(struct hrtimer *timer) */ void mce_timer_kick(unsigned long interval) { - struct hrtimer *t = &__get_cpu_var(mce_timer); + struct timer_list *t = &__get_cpu_var(mce_timer); + unsigned long when = jiffies + interval; unsigned long iv = __this_cpu_read(mce_next_interval); - if (hrtimer_active(t)) { - s64 exp; - s64 intv_us; - - intv_us = jiffies_to_usecs(interval); - exp = ktime_to_us(hrtimer_expires_remaining(t)); - if (intv_us < exp) { - hrtimer_cancel(t); - hrtimer_start_range_ns(t, - ns_to_ktime(intv_us * 1000), - 0, HRTIMER_MODE_REL_PINNED); - } + if (timer_pending(t)) { + if (time_before(when, t->expires)) + mod_timer_pinned(t, when); } else { - hrtimer_start_range_ns(t, - ns_to_ktime(jiffies_to_usecs(interval) * 1000ULL), - 0, HRTIMER_MODE_REL_PINNED); + t->expires = round_jiffies(when); + add_timer_on(t, smp_processor_id()); } if (interval < iv) __this_cpu_write(mce_next_interval, interval); } -/* Must not be called in IRQ context where hrtimer_cancel() can deadlock */ +/* Must not be called in IRQ context where del_timer_sync() can deadlock */ static void mce_timer_delete_all(void) { int cpu; for_each_online_cpu(cpu) - hrtimer_cancel(&per_cpu(mce_timer, cpu)); + del_timer_sync(&per_cpu(mce_timer, cpu)); } static void mce_do_trigger(struct work_struct *work) @@ -1356,63 +1346,6 @@ static void mce_do_trigger(struct work_struct *work) static DECLARE_WORK(mce_trigger_work, mce_do_trigger); -static void __mce_notify_work(void) -{ - /* Not more than two messages every minute */ - static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2); - - /* wake processes polling /dev/mcelog */ - wake_up_interruptible(&mce_chrdev_wait); - - /* - * There is no risk of missing notifications because - * work_pending is always cleared before the function is - * executed. - */ - if (mce_helper[0] && !work_pending(&mce_trigger_work)) - schedule_work(&mce_trigger_work); - - if (__ratelimit(&ratelimit)) - pr_info(HW_ERR "Machine check events logged\n"); -} - -#ifdef CONFIG_PREEMPT_RT_FULL -struct task_struct *mce_notify_helper; - -static int mce_notify_helper_thread(void *unused) -{ - while (1) { - set_current_state(TASK_INTERRUPTIBLE); - schedule(); - if (kthread_should_stop()) - break; - __mce_notify_work(); - } - return 0; -} - -static int mce_notify_work_init(void) -{ - mce_notify_helper = kthread_run(mce_notify_helper_thread, NULL, - "mce-notify"); - if (!mce_notify_helper) - return -ENOMEM; - - return 0; -} - -static void mce_notify_work(void) -{ - wake_up_process(mce_notify_helper); -} -#else -static void mce_notify_work(void) -{ - __mce_notify_work(); -} -static inline int mce_notify_work_init(void) { return 0; } -#endif - /* * Notify the user(s) about new machine check events. * Can be called from interrupt context, but not from machine check/NMI @@ -1420,8 +1353,19 @@ static inline int mce_notify_work_init(void) { return 0; } */ int mce_notify_irq(void) { + /* Not more than two messages every minute */ + static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2); + if (test_and_clear_bit(0, &mce_need_notify)) { - mce_notify_work(); + /* wake processes polling /dev/mcelog */ + wake_up_interruptible(&mce_chrdev_wait); + + if (mce_helper[0]) + schedule_work(&mce_trigger_work); + + if (__ratelimit(&ratelimit)) + pr_info(HW_ERR "Machine check events logged\n"); + return 1; } return 0; @@ -1692,7 +1636,7 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c) } } -static void mce_start_timer(unsigned int cpu, struct hrtimer *t) +static void mce_start_timer(unsigned int cpu, struct timer_list *t) { unsigned long iv = mce_adjust_timer(check_interval * HZ); @@ -1701,17 +1645,16 @@ static void mce_start_timer(unsigned int cpu, struct hrtimer *t) if (mca_cfg.ignore_ce || !iv) return; - hrtimer_start_range_ns(t, ns_to_ktime(jiffies_to_usecs(iv) * 1000ULL), - 0, HRTIMER_MODE_REL_PINNED); + t->expires = round_jiffies(jiffies + iv); + add_timer_on(t, smp_processor_id()); } static void __mcheck_cpu_init_timer(void) { - struct hrtimer *t = &__get_cpu_var(mce_timer); + struct timer_list *t = &__get_cpu_var(mce_timer); unsigned int cpu = smp_processor_id(); - hrtimer_init(t, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - t->function = mce_timer_fn; + setup_timer(t, mce_timer_fn, cpu); mce_start_timer(cpu, t); } @@ -2386,8 +2329,6 @@ static void mce_disable_cpu(void *h) if (!mce_available(__this_cpu_ptr(&cpu_info))) return; - hrtimer_cancel(&__get_cpu_var(mce_timer)); - if (!(action & CPU_TASKS_FROZEN)) cmci_clear(); for (i = 0; i < mca_cfg.banks; i++) { @@ -2414,7 +2355,6 @@ static void mce_reenable_cpu(void *h) if (b->init) wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl); } - __mcheck_cpu_init_timer(); } /* Get notified when a cpu comes on/off. Be hotplug friendly. */ @@ -2422,6 +2362,7 @@ static int mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { unsigned int cpu = (unsigned long)hcpu; + struct timer_list *t = &per_cpu(mce_timer, cpu); switch (action & ~CPU_TASKS_FROZEN) { case CPU_ONLINE: @@ -2437,9 +2378,11 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) break; case CPU_DOWN_PREPARE: smp_call_function_single(cpu, mce_disable_cpu, &action, 1); + del_timer_sync(t); break; case CPU_DOWN_FAILED: smp_call_function_single(cpu, mce_reenable_cpu, &action, 1); + mce_start_timer(cpu, t); break; } @@ -2501,8 +2444,6 @@ static __init int mcheck_init_device(void) /* register character device /dev/mcelog */ misc_register(&mce_chrdev_device); - err = mce_notify_work_init(); - return err; } device_initcall_sync(mcheck_init_device); diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 71a39f3..6474807 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -63,6 +63,7 @@ static struct clocksource hyperv_cs = { .rating = 400, /* use this when running on Hyperv*/ .read = read_hv_clock, .mask = CLOCKSOURCE_MASK(64), + .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; static void __init ms_hyperv_init_platform(void) diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index ce2d0a2..0e25a1b 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -683,7 +683,7 @@ static void prepare_set(void) __acquires(set_atomicity_lock) } /* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */ - count_vm_event(NR_TLB_LOCAL_FLUSH_ALL); + count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); __flush_tlb(); /* Save MTRR state */ @@ -697,7 +697,7 @@ static void prepare_set(void) __acquires(set_atomicity_lock) static void post_set(void) __releases(set_atomicity_lock) { /* Flush TLBs (no need to flush caches - they are disabled) */ - count_vm_event(NR_TLB_LOCAL_FLUSH_ALL); + count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); __flush_tlb(); /* Intel (P6) standard MTRRs */ diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 5edd3c0..c7106f1 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -118,6 +118,9 @@ static int x86_pmu_extra_regs(u64 config, struct perf_event *event) continue; if (event->attr.config1 & ~er->valid_mask) return -EINVAL; + /* Check if the extra msrs can be safely accessed*/ + if (!er->extra_msr_access) + return -ENXIO; reg->idx = er->idx; reg->config = event->attr.config1; diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index cc16faa..53bd272 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -279,14 +279,16 @@ struct extra_reg { u64 config_mask; u64 valid_mask; int idx; /* per_xxx->regs[] reg index */ + bool extra_msr_access; }; #define EVENT_EXTRA_REG(e, ms, m, vm, i) { \ - .event = (e), \ - .msr = (ms), \ - .config_mask = (m), \ - .valid_mask = (vm), \ - .idx = EXTRA_REG_##i, \ + .event = (e), \ + .msr = (ms), \ + .config_mask = (m), \ + .valid_mask = (vm), \ + .idx = EXTRA_REG_##i, \ + .extra_msr_access = true, \ } #define INTEL_EVENT_EXTRA_REG(event, msr, vm, idx) \ diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index f31a165..b400d0b 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -1365,6 +1365,15 @@ again: intel_pmu_lbr_read(); /* + * CondChgd bit 63 doesn't mean any overflow status. Ignore + * and clear the bit. + */ + if (__test_and_clear_bit(63, (unsigned long *)&status)) { + if (!status) + goto done; + } + + /* * PEBS overflow sets bit 62 in the global status register */ if (__test_and_clear_bit(62, (unsigned long *)&status)) { @@ -2135,6 +2144,41 @@ static void intel_snb_check_microcode(void) } } +/* + * Under certain circumstances, access certain MSR may cause #GP. + * The function tests if the input MSR can be safely accessed. + */ +static bool check_msr(unsigned long msr, u64 mask) +{ + u64 val_old, val_new, val_tmp; + + /* + * Read the current value, change it and read it back to see if it + * matches, this is needed to detect certain hardware emulators + * (qemu/kvm) that don't trap on the MSR access and always return 0s. + */ + if (rdmsrl_safe(msr, &val_old)) + return false; + + /* + * Only change the bits which can be updated by wrmsrl. + */ + val_tmp = val_old ^ mask; + if (wrmsrl_safe(msr, val_tmp) || + rdmsrl_safe(msr, &val_new)) + return false; + + if (val_new != val_tmp) + return false; + + /* Here it's sure that the MSR can be safely accessed. + * Restore the old value and return. + */ + wrmsrl(msr, val_old); + + return true; +} + static __init void intel_sandybridge_quirk(void) { x86_pmu.check_microcode = intel_snb_check_microcode; @@ -2198,7 +2242,8 @@ __init int intel_pmu_init(void) union cpuid10_ebx ebx; struct event_constraint *c; unsigned int unused; - int version; + struct extra_reg *er; + int version, i; if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { switch (boot_cpu_data.x86) { @@ -2243,10 +2288,7 @@ __init int intel_pmu_init(void) if (version > 1) x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3); - /* - * v2 and above have a perf capabilities MSR - */ - if (version > 1) { + if (boot_cpu_has(X86_FEATURE_PDCM)) { u64 capabilities; rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities); @@ -2404,6 +2446,9 @@ __init int intel_pmu_init(void) case 62: /* IvyBridge EP */ memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, sizeof(hw_cache_event_ids)); + /* dTLB-load-misses on IVB is different than SNB */ + hw_cache_event_ids[C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = 0x8108; /* DTLB_LOAD_MISSES.DEMAND_LD_MISS_CAUSES_A_WALK */ + memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); @@ -2503,6 +2548,34 @@ __init int intel_pmu_init(void) } } + /* + * Access LBR MSR may cause #GP under certain circumstances. + * E.g. KVM doesn't support LBR MSR + * Check all LBT MSR here. + * Disable LBR access if any LBR MSRs can not be accessed. + */ + if (x86_pmu.lbr_nr && !check_msr(x86_pmu.lbr_tos, 0x3UL)) + x86_pmu.lbr_nr = 0; + for (i = 0; i < x86_pmu.lbr_nr; i++) { + if (!(check_msr(x86_pmu.lbr_from + i, 0xffffUL) && + check_msr(x86_pmu.lbr_to + i, 0xffffUL))) + x86_pmu.lbr_nr = 0; + } + + /* + * Access extra MSR may cause #GP under certain circumstances. + * E.g. KVM doesn't support offcore event + * Check all extra_regs here. + */ + if (x86_pmu.extra_regs) { + for (er = x86_pmu.extra_regs; er->msr; er++) { + er->extra_msr_access = check_msr(er->msr, 0x1ffUL); + /* Disable LBR select mapping */ + if ((er->idx == EXTRA_REG_LBR) && !er->extra_msr_access) + x86_pmu.lbr_sel_map = NULL; + } + } + /* Support full width counters using alternative MSR range */ if (x86_pmu.intel_cap.full_width_write) { x86_pmu.max_period = x86_pmu.cntval_mask; diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index 4118f9f..3e1cfbb 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -2764,6 +2764,17 @@ static struct intel_uncore_box *uncore_event_to_box(struct perf_event *event) return uncore_pmu_to_box(uncore_event_to_pmu(event), smp_processor_id()); } +/* + * Using uncore_pmu_event_init pmu event_init callback + * as a detection point for uncore events. + */ +static int uncore_pmu_event_init(struct perf_event *event); + +static bool is_uncore_event(struct perf_event *event) +{ + return event->pmu->event_init == uncore_pmu_event_init; +} + static int uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader, bool dogrp) { @@ -2778,13 +2789,18 @@ uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader, b return -EINVAL; n = box->n_events; - box->event_list[n] = leader; - n++; + + if (is_uncore_event(leader)) { + box->event_list[n] = leader; + n++; + } + if (!dogrp) return n; list_for_each_entry(event, &leader->sibling_list, group_entry) { - if (event->state <= PERF_EVENT_STATE_OFF) + if (!is_uncore_event(event) || + event->state <= PERF_EVENT_STATE_OFF) continue; if (n >= max_count) diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index addb207..66e274a 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -24,7 +24,6 @@ static char x86_stack_ids[][8] = { [ DEBUG_STACK-1 ] = "#DB", [ NMI_STACK-1 ] = "NMI", [ DOUBLEFAULT_STACK-1 ] = "#DF", - [ STACKFAULT_STACK-1 ] = "#SS", [ MCE_STACK-1 ] = "#MC", #if DEBUG_STKSZ > EXCEPTION_STKSZ [ N_EXCEPTION_STACKS ... diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index e491bfd..1f1c33d 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -364,22 +364,14 @@ ENTRY(resume_kernel) DISABLE_INTERRUPTS(CLBR_ANY) cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? jnz restore_all +need_resched: movl TI_flags(%ebp), %ecx # need_resched set ? testb $_TIF_NEED_RESCHED, %cl - jnz 1f - - cmpl $0,TI_preempt_lazy_count(%ebp) # non-zero preempt_lazy_count ? - jnz restore_all - testl $_TIF_NEED_RESCHED_LAZY, %ecx jz restore_all - -1: testl $X86_EFLAGS_IF,PT_EFLAGS(%esp) # interrupts off (exception path) ? + testl $X86_EFLAGS_IF,PT_EFLAGS(%esp) # interrupts off (exception path) ? jz restore_all call preempt_schedule_irq - movl TI_flags(%ebp), %ecx # need_resched set ? - testl $_TIF_NEED_RESCHED_MASK, %ecx - jnz 1b - jmp restore_all + jmp need_resched END(resume_kernel) #endif CFI_ENDPROC @@ -442,8 +434,9 @@ sysenter_past_esp: jnz sysenter_audit sysenter_do_call: cmpl $(NR_syscalls), %eax - jae syscall_badsys + jae sysenter_badsys call *sys_call_table(,%eax,4) +sysenter_after_call: movl %eax,PT_EAX(%esp) LOCKDEP_SYS_EXIT DISABLE_INTERRUPTS(CLBR_ANY) @@ -524,6 +517,7 @@ ENTRY(system_call) jae syscall_badsys syscall_call: call *sys_call_table(,%eax,4) +syscall_after_call: movl %eax,PT_EAX(%esp) # store the return value syscall_exit: LOCKDEP_SYS_EXIT @@ -538,6 +532,7 @@ syscall_exit: restore_all: TRACE_IRQS_IRET restore_all_notrace: +#ifdef CONFIG_X86_ESPFIX32 movl PT_EFLAGS(%esp), %eax # mix EFLAGS, SS and CS # Warning: PT_OLDSS(%esp) contains the wrong/random values if we # are returning to the kernel. @@ -548,6 +543,7 @@ restore_all_notrace: cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax CFI_REMEMBER_STATE je ldt_ss # returning to user-space with LDT SS +#endif restore_nocheck: RESTORE_REGS 4 # skip orig_eax/error_code irq_return: @@ -560,13 +556,9 @@ ENTRY(iret_exc) .previous _ASM_EXTABLE(irq_return,iret_exc) +#ifdef CONFIG_X86_ESPFIX32 CFI_RESTORE_STATE ldt_ss: - larl PT_OLDSS(%esp), %eax - jnz restore_nocheck - testl $0x00400000, %eax # returning to 32bit stack? - jnz restore_nocheck # allright, normal return - #ifdef CONFIG_PARAVIRT /* * The kernel can't run on a non-flat stack if paravirt mode @@ -608,6 +600,7 @@ ldt_ss: lss (%esp), %esp /* switch to espfix segment */ CFI_ADJUST_CFA_OFFSET -8 jmp restore_nocheck +#endif CFI_ENDPROC ENDPROC(system_call) @@ -615,7 +608,7 @@ ENDPROC(system_call) ALIGN RING0_PTREGS_FRAME # can't unwind into user space anyway work_pending: - testl $_TIF_NEED_RESCHED_MASK, %ecx + testb $_TIF_NEED_RESCHED, %cl jz work_notifysig work_resched: call schedule @@ -628,7 +621,7 @@ work_resched: andl $_TIF_WORK_MASK, %ecx # is there any work to be done other # than syscall tracing? jz restore_all - testl $_TIF_NEED_RESCHED_MASK, %ecx + testb $_TIF_NEED_RESCHED, %cl jnz work_resched work_notifysig: # deal with pending signals and @@ -698,8 +691,13 @@ syscall_fault: END(syscall_fault) syscall_badsys: - movl $-ENOSYS,PT_EAX(%esp) - jmp resume_userspace + movl $-ENOSYS,%eax + jmp syscall_after_call +END(syscall_badsys) + +sysenter_badsys: + movl $-ENOSYS,%eax + jmp sysenter_after_call END(syscall_badsys) CFI_ENDPROC /* @@ -715,6 +713,7 @@ END(syscall_badsys) * the high word of the segment base from the GDT and swiches to the * normal stack and adjusts ESP with the matching offset. */ +#ifdef CONFIG_X86_ESPFIX32 /* fixup the stack */ mov GDT_ESPFIX_SS + 4, %al /* bits 16..23 */ mov GDT_ESPFIX_SS + 7, %ah /* bits 24..31 */ @@ -724,8 +723,10 @@ END(syscall_badsys) pushl_cfi %eax lss (%esp), %esp /* switch to the normal stack segment */ CFI_ADJUST_CFA_OFFSET -8 +#endif .endm .macro UNWIND_ESPFIX_STACK +#ifdef CONFIG_X86_ESPFIX32 movl %ss, %eax /* see if on espfix stack */ cmpw $__ESPFIX_SS, %ax @@ -736,6 +737,7 @@ END(syscall_badsys) /* switch to normal stack */ FIXUP_ESPFIX_STACK 27: +#endif .endm /* @@ -1356,11 +1358,13 @@ END(debug) ENTRY(nmi) RING0_INT_FRAME ASM_CLAC +#ifdef CONFIG_X86_ESPFIX32 pushl_cfi %eax movl %ss, %eax cmpw $__ESPFIX_SS, %ax popl_cfi %eax je nmi_espfix_stack +#endif cmpl $ia32_sysenter_target,(%esp) je nmi_stack_fixup pushl_cfi %eax @@ -1400,6 +1404,7 @@ nmi_debug_stack_check: FIX_STACK 24, nmi_stack_correct, 1 jmp nmi_stack_correct +#ifdef CONFIG_X86_ESPFIX32 nmi_espfix_stack: /* We have a RING0_INT_FRAME here. * @@ -1421,6 +1426,7 @@ nmi_espfix_stack: lss 12+4(%esp), %esp # back to espfix stack CFI_ADJUST_CFA_OFFSET -24 jmp irq_return +#endif CFI_ENDPROC END(nmi) diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index abca4f4..e965606 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -58,6 +58,7 @@ #include <asm/asm.h> #include <asm/context_tracking.h> #include <asm/smap.h> +#include <asm/pgtable_types.h> #include <linux/err.h> /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ @@ -658,8 +659,8 @@ sysret_check: /* Handle reschedules */ /* edx: work, edi: workmask */ sysret_careful: - testl $_TIF_NEED_RESCHED_MASK,%edx - jz sysret_signal + bt $TIF_NEED_RESCHED,%edx + jnc sysret_signal TRACE_IRQS_ON ENABLE_INTERRUPTS(CLBR_NONE) pushq_cfi %rdi @@ -771,8 +772,8 @@ GLOBAL(int_with_check) /* First do a reschedule test. */ /* edx: work, edi: workmask */ int_careful: - testl $_TIF_NEED_RESCHED_MASK,%edx - jz int_very_careful + bt $TIF_NEED_RESCHED,%edx + jnc int_very_careful TRACE_IRQS_ON ENABLE_INTERRUPTS(CLBR_NONE) pushq_cfi %rdi @@ -1041,38 +1042,58 @@ restore_args: irq_return: INTERRUPT_RETURN - _ASM_EXTABLE(irq_return, bad_iret) -#ifdef CONFIG_PARAVIRT ENTRY(native_iret) - iretq - _ASM_EXTABLE(native_iret, bad_iret) + /* + * Are we returning to a stack segment from the LDT? Note: in + * 64-bit mode SS:RSP on the exception stack is always valid. + */ +#ifdef CONFIG_X86_ESPFIX64 + testb $4,(SS-RIP)(%rsp) + jnz native_irq_return_ldt #endif - .section .fixup,"ax" -bad_iret: +.global native_irq_return_iret +native_irq_return_iret: /* - * The iret traps when the %cs or %ss being restored is bogus. - * We've lost the original trap vector and error code. - * #GPF is the most likely one to get for an invalid selector. - * So pretend we completed the iret and took the #GPF in user mode. - * - * We are now running with the kernel GS after exception recovery. - * But error_entry expects us to have user GS to match the user %cs, - * so swap back. + * This may fault. Non-paranoid faults on return to userspace are + * handled by fixup_bad_iret. These include #SS, #GP, and #NP. + * Double-faults due to espfix64 are handled in do_double_fault. + * Other faults here are fatal. */ - pushq $0 + iretq +#ifdef CONFIG_X86_ESPFIX64 +native_irq_return_ldt: + pushq_cfi %rax + pushq_cfi %rdi SWAPGS - jmp general_protection - - .previous + movq PER_CPU_VAR(espfix_waddr),%rdi + movq %rax,(0*8)(%rdi) /* RAX */ + movq (2*8)(%rsp),%rax /* RIP */ + movq %rax,(1*8)(%rdi) + movq (3*8)(%rsp),%rax /* CS */ + movq %rax,(2*8)(%rdi) + movq (4*8)(%rsp),%rax /* RFLAGS */ + movq %rax,(3*8)(%rdi) + movq (6*8)(%rsp),%rax /* SS */ + movq %rax,(5*8)(%rdi) + movq (5*8)(%rsp),%rax /* RSP */ + movq %rax,(4*8)(%rdi) + andl $0xffff0000,%eax + popq_cfi %rdi + orq PER_CPU_VAR(espfix_stack),%rax + SWAPGS + movq %rax,%rsp + popq_cfi %rax + jmp native_irq_return_iret +#endif /* edi: workmask, edx: work */ retint_careful: CFI_RESTORE_STATE - testl $_TIF_NEED_RESCHED_MASK,%edx - jz retint_signal + bt $TIF_NEED_RESCHED,%edx + jnc retint_signal TRACE_IRQS_ON ENABLE_INTERRUPTS(CLBR_NONE) pushq_cfi %rdi @@ -1105,22 +1126,16 @@ retint_signal: ENTRY(retint_kernel) cmpl $0,TI_preempt_count(%rcx) jnz retint_restore_args - bt $TIF_NEED_RESCHED,TI_flags(%rcx) - jc 1f - - cmpl $0,TI_preempt_lazy_count(%rcx) - jnz retint_restore_args - bt $TIF_NEED_RESCHED_LAZY,TI_flags(%rcx) + bt $TIF_NEED_RESCHED,TI_flags(%rcx) jnc retint_restore_args - -1: bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */ + bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */ jnc retint_restore_args call preempt_schedule_irq jmp exit_intr #endif - CFI_ENDPROC END(common_interrupt) + /* * End of kprobes section */ @@ -1347,7 +1362,6 @@ bad_gs: jmp 2b .previous -#ifndef CONFIG_PREEMPT_RT_FULL /* Call softirq on interrupt stack. Interrupts are off. */ ENTRY(call_softirq) CFI_STARTPROC @@ -1367,7 +1381,6 @@ ENTRY(call_softirq) ret CFI_ENDPROC END(call_softirq) -#endif #ifdef CONFIG_XEN zeroentry xen_hypervisor_callback xen_do_hypervisor_callback @@ -1483,7 +1496,7 @@ apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \ paranoidzeroentry_ist debug do_debug DEBUG_STACK paranoidzeroentry_ist int3 do_int3 DEBUG_STACK -paranoiderrorentry stack_segment do_stack_segment +errorentry stack_segment do_stack_segment #ifdef CONFIG_XEN zeroentry xen_debug do_debug zeroentry xen_int3 do_int3 @@ -1537,7 +1550,7 @@ paranoid_userspace: movq %rsp,%rdi /* &pt_regs */ call sync_regs movq %rax,%rsp /* switch stack for scheduling */ - testl $_TIF_NEED_RESCHED_MASK,%ebx + testl $_TIF_NEED_RESCHED,%ebx jnz paranoid_schedule movl %ebx,%edx /* arg3: thread flags */ TRACE_IRQS_ON @@ -1593,16 +1606,15 @@ error_sti: /* * There are two places in the kernel that can potentially fault with - * usergs. Handle them here. The exception handlers after iret run with - * kernel gs again, so don't set the user space flag. B stepping K8s - * sometimes report an truncated RIP for IRET exceptions returning to - * compat mode. Check for these here too. + * usergs. Handle them here. B stepping K8s sometimes report a + * truncated RIP for IRET exceptions returning to compat mode. Check + * for these here too. */ error_kernelspace: incl %ebx - leaq irq_return(%rip),%rcx + leaq native_irq_return_iret(%rip),%rcx cmpq %rcx,RIP+8(%rsp) - je error_swapgs + je error_bad_iret movl %ecx,%eax /* zero extend */ cmpq %rax,RIP+8(%rsp) je bstep_iret @@ -1613,7 +1625,15 @@ error_kernelspace: bstep_iret: /* Fix truncated RIP */ movq %rcx,RIP+8(%rsp) - jmp error_swapgs + /* fall through */ + +error_bad_iret: + SWAPGS + mov %rsp,%rdi + call fixup_bad_iret + mov %rax,%rsp + decl %ebx /* Return to usergs */ + jmp error_sti CFI_ENDPROC END(error_entry) diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c new file mode 100644 index 0000000..94d857f --- /dev/null +++ b/arch/x86/kernel/espfix_64.c @@ -0,0 +1,208 @@ +/* ----------------------------------------------------------------------- * + * + * Copyright 2014 Intel Corporation; author: H. Peter Anvin + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * ----------------------------------------------------------------------- */ + +/* + * The IRET instruction, when returning to a 16-bit segment, only + * restores the bottom 16 bits of the user space stack pointer. This + * causes some 16-bit software to break, but it also leaks kernel state + * to user space. + * + * This works around this by creating percpu "ministacks", each of which + * is mapped 2^16 times 64K apart. When we detect that the return SS is + * on the LDT, we copy the IRET frame to the ministack and use the + * relevant alias to return to userspace. The ministacks are mapped + * readonly, so if the IRET fault we promote #GP to #DF which is an IST + * vector and thus has its own stack; we then do the fixup in the #DF + * handler. + * + * This file sets up the ministacks and the related page tables. The + * actual ministack invocation is in entry_64.S. + */ + +#include <linux/init.h> +#include <linux/init_task.h> +#include <linux/kernel.h> +#include <linux/percpu.h> +#include <linux/gfp.h> +#include <linux/random.h> +#include <asm/pgtable.h> +#include <asm/pgalloc.h> +#include <asm/setup.h> +#include <asm/espfix.h> + +/* + * Note: we only need 6*8 = 48 bytes for the espfix stack, but round + * it up to a cache line to avoid unnecessary sharing. + */ +#define ESPFIX_STACK_SIZE (8*8UL) +#define ESPFIX_STACKS_PER_PAGE (PAGE_SIZE/ESPFIX_STACK_SIZE) + +/* There is address space for how many espfix pages? */ +#define ESPFIX_PAGE_SPACE (1UL << (PGDIR_SHIFT-PAGE_SHIFT-16)) + +#define ESPFIX_MAX_CPUS (ESPFIX_STACKS_PER_PAGE * ESPFIX_PAGE_SPACE) +#if CONFIG_NR_CPUS > ESPFIX_MAX_CPUS +# error "Need more than one PGD for the ESPFIX hack" +#endif + +#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO) + +/* This contains the *bottom* address of the espfix stack */ +DEFINE_PER_CPU_READ_MOSTLY(unsigned long, espfix_stack); +DEFINE_PER_CPU_READ_MOSTLY(unsigned long, espfix_waddr); + +/* Initialization mutex - should this be a spinlock? */ +static DEFINE_MUTEX(espfix_init_mutex); + +/* Page allocation bitmap - each page serves ESPFIX_STACKS_PER_PAGE CPUs */ +#define ESPFIX_MAX_PAGES DIV_ROUND_UP(CONFIG_NR_CPUS, ESPFIX_STACKS_PER_PAGE) +static void *espfix_pages[ESPFIX_MAX_PAGES]; + +static __page_aligned_bss pud_t espfix_pud_page[PTRS_PER_PUD] + __aligned(PAGE_SIZE); + +static unsigned int page_random, slot_random; + +/* + * This returns the bottom address of the espfix stack for a specific CPU. + * The math allows for a non-power-of-two ESPFIX_STACK_SIZE, in which case + * we have to account for some amount of padding at the end of each page. + */ +static inline unsigned long espfix_base_addr(unsigned int cpu) +{ + unsigned long page, slot; + unsigned long addr; + + page = (cpu / ESPFIX_STACKS_PER_PAGE) ^ page_random; + slot = (cpu + slot_random) % ESPFIX_STACKS_PER_PAGE; + addr = (page << PAGE_SHIFT) + (slot * ESPFIX_STACK_SIZE); + addr = (addr & 0xffffUL) | ((addr & ~0xffffUL) << 16); + addr += ESPFIX_BASE_ADDR; + return addr; +} + +#define PTE_STRIDE (65536/PAGE_SIZE) +#define ESPFIX_PTE_CLONES (PTRS_PER_PTE/PTE_STRIDE) +#define ESPFIX_PMD_CLONES PTRS_PER_PMD +#define ESPFIX_PUD_CLONES (65536/(ESPFIX_PTE_CLONES*ESPFIX_PMD_CLONES)) + +#define PGTABLE_PROT ((_KERNPG_TABLE & ~_PAGE_RW) | _PAGE_NX) + +static void init_espfix_random(void) +{ + unsigned long rand; + + /* + * This is run before the entropy pools are initialized, + * but this is hopefully better than nothing. + */ + if (!arch_get_random_long(&rand)) { + /* The constant is an arbitrary large prime */ + rdtscll(rand); + rand *= 0xc345c6b72fd16123UL; + } + + slot_random = rand % ESPFIX_STACKS_PER_PAGE; + page_random = (rand / ESPFIX_STACKS_PER_PAGE) + & (ESPFIX_PAGE_SPACE - 1); +} + +void __init init_espfix_bsp(void) +{ + pgd_t *pgd_p; + pteval_t ptemask; + + ptemask = __supported_pte_mask; + + /* Install the espfix pud into the kernel page directory */ + pgd_p = &init_level4_pgt[pgd_index(ESPFIX_BASE_ADDR)]; + pgd_populate(&init_mm, pgd_p, (pud_t *)espfix_pud_page); + + /* Randomize the locations */ + init_espfix_random(); + + /* The rest is the same as for any other processor */ + init_espfix_ap(); +} + +void init_espfix_ap(void) +{ + unsigned int cpu, page; + unsigned long addr; + pud_t pud, *pud_p; + pmd_t pmd, *pmd_p; + pte_t pte, *pte_p; + int n; + void *stack_page; + pteval_t ptemask; + + /* We only have to do this once... */ + if (likely(this_cpu_read(espfix_stack))) + return; /* Already initialized */ + + cpu = smp_processor_id(); + addr = espfix_base_addr(cpu); + page = cpu/ESPFIX_STACKS_PER_PAGE; + + /* Did another CPU already set this up? */ + stack_page = ACCESS_ONCE(espfix_pages[page]); + if (likely(stack_page)) + goto done; + + mutex_lock(&espfix_init_mutex); + + /* Did we race on the lock? */ + stack_page = ACCESS_ONCE(espfix_pages[page]); + if (stack_page) + goto unlock_done; + + ptemask = __supported_pte_mask; + + pud_p = &espfix_pud_page[pud_index(addr)]; + pud = *pud_p; + if (!pud_present(pud)) { + pmd_p = (pmd_t *)__get_free_page(PGALLOC_GFP); + pud = __pud(__pa(pmd_p) | (PGTABLE_PROT & ptemask)); + paravirt_alloc_pmd(&init_mm, __pa(pmd_p) >> PAGE_SHIFT); + for (n = 0; n < ESPFIX_PUD_CLONES; n++) + set_pud(&pud_p[n], pud); + } + + pmd_p = pmd_offset(&pud, addr); + pmd = *pmd_p; + if (!pmd_present(pmd)) { + pte_p = (pte_t *)__get_free_page(PGALLOC_GFP); + pmd = __pmd(__pa(pte_p) | (PGTABLE_PROT & ptemask)); + paravirt_alloc_pte(&init_mm, __pa(pte_p) >> PAGE_SHIFT); + for (n = 0; n < ESPFIX_PMD_CLONES; n++) + set_pmd(&pmd_p[n], pmd); + } + + pte_p = pte_offset_kernel(&pmd, addr); + stack_page = (void *)__get_free_page(GFP_KERNEL); + pte = __pte(__pa(stack_page) | (__PAGE_KERNEL_RO & ptemask)); + for (n = 0; n < ESPFIX_PTE_CLONES; n++) + set_pte(&pte_p[n*PTE_STRIDE], pte); + + /* Job is done for this CPU and any CPU which shares this page */ + ACCESS_ONCE(espfix_pages[page]) = stack_page; + +unlock_done: + mutex_unlock(&espfix_init_mutex); +done: + this_cpu_write(espfix_stack, addr); + this_cpu_write(espfix_waddr, (unsigned long)stack_page + + (addr & ~PAGE_MASK)); +} diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index e625319..f8ab203 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -297,16 +297,7 @@ int ftrace_int3_handler(struct pt_regs *regs) static int ftrace_write(unsigned long ip, const char *val, int size) { - /* - * On x86_64, kernel text mappings are mapped read-only with - * CONFIG_DEBUG_RODATA. So we use the kernel identity mapping instead - * of the kernel text mapping to modify the kernel text. - * - * For 32bit kernels, these mappings are same and we can use - * kernel identity mapping to modify code. - */ - if (within(ip, (unsigned long)_text, (unsigned long)_etext)) - ip = (unsigned long)__va(__pa_symbol(ip)); + ip = text_ip_addr(ip); return probe_kernel_write((void *)ip, val, size); } @@ -659,8 +650,8 @@ ftrace_modify_code(unsigned long ip, unsigned const char *old_code, ret = -EPERM; goto out; } - run_sync(); out: + run_sync(); return ret; fail_update: diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 22d0687..3910078 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -262,6 +262,83 @@ __visible void smp_trace_x86_platform_ipi(struct pt_regs *regs) EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq); #ifdef CONFIG_HOTPLUG_CPU + +/* These two declarations are only used in check_irq_vectors_for_cpu_disable() + * below, which is protected by stop_machine(). Putting them on the stack + * results in a stack frame overflow. Dynamically allocating could result in a + * failure so declare these two cpumasks as global. + */ +static struct cpumask affinity_new, online_new; + +/* + * This cpu is going to be removed and its vectors migrated to the remaining + * online cpus. Check to see if there are enough vectors in the remaining cpus. + * This function is protected by stop_machine(). + */ +int check_irq_vectors_for_cpu_disable(void) +{ + int irq, cpu; + unsigned int this_cpu, vector, this_count, count; + struct irq_desc *desc; + struct irq_data *data; + + this_cpu = smp_processor_id(); + cpumask_copy(&online_new, cpu_online_mask); + cpu_clear(this_cpu, online_new); + + this_count = 0; + for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { + irq = __this_cpu_read(vector_irq[vector]); + if (irq >= 0) { + desc = irq_to_desc(irq); + data = irq_desc_get_irq_data(desc); + cpumask_copy(&affinity_new, data->affinity); + cpu_clear(this_cpu, affinity_new); + + /* Do not count inactive or per-cpu irqs. */ + if (!irq_has_action(irq) || irqd_is_per_cpu(data)) + continue; + + /* + * A single irq may be mapped to multiple + * cpu's vector_irq[] (for example IOAPIC cluster + * mode). In this case we have two + * possibilities: + * + * 1) the resulting affinity mask is empty; that is + * this the down'd cpu is the last cpu in the irq's + * affinity mask, or + * + * 2) the resulting affinity mask is no longer + * a subset of the online cpus but the affinity + * mask is not zero; that is the down'd cpu is the + * last online cpu in a user set affinity mask. + */ + if (cpumask_empty(&affinity_new) || + !cpumask_subset(&affinity_new, &online_new)) + this_count++; + } + } + + count = 0; + for_each_online_cpu(cpu) { + if (cpu == this_cpu) + continue; + for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; + vector++) { + if (per_cpu(vector_irq, cpu)[vector] < 0) + count++; + } + } + + if (count < this_count) { + pr_warn("CPU %d disable failed: CPU has %u vectors assigned and there are only %u available.\n", + this_cpu, this_count, count); + return -ERANGE; + } + return 0; +} + /* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ void fixup_irqs(void) { diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 9da1bc7..4186755 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -149,7 +149,6 @@ void irq_ctx_init(int cpu) cpu, per_cpu(hardirq_ctx, cpu), per_cpu(softirq_ctx, cpu)); } -#ifndef CONFIG_PREEMPT_RT_FULL asmlinkage void do_softirq(void) { unsigned long flags; @@ -180,7 +179,6 @@ asmlinkage void do_softirq(void) local_irq_restore(flags); } -#endif bool handle_irq(unsigned irq, struct pt_regs *regs) { diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 831f247..d04d3ec 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -88,7 +88,7 @@ bool handle_irq(unsigned irq, struct pt_regs *regs) return true; } -#ifndef CONFIG_PREEMPT_RT_FULL + extern void call_softirq(void); asmlinkage void do_softirq(void) @@ -108,4 +108,3 @@ asmlinkage void do_softirq(void) } local_irq_restore(flags); } -#endif diff --git a/arch/x86/kernel/irq_work.c b/arch/x86/kernel/irq_work.c index 3d21f7b..1de84e3 100644 --- a/arch/x86/kernel/irq_work.c +++ b/arch/x86/kernel/irq_work.c @@ -38,7 +38,6 @@ __visible void smp_trace_irq_work_interrupt(struct pt_regs *regs) exiting_irq(); } -#ifndef CONFIG_PREEMPT_RT_FULL void arch_irq_work_raise(void) { #ifdef CONFIG_X86_LOCAL_APIC @@ -49,4 +48,3 @@ void arch_irq_work_raise(void) apic_wait_icr_idle(); #endif } -#endif diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 79a3f96..a1f5b18 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -1017,6 +1017,15 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) regs->flags &= ~X86_EFLAGS_IF; trace_hardirqs_off(); regs->ip = (unsigned long)(jp->entry); + + /* + * jprobes use jprobe_return() which skips the normal return + * path of the function, and this messes up the accounting of the + * function graph tracer to get messed up. + * + * Pause function graph tracing while performing the jprobe function. + */ + pause_graph_tracing(); return 1; } @@ -1042,24 +1051,25 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); u8 *addr = (u8 *) (regs->ip - 1); struct jprobe *jp = container_of(p, struct jprobe, kp); + void *saved_sp = kcb->jprobe_saved_sp; if ((addr > (u8 *) jprobe_return) && (addr < (u8 *) jprobe_return_end)) { - if (stack_addr(regs) != kcb->jprobe_saved_sp) { + if (stack_addr(regs) != saved_sp) { struct pt_regs *saved_regs = &kcb->jprobe_saved_regs; printk(KERN_ERR "current sp %p does not match saved sp %p\n", - stack_addr(regs), kcb->jprobe_saved_sp); + stack_addr(regs), saved_sp); printk(KERN_ERR "Saved registers for jprobe %p\n", jp); show_regs(saved_regs); printk(KERN_ERR "Current registers\n"); show_regs(regs); BUG(); } + /* It's OK to start function graph tracing again */ + unpause_graph_tracing(); *regs = kcb->jprobe_saved_regs; - memcpy((kprobe_opcode_t *)(kcb->jprobe_saved_sp), - kcb->jprobes_stack, - MIN_STACK_SIZE(kcb->jprobe_saved_sp)); + memcpy(saved_sp, kcb->jprobes_stack, MIN_STACK_SIZE(saved_sp)); preempt_enable_no_resched(); return 1; } diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index f022c54..e725933 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -280,7 +280,14 @@ do_async_page_fault(struct pt_regs *regs, unsigned long error_code) static void __init paravirt_ops_setup(void) { pv_info.name = "KVM"; - pv_info.paravirt_enabled = 1; + + /* + * KVM isn't paravirt in the sense of paravirt_enabled. A KVM + * guest kernel works like a bare metal kernel with additional + * features, and paravirt_enabled is about features that are + * missing. + */ + pv_info.paravirt_enabled = 0; if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY)) pv_cpu_ops.io_delay = kvm_io_delay; diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 1570e07..23457e5 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -262,7 +262,6 @@ void __init kvmclock_init(void) #endif kvm_get_preset_lpj(); clocksource_register_hz(&kvm_clock, NSEC_PER_SEC); - pv_info.paravirt_enabled = 1; pv_info.name = "KVM"; if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT)) diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index ebc9873..c37886d 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -229,6 +229,11 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode) } } + if (!IS_ENABLED(CONFIG_X86_16BIT) && !ldt_info.seg_32bit) { + error = -EINVAL; + goto out_unlock; + } + fill_ldt(&ldt, &ldt_info); if (oldmode) ldt.avl = 0; diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c index 3f08f34..a1da673 100644 --- a/arch/x86/kernel/paravirt_patch_64.c +++ b/arch/x86/kernel/paravirt_patch_64.c @@ -6,7 +6,6 @@ DEF_NATIVE(pv_irq_ops, irq_disable, "cli"); DEF_NATIVE(pv_irq_ops, irq_enable, "sti"); DEF_NATIVE(pv_irq_ops, restore_fl, "pushq %rdi; popfq"); DEF_NATIVE(pv_irq_ops, save_fl, "pushfq; popq %rax"); -DEF_NATIVE(pv_cpu_ops, iret, "iretq"); DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax"); DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax"); DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3"); @@ -50,7 +49,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf, PATCH_SITE(pv_irq_ops, save_fl); PATCH_SITE(pv_irq_ops, irq_enable); PATCH_SITE(pv_irq_ops, irq_disable); - PATCH_SITE(pv_cpu_ops, iret); PATCH_SITE(pv_cpu_ops, irq_enable_sysexit); PATCH_SITE(pv_cpu_ops, usergs_sysret32); PATCH_SITE(pv_cpu_ops, usergs_sysret64); diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 6ce0537..884f98f 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -36,7 +36,6 @@ #include <linux/uaccess.h> #include <linux/io.h> #include <linux/kdebug.h> -#include <linux/highmem.h> #include <asm/pgtable.h> #include <asm/ldt.h> @@ -220,35 +219,6 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) } EXPORT_SYMBOL_GPL(start_thread); -#ifdef CONFIG_PREEMPT_RT_FULL -static void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) -{ - int i; - - /* - * Clear @prev's kmap_atomic mappings - */ - for (i = 0; i < prev_p->kmap_idx; i++) { - int idx = i + KM_TYPE_NR * smp_processor_id(); - pte_t *ptep = kmap_pte - idx; - - kpte_clear_flush(ptep, __fix_to_virt(FIX_KMAP_BEGIN + idx)); - } - /* - * Restore @next_p's kmap_atomic mappings - */ - for (i = 0; i < next_p->kmap_idx; i++) { - int idx = i + KM_TYPE_NR * smp_processor_id(); - - if (!pte_none(next_p->kmap_pte[i])) - set_pte(kmap_pte - idx, next_p->kmap_pte[i]); - } -} -#else -static inline void -switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) { } -#endif - /* * switch_to(x,y) should switch tasks from x to y. @@ -328,8 +298,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT)) __switch_to_xtra(prev_p, next_p, tss); - switch_kmaps(prev_p, next_p); - /* * Leave lazy mode, flushing any hypercalls made here. * This must be done before restoring TLS segments so diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 7461f50..0686fe3 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -1441,15 +1441,6 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, force_sig_info(SIGTRAP, &info, tsk); } - -#ifdef CONFIG_X86_32 -# define IS_IA32 1 -#elif defined CONFIG_IA32_EMULATION -# define IS_IA32 is_compat_task() -#else -# define IS_IA32 0 -#endif - /* * We must return the syscall number to actually look up in the table. * This can be -1L to skip running any syscall at all. @@ -1487,7 +1478,7 @@ long syscall_trace_enter(struct pt_regs *regs) if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) trace_sys_enter(regs, regs->orig_ax); - if (IS_IA32) + if (is_ia32_task()) audit_syscall_entry(AUDIT_ARCH_I386, regs->orig_ax, regs->bx, regs->cx, diff --git a/arch/x86/kernel/resource.c b/arch/x86/kernel/resource.c index 2a26819..80eab01 100644 --- a/arch/x86/kernel/resource.c +++ b/arch/x86/kernel/resource.c @@ -37,10 +37,12 @@ static void remove_e820_regions(struct resource *avail) void arch_remove_reservations(struct resource *avail) { - /* Trim out BIOS areas (low 1MB and high 2MB) and E820 regions */ + /* + * Trim out BIOS area (high 2MB) and E820 regions. We do not remove + * the low 1MB unconditionally, as this area is needed for some ISA + * cards requiring a memory range, e.g. the i82365 PCMCIA controller. + */ if (avail->flags & IORESOURCE_MEM) { - if (avail->start < BIOS_END) - avail->start = BIOS_END; resource_clip(avail, BIOS_ROM_BASE, BIOS_ROM_END); remove_e820_regions(avail); diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index ecfe089..b88fc86 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -673,6 +673,11 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs) * handler too. */ regs->flags &= ~(X86_EFLAGS_DF|X86_EFLAGS_RF|X86_EFLAGS_TF); + /* + * Ensure the signal handler starts with the new fpu state. + */ + if (used_math()) + drop_init_fpu(current); } signal_setup_done(failed, ksig, test_thread_flag(TIF_SINGLESTEP)); } @@ -739,14 +744,6 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) mce_notify_process(); #endif /* CONFIG_X86_64 && CONFIG_X86_MCE */ -#ifdef ARCH_RT_DELAYS_SIGNAL_SEND - if (unlikely(current->forced_info.si_signo)) { - struct task_struct *t = current; - force_sig_info(t->forced_info.si_signo, &t->forced_info, t); - t->forced_info.si_signo = 0; - } -#endif - if (thread_info_flags & _TIF_UPROBE) uprobe_notify_resume(regs); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 6cacab6..9ccb7ef 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -265,6 +265,13 @@ static void notrace start_secondary(void *unused) check_tsc_sync_target(); /* + * Enable the espfix hack for this CPU + */ +#ifdef CONFIG_X86_ESPFIX64 + init_espfix_ap(); +#endif + + /* * We need to hold vector_lock so there the set of online cpus * does not change while we are assigning vectors to cpus. Holding * this lock ensures we don't half assign or remove an irq from a cpu. @@ -1278,6 +1285,9 @@ static void remove_siblinginfo(int cpu) for_each_cpu(sibling, cpu_sibling_mask(cpu)) cpumask_clear_cpu(cpu, cpu_sibling_mask(sibling)); + for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) + cpumask_clear_cpu(cpu, cpu_llc_shared_mask(sibling)); + cpumask_clear(cpu_llc_shared_mask(cpu)); cpumask_clear(cpu_sibling_mask(cpu)); cpumask_clear(cpu_core_mask(cpu)); c->phys_proc_id = 0; @@ -1310,6 +1320,12 @@ void cpu_disable_common(void) int native_cpu_disable(void) { + int ret; + + ret = check_irq_vectors_for_cpu_disable(); + if (ret) + return ret; + clear_local_APIC(); cpu_disable_common(); diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c index f7fec09..7fc5e84 100644 --- a/arch/x86/kernel/tls.c +++ b/arch/x86/kernel/tls.c @@ -27,6 +27,58 @@ static int get_free_idx(void) return -ESRCH; } +static bool tls_desc_okay(const struct user_desc *info) +{ + /* + * For historical reasons (i.e. no one ever documented how any + * of the segmentation APIs work), user programs can and do + * assume that a struct user_desc that's all zeros except for + * entry_number means "no segment at all". This never actually + * worked. In fact, up to Linux 3.19, a struct user_desc like + * this would create a 16-bit read-write segment with base and + * limit both equal to zero. + * + * That was close enough to "no segment at all" until we + * hardened this function to disallow 16-bit TLS segments. Fix + * it up by interpreting these zeroed segments the way that they + * were almost certainly intended to be interpreted. + * + * The correct way to ask for "no segment at all" is to specify + * a user_desc that satisfies LDT_empty. To keep everything + * working, we accept both. + * + * Note that there's a similar kludge in modify_ldt -- look at + * the distinction between modes 1 and 0x11. + */ + if (LDT_empty(info) || LDT_zero(info)) + return true; + + /* + * espfix is required for 16-bit data segments, but espfix + * only works for LDT segments. + */ + if (!info->seg_32bit) + return false; + + /* Only allow data segments in the TLS array. */ + if (info->contents > 1) + return false; + + /* + * Non-present segments with DPL 3 present an interesting attack + * surface. The kernel should handle such segments correctly, + * but TLS is very difficult to protect in a sandbox, so prevent + * such segments from being created. + * + * If userspace needs to remove a TLS entry, it can still delete + * it outright. + */ + if (info->seg_not_present) + return false; + + return true; +} + static void set_tls_desc(struct task_struct *p, int idx, const struct user_desc *info, int n) { @@ -40,7 +92,7 @@ static void set_tls_desc(struct task_struct *p, int idx, cpu = get_cpu(); while (n-- > 0) { - if (LDT_empty(info)) + if (LDT_empty(info) || LDT_zero(info)) desc->a = desc->b = 0; else fill_ldt(desc, info); @@ -66,6 +118,9 @@ int do_set_thread_area(struct task_struct *p, int idx, if (copy_from_user(&info, u_info, sizeof(info))) return -EFAULT; + if (!tls_desc_okay(&info)) + return -EINVAL; + if (idx == -1) idx = info.entry_number; @@ -192,6 +247,7 @@ int regset_tls_set(struct task_struct *target, const struct user_regset *regset, { struct user_desc infobuf[GDT_ENTRY_TLS_ENTRIES]; const struct user_desc *info; + int i; if (pos >= GDT_ENTRY_TLS_ENTRIES * sizeof(struct user_desc) || (pos % sizeof(struct user_desc)) != 0 || @@ -205,6 +261,10 @@ int regset_tls_set(struct task_struct *target, const struct user_regset *regset, else info = infobuf; + for (i = 0; i < count / sizeof(struct user_desc); i++) + if (!tls_desc_okay(info + i)) + return -EINVAL; + set_tls_desc(target, GDT_ENTRY_TLS_MIN + (pos / sizeof(struct user_desc)), info, count / sizeof(struct user_desc)); diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 6663bb5..88f3d3b 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -86,21 +86,9 @@ static inline void conditional_sti(struct pt_regs *regs) local_irq_enable(); } -static inline void conditional_sti_ist(struct pt_regs *regs) +static inline void preempt_conditional_sti(struct pt_regs *regs) { -#ifdef CONFIG_X86_64 - /* - * X86_64 uses a per CPU stack on the IST for certain traps - * like int3. The task can not be preempted when using one - * of these stacks, thus preemption must be disabled, otherwise - * the stack can be corrupted if the task is scheduled out, - * and another task comes in and uses this stack. - * - * On x86_32 the task keeps its own stack and it is OK if the - * task schedules out. - */ inc_preempt_count(); -#endif if (regs->flags & X86_EFLAGS_IF) local_irq_enable(); } @@ -111,13 +99,11 @@ static inline void conditional_cli(struct pt_regs *regs) local_irq_disable(); } -static inline void conditional_cli_ist(struct pt_regs *regs) +static inline void preempt_conditional_cli(struct pt_regs *regs) { if (regs->flags & X86_EFLAGS_IF) local_irq_disable(); -#ifdef CONFIG_X86_64 dec_preempt_count(); -#endif } static int __kprobes @@ -235,33 +221,41 @@ DO_ERROR(X86_TRAP_OLD_MF, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) DO_ERROR(X86_TRAP_TS, SIGSEGV, "invalid TSS", invalid_TSS) DO_ERROR(X86_TRAP_NP, SIGBUS, "segment not present", segment_not_present) -#ifdef CONFIG_X86_32 DO_ERROR(X86_TRAP_SS, SIGBUS, "stack segment", stack_segment) -#endif DO_ERROR_INFO(X86_TRAP_AC, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0) #ifdef CONFIG_X86_64 /* Runs on IST stack */ -dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code) -{ - enum ctx_state prev_state; - - prev_state = exception_enter(); - if (notify_die(DIE_TRAP, "stack segment", regs, error_code, - X86_TRAP_SS, SIGBUS) != NOTIFY_STOP) { - conditional_sti_ist(regs); - do_trap(X86_TRAP_SS, SIGBUS, "stack segment", regs, error_code, NULL); - conditional_cli_ist(regs); - } - exception_exit(prev_state); -} - dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) { static const char str[] = "double fault"; struct task_struct *tsk = current; +#ifdef CONFIG_X86_ESPFIX64 + extern unsigned char native_irq_return_iret[]; + + /* + * If IRET takes a non-IST fault on the espfix64 stack, then we + * end up promoting it to a doublefault. In that case, modify + * the stack to make it look like we just entered the #GP + * handler from user space, similar to bad_iret. + */ + if (((long)regs->sp >> PGDIR_SHIFT) == ESPFIX_PGD_ENTRY && + regs->cs == __KERNEL_CS && + regs->ip == (unsigned long)native_irq_return_iret) + { + struct pt_regs *normal_regs = task_pt_regs(current); + + /* Fake a #GP(0) from userspace. */ + memmove(&normal_regs->ip, (void *)regs->sp, 5*8); + normal_regs->orig_ax = 0; /* Missing (lost) #GP error code */ + regs->ip = (unsigned long)general_protection; + regs->sp = (unsigned long)&normal_regs->orig_ax; + return; + } +#endif + exception_enter(); /* Return not checked because double check cannot be ignored */ notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV); @@ -361,9 +355,9 @@ dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_co * as we may switch to the interrupt stack. */ debug_stack_usage_inc(); - conditional_sti_ist(regs); + preempt_conditional_sti(regs); do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, error_code, NULL); - conditional_cli_ist(regs); + preempt_conditional_cli(regs); debug_stack_usage_dec(); exit: exception_exit(prev_state); @@ -375,7 +369,7 @@ exit: * for scheduling or signal handling. The actual stack switch is done in * entry.S */ -asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) +asmlinkage notrace __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) { struct pt_regs *regs = eregs; /* Did already sync */ @@ -394,6 +388,35 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) *regs = *eregs; return regs; } + +struct bad_iret_stack { + void *error_entry_ret; + struct pt_regs regs; +}; + +asmlinkage __visible notrace __kprobes +struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s) +{ + /* + * This is called from entry_64.S early in handling a fault + * caused by a bad iret to user mode. To handle the fault + * correctly, we want move our stack frame to task_pt_regs + * and we want to pretend that the exception came from the + * iret target. + */ + struct bad_iret_stack *new_stack = + container_of(task_pt_regs(current), + struct bad_iret_stack, regs); + + /* Copy the IRET target to the new stack. */ + memmove(&new_stack->regs.ip, (void *)s->regs.sp, 5*8); + + /* Copy the remainder of the stack from the current stack. */ + memmove(new_stack, s, offsetof(struct bad_iret_stack, regs.ip)); + + BUG_ON(!user_mode_vm(&new_stack->regs)); + return new_stack; +} #endif /* @@ -469,12 +492,12 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) debug_stack_usage_inc(); /* It's safe to allow irq's after DR6 has been saved */ - conditional_sti_ist(regs); + preempt_conditional_sti(regs); if (regs->flags & X86_VM_MASK) { handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, X86_TRAP_DB); - conditional_cli_ist(regs); + preempt_conditional_cli(regs); debug_stack_usage_dec(); goto exit; } @@ -494,7 +517,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) si_code = get_si_code(tsk->thread.debugreg6); if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp) send_sigtrap(tsk, regs, error_code, si_code); - conditional_cli_ist(regs); + preempt_conditional_cli(regs); debug_stack_usage_dec(); exit: @@ -766,7 +789,7 @@ void __init trap_init(void) set_intr_gate(X86_TRAP_OLD_MF, &coprocessor_segment_overrun); set_intr_gate(X86_TRAP_TS, &invalid_TSS); set_intr_gate(X86_TRAP_NP, &segment_not_present); - set_intr_gate_ist(X86_TRAP_SS, &stack_segment, STACKFAULT_STACK); + set_intr_gate(X86_TRAP_SS, stack_segment); set_intr_gate(X86_TRAP_GP, &general_protection); set_intr_gate(X86_TRAP_SPURIOUS, &spurious_interrupt_bug); set_intr_gate(X86_TRAP_MF, &coprocessor_error); diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 930e5d4..cefe57c 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -386,7 +386,7 @@ static unsigned long quick_pit_calibrate(void) goto success; } } - pr_err("Fast TSC calibration failed\n"); + pr_info("Fast TSC calibration failed\n"); return 0; success: @@ -974,14 +974,17 @@ void __init tsc_init(void) x86_init.timers.tsc_pre_init(); - if (!cpu_has_tsc) + if (!cpu_has_tsc) { + setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER); return; + } tsc_khz = x86_platform.calibrate_tsc(); cpu_khz = tsc_khz; if (!tsc_khz) { mark_tsc_unstable("could not calculate TSC khz"); + setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER); return; } diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index 1f96f93..09ce23a 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c @@ -125,10 +125,10 @@ static void warn_bad_vsyscall(const char *level, struct pt_regs *regs, if (!show_unhandled_signals) return; - pr_notice_ratelimited("%s%s[%d] %s ip:%lx cs:%lx sp:%lx ax:%lx si:%lx di:%lx\n", - level, current->comm, task_pid_nr(current), - message, regs->ip, regs->cs, - regs->sp, regs->ax, regs->si, regs->di); + printk_ratelimited("%s%s[%d] %s ip:%lx cs:%lx sp:%lx ax:%lx si:%lx di:%lx\n", + level, current->comm, task_pid_nr(current), + message, regs->ip, regs->cs, + regs->sp, regs->ax, regs->si, regs->di); } static int addr_to_vsyscall_nr(unsigned long addr) diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index 422fd82..f5869fc 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -268,8 +268,6 @@ int save_xstate_sig(void __user *buf, void __user *buf_fx, int size) if (use_fxsr() && save_xstate_epilog(buf_fx, ia32_fxstate)) return -1; - drop_init_fpu(tsk); /* trigger finit */ - return 0; } @@ -399,8 +397,11 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size) set_used_math(); } - if (use_eager_fpu()) + if (use_eager_fpu()) { + preempt_disable(); math_state_restore(); + preempt_enable(); + } return err; } else { diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 92e6f4a..ab1d459 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -498,11 +498,6 @@ static void rsp_increment(struct x86_emulate_ctxt *ctxt, int inc) masked_increment(reg_rmw(ctxt, VCPU_REGS_RSP), stack_mask(ctxt), inc); } -static inline void jmp_rel(struct x86_emulate_ctxt *ctxt, int rel) -{ - register_address_increment(ctxt, &ctxt->_eip, rel); -} - static u32 desc_limit_scaled(struct desc_struct *desc) { u32 limit = get_desc_limit(desc); @@ -576,6 +571,38 @@ static int emulate_nm(struct x86_emulate_ctxt *ctxt) return emulate_exception(ctxt, NM_VECTOR, 0, false); } +static inline int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst, + int cs_l) +{ + switch (ctxt->op_bytes) { + case 2: + ctxt->_eip = (u16)dst; + break; + case 4: + ctxt->_eip = (u32)dst; + break; + case 8: + if ((cs_l && is_noncanonical_address(dst)) || + (!cs_l && (dst & ~(u32)-1))) + return emulate_gp(ctxt, 0); + ctxt->_eip = dst; + break; + default: + WARN(1, "unsupported eip assignment size\n"); + } + return X86EMUL_CONTINUE; +} + +static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst) +{ + return assign_eip_far(ctxt, dst, ctxt->mode == X86EMUL_MODE_PROT64); +} + +static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel) +{ + return assign_eip_near(ctxt, ctxt->_eip + rel); +} + static u16 get_segment_selector(struct x86_emulate_ctxt *ctxt, unsigned seg) { u16 selector; @@ -1964,13 +1991,15 @@ static int em_grp45(struct x86_emulate_ctxt *ctxt) case 2: /* call near abs */ { long int old_eip; old_eip = ctxt->_eip; - ctxt->_eip = ctxt->src.val; + rc = assign_eip_near(ctxt, ctxt->src.val); + if (rc != X86EMUL_CONTINUE) + break; ctxt->src.val = old_eip; rc = em_push(ctxt); break; } case 4: /* jmp abs */ - ctxt->_eip = ctxt->src.val; + rc = assign_eip_near(ctxt, ctxt->src.val); break; case 5: /* jmp far */ rc = em_jmp_far(ctxt); @@ -2002,16 +2031,21 @@ static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt) static int em_ret(struct x86_emulate_ctxt *ctxt) { - ctxt->dst.type = OP_REG; - ctxt->dst.addr.reg = &ctxt->_eip; - ctxt->dst.bytes = ctxt->op_bytes; - return em_pop(ctxt); + int rc; + unsigned long eip; + + rc = emulate_pop(ctxt, &eip, ctxt->op_bytes); + if (rc != X86EMUL_CONTINUE) + return rc; + + return assign_eip_near(ctxt, eip); } static int em_ret_far(struct x86_emulate_ctxt *ctxt) { int rc; unsigned long cs; + int cpl = ctxt->ops->cpl(ctxt); rc = emulate_pop(ctxt, &ctxt->_eip, ctxt->op_bytes); if (rc != X86EMUL_CONTINUE) @@ -2021,6 +2055,9 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt) rc = emulate_pop(ctxt, &cs, ctxt->op_bytes); if (rc != X86EMUL_CONTINUE) return rc; + /* Outer-privilege level return is not implemented */ + if (ctxt->mode >= X86EMUL_MODE_PROT16 && (cs & 3) > cpl) + return X86EMUL_UNHANDLEABLE; rc = load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS); return rc; } @@ -2279,7 +2316,7 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt) { const struct x86_emulate_ops *ops = ctxt->ops; struct desc_struct cs, ss; - u64 msr_data; + u64 msr_data, rcx, rdx; int usermode; u16 cs_sel = 0, ss_sel = 0; @@ -2295,6 +2332,9 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt) else usermode = X86EMUL_MODE_PROT32; + rcx = reg_read(ctxt, VCPU_REGS_RCX); + rdx = reg_read(ctxt, VCPU_REGS_RDX); + cs.dpl = 3; ss.dpl = 3; ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data); @@ -2312,6 +2352,9 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt) ss_sel = cs_sel + 8; cs.d = 0; cs.l = 1; + if (is_noncanonical_address(rcx) || + is_noncanonical_address(rdx)) + return emulate_gp(ctxt, 0); break; } cs_sel |= SELECTOR_RPL_MASK; @@ -2320,8 +2363,8 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt) ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS); ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS); - ctxt->_eip = reg_read(ctxt, VCPU_REGS_RDX); - *reg_write(ctxt, VCPU_REGS_RSP) = reg_read(ctxt, VCPU_REGS_RCX); + ctxt->_eip = rdx; + *reg_write(ctxt, VCPU_REGS_RSP) = rcx; return X86EMUL_CONTINUE; } @@ -2860,10 +2903,13 @@ static int em_aad(struct x86_emulate_ctxt *ctxt) static int em_call(struct x86_emulate_ctxt *ctxt) { + int rc; long rel = ctxt->src.val; ctxt->src.val = (unsigned long)ctxt->_eip; - jmp_rel(ctxt, rel); + rc = jmp_rel(ctxt, rel); + if (rc != X86EMUL_CONTINUE) + return rc; return em_push(ctxt); } @@ -2895,11 +2941,12 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt) static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt) { int rc; + unsigned long eip; - ctxt->dst.type = OP_REG; - ctxt->dst.addr.reg = &ctxt->_eip; - ctxt->dst.bytes = ctxt->op_bytes; - rc = emulate_pop(ctxt, &ctxt->dst.val, ctxt->op_bytes); + rc = emulate_pop(ctxt, &eip, ctxt->op_bytes); + if (rc != X86EMUL_CONTINUE) + return rc; + rc = assign_eip_near(ctxt, eip); if (rc != X86EMUL_CONTINUE) return rc; rsp_increment(ctxt, ctxt->src.val); @@ -3189,20 +3236,24 @@ static int em_lmsw(struct x86_emulate_ctxt *ctxt) static int em_loop(struct x86_emulate_ctxt *ctxt) { + int rc = X86EMUL_CONTINUE; + register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RCX), -1); if ((address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) != 0) && (ctxt->b == 0xe2 || test_cc(ctxt->b ^ 0x5, ctxt->eflags))) - jmp_rel(ctxt, ctxt->src.val); + rc = jmp_rel(ctxt, ctxt->src.val); - return X86EMUL_CONTINUE; + return rc; } static int em_jcxz(struct x86_emulate_ctxt *ctxt) { + int rc = X86EMUL_CONTINUE; + if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0) - jmp_rel(ctxt, ctxt->src.val); + rc = jmp_rel(ctxt, ctxt->src.val); - return X86EMUL_CONTINUE; + return rc; } static int em_in(struct x86_emulate_ctxt *ctxt) @@ -4554,7 +4605,7 @@ special_insn: break; case 0x70 ... 0x7f: /* jcc (short) */ if (test_cc(ctxt->b, ctxt->eflags)) - jmp_rel(ctxt, ctxt->src.val); + rc = jmp_rel(ctxt, ctxt->src.val); break; case 0x8d: /* lea r16/r32, m */ ctxt->dst.val = ctxt->src.addr.mem.ea; @@ -4583,7 +4634,7 @@ special_insn: break; case 0xe9: /* jmp rel */ case 0xeb: /* jmp rel short */ - jmp_rel(ctxt, ctxt->src.val); + rc = jmp_rel(ctxt, ctxt->src.val); ctxt->dst.type = OP_NONE; /* Disable writeback. */ break; case 0xf4: /* hlt */ @@ -4703,7 +4754,7 @@ twobyte_insn: break; case 0x80 ... 0x8f: /* jnz rel, etc*/ if (test_cc(ctxt->b, ctxt->eflags)) - jmp_rel(ctxt, ctxt->src.val); + rc = jmp_rel(ctxt, ctxt->src.val); break; case 0x90 ... 0x9f: /* setcc r/m8 */ ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags); diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 518d864..298781d 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -262,8 +262,10 @@ void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu) return; timer = &pit->pit_state.timer; + mutex_lock(&pit->pit_state.lock); if (hrtimer_cancel(timer)) hrtimer_start_expires(timer, HRTIMER_MODE_ABS); + mutex_unlock(&pit->pit_state.lock); } static void destroy_pit_timer(struct kvm_pit *pit) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index d86ff15..92bbb39 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -360,6 +360,8 @@ static inline void apic_clear_irr(int vec, struct kvm_lapic *apic) static inline void apic_set_isr(int vec, struct kvm_lapic *apic) { + /* Note that we never get here with APIC virtualization enabled. */ + if (!__apic_test_and_set_vector(vec, apic->regs + APIC_ISR)) ++apic->isr_count; BUG_ON(apic->isr_count > MAX_APIC_VECTOR); @@ -371,12 +373,48 @@ static inline void apic_set_isr(int vec, struct kvm_lapic *apic) apic->highest_isr_cache = vec; } +static inline int apic_find_highest_isr(struct kvm_lapic *apic) +{ + int result; + + /* + * Note that isr_count is always 1, and highest_isr_cache + * is always -1, with APIC virtualization enabled. + */ + if (!apic->isr_count) + return -1; + if (likely(apic->highest_isr_cache != -1)) + return apic->highest_isr_cache; + + result = find_highest_vector(apic->regs + APIC_ISR); + ASSERT(result == -1 || result >= 16); + + return result; +} + static inline void apic_clear_isr(int vec, struct kvm_lapic *apic) { - if (__apic_test_and_clear_vector(vec, apic->regs + APIC_ISR)) + struct kvm_vcpu *vcpu; + if (!__apic_test_and_clear_vector(vec, apic->regs + APIC_ISR)) + return; + + vcpu = apic->vcpu; + + /* + * We do get here for APIC virtualization enabled if the guest + * uses the Hyper-V APIC enlightenment. In this case we may need + * to trigger a new interrupt delivery by writing the SVI field; + * on the other hand isr_count and highest_isr_cache are unused + * and must be left alone. + */ + if (unlikely(kvm_apic_vid_enabled(vcpu->kvm))) + kvm_x86_ops->hwapic_isr_update(vcpu->kvm, + apic_find_highest_isr(apic)); + else { --apic->isr_count; - BUG_ON(apic->isr_count < 0); - apic->highest_isr_cache = -1; + BUG_ON(apic->isr_count < 0); + apic->highest_isr_cache = -1; + } } int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu) @@ -456,22 +494,6 @@ static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu) __clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention); } -static inline int apic_find_highest_isr(struct kvm_lapic *apic) -{ - int result; - - /* Note that isr_count is always 1 with vid enabled */ - if (!apic->isr_count) - return -1; - if (likely(apic->highest_isr_cache != -1)) - return apic->highest_isr_cache; - - result = find_highest_vector(apic->regs + APIC_ISR); - ASSERT(result == -1 || result >= 16); - - return result; -} - void kvm_apic_update_tmr(struct kvm_vcpu *vcpu, u32 *tmr) { struct kvm_lapic *apic = vcpu->arch.apic; @@ -1605,6 +1627,8 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu) int vector = kvm_apic_has_interrupt(vcpu); struct kvm_lapic *apic = vcpu->arch.apic; + /* Note that we never get here with APIC virtualization enabled. */ + if (vector == -1) return -1; diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 74dd129..8ad01b4 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -198,16 +198,20 @@ void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask) EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask); /* - * spte bits of bit 3 ~ bit 11 are used as low 9 bits of generation number, - * the bits of bits 52 ~ bit 61 are used as high 10 bits of generation - * number. + * the low bit of the generation number is always presumed to be zero. + * This disables mmio caching during memslot updates. The concept is + * similar to a seqcount but instead of retrying the access we just punt + * and ignore the cache. + * + * spte bits 3-11 are used as bits 1-9 of the generation number, + * the bits 52-61 are used as bits 10-19 of the generation number. */ -#define MMIO_SPTE_GEN_LOW_SHIFT 3 +#define MMIO_SPTE_GEN_LOW_SHIFT 2 #define MMIO_SPTE_GEN_HIGH_SHIFT 52 -#define MMIO_GEN_SHIFT 19 -#define MMIO_GEN_LOW_SHIFT 9 -#define MMIO_GEN_LOW_MASK ((1 << MMIO_GEN_LOW_SHIFT) - 1) +#define MMIO_GEN_SHIFT 20 +#define MMIO_GEN_LOW_SHIFT 10 +#define MMIO_GEN_LOW_MASK ((1 << MMIO_GEN_LOW_SHIFT) - 2) #define MMIO_GEN_MASK ((1 << MMIO_GEN_SHIFT) - 1) #define MMIO_MAX_GEN ((1 << MMIO_GEN_SHIFT) - 1) @@ -3161,7 +3165,7 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu) if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) return; - vcpu_clear_mmio_info(vcpu, ~0ul); + vcpu_clear_mmio_info(vcpu, MMIO_GVA_ANY); kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC); if (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL) { hpa_t root = vcpu->arch.mmu.root_hpa; @@ -4424,8 +4428,8 @@ void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm) * The very rare case: if the generation-number is round, * zap all shadow pages. */ - if (unlikely(kvm_current_mmio_generation(kvm) >= MMIO_MAX_GEN)) { - printk_ratelimited(KERN_INFO "kvm: zapping shadow pages for mmio generation wraparound\n"); + if (unlikely(kvm_current_mmio_generation(kvm) == 0)) { + printk_ratelimited(KERN_DEBUG "kvm: zapping shadow pages for mmio generation wraparound\n"); kvm_mmu_invalidate_zap_all_pages(kvm); } } diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 612c717..5dcdff5 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3204,7 +3204,7 @@ static int wrmsr_interception(struct vcpu_svm *svm) msr.host_initiated = false; svm->next_rip = kvm_rip_read(&svm->vcpu) + 2; - if (svm_set_msr(&svm->vcpu, &msr)) { + if (kvm_set_msr(&svm->vcpu, &msr)) { trace_kvm_msr_write_ex(ecx, data); kvm_inject_gp(&svm->vcpu, 0); } else { @@ -3486,9 +3486,9 @@ static int handle_exit(struct kvm_vcpu *vcpu) if (exit_code >= ARRAY_SIZE(svm_exit_handlers) || !svm_exit_handlers[exit_code]) { - kvm_run->exit_reason = KVM_EXIT_UNKNOWN; - kvm_run->hw.hardware_exit_reason = exit_code; - return 0; + WARN_ONCE(1, "vmx: unexpected exit reason 0x%x\n", exit_code); + kvm_queue_exception(vcpu, UD_VECTOR); + return 1; } return svm_exit_handlers[exit_code](svm); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 59181e6..c7663b1 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2540,12 +2540,15 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) break; msr = find_msr_entry(vmx, msr_index); if (msr) { + u64 old_msr_data = msr->data; msr->data = data; if (msr - vmx->guest_msrs < vmx->save_nmsrs) { preempt_disable(); - kvm_set_shared_msr(msr->index, msr->data, - msr->mask); + ret = kvm_set_shared_msr(msr->index, msr->data, + msr->mask); preempt_enable(); + if (ret) + msr->data = old_msr_data; } break; } @@ -5113,7 +5116,7 @@ static int handle_wrmsr(struct kvm_vcpu *vcpu) msr.data = data; msr.index = ecx; msr.host_initiated = false; - if (vmx_set_msr(vcpu, &msr) != 0) { + if (kvm_set_msr(vcpu, &msr) != 0) { trace_kvm_msr_write_ex(ecx, data); kvm_inject_gp(vcpu, 0); return 1; @@ -6385,6 +6388,12 @@ static int handle_invept(struct kvm_vcpu *vcpu) return 1; } +static int handle_invvpid(struct kvm_vcpu *vcpu) +{ + kvm_queue_exception(vcpu, UD_VECTOR); + return 1; +} + /* * The exit handlers return 1 if the exit was handled fully and guest execution * may resume. Otherwise they set the kvm_run parameter to indicate what needs @@ -6430,6 +6439,7 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { [EXIT_REASON_MWAIT_INSTRUCTION] = handle_invalid_op, [EXIT_REASON_MONITOR_INSTRUCTION] = handle_invalid_op, [EXIT_REASON_INVEPT] = handle_invept, + [EXIT_REASON_INVVPID] = handle_invvpid, }; static const int kvm_vmx_max_exit_handlers = @@ -6656,7 +6666,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) case EXIT_REASON_VMPTRST: case EXIT_REASON_VMREAD: case EXIT_REASON_VMRESUME: case EXIT_REASON_VMWRITE: case EXIT_REASON_VMOFF: case EXIT_REASON_VMON: - case EXIT_REASON_INVEPT: + case EXIT_REASON_INVEPT: case EXIT_REASON_INVVPID: /* * VMX instructions trap unconditionally. This allows L1 to * emulate them for its L2 guest, i.e., allows 3-level nesting! @@ -6812,10 +6822,10 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) && kvm_vmx_exit_handlers[exit_reason]) return kvm_vmx_exit_handlers[exit_reason](vcpu); else { - vcpu->run->exit_reason = KVM_EXIT_UNKNOWN; - vcpu->run->hw.hardware_exit_reason = exit_reason; + WARN_ONCE(1, "vmx: unexpected exit reason 0x%x\n", exit_reason); + kvm_queue_exception(vcpu, UD_VECTOR); + return 1; } - return 0; } static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e1b7b17..790551b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -225,20 +225,25 @@ static void kvm_shared_msr_cpu_online(void) shared_msr_update(i, shared_msrs_global.msrs[i]); } -void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask) +int kvm_set_shared_msr(unsigned slot, u64 value, u64 mask) { unsigned int cpu = smp_processor_id(); struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu); + int err; if (((value ^ smsr->values[slot].curr) & mask) == 0) - return; + return 0; smsr->values[slot].curr = value; - wrmsrl(shared_msrs_global.msrs[slot], value); + err = wrmsrl_safe(shared_msrs_global.msrs[slot], value); + if (err) + return 1; + if (!smsr->registered) { smsr->urn.on_user_return = kvm_on_user_return; user_return_notifier_register(&smsr->urn); smsr->registered = true; } + return 0; } EXPORT_SYMBOL_GPL(kvm_set_shared_msr); @@ -910,7 +915,6 @@ void kvm_enable_efer_bits(u64 mask) } EXPORT_SYMBOL_GPL(kvm_enable_efer_bits); - /* * Writes msr value into into the appropriate "register". * Returns 0 on success, non-0 otherwise. @@ -918,8 +922,34 @@ EXPORT_SYMBOL_GPL(kvm_enable_efer_bits); */ int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) { + switch (msr->index) { + case MSR_FS_BASE: + case MSR_GS_BASE: + case MSR_KERNEL_GS_BASE: + case MSR_CSTAR: + case MSR_LSTAR: + if (is_noncanonical_address(msr->data)) + return 1; + break; + case MSR_IA32_SYSENTER_EIP: + case MSR_IA32_SYSENTER_ESP: + /* + * IA32_SYSENTER_ESP and IA32_SYSENTER_EIP cause #GP if + * non-canonical address is written on Intel but not on + * AMD (which ignores the top 32-bits, because it does + * not implement 64-bit SYSENTER). + * + * 64-bit code should hence be able to write a non-canonical + * value on AMD. Making the address canonical ensures that + * vmentry does not fail on Intel after writing a non-canonical + * value, and that something deterministic happens if the guest + * invokes 64-bit SYSENTER. + */ + msr->data = get_canonical(msr->data); + } return kvm_x86_ops->set_msr(vcpu, msr); } +EXPORT_SYMBOL_GPL(kvm_set_msr); /* * Adapt set_msr() to msr_io()'s calling convention @@ -1073,7 +1103,6 @@ static inline u64 get_kernel_ns(void) { struct timespec ts; - WARN_ON(preemptible()); ktime_get_ts(&ts); monotonic_to_bootbased(&ts); return timespec_to_ns(&ts); @@ -4842,7 +4871,7 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu) ++vcpu->stat.insn_emulation_fail; trace_kvm_emulate_insn_failed(vcpu); - if (!is_guest_mode(vcpu)) { + if (!is_guest_mode(vcpu) && kvm_x86_ops->get_cpl(vcpu) == 0) { vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; vcpu->run->internal.ndata = 0; @@ -5493,13 +5522,6 @@ int kvm_arch_init(void *opaque) goto out; } -#ifdef CONFIG_PREEMPT_RT_FULL - if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) { - printk(KERN_ERR "RT requires X86_FEATURE_CONSTANT_TSC\n"); - return -EOPNOTSUPP; - } -#endif - r = kvm_mmu_module_init(); if (r) goto out_free_percpu; diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 3186542..7626d3e 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -78,15 +78,23 @@ static inline void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu, vcpu->arch.mmio_gva = gva & PAGE_MASK; vcpu->arch.access = access; vcpu->arch.mmio_gfn = gfn; + vcpu->arch.mmio_gen = kvm_memslots(vcpu->kvm)->generation; +} + +static inline bool vcpu_match_mmio_gen(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.mmio_gen == kvm_memslots(vcpu->kvm)->generation; } /* - * Clear the mmio cache info for the given gva, - * specially, if gva is ~0ul, we clear all mmio cache info. + * Clear the mmio cache info for the given gva. If gva is MMIO_GVA_ANY, we + * clear all mmio cache info. */ +#define MMIO_GVA_ANY (~(gva_t)0) + static inline void vcpu_clear_mmio_info(struct kvm_vcpu *vcpu, gva_t gva) { - if (gva != (~0ul) && vcpu->arch.mmio_gva != (gva & PAGE_MASK)) + if (gva != MMIO_GVA_ANY && vcpu->arch.mmio_gva != (gva & PAGE_MASK)) return; vcpu->arch.mmio_gva = 0; @@ -94,7 +102,8 @@ static inline void vcpu_clear_mmio_info(struct kvm_vcpu *vcpu, gva_t gva) static inline bool vcpu_match_mmio_gva(struct kvm_vcpu *vcpu, unsigned long gva) { - if (vcpu->arch.mmio_gva && vcpu->arch.mmio_gva == (gva & PAGE_MASK)) + if (vcpu_match_mmio_gen(vcpu) && vcpu->arch.mmio_gva && + vcpu->arch.mmio_gva == (gva & PAGE_MASK)) return true; return false; @@ -102,7 +111,8 @@ static inline bool vcpu_match_mmio_gva(struct kvm_vcpu *vcpu, unsigned long gva) static inline bool vcpu_match_mmio_gpa(struct kvm_vcpu *vcpu, gpa_t gpa) { - if (vcpu->arch.mmio_gfn && vcpu->arch.mmio_gfn == gpa >> PAGE_SHIFT) + if (vcpu_match_mmio_gen(vcpu) && vcpu->arch.mmio_gfn && + vcpu->arch.mmio_gfn == gpa >> PAGE_SHIFT) return true; return false; diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index 0002a3a..3620928 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -30,11 +30,13 @@ struct pg_state { unsigned long start_address; unsigned long current_address; const struct addr_marker *marker; + unsigned long lines; }; struct addr_marker { unsigned long start_address; const char *name; + unsigned long max_lines; }; /* indices for address_markers; keep sync'd w/ address_markers below */ @@ -45,6 +47,7 @@ enum address_markers_idx { LOW_KERNEL_NR, VMALLOC_START_NR, VMEMMAP_START_NR, + ESPFIX_START_NR, HIGH_KERNEL_NR, MODULES_VADDR_NR, MODULES_END_NR, @@ -67,6 +70,7 @@ static struct addr_marker address_markers[] = { { PAGE_OFFSET, "Low Kernel Mapping" }, { VMALLOC_START, "vmalloc() Area" }, { VMEMMAP_START, "Vmemmap" }, + { ESPFIX_BASE_ADDR, "ESPfix Area", 16 }, { __START_KERNEL_map, "High Kernel Mapping" }, { MODULES_VADDR, "Modules" }, { MODULES_END, "End Modules" }, @@ -163,7 +167,7 @@ static void note_page(struct seq_file *m, struct pg_state *st, pgprot_t new_prot, int level) { pgprotval_t prot, cur; - static const char units[] = "KMGTPE"; + static const char units[] = "BKMGTPE"; /* * If we have a "break" in the series, we need to flush the state that @@ -178,6 +182,7 @@ static void note_page(struct seq_file *m, struct pg_state *st, st->current_prot = new_prot; st->level = level; st->marker = address_markers; + st->lines = 0; seq_printf(m, "---[ %s ]---\n", st->marker->name); } else if (prot != cur || level != st->level || st->current_address >= st->marker[1].start_address) { @@ -188,17 +193,21 @@ static void note_page(struct seq_file *m, struct pg_state *st, /* * Now print the actual finished series */ - seq_printf(m, "0x%0*lx-0x%0*lx ", - width, st->start_address, - width, st->current_address); - - delta = (st->current_address - st->start_address) >> 10; - while (!(delta & 1023) && unit[1]) { - delta >>= 10; - unit++; + if (!st->marker->max_lines || + st->lines < st->marker->max_lines) { + seq_printf(m, "0x%0*lx-0x%0*lx ", + width, st->start_address, + width, st->current_address); + + delta = (st->current_address - st->start_address) >> 10; + while (!(delta & 1023) && unit[1]) { + delta >>= 10; + unit++; + } + seq_printf(m, "%9lu%c ", delta, *unit); + printk_prot(m, st->current_prot, st->level); } - seq_printf(m, "%9lu%c ", delta, *unit); - printk_prot(m, st->current_prot, st->level); + st->lines++; /* * We print markers for special areas of address space, diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index b5c8e37..5b90bbcad9 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -1098,7 +1098,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code) * If we're in an interrupt, have no user context or are running * in an atomic region then we must not take the fault: */ - if (unlikely(!mm || pagefault_disabled())) { + if (unlikely(in_atomic() || !mm)) { bad_area_nosemaphore(regs, error_code, address); return; } diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c index 7f96844..4500142 100644 --- a/arch/x86/mm/highmem_32.c +++ b/arch/x86/mm/highmem_32.c @@ -32,7 +32,6 @@ EXPORT_SYMBOL(kunmap); */ void *kmap_atomic_prot(struct page *page, pgprot_t prot) { - pte_t pte = mk_pte(page, prot); unsigned long vaddr; int idx, type; @@ -46,10 +45,7 @@ void *kmap_atomic_prot(struct page *page, pgprot_t prot) idx = type + KM_TYPE_NR*smp_processor_id(); vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); BUG_ON(!pte_none(*(kmap_pte-idx))); -#ifdef CONFIG_PREEMPT_RT_FULL - current->kmap_pte[type] = pte; -#endif - set_pte(kmap_pte-idx, pte); + set_pte(kmap_pte-idx, mk_pte(page, prot)); arch_flush_lazy_mmu_mode(); return (void *)vaddr; @@ -92,9 +88,6 @@ void __kunmap_atomic(void *kvaddr) * is a bad idea also, in case the page changes cacheability * attributes or becomes a protected page in a hypervisor. */ -#ifdef CONFIG_PREEMPT_RT_FULL - current->kmap_pte[type] = __pte(0); -#endif kpte_clear_flush(kmap_pte-idx, vaddr); kmap_atomic_idx_pop(); arch_flush_lazy_mmu_mode(); diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index 9d980d8..fa029fb 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c @@ -58,11 +58,6 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address, { return NULL; } - -int pmd_huge_support(void) -{ - return 0; -} #else struct page * @@ -80,11 +75,6 @@ int pud_huge(pud_t pud) { return !!(pud_val(pud) & _PAGE_PSE); } - -int pmd_huge_support(void) -{ - return 1; -} #endif /* x86_64 also uses this file */ diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 104d56a..b599241 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1110,7 +1110,7 @@ void mark_rodata_ro(void) unsigned long end = (unsigned long) &__end_rodata_hpage_align; unsigned long text_end = PFN_ALIGN(&__stop___ex_table); unsigned long rodata_end = PFN_ALIGN(&__end_rodata); - unsigned long all_end = PFN_ALIGN(&_end); + unsigned long all_end; printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n", (end - start) >> 10); @@ -1121,7 +1121,16 @@ void mark_rodata_ro(void) /* * The rodata/data/bss/brk section (but not the kernel text!) * should also be not-executable. + * + * We align all_end to PMD_SIZE because the existing mapping + * is a full PMD. If we would align _brk_end to PAGE_SIZE we + * split the PMD and the reminder between _brk_end and the end + * of the PMD will remain mapped executable. + * + * Any PMD which was setup after the one which covers _brk_end + * has been zapped already via cleanup_highmem(). */ + all_end = roundup((unsigned long)_brk_end, PMD_SIZE); set_memory_nx(rodata_start, (all_end - rodata_start) >> PAGE_SHIFT); rodata_test(); diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c index 62377d6..7b179b4 100644 --- a/arch/x86/mm/iomap_32.c +++ b/arch/x86/mm/iomap_32.c @@ -56,7 +56,6 @@ EXPORT_SYMBOL_GPL(iomap_free); void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot) { - pte_t pte = pfn_pte(pfn, prot); unsigned long vaddr; int idx, type; @@ -65,12 +64,7 @@ void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot) type = kmap_atomic_idx_push(); idx = type + KM_TYPE_NR * smp_processor_id(); vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); - WARN_ON(!pte_none(*(kmap_pte - idx))); - -#ifdef CONFIG_PREEMPT_RT_FULL - current->kmap_pte[type] = pte; -#endif - set_pte(kmap_pte - idx, pte); + set_pte(kmap_pte - idx, pfn_pte(pfn, prot)); arch_flush_lazy_mmu_mode(); return (void *)vaddr; @@ -116,9 +110,6 @@ iounmap_atomic(void __iomem *kvaddr) * is a bad idea also, in case the page changes cacheability * attributes or becomes a protected page in a hypervisor. */ -#ifdef CONFIG_PREEMPT_RT_FULL - current->kmap_pte[type] = __pte(0); -#endif kpte_clear_flush(kmap_pte-idx, vaddr); kmap_atomic_idx_pop(); } diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 799580c..94bd247 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -50,6 +50,21 @@ int ioremap_change_attr(unsigned long vaddr, unsigned long size, return err; } +static int __ioremap_check_ram(unsigned long start_pfn, unsigned long nr_pages, + void *arg) +{ + unsigned long i; + + for (i = 0; i < nr_pages; ++i) + if (pfn_valid(start_pfn + i) && + !PageReserved(pfn_to_page(start_pfn + i))) + return 1; + + WARN_ONCE(1, "ioremap on RAM pfn 0x%lx\n", start_pfn); + + return 0; +} + /* * Remap an arbitrary physical address space into the kernel virtual * address space. Needed when the kernel wants to access high addresses @@ -93,14 +108,11 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, /* * Don't allow anybody to remap normal RAM that we're using.. */ + pfn = phys_addr >> PAGE_SHIFT; last_pfn = last_addr >> PAGE_SHIFT; - for (pfn = phys_addr >> PAGE_SHIFT; pfn <= last_pfn; pfn++) { - int is_ram = page_is_ram(pfn); - - if (is_ram && pfn_valid(pfn) && !PageReserved(pfn_to_page(pfn))) - return NULL; - WARN_ON_ONCE(is_ram); - } + if (walk_system_ram_range(pfn, last_pfn - pfn + 1, NULL, + __ioremap_check_ram) == 1) + return NULL; /* * Mappings have to be page-aligned diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index bb32480..aabdf76 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -389,7 +389,7 @@ phys_addr_t slow_virt_to_phys(void *__virt_addr) psize = page_level_size(level); pmask = page_level_mask(level); offset = virt_addr & ~pmask; - phys_addr = pte_pfn(*pte) << PAGE_SHIFT; + phys_addr = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT; return (phys_addr | offset); } EXPORT_SYMBOL_GPL(slow_virt_to_phys); diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index dfa537a..5da29d0 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -386,13 +386,20 @@ int pmdp_test_and_clear_young(struct vm_area_struct *vma, int ptep_clear_flush_young(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) { - int young; - - young = ptep_test_and_clear_young(vma, address, ptep); - if (young) - flush_tlb_page(vma, address); - - return young; + /* + * On x86 CPUs, clearing the accessed bit without a TLB flush + * doesn't cause data corruption. [ It could cause incorrect + * page aging and the (mistaken) reclaim of hot pages, but the + * chance of that should be relatively low. ] + * + * So as a performance optimization don't flush the TLB when + * clearing the accessed bit, it will eventually be flushed by + * a context switch or a VM operation anyway. [ In the rare + * event of it not getting flushed for a long time the delay + * shouldn't really matter because there's no real memory + * pressure for swapout to react to. ] + */ + return ptep_test_and_clear_young(vma, address, ptep); } #ifdef CONFIG_TRANSPARENT_HUGEPAGE diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index ae699b3..dd8dda1 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -103,7 +103,7 @@ static void flush_tlb_func(void *info) if (f->flush_mm != this_cpu_read(cpu_tlbstate.active_mm)) return; - count_vm_event(NR_TLB_REMOTE_FLUSH_RECEIVED); + count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED); if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK) { if (f->flush_end == TLB_FLUSH_ALL) local_flush_tlb(); @@ -131,7 +131,7 @@ void native_flush_tlb_others(const struct cpumask *cpumask, info.flush_start = start; info.flush_end = end; - count_vm_event(NR_TLB_REMOTE_FLUSH); + count_vm_tlb_event(NR_TLB_REMOTE_FLUSH); if (is_uv_system()) { unsigned int cpu; @@ -151,44 +151,19 @@ void flush_tlb_current_task(void) preempt_disable(); - count_vm_event(NR_TLB_LOCAL_FLUSH_ALL); + count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); local_flush_tlb(); if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL); preempt_enable(); } -/* - * It can find out the THP large page, or - * HUGETLB page in tlb_flush when THP disabled - */ -static inline unsigned long has_large_page(struct mm_struct *mm, - unsigned long start, unsigned long end) -{ - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - unsigned long addr = ALIGN(start, HPAGE_SIZE); - for (; addr < end; addr += HPAGE_SIZE) { - pgd = pgd_offset(mm, addr); - if (likely(!pgd_none(*pgd))) { - pud = pud_offset(pgd, addr); - if (likely(!pud_none(*pud))) { - pmd = pmd_offset(pud, addr); - if (likely(!pmd_none(*pmd))) - if (pmd_large(*pmd)) - return addr; - } - } - } - return 0; -} - void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, unsigned long end, unsigned long vmflag) { unsigned long addr; unsigned act_entries, tlb_entries = 0; + unsigned long nr_base_pages; preempt_disable(); if (current->active_mm != mm) @@ -210,21 +185,20 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, tlb_entries = tlb_lli_4k[ENTRIES]; else tlb_entries = tlb_lld_4k[ENTRIES]; + /* Assume all of TLB entries was occupied by this task */ - act_entries = mm->total_vm > tlb_entries ? tlb_entries : mm->total_vm; + act_entries = tlb_entries >> tlb_flushall_shift; + act_entries = mm->total_vm > act_entries ? act_entries : mm->total_vm; + nr_base_pages = (end - start) >> PAGE_SHIFT; /* tlb_flushall_shift is on balance point, details in commit log */ - if ((end - start) >> PAGE_SHIFT > act_entries >> tlb_flushall_shift) { - count_vm_event(NR_TLB_LOCAL_FLUSH_ALL); + if (nr_base_pages > act_entries) { + count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); local_flush_tlb(); } else { - if (has_large_page(mm, start, end)) { - local_flush_tlb(); - goto flush_all; - } /* flush range by one by one 'invlpg' */ for (addr = start; addr < end; addr += PAGE_SIZE) { - count_vm_event(NR_TLB_LOCAL_FLUSH_ONE); + count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE); __flush_tlb_single(addr); } @@ -262,7 +236,7 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long start) static void do_flush_tlb_all(void *info) { - count_vm_event(NR_TLB_REMOTE_FLUSH_RECEIVED); + count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED); __flush_tlb_all(); if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY) leave_mm(smp_processor_id()); @@ -270,7 +244,7 @@ static void do_flush_tlb_all(void *info) void flush_tlb_all(void) { - count_vm_event(NR_TLB_REMOTE_FLUSH); + count_vm_tlb_event(NR_TLB_REMOTE_FLUSH); on_each_cpu(do_flush_tlb_all, NULL, 1); } diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index db6b1ab..96a159a 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c @@ -162,6 +162,10 @@ pcibios_align_resource(void *data, const struct resource *res, return start; if (start & 0x300) start = (start + 0x3ff) & ~0x3ff; + } else if (res->flags & IORESOURCE_MEM) { + /* The low 1MB range is reserved for ISA cards */ + if (start < BIOS_END) + start = BIOS_END; } return start; } diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl index 38ae65d..63a8993 100644 --- a/arch/x86/syscalls/syscall_64.tbl +++ b/arch/x86/syscalls/syscall_64.tbl @@ -212,10 +212,10 @@ 203 common sched_setaffinity sys_sched_setaffinity 204 common sched_getaffinity sys_sched_getaffinity 205 64 set_thread_area -206 common io_setup sys_io_setup +206 64 io_setup sys_io_setup 207 common io_destroy sys_io_destroy 208 common io_getevents sys_io_getevents -209 common io_submit sys_io_submit +209 64 io_submit sys_io_submit 210 common io_cancel sys_io_cancel 211 64 get_thread_area 212 common lookup_dcookie sys_lookup_dcookie @@ -356,3 +356,5 @@ 540 x32 process_vm_writev compat_sys_process_vm_writev 541 x32 setsockopt compat_sys_setsockopt 542 x32 getsockopt compat_sys_getsockopt +543 x32 io_setup compat_sys_io_setup +544 x32 io_submit compat_sys_io_submit diff --git a/arch/x86/um/sys_call_table_32.c b/arch/x86/um/sys_call_table_32.c index 531d426..bd16d6c 100644 --- a/arch/x86/um/sys_call_table_32.c +++ b/arch/x86/um/sys_call_table_32.c @@ -34,7 +34,7 @@ typedef asmlinkage void (*sys_call_ptr_t)(void); extern asmlinkage void sys_ni_syscall(void); -const sys_call_ptr_t sys_call_table[] __cacheline_aligned = { +const sys_call_ptr_t sys_call_table[] ____cacheline_aligned = { /* * Smells like a compiler bug -- it doesn't work * when the & below is removed. diff --git a/arch/x86/um/sys_call_table_64.c b/arch/x86/um/sys_call_table_64.c index f2f0723..9578308 100644 --- a/arch/x86/um/sys_call_table_64.c +++ b/arch/x86/um/sys_call_table_64.c @@ -46,7 +46,7 @@ typedef void (*sys_call_ptr_t)(void); extern void sys_ni_syscall(void); -const sys_call_ptr_t sys_call_table[] __cacheline_aligned = { +const sys_call_ptr_t sys_call_table[] ____cacheline_aligned = { /* * Smells like a compiler bug -- it doesn't work * when the & below is removed. diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c index 431e875..ab6ba35 100644 --- a/arch/x86/vdso/vma.c +++ b/arch/x86/vdso/vma.c @@ -117,30 +117,45 @@ subsys_initcall(init_vdso); struct linux_binprm; -/* Put the vdso above the (randomized) stack with another randomized offset. - This way there is no hole in the middle of address space. - To save memory make sure it is still in the same PTE as the stack top. - This doesn't give that many random bits */ +/* + * Put the vdso above the (randomized) stack with another randomized + * offset. This way there is no hole in the middle of address space. + * To save memory make sure it is still in the same PTE as the stack + * top. This doesn't give that many random bits. + * + * Note that this algorithm is imperfect: the distribution of the vdso + * start address within a PMD is biased toward the end. + * + * Only used for the 64-bit and x32 vdsos. + */ static unsigned long vdso_addr(unsigned long start, unsigned len) { unsigned long addr, end; unsigned offset; - end = (start + PMD_SIZE - 1) & PMD_MASK; + + /* + * Round up the start address. It can start out unaligned as a result + * of stack start randomization. + */ + start = PAGE_ALIGN(start); + + /* Round the lowest possible end address up to a PMD boundary. */ + end = (start + len + PMD_SIZE - 1) & PMD_MASK; if (end >= TASK_SIZE_MAX) end = TASK_SIZE_MAX; end -= len; - /* This loses some more bits than a modulo, but is cheaper */ - offset = get_random_int() & (PTRS_PER_PTE - 1); - addr = start + (offset << PAGE_SHIFT); - if (addr >= end) - addr = end; + + if (end > start) { + offset = get_random_int() % (((end - start) >> PAGE_SHIFT) + 1); + addr = start + (offset << PAGE_SHIFT); + } else { + addr = start; + } /* - * page-align it here so that get_unmapped_area doesn't - * align it wrongfully again to the next page. addr can come in 4K - * unaligned here as a result of stack start randomization. + * Forcibly align the final address in case we have a hardware + * issue that requires alignment for performance reasons. */ - addr = PAGE_ALIGN(addr); addr = align_vdso_addr(addr); return addr; diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index be6b860..ba81b54 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c @@ -274,7 +274,7 @@ void __init xen_init_spinlocks(void) printk(KERN_DEBUG "xen: PV spinlocks disabled\n"); return; } - + printk(KERN_DEBUG "xen: PV spinlocks enabled\n"); pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(xen_lock_spinning); pv_lock_ops.unlock_kick = xen_unlock_kick; } @@ -290,6 +290,9 @@ static __init int xen_init_spinlocks_jump(void) if (!xen_pvspin) return 0; + if (!xen_domain()) + return 0; + static_key_slow_inc(¶virt_ticketlocks_enabled); return 0; } diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index ee36589..90bfa52 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -445,7 +445,7 @@ void xen_setup_timer(int cpu) irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, xen_timer_interrupt, IRQF_DISABLED|IRQF_PERCPU| IRQF_NOBALANCING|IRQF_TIMER| - IRQF_FORCE_RESUME, + IRQF_FORCE_RESUME|IRQF_EARLY_RESUME, name, NULL); memcpy(evt, xen_clockevent, sizeof(*evt)); diff --git a/arch/xtensa/include/asm/pgtable.h b/arch/xtensa/include/asm/pgtable.h index 0fdf5d0..4651cb9 100644 --- a/arch/xtensa/include/asm/pgtable.h +++ b/arch/xtensa/include/asm/pgtable.h @@ -67,7 +67,12 @@ #define VMALLOC_START 0xC0000000 #define VMALLOC_END 0xC7FEFFFF #define TLBTEMP_BASE_1 0xC7FF0000 -#define TLBTEMP_BASE_2 0xC7FF8000 +#define TLBTEMP_BASE_2 (TLBTEMP_BASE_1 + DCACHE_WAY_SIZE) +#if 2 * DCACHE_WAY_SIZE > ICACHE_WAY_SIZE +#define TLBTEMP_SIZE (2 * DCACHE_WAY_SIZE) +#else +#define TLBTEMP_SIZE ICACHE_WAY_SIZE +#endif /* * For the Xtensa architecture, the PTE layout is as follows: diff --git a/arch/xtensa/include/asm/uaccess.h b/arch/xtensa/include/asm/uaccess.h index fd686dc..c7211e7 100644 --- a/arch/xtensa/include/asm/uaccess.h +++ b/arch/xtensa/include/asm/uaccess.h @@ -52,7 +52,12 @@ */ .macro get_fs ad, sp GET_CURRENT(\ad,\sp) +#if THREAD_CURRENT_DS > 1020 + addi \ad, \ad, TASK_THREAD + l32i \ad, \ad, THREAD_CURRENT_DS - TASK_THREAD +#else l32i \ad, \ad, THREAD_CURRENT_DS +#endif .endm /* diff --git a/arch/xtensa/include/uapi/asm/ioctls.h b/arch/xtensa/include/uapi/asm/ioctls.h index b4cb110..a47909f 100644 --- a/arch/xtensa/include/uapi/asm/ioctls.h +++ b/arch/xtensa/include/uapi/asm/ioctls.h @@ -28,17 +28,17 @@ #define TCSETSW 0x5403 #define TCSETSF 0x5404 -#define TCGETA _IOR('t', 23, struct termio) -#define TCSETA _IOW('t', 24, struct termio) -#define TCSETAW _IOW('t', 25, struct termio) -#define TCSETAF _IOW('t', 28, struct termio) +#define TCGETA 0x80127417 /* _IOR('t', 23, struct termio) */ +#define TCSETA 0x40127418 /* _IOW('t', 24, struct termio) */ +#define TCSETAW 0x40127419 /* _IOW('t', 25, struct termio) */ +#define TCSETAF 0x4012741C /* _IOW('t', 28, struct termio) */ #define TCSBRK _IO('t', 29) #define TCXONC _IO('t', 30) #define TCFLSH _IO('t', 31) -#define TIOCSWINSZ _IOW('t', 103, struct winsize) -#define TIOCGWINSZ _IOR('t', 104, struct winsize) +#define TIOCSWINSZ 0x40087467 /* _IOW('t', 103, struct winsize) */ +#define TIOCGWINSZ 0x80087468 /* _IOR('t', 104, struct winsize) */ #define TIOCSTART _IO('t', 110) /* start output, like ^Q */ #define TIOCSTOP _IO('t', 111) /* stop output, like ^S */ #define TIOCOUTQ _IOR('t', 115, int) /* output queue size */ @@ -88,7 +88,6 @@ #define TIOCSETD _IOW('T', 35, int) #define TIOCGETD _IOR('T', 36, int) #define TCSBRKP _IOW('T', 37, int) /* Needed for POSIX tcsendbreak()*/ -#define TIOCTTYGSTRUCT _IOR('T', 38, struct tty_struct) /* For debugging only*/ #define TIOCSBRK _IO('T', 39) /* BSD compatibility */ #define TIOCCBRK _IO('T', 40) /* BSD compatibility */ #define TIOCGSID _IOR('T', 41, pid_t) /* Return the session ID of FD*/ @@ -114,8 +113,10 @@ #define TIOCSERGETLSR _IOR('T', 89, unsigned int) /* Get line status reg. */ /* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */ # define TIOCSER_TEMT 0x01 /* Transmitter physically empty */ -#define TIOCSERGETMULTI _IOR('T', 90, struct serial_multiport_struct) /* Get multiport config */ -#define TIOCSERSETMULTI _IOW('T', 91, struct serial_multiport_struct) /* Set multiport config */ +#define TIOCSERGETMULTI 0x80a8545a /* Get multiport config */ + /* _IOR('T', 90, struct serial_multiport_struct) */ +#define TIOCSERSETMULTI 0x40a8545b /* Set multiport config */ + /* _IOW('T', 91, struct serial_multiport_struct) */ #define TIOCMIWAIT _IO('T', 92) /* wait for a change on serial input line(s) */ #define TIOCGICOUNT 0x545D /* read serial port inline interrupt counts */ diff --git a/arch/xtensa/include/uapi/asm/unistd.h b/arch/xtensa/include/uapi/asm/unistd.h index 51940fe..513effd 100644 --- a/arch/xtensa/include/uapi/asm/unistd.h +++ b/arch/xtensa/include/uapi/asm/unistd.h @@ -384,7 +384,8 @@ __SYSCALL(174, sys_chroot, 1) #define __NR_pivot_root 175 __SYSCALL(175, sys_pivot_root, 2) #define __NR_umount 176 -__SYSCALL(176, sys_umount, 2) +__SYSCALL(176, sys_oldumount, 1) +#define __ARCH_WANT_SYS_OLDUMOUNT #define __NR_swapoff 177 __SYSCALL(177, sys_swapoff, 1) #define __NR_sync 178 diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S index b61e251..4b8e636 100644 --- a/arch/xtensa/kernel/entry.S +++ b/arch/xtensa/kernel/entry.S @@ -1001,9 +1001,8 @@ ENTRY(fast_syscall_xtensa) movi a7, 4 # sizeof(unsigned int) access_ok a3, a7, a0, a2, .Leac # a0: scratch reg, a2: sp - addi a6, a6, -1 # assuming SYS_XTENSA_ATOMIC_SET = 1 - _bgeui a6, SYS_XTENSA_COUNT - 1, .Lill - _bnei a6, SYS_XTENSA_ATOMIC_CMP_SWP - 1, .Lnswp + _bgeui a6, SYS_XTENSA_COUNT, .Lill + _bnei a6, SYS_XTENSA_ATOMIC_CMP_SWP, .Lnswp /* Fall through for ATOMIC_CMP_SWP. */ @@ -1015,27 +1014,26 @@ TRY s32i a5, a3, 0 # different, modify value l32i a7, a2, PT_AREG7 # restore a7 l32i a0, a2, PT_AREG0 # restore a0 movi a2, 1 # and return 1 - addi a6, a6, 1 # restore a6 (really necessary?) rfe 1: l32i a7, a2, PT_AREG7 # restore a7 l32i a0, a2, PT_AREG0 # restore a0 movi a2, 0 # return 0 (note that we cannot set - addi a6, a6, 1 # restore a6 (really necessary?) rfe .Lnswp: /* Atomic set, add, and exg_add. */ TRY l32i a7, a3, 0 # orig + addi a6, a6, -SYS_XTENSA_ATOMIC_SET add a0, a4, a7 # + arg moveqz a0, a4, a6 # set + addi a6, a6, SYS_XTENSA_ATOMIC_SET TRY s32i a0, a3, 0 # write new value mov a0, a2 mov a2, a7 l32i a7, a0, PT_AREG7 # restore a7 l32i a0, a0, PT_AREG0 # restore a0 - addi a6, a6, 1 # restore a6 (really necessary?) rfe CATCH @@ -1044,7 +1042,7 @@ CATCH movi a2, -EFAULT rfe -.Lill: l32i a7, a2, PT_AREG0 # restore a7 +.Lill: l32i a7, a2, PT_AREG7 # restore a7 l32i a0, a2, PT_AREG0 # restore a0 movi a2, -EINVAL rfe @@ -1600,7 +1598,7 @@ ENTRY(fast_second_level_miss) rsr a0, excvaddr bltu a0, a3, 2f - addi a1, a0, -(2 << (DCACHE_ALIAS_ORDER + PAGE_SHIFT)) + addi a1, a0, -TLBTEMP_SIZE bgeu a1, a3, 2f /* Check if we have to restore an ITLB mapping. */ @@ -1855,7 +1853,6 @@ ENTRY(_switch_to) entry a1, 16 - mov a10, a2 # preserve 'prev' (a2) mov a11, a3 # and 'next' (a3) l32i a4, a2, TASK_THREAD_INFO @@ -1863,8 +1860,14 @@ ENTRY(_switch_to) save_xtregs_user a4 a6 a8 a9 a12 a13 THREAD_XTREGS_USER - s32i a0, a10, THREAD_RA # save return address - s32i a1, a10, THREAD_SP # save stack pointer +#if THREAD_RA > 1020 || THREAD_SP > 1020 + addi a10, a2, TASK_THREAD + s32i a0, a10, THREAD_RA - TASK_THREAD # save return address + s32i a1, a10, THREAD_SP - TASK_THREAD # save stack pointer +#else + s32i a0, a2, THREAD_RA # save return address + s32i a1, a2, THREAD_SP # save stack pointer +#endif /* Disable ints while we manipulate the stack pointer. */ @@ -1905,7 +1908,6 @@ ENTRY(_switch_to) load_xtregs_user a5 a6 a8 a9 a12 a13 THREAD_XTREGS_USER wsr a14, ps - mov a2, a10 # return 'prev' rsync retw diff --git a/arch/xtensa/kernel/pci-dma.c b/arch/xtensa/kernel/pci-dma.c index 2d9cc6d..e8b76b8 100644 --- a/arch/xtensa/kernel/pci-dma.c +++ b/arch/xtensa/kernel/pci-dma.c @@ -49,9 +49,8 @@ dma_alloc_coherent(struct device *dev,size_t size,dma_addr_t *handle,gfp_t flag) /* We currently don't support coherent memory outside KSEG */ - if (ret < XCHAL_KSEG_CACHED_VADDR - || ret >= XCHAL_KSEG_CACHED_VADDR + XCHAL_KSEG_SIZE) - BUG(); + BUG_ON(ret < XCHAL_KSEG_CACHED_VADDR || + ret > XCHAL_KSEG_CACHED_VADDR + XCHAL_KSEG_SIZE - 1); if (ret != 0) { @@ -68,10 +67,11 @@ EXPORT_SYMBOL(dma_alloc_coherent); void dma_free_coherent(struct device *hwdev, size_t size, void *vaddr, dma_addr_t dma_handle) { - long addr=(long)vaddr+XCHAL_KSEG_CACHED_VADDR-XCHAL_KSEG_BYPASS_VADDR; + unsigned long addr = (unsigned long)vaddr + + XCHAL_KSEG_CACHED_VADDR - XCHAL_KSEG_BYPASS_VADDR; - if (addr < 0 || addr >= XCHAL_KSEG_SIZE) - BUG(); + BUG_ON(addr < XCHAL_KSEG_CACHED_VADDR || + addr > XCHAL_KSEG_CACHED_VADDR + XCHAL_KSEG_SIZE - 1); free_pages(addr, get_order(size)); } diff --git a/arch/xtensa/kernel/vectors.S b/arch/xtensa/kernel/vectors.S index cb8fd44..da0224d 100644 --- a/arch/xtensa/kernel/vectors.S +++ b/arch/xtensa/kernel/vectors.S @@ -376,38 +376,42 @@ _DoubleExceptionVector_WindowOverflow: beqz a2, 1f # if at start of vector, don't restore addi a0, a0, -128 - bbsi a0, 8, 1f # don't restore except for overflow 8 and 12 - bbsi a0, 7, 2f + bbsi.l a0, 8, 1f # don't restore except for overflow 8 and 12 + + /* + * This fixup handler is for the extremely unlikely case where the + * overflow handler's reference thru a0 gets a hardware TLB refill + * that bumps out the (distinct, aliasing) TLB entry that mapped its + * prior references thru a9/a13, and where our reference now thru + * a9/a13 gets a 2nd-level miss exception (not hardware TLB refill). + */ + movi a2, window_overflow_restore_a0_fixup + s32i a2, a3, EXC_TABLE_FIXUP + l32i a2, a3, EXC_TABLE_DOUBLE_SAVE + xsr a3, excsave1 + + bbsi.l a0, 7, 2f /* * Restore a0 as saved by _WindowOverflow8(). - * - * FIXME: we really need a fixup handler for this L32E, - * for the extremely unlikely case where the overflow handler's - * reference thru a0 gets a hardware TLB refill that bumps out - * the (distinct, aliasing) TLB entry that mapped its prior - * references thru a9, and where our reference now thru a9 - * gets a 2nd-level miss exception (not hardware TLB refill). */ - l32e a2, a9, -16 - wsr a2, depc # replace the saved a0 - j 1f + l32e a0, a9, -16 + wsr a0, depc # replace the saved a0 + j 3f 2: /* * Restore a0 as saved by _WindowOverflow12(). - * - * FIXME: we really need a fixup handler for this L32E, - * for the extremely unlikely case where the overflow handler's - * reference thru a0 gets a hardware TLB refill that bumps out - * the (distinct, aliasing) TLB entry that mapped its prior - * references thru a13, and where our reference now thru a13 - * gets a 2nd-level miss exception (not hardware TLB refill). */ - l32e a2, a13, -16 - wsr a2, depc # replace the saved a0 + l32e a0, a13, -16 + wsr a0, depc # replace the saved a0 +3: + xsr a3, excsave1 + movi a0, 0 + s32i a0, a3, EXC_TABLE_FIXUP + s32i a2, a3, EXC_TABLE_DOUBLE_SAVE 1: /* * Restore WindowBase while leaving all address registers restored. @@ -449,6 +453,7 @@ _DoubleExceptionVector_WindowOverflow: s32i a0, a2, PT_DEPC +_DoubleExceptionVector_handle_exception: addx4 a0, a0, a3 l32i a0, a0, EXC_TABLE_FAST_USER xsr a3, excsave1 @@ -464,11 +469,120 @@ _DoubleExceptionVector_WindowOverflow: rotw -3 j 1b - .end literal_prefix ENDPROC(_DoubleExceptionVector) /* + * Fixup handler for TLB miss in double exception handler for window owerflow. + * We get here with windowbase set to the window that was being spilled and + * a0 trashed. a0 bit 7 determines if this is a call8 (bit clear) or call12 + * (bit set) window. + * + * We do the following here: + * - go to the original window retaining a0 value; + * - set up exception stack to return back to appropriate a0 restore code + * (we'll need to rotate window back and there's no place to save this + * information, use different return address for that); + * - handle the exception; + * - go to the window that was being spilled; + * - set up window_overflow_restore_a0_fixup as a fixup routine; + * - reload a0; + * - restore the original window; + * - reset the default fixup routine; + * - return to user. By the time we get to this fixup handler all information + * about the conditions of the original double exception that happened in + * the window overflow handler is lost, so we just return to userspace to + * retry overflow from start. + * + * a0: value of depc, original value in depc + * a2: trashed, original value in EXC_TABLE_DOUBLE_SAVE + * a3: exctable, original value in excsave1 + */ + +ENTRY(window_overflow_restore_a0_fixup) + + rsr a0, ps + extui a0, a0, PS_OWB_SHIFT, PS_OWB_WIDTH + rsr a2, windowbase + sub a0, a2, a0 + extui a0, a0, 0, 3 + l32i a2, a3, EXC_TABLE_DOUBLE_SAVE + xsr a3, excsave1 + + _beqi a0, 1, .Lhandle_1 + _beqi a0, 3, .Lhandle_3 + + .macro overflow_fixup_handle_exception_pane n + + rsr a0, depc + rotw -\n + + xsr a3, excsave1 + wsr a2, depc + l32i a2, a3, EXC_TABLE_KSTK + s32i a0, a2, PT_AREG0 + + movi a0, .Lrestore_\n + s32i a0, a2, PT_DEPC + rsr a0, exccause + j _DoubleExceptionVector_handle_exception + + .endm + + overflow_fixup_handle_exception_pane 2 +.Lhandle_1: + overflow_fixup_handle_exception_pane 1 +.Lhandle_3: + overflow_fixup_handle_exception_pane 3 + + .macro overflow_fixup_restore_a0_pane n + + rotw \n + /* Need to preserve a0 value here to be able to handle exception + * that may occur on a0 reload from stack. It may occur because + * TLB miss handler may not be atomic and pointer to page table + * may be lost before we get here. There are no free registers, + * so we need to use EXC_TABLE_DOUBLE_SAVE area. + */ + xsr a3, excsave1 + s32i a2, a3, EXC_TABLE_DOUBLE_SAVE + movi a2, window_overflow_restore_a0_fixup + s32i a2, a3, EXC_TABLE_FIXUP + l32i a2, a3, EXC_TABLE_DOUBLE_SAVE + xsr a3, excsave1 + bbsi.l a0, 7, 1f + l32e a0, a9, -16 + j 2f +1: + l32e a0, a13, -16 +2: + rotw -\n + + .endm + +.Lrestore_2: + overflow_fixup_restore_a0_pane 2 + +.Lset_default_fixup: + xsr a3, excsave1 + s32i a2, a3, EXC_TABLE_DOUBLE_SAVE + movi a2, 0 + s32i a2, a3, EXC_TABLE_FIXUP + l32i a2, a3, EXC_TABLE_DOUBLE_SAVE + xsr a3, excsave1 + rfe + +.Lrestore_1: + overflow_fixup_restore_a0_pane 1 + j .Lset_default_fixup +.Lrestore_3: + overflow_fixup_restore_a0_pane 3 + j .Lset_default_fixup + +ENDPROC(window_overflow_restore_a0_fixup) + + .end literal_prefix +/* * Debug interrupt vector * * There is not much space here, so simply jump to another handler. diff --git a/arch/xtensa/kernel/vmlinux.lds.S b/arch/xtensa/kernel/vmlinux.lds.S index 21acd11..af84f8f 100644 --- a/arch/xtensa/kernel/vmlinux.lds.S +++ b/arch/xtensa/kernel/vmlinux.lds.S @@ -262,13 +262,13 @@ SECTIONS .UserExceptionVector.literal) SECTION_VECTOR (_DoubleExceptionVector_literal, .DoubleExceptionVector.literal, - DOUBLEEXC_VECTOR_VADDR - 16, + DOUBLEEXC_VECTOR_VADDR - 40, SIZEOF(.UserExceptionVector.text), .UserExceptionVector.text) SECTION_VECTOR (_DoubleExceptionVector_text, .DoubleExceptionVector.text, DOUBLEEXC_VECTOR_VADDR, - 32, + 40, .DoubleExceptionVector.literal) . = (LOADADDR( .DoubleExceptionVector.text ) + SIZEOF( .DoubleExceptionVector.text ) + 3) & ~ 3; diff --git a/arch/xtensa/mm/fault.c b/arch/xtensa/mm/fault.c index ff5c9c7..70fa7bc 100644 --- a/arch/xtensa/mm/fault.c +++ b/arch/xtensa/mm/fault.c @@ -57,7 +57,7 @@ void do_page_fault(struct pt_regs *regs) /* If we're in an interrupt or have no user * context, we must not take the fault.. */ - if (!mm || pagefault_disabled()) { + if (in_atomic() || !mm) { bad_page_fault(regs, address, SIGSEGV); return; } |