diff options
Diffstat (limited to 'arch/arm64')
109 files changed, 4197 insertions, 636 deletions
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 5a0a691..9f8b99e 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -4,9 +4,11 @@ config ARM64 select ACPI_GENERIC_GSI if ACPI select ACPI_REDUCED_HARDWARE_ONLY if ACPI select ARCH_HAS_DEVMEM_IS_ALLOWED + select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_GCOV_PROFILE_ALL + select ARCH_HAS_KCOV select ARCH_HAS_SG_CHAIN select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_USE_CMPXCHG_LOCKREF @@ -85,8 +87,11 @@ config ARM64 select HAVE_PERF_EVENTS select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP + select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RCU_TABLE_FREE select HAVE_SYSCALL_TRACEPOINTS + select HAVE_KPROBES + select HAVE_KRETPROBES if HAVE_KPROBES select IOMMU_DMA if IOMMU_SUPPORT select IRQ_DOMAIN select IRQ_FORCED_THREADING @@ -664,6 +669,16 @@ config PARAVIRT_TIME_ACCOUNTING If in doubt, say N here. +config KEXEC + depends on PM_SLEEP_SMP + select KEXEC_CORE + bool "kexec system call" + ---help--- + kexec is a system call that implements the ability to shutdown your + current kernel, and to start another kernel. It is like a reboot + but it is independent of the system firmware. And like a reboot + you can start any kernel with it, not just Linux. + config XEN_DOM0 def_bool y depends on XEN @@ -872,7 +887,7 @@ config RELOCATABLE config RANDOMIZE_BASE bool "Randomize the address of the kernel image" - select ARM64_MODULE_PLTS + select ARM64_MODULE_PLTS if MODULES select RELOCATABLE help Randomizes the virtual address at which the kernel image is diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 7085e32..d59b690 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -12,7 +12,6 @@ LDFLAGS_vmlinux :=-p --no-undefined -X CPPFLAGS_vmlinux.lds = -DTEXT_OFFSET=$(TEXT_OFFSET) -OBJCOPYFLAGS :=-O binary -R .note -R .note.gnu.build-id -R .comment -S GZFLAGS :=-9 ifneq ($(CONFIG_RELOCATABLE),) @@ -95,7 +94,7 @@ boot := arch/arm64/boot Image: vmlinux $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ -Image.%: vmlinux +Image.%: Image $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ zinstall install: @@ -121,6 +120,16 @@ archclean: $(Q)$(MAKE) $(clean)=$(boot) $(Q)$(MAKE) $(clean)=$(boot)/dts +# We need to generate vdso-offsets.h before compiling certain files in kernel/. +# In order to do that, we should use the archprepare target, but we can't since +# asm-offsets.h is included in some files used to generate vdso-offsets.h, and +# asm-offsets.h is built in prepare0, for which archprepare is a dependency. +# Therefore we need to generate the header after prepare0 has been made, hence +# this hack. +prepare: vdso_prepare +vdso_prepare: prepare0 + $(Q)$(MAKE) $(build)=arch/arm64/kernel/vdso include/generated/vdso-offsets.h + define archhelp echo '* Image.gz - Compressed kernel image (arch/$(ARCH)/boot/Image.gz)' echo ' Image - Uncompressed kernel image (arch/$(ARCH)/boot/Image)' diff --git a/arch/arm64/boot/Makefile b/arch/arm64/boot/Makefile index 305c552..1f012c5 100644 --- a/arch/arm64/boot/Makefile +++ b/arch/arm64/boot/Makefile @@ -14,6 +14,8 @@ # Based on the ia64 boot/Makefile. # +OBJCOPYFLAGS_Image :=-O binary -R .note -R .note.gnu.build-id -R .comment -S + targets := Image Image.gz $(obj)/Image: vmlinux FORCE diff --git a/arch/arm64/boot/dts/apm/apm-merlin.dts b/arch/arm64/boot/dts/apm/apm-merlin.dts index 387c6a8..b0f6441 100644 --- a/arch/arm64/boot/dts/apm/apm-merlin.dts +++ b/arch/arm64/boot/dts/apm/apm-merlin.dts @@ -83,3 +83,9 @@ status = "ok"; }; }; + +&mdio { + sgenet0phy: phy@0 { + reg = <0x0>; + }; +}; diff --git a/arch/arm64/boot/dts/apm/apm-mustang.dts b/arch/arm64/boot/dts/apm/apm-mustang.dts index 44db32e..b7fb5d9 100644 --- a/arch/arm64/boot/dts/apm/apm-mustang.dts +++ b/arch/arm64/boot/dts/apm/apm-mustang.dts @@ -79,3 +79,15 @@ &mmc0 { status = "ok"; }; + +&mdio { + menet0phy: phy@3 { + reg = <0x3>; + }; + sgenet0phy: phy@4 { + reg = <0x4>; + }; + sgenet1phy: phy@5 { + reg = <0x5>; + }; +}; diff --git a/arch/arm64/boot/dts/apm/apm-shadowcat.dtsi b/arch/arm64/boot/dts/apm/apm-shadowcat.dtsi index c569f76..2e1e5da 100644 --- a/arch/arm64/boot/dts/apm/apm-shadowcat.dtsi +++ b/arch/arm64/boot/dts/apm/apm-shadowcat.dtsi @@ -625,10 +625,18 @@ apm,irq-start = <8>; }; + mdio: mdio@1f610000 { + compatible = "apm,xgene-mdio-xfi"; + #address-cells = <1>; + #size-cells = <0>; + reg = <0x0 0x1f610000 0x0 0xd100>; + clocks = <&xge0clk 0>; + }; + sgenet0: ethernet@1f610000 { compatible = "apm,xgene2-sgenet"; status = "disabled"; - reg = <0x0 0x1f610000 0x0 0x10000>, + reg = <0x0 0x1f610000 0x0 0xd100>, <0x0 0x1f600000 0x0 0Xd100>, <0x0 0x20000000 0x0 0X20000>; interrupts = <0 96 4>, @@ -637,6 +645,7 @@ clocks = <&xge0clk 0>; local-mac-address = [00 01 73 00 00 01]; phy-connection-type = "sgmii"; + phy-handle = <&sgenet0phy>; }; xgenet1: ethernet@1f620000 { diff --git a/arch/arm64/boot/dts/apm/apm-storm.dtsi b/arch/arm64/boot/dts/apm/apm-storm.dtsi index 5147d76..6bf7cbe 100644 --- a/arch/arm64/boot/dts/apm/apm-storm.dtsi +++ b/arch/arm64/boot/dts/apm/apm-storm.dtsi @@ -237,20 +237,11 @@ clocks = <&socplldiv2 0>; reg = <0x0 0x1f21c000 0x0 0x1000>; reg-names = "csr-reg"; - csr-mask = <0x3>; + csr-mask = <0xa>; + enable-mask = <0xf>; clock-output-names = "sge0clk"; }; - sge1clk: sge1clk@1f21c000 { - compatible = "apm,xgene-device-clock"; - #clock-cells = <1>; - clocks = <&socplldiv2 0>; - reg = <0x0 0x1f21c000 0x0 0x1000>; - reg-names = "csr-reg"; - csr-mask = <0xc>; - clock-output-names = "sge1clk"; - }; - xge0clk: xge0clk@1f61c000 { compatible = "apm,xgene-device-clock"; #clock-cells = <1>; @@ -921,6 +912,14 @@ clocks = <&rtcclk 0>; }; + mdio: mdio@17020000 { + compatible = "apm,xgene-mdio-rgmii"; + #address-cells = <1>; + #size-cells = <0>; + reg = <0x0 0x17020000 0x0 0xd100>; + clocks = <&menetclk 0>; + }; + menet: ethernet@17020000 { compatible = "apm,xgene-enet"; status = "disabled"; @@ -934,7 +933,7 @@ /* mac address will be overwritten by the bootloader */ local-mac-address = [00 00 00 00 00 00]; phy-connection-type = "rgmii"; - phy-handle = <&menetphy>; + phy-handle = <&menet0phy>,<&menetphy>; mdio { compatible = "apm,xgene-mdio"; #address-cells = <1>; @@ -960,6 +959,7 @@ clocks = <&sge0clk 0>; local-mac-address = [00 00 00 00 00 00]; phy-connection-type = "sgmii"; + phy-handle = <&sgenet0phy>; }; sgenet1: ethernet@1f210030 { @@ -973,9 +973,9 @@ <0x0 0xAD 0x4>; port-id = <1>; dma-coherent; - clocks = <&sge1clk 0>; local-mac-address = [00 00 00 00 00 00]; phy-connection-type = "sgmii"; + phy-handle = <&sgenet1phy>; }; xgenet: ethernet@1f610000 { diff --git a/arch/arm64/boot/dts/broadcom/ns2-svk.dts b/arch/arm64/boot/dts/broadcom/ns2-svk.dts index 54ca40c..ea5603f 100644 --- a/arch/arm64/boot/dts/broadcom/ns2-svk.dts +++ b/arch/arm64/boot/dts/broadcom/ns2-svk.dts @@ -52,6 +52,14 @@ }; }; +&pci_phy0 { + status = "ok"; +}; + +&pci_phy1 { + status = "ok"; +}; + &pcie0 { status = "ok"; }; @@ -132,3 +140,11 @@ #size-cells = <1>; }; }; + +&mdio_mux_iproc { + mdio@10 { + gphy0: eth-phy@10 { + reg = <0x10>; + }; + }; +}; diff --git a/arch/arm64/boot/dts/broadcom/ns2.dtsi b/arch/arm64/boot/dts/broadcom/ns2.dtsi index ec68ec1..46b78fa 100644 --- a/arch/arm64/boot/dts/broadcom/ns2.dtsi +++ b/arch/arm64/boot/dts/broadcom/ns2.dtsi @@ -263,6 +263,45 @@ IRQ_TYPE_LEVEL_HIGH)>; }; + mdio_mux_iproc: mdio-mux@6602023c { + compatible = "brcm,mdio-mux-iproc"; + reg = <0x6602023c 0x14>; + #address-cells = <1>; + #size-cells = <0>; + + mdio@0 { + reg = <0x0>; + #address-cells = <1>; + #size-cells = <0>; + + pci_phy0: pci-phy@0 { + compatible = "brcm,ns2-pcie-phy"; + reg = <0x0>; + #phy-cells = <0>; + status = "disabled"; + }; + }; + + mdio@7 { + reg = <0x7>; + #address-cells = <1>; + #size-cells = <0>; + + pci_phy1: pci-phy@0 { + compatible = "brcm,ns2-pcie-phy"; + reg = <0x0>; + #phy-cells = <0>; + status = "disabled"; + }; + }; + + mdio@10 { + reg = <0x10>; + #address-cells = <1>; + #size-cells = <0>; + }; + }; + timer0: timer@66030000 { compatible = "arm,sp804", "arm,primecell"; reg = <0x66030000 0x1000>; diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1043a-rdb.dts b/arch/arm64/boot/dts/freescale/fsl-ls1043a-rdb.dts index f895fc0..4084631 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1043a-rdb.dts +++ b/arch/arm64/boot/dts/freescale/fsl-ls1043a-rdb.dts @@ -49,6 +49,10 @@ / { model = "LS1043A RDB Board"; + + aliases { + crypto = &crypto; + }; }; &i2c0 { diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi index de0323b..6bd46c1 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi @@ -159,6 +159,49 @@ big-endian; }; + crypto: crypto@1700000 { + compatible = "fsl,sec-v5.4", "fsl,sec-v5.0", + "fsl,sec-v4.0"; + fsl,sec-era = <3>; + #address-cells = <1>; + #size-cells = <1>; + ranges = <0x0 0x00 0x1700000 0x100000>; + reg = <0x00 0x1700000 0x0 0x100000>; + interrupts = <0 75 0x4>; + + sec_jr0: jr@10000 { + compatible = "fsl,sec-v5.4-job-ring", + "fsl,sec-v5.0-job-ring", + "fsl,sec-v4.0-job-ring"; + reg = <0x10000 0x10000>; + interrupts = <0 71 0x4>; + }; + + sec_jr1: jr@20000 { + compatible = "fsl,sec-v5.4-job-ring", + "fsl,sec-v5.0-job-ring", + "fsl,sec-v4.0-job-ring"; + reg = <0x20000 0x10000>; + interrupts = <0 72 0x4>; + }; + + sec_jr2: jr@30000 { + compatible = "fsl,sec-v5.4-job-ring", + "fsl,sec-v5.0-job-ring", + "fsl,sec-v4.0-job-ring"; + reg = <0x30000 0x10000>; + interrupts = <0 73 0x4>; + }; + + sec_jr3: jr@40000 { + compatible = "fsl,sec-v5.4-job-ring", + "fsl,sec-v5.0-job-ring", + "fsl,sec-v4.0-job-ring"; + reg = <0x40000 0x10000>; + interrupts = <0 74 0x4>; + }; + }; + dcfg: dcfg@1ee0000 { compatible = "fsl,ls1043a-dcfg", "syscon"; reg = <0x0 0x1ee0000 0x0 0x10000>; diff --git a/arch/arm64/boot/dts/lg/lg1312.dtsi b/arch/arm64/boot/dts/lg/lg1312.dtsi index 3a4e9a2..fbafa24 100644 --- a/arch/arm64/boot/dts/lg/lg1312.dtsi +++ b/arch/arm64/boot/dts/lg/lg1312.dtsi @@ -125,7 +125,7 @@ #size-cells = <1>; #interrupts-cells = <3>; - compatible = "arm,amba-bus"; + compatible = "simple-bus"; interrupt-parent = <&gic>; ranges; diff --git a/arch/arm64/boot/dts/mediatek/mt8173.dtsi b/arch/arm64/boot/dts/mediatek/mt8173.dtsi index 05f89c4..77b8c4e 100644 --- a/arch/arm64/boot/dts/mediatek/mt8173.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8173.dtsi @@ -168,6 +168,18 @@ }; }; + reserved-memory { + #address-cells = <2>; + #size-cells = <2>; + ranges; + vpu_dma_reserved: vpu_dma_mem_region { + compatible = "shared-dma-pool"; + reg = <0 0xb7000000 0 0x500000>; + alignment = <0x1000>; + no-map; + }; + }; + timer { compatible = "arm,armv8-timer"; interrupt-parent = <&gic>; @@ -312,6 +324,17 @@ clock-names = "spi", "wrap"; }; + vpu: vpu@10020000 { + compatible = "mediatek,mt8173-vpu"; + reg = <0 0x10020000 0 0x30000>, + <0 0x10050000 0 0x100>; + reg-names = "tcm", "cfg_reg"; + interrupts = <GIC_SPI 166 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&topckgen CLK_TOP_SCP_SEL>; + clock-names = "main"; + memory-region = <&vpu_dma_reserved>; + }; + sysirq: intpol-controller@10200620 { compatible = "mediatek,mt8173-sysirq", "mediatek,mt6577-sysirq"; @@ -754,6 +777,45 @@ clock-names = "apb", "smi"; }; + vcodec_enc: vcodec@18002000 { + compatible = "mediatek,mt8173-vcodec-enc"; + reg = <0 0x18002000 0 0x1000>, /* VENC_SYS */ + <0 0x19002000 0 0x1000>; /* VENC_LT_SYS */ + interrupts = <GIC_SPI 198 IRQ_TYPE_LEVEL_LOW>, + <GIC_SPI 202 IRQ_TYPE_LEVEL_LOW>; + mediatek,larb = <&larb3>, + <&larb5>; + iommus = <&iommu M4U_PORT_VENC_RCPU>, + <&iommu M4U_PORT_VENC_REC>, + <&iommu M4U_PORT_VENC_BSDMA>, + <&iommu M4U_PORT_VENC_SV_COMV>, + <&iommu M4U_PORT_VENC_RD_COMV>, + <&iommu M4U_PORT_VENC_CUR_LUMA>, + <&iommu M4U_PORT_VENC_CUR_CHROMA>, + <&iommu M4U_PORT_VENC_REF_LUMA>, + <&iommu M4U_PORT_VENC_REF_CHROMA>, + <&iommu M4U_PORT_VENC_NBM_RDMA>, + <&iommu M4U_PORT_VENC_NBM_WDMA>, + <&iommu M4U_PORT_VENC_RCPU_SET2>, + <&iommu M4U_PORT_VENC_REC_FRM_SET2>, + <&iommu M4U_PORT_VENC_BSDMA_SET2>, + <&iommu M4U_PORT_VENC_SV_COMA_SET2>, + <&iommu M4U_PORT_VENC_RD_COMA_SET2>, + <&iommu M4U_PORT_VENC_CUR_LUMA_SET2>, + <&iommu M4U_PORT_VENC_CUR_CHROMA_SET2>, + <&iommu M4U_PORT_VENC_REF_LUMA_SET2>, + <&iommu M4U_PORT_VENC_REC_CHROMA_SET2>; + mediatek,vpu = <&vpu>; + clocks = <&topckgen CLK_TOP_VENCPLL_D2>, + <&topckgen CLK_TOP_VENC_SEL>, + <&topckgen CLK_TOP_UNIVPLL1_D2>, + <&topckgen CLK_TOP_VENC_LT_SEL>; + clock-names = "venc_sel_src", + "venc_sel", + "venc_lt_sel_src", + "venc_lt_sel"; + }; + vencltsys: clock-controller@19000000 { compatible = "mediatek,mt8173-vencltsys", "syscon"; reg = <0 0x19000000 0 0x1000>; diff --git a/arch/arm64/boot/dts/rockchip/rk3399.dtsi b/arch/arm64/boot/dts/rockchip/rk3399.dtsi index 46f325a..188bbea 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399.dtsi @@ -163,7 +163,7 @@ }; amba { - compatible = "arm,amba-bus"; + compatible = "simple-bus"; #address-cells = <2>; #size-cells = <2>; ranges; @@ -492,6 +492,14 @@ interrupts = <GIC_SPI 120 IRQ_TYPE_LEVEL_HIGH>; }; + rktimer: rktimer@ff850000 { + compatible = "rockchip,rk3399-timer"; + reg = <0x0 0xff850000 0x0 0x1000>; + interrupts = <GIC_SPI 81 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&cru PCLK_TIMER0>, <&cru SCLK_TIMER00>; + clock-names = "pclk", "timer"; + }; + spdif: spdif@ff870000 { compatible = "rockchip,rk3399-spdif"; reg = <0x0 0xff870000 0x0 0x1000>; diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index fd2d74d..4ed4756 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -70,6 +70,7 @@ CONFIG_KSM=y CONFIG_TRANSPARENT_HUGEPAGE=y CONFIG_CMA=y CONFIG_XEN=y +CONFIG_KEXEC=y # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set CONFIG_COMPAT=y CONFIG_CPU_IDLE=y diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild index cff532a..f43d2c4 100644 --- a/arch/arm64/include/asm/Kbuild +++ b/arch/arm64/include/asm/Kbuild @@ -1,6 +1,5 @@ generic-y += bug.h generic-y += bugs.h -generic-y += checksum.h generic-y += clkdev.h generic-y += cputime.h generic-y += current.h diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h index aee323b..5420cb0 100644 --- a/arch/arm64/include/asm/acpi.h +++ b/arch/arm64/include/asm/acpi.h @@ -113,4 +113,14 @@ static inline const char *acpi_get_enable_method(int cpu) pgprot_t arch_apei_get_mem_attribute(phys_addr_t addr); #endif +#ifdef CONFIG_ACPI_NUMA +int arm64_acpi_numa_init(void); +int acpi_numa_get_nid(unsigned int cpu, u64 hwid); +#else +static inline int arm64_acpi_numa_init(void) { return -ENOSYS; } +static inline int acpi_numa_get_nid(unsigned int cpu, u64 hwid) { return NUMA_NO_NODE; } +#endif /* CONFIG_ACPI_NUMA */ + +#define ACPI_TABLE_UPGRADE_MAX_PHYS MEMBLOCK_ALLOC_ACCESSIBLE + #endif /*_ASM_ACPI_H*/ diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h index beccbde..8746ff6 100644 --- a/arch/arm64/include/asm/alternative.h +++ b/arch/arm64/include/asm/alternative.h @@ -95,13 +95,11 @@ void apply_alternatives(void *start, size_t length); * The code that follows this macro will be assembled and linked as * normal. There are no restrictions on this code. */ -.macro alternative_if_not cap, enable = 1 - .if \enable +.macro alternative_if_not cap .pushsection .altinstructions, "a" altinstruction_entry 661f, 663f, \cap, 662f-661f, 664f-663f .popsection 661: - .endif .endm /* @@ -118,27 +116,27 @@ void apply_alternatives(void *start, size_t length); * alternative sequence it is defined in (branches into an * alternative sequence are not fixed up). */ -.macro alternative_else, enable = 1 - .if \enable +.macro alternative_else 662: .pushsection .altinstr_replacement, "ax" 663: - .endif .endm /* * Complete an alternative code sequence. */ -.macro alternative_endif, enable = 1 - .if \enable +.macro alternative_endif 664: .popsection .org . - (664b-663b) + (662b-661b) .org . - (662b-661b) + (664b-663b) - .endif .endm #define _ALTERNATIVE_CFG(insn1, insn2, cap, cfg, ...) \ alternative_insn insn1, insn2, cap, IS_ENABLED(cfg) +.macro user_alt, label, oldinstr, newinstr, cond +9999: alternative_insn "\oldinstr", "\newinstr", \cond + _ASM_EXTABLE 9999b, \label +.endm /* * Generate the assembly for UAO alternatives with exception table entries. diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 10b017c..d5025c6 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -24,6 +24,7 @@ #define __ASM_ASSEMBLER_H #include <asm/asm-offsets.h> +#include <asm/cpufeature.h> #include <asm/page.h> #include <asm/pgtable-hwdef.h> #include <asm/ptrace.h> @@ -261,7 +262,16 @@ lr .req x30 // link register add \size, \kaddr, \size sub \tmp2, \tmp1, #1 bic \kaddr, \kaddr, \tmp2 -9998: dc \op, \kaddr +9998: + .if (\op == cvau || \op == cvac) +alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE + dc \op, \kaddr +alternative_else + dc civac, \kaddr +alternative_endif + .else + dc \op, \kaddr + .endif add \kaddr, \kaddr, \tmp1 cmp \kaddr, \size b.lo 9998b diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h index f3a3586..c0235e0 100644 --- a/arch/arm64/include/asm/atomic.h +++ b/arch/arm64/include/asm/atomic.h @@ -76,6 +76,36 @@ #define atomic_dec_return_release(v) atomic_sub_return_release(1, (v)) #define atomic_dec_return(v) atomic_sub_return(1, (v)) +#define atomic_fetch_add_relaxed atomic_fetch_add_relaxed +#define atomic_fetch_add_acquire atomic_fetch_add_acquire +#define atomic_fetch_add_release atomic_fetch_add_release +#define atomic_fetch_add atomic_fetch_add + +#define atomic_fetch_sub_relaxed atomic_fetch_sub_relaxed +#define atomic_fetch_sub_acquire atomic_fetch_sub_acquire +#define atomic_fetch_sub_release atomic_fetch_sub_release +#define atomic_fetch_sub atomic_fetch_sub + +#define atomic_fetch_and_relaxed atomic_fetch_and_relaxed +#define atomic_fetch_and_acquire atomic_fetch_and_acquire +#define atomic_fetch_and_release atomic_fetch_and_release +#define atomic_fetch_and atomic_fetch_and + +#define atomic_fetch_andnot_relaxed atomic_fetch_andnot_relaxed +#define atomic_fetch_andnot_acquire atomic_fetch_andnot_acquire +#define atomic_fetch_andnot_release atomic_fetch_andnot_release +#define atomic_fetch_andnot atomic_fetch_andnot + +#define atomic_fetch_or_relaxed atomic_fetch_or_relaxed +#define atomic_fetch_or_acquire atomic_fetch_or_acquire +#define atomic_fetch_or_release atomic_fetch_or_release +#define atomic_fetch_or atomic_fetch_or + +#define atomic_fetch_xor_relaxed atomic_fetch_xor_relaxed +#define atomic_fetch_xor_acquire atomic_fetch_xor_acquire +#define atomic_fetch_xor_release atomic_fetch_xor_release +#define atomic_fetch_xor atomic_fetch_xor + #define atomic_xchg_relaxed(v, new) xchg_relaxed(&((v)->counter), (new)) #define atomic_xchg_acquire(v, new) xchg_acquire(&((v)->counter), (new)) #define atomic_xchg_release(v, new) xchg_release(&((v)->counter), (new)) @@ -125,6 +155,36 @@ #define atomic64_dec_return_release(v) atomic64_sub_return_release(1, (v)) #define atomic64_dec_return(v) atomic64_sub_return(1, (v)) +#define atomic64_fetch_add_relaxed atomic64_fetch_add_relaxed +#define atomic64_fetch_add_acquire atomic64_fetch_add_acquire +#define atomic64_fetch_add_release atomic64_fetch_add_release +#define atomic64_fetch_add atomic64_fetch_add + +#define atomic64_fetch_sub_relaxed atomic64_fetch_sub_relaxed +#define atomic64_fetch_sub_acquire atomic64_fetch_sub_acquire +#define atomic64_fetch_sub_release atomic64_fetch_sub_release +#define atomic64_fetch_sub atomic64_fetch_sub + +#define atomic64_fetch_and_relaxed atomic64_fetch_and_relaxed +#define atomic64_fetch_and_acquire atomic64_fetch_and_acquire +#define atomic64_fetch_and_release atomic64_fetch_and_release +#define atomic64_fetch_and atomic64_fetch_and + +#define atomic64_fetch_andnot_relaxed atomic64_fetch_andnot_relaxed +#define atomic64_fetch_andnot_acquire atomic64_fetch_andnot_acquire +#define atomic64_fetch_andnot_release atomic64_fetch_andnot_release +#define atomic64_fetch_andnot atomic64_fetch_andnot + +#define atomic64_fetch_or_relaxed atomic64_fetch_or_relaxed +#define atomic64_fetch_or_acquire atomic64_fetch_or_acquire +#define atomic64_fetch_or_release atomic64_fetch_or_release +#define atomic64_fetch_or atomic64_fetch_or + +#define atomic64_fetch_xor_relaxed atomic64_fetch_xor_relaxed +#define atomic64_fetch_xor_acquire atomic64_fetch_xor_acquire +#define atomic64_fetch_xor_release atomic64_fetch_xor_release +#define atomic64_fetch_xor atomic64_fetch_xor + #define atomic64_xchg_relaxed atomic_xchg_relaxed #define atomic64_xchg_acquire atomic_xchg_acquire #define atomic64_xchg_release atomic_xchg_release diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h index f61c84f..f819fdc 100644 --- a/arch/arm64/include/asm/atomic_ll_sc.h +++ b/arch/arm64/include/asm/atomic_ll_sc.h @@ -77,26 +77,57 @@ __LL_SC_PREFIX(atomic_##op##_return##name(int i, atomic_t *v)) \ } \ __LL_SC_EXPORT(atomic_##op##_return##name); +#define ATOMIC_FETCH_OP(name, mb, acq, rel, cl, op, asm_op) \ +__LL_SC_INLINE int \ +__LL_SC_PREFIX(atomic_fetch_##op##name(int i, atomic_t *v)) \ +{ \ + unsigned long tmp; \ + int val, result; \ + \ + asm volatile("// atomic_fetch_" #op #name "\n" \ +" prfm pstl1strm, %3\n" \ +"1: ld" #acq "xr %w0, %3\n" \ +" " #asm_op " %w1, %w0, %w4\n" \ +" st" #rel "xr %w2, %w1, %3\n" \ +" cbnz %w2, 1b\n" \ +" " #mb \ + : "=&r" (result), "=&r" (val), "=&r" (tmp), "+Q" (v->counter) \ + : "Ir" (i) \ + : cl); \ + \ + return result; \ +} \ +__LL_SC_EXPORT(atomic_fetch_##op##name); + #define ATOMIC_OPS(...) \ ATOMIC_OP(__VA_ARGS__) \ - ATOMIC_OP_RETURN( , dmb ish, , l, "memory", __VA_ARGS__) - -#define ATOMIC_OPS_RLX(...) \ - ATOMIC_OPS(__VA_ARGS__) \ + ATOMIC_OP_RETURN( , dmb ish, , l, "memory", __VA_ARGS__)\ ATOMIC_OP_RETURN(_relaxed, , , , , __VA_ARGS__)\ ATOMIC_OP_RETURN(_acquire, , a, , "memory", __VA_ARGS__)\ - ATOMIC_OP_RETURN(_release, , , l, "memory", __VA_ARGS__) + ATOMIC_OP_RETURN(_release, , , l, "memory", __VA_ARGS__)\ + ATOMIC_FETCH_OP ( , dmb ish, , l, "memory", __VA_ARGS__)\ + ATOMIC_FETCH_OP (_relaxed, , , , , __VA_ARGS__)\ + ATOMIC_FETCH_OP (_acquire, , a, , "memory", __VA_ARGS__)\ + ATOMIC_FETCH_OP (_release, , , l, "memory", __VA_ARGS__) -ATOMIC_OPS_RLX(add, add) -ATOMIC_OPS_RLX(sub, sub) +ATOMIC_OPS(add, add) +ATOMIC_OPS(sub, sub) + +#undef ATOMIC_OPS +#define ATOMIC_OPS(...) \ + ATOMIC_OP(__VA_ARGS__) \ + ATOMIC_FETCH_OP ( , dmb ish, , l, "memory", __VA_ARGS__)\ + ATOMIC_FETCH_OP (_relaxed, , , , , __VA_ARGS__)\ + ATOMIC_FETCH_OP (_acquire, , a, , "memory", __VA_ARGS__)\ + ATOMIC_FETCH_OP (_release, , , l, "memory", __VA_ARGS__) -ATOMIC_OP(and, and) -ATOMIC_OP(andnot, bic) -ATOMIC_OP(or, orr) -ATOMIC_OP(xor, eor) +ATOMIC_OPS(and, and) +ATOMIC_OPS(andnot, bic) +ATOMIC_OPS(or, orr) +ATOMIC_OPS(xor, eor) -#undef ATOMIC_OPS_RLX #undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP #undef ATOMIC_OP_RETURN #undef ATOMIC_OP @@ -140,26 +171,57 @@ __LL_SC_PREFIX(atomic64_##op##_return##name(long i, atomic64_t *v)) \ } \ __LL_SC_EXPORT(atomic64_##op##_return##name); +#define ATOMIC64_FETCH_OP(name, mb, acq, rel, cl, op, asm_op) \ +__LL_SC_INLINE long \ +__LL_SC_PREFIX(atomic64_fetch_##op##name(long i, atomic64_t *v)) \ +{ \ + long result, val; \ + unsigned long tmp; \ + \ + asm volatile("// atomic64_fetch_" #op #name "\n" \ +" prfm pstl1strm, %3\n" \ +"1: ld" #acq "xr %0, %3\n" \ +" " #asm_op " %1, %0, %4\n" \ +" st" #rel "xr %w2, %1, %3\n" \ +" cbnz %w2, 1b\n" \ +" " #mb \ + : "=&r" (result), "=&r" (val), "=&r" (tmp), "+Q" (v->counter) \ + : "Ir" (i) \ + : cl); \ + \ + return result; \ +} \ +__LL_SC_EXPORT(atomic64_fetch_##op##name); + #define ATOMIC64_OPS(...) \ ATOMIC64_OP(__VA_ARGS__) \ - ATOMIC64_OP_RETURN(, dmb ish, , l, "memory", __VA_ARGS__) - -#define ATOMIC64_OPS_RLX(...) \ - ATOMIC64_OPS(__VA_ARGS__) \ + ATOMIC64_OP_RETURN(, dmb ish, , l, "memory", __VA_ARGS__) \ ATOMIC64_OP_RETURN(_relaxed,, , , , __VA_ARGS__) \ ATOMIC64_OP_RETURN(_acquire,, a, , "memory", __VA_ARGS__) \ - ATOMIC64_OP_RETURN(_release,, , l, "memory", __VA_ARGS__) + ATOMIC64_OP_RETURN(_release,, , l, "memory", __VA_ARGS__) \ + ATOMIC64_FETCH_OP (, dmb ish, , l, "memory", __VA_ARGS__) \ + ATOMIC64_FETCH_OP (_relaxed,, , , , __VA_ARGS__) \ + ATOMIC64_FETCH_OP (_acquire,, a, , "memory", __VA_ARGS__) \ + ATOMIC64_FETCH_OP (_release,, , l, "memory", __VA_ARGS__) -ATOMIC64_OPS_RLX(add, add) -ATOMIC64_OPS_RLX(sub, sub) +ATOMIC64_OPS(add, add) +ATOMIC64_OPS(sub, sub) + +#undef ATOMIC64_OPS +#define ATOMIC64_OPS(...) \ + ATOMIC64_OP(__VA_ARGS__) \ + ATOMIC64_FETCH_OP (, dmb ish, , l, "memory", __VA_ARGS__) \ + ATOMIC64_FETCH_OP (_relaxed,, , , , __VA_ARGS__) \ + ATOMIC64_FETCH_OP (_acquire,, a, , "memory", __VA_ARGS__) \ + ATOMIC64_FETCH_OP (_release,, , l, "memory", __VA_ARGS__) -ATOMIC64_OP(and, and) -ATOMIC64_OP(andnot, bic) -ATOMIC64_OP(or, orr) -ATOMIC64_OP(xor, eor) +ATOMIC64_OPS(and, and) +ATOMIC64_OPS(andnot, bic) +ATOMIC64_OPS(or, orr) +ATOMIC64_OPS(xor, eor) -#undef ATOMIC64_OPS_RLX #undef ATOMIC64_OPS +#undef ATOMIC64_FETCH_OP #undef ATOMIC64_OP_RETURN #undef ATOMIC64_OP diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h index 39c1d34..b5890be 100644 --- a/arch/arm64/include/asm/atomic_lse.h +++ b/arch/arm64/include/asm/atomic_lse.h @@ -26,54 +26,57 @@ #endif #define __LL_SC_ATOMIC(op) __LL_SC_CALL(atomic_##op) - -static inline void atomic_andnot(int i, atomic_t *v) -{ - register int w0 asm ("w0") = i; - register atomic_t *x1 asm ("x1") = v; - - asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(andnot), - " stclr %w[i], %[v]\n") - : [i] "+r" (w0), [v] "+Q" (v->counter) - : "r" (x1) - : __LL_SC_CLOBBERS); +#define ATOMIC_OP(op, asm_op) \ +static inline void atomic_##op(int i, atomic_t *v) \ +{ \ + register int w0 asm ("w0") = i; \ + register atomic_t *x1 asm ("x1") = v; \ + \ + asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(op), \ +" " #asm_op " %w[i], %[v]\n") \ + : [i] "+r" (w0), [v] "+Q" (v->counter) \ + : "r" (x1) \ + : __LL_SC_CLOBBERS); \ } -static inline void atomic_or(int i, atomic_t *v) -{ - register int w0 asm ("w0") = i; - register atomic_t *x1 asm ("x1") = v; +ATOMIC_OP(andnot, stclr) +ATOMIC_OP(or, stset) +ATOMIC_OP(xor, steor) +ATOMIC_OP(add, stadd) - asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(or), - " stset %w[i], %[v]\n") - : [i] "+r" (w0), [v] "+Q" (v->counter) - : "r" (x1) - : __LL_SC_CLOBBERS); -} +#undef ATOMIC_OP -static inline void atomic_xor(int i, atomic_t *v) -{ - register int w0 asm ("w0") = i; - register atomic_t *x1 asm ("x1") = v; - - asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(xor), - " steor %w[i], %[v]\n") - : [i] "+r" (w0), [v] "+Q" (v->counter) - : "r" (x1) - : __LL_SC_CLOBBERS); +#define ATOMIC_FETCH_OP(name, mb, op, asm_op, cl...) \ +static inline int atomic_fetch_##op##name(int i, atomic_t *v) \ +{ \ + register int w0 asm ("w0") = i; \ + register atomic_t *x1 asm ("x1") = v; \ + \ + asm volatile(ARM64_LSE_ATOMIC_INSN( \ + /* LL/SC */ \ + __LL_SC_ATOMIC(fetch_##op##name), \ + /* LSE atomics */ \ +" " #asm_op #mb " %w[i], %w[i], %[v]") \ + : [i] "+r" (w0), [v] "+Q" (v->counter) \ + : "r" (x1) \ + : __LL_SC_CLOBBERS, ##cl); \ + \ + return w0; \ } -static inline void atomic_add(int i, atomic_t *v) -{ - register int w0 asm ("w0") = i; - register atomic_t *x1 asm ("x1") = v; +#define ATOMIC_FETCH_OPS(op, asm_op) \ + ATOMIC_FETCH_OP(_relaxed, , op, asm_op) \ + ATOMIC_FETCH_OP(_acquire, a, op, asm_op, "memory") \ + ATOMIC_FETCH_OP(_release, l, op, asm_op, "memory") \ + ATOMIC_FETCH_OP( , al, op, asm_op, "memory") - asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(add), - " stadd %w[i], %[v]\n") - : [i] "+r" (w0), [v] "+Q" (v->counter) - : "r" (x1) - : __LL_SC_CLOBBERS); -} +ATOMIC_FETCH_OPS(andnot, ldclr) +ATOMIC_FETCH_OPS(or, ldset) +ATOMIC_FETCH_OPS(xor, ldeor) +ATOMIC_FETCH_OPS(add, ldadd) + +#undef ATOMIC_FETCH_OP +#undef ATOMIC_FETCH_OPS #define ATOMIC_OP_ADD_RETURN(name, mb, cl...) \ static inline int atomic_add_return##name(int i, atomic_t *v) \ @@ -119,6 +122,33 @@ static inline void atomic_and(int i, atomic_t *v) : __LL_SC_CLOBBERS); } +#define ATOMIC_FETCH_OP_AND(name, mb, cl...) \ +static inline int atomic_fetch_and##name(int i, atomic_t *v) \ +{ \ + register int w0 asm ("w0") = i; \ + register atomic_t *x1 asm ("x1") = v; \ + \ + asm volatile(ARM64_LSE_ATOMIC_INSN( \ + /* LL/SC */ \ + " nop\n" \ + __LL_SC_ATOMIC(fetch_and##name), \ + /* LSE atomics */ \ + " mvn %w[i], %w[i]\n" \ + " ldclr" #mb " %w[i], %w[i], %[v]") \ + : [i] "+r" (w0), [v] "+Q" (v->counter) \ + : "r" (x1) \ + : __LL_SC_CLOBBERS, ##cl); \ + \ + return w0; \ +} + +ATOMIC_FETCH_OP_AND(_relaxed, ) +ATOMIC_FETCH_OP_AND(_acquire, a, "memory") +ATOMIC_FETCH_OP_AND(_release, l, "memory") +ATOMIC_FETCH_OP_AND( , al, "memory") + +#undef ATOMIC_FETCH_OP_AND + static inline void atomic_sub(int i, atomic_t *v) { register int w0 asm ("w0") = i; @@ -164,57 +194,87 @@ ATOMIC_OP_SUB_RETURN(_release, l, "memory") ATOMIC_OP_SUB_RETURN( , al, "memory") #undef ATOMIC_OP_SUB_RETURN -#undef __LL_SC_ATOMIC - -#define __LL_SC_ATOMIC64(op) __LL_SC_CALL(atomic64_##op) - -static inline void atomic64_andnot(long i, atomic64_t *v) -{ - register long x0 asm ("x0") = i; - register atomic64_t *x1 asm ("x1") = v; - asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(andnot), - " stclr %[i], %[v]\n") - : [i] "+r" (x0), [v] "+Q" (v->counter) - : "r" (x1) - : __LL_SC_CLOBBERS); +#define ATOMIC_FETCH_OP_SUB(name, mb, cl...) \ +static inline int atomic_fetch_sub##name(int i, atomic_t *v) \ +{ \ + register int w0 asm ("w0") = i; \ + register atomic_t *x1 asm ("x1") = v; \ + \ + asm volatile(ARM64_LSE_ATOMIC_INSN( \ + /* LL/SC */ \ + " nop\n" \ + __LL_SC_ATOMIC(fetch_sub##name), \ + /* LSE atomics */ \ + " neg %w[i], %w[i]\n" \ + " ldadd" #mb " %w[i], %w[i], %[v]") \ + : [i] "+r" (w0), [v] "+Q" (v->counter) \ + : "r" (x1) \ + : __LL_SC_CLOBBERS, ##cl); \ + \ + return w0; \ } -static inline void atomic64_or(long i, atomic64_t *v) -{ - register long x0 asm ("x0") = i; - register atomic64_t *x1 asm ("x1") = v; +ATOMIC_FETCH_OP_SUB(_relaxed, ) +ATOMIC_FETCH_OP_SUB(_acquire, a, "memory") +ATOMIC_FETCH_OP_SUB(_release, l, "memory") +ATOMIC_FETCH_OP_SUB( , al, "memory") - asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(or), - " stset %[i], %[v]\n") - : [i] "+r" (x0), [v] "+Q" (v->counter) - : "r" (x1) - : __LL_SC_CLOBBERS); +#undef ATOMIC_FETCH_OP_SUB +#undef __LL_SC_ATOMIC + +#define __LL_SC_ATOMIC64(op) __LL_SC_CALL(atomic64_##op) +#define ATOMIC64_OP(op, asm_op) \ +static inline void atomic64_##op(long i, atomic64_t *v) \ +{ \ + register long x0 asm ("x0") = i; \ + register atomic64_t *x1 asm ("x1") = v; \ + \ + asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(op), \ +" " #asm_op " %[i], %[v]\n") \ + : [i] "+r" (x0), [v] "+Q" (v->counter) \ + : "r" (x1) \ + : __LL_SC_CLOBBERS); \ } -static inline void atomic64_xor(long i, atomic64_t *v) -{ - register long x0 asm ("x0") = i; - register atomic64_t *x1 asm ("x1") = v; +ATOMIC64_OP(andnot, stclr) +ATOMIC64_OP(or, stset) +ATOMIC64_OP(xor, steor) +ATOMIC64_OP(add, stadd) - asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(xor), - " steor %[i], %[v]\n") - : [i] "+r" (x0), [v] "+Q" (v->counter) - : "r" (x1) - : __LL_SC_CLOBBERS); +#undef ATOMIC64_OP + +#define ATOMIC64_FETCH_OP(name, mb, op, asm_op, cl...) \ +static inline long atomic64_fetch_##op##name(long i, atomic64_t *v) \ +{ \ + register long x0 asm ("x0") = i; \ + register atomic64_t *x1 asm ("x1") = v; \ + \ + asm volatile(ARM64_LSE_ATOMIC_INSN( \ + /* LL/SC */ \ + __LL_SC_ATOMIC64(fetch_##op##name), \ + /* LSE atomics */ \ +" " #asm_op #mb " %[i], %[i], %[v]") \ + : [i] "+r" (x0), [v] "+Q" (v->counter) \ + : "r" (x1) \ + : __LL_SC_CLOBBERS, ##cl); \ + \ + return x0; \ } -static inline void atomic64_add(long i, atomic64_t *v) -{ - register long x0 asm ("x0") = i; - register atomic64_t *x1 asm ("x1") = v; +#define ATOMIC64_FETCH_OPS(op, asm_op) \ + ATOMIC64_FETCH_OP(_relaxed, , op, asm_op) \ + ATOMIC64_FETCH_OP(_acquire, a, op, asm_op, "memory") \ + ATOMIC64_FETCH_OP(_release, l, op, asm_op, "memory") \ + ATOMIC64_FETCH_OP( , al, op, asm_op, "memory") - asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(add), - " stadd %[i], %[v]\n") - : [i] "+r" (x0), [v] "+Q" (v->counter) - : "r" (x1) - : __LL_SC_CLOBBERS); -} +ATOMIC64_FETCH_OPS(andnot, ldclr) +ATOMIC64_FETCH_OPS(or, ldset) +ATOMIC64_FETCH_OPS(xor, ldeor) +ATOMIC64_FETCH_OPS(add, ldadd) + +#undef ATOMIC64_FETCH_OP +#undef ATOMIC64_FETCH_OPS #define ATOMIC64_OP_ADD_RETURN(name, mb, cl...) \ static inline long atomic64_add_return##name(long i, atomic64_t *v) \ @@ -260,6 +320,33 @@ static inline void atomic64_and(long i, atomic64_t *v) : __LL_SC_CLOBBERS); } +#define ATOMIC64_FETCH_OP_AND(name, mb, cl...) \ +static inline long atomic64_fetch_and##name(long i, atomic64_t *v) \ +{ \ + register long x0 asm ("w0") = i; \ + register atomic64_t *x1 asm ("x1") = v; \ + \ + asm volatile(ARM64_LSE_ATOMIC_INSN( \ + /* LL/SC */ \ + " nop\n" \ + __LL_SC_ATOMIC64(fetch_and##name), \ + /* LSE atomics */ \ + " mvn %[i], %[i]\n" \ + " ldclr" #mb " %[i], %[i], %[v]") \ + : [i] "+r" (x0), [v] "+Q" (v->counter) \ + : "r" (x1) \ + : __LL_SC_CLOBBERS, ##cl); \ + \ + return x0; \ +} + +ATOMIC64_FETCH_OP_AND(_relaxed, ) +ATOMIC64_FETCH_OP_AND(_acquire, a, "memory") +ATOMIC64_FETCH_OP_AND(_release, l, "memory") +ATOMIC64_FETCH_OP_AND( , al, "memory") + +#undef ATOMIC64_FETCH_OP_AND + static inline void atomic64_sub(long i, atomic64_t *v) { register long x0 asm ("x0") = i; @@ -306,6 +393,33 @@ ATOMIC64_OP_SUB_RETURN( , al, "memory") #undef ATOMIC64_OP_SUB_RETURN +#define ATOMIC64_FETCH_OP_SUB(name, mb, cl...) \ +static inline long atomic64_fetch_sub##name(long i, atomic64_t *v) \ +{ \ + register long x0 asm ("w0") = i; \ + register atomic64_t *x1 asm ("x1") = v; \ + \ + asm volatile(ARM64_LSE_ATOMIC_INSN( \ + /* LL/SC */ \ + " nop\n" \ + __LL_SC_ATOMIC64(fetch_sub##name), \ + /* LSE atomics */ \ + " neg %[i], %[i]\n" \ + " ldadd" #mb " %[i], %[i], %[v]") \ + : [i] "+r" (x0), [v] "+Q" (v->counter) \ + : "r" (x1) \ + : __LL_SC_CLOBBERS, ##cl); \ + \ + return x0; \ +} + +ATOMIC64_FETCH_OP_SUB(_relaxed, ) +ATOMIC64_FETCH_OP_SUB(_acquire, a, "memory") +ATOMIC64_FETCH_OP_SUB(_release, l, "memory") +ATOMIC64_FETCH_OP_SUB( , al, "memory") + +#undef ATOMIC64_FETCH_OP_SUB + static inline long atomic64_dec_if_positive(atomic64_t *v) { register long x0 asm ("x0") = (long)v; diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index dae5c49..4eea7f6 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -91,6 +91,19 @@ do { \ __u.__val; \ }) +#define smp_cond_load_acquire(ptr, cond_expr) \ +({ \ + typeof(ptr) __PTR = (ptr); \ + typeof(*ptr) VAL; \ + for (;;) { \ + VAL = smp_load_acquire(__PTR); \ + if (cond_expr) \ + break; \ + __cmpwait_relaxed(__PTR, VAL); \ + } \ + VAL; \ +}) + #include <asm-generic/barrier.h> #endif /* __ASSEMBLY__ */ diff --git a/arch/arm64/include/asm/checksum.h b/arch/arm64/include/asm/checksum.h new file mode 100644 index 0000000..09f6533 --- /dev/null +++ b/arch/arm64/include/asm/checksum.h @@ -0,0 +1,51 @@ +/* + * Copyright (C) 2016 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_CHECKSUM_H +#define __ASM_CHECKSUM_H + +#include <linux/types.h> + +static inline __sum16 csum_fold(__wsum csum) +{ + u32 sum = (__force u32)csum; + sum += (sum >> 16) | (sum << 16); + return ~(__force __sum16)(sum >> 16); +} +#define csum_fold csum_fold + +static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) +{ + __uint128_t tmp; + u64 sum; + + tmp = *(const __uint128_t *)iph; + iph += 16; + ihl -= 4; + tmp += ((tmp >> 64) | (tmp << 64)); + sum = tmp >> 64; + do { + sum += *(const u32 *)iph; + iph += 4; + } while (--ihl); + + sum += ((sum >> 32) | (sum << 32)); + return csum_fold(sum >> 32); +} +#define ip_fast_csum ip_fast_csum + +#include <asm-generic/checksum.h> + +#endif /* __ASM_CHECKSUM_H */ diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h index 510c7b4..bd86a79 100644 --- a/arch/arm64/include/asm/cmpxchg.h +++ b/arch/arm64/include/asm/cmpxchg.h @@ -224,4 +224,55 @@ __CMPXCHG_GEN(_mb) __ret; \ }) +#define __CMPWAIT_CASE(w, sz, name) \ +static inline void __cmpwait_case_##name(volatile void *ptr, \ + unsigned long val) \ +{ \ + unsigned long tmp; \ + \ + asm volatile( \ + " ldxr" #sz "\t%" #w "[tmp], %[v]\n" \ + " eor %" #w "[tmp], %" #w "[tmp], %" #w "[val]\n" \ + " cbnz %" #w "[tmp], 1f\n" \ + " wfe\n" \ + "1:" \ + : [tmp] "=&r" (tmp), [v] "+Q" (*(unsigned long *)ptr) \ + : [val] "r" (val)); \ +} + +__CMPWAIT_CASE(w, b, 1); +__CMPWAIT_CASE(w, h, 2); +__CMPWAIT_CASE(w, , 4); +__CMPWAIT_CASE( , , 8); + +#undef __CMPWAIT_CASE + +#define __CMPWAIT_GEN(sfx) \ +static inline void __cmpwait##sfx(volatile void *ptr, \ + unsigned long val, \ + int size) \ +{ \ + switch (size) { \ + case 1: \ + return __cmpwait_case##sfx##_1(ptr, (u8)val); \ + case 2: \ + return __cmpwait_case##sfx##_2(ptr, (u16)val); \ + case 4: \ + return __cmpwait_case##sfx##_4(ptr, val); \ + case 8: \ + return __cmpwait_case##sfx##_8(ptr, val); \ + default: \ + BUILD_BUG(); \ + } \ + \ + unreachable(); \ +} + +__CMPWAIT_GEN() + +#undef __CMPWAIT_GEN + +#define __cmpwait_relaxed(ptr, val) \ + __cmpwait((ptr), (unsigned long)(val), sizeof(*(ptr))) + #endif /* __ASM_CMPXCHG_H */ diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h index 13a6103..889226b 100644 --- a/arch/arm64/include/asm/cpu.h +++ b/arch/arm64/include/asm/cpu.h @@ -25,10 +25,12 @@ */ struct cpuinfo_arm64 { struct cpu cpu; + struct kobject kobj; u32 reg_ctr; u32 reg_cntfrq; u32 reg_dczid; u32 reg_midr; + u32 reg_revidr; u64 reg_id_aa64dfr0; u64 reg_id_aa64dfr1; diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 224efe7..49dd1bd 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -191,7 +191,9 @@ void __init setup_cpu_features(void); void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, const char *info); +void enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps); void check_local_cpu_errata(void); +void __init enable_errata_workarounds(void); void verify_local_cpu_errata(void); void verify_local_cpu_capabilities(void); diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 87e1985..9d9fd4b 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -80,12 +80,14 @@ #define APM_CPU_PART_POTENZA 0x000 #define CAVIUM_CPU_PART_THUNDERX 0x0A1 +#define CAVIUM_CPU_PART_THUNDERX_81XX 0x0A2 #define BRCM_CPU_PART_VULCAN 0x516 #define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53) #define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57) #define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) +#define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) #ifndef __ASSEMBLY__ diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h index 2fcb9b7..4b6b3f7 100644 --- a/arch/arm64/include/asm/debug-monitors.h +++ b/arch/arm64/include/asm/debug-monitors.h @@ -66,6 +66,11 @@ #define CACHE_FLUSH_IS_SAFE 1 +/* kprobes BRK opcodes with ESR encoding */ +#define BRK64_ESR_MASK 0xFFFF +#define BRK64_ESR_KPROBES 0x0004 +#define BRK64_OPCODE_KPROBES (AARCH64_BREAK_MON | (BRK64_ESR_KPROBES << 5)) + /* AArch32 */ #define DBG_ESR_EVT_BKPT 0x4 #define DBG_ESR_EVT_VECC 0x5 diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index 622db3c..a9e54aa 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -14,8 +14,7 @@ extern void efi_init(void); #endif int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md); - -#define efi_set_mapping_permissions efi_create_mapping +int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md); #define arch_efi_call_virt_setup() \ ({ \ @@ -23,10 +22,10 @@ int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md); efi_virtmap_load(); \ }) -#define arch_efi_call_virt(f, args...) \ +#define arch_efi_call_virt(p, f, args...) \ ({ \ efi_##f##_t *__f; \ - __f = efi.systab->runtime->f; \ + __f = p->f; \ __f(args); \ }) diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 77eeb2c..f772e15 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -74,6 +74,7 @@ #define ESR_ELx_EC_SHIFT (26) #define ESR_ELx_EC_MASK (UL(0x3F) << ESR_ELx_EC_SHIFT) +#define ESR_ELx_EC(esr) (((esr) & ESR_ELx_EC_MASK) >> ESR_ELx_EC_SHIFT) #define ESR_ELx_IL (UL(1) << 25) #define ESR_ELx_ISS_MASK (ESR_ELx_IL - 1) diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h index 30e50eb..1dbaa90 100644 --- a/arch/arm64/include/asm/insn.h +++ b/arch/arm64/include/asm/insn.h @@ -120,6 +120,29 @@ enum aarch64_insn_register { AARCH64_INSN_REG_SP = 31 /* Stack pointer: as load/store base reg */ }; +enum aarch64_insn_special_register { + AARCH64_INSN_SPCLREG_SPSR_EL1 = 0xC200, + AARCH64_INSN_SPCLREG_ELR_EL1 = 0xC201, + AARCH64_INSN_SPCLREG_SP_EL0 = 0xC208, + AARCH64_INSN_SPCLREG_SPSEL = 0xC210, + AARCH64_INSN_SPCLREG_CURRENTEL = 0xC212, + AARCH64_INSN_SPCLREG_DAIF = 0xDA11, + AARCH64_INSN_SPCLREG_NZCV = 0xDA10, + AARCH64_INSN_SPCLREG_FPCR = 0xDA20, + AARCH64_INSN_SPCLREG_DSPSR_EL0 = 0xDA28, + AARCH64_INSN_SPCLREG_DLR_EL0 = 0xDA29, + AARCH64_INSN_SPCLREG_SPSR_EL2 = 0xE200, + AARCH64_INSN_SPCLREG_ELR_EL2 = 0xE201, + AARCH64_INSN_SPCLREG_SP_EL1 = 0xE208, + AARCH64_INSN_SPCLREG_SPSR_INQ = 0xE218, + AARCH64_INSN_SPCLREG_SPSR_ABT = 0xE219, + AARCH64_INSN_SPCLREG_SPSR_UND = 0xE21A, + AARCH64_INSN_SPCLREG_SPSR_FIQ = 0xE21B, + AARCH64_INSN_SPCLREG_SPSR_EL3 = 0xF200, + AARCH64_INSN_SPCLREG_ELR_EL3 = 0xF201, + AARCH64_INSN_SPCLREG_SP_EL2 = 0xF210 +}; + enum aarch64_insn_variant { AARCH64_INSN_VARIANT_32BIT, AARCH64_INSN_VARIANT_64BIT @@ -223,8 +246,15 @@ static __always_inline bool aarch64_insn_is_##abbr(u32 code) \ static __always_inline u32 aarch64_insn_get_##abbr##_value(void) \ { return (val); } +__AARCH64_INSN_FUNCS(adr_adrp, 0x1F000000, 0x10000000) +__AARCH64_INSN_FUNCS(prfm_lit, 0xFF000000, 0xD8000000) __AARCH64_INSN_FUNCS(str_reg, 0x3FE0EC00, 0x38206800) __AARCH64_INSN_FUNCS(ldr_reg, 0x3FE0EC00, 0x38606800) +__AARCH64_INSN_FUNCS(ldr_lit, 0xBF000000, 0x18000000) +__AARCH64_INSN_FUNCS(ldrsw_lit, 0xFF000000, 0x98000000) +__AARCH64_INSN_FUNCS(exclusive, 0x3F800000, 0x08000000) +__AARCH64_INSN_FUNCS(load_ex, 0x3F400000, 0x08400000) +__AARCH64_INSN_FUNCS(store_ex, 0x3F400000, 0x08000000) __AARCH64_INSN_FUNCS(stp_post, 0x7FC00000, 0x28800000) __AARCH64_INSN_FUNCS(ldp_post, 0x7FC00000, 0x28C00000) __AARCH64_INSN_FUNCS(stp_pre, 0x7FC00000, 0x29800000) @@ -273,10 +303,15 @@ __AARCH64_INSN_FUNCS(svc, 0xFFE0001F, 0xD4000001) __AARCH64_INSN_FUNCS(hvc, 0xFFE0001F, 0xD4000002) __AARCH64_INSN_FUNCS(smc, 0xFFE0001F, 0xD4000003) __AARCH64_INSN_FUNCS(brk, 0xFFE0001F, 0xD4200000) +__AARCH64_INSN_FUNCS(exception, 0xFF000000, 0xD4000000) __AARCH64_INSN_FUNCS(hint, 0xFFFFF01F, 0xD503201F) __AARCH64_INSN_FUNCS(br, 0xFFFFFC1F, 0xD61F0000) __AARCH64_INSN_FUNCS(blr, 0xFFFFFC1F, 0xD63F0000) __AARCH64_INSN_FUNCS(ret, 0xFFFFFC1F, 0xD65F0000) +__AARCH64_INSN_FUNCS(eret, 0xFFFFFFFF, 0xD69F03E0) +__AARCH64_INSN_FUNCS(mrs, 0xFFF00000, 0xD5300000) +__AARCH64_INSN_FUNCS(msr_imm, 0xFFF8F01F, 0xD500401F) +__AARCH64_INSN_FUNCS(msr_reg, 0xFFF00000, 0xD5100000) #undef __AARCH64_INSN_FUNCS @@ -286,6 +321,8 @@ bool aarch64_insn_is_branch_imm(u32 insn); int aarch64_insn_read(void *addr, u32 *insnp); int aarch64_insn_write(void *addr, u32 insn); enum aarch64_insn_encoding_class aarch64_get_insn_class(u32 insn); +bool aarch64_insn_uses_literal(u32 insn); +bool aarch64_insn_is_branch(u32 insn); u64 aarch64_insn_decode_immediate(enum aarch64_insn_imm_type type, u32 insn); u32 aarch64_insn_encode_immediate(enum aarch64_insn_imm_type type, u32 insn, u64 imm); @@ -367,9 +404,13 @@ bool aarch32_insn_is_wide(u32 insn); #define A32_RT_OFFSET 12 #define A32_RT2_OFFSET 0 +u32 aarch64_insn_extract_system_reg(u32 insn); u32 aarch32_insn_extract_reg_num(u32 insn, int offset); u32 aarch32_insn_mcr_extract_opc2(u32 insn); u32 aarch32_insn_mcr_extract_crm(u32 insn); + +typedef bool (pstate_check_t)(unsigned long); +extern pstate_check_t * const aarch32_opcode_cond_checks[16]; #endif /* __ASSEMBLY__ */ #endif /* __ASM_INSN_H */ diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h index 44be1e0..9b6e408 100644 --- a/arch/arm64/include/asm/io.h +++ b/arch/arm64/include/asm/io.h @@ -174,13 +174,15 @@ extern void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size); #define iounmap __iounmap /* - * io{read,write}{16,32}be() macros + * io{read,write}{16,32,64}be() macros */ #define ioread16be(p) ({ __u16 __v = be16_to_cpu((__force __be16)__raw_readw(p)); __iormb(); __v; }) #define ioread32be(p) ({ __u32 __v = be32_to_cpu((__force __be32)__raw_readl(p)); __iormb(); __v; }) +#define ioread64be(p) ({ __u64 __v = be64_to_cpu((__force __be64)__raw_readq(p)); __iormb(); __v; }) #define iowrite16be(v,p) ({ __iowmb(); __raw_writew((__force __u16)cpu_to_be16(v), p); }) #define iowrite32be(v,p) ({ __iowmb(); __raw_writel((__force __u32)cpu_to_be32(v), p); }) +#define iowrite64be(v,p) ({ __iowmb(); __raw_writeq((__force __u64)cpu_to_be64(v), p); }) /* * Convert a physical pointer to a virtual kernel pointer for /dev/mem diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h index 11cc941..8c58128 100644 --- a/arch/arm64/include/asm/irqflags.h +++ b/arch/arm64/include/asm/irqflags.h @@ -110,8 +110,5 @@ static inline int arch_irqs_disabled_flags(unsigned long flags) : : "r" (flags) : "memory"); \ } while (0) -#define local_dbg_enable() asm("msr daifclr, #8" : : : "memory") -#define local_dbg_disable() asm("msr daifset, #8" : : : "memory") - #endif #endif diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h new file mode 100644 index 0000000..04744dc --- /dev/null +++ b/arch/arm64/include/asm/kexec.h @@ -0,0 +1,48 @@ +/* + * kexec for arm64 + * + * Copyright (C) Linaro. + * Copyright (C) Huawei Futurewei Technologies. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef _ARM64_KEXEC_H +#define _ARM64_KEXEC_H + +/* Maximum physical address we can use pages from */ + +#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL) + +/* Maximum address we can reach in physical address mode */ + +#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL) + +/* Maximum address we can use for the control code buffer */ + +#define KEXEC_CONTROL_MEMORY_LIMIT (-1UL) + +#define KEXEC_CONTROL_PAGE_SIZE 4096 + +#define KEXEC_ARCH KEXEC_ARCH_AARCH64 + +#ifndef __ASSEMBLY__ + +/** + * crash_setup_regs() - save registers for the panic kernel + * + * @newregs: registers are saved here + * @oldregs: registers to be saved (may be %NULL) + */ + +static inline void crash_setup_regs(struct pt_regs *newregs, + struct pt_regs *oldregs) +{ + /* Empty routine needed to avoid build errors. */ +} + +#endif /* __ASSEMBLY__ */ + +#endif diff --git a/arch/arm64/include/asm/kgdb.h b/arch/arm64/include/asm/kgdb.h index f69f69c..da84645 100644 --- a/arch/arm64/include/asm/kgdb.h +++ b/arch/arm64/include/asm/kgdb.h @@ -38,25 +38,54 @@ extern int kgdb_fault_expected; #endif /* !__ASSEMBLY__ */ /* - * gdb is expecting the following registers layout. + * gdb remote procotol (well most versions of it) expects the following + * register layout. * * General purpose regs: * r0-r30: 64 bit * sp,pc : 64 bit - * pstate : 64 bit - * Total: 34 + * pstate : 32 bit + * Total: 33 + 1 * FPU regs: * f0-f31: 128 bit - * Total: 32 - * Extra regs * fpsr & fpcr: 32 bit - * Total: 2 + * Total: 32 + 2 * + * To expand a little on the "most versions of it"... when the gdb remote + * protocol for AArch64 was developed it depended on a statement in the + * Architecture Reference Manual that claimed "SPSR_ELx is a 32-bit register". + * and, as a result, allocated only 32-bits for the PSTATE in the remote + * protocol. In fact this statement is still present in ARM DDI 0487A.i. + * + * Unfortunately "is a 32-bit register" has a very special meaning for + * system registers. It means that "the upper bits, bits[63:32], are + * RES0.". RES0 is heavily used in the ARM architecture documents as a + * way to leave space for future architecture changes. So to translate a + * little for people who don't spend their spare time reading ARM architecture + * manuals, what "is a 32-bit register" actually means in this context is + * "is a 64-bit register but one with no meaning allocated to any of the + * upper 32-bits... *yet*". + * + * Perhaps then we should not be surprised that this has led to some + * confusion. Specifically a patch, influenced by the above translation, + * that extended PSTATE to 64-bit was accepted into gdb-7.7 but the patch + * was reverted in gdb-7.8.1 and all later releases, when this was + * discovered to be an undocumented protocol change. + * + * So... it is *not* wrong for us to only allocate 32-bits to PSTATE + * here even though the kernel itself allocates 64-bits for the same + * state. That is because this bit of code tells the kernel how the gdb + * remote protocol (well most versions of it) describes the register state. + * + * Note that if you are using one of the versions of gdb that supports + * the gdb-7.7 version of the protocol you cannot use kgdb directly + * without providing a custom register description (gdb can load new + * protocol descriptions at runtime). */ -#define _GP_REGS 34 +#define _GP_REGS 33 #define _FP_REGS 32 -#define _EXTRA_REGS 2 +#define _EXTRA_REGS 3 /* * general purpose registers size in bytes. * pstate is only 4 bytes. subtract 4 bytes diff --git a/arch/arm64/include/asm/kprobes.h b/arch/arm64/include/asm/kprobes.h new file mode 100644 index 0000000..61b4915 --- /dev/null +++ b/arch/arm64/include/asm/kprobes.h @@ -0,0 +1,62 @@ +/* + * arch/arm64/include/asm/kprobes.h + * + * Copyright (C) 2013 Linaro Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +#ifndef _ARM_KPROBES_H +#define _ARM_KPROBES_H + +#include <linux/types.h> +#include <linux/ptrace.h> +#include <linux/percpu.h> + +#define __ARCH_WANT_KPROBES_INSN_SLOT +#define MAX_INSN_SIZE 1 +#define MAX_STACK_SIZE 128 + +#define flush_insn_slot(p) do { } while (0) +#define kretprobe_blacklist_size 0 + +#include <asm/probes.h> + +struct prev_kprobe { + struct kprobe *kp; + unsigned int status; +}; + +/* Single step context for kprobe */ +struct kprobe_step_ctx { + unsigned long ss_pending; + unsigned long match_addr; +}; + +/* per-cpu kprobe control block */ +struct kprobe_ctlblk { + unsigned int kprobe_status; + unsigned long saved_irqflag; + struct prev_kprobe prev_kprobe; + struct kprobe_step_ctx ss_ctx; + struct pt_regs jprobe_saved_regs; + char jprobes_stack[MAX_STACK_SIZE]; +}; + +void arch_remove_kprobe(struct kprobe *); +int kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr); +int kprobe_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data); +int kprobe_breakpoint_handler(struct pt_regs *regs, unsigned int esr); +int kprobe_single_step_handler(struct pt_regs *regs, unsigned int esr); +void kretprobe_trampoline(void); +void __kprobes *trampoline_probe_handler(struct pt_regs *regs); + +#endif /* _ARM_KPROBES_H */ diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 40bc168..4cdeae3 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -210,7 +210,7 @@ static inline bool kvm_vcpu_trap_il_is32bit(const struct kvm_vcpu *vcpu) static inline u8 kvm_vcpu_trap_get_class(const struct kvm_vcpu *vcpu) { - return kvm_vcpu_get_hsr(vcpu) >> ESR_ELx_EC_SHIFT; + return ESR_ELx_EC(kvm_vcpu_get_hsr(vcpu)); } static inline bool kvm_vcpu_trap_is_iabt(const struct kvm_vcpu *vcpu) diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index 97b1d8f..8d9fce0 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -34,7 +34,7 @@ extern void __iomem *early_io_map(phys_addr_t phys, unsigned long virt); extern void init_mem_pgprot(void); extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, unsigned long virt, phys_addr_t size, - pgprot_t prot); + pgprot_t prot, bool allow_block_mappings); extern void *fixmap_remap_fdt(phys_addr_t dt_phys); #endif diff --git a/arch/arm64/include/asm/numa.h b/arch/arm64/include/asm/numa.h index e9b4f29..600887e 100644 --- a/arch/arm64/include/asm/numa.h +++ b/arch/arm64/include/asm/numa.h @@ -5,6 +5,8 @@ #ifdef CONFIG_NUMA +#define NR_NODE_MEMBLKS (MAX_NUMNODES * 2) + /* currently, arm64 implements flat NUMA topology */ #define parent_node(node) (node) diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h index ff98585..d25f4f1 100644 --- a/arch/arm64/include/asm/pgalloc.h +++ b/arch/arm64/include/asm/pgalloc.h @@ -26,7 +26,7 @@ #define check_pgt_cache() do { } while (0) -#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO) +#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO) #define PGD_SIZE (PTRS_PER_PGD * sizeof(pgd_t)) #if CONFIG_PGTABLE_LEVELS > 2 diff --git a/arch/arm64/include/asm/probes.h b/arch/arm64/include/asm/probes.h new file mode 100644 index 0000000..5af574d --- /dev/null +++ b/arch/arm64/include/asm/probes.h @@ -0,0 +1,35 @@ +/* + * arch/arm64/include/asm/probes.h + * + * Copyright (C) 2013 Linaro Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#ifndef _ARM_PROBES_H +#define _ARM_PROBES_H + +#include <asm/opcodes.h> + +struct kprobe; +struct arch_specific_insn; + +typedef u32 kprobe_opcode_t; +typedef void (kprobes_handler_t) (u32 opcode, long addr, struct pt_regs *); + +/* architecture specific copy of original instruction */ +struct arch_specific_insn { + kprobe_opcode_t *insn; + pstate_check_t *pstate_cc; + kprobes_handler_t *handler; + /* restore address after step xol */ + unsigned long restore; +}; + +#endif diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index cef1cf3..ace0a96 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -192,5 +192,6 @@ static inline void spin_lock_prefetch(const void *ptr) void cpu_enable_pan(void *__unused); void cpu_enable_uao(void *__unused); +void cpu_enable_cache_maint_trap(void *__unused); #endif /* __ASM_PROCESSOR_H */ diff --git a/arch/arm64/include/asm/ptdump.h b/arch/arm64/include/asm/ptdump.h new file mode 100644 index 0000000..07b8ed0 --- /dev/null +++ b/arch/arm64/include/asm/ptdump.h @@ -0,0 +1,44 @@ +/* + * Copyright (C) 2014 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_PTDUMP_H +#define __ASM_PTDUMP_H + +#ifdef CONFIG_ARM64_PTDUMP + +#include <linux/mm_types.h> + +struct addr_marker { + unsigned long start_address; + char *name; +}; + +struct ptdump_info { + struct mm_struct *mm; + const struct addr_marker *markers; + unsigned long base_addr; + unsigned long max_addr; +}; + +int ptdump_register(struct ptdump_info *info, const char *name); + +#else +static inline int ptdump_register(struct ptdump_info *info, const char *name) +{ + return 0; +} +#endif /* CONFIG_ARM64_PTDUMP */ + +#endif /* __ASM_PTDUMP_H */ diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index a307eb6..ada08b5 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -46,7 +46,6 @@ #define COMPAT_PSR_MODE_UND 0x0000001b #define COMPAT_PSR_MODE_SYS 0x0000001f #define COMPAT_PSR_T_BIT 0x00000020 -#define COMPAT_PSR_E_BIT 0x00000200 #define COMPAT_PSR_F_BIT 0x00000040 #define COMPAT_PSR_I_BIT 0x00000080 #define COMPAT_PSR_A_BIT 0x00000100 @@ -74,6 +73,7 @@ #define COMPAT_PT_DATA_ADDR 0x10004 #define COMPAT_PT_TEXT_END_ADDR 0x10008 #ifndef __ASSEMBLY__ +#include <linux/bug.h> /* sizeof(struct user) for AArch32 */ #define COMPAT_USER_SZ 296 @@ -117,8 +117,12 @@ struct pt_regs { }; u64 orig_x0; u64 syscallno; + u64 orig_addr_limit; + u64 unused; // maintain 16 byte alignment }; +#define MAX_REG_OFFSET offsetof(struct pt_regs, pstate) + #define arch_has_single_step() (1) #ifdef CONFIG_COMPAT @@ -144,9 +148,58 @@ struct pt_regs { #define fast_interrupts_enabled(regs) \ (!((regs)->pstate & PSR_F_BIT)) -#define user_stack_pointer(regs) \ +#define GET_USP(regs) \ (!compat_user_mode(regs) ? (regs)->sp : (regs)->compat_sp) +#define SET_USP(ptregs, value) \ + (!compat_user_mode(regs) ? ((regs)->sp = value) : ((regs)->compat_sp = value)) + +extern int regs_query_register_offset(const char *name); +extern unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, + unsigned int n); + +/** + * regs_get_register() - get register value from its offset + * @regs: pt_regs from which register value is gotten + * @offset: offset of the register. + * + * regs_get_register returns the value of a register whose offset from @regs. + * The @offset is the offset of the register in struct pt_regs. + * If @offset is bigger than MAX_REG_OFFSET, this returns 0. + */ +static inline u64 regs_get_register(struct pt_regs *regs, unsigned int offset) +{ + u64 val = 0; + + WARN_ON(offset & 7); + + offset >>= 3; + switch (offset) { + case 0 ... 30: + val = regs->regs[offset]; + break; + case offsetof(struct pt_regs, sp) >> 3: + val = regs->sp; + break; + case offsetof(struct pt_regs, pc) >> 3: + val = regs->pc; + break; + case offsetof(struct pt_regs, pstate) >> 3: + val = regs->pstate; + break; + default: + val = 0; + } + + return val; +} + +/* Valid only for Kernel mode traps. */ +static inline unsigned long kernel_stack_pointer(struct pt_regs *regs) +{ + return regs->sp; +} + static inline unsigned long regs_return_value(struct pt_regs *regs) { return regs->regs[0]; @@ -156,8 +209,15 @@ static inline unsigned long regs_return_value(struct pt_regs *regs) struct task_struct; int valid_user_regs(struct user_pt_regs *regs, struct task_struct *task); -#define instruction_pointer(regs) ((unsigned long)(regs)->pc) +#define GET_IP(regs) ((unsigned long)(regs)->pc) +#define SET_IP(regs, value) ((regs)->pc = ((u64) (value))) + +#define GET_FP(ptregs) ((unsigned long)(ptregs)->regs[29]) +#define SET_FP(ptregs, value) ((ptregs)->regs[29] = ((u64) (value))) + +#include <asm-generic/ptrace.h> +#undef profile_pc extern unsigned long profile_pc(struct pt_regs *regs); #endif /* __ASSEMBLY__ */ diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h index 433e504..0226447 100644 --- a/arch/arm64/include/asm/smp.h +++ b/arch/arm64/include/asm/smp.h @@ -124,6 +124,18 @@ static inline void cpu_panic_kernel(void) cpu_park_loop(); } +/* + * If a secondary CPU enters the kernel but fails to come online, + * (e.g. due to mismatched features), and cannot exit the kernel, + * we increment cpus_stuck_in_kernel and leave the CPU in a + * quiesecent loop within the kernel text. The memory containing + * this loop must not be re-used for anything else as the 'stuck' + * core is executing it. + * + * This function is used to inhibit features like kexec and hibernate. + */ +bool cpus_are_stuck_in_kernel(void); + #endif /* ifndef __ASSEMBLY__ */ #endif /* ifndef __ASM_SMP_H */ diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h index fc9682b..e875a5a 100644 --- a/arch/arm64/include/asm/spinlock.h +++ b/arch/arm64/include/asm/spinlock.h @@ -30,22 +30,53 @@ static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) { unsigned int tmp; arch_spinlock_t lockval; + u32 owner; + + /* + * Ensure prior spin_lock operations to other locks have completed + * on this CPU before we test whether "lock" is locked. + */ + smp_mb(); + owner = READ_ONCE(lock->owner) << 16; asm volatile( " sevl\n" "1: wfe\n" "2: ldaxr %w0, %2\n" + /* Is the lock free? */ " eor %w1, %w0, %w0, ror #16\n" -" cbnz %w1, 1b\n" +" cbz %w1, 3f\n" + /* Lock taken -- has there been a subsequent unlock->lock transition? */ +" eor %w1, %w3, %w0, lsl #16\n" +" cbz %w1, 1b\n" + /* + * The owner has been updated, so there was an unlock->lock + * transition that we missed. That means we can rely on the + * store-release of the unlock operation paired with the + * load-acquire of the lock operation to publish any of our + * previous stores to the new lock owner and therefore don't + * need to bother with the writeback below. + */ +" b 4f\n" +"3:\n" + /* + * Serialise against any concurrent lockers by writing back the + * unlocked lock value + */ ARM64_LSE_ATOMIC_INSN( /* LL/SC */ " stxr %w1, %w0, %2\n" -" cbnz %w1, 2b\n", /* Serialise against any concurrent lockers */ - /* LSE atomics */ " nop\n" -" nop\n") +" nop\n", + /* LSE atomics */ +" mov %w1, %w0\n" +" cas %w0, %w0, %2\n" +" eor %w1, %w1, %w0\n") + /* Somebody else wrote to the lock, GOTO 10 and reload the value */ +" cbnz %w1, 2b\n" +"4:" : "=&r" (lockval), "=&r" (tmp), "+Q" (*lock) - : + : "r" (owner) : "memory"); } @@ -148,6 +179,7 @@ static inline int arch_spin_value_unlocked(arch_spinlock_t lock) static inline int arch_spin_is_locked(arch_spinlock_t *lock) { + smp_mb(); /* See arch_spin_unlock_wait */ return !arch_spin_value_unlocked(READ_ONCE(*lock)); } diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 751e901..cc06794 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -98,11 +98,11 @@ SCTLR_ELx_SA | SCTLR_ELx_I) /* SCTLR_EL1 specific flags. */ +#define SCTLR_EL1_UCI (1 << 26) #define SCTLR_EL1_SPAN (1 << 23) #define SCTLR_EL1_SED (1 << 8) #define SCTLR_EL1_CP15BEN (1 << 5) - /* id_aa64isar0 */ #define ID_AA64ISAR0_RDM_SHIFT 28 #define ID_AA64ISAR0_ATOMICS_SHIFT 20 diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h index 0cc2f29..9cd03f3 100644 --- a/arch/arm64/include/asm/traps.h +++ b/arch/arm64/include/asm/traps.h @@ -34,6 +34,8 @@ struct undef_hook { void register_undef_hook(struct undef_hook *hook); void unregister_undef_hook(struct undef_hook *hook); +void arm64_notify_segfault(struct pt_regs *regs, unsigned long addr); + #ifdef CONFIG_FUNCTION_GRAPH_TRACER static inline int __in_irqentry_text(unsigned long ptr) { diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 9e397a5..5e834d1 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -21,6 +21,7 @@ /* * User space memory access functions */ +#include <linux/kasan-checks.h> #include <linux/string.h> #include <linux/thread_info.h> @@ -256,15 +257,29 @@ do { \ -EFAULT; \ }) -extern unsigned long __must_check __copy_from_user(void *to, const void __user *from, unsigned long n); -extern unsigned long __must_check __copy_to_user(void __user *to, const void *from, unsigned long n); +extern unsigned long __must_check __arch_copy_from_user(void *to, const void __user *from, unsigned long n); +extern unsigned long __must_check __arch_copy_to_user(void __user *to, const void *from, unsigned long n); extern unsigned long __must_check __copy_in_user(void __user *to, const void __user *from, unsigned long n); extern unsigned long __must_check __clear_user(void __user *addr, unsigned long n); +static inline unsigned long __must_check __copy_from_user(void *to, const void __user *from, unsigned long n) +{ + kasan_check_write(to, n); + return __arch_copy_from_user(to, from, n); +} + +static inline unsigned long __must_check __copy_to_user(void __user *to, const void *from, unsigned long n) +{ + kasan_check_read(from, n); + return __arch_copy_to_user(to, from, n); +} + static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n) { + kasan_check_write(to, n); + if (access_ok(VERIFY_READ, from, n)) - n = __copy_from_user(to, from, n); + n = __arch_copy_from_user(to, from, n); else /* security hole - plug it */ memset(to, 0, n); return n; @@ -272,8 +287,10 @@ static inline unsigned long __must_check copy_from_user(void *to, const void __u static inline unsigned long __must_check copy_to_user(void __user *to, const void *from, unsigned long n) { + kasan_check_read(from, n); + if (access_ok(VERIFY_WRITE, to, n)) - n = __copy_to_user(to, from, n); + n = __arch_copy_to_user(to, from, n); return n; } diff --git a/arch/arm64/include/asm/vdso_datapage.h b/arch/arm64/include/asm/vdso_datapage.h index de66199..2b9a637 100644 --- a/arch/arm64/include/asm/vdso_datapage.h +++ b/arch/arm64/include/asm/vdso_datapage.h @@ -22,6 +22,8 @@ struct vdso_data { __u64 cs_cycle_last; /* Timebase at clocksource init */ + __u64 raw_time_sec; /* Raw time */ + __u64 raw_time_nsec; __u64 xtime_clock_sec; /* Kernel time */ __u64 xtime_clock_nsec; __u64 xtime_coarse_sec; /* Coarse time */ @@ -29,8 +31,10 @@ struct vdso_data { __u64 wtm_clock_sec; /* Wall to monotonic time */ __u64 wtm_clock_nsec; __u32 tb_seq_count; /* Timebase sequence counter */ - __u32 cs_mult; /* Clocksource multiplier */ - __u32 cs_shift; /* Clocksource shift */ + /* cs_* members must be adjacent and in this order (ldp accesses) */ + __u32 cs_mono_mult; /* NTP-adjusted clocksource multiplier */ + __u32 cs_shift; /* Clocksource shift (mono = raw) */ + __u32 cs_raw_mult; /* Raw clocksource multiplier */ __u32 tz_minuteswest; /* Whacky timezone stuff */ __u32 tz_dsttime; __u32 use_syscall; diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h index dcbcf8d..bbc6a8c 100644 --- a/arch/arm64/include/asm/virt.h +++ b/arch/arm64/include/asm/virt.h @@ -34,6 +34,11 @@ */ #define HVC_SET_VECTORS 1 +/* + * HVC_SOFT_RESTART - CPU soft reset, used by the cpu_soft_restart routine. + */ +#define HVC_SOFT_RESTART 2 + #define BOOT_CPU_MODE_EL1 (0xe11) #define BOOT_CPU_MODE_EL2 (0xe12) diff --git a/arch/arm64/include/asm/xen/xen-ops.h b/arch/arm64/include/asm/xen/xen-ops.h new file mode 100644 index 0000000..ec154e7 --- /dev/null +++ b/arch/arm64/include/asm/xen/xen-ops.h @@ -0,0 +1,6 @@ +#ifndef _ASM_XEN_OPS_H +#define _ASM_XEN_OPS_H + +void xen_efi_runtime_setup(void); + +#endif /* _ASM_XEN_OPS_H */ diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 2173149..14f7b65 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -26,8 +26,7 @@ $(obj)/%.stub.o: $(obj)/%.o FORCE $(call if_changed,objcopy) arm64-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \ - sys_compat.o entry32.o \ - ../../arm/kernel/opcodes.o + sys_compat.o entry32.o arm64-obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o entry-ftrace.o arm64-obj-$(CONFIG_MODULES) += arm64ksyms.o module.o arm64-obj-$(CONFIG_ARM64_MODULE_PLTS) += module-plts.o @@ -42,16 +41,15 @@ arm64-obj-$(CONFIG_EFI) += efi.o efi-entry.stub.o arm64-obj-$(CONFIG_PCI) += pci.o arm64-obj-$(CONFIG_ARMV8_DEPRECATED) += armv8_deprecated.o arm64-obj-$(CONFIG_ACPI) += acpi.o +arm64-obj-$(CONFIG_ACPI_NUMA) += acpi_numa.o arm64-obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL) += acpi_parking_protocol.o arm64-obj-$(CONFIG_PARAVIRT) += paravirt.o arm64-obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o arm64-obj-$(CONFIG_HIBERNATION) += hibernate.o hibernate-asm.o +arm64-obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o \ + cpu-reset.o -obj-y += $(arm64-obj-y) vdso/ +obj-y += $(arm64-obj-y) vdso/ probes/ obj-m += $(arm64-obj-m) head-y := head.o extra-y += $(head-y) vmlinux.lds - -# vDSO - this must be built first to generate the symbol offsets -$(call objectify,$(arm64-obj-y)): $(obj)/vdso/vdso-offsets.h -$(obj)/vdso/vdso-offsets.h: $(obj)/vdso diff --git a/arch/arm64/kernel/acpi_numa.c b/arch/arm64/kernel/acpi_numa.c new file mode 100644 index 0000000..f85149c --- /dev/null +++ b/arch/arm64/kernel/acpi_numa.c @@ -0,0 +1,112 @@ +/* + * ACPI 5.1 based NUMA setup for ARM64 + * Lots of code was borrowed from arch/x86/mm/srat.c + * + * Copyright 2004 Andi Kleen, SuSE Labs. + * Copyright (C) 2013-2016, Linaro Ltd. + * Author: Hanjun Guo <hanjun.guo@linaro.org> + * + * Reads the ACPI SRAT table to figure out what memory belongs to which CPUs. + * + * Called from acpi_numa_init while reading the SRAT and SLIT tables. + * Assumes all memory regions belonging to a single proximity domain + * are in one chunk. Holes between them will be included in the node. + */ + +#define pr_fmt(fmt) "ACPI: NUMA: " fmt + +#include <linux/acpi.h> +#include <linux/bitmap.h> +#include <linux/bootmem.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/memblock.h> +#include <linux/mmzone.h> +#include <linux/module.h> +#include <linux/topology.h> + +#include <acpi/processor.h> +#include <asm/numa.h> + +static int cpus_in_srat; + +struct __node_cpu_hwid { + u32 node_id; /* logical node containing this CPU */ + u64 cpu_hwid; /* MPIDR for this CPU */ +}; + +static struct __node_cpu_hwid early_node_cpu_hwid[NR_CPUS] = { +[0 ... NR_CPUS - 1] = {NUMA_NO_NODE, PHYS_CPUID_INVALID} }; + +int acpi_numa_get_nid(unsigned int cpu, u64 hwid) +{ + int i; + + for (i = 0; i < cpus_in_srat; i++) { + if (hwid == early_node_cpu_hwid[i].cpu_hwid) + return early_node_cpu_hwid[i].node_id; + } + + return NUMA_NO_NODE; +} + +/* Callback for Proximity Domain -> ACPI processor UID mapping */ +void __init acpi_numa_gicc_affinity_init(struct acpi_srat_gicc_affinity *pa) +{ + int pxm, node; + phys_cpuid_t mpidr; + + if (srat_disabled()) + return; + + if (pa->header.length < sizeof(struct acpi_srat_gicc_affinity)) { + pr_err("SRAT: Invalid SRAT header length: %d\n", + pa->header.length); + bad_srat(); + return; + } + + if (!(pa->flags & ACPI_SRAT_GICC_ENABLED)) + return; + + if (cpus_in_srat >= NR_CPUS) { + pr_warn_once("SRAT: cpu_to_node_map[%d] is too small, may not be able to use all cpus\n", + NR_CPUS); + return; + } + + pxm = pa->proximity_domain; + node = acpi_map_pxm_to_node(pxm); + + if (node == NUMA_NO_NODE || node >= MAX_NUMNODES) { + pr_err("SRAT: Too many proximity domains %d\n", pxm); + bad_srat(); + return; + } + + mpidr = acpi_map_madt_entry(pa->acpi_processor_uid); + if (mpidr == PHYS_CPUID_INVALID) { + pr_err("SRAT: PXM %d with ACPI ID %d has no valid MPIDR in MADT\n", + pxm, pa->acpi_processor_uid); + bad_srat(); + return; + } + + early_node_cpu_hwid[cpus_in_srat].node_id = node; + early_node_cpu_hwid[cpus_in_srat].cpu_hwid = mpidr; + node_set(node, numa_nodes_parsed); + cpus_in_srat++; + pr_info("SRAT: PXM %d -> MPIDR 0x%Lx -> Node %d\n", + pxm, mpidr, node); +} + +int __init arm64_acpi_numa_init(void) +{ + int ret; + + ret = acpi_numa_init(); + if (ret) + return ret; + + return srat_disabled() ? -EINVAL : 0; +} diff --git a/arch/arm64/kernel/arm64ksyms.c b/arch/arm64/kernel/arm64ksyms.c index 678f30b0..78f3680 100644 --- a/arch/arm64/kernel/arm64ksyms.c +++ b/arch/arm64/kernel/arm64ksyms.c @@ -27,6 +27,7 @@ #include <linux/uaccess.h> #include <linux/io.h> #include <linux/arm-smccc.h> +#include <linux/kprobes.h> #include <asm/checksum.h> @@ -34,8 +35,8 @@ EXPORT_SYMBOL(copy_page); EXPORT_SYMBOL(clear_page); /* user mem (segment) */ -EXPORT_SYMBOL(__copy_from_user); -EXPORT_SYMBOL(__copy_to_user); +EXPORT_SYMBOL(__arch_copy_from_user); +EXPORT_SYMBOL(__arch_copy_to_user); EXPORT_SYMBOL(__clear_user); EXPORT_SYMBOL(__copy_in_user); @@ -68,6 +69,7 @@ EXPORT_SYMBOL(test_and_change_bit); #ifdef CONFIG_FUNCTION_TRACER EXPORT_SYMBOL(_mcount); +NOKPROBE_SYMBOL(_mcount); #endif /* arm-smccc */ diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index c37202c..42ffdb5 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -121,7 +121,7 @@ static int run_all_cpu_set_hw_mode(struct insn_emulation *insn, bool enable) * 0 - If all the hooks ran successfully. * -EINVAL - At least one hook is not supported by the CPU. */ -static int run_all_insn_set_hw_mode(unsigned long cpu) +static int run_all_insn_set_hw_mode(unsigned int cpu) { int rc = 0; unsigned long flags; @@ -131,7 +131,7 @@ static int run_all_insn_set_hw_mode(unsigned long cpu) list_for_each_entry(insn, &insn_emulation, node) { bool enable = (insn->current_mode == INSN_HW); if (insn->ops->set_hw_mode && insn->ops->set_hw_mode(enable)) { - pr_warn("CPU[%ld] cannot support the emulation of %s", + pr_warn("CPU[%u] cannot support the emulation of %s", cpu, insn->ops->name); rc = -EINVAL; } @@ -316,28 +316,6 @@ static void __init register_insn_emulation_sysctl(struct ctl_table *table) */ #define TYPE_SWPB (1 << 22) -/* - * Set up process info to signal segmentation fault - called on access error. - */ -static void set_segfault(struct pt_regs *regs, unsigned long addr) -{ - siginfo_t info; - - down_read(¤t->mm->mmap_sem); - if (find_vma(current->mm, addr) == NULL) - info.si_code = SEGV_MAPERR; - else - info.si_code = SEGV_ACCERR; - up_read(¤t->mm->mmap_sem); - - info.si_signo = SIGSEGV; - info.si_errno = 0; - info.si_addr = (void *) instruction_pointer(regs); - - pr_debug("SWP{B} emulation: access caused memory abort!\n"); - arm64_notify_die("Illegal memory access", regs, &info, 0); -} - static int emulate_swpX(unsigned int address, unsigned int *data, unsigned int type) { @@ -366,6 +344,21 @@ static int emulate_swpX(unsigned int address, unsigned int *data, return res; } +#define ARM_OPCODE_CONDITION_UNCOND 0xf + +static unsigned int __kprobes aarch32_check_condition(u32 opcode, u32 psr) +{ + u32 cc_bits = opcode >> 28; + + if (cc_bits != ARM_OPCODE_CONDITION_UNCOND) { + if ((*aarch32_opcode_cond_checks[cc_bits])(psr)) + return ARM_OPCODE_CONDTEST_PASS; + else + return ARM_OPCODE_CONDTEST_FAIL; + } + return ARM_OPCODE_CONDTEST_UNCOND; +} + /* * swp_handler logs the id of calling process, dissects the instruction, sanity * checks the memory location, calls emulate_swpX for the actual operation and @@ -380,7 +373,7 @@ static int swp_handler(struct pt_regs *regs, u32 instr) type = instr & TYPE_SWPB; - switch (arm_check_condition(instr, regs->pstate)) { + switch (aarch32_check_condition(instr, regs->pstate)) { case ARM_OPCODE_CONDTEST_PASS: break; case ARM_OPCODE_CONDTEST_FAIL: @@ -430,7 +423,8 @@ ret: return 0; fault: - set_segfault(regs, address); + pr_debug("SWP{B} emulation: access caused memory abort!\n"); + arm64_notify_segfault(regs, address); return 0; } @@ -461,7 +455,7 @@ static int cp15barrier_handler(struct pt_regs *regs, u32 instr) { perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, regs->pc); - switch (arm_check_condition(instr, regs->pstate)) { + switch (aarch32_check_condition(instr, regs->pstate)) { case ARM_OPCODE_CONDTEST_PASS: break; case ARM_OPCODE_CONDTEST_FAIL: @@ -617,20 +611,6 @@ static struct insn_emulation_ops setend_ops = { .set_hw_mode = setend_set_hw_mode, }; -static int insn_cpu_hotplug_notify(struct notifier_block *b, - unsigned long action, void *hcpu) -{ - int rc = 0; - if ((action & ~CPU_TASKS_FROZEN) == CPU_STARTING) - rc = run_all_insn_set_hw_mode((unsigned long)hcpu); - - return notifier_from_errno(rc); -} - -static struct notifier_block insn_cpu_hotplug_notifier = { - .notifier_call = insn_cpu_hotplug_notify, -}; - /* * Invoked as late_initcall, since not needed before init spawned. */ @@ -649,7 +629,9 @@ static int __init armv8_deprecated_init(void) pr_info("setend instruction emulation is not supported on the system"); } - register_cpu_notifier(&insn_cpu_hotplug_notifier); + cpuhp_setup_state_nocalls(CPUHP_AP_ARM64_ISNDEP_STARTING, + "AP_ARM64_ISNDEP_STARTING", + run_all_insn_set_hw_mode, NULL); register_insn_emulation_sysctl(ctl_abi); return 0; diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index f8e5d47..05070b7 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -51,6 +51,17 @@ int main(void) DEFINE(S_X5, offsetof(struct pt_regs, regs[5])); DEFINE(S_X6, offsetof(struct pt_regs, regs[6])); DEFINE(S_X7, offsetof(struct pt_regs, regs[7])); + DEFINE(S_X8, offsetof(struct pt_regs, regs[8])); + DEFINE(S_X10, offsetof(struct pt_regs, regs[10])); + DEFINE(S_X12, offsetof(struct pt_regs, regs[12])); + DEFINE(S_X14, offsetof(struct pt_regs, regs[14])); + DEFINE(S_X16, offsetof(struct pt_regs, regs[16])); + DEFINE(S_X18, offsetof(struct pt_regs, regs[18])); + DEFINE(S_X20, offsetof(struct pt_regs, regs[20])); + DEFINE(S_X22, offsetof(struct pt_regs, regs[22])); + DEFINE(S_X24, offsetof(struct pt_regs, regs[24])); + DEFINE(S_X26, offsetof(struct pt_regs, regs[26])); + DEFINE(S_X28, offsetof(struct pt_regs, regs[28])); DEFINE(S_LR, offsetof(struct pt_regs, regs[30])); DEFINE(S_SP, offsetof(struct pt_regs, sp)); #ifdef CONFIG_COMPAT @@ -60,6 +71,7 @@ int main(void) DEFINE(S_PC, offsetof(struct pt_regs, pc)); DEFINE(S_ORIG_X0, offsetof(struct pt_regs, orig_x0)); DEFINE(S_SYSCALLNO, offsetof(struct pt_regs, syscallno)); + DEFINE(S_ORIG_ADDR_LIMIT, offsetof(struct pt_regs, orig_addr_limit)); DEFINE(S_FRAME_SIZE, sizeof(struct pt_regs)); BLANK(); DEFINE(MM_CONTEXT_ID, offsetof(struct mm_struct, context.id.counter)); @@ -77,6 +89,7 @@ int main(void) BLANK(); DEFINE(CLOCK_REALTIME, CLOCK_REALTIME); DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC); + DEFINE(CLOCK_MONOTONIC_RAW, CLOCK_MONOTONIC_RAW); DEFINE(CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC); DEFINE(CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE); DEFINE(CLOCK_MONOTONIC_COARSE,CLOCK_MONOTONIC_COARSE); @@ -84,6 +97,8 @@ int main(void) DEFINE(NSEC_PER_SEC, NSEC_PER_SEC); BLANK(); DEFINE(VDSO_CS_CYCLE_LAST, offsetof(struct vdso_data, cs_cycle_last)); + DEFINE(VDSO_RAW_TIME_SEC, offsetof(struct vdso_data, raw_time_sec)); + DEFINE(VDSO_RAW_TIME_NSEC, offsetof(struct vdso_data, raw_time_nsec)); DEFINE(VDSO_XTIME_CLK_SEC, offsetof(struct vdso_data, xtime_clock_sec)); DEFINE(VDSO_XTIME_CLK_NSEC, offsetof(struct vdso_data, xtime_clock_nsec)); DEFINE(VDSO_XTIME_CRS_SEC, offsetof(struct vdso_data, xtime_coarse_sec)); @@ -91,7 +106,8 @@ int main(void) DEFINE(VDSO_WTM_CLK_SEC, offsetof(struct vdso_data, wtm_clock_sec)); DEFINE(VDSO_WTM_CLK_NSEC, offsetof(struct vdso_data, wtm_clock_nsec)); DEFINE(VDSO_TB_SEQ_COUNT, offsetof(struct vdso_data, tb_seq_count)); - DEFINE(VDSO_CS_MULT, offsetof(struct vdso_data, cs_mult)); + DEFINE(VDSO_CS_MONO_MULT, offsetof(struct vdso_data, cs_mono_mult)); + DEFINE(VDSO_CS_RAW_MULT, offsetof(struct vdso_data, cs_raw_mult)); DEFINE(VDSO_CS_SHIFT, offsetof(struct vdso_data, cs_shift)); DEFINE(VDSO_TZ_MINWEST, offsetof(struct vdso_data, tz_minuteswest)); DEFINE(VDSO_TZ_DSTTIME, offsetof(struct vdso_data, tz_dsttime)); diff --git a/arch/arm64/kernel/cpu-reset.S b/arch/arm64/kernel/cpu-reset.S new file mode 100644 index 0000000..65f42d2 --- /dev/null +++ b/arch/arm64/kernel/cpu-reset.S @@ -0,0 +1,54 @@ +/* + * CPU reset routines + * + * Copyright (C) 2001 Deep Blue Solutions Ltd. + * Copyright (C) 2012 ARM Ltd. + * Copyright (C) 2015 Huawei Futurewei Technologies. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/linkage.h> +#include <asm/assembler.h> +#include <asm/sysreg.h> +#include <asm/virt.h> + +.text +.pushsection .idmap.text, "ax" + +/* + * __cpu_soft_restart(el2_switch, entry, arg0, arg1, arg2) - Helper for + * cpu_soft_restart. + * + * @el2_switch: Flag to indicate a swich to EL2 is needed. + * @entry: Location to jump to for soft reset. + * arg0: First argument passed to @entry. + * arg1: Second argument passed to @entry. + * arg2: Third argument passed to @entry. + * + * Put the CPU into the same state as it would be if it had been reset, and + * branch to what would be the reset vector. It must be executed with the + * flat identity mapping. + */ +ENTRY(__cpu_soft_restart) + /* Clear sctlr_el1 flags. */ + mrs x12, sctlr_el1 + ldr x13, =SCTLR_ELx_FLAGS + bic x12, x12, x13 + msr sctlr_el1, x12 + isb + + cbz x0, 1f // el2_switch? + mov x0, #HVC_SOFT_RESTART + hvc #0 // no return + +1: mov x18, x1 // entry + mov x0, x2 // arg0 + mov x1, x3 // arg1 + mov x2, x4 // arg2 + br x18 +ENDPROC(__cpu_soft_restart) + +.popsection diff --git a/arch/arm64/kernel/cpu-reset.h b/arch/arm64/kernel/cpu-reset.h new file mode 100644 index 0000000..d4e9ecb --- /dev/null +++ b/arch/arm64/kernel/cpu-reset.h @@ -0,0 +1,34 @@ +/* + * CPU reset routines + * + * Copyright (C) 2015 Huawei Futurewei Technologies. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef _ARM64_CPU_RESET_H +#define _ARM64_CPU_RESET_H + +#include <asm/virt.h> + +void __cpu_soft_restart(unsigned long el2_switch, unsigned long entry, + unsigned long arg0, unsigned long arg1, unsigned long arg2); + +static inline void __noreturn cpu_soft_restart(unsigned long el2_switch, + unsigned long entry, unsigned long arg0, unsigned long arg1, + unsigned long arg2) +{ + typeof(__cpu_soft_restart) *restart; + + el2_switch = el2_switch && !is_kernel_in_hyp_mode() && + is_hyp_mode_available(); + restart = (void *)virt_to_phys(__cpu_soft_restart); + + cpu_install_idmap(); + restart(el2_switch, entry, arg0, arg1, arg2); + unreachable(); +} + +#endif diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index d427894..82b0fc2 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -46,6 +46,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .desc = "ARM errata 826319, 827319, 824069", .capability = ARM64_WORKAROUND_CLEAN_CACHE, MIDR_RANGE(MIDR_CORTEX_A53, 0x00, 0x02), + .enable = cpu_enable_cache_maint_trap, }, #endif #ifdef CONFIG_ARM64_ERRATUM_819472 @@ -54,6 +55,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .desc = "ARM errata 819472", .capability = ARM64_WORKAROUND_CLEAN_CACHE, MIDR_RANGE(MIDR_CORTEX_A53, 0x00, 0x01), + .enable = cpu_enable_cache_maint_trap, }, #endif #ifdef CONFIG_ARM64_ERRATUM_832075 @@ -98,6 +100,12 @@ const struct arm64_cpu_capabilities arm64_errata[] = { MIDR_RANGE(MIDR_THUNDERX, 0x00, (1 << MIDR_VARIANT_SHIFT) | 1), }, + { + /* Cavium ThunderX, T81 pass 1.0 */ + .desc = "Cavium erratum 27456", + .capability = ARM64_WORKAROUND_CAVIUM_27456, + MIDR_RANGE(MIDR_THUNDERX_81XX, 0x00, 0x00), + }, #endif { } @@ -127,3 +135,8 @@ void check_local_cpu_errata(void) { update_cpu_capabilities(arm64_errata, "enabling workaround for"); } + +void __init enable_errata_workarounds(void) +{ + enable_cpu_capabilities(arm64_errata); +} diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 811773d..916d27a 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -913,8 +913,7 @@ void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, * Run through the enabled capabilities and enable() it on all active * CPUs */ -static void __init -enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps) +void __init enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps) { for (; caps->matches; caps++) if (caps->enable && cpus_have_cap(caps->capability)) @@ -1036,6 +1035,7 @@ void __init setup_cpu_features(void) /* Set the CPU feature capabilies */ setup_feature_capabilities(); + enable_errata_workarounds(); setup_elf_hwcaps(arm64_elf_hwcaps); if (system_supports_32bit_el0()) diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c index e11857f..75a0f8a 100644 --- a/arch/arm64/kernel/cpuidle.c +++ b/arch/arm64/kernel/cpuidle.c @@ -9,13 +9,16 @@ * published by the Free Software Foundation. */ +#include <linux/acpi.h> +#include <linux/cpuidle.h> +#include <linux/cpu_pm.h> #include <linux/of.h> #include <linux/of_device.h> #include <asm/cpuidle.h> #include <asm/cpu_ops.h> -int __init arm_cpuidle_init(unsigned int cpu) +int arm_cpuidle_init(unsigned int cpu) { int ret = -EOPNOTSUPP; @@ -39,3 +42,18 @@ int arm_cpuidle_suspend(int index) return cpu_ops[cpu]->cpu_suspend(index); } + +#ifdef CONFIG_ACPI + +#include <acpi/processor.h> + +int acpi_processor_ffh_lpi_probe(unsigned int cpu) +{ + return arm_cpuidle_init(cpu); +} + +int acpi_processor_ffh_lpi_enter(struct acpi_lpi_state *lpi) +{ + return CPU_PM_CPU_IDLE_ENTER(arm_cpuidle_suspend, lpi->index); +} +#endif diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index c173d32..ed1b84f 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -183,6 +183,123 @@ const struct seq_operations cpuinfo_op = { .show = c_show }; + +static struct kobj_type cpuregs_kobj_type = { + .sysfs_ops = &kobj_sysfs_ops, +}; + +/* + * The ARM ARM uses the phrase "32-bit register" to describe a register + * whose upper 32 bits are RES0 (per C5.1.1, ARM DDI 0487A.i), however + * no statement is made as to whether the upper 32 bits will or will not + * be made use of in future, and between ARM DDI 0487A.c and ARM DDI + * 0487A.d CLIDR_EL1 was expanded from 32-bit to 64-bit. + * + * Thus, while both MIDR_EL1 and REVIDR_EL1 are described as 32-bit + * registers, we expose them both as 64 bit values to cater for possible + * future expansion without an ABI break. + */ +#define kobj_to_cpuinfo(kobj) container_of(kobj, struct cpuinfo_arm64, kobj) +#define CPUREGS_ATTR_RO(_name, _field) \ + static ssize_t _name##_show(struct kobject *kobj, \ + struct kobj_attribute *attr, char *buf) \ + { \ + struct cpuinfo_arm64 *info = kobj_to_cpuinfo(kobj); \ + \ + if (info->reg_midr) \ + return sprintf(buf, "0x%016x\n", info->reg_##_field); \ + else \ + return 0; \ + } \ + static struct kobj_attribute cpuregs_attr_##_name = __ATTR_RO(_name) + +CPUREGS_ATTR_RO(midr_el1, midr); +CPUREGS_ATTR_RO(revidr_el1, revidr); + +static struct attribute *cpuregs_id_attrs[] = { + &cpuregs_attr_midr_el1.attr, + &cpuregs_attr_revidr_el1.attr, + NULL +}; + +static struct attribute_group cpuregs_attr_group = { + .attrs = cpuregs_id_attrs, + .name = "identification" +}; + +static int cpuid_add_regs(int cpu) +{ + int rc; + struct device *dev; + struct cpuinfo_arm64 *info = &per_cpu(cpu_data, cpu); + + dev = get_cpu_device(cpu); + if (!dev) { + rc = -ENODEV; + goto out; + } + rc = kobject_add(&info->kobj, &dev->kobj, "regs"); + if (rc) + goto out; + rc = sysfs_create_group(&info->kobj, &cpuregs_attr_group); + if (rc) + kobject_del(&info->kobj); +out: + return rc; +} + +static int cpuid_remove_regs(int cpu) +{ + struct device *dev; + struct cpuinfo_arm64 *info = &per_cpu(cpu_data, cpu); + + dev = get_cpu_device(cpu); + if (!dev) + return -ENODEV; + if (info->kobj.parent) { + sysfs_remove_group(&info->kobj, &cpuregs_attr_group); + kobject_del(&info->kobj); + } + + return 0; +} + +static int cpuid_callback(struct notifier_block *nb, + unsigned long action, void *hcpu) +{ + int rc = 0; + unsigned long cpu = (unsigned long)hcpu; + + switch (action & ~CPU_TASKS_FROZEN) { + case CPU_ONLINE: + rc = cpuid_add_regs(cpu); + break; + case CPU_DEAD: + rc = cpuid_remove_regs(cpu); + break; + } + + return notifier_from_errno(rc); +} + +static int __init cpuinfo_regs_init(void) +{ + int cpu; + + cpu_notifier_register_begin(); + + for_each_possible_cpu(cpu) { + struct cpuinfo_arm64 *info = &per_cpu(cpu_data, cpu); + + kobject_init(&info->kobj, &cpuregs_kobj_type); + if (cpu_online(cpu)) + cpuid_add_regs(cpu); + } + __hotcpu_notifier(cpuid_callback, 0); + + cpu_notifier_register_done(); + return 0; +} static void cpuinfo_detect_icache_policy(struct cpuinfo_arm64 *info) { unsigned int cpu = smp_processor_id(); @@ -212,6 +329,7 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) info->reg_ctr = read_cpuid_cachetype(); info->reg_dczid = read_cpuid(DCZID_EL0); info->reg_midr = read_cpuid_id(); + info->reg_revidr = read_cpuid(REVIDR_EL1); info->reg_id_aa64dfr0 = read_cpuid(ID_AA64DFR0_EL1); info->reg_id_aa64dfr1 = read_cpuid(ID_AA64DFR1_EL1); @@ -264,3 +382,5 @@ void __init cpuinfo_store_boot_cpu(void) boot_cpu_data = *info; init_cpu_features(&boot_cpu_data); } + +device_initcall(cpuinfo_regs_init); diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 4fbf3c5..91fff48 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -23,6 +23,7 @@ #include <linux/hardirq.h> #include <linux/init.h> #include <linux/ptrace.h> +#include <linux/kprobes.h> #include <linux/stat.h> #include <linux/uaccess.h> @@ -48,6 +49,7 @@ static void mdscr_write(u32 mdscr) asm volatile("msr mdscr_el1, %0" :: "r" (mdscr)); local_dbg_restore(flags); } +NOKPROBE_SYMBOL(mdscr_write); static u32 mdscr_read(void) { @@ -55,6 +57,7 @@ static u32 mdscr_read(void) asm volatile("mrs %0, mdscr_el1" : "=r" (mdscr)); return mdscr; } +NOKPROBE_SYMBOL(mdscr_read); /* * Allow root to disable self-hosted debug from userspace. @@ -103,6 +106,7 @@ void enable_debug_monitors(enum dbg_active_el el) mdscr_write(mdscr); } } +NOKPROBE_SYMBOL(enable_debug_monitors); void disable_debug_monitors(enum dbg_active_el el) { @@ -123,6 +127,7 @@ void disable_debug_monitors(enum dbg_active_el el) mdscr_write(mdscr); } } +NOKPROBE_SYMBOL(disable_debug_monitors); /* * OS lock clearing. @@ -151,7 +156,6 @@ static int debug_monitors_init(void) /* Clear the OS lock. */ on_each_cpu(clear_os_lock, NULL, 1); isb(); - local_dbg_enable(); /* Register hotplug handler. */ __register_cpu_notifier(&os_lock_nb); @@ -166,22 +170,15 @@ postcore_initcall(debug_monitors_init); */ static void set_regs_spsr_ss(struct pt_regs *regs) { - unsigned long spsr; - - spsr = regs->pstate; - spsr &= ~DBG_SPSR_SS; - spsr |= DBG_SPSR_SS; - regs->pstate = spsr; + regs->pstate |= DBG_SPSR_SS; } +NOKPROBE_SYMBOL(set_regs_spsr_ss); static void clear_regs_spsr_ss(struct pt_regs *regs) { - unsigned long spsr; - - spsr = regs->pstate; - spsr &= ~DBG_SPSR_SS; - regs->pstate = spsr; + regs->pstate &= ~DBG_SPSR_SS; } +NOKPROBE_SYMBOL(clear_regs_spsr_ss); /* EL1 Single Step Handler hooks */ static LIST_HEAD(step_hook); @@ -225,6 +222,7 @@ static int call_step_hook(struct pt_regs *regs, unsigned int esr) return retval; } +NOKPROBE_SYMBOL(call_step_hook); static void send_user_sigtrap(int si_code) { @@ -266,6 +264,10 @@ static int single_step_handler(unsigned long addr, unsigned int esr, */ user_rewind_single_step(current); } else { +#ifdef CONFIG_KPROBES + if (kprobe_single_step_handler(regs, esr) == DBG_HOOK_HANDLED) + return 0; +#endif if (call_step_hook(regs, esr) == DBG_HOOK_HANDLED) return 0; @@ -279,6 +281,7 @@ static int single_step_handler(unsigned long addr, unsigned int esr, return 0; } +NOKPROBE_SYMBOL(single_step_handler); /* * Breakpoint handler is re-entrant as another breakpoint can @@ -316,19 +319,28 @@ static int call_break_hook(struct pt_regs *regs, unsigned int esr) return fn ? fn(regs, esr) : DBG_HOOK_ERROR; } +NOKPROBE_SYMBOL(call_break_hook); static int brk_handler(unsigned long addr, unsigned int esr, struct pt_regs *regs) { if (user_mode(regs)) { send_user_sigtrap(TRAP_BRKPT); - } else if (call_break_hook(regs, esr) != DBG_HOOK_HANDLED) { - pr_warning("Unexpected kernel BRK exception at EL1\n"); + } +#ifdef CONFIG_KPROBES + else if ((esr & BRK64_ESR_MASK) == BRK64_ESR_KPROBES) { + if (kprobe_breakpoint_handler(regs, esr) != DBG_HOOK_HANDLED) + return -EFAULT; + } +#endif + else if (call_break_hook(regs, esr) != DBG_HOOK_HANDLED) { + pr_warn("Unexpected kernel BRK exception at EL1\n"); return -EFAULT; } return 0; } +NOKPROBE_SYMBOL(brk_handler); int aarch32_break_handler(struct pt_regs *regs) { @@ -365,6 +377,7 @@ int aarch32_break_handler(struct pt_regs *regs) send_user_sigtrap(TRAP_BRKPT); return 0; } +NOKPROBE_SYMBOL(aarch32_break_handler); static int __init debug_traps_init(void) { @@ -386,6 +399,7 @@ void user_rewind_single_step(struct task_struct *task) if (test_ti_thread_flag(task_thread_info(task), TIF_SINGLESTEP)) set_regs_spsr_ss(task_pt_regs(task)); } +NOKPROBE_SYMBOL(user_rewind_single_step); void user_fastforward_single_step(struct task_struct *task) { @@ -401,6 +415,7 @@ void kernel_enable_single_step(struct pt_regs *regs) mdscr_write(mdscr_read() | DBG_MDSCR_SS); enable_debug_monitors(DBG_ACTIVE_EL1); } +NOKPROBE_SYMBOL(kernel_enable_single_step); void kernel_disable_single_step(void) { @@ -408,12 +423,14 @@ void kernel_disable_single_step(void) mdscr_write(mdscr_read() & ~DBG_MDSCR_SS); disable_debug_monitors(DBG_ACTIVE_EL1); } +NOKPROBE_SYMBOL(kernel_disable_single_step); int kernel_active_single_step(void) { WARN_ON(!irqs_disabled()); return mdscr_read() & DBG_MDSCR_SS; } +NOKPROBE_SYMBOL(kernel_active_single_step); /* ptrace API */ void user_enable_single_step(struct task_struct *task) @@ -421,8 +438,10 @@ void user_enable_single_step(struct task_struct *task) set_ti_thread_flag(task_thread_info(task), TIF_SINGLESTEP); set_regs_spsr_ss(task_pt_regs(task)); } +NOKPROBE_SYMBOL(user_enable_single_step); void user_disable_single_step(struct task_struct *task) { clear_ti_thread_flag(task_thread_info(task), TIF_SINGLESTEP); } +NOKPROBE_SYMBOL(user_disable_single_step); diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index 78f5248..ba9bee3 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -62,13 +62,61 @@ struct screen_info screen_info __section(.data); int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md) { pteval_t prot_val = create_mapping_protection(md); + bool allow_block_mappings = (md->type != EFI_RUNTIME_SERVICES_CODE && + md->type != EFI_RUNTIME_SERVICES_DATA); + + if (!PAGE_ALIGNED(md->phys_addr) || + !PAGE_ALIGNED(md->num_pages << EFI_PAGE_SHIFT)) { + /* + * If the end address of this region is not aligned to page + * size, the mapping is rounded up, and may end up sharing a + * page frame with the next UEFI memory region. If we create + * a block entry now, we may need to split it again when mapping + * the next region, and support for that is going to be removed + * from the MMU routines. So avoid block mappings altogether in + * that case. + */ + allow_block_mappings = false; + } create_pgd_mapping(mm, md->phys_addr, md->virt_addr, md->num_pages << EFI_PAGE_SHIFT, - __pgprot(prot_val | PTE_NG)); + __pgprot(prot_val | PTE_NG), allow_block_mappings); + return 0; +} + +static int __init set_permissions(pte_t *ptep, pgtable_t token, + unsigned long addr, void *data) +{ + efi_memory_desc_t *md = data; + pte_t pte = *ptep; + + if (md->attribute & EFI_MEMORY_RO) + pte = set_pte_bit(pte, __pgprot(PTE_RDONLY)); + if (md->attribute & EFI_MEMORY_XP) + pte = set_pte_bit(pte, __pgprot(PTE_PXN)); + set_pte(ptep, pte); return 0; } +int __init efi_set_mapping_permissions(struct mm_struct *mm, + efi_memory_desc_t *md) +{ + BUG_ON(md->type != EFI_RUNTIME_SERVICES_CODE && + md->type != EFI_RUNTIME_SERVICES_DATA); + + /* + * Calling apply_to_page_range() is only safe on regions that are + * guaranteed to be mapped down to pages. Since we are only called + * for regions that have been mapped using efi_create_mapping() above + * (and this is checked by the generic Memory Attributes table parsing + * routines), there is no need to check that again here. + */ + return apply_to_page_range(mm, md->virt_addr, + md->num_pages << EFI_PAGE_SHIFT, + set_permissions, md); +} + static int __init arm64_dmi_init(void) { /* diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 12e8d2b..96e4a2b 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -28,6 +28,7 @@ #include <asm/errno.h> #include <asm/esr.h> #include <asm/irq.h> +#include <asm/memory.h> #include <asm/thread_info.h> #include <asm/unistd.h> @@ -97,7 +98,14 @@ mov x29, xzr // fp pointed to user-space .else add x21, sp, #S_FRAME_SIZE - .endif + get_thread_info tsk + /* Save the task's original addr_limit and set USER_DS (TASK_SIZE_64) */ + ldr x20, [tsk, #TI_ADDR_LIMIT] + str x20, [sp, #S_ORIG_ADDR_LIMIT] + mov x20, #TASK_SIZE_64 + str x20, [tsk, #TI_ADDR_LIMIT] + ALTERNATIVE(nop, SET_PSTATE_UAO(0), ARM64_HAS_UAO, CONFIG_ARM64_UAO) + .endif /* \el == 0 */ mrs x22, elr_el1 mrs x23, spsr_el1 stp lr, x21, [sp, #S_LR] @@ -128,6 +136,14 @@ .endm .macro kernel_exit, el + .if \el != 0 + /* Restore the task's original addr_limit. */ + ldr x20, [sp, #S_ORIG_ADDR_LIMIT] + str x20, [tsk, #TI_ADDR_LIMIT] + + /* No need to restore UAO, it will be restored from SPSR_EL1 */ + .endif + ldp x21, x22, [sp, #S_PC] // load ELR, SPSR .if \el == 0 ct_user_enter @@ -242,6 +258,7 @@ tsk .req x28 // current thread_info /* * Exception vectors. */ + .pushsection ".entry.text", "ax" .align 11 ENTRY(vectors) @@ -406,7 +423,6 @@ el1_irq: bl trace_hardirqs_off #endif - get_thread_info tsk irq_handler #ifdef CONFIG_PREEMPT @@ -451,7 +467,7 @@ el0_sync: cmp x24, #ESR_ELx_EC_FP_EXC64 // FP/ASIMD exception b.eq el0_fpsimd_exc cmp x24, #ESR_ELx_EC_SYS64 // configurable trap - b.eq el0_undef + b.eq el0_sys cmp x24, #ESR_ELx_EC_SP_ALIGN // stack alignment exception b.eq el0_sp_pc cmp x24, #ESR_ELx_EC_PC_ALIGN // pc alignment exception @@ -532,7 +548,7 @@ el0_ia: enable_dbg_and_irq ct_user_exit mov x0, x26 - orr x1, x25, #1 << 24 // use reserved ISS bit for instruction aborts + mov x1, x25 mov x2, sp bl do_mem_abort b ret_to_user @@ -579,6 +595,16 @@ el0_undef: mov x0, sp bl do_undefinstr b ret_to_user +el0_sys: + /* + * System instructions, for trapped cache maintenance instructions + */ + enable_dbg_and_irq + ct_user_exit + mov x0, x25 + mov x1, sp + bl do_sysinstr + b ret_to_user el0_dbg: /* * Debug exception handling @@ -774,6 +800,8 @@ __ni_sys_trace: bl do_ni_syscall b __sys_trace_return + .popsection // .entry.text + /* * Special system call wrappers. */ diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index f8df75d..21ab5df 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -33,6 +33,7 @@ #include <asm/pgtable.h> #include <asm/pgtable-hwdef.h> #include <asm/sections.h> +#include <asm/smp.h> #include <asm/suspend.h> #include <asm/virt.h> @@ -236,6 +237,11 @@ int swsusp_arch_suspend(void) unsigned long flags; struct sleep_stack_data state; + if (cpus_are_stuck_in_kernel()) { + pr_err("Can't hibernate: no mechanism to offline secondary CPUs.\n"); + return -EBUSY; + } + local_dbg_save(flags); if (__cpu_suspend_enter(&state)) { diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index ce21aa8..26a6bf7 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -24,6 +24,7 @@ #include <linux/cpu_pm.h> #include <linux/errno.h> #include <linux/hw_breakpoint.h> +#include <linux/kprobes.h> #include <linux/perf_event.h> #include <linux/ptrace.h> #include <linux/smp.h> @@ -127,6 +128,7 @@ static u64 read_wb_reg(int reg, int n) return val; } +NOKPROBE_SYMBOL(read_wb_reg); static void write_wb_reg(int reg, int n, u64 val) { @@ -140,6 +142,7 @@ static void write_wb_reg(int reg, int n, u64 val) } isb(); } +NOKPROBE_SYMBOL(write_wb_reg); /* * Convert a breakpoint privilege level to the corresponding exception @@ -157,6 +160,7 @@ static enum dbg_active_el debug_exception_level(int privilege) return -EINVAL; } } +NOKPROBE_SYMBOL(debug_exception_level); enum hw_breakpoint_ops { HW_BREAKPOINT_INSTALL, @@ -575,6 +579,7 @@ static void toggle_bp_registers(int reg, enum dbg_active_el el, int enable) write_wb_reg(reg, i, ctrl); } } +NOKPROBE_SYMBOL(toggle_bp_registers); /* * Debug exception handlers. @@ -654,6 +659,7 @@ unlock: return 0; } +NOKPROBE_SYMBOL(breakpoint_handler); static int watchpoint_handler(unsigned long addr, unsigned int esr, struct pt_regs *regs) @@ -756,6 +762,7 @@ unlock: return 0; } +NOKPROBE_SYMBOL(watchpoint_handler); /* * Handle single-step exception. @@ -813,6 +820,7 @@ int reinstall_suspended_bps(struct pt_regs *regs) return !handled_exception; } +NOKPROBE_SYMBOL(reinstall_suspended_bps); /* * Context-switcher for restoring suspended breakpoints. diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S index 8727f44..d3b5f75 100644 --- a/arch/arm64/kernel/hyp-stub.S +++ b/arch/arm64/kernel/hyp-stub.S @@ -71,8 +71,16 @@ el1_sync: msr vbar_el2, x1 b 9f +2: cmp x0, #HVC_SOFT_RESTART + b.ne 3f + mov x0, x2 + mov x2, x4 + mov x4, x1 + mov x1, x3 + br x4 // no return + /* Someone called kvm_call_hyp() against the hyp-stub... */ -2: mov x0, #ARM_EXCEPTION_HYP_GONE +3: mov x0, #ARM_EXCEPTION_HYP_GONE 9: eret ENDPROC(el1_sync) diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index 368c082..63f9432 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -30,6 +30,7 @@ #include <asm/cacheflush.h> #include <asm/debug-monitors.h> #include <asm/fixmap.h> +#include <asm/opcodes.h> #include <asm/insn.h> #define AARCH64_INSN_SF_BIT BIT(31) @@ -162,6 +163,32 @@ static bool __kprobes __aarch64_insn_hotpatch_safe(u32 insn) aarch64_insn_is_nop(insn); } +bool __kprobes aarch64_insn_uses_literal(u32 insn) +{ + /* ldr/ldrsw (literal), prfm */ + + return aarch64_insn_is_ldr_lit(insn) || + aarch64_insn_is_ldrsw_lit(insn) || + aarch64_insn_is_adr_adrp(insn) || + aarch64_insn_is_prfm_lit(insn); +} + +bool __kprobes aarch64_insn_is_branch(u32 insn) +{ + /* b, bl, cb*, tb*, b.cond, br, blr */ + + return aarch64_insn_is_b(insn) || + aarch64_insn_is_bl(insn) || + aarch64_insn_is_cbz(insn) || + aarch64_insn_is_cbnz(insn) || + aarch64_insn_is_tbz(insn) || + aarch64_insn_is_tbnz(insn) || + aarch64_insn_is_ret(insn) || + aarch64_insn_is_br(insn) || + aarch64_insn_is_blr(insn) || + aarch64_insn_is_bcond(insn); +} + /* * ARM Architecture Reference Manual for ARMv8 Profile-A, Issue A.a * Section B2.6.5 "Concurrent modification and execution of instructions": @@ -1175,6 +1202,14 @@ u32 aarch64_set_branch_offset(u32 insn, s32 offset) BUG(); } +/* + * Extract the Op/CR data from a msr/mrs instruction. + */ +u32 aarch64_insn_extract_system_reg(u32 insn) +{ + return (insn & 0x1FFFE0) >> 5; +} + bool aarch32_insn_is_wide(u32 insn) { return insn >= 0xe800; @@ -1200,3 +1235,101 @@ u32 aarch32_insn_mcr_extract_crm(u32 insn) { return insn & CRM_MASK; } + +static bool __kprobes __check_eq(unsigned long pstate) +{ + return (pstate & PSR_Z_BIT) != 0; +} + +static bool __kprobes __check_ne(unsigned long pstate) +{ + return (pstate & PSR_Z_BIT) == 0; +} + +static bool __kprobes __check_cs(unsigned long pstate) +{ + return (pstate & PSR_C_BIT) != 0; +} + +static bool __kprobes __check_cc(unsigned long pstate) +{ + return (pstate & PSR_C_BIT) == 0; +} + +static bool __kprobes __check_mi(unsigned long pstate) +{ + return (pstate & PSR_N_BIT) != 0; +} + +static bool __kprobes __check_pl(unsigned long pstate) +{ + return (pstate & PSR_N_BIT) == 0; +} + +static bool __kprobes __check_vs(unsigned long pstate) +{ + return (pstate & PSR_V_BIT) != 0; +} + +static bool __kprobes __check_vc(unsigned long pstate) +{ + return (pstate & PSR_V_BIT) == 0; +} + +static bool __kprobes __check_hi(unsigned long pstate) +{ + pstate &= ~(pstate >> 1); /* PSR_C_BIT &= ~PSR_Z_BIT */ + return (pstate & PSR_C_BIT) != 0; +} + +static bool __kprobes __check_ls(unsigned long pstate) +{ + pstate &= ~(pstate >> 1); /* PSR_C_BIT &= ~PSR_Z_BIT */ + return (pstate & PSR_C_BIT) == 0; +} + +static bool __kprobes __check_ge(unsigned long pstate) +{ + pstate ^= (pstate << 3); /* PSR_N_BIT ^= PSR_V_BIT */ + return (pstate & PSR_N_BIT) == 0; +} + +static bool __kprobes __check_lt(unsigned long pstate) +{ + pstate ^= (pstate << 3); /* PSR_N_BIT ^= PSR_V_BIT */ + return (pstate & PSR_N_BIT) != 0; +} + +static bool __kprobes __check_gt(unsigned long pstate) +{ + /*PSR_N_BIT ^= PSR_V_BIT */ + unsigned long temp = pstate ^ (pstate << 3); + + temp |= (pstate << 1); /*PSR_N_BIT |= PSR_Z_BIT */ + return (temp & PSR_N_BIT) == 0; +} + +static bool __kprobes __check_le(unsigned long pstate) +{ + /*PSR_N_BIT ^= PSR_V_BIT */ + unsigned long temp = pstate ^ (pstate << 3); + + temp |= (pstate << 1); /*PSR_N_BIT |= PSR_Z_BIT */ + return (temp & PSR_N_BIT) != 0; +} + +static bool __kprobes __check_al(unsigned long pstate) +{ + return true; +} + +/* + * Note that the ARMv8 ARM calls condition code 0b1111 "nv", but states that + * it behaves identically to 0b1110 ("al"). + */ +pstate_check_t * const aarch32_opcode_cond_checks[16] = { + __check_eq, __check_ne, __check_cs, __check_cc, + __check_mi, __check_pl, __check_vs, __check_vc, + __check_hi, __check_ls, __check_ge, __check_lt, + __check_gt, __check_le, __check_al, __check_al +}; diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c index b67531a..8c57f64 100644 --- a/arch/arm64/kernel/kgdb.c +++ b/arch/arm64/kernel/kgdb.c @@ -22,6 +22,7 @@ #include <linux/irq.h> #include <linux/kdebug.h> #include <linux/kgdb.h> +#include <linux/kprobes.h> #include <asm/traps.h> struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = { @@ -58,7 +59,17 @@ struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = { { "x30", 8, offsetof(struct pt_regs, regs[30])}, { "sp", 8, offsetof(struct pt_regs, sp)}, { "pc", 8, offsetof(struct pt_regs, pc)}, - { "pstate", 8, offsetof(struct pt_regs, pstate)}, + /* + * struct pt_regs thinks PSTATE is 64-bits wide but gdb remote + * protocol disagrees. Therefore we must extract only the lower + * 32-bits. Look for the big comment in asm/kgdb.h for more + * detail. + */ + { "pstate", 4, offsetof(struct pt_regs, pstate) +#ifdef CONFIG_CPU_BIG_ENDIAN + + 4 +#endif + }, { "v0", 16, -1 }, { "v1", 16, -1 }, { "v2", 16, -1 }, @@ -128,6 +139,8 @@ sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *task) memset((char *)gdb_regs, 0, NUMREGBYTES); thread_regs = task_pt_regs(task); memcpy((void *)gdb_regs, (void *)thread_regs->regs, GP_REG_BYTES); + /* Special case for PSTATE (check comments in asm/kgdb.h for details) */ + dbg_get_reg(33, gdb_regs + GP_REG_BYTES, thread_regs); } void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc) @@ -218,6 +231,7 @@ static int kgdb_brk_fn(struct pt_regs *regs, unsigned int esr) kgdb_handle_exception(1, SIGTRAP, 0, regs); return 0; } +NOKPROBE_SYMBOL(kgdb_brk_fn) static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned int esr) { @@ -226,12 +240,14 @@ static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned int esr) return 0; } +NOKPROBE_SYMBOL(kgdb_compiled_brk_fn); static int kgdb_step_brk_fn(struct pt_regs *regs, unsigned int esr) { kgdb_handle_exception(1, SIGTRAP, 0, regs); return 0; } +NOKPROBE_SYMBOL(kgdb_step_brk_fn); static struct break_hook kgdb_brkpt_hook = { .esr_mask = 0xffffffff, diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c new file mode 100644 index 0000000..bc96c8a --- /dev/null +++ b/arch/arm64/kernel/machine_kexec.c @@ -0,0 +1,212 @@ +/* + * kexec for arm64 + * + * Copyright (C) Linaro. + * Copyright (C) Huawei Futurewei Technologies. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/kexec.h> +#include <linux/smp.h> + +#include <asm/cacheflush.h> +#include <asm/cpu_ops.h> +#include <asm/mmu_context.h> + +#include "cpu-reset.h" + +/* Global variables for the arm64_relocate_new_kernel routine. */ +extern const unsigned char arm64_relocate_new_kernel[]; +extern const unsigned long arm64_relocate_new_kernel_size; + +static unsigned long kimage_start; + +/** + * kexec_image_info - For debugging output. + */ +#define kexec_image_info(_i) _kexec_image_info(__func__, __LINE__, _i) +static void _kexec_image_info(const char *func, int line, + const struct kimage *kimage) +{ + unsigned long i; + + pr_debug("%s:%d:\n", func, line); + pr_debug(" kexec kimage info:\n"); + pr_debug(" type: %d\n", kimage->type); + pr_debug(" start: %lx\n", kimage->start); + pr_debug(" head: %lx\n", kimage->head); + pr_debug(" nr_segments: %lu\n", kimage->nr_segments); + + for (i = 0; i < kimage->nr_segments; i++) { + pr_debug(" segment[%lu]: %016lx - %016lx, 0x%lx bytes, %lu pages\n", + i, + kimage->segment[i].mem, + kimage->segment[i].mem + kimage->segment[i].memsz, + kimage->segment[i].memsz, + kimage->segment[i].memsz / PAGE_SIZE); + } +} + +void machine_kexec_cleanup(struct kimage *kimage) +{ + /* Empty routine needed to avoid build errors. */ +} + +/** + * machine_kexec_prepare - Prepare for a kexec reboot. + * + * Called from the core kexec code when a kernel image is loaded. + * Forbid loading a kexec kernel if we have no way of hotplugging cpus or cpus + * are stuck in the kernel. This avoids a panic once we hit machine_kexec(). + */ +int machine_kexec_prepare(struct kimage *kimage) +{ + kimage_start = kimage->start; + + kexec_image_info(kimage); + + if (kimage->type != KEXEC_TYPE_CRASH && cpus_are_stuck_in_kernel()) { + pr_err("Can't kexec: CPUs are stuck in the kernel.\n"); + return -EBUSY; + } + + return 0; +} + +/** + * kexec_list_flush - Helper to flush the kimage list and source pages to PoC. + */ +static void kexec_list_flush(struct kimage *kimage) +{ + kimage_entry_t *entry; + + for (entry = &kimage->head; ; entry++) { + unsigned int flag; + void *addr; + + /* flush the list entries. */ + __flush_dcache_area(entry, sizeof(kimage_entry_t)); + + flag = *entry & IND_FLAGS; + if (flag == IND_DONE) + break; + + addr = phys_to_virt(*entry & PAGE_MASK); + + switch (flag) { + case IND_INDIRECTION: + /* Set entry point just before the new list page. */ + entry = (kimage_entry_t *)addr - 1; + break; + case IND_SOURCE: + /* flush the source pages. */ + __flush_dcache_area(addr, PAGE_SIZE); + break; + case IND_DESTINATION: + break; + default: + BUG(); + } + } +} + +/** + * kexec_segment_flush - Helper to flush the kimage segments to PoC. + */ +static void kexec_segment_flush(const struct kimage *kimage) +{ + unsigned long i; + + pr_debug("%s:\n", __func__); + + for (i = 0; i < kimage->nr_segments; i++) { + pr_debug(" segment[%lu]: %016lx - %016lx, 0x%lx bytes, %lu pages\n", + i, + kimage->segment[i].mem, + kimage->segment[i].mem + kimage->segment[i].memsz, + kimage->segment[i].memsz, + kimage->segment[i].memsz / PAGE_SIZE); + + __flush_dcache_area(phys_to_virt(kimage->segment[i].mem), + kimage->segment[i].memsz); + } +} + +/** + * machine_kexec - Do the kexec reboot. + * + * Called from the core kexec code for a sys_reboot with LINUX_REBOOT_CMD_KEXEC. + */ +void machine_kexec(struct kimage *kimage) +{ + phys_addr_t reboot_code_buffer_phys; + void *reboot_code_buffer; + + /* + * New cpus may have become stuck_in_kernel after we loaded the image. + */ + BUG_ON(cpus_are_stuck_in_kernel() || (num_online_cpus() > 1)); + + reboot_code_buffer_phys = page_to_phys(kimage->control_code_page); + reboot_code_buffer = phys_to_virt(reboot_code_buffer_phys); + + kexec_image_info(kimage); + + pr_debug("%s:%d: control_code_page: %p\n", __func__, __LINE__, + kimage->control_code_page); + pr_debug("%s:%d: reboot_code_buffer_phys: %pa\n", __func__, __LINE__, + &reboot_code_buffer_phys); + pr_debug("%s:%d: reboot_code_buffer: %p\n", __func__, __LINE__, + reboot_code_buffer); + pr_debug("%s:%d: relocate_new_kernel: %p\n", __func__, __LINE__, + arm64_relocate_new_kernel); + pr_debug("%s:%d: relocate_new_kernel_size: 0x%lx(%lu) bytes\n", + __func__, __LINE__, arm64_relocate_new_kernel_size, + arm64_relocate_new_kernel_size); + + /* + * Copy arm64_relocate_new_kernel to the reboot_code_buffer for use + * after the kernel is shut down. + */ + memcpy(reboot_code_buffer, arm64_relocate_new_kernel, + arm64_relocate_new_kernel_size); + + /* Flush the reboot_code_buffer in preparation for its execution. */ + __flush_dcache_area(reboot_code_buffer, arm64_relocate_new_kernel_size); + flush_icache_range((uintptr_t)reboot_code_buffer, + arm64_relocate_new_kernel_size); + + /* Flush the kimage list and its buffers. */ + kexec_list_flush(kimage); + + /* Flush the new image if already in place. */ + if (kimage->head & IND_DONE) + kexec_segment_flush(kimage); + + pr_info("Bye!\n"); + + /* Disable all DAIF exceptions. */ + asm volatile ("msr daifset, #0xf" : : : "memory"); + + /* + * cpu_soft_restart will shutdown the MMU, disable data caches, then + * transfer control to the reboot_code_buffer which contains a copy of + * the arm64_relocate_new_kernel routine. arm64_relocate_new_kernel + * uses physical addressing to relocate the new image to its final + * position and transfers control to the image entry point when the + * relocation is complete. + */ + + cpu_soft_restart(1, reboot_code_buffer_phys, kimage->head, + kimage_start, 0); + + BUG(); /* Should never get here. */ +} + +void machine_crash_shutdown(struct pt_regs *regs) +{ + /* Empty routine needed to avoid build errors. */ +} diff --git a/arch/arm64/kernel/probes/Makefile b/arch/arm64/kernel/probes/Makefile new file mode 100644 index 0000000..ce06312 --- /dev/null +++ b/arch/arm64/kernel/probes/Makefile @@ -0,0 +1,3 @@ +obj-$(CONFIG_KPROBES) += kprobes.o decode-insn.o \ + kprobes_trampoline.o \ + simulate-insn.o diff --git a/arch/arm64/kernel/probes/decode-insn.c b/arch/arm64/kernel/probes/decode-insn.c new file mode 100644 index 0000000..37e47a9 --- /dev/null +++ b/arch/arm64/kernel/probes/decode-insn.c @@ -0,0 +1,174 @@ +/* + * arch/arm64/kernel/probes/decode-insn.c + * + * Copyright (C) 2013 Linaro Limited. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +#include <linux/kernel.h> +#include <linux/kprobes.h> +#include <linux/module.h> +#include <asm/kprobes.h> +#include <asm/insn.h> +#include <asm/sections.h> + +#include "decode-insn.h" +#include "simulate-insn.h" + +static bool __kprobes aarch64_insn_is_steppable(u32 insn) +{ + /* + * Branch instructions will write a new value into the PC which is + * likely to be relative to the XOL address and therefore invalid. + * Deliberate generation of an exception during stepping is also not + * currently safe. Lastly, MSR instructions can do any number of nasty + * things we can't handle during single-stepping. + */ + if (aarch64_get_insn_class(insn) == AARCH64_INSN_CLS_BR_SYS) { + if (aarch64_insn_is_branch(insn) || + aarch64_insn_is_msr_imm(insn) || + aarch64_insn_is_msr_reg(insn) || + aarch64_insn_is_exception(insn) || + aarch64_insn_is_eret(insn)) + return false; + + /* + * The MRS instruction may not return a correct value when + * executing in the single-stepping environment. We do make one + * exception, for reading the DAIF bits. + */ + if (aarch64_insn_is_mrs(insn)) + return aarch64_insn_extract_system_reg(insn) + != AARCH64_INSN_SPCLREG_DAIF; + + /* + * The HINT instruction is is problematic when single-stepping, + * except for the NOP case. + */ + if (aarch64_insn_is_hint(insn)) + return aarch64_insn_is_nop(insn); + + return true; + } + + /* + * Instructions which load PC relative literals are not going to work + * when executed from an XOL slot. Instructions doing an exclusive + * load/store are not going to complete successfully when single-step + * exception handling happens in the middle of the sequence. + */ + if (aarch64_insn_uses_literal(insn) || + aarch64_insn_is_exclusive(insn)) + return false; + + return true; +} + +/* Return: + * INSN_REJECTED If instruction is one not allowed to kprobe, + * INSN_GOOD If instruction is supported and uses instruction slot, + * INSN_GOOD_NO_SLOT If instruction is supported but doesn't use its slot. + */ +static enum kprobe_insn __kprobes +arm_probe_decode_insn(kprobe_opcode_t insn, struct arch_specific_insn *asi) +{ + /* + * Instructions reading or modifying the PC won't work from the XOL + * slot. + */ + if (aarch64_insn_is_steppable(insn)) + return INSN_GOOD; + + if (aarch64_insn_is_bcond(insn)) { + asi->handler = simulate_b_cond; + } else if (aarch64_insn_is_cbz(insn) || + aarch64_insn_is_cbnz(insn)) { + asi->handler = simulate_cbz_cbnz; + } else if (aarch64_insn_is_tbz(insn) || + aarch64_insn_is_tbnz(insn)) { + asi->handler = simulate_tbz_tbnz; + } else if (aarch64_insn_is_adr_adrp(insn)) { + asi->handler = simulate_adr_adrp; + } else if (aarch64_insn_is_b(insn) || + aarch64_insn_is_bl(insn)) { + asi->handler = simulate_b_bl; + } else if (aarch64_insn_is_br(insn) || + aarch64_insn_is_blr(insn) || + aarch64_insn_is_ret(insn)) { + asi->handler = simulate_br_blr_ret; + } else if (aarch64_insn_is_ldr_lit(insn)) { + asi->handler = simulate_ldr_literal; + } else if (aarch64_insn_is_ldrsw_lit(insn)) { + asi->handler = simulate_ldrsw_literal; + } else { + /* + * Instruction cannot be stepped out-of-line and we don't + * (yet) simulate it. + */ + return INSN_REJECTED; + } + + return INSN_GOOD_NO_SLOT; +} + +static bool __kprobes +is_probed_address_atomic(kprobe_opcode_t *scan_start, kprobe_opcode_t *scan_end) +{ + while (scan_start > scan_end) { + /* + * atomic region starts from exclusive load and ends with + * exclusive store. + */ + if (aarch64_insn_is_store_ex(le32_to_cpu(*scan_start))) + return false; + else if (aarch64_insn_is_load_ex(le32_to_cpu(*scan_start))) + return true; + scan_start--; + } + + return false; +} + +enum kprobe_insn __kprobes +arm_kprobe_decode_insn(kprobe_opcode_t *addr, struct arch_specific_insn *asi) +{ + enum kprobe_insn decoded; + kprobe_opcode_t insn = le32_to_cpu(*addr); + kprobe_opcode_t *scan_start = addr - 1; + kprobe_opcode_t *scan_end = addr - MAX_ATOMIC_CONTEXT_SIZE; +#if defined(CONFIG_MODULES) && defined(MODULES_VADDR) + struct module *mod; +#endif + + if (addr >= (kprobe_opcode_t *)_text && + scan_end < (kprobe_opcode_t *)_text) + scan_end = (kprobe_opcode_t *)_text; +#if defined(CONFIG_MODULES) && defined(MODULES_VADDR) + else { + preempt_disable(); + mod = __module_address((unsigned long)addr); + if (mod && within_module_init((unsigned long)addr, mod) && + !within_module_init((unsigned long)scan_end, mod)) + scan_end = (kprobe_opcode_t *)mod->init_layout.base; + else if (mod && within_module_core((unsigned long)addr, mod) && + !within_module_core((unsigned long)scan_end, mod)) + scan_end = (kprobe_opcode_t *)mod->core_layout.base; + preempt_enable(); + } +#endif + decoded = arm_probe_decode_insn(insn, asi); + + if (decoded == INSN_REJECTED || + is_probed_address_atomic(scan_start, scan_end)) + return INSN_REJECTED; + + return decoded; +} diff --git a/arch/arm64/kernel/probes/decode-insn.h b/arch/arm64/kernel/probes/decode-insn.h new file mode 100644 index 0000000..d438289 --- /dev/null +++ b/arch/arm64/kernel/probes/decode-insn.h @@ -0,0 +1,35 @@ +/* + * arch/arm64/kernel/probes/decode-insn.h + * + * Copyright (C) 2013 Linaro Limited. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +#ifndef _ARM_KERNEL_KPROBES_ARM64_H +#define _ARM_KERNEL_KPROBES_ARM64_H + +/* + * ARM strongly recommends a limit of 128 bytes between LoadExcl and + * StoreExcl instructions in a single thread of execution. So keep the + * max atomic context size as 32. + */ +#define MAX_ATOMIC_CONTEXT_SIZE (128 / sizeof(kprobe_opcode_t)) + +enum kprobe_insn { + INSN_REJECTED, + INSN_GOOD_NO_SLOT, + INSN_GOOD, +}; + +enum kprobe_insn __kprobes +arm_kprobe_decode_insn(kprobe_opcode_t *addr, struct arch_specific_insn *asi); + +#endif /* _ARM_KERNEL_KPROBES_ARM64_H */ diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c new file mode 100644 index 0000000..bf97685 --- /dev/null +++ b/arch/arm64/kernel/probes/kprobes.c @@ -0,0 +1,686 @@ +/* + * arch/arm64/kernel/probes/kprobes.c + * + * Kprobes support for ARM64 + * + * Copyright (C) 2013 Linaro Limited. + * Author: Sandeepa Prabhu <sandeepa.prabhu@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + */ +#include <linux/kasan.h> +#include <linux/kernel.h> +#include <linux/kprobes.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/stop_machine.h> +#include <linux/stringify.h> +#include <asm/traps.h> +#include <asm/ptrace.h> +#include <asm/cacheflush.h> +#include <asm/debug-monitors.h> +#include <asm/system_misc.h> +#include <asm/insn.h> +#include <asm/uaccess.h> +#include <asm/irq.h> +#include <asm-generic/sections.h> + +#include "decode-insn.h" + +DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; +DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); + +static void __kprobes +post_kprobe_handler(struct kprobe_ctlblk *, struct pt_regs *); + +static inline unsigned long min_stack_size(unsigned long addr) +{ + unsigned long size; + + if (on_irq_stack(addr, raw_smp_processor_id())) + size = IRQ_STACK_PTR(raw_smp_processor_id()) - addr; + else + size = (unsigned long)current_thread_info() + THREAD_START_SP - addr; + + return min(size, FIELD_SIZEOF(struct kprobe_ctlblk, jprobes_stack)); +} + +static void __kprobes arch_prepare_ss_slot(struct kprobe *p) +{ + /* prepare insn slot */ + p->ainsn.insn[0] = cpu_to_le32(p->opcode); + + flush_icache_range((uintptr_t) (p->ainsn.insn), + (uintptr_t) (p->ainsn.insn) + + MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); + + /* + * Needs restoring of return address after stepping xol. + */ + p->ainsn.restore = (unsigned long) p->addr + + sizeof(kprobe_opcode_t); +} + +static void __kprobes arch_prepare_simulate(struct kprobe *p) +{ + /* This instructions is not executed xol. No need to adjust the PC */ + p->ainsn.restore = 0; +} + +static void __kprobes arch_simulate_insn(struct kprobe *p, struct pt_regs *regs) +{ + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + + if (p->ainsn.handler) + p->ainsn.handler((u32)p->opcode, (long)p->addr, regs); + + /* single step simulated, now go for post processing */ + post_kprobe_handler(kcb, regs); +} + +int __kprobes arch_prepare_kprobe(struct kprobe *p) +{ + unsigned long probe_addr = (unsigned long)p->addr; + extern char __start_rodata[]; + extern char __end_rodata[]; + + if (probe_addr & 0x3) + return -EINVAL; + + /* copy instruction */ + p->opcode = le32_to_cpu(*p->addr); + + if (in_exception_text(probe_addr)) + return -EINVAL; + if (probe_addr >= (unsigned long) __start_rodata && + probe_addr <= (unsigned long) __end_rodata) + return -EINVAL; + + /* decode instruction */ + switch (arm_kprobe_decode_insn(p->addr, &p->ainsn)) { + case INSN_REJECTED: /* insn not supported */ + return -EINVAL; + + case INSN_GOOD_NO_SLOT: /* insn need simulation */ + p->ainsn.insn = NULL; + break; + + case INSN_GOOD: /* instruction uses slot */ + p->ainsn.insn = get_insn_slot(); + if (!p->ainsn.insn) + return -ENOMEM; + break; + }; + + /* prepare the instruction */ + if (p->ainsn.insn) + arch_prepare_ss_slot(p); + else + arch_prepare_simulate(p); + + return 0; +} + +static int __kprobes patch_text(kprobe_opcode_t *addr, u32 opcode) +{ + void *addrs[1]; + u32 insns[1]; + + addrs[0] = (void *)addr; + insns[0] = (u32)opcode; + + return aarch64_insn_patch_text(addrs, insns, 1); +} + +/* arm kprobe: install breakpoint in text */ +void __kprobes arch_arm_kprobe(struct kprobe *p) +{ + patch_text(p->addr, BRK64_OPCODE_KPROBES); +} + +/* disarm kprobe: remove breakpoint from text */ +void __kprobes arch_disarm_kprobe(struct kprobe *p) +{ + patch_text(p->addr, p->opcode); +} + +void __kprobes arch_remove_kprobe(struct kprobe *p) +{ + if (p->ainsn.insn) { + free_insn_slot(p->ainsn.insn, 0); + p->ainsn.insn = NULL; + } +} + +static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb) +{ + kcb->prev_kprobe.kp = kprobe_running(); + kcb->prev_kprobe.status = kcb->kprobe_status; +} + +static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) +{ + __this_cpu_write(current_kprobe, kcb->prev_kprobe.kp); + kcb->kprobe_status = kcb->prev_kprobe.status; +} + +static void __kprobes set_current_kprobe(struct kprobe *p) +{ + __this_cpu_write(current_kprobe, p); +} + +/* + * The D-flag (Debug mask) is set (masked) upon debug exception entry. + * Kprobes needs to clear (unmask) D-flag -ONLY- in case of recursive + * probe i.e. when probe hit from kprobe handler context upon + * executing the pre/post handlers. In this case we return with + * D-flag clear so that single-stepping can be carried-out. + * + * Leave D-flag set in all other cases. + */ +static void __kprobes +spsr_set_debug_flag(struct pt_regs *regs, int mask) +{ + unsigned long spsr = regs->pstate; + + if (mask) + spsr |= PSR_D_BIT; + else + spsr &= ~PSR_D_BIT; + + regs->pstate = spsr; +} + +/* + * Interrupts need to be disabled before single-step mode is set, and not + * reenabled until after single-step mode ends. + * Without disabling interrupt on local CPU, there is a chance of + * interrupt occurrence in the period of exception return and start of + * out-of-line single-step, that result in wrongly single stepping + * into the interrupt handler. + */ +static void __kprobes kprobes_save_local_irqflag(struct kprobe_ctlblk *kcb, + struct pt_regs *regs) +{ + kcb->saved_irqflag = regs->pstate; + regs->pstate |= PSR_I_BIT; +} + +static void __kprobes kprobes_restore_local_irqflag(struct kprobe_ctlblk *kcb, + struct pt_regs *regs) +{ + if (kcb->saved_irqflag & PSR_I_BIT) + regs->pstate |= PSR_I_BIT; + else + regs->pstate &= ~PSR_I_BIT; +} + +static void __kprobes +set_ss_context(struct kprobe_ctlblk *kcb, unsigned long addr) +{ + kcb->ss_ctx.ss_pending = true; + kcb->ss_ctx.match_addr = addr + sizeof(kprobe_opcode_t); +} + +static void __kprobes clear_ss_context(struct kprobe_ctlblk *kcb) +{ + kcb->ss_ctx.ss_pending = false; + kcb->ss_ctx.match_addr = 0; +} + +static void __kprobes setup_singlestep(struct kprobe *p, + struct pt_regs *regs, + struct kprobe_ctlblk *kcb, int reenter) +{ + unsigned long slot; + + if (reenter) { + save_previous_kprobe(kcb); + set_current_kprobe(p); + kcb->kprobe_status = KPROBE_REENTER; + } else { + kcb->kprobe_status = KPROBE_HIT_SS; + } + + + if (p->ainsn.insn) { + /* prepare for single stepping */ + slot = (unsigned long)p->ainsn.insn; + + set_ss_context(kcb, slot); /* mark pending ss */ + + if (kcb->kprobe_status == KPROBE_REENTER) + spsr_set_debug_flag(regs, 0); + else + WARN_ON(regs->pstate & PSR_D_BIT); + + /* IRQs and single stepping do not mix well. */ + kprobes_save_local_irqflag(kcb, regs); + kernel_enable_single_step(regs); + instruction_pointer_set(regs, slot); + } else { + /* insn simulation */ + arch_simulate_insn(p, regs); + } +} + +static int __kprobes reenter_kprobe(struct kprobe *p, + struct pt_regs *regs, + struct kprobe_ctlblk *kcb) +{ + switch (kcb->kprobe_status) { + case KPROBE_HIT_SSDONE: + case KPROBE_HIT_ACTIVE: + kprobes_inc_nmissed_count(p); + setup_singlestep(p, regs, kcb, 1); + break; + case KPROBE_HIT_SS: + case KPROBE_REENTER: + pr_warn("Unrecoverable kprobe detected at %p.\n", p->addr); + dump_kprobe(p); + BUG(); + break; + default: + WARN_ON(1); + return 0; + } + + return 1; +} + +static void __kprobes +post_kprobe_handler(struct kprobe_ctlblk *kcb, struct pt_regs *regs) +{ + struct kprobe *cur = kprobe_running(); + + if (!cur) + return; + + /* return addr restore if non-branching insn */ + if (cur->ainsn.restore != 0) + instruction_pointer_set(regs, cur->ainsn.restore); + + /* restore back original saved kprobe variables and continue */ + if (kcb->kprobe_status == KPROBE_REENTER) { + restore_previous_kprobe(kcb); + return; + } + /* call post handler */ + kcb->kprobe_status = KPROBE_HIT_SSDONE; + if (cur->post_handler) { + /* post_handler can hit breakpoint and single step + * again, so we enable D-flag for recursive exception. + */ + cur->post_handler(cur, regs, 0); + } + + reset_current_kprobe(); +} + +int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr) +{ + struct kprobe *cur = kprobe_running(); + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + + switch (kcb->kprobe_status) { + case KPROBE_HIT_SS: + case KPROBE_REENTER: + /* + * We are here because the instruction being single + * stepped caused a page fault. We reset the current + * kprobe and the ip points back to the probe address + * and allow the page fault handler to continue as a + * normal page fault. + */ + instruction_pointer_set(regs, (unsigned long) cur->addr); + if (!instruction_pointer(regs)) + BUG(); + + kernel_disable_single_step(); + if (kcb->kprobe_status == KPROBE_REENTER) + spsr_set_debug_flag(regs, 1); + + if (kcb->kprobe_status == KPROBE_REENTER) + restore_previous_kprobe(kcb); + else + reset_current_kprobe(); + + break; + case KPROBE_HIT_ACTIVE: + case KPROBE_HIT_SSDONE: + /* + * We increment the nmissed count for accounting, + * we can also use npre/npostfault count for accounting + * these specific fault cases. + */ + kprobes_inc_nmissed_count(cur); + + /* + * We come here because instructions in the pre/post + * handler caused the page_fault, this could happen + * if handler tries to access user space by + * copy_from_user(), get_user() etc. Let the + * user-specified handler try to fix it first. + */ + if (cur->fault_handler && cur->fault_handler(cur, regs, fsr)) + return 1; + + /* + * In case the user-specified fault handler returned + * zero, try to fix up. + */ + if (fixup_exception(regs)) + return 1; + } + return 0; +} + +int __kprobes kprobe_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data) +{ + return NOTIFY_DONE; +} + +static void __kprobes kprobe_handler(struct pt_regs *regs) +{ + struct kprobe *p, *cur_kprobe; + struct kprobe_ctlblk *kcb; + unsigned long addr = instruction_pointer(regs); + + kcb = get_kprobe_ctlblk(); + cur_kprobe = kprobe_running(); + + p = get_kprobe((kprobe_opcode_t *) addr); + + if (p) { + if (cur_kprobe) { + if (reenter_kprobe(p, regs, kcb)) + return; + } else { + /* Probe hit */ + set_current_kprobe(p); + kcb->kprobe_status = KPROBE_HIT_ACTIVE; + + /* + * If we have no pre-handler or it returned 0, we + * continue with normal processing. If we have a + * pre-handler and it returned non-zero, it prepped + * for calling the break_handler below on re-entry, + * so get out doing nothing more here. + * + * pre_handler can hit a breakpoint and can step thru + * before return, keep PSTATE D-flag enabled until + * pre_handler return back. + */ + if (!p->pre_handler || !p->pre_handler(p, regs)) { + setup_singlestep(p, regs, kcb, 0); + return; + } + } + } else if ((le32_to_cpu(*(kprobe_opcode_t *) addr) == + BRK64_OPCODE_KPROBES) && cur_kprobe) { + /* We probably hit a jprobe. Call its break handler. */ + if (cur_kprobe->break_handler && + cur_kprobe->break_handler(cur_kprobe, regs)) { + setup_singlestep(cur_kprobe, regs, kcb, 0); + return; + } + } + /* + * The breakpoint instruction was removed right + * after we hit it. Another cpu has removed + * either a probepoint or a debugger breakpoint + * at this address. In either case, no further + * handling of this interrupt is appropriate. + * Return back to original instruction, and continue. + */ +} + +static int __kprobes +kprobe_ss_hit(struct kprobe_ctlblk *kcb, unsigned long addr) +{ + if ((kcb->ss_ctx.ss_pending) + && (kcb->ss_ctx.match_addr == addr)) { + clear_ss_context(kcb); /* clear pending ss */ + return DBG_HOOK_HANDLED; + } + /* not ours, kprobes should ignore it */ + return DBG_HOOK_ERROR; +} + +int __kprobes +kprobe_single_step_handler(struct pt_regs *regs, unsigned int esr) +{ + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + int retval; + + /* return error if this is not our step */ + retval = kprobe_ss_hit(kcb, instruction_pointer(regs)); + + if (retval == DBG_HOOK_HANDLED) { + kprobes_restore_local_irqflag(kcb, regs); + kernel_disable_single_step(); + + if (kcb->kprobe_status == KPROBE_REENTER) + spsr_set_debug_flag(regs, 1); + + post_kprobe_handler(kcb, regs); + } + + return retval; +} + +int __kprobes +kprobe_breakpoint_handler(struct pt_regs *regs, unsigned int esr) +{ + kprobe_handler(regs); + return DBG_HOOK_HANDLED; +} + +int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) +{ + struct jprobe *jp = container_of(p, struct jprobe, kp); + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + long stack_ptr = kernel_stack_pointer(regs); + + kcb->jprobe_saved_regs = *regs; + /* + * As Linus pointed out, gcc assumes that the callee + * owns the argument space and could overwrite it, e.g. + * tailcall optimization. So, to be absolutely safe + * we also save and restore enough stack bytes to cover + * the argument area. + */ + kasan_disable_current(); + memcpy(kcb->jprobes_stack, (void *)stack_ptr, + min_stack_size(stack_ptr)); + kasan_enable_current(); + + instruction_pointer_set(regs, (unsigned long) jp->entry); + preempt_disable(); + pause_graph_tracing(); + return 1; +} + +void __kprobes jprobe_return(void) +{ + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + + /* + * Jprobe handler return by entering break exception, + * encoded same as kprobe, but with following conditions + * -a special PC to identify it from the other kprobes. + * -restore stack addr to original saved pt_regs + */ + asm volatile(" mov sp, %0 \n" + "jprobe_return_break: brk %1 \n" + : + : "r" (kcb->jprobe_saved_regs.sp), + "I" (BRK64_ESR_KPROBES) + : "memory"); + + unreachable(); +} + +int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) +{ + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + long stack_addr = kcb->jprobe_saved_regs.sp; + long orig_sp = kernel_stack_pointer(regs); + struct jprobe *jp = container_of(p, struct jprobe, kp); + extern const char jprobe_return_break[]; + + if (instruction_pointer(regs) != (u64) jprobe_return_break) + return 0; + + if (orig_sp != stack_addr) { + struct pt_regs *saved_regs = + (struct pt_regs *)kcb->jprobe_saved_regs.sp; + pr_err("current sp %lx does not match saved sp %lx\n", + orig_sp, stack_addr); + pr_err("Saved registers for jprobe %p\n", jp); + show_regs(saved_regs); + pr_err("Current registers\n"); + show_regs(regs); + BUG(); + } + unpause_graph_tracing(); + *regs = kcb->jprobe_saved_regs; + kasan_disable_current(); + memcpy((void *)stack_addr, kcb->jprobes_stack, + min_stack_size(stack_addr)); + kasan_enable_current(); + preempt_enable_no_resched(); + return 1; +} + +bool arch_within_kprobe_blacklist(unsigned long addr) +{ + extern char __idmap_text_start[], __idmap_text_end[]; + extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[]; + + if ((addr >= (unsigned long)__kprobes_text_start && + addr < (unsigned long)__kprobes_text_end) || + (addr >= (unsigned long)__entry_text_start && + addr < (unsigned long)__entry_text_end) || + (addr >= (unsigned long)__idmap_text_start && + addr < (unsigned long)__idmap_text_end) || + !!search_exception_tables(addr)) + return true; + + if (!is_kernel_in_hyp_mode()) { + if ((addr >= (unsigned long)__hyp_text_start && + addr < (unsigned long)__hyp_text_end) || + (addr >= (unsigned long)__hyp_idmap_text_start && + addr < (unsigned long)__hyp_idmap_text_end)) + return true; + } + + return false; +} + +void __kprobes __used *trampoline_probe_handler(struct pt_regs *regs) +{ + struct kretprobe_instance *ri = NULL; + struct hlist_head *head, empty_rp; + struct hlist_node *tmp; + unsigned long flags, orig_ret_address = 0; + unsigned long trampoline_address = + (unsigned long)&kretprobe_trampoline; + kprobe_opcode_t *correct_ret_addr = NULL; + + INIT_HLIST_HEAD(&empty_rp); + kretprobe_hash_lock(current, &head, &flags); + + /* + * It is possible to have multiple instances associated with a given + * task either because multiple functions in the call path have + * return probes installed on them, and/or more than one + * return probe was registered for a target function. + * + * We can handle this because: + * - instances are always pushed into the head of the list + * - when multiple return probes are registered for the same + * function, the (chronologically) first instance's ret_addr + * will be the real return address, and all the rest will + * point to kretprobe_trampoline. + */ + hlist_for_each_entry_safe(ri, tmp, head, hlist) { + if (ri->task != current) + /* another task is sharing our hash bucket */ + continue; + + orig_ret_address = (unsigned long)ri->ret_addr; + + if (orig_ret_address != trampoline_address) + /* + * This is the real return address. Any other + * instances associated with this task are for + * other calls deeper on the call stack + */ + break; + } + + kretprobe_assert(ri, orig_ret_address, trampoline_address); + + correct_ret_addr = ri->ret_addr; + hlist_for_each_entry_safe(ri, tmp, head, hlist) { + if (ri->task != current) + /* another task is sharing our hash bucket */ + continue; + + orig_ret_address = (unsigned long)ri->ret_addr; + if (ri->rp && ri->rp->handler) { + __this_cpu_write(current_kprobe, &ri->rp->kp); + get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE; + ri->ret_addr = correct_ret_addr; + ri->rp->handler(ri, regs); + __this_cpu_write(current_kprobe, NULL); + } + + recycle_rp_inst(ri, &empty_rp); + + if (orig_ret_address != trampoline_address) + /* + * This is the real return address. Any other + * instances associated with this task are for + * other calls deeper on the call stack + */ + break; + } + + kretprobe_hash_unlock(current, &flags); + + hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) { + hlist_del(&ri->hlist); + kfree(ri); + } + return (void *)orig_ret_address; +} + +void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, + struct pt_regs *regs) +{ + ri->ret_addr = (kprobe_opcode_t *)regs->regs[30]; + + /* replace return addr (x30) with trampoline */ + regs->regs[30] = (long)&kretprobe_trampoline; +} + +int __kprobes arch_trampoline_kprobe(struct kprobe *p) +{ + return 0; +} + +int __init arch_init_kprobes(void) +{ + return 0; +} diff --git a/arch/arm64/kernel/probes/kprobes_trampoline.S b/arch/arm64/kernel/probes/kprobes_trampoline.S new file mode 100644 index 0000000..5d6e7f1 --- /dev/null +++ b/arch/arm64/kernel/probes/kprobes_trampoline.S @@ -0,0 +1,81 @@ +/* + * trampoline entry and return code for kretprobes. + */ + +#include <linux/linkage.h> +#include <asm/asm-offsets.h> +#include <asm/assembler.h> + + .text + + .macro save_all_base_regs + stp x0, x1, [sp, #S_X0] + stp x2, x3, [sp, #S_X2] + stp x4, x5, [sp, #S_X4] + stp x6, x7, [sp, #S_X6] + stp x8, x9, [sp, #S_X8] + stp x10, x11, [sp, #S_X10] + stp x12, x13, [sp, #S_X12] + stp x14, x15, [sp, #S_X14] + stp x16, x17, [sp, #S_X16] + stp x18, x19, [sp, #S_X18] + stp x20, x21, [sp, #S_X20] + stp x22, x23, [sp, #S_X22] + stp x24, x25, [sp, #S_X24] + stp x26, x27, [sp, #S_X26] + stp x28, x29, [sp, #S_X28] + add x0, sp, #S_FRAME_SIZE + stp lr, x0, [sp, #S_LR] + /* + * Construct a useful saved PSTATE + */ + mrs x0, nzcv + mrs x1, daif + orr x0, x0, x1 + mrs x1, CurrentEL + orr x0, x0, x1 + mrs x1, SPSel + orr x0, x0, x1 + stp xzr, x0, [sp, #S_PC] + .endm + + .macro restore_all_base_regs + ldr x0, [sp, #S_PSTATE] + and x0, x0, #(PSR_N_BIT | PSR_Z_BIT | PSR_C_BIT | PSR_V_BIT) + msr nzcv, x0 + ldp x0, x1, [sp, #S_X0] + ldp x2, x3, [sp, #S_X2] + ldp x4, x5, [sp, #S_X4] + ldp x6, x7, [sp, #S_X6] + ldp x8, x9, [sp, #S_X8] + ldp x10, x11, [sp, #S_X10] + ldp x12, x13, [sp, #S_X12] + ldp x14, x15, [sp, #S_X14] + ldp x16, x17, [sp, #S_X16] + ldp x18, x19, [sp, #S_X18] + ldp x20, x21, [sp, #S_X20] + ldp x22, x23, [sp, #S_X22] + ldp x24, x25, [sp, #S_X24] + ldp x26, x27, [sp, #S_X26] + ldp x28, x29, [sp, #S_X28] + .endm + +ENTRY(kretprobe_trampoline) + sub sp, sp, #S_FRAME_SIZE + + save_all_base_regs + + mov x0, sp + bl trampoline_probe_handler + /* + * Replace trampoline address in lr with actual orig_ret_addr return + * address. + */ + mov lr, x0 + + restore_all_base_regs + + add sp, sp, #S_FRAME_SIZE + ret + +ENDPROC(kretprobe_trampoline) diff --git a/arch/arm64/kernel/probes/simulate-insn.c b/arch/arm64/kernel/probes/simulate-insn.c new file mode 100644 index 0000000..8977ce9 --- /dev/null +++ b/arch/arm64/kernel/probes/simulate-insn.c @@ -0,0 +1,217 @@ +/* + * arch/arm64/kernel/probes/simulate-insn.c + * + * Copyright (C) 2013 Linaro Limited. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +#include <linux/kernel.h> +#include <linux/kprobes.h> + +#include "simulate-insn.h" + +#define sign_extend(x, signbit) \ + ((x) | (0 - ((x) & (1 << (signbit))))) + +#define bbl_displacement(insn) \ + sign_extend(((insn) & 0x3ffffff) << 2, 27) + +#define bcond_displacement(insn) \ + sign_extend(((insn >> 5) & 0x7ffff) << 2, 20) + +#define cbz_displacement(insn) \ + sign_extend(((insn >> 5) & 0x7ffff) << 2, 20) + +#define tbz_displacement(insn) \ + sign_extend(((insn >> 5) & 0x3fff) << 2, 15) + +#define ldr_displacement(insn) \ + sign_extend(((insn >> 5) & 0x7ffff) << 2, 20) + +static inline void set_x_reg(struct pt_regs *regs, int reg, u64 val) +{ + if (reg < 31) + regs->regs[reg] = val; +} + +static inline void set_w_reg(struct pt_regs *regs, int reg, u64 val) +{ + if (reg < 31) + regs->regs[reg] = lower_32_bits(val); +} + +static inline u64 get_x_reg(struct pt_regs *regs, int reg) +{ + if (reg < 31) + return regs->regs[reg]; + else + return 0; +} + +static inline u32 get_w_reg(struct pt_regs *regs, int reg) +{ + if (reg < 31) + return lower_32_bits(regs->regs[reg]); + else + return 0; +} + +static bool __kprobes check_cbz(u32 opcode, struct pt_regs *regs) +{ + int xn = opcode & 0x1f; + + return (opcode & (1 << 31)) ? + (get_x_reg(regs, xn) == 0) : (get_w_reg(regs, xn) == 0); +} + +static bool __kprobes check_cbnz(u32 opcode, struct pt_regs *regs) +{ + int xn = opcode & 0x1f; + + return (opcode & (1 << 31)) ? + (get_x_reg(regs, xn) != 0) : (get_w_reg(regs, xn) != 0); +} + +static bool __kprobes check_tbz(u32 opcode, struct pt_regs *regs) +{ + int xn = opcode & 0x1f; + int bit_pos = ((opcode & (1 << 31)) >> 26) | ((opcode >> 19) & 0x1f); + + return ((get_x_reg(regs, xn) >> bit_pos) & 0x1) == 0; +} + +static bool __kprobes check_tbnz(u32 opcode, struct pt_regs *regs) +{ + int xn = opcode & 0x1f; + int bit_pos = ((opcode & (1 << 31)) >> 26) | ((opcode >> 19) & 0x1f); + + return ((get_x_reg(regs, xn) >> bit_pos) & 0x1) != 0; +} + +/* + * instruction simulation functions + */ +void __kprobes +simulate_adr_adrp(u32 opcode, long addr, struct pt_regs *regs) +{ + long imm, xn, val; + + xn = opcode & 0x1f; + imm = ((opcode >> 3) & 0x1ffffc) | ((opcode >> 29) & 0x3); + imm = sign_extend(imm, 20); + if (opcode & 0x80000000) + val = (imm<<12) + (addr & 0xfffffffffffff000); + else + val = imm + addr; + + set_x_reg(regs, xn, val); + + instruction_pointer_set(regs, instruction_pointer(regs) + 4); +} + +void __kprobes +simulate_b_bl(u32 opcode, long addr, struct pt_regs *regs) +{ + int disp = bbl_displacement(opcode); + + /* Link register is x30 */ + if (opcode & (1 << 31)) + set_x_reg(regs, 30, addr + 4); + + instruction_pointer_set(regs, addr + disp); +} + +void __kprobes +simulate_b_cond(u32 opcode, long addr, struct pt_regs *regs) +{ + int disp = 4; + + if (aarch32_opcode_cond_checks[opcode & 0xf](regs->pstate & 0xffffffff)) + disp = bcond_displacement(opcode); + + instruction_pointer_set(regs, addr + disp); +} + +void __kprobes +simulate_br_blr_ret(u32 opcode, long addr, struct pt_regs *regs) +{ + int xn = (opcode >> 5) & 0x1f; + + /* update pc first in case we're doing a "blr lr" */ + instruction_pointer_set(regs, get_x_reg(regs, xn)); + + /* Link register is x30 */ + if (((opcode >> 21) & 0x3) == 1) + set_x_reg(regs, 30, addr + 4); +} + +void __kprobes +simulate_cbz_cbnz(u32 opcode, long addr, struct pt_regs *regs) +{ + int disp = 4; + + if (opcode & (1 << 24)) { + if (check_cbnz(opcode, regs)) + disp = cbz_displacement(opcode); + } else { + if (check_cbz(opcode, regs)) + disp = cbz_displacement(opcode); + } + instruction_pointer_set(regs, addr + disp); +} + +void __kprobes +simulate_tbz_tbnz(u32 opcode, long addr, struct pt_regs *regs) +{ + int disp = 4; + + if (opcode & (1 << 24)) { + if (check_tbnz(opcode, regs)) + disp = tbz_displacement(opcode); + } else { + if (check_tbz(opcode, regs)) + disp = tbz_displacement(opcode); + } + instruction_pointer_set(regs, addr + disp); +} + +void __kprobes +simulate_ldr_literal(u32 opcode, long addr, struct pt_regs *regs) +{ + u64 *load_addr; + int xn = opcode & 0x1f; + int disp; + + disp = ldr_displacement(opcode); + load_addr = (u64 *) (addr + disp); + + if (opcode & (1 << 30)) /* x0-x30 */ + set_x_reg(regs, xn, *load_addr); + else /* w0-w30 */ + set_w_reg(regs, xn, *load_addr); + + instruction_pointer_set(regs, instruction_pointer(regs) + 4); +} + +void __kprobes +simulate_ldrsw_literal(u32 opcode, long addr, struct pt_regs *regs) +{ + s32 *load_addr; + int xn = opcode & 0x1f; + int disp; + + disp = ldr_displacement(opcode); + load_addr = (s32 *) (addr + disp); + + set_x_reg(regs, xn, *load_addr); + + instruction_pointer_set(regs, instruction_pointer(regs) + 4); +} diff --git a/arch/arm64/kernel/probes/simulate-insn.h b/arch/arm64/kernel/probes/simulate-insn.h new file mode 100644 index 0000000..050bde6 --- /dev/null +++ b/arch/arm64/kernel/probes/simulate-insn.h @@ -0,0 +1,28 @@ +/* + * arch/arm64/kernel/probes/simulate-insn.h + * + * Copyright (C) 2013 Linaro Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +#ifndef _ARM_KERNEL_KPROBES_SIMULATE_INSN_H +#define _ARM_KERNEL_KPROBES_SIMULATE_INSN_H + +void simulate_adr_adrp(u32 opcode, long addr, struct pt_regs *regs); +void simulate_b_bl(u32 opcode, long addr, struct pt_regs *regs); +void simulate_b_cond(u32 opcode, long addr, struct pt_regs *regs); +void simulate_br_blr_ret(u32 opcode, long addr, struct pt_regs *regs); +void simulate_cbz_cbnz(u32 opcode, long addr, struct pt_regs *regs); +void simulate_tbz_tbnz(u32 opcode, long addr, struct pt_regs *regs); +void simulate_ldr_literal(u32 opcode, long addr, struct pt_regs *regs); +void simulate_ldrsw_literal(u32 opcode, long addr, struct pt_regs *regs); + +#endif /* _ARM_KERNEL_KPROBES_SIMULATE_INSN_H */ diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 3f6cd5c..e0c81da 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -48,6 +48,107 @@ #define CREATE_TRACE_POINTS #include <trace/events/syscalls.h> +struct pt_regs_offset { + const char *name; + int offset; +}; + +#define REG_OFFSET_NAME(r) {.name = #r, .offset = offsetof(struct pt_regs, r)} +#define REG_OFFSET_END {.name = NULL, .offset = 0} +#define GPR_OFFSET_NAME(r) \ + {.name = "x" #r, .offset = offsetof(struct pt_regs, regs[r])} + +static const struct pt_regs_offset regoffset_table[] = { + GPR_OFFSET_NAME(0), + GPR_OFFSET_NAME(1), + GPR_OFFSET_NAME(2), + GPR_OFFSET_NAME(3), + GPR_OFFSET_NAME(4), + GPR_OFFSET_NAME(5), + GPR_OFFSET_NAME(6), + GPR_OFFSET_NAME(7), + GPR_OFFSET_NAME(8), + GPR_OFFSET_NAME(9), + GPR_OFFSET_NAME(10), + GPR_OFFSET_NAME(11), + GPR_OFFSET_NAME(12), + GPR_OFFSET_NAME(13), + GPR_OFFSET_NAME(14), + GPR_OFFSET_NAME(15), + GPR_OFFSET_NAME(16), + GPR_OFFSET_NAME(17), + GPR_OFFSET_NAME(18), + GPR_OFFSET_NAME(19), + GPR_OFFSET_NAME(20), + GPR_OFFSET_NAME(21), + GPR_OFFSET_NAME(22), + GPR_OFFSET_NAME(23), + GPR_OFFSET_NAME(24), + GPR_OFFSET_NAME(25), + GPR_OFFSET_NAME(26), + GPR_OFFSET_NAME(27), + GPR_OFFSET_NAME(28), + GPR_OFFSET_NAME(29), + GPR_OFFSET_NAME(30), + {.name = "lr", .offset = offsetof(struct pt_regs, regs[30])}, + REG_OFFSET_NAME(sp), + REG_OFFSET_NAME(pc), + REG_OFFSET_NAME(pstate), + REG_OFFSET_END, +}; + +/** + * regs_query_register_offset() - query register offset from its name + * @name: the name of a register + * + * regs_query_register_offset() returns the offset of a register in struct + * pt_regs from its name. If the name is invalid, this returns -EINVAL; + */ +int regs_query_register_offset(const char *name) +{ + const struct pt_regs_offset *roff; + + for (roff = regoffset_table; roff->name != NULL; roff++) + if (!strcmp(roff->name, name)) + return roff->offset; + return -EINVAL; +} + +/** + * regs_within_kernel_stack() - check the address in the stack + * @regs: pt_regs which contains kernel stack pointer. + * @addr: address which is checked. + * + * regs_within_kernel_stack() checks @addr is within the kernel stack page(s). + * If @addr is within the kernel stack, it returns true. If not, returns false. + */ +static bool regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr) +{ + return ((addr & ~(THREAD_SIZE - 1)) == + (kernel_stack_pointer(regs) & ~(THREAD_SIZE - 1))) || + on_irq_stack(addr, raw_smp_processor_id()); +} + +/** + * regs_get_kernel_stack_nth() - get Nth entry of the stack + * @regs: pt_regs which contains kernel stack pointer. + * @n: stack entry number. + * + * regs_get_kernel_stack_nth() returns @n th entry of the kernel stack which + * is specified by @regs. If the @n th entry is NOT in the kernel stack, + * this returns 0. + */ +unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n) +{ + unsigned long *addr = (unsigned long *)kernel_stack_pointer(regs); + + addr += n; + if (regs_within_kernel_stack(regs, (unsigned long)addr)) + return *addr; + else + return 0; +} + /* * TODO: does not yet catch signals sent when the child dies. * in exit.c or in signal.c. @@ -1246,13 +1347,13 @@ static void tracehook_report_syscall(struct pt_regs *regs, asmlinkage int syscall_trace_enter(struct pt_regs *regs) { - /* Do the secure computing check first; failures should be fast. */ - if (secure_computing() == -1) - return -1; - if (test_thread_flag(TIF_SYSCALL_TRACE)) tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER); + /* Do the secure computing after ptrace; failures should be fast. */ + if (secure_computing(NULL) == -1) + return -1; + if (test_thread_flag(TIF_SYSCALL_TRACEPOINT)) trace_sys_enter(regs, regs->syscallno); diff --git a/arch/arm64/kernel/relocate_kernel.S b/arch/arm64/kernel/relocate_kernel.S new file mode 100644 index 0000000..51b73cd --- /dev/null +++ b/arch/arm64/kernel/relocate_kernel.S @@ -0,0 +1,130 @@ +/* + * kexec for arm64 + * + * Copyright (C) Linaro. + * Copyright (C) Huawei Futurewei Technologies. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/kexec.h> +#include <linux/linkage.h> + +#include <asm/assembler.h> +#include <asm/kexec.h> +#include <asm/page.h> +#include <asm/sysreg.h> + +/* + * arm64_relocate_new_kernel - Put a 2nd stage image in place and boot it. + * + * The memory that the old kernel occupies may be overwritten when coping the + * new image to its final location. To assure that the + * arm64_relocate_new_kernel routine which does that copy is not overwritten, + * all code and data needed by arm64_relocate_new_kernel must be between the + * symbols arm64_relocate_new_kernel and arm64_relocate_new_kernel_end. The + * machine_kexec() routine will copy arm64_relocate_new_kernel to the kexec + * control_code_page, a special page which has been set up to be preserved + * during the copy operation. + */ +ENTRY(arm64_relocate_new_kernel) + + /* Setup the list loop variables. */ + mov x17, x1 /* x17 = kimage_start */ + mov x16, x0 /* x16 = kimage_head */ + dcache_line_size x15, x0 /* x15 = dcache line size */ + mov x14, xzr /* x14 = entry ptr */ + mov x13, xzr /* x13 = copy dest */ + + /* Clear the sctlr_el2 flags. */ + mrs x0, CurrentEL + cmp x0, #CurrentEL_EL2 + b.ne 1f + mrs x0, sctlr_el2 + ldr x1, =SCTLR_ELx_FLAGS + bic x0, x0, x1 + msr sctlr_el2, x0 + isb +1: + + /* Check if the new image needs relocation. */ + tbnz x16, IND_DONE_BIT, .Ldone + +.Lloop: + and x12, x16, PAGE_MASK /* x12 = addr */ + + /* Test the entry flags. */ +.Ltest_source: + tbz x16, IND_SOURCE_BIT, .Ltest_indirection + + /* Invalidate dest page to PoC. */ + mov x0, x13 + add x20, x0, #PAGE_SIZE + sub x1, x15, #1 + bic x0, x0, x1 +2: dc ivac, x0 + add x0, x0, x15 + cmp x0, x20 + b.lo 2b + dsb sy + + mov x20, x13 + mov x21, x12 + copy_page x20, x21, x0, x1, x2, x3, x4, x5, x6, x7 + + /* dest += PAGE_SIZE */ + add x13, x13, PAGE_SIZE + b .Lnext + +.Ltest_indirection: + tbz x16, IND_INDIRECTION_BIT, .Ltest_destination + + /* ptr = addr */ + mov x14, x12 + b .Lnext + +.Ltest_destination: + tbz x16, IND_DESTINATION_BIT, .Lnext + + /* dest = addr */ + mov x13, x12 + +.Lnext: + /* entry = *ptr++ */ + ldr x16, [x14], #8 + + /* while (!(entry & DONE)) */ + tbz x16, IND_DONE_BIT, .Lloop + +.Ldone: + /* wait for writes from copy_page to finish */ + dsb nsh + ic iallu + dsb nsh + isb + + /* Start new image. */ + mov x0, xzr + mov x1, xzr + mov x2, xzr + mov x3, xzr + br x17 + +ENDPROC(arm64_relocate_new_kernel) + +.ltorg + +.align 3 /* To keep the 64-bit values below naturally aligned. */ + +.Lcopy_end: +.org KEXEC_CONTROL_PAGE_SIZE + +/* + * arm64_relocate_new_kernel_size - Number of bytes to copy to the + * control_code_page. + */ +.globl arm64_relocate_new_kernel_size +arm64_relocate_new_kernel_size: + .quad .Lcopy_end - arm64_relocate_new_kernel diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index c907e2f..536dce2 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -200,7 +200,7 @@ static void __init request_standard_resources(void) struct resource *res; kernel_code.start = virt_to_phys(_text); - kernel_code.end = virt_to_phys(_etext - 1); + kernel_code.end = virt_to_phys(__init_begin - 1); kernel_data.start = virt_to_phys(_sdata); kernel_data.end = virt_to_phys(_end - 1); @@ -255,14 +255,17 @@ void __init setup_arch(char **cmdline_p) */ cpu_uninstall_idmap(); + xen_early_init(); efi_init(); arm64_memblock_init(); + paging_init(); + + acpi_table_upgrade(); + /* Parse the ACPI tables for possible boot-time configuration */ acpi_boot_table_init(); - paging_init(); - if (acpi_disabled) unflatten_device_tree(); @@ -279,8 +282,6 @@ void __init setup_arch(char **cmdline_p) else psci_acpi_init(); - xen_early_init(); - cpu_read_bootcpu_ops(); smp_init_cpus(); smp_build_mpidr_hash(); diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 678e084..76a6d92 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -267,7 +267,6 @@ asmlinkage void secondary_start_kernel(void) set_cpu_online(cpu, true); complete(&cpu_running); - local_dbg_enable(); local_irq_enable(); local_async_enable(); @@ -437,9 +436,9 @@ void __init smp_cpus_done(unsigned int max_cpus) void __init smp_prepare_boot_cpu(void) { + set_my_cpu_offset(per_cpu_offset(smp_processor_id())); cpuinfo_store_boot_cpu(); save_boot_cpu_run_el(); - set_my_cpu_offset(per_cpu_offset(smp_processor_id())); } static u64 __init of_get_cpu_mpidr(struct device_node *dn) @@ -560,6 +559,8 @@ acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor) */ acpi_set_mailbox_entry(cpu_count, processor); + early_map_cpu_to_node(cpu_count, acpi_numa_get_nid(cpu_count, hwid)); + cpu_count++; } @@ -694,6 +695,13 @@ void __init smp_prepare_cpus(unsigned int max_cpus) smp_store_cpu_info(smp_processor_id()); /* + * If UP is mandated by "nosmp" (which implies "maxcpus=0"), don't set + * secondary CPUs present. + */ + if (max_cpus == 0) + return; + + /* * Initialise the present map (which describes the set of CPUs * actually populated at the present time) and release the * secondaries from the bootloader. @@ -909,3 +917,21 @@ int setup_profiling_timer(unsigned int multiplier) { return -EINVAL; } + +static bool have_cpu_die(void) +{ +#ifdef CONFIG_HOTPLUG_CPU + int any_cpu = raw_smp_processor_id(); + + if (cpu_ops[any_cpu]->cpu_die) + return true; +#endif + return false; +} + +bool cpus_are_stuck_in_kernel(void) +{ + bool smp_spin_tables = (num_possible_cpus() > 1 && !have_cpu_die()); + + return !!cpus_stuck_in_kernel || smp_spin_tables; +} diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index f7cf463..e04f838 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -41,6 +41,7 @@ #include <asm/stacktrace.h> #include <asm/exception.h> #include <asm/system_misc.h> +#include <asm/sysreg.h> static const char *handler[]= { "Synchronous Abort", @@ -52,20 +53,18 @@ static const char *handler[]= { int show_unhandled_signals = 1; /* - * Dump out the contents of some memory nicely... + * Dump out the contents of some kernel memory nicely... */ static void dump_mem(const char *lvl, const char *str, unsigned long bottom, - unsigned long top, bool compat) + unsigned long top) { unsigned long first; mm_segment_t fs; int i; - unsigned int width = compat ? 4 : 8; /* * We need to switch to kernel mode so that we can use __get_user - * to safely read from kernel space. Note that we now dump the - * code first, just in case the backtrace kills us. + * to safely read from kernel space. */ fs = get_fs(); set_fs(KERNEL_DS); @@ -79,22 +78,15 @@ static void dump_mem(const char *lvl, const char *str, unsigned long bottom, memset(str, ' ', sizeof(str)); str[sizeof(str) - 1] = '\0'; - for (p = first, i = 0; i < (32 / width) - && p < top; i++, p += width) { + for (p = first, i = 0; i < (32 / 8) + && p < top; i++, p += 8) { if (p >= bottom && p < top) { unsigned long val; - if (width == 8) { - if (__get_user(val, (unsigned long *)p) == 0) - sprintf(str + i * 17, " %016lx", val); - else - sprintf(str + i * 17, " ????????????????"); - } else { - if (__get_user(val, (unsigned int *)p) == 0) - sprintf(str + i * 9, " %08lx", val); - else - sprintf(str + i * 9, " ????????"); - } + if (__get_user(val, (unsigned long *)p) == 0) + sprintf(str + i * 17, " %016lx", val); + else + sprintf(str + i * 17, " ????????????????"); } } printk("%s%04lx:%s\n", lvl, first & 0xffff, str); @@ -111,21 +103,12 @@ static void dump_backtrace_entry(unsigned long where) print_ip_sym(where); } -static void dump_instr(const char *lvl, struct pt_regs *regs) +static void __dump_instr(const char *lvl, struct pt_regs *regs) { unsigned long addr = instruction_pointer(regs); - mm_segment_t fs; char str[sizeof("00000000 ") * 5 + 2 + 1], *p = str; int i; - /* - * We need to switch to kernel mode so that we can use __get_user - * to safely read from kernel space. Note that we now dump the - * code first, just in case the backtrace kills us. - */ - fs = get_fs(); - set_fs(KERNEL_DS); - for (i = -4; i < 1; i++) { unsigned int val, bad; @@ -139,8 +122,18 @@ static void dump_instr(const char *lvl, struct pt_regs *regs) } } printk("%sCode: %s\n", lvl, str); +} - set_fs(fs); +static void dump_instr(const char *lvl, struct pt_regs *regs) +{ + if (!user_mode(regs)) { + mm_segment_t fs = get_fs(); + set_fs(KERNEL_DS); + __dump_instr(lvl, regs); + set_fs(fs); + } else { + __dump_instr(lvl, regs); + } } static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) @@ -216,7 +209,7 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) stack = IRQ_STACK_TO_TASK_STACK(irq_stack_ptr); dump_mem("", "Exception stack", stack, - stack + sizeof(struct pt_regs), false); + stack + sizeof(struct pt_regs)); } } } @@ -254,10 +247,9 @@ static int __die(const char *str, int err, struct thread_info *thread, pr_emerg("Process %.*s (pid: %d, stack limit = 0x%p)\n", TASK_COMM_LEN, tsk->comm, task_pid_nr(tsk), thread + 1); - if (!user_mode(regs) || in_interrupt()) { + if (!user_mode(regs)) { dump_mem(KERN_EMERG, "Stack: ", regs->sp, - THREAD_SIZE + (unsigned long)task_stack_page(tsk), - compat_user_mode(regs)); + THREAD_SIZE + (unsigned long)task_stack_page(tsk)); dump_backtrace(regs, tsk); dump_instr(KERN_EMERG, regs); } @@ -373,11 +365,59 @@ exit: return fn ? fn(regs, instr) : 1; } -asmlinkage void __exception do_undefinstr(struct pt_regs *regs) +static void force_signal_inject(int signal, int code, struct pt_regs *regs, + unsigned long address) { siginfo_t info; void __user *pc = (void __user *)instruction_pointer(regs); + const char *desc; + + switch (signal) { + case SIGILL: + desc = "undefined instruction"; + break; + case SIGSEGV: + desc = "illegal memory access"; + break; + default: + desc = "bad mode"; + break; + } + + if (unhandled_signal(current, signal) && + show_unhandled_signals_ratelimited()) { + pr_info("%s[%d]: %s: pc=%p\n", + current->comm, task_pid_nr(current), desc, pc); + dump_instr(KERN_INFO, regs); + } + info.si_signo = signal; + info.si_errno = 0; + info.si_code = code; + info.si_addr = pc; + + arm64_notify_die(desc, regs, &info, 0); +} + +/* + * Set up process info to signal segmentation fault - called on access error. + */ +void arm64_notify_segfault(struct pt_regs *regs, unsigned long addr) +{ + int code; + + down_read(¤t->mm->mmap_sem); + if (find_vma(current->mm, addr) == NULL) + code = SEGV_MAPERR; + else + code = SEGV_ACCERR; + up_read(¤t->mm->mmap_sem); + + force_signal_inject(SIGSEGV, code, regs, addr); +} + +asmlinkage void __exception do_undefinstr(struct pt_regs *regs) +{ /* check for AArch32 breakpoint instructions */ if (!aarch32_break_handler(regs)) return; @@ -385,18 +425,66 @@ asmlinkage void __exception do_undefinstr(struct pt_regs *regs) if (call_undef_hook(regs) == 0) return; - if (unhandled_signal(current, SIGILL) && show_unhandled_signals_ratelimited()) { - pr_info("%s[%d]: undefined instruction: pc=%p\n", - current->comm, task_pid_nr(current), pc); - dump_instr(KERN_INFO, regs); - } + force_signal_inject(SIGILL, ILL_ILLOPC, regs, 0); +} - info.si_signo = SIGILL; - info.si_errno = 0; - info.si_code = ILL_ILLOPC; - info.si_addr = pc; +void cpu_enable_cache_maint_trap(void *__unused) +{ + config_sctlr_el1(SCTLR_EL1_UCI, 0); +} - arm64_notify_die("Oops - undefined instruction", regs, &info, 0); +#define __user_cache_maint(insn, address, res) \ + asm volatile ( \ + "1: " insn ", %1\n" \ + " mov %w0, #0\n" \ + "2:\n" \ + " .pushsection .fixup,\"ax\"\n" \ + " .align 2\n" \ + "3: mov %w0, %w2\n" \ + " b 2b\n" \ + " .popsection\n" \ + _ASM_EXTABLE(1b, 3b) \ + : "=r" (res) \ + : "r" (address), "i" (-EFAULT) ) + +asmlinkage void __exception do_sysinstr(unsigned int esr, struct pt_regs *regs) +{ + unsigned long address; + int ret; + + /* if this is a write with: Op0=1, Op2=1, Op1=3, CRn=7 */ + if ((esr & 0x01fffc01) == 0x0012dc00) { + int rt = (esr >> 5) & 0x1f; + int crm = (esr >> 1) & 0x0f; + + address = (rt == 31) ? 0 : regs->regs[rt]; + + switch (crm) { + case 11: /* DC CVAU, gets promoted */ + __user_cache_maint("dc civac", address, ret); + break; + case 10: /* DC CVAC, gets promoted */ + __user_cache_maint("dc civac", address, ret); + break; + case 14: /* DC CIVAC */ + __user_cache_maint("dc civac", address, ret); + break; + case 5: /* IC IVAU */ + __user_cache_maint("ic ivau", address, ret); + break; + default: + force_signal_inject(SIGILL, ILL_ILLOPC, regs, 0); + return; + } + } else { + force_signal_inject(SIGILL, ILL_ILLOPC, regs, 0); + return; + } + + if (ret) + arm64_notify_segfault(regs, address); + else + regs->pc += 4; } long compat_arm_syscall(struct pt_regs *regs); @@ -465,7 +553,7 @@ static const char *esr_class_str[] = { const char *esr_get_class_string(u32 esr) { - return esr_class_str[esr >> ESR_ELx_EC_SHIFT]; + return esr_class_str[ESR_ELx_EC(esr)]; } /* diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 9fefb00..076312b 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -214,10 +214,16 @@ void update_vsyscall(struct timekeeper *tk) vdso_data->wtm_clock_nsec = tk->wall_to_monotonic.tv_nsec; if (!use_syscall) { + /* tkr_mono.cycle_last == tkr_raw.cycle_last */ vdso_data->cs_cycle_last = tk->tkr_mono.cycle_last; + vdso_data->raw_time_sec = tk->raw_time.tv_sec; + vdso_data->raw_time_nsec = tk->raw_time.tv_nsec; vdso_data->xtime_clock_sec = tk->xtime_sec; vdso_data->xtime_clock_nsec = tk->tkr_mono.xtime_nsec; - vdso_data->cs_mult = tk->tkr_mono.mult; + /* tkr_raw.xtime_nsec == 0 */ + vdso_data->cs_mono_mult = tk->tkr_mono.mult; + vdso_data->cs_raw_mult = tk->tkr_raw.mult; + /* tkr_mono.shift == tkr_raw.shift */ vdso_data->cs_shift = tk->tkr_mono.shift; } diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index b467fd0..62c84f7 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -23,7 +23,7 @@ GCOV_PROFILE := n ccflags-y += -Wl,-shared obj-y += vdso.o -extra-y += vdso.lds vdso-offsets.h +extra-y += vdso.lds CPPFLAGS_vdso.lds += -P -C -U$(ARCH) # Force dependency (incbin is bad) @@ -42,11 +42,10 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh quiet_cmd_vdsosym = VDSOSYM $@ define cmd_vdsosym - $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@ && \ - cp $@ include/generated/ + $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@ endef -$(obj)/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE +include/generated/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE $(call if_changed,vdsosym) # Assembly rules for the .S files diff --git a/arch/arm64/kernel/vdso/gettimeofday.S b/arch/arm64/kernel/vdso/gettimeofday.S index efa79e8..e00b467 100644 --- a/arch/arm64/kernel/vdso/gettimeofday.S +++ b/arch/arm64/kernel/vdso/gettimeofday.S @@ -26,24 +26,109 @@ #define NSEC_PER_SEC_HI16 0x3b9a vdso_data .req x6 -use_syscall .req w7 -seqcnt .req w8 +seqcnt .req w7 +w_tmp .req w8 +x_tmp .req x8 + +/* + * Conventions for macro arguments: + * - An argument is write-only if its name starts with "res". + * - All other arguments are read-only, unless otherwise specified. + */ .macro seqcnt_acquire 9999: ldr seqcnt, [vdso_data, #VDSO_TB_SEQ_COUNT] tbnz seqcnt, #0, 9999b dmb ishld - ldr use_syscall, [vdso_data, #VDSO_USE_SYSCALL] .endm - .macro seqcnt_read, cnt + .macro seqcnt_check fail dmb ishld - ldr \cnt, [vdso_data, #VDSO_TB_SEQ_COUNT] + ldr w_tmp, [vdso_data, #VDSO_TB_SEQ_COUNT] + cmp w_tmp, seqcnt + b.ne \fail .endm - .macro seqcnt_check, cnt, fail - cmp \cnt, seqcnt - b.ne \fail + .macro syscall_check fail + ldr w_tmp, [vdso_data, #VDSO_USE_SYSCALL] + cbnz w_tmp, \fail + .endm + + .macro get_nsec_per_sec res + mov \res, #NSEC_PER_SEC_LO16 + movk \res, #NSEC_PER_SEC_HI16, lsl #16 + .endm + + /* + * Returns the clock delta, in nanoseconds left-shifted by the clock + * shift. + */ + .macro get_clock_shifted_nsec res, cycle_last, mult + /* Read the virtual counter. */ + isb + mrs x_tmp, cntvct_el0 + /* Calculate cycle delta and convert to ns. */ + sub \res, x_tmp, \cycle_last + /* We can only guarantee 56 bits of precision. */ + movn x_tmp, #0xff00, lsl #48 + and \res, x_tmp, \res + mul \res, \res, \mult + .endm + + /* + * Returns in res_{sec,nsec} the REALTIME timespec, based on the + * "wall time" (xtime) and the clock_mono delta. + */ + .macro get_ts_realtime res_sec, res_nsec, \ + clock_nsec, xtime_sec, xtime_nsec, nsec_to_sec + add \res_nsec, \clock_nsec, \xtime_nsec + udiv x_tmp, \res_nsec, \nsec_to_sec + add \res_sec, \xtime_sec, x_tmp + msub \res_nsec, x_tmp, \nsec_to_sec, \res_nsec + .endm + + /* + * Returns in res_{sec,nsec} the timespec based on the clock_raw delta, + * used for CLOCK_MONOTONIC_RAW. + */ + .macro get_ts_clock_raw res_sec, res_nsec, clock_nsec, nsec_to_sec + udiv \res_sec, \clock_nsec, \nsec_to_sec + msub \res_nsec, \res_sec, \nsec_to_sec, \clock_nsec + .endm + + /* sec and nsec are modified in place. */ + .macro add_ts sec, nsec, ts_sec, ts_nsec, nsec_to_sec + /* Add timespec. */ + add \sec, \sec, \ts_sec + add \nsec, \nsec, \ts_nsec + + /* Normalise the new timespec. */ + cmp \nsec, \nsec_to_sec + b.lt 9999f + sub \nsec, \nsec, \nsec_to_sec + add \sec, \sec, #1 +9999: + cmp \nsec, #0 + b.ge 9998f + add \nsec, \nsec, \nsec_to_sec + sub \sec, \sec, #1 +9998: + .endm + + .macro clock_gettime_return, shift=0 + .if \shift == 1 + lsr x11, x11, x12 + .endif + stp x10, x11, [x1, #TSPEC_TV_SEC] + mov x0, xzr + ret + .endm + + .macro jump_slot jumptable, index, label + .if (. - \jumptable) != 4 * (\index) + .error "Jump slot index mismatch" + .endif + b \label .endm .text @@ -51,18 +136,25 @@ seqcnt .req w8 /* int __kernel_gettimeofday(struct timeval *tv, struct timezone *tz); */ ENTRY(__kernel_gettimeofday) .cfi_startproc - mov x2, x30 - .cfi_register x30, x2 - - /* Acquire the sequence counter and get the timespec. */ adr vdso_data, _vdso_data -1: seqcnt_acquire - cbnz use_syscall, 4f - /* If tv is NULL, skip to the timezone code. */ cbz x0, 2f - bl __do_get_tspec - seqcnt_check w9, 1b + + /* Compute the time of day. */ +1: seqcnt_acquire + syscall_check fail=4f + ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST] + /* w11 = cs_mono_mult, w12 = cs_shift */ + ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT] + ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC] + seqcnt_check fail=1b + + get_nsec_per_sec res=x9 + lsl x9, x9, x12 + + get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11 + get_ts_realtime res_sec=x10, res_nsec=x11, \ + clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9 /* Convert ns to us. */ mov x13, #1000 @@ -76,95 +168,126 @@ ENTRY(__kernel_gettimeofday) stp w4, w5, [x1, #TZ_MINWEST] 3: mov x0, xzr - ret x2 + ret 4: /* Syscall fallback. */ mov x8, #__NR_gettimeofday svc #0 - ret x2 + ret .cfi_endproc ENDPROC(__kernel_gettimeofday) +#define JUMPSLOT_MAX CLOCK_MONOTONIC_COARSE + /* int __kernel_clock_gettime(clockid_t clock_id, struct timespec *tp); */ ENTRY(__kernel_clock_gettime) .cfi_startproc - cmp w0, #CLOCK_REALTIME - ccmp w0, #CLOCK_MONOTONIC, #0x4, ne - b.ne 2f + cmp w0, #JUMPSLOT_MAX + b.hi syscall + adr vdso_data, _vdso_data + adr x_tmp, jumptable + add x_tmp, x_tmp, w0, uxtw #2 + br x_tmp + + ALIGN +jumptable: + jump_slot jumptable, CLOCK_REALTIME, realtime + jump_slot jumptable, CLOCK_MONOTONIC, monotonic + b syscall + b syscall + jump_slot jumptable, CLOCK_MONOTONIC_RAW, monotonic_raw + jump_slot jumptable, CLOCK_REALTIME_COARSE, realtime_coarse + jump_slot jumptable, CLOCK_MONOTONIC_COARSE, monotonic_coarse + + .if (. - jumptable) != 4 * (JUMPSLOT_MAX + 1) + .error "Wrong jumptable size" + .endif + + ALIGN +realtime: + seqcnt_acquire + syscall_check fail=syscall + ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST] + /* w11 = cs_mono_mult, w12 = cs_shift */ + ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT] + ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC] + seqcnt_check fail=realtime - mov x2, x30 - .cfi_register x30, x2 + /* All computations are done with left-shifted nsecs. */ + get_nsec_per_sec res=x9 + lsl x9, x9, x12 - /* Get kernel timespec. */ - adr vdso_data, _vdso_data -1: seqcnt_acquire - cbnz use_syscall, 7f + get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11 + get_ts_realtime res_sec=x10, res_nsec=x11, \ + clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9 + clock_gettime_return, shift=1 - bl __do_get_tspec - seqcnt_check w9, 1b + ALIGN +monotonic: + seqcnt_acquire + syscall_check fail=syscall + ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST] + /* w11 = cs_mono_mult, w12 = cs_shift */ + ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT] + ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC] + ldp x3, x4, [vdso_data, #VDSO_WTM_CLK_SEC] + seqcnt_check fail=monotonic - mov x30, x2 + /* All computations are done with left-shifted nsecs. */ + lsl x4, x4, x12 + get_nsec_per_sec res=x9 + lsl x9, x9, x12 - cmp w0, #CLOCK_MONOTONIC - b.ne 6f + get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11 + get_ts_realtime res_sec=x10, res_nsec=x11, \ + clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9 - /* Get wtm timespec. */ - ldp x13, x14, [vdso_data, #VDSO_WTM_CLK_SEC] + add_ts sec=x10, nsec=x11, ts_sec=x3, ts_nsec=x4, nsec_to_sec=x9 + clock_gettime_return, shift=1 - /* Check the sequence counter. */ - seqcnt_read w9 - seqcnt_check w9, 1b - b 4f -2: - cmp w0, #CLOCK_REALTIME_COARSE - ccmp w0, #CLOCK_MONOTONIC_COARSE, #0x4, ne - b.ne 8f + ALIGN +monotonic_raw: + seqcnt_acquire + syscall_check fail=syscall + ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST] + /* w11 = cs_raw_mult, w12 = cs_shift */ + ldp w12, w11, [vdso_data, #VDSO_CS_SHIFT] + ldp x13, x14, [vdso_data, #VDSO_RAW_TIME_SEC] + seqcnt_check fail=monotonic_raw - /* xtime_coarse_nsec is already right-shifted */ - mov x12, #0 + /* All computations are done with left-shifted nsecs. */ + lsl x14, x14, x12 + get_nsec_per_sec res=x9 + lsl x9, x9, x12 - /* Get coarse timespec. */ - adr vdso_data, _vdso_data -3: seqcnt_acquire + get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11 + get_ts_clock_raw res_sec=x10, res_nsec=x11, \ + clock_nsec=x15, nsec_to_sec=x9 + + add_ts sec=x10, nsec=x11, ts_sec=x13, ts_nsec=x14, nsec_to_sec=x9 + clock_gettime_return, shift=1 + + ALIGN +realtime_coarse: + seqcnt_acquire ldp x10, x11, [vdso_data, #VDSO_XTIME_CRS_SEC] + seqcnt_check fail=realtime_coarse + clock_gettime_return - /* Get wtm timespec. */ + ALIGN +monotonic_coarse: + seqcnt_acquire + ldp x10, x11, [vdso_data, #VDSO_XTIME_CRS_SEC] ldp x13, x14, [vdso_data, #VDSO_WTM_CLK_SEC] + seqcnt_check fail=monotonic_coarse - /* Check the sequence counter. */ - seqcnt_read w9 - seqcnt_check w9, 3b + /* Computations are done in (non-shifted) nsecs. */ + get_nsec_per_sec res=x9 + add_ts sec=x10, nsec=x11, ts_sec=x13, ts_nsec=x14, nsec_to_sec=x9 + clock_gettime_return - cmp w0, #CLOCK_MONOTONIC_COARSE - b.ne 6f -4: - /* Add on wtm timespec. */ - add x10, x10, x13 - lsl x14, x14, x12 - add x11, x11, x14 - - /* Normalise the new timespec. */ - mov x15, #NSEC_PER_SEC_LO16 - movk x15, #NSEC_PER_SEC_HI16, lsl #16 - lsl x15, x15, x12 - cmp x11, x15 - b.lt 5f - sub x11, x11, x15 - add x10, x10, #1 -5: - cmp x11, #0 - b.ge 6f - add x11, x11, x15 - sub x10, x10, #1 - -6: /* Store to the user timespec. */ - lsr x11, x11, x12 - stp x10, x11, [x1, #TSPEC_TV_SEC] - mov x0, xzr - ret -7: - mov x30, x2 -8: /* Syscall fallback. */ + ALIGN +syscall: /* Syscall fallback. */ mov x8, #__NR_clock_gettime svc #0 ret @@ -176,6 +299,7 @@ ENTRY(__kernel_clock_getres) .cfi_startproc cmp w0, #CLOCK_REALTIME ccmp w0, #CLOCK_MONOTONIC, #0x4, ne + ccmp w0, #CLOCK_MONOTONIC_RAW, #0x4, ne b.ne 1f ldr x2, 5f @@ -203,46 +327,3 @@ ENTRY(__kernel_clock_getres) .quad CLOCK_COARSE_RES .cfi_endproc ENDPROC(__kernel_clock_getres) - -/* - * Read the current time from the architected counter. - * Expects vdso_data to be initialised. - * Clobbers the temporary registers (x9 - x15). - * Returns: - * - w9 = vDSO sequence counter - * - (x10, x11) = (ts->tv_sec, shifted ts->tv_nsec) - * - w12 = cs_shift - */ -ENTRY(__do_get_tspec) - .cfi_startproc - - /* Read from the vDSO data page. */ - ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST] - ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC] - ldp w11, w12, [vdso_data, #VDSO_CS_MULT] - seqcnt_read w9 - - /* Read the virtual counter. */ - isb - mrs x15, cntvct_el0 - - /* Calculate cycle delta and convert to ns. */ - sub x10, x15, x10 - /* We can only guarantee 56 bits of precision. */ - movn x15, #0xff00, lsl #48 - and x10, x15, x10 - mul x10, x10, x11 - - /* Use the kernel time to calculate the new timespec. */ - mov x11, #NSEC_PER_SEC_LO16 - movk x11, #NSEC_PER_SEC_HI16, lsl #16 - lsl x11, x11, x12 - add x15, x10, x14 - udiv x14, x15, x11 - add x10, x13, x14 - mul x13, x14, x11 - sub x11, x15, x13 - - ret - .cfi_endproc -ENDPROC(__do_get_tspec) diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 435e820..89d6e17 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -118,9 +118,11 @@ SECTIONS __exception_text_end = .; IRQENTRY_TEXT SOFTIRQENTRY_TEXT + ENTRY_TEXT TEXT_TEXT SCHED_TEXT LOCK_TEXT + KPROBES_TEXT HYPERVISOR_TEXT IDMAP_TEXT HIBERNATE_TEXT @@ -131,12 +133,13 @@ SECTIONS } . = ALIGN(SEGMENT_ALIGN); - RO_DATA(PAGE_SIZE) /* everything from this point to */ - EXCEPTION_TABLE(8) /* _etext will be marked RO NX */ + _etext = .; /* End of text section */ + + RO_DATA(PAGE_SIZE) /* everything from this point to */ + EXCEPTION_TABLE(8) /* __init_begin will be marked RO NX */ NOTES . = ALIGN(SEGMENT_ALIGN); - _etext = .; /* End of text and rodata section */ __init_begin = .; INIT_TEXT_SECTION(8) diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 3246c4a..fa96fe2 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -106,7 +106,7 @@ static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu, struct kvm_run *run) run->exit_reason = KVM_EXIT_DEBUG; run->debug.arch.hsr = hsr; - switch (hsr >> ESR_ELx_EC_SHIFT) { + switch (ESR_ELx_EC(hsr)) { case ESR_ELx_EC_WATCHPT_LOW: run->debug.arch.far = vcpu->arch.fault.far_el2; /* fall through */ @@ -149,7 +149,7 @@ static exit_handle_fn arm_exit_handlers[] = { static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu) { u32 hsr = kvm_vcpu_get_hsr(vcpu); - u8 hsr_ec = hsr >> ESR_ELx_EC_SHIFT; + u8 hsr_ec = ESR_ELx_EC(hsr); if (hsr_ec >= ARRAY_SIZE(arm_exit_handlers) || !arm_exit_handlers[hsr_ec]) { diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile index 778d0ef..0c85feb 100644 --- a/arch/arm64/kvm/hyp/Makefile +++ b/arch/arm64/kvm/hyp/Makefile @@ -17,6 +17,10 @@ obj-$(CONFIG_KVM_ARM_HOST) += tlb.o obj-$(CONFIG_KVM_ARM_HOST) += hyp-entry.o obj-$(CONFIG_KVM_ARM_HOST) += s2-setup.o +# KVM code is run at a different exception code with a different map, so +# compiler instrumentation that inserts callbacks or checks into the code may +# cause crashes. Just disable it. GCOV_PROFILE := n KASAN_SANITIZE := n UBSAN_SANITIZE := n +KCOV_INSTRUMENT := n diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index 437cfad..4373997 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -198,7 +198,7 @@ static bool __hyp_text __translate_far_to_hpfar(u64 far, u64 *hpfar) static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu) { u64 esr = read_sysreg_el2(esr); - u8 ec = esr >> ESR_ELx_EC_SHIFT; + u8 ec = ESR_ELx_EC(esr); u64 hpfar, far; vcpu->arch.fault.esr_el2 = esr; diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c index 0f7c40e..9341376 100644 --- a/arch/arm64/kvm/hyp/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/sysreg-sr.c @@ -27,8 +27,8 @@ static void __hyp_text __sysreg_do_nothing(struct kvm_cpu_context *ctxt) { } /* * Non-VHE: Both host and guest must save everything. * - * VHE: Host must save tpidr*_el[01], actlr_el1, sp0, pc, pstate, and - * guest must save everything. + * VHE: Host must save tpidr*_el[01], actlr_el1, mdscr_el1, sp0, pc, + * pstate, and guest must save everything. */ static void __hyp_text __sysreg_save_common_state(struct kvm_cpu_context *ctxt) @@ -37,6 +37,7 @@ static void __hyp_text __sysreg_save_common_state(struct kvm_cpu_context *ctxt) ctxt->sys_regs[TPIDR_EL0] = read_sysreg(tpidr_el0); ctxt->sys_regs[TPIDRRO_EL0] = read_sysreg(tpidrro_el0); ctxt->sys_regs[TPIDR_EL1] = read_sysreg(tpidr_el1); + ctxt->sys_regs[MDSCR_EL1] = read_sysreg(mdscr_el1); ctxt->gp_regs.regs.sp = read_sysreg(sp_el0); ctxt->gp_regs.regs.pc = read_sysreg_el2(elr); ctxt->gp_regs.regs.pstate = read_sysreg_el2(spsr); @@ -61,7 +62,6 @@ static void __hyp_text __sysreg_save_state(struct kvm_cpu_context *ctxt) ctxt->sys_regs[AMAIR_EL1] = read_sysreg_el1(amair); ctxt->sys_regs[CNTKCTL_EL1] = read_sysreg_el1(cntkctl); ctxt->sys_regs[PAR_EL1] = read_sysreg(par_el1); - ctxt->sys_regs[MDSCR_EL1] = read_sysreg(mdscr_el1); ctxt->gp_regs.sp_el1 = read_sysreg(sp_el1); ctxt->gp_regs.elr_el1 = read_sysreg_el1(elr); @@ -90,6 +90,7 @@ static void __hyp_text __sysreg_restore_common_state(struct kvm_cpu_context *ctx write_sysreg(ctxt->sys_regs[TPIDR_EL0], tpidr_el0); write_sysreg(ctxt->sys_regs[TPIDRRO_EL0], tpidrro_el0); write_sysreg(ctxt->sys_regs[TPIDR_EL1], tpidr_el1); + write_sysreg(ctxt->sys_regs[MDSCR_EL1], mdscr_el1); write_sysreg(ctxt->gp_regs.regs.sp, sp_el0); write_sysreg_el2(ctxt->gp_regs.regs.pc, elr); write_sysreg_el2(ctxt->gp_regs.regs.pstate, spsr); @@ -114,7 +115,6 @@ static void __hyp_text __sysreg_restore_state(struct kvm_cpu_context *ctxt) write_sysreg_el1(ctxt->sys_regs[AMAIR_EL1], amair); write_sysreg_el1(ctxt->sys_regs[CNTKCTL_EL1], cntkctl); write_sysreg(ctxt->sys_regs[PAR_EL1], par_el1); - write_sysreg(ctxt->sys_regs[MDSCR_EL1], mdscr_el1); write_sysreg(ctxt->gp_regs.sp_el1, sp_el1); write_sysreg_el1(ctxt->gp_regs.elr_el1, elr); diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S index 17e8306..0b90497 100644 --- a/arch/arm64/lib/copy_from_user.S +++ b/arch/arm64/lib/copy_from_user.S @@ -66,7 +66,7 @@ .endm end .req x5 -ENTRY(__copy_from_user) +ENTRY(__arch_copy_from_user) ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_ALT_PAN_NOT_UAO, \ CONFIG_ARM64_PAN) add end, x0, x2 @@ -75,7 +75,7 @@ ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_ALT_PAN_NOT_UAO, \ CONFIG_ARM64_PAN) mov x0, #0 // Nothing to copy ret -ENDPROC(__copy_from_user) +ENDPROC(__arch_copy_from_user) .section .fixup,"ax" .align 2 diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S index 21faae6..7a7efe2 100644 --- a/arch/arm64/lib/copy_to_user.S +++ b/arch/arm64/lib/copy_to_user.S @@ -65,7 +65,7 @@ .endm end .req x5 -ENTRY(__copy_to_user) +ENTRY(__arch_copy_to_user) ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_ALT_PAN_NOT_UAO, \ CONFIG_ARM64_PAN) add end, x0, x2 @@ -74,7 +74,7 @@ ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_ALT_PAN_NOT_UAO, \ CONFIG_ARM64_PAN) mov x0, #0 ret -ENDPROC(__copy_to_user) +ENDPROC(__arch_copy_to_user) .section .fixup,"ax" .align 2 diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index 50ff9ba..07d7352 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -52,7 +52,7 @@ ENTRY(__flush_cache_user_range) sub x3, x2, #1 bic x4, x0, x3 1: -USER(9f, dc cvau, x4 ) // clean D line to PoU +user_alt 9f, "dc cvau, x4", "dc civac, x4", ARM64_WORKAROUND_CLEAN_CACHE add x4, x4, x2 cmp x4, x1 b.lo 1b diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index b7b3978..efcf1f7 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -179,7 +179,7 @@ static u64 new_context(struct mm_struct *mm, unsigned int cpu) &asid_generation); flush_context(cpu); - /* We have at least 1 ASID per CPU, so this will always succeed */ + /* We have more ASIDs than CPUs, so this will always succeed */ asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, 1); set_asid: @@ -227,8 +227,11 @@ switch_mm_fastpath: static int asids_init(void) { asid_bits = get_cpu_asid_bits(); - /* If we end up with more CPUs than ASIDs, expect things to crash */ - WARN_ON(NUM_USER_ASIDS < num_possible_cpus()); + /* + * Expect allocation after rollover to fail if we don't have at least + * one more ASID than CPUs. ASID #0 is reserved for init_mm. + */ + WARN_ON(NUM_USER_ASIDS - 1 <= num_possible_cpus()); atomic64_set(&asid_generation, ASID_FIRST_VERSION); asid_map = kzalloc(BITS_TO_LONGS(NUM_USER_ASIDS) * sizeof(*asid_map), GFP_KERNEL); diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index c566ec8..f6c55af 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -19,6 +19,7 @@ #include <linux/gfp.h> #include <linux/acpi.h> +#include <linux/bootmem.h> #include <linux/export.h> #include <linux/slab.h> #include <linux/genalloc.h> @@ -29,6 +30,8 @@ #include <asm/cacheflush.h> +static int swiotlb __read_mostly; + static pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot, bool coherent) { @@ -341,6 +344,13 @@ static int __swiotlb_get_sgtable(struct device *dev, struct sg_table *sgt, return ret; } +static int __swiotlb_dma_supported(struct device *hwdev, u64 mask) +{ + if (swiotlb) + return swiotlb_dma_supported(hwdev, mask); + return 1; +} + static struct dma_map_ops swiotlb_dma_ops = { .alloc = __dma_alloc, .free = __dma_free, @@ -354,7 +364,7 @@ static struct dma_map_ops swiotlb_dma_ops = { .sync_single_for_device = __swiotlb_sync_single_for_device, .sync_sg_for_cpu = __swiotlb_sync_sg_for_cpu, .sync_sg_for_device = __swiotlb_sync_sg_for_device, - .dma_supported = swiotlb_dma_supported, + .dma_supported = __swiotlb_dma_supported, .mapping_error = swiotlb_dma_mapping_error, }; @@ -513,6 +523,9 @@ EXPORT_SYMBOL(dummy_dma_ops); static int __init arm64_dma_init(void) { + if (swiotlb_force || max_pfn > (arm64_dma_phys_limit >> PAGE_SHIFT)) + swiotlb = 1; + return atomic_pool_init(); } arch_initcall(arm64_dma_init); @@ -848,15 +861,16 @@ static int __iommu_attach_notifier(struct notifier_block *nb, { struct iommu_dma_notifier_data *master, *tmp; - if (action != BUS_NOTIFY_ADD_DEVICE) + if (action != BUS_NOTIFY_BIND_DRIVER) return 0; mutex_lock(&iommu_dma_notifier_lock); list_for_each_entry_safe(master, tmp, &iommu_dma_masters, list) { - if (do_iommu_attach(master->dev, master->ops, - master->dma_base, master->size)) { + if (data == master->dev && do_iommu_attach(master->dev, + master->ops, master->dma_base, master->size)) { list_del(&master->list); kfree(master); + break; } } mutex_unlock(&iommu_dma_notifier_lock); @@ -870,17 +884,8 @@ static int __init register_iommu_dma_ops_notifier(struct bus_type *bus) if (!nb) return -ENOMEM; - /* - * The device must be attached to a domain before the driver probe - * routine gets a chance to start allocating DMA buffers. However, - * the IOMMU driver also needs a chance to configure the iommu_group - * via its add_device callback first, so we need to make the attach - * happen between those two points. Since the IOMMU core uses a bus - * notifier with default priority for add_device, do the same but - * with a lower priority to ensure the appropriate ordering. - */ + nb->notifier_call = __iommu_attach_notifier; - nb->priority = -100; ret = bus_register_notifier(bus, nb); if (ret) { @@ -904,10 +909,6 @@ static int __init __iommu_dma_init(void) if (!ret) ret = register_iommu_dma_ops_notifier(&pci_bus_type); #endif - - /* handle devices queued before this arch_initcall */ - if (!ret) - __iommu_attach_notifier(NULL, BUS_NOTIFY_ADD_DEVICE, NULL); return ret; } arch_initcall(__iommu_dma_init); diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c index ccfde23..f94b80e 100644 --- a/arch/arm64/mm/dump.c +++ b/arch/arm64/mm/dump.c @@ -27,11 +27,7 @@ #include <asm/memory.h> #include <asm/pgtable.h> #include <asm/pgtable-hwdef.h> - -struct addr_marker { - unsigned long start_address; - const char *name; -}; +#include <asm/ptdump.h> static const struct addr_marker address_markers[] = { #ifdef CONFIG_KASAN @@ -290,7 +286,8 @@ static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start) } } -static void walk_pgd(struct pg_state *st, struct mm_struct *mm, unsigned long start) +static void walk_pgd(struct pg_state *st, struct mm_struct *mm, + unsigned long start) { pgd_t *pgd = pgd_offset(mm, 0UL); unsigned i; @@ -309,12 +306,13 @@ static void walk_pgd(struct pg_state *st, struct mm_struct *mm, unsigned long st static int ptdump_show(struct seq_file *m, void *v) { + struct ptdump_info *info = m->private; struct pg_state st = { .seq = m, - .marker = address_markers, + .marker = info->markers, }; - walk_pgd(&st, &init_mm, VA_START); + walk_pgd(&st, info->mm, info->base_addr); note_page(&st, 0, 0, 0); return 0; @@ -322,7 +320,7 @@ static int ptdump_show(struct seq_file *m, void *v) static int ptdump_open(struct inode *inode, struct file *file) { - return single_open(file, ptdump_show, NULL); + return single_open(file, ptdump_show, inode->i_private); } static const struct file_operations ptdump_fops = { @@ -332,7 +330,7 @@ static const struct file_operations ptdump_fops = { .release = single_release, }; -static int ptdump_init(void) +int ptdump_register(struct ptdump_info *info, const char *name) { struct dentry *pe; unsigned i, j; @@ -342,8 +340,18 @@ static int ptdump_init(void) for (j = 0; j < pg_level[i].num; j++) pg_level[i].mask |= pg_level[i].bits[j].mask; - pe = debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, - &ptdump_fops); + pe = debugfs_create_file(name, 0400, NULL, info, &ptdump_fops); return pe ? 0 : -ENOMEM; } + +static struct ptdump_info kernel_ptdump_info = { + .mm = &init_mm, + .markers = address_markers, + .base_addr = VA_START, +}; + +static int ptdump_init(void) +{ + return ptdump_register(&kernel_ptdump_info, "kernel_page_tables"); +} device_initcall(ptdump_init); diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index ba3fc12..c8beaa0 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -41,6 +41,28 @@ static const char *fault_name(unsigned int esr); +#ifdef CONFIG_KPROBES +static inline int notify_page_fault(struct pt_regs *regs, unsigned int esr) +{ + int ret = 0; + + /* kprobe_running() needs smp_processor_id() */ + if (!user_mode(regs)) { + preempt_disable(); + if (kprobe_running() && kprobe_fault_handler(regs, esr)) + ret = 1; + preempt_enable(); + } + + return ret; +} +#else +static inline int notify_page_fault(struct pt_regs *regs, unsigned int esr) +{ + return 0; +} +#endif + /* * Dump out the page tables associated with 'addr' in mm 'mm'. */ @@ -202,8 +224,6 @@ static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *re #define VM_FAULT_BADMAP 0x010000 #define VM_FAULT_BADACCESS 0x020000 -#define ESR_LNX_EXEC (1 << 24) - static int __do_page_fault(struct mm_struct *mm, unsigned long addr, unsigned int mm_flags, unsigned long vm_flags, struct task_struct *tsk) @@ -233,7 +253,7 @@ good_area: goto out; } - return handle_mm_fault(mm, vma, addr & PAGE_MASK, mm_flags); + return handle_mm_fault(vma, addr & PAGE_MASK, mm_flags); check_stack: if (vma->vm_flags & VM_GROWSDOWN && !expand_stack(vma, addr)) @@ -242,14 +262,19 @@ out: return fault; } -static inline int permission_fault(unsigned int esr) +static inline bool is_permission_fault(unsigned int esr) { - unsigned int ec = (esr & ESR_ELx_EC_MASK) >> ESR_ELx_EC_SHIFT; + unsigned int ec = ESR_ELx_EC(esr); unsigned int fsc_type = esr & ESR_ELx_FSC_TYPE; return (ec == ESR_ELx_EC_DABT_CUR && fsc_type == ESR_ELx_FSC_PERM); } +static bool is_el0_instruction_abort(unsigned int esr) +{ + return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_LOW; +} + static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, struct pt_regs *regs) { @@ -259,6 +284,9 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, unsigned long vm_flags = VM_READ | VM_WRITE | VM_EXEC; unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; + if (notify_page_fault(regs, esr)) + return 0; + tsk = current; mm = tsk->mm; @@ -272,15 +300,16 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, if (user_mode(regs)) mm_flags |= FAULT_FLAG_USER; - if (esr & ESR_LNX_EXEC) { + if (is_el0_instruction_abort(esr)) { vm_flags = VM_EXEC; } else if ((esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM)) { vm_flags = VM_WRITE; mm_flags |= FAULT_FLAG_WRITE; } - if (permission_fault(esr) && (addr < USER_DS)) { - if (get_fs() == KERNEL_DS) + if (is_permission_fault(esr) && (addr < USER_DS)) { + /* regs->orig_addr_limit may be 0 if we entered from EL0 */ + if (regs->orig_addr_limit == KERNEL_DS) die("Accessing user space memory with fs=KERNEL_DS", regs, esr); if (!search_exception_tables(regs->pc)) @@ -441,7 +470,7 @@ static int do_bad(unsigned long addr, unsigned int esr, struct pt_regs *regs) return 1; } -static struct fault_info { +static const struct fault_info { int (*fn)(unsigned long addr, unsigned int esr, struct pt_regs *regs); int sig; int code; @@ -629,6 +658,7 @@ asmlinkage int __exception do_debug_exception(unsigned long addr, return rv; } +NOKPROBE_SYMBOL(do_debug_exception); #ifdef CONFIG_ARM64_PAN void cpu_enable_pan(void *__unused) diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c index dbd12ea..43a76b0 100644 --- a/arch/arm64/mm/flush.c +++ b/arch/arm64/mm/flush.c @@ -71,10 +71,6 @@ void __sync_icache_dcache(pte_t pte, unsigned long addr) { struct page *page = pte_page(pte); - /* no flushing needed for anonymous pages */ - if (!page_mapping(page)) - return; - if (!test_and_set_bit(PG_dcache_clean, &page->flags)) sync_icache_aliases(page_address(page), PAGE_SIZE << compound_order(page)); diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index d45f862..bbb7ee7 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -160,12 +160,10 @@ static void __init arm64_memory_present(void) static void __init arm64_memory_present(void) { struct memblock_region *reg; - int nid = 0; for_each_memblock(memory, reg) { -#ifdef CONFIG_NUMA - nid = reg->nid; -#endif + int nid = memblock_get_region_node(reg); + memory_present(nid, memblock_region_memory_base_pfn(reg), memblock_region_memory_end_pfn(reg)); } @@ -226,7 +224,7 @@ void __init arm64_memblock_init(void) * via the linear mapping. */ if (memory_limit != (phys_addr_t)ULLONG_MAX) { - memblock_enforce_memory_limit(memory_limit); + memblock_mem_limit_remove_map(memory_limit); memblock_add(__pa(_text), (u64)(_end - _text)); } @@ -403,7 +401,8 @@ static void __init free_unused_memmap(void) */ void __init mem_init(void) { - swiotlb_init(1); + if (swiotlb_force || max_pfn > (arm64_dma_phys_limit >> PAGE_SHIFT)) + swiotlb_init(1); set_max_mapnr(pfn_to_page(max_pfn) - mem_map); @@ -430,9 +429,9 @@ void __init mem_init(void) pr_cont(" vmalloc : 0x%16lx - 0x%16lx (%6ld GB)\n", MLG(VMALLOC_START, VMALLOC_END)); pr_cont(" .text : 0x%p" " - 0x%p" " (%6ld KB)\n", - MLK_ROUNDUP(_text, __start_rodata)); + MLK_ROUNDUP(_text, _etext)); pr_cont(" .rodata : 0x%p" " - 0x%p" " (%6ld KB)\n", - MLK_ROUNDUP(__start_rodata, _etext)); + MLK_ROUNDUP(__start_rodata, __init_begin)); pr_cont(" .init : 0x%p" " - 0x%p" " (%6ld KB)\n", MLK_ROUNDUP(__init_begin, __init_end)); pr_cont(" .data : 0x%p" " - 0x%p" " (%6ld KB)\n", diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 0f85a46..51a5581 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -77,7 +77,6 @@ static phys_addr_t __init early_pgtable_alloc(void) void *ptr; phys = memblock_alloc(PAGE_SIZE, PAGE_SIZE); - BUG_ON(!phys); /* * The FIX_{PGD,PUD,PMD} slots may be in active use, but the FIX_PTE @@ -97,24 +96,6 @@ static phys_addr_t __init early_pgtable_alloc(void) return phys; } -/* - * remap a PMD into pages - */ -static void split_pmd(pmd_t *pmd, pte_t *pte) -{ - unsigned long pfn = pmd_pfn(*pmd); - int i = 0; - - do { - /* - * Need to have the least restrictive permissions available - * permissions will be fixed up later - */ - set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC)); - pfn++; - } while (pte++, i++, i < PTRS_PER_PTE); -} - static void alloc_init_pte(pmd_t *pmd, unsigned long addr, unsigned long end, unsigned long pfn, pgprot_t prot, @@ -122,15 +103,13 @@ static void alloc_init_pte(pmd_t *pmd, unsigned long addr, { pte_t *pte; - if (pmd_none(*pmd) || pmd_sect(*pmd)) { + BUG_ON(pmd_sect(*pmd)); + if (pmd_none(*pmd)) { phys_addr_t pte_phys; BUG_ON(!pgtable_alloc); pte_phys = pgtable_alloc(); pte = pte_set_fixmap(pte_phys); - if (pmd_sect(*pmd)) - split_pmd(pmd, pte); __pmd_populate(pmd, pte_phys, PMD_TYPE_TABLE); - flush_tlb_all(); pte_clear_fixmap(); } BUG_ON(pmd_bad(*pmd)); @@ -144,41 +123,10 @@ static void alloc_init_pte(pmd_t *pmd, unsigned long addr, pte_clear_fixmap(); } -static void split_pud(pud_t *old_pud, pmd_t *pmd) -{ - unsigned long addr = pud_pfn(*old_pud) << PAGE_SHIFT; - pgprot_t prot = __pgprot(pud_val(*old_pud) ^ addr); - int i = 0; - - do { - set_pmd(pmd, __pmd(addr | pgprot_val(prot))); - addr += PMD_SIZE; - } while (pmd++, i++, i < PTRS_PER_PMD); -} - -#ifdef CONFIG_DEBUG_PAGEALLOC -static bool block_mappings_allowed(phys_addr_t (*pgtable_alloc)(void)) -{ - - /* - * If debug_page_alloc is enabled we must map the linear map - * using pages. However, other mappings created by - * create_mapping_noalloc must use sections in some cases. Allow - * sections to be used in those cases, where no pgtable_alloc - * function is provided. - */ - return !pgtable_alloc || !debug_pagealloc_enabled(); -} -#else -static bool block_mappings_allowed(phys_addr_t (*pgtable_alloc)(void)) -{ - return true; -} -#endif - static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end, phys_addr_t phys, pgprot_t prot, - phys_addr_t (*pgtable_alloc)(void)) + phys_addr_t (*pgtable_alloc)(void), + bool allow_block_mappings) { pmd_t *pmd; unsigned long next; @@ -186,20 +134,13 @@ static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end, /* * Check for initial section mappings in the pgd/pud and remove them. */ - if (pud_none(*pud) || pud_sect(*pud)) { + BUG_ON(pud_sect(*pud)); + if (pud_none(*pud)) { phys_addr_t pmd_phys; BUG_ON(!pgtable_alloc); pmd_phys = pgtable_alloc(); pmd = pmd_set_fixmap(pmd_phys); - if (pud_sect(*pud)) { - /* - * need to have the 1G of mappings continue to be - * present - */ - split_pud(pud, pmd); - } __pud_populate(pud, pmd_phys, PUD_TYPE_TABLE); - flush_tlb_all(); pmd_clear_fixmap(); } BUG_ON(pud_bad(*pud)); @@ -209,7 +150,7 @@ static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end, next = pmd_addr_end(addr, end); /* try section mapping first */ if (((addr | next | phys) & ~SECTION_MASK) == 0 && - block_mappings_allowed(pgtable_alloc)) { + allow_block_mappings) { pmd_t old_pmd =*pmd; pmd_set_huge(pmd, phys, prot); /* @@ -248,7 +189,8 @@ static inline bool use_1G_block(unsigned long addr, unsigned long next, static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end, phys_addr_t phys, pgprot_t prot, - phys_addr_t (*pgtable_alloc)(void)) + phys_addr_t (*pgtable_alloc)(void), + bool allow_block_mappings) { pud_t *pud; unsigned long next; @@ -268,8 +210,7 @@ static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end, /* * For 4K granule only, attempt to put down a 1GB block */ - if (use_1G_block(addr, next, phys) && - block_mappings_allowed(pgtable_alloc)) { + if (use_1G_block(addr, next, phys) && allow_block_mappings) { pud_t old_pud = *pud; pud_set_huge(pud, phys, prot); @@ -290,7 +231,7 @@ static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end, } } else { alloc_init_pmd(pud, addr, next, phys, prot, - pgtable_alloc); + pgtable_alloc, allow_block_mappings); } phys += next - addr; } while (pud++, addr = next, addr != end); @@ -298,15 +239,14 @@ static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end, pud_clear_fixmap(); } -/* - * Create the page directory entries and any necessary page tables for the - * mapping specified by 'md'. - */ -static void init_pgd(pgd_t *pgd, phys_addr_t phys, unsigned long virt, - phys_addr_t size, pgprot_t prot, - phys_addr_t (*pgtable_alloc)(void)) +static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, + unsigned long virt, phys_addr_t size, + pgprot_t prot, + phys_addr_t (*pgtable_alloc)(void), + bool allow_block_mappings) { unsigned long addr, length, end, next; + pgd_t *pgd = pgd_offset_raw(pgdir, virt); /* * If the virtual and physical address don't have the same offset @@ -322,29 +262,23 @@ static void init_pgd(pgd_t *pgd, phys_addr_t phys, unsigned long virt, end = addr + length; do { next = pgd_addr_end(addr, end); - alloc_init_pud(pgd, addr, next, phys, prot, pgtable_alloc); + alloc_init_pud(pgd, addr, next, phys, prot, pgtable_alloc, + allow_block_mappings); phys += next - addr; } while (pgd++, addr = next, addr != end); } -static phys_addr_t late_pgtable_alloc(void) +static phys_addr_t pgd_pgtable_alloc(void) { void *ptr = (void *)__get_free_page(PGALLOC_GFP); - BUG_ON(!ptr); + if (!ptr || !pgtable_page_ctor(virt_to_page(ptr))) + BUG(); /* Ensure the zeroed page is visible to the page table walker */ dsb(ishst); return __pa(ptr); } -static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, - unsigned long virt, phys_addr_t size, - pgprot_t prot, - phys_addr_t (*alloc)(void)) -{ - init_pgd(pgd_offset_raw(pgdir, virt), phys, virt, size, prot, alloc); -} - /* * This function can only be used to modify existing table entries, * without allocating new levels of table. Note that this permits the @@ -358,16 +292,17 @@ static void __init create_mapping_noalloc(phys_addr_t phys, unsigned long virt, &phys, virt); return; } - __create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, - NULL); + __create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, NULL, true); } void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, unsigned long virt, phys_addr_t size, - pgprot_t prot) + pgprot_t prot, bool allow_block_mappings) { + BUG_ON(mm == &init_mm); + __create_pgd_mapping(mm->pgd, phys, virt, size, prot, - late_pgtable_alloc); + pgd_pgtable_alloc, allow_block_mappings); } static void create_mapping_late(phys_addr_t phys, unsigned long virt, @@ -380,51 +315,54 @@ static void create_mapping_late(phys_addr_t phys, unsigned long virt, } __create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, - late_pgtable_alloc); + NULL, !debug_pagealloc_enabled()); } static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end) { unsigned long kernel_start = __pa(_text); - unsigned long kernel_end = __pa(_etext); + unsigned long kernel_end = __pa(__init_begin); /* * Take care not to create a writable alias for the * read-only text and rodata sections of the kernel image. */ - /* No overlap with the kernel text */ + /* No overlap with the kernel text/rodata */ if (end < kernel_start || start >= kernel_end) { __create_pgd_mapping(pgd, start, __phys_to_virt(start), end - start, PAGE_KERNEL, - early_pgtable_alloc); + early_pgtable_alloc, + !debug_pagealloc_enabled()); return; } /* - * This block overlaps the kernel text mapping. + * This block overlaps the kernel text/rodata mappings. * Map the portion(s) which don't overlap. */ if (start < kernel_start) __create_pgd_mapping(pgd, start, __phys_to_virt(start), kernel_start - start, PAGE_KERNEL, - early_pgtable_alloc); + early_pgtable_alloc, + !debug_pagealloc_enabled()); if (kernel_end < end) __create_pgd_mapping(pgd, kernel_end, __phys_to_virt(kernel_end), end - kernel_end, PAGE_KERNEL, - early_pgtable_alloc); + early_pgtable_alloc, + !debug_pagealloc_enabled()); /* - * Map the linear alias of the [_text, _etext) interval as + * Map the linear alias of the [_text, __init_begin) interval as * read-only/non-executable. This makes the contents of the * region accessible to subsystems such as hibernate, but * protects it from inadvertent modification or execution. */ __create_pgd_mapping(pgd, kernel_start, __phys_to_virt(kernel_start), kernel_end - kernel_start, PAGE_KERNEL_RO, - early_pgtable_alloc); + early_pgtable_alloc, !debug_pagealloc_enabled()); } static void __init map_mem(pgd_t *pgd) @@ -449,14 +387,14 @@ void mark_rodata_ro(void) { unsigned long section_size; - section_size = (unsigned long)__start_rodata - (unsigned long)_text; + section_size = (unsigned long)_etext - (unsigned long)_text; create_mapping_late(__pa(_text), (unsigned long)_text, section_size, PAGE_KERNEL_ROX); /* - * mark .rodata as read only. Use _etext rather than __end_rodata to - * cover NOTES and EXCEPTION_TABLE. + * mark .rodata as read only. Use __init_begin rather than __end_rodata + * to cover NOTES and EXCEPTION_TABLE. */ - section_size = (unsigned long)_etext - (unsigned long)__start_rodata; + section_size = (unsigned long)__init_begin - (unsigned long)__start_rodata; create_mapping_late(__pa(__start_rodata), (unsigned long)__start_rodata, section_size, PAGE_KERNEL_RO); } @@ -481,7 +419,7 @@ static void __init map_kernel_segment(pgd_t *pgd, void *va_start, void *va_end, BUG_ON(!PAGE_ALIGNED(size)); __create_pgd_mapping(pgd, pa_start, (unsigned long)va_start, size, prot, - early_pgtable_alloc); + early_pgtable_alloc, !debug_pagealloc_enabled()); vma->addr = va_start; vma->phys_addr = pa_start; @@ -499,8 +437,8 @@ static void __init map_kernel(pgd_t *pgd) { static struct vm_struct vmlinux_text, vmlinux_rodata, vmlinux_init, vmlinux_data; - map_kernel_segment(pgd, _text, __start_rodata, PAGE_KERNEL_EXEC, &vmlinux_text); - map_kernel_segment(pgd, __start_rodata, _etext, PAGE_KERNEL, &vmlinux_rodata); + map_kernel_segment(pgd, _text, _etext, PAGE_KERNEL_EXEC, &vmlinux_text); + map_kernel_segment(pgd, __start_rodata, __init_begin, PAGE_KERNEL, &vmlinux_rodata); map_kernel_segment(pgd, __init_begin, __init_end, PAGE_KERNEL_EXEC, &vmlinux_init); map_kernel_segment(pgd, _data, _end, PAGE_KERNEL, &vmlinux_data); diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c index 98dc104..c7fe3ec 100644 --- a/arch/arm64/mm/numa.c +++ b/arch/arm64/mm/numa.c @@ -17,6 +17,7 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ +#include <linux/acpi.h> #include <linux/bootmem.h> #include <linux/memblock.h> #include <linux/module.h> @@ -29,7 +30,7 @@ static int cpu_to_node_map[NR_CPUS] = { [0 ... NR_CPUS-1] = NUMA_NO_NODE }; static int numa_distance_cnt; static u8 *numa_distance; -static int numa_off; +static bool numa_off; static __init int numa_parse_early_param(char *opt) { @@ -37,7 +38,7 @@ static __init int numa_parse_early_param(char *opt) return -EINVAL; if (!strncmp(opt, "off", 3)) { pr_info("%s\n", "NUMA turned off"); - numa_off = 1; + numa_off = true; } return 0; } @@ -131,25 +132,25 @@ void __init early_map_cpu_to_node(unsigned int cpu, int nid) * numa_add_memblk - Set node id to memblk * @nid: NUMA node ID of the new memblk * @start: Start address of the new memblk - * @size: Size of the new memblk + * @end: End address of the new memblk * * RETURNS: * 0 on success, -errno on failure. */ -int __init numa_add_memblk(int nid, u64 start, u64 size) +int __init numa_add_memblk(int nid, u64 start, u64 end) { int ret; - ret = memblock_set_node(start, size, &memblock.memory, nid); + ret = memblock_set_node(start, (end - start), &memblock.memory, nid); if (ret < 0) { pr_err("NUMA: memblock [0x%llx - 0x%llx] failed to add on node %d\n", - start, (start + size - 1), nid); + start, (end - 1), nid); return ret; } node_set(nid, numa_nodes_parsed); pr_info("NUMA: Adding memblock [0x%llx - 0x%llx] on node %d\n", - start, (start + size - 1), nid); + start, (end - 1), nid); return ret; } @@ -362,12 +363,15 @@ static int __init dummy_numa_init(void) int ret; struct memblock_region *mblk; - pr_info("%s\n", "No NUMA configuration found"); + if (numa_off) + pr_info("NUMA disabled\n"); /* Forced off on command line. */ + else + pr_info("No NUMA configuration found\n"); pr_info("NUMA: Faking a node at [mem %#018Lx-%#018Lx]\n", 0LLU, PFN_PHYS(max_pfn) - 1); for_each_memblock(memory, mblk) { - ret = numa_add_memblk(0, mblk->base, mblk->size); + ret = numa_add_memblk(0, mblk->base, mblk->base + mblk->size); if (!ret) continue; @@ -375,7 +379,7 @@ static int __init dummy_numa_init(void) return ret; } - numa_off = 1; + numa_off = true; return 0; } @@ -388,7 +392,9 @@ static int __init dummy_numa_init(void) void __init arm64_numa_init(void) { if (!numa_off) { - if (!numa_init(of_numa_init)) + if (!acpi_disabled && !numa_init(arm64_acpi_numa_init)) + return; + if (acpi_disabled && !numa_init(of_numa_init)) return; } diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index c431787..5bb61de 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -180,6 +180,8 @@ ENTRY(__cpu_setup) msr cpacr_el1, x0 // Enable FP/ASIMD mov x0, #1 << 12 // Reset mdscr_el1 and disable msr mdscr_el1, x0 // access to the DCC from EL0 + isb // Unmask debug exceptions now, + enable_dbg // since this is per-cpu reset_pmuserenr_el0 x0 // Disable PMU access from EL0 /* * Memory region attributes for LPAE: diff --git a/arch/arm64/net/bpf_jit.h b/arch/arm64/net/bpf_jit.h index aee5637..7c16e54 100644 --- a/arch/arm64/net/bpf_jit.h +++ b/arch/arm64/net/bpf_jit.h @@ -1,7 +1,7 @@ /* * BPF JIT compiler for ARM64 * - * Copyright (C) 2014-2015 Zi Shen Lim <zlim.lnx@gmail.com> + * Copyright (C) 2014-2016 Zi Shen Lim <zlim.lnx@gmail.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -55,6 +55,7 @@ #define A64_BL(imm26) A64_BRANCH((imm26) << 2, LINK) /* Unconditional branch (register) */ +#define A64_BR(Rn) aarch64_insn_gen_branch_reg(Rn, AARCH64_INSN_BRANCH_NOLINK) #define A64_BLR(Rn) aarch64_insn_gen_branch_reg(Rn, AARCH64_INSN_BRANCH_LINK) #define A64_RET(Rn) aarch64_insn_gen_branch_reg(Rn, AARCH64_INSN_BRANCH_RETURN) diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 49ba37e..b2fc97a 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -18,6 +18,7 @@ #define pr_fmt(fmt) "bpf_jit: " fmt +#include <linux/bpf.h> #include <linux/filter.h> #include <linux/printk.h> #include <linux/skbuff.h> @@ -33,6 +34,7 @@ int bpf_jit_enable __read_mostly; #define TMP_REG_1 (MAX_BPF_JIT_REG + 0) #define TMP_REG_2 (MAX_BPF_JIT_REG + 1) +#define TCALL_CNT (MAX_BPF_JIT_REG + 2) /* Map BPF registers to A64 registers */ static const int bpf2a64[] = { @@ -54,6 +56,8 @@ static const int bpf2a64[] = { /* temporary registers for internal BPF JIT */ [TMP_REG_1] = A64_R(10), [TMP_REG_2] = A64_R(11), + /* tail_call_cnt */ + [TCALL_CNT] = A64_R(26), /* temporary register for blinding constants */ [BPF_REG_AX] = A64_R(9), }; @@ -146,13 +150,18 @@ static inline int epilogue_offset(const struct jit_ctx *ctx) #define STACK_SIZE STACK_ALIGN(_STACK_SIZE) -static void build_prologue(struct jit_ctx *ctx) +#define PROLOGUE_OFFSET 8 + +static int build_prologue(struct jit_ctx *ctx) { const u8 r6 = bpf2a64[BPF_REG_6]; const u8 r7 = bpf2a64[BPF_REG_7]; const u8 r8 = bpf2a64[BPF_REG_8]; const u8 r9 = bpf2a64[BPF_REG_9]; const u8 fp = bpf2a64[BPF_REG_FP]; + const u8 tcc = bpf2a64[TCALL_CNT]; + const int idx0 = ctx->idx; + int cur_offset; /* * BPF prog stack layout @@ -162,8 +171,6 @@ static void build_prologue(struct jit_ctx *ctx) * |FP/LR| * current A64_FP => -16:+-----+ * | ... | callee saved registers - * +-----+ - * | | x25/x26 * BPF fp register => -64:+-----+ <= (BPF_FP) * | | * | ... | BPF prog stack @@ -183,18 +190,90 @@ static void build_prologue(struct jit_ctx *ctx) emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx); emit(A64_MOV(1, A64_FP, A64_SP), ctx); - /* Save callee-saved register */ + /* Save callee-saved registers */ emit(A64_PUSH(r6, r7, A64_SP), ctx); emit(A64_PUSH(r8, r9, A64_SP), ctx); + emit(A64_PUSH(fp, tcc, A64_SP), ctx); - /* Save fp (x25) and x26. SP requires 16 bytes alignment */ - emit(A64_PUSH(fp, A64_R(26), A64_SP), ctx); - - /* Set up BPF prog stack base register (x25) */ + /* Set up BPF prog stack base register */ emit(A64_MOV(1, fp, A64_SP), ctx); + /* Initialize tail_call_cnt */ + emit(A64_MOVZ(1, tcc, 0, 0), ctx); + /* Set up function call stack */ emit(A64_SUB_I(1, A64_SP, A64_SP, STACK_SIZE), ctx); + + cur_offset = ctx->idx - idx0; + if (cur_offset != PROLOGUE_OFFSET) { + pr_err_once("PROLOGUE_OFFSET = %d, expected %d!\n", + cur_offset, PROLOGUE_OFFSET); + return -1; + } + return 0; +} + +static int out_offset = -1; /* initialized on the first pass of build_body() */ +static int emit_bpf_tail_call(struct jit_ctx *ctx) +{ + /* bpf_tail_call(void *prog_ctx, struct bpf_array *array, u64 index) */ + const u8 r2 = bpf2a64[BPF_REG_2]; + const u8 r3 = bpf2a64[BPF_REG_3]; + + const u8 tmp = bpf2a64[TMP_REG_1]; + const u8 prg = bpf2a64[TMP_REG_2]; + const u8 tcc = bpf2a64[TCALL_CNT]; + const int idx0 = ctx->idx; +#define cur_offset (ctx->idx - idx0) +#define jmp_offset (out_offset - (cur_offset)) + size_t off; + + /* if (index >= array->map.max_entries) + * goto out; + */ + off = offsetof(struct bpf_array, map.max_entries); + emit_a64_mov_i64(tmp, off, ctx); + emit(A64_LDR32(tmp, r2, tmp), ctx); + emit(A64_CMP(0, r3, tmp), ctx); + emit(A64_B_(A64_COND_GE, jmp_offset), ctx); + + /* if (tail_call_cnt > MAX_TAIL_CALL_CNT) + * goto out; + * tail_call_cnt++; + */ + emit_a64_mov_i64(tmp, MAX_TAIL_CALL_CNT, ctx); + emit(A64_CMP(1, tcc, tmp), ctx); + emit(A64_B_(A64_COND_GT, jmp_offset), ctx); + emit(A64_ADD_I(1, tcc, tcc, 1), ctx); + + /* prog = array->ptrs[index]; + * if (prog == NULL) + * goto out; + */ + off = offsetof(struct bpf_array, ptrs); + emit_a64_mov_i64(tmp, off, ctx); + emit(A64_LDR64(tmp, r2, tmp), ctx); + emit(A64_LDR64(prg, tmp, r3), ctx); + emit(A64_CBZ(1, prg, jmp_offset), ctx); + + /* goto *(prog->bpf_func + prologue_size); */ + off = offsetof(struct bpf_prog, bpf_func); + emit_a64_mov_i64(tmp, off, ctx); + emit(A64_LDR64(tmp, prg, tmp), ctx); + emit(A64_ADD_I(1, tmp, tmp, sizeof(u32) * PROLOGUE_OFFSET), ctx); + emit(A64_BR(tmp), ctx); + + /* out: */ + if (out_offset == -1) + out_offset = cur_offset; + if (cur_offset != out_offset) { + pr_err_once("tail_call out_offset = %d, expected %d!\n", + cur_offset, out_offset); + return -1; + } + return 0; +#undef cur_offset +#undef jmp_offset } static void build_epilogue(struct jit_ctx *ctx) @@ -499,13 +578,15 @@ emit_cond_jmp: const u64 func = (u64)__bpf_call_base + imm; emit_a64_mov_i64(tmp, func, ctx); - emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx); - emit(A64_MOV(1, A64_FP, A64_SP), ctx); emit(A64_BLR(tmp), ctx); emit(A64_MOV(1, r0, A64_R(0)), ctx); - emit(A64_POP(A64_FP, A64_LR, A64_SP), ctx); break; } + /* tail call */ + case BPF_JMP | BPF_CALL | BPF_X: + if (emit_bpf_tail_call(ctx)) + return -EFAULT; + break; /* function return */ case BPF_JMP | BPF_EXIT: /* Optimization: when last instruction is EXIT, @@ -650,11 +731,8 @@ emit_cond_jmp: emit_a64_mov_i64(r3, size, ctx); emit(A64_SUB_I(1, r4, fp, STACK_SIZE), ctx); emit_a64_mov_i64(r5, (unsigned long)bpf_load_pointer, ctx); - emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx); - emit(A64_MOV(1, A64_FP, A64_SP), ctx); emit(A64_BLR(r5), ctx); emit(A64_MOV(1, r0, A64_R(0)), ctx); - emit(A64_POP(A64_FP, A64_LR, A64_SP), ctx); jmp_offset = epilogue_offset(ctx); check_imm19(jmp_offset); @@ -780,7 +858,10 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) goto out_off; } - build_prologue(&ctx); + if (build_prologue(&ctx)) { + prog = orig_prog; + goto out_off; + } ctx.epilogue_offset = ctx.idx; build_epilogue(&ctx); diff --git a/arch/arm64/xen/Makefile b/arch/arm64/xen/Makefile index 74a8d87..8ff8aa9 100644 --- a/arch/arm64/xen/Makefile +++ b/arch/arm64/xen/Makefile @@ -1,2 +1,3 @@ xen-arm-y += $(addprefix ../../arm/xen/, enlighten.o grant-table.o p2m.o mm.o) obj-y := xen-arm.o hypercall.o +obj-$(CONFIG_XEN_EFI) += $(addprefix ../../arm/xen/, efi.o) diff --git a/arch/arm64/xen/hypercall.S b/arch/arm64/xen/hypercall.S index 70df80e..329c802 100644 --- a/arch/arm64/xen/hypercall.S +++ b/arch/arm64/xen/hypercall.S @@ -82,6 +82,7 @@ HYPERCALL3(vcpu_op); HYPERCALL1(tmem_op); HYPERCALL1(platform_op_raw); HYPERCALL2(multicall); +HYPERCALL2(vm_assist); ENTRY(privcmd_call) mov x16, x0 |