From efa773fe913ff188c554ccaac577d0a1bb1dc2f4 Mon Sep 17 00:00:00 2001 From: Alim Akhtar Date: Mon, 24 Aug 2015 18:01:15 +0530 Subject: arm64: defconfig: Enable samsung serial and mmc This patch update defconfig, adds samsung serial and Synopsys Designware MMC configs related to exynos SoC Signed-off-by: Alim Akhtar Signed-off-by: Catalin Marinas diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 34d71dd..c8ccda1 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -109,6 +109,10 @@ CONFIG_SERIAL_8250_DW=y CONFIG_SERIAL_8250_MT6577=y CONFIG_SERIAL_AMBA_PL011=y CONFIG_SERIAL_AMBA_PL011_CONSOLE=y +CONFIG_SERIAL_SAMSUNG=y +CONFIG_SERIAL_SAMSUNG_UARTS_4=y +CONFIG_SERIAL_SAMSUNG_UARTS=4 +CONFIG_SERIAL_SAMSUNG_CONSOLE=y CONFIG_SERIAL_MSM=y CONFIG_SERIAL_MSM_CONSOLE=y CONFIG_SERIAL_OF_PLATFORM=y @@ -145,6 +149,10 @@ CONFIG_MMC_ARMMMCI=y CONFIG_MMC_SDHCI=y CONFIG_MMC_SDHCI_PLTFM=y CONFIG_MMC_SPI=y +CONFIG_MMC_DW=y +CONFIG_MMC_DW_IDMAC=y +CONFIG_MMC_DW_PLTFM=y +CONFIG_MMC_DW_EXYNOS=y CONFIG_NEW_LEDS=y CONFIG_LEDS_CLASS=y CONFIG_LEDS_SYSCON=y -- cgit v0.10.2 From e5c88e3f2fb35dca5f3e46d65095bf5d008595b7 Mon Sep 17 00:00:00 2001 From: Feng Kan Date: Wed, 23 Sep 2015 11:55:38 -0700 Subject: arm64: Change memcpy in kernel to use the copy template file This converts the memcpy.S to use the copy template file. The copy template file was based originally on the memcpy.S Signed-off-by: Feng Kan Signed-off-by: Balamurugan Shanmugam [catalin.marinas@arm.com: removed tmp3(w) .req statements as they are not used] Signed-off-by: Catalin Marinas diff --git a/arch/arm64/lib/copy_template.S b/arch/arm64/lib/copy_template.S new file mode 100644 index 0000000..410fbdb --- /dev/null +++ b/arch/arm64/lib/copy_template.S @@ -0,0 +1,193 @@ +/* + * Copyright (C) 2013 ARM Ltd. + * Copyright (C) 2013 Linaro. + * + * This code is based on glibc cortex strings work originally authored by Linaro + * and re-licensed under GPLv2 for the Linux kernel. The original code can + * be found @ + * + * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/ + * files/head:/src/aarch64/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + + +/* + * Copy a buffer from src to dest (alignment handled by the hardware) + * + * Parameters: + * x0 - dest + * x1 - src + * x2 - n + * Returns: + * x0 - dest + */ +dstin .req x0 +src .req x1 +count .req x2 +tmp1 .req x3 +tmp1w .req w3 +tmp2 .req x4 +tmp2w .req w4 +dst .req x6 + +A_l .req x7 +A_h .req x8 +B_l .req x9 +B_h .req x10 +C_l .req x11 +C_h .req x12 +D_l .req x13 +D_h .req x14 + + mov dst, dstin + cmp count, #16 + /*When memory length is less than 16, the accessed are not aligned.*/ + b.lo .Ltiny15 + + neg tmp2, src + ands tmp2, tmp2, #15/* Bytes to reach alignment. */ + b.eq .LSrcAligned + sub count, count, tmp2 + /* + * Copy the leading memory data from src to dst in an increasing + * address order.By this way,the risk of overwritting the source + * memory data is eliminated when the distance between src and + * dst is less than 16. The memory accesses here are alignment. + */ + tbz tmp2, #0, 1f + ldrb1 tmp1w, src, #1 + strb1 tmp1w, dst, #1 +1: + tbz tmp2, #1, 2f + ldrh1 tmp1w, src, #2 + strh1 tmp1w, dst, #2 +2: + tbz tmp2, #2, 3f + ldr1 tmp1w, src, #4 + str1 tmp1w, dst, #4 +3: + tbz tmp2, #3, .LSrcAligned + ldr1 tmp1, src, #8 + str1 tmp1, dst, #8 + +.LSrcAligned: + cmp count, #64 + b.ge .Lcpy_over64 + /* + * Deal with small copies quickly by dropping straight into the + * exit block. + */ +.Ltail63: + /* + * Copy up to 48 bytes of data. At this point we only need the + * bottom 6 bits of count to be accurate. + */ + ands tmp1, count, #0x30 + b.eq .Ltiny15 + cmp tmp1w, #0x20 + b.eq 1f + b.lt 2f + ldp1 A_l, A_h, src, #16 + stp1 A_l, A_h, dst, #16 +1: + ldp1 A_l, A_h, src, #16 + stp1 A_l, A_h, dst, #16 +2: + ldp1 A_l, A_h, src, #16 + stp1 A_l, A_h, dst, #16 +.Ltiny15: + /* + * Prefer to break one ldp/stp into several load/store to access + * memory in an increasing address order,rather than to load/store 16 + * bytes from (src-16) to (dst-16) and to backward the src to aligned + * address,which way is used in original cortex memcpy. If keeping + * the original memcpy process here, memmove need to satisfy the + * precondition that src address is at least 16 bytes bigger than dst + * address,otherwise some source data will be overwritten when memove + * call memcpy directly. To make memmove simpler and decouple the + * memcpy's dependency on memmove, withdrew the original process. + */ + tbz count, #3, 1f + ldr1 tmp1, src, #8 + str1 tmp1, dst, #8 +1: + tbz count, #2, 2f + ldr1 tmp1w, src, #4 + str1 tmp1w, dst, #4 +2: + tbz count, #1, 3f + ldrh1 tmp1w, src, #2 + strh1 tmp1w, dst, #2 +3: + tbz count, #0, .Lexitfunc + ldrb1 tmp1w, src, #1 + strb1 tmp1w, dst, #1 + + b .Lexitfunc + +.Lcpy_over64: + subs count, count, #128 + b.ge .Lcpy_body_large + /* + * Less than 128 bytes to copy, so handle 64 here and then jump + * to the tail. + */ + ldp1 A_l, A_h, src, #16 + stp1 A_l, A_h, dst, #16 + ldp1 B_l, B_h, src, #16 + ldp1 C_l, C_h, src, #16 + stp1 B_l, B_h, dst, #16 + stp1 C_l, C_h, dst, #16 + ldp1 D_l, D_h, src, #16 + stp1 D_l, D_h, dst, #16 + + tst count, #0x3f + b.ne .Ltail63 + b .Lexitfunc + + /* + * Critical loop. Start at a new cache line boundary. Assuming + * 64 bytes per line this ensures the entire loop is in one line. + */ + .p2align L1_CACHE_SHIFT +.Lcpy_body_large: + /* pre-get 64 bytes data. */ + ldp1 A_l, A_h, src, #16 + ldp1 B_l, B_h, src, #16 + ldp1 C_l, C_h, src, #16 + ldp1 D_l, D_h, src, #16 +1: + /* + * interlace the load of next 64 bytes data block with store of the last + * loaded 64 bytes data. + */ + stp1 A_l, A_h, dst, #16 + ldp1 A_l, A_h, src, #16 + stp1 B_l, B_h, dst, #16 + ldp1 B_l, B_h, src, #16 + stp1 C_l, C_h, dst, #16 + ldp1 C_l, C_h, src, #16 + stp1 D_l, D_h, dst, #16 + ldp1 D_l, D_h, src, #16 + subs count, count, #64 + b.ge 1b + stp1 A_l, A_h, dst, #16 + stp1 B_l, B_h, dst, #16 + stp1 C_l, C_h, dst, #16 + stp1 D_l, D_h, dst, #16 + + tst count, #0x3f + b.ne .Ltail63 +.Lexitfunc: diff --git a/arch/arm64/lib/memcpy.S b/arch/arm64/lib/memcpy.S index 8a9a96d..173a1aa 100644 --- a/arch/arm64/lib/memcpy.S +++ b/arch/arm64/lib/memcpy.S @@ -36,166 +36,39 @@ * Returns: * x0 - dest */ -dstin .req x0 -src .req x1 -count .req x2 -tmp1 .req x3 -tmp1w .req w3 -tmp2 .req x4 -tmp2w .req w4 -tmp3 .req x5 -tmp3w .req w5 -dst .req x6 + .macro ldrb1 ptr, regB, val + ldrb \ptr, [\regB], \val + .endm -A_l .req x7 -A_h .req x8 -B_l .req x9 -B_h .req x10 -C_l .req x11 -C_h .req x12 -D_l .req x13 -D_h .req x14 + .macro strb1 ptr, regB, val + strb \ptr, [\regB], \val + .endm -ENTRY(memcpy) - mov dst, dstin - cmp count, #16 - /*When memory length is less than 16, the accessed are not aligned.*/ - b.lo .Ltiny15 + .macro ldrh1 ptr, regB, val + ldrh \ptr, [\regB], \val + .endm - neg tmp2, src - ands tmp2, tmp2, #15/* Bytes to reach alignment. */ - b.eq .LSrcAligned - sub count, count, tmp2 - /* - * Copy the leading memory data from src to dst in an increasing - * address order.By this way,the risk of overwritting the source - * memory data is eliminated when the distance between src and - * dst is less than 16. The memory accesses here are alignment. - */ - tbz tmp2, #0, 1f - ldrb tmp1w, [src], #1 - strb tmp1w, [dst], #1 -1: - tbz tmp2, #1, 2f - ldrh tmp1w, [src], #2 - strh tmp1w, [dst], #2 -2: - tbz tmp2, #2, 3f - ldr tmp1w, [src], #4 - str tmp1w, [dst], #4 -3: - tbz tmp2, #3, .LSrcAligned - ldr tmp1, [src],#8 - str tmp1, [dst],#8 + .macro strh1 ptr, regB, val + strh \ptr, [\regB], \val + .endm -.LSrcAligned: - cmp count, #64 - b.ge .Lcpy_over64 - /* - * Deal with small copies quickly by dropping straight into the - * exit block. - */ -.Ltail63: - /* - * Copy up to 48 bytes of data. At this point we only need the - * bottom 6 bits of count to be accurate. - */ - ands tmp1, count, #0x30 - b.eq .Ltiny15 - cmp tmp1w, #0x20 - b.eq 1f - b.lt 2f - ldp A_l, A_h, [src], #16 - stp A_l, A_h, [dst], #16 -1: - ldp A_l, A_h, [src], #16 - stp A_l, A_h, [dst], #16 -2: - ldp A_l, A_h, [src], #16 - stp A_l, A_h, [dst], #16 -.Ltiny15: - /* - * Prefer to break one ldp/stp into several load/store to access - * memory in an increasing address order,rather than to load/store 16 - * bytes from (src-16) to (dst-16) and to backward the src to aligned - * address,which way is used in original cortex memcpy. If keeping - * the original memcpy process here, memmove need to satisfy the - * precondition that src address is at least 16 bytes bigger than dst - * address,otherwise some source data will be overwritten when memove - * call memcpy directly. To make memmove simpler and decouple the - * memcpy's dependency on memmove, withdrew the original process. - */ - tbz count, #3, 1f - ldr tmp1, [src], #8 - str tmp1, [dst], #8 -1: - tbz count, #2, 2f - ldr tmp1w, [src], #4 - str tmp1w, [dst], #4 -2: - tbz count, #1, 3f - ldrh tmp1w, [src], #2 - strh tmp1w, [dst], #2 -3: - tbz count, #0, .Lexitfunc - ldrb tmp1w, [src] - strb tmp1w, [dst] + .macro ldr1 ptr, regB, val + ldr \ptr, [\regB], \val + .endm -.Lexitfunc: - ret + .macro str1 ptr, regB, val + str \ptr, [\regB], \val + .endm -.Lcpy_over64: - subs count, count, #128 - b.ge .Lcpy_body_large - /* - * Less than 128 bytes to copy, so handle 64 here and then jump - * to the tail. - */ - ldp A_l, A_h, [src],#16 - stp A_l, A_h, [dst],#16 - ldp B_l, B_h, [src],#16 - ldp C_l, C_h, [src],#16 - stp B_l, B_h, [dst],#16 - stp C_l, C_h, [dst],#16 - ldp D_l, D_h, [src],#16 - stp D_l, D_h, [dst],#16 + .macro ldp1 ptr, regB, regC, val + ldp \ptr, \regB, [\regC], \val + .endm - tst count, #0x3f - b.ne .Ltail63 - ret + .macro stp1 ptr, regB, regC, val + stp \ptr, \regB, [\regC], \val + .endm - /* - * Critical loop. Start at a new cache line boundary. Assuming - * 64 bytes per line this ensures the entire loop is in one line. - */ - .p2align L1_CACHE_SHIFT -.Lcpy_body_large: - /* pre-get 64 bytes data. */ - ldp A_l, A_h, [src],#16 - ldp B_l, B_h, [src],#16 - ldp C_l, C_h, [src],#16 - ldp D_l, D_h, [src],#16 -1: - /* - * interlace the load of next 64 bytes data block with store of the last - * loaded 64 bytes data. - */ - stp A_l, A_h, [dst],#16 - ldp A_l, A_h, [src],#16 - stp B_l, B_h, [dst],#16 - ldp B_l, B_h, [src],#16 - stp C_l, C_h, [dst],#16 - ldp C_l, C_h, [src],#16 - stp D_l, D_h, [dst],#16 - ldp D_l, D_h, [src],#16 - subs count, count, #64 - b.ge 1b - stp A_l, A_h, [dst],#16 - stp B_l, B_h, [dst],#16 - stp C_l, C_h, [dst],#16 - stp D_l, D_h, [dst],#16 - - tst count, #0x3f - b.ne .Ltail63 +ENTRY(memcpy) +#include "copy_template.S" ret ENDPROC(memcpy) -- cgit v0.10.2 From 404268828c74ba06b1f21584b26edafb381c9d7d Mon Sep 17 00:00:00 2001 From: Feng Kan Date: Wed, 23 Sep 2015 11:55:39 -0700 Subject: arm64: copy_to-from-in_user optimization using copy template This patch optimize copy_to-from-in_user for arm 64bit architecture. The copy template is used as template file for all the copy*.S files. Minor change was made to it to accommodate the copy to/from/in user files. Signed-off-by: Feng Kan Signed-off-by: Balamurugan Shanmugam Signed-off-by: Catalin Marinas diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S index 1be9ef2..4699cd7 100644 --- a/arch/arm64/lib/copy_from_user.S +++ b/arch/arm64/lib/copy_from_user.S @@ -18,6 +18,7 @@ #include #include +#include #include #include @@ -31,49 +32,58 @@ * Returns: * x0 - bytes not copied */ + + .macro ldrb1 ptr, regB, val + USER(9998f, ldrb \ptr, [\regB], \val) + .endm + + .macro strb1 ptr, regB, val + strb \ptr, [\regB], \val + .endm + + .macro ldrh1 ptr, regB, val + USER(9998f, ldrh \ptr, [\regB], \val) + .endm + + .macro strh1 ptr, regB, val + strh \ptr, [\regB], \val + .endm + + .macro ldr1 ptr, regB, val + USER(9998f, ldr \ptr, [\regB], \val) + .endm + + .macro str1 ptr, regB, val + str \ptr, [\regB], \val + .endm + + .macro ldp1 ptr, regB, regC, val + USER(9998f, ldp \ptr, \regB, [\regC], \val) + .endm + + .macro stp1 ptr, regB, regC, val + stp \ptr, \regB, [\regC], \val + .endm + +end .req x5 ENTRY(__copy_from_user) ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_HAS_PAN, \ CONFIG_ARM64_PAN) - add x5, x1, x2 // upper user buffer boundary - subs x2, x2, #16 - b.mi 1f -0: -USER(9f, ldp x3, x4, [x1], #16) - subs x2, x2, #16 - stp x3, x4, [x0], #16 - b.pl 0b -1: adds x2, x2, #8 - b.mi 2f -USER(9f, ldr x3, [x1], #8 ) - sub x2, x2, #8 - str x3, [x0], #8 -2: adds x2, x2, #4 - b.mi 3f -USER(9f, ldr w3, [x1], #4 ) - sub x2, x2, #4 - str w3, [x0], #4 -3: adds x2, x2, #2 - b.mi 4f -USER(9f, ldrh w3, [x1], #2 ) - sub x2, x2, #2 - strh w3, [x0], #2 -4: adds x2, x2, #1 - b.mi 5f -USER(9f, ldrb w3, [x1] ) - strb w3, [x0] -5: mov x0, #0 + add end, x0, x2 +#include "copy_template.S" ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_HAS_PAN, \ CONFIG_ARM64_PAN) + mov x0, #0 // Nothing to copy ret ENDPROC(__copy_from_user) .section .fixup,"ax" .align 2 -9: sub x2, x5, x1 - mov x3, x2 -10: strb wzr, [x0], #1 // zero remaining buffer space - subs x3, x3, #1 - b.ne 10b - mov x0, x2 // bytes not copied +9998: + sub x0, end, dst +9999: + strb wzr, [dst], #1 // zero remaining buffer space + cmp dst, end + b.lo 9999b ret .previous diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S index 1b94661e..81c8fc9 100644 --- a/arch/arm64/lib/copy_in_user.S +++ b/arch/arm64/lib/copy_in_user.S @@ -20,6 +20,7 @@ #include #include +#include #include #include @@ -33,44 +34,52 @@ * Returns: * x0 - bytes not copied */ + .macro ldrb1 ptr, regB, val + USER(9998f, ldrb \ptr, [\regB], \val) + .endm + + .macro strb1 ptr, regB, val + USER(9998f, strb \ptr, [\regB], \val) + .endm + + .macro ldrh1 ptr, regB, val + USER(9998f, ldrh \ptr, [\regB], \val) + .endm + + .macro strh1 ptr, regB, val + USER(9998f, strh \ptr, [\regB], \val) + .endm + + .macro ldr1 ptr, regB, val + USER(9998f, ldr \ptr, [\regB], \val) + .endm + + .macro str1 ptr, regB, val + USER(9998f, str \ptr, [\regB], \val) + .endm + + .macro ldp1 ptr, regB, regC, val + USER(9998f, ldp \ptr, \regB, [\regC], \val) + .endm + + .macro stp1 ptr, regB, regC, val + USER(9998f, stp \ptr, \regB, [\regC], \val) + .endm + +end .req x5 ENTRY(__copy_in_user) ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_HAS_PAN, \ CONFIG_ARM64_PAN) - add x5, x0, x2 // upper user buffer boundary - subs x2, x2, #16 - b.mi 1f -0: -USER(9f, ldp x3, x4, [x1], #16) - subs x2, x2, #16 -USER(9f, stp x3, x4, [x0], #16) - b.pl 0b -1: adds x2, x2, #8 - b.mi 2f -USER(9f, ldr x3, [x1], #8 ) - sub x2, x2, #8 -USER(9f, str x3, [x0], #8 ) -2: adds x2, x2, #4 - b.mi 3f -USER(9f, ldr w3, [x1], #4 ) - sub x2, x2, #4 -USER(9f, str w3, [x0], #4 ) -3: adds x2, x2, #2 - b.mi 4f -USER(9f, ldrh w3, [x1], #2 ) - sub x2, x2, #2 -USER(9f, strh w3, [x0], #2 ) -4: adds x2, x2, #1 - b.mi 5f -USER(9f, ldrb w3, [x1] ) -USER(9f, strb w3, [x0] ) -5: mov x0, #0 + add end, x0, x2 +#include "copy_template.S" ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_HAS_PAN, \ CONFIG_ARM64_PAN) + mov x0, #0 ret ENDPROC(__copy_in_user) .section .fixup,"ax" .align 2 -9: sub x0, x5, x0 // bytes not copied +9998: sub x0, end, dst // bytes not copied ret .previous diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S index a257b47..7512bbb 100644 --- a/arch/arm64/lib/copy_to_user.S +++ b/arch/arm64/lib/copy_to_user.S @@ -18,6 +18,7 @@ #include #include +#include #include #include @@ -31,44 +32,52 @@ * Returns: * x0 - bytes not copied */ + .macro ldrb1 ptr, regB, val + ldrb \ptr, [\regB], \val + .endm + + .macro strb1 ptr, regB, val + USER(9998f, strb \ptr, [\regB], \val) + .endm + + .macro ldrh1 ptr, regB, val + ldrh \ptr, [\regB], \val + .endm + + .macro strh1 ptr, regB, val + USER(9998f, strh \ptr, [\regB], \val) + .endm + + .macro ldr1 ptr, regB, val + ldr \ptr, [\regB], \val + .endm + + .macro str1 ptr, regB, val + USER(9998f, str \ptr, [\regB], \val) + .endm + + .macro ldp1 ptr, regB, regC, val + ldp \ptr, \regB, [\regC], \val + .endm + + .macro stp1 ptr, regB, regC, val + USER(9998f, stp \ptr, \regB, [\regC], \val) + .endm + +end .req x5 ENTRY(__copy_to_user) ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_HAS_PAN, \ CONFIG_ARM64_PAN) - add x5, x0, x2 // upper user buffer boundary - subs x2, x2, #16 - b.mi 1f -0: - ldp x3, x4, [x1], #16 - subs x2, x2, #16 -USER(9f, stp x3, x4, [x0], #16) - b.pl 0b -1: adds x2, x2, #8 - b.mi 2f - ldr x3, [x1], #8 - sub x2, x2, #8 -USER(9f, str x3, [x0], #8 ) -2: adds x2, x2, #4 - b.mi 3f - ldr w3, [x1], #4 - sub x2, x2, #4 -USER(9f, str w3, [x0], #4 ) -3: adds x2, x2, #2 - b.mi 4f - ldrh w3, [x1], #2 - sub x2, x2, #2 -USER(9f, strh w3, [x0], #2 ) -4: adds x2, x2, #1 - b.mi 5f - ldrb w3, [x1] -USER(9f, strb w3, [x0] ) -5: mov x0, #0 + add end, x0, x2 +#include "copy_template.S" ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_HAS_PAN, \ CONFIG_ARM64_PAN) + mov x0, #0 ret ENDPROC(__copy_to_user) .section .fixup,"ax" .align 2 -9: sub x0, x5, x0 // bytes not copied +9998: sub x0, end, dst // bytes not copied ret .previous -- cgit v0.10.2 From 127db024a7baee9874014dac33628253f438b4da Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Thu, 17 Sep 2015 12:38:07 +0300 Subject: arm64: introduce VA_START macro - the first kernel virtual address. In order to not use lengthy (UL(0xffffffffffffffff) << VA_BITS) everywhere, replace it with VA_START. Signed-off-by: Andrey Ryabinin Reviewed-by: Catalin Marinas Signed-off-by: Catalin Marinas diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 6b4c3ad..11ccf6c 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -42,12 +42,14 @@ * PAGE_OFFSET - the virtual address of the start of the kernel image (top * (VA_BITS - 1)) * VA_BITS - the maximum number of bits for virtual addresses. + * VA_START - the first kernel virtual address. * TASK_SIZE - the maximum size of a user space task. * TASK_UNMAPPED_BASE - the lower boundary of the mmap VM area. * The module space lives between the addresses given by TASK_SIZE * and PAGE_OFFSET - it must be within 128MB of the kernel text. */ #define VA_BITS (CONFIG_ARM64_VA_BITS) +#define VA_START (UL(0xffffffffffffffff) << VA_BITS) #define PAGE_OFFSET (UL(0xffffffffffffffff) << (VA_BITS - 1)) #define MODULES_END (PAGE_OFFSET) #define MODULES_VADDR (MODULES_END - SZ_64M) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 26b0666..581d140 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -41,7 +41,7 @@ * fixed mappings and modules */ #define VMEMMAP_SIZE ALIGN((1UL << (VA_BITS - PAGE_SHIFT)) * sizeof(struct page), PUD_SIZE) -#define VMALLOC_START (UL(0xffffffffffffffff) << VA_BITS) +#define VMALLOC_START (VA_START) #define VMALLOC_END (PAGE_OFFSET - PUD_SIZE - VMEMMAP_SIZE - SZ_64K) #define vmemmap ((struct page *)(VMALLOC_END + SZ_64K)) -- cgit v0.10.2 From c51e97d89e526368eb697f87cd4d391b9e19f369 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 6 Oct 2015 18:46:21 +0100 Subject: arm64: mm: remove unused cpu_set_idmap_tcr_t0sz function With commit b08d4640a3dc ("arm64: remove dead code"), cpu_set_idmap_tcr_t0sz is no longer called and can therefore be removed from the kernel. This patch removes the function and effectively inlines the helper function __cpu_set_tcr_t0sz into cpu_set_default_tcr_t0sz. Reviewed-by: Catalin Marinas Acked-by: Ard Biesheuvel Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index 8ec41e5..549b895 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -77,34 +77,23 @@ static inline bool __cpu_uses_extended_idmap(void) unlikely(idmap_t0sz != TCR_T0SZ(VA_BITS))); } -static inline void __cpu_set_tcr_t0sz(u64 t0sz) -{ - unsigned long tcr; - - if (__cpu_uses_extended_idmap()) - asm volatile ( - " mrs %0, tcr_el1 ;" - " bfi %0, %1, %2, %3 ;" - " msr tcr_el1, %0 ;" - " isb" - : "=&r" (tcr) - : "r"(t0sz), "I"(TCR_T0SZ_OFFSET), "I"(TCR_TxSZ_WIDTH)); -} - -/* - * Set TCR.T0SZ to the value appropriate for activating the identity map. - */ -static inline void cpu_set_idmap_tcr_t0sz(void) -{ - __cpu_set_tcr_t0sz(idmap_t0sz); -} - /* * Set TCR.T0SZ to its default value (based on VA_BITS) */ static inline void cpu_set_default_tcr_t0sz(void) { - __cpu_set_tcr_t0sz(TCR_T0SZ(VA_BITS)); + unsigned long tcr; + + if (!__cpu_uses_extended_idmap()) + return; + + asm volatile ( + " mrs %0, tcr_el1 ;" + " bfi %0, %1, %2, %3 ;" + " msr tcr_el1, %0 ;" + " isb" + : "=&r" (tcr) + : "r"(TCR_T0SZ(VA_BITS)), "I"(TCR_T0SZ_OFFSET), "I"(TCR_TxSZ_WIDTH)); } static inline void switch_new_context(struct mm_struct *mm) -- cgit v0.10.2 From fa7aae8a4257e6be7051420dac1f150c1eef721b Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 6 Oct 2015 18:46:22 +0100 Subject: arm64: proc: de-scope TLBI operation during cold boot When cold-booting a CPU, we must invalidate any junk entries from the local TLB prior to enabling the MMU. This doesn't require broadcasting within the inner-shareable domain, so de-scope the operation to apply only to the local CPU. Acked-by: Catalin Marinas Reviewed-by: Catalin Marinas Tested-by: Mark Rutland Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index e4ee7bd..bbde13d 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -146,8 +146,8 @@ ENDPROC(cpu_do_switch_mm) * value of the SCTLR_EL1 register. */ ENTRY(__cpu_setup) - tlbi vmalle1is // invalidate I + D TLBs - dsb ish + tlbi vmalle1 // Invalidate local TLB + dsb nsh mov x0, #3 << 20 msr cpacr_el1, x0 // Enable FP/ASIMD -- cgit v0.10.2 From 8e63d38876691756f9bc6930850f1fb77809be1b Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 6 Oct 2015 18:46:23 +0100 Subject: arm64: flush: use local TLB and I-cache invalidation There are a number of places where a single CPU is running with a private page-table and we need to perform maintenance on the TLB and I-cache in order to ensure correctness, but do not require the operation to be broadcast to other CPUs. This patch adds local variants of tlb_flush_all and __flush_icache_all to support these use-cases and updates the callers respectively. __local_flush_icache_all also implies an isb, since it is intended to be used synchronously. Reviewed-by: Catalin Marinas Acked-by: David Daney Acked-by: Ard Biesheuvel Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index c75b8d0..54efeda 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -115,6 +115,13 @@ extern void copy_to_user_page(struct vm_area_struct *, struct page *, #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 extern void flush_dcache_page(struct page *); +static inline void __local_flush_icache_all(void) +{ + asm("ic iallu"); + dsb(nsh); + isb(); +} + static inline void __flush_icache_all(void) { asm("ic ialluis"); diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index 7bd2da0..96f944e 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -63,6 +63,14 @@ * only require the D-TLB to be invalidated. * - kaddr - Kernel virtual memory address */ +static inline void local_flush_tlb_all(void) +{ + dsb(nshst); + asm("tlbi vmalle1"); + dsb(nsh); + isb(); +} + static inline void flush_tlb_all(void) { dsb(ishst); diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index 13671a9..4d12926 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -344,9 +344,9 @@ static void efi_set_pgd(struct mm_struct *mm) else cpu_switch_mm(mm->pgd, mm); - flush_tlb_all(); + local_flush_tlb_all(); if (icache_is_aivivt()) - __flush_icache_all(); + __local_flush_icache_all(); } void efi_virtmap_load(void) diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index dbdaacd..fdd4d4d 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -152,7 +152,7 @@ asmlinkage void secondary_start_kernel(void) * point to zero page to avoid speculatively fetching new entries. */ cpu_set_reserved_ttbr0(); - flush_tlb_all(); + local_flush_tlb_all(); cpu_set_default_tcr_t0sz(); preempt_disable(); diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c index 8297d50..3c5e4e6 100644 --- a/arch/arm64/kernel/suspend.c +++ b/arch/arm64/kernel/suspend.c @@ -90,7 +90,7 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) else cpu_switch_mm(mm->pgd, mm); - flush_tlb_all(); + local_flush_tlb_all(); /* * Restore per-cpu offset before any kernel diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index d70ff14..48b53fb 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -48,9 +48,9 @@ static void flush_context(void) { /* set the reserved TTBR0 before flushing the TLB */ cpu_set_reserved_ttbr0(); - flush_tlb_all(); + local_flush_tlb_all(); if (icache_is_aivivt()) - __flush_icache_all(); + __local_flush_icache_all(); } static void set_mm_context(struct mm_struct *mm, unsigned int asid) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 9211b85..71a3104 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -456,7 +456,7 @@ void __init paging_init(void) * point to zero page to avoid speculatively fetching new entries. */ cpu_set_reserved_ttbr0(); - flush_tlb_all(); + local_flush_tlb_all(); cpu_set_default_tcr_t0sz(); } -- cgit v0.10.2 From 5aec715d7d3122f77cabaa7578d9d25a0c1ed20e Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 6 Oct 2015 18:46:24 +0100 Subject: arm64: mm: rewrite ASID allocator and MM context-switching code Our current switch_mm implementation suffers from a number of problems: (1) The ASID allocator relies on IPIs to synchronise the CPUs on a rollover event (2) Because of (1), we cannot allocate ASIDs with interrupts disabled and therefore make use of a TIF_SWITCH_MM flag to postpone the actual switch to finish_arch_post_lock_switch (3) We run context switch with a reserved (invalid) TTBR0 value, even though the ASID and pgd are updated atomically (4) We take a global spinlock (cpu_asid_lock) during context-switch (5) We use h/w broadcast TLB operations when they are not required (e.g. in flush_context) This patch addresses these problems by rewriting the ASID algorithm to match the bitmap-based arch/arm/ implementation more closely. This in turn allows us to remove much of the complications surrounding switch_mm, including the ugly thread flag. Reviewed-by: Catalin Marinas Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index 0302087..990124a 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -17,15 +17,16 @@ #define __ASM_MMU_H typedef struct { - unsigned int id; - raw_spinlock_t id_lock; - void *vdso; + atomic64_t id; + void *vdso; } mm_context_t; -#define INIT_MM_CONTEXT(name) \ - .context.id_lock = __RAW_SPIN_LOCK_UNLOCKED(name.context.id_lock), - -#define ASID(mm) ((mm)->context.id & 0xffff) +/* + * This macro is only used by the TLBI code, which cannot race with an + * ASID change and therefore doesn't need to reload the counter using + * atomic64_read. + */ +#define ASID(mm) ((mm)->context.id.counter & 0xffff) extern void paging_init(void); extern void __iomem *early_io_map(phys_addr_t phys, unsigned long virt); diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index 549b895..f4c74a9 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -28,13 +28,6 @@ #include #include -#define MAX_ASID_BITS 16 - -extern unsigned int cpu_last_asid; - -void __init_new_context(struct task_struct *tsk, struct mm_struct *mm); -void __new_context(struct mm_struct *mm); - #ifdef CONFIG_PID_IN_CONTEXTIDR static inline void contextidr_thread_switch(struct task_struct *next) { @@ -96,66 +89,19 @@ static inline void cpu_set_default_tcr_t0sz(void) : "r"(TCR_T0SZ(VA_BITS)), "I"(TCR_T0SZ_OFFSET), "I"(TCR_TxSZ_WIDTH)); } -static inline void switch_new_context(struct mm_struct *mm) -{ - unsigned long flags; - - __new_context(mm); - - local_irq_save(flags); - cpu_switch_mm(mm->pgd, mm); - local_irq_restore(flags); -} - -static inline void check_and_switch_context(struct mm_struct *mm, - struct task_struct *tsk) -{ - /* - * Required during context switch to avoid speculative page table - * walking with the wrong TTBR. - */ - cpu_set_reserved_ttbr0(); - - if (!((mm->context.id ^ cpu_last_asid) >> MAX_ASID_BITS)) - /* - * The ASID is from the current generation, just switch to the - * new pgd. This condition is only true for calls from - * context_switch() and interrupts are already disabled. - */ - cpu_switch_mm(mm->pgd, mm); - else if (irqs_disabled()) - /* - * Defer the new ASID allocation until after the context - * switch critical region since __new_context() cannot be - * called with interrupts disabled. - */ - set_ti_thread_flag(task_thread_info(tsk), TIF_SWITCH_MM); - else - /* - * That is a direct call to switch_mm() or activate_mm() with - * interrupts enabled and a new context. - */ - switch_new_context(mm); -} - -#define init_new_context(tsk,mm) (__init_new_context(tsk,mm),0) +/* + * It would be nice to return ASIDs back to the allocator, but unfortunately + * that introduces a race with a generation rollover where we could erroneously + * free an ASID allocated in a future generation. We could workaround this by + * freeing the ASID from the context of the dying mm (e.g. in arch_exit_mmap), + * but we'd then need to make sure that we didn't dirty any TLBs afterwards. + * Setting a reserved TTBR0 or EPD0 would work, but it all gets ugly when you + * take CPU migration into account. + */ #define destroy_context(mm) do { } while(0) +void check_and_switch_context(struct mm_struct *mm, unsigned int cpu); -#define finish_arch_post_lock_switch \ - finish_arch_post_lock_switch -static inline void finish_arch_post_lock_switch(void) -{ - if (test_and_clear_thread_flag(TIF_SWITCH_MM)) { - struct mm_struct *mm = current->mm; - unsigned long flags; - - __new_context(mm); - - local_irq_save(flags); - cpu_switch_mm(mm->pgd, mm); - local_irq_restore(flags); - } -} +#define init_new_context(tsk,mm) ({ atomic64_set(&mm->context.id, 0); 0; }) /* * This is called when "tsk" is about to enter lazy TLB mode. diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index dcd06d1..555c6de 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -111,7 +111,6 @@ static inline struct thread_info *current_thread_info(void) #define TIF_RESTORE_SIGMASK 20 #define TIF_SINGLESTEP 21 #define TIF_32BIT 22 /* 32bit process */ -#define TIF_SWITCH_MM 23 /* deferred switch_mm */ #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 8d89cf8..25de8b2 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -60,7 +60,7 @@ int main(void) DEFINE(S_SYSCALLNO, offsetof(struct pt_regs, syscallno)); DEFINE(S_FRAME_SIZE, sizeof(struct pt_regs)); BLANK(); - DEFINE(MM_CONTEXT_ID, offsetof(struct mm_struct, context.id)); + DEFINE(MM_CONTEXT_ID, offsetof(struct mm_struct, context.id.counter)); BLANK(); DEFINE(VMA_VM_MM, offsetof(struct vm_area_struct, vm_mm)); DEFINE(VMA_VM_FLAGS, offsetof(struct vm_area_struct, vm_flags)); diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index 4d12926..a48d1f4 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -48,7 +48,6 @@ static struct mm_struct efi_mm = { .mmap_sem = __RWSEM_INITIALIZER(efi_mm.mmap_sem), .page_table_lock = __SPIN_LOCK_UNLOCKED(efi_mm.page_table_lock), .mmlist = LIST_HEAD_INIT(efi_mm.mmlist), - INIT_MM_CONTEXT(efi_mm) }; static int uefi_debug __initdata; diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index 48b53fb..e902229 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -17,135 +17,187 @@ * along with this program. If not, see . */ -#include +#include #include +#include #include -#include -#include +#include #include #include -#include -#define asid_bits(reg) \ - (((read_cpuid(ID_AA64MMFR0_EL1) & 0xf0) >> 2) + 8) +static u32 asid_bits; +static DEFINE_RAW_SPINLOCK(cpu_asid_lock); -#define ASID_FIRST_VERSION (1 << MAX_ASID_BITS) +static atomic64_t asid_generation; +static unsigned long *asid_map; -static DEFINE_RAW_SPINLOCK(cpu_asid_lock); -unsigned int cpu_last_asid = ASID_FIRST_VERSION; +static DEFINE_PER_CPU(atomic64_t, active_asids); +static DEFINE_PER_CPU(u64, reserved_asids); +static cpumask_t tlb_flush_pending; -/* - * We fork()ed a process, and we need a new context for the child to run in. - */ -void __init_new_context(struct task_struct *tsk, struct mm_struct *mm) +#define ASID_MASK (~GENMASK(asid_bits - 1, 0)) +#define ASID_FIRST_VERSION (1UL << asid_bits) +#define NUM_USER_ASIDS ASID_FIRST_VERSION + +static void flush_context(unsigned int cpu) { - mm->context.id = 0; - raw_spin_lock_init(&mm->context.id_lock); + int i; + u64 asid; + + /* Update the list of reserved ASIDs and the ASID bitmap. */ + bitmap_clear(asid_map, 0, NUM_USER_ASIDS); + + /* + * Ensure the generation bump is observed before we xchg the + * active_asids. + */ + smp_wmb(); + + for_each_possible_cpu(i) { + asid = atomic64_xchg_relaxed(&per_cpu(active_asids, i), 0); + /* + * If this CPU has already been through a + * rollover, but hasn't run another task in + * the meantime, we must preserve its reserved + * ASID, as this is the only trace we have of + * the process it is still running. + */ + if (asid == 0) + asid = per_cpu(reserved_asids, i); + __set_bit(asid & ~ASID_MASK, asid_map); + per_cpu(reserved_asids, i) = asid; + } + + /* Queue a TLB invalidate and flush the I-cache if necessary. */ + cpumask_setall(&tlb_flush_pending); + + if (icache_is_aivivt()) + __flush_icache_all(); } -static void flush_context(void) +static int is_reserved_asid(u64 asid) { - /* set the reserved TTBR0 before flushing the TLB */ - cpu_set_reserved_ttbr0(); - local_flush_tlb_all(); - if (icache_is_aivivt()) - __local_flush_icache_all(); + int cpu; + for_each_possible_cpu(cpu) + if (per_cpu(reserved_asids, cpu) == asid) + return 1; + return 0; } -static void set_mm_context(struct mm_struct *mm, unsigned int asid) +static u64 new_context(struct mm_struct *mm, unsigned int cpu) { - unsigned long flags; + static u32 cur_idx = 1; + u64 asid = atomic64_read(&mm->context.id); + u64 generation = atomic64_read(&asid_generation); - /* - * Locking needed for multi-threaded applications where the same - * mm->context.id could be set from different CPUs during the - * broadcast. This function is also called via IPI so the - * mm->context.id_lock has to be IRQ-safe. - */ - raw_spin_lock_irqsave(&mm->context.id_lock, flags); - if (likely((mm->context.id ^ cpu_last_asid) >> MAX_ASID_BITS)) { + if (asid != 0) { /* - * Old version of ASID found. Set the new one and reset - * mm_cpumask(mm). + * If our current ASID was active during a rollover, we + * can continue to use it and this was just a false alarm. */ - mm->context.id = asid; - cpumask_clear(mm_cpumask(mm)); + if (is_reserved_asid(asid)) + return generation | (asid & ~ASID_MASK); + + /* + * We had a valid ASID in a previous life, so try to re-use + * it if possible. + */ + asid &= ~ASID_MASK; + if (!__test_and_set_bit(asid, asid_map)) + goto bump_gen; } - raw_spin_unlock_irqrestore(&mm->context.id_lock, flags); /* - * Set the mm_cpumask(mm) bit for the current CPU. + * Allocate a free ASID. If we can't find one, take a note of the + * currently active ASIDs and mark the TLBs as requiring flushes. + * We always count from ASID #1, as we use ASID #0 when setting a + * reserved TTBR0 for the init_mm. */ - cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm)); + asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, cur_idx); + if (asid != NUM_USER_ASIDS) + goto set_asid; + + /* We're out of ASIDs, so increment the global generation count */ + generation = atomic64_add_return_relaxed(ASID_FIRST_VERSION, + &asid_generation); + flush_context(cpu); + + /* We have at least 1 ASID per CPU, so this will always succeed */ + asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, 1); + +set_asid: + __set_bit(asid, asid_map); + cur_idx = asid; + +bump_gen: + asid |= generation; + cpumask_clear(mm_cpumask(mm)); + return asid; } -/* - * Reset the ASID on the current CPU. This function call is broadcast from the - * CPU handling the ASID rollover and holding cpu_asid_lock. - */ -static void reset_context(void *info) +void check_and_switch_context(struct mm_struct *mm, unsigned int cpu) { - unsigned int asid; - unsigned int cpu = smp_processor_id(); - struct mm_struct *mm = current->active_mm; + unsigned long flags; + u64 asid; + + asid = atomic64_read(&mm->context.id); /* - * current->active_mm could be init_mm for the idle thread immediately - * after secondary CPU boot or hotplug. TTBR0_EL1 is already set to - * the reserved value, so no need to reset any context. + * The memory ordering here is subtle. We rely on the control + * dependency between the generation read and the update of + * active_asids to ensure that we are synchronised with a + * parallel rollover (i.e. this pairs with the smp_wmb() in + * flush_context). */ - if (mm == &init_mm) - return; + if (!((asid ^ atomic64_read(&asid_generation)) >> asid_bits) + && atomic64_xchg_relaxed(&per_cpu(active_asids, cpu), asid)) + goto switch_mm_fastpath; + + raw_spin_lock_irqsave(&cpu_asid_lock, flags); + /* Check that our ASID belongs to the current generation. */ + asid = atomic64_read(&mm->context.id); + if ((asid ^ atomic64_read(&asid_generation)) >> asid_bits) { + asid = new_context(mm, cpu); + atomic64_set(&mm->context.id, asid); + } - smp_rmb(); - asid = cpu_last_asid + cpu; + if (cpumask_test_and_clear_cpu(cpu, &tlb_flush_pending)) + local_flush_tlb_all(); - flush_context(); - set_mm_context(mm, asid); + atomic64_set(&per_cpu(active_asids, cpu), asid); + cpumask_set_cpu(cpu, mm_cpumask(mm)); + raw_spin_unlock_irqrestore(&cpu_asid_lock, flags); - /* set the new ASID */ +switch_mm_fastpath: cpu_switch_mm(mm->pgd, mm); } -void __new_context(struct mm_struct *mm) +static int asids_init(void) { - unsigned int asid; - unsigned int bits = asid_bits(); - - raw_spin_lock(&cpu_asid_lock); - /* - * Check the ASID again, in case the change was broadcast from another - * CPU before we acquired the lock. - */ - if (!unlikely((mm->context.id ^ cpu_last_asid) >> MAX_ASID_BITS)) { - cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm)); - raw_spin_unlock(&cpu_asid_lock); - return; - } - /* - * At this point, it is guaranteed that the current mm (with an old - * ASID) isn't active on any other CPU since the ASIDs are changed - * simultaneously via IPI. - */ - asid = ++cpu_last_asid; - - /* - * If we've used up all our ASIDs, we need to start a new version and - * flush the TLB. - */ - if (unlikely((asid & ((1 << bits) - 1)) == 0)) { - /* increment the ASID version */ - cpu_last_asid += (1 << MAX_ASID_BITS) - (1 << bits); - if (cpu_last_asid == 0) - cpu_last_asid = ASID_FIRST_VERSION; - asid = cpu_last_asid + smp_processor_id(); - flush_context(); - smp_wmb(); - smp_call_function(reset_context, NULL, 1); - cpu_last_asid += NR_CPUS - 1; + int fld = cpuid_feature_extract_field(read_cpuid(ID_AA64MMFR0_EL1), 4); + + switch (fld) { + default: + pr_warn("Unknown ASID size (%d); assuming 8-bit\n", fld); + /* Fallthrough */ + case 0: + asid_bits = 8; + break; + case 2: + asid_bits = 16; } - set_mm_context(mm, asid); - raw_spin_unlock(&cpu_asid_lock); + /* If we end up with more CPUs than ASIDs, expect things to crash */ + WARN_ON(NUM_USER_ASIDS < num_possible_cpus()); + atomic64_set(&asid_generation, ASID_FIRST_VERSION); + asid_map = kzalloc(BITS_TO_LONGS(NUM_USER_ASIDS) * sizeof(*asid_map), + GFP_KERNEL); + if (!asid_map) + panic("Failed to allocate bitmap for %lu ASIDs\n", + NUM_USER_ASIDS); + + pr_info("ASID allocator initialised with %lu entries\n", NUM_USER_ASIDS); + return 0; } +early_initcall(asids_init); diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index bbde13d..91cb2ea 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -130,7 +130,7 @@ ENDPROC(cpu_do_resume) * - pgd_phys - physical address of new TTB */ ENTRY(cpu_do_switch_mm) - mmid w1, x1 // get mm->context.id + mmid x1, x1 // get mm->context.id bfi x0, x1, #48, #16 // set the ASID msr ttbr0_el1, x0 // set TTBR0 isb -- cgit v0.10.2 From f3e002c24e1f3b66f6e392ecd6928b5d04672c54 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 6 Oct 2015 18:46:25 +0100 Subject: arm64: tlbflush: remove redundant ASID casts to (unsigned long) The ASID macro returns a 64-bit (long long) value, so there is no need to cast to (unsigned long) before shifting prior to a TLBI operation. Reviewed-by: Catalin Marinas Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index 96f944e..93e9f96 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -81,7 +81,7 @@ static inline void flush_tlb_all(void) static inline void flush_tlb_mm(struct mm_struct *mm) { - unsigned long asid = (unsigned long)ASID(mm) << 48; + unsigned long asid = ASID(mm) << 48; dsb(ishst); asm("tlbi aside1is, %0" : : "r" (asid)); @@ -91,8 +91,7 @@ static inline void flush_tlb_mm(struct mm_struct *mm) static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr) { - unsigned long addr = uaddr >> 12 | - ((unsigned long)ASID(vma->vm_mm) << 48); + unsigned long addr = uaddr >> 12 | (ASID(vma->vm_mm) << 48); dsb(ishst); asm("tlbi vale1is, %0" : : "r" (addr)); @@ -109,7 +108,7 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end, bool last_level) { - unsigned long asid = (unsigned long)ASID(vma->vm_mm) << 48; + unsigned long asid = ASID(vma->vm_mm) << 48; unsigned long addr; if ((end - start) > MAX_TLB_RANGE) { @@ -162,7 +161,7 @@ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end static inline void __flush_tlb_pgtable(struct mm_struct *mm, unsigned long uaddr) { - unsigned long addr = uaddr >> 12 | ((unsigned long)ASID(mm) << 48); + unsigned long addr = uaddr >> 12 | (ASID(mm) << 48); dsb(ishst); asm("tlbi vae1is, %0" : : "r" (addr)); -- cgit v0.10.2 From 5a7862e83000ccfd36db927c6f060458fe271157 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 6 Oct 2015 18:46:26 +0100 Subject: arm64: tlbflush: avoid flushing when fullmm == 1 The TLB gather code sets fullmm=1 when tearing down the entire address space for an mm_struct on exit or execve. Given that the ASID allocator will never re-allocate a dirty ASID, this flushing is not needed and can simply be avoided in the flushing code. Reviewed-by: Catalin Marinas Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h index d6e6b66..ffdaea7 100644 --- a/arch/arm64/include/asm/tlb.h +++ b/arch/arm64/include/asm/tlb.h @@ -37,17 +37,21 @@ static inline void __tlb_remove_table(void *_table) static inline void tlb_flush(struct mmu_gather *tlb) { - if (tlb->fullmm) { - flush_tlb_mm(tlb->mm); - } else { - struct vm_area_struct vma = { .vm_mm = tlb->mm, }; - /* - * The intermediate page table levels are already handled by - * the __(pte|pmd|pud)_free_tlb() functions, so last level - * TLBI is sufficient here. - */ - __flush_tlb_range(&vma, tlb->start, tlb->end, true); - } + struct vm_area_struct vma = { .vm_mm = tlb->mm, }; + + /* + * The ASID allocator will either invalidate the ASID or mark + * it as used. + */ + if (tlb->fullmm) + return; + + /* + * The intermediate page table levels are already handled by + * the __(pte|pmd|pud)_free_tlb() functions, so last level + * TLBI is sufficient here. + */ + __flush_tlb_range(&vma, tlb->start, tlb->end, true); } static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, -- cgit v0.10.2 From c2775b2ee5caca19f661ee2ab5af92462596db71 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 6 Oct 2015 18:46:27 +0100 Subject: arm64: switch_mm: simplify mm and CPU checks switch_mm performs some checks to try and avoid entering the ASID allocator: (1) If we're switching to the init_mm (no user mappings), then simply set a reserved TTBR0 value with no page table (the zero page) (2) If prev == next *and* the mm_cpumask indicates that we've run on this CPU before, then we can skip the allocator. However, there is plenty of redundancy here. With the new ASID allocator, if prev == next, then we know that our ASID is valid and do not need to worry about re-allocation. Consequently, we can drop the mm_cpumask check in (2) and move the prev == next check before the init_mm check, since if prev == next == init_mm then there's nothing to do. Reviewed-by: Catalin Marinas Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index f4c74a9..c0e8789 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -129,6 +129,9 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next, { unsigned int cpu = smp_processor_id(); + if (prev == next) + return; + /* * init_mm.pgd does not contain any user mappings and it is always * active for kernel addresses in TTBR1. Just set the reserved TTBR0. @@ -138,8 +141,7 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next, return; } - if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next)) || prev != next) - check_and_switch_context(next, tsk); + check_and_switch_context(next, cpu); } #define deactivate_mm(tsk,mm) do { } while (0) diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index e902229..4b9ec44 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -166,10 +166,10 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu) local_flush_tlb_all(); atomic64_set(&per_cpu(active_asids, cpu), asid); - cpumask_set_cpu(cpu, mm_cpumask(mm)); raw_spin_unlock_irqrestore(&cpu_asid_lock, flags); switch_mm_fastpath: + cpumask_set_cpu(cpu, mm_cpumask(mm)); cpu_switch_mm(mm->pgd, mm); } -- cgit v0.10.2 From 38d96287504a2478eb538bfecfa1fddd743bb6b2 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 6 Oct 2015 18:46:28 +0100 Subject: arm64: mm: kill mm_cpumask usage mm_cpumask isn't actually used for anything on arm64, so remove all the code trying to keep it up-to-date. Reviewed-by: Catalin Marinas Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index fdd4d4d..03b0aa2 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -142,7 +142,6 @@ asmlinkage void secondary_start_kernel(void) */ atomic_inc(&mm->mm_count); current->active_mm = mm; - cpumask_set_cpu(cpu, mm_cpumask(mm)); set_my_cpu_offset(per_cpu_offset(smp_processor_id())); printk("CPU%u: Booted secondary processor\n", cpu); @@ -233,12 +232,6 @@ int __cpu_disable(void) * OK - migrate IRQs away from this CPU */ migrate_irqs(); - - /* - * Remove this CPU from the vm mask set of all processes. - */ - clear_tasks_mm_cpumask(cpu); - return 0; } diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index 4b9ec44..f636a26 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -132,7 +132,6 @@ set_asid: bump_gen: asid |= generation; - cpumask_clear(mm_cpumask(mm)); return asid; } @@ -169,7 +168,6 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu) raw_spin_unlock_irqrestore(&cpu_asid_lock, flags); switch_mm_fastpath: - cpumask_set_cpu(cpu, mm_cpumask(mm)); cpu_switch_mm(mm->pgd, mm); } -- cgit v0.10.2 From 28c6fbc3b446caf5f8d1f2d7b79e09e743158a4d Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 6 Oct 2015 18:46:29 +0100 Subject: arm64: tlb: remove redundant barrier from __flush_tlb_pgtable __flush_tlb_pgtable is used to invalidate intermediate page table entries after they have been cleared and are about to be freed. Since pXd_clear imply memory barriers, we don't need the extra one here. Reviewed-by: Catalin Marinas Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index 93e9f96..b460ae2 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -163,7 +163,6 @@ static inline void __flush_tlb_pgtable(struct mm_struct *mm, { unsigned long addr = uaddr >> 12 | (ASID(mm) << 48); - dsb(ishst); asm("tlbi vae1is, %0" : : "r" (addr)); dsb(ish); } -- cgit v0.10.2 From 120798d2e7d1ac87365fe5ea91b074bb42ca1eff Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 6 Oct 2015 18:46:30 +0100 Subject: arm64: mm: remove dsb from update_mmu_cache update_mmu_cache() consists of a dsb(ishst) instruction so that new user mappings are guaranteed to be visible to the page table walker on exception return. In reality this can be a very expensive operation which is rarely needed. Removing this barrier shows a modest improvement in hackbench scores and , in the worst case, we re-take the user fault and establish that there was nothing to do. Reviewed-by: Catalin Marinas Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 581d140..5043a84e 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -646,10 +646,10 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { /* - * set_pte() does not have a DSB for user mappings, so make sure that - * the page table write is visible. + * We don't do anything here, so there's a very small chance of + * us retaking a user fault which we just fixed up. The alternative + * is doing a dsb(ishst), but that penalises the fastpath. */ - dsb(ishst); } #define update_mmu_cache_pmd(vma, address, pmd) do { } while (0) -- cgit v0.10.2 From 8f48c0629049fdebb6e783803325bff19176d3fd Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 7 Oct 2015 11:37:36 +0100 Subject: arm64: hw_breakpoint: use target state to determine ABI behaviour The arm64 hw_breakpoint interface is slightly less flexible than its 32-bit counterpart, thanks to some changes in the architecture rendering unaligned watchpoint addresses obselete for AArch64. However, in a multi-arch environment (i.e. debugging a 32-bit target with a 64-bit GDB under a 64-bit kernel), we need to provide a feature compatible interface to GDB in order for debugging to function correctly. This patch adds a new helper, is_compat_bp, to our hw_breakpoint implementation which changes the interface behaviour based on the architecture of the debug target as opposed to the debugger itself. This allows debugged to function as expected for multi-arch configurations without relying on deprecated architectural behaviours when debugging native applications. Cc: Yao Qi Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index bba85c8..46465d9 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -163,6 +163,20 @@ enum hw_breakpoint_ops { HW_BREAKPOINT_RESTORE }; +static int is_compat_bp(struct perf_event *bp) +{ + struct task_struct *tsk = bp->hw.target; + + /* + * tsk can be NULL for per-cpu (non-ptrace) breakpoints. + * In this case, use the native interface, since we don't have + * the notion of a "compat CPU" and could end up relying on + * deprecated behaviour if we use unaligned watchpoints in + * AArch64 state. + */ + return tsk && is_compat_thread(task_thread_info(tsk)); +} + /** * hw_breakpoint_slot_setup - Find and setup a perf slot according to * operations @@ -420,7 +434,7 @@ static int arch_build_bp_info(struct perf_event *bp) * Watchpoints can be of length 1, 2, 4 or 8 bytes. */ if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE) { - if (is_compat_task()) { + if (is_compat_bp(bp)) { if (info->ctrl.len != ARM_BREAKPOINT_LEN_2 && info->ctrl.len != ARM_BREAKPOINT_LEN_4) return -EINVAL; @@ -477,7 +491,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) * AArch32 tasks expect some simple alignment fixups, so emulate * that here. */ - if (is_compat_task()) { + if (is_compat_bp(bp)) { if (info->ctrl.len == ARM_BREAKPOINT_LEN_8) alignment_mask = 0x7; else -- cgit v0.10.2 From 6475b2d846176e3272351266869481a21ff47866 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 2 Oct 2015 10:55:03 +0100 Subject: arm64: perf: move to shared arm_pmu framework Now that the arm_pmu framework has been factored out to drivers/perf we can make use of it for arm64, gaining support for heterogeneous PMUs and unifying the two codebases before they diverge further. The as yet unused PMU name for PMUv3 is changed to armv8_pmuv3, matching the style previously applied to the 32-bit PMUs. Signed-off-by: Mark Rutland Acked-by: Will Deacon Signed-off-by: Catalin Marinas diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 07d1811..98b4f98 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -454,12 +454,8 @@ config HAVE_ARCH_PFN_VALID def_bool ARCH_HAS_HOLES_MEMORYMODEL || !SPARSEMEM config HW_PERF_EVENTS - bool "Enable hardware performance counter support for perf events" - depends on PERF_EVENTS - default y - help - Enable hardware performance counter support for perf events. If - disabled, perf events will use software events only. + def_bool y + depends on ARM_PMU config SYS_SUPPORTS_HUGETLBFS def_bool y diff --git a/arch/arm64/include/asm/pmu.h b/arch/arm64/include/asm/pmu.h deleted file mode 100644 index b7710a5..0000000 --- a/arch/arm64/include/asm/pmu.h +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Based on arch/arm/include/asm/pmu.h - * - * Copyright (C) 2009 picoChip Designs Ltd, Jamie Iles - * Copyright (C) 2012 ARM Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ -#ifndef __ASM_PMU_H -#define __ASM_PMU_H - -#ifdef CONFIG_HW_PERF_EVENTS - -/* The events for a given PMU register set. */ -struct pmu_hw_events { - /* - * The events that are active on the PMU for the given index. - */ - struct perf_event **events; - - /* - * A 1 bit for an index indicates that the counter is being used for - * an event. A 0 means that the counter can be used. - */ - unsigned long *used_mask; - - /* - * Hardware lock to serialize accesses to PMU registers. Needed for the - * read/modify/write sequences. - */ - raw_spinlock_t pmu_lock; -}; - -struct arm_pmu { - struct pmu pmu; - cpumask_t active_irqs; - int *irq_affinity; - const char *name; - irqreturn_t (*handle_irq)(int irq_num, void *dev); - void (*enable)(struct hw_perf_event *evt, int idx); - void (*disable)(struct hw_perf_event *evt, int idx); - int (*get_event_idx)(struct pmu_hw_events *hw_events, - struct hw_perf_event *hwc); - int (*set_event_filter)(struct hw_perf_event *evt, - struct perf_event_attr *attr); - u32 (*read_counter)(int idx); - void (*write_counter)(int idx, u32 val); - void (*start)(void); - void (*stop)(void); - void (*reset)(void *); - int (*map_event)(struct perf_event *event); - int num_events; - atomic_t active_events; - struct mutex reserve_mutex; - u64 max_period; - struct platform_device *plat_device; - struct pmu_hw_events *(*get_hw_events)(void); -}; - -#define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu)) - -int __init armpmu_register(struct arm_pmu *armpmu, char *name, int type); - -u64 armpmu_event_update(struct perf_event *event, - struct hw_perf_event *hwc, - int idx); - -int armpmu_event_set_period(struct perf_event *event, - struct hw_perf_event *hwc, - int idx); - -#endif /* CONFIG_HW_PERF_EVENTS */ -#endif /* __ASM_PMU_H */ diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index f9a74d4..a614100 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -18,651 +18,12 @@ * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ -#define pr_fmt(fmt) "hw perfevents: " fmt - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include -#include - -/* - * ARMv8 supports a maximum of 32 events. - * The cycle counter is included in this total. - */ -#define ARMPMU_MAX_HWEVENTS 32 - -static DEFINE_PER_CPU(struct perf_event * [ARMPMU_MAX_HWEVENTS], hw_events); -static DEFINE_PER_CPU(unsigned long [BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)], used_mask); -static DEFINE_PER_CPU(struct pmu_hw_events, cpu_hw_events); - -#define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu)) - -/* Set at runtime when we know what CPU type we are. */ -static struct arm_pmu *cpu_pmu; - -int -armpmu_get_max_events(void) -{ - int max_events = 0; - - if (cpu_pmu != NULL) - max_events = cpu_pmu->num_events; - - return max_events; -} -EXPORT_SYMBOL_GPL(armpmu_get_max_events); - -int perf_num_counters(void) -{ - return armpmu_get_max_events(); -} -EXPORT_SYMBOL_GPL(perf_num_counters); - -#define HW_OP_UNSUPPORTED 0xFFFF - -#define C(_x) \ - PERF_COUNT_HW_CACHE_##_x - -#define CACHE_OP_UNSUPPORTED 0xFFFF - -#define PERF_MAP_ALL_UNSUPPORTED \ - [0 ... PERF_COUNT_HW_MAX - 1] = HW_OP_UNSUPPORTED - -#define PERF_CACHE_MAP_ALL_UNSUPPORTED \ -[0 ... C(MAX) - 1] = { \ - [0 ... C(OP_MAX) - 1] = { \ - [0 ... C(RESULT_MAX) - 1] = CACHE_OP_UNSUPPORTED, \ - }, \ -} - -static int -armpmu_map_cache_event(const unsigned (*cache_map) - [PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX], - u64 config) -{ - unsigned int cache_type, cache_op, cache_result, ret; - - cache_type = (config >> 0) & 0xff; - if (cache_type >= PERF_COUNT_HW_CACHE_MAX) - return -EINVAL; - - cache_op = (config >> 8) & 0xff; - if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX) - return -EINVAL; - - cache_result = (config >> 16) & 0xff; - if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) - return -EINVAL; - - ret = (int)(*cache_map)[cache_type][cache_op][cache_result]; - - if (ret == CACHE_OP_UNSUPPORTED) - return -ENOENT; - - return ret; -} - -static int -armpmu_map_event(const unsigned (*event_map)[PERF_COUNT_HW_MAX], u64 config) -{ - int mapping; - - if (config >= PERF_COUNT_HW_MAX) - return -EINVAL; - - mapping = (*event_map)[config]; - return mapping == HW_OP_UNSUPPORTED ? -ENOENT : mapping; -} - -static int -armpmu_map_raw_event(u32 raw_event_mask, u64 config) -{ - return (int)(config & raw_event_mask); -} - -static int map_cpu_event(struct perf_event *event, - const unsigned (*event_map)[PERF_COUNT_HW_MAX], - const unsigned (*cache_map) - [PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX], - u32 raw_event_mask) -{ - u64 config = event->attr.config; - - switch (event->attr.type) { - case PERF_TYPE_HARDWARE: - return armpmu_map_event(event_map, config); - case PERF_TYPE_HW_CACHE: - return armpmu_map_cache_event(cache_map, config); - case PERF_TYPE_RAW: - return armpmu_map_raw_event(raw_event_mask, config); - } - - return -ENOENT; -} - -int -armpmu_event_set_period(struct perf_event *event, - struct hw_perf_event *hwc, - int idx) -{ - struct arm_pmu *armpmu = to_arm_pmu(event->pmu); - s64 left = local64_read(&hwc->period_left); - s64 period = hwc->sample_period; - int ret = 0; - - if (unlikely(left <= -period)) { - left = period; - local64_set(&hwc->period_left, left); - hwc->last_period = period; - ret = 1; - } - - if (unlikely(left <= 0)) { - left += period; - local64_set(&hwc->period_left, left); - hwc->last_period = period; - ret = 1; - } - - /* - * Limit the maximum period to prevent the counter value - * from overtaking the one we are about to program. In - * effect we are reducing max_period to account for - * interrupt latency (and we are being very conservative). - */ - if (left > (armpmu->max_period >> 1)) - left = armpmu->max_period >> 1; - - local64_set(&hwc->prev_count, (u64)-left); - - armpmu->write_counter(idx, (u64)(-left) & 0xffffffff); - - perf_event_update_userpage(event); - - return ret; -} - -u64 -armpmu_event_update(struct perf_event *event, - struct hw_perf_event *hwc, - int idx) -{ - struct arm_pmu *armpmu = to_arm_pmu(event->pmu); - u64 delta, prev_raw_count, new_raw_count; - -again: - prev_raw_count = local64_read(&hwc->prev_count); - new_raw_count = armpmu->read_counter(idx); - - if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, - new_raw_count) != prev_raw_count) - goto again; - - delta = (new_raw_count - prev_raw_count) & armpmu->max_period; - - local64_add(delta, &event->count); - local64_sub(delta, &hwc->period_left); - - return new_raw_count; -} - -static void -armpmu_read(struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - - /* Don't read disabled counters! */ - if (hwc->idx < 0) - return; - - armpmu_event_update(event, hwc, hwc->idx); -} - -static void -armpmu_stop(struct perf_event *event, int flags) -{ - struct arm_pmu *armpmu = to_arm_pmu(event->pmu); - struct hw_perf_event *hwc = &event->hw; - - /* - * ARM pmu always has to update the counter, so ignore - * PERF_EF_UPDATE, see comments in armpmu_start(). - */ - if (!(hwc->state & PERF_HES_STOPPED)) { - armpmu->disable(hwc, hwc->idx); - barrier(); /* why? */ - armpmu_event_update(event, hwc, hwc->idx); - hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; - } -} -static void -armpmu_start(struct perf_event *event, int flags) -{ - struct arm_pmu *armpmu = to_arm_pmu(event->pmu); - struct hw_perf_event *hwc = &event->hw; - - /* - * ARM pmu always has to reprogram the period, so ignore - * PERF_EF_RELOAD, see the comment below. - */ - if (flags & PERF_EF_RELOAD) - WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); - - hwc->state = 0; - /* - * Set the period again. Some counters can't be stopped, so when we - * were stopped we simply disabled the IRQ source and the counter - * may have been left counting. If we don't do this step then we may - * get an interrupt too soon or *way* too late if the overflow has - * happened since disabling. - */ - armpmu_event_set_period(event, hwc, hwc->idx); - armpmu->enable(hwc, hwc->idx); -} - -static void -armpmu_del(struct perf_event *event, int flags) -{ - struct arm_pmu *armpmu = to_arm_pmu(event->pmu); - struct pmu_hw_events *hw_events = armpmu->get_hw_events(); - struct hw_perf_event *hwc = &event->hw; - int idx = hwc->idx; - - WARN_ON(idx < 0); - - armpmu_stop(event, PERF_EF_UPDATE); - hw_events->events[idx] = NULL; - clear_bit(idx, hw_events->used_mask); - - perf_event_update_userpage(event); -} - -static int -armpmu_add(struct perf_event *event, int flags) -{ - struct arm_pmu *armpmu = to_arm_pmu(event->pmu); - struct pmu_hw_events *hw_events = armpmu->get_hw_events(); - struct hw_perf_event *hwc = &event->hw; - int idx; - int err = 0; - - perf_pmu_disable(event->pmu); - - /* If we don't have a space for the counter then finish early. */ - idx = armpmu->get_event_idx(hw_events, hwc); - if (idx < 0) { - err = idx; - goto out; - } - - /* - * If there is an event in the counter we are going to use then make - * sure it is disabled. - */ - event->hw.idx = idx; - armpmu->disable(hwc, idx); - hw_events->events[idx] = event; - - hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE; - if (flags & PERF_EF_START) - armpmu_start(event, PERF_EF_RELOAD); - - /* Propagate our changes to the userspace mapping. */ - perf_event_update_userpage(event); - -out: - perf_pmu_enable(event->pmu); - return err; -} - -static int -validate_event(struct pmu *pmu, struct pmu_hw_events *hw_events, - struct perf_event *event) -{ - struct arm_pmu *armpmu; - struct hw_perf_event fake_event = event->hw; - struct pmu *leader_pmu = event->group_leader->pmu; - - if (is_software_event(event)) - return 1; - - /* - * Reject groups spanning multiple HW PMUs (e.g. CPU + CCI). The - * core perf code won't check that the pmu->ctx == leader->ctx - * until after pmu->event_init(event). - */ - if (event->pmu != pmu) - return 0; - - if (event->pmu != leader_pmu || event->state < PERF_EVENT_STATE_OFF) - return 1; - - if (event->state == PERF_EVENT_STATE_OFF && !event->attr.enable_on_exec) - return 1; - - armpmu = to_arm_pmu(event->pmu); - return armpmu->get_event_idx(hw_events, &fake_event) >= 0; -} - -static int -validate_group(struct perf_event *event) -{ - struct perf_event *sibling, *leader = event->group_leader; - struct pmu_hw_events fake_pmu; - DECLARE_BITMAP(fake_used_mask, ARMPMU_MAX_HWEVENTS); - - /* - * Initialise the fake PMU. We only need to populate the - * used_mask for the purposes of validation. - */ - memset(fake_used_mask, 0, sizeof(fake_used_mask)); - fake_pmu.used_mask = fake_used_mask; - - if (!validate_event(event->pmu, &fake_pmu, leader)) - return -EINVAL; - - list_for_each_entry(sibling, &leader->sibling_list, group_entry) { - if (!validate_event(event->pmu, &fake_pmu, sibling)) - return -EINVAL; - } - - if (!validate_event(event->pmu, &fake_pmu, event)) - return -EINVAL; - - return 0; -} - -static void -armpmu_disable_percpu_irq(void *data) -{ - unsigned int irq = *(unsigned int *)data; - disable_percpu_irq(irq); -} - -static void -armpmu_release_hardware(struct arm_pmu *armpmu) -{ - int irq; - unsigned int i, irqs; - struct platform_device *pmu_device = armpmu->plat_device; - - irqs = min(pmu_device->num_resources, num_possible_cpus()); - if (!irqs) - return; - - irq = platform_get_irq(pmu_device, 0); - if (irq <= 0) - return; - - if (irq_is_percpu(irq)) { - on_each_cpu(armpmu_disable_percpu_irq, &irq, 1); - free_percpu_irq(irq, &cpu_hw_events); - } else { - for (i = 0; i < irqs; ++i) { - int cpu = i; - - if (armpmu->irq_affinity) - cpu = armpmu->irq_affinity[i]; - - if (!cpumask_test_and_clear_cpu(cpu, &armpmu->active_irqs)) - continue; - irq = platform_get_irq(pmu_device, i); - if (irq > 0) - free_irq(irq, armpmu); - } - } -} - -static void -armpmu_enable_percpu_irq(void *data) -{ - unsigned int irq = *(unsigned int *)data; - enable_percpu_irq(irq, IRQ_TYPE_NONE); -} - -static int -armpmu_reserve_hardware(struct arm_pmu *armpmu) -{ - int err, irq; - unsigned int i, irqs; - struct platform_device *pmu_device = armpmu->plat_device; - - if (!pmu_device) - return -ENODEV; - - irqs = min(pmu_device->num_resources, num_possible_cpus()); - if (!irqs) { - pr_err("no irqs for PMUs defined\n"); - return -ENODEV; - } - - irq = platform_get_irq(pmu_device, 0); - if (irq <= 0) { - pr_err("failed to get valid irq for PMU device\n"); - return -ENODEV; - } - - if (irq_is_percpu(irq)) { - err = request_percpu_irq(irq, armpmu->handle_irq, - "arm-pmu", &cpu_hw_events); - - if (err) { - pr_err("unable to request percpu IRQ%d for ARM PMU counters\n", - irq); - armpmu_release_hardware(armpmu); - return err; - } - - on_each_cpu(armpmu_enable_percpu_irq, &irq, 1); - } else { - for (i = 0; i < irqs; ++i) { - int cpu = i; - - err = 0; - irq = platform_get_irq(pmu_device, i); - if (irq <= 0) - continue; - - if (armpmu->irq_affinity) - cpu = armpmu->irq_affinity[i]; - - /* - * If we have a single PMU interrupt that we can't shift, - * assume that we're running on a uniprocessor machine and - * continue. Otherwise, continue without this interrupt. - */ - if (irq_set_affinity(irq, cpumask_of(cpu)) && irqs > 1) { - pr_warning("unable to set irq affinity (irq=%d, cpu=%u)\n", - irq, cpu); - continue; - } - - err = request_irq(irq, armpmu->handle_irq, - IRQF_NOBALANCING | IRQF_NO_THREAD, - "arm-pmu", armpmu); - if (err) { - pr_err("unable to request IRQ%d for ARM PMU counters\n", - irq); - armpmu_release_hardware(armpmu); - return err; - } - - cpumask_set_cpu(cpu, &armpmu->active_irqs); - } - } - - return 0; -} - -static void -hw_perf_event_destroy(struct perf_event *event) -{ - struct arm_pmu *armpmu = to_arm_pmu(event->pmu); - atomic_t *active_events = &armpmu->active_events; - struct mutex *pmu_reserve_mutex = &armpmu->reserve_mutex; - - if (atomic_dec_and_mutex_lock(active_events, pmu_reserve_mutex)) { - armpmu_release_hardware(armpmu); - mutex_unlock(pmu_reserve_mutex); - } -} - -static int -event_requires_mode_exclusion(struct perf_event_attr *attr) -{ - return attr->exclude_idle || attr->exclude_user || - attr->exclude_kernel || attr->exclude_hv; -} - -static int -__hw_perf_event_init(struct perf_event *event) -{ - struct arm_pmu *armpmu = to_arm_pmu(event->pmu); - struct hw_perf_event *hwc = &event->hw; - int mapping, err; - - mapping = armpmu->map_event(event); - - if (mapping < 0) { - pr_debug("event %x:%llx not supported\n", event->attr.type, - event->attr.config); - return mapping; - } - - /* - * We don't assign an index until we actually place the event onto - * hardware. Use -1 to signify that we haven't decided where to put it - * yet. For SMP systems, each core has it's own PMU so we can't do any - * clever allocation or constraints checking at this point. - */ - hwc->idx = -1; - hwc->config_base = 0; - hwc->config = 0; - hwc->event_base = 0; - - /* - * Check whether we need to exclude the counter from certain modes. - */ - if ((!armpmu->set_event_filter || - armpmu->set_event_filter(hwc, &event->attr)) && - event_requires_mode_exclusion(&event->attr)) { - pr_debug("ARM performance counters do not support mode exclusion\n"); - return -EPERM; - } - - /* - * Store the event encoding into the config_base field. - */ - hwc->config_base |= (unsigned long)mapping; - - if (!hwc->sample_period) { - /* - * For non-sampling runs, limit the sample_period to half - * of the counter width. That way, the new counter value - * is far less likely to overtake the previous one unless - * you have some serious IRQ latency issues. - */ - hwc->sample_period = armpmu->max_period >> 1; - hwc->last_period = hwc->sample_period; - local64_set(&hwc->period_left, hwc->sample_period); - } - - err = 0; - if (event->group_leader != event) { - err = validate_group(event); - if (err) - return -EINVAL; - } - - return err; -} - -static int armpmu_event_init(struct perf_event *event) -{ - struct arm_pmu *armpmu = to_arm_pmu(event->pmu); - int err = 0; - atomic_t *active_events = &armpmu->active_events; - - if (armpmu->map_event(event) == -ENOENT) - return -ENOENT; - - event->destroy = hw_perf_event_destroy; - - if (!atomic_inc_not_zero(active_events)) { - mutex_lock(&armpmu->reserve_mutex); - if (atomic_read(active_events) == 0) - err = armpmu_reserve_hardware(armpmu); - - if (!err) - atomic_inc(active_events); - mutex_unlock(&armpmu->reserve_mutex); - } - - if (err) - return err; - - err = __hw_perf_event_init(event); - if (err) - hw_perf_event_destroy(event); - - return err; -} - -static void armpmu_enable(struct pmu *pmu) -{ - struct arm_pmu *armpmu = to_arm_pmu(pmu); - struct pmu_hw_events *hw_events = armpmu->get_hw_events(); - int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events); - - if (enabled) - armpmu->start(); -} - -static void armpmu_disable(struct pmu *pmu) -{ - struct arm_pmu *armpmu = to_arm_pmu(pmu); - armpmu->stop(); -} - -static void __init armpmu_init(struct arm_pmu *armpmu) -{ - atomic_set(&armpmu->active_events, 0); - mutex_init(&armpmu->reserve_mutex); - - armpmu->pmu = (struct pmu) { - .pmu_enable = armpmu_enable, - .pmu_disable = armpmu_disable, - .event_init = armpmu_event_init, - .add = armpmu_add, - .del = armpmu_del, - .start = armpmu_start, - .stop = armpmu_stop, - .read = armpmu_read, - }; -} - -int __init armpmu_register(struct arm_pmu *armpmu, char *name, int type) -{ - armpmu_init(armpmu); - return perf_pmu_register(&armpmu->pmu, name, type); -} +#include +#include +#include /* * ARMv8 PMUv3 Performance Events handling code. @@ -739,7 +100,8 @@ static const unsigned armv8_pmuv3_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] */ #define ARMV8_IDX_CYCLE_COUNTER 0 #define ARMV8_IDX_COUNTER0 1 -#define ARMV8_IDX_COUNTER_LAST (ARMV8_IDX_CYCLE_COUNTER + cpu_pmu->num_events - 1) +#define ARMV8_IDX_COUNTER_LAST(cpu_pmu) \ + (ARMV8_IDX_CYCLE_COUNTER + cpu_pmu->num_events - 1) #define ARMV8_MAX_COUNTERS 32 #define ARMV8_COUNTER_MASK (ARMV8_MAX_COUNTERS - 1) @@ -805,49 +167,34 @@ static inline int armv8pmu_has_overflowed(u32 pmovsr) return pmovsr & ARMV8_OVERFLOWED_MASK; } -static inline int armv8pmu_counter_valid(int idx) +static inline int armv8pmu_counter_valid(struct arm_pmu *cpu_pmu, int idx) { - return idx >= ARMV8_IDX_CYCLE_COUNTER && idx <= ARMV8_IDX_COUNTER_LAST; + return idx >= ARMV8_IDX_CYCLE_COUNTER && + idx <= ARMV8_IDX_COUNTER_LAST(cpu_pmu); } static inline int armv8pmu_counter_has_overflowed(u32 pmnc, int idx) { - int ret = 0; - u32 counter; - - if (!armv8pmu_counter_valid(idx)) { - pr_err("CPU%u checking wrong counter %d overflow status\n", - smp_processor_id(), idx); - } else { - counter = ARMV8_IDX_TO_COUNTER(idx); - ret = pmnc & BIT(counter); - } - - return ret; + return pmnc & BIT(ARMV8_IDX_TO_COUNTER(idx)); } static inline int armv8pmu_select_counter(int idx) { - u32 counter; - - if (!armv8pmu_counter_valid(idx)) { - pr_err("CPU%u selecting wrong PMNC counter %d\n", - smp_processor_id(), idx); - return -EINVAL; - } - - counter = ARMV8_IDX_TO_COUNTER(idx); + u32 counter = ARMV8_IDX_TO_COUNTER(idx); asm volatile("msr pmselr_el0, %0" :: "r" (counter)); isb(); return idx; } -static inline u32 armv8pmu_read_counter(int idx) +static inline u32 armv8pmu_read_counter(struct perf_event *event) { + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; u32 value = 0; - if (!armv8pmu_counter_valid(idx)) + if (!armv8pmu_counter_valid(cpu_pmu, idx)) pr_err("CPU%u reading wrong counter %d\n", smp_processor_id(), idx); else if (idx == ARMV8_IDX_CYCLE_COUNTER) @@ -858,9 +205,13 @@ static inline u32 armv8pmu_read_counter(int idx) return value; } -static inline void armv8pmu_write_counter(int idx, u32 value) +static inline void armv8pmu_write_counter(struct perf_event *event, u32 value) { - if (!armv8pmu_counter_valid(idx)) + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + if (!armv8pmu_counter_valid(cpu_pmu, idx)) pr_err("CPU%u writing wrong counter %d\n", smp_processor_id(), idx); else if (idx == ARMV8_IDX_CYCLE_COUNTER) @@ -879,65 +230,34 @@ static inline void armv8pmu_write_evtype(int idx, u32 val) static inline int armv8pmu_enable_counter(int idx) { - u32 counter; - - if (!armv8pmu_counter_valid(idx)) { - pr_err("CPU%u enabling wrong PMNC counter %d\n", - smp_processor_id(), idx); - return -EINVAL; - } - - counter = ARMV8_IDX_TO_COUNTER(idx); + u32 counter = ARMV8_IDX_TO_COUNTER(idx); asm volatile("msr pmcntenset_el0, %0" :: "r" (BIT(counter))); return idx; } static inline int armv8pmu_disable_counter(int idx) { - u32 counter; - - if (!armv8pmu_counter_valid(idx)) { - pr_err("CPU%u disabling wrong PMNC counter %d\n", - smp_processor_id(), idx); - return -EINVAL; - } - - counter = ARMV8_IDX_TO_COUNTER(idx); + u32 counter = ARMV8_IDX_TO_COUNTER(idx); asm volatile("msr pmcntenclr_el0, %0" :: "r" (BIT(counter))); return idx; } static inline int armv8pmu_enable_intens(int idx) { - u32 counter; - - if (!armv8pmu_counter_valid(idx)) { - pr_err("CPU%u enabling wrong PMNC counter IRQ enable %d\n", - smp_processor_id(), idx); - return -EINVAL; - } - - counter = ARMV8_IDX_TO_COUNTER(idx); + u32 counter = ARMV8_IDX_TO_COUNTER(idx); asm volatile("msr pmintenset_el1, %0" :: "r" (BIT(counter))); return idx; } static inline int armv8pmu_disable_intens(int idx) { - u32 counter; - - if (!armv8pmu_counter_valid(idx)) { - pr_err("CPU%u disabling wrong PMNC counter IRQ enable %d\n", - smp_processor_id(), idx); - return -EINVAL; - } - - counter = ARMV8_IDX_TO_COUNTER(idx); + u32 counter = ARMV8_IDX_TO_COUNTER(idx); asm volatile("msr pmintenclr_el1, %0" :: "r" (BIT(counter))); isb(); /* Clear the overflow flag in case an interrupt is pending. */ asm volatile("msr pmovsclr_el0, %0" :: "r" (BIT(counter))); isb(); + return idx; } @@ -955,10 +275,13 @@ static inline u32 armv8pmu_getreset_flags(void) return value; } -static void armv8pmu_enable_event(struct hw_perf_event *hwc, int idx) +static void armv8pmu_enable_event(struct perf_event *event) { unsigned long flags; - struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + struct hw_perf_event *hwc = &event->hw; + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); + int idx = hwc->idx; /* * Enable counter and interrupt, and set the counter to count @@ -989,10 +312,13 @@ static void armv8pmu_enable_event(struct hw_perf_event *hwc, int idx) raw_spin_unlock_irqrestore(&events->pmu_lock, flags); } -static void armv8pmu_disable_event(struct hw_perf_event *hwc, int idx) +static void armv8pmu_disable_event(struct perf_event *event) { unsigned long flags; - struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + struct hw_perf_event *hwc = &event->hw; + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); + int idx = hwc->idx; /* * Disable counter and interrupt @@ -1016,7 +342,8 @@ static irqreturn_t armv8pmu_handle_irq(int irq_num, void *dev) { u32 pmovsr; struct perf_sample_data data; - struct pmu_hw_events *cpuc; + struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev; + struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events); struct pt_regs *regs; int idx; @@ -1036,7 +363,6 @@ static irqreturn_t armv8pmu_handle_irq(int irq_num, void *dev) */ regs = get_irq_regs(); - cpuc = this_cpu_ptr(&cpu_hw_events); for (idx = 0; idx < cpu_pmu->num_events; ++idx) { struct perf_event *event = cpuc->events[idx]; struct hw_perf_event *hwc; @@ -1053,13 +379,13 @@ static irqreturn_t armv8pmu_handle_irq(int irq_num, void *dev) continue; hwc = &event->hw; - armpmu_event_update(event, hwc, idx); + armpmu_event_update(event); perf_sample_data_init(&data, 0, hwc->last_period); - if (!armpmu_event_set_period(event, hwc, idx)) + if (!armpmu_event_set_period(event)) continue; if (perf_event_overflow(event, &data, regs)) - cpu_pmu->disable(hwc, idx); + cpu_pmu->disable(event); } /* @@ -1074,10 +400,10 @@ static irqreturn_t armv8pmu_handle_irq(int irq_num, void *dev) return IRQ_HANDLED; } -static void armv8pmu_start(void) +static void armv8pmu_start(struct arm_pmu *cpu_pmu) { unsigned long flags; - struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); raw_spin_lock_irqsave(&events->pmu_lock, flags); /* Enable all counters */ @@ -1085,10 +411,10 @@ static void armv8pmu_start(void) raw_spin_unlock_irqrestore(&events->pmu_lock, flags); } -static void armv8pmu_stop(void) +static void armv8pmu_stop(struct arm_pmu *cpu_pmu) { unsigned long flags; - struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events); raw_spin_lock_irqsave(&events->pmu_lock, flags); /* Disable all counters */ @@ -1097,10 +423,12 @@ static void armv8pmu_stop(void) } static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc, - struct hw_perf_event *event) + struct perf_event *event) { int idx; - unsigned long evtype = event->config_base & ARMV8_EVTYPE_EVENT; + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + unsigned long evtype = hwc->config_base & ARMV8_EVTYPE_EVENT; /* Always place a cycle counter into the cycle counter. */ if (evtype == ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES) { @@ -1151,11 +479,14 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event, static void armv8pmu_reset(void *info) { + struct arm_pmu *cpu_pmu = (struct arm_pmu *)info; u32 idx, nb_cnt = cpu_pmu->num_events; /* The counter and interrupt enable registers are unknown at reset. */ - for (idx = ARMV8_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx) - armv8pmu_disable_event(NULL, idx); + for (idx = ARMV8_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx) { + armv8pmu_disable_counter(idx); + armv8pmu_disable_intens(idx); + } /* Initialize & Reset PMNC: C and P bits. */ armv8pmu_pmcr_write(ARMV8_PMCR_P | ARMV8_PMCR_C); @@ -1166,169 +497,67 @@ static void armv8pmu_reset(void *info) static int armv8_pmuv3_map_event(struct perf_event *event) { - return map_cpu_event(event, &armv8_pmuv3_perf_map, + return armpmu_map_event(event, &armv8_pmuv3_perf_map, &armv8_pmuv3_perf_cache_map, ARMV8_EVTYPE_EVENT); } -static struct arm_pmu armv8pmu = { - .handle_irq = armv8pmu_handle_irq, - .enable = armv8pmu_enable_event, - .disable = armv8pmu_disable_event, - .read_counter = armv8pmu_read_counter, - .write_counter = armv8pmu_write_counter, - .get_event_idx = armv8pmu_get_event_idx, - .start = armv8pmu_start, - .stop = armv8pmu_stop, - .reset = armv8pmu_reset, - .max_period = (1LLU << 32) - 1, -}; - -static u32 __init armv8pmu_read_num_pmnc_events(void) +static void armv8pmu_read_num_pmnc_events(void *info) { - u32 nb_cnt; + int *nb_cnt = info; /* Read the nb of CNTx counters supported from PMNC */ - nb_cnt = (armv8pmu_pmcr_read() >> ARMV8_PMCR_N_SHIFT) & ARMV8_PMCR_N_MASK; + *nb_cnt = (armv8pmu_pmcr_read() >> ARMV8_PMCR_N_SHIFT) & ARMV8_PMCR_N_MASK; - /* Add the CPU cycles counter and return */ - return nb_cnt + 1; + /* Add the CPU cycles counter */ + *nb_cnt += 1; } -static struct arm_pmu *__init armv8_pmuv3_pmu_init(void) +static int armv8pmu_probe_num_events(struct arm_pmu *arm_pmu) { - armv8pmu.name = "arm/armv8-pmuv3"; - armv8pmu.map_event = armv8_pmuv3_map_event; - armv8pmu.num_events = armv8pmu_read_num_pmnc_events(); - armv8pmu.set_event_filter = armv8pmu_set_event_filter; - return &armv8pmu; + return smp_call_function_any(&arm_pmu->supported_cpus, + armv8pmu_read_num_pmnc_events, + &arm_pmu->num_events, 1); } -/* - * Ensure the PMU has sane values out of reset. - * This requires SMP to be available, so exists as a separate initcall. - */ -static int __init -cpu_pmu_reset(void) +static int armv8_pmuv3_init(struct arm_pmu *cpu_pmu) { - if (cpu_pmu && cpu_pmu->reset) - return on_each_cpu(cpu_pmu->reset, NULL, 1); - return 0; + cpu_pmu->handle_irq = armv8pmu_handle_irq, + cpu_pmu->enable = armv8pmu_enable_event, + cpu_pmu->disable = armv8pmu_disable_event, + cpu_pmu->read_counter = armv8pmu_read_counter, + cpu_pmu->write_counter = armv8pmu_write_counter, + cpu_pmu->get_event_idx = armv8pmu_get_event_idx, + cpu_pmu->start = armv8pmu_start, + cpu_pmu->stop = armv8pmu_stop, + cpu_pmu->reset = armv8pmu_reset, + cpu_pmu->max_period = (1LLU << 32) - 1, + cpu_pmu->name = "armv8_pmuv3"; + cpu_pmu->map_event = armv8_pmuv3_map_event; + cpu_pmu->set_event_filter = armv8pmu_set_event_filter; + return armv8pmu_probe_num_events(cpu_pmu); } -arch_initcall(cpu_pmu_reset); -/* - * PMU platform driver and devicetree bindings. - */ -static const struct of_device_id armpmu_of_device_ids[] = { - {.compatible = "arm,armv8-pmuv3"}, +static const struct of_device_id armv8_pmu_of_device_ids[] = { + {.compatible = "arm,armv8-pmuv3", .data = armv8_pmuv3_init}, {}, }; -static int armpmu_device_probe(struct platform_device *pdev) +static int armv8_pmu_device_probe(struct platform_device *pdev) { - int i, irq, *irqs; - - if (!cpu_pmu) - return -ENODEV; - - /* Don't bother with PPIs; they're already affine */ - irq = platform_get_irq(pdev, 0); - if (irq >= 0 && irq_is_percpu(irq)) - goto out; - - irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL); - if (!irqs) - return -ENOMEM; - - for (i = 0; i < pdev->num_resources; ++i) { - struct device_node *dn; - int cpu; - - dn = of_parse_phandle(pdev->dev.of_node, "interrupt-affinity", - i); - if (!dn) { - pr_warn("Failed to parse %s/interrupt-affinity[%d]\n", - of_node_full_name(pdev->dev.of_node), i); - break; - } - - for_each_possible_cpu(cpu) - if (dn == of_cpu_device_node_get(cpu)) - break; - - if (cpu >= nr_cpu_ids) { - pr_warn("Failed to find logical CPU for %s\n", - dn->name); - of_node_put(dn); - break; - } - of_node_put(dn); - - irqs[i] = cpu; - } - - if (i == pdev->num_resources) - cpu_pmu->irq_affinity = irqs; - else - kfree(irqs); - -out: - cpu_pmu->plat_device = pdev; - return 0; + return arm_pmu_device_probe(pdev, armv8_pmu_of_device_ids, NULL); } -static struct platform_driver armpmu_driver = { +static struct platform_driver armv8_pmu_driver = { .driver = { - .name = "arm-pmu", - .of_match_table = armpmu_of_device_ids, + .name = "armv8-pmu", + .of_match_table = armv8_pmu_of_device_ids, }, - .probe = armpmu_device_probe, + .probe = armv8_pmu_device_probe, }; -static int __init register_pmu_driver(void) +static int __init register_armv8_pmu_driver(void) { - return platform_driver_register(&armpmu_driver); + return platform_driver_register(&armv8_pmu_driver); } -device_initcall(register_pmu_driver); - -static struct pmu_hw_events *armpmu_get_cpu_events(void) -{ - return this_cpu_ptr(&cpu_hw_events); -} - -static void __init cpu_pmu_init(struct arm_pmu *armpmu) -{ - int cpu; - for_each_possible_cpu(cpu) { - struct pmu_hw_events *events = &per_cpu(cpu_hw_events, cpu); - events->events = per_cpu(hw_events, cpu); - events->used_mask = per_cpu(used_mask, cpu); - raw_spin_lock_init(&events->pmu_lock); - } - armpmu->get_hw_events = armpmu_get_cpu_events; -} - -static int __init init_hw_perf_events(void) -{ - u64 dfr = read_cpuid(ID_AA64DFR0_EL1); - - switch ((dfr >> 8) & 0xf) { - case 0x1: /* PMUv3 */ - cpu_pmu = armv8_pmuv3_pmu_init(); - break; - } - - if (cpu_pmu) { - pr_info("enabled with %s PMU driver, %d counters available\n", - cpu_pmu->name, cpu_pmu->num_events); - cpu_pmu_init(cpu_pmu); - armpmu_register(cpu_pmu, "cpu", PERF_TYPE_RAW); - } else { - pr_info("no hardware support available\n"); - } - - return 0; -} -early_initcall(init_hw_perf_events); - +device_initcall(register_armv8_pmu_driver); diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig index d9de36e..04e2653 100644 --- a/drivers/perf/Kconfig +++ b/drivers/perf/Kconfig @@ -5,7 +5,7 @@ menu "Performance monitor support" config ARM_PMU - depends on PERF_EVENTS && ARM + depends on PERF_EVENTS && (ARM || ARM64) bool "ARM PMU framework" default y help -- cgit v0.10.2 From ac82d12772158dfbc1d3827a68b317e10326bbaa Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 2 Oct 2015 10:55:04 +0100 Subject: arm64: perf: add Cortex-A53 support The Cortex-A53 PMU supports a few events outside of the required PMUv3 set that are rather useful. This patch adds the event map data for said events. Signed-off-by: Mark Rutland Acked-by: Will Deacon Signed-off-by: Catalin Marinas diff --git a/Documentation/devicetree/bindings/arm/pmu.txt b/Documentation/devicetree/bindings/arm/pmu.txt index 435251f..2e6737b 100644 --- a/Documentation/devicetree/bindings/arm/pmu.txt +++ b/Documentation/devicetree/bindings/arm/pmu.txt @@ -8,6 +8,7 @@ Required properties: - compatible : should be one of "arm,armv8-pmuv3" + "arm.cortex-a53-pmu" "arm,cortex-a17-pmu" "arm,cortex-a15-pmu" "arm,cortex-a12-pmu" diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index a614100..5a5308a 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -69,6 +69,11 @@ enum armv8_pmuv3_perf_types { ARMV8_PMUV3_PERFCTR_BUS_CYCLES = 0x1D, }; +/* ARMv8 Cortex-A53 specific event types. */ +enum armv8_a53_pmu_perf_types { + ARMV8_A53_PERFCTR_PREFETCH_LINEFILL = 0xC2, +}; + /* PMUv3 HW events mapping. */ static const unsigned armv8_pmuv3_perf_map[PERF_COUNT_HW_MAX] = { PERF_MAP_ALL_UNSUPPORTED, @@ -79,6 +84,18 @@ static const unsigned armv8_pmuv3_perf_map[PERF_COUNT_HW_MAX] = { [PERF_COUNT_HW_BRANCH_MISSES] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED, }; +/* ARM Cortex-A53 HW events mapping. */ +static const unsigned armv8_a53_perf_map[PERF_COUNT_HW_MAX] = { + PERF_MAP_ALL_UNSUPPORTED, + [PERF_COUNT_HW_CPU_CYCLES] = ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_INSTR_EXECUTED, + [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS, + [PERF_COUNT_HW_CACHE_MISSES] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_PC_WRITE, + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED, + [PERF_COUNT_HW_BUS_CYCLES] = ARMV8_PMUV3_PERFCTR_BUS_CYCLES, +}; + static const unsigned armv8_pmuv3_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = { @@ -95,6 +112,28 @@ static const unsigned armv8_pmuv3_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED, }; +static const unsigned armv8_a53_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + PERF_CACHE_MAP_ALL_UNSUPPORTED, + + [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS, + [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL, + [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS, + [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL, + [C(L1D)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV8_A53_PERFCTR_PREFETCH_LINEFILL, + + [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1_ICACHE_ACCESS, + [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1_ICACHE_REFILL, + + [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_ITLB_REFILL, + + [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED, + [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED, +}; + /* * Perf Events' indices */ @@ -502,6 +541,13 @@ static int armv8_pmuv3_map_event(struct perf_event *event) ARMV8_EVTYPE_EVENT); } +static int armv8_a53_map_event(struct perf_event *event) +{ + return armpmu_map_event(event, &armv8_a53_perf_map, + &armv8_a53_perf_cache_map, + ARMV8_EVTYPE_EVENT); +} + static void armv8pmu_read_num_pmnc_events(void *info) { int *nb_cnt = info; @@ -520,7 +566,7 @@ static int armv8pmu_probe_num_events(struct arm_pmu *arm_pmu) &arm_pmu->num_events, 1); } -static int armv8_pmuv3_init(struct arm_pmu *cpu_pmu) +static void armv8_pmu_init(struct arm_pmu *cpu_pmu) { cpu_pmu->handle_irq = armv8pmu_handle_irq, cpu_pmu->enable = armv8pmu_enable_event, @@ -532,14 +578,28 @@ static int armv8_pmuv3_init(struct arm_pmu *cpu_pmu) cpu_pmu->stop = armv8pmu_stop, cpu_pmu->reset = armv8pmu_reset, cpu_pmu->max_period = (1LLU << 32) - 1, + cpu_pmu->set_event_filter = armv8pmu_set_event_filter; +} + +static int armv8_pmuv3_init(struct arm_pmu *cpu_pmu) +{ + armv8_pmu_init(cpu_pmu); cpu_pmu->name = "armv8_pmuv3"; cpu_pmu->map_event = armv8_pmuv3_map_event; - cpu_pmu->set_event_filter = armv8pmu_set_event_filter; + return armv8pmu_probe_num_events(cpu_pmu); +} + +static int armv8_a53_pmu_init(struct arm_pmu *cpu_pmu) +{ + armv8_pmu_init(cpu_pmu); + cpu_pmu->name = "armv8_cortex_a53"; + cpu_pmu->map_event = armv8_a53_map_event; return armv8pmu_probe_num_events(cpu_pmu); } static const struct of_device_id armv8_pmu_of_device_ids[] = { {.compatible = "arm,armv8-pmuv3", .data = armv8_pmuv3_init}, + {.compatible = "arm,cortex-a53-pmu", .data = armv8_a53_pmu_init}, {}, }; -- cgit v0.10.2 From 62a4dda9d63a10e5b28943967fc936c74fa16dfb Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 2 Oct 2015 10:55:05 +0100 Subject: arm64: perf: add Cortex-A57 support The Cortex-A57 PMU supports a few events outside of the required PMUv3 set that are rather useful. This patch adds the event map data for said events. Signed-off-by: Mark Rutland Acked-by: Will Deacon Signed-off-by: Catalin Marinas diff --git a/Documentation/devicetree/bindings/arm/pmu.txt b/Documentation/devicetree/bindings/arm/pmu.txt index 2e6737b..4b7c3d9 100644 --- a/Documentation/devicetree/bindings/arm/pmu.txt +++ b/Documentation/devicetree/bindings/arm/pmu.txt @@ -8,6 +8,7 @@ Required properties: - compatible : should be one of "arm,armv8-pmuv3" + "arm.cortex-a57-pmu" "arm.cortex-a53-pmu" "arm,cortex-a17-pmu" "arm,cortex-a15-pmu" diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 5a5308a..5b1897e 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -74,6 +74,16 @@ enum armv8_a53_pmu_perf_types { ARMV8_A53_PERFCTR_PREFETCH_LINEFILL = 0xC2, }; +/* ARMv8 Cortex-A57 specific event types. */ +enum armv8_a57_perf_types { + ARMV8_A57_PERFCTR_L1_DCACHE_ACCESS_LD = 0x40, + ARMV8_A57_PERFCTR_L1_DCACHE_ACCESS_ST = 0x41, + ARMV8_A57_PERFCTR_L1_DCACHE_REFILL_LD = 0x42, + ARMV8_A57_PERFCTR_L1_DCACHE_REFILL_ST = 0x43, + ARMV8_A57_PERFCTR_DTLB_REFILL_LD = 0x4c, + ARMV8_A57_PERFCTR_DTLB_REFILL_ST = 0x4d, +}; + /* PMUv3 HW events mapping. */ static const unsigned armv8_pmuv3_perf_map[PERF_COUNT_HW_MAX] = { PERF_MAP_ALL_UNSUPPORTED, @@ -96,6 +106,16 @@ static const unsigned armv8_a53_perf_map[PERF_COUNT_HW_MAX] = { [PERF_COUNT_HW_BUS_CYCLES] = ARMV8_PMUV3_PERFCTR_BUS_CYCLES, }; +static const unsigned armv8_a57_perf_map[PERF_COUNT_HW_MAX] = { + PERF_MAP_ALL_UNSUPPORTED, + [PERF_COUNT_HW_CPU_CYCLES] = ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_INSTR_EXECUTED, + [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS, + [PERF_COUNT_HW_CACHE_MISSES] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL, + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED, + [PERF_COUNT_HW_BUS_CYCLES] = ARMV8_PMUV3_PERFCTR_BUS_CYCLES, +}; + static const unsigned armv8_pmuv3_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = { @@ -134,6 +154,31 @@ static const unsigned armv8_a53_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED, }; +static const unsigned armv8_a57_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + PERF_CACHE_MAP_ALL_UNSUPPORTED, + + [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_A57_PERFCTR_L1_DCACHE_ACCESS_LD, + [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_A57_PERFCTR_L1_DCACHE_REFILL_LD, + [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_A57_PERFCTR_L1_DCACHE_ACCESS_ST, + [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_A57_PERFCTR_L1_DCACHE_REFILL_ST, + + [C(L1I)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1_ICACHE_ACCESS, + [C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1_ICACHE_REFILL, + + [C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_A57_PERFCTR_DTLB_REFILL_LD, + [C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_A57_PERFCTR_DTLB_REFILL_ST, + + [C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_ITLB_REFILL, + + [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED, + [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED, +}; + + /* * Perf Events' indices */ @@ -548,6 +593,13 @@ static int armv8_a53_map_event(struct perf_event *event) ARMV8_EVTYPE_EVENT); } +static int armv8_a57_map_event(struct perf_event *event) +{ + return armpmu_map_event(event, &armv8_a57_perf_map, + &armv8_a57_perf_cache_map, + ARMV8_EVTYPE_EVENT); +} + static void armv8pmu_read_num_pmnc_events(void *info) { int *nb_cnt = info; @@ -597,9 +649,18 @@ static int armv8_a53_pmu_init(struct arm_pmu *cpu_pmu) return armv8pmu_probe_num_events(cpu_pmu); } +static int armv8_a57_pmu_init(struct arm_pmu *cpu_pmu) +{ + armv8_pmu_init(cpu_pmu); + cpu_pmu->name = "armv8_cortex_a57"; + cpu_pmu->map_event = armv8_a57_map_event; + return armv8pmu_probe_num_events(cpu_pmu); +} + static const struct of_device_id armv8_pmu_of_device_ids[] = { {.compatible = "arm,armv8-pmuv3", .data = armv8_pmuv3_init}, {.compatible = "arm,cortex-a53-pmu", .data = armv8_a53_pmu_init}, + {.compatible = "arm,cortex-a57-pmu", .data = armv8_a57_pmu_init}, {}, }; -- cgit v0.10.2 From 01a507a371a35a5ee2b283af2ae7f8ed0b5e36f1 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 2 Oct 2015 10:55:06 +0100 Subject: arm64: dts: juno: describe PMUs separately The A57 and A53 PMUs in Juno support different events, so describe them separately in both the Juno and Juno R1 DTs. Signed-off-by: Mark Rutland Cc: Liviu Dudau Acked-by: Will Deacon Signed-off-by: Catalin Marinas diff --git a/arch/arm64/boot/dts/arm/juno-r1.dts b/arch/arm64/boot/dts/arm/juno-r1.dts index c627511..734e127 100644 --- a/arch/arm64/boot/dts/arm/juno-r1.dts +++ b/arch/arm64/boot/dts/arm/juno-r1.dts @@ -91,17 +91,21 @@ }; }; - pmu { - compatible = "arm,armv8-pmuv3"; + pmu_a57 { + compatible = "arm,cortex-a57-pmu"; interrupts = , - , - , + ; + interrupt-affinity = <&A57_0>, + <&A57_1>; + }; + + pmu_a53 { + compatible = "arm,cortex-a53-pmu"; + interrupts = , , , ; - interrupt-affinity = <&A57_0>, - <&A57_1>, - <&A53_0>, + interrupt-affinity = <&A53_0>, <&A53_1>, <&A53_2>, <&A53_3>; diff --git a/arch/arm64/boot/dts/arm/juno.dts b/arch/arm64/boot/dts/arm/juno.dts index d7cbdd4..ffa05ae 100644 --- a/arch/arm64/boot/dts/arm/juno.dts +++ b/arch/arm64/boot/dts/arm/juno.dts @@ -91,17 +91,21 @@ }; }; - pmu { - compatible = "arm,armv8-pmuv3"; + pmu_a57 { + compatible = "arm,cortex-a57-pmu"; interrupts = , - , - , + ; + interrupt-affinity = <&A57_0>, + <&A57_1>; + }; + + pmu_a53 { + compatible = "arm,cortex-a53-pmu"; + interrupts = , , , ; - interrupt-affinity = <&A57_0>, - <&A57_1>, - <&A53_0>, + interrupt-affinity = <&A53_0>, <&A53_1>, <&A53_2>, <&A53_3>; -- cgit v0.10.2 From dd06a84bf6446dab1592d4c7908a3037fbccdf1e Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 2 Oct 2015 10:55:07 +0100 Subject: MAINTAINERS: update ARM PMU profiling and debugging for arm64 Will Deacon maintains the profiling and debugging code under both arch/arm and arch/arm64. Update MAINTAINERS to reflect this, in preparation for adding myself as a reviewer of said code. Signed-off-by: Mark Rutland Acked-by: Will Deacon Signed-off-by: Catalin Marinas diff --git a/MAINTAINERS b/MAINTAINERS index 797236b..c6d977d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -823,11 +823,11 @@ F: arch/arm/include/asm/floppy.h ARM PMU PROFILING AND DEBUGGING M: Will Deacon S: Maintained -F: arch/arm/kernel/perf_* +F: arch/arm*/kernel/perf_* F: arch/arm/oprofile/common.c -F: arch/arm/kernel/hw_breakpoint.c -F: arch/arm/include/asm/hw_breakpoint.h -F: arch/arm/include/asm/perf_event.h +F: arch/arm*/kernel/hw_breakpoint.c +F: arch/arm*/include/asm/hw_breakpoint.h +F: arch/arm*/include/asm/perf_event.h F: drivers/perf/arm_pmu.c F: include/linux/perf/arm_pmu.h -- cgit v0.10.2 From 70e238df1daa05366d091d75163e1b08d7f122c3 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 2 Oct 2015 10:55:08 +0100 Subject: MAINTAINERS: add myself as arm perf reviewer As suggested by Will Deacon, add myself as a reviewer of the ARM PMU profiling and debugging code. Signed-off-by: Mark Rutland Acked-by: Will Deacon Signed-off-by: Catalin Marinas diff --git a/MAINTAINERS b/MAINTAINERS index c6d977d..8addb19 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -822,6 +822,7 @@ F: arch/arm/include/asm/floppy.h ARM PMU PROFILING AND DEBUGGING M: Will Deacon +R: Mark Rutland S: Maintained F: arch/arm*/kernel/perf_* F: arch/arm/oprofile/common.c -- cgit v0.10.2 From 2ff4439bb45a05433282d4fa9ca84202147819c7 Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Wed, 7 Oct 2015 12:00:19 -0500 Subject: arm64: Add contiguous page flag shifts and constants Add the number of pages required to form a contiguous range, as well as some supporting constants. Signed-off-by: Jeremy Linton Signed-off-by: Catalin Marinas diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h index 7d9c7e4..33892ef 100644 --- a/arch/arm64/include/asm/page.h +++ b/arch/arm64/include/asm/page.h @@ -20,14 +20,20 @@ #define __ASM_PAGE_H /* PAGE_SHIFT determines the page size */ +/* CONT_SHIFT determines the number of pages which can be tracked together */ #ifdef CONFIG_ARM64_64K_PAGES #define PAGE_SHIFT 16 +#define CONT_SHIFT 5 #else #define PAGE_SHIFT 12 +#define CONT_SHIFT 4 #endif -#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT) +#define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT) #define PAGE_MASK (~(PAGE_SIZE-1)) +#define CONT_SIZE (_AC(1, UL) << (CONT_SHIFT + PAGE_SHIFT)) +#define CONT_MASK (~(CONT_SIZE-1)) + /* * The idmap and swapper page tables need some space reserved in the kernel * image. Both require pgd, pud (4 levels only) and pmd tables to (section) -- cgit v0.10.2 From ecf35a237a85d747ef1d6f713888c782f42064ac Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Wed, 7 Oct 2015 12:00:20 -0500 Subject: arm64: PTE/PMD contiguous bit definition Define the bit positions in the PTE and PMD for the contiguous bit. Signed-off-by: Jeremy Linton Signed-off-by: Catalin Marinas diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index 24154b0..95c1ec0 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -55,6 +55,13 @@ #define SECTION_MASK (~(SECTION_SIZE-1)) /* + * Contiguous page definitions. + */ +#define CONT_PTES (_AC(1, UL) << CONT_SHIFT) +/* the the numerical offset of the PTE within a range of CONT_PTES */ +#define CONT_RANGE_OFFSET(addr) (((addr)>>PAGE_SHIFT)&(CONT_PTES-1)) + +/* * Hardware page table definitions. * * Level 1 descriptor (PUD). @@ -83,6 +90,7 @@ #define PMD_SECT_S (_AT(pmdval_t, 3) << 8) #define PMD_SECT_AF (_AT(pmdval_t, 1) << 10) #define PMD_SECT_NG (_AT(pmdval_t, 1) << 11) +#define PMD_SECT_CONT (_AT(pmdval_t, 1) << 52) #define PMD_SECT_PXN (_AT(pmdval_t, 1) << 53) #define PMD_SECT_UXN (_AT(pmdval_t, 1) << 54) @@ -105,6 +113,7 @@ #define PTE_AF (_AT(pteval_t, 1) << 10) /* Access Flag */ #define PTE_NG (_AT(pteval_t, 1) << 11) /* nG */ #define PTE_DBM (_AT(pteval_t, 1) << 51) /* Dirty Bit Management */ +#define PTE_CONT (_AT(pteval_t, 1) << 52) /* Contiguous range */ #define PTE_PXN (_AT(pteval_t, 1) << 53) /* Privileged XN */ #define PTE_UXN (_AT(pteval_t, 1) << 54) /* User XN */ -- cgit v0.10.2 From 93ef666a094ff9c9fc8d7cf1774ef0b92e270a75 Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Wed, 7 Oct 2015 12:00:21 -0500 Subject: arm64: Macros to check/set/unset the contiguous bit Add the supporting macros to check if the contiguous bit is set, set the bit, or clear it in a PTE entry. Signed-off-by: Jeremy Linton Signed-off-by: Catalin Marinas diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 5043a84e..1a1a6ef 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -140,6 +140,7 @@ extern struct page *empty_zero_page; #define pte_special(pte) (!!(pte_val(pte) & PTE_SPECIAL)) #define pte_write(pte) (!!(pte_val(pte) & PTE_WRITE)) #define pte_exec(pte) (!(pte_val(pte) & PTE_UXN)) +#define pte_cont(pte) (!!(pte_val(pte) & PTE_CONT)) #ifdef CONFIG_ARM64_HW_AFDBM #define pte_hw_dirty(pte) (pte_write(pte) && !(pte_val(pte) & PTE_RDONLY)) @@ -202,6 +203,16 @@ static inline pte_t pte_mkspecial(pte_t pte) return set_pte_bit(pte, __pgprot(PTE_SPECIAL)); } +static inline pte_t pte_mkcont(pte_t pte) +{ + return set_pte_bit(pte, __pgprot(PTE_CONT)); +} + +static inline pte_t pte_mknoncont(pte_t pte) +{ + return clear_pte_bit(pte, __pgprot(PTE_CONT)); +} + static inline void set_pte(pte_t *ptep, pte_t pte) { *ptep = pte; -- cgit v0.10.2 From 06f90d2527845c4767207f54280df2c5ca11e82b Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Wed, 7 Oct 2015 12:00:22 -0500 Subject: arm64: Default kernel pages should be contiguous The default page attributes for a PMD being broken should have the CONT bit set. Create a new definition for an early boot range of PTE's that are contiguous. Signed-off-by: Jeremy Linton Signed-off-by: Catalin Marinas diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 1a1a6ef..3f481ef 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -72,6 +72,7 @@ extern void __pgd_error(const char *file, int line, unsigned long val); #define PAGE_KERNEL __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE) #define PAGE_KERNEL_EXEC __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE) +#define PAGE_KERNEL_EXEC_CONT __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_CONT) #define PAGE_HYP __pgprot(_PAGE_DEFAULT | PTE_HYP) #define PAGE_HYP_DEVICE __pgprot(PROT_DEVICE_nGnRE | PTE_HYP) -- cgit v0.10.2 From 202e41a1c257eb56a05b5ff5bfde06f7ae98e6eb Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Wed, 7 Oct 2015 12:00:23 -0500 Subject: arm64: Make the kernel page dump utility aware of the CONT bit The kernel page dump utility needs to be aware of the CONT bit before it will break up pages ranges for display. Signed-off-by: Jeremy Linton Signed-off-by: Catalin Marinas diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c index f3d6221..5a22a11 100644 --- a/arch/arm64/mm/dump.c +++ b/arch/arm64/mm/dump.c @@ -67,6 +67,12 @@ static struct addr_marker address_markers[] = { { -1, NULL }, }; +/* + * The page dumper groups page table entries of the same type into a single + * description. It uses pg_state to track the range information while + * iterating over the pte entries. When the continuity is broken it then + * dumps out a description of the range. + */ struct pg_state { struct seq_file *seq; const struct addr_marker *marker; @@ -114,6 +120,16 @@ static const struct prot_bits pte_bits[] = { .set = "NG", .clear = " ", }, { + .mask = PTE_CONT, + .val = PTE_CONT, + .set = "CON", + .clear = " ", + }, { + .mask = PTE_TABLE_BIT, + .val = PTE_TABLE_BIT, + .set = " ", + .clear = "BLK", + }, { .mask = PTE_UXN, .val = PTE_UXN, .set = "UXN", @@ -198,7 +214,7 @@ static void note_page(struct pg_state *st, unsigned long addr, unsigned level, unsigned long delta; if (st->current_prot) { - seq_printf(st->seq, "0x%16lx-0x%16lx ", + seq_printf(st->seq, "0x%016lx-0x%016lx ", st->start_address, addr); delta = (addr - st->start_address) >> 10; -- cgit v0.10.2 From 348a65cdcbbf243073ee39d1f7d4413081ad7eab Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Wed, 7 Oct 2015 12:00:25 -0500 Subject: arm64: Mark kernel page ranges contiguous With 64k pages, the next larger segment size is 512M. The linux kernel also uses different protection flags to cover its code and data. Because of this requirement, the vast majority of the kernel code and data structures end up being mapped with 64k pages instead of the larger pages common with a 4k page kernel. Recent ARM processors support a contiguous bit in the page tables which allows the a TLB to cover a range larger than a single PTE if that range is mapped into physically contiguous ram. So, for the kernel its a good idea to set this flag. Some basic micro benchmarks show it can significantly reduce the number of L1 dTLB refills. Add boot option to enable/disable CONT marking, as well as fix a bug found by Steve Capper. Signed-off-by: Jeremy Linton [catalin.marinas@arm.com: remove CONFIG_ARM64_CONT_PTE altogether] Signed-off-by: Catalin Marinas diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 71a3104..eed6d52 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -80,19 +80,55 @@ static void split_pmd(pmd_t *pmd, pte_t *pte) do { /* * Need to have the least restrictive permissions available - * permissions will be fixed up later + * permissions will be fixed up later. Default the new page + * range as contiguous ptes. */ - set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC)); + set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC_CONT)); pfn++; } while (pte++, i++, i < PTRS_PER_PTE); } +/* + * Given a PTE with the CONT bit set, determine where the CONT range + * starts, and clear the entire range of PTE CONT bits. + */ +static void clear_cont_pte_range(pte_t *pte, unsigned long addr) +{ + int i; + + pte -= CONT_RANGE_OFFSET(addr); + for (i = 0; i < CONT_PTES; i++) { + set_pte(pte, pte_mknoncont(*pte)); + pte++; + } + flush_tlb_all(); +} + +/* + * Given a range of PTEs set the pfn and provided page protection flags + */ +static void __populate_init_pte(pte_t *pte, unsigned long addr, + unsigned long end, phys_addr_t phys, + pgprot_t prot) +{ + unsigned long pfn = __phys_to_pfn(phys); + + do { + /* clear all the bits except the pfn, then apply the prot */ + set_pte(pte, pfn_pte(pfn, prot)); + pte++; + pfn++; + addr += PAGE_SIZE; + } while (addr != end); +} + static void alloc_init_pte(pmd_t *pmd, unsigned long addr, - unsigned long end, unsigned long pfn, + unsigned long end, phys_addr_t phys, pgprot_t prot, void *(*alloc)(unsigned long size)) { pte_t *pte; + unsigned long next; if (pmd_none(*pmd) || pmd_sect(*pmd)) { pte = alloc(PTRS_PER_PTE * sizeof(pte_t)); @@ -105,9 +141,27 @@ static void alloc_init_pte(pmd_t *pmd, unsigned long addr, pte = pte_offset_kernel(pmd, addr); do { - set_pte(pte, pfn_pte(pfn, prot)); - pfn++; - } while (pte++, addr += PAGE_SIZE, addr != end); + next = min(end, (addr + CONT_SIZE) & CONT_MASK); + if (((addr | next | phys) & ~CONT_MASK) == 0) { + /* a block of CONT_PTES */ + __populate_init_pte(pte, addr, next, phys, + prot | __pgprot(PTE_CONT)); + } else { + /* + * If the range being split is already inside of a + * contiguous range but this PTE isn't going to be + * contiguous, then we want to unmark the adjacent + * ranges, then update the portion of the range we + * are interrested in. + */ + clear_cont_pte_range(pte, addr); + __populate_init_pte(pte, addr, next, phys, prot); + } + + pte += (next - addr) >> PAGE_SHIFT; + phys += next - addr; + addr = next; + } while (addr != end); } void split_pud(pud_t *old_pud, pmd_t *pmd) @@ -168,8 +222,7 @@ static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud, } } } else { - alloc_init_pte(pmd, addr, next, __phys_to_pfn(phys), - prot, alloc); + alloc_init_pte(pmd, addr, next, phys, prot, alloc); } phys += next - addr; } while (pmd++, addr = next, addr != end); -- cgit v0.10.2 From 217d453d473c5ddfd140a06bf9d8575218551020 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Thu, 24 Sep 2015 17:32:14 +0800 Subject: arm64: fix a migrating irq bug when hotplug cpu When cpu is disabled, all irqs will be migratged to another cpu. In some cases, a new affinity is different, the old affinity need to be updated and if irq_set_affinity's return value is IRQ_SET_MASK_OK_DONE, the old affinity can not be updated. Fix it by using irq_do_set_affinity. And migrating interrupts is a core code matter, so use the generic function irq_migrate_all_off_this_cpu() to migrate interrupts in kernel/irq/migration.c. Cc: Jiang Liu Cc: Thomas Gleixner Cc: Mark Rutland Cc: Will Deacon Cc: Russell King - ARM Linux Cc: Hanjun Guo Acked-by: Marc Zyngier Signed-off-by: Yang Yingliang Signed-off-by: Catalin Marinas diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 98b4f98..1b35bdb 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -427,6 +427,7 @@ config NR_CPUS config HOTPLUG_CPU bool "Support for hot-pluggable CPUs" + select GENERIC_IRQ_MIGRATION help Say Y here to experiment with turning CPUs off and on. CPUs can be controlled through /sys/devices/system/cpu. diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h index bbb251b..0916929 100644 --- a/arch/arm64/include/asm/irq.h +++ b/arch/arm64/include/asm/irq.h @@ -7,7 +7,6 @@ struct pt_regs; -extern void migrate_irqs(void); extern void set_handle_irq(void (*handle_irq)(struct pt_regs *)); static inline void acpi_irq_init(void) diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c index 11dc3fd..9f17ec0 100644 --- a/arch/arm64/kernel/irq.c +++ b/arch/arm64/kernel/irq.c @@ -27,7 +27,6 @@ #include #include #include -#include unsigned long irq_err_count; @@ -54,64 +53,3 @@ void __init init_IRQ(void) if (!handle_arch_irq) panic("No interrupt controller found."); } - -#ifdef CONFIG_HOTPLUG_CPU -static bool migrate_one_irq(struct irq_desc *desc) -{ - struct irq_data *d = irq_desc_get_irq_data(desc); - const struct cpumask *affinity = irq_data_get_affinity_mask(d); - struct irq_chip *c; - bool ret = false; - - /* - * If this is a per-CPU interrupt, or the affinity does not - * include this CPU, then we have nothing to do. - */ - if (irqd_is_per_cpu(d) || !cpumask_test_cpu(smp_processor_id(), affinity)) - return false; - - if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { - affinity = cpu_online_mask; - ret = true; - } - - c = irq_data_get_irq_chip(d); - if (!c->irq_set_affinity) - pr_debug("IRQ%u: unable to set affinity\n", d->irq); - else if (c->irq_set_affinity(d, affinity, false) == IRQ_SET_MASK_OK && ret) - cpumask_copy(irq_data_get_affinity_mask(d), affinity); - - return ret; -} - -/* - * The current CPU has been marked offline. Migrate IRQs off this CPU. - * If the affinity settings do not allow other CPUs, force them onto any - * available CPU. - * - * Note: we must iterate over all IRQs, whether they have an attached - * action structure or not, as we need to get chained interrupts too. - */ -void migrate_irqs(void) -{ - unsigned int i; - struct irq_desc *desc; - unsigned long flags; - - local_irq_save(flags); - - for_each_irq_desc(i, desc) { - bool affinity_broken; - - raw_spin_lock(&desc->lock); - affinity_broken = migrate_one_irq(desc); - raw_spin_unlock(&desc->lock); - - if (affinity_broken) - pr_warn_ratelimited("IRQ%u no longer affine to CPU%u\n", - i, smp_processor_id()); - } - - local_irq_restore(flags); -} -#endif /* CONFIG_HOTPLUG_CPU */ diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 03b0aa2..b7a973d 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -231,7 +231,8 @@ int __cpu_disable(void) /* * OK - migrate IRQs away from this CPU */ - migrate_irqs(); + irq_migrate_all_off_this_cpu(); + return 0; } -- cgit v0.10.2 From cb50ce324e72bfd5d191d0a834a0ead1a08666b7 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 12 Oct 2015 12:10:53 +0100 Subject: arm64: Fix missing #include in hw_breakpoint.c A prior commit used to detect the hw breakpoint ABI behaviour based on the target state missed the asm/compat.h include and the build fails with !CONFIG_COMPAT. Fixes: 8f48c0629049 ("arm64: hw_breakpoint: use target state to determine ABI behaviour") Signed-off-by: Catalin Marinas diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index 46465d9..b45c95d 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -28,6 +28,7 @@ #include #include +#include #include #include #include -- cgit v0.10.2 From d4dddfdbbc75f46d2cbab4e9f421999452617d64 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 8 Oct 2015 20:02:02 +0100 Subject: arm64/efi: remove /chosen/linux, uefi-stub-kern-ver DT property With the stub to kernel interface being promoted to a proper interface so that other agents than the stub can boot the kernel proper in EFI mode, we can remove the linux,uefi-stub-kern-ver field, considering that its original purpose was to prevent this from happening in the first place. Signed-off-by: Ard Biesheuvel Reviewed-by: Matt Fleming Signed-off-by: Catalin Marinas diff --git a/Documentation/arm/uefi.txt b/Documentation/arm/uefi.txt index d60030a..7f1bed8 100644 --- a/Documentation/arm/uefi.txt +++ b/Documentation/arm/uefi.txt @@ -58,7 +58,5 @@ linux,uefi-mmap-desc-size | 32-bit | Size in bytes of each entry in the UEFI -------------------------------------------------------------------------------- linux,uefi-mmap-desc-ver | 32-bit | Version of the mmap descriptor format. -------------------------------------------------------------------------------- -linux,uefi-stub-kern-ver | string | Copy of linux_banner from build. --------------------------------------------------------------------------------- For verbose debug messages, specify 'uefi_debug' on the kernel command line. diff --git a/drivers/firmware/efi/libstub/fdt.c b/drivers/firmware/efi/libstub/fdt.c index ef5d764..b62e2f5 100644 --- a/drivers/firmware/efi/libstub/fdt.c +++ b/drivers/firmware/efi/libstub/fdt.c @@ -147,15 +147,6 @@ efi_status_t update_fdt(efi_system_table_t *sys_table, void *orig_fdt, if (status) goto fdt_set_fail; - /* - * Add kernel version banner so stub/kernel match can be - * verified. - */ - status = fdt_setprop_string(fdt, node, "linux,uefi-stub-kern-ver", - linux_banner); - if (status) - goto fdt_set_fail; - return EFI_SUCCESS; fdt_set_fail: -- cgit v0.10.2 From 207918461eb0aca720fddec5da79bc71c133b9f1 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 8 Oct 2015 20:02:03 +0100 Subject: arm64: use ENDPIPROC() to annotate position independent assembler routines For more control over which functions are called with the MMU off or with the UEFI 1:1 mapping active, annotate some assembler routines as position independent. This is done by introducing ENDPIPROC(), which replaces the ENDPROC() declaration of those routines. Signed-off-by: Ard Biesheuvel Signed-off-by: Catalin Marinas diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index b51f2cc..12eff92 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -193,4 +193,15 @@ lr .req x30 // link register str \src, [\tmp, :lo12:\sym] .endm +/* + * Annotate a function as position independent, i.e., safe to be called before + * the kernel virtual mapping is activated. + */ +#define ENDPIPROC(x) \ + .globl __pi_##x; \ + .type __pi_##x, %function; \ + .set __pi_##x, x; \ + .size __pi_##x, . - x; \ + ENDPROC(x) + #endif /* __ASM_ASSEMBLER_H */ diff --git a/arch/arm64/lib/memchr.S b/arch/arm64/lib/memchr.S index 8636b75..4444c1d 100644 --- a/arch/arm64/lib/memchr.S +++ b/arch/arm64/lib/memchr.S @@ -41,4 +41,4 @@ ENTRY(memchr) ret 2: mov x0, #0 ret -ENDPROC(memchr) +ENDPIPROC(memchr) diff --git a/arch/arm64/lib/memcmp.S b/arch/arm64/lib/memcmp.S index 6ea0776..ffbdec0 100644 --- a/arch/arm64/lib/memcmp.S +++ b/arch/arm64/lib/memcmp.S @@ -255,4 +255,4 @@ CPU_LE( rev data2, data2 ) .Lret0: mov result, #0 ret -ENDPROC(memcmp) +ENDPIPROC(memcmp) diff --git a/arch/arm64/lib/memcpy.S b/arch/arm64/lib/memcpy.S index 173a1aa..36a6a62 100644 --- a/arch/arm64/lib/memcpy.S +++ b/arch/arm64/lib/memcpy.S @@ -71,4 +71,4 @@ ENTRY(memcpy) #include "copy_template.S" ret -ENDPROC(memcpy) +ENDPIPROC(memcpy) diff --git a/arch/arm64/lib/memmove.S b/arch/arm64/lib/memmove.S index 57b19ea2..68e2f20 100644 --- a/arch/arm64/lib/memmove.S +++ b/arch/arm64/lib/memmove.S @@ -194,4 +194,4 @@ ENTRY(memmove) tst count, #0x3f b.ne .Ltail63 ret -ENDPROC(memmove) +ENDPIPROC(memmove) diff --git a/arch/arm64/lib/memset.S b/arch/arm64/lib/memset.S index 7c72dfd..29f405f 100644 --- a/arch/arm64/lib/memset.S +++ b/arch/arm64/lib/memset.S @@ -213,4 +213,4 @@ ENTRY(memset) ands count, count, zva_bits_x b.ne .Ltail_maybe_long ret -ENDPROC(memset) +ENDPIPROC(memset) diff --git a/arch/arm64/lib/strcmp.S b/arch/arm64/lib/strcmp.S index 42f828b..471fe61 100644 --- a/arch/arm64/lib/strcmp.S +++ b/arch/arm64/lib/strcmp.S @@ -231,4 +231,4 @@ CPU_BE( orr syndrome, diff, has_nul ) lsr data1, data1, #56 sub result, data1, data2, lsr #56 ret -ENDPROC(strcmp) +ENDPIPROC(strcmp) diff --git a/arch/arm64/lib/strlen.S b/arch/arm64/lib/strlen.S index 987b68b..55ccc8e 100644 --- a/arch/arm64/lib/strlen.S +++ b/arch/arm64/lib/strlen.S @@ -123,4 +123,4 @@ CPU_LE( lsr tmp2, tmp2, tmp1 ) /* Shift (tmp1 & 63). */ csinv data1, data1, xzr, le csel data2, data2, data2a, le b .Lrealigned -ENDPROC(strlen) +ENDPIPROC(strlen) diff --git a/arch/arm64/lib/strncmp.S b/arch/arm64/lib/strncmp.S index 0224cf5..e267044 100644 --- a/arch/arm64/lib/strncmp.S +++ b/arch/arm64/lib/strncmp.S @@ -307,4 +307,4 @@ CPU_BE( orr syndrome, diff, has_nul ) .Lret0: mov result, #0 ret -ENDPROC(strncmp) +ENDPIPROC(strncmp) diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index eb48d5d..cfa44a6 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -98,7 +98,7 @@ ENTRY(__flush_dcache_area) b.lo 1b dsb sy ret -ENDPROC(__flush_dcache_area) +ENDPIPROC(__flush_dcache_area) /* * __inval_cache_range(start, end) @@ -131,7 +131,7 @@ __dma_inv_range: b.lo 2b dsb sy ret -ENDPROC(__inval_cache_range) +ENDPIPROC(__inval_cache_range) ENDPROC(__dma_inv_range) /* @@ -171,7 +171,7 @@ ENTRY(__dma_flush_range) b.lo 1b dsb sy ret -ENDPROC(__dma_flush_range) +ENDPIPROC(__dma_flush_range) /* * __dma_map_area(start, size, dir) @@ -184,7 +184,7 @@ ENTRY(__dma_map_area) cmp w2, #DMA_FROM_DEVICE b.eq __dma_inv_range b __dma_clean_range -ENDPROC(__dma_map_area) +ENDPIPROC(__dma_map_area) /* * __dma_unmap_area(start, size, dir) @@ -197,4 +197,4 @@ ENTRY(__dma_unmap_area) cmp w2, #DMA_TO_DEVICE b.ne __dma_inv_range ret -ENDPROC(__dma_unmap_area) +ENDPIPROC(__dma_unmap_area) -- cgit v0.10.2 From e8f3010f7326c00368dbc057bd052bec80dfc072 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 8 Oct 2015 20:02:04 +0100 Subject: arm64/efi: isolate EFI stub from the kernel proper Since arm64 does not use a builtin decompressor, the EFI stub is built into the kernel proper. So far, this has been working fine, but actually, since the stub is in fact a PE/COFF relocatable binary that is executed at an unknown offset in the 1:1 mapping provided by the UEFI firmware, we should not be seamlessly sharing code with the kernel proper, which is a position dependent executable linked at a high virtual offset. So instead, separate the contents of libstub and its dependencies, by putting them into their own namespace by prefixing all of its symbols with __efistub. This way, we have tight control over what parts of the kernel proper are referenced by the stub. Signed-off-by: Ard Biesheuvel Reviewed-by: Matt Fleming Signed-off-by: Catalin Marinas diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 22dc9bc..7b17f62 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -20,6 +20,14 @@ arm64-obj-y := debug-monitors.o entry.o irq.o fpsimd.o \ cpufeature.o alternative.o cacheinfo.o \ smp.o smp_spin_table.o topology.o +stub-obj := efi-stub.o efi-entry.o +extra-y := $(stub-obj) +stub-obj := $(patsubst %.o,%.stub.o,$(stub-obj)) + +OBJCOPYFLAGS := --prefix-symbols=__efistub_ +$(obj)/%.stub.o: $(obj)/%.o FORCE + $(call if_changed,objcopy) + arm64-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \ sys_compat.o entry32.o \ ../../arm/kernel/opcodes.o @@ -32,7 +40,7 @@ arm64-obj-$(CONFIG_CPU_PM) += sleep.o suspend.o arm64-obj-$(CONFIG_CPU_IDLE) += cpuidle.o arm64-obj-$(CONFIG_JUMP_LABEL) += jump_label.o arm64-obj-$(CONFIG_KGDB) += kgdb.o -arm64-obj-$(CONFIG_EFI) += efi.o efi-stub.o efi-entry.o +arm64-obj-$(CONFIG_EFI) += efi.o $(stub-obj) arm64-obj-$(CONFIG_PCI) += pci.o arm64-obj-$(CONFIG_ARMV8_DEPRECATED) += armv8_deprecated.o arm64-obj-$(CONFIG_ACPI) += acpi.o @@ -40,7 +48,7 @@ arm64-obj-$(CONFIG_ACPI) += acpi.o obj-y += $(arm64-obj-y) vdso/ obj-m += $(arm64-obj-m) head-y := head.o -extra-y := $(head-y) vmlinux.lds +extra-y += $(head-y) vmlinux.lds # vDSO - this must be built first to generate the symbol offsets $(call objectify,$(arm64-obj-y)): $(obj)/vdso/vdso-offsets.h diff --git a/arch/arm64/kernel/efi-entry.S b/arch/arm64/kernel/efi-entry.S index 8ce9b05..a773db9 100644 --- a/arch/arm64/kernel/efi-entry.S +++ b/arch/arm64/kernel/efi-entry.S @@ -29,7 +29,7 @@ * we want to be. The kernel image wants to be placed at TEXT_OFFSET * from start of RAM. */ -ENTRY(efi_stub_entry) +ENTRY(entry) /* * Create a stack frame to save FP/LR with extra space * for image_addr variable passed to efi_entry(). @@ -86,8 +86,8 @@ ENTRY(efi_stub_entry) * entries for the VA range of the current image, so no maintenance is * necessary. */ - adr x0, efi_stub_entry - adr x1, efi_stub_entry_end + adr x0, entry + adr x1, entry_end sub x1, x1, x0 bl __flush_dcache_area @@ -120,5 +120,5 @@ efi_load_fail: ldp x29, x30, [sp], #32 ret -efi_stub_entry_end: -ENDPROC(efi_stub_entry) +entry_end: +ENDPROC(entry) diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 90d09ed..28a81e9 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -120,8 +120,8 @@ efi_head: #endif #ifdef CONFIG_EFI - .globl stext_offset - .set stext_offset, stext - efi_head + .globl __efistub_stext_offset + .set __efistub_stext_offset, stext - efi_head .align 3 pe_header: .ascii "PE" @@ -144,8 +144,8 @@ optional_header: .long _end - stext // SizeOfCode .long 0 // SizeOfInitializedData .long 0 // SizeOfUninitializedData - .long efi_stub_entry - efi_head // AddressOfEntryPoint - .long stext_offset // BaseOfCode + .long __efistub_entry - efi_head // AddressOfEntryPoint + .long __efistub_stext_offset // BaseOfCode extra_header_fields: .quad 0 // ImageBase @@ -162,7 +162,7 @@ extra_header_fields: .long _end - efi_head // SizeOfImage // Everything before the kernel image is considered part of the header - .long stext_offset // SizeOfHeaders + .long __efistub_stext_offset // SizeOfHeaders .long 0 // CheckSum .short 0xa // Subsystem (EFI application) .short 0 // DllCharacteristics @@ -207,9 +207,9 @@ section_table: .byte 0 .byte 0 // end of 0 padding of section name .long _end - stext // VirtualSize - .long stext_offset // VirtualAddress + .long __efistub_stext_offset // VirtualAddress .long _edata - stext // SizeOfRawData - .long stext_offset // PointerToRawData + .long __efistub_stext_offset // PointerToRawData .long 0 // PointerToRelocations (0 for executables) .long 0 // PointerToLineNumbers (0 for executables) diff --git a/arch/arm64/kernel/image.h b/arch/arm64/kernel/image.h index 8fae075..e083af0 100644 --- a/arch/arm64/kernel/image.h +++ b/arch/arm64/kernel/image.h @@ -59,4 +59,31 @@ _kernel_offset_le = DATA_LE64(TEXT_OFFSET); \ _kernel_flags_le = DATA_LE64(__HEAD_FLAGS); +#ifdef CONFIG_EFI + +/* + * The EFI stub has its own symbol namespace prefixed by __efistub_, to + * isolate it from the kernel proper. The following symbols are legally + * accessed by the stub, so provide some aliases to make them accessible. + * Only include data symbols here, or text symbols of functions that are + * guaranteed to be safe when executed at another offset than they were + * linked at. The routines below are all implemented in assembler in a + * position independent manner + */ +__efistub_memcmp = __pi_memcmp; +__efistub_memchr = __pi_memchr; +__efistub_memcpy = __pi_memcpy; +__efistub_memmove = __pi_memmove; +__efistub_memset = __pi_memset; +__efistub_strlen = __pi_strlen; +__efistub_strcmp = __pi_strcmp; +__efistub_strncmp = __pi_strncmp; +__efistub___flush_dcache_area = __pi___flush_dcache_area; + +__efistub__text = _text; +__efistub__end = _end; +__efistub__edata = _edata; + +#endif + #endif /* __ASM_IMAGE_H */ diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile index 816dbe9..bca9a76 100644 --- a/drivers/firmware/efi/libstub/Makefile +++ b/drivers/firmware/efi/libstub/Makefile @@ -14,6 +14,8 @@ cflags-$(CONFIG_ARM64) := $(subst -pg,,$(KBUILD_CFLAGS)) cflags-$(CONFIG_ARM) := $(subst -pg,,$(KBUILD_CFLAGS)) \ -fno-builtin -fpic -mno-single-pic-base +cflags-$(CONFIG_EFI_ARMSTUB) += -I$(srctree)/scripts/dtc/libfdt + KBUILD_CFLAGS := $(cflags-y) \ $(call cc-option,-ffreestanding) \ $(call cc-option,-fno-stack-protector) @@ -22,7 +24,15 @@ GCOV_PROFILE := n KASAN_SANITIZE := n lib-y := efi-stub-helper.o -lib-$(CONFIG_EFI_ARMSTUB) += arm-stub.o fdt.o + +# include the stub's generic dependencies from lib/ when building for ARM/arm64 +arm-deps := fdt_rw.c fdt_ro.c fdt_wip.c fdt.c fdt_empty_tree.c fdt_sw.c sort.c + +$(obj)/lib-%.o: $(srctree)/lib/%.c FORCE + $(call if_changed_rule,cc_o_c) + +lib-$(CONFIG_EFI_ARMSTUB) += arm-stub.o fdt.o string.o \ + $(patsubst %.c,lib-%.o,$(arm-deps)) # # arm64 puts the stub in the kernel proper, which will unnecessarily retain all @@ -30,10 +40,27 @@ lib-$(CONFIG_EFI_ARMSTUB) += arm-stub.o fdt.o # So let's apply the __init annotations at the section level, by prefixing # the section names directly. This will ensure that even all the inline string # literals are covered. +# The fact that the stub and the kernel proper are essentially the same binary +# also means that we need to be extra careful to make sure that the stub does +# not rely on any absolute symbol references, considering that the virtual +# kernel mapping that the linker uses is not active yet when the stub is +# executing. So build all C dependencies of the EFI stub into libstub, and do +# a verification pass to see if any absolute relocations exist in any of the +# object files. # -extra-$(CONFIG_ARM64) := $(lib-y) -lib-$(CONFIG_ARM64) := $(patsubst %.o,%.init.o,$(lib-y)) +extra-$(CONFIG_EFI_ARMSTUB) := $(lib-y) +lib-$(CONFIG_EFI_ARMSTUB) := $(patsubst %.o,%.stub.o,$(lib-y)) + +STUBCOPY_FLAGS-y := -R .debug* -R *ksymtab* +STUBCOPY_FLAGS-$(CONFIG_ARM64) += --prefix-alloc-sections=.init \ + --prefix-symbols=__efistub_ +STUBCOPY_RELOC-$(CONFIG_ARM64) := R_AARCH64_ABS + +$(obj)/%.stub.o: $(obj)/%.o FORCE + $(call if_changed,stubcopy) -OBJCOPYFLAGS := --prefix-alloc-sections=.init -$(obj)/%.init.o: $(obj)/%.o FORCE - $(call if_changed,objcopy) +quiet_cmd_stubcopy = STUBCPY $@ + cmd_stubcopy = if $(OBJCOPY) $(STUBCOPY_FLAGS-y) $< $@; then \ + $(OBJDUMP) -r $@ | grep $(STUBCOPY_RELOC-y) \ + && (echo >&2 "$@: absolute symbol references not allowed in the EFI stub"; \ + rm -f $@; /bin/false); else /bin/false; fi diff --git a/drivers/firmware/efi/libstub/string.c b/drivers/firmware/efi/libstub/string.c new file mode 100644 index 0000000..09d5a08 --- /dev/null +++ b/drivers/firmware/efi/libstub/string.c @@ -0,0 +1,57 @@ +/* + * Taken from: + * linux/lib/string.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +#include +#include + +#ifndef __HAVE_ARCH_STRSTR +/** + * strstr - Find the first substring in a %NUL terminated string + * @s1: The string to be searched + * @s2: The string to search for + */ +char *strstr(const char *s1, const char *s2) +{ + size_t l1, l2; + + l2 = strlen(s2); + if (!l2) + return (char *)s1; + l1 = strlen(s1); + while (l1 >= l2) { + l1--; + if (!memcmp(s1, s2, l2)) + return (char *)s1; + s1++; + } + return NULL; +} +#endif + +#ifndef __HAVE_ARCH_STRNCMP +/** + * strncmp - Compare two length-limited strings + * @cs: One string + * @ct: Another string + * @count: The maximum number of bytes to compare + */ +int strncmp(const char *cs, const char *ct, size_t count) +{ + unsigned char c1, c2; + + while (count) { + c1 = *cs++; + c2 = *ct++; + if (c1 != c2) + return c1 < c2 ? -1 : 1; + if (!c1) + break; + count--; + } + return 0; +} +#endif -- cgit v0.10.2 From 305d454aaa292be3a09a9d674e6c35f5b4249a13 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 8 Oct 2015 20:15:18 +0100 Subject: arm64: atomics: implement native {relaxed, acquire, release} atomics Commit 654672d4ba1a ("locking/atomics: Add _{acquire|release|relaxed}() variants of some atomic operation") introduced a relaxed atomic API to Linux that maps nicely onto the arm64 memory model, including the new ARMv8.1 atomic instructions. This patch hooks up the API to our relaxed atomic instructions, rather than have them all expand to the full-barrier variants as they do currently. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h index 35a6778..5e13ad7 100644 --- a/arch/arm64/include/asm/atomic.h +++ b/arch/arm64/include/asm/atomic.h @@ -55,13 +55,42 @@ #define atomic_read(v) READ_ONCE((v)->counter) #define atomic_set(v, i) (((v)->counter) = (i)) + +#define atomic_add_return_relaxed atomic_add_return_relaxed +#define atomic_add_return_acquire atomic_add_return_acquire +#define atomic_add_return_release atomic_add_return_release +#define atomic_add_return atomic_add_return + +#define atomic_inc_return_relaxed(v) atomic_add_return_relaxed(1, (v)) +#define atomic_inc_return_acquire(v) atomic_add_return_acquire(1, (v)) +#define atomic_inc_return_release(v) atomic_add_return_release(1, (v)) +#define atomic_inc_return(v) atomic_add_return(1, (v)) + +#define atomic_sub_return_relaxed atomic_sub_return_relaxed +#define atomic_sub_return_acquire atomic_sub_return_acquire +#define atomic_sub_return_release atomic_sub_return_release +#define atomic_sub_return atomic_sub_return + +#define atomic_dec_return_relaxed(v) atomic_sub_return_relaxed(1, (v)) +#define atomic_dec_return_acquire(v) atomic_sub_return_acquire(1, (v)) +#define atomic_dec_return_release(v) atomic_sub_return_release(1, (v)) +#define atomic_dec_return(v) atomic_sub_return(1, (v)) + +#define atomic_xchg_relaxed(v, new) xchg_relaxed(&((v)->counter), (new)) +#define atomic_xchg_acquire(v, new) xchg_acquire(&((v)->counter), (new)) +#define atomic_xchg_release(v, new) xchg_release(&((v)->counter), (new)) #define atomic_xchg(v, new) xchg(&((v)->counter), (new)) + +#define atomic_cmpxchg_relaxed(v, old, new) \ + cmpxchg_relaxed(&((v)->counter), (old), (new)) +#define atomic_cmpxchg_acquire(v, old, new) \ + cmpxchg_acquire(&((v)->counter), (old), (new)) +#define atomic_cmpxchg_release(v, old, new) \ + cmpxchg_release(&((v)->counter), (old), (new)) #define atomic_cmpxchg(v, old, new) cmpxchg(&((v)->counter), (old), (new)) #define atomic_inc(v) atomic_add(1, (v)) #define atomic_dec(v) atomic_sub(1, (v)) -#define atomic_inc_return(v) atomic_add_return(1, (v)) -#define atomic_dec_return(v) atomic_sub_return(1, (v)) #define atomic_inc_and_test(v) (atomic_inc_return(v) == 0) #define atomic_dec_and_test(v) (atomic_dec_return(v) == 0) #define atomic_sub_and_test(i, v) (atomic_sub_return((i), (v)) == 0) @@ -75,13 +104,39 @@ #define ATOMIC64_INIT ATOMIC_INIT #define atomic64_read atomic_read #define atomic64_set atomic_set + +#define atomic64_add_return_relaxed atomic64_add_return_relaxed +#define atomic64_add_return_acquire atomic64_add_return_acquire +#define atomic64_add_return_release atomic64_add_return_release +#define atomic64_add_return atomic64_add_return + +#define atomic64_inc_return_relaxed(v) atomic64_add_return_relaxed(1, (v)) +#define atomic64_inc_return_acquire(v) atomic64_add_return_acquire(1, (v)) +#define atomic64_inc_return_release(v) atomic64_add_return_release(1, (v)) +#define atomic64_inc_return(v) atomic64_add_return(1, (v)) + +#define atomic64_sub_return_relaxed atomic64_sub_return_relaxed +#define atomic64_sub_return_acquire atomic64_sub_return_acquire +#define atomic64_sub_return_release atomic64_sub_return_release +#define atomic64_sub_return atomic64_sub_return + +#define atomic64_dec_return_relaxed(v) atomic64_sub_return_relaxed(1, (v)) +#define atomic64_dec_return_acquire(v) atomic64_sub_return_acquire(1, (v)) +#define atomic64_dec_return_release(v) atomic64_sub_return_release(1, (v)) +#define atomic64_dec_return(v) atomic64_sub_return(1, (v)) + +#define atomic64_xchg_relaxed atomic_xchg_relaxed +#define atomic64_xchg_acquire atomic_xchg_acquire +#define atomic64_xchg_release atomic_xchg_release #define atomic64_xchg atomic_xchg + +#define atomic64_cmpxchg_relaxed atomic_cmpxchg_relaxed +#define atomic64_cmpxchg_acquire atomic_cmpxchg_acquire +#define atomic64_cmpxchg_release atomic_cmpxchg_release #define atomic64_cmpxchg atomic_cmpxchg #define atomic64_inc(v) atomic64_add(1, (v)) #define atomic64_dec(v) atomic64_sub(1, (v)) -#define atomic64_inc_return(v) atomic64_add_return(1, (v)) -#define atomic64_dec_return(v) atomic64_sub_return(1, (v)) #define atomic64_inc_and_test(v) (atomic64_inc_return(v) == 0) #define atomic64_dec_and_test(v) (atomic64_dec_return(v) == 0) #define atomic64_sub_and_test(i, v) (atomic64_sub_return((i), (v)) == 0) diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h index b3b5c4a..74d0b8e 100644 --- a/arch/arm64/include/asm/atomic_ll_sc.h +++ b/arch/arm64/include/asm/atomic_ll_sc.h @@ -55,40 +55,47 @@ __LL_SC_PREFIX(atomic_##op(int i, atomic_t *v)) \ } \ __LL_SC_EXPORT(atomic_##op); -#define ATOMIC_OP_RETURN(op, asm_op) \ +#define ATOMIC_OP_RETURN(name, mb, acq, rel, cl, op, asm_op) \ __LL_SC_INLINE int \ -__LL_SC_PREFIX(atomic_##op##_return(int i, atomic_t *v)) \ +__LL_SC_PREFIX(atomic_##op##_return##name(int i, atomic_t *v)) \ { \ unsigned long tmp; \ int result; \ \ - asm volatile("// atomic_" #op "_return\n" \ + asm volatile("// atomic_" #op "_return" #name "\n" \ " prfm pstl1strm, %2\n" \ -"1: ldxr %w0, %2\n" \ +"1: ld" #acq "xr %w0, %2\n" \ " " #asm_op " %w0, %w0, %w3\n" \ -" stlxr %w1, %w0, %2\n" \ -" cbnz %w1, 1b" \ +" st" #rel "xr %w1, %w0, %2\n" \ +" cbnz %w1, 1b\n" \ +" " #mb \ : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \ : "Ir" (i) \ - : "memory"); \ + : cl); \ \ - smp_mb(); \ return result; \ } \ -__LL_SC_EXPORT(atomic_##op##_return); +__LL_SC_EXPORT(atomic_##op##_return##name); + +#define ATOMIC_OPS(...) \ + ATOMIC_OP(__VA_ARGS__) \ + ATOMIC_OP_RETURN( , dmb ish, , l, "memory", __VA_ARGS__) -#define ATOMIC_OPS(op, asm_op) \ - ATOMIC_OP(op, asm_op) \ - ATOMIC_OP_RETURN(op, asm_op) +#define ATOMIC_OPS_RLX(...) \ + ATOMIC_OPS(__VA_ARGS__) \ + ATOMIC_OP_RETURN(_relaxed, , , , , __VA_ARGS__)\ + ATOMIC_OP_RETURN(_acquire, , a, , "memory", __VA_ARGS__)\ + ATOMIC_OP_RETURN(_release, , , l, "memory", __VA_ARGS__) -ATOMIC_OPS(add, add) -ATOMIC_OPS(sub, sub) +ATOMIC_OPS_RLX(add, add) +ATOMIC_OPS_RLX(sub, sub) ATOMIC_OP(and, and) ATOMIC_OP(andnot, bic) ATOMIC_OP(or, orr) ATOMIC_OP(xor, eor) +#undef ATOMIC_OPS_RLX #undef ATOMIC_OPS #undef ATOMIC_OP_RETURN #undef ATOMIC_OP @@ -111,40 +118,47 @@ __LL_SC_PREFIX(atomic64_##op(long i, atomic64_t *v)) \ } \ __LL_SC_EXPORT(atomic64_##op); -#define ATOMIC64_OP_RETURN(op, asm_op) \ +#define ATOMIC64_OP_RETURN(name, mb, acq, rel, cl, op, asm_op) \ __LL_SC_INLINE long \ -__LL_SC_PREFIX(atomic64_##op##_return(long i, atomic64_t *v)) \ +__LL_SC_PREFIX(atomic64_##op##_return##name(long i, atomic64_t *v)) \ { \ long result; \ unsigned long tmp; \ \ - asm volatile("// atomic64_" #op "_return\n" \ + asm volatile("// atomic64_" #op "_return" #name "\n" \ " prfm pstl1strm, %2\n" \ -"1: ldxr %0, %2\n" \ +"1: ld" #acq "xr %0, %2\n" \ " " #asm_op " %0, %0, %3\n" \ -" stlxr %w1, %0, %2\n" \ -" cbnz %w1, 1b" \ +" st" #rel "xr %w1, %0, %2\n" \ +" cbnz %w1, 1b\n" \ +" " #mb \ : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \ : "Ir" (i) \ - : "memory"); \ + : cl); \ \ - smp_mb(); \ return result; \ } \ -__LL_SC_EXPORT(atomic64_##op##_return); +__LL_SC_EXPORT(atomic64_##op##_return##name); + +#define ATOMIC64_OPS(...) \ + ATOMIC64_OP(__VA_ARGS__) \ + ATOMIC64_OP_RETURN(, dmb ish, , l, "memory", __VA_ARGS__) -#define ATOMIC64_OPS(op, asm_op) \ - ATOMIC64_OP(op, asm_op) \ - ATOMIC64_OP_RETURN(op, asm_op) +#define ATOMIC64_OPS_RLX(...) \ + ATOMIC64_OPS(__VA_ARGS__) \ + ATOMIC64_OP_RETURN(_relaxed,, , , , __VA_ARGS__) \ + ATOMIC64_OP_RETURN(_acquire,, a, , "memory", __VA_ARGS__) \ + ATOMIC64_OP_RETURN(_release,, , l, "memory", __VA_ARGS__) -ATOMIC64_OPS(add, add) -ATOMIC64_OPS(sub, sub) +ATOMIC64_OPS_RLX(add, add) +ATOMIC64_OPS_RLX(sub, sub) ATOMIC64_OP(and, and) ATOMIC64_OP(andnot, bic) ATOMIC64_OP(or, orr) ATOMIC64_OP(xor, eor) +#undef ATOMIC64_OPS_RLX #undef ATOMIC64_OPS #undef ATOMIC64_OP_RETURN #undef ATOMIC64_OP @@ -172,7 +186,7 @@ __LL_SC_PREFIX(atomic64_dec_if_positive(atomic64_t *v)) } __LL_SC_EXPORT(atomic64_dec_if_positive); -#define __CMPXCHG_CASE(w, sz, name, mb, rel, cl) \ +#define __CMPXCHG_CASE(w, sz, name, mb, acq, rel, cl) \ __LL_SC_INLINE unsigned long \ __LL_SC_PREFIX(__cmpxchg_case_##name(volatile void *ptr, \ unsigned long old, \ @@ -182,7 +196,7 @@ __LL_SC_PREFIX(__cmpxchg_case_##name(volatile void *ptr, \ \ asm volatile( \ " prfm pstl1strm, %[v]\n" \ - "1: ldxr" #sz "\t%" #w "[oldval], %[v]\n" \ + "1: ld" #acq "xr" #sz "\t%" #w "[oldval], %[v]\n" \ " eor %" #w "[tmp], %" #w "[oldval], %" #w "[old]\n" \ " cbnz %" #w "[tmp], 2f\n" \ " st" #rel "xr" #sz "\t%w[tmp], %" #w "[new], %[v]\n" \ @@ -199,14 +213,22 @@ __LL_SC_PREFIX(__cmpxchg_case_##name(volatile void *ptr, \ } \ __LL_SC_EXPORT(__cmpxchg_case_##name); -__CMPXCHG_CASE(w, b, 1, , , ) -__CMPXCHG_CASE(w, h, 2, , , ) -__CMPXCHG_CASE(w, , 4, , , ) -__CMPXCHG_CASE( , , 8, , , ) -__CMPXCHG_CASE(w, b, mb_1, dmb ish, l, "memory") -__CMPXCHG_CASE(w, h, mb_2, dmb ish, l, "memory") -__CMPXCHG_CASE(w, , mb_4, dmb ish, l, "memory") -__CMPXCHG_CASE( , , mb_8, dmb ish, l, "memory") +__CMPXCHG_CASE(w, b, 1, , , , ) +__CMPXCHG_CASE(w, h, 2, , , , ) +__CMPXCHG_CASE(w, , 4, , , , ) +__CMPXCHG_CASE( , , 8, , , , ) +__CMPXCHG_CASE(w, b, acq_1, , a, , "memory") +__CMPXCHG_CASE(w, h, acq_2, , a, , "memory") +__CMPXCHG_CASE(w, , acq_4, , a, , "memory") +__CMPXCHG_CASE( , , acq_8, , a, , "memory") +__CMPXCHG_CASE(w, b, rel_1, , , l, "memory") +__CMPXCHG_CASE(w, h, rel_2, , , l, "memory") +__CMPXCHG_CASE(w, , rel_4, , , l, "memory") +__CMPXCHG_CASE( , , rel_8, , , l, "memory") +__CMPXCHG_CASE(w, b, mb_1, dmb ish, , l, "memory") +__CMPXCHG_CASE(w, h, mb_2, dmb ish, , l, "memory") +__CMPXCHG_CASE(w, , mb_4, dmb ish, , l, "memory") +__CMPXCHG_CASE( , , mb_8, dmb ish, , l, "memory") #undef __CMPXCHG_CASE diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h index 55d740e..1fce790 100644 --- a/arch/arm64/include/asm/atomic_lse.h +++ b/arch/arm64/include/asm/atomic_lse.h @@ -75,24 +75,32 @@ static inline void atomic_add(int i, atomic_t *v) : "x30"); } -static inline int atomic_add_return(int i, atomic_t *v) -{ - register int w0 asm ("w0") = i; - register atomic_t *x1 asm ("x1") = v; +#define ATOMIC_OP_ADD_RETURN(name, mb, cl...) \ +static inline int atomic_add_return##name(int i, atomic_t *v) \ +{ \ + register int w0 asm ("w0") = i; \ + register atomic_t *x1 asm ("x1") = v; \ + \ + asm volatile(ARM64_LSE_ATOMIC_INSN( \ + /* LL/SC */ \ + " nop\n" \ + __LL_SC_ATOMIC(add_return##name), \ + /* LSE atomics */ \ + " ldadd" #mb " %w[i], w30, %[v]\n" \ + " add %w[i], %w[i], w30") \ + : [i] "+r" (w0), [v] "+Q" (v->counter) \ + : "r" (x1) \ + : "x30" , ##cl); \ + \ + return w0; \ +} - asm volatile(ARM64_LSE_ATOMIC_INSN( - /* LL/SC */ - " nop\n" - __LL_SC_ATOMIC(add_return), - /* LSE atomics */ - " ldaddal %w[i], w30, %[v]\n" - " add %w[i], %w[i], w30") - : [i] "+r" (w0), [v] "+Q" (v->counter) - : "r" (x1) - : "x30", "memory"); +ATOMIC_OP_ADD_RETURN(_relaxed, ) +ATOMIC_OP_ADD_RETURN(_acquire, a, "memory") +ATOMIC_OP_ADD_RETURN(_release, l, "memory") +ATOMIC_OP_ADD_RETURN( , al, "memory") - return w0; -} +#undef ATOMIC_OP_ADD_RETURN static inline void atomic_and(int i, atomic_t *v) { @@ -128,27 +136,34 @@ static inline void atomic_sub(int i, atomic_t *v) : "x30"); } -static inline int atomic_sub_return(int i, atomic_t *v) -{ - register int w0 asm ("w0") = i; - register atomic_t *x1 asm ("x1") = v; - - asm volatile(ARM64_LSE_ATOMIC_INSN( - /* LL/SC */ - " nop\n" - __LL_SC_ATOMIC(sub_return) - " nop", - /* LSE atomics */ - " neg %w[i], %w[i]\n" - " ldaddal %w[i], w30, %[v]\n" - " add %w[i], %w[i], w30") - : [i] "+r" (w0), [v] "+Q" (v->counter) - : "r" (x1) - : "x30", "memory"); - - return w0; +#define ATOMIC_OP_SUB_RETURN(name, mb, cl...) \ +static inline int atomic_sub_return##name(int i, atomic_t *v) \ +{ \ + register int w0 asm ("w0") = i; \ + register atomic_t *x1 asm ("x1") = v; \ + \ + asm volatile(ARM64_LSE_ATOMIC_INSN( \ + /* LL/SC */ \ + " nop\n" \ + __LL_SC_ATOMIC(sub_return##name) \ + " nop", \ + /* LSE atomics */ \ + " neg %w[i], %w[i]\n" \ + " ldadd" #mb " %w[i], w30, %[v]\n" \ + " add %w[i], %w[i], w30") \ + : [i] "+r" (w0), [v] "+Q" (v->counter) \ + : "r" (x1) \ + : "x30" , ##cl); \ + \ + return w0; \ } +ATOMIC_OP_SUB_RETURN(_relaxed, ) +ATOMIC_OP_SUB_RETURN(_acquire, a, "memory") +ATOMIC_OP_SUB_RETURN(_release, l, "memory") +ATOMIC_OP_SUB_RETURN( , al, "memory") + +#undef ATOMIC_OP_SUB_RETURN #undef __LL_SC_ATOMIC #define __LL_SC_ATOMIC64(op) __LL_SC_CALL(atomic64_##op) @@ -201,24 +216,32 @@ static inline void atomic64_add(long i, atomic64_t *v) : "x30"); } -static inline long atomic64_add_return(long i, atomic64_t *v) -{ - register long x0 asm ("x0") = i; - register atomic64_t *x1 asm ("x1") = v; +#define ATOMIC64_OP_ADD_RETURN(name, mb, cl...) \ +static inline long atomic64_add_return##name(long i, atomic64_t *v) \ +{ \ + register long x0 asm ("x0") = i; \ + register atomic64_t *x1 asm ("x1") = v; \ + \ + asm volatile(ARM64_LSE_ATOMIC_INSN( \ + /* LL/SC */ \ + " nop\n" \ + __LL_SC_ATOMIC64(add_return##name), \ + /* LSE atomics */ \ + " ldadd" #mb " %[i], x30, %[v]\n" \ + " add %[i], %[i], x30") \ + : [i] "+r" (x0), [v] "+Q" (v->counter) \ + : "r" (x1) \ + : "x30" , ##cl); \ + \ + return x0; \ +} - asm volatile(ARM64_LSE_ATOMIC_INSN( - /* LL/SC */ - " nop\n" - __LL_SC_ATOMIC64(add_return), - /* LSE atomics */ - " ldaddal %[i], x30, %[v]\n" - " add %[i], %[i], x30") - : [i] "+r" (x0), [v] "+Q" (v->counter) - : "r" (x1) - : "x30", "memory"); +ATOMIC64_OP_ADD_RETURN(_relaxed, ) +ATOMIC64_OP_ADD_RETURN(_acquire, a, "memory") +ATOMIC64_OP_ADD_RETURN(_release, l, "memory") +ATOMIC64_OP_ADD_RETURN( , al, "memory") - return x0; -} +#undef ATOMIC64_OP_ADD_RETURN static inline void atomic64_and(long i, atomic64_t *v) { @@ -254,26 +277,34 @@ static inline void atomic64_sub(long i, atomic64_t *v) : "x30"); } -static inline long atomic64_sub_return(long i, atomic64_t *v) -{ - register long x0 asm ("x0") = i; - register atomic64_t *x1 asm ("x1") = v; +#define ATOMIC64_OP_SUB_RETURN(name, mb, cl...) \ +static inline long atomic64_sub_return##name(long i, atomic64_t *v) \ +{ \ + register long x0 asm ("x0") = i; \ + register atomic64_t *x1 asm ("x1") = v; \ + \ + asm volatile(ARM64_LSE_ATOMIC_INSN( \ + /* LL/SC */ \ + " nop\n" \ + __LL_SC_ATOMIC64(sub_return##name) \ + " nop", \ + /* LSE atomics */ \ + " neg %[i], %[i]\n" \ + " ldadd" #mb " %[i], x30, %[v]\n" \ + " add %[i], %[i], x30") \ + : [i] "+r" (x0), [v] "+Q" (v->counter) \ + : "r" (x1) \ + : "x30" , ##cl); \ + \ + return x0; \ +} - asm volatile(ARM64_LSE_ATOMIC_INSN( - /* LL/SC */ - " nop\n" - __LL_SC_ATOMIC64(sub_return) - " nop", - /* LSE atomics */ - " neg %[i], %[i]\n" - " ldaddal %[i], x30, %[v]\n" - " add %[i], %[i], x30") - : [i] "+r" (x0), [v] "+Q" (v->counter) - : "r" (x1) - : "x30", "memory"); +ATOMIC64_OP_SUB_RETURN(_relaxed, ) +ATOMIC64_OP_SUB_RETURN(_acquire, a, "memory") +ATOMIC64_OP_SUB_RETURN(_release, l, "memory") +ATOMIC64_OP_SUB_RETURN( , al, "memory") - return x0; -} +#undef ATOMIC64_OP_SUB_RETURN static inline long atomic64_dec_if_positive(atomic64_t *v) { @@ -333,14 +364,22 @@ static inline unsigned long __cmpxchg_case_##name(volatile void *ptr, \ return x0; \ } -__CMPXCHG_CASE(w, b, 1, ) -__CMPXCHG_CASE(w, h, 2, ) -__CMPXCHG_CASE(w, , 4, ) -__CMPXCHG_CASE(x, , 8, ) -__CMPXCHG_CASE(w, b, mb_1, al, "memory") -__CMPXCHG_CASE(w, h, mb_2, al, "memory") -__CMPXCHG_CASE(w, , mb_4, al, "memory") -__CMPXCHG_CASE(x, , mb_8, al, "memory") +__CMPXCHG_CASE(w, b, 1, ) +__CMPXCHG_CASE(w, h, 2, ) +__CMPXCHG_CASE(w, , 4, ) +__CMPXCHG_CASE(x, , 8, ) +__CMPXCHG_CASE(w, b, acq_1, a, "memory") +__CMPXCHG_CASE(w, h, acq_2, a, "memory") +__CMPXCHG_CASE(w, , acq_4, a, "memory") +__CMPXCHG_CASE(x, , acq_8, a, "memory") +__CMPXCHG_CASE(w, b, rel_1, l, "memory") +__CMPXCHG_CASE(w, h, rel_2, l, "memory") +__CMPXCHG_CASE(w, , rel_4, l, "memory") +__CMPXCHG_CASE(x, , rel_8, l, "memory") +__CMPXCHG_CASE(w, b, mb_1, al, "memory") +__CMPXCHG_CASE(w, h, mb_2, al, "memory") +__CMPXCHG_CASE(w, , mb_4, al, "memory") +__CMPXCHG_CASE(x, , mb_8, al, "memory") #undef __LL_SC_CMPXCHG #undef __CMPXCHG_CASE diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h index 899e9f1..9ea611e 100644 --- a/arch/arm64/include/asm/cmpxchg.h +++ b/arch/arm64/include/asm/cmpxchg.h @@ -25,154 +25,151 @@ #include #include -static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size) -{ - unsigned long ret, tmp; - - switch (size) { - case 1: - asm volatile(ARM64_LSE_ATOMIC_INSN( - /* LL/SC */ - " prfm pstl1strm, %2\n" - "1: ldxrb %w0, %2\n" - " stlxrb %w1, %w3, %2\n" - " cbnz %w1, 1b\n" - " dmb ish", - /* LSE atomics */ - " nop\n" - " nop\n" - " swpalb %w3, %w0, %2\n" - " nop\n" - " nop") - : "=&r" (ret), "=&r" (tmp), "+Q" (*(u8 *)ptr) - : "r" (x) - : "memory"); - break; - case 2: - asm volatile(ARM64_LSE_ATOMIC_INSN( - /* LL/SC */ - " prfm pstl1strm, %2\n" - "1: ldxrh %w0, %2\n" - " stlxrh %w1, %w3, %2\n" - " cbnz %w1, 1b\n" - " dmb ish", - /* LSE atomics */ - " nop\n" - " nop\n" - " swpalh %w3, %w0, %2\n" - " nop\n" - " nop") - : "=&r" (ret), "=&r" (tmp), "+Q" (*(u16 *)ptr) - : "r" (x) - : "memory"); - break; - case 4: - asm volatile(ARM64_LSE_ATOMIC_INSN( - /* LL/SC */ - " prfm pstl1strm, %2\n" - "1: ldxr %w0, %2\n" - " stlxr %w1, %w3, %2\n" - " cbnz %w1, 1b\n" - " dmb ish", - /* LSE atomics */ - " nop\n" - " nop\n" - " swpal %w3, %w0, %2\n" - " nop\n" - " nop") - : "=&r" (ret), "=&r" (tmp), "+Q" (*(u32 *)ptr) - : "r" (x) - : "memory"); - break; - case 8: - asm volatile(ARM64_LSE_ATOMIC_INSN( - /* LL/SC */ - " prfm pstl1strm, %2\n" - "1: ldxr %0, %2\n" - " stlxr %w1, %3, %2\n" - " cbnz %w1, 1b\n" - " dmb ish", - /* LSE atomics */ - " nop\n" - " nop\n" - " swpal %3, %0, %2\n" - " nop\n" - " nop") - : "=&r" (ret), "=&r" (tmp), "+Q" (*(u64 *)ptr) - : "r" (x) - : "memory"); - break; - default: - BUILD_BUG(); - } - - return ret; +/* + * We need separate acquire parameters for ll/sc and lse, since the full + * barrier case is generated as release+dmb for the former and + * acquire+release for the latter. + */ +#define __XCHG_CASE(w, sz, name, mb, nop_lse, acq, acq_lse, rel, cl) \ +static inline unsigned long __xchg_case_##name(unsigned long x, \ + volatile void *ptr) \ +{ \ + unsigned long ret, tmp; \ + \ + asm volatile(ARM64_LSE_ATOMIC_INSN( \ + /* LL/SC */ \ + " prfm pstl1strm, %2\n" \ + "1: ld" #acq "xr" #sz "\t%" #w "0, %2\n" \ + " st" #rel "xr" #sz "\t%w1, %" #w "3, %2\n" \ + " cbnz %w1, 1b\n" \ + " " #mb, \ + /* LSE atomics */ \ + " nop\n" \ + " nop\n" \ + " swp" #acq_lse #rel #sz "\t%" #w "3, %" #w "0, %2\n" \ + " nop\n" \ + " " #nop_lse) \ + : "=&r" (ret), "=&r" (tmp), "+Q" (*(u8 *)ptr) \ + : "r" (x) \ + : cl); \ + \ + return ret; \ } -#define xchg(ptr,x) \ -({ \ - __typeof__(*(ptr)) __ret; \ - __ret = (__typeof__(*(ptr))) \ - __xchg((unsigned long)(x), (ptr), sizeof(*(ptr))); \ - __ret; \ +__XCHG_CASE(w, b, 1, , , , , , ) +__XCHG_CASE(w, h, 2, , , , , , ) +__XCHG_CASE(w, , 4, , , , , , ) +__XCHG_CASE( , , 8, , , , , , ) +__XCHG_CASE(w, b, acq_1, , , a, a, , "memory") +__XCHG_CASE(w, h, acq_2, , , a, a, , "memory") +__XCHG_CASE(w, , acq_4, , , a, a, , "memory") +__XCHG_CASE( , , acq_8, , , a, a, , "memory") +__XCHG_CASE(w, b, rel_1, , , , , l, "memory") +__XCHG_CASE(w, h, rel_2, , , , , l, "memory") +__XCHG_CASE(w, , rel_4, , , , , l, "memory") +__XCHG_CASE( , , rel_8, , , , , l, "memory") +__XCHG_CASE(w, b, mb_1, dmb ish, nop, , a, l, "memory") +__XCHG_CASE(w, h, mb_2, dmb ish, nop, , a, l, "memory") +__XCHG_CASE(w, , mb_4, dmb ish, nop, , a, l, "memory") +__XCHG_CASE( , , mb_8, dmb ish, nop, , a, l, "memory") + +#undef __XCHG_CASE + +#define __XCHG_GEN(sfx) \ +static inline unsigned long __xchg##sfx(unsigned long x, \ + volatile void *ptr, \ + int size) \ +{ \ + switch (size) { \ + case 1: \ + return __xchg_case##sfx##_1(x, ptr); \ + case 2: \ + return __xchg_case##sfx##_2(x, ptr); \ + case 4: \ + return __xchg_case##sfx##_4(x, ptr); \ + case 8: \ + return __xchg_case##sfx##_8(x, ptr); \ + default: \ + BUILD_BUG(); \ + } \ + \ + unreachable(); \ +} + +__XCHG_GEN() +__XCHG_GEN(_acq) +__XCHG_GEN(_rel) +__XCHG_GEN(_mb) + +#undef __XCHG_GEN + +#define __xchg_wrapper(sfx, ptr, x) \ +({ \ + __typeof__(*(ptr)) __ret; \ + __ret = (__typeof__(*(ptr))) \ + __xchg##sfx((unsigned long)(x), (ptr), sizeof(*(ptr))); \ + __ret; \ }) -static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, - unsigned long new, int size) -{ - switch (size) { - case 1: - return __cmpxchg_case_1(ptr, (u8)old, new); - case 2: - return __cmpxchg_case_2(ptr, (u16)old, new); - case 4: - return __cmpxchg_case_4(ptr, old, new); - case 8: - return __cmpxchg_case_8(ptr, old, new); - default: - BUILD_BUG(); - } - - unreachable(); +/* xchg */ +#define xchg_relaxed(...) __xchg_wrapper( , __VA_ARGS__) +#define xchg_acquire(...) __xchg_wrapper(_acq, __VA_ARGS__) +#define xchg_release(...) __xchg_wrapper(_rel, __VA_ARGS__) +#define xchg(...) __xchg_wrapper( _mb, __VA_ARGS__) + +#define __CMPXCHG_GEN(sfx) \ +static inline unsigned long __cmpxchg##sfx(volatile void *ptr, \ + unsigned long old, \ + unsigned long new, \ + int size) \ +{ \ + switch (size) { \ + case 1: \ + return __cmpxchg_case##sfx##_1(ptr, (u8)old, new); \ + case 2: \ + return __cmpxchg_case##sfx##_2(ptr, (u16)old, new); \ + case 4: \ + return __cmpxchg_case##sfx##_4(ptr, old, new); \ + case 8: \ + return __cmpxchg_case##sfx##_8(ptr, old, new); \ + default: \ + BUILD_BUG(); \ + } \ + \ + unreachable(); \ } -static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old, - unsigned long new, int size) -{ - switch (size) { - case 1: - return __cmpxchg_case_mb_1(ptr, (u8)old, new); - case 2: - return __cmpxchg_case_mb_2(ptr, (u16)old, new); - case 4: - return __cmpxchg_case_mb_4(ptr, old, new); - case 8: - return __cmpxchg_case_mb_8(ptr, old, new); - default: - BUILD_BUG(); - } - - unreachable(); -} +__CMPXCHG_GEN() +__CMPXCHG_GEN(_acq) +__CMPXCHG_GEN(_rel) +__CMPXCHG_GEN(_mb) -#define cmpxchg(ptr, o, n) \ -({ \ - __typeof__(*(ptr)) __ret; \ - __ret = (__typeof__(*(ptr))) \ - __cmpxchg_mb((ptr), (unsigned long)(o), (unsigned long)(n), \ - sizeof(*(ptr))); \ - __ret; \ -}) +#undef __CMPXCHG_GEN -#define cmpxchg_local(ptr, o, n) \ -({ \ - __typeof__(*(ptr)) __ret; \ - __ret = (__typeof__(*(ptr))) \ - __cmpxchg((ptr), (unsigned long)(o), \ - (unsigned long)(n), sizeof(*(ptr))); \ - __ret; \ +#define __cmpxchg_wrapper(sfx, ptr, o, n) \ +({ \ + __typeof__(*(ptr)) __ret; \ + __ret = (__typeof__(*(ptr))) \ + __cmpxchg##sfx((ptr), (unsigned long)(o), \ + (unsigned long)(n), sizeof(*(ptr))); \ + __ret; \ }) +/* cmpxchg */ +#define cmpxchg_relaxed(...) __cmpxchg_wrapper( , __VA_ARGS__) +#define cmpxchg_acquire(...) __cmpxchg_wrapper(_acq, __VA_ARGS__) +#define cmpxchg_release(...) __cmpxchg_wrapper(_rel, __VA_ARGS__) +#define cmpxchg(...) __cmpxchg_wrapper( _mb, __VA_ARGS__) +#define cmpxchg_local cmpxchg_relaxed + +/* cmpxchg64 */ +#define cmpxchg64_relaxed cmpxchg_relaxed +#define cmpxchg64_acquire cmpxchg_acquire +#define cmpxchg64_release cmpxchg_release +#define cmpxchg64 cmpxchg +#define cmpxchg64_local cmpxchg_local + +/* cmpxchg_double */ #define system_has_cmpxchg_double() 1 #define __cmpxchg_double_check(ptr1, ptr2) \ @@ -202,6 +199,7 @@ static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old, __ret; \ }) +/* this_cpu_cmpxchg */ #define _protect_cmpxchg_local(pcp, o, n) \ ({ \ typeof(*raw_cpu_ptr(&(pcp))) __ret; \ @@ -227,9 +225,4 @@ static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old, __ret; \ }) -#define cmpxchg64(ptr,o,n) cmpxchg((ptr),(o),(n)) -#define cmpxchg64_local(ptr,o,n) cmpxchg_local((ptr),(o),(n)) - -#define cmpxchg64_relaxed(ptr,o,n) cmpxchg_local((ptr),(o),(n)) - #endif /* __ASM_CMPXCHG_H */ -- cgit v0.10.2 From fd2203dd3556f6553231fa026060793e67a25ce6 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Mon, 12 Oct 2015 18:52:57 +0300 Subject: arm64: move PGD_SIZE definition to pgalloc.h This will be used by KASAN latter. Signed-off-by: Andrey Ryabinin Acked-by: Catalin Marinas Signed-off-by: Catalin Marinas diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h index 7642056..c150539 100644 --- a/arch/arm64/include/asm/pgalloc.h +++ b/arch/arm64/include/asm/pgalloc.h @@ -27,6 +27,7 @@ #define check_pgt_cache() do { } while (0) #define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO) +#define PGD_SIZE (PTRS_PER_PGD * sizeof(pgd_t)) #if CONFIG_PGTABLE_LEVELS > 2 diff --git a/arch/arm64/mm/pgd.c b/arch/arm64/mm/pgd.c index 71ca104..cb3ba1b 100644 --- a/arch/arm64/mm/pgd.c +++ b/arch/arm64/mm/pgd.c @@ -28,8 +28,6 @@ #include "mm.h" -#define PGD_SIZE (PTRS_PER_PGD * sizeof(pgd_t)) - static struct kmem_cache *pgd_cache; pgd_t *pgd_alloc(struct mm_struct *mm) -- cgit v0.10.2 From 39d114ddc68223022c12ae3a1573912bc4b585e5 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Mon, 12 Oct 2015 18:52:58 +0300 Subject: arm64: add KASAN support This patch adds arch specific code for kernel address sanitizer (see Documentation/kasan.txt). 1/8 of kernel addresses reserved for shadow memory. There was no big enough hole for this, so virtual addresses for shadow were stolen from vmalloc area. At early boot stage the whole shadow region populated with just one physical page (kasan_zero_page). Later, this page reused as readonly zero shadow for some memory that KASan currently don't track (vmalloc). After mapping the physical memory, pages for shadow memory are allocated and mapped. Functions like memset/memmove/memcpy do a lot of memory accesses. If bad pointer passed to one of these function it is important to catch this. Compiler's instrumentation cannot do this since these functions are written in assembly. KASan replaces memory functions with manually instrumented variants. Original functions declared as weak symbols so strong definitions in mm/kasan/kasan.c could replace them. Original functions have aliases with '__' prefix in name, so we could call non-instrumented variant if needed. Some files built without kasan instrumentation (e.g. mm/slub.c). Original mem* function replaced (via #define) with prefixed variants to disable memory access checks for such files. Signed-off-by: Andrey Ryabinin Tested-by: Linus Walleij Reviewed-by: Catalin Marinas Signed-off-by: Catalin Marinas diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 1b35bdb..2782c11 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -48,6 +48,7 @@ config ARM64 select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_BITREVERSE select HAVE_ARCH_JUMP_LABEL + select HAVE_ARCH_KASAN if SPARSEMEM_VMEMMAP select HAVE_ARCH_KGDB select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index f9914d7..f41c676 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -55,6 +55,13 @@ else TEXT_OFFSET := 0x00080000 endif +# KASAN_SHADOW_OFFSET = VA_START + (1 << (VA_BITS - 3)) - (1 << 61) +# in 32-bit arithmetic +KASAN_SHADOW_OFFSET := $(shell printf "0x%08x00000000\n" $$(( \ + (0xffffffff & (-1 << ($(CONFIG_ARM64_VA_BITS) - 32))) \ + + (1 << ($(CONFIG_ARM64_VA_BITS) - 32 - 3)) \ + - (1 << (64 - 32 - 3)) )) ) + export TEXT_OFFSET GZFLAGS core-y += arch/arm64/kernel/ arch/arm64/mm/ diff --git a/arch/arm64/include/asm/kasan.h b/arch/arm64/include/asm/kasan.h new file mode 100644 index 0000000..71dfe14 --- /dev/null +++ b/arch/arm64/include/asm/kasan.h @@ -0,0 +1,36 @@ +#ifndef __ASM_KASAN_H +#define __ASM_KASAN_H + +#ifndef __ASSEMBLY__ + +#ifdef CONFIG_KASAN + +#include + +/* + * KASAN_SHADOW_START: beginning of the kernel virtual addresses. + * KASAN_SHADOW_END: KASAN_SHADOW_START + 1/8 of kernel virtual addresses. + */ +#define KASAN_SHADOW_START (VA_START) +#define KASAN_SHADOW_END (KASAN_SHADOW_START + (1UL << (VA_BITS - 3))) + +/* + * This value is used to map an address to the corresponding shadow + * address by the following formula: + * shadow_addr = (address >> 3) + KASAN_SHADOW_OFFSET; + * + * (1 << 61) shadow addresses - [KASAN_SHADOW_OFFSET,KASAN_SHADOW_END] + * cover all 64-bits of virtual addresses. So KASAN_SHADOW_OFFSET + * should satisfy the following equation: + * KASAN_SHADOW_OFFSET = KASAN_SHADOW_END - (1ULL << 61) + */ +#define KASAN_SHADOW_OFFSET (KASAN_SHADOW_END - (1ULL << (64 - 3))) + +void kasan_init(void); + +#else +static inline void kasan_init(void) { } +#endif + +#endif +#endif diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 3f481ef..e3b515f 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -41,7 +41,14 @@ * fixed mappings and modules */ #define VMEMMAP_SIZE ALIGN((1UL << (VA_BITS - PAGE_SHIFT)) * sizeof(struct page), PUD_SIZE) + +#ifndef CONFIG_KASAN #define VMALLOC_START (VA_START) +#else +#include +#define VMALLOC_START (KASAN_SHADOW_END + SZ_64K) +#endif + #define VMALLOC_END (PAGE_OFFSET - PUD_SIZE - VMEMMAP_SIZE - SZ_64K) #define vmemmap ((struct page *)(VMALLOC_END + SZ_64K)) diff --git a/arch/arm64/include/asm/string.h b/arch/arm64/include/asm/string.h index 64d2d48..2eb714c 100644 --- a/arch/arm64/include/asm/string.h +++ b/arch/arm64/include/asm/string.h @@ -36,17 +36,33 @@ extern __kernel_size_t strnlen(const char *, __kernel_size_t); #define __HAVE_ARCH_MEMCPY extern void *memcpy(void *, const void *, __kernel_size_t); +extern void *__memcpy(void *, const void *, __kernel_size_t); #define __HAVE_ARCH_MEMMOVE extern void *memmove(void *, const void *, __kernel_size_t); +extern void *__memmove(void *, const void *, __kernel_size_t); #define __HAVE_ARCH_MEMCHR extern void *memchr(const void *, int, __kernel_size_t); #define __HAVE_ARCH_MEMSET extern void *memset(void *, int, __kernel_size_t); +extern void *__memset(void *, int, __kernel_size_t); #define __HAVE_ARCH_MEMCMP extern int memcmp(const void *, const void *, size_t); + +#if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__) + +/* + * For files that are not instrumented (e.g. mm/slub.c) we + * should use not instrumented version of mem* functions. + */ + +#define memcpy(dst, src, len) __memcpy(dst, src, len) +#define memmove(dst, src, len) __memmove(dst, src, len) +#define memset(s, c, n) __memset(s, c, n) +#endif + #endif diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 7b17f62..1b6bda2 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -7,6 +7,8 @@ AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET) CFLAGS_efi-stub.o := -DTEXT_OFFSET=$(TEXT_OFFSET) CFLAGS_armv8_deprecated.o := -I$(src) +KASAN_SANITIZE_efi-stub.o := n + CFLAGS_REMOVE_ftrace.o = -pg CFLAGS_REMOVE_insn.o = -pg CFLAGS_REMOVE_return_address.o = -pg diff --git a/arch/arm64/kernel/arm64ksyms.c b/arch/arm64/kernel/arm64ksyms.c index a85843d..3b6d8cc 100644 --- a/arch/arm64/kernel/arm64ksyms.c +++ b/arch/arm64/kernel/arm64ksyms.c @@ -51,6 +51,9 @@ EXPORT_SYMBOL(strnlen); EXPORT_SYMBOL(memset); EXPORT_SYMBOL(memcpy); EXPORT_SYMBOL(memmove); +EXPORT_SYMBOL(__memset); +EXPORT_SYMBOL(__memcpy); +EXPORT_SYMBOL(__memmove); EXPORT_SYMBOL(memchr); EXPORT_SYMBOL(memcmp); diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 28a81e9..2a8c1d5 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -444,6 +444,9 @@ __mmap_switched: str_l x21, __fdt_pointer, x5 // Save FDT pointer str_l x24, memstart_addr, x6 // Save PHYS_OFFSET mov x29, #0 +#ifdef CONFIG_KASAN + bl kasan_early_init +#endif b start_kernel ENDPROC(__mmap_switched) diff --git a/arch/arm64/kernel/image.h b/arch/arm64/kernel/image.h index e083af0..6eb8fee 100644 --- a/arch/arm64/kernel/image.h +++ b/arch/arm64/kernel/image.h @@ -80,6 +80,12 @@ __efistub_strcmp = __pi_strcmp; __efistub_strncmp = __pi_strncmp; __efistub___flush_dcache_area = __pi___flush_dcache_area; +#ifdef CONFIG_KASAN +__efistub___memcpy = __pi_memcpy; +__efistub___memmove = __pi_memmove; +__efistub___memset = __pi_memset; +#endif + __efistub__text = _text; __efistub__end = _end; __efistub__edata = _edata; diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index 876eb8d..f4bc779 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -34,9 +35,18 @@ void *module_alloc(unsigned long size) { - return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, - GFP_KERNEL, PAGE_KERNEL_EXEC, 0, - NUMA_NO_NODE, __builtin_return_address(0)); + void *p; + + p = __vmalloc_node_range(size, MODULE_ALIGN, MODULES_VADDR, MODULES_END, + GFP_KERNEL, PAGE_KERNEL_EXEC, 0, + NUMA_NO_NODE, __builtin_return_address(0)); + + if (p && (kasan_module_alloc(p, size) < 0)) { + vfree(p); + return NULL; + } + + return p; } enum aarch64_reloc_op { diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 6bab21f..79df79a 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -54,6 +54,7 @@ #include #include #include +#include #include #include #include @@ -434,6 +435,9 @@ void __init setup_arch(char **cmdline_p) paging_init(); relocate_initrd(); + + kasan_init(); + request_standard_resources(); early_ioremap_reset(); diff --git a/arch/arm64/lib/memcpy.S b/arch/arm64/lib/memcpy.S index 36a6a62..6761393 100644 --- a/arch/arm64/lib/memcpy.S +++ b/arch/arm64/lib/memcpy.S @@ -68,7 +68,10 @@ stp \ptr, \regB, [\regC], \val .endm + .weak memcpy +ENTRY(__memcpy) ENTRY(memcpy) #include "copy_template.S" ret ENDPIPROC(memcpy) +ENDPROC(__memcpy) diff --git a/arch/arm64/lib/memmove.S b/arch/arm64/lib/memmove.S index 68e2f20..a5a4459 100644 --- a/arch/arm64/lib/memmove.S +++ b/arch/arm64/lib/memmove.S @@ -57,12 +57,14 @@ C_h .req x12 D_l .req x13 D_h .req x14 + .weak memmove +ENTRY(__memmove) ENTRY(memmove) cmp dstin, src - b.lo memcpy + b.lo __memcpy add tmp1, src, count cmp dstin, tmp1 - b.hs memcpy /* No overlap. */ + b.hs __memcpy /* No overlap. */ add dst, dstin, count add src, src, count @@ -195,3 +197,4 @@ ENTRY(memmove) b.ne .Ltail63 ret ENDPIPROC(memmove) +ENDPROC(__memmove) diff --git a/arch/arm64/lib/memset.S b/arch/arm64/lib/memset.S index 29f405f..f2670a9 100644 --- a/arch/arm64/lib/memset.S +++ b/arch/arm64/lib/memset.S @@ -54,6 +54,8 @@ dst .req x8 tmp3w .req w9 tmp3 .req x9 + .weak memset +ENTRY(__memset) ENTRY(memset) mov dst, dstin /* Preserve return value. */ and A_lw, val, #255 @@ -214,3 +216,4 @@ ENTRY(memset) b.ne .Ltail_maybe_long ret ENDPIPROC(memset) +ENDPROC(__memset) diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile index 773d37a..57f57fd 100644 --- a/arch/arm64/mm/Makefile +++ b/arch/arm64/mm/Makefile @@ -4,3 +4,6 @@ obj-y := dma-mapping.o extable.o fault.o init.o \ context.o proc.o pageattr.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_ARM64_PTDUMP) += dump.o + +obj-$(CONFIG_KASAN) += kasan_init.o +KASAN_SANITIZE_kasan_init.o := n diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c new file mode 100644 index 0000000..b6a92f5 --- /dev/null +++ b/arch/arm64/mm/kasan_init.c @@ -0,0 +1,165 @@ +/* + * This file contains kasan initialization code for ARM64. + * + * Copyright (c) 2015 Samsung Electronics Co., Ltd. + * Author: Andrey Ryabinin + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#define pr_fmt(fmt) "kasan: " fmt +#include +#include +#include +#include + +#include +#include +#include +#include + +static pgd_t tmp_pg_dir[PTRS_PER_PGD] __initdata __aligned(PGD_SIZE); + +static void __init kasan_early_pte_populate(pmd_t *pmd, unsigned long addr, + unsigned long end) +{ + pte_t *pte; + unsigned long next; + + if (pmd_none(*pmd)) + pmd_populate_kernel(&init_mm, pmd, kasan_zero_pte); + + pte = pte_offset_kernel(pmd, addr); + do { + next = addr + PAGE_SIZE; + set_pte(pte, pfn_pte(virt_to_pfn(kasan_zero_page), + PAGE_KERNEL)); + } while (pte++, addr = next, addr != end && pte_none(*pte)); +} + +static void __init kasan_early_pmd_populate(pud_t *pud, + unsigned long addr, + unsigned long end) +{ + pmd_t *pmd; + unsigned long next; + + if (pud_none(*pud)) + pud_populate(&init_mm, pud, kasan_zero_pmd); + + pmd = pmd_offset(pud, addr); + do { + next = pmd_addr_end(addr, end); + kasan_early_pte_populate(pmd, addr, next); + } while (pmd++, addr = next, addr != end && pmd_none(*pmd)); +} + +static void __init kasan_early_pud_populate(pgd_t *pgd, + unsigned long addr, + unsigned long end) +{ + pud_t *pud; + unsigned long next; + + if (pgd_none(*pgd)) + pgd_populate(&init_mm, pgd, kasan_zero_pud); + + pud = pud_offset(pgd, addr); + do { + next = pud_addr_end(addr, end); + kasan_early_pmd_populate(pud, addr, next); + } while (pud++, addr = next, addr != end && pud_none(*pud)); +} + +static void __init kasan_map_early_shadow(void) +{ + unsigned long addr = KASAN_SHADOW_START; + unsigned long end = KASAN_SHADOW_END; + unsigned long next; + pgd_t *pgd; + + pgd = pgd_offset_k(addr); + do { + next = pgd_addr_end(addr, end); + kasan_early_pud_populate(pgd, addr, next); + } while (pgd++, addr = next, addr != end); +} + +void __init kasan_early_init(void) +{ + BUILD_BUG_ON(KASAN_SHADOW_OFFSET != KASAN_SHADOW_END - (1UL << 61)); + BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, PGDIR_SIZE)); + BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PGDIR_SIZE)); + kasan_map_early_shadow(); +} + +static void __init clear_pgds(unsigned long start, + unsigned long end) +{ + /* + * Remove references to kasan page tables from + * swapper_pg_dir. pgd_clear() can't be used + * here because it's nop on 2,3-level pagetable setups + */ + for (; start < end; start += PGDIR_SIZE) + set_pgd(pgd_offset_k(start), __pgd(0)); +} + +static void __init cpu_set_ttbr1(unsigned long ttbr1) +{ + asm( + " msr ttbr1_el1, %0\n" + " isb" + : + : "r" (ttbr1)); +} + +void __init kasan_init(void) +{ + struct memblock_region *reg; + + /* + * We are going to perform proper setup of shadow memory. + * At first we should unmap early shadow (clear_pgds() call bellow). + * However, instrumented code couldn't execute without shadow memory. + * tmp_pg_dir used to keep early shadow mapped until full shadow + * setup will be finished. + */ + memcpy(tmp_pg_dir, swapper_pg_dir, sizeof(tmp_pg_dir)); + cpu_set_ttbr1(__pa(tmp_pg_dir)); + flush_tlb_all(); + + clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END); + + kasan_populate_zero_shadow((void *)KASAN_SHADOW_START, + kasan_mem_to_shadow((void *)MODULES_VADDR)); + + for_each_memblock(memory, reg) { + void *start = (void *)__phys_to_virt(reg->base); + void *end = (void *)__phys_to_virt(reg->base + reg->size); + + if (start >= end) + break; + + /* + * end + 1 here is intentional. We check several shadow bytes in + * advance to slightly speed up fastpath. In some rare cases + * we could cross boundary of mapped shadow, so we just map + * some more here. + */ + vmemmap_populate((unsigned long)kasan_mem_to_shadow(start), + (unsigned long)kasan_mem_to_shadow(end) + 1, + pfn_to_nid(virt_to_pfn(start))); + } + + memset(kasan_zero_page, 0, PAGE_SIZE); + cpu_set_ttbr1(__pa(swapper_pg_dir)); + flush_tlb_all(); + + /* At this point kasan is fully initialized. Enable error messages */ + init_task.kasan_depth = 0; + pr_info("KernelAddressSanitizer initialized\n"); +} diff --git a/drivers/firmware/efi/Makefile b/drivers/firmware/efi/Makefile index 6fd3da9..413fcf2 100644 --- a/drivers/firmware/efi/Makefile +++ b/drivers/firmware/efi/Makefile @@ -1,6 +1,14 @@ # # Makefile for linux kernel # + +# +# ARM64 maps efi runtime services in userspace addresses +# which don't have KASAN shadow. So dereference of these addresses +# in efi_call_virt() will cause crash if this code instrumented. +# +KASAN_SANITIZE_runtime-wrappers.o := n + obj-$(CONFIG_EFI) += efi.o vars.o reboot.o obj-$(CONFIG_EFI_VARS) += efivars.o obj-$(CONFIG_EFI_ESRT) += esrt.o diff --git a/scripts/Makefile.kasan b/scripts/Makefile.kasan index 3f874d2..37323b0 100644 --- a/scripts/Makefile.kasan +++ b/scripts/Makefile.kasan @@ -5,10 +5,12 @@ else call_threshold := 0 endif +KASAN_SHADOW_OFFSET ?= $(CONFIG_KASAN_SHADOW_OFFSET) + CFLAGS_KASAN_MINIMAL := -fsanitize=kernel-address CFLAGS_KASAN := $(call cc-option, -fsanitize=kernel-address \ - -fasan-shadow-offset=$(CONFIG_KASAN_SHADOW_OFFSET) \ + -fasan-shadow-offset=$(KASAN_SHADOW_OFFSET) \ --param asan-stack=1 --param asan-globals=1 \ --param asan-instrumentation-with-call-threshold=$(call_threshold)) -- cgit v0.10.2 From ee7f881b59de4e0e0be250fd0c5d4ade3e30ec34 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 12 Oct 2015 18:52:59 +0300 Subject: ARM64: kasan: print memory assignment This prints out the virtual memory assigned to KASan in the boot crawl along with other memory assignments, if and only if KASan is activated. Example dmesg from the Juno Development board: Memory: 1691156K/2080768K available (5465K kernel code, 444K rwdata, 2160K rodata, 340K init, 217K bss, 373228K reserved, 16384K cma-reserved) Virtual kernel memory layout: kasan : 0xffffff8000000000 - 0xffffff9000000000 ( 64 GB) vmalloc : 0xffffff9000000000 - 0xffffffbdbfff0000 ( 182 GB) vmemmap : 0xffffffbdc0000000 - 0xffffffbfc0000000 ( 8 GB maximum) 0xffffffbdc2000000 - 0xffffffbdc3fc0000 ( 31 MB actual) fixed : 0xffffffbffabfd000 - 0xffffffbffac00000 ( 12 KB) PCI I/O : 0xffffffbffae00000 - 0xffffffbffbe00000 ( 16 MB) modules : 0xffffffbffc000000 - 0xffffffc000000000 ( 64 MB) memory : 0xffffffc000000000 - 0xffffffc07f000000 ( 2032 MB) .init : 0xffffffc0007f5000 - 0xffffffc00084a000 ( 340 KB) .text : 0xffffffc000080000 - 0xffffffc0007f45b4 ( 7634 KB) .data : 0xffffffc000850000 - 0xffffffc0008bf200 ( 445 KB) Signed-off-by: Linus Walleij Signed-off-by: Andrey Ryabinin Acked-by: Catalin Marinas Signed-off-by: Catalin Marinas diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index f5c0680..7a1f9a0 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -298,6 +298,9 @@ void __init mem_init(void) #define MLK_ROUNDUP(b, t) b, t, DIV_ROUND_UP(((t) - (b)), SZ_1K) pr_notice("Virtual kernel memory layout:\n" +#ifdef CONFIG_KASAN + " kasan : 0x%16lx - 0x%16lx (%6ld GB)\n" +#endif " vmalloc : 0x%16lx - 0x%16lx (%6ld GB)\n" #ifdef CONFIG_SPARSEMEM_VMEMMAP " vmemmap : 0x%16lx - 0x%16lx (%6ld GB maximum)\n" @@ -310,6 +313,9 @@ void __init mem_init(void) " .init : 0x%p" " - 0x%p" " (%6ld KB)\n" " .text : 0x%p" " - 0x%p" " (%6ld KB)\n" " .data : 0x%p" " - 0x%p" " (%6ld KB)\n", +#ifdef CONFIG_KASAN + MLG(KASAN_SHADOW_START, KASAN_SHADOW_END), +#endif MLG(VMALLOC_START, VMALLOC_END), #ifdef CONFIG_SPARSEMEM_VMEMMAP MLG((unsigned long)vmemmap, -- cgit v0.10.2 From 4fc57692268bf841a3cc90b3b056640de2bb1605 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Mon, 12 Oct 2015 18:53:00 +0300 Subject: Documentation/features/KASAN: arm64 supports KASAN now Signed-off-by: Andrey Ryabinin Signed-off-by: Catalin Marinas diff --git a/Documentation/features/debug/KASAN/arch-support.txt b/Documentation/features/debug/KASAN/arch-support.txt index 14531da..703f578 100644 --- a/Documentation/features/debug/KASAN/arch-support.txt +++ b/Documentation/features/debug/KASAN/arch-support.txt @@ -9,7 +9,7 @@ | alpha: | TODO | | arc: | TODO | | arm: | TODO | - | arm64: | TODO | + | arm64: | ok | | avr32: | TODO | | blackfin: | TODO | | c6x: | TODO | -- cgit v0.10.2 From 83040123fde42ec532d3b632efb5f7f84024e61d Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 13 Oct 2015 14:01:06 +0100 Subject: arm64: kasan: fix issues reported by sparse Sparse reports some new issues introduced by the kasan patches: arch/arm64/mm/kasan_init.c:91:13: warning: no previous prototype for 'kasan_early_init' [-Wmissing-prototypes] void __init kasan_early_init(void) ^ arch/arm64/mm/kasan_init.c:91:13: warning: symbol 'kasan_early_init' was not declared. Should it be static? [sparse] This patch resolves the problem by adding a prototype for kasan_early_init and marking the function as asmlinkage, since it's only called from head.S. Signed-off-by: Will Deacon Acked-by: Andrey Ryabinin Signed-off-by: Catalin Marinas diff --git a/arch/arm64/include/asm/kasan.h b/arch/arm64/include/asm/kasan.h index 71dfe14..2774fa3 100644 --- a/arch/arm64/include/asm/kasan.h +++ b/arch/arm64/include/asm/kasan.h @@ -5,6 +5,7 @@ #ifdef CONFIG_KASAN +#include #include /* @@ -27,6 +28,7 @@ #define KASAN_SHADOW_OFFSET (KASAN_SHADOW_END - (1ULL << (64 - 3))) void kasan_init(void); +asmlinkage void kasan_early_init(void); #else static inline void kasan_init(void) { } diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c index b6a92f5..cf038c7 100644 --- a/arch/arm64/mm/kasan_init.c +++ b/arch/arm64/mm/kasan_init.c @@ -88,7 +88,7 @@ static void __init kasan_map_early_shadow(void) } while (pgd++, addr = next, addr != end); } -void __init kasan_early_init(void) +asmlinkage void __init kasan_early_init(void) { BUILD_BUG_ON(KASAN_SHADOW_OFFSET != KASAN_SHADOW_END - (1UL << 61)); BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, PGDIR_SIZE)); -- cgit v0.10.2 From 1b6d7f8742d5d46c478f10c9e57da18d049b116d Mon Sep 17 00:00:00 2001 From: yalin wang Date: Mon, 12 Oct 2015 10:28:18 +0800 Subject: arm64: ioremap: add ioremap_cache macro Add ioremap_cache macro, because some code will test if this macro is defined or not, and will generate a generric version if not defined, for example, memremap.c do like this. Signed-off-by: yalin wang Signed-off-by: Catalin Marinas diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h index 44be1e0..d6b620c 100644 --- a/arch/arm64/include/asm/io.h +++ b/arch/arm64/include/asm/io.h @@ -172,6 +172,7 @@ extern void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size); #define ioremap_wc(addr, size) __ioremap((addr), (size), __pgprot(PROT_NORMAL_NC)) #define ioremap_wt(addr, size) __ioremap((addr), (size), __pgprot(PROT_DEVICE_nGnRE)) #define iounmap __iounmap +#define ioremap_cache ioremap_cache /* * io{read,write}{16,32}be() macros -- cgit v0.10.2 From 03875ad52fdde1f110f663470a45d3dcc34b8fef Mon Sep 17 00:00:00 2001 From: yalin wang Date: Mon, 12 Oct 2015 14:52:59 +0800 Subject: arm64: add kc_offset_to_vaddr and kc_vaddr_to_offset macro This patch add kc_offset_to_vaddr() and kc_vaddr_to_offset(), the default version doesn't work on arm64, because arm64 kernel address is below the PAGE_OFFSET, like module address and vmemmap address are all below PAGE_OFFSET address. Signed-off-by: yalin wang Signed-off-by: Catalin Marinas diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index e3b515f..83965dd 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -673,6 +673,8 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, #define update_mmu_cache_pmd(vma, address, pmd) do { } while (0) +#define kc_vaddr_to_offset(v) ((v) & ~VA_START) +#define kc_offset_to_vaddr(o) ((o) | VA_START) #endif /* !__ASSEMBLY__ */ #endif /* __ASM_PGTABLE_H */ -- cgit v0.10.2 From 52fa1927e953d8b1fbdcf8a19dc75c1499cc3d5d Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 13 Oct 2015 16:18:17 +0100 Subject: Revert "arm64: ioremap: add ioremap_cache macro" This reverts commit 1b6d7f8742d5d46c478f10c9e57da18d049b116d. This patch would conflict with Dan Williams' "tree-wide convert to memremap()" series (ioremap_cache replaced by arch_memremap) Signed-off-by: Catalin Marinas diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h index d6b620c..44be1e0 100644 --- a/arch/arm64/include/asm/io.h +++ b/arch/arm64/include/asm/io.h @@ -172,7 +172,6 @@ extern void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size); #define ioremap_wc(addr, size) __ioremap((addr), (size), __pgprot(PROT_NORMAL_NC)) #define ioremap_wt(addr, size) __ioremap((addr), (size), __pgprot(PROT_DEVICE_nGnRE)) #define iounmap __iounmap -#define ioremap_cache ioremap_cache /* * io{read,write}{16,32}be() macros -- cgit v0.10.2 From 7db743c6d8bb7a08396a0ea3084fb9f8ebccc577 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 16 Oct 2015 14:34:50 +0100 Subject: arm64: Minor coding style fixes for kc_offset_to_vaddr and kc_vaddr_to_offset These were introduced by commit 03875ad52fdd (arm64: add kc_offset_to_vaddr and kc_vaddr_to_offset macro). Signed-off-by: Catalin Marinas diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 83965dd..c3d22a5 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -673,8 +673,9 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, #define update_mmu_cache_pmd(vma, address, pmd) do { } while (0) -#define kc_vaddr_to_offset(v) ((v) & ~VA_START) -#define kc_offset_to_vaddr(o) ((o) | VA_START) +#define kc_vaddr_to_offset(v) ((v) & ~VA_START) +#define kc_offset_to_vaddr(o) ((o) | VA_START) + #endif /* !__ASSEMBLY__ */ #endif /* __ASM_PGTABLE_H */ -- cgit v0.10.2 From 77f3228f771506bac4b1308571a62c769552f8f8 Mon Sep 17 00:00:00 2001 From: Mark Salyzyn Date: Tue, 13 Oct 2015 14:30:51 -0700 Subject: arm64: AArch32 user space PC alignment exception ARMv7 does not have a PC alignment exception. ARMv8 AArch32 user space however can produce a PC alignment exception. Add handler so that we do not dump an unexpected stack trace in the logs. Signed-off-by: Mark Salyzyn Signed-off-by: Catalin Marinas diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 4306c93..7ed3d75 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -430,6 +430,8 @@ el0_sync_compat: b.eq el0_fpsimd_acc cmp x24, #ESR_ELx_EC_FP_EXC32 // FP/ASIMD exception b.eq el0_fpsimd_exc + cmp x24, #ESR_ELx_EC_PC_ALIGN // pc alignment exception + b.eq el0_sp_pc cmp x24, #ESR_ELx_EC_UNKNOWN // unknown exception in EL0 b.eq el0_undef cmp x24, #ESR_ELx_EC_CP15_32 // CP15 MRC/MCR trap -- cgit v0.10.2 From fbc61a26e6b7a2ebc399559ea22df4a35ac05fcb Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Fri, 18 Sep 2015 14:09:00 -0700 Subject: arm64: debug: Fix typo in debug-monitors.c Fix handers to handlers. Signed-off-by: Yang Shi Signed-off-by: Catalin Marinas diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index cebf786..8eef30f 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -201,7 +201,7 @@ void unregister_step_hook(struct step_hook *hook) } /* - * Call registered single step handers + * Call registered single step handlers * There is no Syndrome info to check for determining the handler. * So we call all the registered handlers, until the right handler is * found which returns zero. -- cgit v0.10.2 From 87d1587bef394cd8a77dbca8cc92885fe7041b8f Mon Sep 17 00:00:00 2001 From: "Suzuki K. Poulose" Date: Mon, 19 Oct 2015 14:19:27 +0100 Subject: arm64: Move swapper pagetable definitions Move the kernel pagetable (both swapper and idmap) definitions from the generic asm/page.h to a new file, asm/kernel-pgtable.h. This is mostly a cosmetic change, to clean up the asm/page.h to get rid of the arch specific details which are not needed by the generic code. Also renames the symbols to prevent conflicts. e.g, BLOCK_SHIFT => SWAPPER_BLOCK_SHIFT Cc: Will Deacon Signed-off-by: Suzuki K. Poulose Reviewed-by: Ard Biesheuvel Tested-by: Ard Biesheuvel Acked-by: Mark Rutland Signed-off-by: Catalin Marinas diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h new file mode 100644 index 0000000..3c92fa7 --- /dev/null +++ b/arch/arm64/include/asm/kernel-pgtable.h @@ -0,0 +1,65 @@ +/* + * Kernel page table mapping + * + * Copyright (C) 2015 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef __ASM_KERNEL_PGTABLE_H +#define __ASM_KERNEL_PGTABLE_H + +/* + * The idmap and swapper page tables need some space reserved in the kernel + * image. Both require pgd, pud (4 levels only) and pmd tables to (section) + * map the kernel. With the 64K page configuration, swapper and idmap need to + * map to pte level. The swapper also maps the FDT (see __create_page_tables + * for more information). Note that the number of ID map translation levels + * could be increased on the fly if system RAM is out of reach for the default + * VA range, so 3 pages are reserved in all cases. + */ +#ifdef CONFIG_ARM64_64K_PAGES +#define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS) +#else +#define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS - 1) +#endif + +#define SWAPPER_DIR_SIZE (SWAPPER_PGTABLE_LEVELS * PAGE_SIZE) +#define IDMAP_DIR_SIZE (3 * PAGE_SIZE) + +/* Initial memory map size */ +#ifdef CONFIG_ARM64_64K_PAGES +#define SWAPPER_BLOCK_SHIFT PAGE_SHIFT +#define SWAPPER_BLOCK_SIZE PAGE_SIZE +#define SWAPPER_TABLE_SHIFT PMD_SHIFT +#else +#define SWAPPER_BLOCK_SHIFT SECTION_SHIFT +#define SWAPPER_BLOCK_SIZE SECTION_SIZE +#define SWAPPER_TABLE_SHIFT PUD_SHIFT +#endif + + +/* + * Initial memory map attributes. + */ +#define SWAPPER_PTE_FLAGS (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED) +#define SWAPPER_PMD_FLAGS (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S) + +#ifdef CONFIG_ARM64_64K_PAGES +#define SWAPPER_MM_MMUFLAGS (PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS) +#else +#define SWAPPER_MM_MMUFLAGS (PMD_ATTRINDX(MT_NORMAL) | SWAPPER_PMD_FLAGS) +#endif + + +#endif /* __ASM_KERNEL_PGTABLE_H */ diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h index 33892ef..da32354 100644 --- a/arch/arm64/include/asm/page.h +++ b/arch/arm64/include/asm/page.h @@ -34,24 +34,6 @@ #define CONT_SIZE (_AC(1, UL) << (CONT_SHIFT + PAGE_SHIFT)) #define CONT_MASK (~(CONT_SIZE-1)) -/* - * The idmap and swapper page tables need some space reserved in the kernel - * image. Both require pgd, pud (4 levels only) and pmd tables to (section) - * map the kernel. With the 64K page configuration, swapper and idmap need to - * map to pte level. The swapper also maps the FDT (see __create_page_tables - * for more information). Note that the number of ID map translation levels - * could be increased on the fly if system RAM is out of reach for the default - * VA range, so 3 pages are reserved in all cases. - */ -#ifdef CONFIG_ARM64_64K_PAGES -#define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS) -#else -#define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS - 1) -#endif - -#define SWAPPER_DIR_SIZE (SWAPPER_PGTABLE_LEVELS * PAGE_SIZE) -#define IDMAP_DIR_SIZE (3 * PAGE_SIZE) - #ifndef __ASSEMBLY__ #include diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 2a8c1d5..149fce3 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -46,32 +47,10 @@ #error TEXT_OFFSET must be less than 2MB #endif -#ifdef CONFIG_ARM64_64K_PAGES -#define BLOCK_SHIFT PAGE_SHIFT -#define BLOCK_SIZE PAGE_SIZE -#define TABLE_SHIFT PMD_SHIFT -#else -#define BLOCK_SHIFT SECTION_SHIFT -#define BLOCK_SIZE SECTION_SIZE -#define TABLE_SHIFT PUD_SHIFT -#endif - #define KERNEL_START _text #define KERNEL_END _end /* - * Initial memory map attributes. - */ -#define PTE_FLAGS PTE_TYPE_PAGE | PTE_AF | PTE_SHARED -#define PMD_FLAGS PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S - -#ifdef CONFIG_ARM64_64K_PAGES -#define MM_MMUFLAGS PTE_ATTRINDX(MT_NORMAL) | PTE_FLAGS -#else -#define MM_MMUFLAGS PMD_ATTRINDX(MT_NORMAL) | PMD_FLAGS -#endif - -/* * Kernel startup entry point. * --------------------------- * @@ -293,7 +272,7 @@ ENDPROC(preserve_boot_args) .macro create_pgd_entry, tbl, virt, tmp1, tmp2 create_table_entry \tbl, \virt, PGDIR_SHIFT, PTRS_PER_PGD, \tmp1, \tmp2 #if SWAPPER_PGTABLE_LEVELS == 3 - create_table_entry \tbl, \virt, TABLE_SHIFT, PTRS_PER_PTE, \tmp1, \tmp2 + create_table_entry \tbl, \virt, SWAPPER_TABLE_SHIFT, PTRS_PER_PTE, \tmp1, \tmp2 #endif .endm @@ -305,15 +284,15 @@ ENDPROC(preserve_boot_args) * Corrupts: phys, start, end, pstate */ .macro create_block_map, tbl, flags, phys, start, end - lsr \phys, \phys, #BLOCK_SHIFT - lsr \start, \start, #BLOCK_SHIFT + lsr \phys, \phys, #SWAPPER_BLOCK_SHIFT + lsr \start, \start, #SWAPPER_BLOCK_SHIFT and \start, \start, #PTRS_PER_PTE - 1 // table index - orr \phys, \flags, \phys, lsl #BLOCK_SHIFT // table entry - lsr \end, \end, #BLOCK_SHIFT + orr \phys, \flags, \phys, lsl #SWAPPER_BLOCK_SHIFT // table entry + lsr \end, \end, #SWAPPER_BLOCK_SHIFT and \end, \end, #PTRS_PER_PTE - 1 // table end index 9999: str \phys, [\tbl, \start, lsl #3] // store the entry add \start, \start, #1 // next entry - add \phys, \phys, #BLOCK_SIZE // next block + add \phys, \phys, #SWAPPER_BLOCK_SIZE // next block cmp \start, \end b.ls 9999b .endm @@ -350,7 +329,7 @@ __create_page_tables: cmp x0, x6 b.lo 1b - ldr x7, =MM_MMUFLAGS + ldr x7, =SWAPPER_MM_MMUFLAGS /* * Create the identity mapping. diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 9807333..8a5d97b 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -5,6 +5,7 @@ */ #include +#include #include #include #include -- cgit v0.10.2 From b433dce056d3814dc4b33e5a8a533d6401ffcfb0 Mon Sep 17 00:00:00 2001 From: "Suzuki K. Poulose" Date: Mon, 19 Oct 2015 14:19:28 +0100 Subject: arm64: Handle section maps for swapper/idmap We use section maps with 4K page size to create the swapper/idmaps. So far we have used !64K or 4K checks to handle the case where we use the section maps. This patch adds a new symbol, ARM64_SWAPPER_USES_SECTION_MAPS, to handle cases where we use section maps, instead of using the page size symbols. Cc: Will Deacon Signed-off-by: Suzuki K. Poulose Reviewed-by: Ard Biesheuvel Acked-by: Mark Rutland Signed-off-by: Catalin Marinas diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h index 3c92fa7..4e08faa 100644 --- a/arch/arm64/include/asm/kernel-pgtable.h +++ b/arch/arm64/include/asm/kernel-pgtable.h @@ -19,6 +19,19 @@ #ifndef __ASM_KERNEL_PGTABLE_H #define __ASM_KERNEL_PGTABLE_H + +/* + * The linear mapping and the start of memory are both 2M aligned (per + * the arm64 booting.txt requirements). Hence we can use section mapping + * with 4K (section size = 2M) but not with 16K (section size = 32M) or + * 64K (section size = 512M). + */ +#ifdef CONFIG_ARM64_4K_PAGES +#define ARM64_SWAPPER_USES_SECTION_MAPS 1 +#else +#define ARM64_SWAPPER_USES_SECTION_MAPS 0 +#endif + /* * The idmap and swapper page tables need some space reserved in the kernel * image. Both require pgd, pud (4 levels only) and pmd tables to (section) @@ -28,26 +41,28 @@ * could be increased on the fly if system RAM is out of reach for the default * VA range, so 3 pages are reserved in all cases. */ -#ifdef CONFIG_ARM64_64K_PAGES -#define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS) -#else +#if ARM64_SWAPPER_USES_SECTION_MAPS #define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS - 1) +#else +#define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS) #endif #define SWAPPER_DIR_SIZE (SWAPPER_PGTABLE_LEVELS * PAGE_SIZE) #define IDMAP_DIR_SIZE (3 * PAGE_SIZE) /* Initial memory map size */ -#ifdef CONFIG_ARM64_64K_PAGES -#define SWAPPER_BLOCK_SHIFT PAGE_SHIFT -#define SWAPPER_BLOCK_SIZE PAGE_SIZE -#define SWAPPER_TABLE_SHIFT PMD_SHIFT -#else +#if ARM64_SWAPPER_USES_SECTION_MAPS #define SWAPPER_BLOCK_SHIFT SECTION_SHIFT #define SWAPPER_BLOCK_SIZE SECTION_SIZE #define SWAPPER_TABLE_SHIFT PUD_SHIFT +#else +#define SWAPPER_BLOCK_SHIFT PAGE_SHIFT +#define SWAPPER_BLOCK_SIZE PAGE_SIZE +#define SWAPPER_TABLE_SHIFT PMD_SHIFT #endif +/* The size of the initial kernel direct mapping */ +#define SWAPPER_INIT_MAP_SIZE (_AC(1, UL) << SWAPPER_TABLE_SHIFT) /* * Initial memory map attributes. @@ -55,10 +70,10 @@ #define SWAPPER_PTE_FLAGS (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED) #define SWAPPER_PMD_FLAGS (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S) -#ifdef CONFIG_ARM64_64K_PAGES -#define SWAPPER_MM_MMUFLAGS (PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS) -#else +#if ARM64_SWAPPER_USES_SECTION_MAPS #define SWAPPER_MM_MMUFLAGS (PMD_ATTRINDX(MT_NORMAL) | SWAPPER_PMD_FLAGS) +#else +#define SWAPPER_MM_MMUFLAGS (PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS) #endif diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index eed6d52..c2fa6b5 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -32,6 +32,7 @@ #include #include +#include #include #include #include @@ -406,14 +407,11 @@ static void __init map_mem(void) * memory addressable from the initial direct kernel mapping. * * The initial direct kernel mapping, located at swapper_pg_dir, gives - * us PUD_SIZE (4K pages) or PMD_SIZE (64K pages) memory starting from - * PHYS_OFFSET (which must be aligned to 2MB as per - * Documentation/arm64/booting.txt). + * us PUD_SIZE (with SECTION maps) or PMD_SIZE (without SECTION maps, + * memory starting from PHYS_OFFSET (which must be aligned to 2MB as + * per Documentation/arm64/booting.txt). */ - if (IS_ENABLED(CONFIG_ARM64_64K_PAGES)) - limit = PHYS_OFFSET + PMD_SIZE; - else - limit = PHYS_OFFSET + PUD_SIZE; + limit = PHYS_OFFSET + SWAPPER_INIT_MAP_SIZE; memblock_set_current_limit(limit); /* map all the memory banks */ @@ -424,21 +422,24 @@ static void __init map_mem(void) if (start >= end) break; -#ifndef CONFIG_ARM64_64K_PAGES - /* - * For the first memory bank align the start address and - * current memblock limit to prevent create_mapping() from - * allocating pte page tables from unmapped memory. - * When 64K pages are enabled, the pte page table for the - * first PGDIR_SIZE is already present in swapper_pg_dir. - */ - if (start < limit) - start = ALIGN(start, PMD_SIZE); - if (end < limit) { - limit = end & PMD_MASK; - memblock_set_current_limit(limit); + if (ARM64_SWAPPER_USES_SECTION_MAPS) { + /* + * For the first memory bank align the start address and + * current memblock limit to prevent create_mapping() from + * allocating pte page tables from unmapped memory. With + * the section maps, if the first block doesn't end on section + * size boundary, create_mapping() will try to allocate a pte + * page, which may be returned from an unmapped area. + * When section maps are not used, the pte page table for the + * current limit is already present in swapper_pg_dir. + */ + if (start < limit) + start = ALIGN(start, SECTION_SIZE); + if (end < limit) { + limit = end & SECTION_MASK; + memblock_set_current_limit(limit); + } } -#endif __map_memblock(start, end); } @@ -551,12 +552,12 @@ int kern_addr_valid(unsigned long addr) return pfn_valid(pte_pfn(*pte)); } #ifdef CONFIG_SPARSEMEM_VMEMMAP -#ifdef CONFIG_ARM64_64K_PAGES +#if !ARM64_SWAPPER_USES_SECTION_MAPS int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) { return vmemmap_populate_basepages(start, end, node); } -#else /* !CONFIG_ARM64_64K_PAGES */ +#else /* !ARM64_SWAPPER_USES_SECTION_MAPS */ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) { unsigned long addr = start; @@ -691,7 +692,7 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys) { const u64 dt_virt_base = __fix_to_virt(FIX_FDT); pgprot_t prot = PAGE_KERNEL | PTE_RDONLY; - int granularity, size, offset; + int size, offset; void *dt_virt; /* @@ -717,24 +718,15 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys) */ BUILD_BUG_ON(dt_virt_base % SZ_2M); - if (IS_ENABLED(CONFIG_ARM64_64K_PAGES)) { - BUILD_BUG_ON(__fix_to_virt(FIX_FDT_END) >> PMD_SHIFT != - __fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT); - - granularity = PAGE_SIZE; - } else { - BUILD_BUG_ON(__fix_to_virt(FIX_FDT_END) >> PUD_SHIFT != - __fix_to_virt(FIX_BTMAP_BEGIN) >> PUD_SHIFT); - - granularity = PMD_SIZE; - } + BUILD_BUG_ON(__fix_to_virt(FIX_FDT_END) >> SWAPPER_TABLE_SHIFT != + __fix_to_virt(FIX_BTMAP_BEGIN) >> SWAPPER_TABLE_SHIFT); - offset = dt_phys % granularity; + offset = dt_phys % SWAPPER_BLOCK_SIZE; dt_virt = (void *)dt_virt_base + offset; /* map the first chunk so we can read the size from the header */ - create_mapping(round_down(dt_phys, granularity), dt_virt_base, - granularity, prot); + create_mapping(round_down(dt_phys, SWAPPER_BLOCK_SIZE), dt_virt_base, + SWAPPER_BLOCK_SIZE, prot); if (fdt_check_header(dt_virt) != 0) return NULL; @@ -743,9 +735,9 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys) if (size > MAX_FDT_SIZE) return NULL; - if (offset + size > granularity) - create_mapping(round_down(dt_phys, granularity), dt_virt_base, - round_up(offset + size, granularity), prot); + if (offset + size > SWAPPER_BLOCK_SIZE) + create_mapping(round_down(dt_phys, SWAPPER_BLOCK_SIZE), dt_virt_base, + round_up(offset + size, SWAPPER_BLOCK_SIZE), prot); memblock_reserve(dt_phys, size); -- cgit v0.10.2 From 686e783869d37935510b4d2c266948677be82665 Mon Sep 17 00:00:00 2001 From: "Suzuki K. Poulose" Date: Mon, 19 Oct 2015 14:19:29 +0100 Subject: arm64: Introduce helpers for page table levels Introduce helpers for finding the number of page table levels required for a given VA width, shift for a particular page table level. Convert the existing users to the new helpers. More users to follow. Cc: Ard Biesheuvel Cc: Will Deacon Cc: Marc Zyngier Signed-off-by: Suzuki K. Poulose Acked-by: Christoffer Dall Acked-by: Mark Rutland Signed-off-by: Catalin Marinas diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index 95c1ec0..d6739e8 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -16,13 +16,46 @@ #ifndef __ASM_PGTABLE_HWDEF_H #define __ASM_PGTABLE_HWDEF_H +/* + * Number of page-table levels required to address 'va_bits' wide + * address, without section mapping. We resolve the top (va_bits - PAGE_SHIFT) + * bits with (PAGE_SHIFT - 3) bits at each page table level. Hence: + * + * levels = DIV_ROUND_UP((va_bits - PAGE_SHIFT), (PAGE_SHIFT - 3)) + * + * where DIV_ROUND_UP(n, d) => (((n) + (d) - 1) / (d)) + * + * We cannot include linux/kernel.h which defines DIV_ROUND_UP here + * due to build issues. So we open code DIV_ROUND_UP here: + * + * ((((va_bits) - PAGE_SHIFT) + (PAGE_SHIFT - 3) - 1) / (PAGE_SHIFT - 3)) + * + * which gets simplified as : + */ +#define ARM64_HW_PGTABLE_LEVELS(va_bits) (((va_bits) - 4) / (PAGE_SHIFT - 3)) + +/* + * Size mapped by an entry at level n ( 0 <= n <= 3) + * We map (PAGE_SHIFT - 3) at all translation levels and PAGE_SHIFT bits + * in the final page. The maximum number of translation levels supported by + * the architecture is 4. Hence, starting at at level n, we have further + * ((4 - n) - 1) levels of translation excluding the offset within the page. + * So, the total number of bits mapped by an entry at level n is : + * + * ((4 - n) - 1) * (PAGE_SHIFT - 3) + PAGE_SHIFT + * + * Rearranging it a bit we get : + * (4 - n) * (PAGE_SHIFT - 3) + 3 + */ +#define ARM64_HW_PGTABLE_LEVEL_SHIFT(n) ((PAGE_SHIFT - 3) * (4 - (n)) + 3) + #define PTRS_PER_PTE (1 << (PAGE_SHIFT - 3)) /* * PMD_SHIFT determines the size a level 2 page table entry can map. */ #if CONFIG_PGTABLE_LEVELS > 2 -#define PMD_SHIFT ((PAGE_SHIFT - 3) * 2 + 3) +#define PMD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(2) #define PMD_SIZE (_AC(1, UL) << PMD_SHIFT) #define PMD_MASK (~(PMD_SIZE-1)) #define PTRS_PER_PMD PTRS_PER_PTE @@ -32,7 +65,7 @@ * PUD_SHIFT determines the size a level 1 page table entry can map. */ #if CONFIG_PGTABLE_LEVELS > 3 -#define PUD_SHIFT ((PAGE_SHIFT - 3) * 3 + 3) +#define PUD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(1) #define PUD_SIZE (_AC(1, UL) << PUD_SHIFT) #define PUD_MASK (~(PUD_SIZE-1)) #define PTRS_PER_PUD PTRS_PER_PTE @@ -42,7 +75,7 @@ * PGDIR_SHIFT determines the size a top-level page table entry can map * (depending on the configuration, this level can be 0, 1 or 2). */ -#define PGDIR_SHIFT ((PAGE_SHIFT - 3) * CONFIG_PGTABLE_LEVELS + 3) +#define PGDIR_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(4 - CONFIG_PGTABLE_LEVELS) #define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE-1)) #define PTRS_PER_PGD (1 << (VA_BITS - PGDIR_SHIFT)) -- cgit v0.10.2 From c265af51c5f17ec5c0cf0d960133bd6fe5f39eb9 Mon Sep 17 00:00:00 2001 From: "Suzuki K. Poulose" Date: Mon, 19 Oct 2015 14:19:30 +0100 Subject: arm64: Calculate size for idmap_pg_dir at compile time Now that we can calculate the number of levels required for mapping a va width, reserve exact number of pages that would be required to cover the idmap. The idmap should be able to handle the maximum physical address size supported. Cc: Will Deacon Signed-off-by: Suzuki K. Poulose Reviewed-by: Ard Biesheuvel Tested-by: Ard Biesheuvel Acked-by: Mark Rutland Signed-off-by: Catalin Marinas diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h index 4e08faa..a459714 100644 --- a/arch/arm64/include/asm/kernel-pgtable.h +++ b/arch/arm64/include/asm/kernel-pgtable.h @@ -39,16 +39,19 @@ * map to pte level. The swapper also maps the FDT (see __create_page_tables * for more information). Note that the number of ID map translation levels * could be increased on the fly if system RAM is out of reach for the default - * VA range, so 3 pages are reserved in all cases. + * VA range, so pages required to map highest possible PA are reserved in all + * cases. */ #if ARM64_SWAPPER_USES_SECTION_MAPS #define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS - 1) +#define IDMAP_PGTABLE_LEVELS (ARM64_HW_PGTABLE_LEVELS(PHYS_MASK_SHIFT) - 1) #else #define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS) +#define IDMAP_PGTABLE_LEVELS (ARM64_HW_PGTABLE_LEVELS(PHYS_MASK_SHIFT)) #endif #define SWAPPER_DIR_SIZE (SWAPPER_PGTABLE_LEVELS * PAGE_SIZE) -#define IDMAP_DIR_SIZE (3 * PAGE_SIZE) +#define IDMAP_DIR_SIZE (IDMAP_PGTABLE_LEVELS * PAGE_SIZE) /* Initial memory map size */ #if ARM64_SWAPPER_USES_SECTION_MAPS -- cgit v0.10.2 From 6a3fd4026c0c0ac279265c2a5d228233b5bbd28f Mon Sep 17 00:00:00 2001 From: "Suzuki K. Poulose" Date: Mon, 19 Oct 2015 14:19:31 +0100 Subject: arm64: Handle 4 level page table for swapper At the moment, we only support maximum of 3-level page table for swapper. With 48bit VA, 64K has only 3 levels and 4K uses section mapping. Add support for 4-level page table for swapper, needed by 16K pages. Cc: Will Deacon Signed-off-by: Suzuki K. Poulose Reviewed-by: Ard Biesheuvel Tested-by: Ard Biesheuvel Acked-by: Mark Rutland Signed-off-by: Catalin Marinas diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 149fce3..7ace955 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -271,7 +271,10 @@ ENDPROC(preserve_boot_args) */ .macro create_pgd_entry, tbl, virt, tmp1, tmp2 create_table_entry \tbl, \virt, PGDIR_SHIFT, PTRS_PER_PGD, \tmp1, \tmp2 -#if SWAPPER_PGTABLE_LEVELS == 3 +#if SWAPPER_PGTABLE_LEVELS > 3 + create_table_entry \tbl, \virt, PUD_SHIFT, PTRS_PER_PUD, \tmp1, \tmp2 +#endif +#if SWAPPER_PGTABLE_LEVELS > 2 create_table_entry \tbl, \virt, SWAPPER_TABLE_SHIFT, PTRS_PER_PTE, \tmp1, \tmp2 #endif .endm -- cgit v0.10.2 From 755e70b7e3f189aa2503c510fb98208e477a5030 Mon Sep 17 00:00:00 2001 From: "Suzuki K. Poulose" Date: Mon, 19 Oct 2015 14:19:32 +0100 Subject: arm64: Clean config usages for page size We use !CONFIG_ARM64_64K_PAGES for CONFIG_ARM64_4K_PAGES (and vice versa) in code. It all worked well, so far since we only had two options. Now, with the introduction of 16K, these cases will break. This patch cleans up the code to use the required CONFIG symbol expression without the assumption that !64K => 4K (and vice versa) Cc: Will Deacon Acked-by: Mark Rutland Signed-off-by: Suzuki K. Poulose Reviewed-by: Ard Biesheuvel Tested-by: Ard Biesheuvel Signed-off-by: Catalin Marinas diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 2782c11..4654c27 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -466,7 +466,7 @@ config ARCH_WANT_GENERAL_HUGETLB def_bool y config ARCH_WANT_HUGE_PMD_SHARE - def_bool y if !ARM64_64K_PAGES + def_bool y if ARM64_4K_PAGES config HAVE_ARCH_TRANSPARENT_HUGEPAGE def_bool y @@ -678,7 +678,7 @@ source "fs/Kconfig.binfmt" config COMPAT bool "Kernel support for 32-bit EL0" - depends on !ARM64_64K_PAGES || EXPERT + depends on ARM64_4K_PAGES || EXPERT select COMPAT_BINFMT_ELF select HAVE_UID16 select OLD_SIGSUSPEND3 diff --git a/arch/arm64/Kconfig.debug b/arch/arm64/Kconfig.debug index d6285ef..c24d6ad 100644 --- a/arch/arm64/Kconfig.debug +++ b/arch/arm64/Kconfig.debug @@ -77,7 +77,7 @@ config DEBUG_RODATA If in doubt, say Y config DEBUG_ALIGN_RODATA - depends on DEBUG_RODATA && !ARM64_64K_PAGES + depends on DEBUG_RODATA && ARM64_4K_PAGES bool "Align linker sections up to SECTION_SIZE" help If this option is enabled, sections that may potentially be marked as diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index 555c6de..5eac6a2 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -23,7 +23,7 @@ #include -#ifndef CONFIG_ARM64_64K_PAGES +#ifdef CONFIG_ARM64_4K_PAGES #define THREAD_SIZE_ORDER 2 #endif -- cgit v0.10.2 From e25781e3dea914cc4c34c946bae5fa3d4516516f Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 19 Oct 2015 14:19:33 +0100 Subject: arm64: Simplify NR_FIX_BTMAPS calculation We choose NR_FIX_BTMAPS such that each slot (NR_FIX_BTMAPS * PAGE_SIZE) can address 256K. Use division to derive NR_FIX_BTMAPS rather than defining it for each page size. Signed-off-by: Mark Rutland Signed-off-by: Suzuki K. Poulose Signed-off-by: Catalin Marinas diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h index 8b9884c..3097045 100644 --- a/arch/arm64/include/asm/fixmap.h +++ b/arch/arm64/include/asm/fixmap.h @@ -17,6 +17,7 @@ #ifndef __ASSEMBLY__ #include +#include #include #include @@ -55,11 +56,7 @@ enum fixed_addresses { * Temporary boot-time mappings, used by early_ioremap(), * before ioremap() is functional. */ -#ifdef CONFIG_ARM64_64K_PAGES -#define NR_FIX_BTMAPS 4 -#else -#define NR_FIX_BTMAPS 64 -#endif +#define NR_FIX_BTMAPS (SZ_256K / PAGE_SIZE) #define FIX_BTMAPS_SLOTS 7 #define TOTAL_FIX_BTMAPS (NR_FIX_BTMAPS * FIX_BTMAPS_SLOTS) -- cgit v0.10.2 From db488be354bc85724d7b9523e94435fdaa761a35 Mon Sep 17 00:00:00 2001 From: "Suzuki K. Poulose" Date: Mon, 19 Oct 2015 14:19:34 +0100 Subject: arm64: Kconfig: Fix help text about AArch32 support with 64K pages Update the help text for ARM64_64K_PAGES to reflect the reality about AArch32 support. Cc: Will Deacon Signed-off-by: Suzuki K. Poulose Reviewed-by: Ard Biesheuvel Tested-by: Ard Biesheuvel Acked-by: Mark Rutland Signed-off-by: Catalin Marinas diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 4654c27..13b51a0 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -368,8 +368,8 @@ config ARM64_64K_PAGES help This feature enables 64KB pages support (4KB by default) allowing only two levels of page tables and faster TLB - look-up. AArch32 emulation is not available when this feature - is enabled. + look-up. AArch32 emulation requires applications compiled + with 64K aligned segments. endchoice -- cgit v0.10.2 From 4bf8b96ed3f7e11422d8b4f58cf43896ed02d1f6 Mon Sep 17 00:00:00 2001 From: "Suzuki K. Poulose" Date: Mon, 19 Oct 2015 14:19:35 +0100 Subject: arm64: Check for selected granule support Ensure that the selected page size is supported by the CPU(s). If it doesn't park it. Cc: Will Deacon Signed-off-by: Suzuki K. Poulose Reviewed-by: Ard Biesheuvel Tested-by: Ard Biesheuvel Acked-by: Mark Rutland Signed-off-by: Catalin Marinas diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index a7f3d4b..d59cb23 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -44,6 +44,26 @@ #define SET_PSTATE_PAN(x) __inst_arm(0xd5000000 | REG_PSTATE_PAN_IMM |\ (!!x)<<8 | 0x1f) + +#define ID_AA64MMFR0_TGRAN4_SHIFT 28 +#define ID_AA64MMFR0_TGRAN64_SHIFT 24 +#define ID_AA64MMFR0_TGRAN16_SHIFT 20 + +#define ID_AA64MMFR0_TGRAN4_NI 0xf +#define ID_AA64MMFR0_TGRAN4_SUPPORTED 0x0 +#define ID_AA64MMFR0_TGRAN64_NI 0xf +#define ID_AA64MMFR0_TGRAN64_SUPPORTED 0x0 +#define ID_AA64MMFR0_TGRAN16_NI 0x0 +#define ID_AA64MMFR0_TGRAN16_SUPPORTED 0x1 + +#if defined(CONFIG_ARM64_4K_PAGES) +#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN4_SHIFT +#define ID_AA64MMFR0_TGRAN_SUPPORTED ID_AA64MMFR0_TGRAN4_SUPPORTED +#elif defined(CONFIG_ARM64_64K_PAGES) +#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN64_SHIFT +#define ID_AA64MMFR0_TGRAN_SUPPORTED ID_AA64MMFR0_TGRAN64_SUPPORTED +#endif + #ifdef __ASSEMBLY__ .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30 diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 7ace955..514c1cc 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -31,10 +31,11 @@ #include #include #include -#include #include #include #include +#include +#include #include #define __PHYS_OFFSET (KERNEL_START - TEXT_OFFSET) @@ -613,10 +614,17 @@ ENDPROC(__secondary_switched) * x0 = SCTLR_EL1 value for turning on the MMU. * x27 = *virtual* address to jump to upon completion * - * other registers depend on the function called upon completion + * Other registers depend on the function called upon completion. + * + * Checks if the selected granule size is supported by the CPU. + * If it isn't, park the CPU */ .section ".idmap.text", "ax" __enable_mmu: + mrs x1, ID_AA64MMFR0_EL1 + ubfx x2, x1, #ID_AA64MMFR0_TGRAN_SHIFT, 4 + cmp x2, #ID_AA64MMFR0_TGRAN_SUPPORTED + b.ne __no_granule_support ldr x5, =vectors msr vbar_el1, x5 msr ttbr0_el1, x25 // load TTBR0 @@ -634,3 +642,8 @@ __enable_mmu: isb br x27 ENDPROC(__enable_mmu) + +__no_granule_support: + wfe + b __no_granule_support +ENDPROC(__no_granule_support) -- cgit v0.10.2 From 9d372c9fab34cd8803141871195141995f85c7f7 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 19 Oct 2015 14:19:36 +0100 Subject: arm64: Add page size to the kernel image header This patch adds the page size to the arm64 kernel image header so that one can infer the PAGESIZE used by the kernel. This will be helpful to diagnose failures to boot the kernel with page size not supported by the CPU. Signed-off-by: Ard Biesheuvel Signed-off-by: Suzuki K. Poulose Acked-by: Catalin Marinas Reviewed-by: Christoffer Dall Acked-by: Mark Rutland Signed-off-by: Catalin Marinas diff --git a/Documentation/arm64/booting.txt b/Documentation/arm64/booting.txt index 7d9d3c2..aaf6d77 100644 --- a/Documentation/arm64/booting.txt +++ b/Documentation/arm64/booting.txt @@ -104,7 +104,12 @@ Header notes: - The flags field (introduced in v3.17) is a little-endian 64-bit field composed as follows: Bit 0: Kernel endianness. 1 if BE, 0 if LE. - Bits 1-63: Reserved. + Bit 1-2: Kernel Page size. + 0 - Unspecified. + 1 - 4K + 2 - 16K + 3 - 64K + Bits 3-63: Reserved. - When image_size is zero, a bootloader should attempt to keep as much memory as possible free for use by the kernel immediately after the diff --git a/arch/arm64/kernel/image.h b/arch/arm64/kernel/image.h index 6eb8fee..bc2abb8 100644 --- a/arch/arm64/kernel/image.h +++ b/arch/arm64/kernel/image.h @@ -47,7 +47,10 @@ #define __HEAD_FLAG_BE 0 #endif -#define __HEAD_FLAGS (__HEAD_FLAG_BE << 0) +#define __HEAD_FLAG_PAGE_SIZE ((PAGE_SHIFT - 10) / 2) + +#define __HEAD_FLAGS ((__HEAD_FLAG_BE << 0) | \ + (__HEAD_FLAG_PAGE_SIZE << 1)) /* * These will output as part of the Image header, which should be little-endian -- cgit v0.10.2 From 44eaacf1b8999b15cec89bd9d9cd989da4798d53 Mon Sep 17 00:00:00 2001 From: "Suzuki K. Poulose" Date: Mon, 19 Oct 2015 14:19:37 +0100 Subject: arm64: Add 16K page size support This patch turns on the 16K page support in the kernel. We support 48bit VA (4 level page tables) and 47bit VA (3 level page tables). With 16K we can map 128 entries using contiguous bit hint at level 3 to map 2M using single TLB entry. TODO: 16K supports 32 contiguous entries at level 2 to get us 1G(which is not yet supported by the infrastructure). That should be a separate patch altogether. Cc: Will Deacon Cc: Jeremy Linton Cc: Marc Zyngier Cc: Christoffer Dall Cc: Steve Capper Signed-off-by: Suzuki K. Poulose Reviewed-by: Ard Biesheuvel Tested-by: Ard Biesheuvel Acked-by: Mark Rutland Signed-off-by: Catalin Marinas diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 13b51a0..8541664 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -173,7 +173,8 @@ config PGTABLE_LEVELS default 2 if ARM64_64K_PAGES && ARM64_VA_BITS_42 default 3 if ARM64_64K_PAGES && ARM64_VA_BITS_48 default 3 if ARM64_4K_PAGES && ARM64_VA_BITS_39 - default 4 if ARM64_4K_PAGES && ARM64_VA_BITS_48 + default 3 if ARM64_16K_PAGES && ARM64_VA_BITS_47 + default 4 if !ARM64_64K_PAGES && ARM64_VA_BITS_48 source "init/Kconfig" @@ -363,6 +364,13 @@ config ARM64_4K_PAGES help This feature enables 4KB pages support. +config ARM64_16K_PAGES + bool "16KB" + help + The system will use 16KB pages support. AArch32 emulation + requires applications compiled with 16K (or a multiple of 16K) + aligned segments. + config ARM64_64K_PAGES bool "64KB" help @@ -376,6 +384,7 @@ endchoice choice prompt "Virtual address space size" default ARM64_VA_BITS_39 if ARM64_4K_PAGES + default ARM64_VA_BITS_47 if ARM64_16K_PAGES default ARM64_VA_BITS_42 if ARM64_64K_PAGES help Allows choosing one of multiple possible virtual address @@ -390,6 +399,10 @@ config ARM64_VA_BITS_42 bool "42-bit" depends on ARM64_64K_PAGES +config ARM64_VA_BITS_47 + bool "47-bit" + depends on ARM64_16K_PAGES + config ARM64_VA_BITS_48 bool "48-bit" @@ -399,6 +412,7 @@ config ARM64_VA_BITS int default 39 if ARM64_VA_BITS_39 default 42 if ARM64_VA_BITS_42 + default 47 if ARM64_VA_BITS_47 default 48 if ARM64_VA_BITS_48 config CPU_BIG_ENDIAN @@ -466,7 +480,7 @@ config ARCH_WANT_GENERAL_HUGETLB def_bool y config ARCH_WANT_HUGE_PMD_SHARE - def_bool y if ARM64_4K_PAGES + def_bool y if ARM64_4K_PAGES || ARM64_16K_PAGES config HAVE_ARCH_TRANSPARENT_HUGEPAGE def_bool y @@ -503,7 +517,25 @@ config XEN config FORCE_MAX_ZONEORDER int default "14" if (ARM64_64K_PAGES && TRANSPARENT_HUGEPAGE) + default "12" if (ARM64_16K_PAGES && TRANSPARENT_HUGEPAGE) default "11" + help + The kernel memory allocator divides physically contiguous memory + blocks into "zones", where each zone is a power of two number of + pages. This option selects the largest power of two that the kernel + keeps in the memory allocator. If you need to allocate very large + blocks of physically contiguous memory, then you may need to + increase this value. + + This config option is actually maximum order plus one. For example, + a value of 11 means that the largest free memory block is 2^10 pages. + + We make sure that we can allocate upto a HugePage size for each configuration. + Hence we have : + MAX_ORDER = (PMD_SHIFT - PAGE_SHIFT) + 1 => PAGE_SHIFT - 2 + + However for 4K, we choose a higher default value, 11 as opposed to 10, giving us + 4M allocations matching the default size used by generic code. menuconfig ARMV8_DEPRECATED bool "Emulate deprecated/obsolete ARMv8 instructions" @@ -689,9 +721,9 @@ config COMPAT the user helper functions, VFP support and the ptrace interface are handled appropriately by the kernel. - If you also enabled CONFIG_ARM64_64K_PAGES, please be aware that you - will only be able to execute AArch32 binaries that were compiled with - 64k aligned segments. + If you use a page size other than 4KB (i.e, 16KB or 64KB), please be aware + that you will only be able to execute AArch32 binaries that were compiled + with page size aligned segments. If you want to execute 32-bit userspace applications, say Y. diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h index da32354..9b2f5a9 100644 --- a/arch/arm64/include/asm/page.h +++ b/arch/arm64/include/asm/page.h @@ -24,6 +24,9 @@ #ifdef CONFIG_ARM64_64K_PAGES #define PAGE_SHIFT 16 #define CONT_SHIFT 5 +#elif defined(CONFIG_ARM64_16K_PAGES) +#define PAGE_SHIFT 14 +#define CONT_SHIFT 7 #else #define PAGE_SHIFT 12 #define CONT_SHIFT 4 diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index d59cb23..4b57e10 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -59,6 +59,9 @@ #if defined(CONFIG_ARM64_4K_PAGES) #define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN4_SHIFT #define ID_AA64MMFR0_TGRAN_SUPPORTED ID_AA64MMFR0_TGRAN4_SUPPORTED +#elif defined(CONFIG_ARM64_16K_PAGES) +#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN16_SHIFT +#define ID_AA64MMFR0_TGRAN_SUPPORTED ID_AA64MMFR0_TGRAN16_SUPPORTED #elif defined(CONFIG_ARM64_64K_PAGES) #define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN64_SHIFT #define ID_AA64MMFR0_TGRAN_SUPPORTED ID_AA64MMFR0_TGRAN64_SUPPORTED diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index 5eac6a2..90c7ff2 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -25,6 +25,8 @@ #ifdef CONFIG_ARM64_4K_PAGES #define THREAD_SIZE_ORDER 2 +#elif defined(CONFIG_ARM64_16K_PAGES) +#define THREAD_SIZE_ORDER 0 #endif #define THREAD_SIZE 16384 diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index 5c7e920e..6a7d5cd 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -19,6 +19,7 @@ if VIRTUALIZATION config KVM bool "Kernel-based Virtual Machine (KVM) support" depends on OF + depends on !ARM64_16K_PAGES select MMU_NOTIFIER select PREEMPT_NOTIFIERS select ANON_INODES @@ -33,6 +34,8 @@ config KVM select HAVE_KVM_IRQFD ---help--- Support hosting virtualized guest machines. + We don't support KVM with 16K page tables yet, due to the multiple + levels of fake page tables. If unsure, say N. diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 91cb2ea..3a4b8b1 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -30,7 +30,9 @@ #ifdef CONFIG_ARM64_64K_PAGES #define TCR_TG_FLAGS TCR_TG0_64K | TCR_TG1_64K -#else +#elif defined(CONFIG_ARM64_16K_PAGES) +#define TCR_TG_FLAGS TCR_TG0_16K | TCR_TG1_16K +#else /* CONFIG_ARM64_4K_PAGES */ #define TCR_TG_FLAGS TCR_TG0_4K | TCR_TG1_4K #endif -- cgit v0.10.2 From 215399392fe40f33880ea9de49a1ed8ee26edd10 Mon Sep 17 00:00:00 2001 From: "Suzuki K. Poulose" Date: Mon, 19 Oct 2015 14:19:38 +0100 Subject: arm64: 36 bit VA 36bit VA lets us use 2 level page tables while limiting the available address space to 64GB. Cc: Will Deacon Cc: Steve Capper Signed-off-by: Suzuki K. Poulose Reviewed-by: Ard Biesheuvel Tested-by: Ard Biesheuvel Acked-by: Mark Rutland Signed-off-by: Catalin Marinas diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 8541664..a346671 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -170,6 +170,7 @@ config FIX_EARLYCON_MEM config PGTABLE_LEVELS int + default 2 if ARM64_16K_PAGES && ARM64_VA_BITS_36 default 2 if ARM64_64K_PAGES && ARM64_VA_BITS_42 default 3 if ARM64_64K_PAGES && ARM64_VA_BITS_48 default 3 if ARM64_4K_PAGES && ARM64_VA_BITS_39 @@ -391,6 +392,10 @@ choice space sizes. The level of translation table is determined by a combination of page size and virtual address space size. +config ARM64_VA_BITS_36 + bool "36-bit" + depends on ARM64_16K_PAGES + config ARM64_VA_BITS_39 bool "39-bit" depends on ARM64_4K_PAGES @@ -410,6 +415,7 @@ endchoice config ARM64_VA_BITS int + default 36 if ARM64_VA_BITS_36 default 39 if ARM64_VA_BITS_39 default 42 if ARM64_VA_BITS_42 default 47 if ARM64_VA_BITS_47 @@ -480,7 +486,7 @@ config ARCH_WANT_GENERAL_HUGETLB def_bool y config ARCH_WANT_HUGE_PMD_SHARE - def_bool y if ARM64_4K_PAGES || ARM64_16K_PAGES + def_bool y if ARM64_4K_PAGES || (ARM64_16K_PAGES && !ARM64_VA_BITS_36) config HAVE_ARCH_TRANSPARENT_HUGEPAGE def_bool y -- cgit v0.10.2 From 096b3224d5e7239ec3e5033bbc7612ac2d5dec3a Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Wed, 16 Sep 2015 22:23:21 +0800 Subject: arm64: add cpu_idle tracepoints to arch_cpu_idle Currently, if cpuidle is disabled or not supported, powertop reports zero wakeups and zero events. This is due to the cpu_idle tracepoints are missing. This patch is to make cpu_idle tracepoints always available even if cpuidle is disabled or not supported. Signed-off-by: Jisheng Zhang Acked-by: Lorenzo Pieralisi Signed-off-by: Catalin Marinas diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 223b093..f75b540 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -44,6 +44,7 @@ #include #include #include +#include #include #include @@ -75,8 +76,10 @@ void arch_cpu_idle(void) * This should do all the clock switching and wait for interrupt * tricks */ + trace_cpu_idle_rcuidle(1, smp_processor_id()); cpu_do_idle(); local_irq_enable(); + trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); } #ifdef CONFIG_HOTPLUG_CPU -- cgit v0.10.2 From 9f93f3e9461a30f425fdba15784db67ce878ce00 Mon Sep 17 00:00:00 2001 From: Jungseok Lee Date: Sat, 17 Oct 2015 14:28:11 +0000 Subject: arm64: Synchronise dump_backtrace() with perf callchain Unlike perf callchain relying on walk_stackframe(), dump_backtrace() has its own backtrace logic. A major difference between them is the moment a symbol is recorded. Perf writes down a symbol *before* calling unwind_frame(), but dump_backtrace() prints it out *after* unwind_frame(). As a result, the last valid symbol cannot be hooked in case of dump_backtrace(). This patch addresses the issue as synchronising dump_backtrace() with perf callchain. A simple test and its results are as follows: - crash trigger $ sudo echo c > /proc/sysrq-trigger - current status Call trace: [] sysrq_handle_crash+0x24/0x30 [] __handle_sysrq+0x128/0x19c [] write_sysrq_trigger+0x60/0x74 [] proc_reg_write+0x84/0xc0 [] __vfs_write+0x44/0x104 [] vfs_write+0x98/0x1a8 [] SyS_write+0x50/0xb0 - with this change Call trace: [] sysrq_handle_crash+0x24/0x30 [] __handle_sysrq+0x128/0x19c [] write_sysrq_trigger+0x60/0x74 [] proc_reg_write+0x84/0xc0 [] __vfs_write+0x44/0x104 [] vfs_write+0x98/0x1a8 [] SyS_write+0x50/0xb0 [] el0_svc_naked+0x20/0x28 Note that this patch does not cover a case where MMU is disabled. The last stack frame of swapper, for example, has PC in a form of physical address. Unfortunately, a simple conversion using phys_to_virt() cannot cover all scenarios since PC is retrieved from LR - 4, not LR. It is a big tradeoff to change both head.S and unwind_frame() for only a few of symbols in *.S. Thus, this hunk does not take care of the case. Cc: AKASHI Takahiro Cc: James Morse Cc: Mark Rutland Signed-off-by: Jungseok Lee Signed-off-by: Catalin Marinas diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index f93aae5..e9b9b53 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -103,12 +103,12 @@ static void dump_mem(const char *lvl, const char *str, unsigned long bottom, set_fs(fs); } -static void dump_backtrace_entry(unsigned long where, unsigned long stack) +static void dump_backtrace_entry(unsigned long where) { + /* + * Note that 'where' can have a physical address, but it's not handled. + */ print_ip_sym(where); - if (in_exception_text(where)) - dump_mem("", "Exception stack", stack, - stack + sizeof(struct pt_regs), false); } static void dump_instr(const char *lvl, struct pt_regs *regs) @@ -172,12 +172,17 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) pr_emerg("Call trace:\n"); while (1) { unsigned long where = frame.pc; + unsigned long stack; int ret; + dump_backtrace_entry(where); ret = unwind_frame(&frame); if (ret < 0) break; - dump_backtrace_entry(where, frame.sp); + stack = frame.sp; + if (in_exception_text(where)) + dump_mem("", "Exception stack", stack, + stack + sizeof(struct pt_regs), false); } } -- cgit v0.10.2 From 56a3f30e029396948989b96716f27b87e3510e0f Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 20 Oct 2015 14:59:20 +0100 Subject: arm64: Make 36-bit VA depend on EXPERT Commit 215399392fe4 (arm64: 36 bit VA) introduced 36-bit VA support for the arm64 kernel when the 16KB page configuration is enabled. While this is a valid hardware configuration, it's not something we want to encourage since it reduces the memory (and I/O) range that the kernel can access. Make this depend on EXPERT to avoid complaints of Linux not mapping the whole RAM, especially on platforms following the ARM recommended memory map. Signed-off-by: Catalin Marinas diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index a346671..c62fb03 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -393,7 +393,7 @@ choice a combination of page size and virtual address space size. config ARM64_VA_BITS_36 - bool "36-bit" + bool "36-bit" if EXPERT depends on ARM64_16K_PAGES config ARM64_VA_BITS_39 -- cgit v0.10.2 From 64f17818977d0989f7d05347670777611b295799 Mon Sep 17 00:00:00 2001 From: "Suzuki K. Poulose" Date: Mon, 19 Oct 2015 14:24:38 +0100 Subject: arm64: Make the CPU information more clear At early boot, we print the CPU version/revision. On a heterogeneous system, we could have different types of CPUs. Print the CPU info for all active cpus. Also, the secondary CPUs prints the message only when they turn online. Also, remove the redundant 'revision' information which doesn't make any sense without the 'variant' field. Signed-off-by: Suzuki K. Poulose Tested-by: Dave Martin Signed-off-by: Catalin Marinas diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 79df79a..1d503e2 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -203,8 +203,7 @@ static void __init setup_processor(void) u32 cwg; int cls; - printk("CPU: AArch64 Processor [%08x] revision %d\n", - read_cpuid_id(), read_cpuid_id() & 15); + pr_info("Boot CPU: AArch64 Processor [%08x]\n", read_cpuid_id()); sprintf(init_utsname()->machine, ELF_PLATFORM); elf_hwcap = 0; diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index b7a973d..c1d044b 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -144,7 +144,6 @@ asmlinkage void secondary_start_kernel(void) current->active_mm = mm; set_my_cpu_offset(per_cpu_offset(smp_processor_id())); - printk("CPU%u: Booted secondary processor\n", cpu); /* * TTBR0 is only used for the identity mapping at this stage. Make it @@ -177,6 +176,8 @@ asmlinkage void secondary_start_kernel(void) * the CPU migration code to notice that the CPU is online * before we continue. */ + pr_info("CPU%u: Booted secondary processor [%08x]\n", + cpu, read_cpuid_id()); set_cpu_online(cpu, true); complete(&cpu_running); -- cgit v0.10.2 From 3a75578efae64b94d76eacbf8adf2a3ab13c6aa1 Mon Sep 17 00:00:00 2001 From: "Suzuki K. Poulose" Date: Mon, 19 Oct 2015 14:24:39 +0100 Subject: arm64: Delay ELF HWCAP initialisation until all CPUs are up Delay the ELF HWCAP initialisation until all the (enabled) CPUs are up, i.e, smp_cpus_done(). This is in preparation for detecting the common features across the CPUS and creating a consistent ELF HWCAP for the system. Signed-off-by: Suzuki K. Poulose Tested-by: Dave Martin Signed-off-by: Catalin Marinas diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 1715707..b7769f6 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -81,6 +81,7 @@ static inline int __attribute_const__ cpuid_feature_extract_field(u64 features, return (s64)(features << (64 - 4 - field)) >> (64 - 4); } +void __init setup_cpu_features(void); void check_cpu_capabilities(const struct arm64_cpu_capabilities *caps, const char *info); diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 1d503e2..4f0408e 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -196,20 +196,13 @@ static void __init smp_build_mpidr_hash(void) __flush_dcache_area(&mpidr_hash, sizeof(struct mpidr_hash)); } -static void __init setup_processor(void) +void __init setup_cpu_features(void) { u64 features; s64 block; u32 cwg; int cls; - pr_info("Boot CPU: AArch64 Processor [%08x]\n", read_cpuid_id()); - - sprintf(init_utsname()->machine, ELF_PLATFORM); - elf_hwcap = 0; - - cpuinfo_store_boot_cpu(); - /* * Check for sane CTR_EL0.CWG value. */ @@ -293,6 +286,13 @@ static void __init setup_processor(void) #endif } +static void __init setup_processor(void) +{ + pr_info("Boot CPU: AArch64 Processor [%08x]\n", read_cpuid_id()); + sprintf(init_utsname()->machine, ELF_PLATFORM); + cpuinfo_store_boot_cpu(); +} + static void __init setup_machine_fdt(phys_addr_t dt_phys) { void *dt_virt = fixmap_remap_fdt(dt_phys); diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index c1d044b..77763d9 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -320,6 +320,7 @@ static void __init hyp_mode_check(void) void __init smp_cpus_done(unsigned int max_cpus) { pr_info("SMP: Total of %d processors activated.\n", num_online_cpus()); + setup_cpu_features(); hyp_mode_check(); apply_alternatives_all(); } -- cgit v0.10.2 From 4b998ff1885eecd3dc330bf057e24667c1db84a4 Mon Sep 17 00:00:00 2001 From: "Suzuki K. Poulose" Date: Mon, 19 Oct 2015 14:24:40 +0100 Subject: arm64: Delay cpuinfo_store_boot_cpu At the moment the boot CPU stores the cpuinfo long before the PERCPU areas are initialised by the kernel. This could be problematic as the non-boot CPU data structures might get copied with the data from the boot CPU, giving us no chance to detect if a particular CPU updated its cpuinfo. This patch delays the boot cpu store to smp_prepare_boot_cpu(). Also kills the setup_processor() which no longer does meaningful work. Signed-off-by: Suzuki K. Poulose Tested-by: Dave Martin Signed-off-by: Catalin Marinas diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 4f0408e..47e0051 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -286,13 +286,6 @@ void __init setup_cpu_features(void) #endif } -static void __init setup_processor(void) -{ - pr_info("Boot CPU: AArch64 Processor [%08x]\n", read_cpuid_id()); - sprintf(init_utsname()->machine, ELF_PLATFORM); - cpuinfo_store_boot_cpu(); -} - static void __init setup_machine_fdt(phys_addr_t dt_phys) { void *dt_virt = fixmap_remap_fdt(dt_phys); @@ -404,8 +397,9 @@ u64 __cpu_logical_map[NR_CPUS] = { [0 ... NR_CPUS-1] = INVALID_HWID }; void __init setup_arch(char **cmdline_p) { - setup_processor(); + pr_info("Boot CPU: AArch64 Processor [%08x]\n", read_cpuid_id()); + sprintf(init_utsname()->machine, ELF_PLATFORM); init_mm.start_code = (unsigned long) _text; init_mm.end_code = (unsigned long) _etext; init_mm.end_data = (unsigned long) _edata; diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 77763d9..d1d0049 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -327,6 +327,7 @@ void __init smp_cpus_done(unsigned int max_cpus) void __init smp_prepare_boot_cpu(void) { + cpuinfo_store_boot_cpu(); set_my_cpu_offset(per_cpu_offset(smp_processor_id())); } -- cgit v0.10.2 From 9cdf8ec4a86b9310111f741bbaf11df9120e0482 Mon Sep 17 00:00:00 2001 From: "Suzuki K. Poulose" Date: Mon, 19 Oct 2015 14:24:41 +0100 Subject: arm64: Move cpu feature detection code This patch moves the CPU feature detection code from arch/arm64/kernel/{setup.c to cpufeature.c} The plan is to consolidate all the CPU feature handling in cpufeature.c. Apart from changing pr_fmt from "alternatives" to "cpu features", there are no functional changes. Signed-off-by: Suzuki K. Poulose Tested-by: Dave Martin Signed-off-by: Catalin Marinas diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 3c9aed3..e49be15 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -16,13 +16,31 @@ * along with this program. If not, see . */ -#define pr_fmt(fmt) "alternatives: " fmt +#define pr_fmt(fmt) "CPU features: " fmt #include #include #include #include +unsigned long elf_hwcap __read_mostly; +EXPORT_SYMBOL_GPL(elf_hwcap); + +#ifdef CONFIG_COMPAT +#define COMPAT_ELF_HWCAP_DEFAULT \ + (COMPAT_HWCAP_HALF|COMPAT_HWCAP_THUMB|\ + COMPAT_HWCAP_FAST_MULT|COMPAT_HWCAP_EDSP|\ + COMPAT_HWCAP_TLS|COMPAT_HWCAP_VFP|\ + COMPAT_HWCAP_VFPv3|COMPAT_HWCAP_VFPv4|\ + COMPAT_HWCAP_NEON|COMPAT_HWCAP_IDIV|\ + COMPAT_HWCAP_LPAE) +unsigned int compat_elf_hwcap __read_mostly = COMPAT_ELF_HWCAP_DEFAULT; +unsigned int compat_elf_hwcap2 __read_mostly; +#endif + +DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS); + + static bool feature_matches(u64 reg, const struct arm64_cpu_capabilities *entry) { @@ -100,3 +118,93 @@ void check_local_cpu_features(void) { check_cpu_capabilities(arm64_features, "detected feature:"); } + +void __init setup_cpu_features(void) +{ + u64 features; + s64 block; + u32 cwg; + int cls; + + /* + * Check for sane CTR_EL0.CWG value. + */ + cwg = cache_type_cwg(); + cls = cache_line_size(); + if (!cwg) + pr_warn("No Cache Writeback Granule information, assuming cache line size %d\n", + cls); + if (L1_CACHE_BYTES < cls) + pr_warn("L1_CACHE_BYTES smaller than the Cache Writeback Granule (%d < %d)\n", + L1_CACHE_BYTES, cls); + + /* + * ID_AA64ISAR0_EL1 contains 4-bit wide signed feature blocks. + * The blocks we test below represent incremental functionality + * for non-negative values. Negative values are reserved. + */ + features = read_cpuid(ID_AA64ISAR0_EL1); + block = cpuid_feature_extract_field(features, 4); + if (block > 0) { + switch (block) { + default: + case 2: + elf_hwcap |= HWCAP_PMULL; + case 1: + elf_hwcap |= HWCAP_AES; + case 0: + break; + } + } + + if (cpuid_feature_extract_field(features, 8) > 0) + elf_hwcap |= HWCAP_SHA1; + + if (cpuid_feature_extract_field(features, 12) > 0) + elf_hwcap |= HWCAP_SHA2; + + if (cpuid_feature_extract_field(features, 16) > 0) + elf_hwcap |= HWCAP_CRC32; + + block = cpuid_feature_extract_field(features, 20); + if (block > 0) { + switch (block) { + default: + case 2: + elf_hwcap |= HWCAP_ATOMICS; + case 1: + /* RESERVED */ + case 0: + break; + } + } + +#ifdef CONFIG_COMPAT + /* + * ID_ISAR5_EL1 carries similar information as above, but pertaining to + * the AArch32 32-bit execution state. + */ + features = read_cpuid(ID_ISAR5_EL1); + block = cpuid_feature_extract_field(features, 4); + if (block > 0) { + switch (block) { + default: + case 2: + compat_elf_hwcap2 |= COMPAT_HWCAP2_PMULL; + case 1: + compat_elf_hwcap2 |= COMPAT_HWCAP2_AES; + case 0: + break; + } + } + + if (cpuid_feature_extract_field(features, 8) > 0) + compat_elf_hwcap2 |= COMPAT_HWCAP2_SHA1; + + if (cpuid_feature_extract_field(features, 12) > 0) + compat_elf_hwcap2 |= COMPAT_HWCAP2_SHA2; + + if (cpuid_feature_extract_field(features, 16) > 0) + compat_elf_hwcap2 |= COMPAT_HWCAP2_CRC32; +#endif +} diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 47e0051..baf0da8 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -65,23 +65,6 @@ #include #include -unsigned long elf_hwcap __read_mostly; -EXPORT_SYMBOL_GPL(elf_hwcap); - -#ifdef CONFIG_COMPAT -#define COMPAT_ELF_HWCAP_DEFAULT \ - (COMPAT_HWCAP_HALF|COMPAT_HWCAP_THUMB|\ - COMPAT_HWCAP_FAST_MULT|COMPAT_HWCAP_EDSP|\ - COMPAT_HWCAP_TLS|COMPAT_HWCAP_VFP|\ - COMPAT_HWCAP_VFPv3|COMPAT_HWCAP_VFPv4|\ - COMPAT_HWCAP_NEON|COMPAT_HWCAP_IDIV|\ - COMPAT_HWCAP_LPAE) -unsigned int compat_elf_hwcap __read_mostly = COMPAT_ELF_HWCAP_DEFAULT; -unsigned int compat_elf_hwcap2 __read_mostly; -#endif - -DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS); - phys_addr_t __fdt_pointer __initdata; /* @@ -196,96 +179,6 @@ static void __init smp_build_mpidr_hash(void) __flush_dcache_area(&mpidr_hash, sizeof(struct mpidr_hash)); } -void __init setup_cpu_features(void) -{ - u64 features; - s64 block; - u32 cwg; - int cls; - - /* - * Check for sane CTR_EL0.CWG value. - */ - cwg = cache_type_cwg(); - cls = cache_line_size(); - if (!cwg) - pr_warn("No Cache Writeback Granule information, assuming cache line size %d\n", - cls); - if (L1_CACHE_BYTES < cls) - pr_warn("L1_CACHE_BYTES smaller than the Cache Writeback Granule (%d < %d)\n", - L1_CACHE_BYTES, cls); - - /* - * ID_AA64ISAR0_EL1 contains 4-bit wide signed feature blocks. - * The blocks we test below represent incremental functionality - * for non-negative values. Negative values are reserved. - */ - features = read_cpuid(ID_AA64ISAR0_EL1); - block = cpuid_feature_extract_field(features, 4); - if (block > 0) { - switch (block) { - default: - case 2: - elf_hwcap |= HWCAP_PMULL; - case 1: - elf_hwcap |= HWCAP_AES; - case 0: - break; - } - } - - if (cpuid_feature_extract_field(features, 8) > 0) - elf_hwcap |= HWCAP_SHA1; - - if (cpuid_feature_extract_field(features, 12) > 0) - elf_hwcap |= HWCAP_SHA2; - - if (cpuid_feature_extract_field(features, 16) > 0) - elf_hwcap |= HWCAP_CRC32; - - block = cpuid_feature_extract_field(features, 20); - if (block > 0) { - switch (block) { - default: - case 2: - elf_hwcap |= HWCAP_ATOMICS; - case 1: - /* RESERVED */ - case 0: - break; - } - } - -#ifdef CONFIG_COMPAT - /* - * ID_ISAR5_EL1 carries similar information as above, but pertaining to - * the AArch32 32-bit execution state. - */ - features = read_cpuid(ID_ISAR5_EL1); - block = cpuid_feature_extract_field(features, 4); - if (block > 0) { - switch (block) { - default: - case 2: - compat_elf_hwcap2 |= COMPAT_HWCAP2_PMULL; - case 1: - compat_elf_hwcap2 |= COMPAT_HWCAP2_AES; - case 0: - break; - } - } - - if (cpuid_feature_extract_field(features, 8) > 0) - compat_elf_hwcap2 |= COMPAT_HWCAP2_SHA1; - - if (cpuid_feature_extract_field(features, 12) > 0) - compat_elf_hwcap2 |= COMPAT_HWCAP2_SHA2; - - if (cpuid_feature_extract_field(features, 16) > 0) - compat_elf_hwcap2 |= COMPAT_HWCAP2_CRC32; -#endif -} - static void __init setup_machine_fdt(phys_addr_t dt_phys) { void *dt_virt = fixmap_remap_fdt(dt_phys); -- cgit v0.10.2 From cdcf817b7e4b62b935d8797f7d07ea0b97760884 Mon Sep 17 00:00:00 2001 From: "Suzuki K. Poulose" Date: Mon, 19 Oct 2015 14:24:42 +0100 Subject: arm64: Move mixed endian support detection Move the mixed endian support detection code to cpufeature.c from cpuinfo.c. This also moves the update_cpu_features() used by mixed endian detection code, which will get more functionality. Also moves the ID register field shifts to asm/sysreg.h, where all the useful definitions will end up in later patches. Signed-off-by: Suzuki K. Poulose Tested-by: Dave Martin Signed-off-by: Catalin Marinas diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h index 8e797b2..30db691 100644 --- a/arch/arm64/include/asm/cpu.h +++ b/arch/arm64/include/asm/cpu.h @@ -63,4 +63,6 @@ DECLARE_PER_CPU(struct cpuinfo_arm64, cpu_data); void cpuinfo_store_cpu(void); void __init cpuinfo_store_boot_cpu(void); +void update_cpu_features(struct cpuinfo_arm64 *info); + #endif /* __ASM_CPU_H */ diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index b7769f6..b5f313d 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -10,6 +10,7 @@ #define __ASM_CPUFEATURE_H #include +#include /* * In the arm64 world (as in the ARM world), elf_hwcap is used both internally @@ -81,6 +82,12 @@ static inline int __attribute_const__ cpuid_feature_extract_field(u64 features, return (s64)(features << (64 - 4 - field)) >> (64 - 4); } +static inline bool id_aa64mmfr0_mixed_endian_el0(u64 mmfr0) +{ + return cpuid_feature_extract_field(mmfr0, ID_AA64MMFR0_BIGENDEL_SHIFT) == 0x1 || + cpuid_feature_extract_field(mmfr0, ID_AA64MMFR0_BIGENDEL0_SHIFT) == 0x1; +} + void __init setup_cpu_features(void); void check_cpu_capabilities(const struct arm64_cpu_capabilities *caps, diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index ee6403d..31678b2 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -72,15 +72,6 @@ #define APM_CPU_PART_POTENZA 0x000 -#define ID_AA64MMFR0_BIGENDEL0_SHIFT 16 -#define ID_AA64MMFR0_BIGENDEL0_MASK (0xf << ID_AA64MMFR0_BIGENDEL0_SHIFT) -#define ID_AA64MMFR0_BIGENDEL0(mmfr0) \ - (((mmfr0) & ID_AA64MMFR0_BIGENDEL0_MASK) >> ID_AA64MMFR0_BIGENDEL0_SHIFT) -#define ID_AA64MMFR0_BIGEND_SHIFT 8 -#define ID_AA64MMFR0_BIGEND_MASK (0xf << ID_AA64MMFR0_BIGEND_SHIFT) -#define ID_AA64MMFR0_BIGEND(mmfr0) \ - (((mmfr0) & ID_AA64MMFR0_BIGEND_MASK) >> ID_AA64MMFR0_BIGEND_SHIFT) - #ifndef __ASSEMBLY__ /* @@ -112,12 +103,6 @@ static inline u32 __attribute_const__ read_cpuid_cachetype(void) { return read_cpuid(CTR_EL0); } - -static inline bool id_aa64mmfr0_mixed_endian_el0(u64 mmfr0) -{ - return (ID_AA64MMFR0_BIGEND(mmfr0) == 0x1) || - (ID_AA64MMFR0_BIGENDEL0(mmfr0) == 0x1); -} #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 4b57e10..061a279 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -44,6 +44,8 @@ #define SET_PSTATE_PAN(x) __inst_arm(0xd5000000 | REG_PSTATE_PAN_IMM |\ (!!x)<<8 | 0x1f) +#define ID_AA64MMFR0_BIGENDEL0_SHIFT 16 +#define ID_AA64MMFR0_BIGENDEL_SHIFT 8 #define ID_AA64MMFR0_TGRAN4_SHIFT 28 #define ID_AA64MMFR0_TGRAN64_SHIFT 24 diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index e49be15..1ae8b24 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -22,7 +22,9 @@ #include #include #include +#include +static bool mixed_endian_el0 = true; unsigned long elf_hwcap __read_mostly; EXPORT_SYMBOL_GPL(elf_hwcap); @@ -41,6 +43,26 @@ unsigned int compat_elf_hwcap2 __read_mostly; DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS); +bool cpu_supports_mixed_endian_el0(void) +{ + return id_aa64mmfr0_mixed_endian_el0(read_cpuid(ID_AA64MMFR0_EL1)); +} + +bool system_supports_mixed_endian_el0(void) +{ + return mixed_endian_el0; +} + +static void update_mixed_endian_el0_support(struct cpuinfo_arm64 *info) +{ + mixed_endian_el0 &= id_aa64mmfr0_mixed_endian_el0(info->reg_id_aa64mmfr0); +} + +void update_cpu_features(struct cpuinfo_arm64 *info) +{ + update_mixed_endian_el0_support(info); +} + static bool feature_matches(u64 reg, const struct arm64_cpu_capabilities *entry) { diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 75d5a86..8307b33 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -35,7 +35,6 @@ */ DEFINE_PER_CPU(struct cpuinfo_arm64, cpu_data); static struct cpuinfo_arm64 boot_cpu_data; -static bool mixed_endian_el0 = true; static char *icache_policy_str[] = { [ICACHE_POLICY_RESERVED] = "RESERVED/UNKNOWN", @@ -69,26 +68,6 @@ static void cpuinfo_detect_icache_policy(struct cpuinfo_arm64 *info) pr_info("Detected %s I-cache on CPU%d\n", icache_policy_str[l1ip], cpu); } -bool cpu_supports_mixed_endian_el0(void) -{ - return id_aa64mmfr0_mixed_endian_el0(read_cpuid(ID_AA64MMFR0_EL1)); -} - -bool system_supports_mixed_endian_el0(void) -{ - return mixed_endian_el0; -} - -static void update_mixed_endian_el0_support(struct cpuinfo_arm64 *info) -{ - mixed_endian_el0 &= id_aa64mmfr0_mixed_endian_el0(info->reg_id_aa64mmfr0); -} - -static void update_cpu_features(struct cpuinfo_arm64 *info) -{ - update_mixed_endian_el0_support(info); -} - static int check_reg_mask(char *name, u64 mask, u64 boot, u64 cur, int cpu) { if ((boot & mask) == (cur & mask)) -- cgit v0.10.2 From 12d11817eaafa414eeb47af684093eb2165ebe37 Mon Sep 17 00:00:00 2001 From: "Suzuki K. Poulose" Date: Mon, 19 Oct 2015 14:24:43 +0100 Subject: arm64: Move /proc/cpuinfo handling code This patch moves the /proc/cpuinfo handling code: arch/arm64/kernel/{setup.c to cpuinfo.c} No functional changes Signed-off-by: Suzuki K. Poulose Tested-by: Dave Martin Signed-off-by: Catalin Marinas diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 8307b33..0dadb69 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -24,8 +24,11 @@ #include #include #include +#include #include #include +#include +#include #include /* @@ -45,6 +48,127 @@ static char *icache_policy_str[] = { unsigned long __icache_flags; +static const char *hwcap_str[] = { + "fp", + "asimd", + "evtstrm", + "aes", + "pmull", + "sha1", + "sha2", + "crc32", + "atomics", + NULL +}; + +#ifdef CONFIG_COMPAT +static const char *compat_hwcap_str[] = { + "swp", + "half", + "thumb", + "26bit", + "fastmult", + "fpa", + "vfp", + "edsp", + "java", + "iwmmxt", + "crunch", + "thumbee", + "neon", + "vfpv3", + "vfpv3d16", + "tls", + "vfpv4", + "idiva", + "idivt", + "vfpd32", + "lpae", + "evtstrm" +}; + +static const char *compat_hwcap2_str[] = { + "aes", + "pmull", + "sha1", + "sha2", + "crc32", + NULL +}; +#endif /* CONFIG_COMPAT */ + +static int c_show(struct seq_file *m, void *v) +{ + int i, j; + + for_each_online_cpu(i) { + struct cpuinfo_arm64 *cpuinfo = &per_cpu(cpu_data, i); + u32 midr = cpuinfo->reg_midr; + + /* + * glibc reads /proc/cpuinfo to determine the number of + * online processors, looking for lines beginning with + * "processor". Give glibc what it expects. + */ + seq_printf(m, "processor\t: %d\n", i); + + /* + * Dump out the common processor features in a single line. + * Userspace should read the hwcaps with getauxval(AT_HWCAP) + * rather than attempting to parse this, but there's a body of + * software which does already (at least for 32-bit). + */ + seq_puts(m, "Features\t:"); + if (personality(current->personality) == PER_LINUX32) { +#ifdef CONFIG_COMPAT + for (j = 0; compat_hwcap_str[j]; j++) + if (compat_elf_hwcap & (1 << j)) + seq_printf(m, " %s", compat_hwcap_str[j]); + + for (j = 0; compat_hwcap2_str[j]; j++) + if (compat_elf_hwcap2 & (1 << j)) + seq_printf(m, " %s", compat_hwcap2_str[j]); +#endif /* CONFIG_COMPAT */ + } else { + for (j = 0; hwcap_str[j]; j++) + if (elf_hwcap & (1 << j)) + seq_printf(m, " %s", hwcap_str[j]); + } + seq_puts(m, "\n"); + + seq_printf(m, "CPU implementer\t: 0x%02x\n", + MIDR_IMPLEMENTOR(midr)); + seq_printf(m, "CPU architecture: 8\n"); + seq_printf(m, "CPU variant\t: 0x%x\n", MIDR_VARIANT(midr)); + seq_printf(m, "CPU part\t: 0x%03x\n", MIDR_PARTNUM(midr)); + seq_printf(m, "CPU revision\t: %d\n\n", MIDR_REVISION(midr)); + } + + return 0; +} + +static void *c_start(struct seq_file *m, loff_t *pos) +{ + return *pos < 1 ? (void *)1 : NULL; +} + +static void *c_next(struct seq_file *m, void *v, loff_t *pos) +{ + ++*pos; + return NULL; +} + +static void c_stop(struct seq_file *m, void *v) +{ +} + +const struct seq_operations cpuinfo_op = { + .start = c_start, + .next = c_next, + .stop = c_stop, + .show = c_show +}; + static void cpuinfo_detect_icache_policy(struct cpuinfo_arm64 *info) { unsigned int cpu = smp_processor_id(); diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index baf0da8..556301f 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -28,7 +28,6 @@ #include #include #include -#include #include #include #include @@ -44,7 +43,6 @@ #include #include #include -#include #include #include @@ -381,124 +379,3 @@ static int __init topology_init(void) return 0; } subsys_initcall(topology_init); - -static const char *hwcap_str[] = { - "fp", - "asimd", - "evtstrm", - "aes", - "pmull", - "sha1", - "sha2", - "crc32", - "atomics", - NULL -}; - -#ifdef CONFIG_COMPAT -static const char *compat_hwcap_str[] = { - "swp", - "half", - "thumb", - "26bit", - "fastmult", - "fpa", - "vfp", - "edsp", - "java", - "iwmmxt", - "crunch", - "thumbee", - "neon", - "vfpv3", - "vfpv3d16", - "tls", - "vfpv4", - "idiva", - "idivt", - "vfpd32", - "lpae", - "evtstrm" -}; - -static const char *compat_hwcap2_str[] = { - "aes", - "pmull", - "sha1", - "sha2", - "crc32", - NULL -}; -#endif /* CONFIG_COMPAT */ - -static int c_show(struct seq_file *m, void *v) -{ - int i, j; - - for_each_online_cpu(i) { - struct cpuinfo_arm64 *cpuinfo = &per_cpu(cpu_data, i); - u32 midr = cpuinfo->reg_midr; - - /* - * glibc reads /proc/cpuinfo to determine the number of - * online processors, looking for lines beginning with - * "processor". Give glibc what it expects. - */ - seq_printf(m, "processor\t: %d\n", i); - - /* - * Dump out the common processor features in a single line. - * Userspace should read the hwcaps with getauxval(AT_HWCAP) - * rather than attempting to parse this, but there's a body of - * software which does already (at least for 32-bit). - */ - seq_puts(m, "Features\t:"); - if (personality(current->personality) == PER_LINUX32) { -#ifdef CONFIG_COMPAT - for (j = 0; compat_hwcap_str[j]; j++) - if (compat_elf_hwcap & (1 << j)) - seq_printf(m, " %s", compat_hwcap_str[j]); - - for (j = 0; compat_hwcap2_str[j]; j++) - if (compat_elf_hwcap2 & (1 << j)) - seq_printf(m, " %s", compat_hwcap2_str[j]); -#endif /* CONFIG_COMPAT */ - } else { - for (j = 0; hwcap_str[j]; j++) - if (elf_hwcap & (1 << j)) - seq_printf(m, " %s", hwcap_str[j]); - } - seq_puts(m, "\n"); - - seq_printf(m, "CPU implementer\t: 0x%02x\n", - MIDR_IMPLEMENTOR(midr)); - seq_printf(m, "CPU architecture: 8\n"); - seq_printf(m, "CPU variant\t: 0x%x\n", MIDR_VARIANT(midr)); - seq_printf(m, "CPU part\t: 0x%03x\n", MIDR_PARTNUM(midr)); - seq_printf(m, "CPU revision\t: %d\n\n", MIDR_REVISION(midr)); - } - - return 0; -} - -static void *c_start(struct seq_file *m, loff_t *pos) -{ - return *pos < 1 ? (void *)1 : NULL; -} - -static void *c_next(struct seq_file *m, void *v, loff_t *pos) -{ - ++*pos; - return NULL; -} - -static void c_stop(struct seq_file *m, void *v) -{ -} - -const struct seq_operations cpuinfo_op = { - .start = c_start, - .next = c_next, - .stop = c_stop, - .show = c_show -}; -- cgit v0.10.2 From ce98a677d897dbaac86905652292fab1eeeb2b93 Mon Sep 17 00:00:00 2001 From: "Suzuki K. Poulose" Date: Mon, 19 Oct 2015 14:24:44 +0100 Subject: arm64: Handle width of a cpuid feature Introduce a helper to extract cpuid feature for any given width. Signed-off-by: Suzuki K. Poulose Tested-by: Dave Martin Signed-off-by: Catalin Marinas diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index b5f313d..85507fe 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -76,10 +76,16 @@ static inline void cpus_set_cap(unsigned int num) __set_bit(num, cpu_hwcaps); } -static inline int __attribute_const__ cpuid_feature_extract_field(u64 features, - int field) +static inline int __attribute_const__ +cpuid_feature_extract_field_width(u64 features, int field, int width) { - return (s64)(features << (64 - 4 - field)) >> (64 - 4); + return (s64)(features << (64 - width - field)) >> (64 - width); +} + +static inline int __attribute_const__ +cpuid_feature_extract_field(u64 features, int field) +{ + return cpuid_feature_extract_field_width(features, field, 4); } static inline bool id_aa64mmfr0_mixed_endian_el0(u64 mmfr0) -- cgit v0.10.2 From 3c739b57108436211c7f798ba3de0bb0cd8ef469 Mon Sep 17 00:00:00 2001 From: "Suzuki K. Poulose" Date: Mon, 19 Oct 2015 14:24:45 +0100 Subject: arm64: Keep track of CPU feature registers This patch adds an infrastructure to keep track of the CPU feature registers on the system. For each register, the infrastructure keeps track of the system wide safe value of the feature bits. Also, tracks the which fields of a register should be matched strictly across all the CPUs on the system for the SANITY check infrastructure. The feature bits are classified into following 3 types depending on the implication of the possible values. This information is used to decide the safe value for a feature. LOWER_SAFE - The smaller value is safer HIGHER_SAFE - The bigger value is safer EXACT - We can't decide between the two, so a predefined safe_value is used. This infrastructure will be later used to make better decisions for: - Kernel features (e.g, KVM, Debug) - SANITY Check - CPU capability - ELF HWCAP - Exposing CPU Feature register to userspace. Signed-off-by: Suzuki K. Poulose Tested-by: Dave Martin [catalin.marinas@arm.com: whitespace fix] Signed-off-by: Catalin Marinas diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h index 30db691..704c17b 100644 --- a/arch/arm64/include/asm/cpu.h +++ b/arch/arm64/include/asm/cpu.h @@ -63,6 +63,7 @@ DECLARE_PER_CPU(struct cpuinfo_arm64, cpu_data); void cpuinfo_store_cpu(void); void __init cpuinfo_store_boot_cpu(void); +void __init init_cpu_features(struct cpuinfo_arm64 *info); void update_cpu_features(struct cpuinfo_arm64 *info); #endif /* __ASM_CPU_H */ diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 85507fe..01bb5cf 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -35,6 +35,37 @@ #include +/* CPU feature register tracking */ +enum ftr_type { + FTR_EXACT, /* Use a predefined safe value */ + FTR_LOWER_SAFE, /* Smaller value is safe */ + FTR_HIGHER_SAFE,/* Bigger value is safe */ +}; + +#define FTR_STRICT true /* SANITY check strict matching required */ +#define FTR_NONSTRICT false /* SANITY check ignored */ + +struct arm64_ftr_bits { + bool strict; /* CPU Sanity check: strict matching required ? */ + enum ftr_type type; + u8 shift; + u8 width; + s64 safe_val; /* safe value for discrete features */ +}; + +/* + * @arm64_ftr_reg - Feature register + * @strict_mask Bits which should match across all CPUs for sanity. + * @sys_val Safe value across the CPUs (system view) + */ +struct arm64_ftr_reg { + u32 sys_id; + const char *name; + u64 strict_mask; + u64 sys_val; + struct arm64_ftr_bits *ftr_bits; +}; + struct arm64_cpu_capabilities { const char *desc; u16 capability; @@ -88,6 +119,16 @@ cpuid_feature_extract_field(u64 features, int field) return cpuid_feature_extract_field_width(features, field, 4); } +static inline u64 arm64_ftr_mask(struct arm64_ftr_bits *ftrp) +{ + return (u64)GENMASK(ftrp->shift + ftrp->width - 1, ftrp->shift); +} + +static inline s64 arm64_ftr_value(struct arm64_ftr_bits *ftrp, u64 val) +{ + return cpuid_feature_extract_field_width(val, ftrp->shift, ftrp->width); +} + static inline bool id_aa64mmfr0_mixed_endian_el0(u64 mmfr0) { return cpuid_feature_extract_field(mmfr0, ID_AA64MMFR0_BIGENDEL_SHIFT) == 0x1 || diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 061a279..d48ab5b 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -22,9 +22,6 @@ #include -#define SCTLR_EL1_CP15BEN (0x1 << 5) -#define SCTLR_EL1_SED (0x1 << 8) - /* * ARMv8 ARM reserves the following encoding for system registers: * (Ref: ARMv8 ARM, Section: "System instruction class encoding overview", @@ -38,14 +35,139 @@ #define sys_reg(op0, op1, crn, crm, op2) \ ((((op0)&3)<<19)|((op1)<<16)|((crn)<<12)|((crm)<<8)|((op2)<<5)) -#define REG_PSTATE_PAN_IMM sys_reg(0, 0, 4, 0, 4) -#define SCTLR_EL1_SPAN (1 << 23) +#define SYS_MIDR_EL1 sys_reg(3, 0, 0, 0, 0) +#define SYS_MPIDR_EL1 sys_reg(3, 0, 0, 0, 5) +#define SYS_REVIDR_EL1 sys_reg(3, 0, 0, 0, 6) + +#define SYS_ID_PFR0_EL1 sys_reg(3, 0, 0, 1, 0) +#define SYS_ID_PFR1_EL1 sys_reg(3, 0, 0, 1, 1) +#define SYS_ID_DFR0_EL1 sys_reg(3, 0, 0, 1, 2) +#define SYS_ID_MMFR0_EL1 sys_reg(3, 0, 0, 1, 4) +#define SYS_ID_MMFR1_EL1 sys_reg(3, 0, 0, 1, 5) +#define SYS_ID_MMFR2_EL1 sys_reg(3, 0, 0, 1, 6) +#define SYS_ID_MMFR3_EL1 sys_reg(3, 0, 0, 1, 7) + +#define SYS_ID_ISAR0_EL1 sys_reg(3, 0, 0, 2, 0) +#define SYS_ID_ISAR1_EL1 sys_reg(3, 0, 0, 2, 1) +#define SYS_ID_ISAR2_EL1 sys_reg(3, 0, 0, 2, 2) +#define SYS_ID_ISAR3_EL1 sys_reg(3, 0, 0, 2, 3) +#define SYS_ID_ISAR4_EL1 sys_reg(3, 0, 0, 2, 4) +#define SYS_ID_ISAR5_EL1 sys_reg(3, 0, 0, 2, 5) +#define SYS_ID_MMFR4_EL1 sys_reg(3, 0, 0, 2, 6) + +#define SYS_MVFR0_EL1 sys_reg(3, 0, 0, 3, 0) +#define SYS_MVFR1_EL1 sys_reg(3, 0, 0, 3, 1) +#define SYS_MVFR2_EL1 sys_reg(3, 0, 0, 3, 2) + +#define SYS_ID_AA64PFR0_EL1 sys_reg(3, 0, 0, 4, 0) +#define SYS_ID_AA64PFR1_EL1 sys_reg(3, 0, 0, 4, 1) + +#define SYS_ID_AA64DFR0_EL1 sys_reg(3, 0, 0, 5, 0) +#define SYS_ID_AA64DFR1_EL1 sys_reg(3, 0, 0, 5, 1) + +#define SYS_ID_AA64ISAR0_EL1 sys_reg(3, 0, 0, 6, 0) +#define SYS_ID_AA64ISAR1_EL1 sys_reg(3, 0, 0, 6, 1) + +#define SYS_ID_AA64MMFR0_EL1 sys_reg(3, 0, 0, 7, 0) +#define SYS_ID_AA64MMFR1_EL1 sys_reg(3, 0, 0, 7, 1) + +#define SYS_CNTFRQ_EL0 sys_reg(3, 3, 14, 0, 0) +#define SYS_CTR_EL0 sys_reg(3, 3, 0, 0, 1) +#define SYS_DCZID_EL0 sys_reg(3, 3, 0, 0, 7) + +#define REG_PSTATE_PAN_IMM sys_reg(0, 0, 4, 0, 4) #define SET_PSTATE_PAN(x) __inst_arm(0xd5000000 | REG_PSTATE_PAN_IMM |\ (!!x)<<8 | 0x1f) +/* SCTLR_EL1 */ +#define SCTLR_EL1_CP15BEN (0x1 << 5) +#define SCTLR_EL1_SED (0x1 << 8) +#define SCTLR_EL1_SPAN (0x1 << 23) + + +/* id_aa64isar0 */ +#define ID_AA64ISAR0_RDM_SHIFT 28 +#define ID_AA64ISAR0_ATOMICS_SHIFT 20 +#define ID_AA64ISAR0_CRC32_SHIFT 16 +#define ID_AA64ISAR0_SHA2_SHIFT 12 +#define ID_AA64ISAR0_SHA1_SHIFT 8 +#define ID_AA64ISAR0_AES_SHIFT 4 + +/* id_aa64pfr0 */ +#define ID_AA64PFR0_GIC_SHIFT 24 +#define ID_AA64PFR0_ASIMD_SHIFT 20 +#define ID_AA64PFR0_FP_SHIFT 16 +#define ID_AA64PFR0_EL3_SHIFT 12 +#define ID_AA64PFR0_EL2_SHIFT 8 +#define ID_AA64PFR0_EL1_SHIFT 4 +#define ID_AA64PFR0_EL0_SHIFT 0 + +#define ID_AA64PFR0_FP_NI 0xf +#define ID_AA64PFR0_FP_SUPPORTED 0x0 +#define ID_AA64PFR0_ASIMD_NI 0xf +#define ID_AA64PFR0_ASIMD_SUPPORTED 0x0 +#define ID_AA64PFR0_EL1_64BIT_ONLY 0x1 +#define ID_AA64PFR0_EL0_64BIT_ONLY 0x1 + +/* id_aa64mmfr0 */ +#define ID_AA64MMFR0_TGRAN4_SHIFT 28 +#define ID_AA64MMFR0_TGRAN64_SHIFT 24 +#define ID_AA64MMFR0_TGRAN16_SHIFT 20 #define ID_AA64MMFR0_BIGENDEL0_SHIFT 16 +#define ID_AA64MMFR0_SNSMEM_SHIFT 12 #define ID_AA64MMFR0_BIGENDEL_SHIFT 8 +#define ID_AA64MMFR0_ASID_SHIFT 4 +#define ID_AA64MMFR0_PARANGE_SHIFT 0 + +#define ID_AA64MMFR0_TGRAN4_NI 0xf +#define ID_AA64MMFR0_TGRAN4_SUPPORTED 0x0 +#define ID_AA64MMFR0_TGRAN64_NI 0xf +#define ID_AA64MMFR0_TGRAN64_SUPPORTED 0x0 +#define ID_AA64MMFR0_TGRAN16_NI 0x0 +#define ID_AA64MMFR0_TGRAN16_SUPPORTED 0x1 + +/* id_aa64mmfr1 */ +#define ID_AA64MMFR1_PAN_SHIFT 20 +#define ID_AA64MMFR1_LOR_SHIFT 16 +#define ID_AA64MMFR1_HPD_SHIFT 12 +#define ID_AA64MMFR1_VHE_SHIFT 8 +#define ID_AA64MMFR1_VMIDBITS_SHIFT 4 +#define ID_AA64MMFR1_HADBS_SHIFT 0 + +/* id_aa64dfr0 */ +#define ID_AA64DFR0_CTX_CMPS_SHIFT 28 +#define ID_AA64DFR0_WRPS_SHIFT 20 +#define ID_AA64DFR0_BRPS_SHIFT 12 +#define ID_AA64DFR0_PMUVER_SHIFT 8 +#define ID_AA64DFR0_TRACEVER_SHIFT 4 +#define ID_AA64DFR0_DEBUGVER_SHIFT 0 + +#define ID_ISAR5_RDM_SHIFT 24 +#define ID_ISAR5_CRC32_SHIFT 16 +#define ID_ISAR5_SHA2_SHIFT 12 +#define ID_ISAR5_SHA1_SHIFT 8 +#define ID_ISAR5_AES_SHIFT 4 +#define ID_ISAR5_SEVL_SHIFT 0 + +#define MVFR0_FPROUND_SHIFT 28 +#define MVFR0_FPSHVEC_SHIFT 24 +#define MVFR0_FPSQRT_SHIFT 20 +#define MVFR0_FPDIVIDE_SHIFT 16 +#define MVFR0_FPTRAP_SHIFT 12 +#define MVFR0_FPDP_SHIFT 8 +#define MVFR0_FPSP_SHIFT 4 +#define MVFR0_SIMD_SHIFT 0 + +#define MVFR1_SIMDFMAC_SHIFT 28 +#define MVFR1_FPHP_SHIFT 24 +#define MVFR1_SIMDHP_SHIFT 20 +#define MVFR1_SIMDSP_SHIFT 16 +#define MVFR1_SIMDINT_SHIFT 12 +#define MVFR1_SIMDLS_SHIFT 8 +#define MVFR1_FPDNAN_SHIFT 4 +#define MVFR1_FPFTZ_SHIFT 0 + #define ID_AA64MMFR0_TGRAN4_SHIFT 28 #define ID_AA64MMFR0_TGRAN64_SHIFT 24 diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 1ae8b24..8d7abaa 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -18,6 +18,8 @@ #define pr_fmt(fmt) "CPU features: " fmt +#include +#include #include #include #include @@ -58,8 +60,435 @@ static void update_mixed_endian_el0_support(struct cpuinfo_arm64 *info) mixed_endian_el0 &= id_aa64mmfr0_mixed_endian_el0(info->reg_id_aa64mmfr0); } +#define ARM64_FTR_BITS(STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \ + { \ + .strict = STRICT, \ + .type = TYPE, \ + .shift = SHIFT, \ + .width = WIDTH, \ + .safe_val = SAFE_VAL, \ + } + +#define ARM64_FTR_END \ + { \ + .width = 0, \ + } + +static struct arm64_ftr_bits ftr_id_aa64isar0[] = { + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64ISAR0_RDM_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 24, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_ATOMICS_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_CRC32_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SHA2_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SHA1_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_AES_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 4, 0), /* RAZ */ + ARM64_FTR_END, +}; + +static struct arm64_ftr_bits ftr_id_aa64pfr0[] = { + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64PFR0_GIC_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_ASIMD_SHIFT, 4, ID_AA64PFR0_ASIMD_NI), + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_FP_SHIFT, 4, ID_AA64PFR0_FP_NI), + /* Linux doesn't care about the EL3 */ + ARM64_FTR_BITS(FTR_NONSTRICT, FTR_EXACT, ID_AA64PFR0_EL3_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64PFR0_EL2_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64PFR0_EL1_SHIFT, 4, ID_AA64PFR0_EL1_64BIT_ONLY), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64PFR0_EL0_SHIFT, 4, ID_AA64PFR0_EL0_64BIT_ONLY), + ARM64_FTR_END, +}; + +static struct arm64_ftr_bits ftr_id_aa64mmfr0[] = { + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN4_SHIFT, 4, ID_AA64MMFR0_TGRAN4_NI), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN64_SHIFT, 4, ID_AA64MMFR0_TGRAN64_NI), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN16_SHIFT, 4, ID_AA64MMFR0_TGRAN16_NI), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_BIGENDEL0_SHIFT, 4, 0), + /* Linux shouldn't care about secure memory */ + ARM64_FTR_BITS(FTR_NONSTRICT, FTR_EXACT, ID_AA64MMFR0_SNSMEM_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_BIGENDEL_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_ASID_SHIFT, 4, 0), + /* + * Differing PARange is fine as long as all peripherals and memory are mapped + * within the minimum PARange of all CPUs + */ + ARM64_FTR_BITS(FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_PARANGE_SHIFT, 4, 0), + ARM64_FTR_END, +}; + +static struct arm64_ftr_bits ftr_id_aa64mmfr1[] = { + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_PAN_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_LOR_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_HPD_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_VHE_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_VMIDBITS_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_HADBS_SHIFT, 4, 0), + ARM64_FTR_END, +}; + +static struct arm64_ftr_bits ftr_ctr[] = { + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 31, 1, 1), /* RAO */ + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 3, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_HIGHER_SAFE, 24, 4, 0), /* CWG */ + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0), /* ERG */ + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 1), /* DminLine */ + /* + * Linux can handle differing I-cache policies. Userspace JITs will + * make use of *minLine + */ + ARM64_FTR_BITS(FTR_NONSTRICT, FTR_EXACT, 14, 2, 0), /* L1Ip */ + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 10, 0), /* RAZ */ + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* IminLine */ + ARM64_FTR_END, +}; + +static struct arm64_ftr_bits ftr_id_mmfr0[] = { + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 4, 0), /* InnerShr */ + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 24, 4, 0), /* FCSE */ + ARM64_FTR_BITS(FTR_NONSTRICT, FTR_LOWER_SAFE, 20, 4, 0), /* AuxReg */ + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 16, 4, 0), /* TCM */ + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 12, 4, 0), /* ShareLvl */ + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 8, 4, 0), /* OuterShr */ + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 4, 0), /* PMSA */ + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 4, 0), /* VMSA */ + ARM64_FTR_END, +}; + +static struct arm64_ftr_bits ftr_id_aa64dfr0[] = { + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_CTX_CMPS_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_WRPS_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_BRPS_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_PMUVER_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_TRACEVER_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_DEBUGVER_SHIFT, 4, 0x6), + ARM64_FTR_END, +}; + +static struct arm64_ftr_bits ftr_mvfr2[] = { + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 8, 24, 0), /* RAZ */ + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 4, 0), /* FPMisc */ + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 4, 0), /* SIMDMisc */ + ARM64_FTR_END, +}; + +static struct arm64_ftr_bits ftr_dczid[] = { + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 5, 27, 0), /* RAZ */ + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 1, 1), /* DZP */ + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* BS */ + ARM64_FTR_END, +}; + + +static struct arm64_ftr_bits ftr_id_isar5[] = { + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_RDM_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 20, 4, 0), /* RAZ */ + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_CRC32_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_SHA2_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_SHA1_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_AES_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_SEVL_SHIFT, 4, 0), + ARM64_FTR_END, +}; + +static struct arm64_ftr_bits ftr_id_mmfr4[] = { + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 8, 24, 0), /* RAZ */ + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 4, 0), /* ac2 */ + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 4, 0), /* RAZ */ + ARM64_FTR_END, +}; + +static struct arm64_ftr_bits ftr_id_pfr0[] = { + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 16, 16, 0), /* RAZ */ + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 12, 4, 0), /* State3 */ + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 8, 4, 0), /* State2 */ + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 4, 0), /* State1 */ + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 4, 0), /* State0 */ + ARM64_FTR_END, +}; + +/* + * Common ftr bits for a 32bit register with all hidden, strict + * attributes, with 4bit feature fields and a default safe value of + * 0. Covers the following 32bit registers: + * id_isar[0-4], id_mmfr[1-3], id_pfr1, mvfr[0-1] + */ +static struct arm64_ftr_bits ftr_generic_32bits[] = { + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 28, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 24, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 12, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 8, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), + ARM64_FTR_END, +}; + +static struct arm64_ftr_bits ftr_generic[] = { + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 64, 0), + ARM64_FTR_END, +}; + +static struct arm64_ftr_bits ftr_generic32[] = { + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 32, 0), + ARM64_FTR_END, +}; + +static struct arm64_ftr_bits ftr_aa64raz[] = { + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 64, 0), + ARM64_FTR_END, +}; + +#define ARM64_FTR_REG(id, table) \ + { \ + .sys_id = id, \ + .name = #id, \ + .ftr_bits = &((table)[0]), \ + } + +static struct arm64_ftr_reg arm64_ftr_regs[] = { + + /* Op1 = 0, CRn = 0, CRm = 1 */ + ARM64_FTR_REG(SYS_ID_PFR0_EL1, ftr_id_pfr0), + ARM64_FTR_REG(SYS_ID_PFR1_EL1, ftr_generic_32bits), + ARM64_FTR_REG(SYS_ID_DFR0_EL1, ftr_generic_32bits), + ARM64_FTR_REG(SYS_ID_MMFR0_EL1, ftr_id_mmfr0), + ARM64_FTR_REG(SYS_ID_MMFR1_EL1, ftr_generic_32bits), + ARM64_FTR_REG(SYS_ID_MMFR2_EL1, ftr_generic_32bits), + ARM64_FTR_REG(SYS_ID_MMFR3_EL1, ftr_generic_32bits), + + /* Op1 = 0, CRn = 0, CRm = 2 */ + ARM64_FTR_REG(SYS_ID_ISAR0_EL1, ftr_generic_32bits), + ARM64_FTR_REG(SYS_ID_ISAR1_EL1, ftr_generic_32bits), + ARM64_FTR_REG(SYS_ID_ISAR2_EL1, ftr_generic_32bits), + ARM64_FTR_REG(SYS_ID_ISAR3_EL1, ftr_generic_32bits), + ARM64_FTR_REG(SYS_ID_ISAR4_EL1, ftr_generic_32bits), + ARM64_FTR_REG(SYS_ID_ISAR5_EL1, ftr_id_isar5), + ARM64_FTR_REG(SYS_ID_MMFR4_EL1, ftr_id_mmfr4), + + /* Op1 = 0, CRn = 0, CRm = 3 */ + ARM64_FTR_REG(SYS_MVFR0_EL1, ftr_generic_32bits), + ARM64_FTR_REG(SYS_MVFR1_EL1, ftr_generic_32bits), + ARM64_FTR_REG(SYS_MVFR2_EL1, ftr_mvfr2), + + /* Op1 = 0, CRn = 0, CRm = 4 */ + ARM64_FTR_REG(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0), + ARM64_FTR_REG(SYS_ID_AA64PFR1_EL1, ftr_aa64raz), + + /* Op1 = 0, CRn = 0, CRm = 5 */ + ARM64_FTR_REG(SYS_ID_AA64DFR0_EL1, ftr_id_aa64dfr0), + ARM64_FTR_REG(SYS_ID_AA64DFR1_EL1, ftr_generic), + + /* Op1 = 0, CRn = 0, CRm = 6 */ + ARM64_FTR_REG(SYS_ID_AA64ISAR0_EL1, ftr_id_aa64isar0), + ARM64_FTR_REG(SYS_ID_AA64ISAR1_EL1, ftr_aa64raz), + + /* Op1 = 0, CRn = 0, CRm = 7 */ + ARM64_FTR_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0), + ARM64_FTR_REG(SYS_ID_AA64MMFR1_EL1, ftr_id_aa64mmfr1), + + /* Op1 = 3, CRn = 0, CRm = 0 */ + ARM64_FTR_REG(SYS_CTR_EL0, ftr_ctr), + ARM64_FTR_REG(SYS_DCZID_EL0, ftr_dczid), + + /* Op1 = 3, CRn = 14, CRm = 0 */ + ARM64_FTR_REG(SYS_CNTFRQ_EL0, ftr_generic32), +}; + +static int search_cmp_ftr_reg(const void *id, const void *regp) +{ + return (int)(unsigned long)id - (int)((const struct arm64_ftr_reg *)regp)->sys_id; +} + +/* + * get_arm64_ftr_reg - Lookup a feature register entry using its + * sys_reg() encoding. With the array arm64_ftr_regs sorted in the + * ascending order of sys_id , we use binary search to find a matching + * entry. + * + * returns - Upon success, matching ftr_reg entry for id. + * - NULL on failure. It is upto the caller to decide + * the impact of a failure. + */ +static struct arm64_ftr_reg *get_arm64_ftr_reg(u32 sys_id) +{ + return bsearch((const void *)(unsigned long)sys_id, + arm64_ftr_regs, + ARRAY_SIZE(arm64_ftr_regs), + sizeof(arm64_ftr_regs[0]), + search_cmp_ftr_reg); +} + +static u64 arm64_ftr_set_value(struct arm64_ftr_bits *ftrp, s64 reg, s64 ftr_val) +{ + u64 mask = arm64_ftr_mask(ftrp); + + reg &= ~mask; + reg |= (ftr_val << ftrp->shift) & mask; + return reg; +} + +static s64 arm64_ftr_safe_value(struct arm64_ftr_bits *ftrp, s64 new, s64 cur) +{ + s64 ret = 0; + + switch (ftrp->type) { + case FTR_EXACT: + ret = ftrp->safe_val; + break; + case FTR_LOWER_SAFE: + ret = new < cur ? new : cur; + break; + case FTR_HIGHER_SAFE: + ret = new > cur ? new : cur; + break; + default: + BUG(); + } + + return ret; +} + +static int __init sort_cmp_ftr_regs(const void *a, const void *b) +{ + return ((const struct arm64_ftr_reg *)a)->sys_id - + ((const struct arm64_ftr_reg *)b)->sys_id; +} + +static void __init swap_ftr_regs(void *a, void *b, int size) +{ + struct arm64_ftr_reg tmp = *(struct arm64_ftr_reg *)a; + *(struct arm64_ftr_reg *)a = *(struct arm64_ftr_reg *)b; + *(struct arm64_ftr_reg *)b = tmp; +} + +static void __init sort_ftr_regs(void) +{ + /* Keep the array sorted so that we can do the binary search */ + sort(arm64_ftr_regs, + ARRAY_SIZE(arm64_ftr_regs), + sizeof(arm64_ftr_regs[0]), + sort_cmp_ftr_regs, + swap_ftr_regs); +} + +/* + * Initialise the CPU feature register from Boot CPU values. + * Also initiliases the strict_mask for the register. + */ +static void __init init_cpu_ftr_reg(u32 sys_reg, u64 new) +{ + u64 val = 0; + u64 strict_mask = ~0x0ULL; + struct arm64_ftr_bits *ftrp; + struct arm64_ftr_reg *reg = get_arm64_ftr_reg(sys_reg); + + BUG_ON(!reg); + + for (ftrp = reg->ftr_bits; ftrp->width; ftrp++) { + s64 ftr_new = arm64_ftr_value(ftrp, new); + + val = arm64_ftr_set_value(ftrp, val, ftr_new); + if (!ftrp->strict) + strict_mask &= ~arm64_ftr_mask(ftrp); + } + reg->sys_val = val; + reg->strict_mask = strict_mask; +} + +void __init init_cpu_features(struct cpuinfo_arm64 *info) +{ + /* Before we start using the tables, make sure it is sorted */ + sort_ftr_regs(); + + init_cpu_ftr_reg(SYS_CTR_EL0, info->reg_ctr); + init_cpu_ftr_reg(SYS_DCZID_EL0, info->reg_dczid); + init_cpu_ftr_reg(SYS_CNTFRQ_EL0, info->reg_cntfrq); + init_cpu_ftr_reg(SYS_ID_AA64DFR0_EL1, info->reg_id_aa64dfr0); + init_cpu_ftr_reg(SYS_ID_AA64DFR1_EL1, info->reg_id_aa64dfr1); + init_cpu_ftr_reg(SYS_ID_AA64ISAR0_EL1, info->reg_id_aa64isar0); + init_cpu_ftr_reg(SYS_ID_AA64ISAR1_EL1, info->reg_id_aa64isar1); + init_cpu_ftr_reg(SYS_ID_AA64MMFR0_EL1, info->reg_id_aa64mmfr0); + init_cpu_ftr_reg(SYS_ID_AA64MMFR1_EL1, info->reg_id_aa64mmfr1); + init_cpu_ftr_reg(SYS_ID_AA64PFR0_EL1, info->reg_id_aa64pfr0); + init_cpu_ftr_reg(SYS_ID_AA64PFR1_EL1, info->reg_id_aa64pfr1); + init_cpu_ftr_reg(SYS_ID_DFR0_EL1, info->reg_id_dfr0); + init_cpu_ftr_reg(SYS_ID_ISAR0_EL1, info->reg_id_isar0); + init_cpu_ftr_reg(SYS_ID_ISAR1_EL1, info->reg_id_isar1); + init_cpu_ftr_reg(SYS_ID_ISAR2_EL1, info->reg_id_isar2); + init_cpu_ftr_reg(SYS_ID_ISAR3_EL1, info->reg_id_isar3); + init_cpu_ftr_reg(SYS_ID_ISAR4_EL1, info->reg_id_isar4); + init_cpu_ftr_reg(SYS_ID_ISAR5_EL1, info->reg_id_isar5); + init_cpu_ftr_reg(SYS_ID_MMFR0_EL1, info->reg_id_mmfr0); + init_cpu_ftr_reg(SYS_ID_MMFR1_EL1, info->reg_id_mmfr1); + init_cpu_ftr_reg(SYS_ID_MMFR2_EL1, info->reg_id_mmfr2); + init_cpu_ftr_reg(SYS_ID_MMFR3_EL1, info->reg_id_mmfr3); + init_cpu_ftr_reg(SYS_ID_PFR0_EL1, info->reg_id_pfr0); + init_cpu_ftr_reg(SYS_ID_PFR1_EL1, info->reg_id_pfr1); + init_cpu_ftr_reg(SYS_MVFR0_EL1, info->reg_mvfr0); + init_cpu_ftr_reg(SYS_MVFR1_EL1, info->reg_mvfr1); + init_cpu_ftr_reg(SYS_MVFR2_EL1, info->reg_mvfr2); + + /* This will be removed later, once we start using the infrastructure */ + update_mixed_endian_el0_support(info); +} + +static void update_cpu_ftr_reg(u32 sys_reg, u64 new) +{ + struct arm64_ftr_bits *ftrp; + struct arm64_ftr_reg *reg = get_arm64_ftr_reg(sys_reg); + + BUG_ON(!reg); + + for (ftrp = reg->ftr_bits; ftrp->width; ftrp++) { + s64 ftr_cur = arm64_ftr_value(ftrp, reg->sys_val); + s64 ftr_new = arm64_ftr_value(ftrp, new); + + if (ftr_cur == ftr_new) + continue; + /* Find a safe value */ + ftr_new = arm64_ftr_safe_value(ftrp, ftr_new, ftr_cur); + reg->sys_val = arm64_ftr_set_value(ftrp, reg->sys_val, ftr_new); + } + +} + +/* Update CPU feature register from non-boot CPU */ void update_cpu_features(struct cpuinfo_arm64 *info) { + update_cpu_ftr_reg(SYS_CTR_EL0, info->reg_ctr); + update_cpu_ftr_reg(SYS_DCZID_EL0, info->reg_dczid); + update_cpu_ftr_reg(SYS_CNTFRQ_EL0, info->reg_cntfrq); + update_cpu_ftr_reg(SYS_ID_AA64DFR0_EL1, info->reg_id_aa64dfr0); + update_cpu_ftr_reg(SYS_ID_AA64DFR1_EL1, info->reg_id_aa64dfr1); + update_cpu_ftr_reg(SYS_ID_AA64ISAR0_EL1, info->reg_id_aa64isar0); + update_cpu_ftr_reg(SYS_ID_AA64ISAR1_EL1, info->reg_id_aa64isar1); + update_cpu_ftr_reg(SYS_ID_AA64MMFR0_EL1, info->reg_id_aa64mmfr0); + update_cpu_ftr_reg(SYS_ID_AA64MMFR1_EL1, info->reg_id_aa64mmfr1); + update_cpu_ftr_reg(SYS_ID_AA64PFR0_EL1, info->reg_id_aa64pfr0); + update_cpu_ftr_reg(SYS_ID_AA64PFR1_EL1, info->reg_id_aa64pfr1); + update_cpu_ftr_reg(SYS_ID_DFR0_EL1, info->reg_id_dfr0); + update_cpu_ftr_reg(SYS_ID_ISAR0_EL1, info->reg_id_isar0); + update_cpu_ftr_reg(SYS_ID_ISAR1_EL1, info->reg_id_isar1); + update_cpu_ftr_reg(SYS_ID_ISAR2_EL1, info->reg_id_isar2); + update_cpu_ftr_reg(SYS_ID_ISAR3_EL1, info->reg_id_isar3); + update_cpu_ftr_reg(SYS_ID_ISAR4_EL1, info->reg_id_isar4); + update_cpu_ftr_reg(SYS_ID_ISAR5_EL1, info->reg_id_isar5); + update_cpu_ftr_reg(SYS_ID_MMFR0_EL1, info->reg_id_mmfr0); + update_cpu_ftr_reg(SYS_ID_MMFR1_EL1, info->reg_id_mmfr1); + update_cpu_ftr_reg(SYS_ID_MMFR2_EL1, info->reg_id_mmfr2); + update_cpu_ftr_reg(SYS_ID_MMFR3_EL1, info->reg_id_mmfr3); + update_cpu_ftr_reg(SYS_ID_PFR0_EL1, info->reg_id_pfr0); + update_cpu_ftr_reg(SYS_ID_PFR1_EL1, info->reg_id_pfr1); + update_cpu_ftr_reg(SYS_MVFR0_EL1, info->reg_mvfr0); + update_cpu_ftr_reg(SYS_MVFR1_EL1, info->reg_mvfr1); + update_cpu_ftr_reg(SYS_MVFR2_EL1, info->reg_mvfr2); + update_mixed_endian_el0_support(info); } diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 0dadb69..857aaf0 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -340,7 +340,6 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) check_local_cpu_errata(); check_local_cpu_features(); - update_cpu_features(info); } void cpuinfo_store_cpu(void) @@ -348,6 +347,7 @@ void cpuinfo_store_cpu(void) struct cpuinfo_arm64 *info = this_cpu_ptr(&cpu_data); __cpuinfo_store_cpu(info); cpuinfo_sanity_check(info); + update_cpu_features(info); } void __init cpuinfo_store_boot_cpu(void) @@ -356,4 +356,5 @@ void __init cpuinfo_store_boot_cpu(void) __cpuinfo_store_cpu(info); boot_cpu_data = *info; + init_cpu_features(&boot_cpu_data); } -- cgit v0.10.2 From 3086d391f992984def0aa6aa4a36f54853c58536 Mon Sep 17 00:00:00 2001 From: "Suzuki K. Poulose" Date: Mon, 19 Oct 2015 14:24:46 +0100 Subject: arm64: Consolidate CPU Sanity check to CPU Feature infrastructure This patch consolidates the CPU Sanity check to the new infrastructure. Cc: Mark Rutland Signed-off-by: Suzuki K. Poulose Tested-by: Dave Martin Signed-off-by: Catalin Marinas diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h index 704c17b..b5e9cee 100644 --- a/arch/arm64/include/asm/cpu.h +++ b/arch/arm64/include/asm/cpu.h @@ -64,6 +64,7 @@ void cpuinfo_store_cpu(void); void __init cpuinfo_store_boot_cpu(void); void __init init_cpu_features(struct cpuinfo_arm64 *info); -void update_cpu_features(struct cpuinfo_arm64 *info); +void update_cpu_features(int cpu, struct cpuinfo_arm64 *info, + struct cpuinfo_arm64 *boot); #endif /* __ASM_CPU_H */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 8d7abaa..aae1817 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -438,12 +438,9 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info) update_mixed_endian_el0_support(info); } -static void update_cpu_ftr_reg(u32 sys_reg, u64 new) +static void update_cpu_ftr_reg(struct arm64_ftr_reg *reg, u64 new) { struct arm64_ftr_bits *ftrp; - struct arm64_ftr_reg *reg = get_arm64_ftr_reg(sys_reg); - - BUG_ON(!reg); for (ftrp = reg->ftr_bits; ftrp->width; ftrp++) { s64 ftr_cur = arm64_ftr_value(ftrp, reg->sys_val); @@ -458,36 +455,137 @@ static void update_cpu_ftr_reg(u32 sys_reg, u64 new) } -/* Update CPU feature register from non-boot CPU */ -void update_cpu_features(struct cpuinfo_arm64 *info) +static int check_update_ftr_reg(u32 sys_id, int cpu, u64 val, u64 boot) { - update_cpu_ftr_reg(SYS_CTR_EL0, info->reg_ctr); - update_cpu_ftr_reg(SYS_DCZID_EL0, info->reg_dczid); - update_cpu_ftr_reg(SYS_CNTFRQ_EL0, info->reg_cntfrq); - update_cpu_ftr_reg(SYS_ID_AA64DFR0_EL1, info->reg_id_aa64dfr0); - update_cpu_ftr_reg(SYS_ID_AA64DFR1_EL1, info->reg_id_aa64dfr1); - update_cpu_ftr_reg(SYS_ID_AA64ISAR0_EL1, info->reg_id_aa64isar0); - update_cpu_ftr_reg(SYS_ID_AA64ISAR1_EL1, info->reg_id_aa64isar1); - update_cpu_ftr_reg(SYS_ID_AA64MMFR0_EL1, info->reg_id_aa64mmfr0); - update_cpu_ftr_reg(SYS_ID_AA64MMFR1_EL1, info->reg_id_aa64mmfr1); - update_cpu_ftr_reg(SYS_ID_AA64PFR0_EL1, info->reg_id_aa64pfr0); - update_cpu_ftr_reg(SYS_ID_AA64PFR1_EL1, info->reg_id_aa64pfr1); - update_cpu_ftr_reg(SYS_ID_DFR0_EL1, info->reg_id_dfr0); - update_cpu_ftr_reg(SYS_ID_ISAR0_EL1, info->reg_id_isar0); - update_cpu_ftr_reg(SYS_ID_ISAR1_EL1, info->reg_id_isar1); - update_cpu_ftr_reg(SYS_ID_ISAR2_EL1, info->reg_id_isar2); - update_cpu_ftr_reg(SYS_ID_ISAR3_EL1, info->reg_id_isar3); - update_cpu_ftr_reg(SYS_ID_ISAR4_EL1, info->reg_id_isar4); - update_cpu_ftr_reg(SYS_ID_ISAR5_EL1, info->reg_id_isar5); - update_cpu_ftr_reg(SYS_ID_MMFR0_EL1, info->reg_id_mmfr0); - update_cpu_ftr_reg(SYS_ID_MMFR1_EL1, info->reg_id_mmfr1); - update_cpu_ftr_reg(SYS_ID_MMFR2_EL1, info->reg_id_mmfr2); - update_cpu_ftr_reg(SYS_ID_MMFR3_EL1, info->reg_id_mmfr3); - update_cpu_ftr_reg(SYS_ID_PFR0_EL1, info->reg_id_pfr0); - update_cpu_ftr_reg(SYS_ID_PFR1_EL1, info->reg_id_pfr1); - update_cpu_ftr_reg(SYS_MVFR0_EL1, info->reg_mvfr0); - update_cpu_ftr_reg(SYS_MVFR1_EL1, info->reg_mvfr1); - update_cpu_ftr_reg(SYS_MVFR2_EL1, info->reg_mvfr2); + struct arm64_ftr_reg *regp = get_arm64_ftr_reg(sys_id); + + BUG_ON(!regp); + update_cpu_ftr_reg(regp, val); + if ((boot & regp->strict_mask) == (val & regp->strict_mask)) + return 0; + pr_warn("SANITY CHECK: Unexpected variation in %s. Boot CPU: %#016llx, CPU%d: %#016llx\n", + regp->name, boot, cpu, val); + return 1; +} + +/* + * Update system wide CPU feature registers with the values from a + * non-boot CPU. Also performs SANITY checks to make sure that there + * aren't any insane variations from that of the boot CPU. + */ +void update_cpu_features(int cpu, + struct cpuinfo_arm64 *info, + struct cpuinfo_arm64 *boot) +{ + int taint = 0; + + /* + * The kernel can handle differing I-cache policies, but otherwise + * caches should look identical. Userspace JITs will make use of + * *minLine. + */ + taint |= check_update_ftr_reg(SYS_CTR_EL0, cpu, + info->reg_ctr, boot->reg_ctr); + + /* + * Userspace may perform DC ZVA instructions. Mismatched block sizes + * could result in too much or too little memory being zeroed if a + * process is preempted and migrated between CPUs. + */ + taint |= check_update_ftr_reg(SYS_DCZID_EL0, cpu, + info->reg_dczid, boot->reg_dczid); + + /* If different, timekeeping will be broken (especially with KVM) */ + taint |= check_update_ftr_reg(SYS_CNTFRQ_EL0, cpu, + info->reg_cntfrq, boot->reg_cntfrq); + + /* + * The kernel uses self-hosted debug features and expects CPUs to + * support identical debug features. We presently need CTX_CMPs, WRPs, + * and BRPs to be identical. + * ID_AA64DFR1 is currently RES0. + */ + taint |= check_update_ftr_reg(SYS_ID_AA64DFR0_EL1, cpu, + info->reg_id_aa64dfr0, boot->reg_id_aa64dfr0); + taint |= check_update_ftr_reg(SYS_ID_AA64DFR1_EL1, cpu, + info->reg_id_aa64dfr1, boot->reg_id_aa64dfr1); + /* + * Even in big.LITTLE, processors should be identical instruction-set + * wise. + */ + taint |= check_update_ftr_reg(SYS_ID_AA64ISAR0_EL1, cpu, + info->reg_id_aa64isar0, boot->reg_id_aa64isar0); + taint |= check_update_ftr_reg(SYS_ID_AA64ISAR1_EL1, cpu, + info->reg_id_aa64isar1, boot->reg_id_aa64isar1); + + /* + * Differing PARange support is fine as long as all peripherals and + * memory are mapped within the minimum PARange of all CPUs. + * Linux should not care about secure memory. + */ + taint |= check_update_ftr_reg(SYS_ID_AA64MMFR0_EL1, cpu, + info->reg_id_aa64mmfr0, boot->reg_id_aa64mmfr0); + taint |= check_update_ftr_reg(SYS_ID_AA64MMFR1_EL1, cpu, + info->reg_id_aa64mmfr1, boot->reg_id_aa64mmfr1); + + /* + * EL3 is not our concern. + * ID_AA64PFR1 is currently RES0. + */ + taint |= check_update_ftr_reg(SYS_ID_AA64PFR0_EL1, cpu, + info->reg_id_aa64pfr0, boot->reg_id_aa64pfr0); + taint |= check_update_ftr_reg(SYS_ID_AA64PFR1_EL1, cpu, + info->reg_id_aa64pfr1, boot->reg_id_aa64pfr1); + + /* + * If we have AArch32, we care about 32-bit features for compat. These + * registers should be RES0 otherwise. + */ + taint |= check_update_ftr_reg(SYS_ID_DFR0_EL1, cpu, + info->reg_id_dfr0, boot->reg_id_dfr0); + taint |= check_update_ftr_reg(SYS_ID_ISAR0_EL1, cpu, + info->reg_id_isar0, boot->reg_id_isar0); + taint |= check_update_ftr_reg(SYS_ID_ISAR1_EL1, cpu, + info->reg_id_isar1, boot->reg_id_isar1); + taint |= check_update_ftr_reg(SYS_ID_ISAR2_EL1, cpu, + info->reg_id_isar2, boot->reg_id_isar2); + taint |= check_update_ftr_reg(SYS_ID_ISAR3_EL1, cpu, + info->reg_id_isar3, boot->reg_id_isar3); + taint |= check_update_ftr_reg(SYS_ID_ISAR4_EL1, cpu, + info->reg_id_isar4, boot->reg_id_isar4); + taint |= check_update_ftr_reg(SYS_ID_ISAR5_EL1, cpu, + info->reg_id_isar5, boot->reg_id_isar5); + + /* + * Regardless of the value of the AuxReg field, the AIFSR, ADFSR, and + * ACTLR formats could differ across CPUs and therefore would have to + * be trapped for virtualization anyway. + */ + taint |= check_update_ftr_reg(SYS_ID_MMFR0_EL1, cpu, + info->reg_id_mmfr0, boot->reg_id_mmfr0); + taint |= check_update_ftr_reg(SYS_ID_MMFR1_EL1, cpu, + info->reg_id_mmfr1, boot->reg_id_mmfr1); + taint |= check_update_ftr_reg(SYS_ID_MMFR2_EL1, cpu, + info->reg_id_mmfr2, boot->reg_id_mmfr2); + taint |= check_update_ftr_reg(SYS_ID_MMFR3_EL1, cpu, + info->reg_id_mmfr3, boot->reg_id_mmfr3); + taint |= check_update_ftr_reg(SYS_ID_PFR0_EL1, cpu, + info->reg_id_pfr0, boot->reg_id_pfr0); + taint |= check_update_ftr_reg(SYS_ID_PFR1_EL1, cpu, + info->reg_id_pfr1, boot->reg_id_pfr1); + taint |= check_update_ftr_reg(SYS_MVFR0_EL1, cpu, + info->reg_mvfr0, boot->reg_mvfr0); + taint |= check_update_ftr_reg(SYS_MVFR1_EL1, cpu, + info->reg_mvfr1, boot->reg_mvfr1); + taint |= check_update_ftr_reg(SYS_MVFR2_EL1, cpu, + info->reg_mvfr2, boot->reg_mvfr2); + + /* + * Mismatched CPU features are a recipe for disaster. Don't even + * pretend to support them. + */ + WARN_TAINT_ONCE(taint, TAINT_CPU_OUT_OF_SPEC, + "Unsupported CPU feature variation.\n"); update_mixed_endian_el0_support(info); } diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 857aaf0..f25869e 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -192,116 +192,6 @@ static void cpuinfo_detect_icache_policy(struct cpuinfo_arm64 *info) pr_info("Detected %s I-cache on CPU%d\n", icache_policy_str[l1ip], cpu); } -static int check_reg_mask(char *name, u64 mask, u64 boot, u64 cur, int cpu) -{ - if ((boot & mask) == (cur & mask)) - return 0; - - pr_warn("SANITY CHECK: Unexpected variation in %s. Boot CPU: %#016lx, CPU%d: %#016lx\n", - name, (unsigned long)boot, cpu, (unsigned long)cur); - - return 1; -} - -#define CHECK_MASK(field, mask, boot, cur, cpu) \ - check_reg_mask(#field, mask, (boot)->reg_ ## field, (cur)->reg_ ## field, cpu) - -#define CHECK(field, boot, cur, cpu) \ - CHECK_MASK(field, ~0ULL, boot, cur, cpu) - -/* - * Verify that CPUs don't have unexpected differences that will cause problems. - */ -static void cpuinfo_sanity_check(struct cpuinfo_arm64 *cur) -{ - unsigned int cpu = smp_processor_id(); - struct cpuinfo_arm64 *boot = &boot_cpu_data; - unsigned int diff = 0; - - /* - * The kernel can handle differing I-cache policies, but otherwise - * caches should look identical. Userspace JITs will make use of - * *minLine. - */ - diff |= CHECK_MASK(ctr, 0xffff3fff, boot, cur, cpu); - - /* - * Userspace may perform DC ZVA instructions. Mismatched block sizes - * could result in too much or too little memory being zeroed if a - * process is preempted and migrated between CPUs. - */ - diff |= CHECK(dczid, boot, cur, cpu); - - /* If different, timekeeping will be broken (especially with KVM) */ - diff |= CHECK(cntfrq, boot, cur, cpu); - - /* - * The kernel uses self-hosted debug features and expects CPUs to - * support identical debug features. We presently need CTX_CMPs, WRPs, - * and BRPs to be identical. - * ID_AA64DFR1 is currently RES0. - */ - diff |= CHECK(id_aa64dfr0, boot, cur, cpu); - diff |= CHECK(id_aa64dfr1, boot, cur, cpu); - - /* - * Even in big.LITTLE, processors should be identical instruction-set - * wise. - */ - diff |= CHECK(id_aa64isar0, boot, cur, cpu); - diff |= CHECK(id_aa64isar1, boot, cur, cpu); - - /* - * Differing PARange support is fine as long as all peripherals and - * memory are mapped within the minimum PARange of all CPUs. - * Linux should not care about secure memory. - * ID_AA64MMFR1 is currently RES0. - */ - diff |= CHECK_MASK(id_aa64mmfr0, 0xffffffffffff0ff0, boot, cur, cpu); - diff |= CHECK(id_aa64mmfr1, boot, cur, cpu); - - /* - * EL3 is not our concern. - * ID_AA64PFR1 is currently RES0. - */ - diff |= CHECK_MASK(id_aa64pfr0, 0xffffffffffff0fff, boot, cur, cpu); - diff |= CHECK(id_aa64pfr1, boot, cur, cpu); - - /* - * If we have AArch32, we care about 32-bit features for compat. These - * registers should be RES0 otherwise. - */ - diff |= CHECK(id_dfr0, boot, cur, cpu); - diff |= CHECK(id_isar0, boot, cur, cpu); - diff |= CHECK(id_isar1, boot, cur, cpu); - diff |= CHECK(id_isar2, boot, cur, cpu); - diff |= CHECK(id_isar3, boot, cur, cpu); - diff |= CHECK(id_isar4, boot, cur, cpu); - diff |= CHECK(id_isar5, boot, cur, cpu); - /* - * Regardless of the value of the AuxReg field, the AIFSR, ADFSR, and - * ACTLR formats could differ across CPUs and therefore would have to - * be trapped for virtualization anyway. - */ - diff |= CHECK_MASK(id_mmfr0, 0xff0fffff, boot, cur, cpu); - diff |= CHECK(id_mmfr1, boot, cur, cpu); - diff |= CHECK(id_mmfr2, boot, cur, cpu); - diff |= CHECK(id_mmfr3, boot, cur, cpu); - diff |= CHECK(id_pfr0, boot, cur, cpu); - diff |= CHECK(id_pfr1, boot, cur, cpu); - - diff |= CHECK(mvfr0, boot, cur, cpu); - diff |= CHECK(mvfr1, boot, cur, cpu); - diff |= CHECK(mvfr2, boot, cur, cpu); - - /* - * Mismatched CPU features are a recipe for disaster. Don't even - * pretend to support them. - */ - WARN_TAINT_ONCE(diff, TAINT_CPU_OUT_OF_SPEC, - "Unsupported CPU feature variation.\n"); -} - static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) { info->reg_cntfrq = arch_timer_get_cntfrq(); @@ -346,8 +236,7 @@ void cpuinfo_store_cpu(void) { struct cpuinfo_arm64 *info = this_cpu_ptr(&cpu_data); __cpuinfo_store_cpu(info); - cpuinfo_sanity_check(info); - update_cpu_features(info); + update_cpu_features(smp_processor_id(), info, &boot_cpu_data); } void __init cpuinfo_store_boot_cpu(void) -- cgit v0.10.2 From b3f1537893b54d0f42f52e0f4cde5e17e21f564c Mon Sep 17 00:00:00 2001 From: "Suzuki K. Poulose" Date: Mon, 19 Oct 2015 14:24:47 +0100 Subject: arm64: Read system wide CPUID value Add an API for reading the safe CPUID value across the system from the new infrastructure. Signed-off-by: Suzuki K. Poulose Tested-by: Dave Martin Signed-off-by: Catalin Marinas diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 01bb5cf..8121722 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -144,6 +144,8 @@ void check_local_cpu_features(void); bool cpu_supports_mixed_endian_el0(void); bool system_supports_mixed_endian_el0(void); +u64 read_system_reg(u32 id); + #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index aae1817..b4656ca 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -590,6 +590,15 @@ void update_cpu_features(int cpu, update_mixed_endian_el0_support(info); } +u64 read_system_reg(u32 id) +{ + struct arm64_ftr_reg *regp = get_arm64_ftr_reg(id); + + /* We shouldn't get a request for an unsupported register */ + BUG_ON(!regp); + return regp->sys_val; +} + static bool feature_matches(u64 reg, const struct arm64_cpu_capabilities *entry) { -- cgit v0.10.2 From c1e8656cbae139c8aaf34d7b802edecbc8a1cf58 Mon Sep 17 00:00:00 2001 From: "Suzuki K. Poulose" Date: Mon, 19 Oct 2015 14:24:48 +0100 Subject: arm64: Cleanup mixed endian support detection Make use of the system wide safe register to decide the support for mixed endian. Signed-off-by: Suzuki K. Poulose Tested-by: Dave Martin Signed-off-by: Catalin Marinas diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 8121722..1e281b2 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -141,11 +141,19 @@ void check_cpu_capabilities(const struct arm64_cpu_capabilities *caps, const char *info); void check_local_cpu_errata(void); void check_local_cpu_features(void); -bool cpu_supports_mixed_endian_el0(void); -bool system_supports_mixed_endian_el0(void); u64 read_system_reg(u32 id); +static inline bool cpu_supports_mixed_endian_el0(void) +{ + return id_aa64mmfr0_mixed_endian_el0(read_cpuid(ID_AA64MMFR0_EL1)); +} + +static inline bool system_supports_mixed_endian_el0(void) +{ + return id_aa64mmfr0_mixed_endian_el0(read_system_reg(SYS_ID_AA64MMFR0_EL1)); +} + #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index b4656ca..a90a64d 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -26,7 +26,6 @@ #include #include -static bool mixed_endian_el0 = true; unsigned long elf_hwcap __read_mostly; EXPORT_SYMBOL_GPL(elf_hwcap); @@ -44,22 +43,6 @@ unsigned int compat_elf_hwcap2 __read_mostly; DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS); - -bool cpu_supports_mixed_endian_el0(void) -{ - return id_aa64mmfr0_mixed_endian_el0(read_cpuid(ID_AA64MMFR0_EL1)); -} - -bool system_supports_mixed_endian_el0(void) -{ - return mixed_endian_el0; -} - -static void update_mixed_endian_el0_support(struct cpuinfo_arm64 *info) -{ - mixed_endian_el0 &= id_aa64mmfr0_mixed_endian_el0(info->reg_id_aa64mmfr0); -} - #define ARM64_FTR_BITS(STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \ { \ .strict = STRICT, \ @@ -433,9 +416,6 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info) init_cpu_ftr_reg(SYS_MVFR0_EL1, info->reg_mvfr0); init_cpu_ftr_reg(SYS_MVFR1_EL1, info->reg_mvfr1); init_cpu_ftr_reg(SYS_MVFR2_EL1, info->reg_mvfr2); - - /* This will be removed later, once we start using the infrastructure */ - update_mixed_endian_el0_support(info); } static void update_cpu_ftr_reg(struct arm64_ftr_reg *reg, u64 new) @@ -586,8 +566,6 @@ void update_cpu_features(int cpu, */ WARN_TAINT_ONCE(taint, TAINT_CPU_OUT_OF_SPEC, "Unsupported CPU feature variation.\n"); - - update_mixed_endian_el0_support(info); } u64 read_system_reg(u32 id) -- cgit v0.10.2 From ce8b602c694c9482e0ffb7432cd59fa2276673fe Mon Sep 17 00:00:00 2001 From: "Suzuki K. Poulose" Date: Mon, 19 Oct 2015 14:24:49 +0100 Subject: arm64: Refactor check_cpu_capabilities check_cpu_capabilities runs through a given list of caps and checks if the system has the cap, updates the system capability bitmap and also runs any enable() methods associated with them. All of this is not quite obvious from the name 'check'. This patch splits the check_cpu_capabilities into two parts : 1) update_cpu_capabilities => Runs through the given list and updates the system wide capability map. 2) enable_cpu_capabilities => Runs through the given list and invokes enable() (if any) for the caps enabled on the system. Cc: Andre Przywara Cc: Will Deacon Suggested-by: Catalin Marinas Signed-off-by: Suzuki K. Poulose Tested-by: Dave Martin Signed-off-by: Catalin Marinas diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 1e281b2..12d883a 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -137,7 +137,7 @@ static inline bool id_aa64mmfr0_mixed_endian_el0(u64 mmfr0) void __init setup_cpu_features(void); -void check_cpu_capabilities(const struct arm64_cpu_capabilities *caps, +void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, const char *info); void check_local_cpu_errata(void); void check_local_cpu_features(void); diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 6ffd914..09eab32 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -88,5 +88,5 @@ const struct arm64_cpu_capabilities arm64_errata[] = { void check_local_cpu_errata(void) { - check_cpu_capabilities(arm64_errata, "enabling workaround for"); + update_cpu_capabilities(arm64_errata, "enabling workaround for"); } diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index a90a64d..78c4cb7a 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -629,7 +629,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { {}, }; -void check_cpu_capabilities(const struct arm64_cpu_capabilities *caps, +void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, const char *info) { int i; @@ -642,8 +642,15 @@ void check_cpu_capabilities(const struct arm64_cpu_capabilities *caps, pr_info("%s %s\n", info, caps[i].desc); cpus_set_cap(caps[i].capability); } +} + +/* + * Run through the enabled capabilities and enable() it on the CPUs + */ +void enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps) +{ + int i; - /* second pass allows enable() to consider interacting capabilities */ for (i = 0; caps[i].desc; i++) { if (cpus_have_cap(caps[i].capability) && caps[i].enable) caps[i].enable(); @@ -652,7 +659,8 @@ void check_cpu_capabilities(const struct arm64_cpu_capabilities *caps, void check_local_cpu_features(void) { - check_cpu_capabilities(arm64_features, "detected feature:"); + update_cpu_capabilities(arm64_features, "detected feature:"); + enable_cpu_capabilities(arm64_features); } void __init setup_cpu_features(void) -- cgit v0.10.2 From dbb4e152b8da1f977d9d8cd7e494ab4ee3622f72 Mon Sep 17 00:00:00 2001 From: "Suzuki K. Poulose" Date: Mon, 19 Oct 2015 14:24:50 +0100 Subject: arm64: Delay cpu feature capability checks At the moment we run through the arm64_features capability list for each CPU and set the capability if one of the CPU supports it. This could be problematic in a heterogeneous system with differing capabilities. Delay the CPU feature checks until all the enabled CPUs are up(i.e, smp_cpus_done(), so that we can make better decisions based on the overall system capability. Once we decide and advertise the capabilities the alternatives can be applied. From this state, we cannot roll back a feature to disabled based on the values from a new hotplugged CPU, due to the runtime patching and other reasons. So, for all new CPUs, we need to make sure that they have the established system capabilities. Failing which, we bring the CPU down, preventing it from turning online. Once the capabilities are decided, any new CPU booting up goes through verification to ensure that it has all the enabled capabilities and also invokes the respective enable() method on the CPU. The CPU errata checks are not delayed and is still executed per-CPU to detect the respective capabilities. If we ever come across a non-errata capability that needs to be checked on each-CPU, we could introduce them via a new capability table(or introduce a flag), which can be processed per CPU. The next patch will make the feature checks use the system wide safe value of a feature register. NOTE: The enable() methods associated with the capability is scheduled on all the CPUs (which is the only use case at the moment). If we need a different type of 'enable()' which only needs to be run once on any CPU, we should be able to handle that when needed. Signed-off-by: Suzuki K. Poulose Tested-by: Dave Martin [catalin.marinas@arm.com: static variable and coding style fixes] Signed-off-by: Catalin Marinas diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 12d883a..1603ae8 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -70,7 +70,7 @@ struct arm64_cpu_capabilities { const char *desc; u16 capability; bool (*matches)(const struct arm64_cpu_capabilities *); - void (*enable)(void); + void (*enable)(void *); /* Called on all active CPUs */ union { struct { /* To be used for erratum handling only */ u32 midr_model; @@ -140,7 +140,14 @@ void __init setup_cpu_features(void); void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, const char *info); void check_local_cpu_errata(void); -void check_local_cpu_features(void); + +#ifdef CONFIG_HOTPLUG_CPU +void verify_local_cpu_capabilities(void); +#else +static inline void verify_local_cpu_capabilities(void) +{ +} +#endif u64 read_system_reg(u32 id); diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 98f3235..4acb7ca 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -186,6 +186,6 @@ static inline void spin_lock_prefetch(const void *x) #endif -void cpu_enable_pan(void); +void cpu_enable_pan(void *__unused); #endif /* __ASM_PROCESSOR_H */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 78c4cb7a..e002cad 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -645,19 +646,101 @@ void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, } /* - * Run through the enabled capabilities and enable() it on the CPUs + * Run through the enabled capabilities and enable() it on all active + * CPUs */ void enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps) { int i; + for (i = 0; caps[i].desc; i++) + if (caps[i].enable && cpus_have_cap(caps[i].capability)) + on_each_cpu(caps[i].enable, NULL, true); +} + +#ifdef CONFIG_HOTPLUG_CPU + +/* + * Flag to indicate if we have computed the system wide + * capabilities based on the boot time active CPUs. This + * will be used to determine if a new booting CPU should + * go through the verification process to make sure that it + * supports the system capabilities, without using a hotplug + * notifier. + */ +static bool sys_caps_initialised; + +static inline void set_sys_caps_initialised(void) +{ + sys_caps_initialised = true; +} + +/* + * Park the CPU which doesn't have the capability as advertised + * by the system. + */ +static void fail_incapable_cpu(char *cap_type, + const struct arm64_cpu_capabilities *cap) +{ + int cpu = smp_processor_id(); + + pr_crit("CPU%d: missing %s : %s\n", cpu, cap_type, cap->desc); + /* Mark this CPU absent */ + set_cpu_present(cpu, 0); + + /* Check if we can park ourselves */ + if (cpu_ops[cpu] && cpu_ops[cpu]->cpu_die) + cpu_ops[cpu]->cpu_die(cpu); + asm( + "1: wfe\n" + " wfi\n" + " b 1b"); +} + +/* + * Run through the enabled system capabilities and enable() it on this CPU. + * The capabilities were decided based on the available CPUs at the boot time. + * Any new CPU should match the system wide status of the capability. If the + * new CPU doesn't have a capability which the system now has enabled, we + * cannot do anything to fix it up and could cause unexpected failures. So + * we park the CPU. + */ +void verify_local_cpu_capabilities(void) +{ + int i; + const struct arm64_cpu_capabilities *caps; + + /* + * If we haven't computed the system capabilities, there is nothing + * to verify. + */ + if (!sys_caps_initialised) + return; + + caps = arm64_features; for (i = 0; caps[i].desc; i++) { - if (cpus_have_cap(caps[i].capability) && caps[i].enable) - caps[i].enable(); + if (!cpus_have_cap(caps[i].capability)) + continue; + /* + * If the new CPU misses an advertised feature, we cannot proceed + * further, park the cpu. + */ + if (!caps[i].matches(&caps[i])) + fail_incapable_cpu("arm64_features", &caps[i]); + if (caps[i].enable) + caps[i].enable(NULL); } } -void check_local_cpu_features(void) +#else /* !CONFIG_HOTPLUG_CPU */ + +static inline void set_sys_caps_initialised(void) +{ +} + +#endif /* CONFIG_HOTPLUG_CPU */ + +static void setup_feature_capabilities(void) { update_cpu_capabilities(arm64_features, "detected feature:"); enable_cpu_capabilities(arm64_features); @@ -670,6 +753,12 @@ void __init setup_cpu_features(void) u32 cwg; int cls; + /* Set the CPU feature capabilies */ + setup_feature_capabilities(); + + /* Advertise that we have computed the system capabilities */ + set_sys_caps_initialised(); + /* * Check for sane CTR_EL0.CWG value. */ diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index f25869e..789fbea 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -229,7 +229,6 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) cpuinfo_detect_icache_policy(info); check_local_cpu_errata(); - check_local_cpu_features(); } void cpuinfo_store_cpu(void) diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index d1d0049..2bbdc0e 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -156,6 +156,13 @@ asmlinkage void secondary_start_kernel(void) preempt_disable(); trace_hardirqs_off(); + /* + * If the system has established the capabilities, make sure + * this CPU ticks all of those. If it doesn't, the CPU will + * fail to come online. + */ + verify_local_cpu_capabilities(); + if (cpu_ops[cpu]->cpu_postboot) cpu_ops[cpu]->cpu_postboot(); diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index aba9ead..066d5ae 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -555,7 +555,7 @@ asmlinkage int __exception do_debug_exception(unsigned long addr, } #ifdef CONFIG_ARM64_PAN -void cpu_enable_pan(void) +void cpu_enable_pan(void *__unused) { config_sctlr_el1(SCTLR_EL1_SPAN, 0); } -- cgit v0.10.2 From da8d02d19ffdd201af632c755a473b6df4b3e4cc Mon Sep 17 00:00:00 2001 From: "Suzuki K. Poulose" Date: Mon, 19 Oct 2015 14:24:51 +0100 Subject: arm64/capabilities: Make use of system wide safe value Now that we can reliably read the system wide safe value for a feature register, use that to compute the system capability. This patch also replaces the 'feature-register-specific' methods with a generic routine to check the capability. Signed-off-by: Suzuki K. Poulose Tested-by: Dave Martin Signed-off-by: Catalin Marinas diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 1603ae8..eec584e 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -78,6 +78,7 @@ struct arm64_cpu_capabilities { }; struct { /* Feature register checking */ + u32 sys_reg; int field_pos; int min_field_value; }; diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index e002cad..8aff8fc 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -586,34 +586,31 @@ feature_matches(u64 reg, const struct arm64_cpu_capabilities *entry) return val >= entry->min_field_value; } -#define __ID_FEAT_CHK(reg) \ -static bool __maybe_unused \ -has_##reg##_feature(const struct arm64_cpu_capabilities *entry) \ -{ \ - u64 val; \ - \ - val = read_cpuid(reg##_el1); \ - return feature_matches(val, entry); \ -} +static bool +has_cpuid_feature(const struct arm64_cpu_capabilities *entry) +{ + u64 val; -__ID_FEAT_CHK(id_aa64pfr0); -__ID_FEAT_CHK(id_aa64mmfr1); -__ID_FEAT_CHK(id_aa64isar0); + val = read_system_reg(entry->sys_reg); + return feature_matches(val, entry); +} static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "GIC system register CPU interface", .capability = ARM64_HAS_SYSREG_GIC_CPUIF, - .matches = has_id_aa64pfr0_feature, - .field_pos = 24, + .matches = has_cpuid_feature, + .sys_reg = SYS_ID_AA64PFR0_EL1, + .field_pos = ID_AA64PFR0_GIC_SHIFT, .min_field_value = 1, }, #ifdef CONFIG_ARM64_PAN { .desc = "Privileged Access Never", .capability = ARM64_HAS_PAN, - .matches = has_id_aa64mmfr1_feature, - .field_pos = 20, + .matches = has_cpuid_feature, + .sys_reg = SYS_ID_AA64MMFR1_EL1, + .field_pos = ID_AA64MMFR1_PAN_SHIFT, .min_field_value = 1, .enable = cpu_enable_pan, }, @@ -622,8 +619,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "LSE atomic instructions", .capability = ARM64_HAS_LSE_ATOMICS, - .matches = has_id_aa64isar0_feature, - .field_pos = 20, + .matches = has_cpuid_feature, + .sys_reg = SYS_ID_AA64ISAR0_EL1, + .field_pos = ID_AA64ISAR0_ATOMICS_SHIFT, .min_field_value = 2, }, #endif /* CONFIG_AS_LSE && CONFIG_ARM64_LSE_ATOMICS */ @@ -676,6 +674,47 @@ static inline void set_sys_caps_initialised(void) } /* + * __raw_read_system_reg() - Used by a STARTING cpu before cpuinfo is populated. + */ +static u64 __raw_read_system_reg(u32 sys_id) +{ + switch (sys_id) { + case SYS_ID_PFR0_EL1: return (u64)read_cpuid(ID_PFR0_EL1); + case SYS_ID_PFR1_EL1: return (u64)read_cpuid(ID_PFR1_EL1); + case SYS_ID_DFR0_EL1: return (u64)read_cpuid(ID_DFR0_EL1); + case SYS_ID_MMFR0_EL1: return (u64)read_cpuid(ID_MMFR0_EL1); + case SYS_ID_MMFR1_EL1: return (u64)read_cpuid(ID_MMFR1_EL1); + case SYS_ID_MMFR2_EL1: return (u64)read_cpuid(ID_MMFR2_EL1); + case SYS_ID_MMFR3_EL1: return (u64)read_cpuid(ID_MMFR3_EL1); + case SYS_ID_ISAR0_EL1: return (u64)read_cpuid(ID_ISAR0_EL1); + case SYS_ID_ISAR1_EL1: return (u64)read_cpuid(ID_ISAR1_EL1); + case SYS_ID_ISAR2_EL1: return (u64)read_cpuid(ID_ISAR2_EL1); + case SYS_ID_ISAR3_EL1: return (u64)read_cpuid(ID_ISAR3_EL1); + case SYS_ID_ISAR4_EL1: return (u64)read_cpuid(ID_ISAR4_EL1); + case SYS_ID_ISAR5_EL1: return (u64)read_cpuid(ID_ISAR4_EL1); + case SYS_MVFR0_EL1: return (u64)read_cpuid(MVFR0_EL1); + case SYS_MVFR1_EL1: return (u64)read_cpuid(MVFR1_EL1); + case SYS_MVFR2_EL1: return (u64)read_cpuid(MVFR2_EL1); + + case SYS_ID_AA64PFR0_EL1: return (u64)read_cpuid(ID_AA64PFR0_EL1); + case SYS_ID_AA64PFR1_EL1: return (u64)read_cpuid(ID_AA64PFR0_EL1); + case SYS_ID_AA64DFR0_EL1: return (u64)read_cpuid(ID_AA64DFR0_EL1); + case SYS_ID_AA64DFR1_EL1: return (u64)read_cpuid(ID_AA64DFR0_EL1); + case SYS_ID_AA64MMFR0_EL1: return (u64)read_cpuid(ID_AA64MMFR0_EL1); + case SYS_ID_AA64MMFR1_EL1: return (u64)read_cpuid(ID_AA64MMFR1_EL1); + case SYS_ID_AA64ISAR0_EL1: return (u64)read_cpuid(ID_AA64ISAR0_EL1); + case SYS_ID_AA64ISAR1_EL1: return (u64)read_cpuid(ID_AA64ISAR1_EL1); + + case SYS_CNTFRQ_EL0: return (u64)read_cpuid(CNTFRQ_EL0); + case SYS_CTR_EL0: return (u64)read_cpuid(CTR_EL0); + case SYS_DCZID_EL0: return (u64)read_cpuid(DCZID_EL0); + default: + BUG(); + return 0; + } +} + +/* * Park the CPU which doesn't have the capability as advertised * by the system. */ @@ -719,13 +758,13 @@ void verify_local_cpu_capabilities(void) caps = arm64_features; for (i = 0; caps[i].desc; i++) { - if (!cpus_have_cap(caps[i].capability)) + if (!cpus_have_cap(caps[i].capability) || !caps[i].sys_reg) continue; /* * If the new CPU misses an advertised feature, we cannot proceed * further, park the cpu. */ - if (!caps[i].matches(&caps[i])) + if (!feature_matches(__raw_read_system_reg(caps[i].sys_reg), &caps[i])) fail_incapable_cpu("arm64_features", &caps[i]); if (caps[i].enable) caps[i].enable(NULL); -- cgit v0.10.2 From 37b01d53ceefa390d6eee7a82f3c156b64951bf3 Mon Sep 17 00:00:00 2001 From: "Suzuki K. Poulose" Date: Mon, 19 Oct 2015 14:24:52 +0100 Subject: arm64/HWCAP: Use system wide safe values Extend struct arm64_cpu_capabilities to handle the HWCAP detection and make use of the system wide value of the feature registers for a reliable set of HWCAPs. Signed-off-by: Suzuki K. Poulose Tested-by: Dave Martin Signed-off-by: Catalin Marinas diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index eec584e..ca46210 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -81,6 +81,8 @@ struct arm64_cpu_capabilities { u32 sys_reg; int field_pos; int min_field_value; + int hwcap_type; + unsigned long hwcap; }; }; }; diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index 0ad7351..400b80b 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -52,6 +52,14 @@ extern unsigned int compat_elf_hwcap, compat_elf_hwcap2; #endif +enum { + CAP_HWCAP = 1, +#ifdef CONFIG_COMPAT + CAP_COMPAT_HWCAP, + CAP_COMPAT_HWCAP2, +#endif +}; + extern unsigned long elf_hwcap; #endif #endif diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 8aff8fc..dd6997e 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -628,6 +628,89 @@ static const struct arm64_cpu_capabilities arm64_features[] = { {}, }; +#define HWCAP_CAP(reg, field, min_value, type, cap) \ + { \ + .desc = #cap, \ + .matches = has_cpuid_feature, \ + .sys_reg = reg, \ + .field_pos = field, \ + .min_field_value = min_value, \ + .hwcap_type = type, \ + .hwcap = cap, \ + } + +static const struct arm64_cpu_capabilities arm64_hwcaps[] = { + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, 2, CAP_HWCAP, HWCAP_PMULL), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, 1, CAP_HWCAP, HWCAP_AES), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA1_SHIFT, 1, CAP_HWCAP, HWCAP_SHA1), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, 1, CAP_HWCAP, HWCAP_SHA2), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_CRC32_SHIFT, 1, CAP_HWCAP, HWCAP_CRC32), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_ATOMICS_SHIFT, 2, CAP_HWCAP, HWCAP_ATOMICS), +#ifdef CONFIG_COMPAT + HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, 2, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_PMULL), + HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_AES), + HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA1_SHIFT, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA1), + HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA2_SHIFT, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA2), + HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_CRC32_SHIFT, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_CRC32), +#endif + {}, +}; + +static void cap_set_hwcap(const struct arm64_cpu_capabilities *cap) +{ + switch (cap->hwcap_type) { + case CAP_HWCAP: + elf_hwcap |= cap->hwcap; + break; +#ifdef CONFIG_COMPAT + case CAP_COMPAT_HWCAP: + compat_elf_hwcap |= (u32)cap->hwcap; + break; + case CAP_COMPAT_HWCAP2: + compat_elf_hwcap2 |= (u32)cap->hwcap; + break; +#endif + default: + WARN_ON(1); + break; + } +} + +/* Check if we have a particular HWCAP enabled */ +static bool cpus_have_hwcap(const struct arm64_cpu_capabilities *cap) +{ + bool rc; + + switch (cap->hwcap_type) { + case CAP_HWCAP: + rc = (elf_hwcap & cap->hwcap) != 0; + break; +#ifdef CONFIG_COMPAT + case CAP_COMPAT_HWCAP: + rc = (compat_elf_hwcap & (u32)cap->hwcap) != 0; + break; + case CAP_COMPAT_HWCAP2: + rc = (compat_elf_hwcap2 & (u32)cap->hwcap) != 0; + break; +#endif + default: + WARN_ON(1); + rc = false; + } + + return rc; +} + +static void setup_cpu_hwcaps(void) +{ + int i; + const struct arm64_cpu_capabilities *hwcaps = arm64_hwcaps; + + for (i = 0; hwcaps[i].desc; i++) + if (hwcaps[i].matches(&hwcaps[i])) + cap_set_hwcap(&hwcaps[i]); +} + void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, const char *info) { @@ -769,6 +852,13 @@ void verify_local_cpu_capabilities(void) if (caps[i].enable) caps[i].enable(NULL); } + + for (i = 0, caps = arm64_hwcaps; caps[i].desc; i++) { + if (!cpus_have_hwcap(&caps[i])) + continue; + if (!feature_matches(__raw_read_system_reg(caps[i].sys_reg), &caps[i])) + fail_incapable_cpu("arm64_hwcaps", &caps[i]); + } } #else /* !CONFIG_HOTPLUG_CPU */ @@ -787,13 +877,12 @@ static void setup_feature_capabilities(void) void __init setup_cpu_features(void) { - u64 features; - s64 block; u32 cwg; int cls; /* Set the CPU feature capabilies */ setup_feature_capabilities(); + setup_cpu_hwcaps(); /* Advertise that we have computed the system capabilities */ set_sys_caps_initialised(); @@ -809,74 +898,4 @@ void __init setup_cpu_features(void) if (L1_CACHE_BYTES < cls) pr_warn("L1_CACHE_BYTES smaller than the Cache Writeback Granule (%d < %d)\n", L1_CACHE_BYTES, cls); - - /* - * ID_AA64ISAR0_EL1 contains 4-bit wide signed feature blocks. - * The blocks we test below represent incremental functionality - * for non-negative values. Negative values are reserved. - */ - features = read_cpuid(ID_AA64ISAR0_EL1); - block = cpuid_feature_extract_field(features, 4); - if (block > 0) { - switch (block) { - default: - case 2: - elf_hwcap |= HWCAP_PMULL; - case 1: - elf_hwcap |= HWCAP_AES; - case 0: - break; - } - } - - if (cpuid_feature_extract_field(features, 8) > 0) - elf_hwcap |= HWCAP_SHA1; - - if (cpuid_feature_extract_field(features, 12) > 0) - elf_hwcap |= HWCAP_SHA2; - - if (cpuid_feature_extract_field(features, 16) > 0) - elf_hwcap |= HWCAP_CRC32; - - block = cpuid_feature_extract_field(features, 20); - if (block > 0) { - switch (block) { - default: - case 2: - elf_hwcap |= HWCAP_ATOMICS; - case 1: - /* RESERVED */ - case 0: - break; - } - } - -#ifdef CONFIG_COMPAT - /* - * ID_ISAR5_EL1 carries similar information as above, but pertaining to - * the AArch32 32-bit execution state. - */ - features = read_cpuid(ID_ISAR5_EL1); - block = cpuid_feature_extract_field(features, 4); - if (block > 0) { - switch (block) { - default: - case 2: - compat_elf_hwcap2 |= COMPAT_HWCAP2_PMULL; - case 1: - compat_elf_hwcap2 |= COMPAT_HWCAP2_AES; - case 0: - break; - } - } - - if (cpuid_feature_extract_field(features, 8) > 0) - compat_elf_hwcap2 |= COMPAT_HWCAP2_SHA1; - - if (cpuid_feature_extract_field(features, 12) > 0) - compat_elf_hwcap2 |= COMPAT_HWCAP2_SHA2; - - if (cpuid_feature_extract_field(features, 16) > 0) - compat_elf_hwcap2 |= COMPAT_HWCAP2_CRC32; -#endif } -- cgit v0.10.2 From fe80f9f2da1006a4308c2bc018ee1d67f10dd8d0 Mon Sep 17 00:00:00 2001 From: "Suzuki K. Poulose" Date: Mon, 19 Oct 2015 14:24:53 +0100 Subject: arm64: Move FP/ASIMD hwcap handling to common code The FP/ASIMD is detected in fpsimd_init(), which is built-in unconditionally. Lets move the hwcap handling to the central place. Signed-off-by: Suzuki K. Poulose Tested-by: Dave Martin Signed-off-by: Catalin Marinas diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index dd6997e..d0d6074 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -646,6 +646,8 @@ static const struct arm64_cpu_capabilities arm64_hwcaps[] = { HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, 1, CAP_HWCAP, HWCAP_SHA2), HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_CRC32_SHIFT, 1, CAP_HWCAP, HWCAP_CRC32), HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_ATOMICS_SHIFT, 2, CAP_HWCAP, HWCAP_ATOMICS), + HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, 0, CAP_HWCAP, HWCAP_FP), + HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, 0, CAP_HWCAP, HWCAP_ASIMD), #ifdef CONFIG_COMPAT HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, 2, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_PMULL), HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_AES), diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index c56956a..4c46c54 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -332,21 +332,15 @@ static inline void fpsimd_hotplug_init(void) { } */ static int __init fpsimd_init(void) { - u64 pfr = read_cpuid(ID_AA64PFR0_EL1); - - if (pfr & (0xf << 16)) { + if (elf_hwcap & HWCAP_FP) { + fpsimd_pm_init(); + fpsimd_hotplug_init(); + } else { pr_notice("Floating-point is not implemented\n"); - return 0; } - elf_hwcap |= HWCAP_FP; - if (pfr & (0xf << 20)) + if (!(elf_hwcap & HWCAP_ASIMD)) pr_notice("Advanced SIMD is not implemented\n"); - else - elf_hwcap |= HWCAP_ASIMD; - - fpsimd_pm_init(); - fpsimd_hotplug_init(); return 0; } -- cgit v0.10.2 From 3085bb01b40676d946a13064483ab2819ae3b010 Mon Sep 17 00:00:00 2001 From: "Suzuki K. Poulose" Date: Mon, 19 Oct 2015 14:24:54 +0100 Subject: arm64/debug: Make use of the system wide safe value Use the system wide value of ID_AA64DFR0 to make safer decisions Signed-off-by: Suzuki K. Poulose Tested-by: Dave Martin Signed-off-by: Catalin Marinas diff --git a/arch/arm64/include/asm/hw_breakpoint.h b/arch/arm64/include/asm/hw_breakpoint.h index 4c47cb2..e54415e 100644 --- a/arch/arm64/include/asm/hw_breakpoint.h +++ b/arch/arm64/include/asm/hw_breakpoint.h @@ -17,6 +17,7 @@ #define __ASM_HW_BREAKPOINT_H #include +#include #ifdef __KERNEL__ @@ -137,13 +138,17 @@ extern struct pmu perf_ops_bp; /* Determine number of BRP registers available. */ static inline int get_num_brps(void) { - return ((read_cpuid(ID_AA64DFR0_EL1) >> 12) & 0xf) + 1; + return 1 + + cpuid_feature_extract_field(read_system_reg(SYS_ID_AA64DFR0_EL1), + ID_AA64DFR0_BRPS_SHIFT); } /* Determine number of WRP registers available. */ static inline int get_num_wrps(void) { - return ((read_cpuid(ID_AA64DFR0_EL1) >> 20) & 0xf) + 1; + return 1 + + cpuid_feature_extract_field(read_system_reg(SYS_ID_AA64DFR0_EL1), + ID_AA64DFR0_WRPS_SHIFT); } #endif /* __KERNEL__ */ diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 8eef30f..1429044 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -26,14 +26,16 @@ #include #include -#include +#include #include +#include #include /* Determine debug architecture. */ u8 debug_monitors_arch(void) { - return read_cpuid(ID_AA64DFR0_EL1) & 0xf; + return cpuid_feature_extract_field(read_system_reg(SYS_ID_AA64DFR0_EL1), + ID_AA64DFR0_DEBUGVER_SHIFT); } /* -- cgit v0.10.2 From 4db8e5ea6b07ae83e4361aee3f967f7126e01fa4 Mon Sep 17 00:00:00 2001 From: "Suzuki K. Poulose" Date: Mon, 19 Oct 2015 14:24:55 +0100 Subject: arm64/kvm: Make use of the system wide safe values Use the system wide safe value from the new API for safer decisions Cc: Marc Zyngier Cc: Christoffer Dall Cc: kvmarm@lists.cs.columbia.edu Signed-off-by: Suzuki K. Poulose Acked-by: Christoffer Dall Tested-by: Dave Martin Signed-off-by: Catalin Marinas diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 91cf535..f34745c 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -53,7 +53,7 @@ static bool cpu_has_32bit_el1(void) { u64 pfr0; - pfr0 = read_cpuid(ID_AA64PFR0_EL1); + pfr0 = read_system_reg(SYS_ID_AA64PFR0_EL1); return !!(pfr0 & 0x20); } diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index d03d3af..87a64e8 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -693,13 +693,13 @@ static bool trap_dbgidr(struct kvm_vcpu *vcpu, if (p->is_write) { return ignore_write(vcpu, p); } else { - u64 dfr = read_cpuid(ID_AA64DFR0_EL1); - u64 pfr = read_cpuid(ID_AA64PFR0_EL1); - u32 el3 = !!((pfr >> 12) & 0xf); + u64 dfr = read_system_reg(SYS_ID_AA64DFR0_EL1); + u64 pfr = read_system_reg(SYS_ID_AA64PFR0_EL1); + u32 el3 = !!cpuid_feature_extract_field(pfr, ID_AA64PFR0_EL3_SHIFT); - *vcpu_reg(vcpu, p->Rt) = ((((dfr >> 20) & 0xf) << 28) | - (((dfr >> 12) & 0xf) << 24) | - (((dfr >> 28) & 0xf) << 20) | + *vcpu_reg(vcpu, p->Rt) = ((((dfr >> ID_AA64DFR0_WRPS_SHIFT) & 0xf) << 28) | + (((dfr >> ID_AA64DFR0_BRPS_SHIFT) & 0xf) << 24) | + (((dfr >> ID_AA64DFR0_CTX_CMPS_SHIFT) & 0xf) << 20) | (6 << 16) | (el3 << 14) | (el3 << 12)); return true; } -- cgit v0.10.2 From 9299b24712400d1cfe3dcf3e5dcb6cac2940df56 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Thu, 30 Jul 2015 16:36:25 +0100 Subject: arm64: Constify hwcap name string arrays The hwcap string arrays used for generating the contents of /proc/cpuinfo are currently arrays of non-const pointers. There's no need for these pointers to be mutable, so this patch makes them const so that they can be moved to .rodata. Signed-off-by: Dave Martin Signed-off-by: Catalin Marinas diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 789fbea..706679d 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -48,7 +48,7 @@ static char *icache_policy_str[] = { unsigned long __icache_flags; -static const char *hwcap_str[] = { +static const char *const hwcap_str[] = { "fp", "asimd", "evtstrm", @@ -62,7 +62,7 @@ static const char *hwcap_str[] = { }; #ifdef CONFIG_COMPAT -static const char *compat_hwcap_str[] = { +static const char *const compat_hwcap_str[] = { "swp", "half", "thumb", @@ -87,7 +87,7 @@ static const char *compat_hwcap_str[] = { "evtstrm" }; -static const char *compat_hwcap2_str[] = { +static const char *const compat_hwcap2_str[] = { "aes", "pmull", "sha1", -- cgit v0.10.2 From 58c9c87ca9dca99f269267f5aadcd051fef1637b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 22 Oct 2015 14:34:57 +0200 Subject: genirq: Make the cpuhotplug migration code less noisy The original arm code has a pr_debug() statement for the case where the irq chip has no set_affinity() callback. That's sufficient for debugging and we really don't want to spam dmesg with useless warnings for the normal case. Fixes: f1e0bb0ad473: "genirq: Introduce generic irq migration for cpu hotunplug" Reported-by: Geert Uytterhoeven Requested-by: Russell King Signed-off-by: Thomas Gleixner Cc: Yang Yingliang Cc: Mark Rutland Cc: Marc Zyngier Cc: Will Deacon Cc: Hanjun Guo Cc: Jiang Liu diff --git a/kernel/irq/cpuhotplug.c b/kernel/irq/cpuhotplug.c index 80f4f4e..011f8c4 100644 --- a/kernel/irq/cpuhotplug.c +++ b/kernel/irq/cpuhotplug.c @@ -36,7 +36,7 @@ static bool migrate_one_irq(struct irq_desc *desc) c = irq_data_get_irq_chip(d); if (!c->irq_set_affinity) { - pr_warn_ratelimited("IRQ%u: unable to set affinity\n", d->irq); + pr_debug("IRQ%u: unable to set affinity\n", d->irq); } else { int r = irq_do_set_affinity(d, affinity, false); if (r) -- cgit v0.10.2 From fde4a59fc1c55709b96d0f07110895f20015b6cc Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 27 Oct 2015 12:05:54 +0000 Subject: arm64: cpufeature: declare enable_cpu_capabilities as static enable_cpu_capabilities is only called from within cpufeature.c, so it can be declared static. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index d0d6074..504526f 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -732,7 +732,7 @@ void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, * Run through the enabled capabilities and enable() it on all active * CPUs */ -void enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps) +static void enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps) { int i; -- cgit v0.10.2 From 59f2413573e4ee1e76062beef4a359156500db94 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 27 Oct 2015 12:05:55 +0000 Subject: arm64: cachetype: fix definitions of ICACHEF_* flags test_bit and set_bit take the bit number to operate on, rather than a mask. This patch fixes the ICACHEF_* definitions so that they represent the bit index in __icache_flags as opposed to the mask returned by the BIT macro. Signed-off-by: Will Deacon Reviewed-by: Mark Rutland Signed-off-by: Catalin Marinas diff --git a/arch/arm64/include/asm/cachetype.h b/arch/arm64/include/asm/cachetype.h index da2fc9e..f558869 100644 --- a/arch/arm64/include/asm/cachetype.h +++ b/arch/arm64/include/asm/cachetype.h @@ -34,8 +34,8 @@ #define CTR_L1IP(ctr) (((ctr) >> CTR_L1IP_SHIFT) & CTR_L1IP_MASK) -#define ICACHEF_ALIASING BIT(0) -#define ICACHEF_AIVIVT BIT(1) +#define ICACHEF_ALIASING 0 +#define ICACHEF_AIVIVT 1 extern unsigned long __icache_flags; -- cgit v0.10.2 From f23bef34d34b6325916daddc4cfdeee53d5139e6 Mon Sep 17 00:00:00 2001 From: Alexander Kuleshov Date: Mon, 26 Oct 2015 17:26:57 +0600 Subject: arm64/mm: use PAGE_ALIGNED instead of IS_ALIGNED The already provides the PAGE_ALIGNED macro. Let's use this macro instead of IS_ALIGNED and passing PAGE_SIZE directly. Signed-off-by: Alexander Kuleshov Acked-by: Laura Abbott Signed-off-by: Catalin Marinas diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c index e47ed1c..3571c73 100644 --- a/arch/arm64/mm/pageattr.c +++ b/arch/arm64/mm/pageattr.c @@ -45,7 +45,7 @@ static int change_memory_common(unsigned long addr, int numpages, int ret; struct page_change_data data; - if (!IS_ALIGNED(addr, PAGE_SIZE)) { + if (!PAGE_ALIGNED(addr)) { start &= PAGE_MASK; end = start + size; WARN_ON_ONCE(1); -- cgit v0.10.2 From f90df5e27d978c492c4d911476622a7413621213 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Mon, 26 Oct 2015 11:48:16 +0800 Subject: arm64: make Timer Interrupt Frequency selectable It allows a selectable timer interrupt frequency of 100, 250, 300 and 1000 HZ. We will get better performance when choose a suitable frequency in some scene. Cc: Will Deacon Signed-off-by: Kefeng Wang Signed-off-by: Catalin Marinas diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index c62fb03..4d8a5b2 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -454,10 +454,7 @@ config HOTPLUG_CPU can be controlled through /sys/devices/system/cpu. source kernel/Kconfig.preempt - -config HZ - int - default 100 +source kernel/Kconfig.hz config ARCH_HAS_HOLES_MEMORYMODEL def_bool y if SPARSEMEM -- cgit v0.10.2 From 6e4a0f2b5c56af6be43b546df16b1ece7df537d8 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 26 Oct 2015 12:53:17 +0900 Subject: arm64: remove bogus TASK_SIZE_64 check The comparison between TASK_SIZE_64 and MODULES_VADDR does not make any sense on arm64, it is simply something that has been carried over from the ARM port which arm64 is based on. So drop it. Signed-off-by: Ard Biesheuvel Signed-off-by: Catalin Marinas diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 11ccf6c..37f3538a 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -70,10 +70,6 @@ #define TASK_UNMAPPED_BASE (PAGE_ALIGN(TASK_SIZE / 4)) -#if TASK_SIZE_64 > MODULES_VADDR -#error Top of 64-bit user space clashes with start of module space -#endif - /* * Physical vs virtual RAM address space conversion. These are * private definitions which should NOT be used outside memory.h -- cgit v0.10.2 From 97303480753e48fb313dc0e15daaf11b0451cdb8 Mon Sep 17 00:00:00 2001 From: Tirumalesh Chalamarla Date: Tue, 22 Sep 2015 19:59:48 +0200 Subject: arm64: Increase the max granular size Increase the standard cacheline size to avoid having locks in the same cacheline. Cavium's ThunderX core implements cache lines of 128 byte size. With current granulare size of 64 bytes (L1_CACHE_SHIFT=6) two locks could share the same cache line leading a performance degradation. Increasing the size fixes that. Increasing the size has no negative impact to cache invalidation on systems with a smaller cache line. There is an impact on memory usage, but that's not too important for arm64 use cases. Signed-off-by: Tirumalesh Chalamarla Signed-off-by: Robert Richter Acked-by: Timur Tabi Signed-off-by: Catalin Marinas diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h index bde4499..5082b30 100644 --- a/arch/arm64/include/asm/cache.h +++ b/arch/arm64/include/asm/cache.h @@ -18,7 +18,7 @@ #include -#define L1_CACHE_SHIFT 6 +#define L1_CACHE_SHIFT 7 #define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) /* -- cgit v0.10.2 From 5accd17d0eb523350c9ef754d655e379c9bb93b3 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 22 Oct 2015 15:41:52 +0100 Subject: arm64: Fix compat register mappings For reasons not entirely apparent, but now enshrined in history, the architectural mapping of AArch32 banked registers to AArch64 registers actually orders SP_ and LR_ backwards compared to the intuitive r13/r14 order, for all modes except FIQ. Fix the compat__ macros accordingly, in the hope of avoiding subtle bugs with KVM and AArch32 guests. Signed-off-by: Robin Murphy Acked-by: Will Deacon Cc: Signed-off-by: Catalin Marinas diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index 536274e..e9e5467 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -83,14 +83,14 @@ #define compat_sp regs[13] #define compat_lr regs[14] #define compat_sp_hyp regs[15] -#define compat_sp_irq regs[16] -#define compat_lr_irq regs[17] -#define compat_sp_svc regs[18] -#define compat_lr_svc regs[19] -#define compat_sp_abt regs[20] -#define compat_lr_abt regs[21] -#define compat_sp_und regs[22] -#define compat_lr_und regs[23] +#define compat_lr_irq regs[16] +#define compat_sp_irq regs[17] +#define compat_lr_svc regs[18] +#define compat_sp_svc regs[19] +#define compat_lr_abt regs[20] +#define compat_sp_abt regs[21] +#define compat_lr_und regs[22] +#define compat_sp_und regs[23] #define compat_r8_fiq regs[24] #define compat_r9_fiq regs[25] #define compat_r10_fiq regs[26] -- cgit v0.10.2 From 86a5906e4d1df1ec160fa9e18b6f2277a5216c60 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 27 Oct 2015 17:40:26 +0000 Subject: arm64: Fix build with CONFIG_ZONE_DMA=n Trying to build with CONFIG_ZONE_DMA=n leaves visible references to the now-undefined ZONE_DMA, resulting in a syntax error. Hide the references behind an #ifdef instead of using IS_ENABLED. Signed-off-by: Robin Murphy Signed-off-by: Catalin Marinas diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 7a1f9a0..17bf39a 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -86,10 +86,10 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) memset(zone_size, 0, sizeof(zone_size)); /* 4GB maximum for 32-bit only capable devices */ - if (IS_ENABLED(CONFIG_ZONE_DMA)) { - max_dma = PFN_DOWN(arm64_dma_phys_limit); - zone_size[ZONE_DMA] = max_dma - min; - } +#ifdef CONFIG_ZONE_DMA + max_dma = PFN_DOWN(arm64_dma_phys_limit); + zone_size[ZONE_DMA] = max_dma - min; +#endif zone_size[ZONE_NORMAL] = max - max_dma; memcpy(zhole_size, zone_size, sizeof(zhole_size)); @@ -101,11 +101,12 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) if (start >= max) continue; - if (IS_ENABLED(CONFIG_ZONE_DMA) && start < max_dma) { +#ifdef CONFIG_ZONE_DMA + if (start < max_dma) { unsigned long dma_end = min(end, max_dma); zhole_size[ZONE_DMA] -= dma_end - start; } - +#endif if (end > max_dma) { unsigned long normal_end = min(end, max); unsigned long normal_start = max(start, max_dma); -- cgit v0.10.2 From cb083816ab5ac3d10a9417527f07fc5962cc3808 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 26 Oct 2015 21:42:33 +0000 Subject: arm64: page-align sections for DEBUG_RODATA A kernel built with DEBUG_RO_DATA && !CONFIG_DEBUG_ALIGN_RODATA doesn't have .text aligned to a page boundary, though fixup_executable works at page-granularity thanks to its use of create_mapping. If .text is not page-aligned, the first page it exists in may be marked non-executable, leading to failures when an attempt is made to execute code in said page. This patch upgrades ALIGN_DEBUG_RO and ALIGN_DEBUG_RO_MIN to force page alignment for DEBUG_RO_DATA && !CONFIG_DEBUG_ALIGN_RODATA kernels, ensuring that all sections with specific RWX permission requirements are mapped with the correct permissions. Signed-off-by: Mark Rutland Reported-by: Jeremy Linton Reviewed-by: Laura Abbott Acked-by: Ard Biesheuvel Cc: Suzuki Poulose Cc: Will Deacon Fixes: da141706aea52c1a ("arm64: add better page protections to arm64") Cc: Signed-off-by: Catalin Marinas diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 8a5d97b..1ee2c39 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -61,9 +61,12 @@ PECOFF_FILE_ALIGNMENT = 0x200; #define PECOFF_EDATA_PADDING #endif -#ifdef CONFIG_DEBUG_ALIGN_RODATA +#if defined(CONFIG_DEBUG_ALIGN_RODATA) #define ALIGN_DEBUG_RO . = ALIGN(1< Date: Fri, 23 Oct 2015 16:48:14 +0200 Subject: arm64/efi: move arm64 specific stub C code to libstub Now that we added special handling to the C files in libstub, move the one remaining arm64 specific EFI stub C file to libstub as well, so that it gets the same treatment. This should prevent future changes from resulting in binaries that may execute incorrectly in UEFI context. With efi-entry.S the only remaining EFI stub source file under arch/arm64, we can also simplify the Makefile logic somewhat. Signed-off-by: Ard Biesheuvel Reviewed-by: Matt Fleming Tested-by: Jeremy Linton Signed-off-by: Catalin Marinas diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 1b6bda2..474691f 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -4,11 +4,8 @@ CPPFLAGS_vmlinux.lds := -DTEXT_OFFSET=$(TEXT_OFFSET) AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET) -CFLAGS_efi-stub.o := -DTEXT_OFFSET=$(TEXT_OFFSET) CFLAGS_armv8_deprecated.o := -I$(src) -KASAN_SANITIZE_efi-stub.o := n - CFLAGS_REMOVE_ftrace.o = -pg CFLAGS_REMOVE_insn.o = -pg CFLAGS_REMOVE_return_address.o = -pg @@ -22,9 +19,7 @@ arm64-obj-y := debug-monitors.o entry.o irq.o fpsimd.o \ cpufeature.o alternative.o cacheinfo.o \ smp.o smp_spin_table.o topology.o -stub-obj := efi-stub.o efi-entry.o -extra-y := $(stub-obj) -stub-obj := $(patsubst %.o,%.stub.o,$(stub-obj)) +extra-$(CONFIG_EFI) := efi-entry.o OBJCOPYFLAGS := --prefix-symbols=__efistub_ $(obj)/%.stub.o: $(obj)/%.o FORCE @@ -42,7 +37,7 @@ arm64-obj-$(CONFIG_CPU_PM) += sleep.o suspend.o arm64-obj-$(CONFIG_CPU_IDLE) += cpuidle.o arm64-obj-$(CONFIG_JUMP_LABEL) += jump_label.o arm64-obj-$(CONFIG_KGDB) += kgdb.o -arm64-obj-$(CONFIG_EFI) += efi.o $(stub-obj) +arm64-obj-$(CONFIG_EFI) += efi.o efi-entry.stub.o arm64-obj-$(CONFIG_PCI) += pci.o arm64-obj-$(CONFIG_ARMV8_DEPRECATED) += armv8_deprecated.o arm64-obj-$(CONFIG_ACPI) += acpi.o diff --git a/arch/arm64/kernel/efi-stub.c b/arch/arm64/kernel/efi-stub.c deleted file mode 100644 index 816120e..0000000 --- a/arch/arm64/kernel/efi-stub.c +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright (C) 2013, 2014 Linaro Ltd; - * - * This file implements the EFI boot stub for the arm64 kernel. - * Adapted from ARM version by Mark Salter - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - */ -#include -#include -#include - -efi_status_t __init handle_kernel_image(efi_system_table_t *sys_table_arg, - unsigned long *image_addr, - unsigned long *image_size, - unsigned long *reserve_addr, - unsigned long *reserve_size, - unsigned long dram_base, - efi_loaded_image_t *image) -{ - efi_status_t status; - unsigned long kernel_size, kernel_memsize = 0; - unsigned long nr_pages; - void *old_image_addr = (void *)*image_addr; - - /* Relocate the image, if required. */ - kernel_size = _edata - _text; - if (*image_addr != (dram_base + TEXT_OFFSET)) { - kernel_memsize = kernel_size + (_end - _edata); - - /* - * First, try a straight allocation at the preferred offset. - * This will work around the issue where, if dram_base == 0x0, - * efi_low_alloc() refuses to allocate at 0x0 (to prevent the - * address of the allocation to be mistaken for a FAIL return - * value or a NULL pointer). It will also ensure that, on - * platforms where the [dram_base, dram_base + TEXT_OFFSET) - * interval is partially occupied by the firmware (like on APM - * Mustang), we can still place the kernel at the address - * 'dram_base + TEXT_OFFSET'. - */ - *image_addr = *reserve_addr = dram_base + TEXT_OFFSET; - nr_pages = round_up(kernel_memsize, EFI_ALLOC_ALIGN) / - EFI_PAGE_SIZE; - status = efi_call_early(allocate_pages, EFI_ALLOCATE_ADDRESS, - EFI_LOADER_DATA, nr_pages, - (efi_physical_addr_t *)reserve_addr); - if (status != EFI_SUCCESS) { - kernel_memsize += TEXT_OFFSET; - status = efi_low_alloc(sys_table_arg, kernel_memsize, - SZ_2M, reserve_addr); - - if (status != EFI_SUCCESS) { - pr_efi_err(sys_table_arg, "Failed to relocate kernel\n"); - return status; - } - *image_addr = *reserve_addr + TEXT_OFFSET; - } - memcpy((void *)*image_addr, old_image_addr, kernel_size); - *reserve_size = kernel_memsize; - } - - - return EFI_SUCCESS; -} diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile index bca9a76..92ae557 100644 --- a/drivers/firmware/efi/libstub/Makefile +++ b/drivers/firmware/efi/libstub/Makefile @@ -34,6 +34,9 @@ $(obj)/lib-%.o: $(srctree)/lib/%.c FORCE lib-$(CONFIG_EFI_ARMSTUB) += arm-stub.o fdt.o string.o \ $(patsubst %.c,lib-%.o,$(arm-deps)) +lib-$(CONFIG_ARM64) += arm64-stub.o +CFLAGS_arm64-stub.o := -DTEXT_OFFSET=$(TEXT_OFFSET) + # # arm64 puts the stub in the kernel proper, which will unnecessarily retain all # code indefinitely unless it is annotated as __init/__initdata/__initconst etc. diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c new file mode 100644 index 0000000..816120e --- /dev/null +++ b/drivers/firmware/efi/libstub/arm64-stub.c @@ -0,0 +1,68 @@ +/* + * Copyright (C) 2013, 2014 Linaro Ltd; + * + * This file implements the EFI boot stub for the arm64 kernel. + * Adapted from ARM version by Mark Salter + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ +#include +#include +#include + +efi_status_t __init handle_kernel_image(efi_system_table_t *sys_table_arg, + unsigned long *image_addr, + unsigned long *image_size, + unsigned long *reserve_addr, + unsigned long *reserve_size, + unsigned long dram_base, + efi_loaded_image_t *image) +{ + efi_status_t status; + unsigned long kernel_size, kernel_memsize = 0; + unsigned long nr_pages; + void *old_image_addr = (void *)*image_addr; + + /* Relocate the image, if required. */ + kernel_size = _edata - _text; + if (*image_addr != (dram_base + TEXT_OFFSET)) { + kernel_memsize = kernel_size + (_end - _edata); + + /* + * First, try a straight allocation at the preferred offset. + * This will work around the issue where, if dram_base == 0x0, + * efi_low_alloc() refuses to allocate at 0x0 (to prevent the + * address of the allocation to be mistaken for a FAIL return + * value or a NULL pointer). It will also ensure that, on + * platforms where the [dram_base, dram_base + TEXT_OFFSET) + * interval is partially occupied by the firmware (like on APM + * Mustang), we can still place the kernel at the address + * 'dram_base + TEXT_OFFSET'. + */ + *image_addr = *reserve_addr = dram_base + TEXT_OFFSET; + nr_pages = round_up(kernel_memsize, EFI_ALLOC_ALIGN) / + EFI_PAGE_SIZE; + status = efi_call_early(allocate_pages, EFI_ALLOCATE_ADDRESS, + EFI_LOADER_DATA, nr_pages, + (efi_physical_addr_t *)reserve_addr); + if (status != EFI_SUCCESS) { + kernel_memsize += TEXT_OFFSET; + status = efi_low_alloc(sys_table_arg, kernel_memsize, + SZ_2M, reserve_addr); + + if (status != EFI_SUCCESS) { + pr_efi_err(sys_table_arg, "Failed to relocate kernel\n"); + return status; + } + *image_addr = *reserve_addr + TEXT_OFFSET; + } + memcpy((void *)*image_addr, old_image_addr, kernel_size); + *reserve_size = kernel_memsize; + } + + + return EFI_SUCCESS; +} -- cgit v0.10.2 From aa644fa64c25ab2231a0fa9464892f5579d4e161 Mon Sep 17 00:00:00 2001 From: Dietmar Eggemann Date: Mon, 19 Oct 2015 17:55:49 +0100 Subject: ARM64: Enable multi-core scheduler support by default Make sure that the task scheduler domain hierarchy is set-up correctly on systems with single or multi-cluster topology. Signed-off-by: Dietmar Eggemann Acked-by: Punit Agrawal Signed-off-by: Catalin Marinas diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index c8ccda1..5f76034 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -51,6 +51,7 @@ CONFIG_PCI=y CONFIG_PCI_MSI=y CONFIG_PCI_XGENE=y CONFIG_SMP=y +CONFIG_SCHED_MC=y CONFIG_PREEMPT=y CONFIG_KSM=y CONFIG_TRANSPARENT_HUGEPAGE=y -- cgit v0.10.2 From f8f8bdc48851da979c6e0e4808b6031122e4af47 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 27 Oct 2015 11:12:51 +0900 Subject: arm64/efi: fix libstub build under CONFIG_MODVERSIONS Now that we strictly forbid absolute relocations in libstub code, make sure that we don't emit any when CONFIG_MODVERSIONS is enabled, by stripping the kcrctab sections from the object file. This fixes a build problem under CONFIG_MODVERSIONS=y. Signed-off-by: Ard Biesheuvel Reviewed-by: Matt Fleming Signed-off-by: Catalin Marinas diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile index 92ae557..3c0467d 100644 --- a/drivers/firmware/efi/libstub/Makefile +++ b/drivers/firmware/efi/libstub/Makefile @@ -54,7 +54,7 @@ CFLAGS_arm64-stub.o := -DTEXT_OFFSET=$(TEXT_OFFSET) extra-$(CONFIG_EFI_ARMSTUB) := $(lib-y) lib-$(CONFIG_EFI_ARMSTUB) := $(patsubst %.o,%.stub.o,$(lib-y)) -STUBCOPY_FLAGS-y := -R .debug* -R *ksymtab* +STUBCOPY_FLAGS-y := -R .debug* -R *ksymtab* -R *kcrctab* STUBCOPY_FLAGS-$(CONFIG_ARM64) += --prefix-alloc-sections=.init \ --prefix-symbols=__efistub_ STUBCOPY_RELOC-$(CONFIG_ARM64) := R_AARCH64_ABS -- cgit v0.10.2