summaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig7
-rw-r--r--arch/x86/Kconfig.debug10
-rw-r--r--arch/x86/boot/compressed/Makefile6
-rw-r--r--arch/x86/boot/compressed/misc.c4
-rw-r--r--arch/x86/crypto/Makefile8
-rw-r--r--arch/x86/crypto/blowfish-avx2-asm_64.S449
-rw-r--r--arch/x86/crypto/blowfish_avx2_glue.c585
-rw-r--r--arch/x86/crypto/blowfish_glue.c32
-rw-r--r--arch/x86/crypto/camellia-aesni-avx2-asm_64.S160
-rw-r--r--arch/x86/crypto/crct10dif-pcl-asm_64.S643
-rw-r--r--arch/x86/crypto/crct10dif-pclmul_glue.c151
-rw-r--r--arch/x86/crypto/sha256_ssse3_glue.c57
-rw-r--r--arch/x86/crypto/sha512_ssse3_glue.c58
-rw-r--r--arch/x86/crypto/twofish-avx2-asm_64.S600
-rw-r--r--arch/x86/crypto/twofish_avx2_glue.c584
-rw-r--r--arch/x86/crypto/twofish_avx_glue.c14
-rw-r--r--arch/x86/ia32/ia32_aout.c2
-rw-r--r--arch/x86/include/asm/acpi.h2
-rw-r--r--arch/x86/include/asm/crypto/blowfish.h43
-rw-r--r--arch/x86/include/asm/crypto/twofish.h18
-rw-r--r--arch/x86/include/asm/emergency-restart.h12
-rw-r--r--arch/x86/include/asm/io.h7
-rw-r--r--arch/x86/include/asm/mc146818rtc.h4
-rw-r--r--arch/x86/include/asm/mrst-vrtc.h4
-rw-r--r--arch/x86/include/asm/mtrr.h10
-rw-r--r--arch/x86/include/asm/pgtable.h24
-rw-r--r--arch/x86/include/asm/pgtable_types.h12
-rw-r--r--arch/x86/include/asm/thread_info.h4
-rw-r--r--arch/x86/include/asm/x86_init.h6
-rw-r--r--arch/x86/kernel/acpi/boot.c7
-rw-r--r--arch/x86/kernel/acpi/sleep.c4
-rw-r--r--arch/x86/kernel/acpi/sleep.h2
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c2
-rw-r--r--arch/x86/kernel/cpu/amd.c2
-rw-r--r--arch/x86/kernel/cpu/mtrr/main.c71
-rw-r--r--arch/x86/kernel/cpu/perf_event_amd_iommu.c4
-rw-r--r--arch/x86/kernel/cpu/powerflags.c8
-rw-r--r--arch/x86/kernel/devicetree.c4
-rw-r--r--arch/x86/kernel/hw_breakpoint.c3
-rw-r--r--arch/x86/kernel/kvmclock.c9
-rw-r--r--arch/x86/kernel/ptrace.c204
-rw-r--r--arch/x86/kernel/reboot.c117
-rw-r--r--arch/x86/kernel/rtc.c17
-rw-r--r--arch/x86/kernel/setup.c2
-rw-r--r--arch/x86/kernel/smp.c29
-rw-r--r--arch/x86/lguest/boot.c4
-rw-r--r--arch/x86/mm/highmem_32.c6
-rw-r--r--arch/x86/mm/init.c14
-rw-r--r--arch/x86/mm/init_32.c32
-rw-r--r--arch/x86/mm/init_64.c45
-rw-r--r--arch/x86/mm/mmap.c2
-rw-r--r--arch/x86/mm/numa_32.c2
-rw-r--r--arch/x86/mm/pgtable.c4
-rw-r--r--arch/x86/net/bpf_jit_comp.c61
-rw-r--r--arch/x86/pci/acpi.c7
-rw-r--r--arch/x86/platform/efi/efi.c10
-rw-r--r--arch/x86/platform/mrst/vrtc.c11
-rw-r--r--arch/x86/xen/time.c58
58 files changed, 1425 insertions, 2832 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index b094816..b32ebf9 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -65,6 +65,7 @@ config X86
select HAVE_KERNEL_LZMA
select HAVE_KERNEL_XZ
select HAVE_KERNEL_LZO
+ select HAVE_KERNEL_LZ4
select HAVE_HW_BREAKPOINT
select HAVE_MIXED_BREAKPOINTS_REGS
select PERF_EVENTS
@@ -102,6 +103,7 @@ config X86
select HAVE_ARCH_SECCOMP_FILTER
select BUILDTIME_EXTABLE_SORT
select GENERIC_CMOS_UPDATE
+ select HAVE_ARCH_SOFT_DIRTY
select CLOCKSOURCE_WATCHDOG
select GENERIC_CLOCKEVENTS
select ARCH_CLOCKSOURCE_DATA if X86_64
@@ -121,6 +123,7 @@ config X86
select OLD_SIGACTION if X86_32
select COMPAT_OLD_SIGACTION if IA32_EMULATION
select RTC_LIB
+ select HAVE_DEBUG_STACKOVERFLOW
config INSTRUCTION_DECODER
def_bool y
@@ -2258,11 +2261,11 @@ source "drivers/pcmcia/Kconfig"
source "drivers/pci/hotplug/Kconfig"
config RAPIDIO
- bool "RapidIO support"
+ tristate "RapidIO support"
depends on PCI
default n
help
- If you say Y here, the kernel will include drivers and
+ If enabled this option will include drivers and the core
infrastructure code to support RapidIO interconnect devices.
source "drivers/rapidio/Kconfig"
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index c963881..78d91af 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -59,16 +59,6 @@ config EARLY_PRINTK_DBGP
with klogd/syslogd or the X server. You should normally N here,
unless you want to debug such a crash. You need usb debug device.
-config DEBUG_STACKOVERFLOW
- bool "Check for stack overflows"
- depends on DEBUG_KERNEL
- ---help---
- Say Y here if you want to check the overflows of kernel, IRQ
- and exception stacks. This option will cause messages of the
- stacks in detail when free stack space drops below a certain
- limit.
- If in doubt, say "N".
-
config X86_PTDUMP
bool "Export kernel pagetable layout to userspace via debugfs"
depends on DEBUG_KERNEL
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 5ef205c..dcd90df 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -4,7 +4,8 @@
# create a compressed vmlinux image from the original vmlinux
#
-targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma vmlinux.bin.xz vmlinux.bin.lzo
+targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma \
+ vmlinux.bin.xz vmlinux.bin.lzo vmlinux.bin.lz4
KBUILD_CFLAGS := -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2
KBUILD_CFLAGS += -fno-strict-aliasing -fPIC
@@ -63,12 +64,15 @@ $(obj)/vmlinux.bin.xz: $(vmlinux.bin.all-y) FORCE
$(call if_changed,xzkern)
$(obj)/vmlinux.bin.lzo: $(vmlinux.bin.all-y) FORCE
$(call if_changed,lzo)
+$(obj)/vmlinux.bin.lz4: $(vmlinux.bin.all-y) FORCE
+ $(call if_changed,lz4)
suffix-$(CONFIG_KERNEL_GZIP) := gz
suffix-$(CONFIG_KERNEL_BZIP2) := bz2
suffix-$(CONFIG_KERNEL_LZMA) := lzma
suffix-$(CONFIG_KERNEL_XZ) := xz
suffix-$(CONFIG_KERNEL_LZO) := lzo
+suffix-$(CONFIG_KERNEL_LZ4) := lz4
quiet_cmd_mkpiggy = MKPIGGY $@
cmd_mkpiggy = $(obj)/mkpiggy $< > $@ || ( rm -f $@ ; false )
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 7cb56c6..0319c88 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -145,6 +145,10 @@ static int lines, cols;
#include "../../../../lib/decompress_unlzo.c"
#endif
+#ifdef CONFIG_KERNEL_LZ4
+#include "../../../../lib/decompress_unlz4.c"
+#endif
+
static void scroll(void)
{
int i;
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index a3a0ed8..7d6ba9d 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -3,8 +3,6 @@
#
avx_supported := $(call as-instr,vpxor %xmm0$(comma)%xmm0$(comma)%xmm0,yes,no)
-avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\
- $(comma)4)$(comma)%ymm2,yes,no)
obj-$(CONFIG_CRYPTO_ABLK_HELPER_X86) += ablk_helper.o
obj-$(CONFIG_CRYPTO_GLUE_HELPER_X86) += glue_helper.o
@@ -29,6 +27,7 @@ obj-$(CONFIG_CRYPTO_SHA1_SSSE3) += sha1-ssse3.o
obj-$(CONFIG_CRYPTO_CRC32_PCLMUL) += crc32-pclmul.o
obj-$(CONFIG_CRYPTO_SHA256_SSSE3) += sha256-ssse3.o
obj-$(CONFIG_CRYPTO_SHA512_SSSE3) += sha512-ssse3.o
+obj-$(CONFIG_CRYPTO_CRCT10DIF_PCLMUL) += crct10dif-pclmul.o
# These modules require assembler to support AVX.
ifeq ($(avx_supported),yes)
@@ -42,10 +41,8 @@ endif
# These modules require assembler to support AVX2.
ifeq ($(avx2_supported),yes)
- obj-$(CONFIG_CRYPTO_BLOWFISH_AVX2_X86_64) += blowfish-avx2.o
obj-$(CONFIG_CRYPTO_CAMELLIA_AESNI_AVX2_X86_64) += camellia-aesni-avx2.o
obj-$(CONFIG_CRYPTO_SERPENT_AVX2_X86_64) += serpent-avx2.o
- obj-$(CONFIG_CRYPTO_TWOFISH_AVX2_X86_64) += twofish-avx2.o
endif
aes-i586-y := aes-i586-asm_32.o aes_glue.o
@@ -73,10 +70,8 @@ ifeq ($(avx_supported),yes)
endif
ifeq ($(avx2_supported),yes)
- blowfish-avx2-y := blowfish-avx2-asm_64.o blowfish_avx2_glue.o
camellia-aesni-avx2-y := camellia-aesni-avx2-asm_64.o camellia_aesni_avx2_glue.o
serpent-avx2-y := serpent-avx2-asm_64.o serpent_avx2_glue.o
- twofish-avx2-y := twofish-avx2-asm_64.o twofish_avx2_glue.o
endif
aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o
@@ -87,3 +82,4 @@ crc32c-intel-$(CONFIG_64BIT) += crc32c-pcl-intel-asm_64.o
crc32-pclmul-y := crc32-pclmul_asm.o crc32-pclmul_glue.o
sha256-ssse3-y := sha256-ssse3-asm.o sha256-avx-asm.o sha256-avx2-asm.o sha256_ssse3_glue.o
sha512-ssse3-y := sha512-ssse3-asm.o sha512-avx-asm.o sha512-avx2-asm.o sha512_ssse3_glue.o
+crct10dif-pclmul-y := crct10dif-pcl-asm_64.o crct10dif-pclmul_glue.o
diff --git a/arch/x86/crypto/blowfish-avx2-asm_64.S b/arch/x86/crypto/blowfish-avx2-asm_64.S
deleted file mode 100644
index 784452e..0000000
--- a/arch/x86/crypto/blowfish-avx2-asm_64.S
+++ /dev/null
@@ -1,449 +0,0 @@
-/*
- * x86_64/AVX2 assembler optimized version of Blowfish
- *
- * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- */
-
-#include <linux/linkage.h>
-
-.file "blowfish-avx2-asm_64.S"
-
-.data
-.align 32
-
-.Lprefetch_mask:
-.long 0*64
-.long 1*64
-.long 2*64
-.long 3*64
-.long 4*64
-.long 5*64
-.long 6*64
-.long 7*64
-
-.Lbswap32_mask:
-.long 0x00010203
-.long 0x04050607
-.long 0x08090a0b
-.long 0x0c0d0e0f
-
-.Lbswap128_mask:
- .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
-.Lbswap_iv_mask:
- .byte 7, 6, 5, 4, 3, 2, 1, 0, 7, 6, 5, 4, 3, 2, 1, 0
-
-.text
-/* structure of crypto context */
-#define p 0
-#define s0 ((16 + 2) * 4)
-#define s1 ((16 + 2 + (1 * 256)) * 4)
-#define s2 ((16 + 2 + (2 * 256)) * 4)
-#define s3 ((16 + 2 + (3 * 256)) * 4)
-
-/* register macros */
-#define CTX %rdi
-#define RIO %rdx
-
-#define RS0 %rax
-#define RS1 %r8
-#define RS2 %r9
-#define RS3 %r10
-
-#define RLOOP %r11
-#define RLOOPd %r11d
-
-#define RXr0 %ymm8
-#define RXr1 %ymm9
-#define RXr2 %ymm10
-#define RXr3 %ymm11
-#define RXl0 %ymm12
-#define RXl1 %ymm13
-#define RXl2 %ymm14
-#define RXl3 %ymm15
-
-/* temp regs */
-#define RT0 %ymm0
-#define RT0x %xmm0
-#define RT1 %ymm1
-#define RT1x %xmm1
-#define RIDX0 %ymm2
-#define RIDX1 %ymm3
-#define RIDX1x %xmm3
-#define RIDX2 %ymm4
-#define RIDX3 %ymm5
-
-/* vpgatherdd mask and '-1' */
-#define RNOT %ymm6
-
-/* byte mask, (-1 >> 24) */
-#define RBYTE %ymm7
-
-/***********************************************************************
- * 32-way AVX2 blowfish
- ***********************************************************************/
-#define F(xl, xr) \
- vpsrld $24, xl, RIDX0; \
- vpsrld $16, xl, RIDX1; \
- vpsrld $8, xl, RIDX2; \
- vpand RBYTE, RIDX1, RIDX1; \
- vpand RBYTE, RIDX2, RIDX2; \
- vpand RBYTE, xl, RIDX3; \
- \
- vpgatherdd RNOT, (RS0, RIDX0, 4), RT0; \
- vpcmpeqd RNOT, RNOT, RNOT; \
- vpcmpeqd RIDX0, RIDX0, RIDX0; \
- \
- vpgatherdd RNOT, (RS1, RIDX1, 4), RT1; \
- vpcmpeqd RIDX1, RIDX1, RIDX1; \
- vpaddd RT0, RT1, RT0; \
- \
- vpgatherdd RIDX0, (RS2, RIDX2, 4), RT1; \
- vpxor RT0, RT1, RT0; \
- \
- vpgatherdd RIDX1, (RS3, RIDX3, 4), RT1; \
- vpcmpeqd RNOT, RNOT, RNOT; \
- vpaddd RT0, RT1, RT0; \
- \
- vpxor RT0, xr, xr;
-
-#define add_roundkey(xl, nmem) \
- vpbroadcastd nmem, RT0; \
- vpxor RT0, xl ## 0, xl ## 0; \
- vpxor RT0, xl ## 1, xl ## 1; \
- vpxor RT0, xl ## 2, xl ## 2; \
- vpxor RT0, xl ## 3, xl ## 3;
-
-#define round_enc() \
- add_roundkey(RXr, p(CTX,RLOOP,4)); \
- F(RXl0, RXr0); \
- F(RXl1, RXr1); \
- F(RXl2, RXr2); \
- F(RXl3, RXr3); \
- \
- add_roundkey(RXl, p+4(CTX,RLOOP,4)); \
- F(RXr0, RXl0); \
- F(RXr1, RXl1); \
- F(RXr2, RXl2); \
- F(RXr3, RXl3);
-
-#define round_dec() \
- add_roundkey(RXr, p+4*2(CTX,RLOOP,4)); \
- F(RXl0, RXr0); \
- F(RXl1, RXr1); \
- F(RXl2, RXr2); \
- F(RXl3, RXr3); \
- \
- add_roundkey(RXl, p+4(CTX,RLOOP,4)); \
- F(RXr0, RXl0); \
- F(RXr1, RXl1); \
- F(RXr2, RXl2); \
- F(RXr3, RXl3);
-
-#define init_round_constants() \
- vpcmpeqd RNOT, RNOT, RNOT; \
- leaq s0(CTX), RS0; \
- leaq s1(CTX), RS1; \
- leaq s2(CTX), RS2; \
- leaq s3(CTX), RS3; \
- vpsrld $24, RNOT, RBYTE;
-
-#define transpose_2x2(x0, x1, t0) \
- vpunpckldq x0, x1, t0; \
- vpunpckhdq x0, x1, x1; \
- \
- vpunpcklqdq t0, x1, x0; \
- vpunpckhqdq t0, x1, x1;
-
-#define read_block(xl, xr) \
- vbroadcasti128 .Lbswap32_mask, RT1; \
- \
- vpshufb RT1, xl ## 0, xl ## 0; \
- vpshufb RT1, xr ## 0, xr ## 0; \
- vpshufb RT1, xl ## 1, xl ## 1; \
- vpshufb RT1, xr ## 1, xr ## 1; \
- vpshufb RT1, xl ## 2, xl ## 2; \
- vpshufb RT1, xr ## 2, xr ## 2; \
- vpshufb RT1, xl ## 3, xl ## 3; \
- vpshufb RT1, xr ## 3, xr ## 3; \
- \
- transpose_2x2(xl ## 0, xr ## 0, RT0); \
- transpose_2x2(xl ## 1, xr ## 1, RT0); \
- transpose_2x2(xl ## 2, xr ## 2, RT0); \
- transpose_2x2(xl ## 3, xr ## 3, RT0);
-
-#define write_block(xl, xr) \
- vbroadcasti128 .Lbswap32_mask, RT1; \
- \
- transpose_2x2(xl ## 0, xr ## 0, RT0); \
- transpose_2x2(xl ## 1, xr ## 1, RT0); \
- transpose_2x2(xl ## 2, xr ## 2, RT0); \
- transpose_2x2(xl ## 3, xr ## 3, RT0); \
- \
- vpshufb RT1, xl ## 0, xl ## 0; \
- vpshufb RT1, xr ## 0, xr ## 0; \
- vpshufb RT1, xl ## 1, xl ## 1; \
- vpshufb RT1, xr ## 1, xr ## 1; \
- vpshufb RT1, xl ## 2, xl ## 2; \
- vpshufb RT1, xr ## 2, xr ## 2; \
- vpshufb RT1, xl ## 3, xl ## 3; \
- vpshufb RT1, xr ## 3, xr ## 3;
-
-.align 8
-__blowfish_enc_blk32:
- /* input:
- * %rdi: ctx, CTX
- * RXl0..4, RXr0..4: plaintext
- * output:
- * RXl0..4, RXr0..4: ciphertext (RXl <=> RXr swapped)
- */
- init_round_constants();
-
- read_block(RXl, RXr);
-
- movl $1, RLOOPd;
- add_roundkey(RXl, p+4*(0)(CTX));
-
-.align 4
-.L__enc_loop:
- round_enc();
-
- leal 2(RLOOPd), RLOOPd;
- cmpl $17, RLOOPd;
- jne .L__enc_loop;
-
- add_roundkey(RXr, p+4*(17)(CTX));
-
- write_block(RXl, RXr);
-
- ret;
-ENDPROC(__blowfish_enc_blk32)
-
-.align 8
-__blowfish_dec_blk32:
- /* input:
- * %rdi: ctx, CTX
- * RXl0..4, RXr0..4: ciphertext
- * output:
- * RXl0..4, RXr0..4: plaintext (RXl <=> RXr swapped)
- */
- init_round_constants();
-
- read_block(RXl, RXr);
-
- movl $14, RLOOPd;
- add_roundkey(RXl, p+4*(17)(CTX));
-
-.align 4
-.L__dec_loop:
- round_dec();
-
- addl $-2, RLOOPd;
- jns .L__dec_loop;
-
- add_roundkey(RXr, p+4*(0)(CTX));
-
- write_block(RXl, RXr);
-
- ret;
-ENDPROC(__blowfish_dec_blk32)
-
-ENTRY(blowfish_ecb_enc_32way)
- /* input:
- * %rdi: ctx, CTX
- * %rsi: dst
- * %rdx: src
- */
-
- vzeroupper;
-
- vmovdqu 0*32(%rdx), RXl0;
- vmovdqu 1*32(%rdx), RXr0;
- vmovdqu 2*32(%rdx), RXl1;
- vmovdqu 3*32(%rdx), RXr1;
- vmovdqu 4*32(%rdx), RXl2;
- vmovdqu 5*32(%rdx), RXr2;
- vmovdqu 6*32(%rdx), RXl3;
- vmovdqu 7*32(%rdx), RXr3;
-
- call __blowfish_enc_blk32;
-
- vmovdqu RXr0, 0*32(%rsi);
- vmovdqu RXl0, 1*32(%rsi);
- vmovdqu RXr1, 2*32(%rsi);
- vmovdqu RXl1, 3*32(%rsi);
- vmovdqu RXr2, 4*32(%rsi);
- vmovdqu RXl2, 5*32(%rsi);
- vmovdqu RXr3, 6*32(%rsi);
- vmovdqu RXl3, 7*32(%rsi);
-
- vzeroupper;
-
- ret;
-ENDPROC(blowfish_ecb_enc_32way)
-
-ENTRY(blowfish_ecb_dec_32way)
- /* input:
- * %rdi: ctx, CTX
- * %rsi: dst
- * %rdx: src
- */
-
- vzeroupper;
-
- vmovdqu 0*32(%rdx), RXl0;
- vmovdqu 1*32(%rdx), RXr0;
- vmovdqu 2*32(%rdx), RXl1;
- vmovdqu 3*32(%rdx), RXr1;
- vmovdqu 4*32(%rdx), RXl2;
- vmovdqu 5*32(%rdx), RXr2;
- vmovdqu 6*32(%rdx), RXl3;
- vmovdqu 7*32(%rdx), RXr3;
-
- call __blowfish_dec_blk32;
-
- vmovdqu RXr0, 0*32(%rsi);
- vmovdqu RXl0, 1*32(%rsi);
- vmovdqu RXr1, 2*32(%rsi);
- vmovdqu RXl1, 3*32(%rsi);
- vmovdqu RXr2, 4*32(%rsi);
- vmovdqu RXl2, 5*32(%rsi);
- vmovdqu RXr3, 6*32(%rsi);
- vmovdqu RXl3, 7*32(%rsi);
-
- vzeroupper;
-
- ret;
-ENDPROC(blowfish_ecb_dec_32way)
-
-ENTRY(blowfish_cbc_dec_32way)
- /* input:
- * %rdi: ctx, CTX
- * %rsi: dst
- * %rdx: src
- */
-
- vzeroupper;
-
- vmovdqu 0*32(%rdx), RXl0;
- vmovdqu 1*32(%rdx), RXr0;
- vmovdqu 2*32(%rdx), RXl1;
- vmovdqu 3*32(%rdx), RXr1;
- vmovdqu 4*32(%rdx), RXl2;
- vmovdqu 5*32(%rdx), RXr2;
- vmovdqu 6*32(%rdx), RXl3;
- vmovdqu 7*32(%rdx), RXr3;
-
- call __blowfish_dec_blk32;
-
- /* xor with src */
- vmovq (%rdx), RT0x;
- vpshufd $0x4f, RT0x, RT0x;
- vinserti128 $1, 8(%rdx), RT0, RT0;
- vpxor RT0, RXr0, RXr0;
- vpxor 0*32+24(%rdx), RXl0, RXl0;
- vpxor 1*32+24(%rdx), RXr1, RXr1;
- vpxor 2*32+24(%rdx), RXl1, RXl1;
- vpxor 3*32+24(%rdx), RXr2, RXr2;
- vpxor 4*32+24(%rdx), RXl2, RXl2;
- vpxor 5*32+24(%rdx), RXr3, RXr3;
- vpxor 6*32+24(%rdx), RXl3, RXl3;
-
- vmovdqu RXr0, (0*32)(%rsi);
- vmovdqu RXl0, (1*32)(%rsi);
- vmovdqu RXr1, (2*32)(%rsi);
- vmovdqu RXl1, (3*32)(%rsi);
- vmovdqu RXr2, (4*32)(%rsi);
- vmovdqu RXl2, (5*32)(%rsi);
- vmovdqu RXr3, (6*32)(%rsi);
- vmovdqu RXl3, (7*32)(%rsi);
-
- vzeroupper;
-
- ret;
-ENDPROC(blowfish_cbc_dec_32way)
-
-ENTRY(blowfish_ctr_32way)
- /* input:
- * %rdi: ctx, CTX
- * %rsi: dst
- * %rdx: src
- * %rcx: iv (big endian, 64bit)
- */
-
- vzeroupper;
-
- vpcmpeqd RT0, RT0, RT0;
- vpsrldq $8, RT0, RT0; /* a: -1, b: 0, c: -1, d: 0 */
-
- vpcmpeqd RT1x, RT1x, RT1x;
- vpaddq RT1x, RT1x, RT1x; /* a: -2, b: -2 */
- vpxor RIDX0, RIDX0, RIDX0;
- vinserti128 $1, RT1x, RIDX0, RIDX0; /* a: 0, b: 0, c: -2, d: -2 */
-
- vpaddq RIDX0, RT0, RT0; /* a: -1, b: 0, c: -3, d: -2 */
-
- vpcmpeqd RT1, RT1, RT1;
- vpaddq RT1, RT1, RT1; /* a: -2, b: -2, c: -2, d: -2 */
- vpaddq RT1, RT1, RIDX2; /* a: -4, b: -4, c: -4, d: -4 */
-
- vbroadcasti128 .Lbswap_iv_mask, RIDX0;
- vbroadcasti128 .Lbswap128_mask, RIDX1;
-
- /* load IV and byteswap */
- vmovq (%rcx), RT1x;
- vinserti128 $1, RT1x, RT1, RT1; /* a: BE, b: 0, c: BE, d: 0 */
- vpshufb RIDX0, RT1, RT1; /* a: LE, b: LE, c: LE, d: LE */
-
- /* construct IVs */
- vpsubq RT0, RT1, RT1; /* a: le1, b: le0, c: le3, d: le2 */
- vpshufb RIDX1, RT1, RXl0; /* a: be0, b: be1, c: be2, d: be3 */
- vpsubq RIDX2, RT1, RT1; /* le5, le4, le7, le6 */
- vpshufb RIDX1, RT1, RXr0; /* be4, be5, be6, be7 */
- vpsubq RIDX2, RT1, RT1;
- vpshufb RIDX1, RT1, RXl1;
- vpsubq RIDX2, RT1, RT1;
- vpshufb RIDX1, RT1, RXr1;
- vpsubq RIDX2, RT1, RT1;
- vpshufb RIDX1, RT1, RXl2;
- vpsubq RIDX2, RT1, RT1;
- vpshufb RIDX1, RT1, RXr2;
- vpsubq RIDX2, RT1, RT1;
- vpshufb RIDX1, RT1, RXl3;
- vpsubq RIDX2, RT1, RT1;
- vpshufb RIDX1, RT1, RXr3;
-
- /* store last IV */
- vpsubq RIDX2, RT1, RT1; /* a: le33, b: le32, ... */
- vpshufb RIDX1x, RT1x, RT1x; /* a: be32, ... */
- vmovq RT1x, (%rcx);
-
- call __blowfish_enc_blk32;
-
- /* dst = src ^ iv */
- vpxor 0*32(%rdx), RXr0, RXr0;
- vpxor 1*32(%rdx), RXl0, RXl0;
- vpxor 2*32(%rdx), RXr1, RXr1;
- vpxor 3*32(%rdx), RXl1, RXl1;
- vpxor 4*32(%rdx), RXr2, RXr2;
- vpxor 5*32(%rdx), RXl2, RXl2;
- vpxor 6*32(%rdx), RXr3, RXr3;
- vpxor 7*32(%rdx), RXl3, RXl3;
- vmovdqu RXr0, (0*32)(%rsi);
- vmovdqu RXl0, (1*32)(%rsi);
- vmovdqu RXr1, (2*32)(%rsi);
- vmovdqu RXl1, (3*32)(%rsi);
- vmovdqu RXr2, (4*32)(%rsi);
- vmovdqu RXl2, (5*32)(%rsi);
- vmovdqu RXr3, (6*32)(%rsi);
- vmovdqu RXl3, (7*32)(%rsi);
-
- vzeroupper;
-
- ret;
-ENDPROC(blowfish_ctr_32way)
diff --git a/arch/x86/crypto/blowfish_avx2_glue.c b/arch/x86/crypto/blowfish_avx2_glue.c
deleted file mode 100644
index 4417e9a..0000000
--- a/arch/x86/crypto/blowfish_avx2_glue.c
+++ /dev/null
@@ -1,585 +0,0 @@
-/*
- * Glue Code for x86_64/AVX2 assembler optimized version of Blowfish
- *
- * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
- *
- * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by:
- * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
- * CTR part based on code (crypto/ctr.c) by:
- * (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/crypto.h>
-#include <linux/err.h>
-#include <crypto/algapi.h>
-#include <crypto/blowfish.h>
-#include <crypto/cryptd.h>
-#include <crypto/ctr.h>
-#include <asm/i387.h>
-#include <asm/xcr.h>
-#include <asm/xsave.h>
-#include <asm/crypto/blowfish.h>
-#include <asm/crypto/ablk_helper.h>
-#include <crypto/scatterwalk.h>
-
-#define BF_AVX2_PARALLEL_BLOCKS 32
-
-/* 32-way AVX2 parallel cipher functions */
-asmlinkage void blowfish_ecb_enc_32way(struct bf_ctx *ctx, u8 *dst,
- const u8 *src);
-asmlinkage void blowfish_ecb_dec_32way(struct bf_ctx *ctx, u8 *dst,
- const u8 *src);
-asmlinkage void blowfish_cbc_dec_32way(struct bf_ctx *ctx, u8 *dst,
- const u8 *src);
-asmlinkage void blowfish_ctr_32way(struct bf_ctx *ctx, u8 *dst, const u8 *src,
- __be64 *iv);
-
-static inline bool bf_fpu_begin(bool fpu_enabled, unsigned int nbytes)
-{
- if (fpu_enabled)
- return true;
-
- /* FPU is only used when chunk to be processed is large enough, so
- * do not enable FPU until it is necessary.
- */
- if (nbytes < BF_BLOCK_SIZE * BF_AVX2_PARALLEL_BLOCKS)
- return false;
-
- kernel_fpu_begin();
- return true;
-}
-
-static inline void bf_fpu_end(bool fpu_enabled)
-{
- if (fpu_enabled)
- kernel_fpu_end();
-}
-
-static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
- bool enc)
-{
- bool fpu_enabled = false;
- struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- const unsigned int bsize = BF_BLOCK_SIZE;
- unsigned int nbytes;
- int err;
-
- err = blkcipher_walk_virt(desc, walk);
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-
- while ((nbytes = walk->nbytes)) {
- u8 *wsrc = walk->src.virt.addr;
- u8 *wdst = walk->dst.virt.addr;
-
- fpu_enabled = bf_fpu_begin(fpu_enabled, nbytes);
-
- /* Process multi-block AVX2 batch */
- if (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS) {
- do {
- if (enc)
- blowfish_ecb_enc_32way(ctx, wdst, wsrc);
- else
- blowfish_ecb_dec_32way(ctx, wdst, wsrc);
-
- wsrc += bsize * BF_AVX2_PARALLEL_BLOCKS;
- wdst += bsize * BF_AVX2_PARALLEL_BLOCKS;
- nbytes -= bsize * BF_AVX2_PARALLEL_BLOCKS;
- } while (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS);
-
- if (nbytes < bsize)
- goto done;
- }
-
- /* Process multi-block batch */
- if (nbytes >= bsize * BF_PARALLEL_BLOCKS) {
- do {
- if (enc)
- blowfish_enc_blk_4way(ctx, wdst, wsrc);
- else
- blowfish_dec_blk_4way(ctx, wdst, wsrc);
-
- wsrc += bsize * BF_PARALLEL_BLOCKS;
- wdst += bsize * BF_PARALLEL_BLOCKS;
- nbytes -= bsize * BF_PARALLEL_BLOCKS;
- } while (nbytes >= bsize * BF_PARALLEL_BLOCKS);
-
- if (nbytes < bsize)
- goto done;
- }
-
- /* Handle leftovers */
- do {
- if (enc)
- blowfish_enc_blk(ctx, wdst, wsrc);
- else
- blowfish_dec_blk(ctx, wdst, wsrc);
-
- wsrc += bsize;
- wdst += bsize;
- nbytes -= bsize;
- } while (nbytes >= bsize);
-
-done:
- err = blkcipher_walk_done(desc, walk, nbytes);
- }
-
- bf_fpu_end(fpu_enabled);
- return err;
-}
-
-static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- struct blkcipher_walk walk;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return ecb_crypt(desc, &walk, true);
-}
-
-static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- struct blkcipher_walk walk;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return ecb_crypt(desc, &walk, false);
-}
-
-static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
- struct blkcipher_walk *walk)
-{
- struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- unsigned int bsize = BF_BLOCK_SIZE;
- unsigned int nbytes = walk->nbytes;
- u64 *src = (u64 *)walk->src.virt.addr;
- u64 *dst = (u64 *)walk->dst.virt.addr;
- u64 *iv = (u64 *)walk->iv;
-
- do {
- *dst = *src ^ *iv;
- blowfish_enc_blk(ctx, (u8 *)dst, (u8 *)dst);
- iv = dst;
-
- src += 1;
- dst += 1;
- nbytes -= bsize;
- } while (nbytes >= bsize);
-
- *(u64 *)walk->iv = *iv;
- return nbytes;
-}
-
-static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- struct blkcipher_walk walk;
- int err;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt(desc, &walk);
-
- while ((nbytes = walk.nbytes)) {
- nbytes = __cbc_encrypt(desc, &walk);
- err = blkcipher_walk_done(desc, &walk, nbytes);
- }
-
- return err;
-}
-
-static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
- struct blkcipher_walk *walk)
-{
- struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- const unsigned int bsize = BF_BLOCK_SIZE;
- unsigned int nbytes = walk->nbytes;
- u64 *src = (u64 *)walk->src.virt.addr;
- u64 *dst = (u64 *)walk->dst.virt.addr;
- u64 last_iv;
- int i;
-
- /* Start of the last block. */
- src += nbytes / bsize - 1;
- dst += nbytes / bsize - 1;
-
- last_iv = *src;
-
- /* Process multi-block AVX2 batch */
- if (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS) {
- do {
- nbytes -= bsize * (BF_AVX2_PARALLEL_BLOCKS - 1);
- src -= BF_AVX2_PARALLEL_BLOCKS - 1;
- dst -= BF_AVX2_PARALLEL_BLOCKS - 1;
-
- blowfish_cbc_dec_32way(ctx, (u8 *)dst, (u8 *)src);
-
- nbytes -= bsize;
- if (nbytes < bsize)
- goto done;
-
- *dst ^= *(src - 1);
- src -= 1;
- dst -= 1;
- } while (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS);
-
- if (nbytes < bsize)
- goto done;
- }
-
- /* Process multi-block batch */
- if (nbytes >= bsize * BF_PARALLEL_BLOCKS) {
- u64 ivs[BF_PARALLEL_BLOCKS - 1];
-
- do {
- nbytes -= bsize * (BF_PARALLEL_BLOCKS - 1);
- src -= BF_PARALLEL_BLOCKS - 1;
- dst -= BF_PARALLEL_BLOCKS - 1;
-
- for (i = 0; i < BF_PARALLEL_BLOCKS - 1; i++)
- ivs[i] = src[i];
-
- blowfish_dec_blk_4way(ctx, (u8 *)dst, (u8 *)src);
-
- for (i = 0; i < BF_PARALLEL_BLOCKS - 1; i++)
- dst[i + 1] ^= ivs[i];
-
- nbytes -= bsize;
- if (nbytes < bsize)
- goto done;
-
- *dst ^= *(src - 1);
- src -= 1;
- dst -= 1;
- } while (nbytes >= bsize * BF_PARALLEL_BLOCKS);
-
- if (nbytes < bsize)
- goto done;
- }
-
- /* Handle leftovers */
- for (;;) {
- blowfish_dec_blk(ctx, (u8 *)dst, (u8 *)src);
-
- nbytes -= bsize;
- if (nbytes < bsize)
- break;
-
- *dst ^= *(src - 1);
- src -= 1;
- dst -= 1;
- }
-
-done:
- *dst ^= *(u64 *)walk->iv;
- *(u64 *)walk->iv = last_iv;
-
- return nbytes;
-}
-
-static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- bool fpu_enabled = false;
- struct blkcipher_walk walk;
- int err;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt(desc, &walk);
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-
- while ((nbytes = walk.nbytes)) {
- fpu_enabled = bf_fpu_begin(fpu_enabled, nbytes);
- nbytes = __cbc_decrypt(desc, &walk);
- err = blkcipher_walk_done(desc, &walk, nbytes);
- }
-
- bf_fpu_end(fpu_enabled);
- return err;
-}
-
-static void ctr_crypt_final(struct blkcipher_desc *desc,
- struct blkcipher_walk *walk)
-{
- struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- u8 *ctrblk = walk->iv;
- u8 keystream[BF_BLOCK_SIZE];
- u8 *src = walk->src.virt.addr;
- u8 *dst = walk->dst.virt.addr;
- unsigned int nbytes = walk->nbytes;
-
- blowfish_enc_blk(ctx, keystream, ctrblk);
- crypto_xor(keystream, src, nbytes);
- memcpy(dst, keystream, nbytes);
-
- crypto_inc(ctrblk, BF_BLOCK_SIZE);
-}
-
-static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
- struct blkcipher_walk *walk)
-{
- struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- unsigned int bsize = BF_BLOCK_SIZE;
- unsigned int nbytes = walk->nbytes;
- u64 *src = (u64 *)walk->src.virt.addr;
- u64 *dst = (u64 *)walk->dst.virt.addr;
- int i;
-
- /* Process multi-block AVX2 batch */
- if (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS) {
- do {
- blowfish_ctr_32way(ctx, (u8 *)dst, (u8 *)src,
- (__be64 *)walk->iv);
-
- src += BF_AVX2_PARALLEL_BLOCKS;
- dst += BF_AVX2_PARALLEL_BLOCKS;
- nbytes -= bsize * BF_AVX2_PARALLEL_BLOCKS;
- } while (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS);
-
- if (nbytes < bsize)
- goto done;
- }
-
- /* Process four block batch */
- if (nbytes >= bsize * BF_PARALLEL_BLOCKS) {
- __be64 ctrblocks[BF_PARALLEL_BLOCKS];
- u64 ctrblk = be64_to_cpu(*(__be64 *)walk->iv);
-
- do {
- /* create ctrblks for parallel encrypt */
- for (i = 0; i < BF_PARALLEL_BLOCKS; i++) {
- if (dst != src)
- dst[i] = src[i];
-
- ctrblocks[i] = cpu_to_be64(ctrblk++);
- }
-
- blowfish_enc_blk_xor_4way(ctx, (u8 *)dst,
- (u8 *)ctrblocks);
-
- src += BF_PARALLEL_BLOCKS;
- dst += BF_PARALLEL_BLOCKS;
- nbytes -= bsize * BF_PARALLEL_BLOCKS;
- } while (nbytes >= bsize * BF_PARALLEL_BLOCKS);
-
- *(__be64 *)walk->iv = cpu_to_be64(ctrblk);
-
- if (nbytes < bsize)
- goto done;
- }
-
- /* Handle leftovers */
- do {
- u64 ctrblk;
-
- if (dst != src)
- *dst = *src;
-
- ctrblk = *(u64 *)walk->iv;
- be64_add_cpu((__be64 *)walk->iv, 1);
-
- blowfish_enc_blk_xor(ctx, (u8 *)dst, (u8 *)&ctrblk);
-
- src += 1;
- dst += 1;
- } while ((nbytes -= bsize) >= bsize);
-
-done:
- return nbytes;
-}
-
-static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- bool fpu_enabled = false;
- struct blkcipher_walk walk;
- int err;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt_block(desc, &walk, BF_BLOCK_SIZE);
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-
- while ((nbytes = walk.nbytes) >= BF_BLOCK_SIZE) {
- fpu_enabled = bf_fpu_begin(fpu_enabled, nbytes);
- nbytes = __ctr_crypt(desc, &walk);
- err = blkcipher_walk_done(desc, &walk, nbytes);
- }
-
- bf_fpu_end(fpu_enabled);
-
- if (walk.nbytes) {
- ctr_crypt_final(desc, &walk);
- err = blkcipher_walk_done(desc, &walk, 0);
- }
-
- return err;
-}
-
-static struct crypto_alg bf_algs[6] = { {
- .cra_name = "__ecb-blowfish-avx2",
- .cra_driver_name = "__driver-ecb-blowfish-avx2",
- .cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = BF_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct bf_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = BF_MIN_KEY_SIZE,
- .max_keysize = BF_MAX_KEY_SIZE,
- .setkey = blowfish_setkey,
- .encrypt = ecb_encrypt,
- .decrypt = ecb_decrypt,
- },
- },
-}, {
- .cra_name = "__cbc-blowfish-avx2",
- .cra_driver_name = "__driver-cbc-blowfish-avx2",
- .cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = BF_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct bf_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = BF_MIN_KEY_SIZE,
- .max_keysize = BF_MAX_KEY_SIZE,
- .setkey = blowfish_setkey,
- .encrypt = cbc_encrypt,
- .decrypt = cbc_decrypt,
- },
- },
-}, {
- .cra_name = "__ctr-blowfish-avx2",
- .cra_driver_name = "__driver-ctr-blowfish-avx2",
- .cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct bf_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = BF_MIN_KEY_SIZE,
- .max_keysize = BF_MAX_KEY_SIZE,
- .ivsize = BF_BLOCK_SIZE,
- .setkey = blowfish_setkey,
- .encrypt = ctr_crypt,
- .decrypt = ctr_crypt,
- },
- },
-}, {
- .cra_name = "ecb(blowfish)",
- .cra_driver_name = "ecb-blowfish-avx2",
- .cra_priority = 400,
- .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
- .cra_blocksize = BF_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct async_helper_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_ablkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_init = ablk_init,
- .cra_exit = ablk_exit,
- .cra_u = {
- .ablkcipher = {
- .min_keysize = BF_MIN_KEY_SIZE,
- .max_keysize = BF_MAX_KEY_SIZE,
- .setkey = ablk_set_key,
- .encrypt = ablk_encrypt,
- .decrypt = ablk_decrypt,
- },
- },
-}, {
- .cra_name = "cbc(blowfish)",
- .cra_driver_name = "cbc-blowfish-avx2",
- .cra_priority = 400,
- .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
- .cra_blocksize = BF_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct async_helper_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_ablkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_init = ablk_init,
- .cra_exit = ablk_exit,
- .cra_u = {
- .ablkcipher = {
- .min_keysize = BF_MIN_KEY_SIZE,
- .max_keysize = BF_MAX_KEY_SIZE,
- .ivsize = BF_BLOCK_SIZE,
- .setkey = ablk_set_key,
- .encrypt = __ablk_encrypt,
- .decrypt = ablk_decrypt,
- },
- },
-}, {
- .cra_name = "ctr(blowfish)",
- .cra_driver_name = "ctr-blowfish-avx2",
- .cra_priority = 400,
- .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
- .cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct async_helper_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_ablkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_init = ablk_init,
- .cra_exit = ablk_exit,
- .cra_u = {
- .ablkcipher = {
- .min_keysize = BF_MIN_KEY_SIZE,
- .max_keysize = BF_MAX_KEY_SIZE,
- .ivsize = BF_BLOCK_SIZE,
- .setkey = ablk_set_key,
- .encrypt = ablk_encrypt,
- .decrypt = ablk_encrypt,
- .geniv = "chainiv",
- },
- },
-} };
-
-
-static int __init init(void)
-{
- u64 xcr0;
-
- if (!cpu_has_avx2 || !cpu_has_osxsave) {
- pr_info("AVX2 instructions are not detected.\n");
- return -ENODEV;
- }
-
- xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
- if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
- pr_info("AVX detected but unusable.\n");
- return -ENODEV;
- }
-
- return crypto_register_algs(bf_algs, ARRAY_SIZE(bf_algs));
-}
-
-static void __exit fini(void)
-{
- crypto_unregister_algs(bf_algs, ARRAY_SIZE(bf_algs));
-}
-
-module_init(init);
-module_exit(fini);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("Blowfish Cipher Algorithm, AVX2 optimized");
-MODULE_ALIAS("blowfish");
-MODULE_ALIAS("blowfish-asm");
diff --git a/arch/x86/crypto/blowfish_glue.c b/arch/x86/crypto/blowfish_glue.c
index 3548d76..50ec333 100644
--- a/arch/x86/crypto/blowfish_glue.c
+++ b/arch/x86/crypto/blowfish_glue.c
@@ -1,7 +1,7 @@
/*
* Glue Code for assembler optimized version of Blowfish
*
- * Copyright © 2011-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ * Copyright (c) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
*
* CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by:
* Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
@@ -32,24 +32,40 @@
#include <linux/module.h>
#include <linux/types.h>
#include <crypto/algapi.h>
-#include <asm/crypto/blowfish.h>
/* regular block cipher functions */
asmlinkage void __blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src,
bool xor);
-EXPORT_SYMBOL_GPL(__blowfish_enc_blk);
-
asmlinkage void blowfish_dec_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src);
-EXPORT_SYMBOL_GPL(blowfish_dec_blk);
/* 4-way parallel cipher functions */
asmlinkage void __blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst,
const u8 *src, bool xor);
-EXPORT_SYMBOL_GPL(__blowfish_enc_blk_4way);
-
asmlinkage void blowfish_dec_blk_4way(struct bf_ctx *ctx, u8 *dst,
const u8 *src);
-EXPORT_SYMBOL_GPL(blowfish_dec_blk_4way);
+
+static inline void blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src)
+{
+ __blowfish_enc_blk(ctx, dst, src, false);
+}
+
+static inline void blowfish_enc_blk_xor(struct bf_ctx *ctx, u8 *dst,
+ const u8 *src)
+{
+ __blowfish_enc_blk(ctx, dst, src, true);
+}
+
+static inline void blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst,
+ const u8 *src)
+{
+ __blowfish_enc_blk_4way(ctx, dst, src, false);
+}
+
+static inline void blowfish_enc_blk_xor_4way(struct bf_ctx *ctx, u8 *dst,
+ const u8 *src)
+{
+ __blowfish_enc_blk_4way(ctx, dst, src, true);
+}
static void blowfish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
diff --git a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
index 91a1878..0e0b886 100644
--- a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
+++ b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
@@ -51,16 +51,6 @@
#define ymm14_x xmm14
#define ymm15_x xmm15
-/*
- * AES-NI instructions do not support ymmX registers, so we need splitting and
- * merging.
- */
-#define vaesenclast256(zero, yreg, tmp) \
- vextracti128 $1, yreg, tmp##_x; \
- vaesenclast zero##_x, yreg##_x, yreg##_x; \
- vaesenclast zero##_x, tmp##_x, tmp##_x; \
- vinserti128 $1, tmp##_x, yreg, yreg;
-
/**********************************************************************
32-way camellia
**********************************************************************/
@@ -79,46 +69,70 @@
* S-function with AES subbytes \
*/ \
vbroadcasti128 .Linv_shift_row, t4; \
- vpbroadcastb .L0f0f0f0f, t7; \
- vbroadcasti128 .Lpre_tf_lo_s1, t0; \
- vbroadcasti128 .Lpre_tf_hi_s1, t1; \
+ vpbroadcastd .L0f0f0f0f, t7; \
+ vbroadcasti128 .Lpre_tf_lo_s1, t5; \
+ vbroadcasti128 .Lpre_tf_hi_s1, t6; \
+ vbroadcasti128 .Lpre_tf_lo_s4, t2; \
+ vbroadcasti128 .Lpre_tf_hi_s4, t3; \
\
/* AES inverse shift rows */ \
vpshufb t4, x0, x0; \
vpshufb t4, x7, x7; \
- vpshufb t4, x1, x1; \
- vpshufb t4, x4, x4; \
- vpshufb t4, x2, x2; \
- vpshufb t4, x5, x5; \
vpshufb t4, x3, x3; \
vpshufb t4, x6, x6; \
+ vpshufb t4, x2, x2; \
+ vpshufb t4, x5, x5; \
+ vpshufb t4, x1, x1; \
+ vpshufb t4, x4, x4; \
\
/* prefilter sboxes 1, 2 and 3 */ \
- vbroadcasti128 .Lpre_tf_lo_s4, t2; \
- vbroadcasti128 .Lpre_tf_hi_s4, t3; \
- filter_8bit(x0, t0, t1, t7, t6); \
- filter_8bit(x7, t0, t1, t7, t6); \
- filter_8bit(x1, t0, t1, t7, t6); \
- filter_8bit(x4, t0, t1, t7, t6); \
- filter_8bit(x2, t0, t1, t7, t6); \
- filter_8bit(x5, t0, t1, t7, t6); \
- \
/* prefilter sbox 4 */ \
+ filter_8bit(x0, t5, t6, t7, t4); \
+ filter_8bit(x7, t5, t6, t7, t4); \
+ vextracti128 $1, x0, t0##_x; \
+ vextracti128 $1, x7, t1##_x; \
+ filter_8bit(x3, t2, t3, t7, t4); \
+ filter_8bit(x6, t2, t3, t7, t4); \
+ vextracti128 $1, x3, t3##_x; \
+ vextracti128 $1, x6, t2##_x; \
+ filter_8bit(x2, t5, t6, t7, t4); \
+ filter_8bit(x5, t5, t6, t7, t4); \
+ filter_8bit(x1, t5, t6, t7, t4); \
+ filter_8bit(x4, t5, t6, t7, t4); \
+ \
vpxor t4##_x, t4##_x, t4##_x; \
- filter_8bit(x3, t2, t3, t7, t6); \
- filter_8bit(x6, t2, t3, t7, t6); \
\
/* AES subbytes + AES shift rows */ \
+ vextracti128 $1, x2, t6##_x; \
+ vextracti128 $1, x5, t5##_x; \
+ vaesenclast t4##_x, x0##_x, x0##_x; \
+ vaesenclast t4##_x, t0##_x, t0##_x; \
+ vinserti128 $1, t0##_x, x0, x0; \
+ vaesenclast t4##_x, x7##_x, x7##_x; \
+ vaesenclast t4##_x, t1##_x, t1##_x; \
+ vinserti128 $1, t1##_x, x7, x7; \
+ vaesenclast t4##_x, x3##_x, x3##_x; \
+ vaesenclast t4##_x, t3##_x, t3##_x; \
+ vinserti128 $1, t3##_x, x3, x3; \
+ vaesenclast t4##_x, x6##_x, x6##_x; \
+ vaesenclast t4##_x, t2##_x, t2##_x; \
+ vinserti128 $1, t2##_x, x6, x6; \
+ vextracti128 $1, x1, t3##_x; \
+ vextracti128 $1, x4, t2##_x; \
vbroadcasti128 .Lpost_tf_lo_s1, t0; \
vbroadcasti128 .Lpost_tf_hi_s1, t1; \
- vaesenclast256(t4, x0, t5); \
- vaesenclast256(t4, x7, t5); \
- vaesenclast256(t4, x1, t5); \
- vaesenclast256(t4, x4, t5); \
- vaesenclast256(t4, x2, t5); \
- vaesenclast256(t4, x5, t5); \
- vaesenclast256(t4, x3, t5); \
- vaesenclast256(t4, x6, t5); \
+ vaesenclast t4##_x, x2##_x, x2##_x; \
+ vaesenclast t4##_x, t6##_x, t6##_x; \
+ vinserti128 $1, t6##_x, x2, x2; \
+ vaesenclast t4##_x, x5##_x, x5##_x; \
+ vaesenclast t4##_x, t5##_x, t5##_x; \
+ vinserti128 $1, t5##_x, x5, x5; \
+ vaesenclast t4##_x, x1##_x, x1##_x; \
+ vaesenclast t4##_x, t3##_x, t3##_x; \
+ vinserti128 $1, t3##_x, x1, x1; \
+ vaesenclast t4##_x, x4##_x, x4##_x; \
+ vaesenclast t4##_x, t2##_x, t2##_x; \
+ vinserti128 $1, t2##_x, x4, x4; \
\
/* postfilter sboxes 1 and 4 */ \
vbroadcasti128 .Lpost_tf_lo_s3, t2; \
@@ -139,22 +153,12 @@
/* postfilter sbox 2 */ \
filter_8bit(x1, t4, t5, t7, t2); \
filter_8bit(x4, t4, t5, t7, t2); \
+ vpxor t7, t7, t7; \
\
vpsrldq $1, t0, t1; \
vpsrldq $2, t0, t2; \
+ vpshufb t7, t1, t1; \
vpsrldq $3, t0, t3; \
- vpsrldq $4, t0, t4; \
- vpsrldq $5, t0, t5; \
- vpsrldq $6, t0, t6; \
- vpsrldq $7, t0, t7; \
- vpbroadcastb t0##_x, t0; \
- vpbroadcastb t1##_x, t1; \
- vpbroadcastb t2##_x, t2; \
- vpbroadcastb t3##_x, t3; \
- vpbroadcastb t4##_x, t4; \
- vpbroadcastb t6##_x, t6; \
- vpbroadcastb t5##_x, t5; \
- vpbroadcastb t7##_x, t7; \
\
/* P-function */ \
vpxor x5, x0, x0; \
@@ -162,11 +166,21 @@
vpxor x7, x2, x2; \
vpxor x4, x3, x3; \
\
+ vpshufb t7, t2, t2; \
+ vpsrldq $4, t0, t4; \
+ vpshufb t7, t3, t3; \
+ vpsrldq $5, t0, t5; \
+ vpshufb t7, t4, t4; \
+ \
vpxor x2, x4, x4; \
vpxor x3, x5, x5; \
vpxor x0, x6, x6; \
vpxor x1, x7, x7; \
\
+ vpsrldq $6, t0, t6; \
+ vpshufb t7, t5, t5; \
+ vpshufb t7, t6, t6; \
+ \
vpxor x7, x0, x0; \
vpxor x4, x1, x1; \
vpxor x5, x2, x2; \
@@ -179,12 +193,16 @@
\
/* Add key material and result to CD (x becomes new CD) */ \
\
- vpxor t7, x0, x0; \
- vpxor 4 * 32(mem_cd), x0, x0; \
- \
vpxor t6, x1, x1; \
vpxor 5 * 32(mem_cd), x1, x1; \
\
+ vpsrldq $7, t0, t6; \
+ vpshufb t7, t0, t0; \
+ vpshufb t7, t6, t7; \
+ \
+ vpxor t7, x0, x0; \
+ vpxor 4 * 32(mem_cd), x0, x0; \
+ \
vpxor t5, x2, x2; \
vpxor 6 * 32(mem_cd), x2, x2; \
\
@@ -204,7 +222,7 @@
vpxor 3 * 32(mem_cd), x7, x7;
/*
- * Size optimization... with inlined roundsm16 binary would be over 5 times
+ * Size optimization... with inlined roundsm32 binary would be over 5 times
* larger and would only marginally faster.
*/
.align 8
@@ -324,13 +342,13 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
*/ \
vpbroadcastd kll, t0; /* only lowest 32-bit used */ \
vpxor tt0, tt0, tt0; \
- vpbroadcastb t0##_x, t3; \
+ vpshufb tt0, t0, t3; \
vpsrldq $1, t0, t0; \
- vpbroadcastb t0##_x, t2; \
+ vpshufb tt0, t0, t2; \
vpsrldq $1, t0, t0; \
- vpbroadcastb t0##_x, t1; \
+ vpshufb tt0, t0, t1; \
vpsrldq $1, t0, t0; \
- vpbroadcastb t0##_x, t0; \
+ vpshufb tt0, t0, t0; \
\
vpand l0, t0, t0; \
vpand l1, t1, t1; \
@@ -340,6 +358,7 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
rol32_1_32(t3, t2, t1, t0, tt1, tt2, tt3, tt0); \
\
vpxor l4, t0, l4; \
+ vpbroadcastd krr, t0; /* only lowest 32-bit used */ \
vmovdqu l4, 4 * 32(l); \
vpxor l5, t1, l5; \
vmovdqu l5, 5 * 32(l); \
@@ -354,14 +373,13 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
* rl ^= t2; \
*/ \
\
- vpbroadcastd krr, t0; /* only lowest 32-bit used */ \
- vpbroadcastb t0##_x, t3; \
+ vpshufb tt0, t0, t3; \
vpsrldq $1, t0, t0; \
- vpbroadcastb t0##_x, t2; \
+ vpshufb tt0, t0, t2; \
vpsrldq $1, t0, t0; \
- vpbroadcastb t0##_x, t1; \
+ vpshufb tt0, t0, t1; \
vpsrldq $1, t0, t0; \
- vpbroadcastb t0##_x, t0; \
+ vpshufb tt0, t0, t0; \
\
vpor 4 * 32(r), t0, t0; \
vpor 5 * 32(r), t1, t1; \
@@ -373,6 +391,7 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
vpxor 2 * 32(r), t2, t2; \
vpxor 3 * 32(r), t3, t3; \
vmovdqu t0, 0 * 32(r); \
+ vpbroadcastd krl, t0; /* only lowest 32-bit used */ \
vmovdqu t1, 1 * 32(r); \
vmovdqu t2, 2 * 32(r); \
vmovdqu t3, 3 * 32(r); \
@@ -382,14 +401,13 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
* t2 &= rl; \
* rr ^= rol32(t2, 1); \
*/ \
- vpbroadcastd krl, t0; /* only lowest 32-bit used */ \
- vpbroadcastb t0##_x, t3; \
+ vpshufb tt0, t0, t3; \
vpsrldq $1, t0, t0; \
- vpbroadcastb t0##_x, t2; \
+ vpshufb tt0, t0, t2; \
vpsrldq $1, t0, t0; \
- vpbroadcastb t0##_x, t1; \
+ vpshufb tt0, t0, t1; \
vpsrldq $1, t0, t0; \
- vpbroadcastb t0##_x, t0; \
+ vpshufb tt0, t0, t0; \
\
vpand 0 * 32(r), t0, t0; \
vpand 1 * 32(r), t1, t1; \
@@ -403,6 +421,7 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
vpxor 6 * 32(r), t2, t2; \
vpxor 7 * 32(r), t3, t3; \
vmovdqu t0, 4 * 32(r); \
+ vpbroadcastd klr, t0; /* only lowest 32-bit used */ \
vmovdqu t1, 5 * 32(r); \
vmovdqu t2, 6 * 32(r); \
vmovdqu t3, 7 * 32(r); \
@@ -413,14 +432,13 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
* ll ^= t0; \
*/ \
\
- vpbroadcastd klr, t0; /* only lowest 32-bit used */ \
- vpbroadcastb t0##_x, t3; \
+ vpshufb tt0, t0, t3; \
vpsrldq $1, t0, t0; \
- vpbroadcastb t0##_x, t2; \
+ vpshufb tt0, t0, t2; \
vpsrldq $1, t0, t0; \
- vpbroadcastb t0##_x, t1; \
+ vpshufb tt0, t0, t1; \
vpsrldq $1, t0, t0; \
- vpbroadcastb t0##_x, t0; \
+ vpshufb tt0, t0, t0; \
\
vpor l4, t0, t0; \
vpor l5, t1, t1; \
diff --git a/arch/x86/crypto/crct10dif-pcl-asm_64.S b/arch/x86/crypto/crct10dif-pcl-asm_64.S
new file mode 100644
index 0000000..35e9756
--- /dev/null
+++ b/arch/x86/crypto/crct10dif-pcl-asm_64.S
@@ -0,0 +1,643 @@
+########################################################################
+# Implement fast CRC-T10DIF computation with SSE and PCLMULQDQ instructions
+#
+# Copyright (c) 2013, Intel Corporation
+#
+# Authors:
+# Erdinc Ozturk <erdinc.ozturk@intel.com>
+# Vinodh Gopal <vinodh.gopal@intel.com>
+# James Guilford <james.guilford@intel.com>
+# Tim Chen <tim.c.chen@linux.intel.com>
+#
+# This software is available to you under a choice of one of two
+# licenses. You may choose to be licensed under the terms of the GNU
+# General Public License (GPL) Version 2, available from the file
+# COPYING in the main directory of this source tree, or the
+# OpenIB.org BSD license below:
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the
+# distribution.
+#
+# * Neither the name of the Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+#
+# THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+########################################################################
+# Function API:
+# UINT16 crc_t10dif_pcl(
+# UINT16 init_crc, //initial CRC value, 16 bits
+# const unsigned char *buf, //buffer pointer to calculate CRC on
+# UINT64 len //buffer length in bytes (64-bit data)
+# );
+#
+# Reference paper titled "Fast CRC Computation for Generic
+# Polynomials Using PCLMULQDQ Instruction"
+# URL: http://www.intel.com/content/dam/www/public/us/en/documents
+# /white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf
+#
+#
+
+#include <linux/linkage.h>
+
+.text
+
+#define arg1 %rdi
+#define arg2 %rsi
+#define arg3 %rdx
+
+#define arg1_low32 %edi
+
+ENTRY(crc_t10dif_pcl)
+.align 16
+
+ # adjust the 16-bit initial_crc value, scale it to 32 bits
+ shl $16, arg1_low32
+
+ # Allocate Stack Space
+ mov %rsp, %rcx
+ sub $16*2, %rsp
+ # align stack to 16 byte boundary
+ and $~(0x10 - 1), %rsp
+
+ # check if smaller than 256
+ cmp $256, arg3
+
+ # for sizes less than 128, we can't fold 64B at a time...
+ jl _less_than_128
+
+
+ # load the initial crc value
+ movd arg1_low32, %xmm10 # initial crc
+
+ # crc value does not need to be byte-reflected, but it needs
+ # to be moved to the high part of the register.
+ # because data will be byte-reflected and will align with
+ # initial crc at correct place.
+ pslldq $12, %xmm10
+
+ movdqa SHUF_MASK(%rip), %xmm11
+ # receive the initial 64B data, xor the initial crc value
+ movdqu 16*0(arg2), %xmm0
+ movdqu 16*1(arg2), %xmm1
+ movdqu 16*2(arg2), %xmm2
+ movdqu 16*3(arg2), %xmm3
+ movdqu 16*4(arg2), %xmm4
+ movdqu 16*5(arg2), %xmm5
+ movdqu 16*6(arg2), %xmm6
+ movdqu 16*7(arg2), %xmm7
+
+ pshufb %xmm11, %xmm0
+ # XOR the initial_crc value
+ pxor %xmm10, %xmm0
+ pshufb %xmm11, %xmm1
+ pshufb %xmm11, %xmm2
+ pshufb %xmm11, %xmm3
+ pshufb %xmm11, %xmm4
+ pshufb %xmm11, %xmm5
+ pshufb %xmm11, %xmm6
+ pshufb %xmm11, %xmm7
+
+ movdqa rk3(%rip), %xmm10 #xmm10 has rk3 and rk4
+ #imm value of pclmulqdq instruction
+ #will determine which constant to use
+
+ #################################################################
+ # we subtract 256 instead of 128 to save one instruction from the loop
+ sub $256, arg3
+
+ # at this section of the code, there is 64*x+y (0<=y<64) bytes of
+ # buffer. The _fold_64_B_loop will fold 64B at a time
+ # until we have 64+y Bytes of buffer
+
+
+ # fold 64B at a time. This section of the code folds 4 xmm
+ # registers in parallel
+_fold_64_B_loop:
+
+ # update the buffer pointer
+ add $128, arg2 # buf += 64#
+
+ movdqu 16*0(arg2), %xmm9
+ movdqu 16*1(arg2), %xmm12
+ pshufb %xmm11, %xmm9
+ pshufb %xmm11, %xmm12
+ movdqa %xmm0, %xmm8
+ movdqa %xmm1, %xmm13
+ pclmulqdq $0x0 , %xmm10, %xmm0
+ pclmulqdq $0x11, %xmm10, %xmm8
+ pclmulqdq $0x0 , %xmm10, %xmm1
+ pclmulqdq $0x11, %xmm10, %xmm13
+ pxor %xmm9 , %xmm0
+ xorps %xmm8 , %xmm0
+ pxor %xmm12, %xmm1
+ xorps %xmm13, %xmm1
+
+ movdqu 16*2(arg2), %xmm9
+ movdqu 16*3(arg2), %xmm12
+ pshufb %xmm11, %xmm9
+ pshufb %xmm11, %xmm12
+ movdqa %xmm2, %xmm8
+ movdqa %xmm3, %xmm13
+ pclmulqdq $0x0, %xmm10, %xmm2
+ pclmulqdq $0x11, %xmm10, %xmm8
+ pclmulqdq $0x0, %xmm10, %xmm3
+ pclmulqdq $0x11, %xmm10, %xmm13
+ pxor %xmm9 , %xmm2
+ xorps %xmm8 , %xmm2
+ pxor %xmm12, %xmm3
+ xorps %xmm13, %xmm3
+
+ movdqu 16*4(arg2), %xmm9
+ movdqu 16*5(arg2), %xmm12
+ pshufb %xmm11, %xmm9
+ pshufb %xmm11, %xmm12
+ movdqa %xmm4, %xmm8
+ movdqa %xmm5, %xmm13
+ pclmulqdq $0x0, %xmm10, %xmm4
+ pclmulqdq $0x11, %xmm10, %xmm8
+ pclmulqdq $0x0, %xmm10, %xmm5
+ pclmulqdq $0x11, %xmm10, %xmm13
+ pxor %xmm9 , %xmm4
+ xorps %xmm8 , %xmm4
+ pxor %xmm12, %xmm5
+ xorps %xmm13, %xmm5
+
+ movdqu 16*6(arg2), %xmm9
+ movdqu 16*7(arg2), %xmm12
+ pshufb %xmm11, %xmm9
+ pshufb %xmm11, %xmm12
+ movdqa %xmm6 , %xmm8
+ movdqa %xmm7 , %xmm13
+ pclmulqdq $0x0 , %xmm10, %xmm6
+ pclmulqdq $0x11, %xmm10, %xmm8
+ pclmulqdq $0x0 , %xmm10, %xmm7
+ pclmulqdq $0x11, %xmm10, %xmm13
+ pxor %xmm9 , %xmm6
+ xorps %xmm8 , %xmm6
+ pxor %xmm12, %xmm7
+ xorps %xmm13, %xmm7
+
+ sub $128, arg3
+
+ # check if there is another 64B in the buffer to be able to fold
+ jge _fold_64_B_loop
+ ##################################################################
+
+
+ add $128, arg2
+ # at this point, the buffer pointer is pointing at the last y Bytes
+ # of the buffer the 64B of folded data is in 4 of the xmm
+ # registers: xmm0, xmm1, xmm2, xmm3
+
+
+ # fold the 8 xmm registers to 1 xmm register with different constants
+
+ movdqa rk9(%rip), %xmm10
+ movdqa %xmm0, %xmm8
+ pclmulqdq $0x11, %xmm10, %xmm0
+ pclmulqdq $0x0 , %xmm10, %xmm8
+ pxor %xmm8, %xmm7
+ xorps %xmm0, %xmm7
+
+ movdqa rk11(%rip), %xmm10
+ movdqa %xmm1, %xmm8
+ pclmulqdq $0x11, %xmm10, %xmm1
+ pclmulqdq $0x0 , %xmm10, %xmm8
+ pxor %xmm8, %xmm7
+ xorps %xmm1, %xmm7
+
+ movdqa rk13(%rip), %xmm10
+ movdqa %xmm2, %xmm8
+ pclmulqdq $0x11, %xmm10, %xmm2
+ pclmulqdq $0x0 , %xmm10, %xmm8
+ pxor %xmm8, %xmm7
+ pxor %xmm2, %xmm7
+
+ movdqa rk15(%rip), %xmm10
+ movdqa %xmm3, %xmm8
+ pclmulqdq $0x11, %xmm10, %xmm3
+ pclmulqdq $0x0 , %xmm10, %xmm8
+ pxor %xmm8, %xmm7
+ xorps %xmm3, %xmm7
+
+ movdqa rk17(%rip), %xmm10
+ movdqa %xmm4, %xmm8
+ pclmulqdq $0x11, %xmm10, %xmm4
+ pclmulqdq $0x0 , %xmm10, %xmm8
+ pxor %xmm8, %xmm7
+ pxor %xmm4, %xmm7
+
+ movdqa rk19(%rip), %xmm10
+ movdqa %xmm5, %xmm8
+ pclmulqdq $0x11, %xmm10, %xmm5
+ pclmulqdq $0x0 , %xmm10, %xmm8
+ pxor %xmm8, %xmm7
+ xorps %xmm5, %xmm7
+
+ movdqa rk1(%rip), %xmm10 #xmm10 has rk1 and rk2
+ #imm value of pclmulqdq instruction
+ #will determine which constant to use
+ movdqa %xmm6, %xmm8
+ pclmulqdq $0x11, %xmm10, %xmm6
+ pclmulqdq $0x0 , %xmm10, %xmm8
+ pxor %xmm8, %xmm7
+ pxor %xmm6, %xmm7
+
+
+ # instead of 64, we add 48 to the loop counter to save 1 instruction
+ # from the loop instead of a cmp instruction, we use the negative
+ # flag with the jl instruction
+ add $128-16, arg3
+ jl _final_reduction_for_128
+
+ # now we have 16+y bytes left to reduce. 16 Bytes is in register xmm7
+ # and the rest is in memory. We can fold 16 bytes at a time if y>=16
+ # continue folding 16B at a time
+
+_16B_reduction_loop:
+ movdqa %xmm7, %xmm8
+ pclmulqdq $0x11, %xmm10, %xmm7
+ pclmulqdq $0x0 , %xmm10, %xmm8
+ pxor %xmm8, %xmm7
+ movdqu (arg2), %xmm0
+ pshufb %xmm11, %xmm0
+ pxor %xmm0 , %xmm7
+ add $16, arg2
+ sub $16, arg3
+ # instead of a cmp instruction, we utilize the flags with the
+ # jge instruction equivalent of: cmp arg3, 16-16
+ # check if there is any more 16B in the buffer to be able to fold
+ jge _16B_reduction_loop
+
+ #now we have 16+z bytes left to reduce, where 0<= z < 16.
+ #first, we reduce the data in the xmm7 register
+
+
+_final_reduction_for_128:
+ # check if any more data to fold. If not, compute the CRC of
+ # the final 128 bits
+ add $16, arg3
+ je _128_done
+
+ # here we are getting data that is less than 16 bytes.
+ # since we know that there was data before the pointer, we can
+ # offset the input pointer before the actual point, to receive
+ # exactly 16 bytes. after that the registers need to be adjusted.
+_get_last_two_xmms:
+ movdqa %xmm7, %xmm2
+
+ movdqu -16(arg2, arg3), %xmm1
+ pshufb %xmm11, %xmm1
+
+ # get rid of the extra data that was loaded before
+ # load the shift constant
+ lea pshufb_shf_table+16(%rip), %rax
+ sub arg3, %rax
+ movdqu (%rax), %xmm0
+
+ # shift xmm2 to the left by arg3 bytes
+ pshufb %xmm0, %xmm2
+
+ # shift xmm7 to the right by 16-arg3 bytes
+ pxor mask1(%rip), %xmm0
+ pshufb %xmm0, %xmm7
+ pblendvb %xmm2, %xmm1 #xmm0 is implicit
+
+ # fold 16 Bytes
+ movdqa %xmm1, %xmm2
+ movdqa %xmm7, %xmm8
+ pclmulqdq $0x11, %xmm10, %xmm7
+ pclmulqdq $0x0 , %xmm10, %xmm8
+ pxor %xmm8, %xmm7
+ pxor %xmm2, %xmm7
+
+_128_done:
+ # compute crc of a 128-bit value
+ movdqa rk5(%rip), %xmm10 # rk5 and rk6 in xmm10
+ movdqa %xmm7, %xmm0
+
+ #64b fold
+ pclmulqdq $0x1, %xmm10, %xmm7
+ pslldq $8 , %xmm0
+ pxor %xmm0, %xmm7
+
+ #32b fold
+ movdqa %xmm7, %xmm0
+
+ pand mask2(%rip), %xmm0
+
+ psrldq $12, %xmm7
+ pclmulqdq $0x10, %xmm10, %xmm7
+ pxor %xmm0, %xmm7
+
+ #barrett reduction
+_barrett:
+ movdqa rk7(%rip), %xmm10 # rk7 and rk8 in xmm10
+ movdqa %xmm7, %xmm0
+ pclmulqdq $0x01, %xmm10, %xmm7
+ pslldq $4, %xmm7
+ pclmulqdq $0x11, %xmm10, %xmm7
+
+ pslldq $4, %xmm7
+ pxor %xmm0, %xmm7
+ pextrd $1, %xmm7, %eax
+
+_cleanup:
+ # scale the result back to 16 bits
+ shr $16, %eax
+ mov %rcx, %rsp
+ ret
+
+########################################################################
+
+.align 16
+_less_than_128:
+
+ # check if there is enough buffer to be able to fold 16B at a time
+ cmp $32, arg3
+ jl _less_than_32
+ movdqa SHUF_MASK(%rip), %xmm11
+
+ # now if there is, load the constants
+ movdqa rk1(%rip), %xmm10 # rk1 and rk2 in xmm10
+
+ movd arg1_low32, %xmm0 # get the initial crc value
+ pslldq $12, %xmm0 # align it to its correct place
+ movdqu (arg2), %xmm7 # load the plaintext
+ pshufb %xmm11, %xmm7 # byte-reflect the plaintext
+ pxor %xmm0, %xmm7
+
+
+ # update the buffer pointer
+ add $16, arg2
+
+ # update the counter. subtract 32 instead of 16 to save one
+ # instruction from the loop
+ sub $32, arg3
+
+ jmp _16B_reduction_loop
+
+
+.align 16
+_less_than_32:
+ # mov initial crc to the return value. this is necessary for
+ # zero-length buffers.
+ mov arg1_low32, %eax
+ test arg3, arg3
+ je _cleanup
+
+ movdqa SHUF_MASK(%rip), %xmm11
+
+ movd arg1_low32, %xmm0 # get the initial crc value
+ pslldq $12, %xmm0 # align it to its correct place
+
+ cmp $16, arg3
+ je _exact_16_left
+ jl _less_than_16_left
+
+ movdqu (arg2), %xmm7 # load the plaintext
+ pshufb %xmm11, %xmm7 # byte-reflect the plaintext
+ pxor %xmm0 , %xmm7 # xor the initial crc value
+ add $16, arg2
+ sub $16, arg3
+ movdqa rk1(%rip), %xmm10 # rk1 and rk2 in xmm10
+ jmp _get_last_two_xmms
+
+
+.align 16
+_less_than_16_left:
+ # use stack space to load data less than 16 bytes, zero-out
+ # the 16B in memory first.
+
+ pxor %xmm1, %xmm1
+ mov %rsp, %r11
+ movdqa %xmm1, (%r11)
+
+ cmp $4, arg3
+ jl _only_less_than_4
+
+ # backup the counter value
+ mov arg3, %r9
+ cmp $8, arg3
+ jl _less_than_8_left
+
+ # load 8 Bytes
+ mov (arg2), %rax
+ mov %rax, (%r11)
+ add $8, %r11
+ sub $8, arg3
+ add $8, arg2
+_less_than_8_left:
+
+ cmp $4, arg3
+ jl _less_than_4_left
+
+ # load 4 Bytes
+ mov (arg2), %eax
+ mov %eax, (%r11)
+ add $4, %r11
+ sub $4, arg3
+ add $4, arg2
+_less_than_4_left:
+
+ cmp $2, arg3
+ jl _less_than_2_left
+
+ # load 2 Bytes
+ mov (arg2), %ax
+ mov %ax, (%r11)
+ add $2, %r11
+ sub $2, arg3
+ add $2, arg2
+_less_than_2_left:
+ cmp $1, arg3
+ jl _zero_left
+
+ # load 1 Byte
+ mov (arg2), %al
+ mov %al, (%r11)
+_zero_left:
+ movdqa (%rsp), %xmm7
+ pshufb %xmm11, %xmm7
+ pxor %xmm0 , %xmm7 # xor the initial crc value
+
+ # shl r9, 4
+ lea pshufb_shf_table+16(%rip), %rax
+ sub %r9, %rax
+ movdqu (%rax), %xmm0
+ pxor mask1(%rip), %xmm0
+
+ pshufb %xmm0, %xmm7
+ jmp _128_done
+
+.align 16
+_exact_16_left:
+ movdqu (arg2), %xmm7
+ pshufb %xmm11, %xmm7
+ pxor %xmm0 , %xmm7 # xor the initial crc value
+
+ jmp _128_done
+
+_only_less_than_4:
+ cmp $3, arg3
+ jl _only_less_than_3
+
+ # load 3 Bytes
+ mov (arg2), %al
+ mov %al, (%r11)
+
+ mov 1(arg2), %al
+ mov %al, 1(%r11)
+
+ mov 2(arg2), %al
+ mov %al, 2(%r11)
+
+ movdqa (%rsp), %xmm7
+ pshufb %xmm11, %xmm7
+ pxor %xmm0 , %xmm7 # xor the initial crc value
+
+ psrldq $5, %xmm7
+
+ jmp _barrett
+_only_less_than_3:
+ cmp $2, arg3
+ jl _only_less_than_2
+
+ # load 2 Bytes
+ mov (arg2), %al
+ mov %al, (%r11)
+
+ mov 1(arg2), %al
+ mov %al, 1(%r11)
+
+ movdqa (%rsp), %xmm7
+ pshufb %xmm11, %xmm7
+ pxor %xmm0 , %xmm7 # xor the initial crc value
+
+ psrldq $6, %xmm7
+
+ jmp _barrett
+_only_less_than_2:
+
+ # load 1 Byte
+ mov (arg2), %al
+ mov %al, (%r11)
+
+ movdqa (%rsp), %xmm7
+ pshufb %xmm11, %xmm7
+ pxor %xmm0 , %xmm7 # xor the initial crc value
+
+ psrldq $7, %xmm7
+
+ jmp _barrett
+
+ENDPROC(crc_t10dif_pcl)
+
+.data
+
+# precomputed constants
+# these constants are precomputed from the poly:
+# 0x8bb70000 (0x8bb7 scaled to 32 bits)
+.align 16
+# Q = 0x18BB70000
+# rk1 = 2^(32*3) mod Q << 32
+# rk2 = 2^(32*5) mod Q << 32
+# rk3 = 2^(32*15) mod Q << 32
+# rk4 = 2^(32*17) mod Q << 32
+# rk5 = 2^(32*3) mod Q << 32
+# rk6 = 2^(32*2) mod Q << 32
+# rk7 = floor(2^64/Q)
+# rk8 = Q
+rk1:
+.quad 0x2d56000000000000
+rk2:
+.quad 0x06df000000000000
+rk3:
+.quad 0x9d9d000000000000
+rk4:
+.quad 0x7cf5000000000000
+rk5:
+.quad 0x2d56000000000000
+rk6:
+.quad 0x1368000000000000
+rk7:
+.quad 0x00000001f65a57f8
+rk8:
+.quad 0x000000018bb70000
+
+rk9:
+.quad 0xceae000000000000
+rk10:
+.quad 0xbfd6000000000000
+rk11:
+.quad 0x1e16000000000000
+rk12:
+.quad 0x713c000000000000
+rk13:
+.quad 0xf7f9000000000000
+rk14:
+.quad 0x80a6000000000000
+rk15:
+.quad 0x044c000000000000
+rk16:
+.quad 0xe658000000000000
+rk17:
+.quad 0xad18000000000000
+rk18:
+.quad 0xa497000000000000
+rk19:
+.quad 0x6ee3000000000000
+rk20:
+.quad 0xe7b5000000000000
+
+
+
+mask1:
+.octa 0x80808080808080808080808080808080
+mask2:
+.octa 0x00000000FFFFFFFFFFFFFFFFFFFFFFFF
+
+SHUF_MASK:
+.octa 0x000102030405060708090A0B0C0D0E0F
+
+pshufb_shf_table:
+# use these values for shift constants for the pshufb instruction
+# different alignments result in values as shown:
+# DDQ 0x008f8e8d8c8b8a898887868584838281 # shl 15 (16-1) / shr1
+# DDQ 0x01008f8e8d8c8b8a8988878685848382 # shl 14 (16-3) / shr2
+# DDQ 0x0201008f8e8d8c8b8a89888786858483 # shl 13 (16-4) / shr3
+# DDQ 0x030201008f8e8d8c8b8a898887868584 # shl 12 (16-4) / shr4
+# DDQ 0x04030201008f8e8d8c8b8a8988878685 # shl 11 (16-5) / shr5
+# DDQ 0x0504030201008f8e8d8c8b8a89888786 # shl 10 (16-6) / shr6
+# DDQ 0x060504030201008f8e8d8c8b8a898887 # shl 9 (16-7) / shr7
+# DDQ 0x07060504030201008f8e8d8c8b8a8988 # shl 8 (16-8) / shr8
+# DDQ 0x0807060504030201008f8e8d8c8b8a89 # shl 7 (16-9) / shr9
+# DDQ 0x090807060504030201008f8e8d8c8b8a # shl 6 (16-10) / shr10
+# DDQ 0x0a090807060504030201008f8e8d8c8b # shl 5 (16-11) / shr11
+# DDQ 0x0b0a090807060504030201008f8e8d8c # shl 4 (16-12) / shr12
+# DDQ 0x0c0b0a090807060504030201008f8e8d # shl 3 (16-13) / shr13
+# DDQ 0x0d0c0b0a090807060504030201008f8e # shl 2 (16-14) / shr14
+# DDQ 0x0e0d0c0b0a090807060504030201008f # shl 1 (16-15) / shr15
+.octa 0x8f8e8d8c8b8a89888786858483828100
+.octa 0x000e0d0c0b0a09080706050403020100
diff --git a/arch/x86/crypto/crct10dif-pclmul_glue.c b/arch/x86/crypto/crct10dif-pclmul_glue.c
new file mode 100644
index 0000000..7845d7f
--- /dev/null
+++ b/arch/x86/crypto/crct10dif-pclmul_glue.c
@@ -0,0 +1,151 @@
+/*
+ * Cryptographic API.
+ *
+ * T10 Data Integrity Field CRC16 Crypto Transform using PCLMULQDQ Instructions
+ *
+ * Copyright (C) 2013 Intel Corporation
+ * Author: Tim Chen <tim.c.chen@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/crc-t10dif.h>
+#include <crypto/internal/hash.h>
+#include <linux/init.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <asm/i387.h>
+#include <asm/cpufeature.h>
+#include <asm/cpu_device_id.h>
+
+asmlinkage __u16 crc_t10dif_pcl(__u16 crc, const unsigned char *buf,
+ size_t len);
+
+struct chksum_desc_ctx {
+ __u16 crc;
+};
+
+/*
+ * Steps through buffer one byte at at time, calculates reflected
+ * crc using table.
+ */
+
+static int chksum_init(struct shash_desc *desc)
+{
+ struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
+ ctx->crc = 0;
+
+ return 0;
+}
+
+static int chksum_update(struct shash_desc *desc, const u8 *data,
+ unsigned int length)
+{
+ struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
+ if (irq_fpu_usable()) {
+ kernel_fpu_begin();
+ ctx->crc = crc_t10dif_pcl(ctx->crc, data, length);
+ kernel_fpu_end();
+ } else
+ ctx->crc = crc_t10dif_generic(ctx->crc, data, length);
+ return 0;
+}
+
+static int chksum_final(struct shash_desc *desc, u8 *out)
+{
+ struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
+ *(__u16 *)out = ctx->crc;
+ return 0;
+}
+
+static int __chksum_finup(__u16 *crcp, const u8 *data, unsigned int len,
+ u8 *out)
+{
+ if (irq_fpu_usable()) {
+ kernel_fpu_begin();
+ *(__u16 *)out = crc_t10dif_pcl(*crcp, data, len);
+ kernel_fpu_end();
+ } else
+ *(__u16 *)out = crc_t10dif_generic(*crcp, data, len);
+ return 0;
+}
+
+static int chksum_finup(struct shash_desc *desc, const u8 *data,
+ unsigned int len, u8 *out)
+{
+ struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
+ return __chksum_finup(&ctx->crc, data, len, out);
+}
+
+static int chksum_digest(struct shash_desc *desc, const u8 *data,
+ unsigned int length, u8 *out)
+{
+ struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
+ return __chksum_finup(&ctx->crc, data, length, out);
+}
+
+static struct shash_alg alg = {
+ .digestsize = CRC_T10DIF_DIGEST_SIZE,
+ .init = chksum_init,
+ .update = chksum_update,
+ .final = chksum_final,
+ .finup = chksum_finup,
+ .digest = chksum_digest,
+ .descsize = sizeof(struct chksum_desc_ctx),
+ .base = {
+ .cra_name = "crct10dif",
+ .cra_driver_name = "crct10dif-pclmul",
+ .cra_priority = 200,
+ .cra_blocksize = CRC_T10DIF_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
+ }
+};
+
+static const struct x86_cpu_id crct10dif_cpu_id[] = {
+ X86_FEATURE_MATCH(X86_FEATURE_PCLMULQDQ),
+ {}
+};
+MODULE_DEVICE_TABLE(x86cpu, crct10dif_cpu_id);
+
+static int __init crct10dif_intel_mod_init(void)
+{
+ if (!x86_match_cpu(crct10dif_cpu_id))
+ return -ENODEV;
+
+ return crypto_register_shash(&alg);
+}
+
+static void __exit crct10dif_intel_mod_fini(void)
+{
+ crypto_unregister_shash(&alg);
+}
+
+module_init(crct10dif_intel_mod_init);
+module_exit(crct10dif_intel_mod_fini);
+
+MODULE_AUTHOR("Tim Chen <tim.c.chen@linux.intel.com>");
+MODULE_DESCRIPTION("T10 DIF CRC calculation accelerated with PCLMULQDQ.");
+MODULE_LICENSE("GPL");
+
+MODULE_ALIAS("crct10dif");
+MODULE_ALIAS("crct10dif-pclmul");
diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c
index 597d4da..50226c4 100644
--- a/arch/x86/crypto/sha256_ssse3_glue.c
+++ b/arch/x86/crypto/sha256_ssse3_glue.c
@@ -187,7 +187,36 @@ static int sha256_ssse3_import(struct shash_desc *desc, const void *in)
return 0;
}
-static struct shash_alg alg = {
+static int sha224_ssse3_init(struct shash_desc *desc)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+
+ sctx->state[0] = SHA224_H0;
+ sctx->state[1] = SHA224_H1;
+ sctx->state[2] = SHA224_H2;
+ sctx->state[3] = SHA224_H3;
+ sctx->state[4] = SHA224_H4;
+ sctx->state[5] = SHA224_H5;
+ sctx->state[6] = SHA224_H6;
+ sctx->state[7] = SHA224_H7;
+ sctx->count = 0;
+
+ return 0;
+}
+
+static int sha224_ssse3_final(struct shash_desc *desc, u8 *hash)
+{
+ u8 D[SHA256_DIGEST_SIZE];
+
+ sha256_ssse3_final(desc, D);
+
+ memcpy(hash, D, SHA224_DIGEST_SIZE);
+ memset(D, 0, SHA256_DIGEST_SIZE);
+
+ return 0;
+}
+
+static struct shash_alg algs[] = { {
.digestsize = SHA256_DIGEST_SIZE,
.init = sha256_ssse3_init,
.update = sha256_ssse3_update,
@@ -204,7 +233,24 @@ static struct shash_alg alg = {
.cra_blocksize = SHA256_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
-};
+}, {
+ .digestsize = SHA224_DIGEST_SIZE,
+ .init = sha224_ssse3_init,
+ .update = sha256_ssse3_update,
+ .final = sha224_ssse3_final,
+ .export = sha256_ssse3_export,
+ .import = sha256_ssse3_import,
+ .descsize = sizeof(struct sha256_state),
+ .statesize = sizeof(struct sha256_state),
+ .base = {
+ .cra_name = "sha224",
+ .cra_driver_name = "sha224-ssse3",
+ .cra_priority = 150,
+ .cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .cra_blocksize = SHA224_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
+ }
+} };
#ifdef CONFIG_AS_AVX
static bool __init avx_usable(void)
@@ -227,7 +273,7 @@ static bool __init avx_usable(void)
static int __init sha256_ssse3_mod_init(void)
{
- /* test for SSE3 first */
+ /* test for SSSE3 first */
if (cpu_has_ssse3)
sha256_transform_asm = sha256_transform_ssse3;
@@ -254,7 +300,7 @@ static int __init sha256_ssse3_mod_init(void)
else
#endif
pr_info("Using SSSE3 optimized SHA-256 implementation\n");
- return crypto_register_shash(&alg);
+ return crypto_register_shashes(algs, ARRAY_SIZE(algs));
}
pr_info("Neither AVX nor SSSE3 is available/usable.\n");
@@ -263,7 +309,7 @@ static int __init sha256_ssse3_mod_init(void)
static void __exit sha256_ssse3_mod_fini(void)
{
- crypto_unregister_shash(&alg);
+ crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
}
module_init(sha256_ssse3_mod_init);
@@ -273,3 +319,4 @@ MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("SHA256 Secure Hash Algorithm, Supplemental SSE3 accelerated");
MODULE_ALIAS("sha256");
+MODULE_ALIAS("sha384");
diff --git a/arch/x86/crypto/sha512_ssse3_glue.c b/arch/x86/crypto/sha512_ssse3_glue.c
index 6cbd8df..f30cd10 100644
--- a/arch/x86/crypto/sha512_ssse3_glue.c
+++ b/arch/x86/crypto/sha512_ssse3_glue.c
@@ -194,7 +194,37 @@ static int sha512_ssse3_import(struct shash_desc *desc, const void *in)
return 0;
}
-static struct shash_alg alg = {
+static int sha384_ssse3_init(struct shash_desc *desc)
+{
+ struct sha512_state *sctx = shash_desc_ctx(desc);
+
+ sctx->state[0] = SHA384_H0;
+ sctx->state[1] = SHA384_H1;
+ sctx->state[2] = SHA384_H2;
+ sctx->state[3] = SHA384_H3;
+ sctx->state[4] = SHA384_H4;
+ sctx->state[5] = SHA384_H5;
+ sctx->state[6] = SHA384_H6;
+ sctx->state[7] = SHA384_H7;
+
+ sctx->count[0] = sctx->count[1] = 0;
+
+ return 0;
+}
+
+static int sha384_ssse3_final(struct shash_desc *desc, u8 *hash)
+{
+ u8 D[SHA512_DIGEST_SIZE];
+
+ sha512_ssse3_final(desc, D);
+
+ memcpy(hash, D, SHA384_DIGEST_SIZE);
+ memset(D, 0, SHA512_DIGEST_SIZE);
+
+ return 0;
+}
+
+static struct shash_alg algs[] = { {
.digestsize = SHA512_DIGEST_SIZE,
.init = sha512_ssse3_init,
.update = sha512_ssse3_update,
@@ -211,7 +241,24 @@ static struct shash_alg alg = {
.cra_blocksize = SHA512_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
-};
+}, {
+ .digestsize = SHA384_DIGEST_SIZE,
+ .init = sha384_ssse3_init,
+ .update = sha512_ssse3_update,
+ .final = sha384_ssse3_final,
+ .export = sha512_ssse3_export,
+ .import = sha512_ssse3_import,
+ .descsize = sizeof(struct sha512_state),
+ .statesize = sizeof(struct sha512_state),
+ .base = {
+ .cra_name = "sha384",
+ .cra_driver_name = "sha384-ssse3",
+ .cra_priority = 150,
+ .cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .cra_blocksize = SHA384_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
+ }
+} };
#ifdef CONFIG_AS_AVX
static bool __init avx_usable(void)
@@ -234,7 +281,7 @@ static bool __init avx_usable(void)
static int __init sha512_ssse3_mod_init(void)
{
- /* test for SSE3 first */
+ /* test for SSSE3 first */
if (cpu_has_ssse3)
sha512_transform_asm = sha512_transform_ssse3;
@@ -261,7 +308,7 @@ static int __init sha512_ssse3_mod_init(void)
else
#endif
pr_info("Using SSSE3 optimized SHA-512 implementation\n");
- return crypto_register_shash(&alg);
+ return crypto_register_shashes(algs, ARRAY_SIZE(algs));
}
pr_info("Neither AVX nor SSSE3 is available/usable.\n");
@@ -270,7 +317,7 @@ static int __init sha512_ssse3_mod_init(void)
static void __exit sha512_ssse3_mod_fini(void)
{
- crypto_unregister_shash(&alg);
+ crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
}
module_init(sha512_ssse3_mod_init);
@@ -280,3 +327,4 @@ MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("SHA512 Secure Hash Algorithm, Supplemental SSE3 accelerated");
MODULE_ALIAS("sha512");
+MODULE_ALIAS("sha384");
diff --git a/arch/x86/crypto/twofish-avx2-asm_64.S b/arch/x86/crypto/twofish-avx2-asm_64.S
deleted file mode 100644
index e1a83b9..0000000
--- a/arch/x86/crypto/twofish-avx2-asm_64.S
+++ /dev/null
@@ -1,600 +0,0 @@
-/*
- * x86_64/AVX2 assembler optimized version of Twofish
- *
- * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- */
-
-#include <linux/linkage.h>
-#include "glue_helper-asm-avx2.S"
-
-.file "twofish-avx2-asm_64.S"
-
-.data
-.align 16
-
-.Lvpshufb_mask0:
-.long 0x80808000
-.long 0x80808004
-.long 0x80808008
-.long 0x8080800c
-
-.Lbswap128_mask:
- .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
-.Lxts_gf128mul_and_shl1_mask_0:
- .byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0
-.Lxts_gf128mul_and_shl1_mask_1:
- .byte 0x0e, 1, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0
-
-.text
-
-/* structure of crypto context */
-#define s0 0
-#define s1 1024
-#define s2 2048
-#define s3 3072
-#define w 4096
-#define k 4128
-
-/* register macros */
-#define CTX %rdi
-
-#define RS0 CTX
-#define RS1 %r8
-#define RS2 %r9
-#define RS3 %r10
-#define RK %r11
-#define RW %rax
-#define RROUND %r12
-#define RROUNDd %r12d
-
-#define RA0 %ymm8
-#define RB0 %ymm9
-#define RC0 %ymm10
-#define RD0 %ymm11
-#define RA1 %ymm12
-#define RB1 %ymm13
-#define RC1 %ymm14
-#define RD1 %ymm15
-
-/* temp regs */
-#define RX0 %ymm0
-#define RY0 %ymm1
-#define RX1 %ymm2
-#define RY1 %ymm3
-#define RT0 %ymm4
-#define RIDX %ymm5
-
-#define RX0x %xmm0
-#define RY0x %xmm1
-#define RX1x %xmm2
-#define RY1x %xmm3
-#define RT0x %xmm4
-
-/* vpgatherdd mask and '-1' */
-#define RNOT %ymm6
-
-/* byte mask, (-1 >> 24) */
-#define RBYTE %ymm7
-
-/**********************************************************************
- 16-way AVX2 twofish
- **********************************************************************/
-#define init_round_constants() \
- vpcmpeqd RNOT, RNOT, RNOT; \
- vpsrld $24, RNOT, RBYTE; \
- leaq k(CTX), RK; \
- leaq w(CTX), RW; \
- leaq s1(CTX), RS1; \
- leaq s2(CTX), RS2; \
- leaq s3(CTX), RS3; \
-
-#define g16(ab, rs0, rs1, rs2, rs3, xy) \
- vpand RBYTE, ab ## 0, RIDX; \
- vpgatherdd RNOT, (rs0, RIDX, 4), xy ## 0; \
- vpcmpeqd RNOT, RNOT, RNOT; \
- \
- vpand RBYTE, ab ## 1, RIDX; \
- vpgatherdd RNOT, (rs0, RIDX, 4), xy ## 1; \
- vpcmpeqd RNOT, RNOT, RNOT; \
- \
- vpsrld $8, ab ## 0, RIDX; \
- vpand RBYTE, RIDX, RIDX; \
- vpgatherdd RNOT, (rs1, RIDX, 4), RT0; \
- vpcmpeqd RNOT, RNOT, RNOT; \
- vpxor RT0, xy ## 0, xy ## 0; \
- \
- vpsrld $8, ab ## 1, RIDX; \
- vpand RBYTE, RIDX, RIDX; \
- vpgatherdd RNOT, (rs1, RIDX, 4), RT0; \
- vpcmpeqd RNOT, RNOT, RNOT; \
- vpxor RT0, xy ## 1, xy ## 1; \
- \
- vpsrld $16, ab ## 0, RIDX; \
- vpand RBYTE, RIDX, RIDX; \
- vpgatherdd RNOT, (rs2, RIDX, 4), RT0; \
- vpcmpeqd RNOT, RNOT, RNOT; \
- vpxor RT0, xy ## 0, xy ## 0; \
- \
- vpsrld $16, ab ## 1, RIDX; \
- vpand RBYTE, RIDX, RIDX; \
- vpgatherdd RNOT, (rs2, RIDX, 4), RT0; \
- vpcmpeqd RNOT, RNOT, RNOT; \
- vpxor RT0, xy ## 1, xy ## 1; \
- \
- vpsrld $24, ab ## 0, RIDX; \
- vpgatherdd RNOT, (rs3, RIDX, 4), RT0; \
- vpcmpeqd RNOT, RNOT, RNOT; \
- vpxor RT0, xy ## 0, xy ## 0; \
- \
- vpsrld $24, ab ## 1, RIDX; \
- vpgatherdd RNOT, (rs3, RIDX, 4), RT0; \
- vpcmpeqd RNOT, RNOT, RNOT; \
- vpxor RT0, xy ## 1, xy ## 1;
-
-#define g1_16(a, x) \
- g16(a, RS0, RS1, RS2, RS3, x);
-
-#define g2_16(b, y) \
- g16(b, RS1, RS2, RS3, RS0, y);
-
-#define encrypt_round_end16(a, b, c, d, nk) \
- vpaddd RY0, RX0, RX0; \
- vpaddd RX0, RY0, RY0; \
- vpbroadcastd nk(RK,RROUND,8), RT0; \
- vpaddd RT0, RX0, RX0; \
- vpbroadcastd 4+nk(RK,RROUND,8), RT0; \
- vpaddd RT0, RY0, RY0; \
- \
- vpxor RY0, d ## 0, d ## 0; \
- \
- vpxor RX0, c ## 0, c ## 0; \
- vpsrld $1, c ## 0, RT0; \
- vpslld $31, c ## 0, c ## 0; \
- vpor RT0, c ## 0, c ## 0; \
- \
- vpaddd RY1, RX1, RX1; \
- vpaddd RX1, RY1, RY1; \
- vpbroadcastd nk(RK,RROUND,8), RT0; \
- vpaddd RT0, RX1, RX1; \
- vpbroadcastd 4+nk(RK,RROUND,8), RT0; \
- vpaddd RT0, RY1, RY1; \
- \
- vpxor RY1, d ## 1, d ## 1; \
- \
- vpxor RX1, c ## 1, c ## 1; \
- vpsrld $1, c ## 1, RT0; \
- vpslld $31, c ## 1, c ## 1; \
- vpor RT0, c ## 1, c ## 1; \
-
-#define encrypt_round16(a, b, c, d, nk) \
- g2_16(b, RY); \
- \
- vpslld $1, b ## 0, RT0; \
- vpsrld $31, b ## 0, b ## 0; \
- vpor RT0, b ## 0, b ## 0; \
- \
- vpslld $1, b ## 1, RT0; \
- vpsrld $31, b ## 1, b ## 1; \
- vpor RT0, b ## 1, b ## 1; \
- \
- g1_16(a, RX); \
- \
- encrypt_round_end16(a, b, c, d, nk);
-
-#define encrypt_round_first16(a, b, c, d, nk) \
- vpslld $1, d ## 0, RT0; \
- vpsrld $31, d ## 0, d ## 0; \
- vpor RT0, d ## 0, d ## 0; \
- \
- vpslld $1, d ## 1, RT0; \
- vpsrld $31, d ## 1, d ## 1; \
- vpor RT0, d ## 1, d ## 1; \
- \
- encrypt_round16(a, b, c, d, nk);
-
-#define encrypt_round_last16(a, b, c, d, nk) \
- g2_16(b, RY); \
- \
- g1_16(a, RX); \
- \
- encrypt_round_end16(a, b, c, d, nk);
-
-#define decrypt_round_end16(a, b, c, d, nk) \
- vpaddd RY0, RX0, RX0; \
- vpaddd RX0, RY0, RY0; \
- vpbroadcastd nk(RK,RROUND,8), RT0; \
- vpaddd RT0, RX0, RX0; \
- vpbroadcastd 4+nk(RK,RROUND,8), RT0; \
- vpaddd RT0, RY0, RY0; \
- \
- vpxor RX0, c ## 0, c ## 0; \
- \
- vpxor RY0, d ## 0, d ## 0; \
- vpsrld $1, d ## 0, RT0; \
- vpslld $31, d ## 0, d ## 0; \
- vpor RT0, d ## 0, d ## 0; \
- \
- vpaddd RY1, RX1, RX1; \
- vpaddd RX1, RY1, RY1; \
- vpbroadcastd nk(RK,RROUND,8), RT0; \
- vpaddd RT0, RX1, RX1; \
- vpbroadcastd 4+nk(RK,RROUND,8), RT0; \
- vpaddd RT0, RY1, RY1; \
- \
- vpxor RX1, c ## 1, c ## 1; \
- \
- vpxor RY1, d ## 1, d ## 1; \
- vpsrld $1, d ## 1, RT0; \
- vpslld $31, d ## 1, d ## 1; \
- vpor RT0, d ## 1, d ## 1;
-
-#define decrypt_round16(a, b, c, d, nk) \
- g1_16(a, RX); \
- \
- vpslld $1, a ## 0, RT0; \
- vpsrld $31, a ## 0, a ## 0; \
- vpor RT0, a ## 0, a ## 0; \
- \
- vpslld $1, a ## 1, RT0; \
- vpsrld $31, a ## 1, a ## 1; \
- vpor RT0, a ## 1, a ## 1; \
- \
- g2_16(b, RY); \
- \
- decrypt_round_end16(a, b, c, d, nk);
-
-#define decrypt_round_first16(a, b, c, d, nk) \
- vpslld $1, c ## 0, RT0; \
- vpsrld $31, c ## 0, c ## 0; \
- vpor RT0, c ## 0, c ## 0; \
- \
- vpslld $1, c ## 1, RT0; \
- vpsrld $31, c ## 1, c ## 1; \
- vpor RT0, c ## 1, c ## 1; \
- \
- decrypt_round16(a, b, c, d, nk)
-
-#define decrypt_round_last16(a, b, c, d, nk) \
- g1_16(a, RX); \
- \
- g2_16(b, RY); \
- \
- decrypt_round_end16(a, b, c, d, nk);
-
-#define encrypt_cycle16() \
- encrypt_round16(RA, RB, RC, RD, 0); \
- encrypt_round16(RC, RD, RA, RB, 8);
-
-#define encrypt_cycle_first16() \
- encrypt_round_first16(RA, RB, RC, RD, 0); \
- encrypt_round16(RC, RD, RA, RB, 8);
-
-#define encrypt_cycle_last16() \
- encrypt_round16(RA, RB, RC, RD, 0); \
- encrypt_round_last16(RC, RD, RA, RB, 8);
-
-#define decrypt_cycle16(n) \
- decrypt_round16(RC, RD, RA, RB, 8); \
- decrypt_round16(RA, RB, RC, RD, 0);
-
-#define decrypt_cycle_first16(n) \
- decrypt_round_first16(RC, RD, RA, RB, 8); \
- decrypt_round16(RA, RB, RC, RD, 0);
-
-#define decrypt_cycle_last16(n) \
- decrypt_round16(RC, RD, RA, RB, 8); \
- decrypt_round_last16(RA, RB, RC, RD, 0);
-
-#define transpose_4x4(x0,x1,x2,x3,t1,t2) \
- vpunpckhdq x1, x0, t2; \
- vpunpckldq x1, x0, x0; \
- \
- vpunpckldq x3, x2, t1; \
- vpunpckhdq x3, x2, x2; \
- \
- vpunpckhqdq t1, x0, x1; \
- vpunpcklqdq t1, x0, x0; \
- \
- vpunpckhqdq x2, t2, x3; \
- vpunpcklqdq x2, t2, x2;
-
-#define read_blocks8(offs,a,b,c,d) \
- transpose_4x4(a, b, c, d, RX0, RY0);
-
-#define write_blocks8(offs,a,b,c,d) \
- transpose_4x4(a, b, c, d, RX0, RY0);
-
-#define inpack_enc8(a,b,c,d) \
- vpbroadcastd 4*0(RW), RT0; \
- vpxor RT0, a, a; \
- \
- vpbroadcastd 4*1(RW), RT0; \
- vpxor RT0, b, b; \
- \
- vpbroadcastd 4*2(RW), RT0; \
- vpxor RT0, c, c; \
- \
- vpbroadcastd 4*3(RW), RT0; \
- vpxor RT0, d, d;
-
-#define outunpack_enc8(a,b,c,d) \
- vpbroadcastd 4*4(RW), RX0; \
- vpbroadcastd 4*5(RW), RY0; \
- vpxor RX0, c, RX0; \
- vpxor RY0, d, RY0; \
- \
- vpbroadcastd 4*6(RW), RT0; \
- vpxor RT0, a, c; \
- vpbroadcastd 4*7(RW), RT0; \
- vpxor RT0, b, d; \
- \
- vmovdqa RX0, a; \
- vmovdqa RY0, b;
-
-#define inpack_dec8(a,b,c,d) \
- vpbroadcastd 4*4(RW), RX0; \
- vpbroadcastd 4*5(RW), RY0; \
- vpxor RX0, a, RX0; \
- vpxor RY0, b, RY0; \
- \
- vpbroadcastd 4*6(RW), RT0; \
- vpxor RT0, c, a; \
- vpbroadcastd 4*7(RW), RT0; \
- vpxor RT0, d, b; \
- \
- vmovdqa RX0, c; \
- vmovdqa RY0, d;
-
-#define outunpack_dec8(a,b,c,d) \
- vpbroadcastd 4*0(RW), RT0; \
- vpxor RT0, a, a; \
- \
- vpbroadcastd 4*1(RW), RT0; \
- vpxor RT0, b, b; \
- \
- vpbroadcastd 4*2(RW), RT0; \
- vpxor RT0, c, c; \
- \
- vpbroadcastd 4*3(RW), RT0; \
- vpxor RT0, d, d;
-
-#define read_blocks16(a,b,c,d) \
- read_blocks8(0, a ## 0, b ## 0, c ## 0, d ## 0); \
- read_blocks8(8, a ## 1, b ## 1, c ## 1, d ## 1);
-
-#define write_blocks16(a,b,c,d) \
- write_blocks8(0, a ## 0, b ## 0, c ## 0, d ## 0); \
- write_blocks8(8, a ## 1, b ## 1, c ## 1, d ## 1);
-
-#define xor_blocks16(a,b,c,d) \
- xor_blocks8(0, a ## 0, b ## 0, c ## 0, d ## 0); \
- xor_blocks8(8, a ## 1, b ## 1, c ## 1, d ## 1);
-
-#define inpack_enc16(a,b,c,d) \
- inpack_enc8(a ## 0, b ## 0, c ## 0, d ## 0); \
- inpack_enc8(a ## 1, b ## 1, c ## 1, d ## 1);
-
-#define outunpack_enc16(a,b,c,d) \
- outunpack_enc8(a ## 0, b ## 0, c ## 0, d ## 0); \
- outunpack_enc8(a ## 1, b ## 1, c ## 1, d ## 1);
-
-#define inpack_dec16(a,b,c,d) \
- inpack_dec8(a ## 0, b ## 0, c ## 0, d ## 0); \
- inpack_dec8(a ## 1, b ## 1, c ## 1, d ## 1);
-
-#define outunpack_dec16(a,b,c,d) \
- outunpack_dec8(a ## 0, b ## 0, c ## 0, d ## 0); \
- outunpack_dec8(a ## 1, b ## 1, c ## 1, d ## 1);
-
-.align 8
-__twofish_enc_blk16:
- /* input:
- * %rdi: ctx, CTX
- * RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1: plaintext
- * output:
- * RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1: ciphertext
- */
- init_round_constants();
-
- read_blocks16(RA, RB, RC, RD);
- inpack_enc16(RA, RB, RC, RD);
-
- xorl RROUNDd, RROUNDd;
- encrypt_cycle_first16();
- movl $2, RROUNDd;
-
-.align 4
-.L__enc_loop:
- encrypt_cycle16();
-
- addl $2, RROUNDd;
- cmpl $14, RROUNDd;
- jne .L__enc_loop;
-
- encrypt_cycle_last16();
-
- outunpack_enc16(RA, RB, RC, RD);
- write_blocks16(RA, RB, RC, RD);
-
- ret;
-ENDPROC(__twofish_enc_blk16)
-
-.align 8
-__twofish_dec_blk16:
- /* input:
- * %rdi: ctx, CTX
- * RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1: ciphertext
- * output:
- * RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1: plaintext
- */
- init_round_constants();
-
- read_blocks16(RA, RB, RC, RD);
- inpack_dec16(RA, RB, RC, RD);
-
- movl $14, RROUNDd;
- decrypt_cycle_first16();
- movl $12, RROUNDd;
-
-.align 4
-.L__dec_loop:
- decrypt_cycle16();
-
- addl $-2, RROUNDd;
- jnz .L__dec_loop;
-
- decrypt_cycle_last16();
-
- outunpack_dec16(RA, RB, RC, RD);
- write_blocks16(RA, RB, RC, RD);
-
- ret;
-ENDPROC(__twofish_dec_blk16)
-
-ENTRY(twofish_ecb_enc_16way)
- /* input:
- * %rdi: ctx, CTX
- * %rsi: dst
- * %rdx: src
- */
-
- vzeroupper;
- pushq %r12;
-
- load_16way(%rdx, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1);
-
- call __twofish_enc_blk16;
-
- store_16way(%rsi, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1);
-
- popq %r12;
- vzeroupper;
-
- ret;
-ENDPROC(twofish_ecb_enc_16way)
-
-ENTRY(twofish_ecb_dec_16way)
- /* input:
- * %rdi: ctx, CTX
- * %rsi: dst
- * %rdx: src
- */
-
- vzeroupper;
- pushq %r12;
-
- load_16way(%rdx, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1);
-
- call __twofish_dec_blk16;
-
- store_16way(%rsi, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1);
-
- popq %r12;
- vzeroupper;
-
- ret;
-ENDPROC(twofish_ecb_dec_16way)
-
-ENTRY(twofish_cbc_dec_16way)
- /* input:
- * %rdi: ctx, CTX
- * %rsi: dst
- * %rdx: src
- */
-
- vzeroupper;
- pushq %r12;
-
- load_16way(%rdx, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1);
-
- call __twofish_dec_blk16;
-
- store_cbc_16way(%rdx, %rsi, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1,
- RX0);
-
- popq %r12;
- vzeroupper;
-
- ret;
-ENDPROC(twofish_cbc_dec_16way)
-
-ENTRY(twofish_ctr_16way)
- /* input:
- * %rdi: ctx, CTX
- * %rsi: dst (16 blocks)
- * %rdx: src (16 blocks)
- * %rcx: iv (little endian, 128bit)
- */
-
- vzeroupper;
- pushq %r12;
-
- load_ctr_16way(%rcx, .Lbswap128_mask, RA0, RB0, RC0, RD0, RA1, RB1, RC1,
- RD1, RX0, RX0x, RX1, RX1x, RY0, RY0x, RY1, RY1x, RNOT,
- RBYTE);
-
- call __twofish_enc_blk16;
-
- store_ctr_16way(%rdx, %rsi, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1);
-
- popq %r12;
- vzeroupper;
-
- ret;
-ENDPROC(twofish_ctr_16way)
-
-.align 8
-twofish_xts_crypt_16way:
- /* input:
- * %rdi: ctx, CTX
- * %rsi: dst (16 blocks)
- * %rdx: src (16 blocks)
- * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
- * %r8: pointer to __twofish_enc_blk16 or __twofish_dec_blk16
- */
-
- vzeroupper;
- pushq %r12;
-
- load_xts_16way(%rcx, %rdx, %rsi, RA0, RB0, RC0, RD0, RA1, RB1, RC1,
- RD1, RX0, RX0x, RX1, RX1x, RY0, RY0x, RY1, RY1x, RNOT,
- .Lxts_gf128mul_and_shl1_mask_0,
- .Lxts_gf128mul_and_shl1_mask_1);
-
- call *%r8;
-
- store_xts_16way(%rsi, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1);
-
- popq %r12;
- vzeroupper;
-
- ret;
-ENDPROC(twofish_xts_crypt_16way)
-
-ENTRY(twofish_xts_enc_16way)
- /* input:
- * %rdi: ctx, CTX
- * %rsi: dst (16 blocks)
- * %rdx: src (16 blocks)
- * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
- */
- leaq __twofish_enc_blk16, %r8;
- jmp twofish_xts_crypt_16way;
-ENDPROC(twofish_xts_enc_16way)
-
-ENTRY(twofish_xts_dec_16way)
- /* input:
- * %rdi: ctx, CTX
- * %rsi: dst (16 blocks)
- * %rdx: src (16 blocks)
- * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
- */
- leaq __twofish_dec_blk16, %r8;
- jmp twofish_xts_crypt_16way;
-ENDPROC(twofish_xts_dec_16way)
diff --git a/arch/x86/crypto/twofish_avx2_glue.c b/arch/x86/crypto/twofish_avx2_glue.c
deleted file mode 100644
index ce33b5b..0000000
--- a/arch/x86/crypto/twofish_avx2_glue.c
+++ /dev/null
@@ -1,584 +0,0 @@
-/*
- * Glue Code for x86_64/AVX2 assembler optimized version of Twofish
- *
- * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/crypto.h>
-#include <linux/err.h>
-#include <crypto/algapi.h>
-#include <crypto/ctr.h>
-#include <crypto/twofish.h>
-#include <crypto/lrw.h>
-#include <crypto/xts.h>
-#include <asm/xcr.h>
-#include <asm/xsave.h>
-#include <asm/crypto/twofish.h>
-#include <asm/crypto/ablk_helper.h>
-#include <asm/crypto/glue_helper.h>
-#include <crypto/scatterwalk.h>
-
-#define TF_AVX2_PARALLEL_BLOCKS 16
-
-/* 16-way AVX2 parallel cipher functions */
-asmlinkage void twofish_ecb_enc_16way(struct twofish_ctx *ctx, u8 *dst,
- const u8 *src);
-asmlinkage void twofish_ecb_dec_16way(struct twofish_ctx *ctx, u8 *dst,
- const u8 *src);
-asmlinkage void twofish_cbc_dec_16way(void *ctx, u128 *dst, const u128 *src);
-
-asmlinkage void twofish_ctr_16way(void *ctx, u128 *dst, const u128 *src,
- le128 *iv);
-
-asmlinkage void twofish_xts_enc_16way(struct twofish_ctx *ctx, u8 *dst,
- const u8 *src, le128 *iv);
-asmlinkage void twofish_xts_dec_16way(struct twofish_ctx *ctx, u8 *dst,
- const u8 *src, le128 *iv);
-
-static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
- const u8 *src)
-{
- __twofish_enc_blk_3way(ctx, dst, src, false);
-}
-
-static const struct common_glue_ctx twofish_enc = {
- .num_funcs = 4,
- .fpu_blocks_limit = 8,
-
- .funcs = { {
- .num_blocks = 16,
- .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_ecb_enc_16way) }
- }, {
- .num_blocks = 8,
- .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_ecb_enc_8way) }
- }, {
- .num_blocks = 3,
- .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_3way) }
- }, {
- .num_blocks = 1,
- .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk) }
- } }
-};
-
-static const struct common_glue_ctx twofish_ctr = {
- .num_funcs = 4,
- .fpu_blocks_limit = 8,
-
- .funcs = { {
- .num_blocks = 16,
- .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_ctr_16way) }
- }, {
- .num_blocks = 8,
- .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_ctr_8way) }
- }, {
- .num_blocks = 3,
- .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr_3way) }
- }, {
- .num_blocks = 1,
- .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr) }
- } }
-};
-
-static const struct common_glue_ctx twofish_enc_xts = {
- .num_funcs = 3,
- .fpu_blocks_limit = 8,
-
- .funcs = { {
- .num_blocks = 16,
- .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_enc_16way) }
- }, {
- .num_blocks = 8,
- .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_enc_8way) }
- }, {
- .num_blocks = 1,
- .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_enc) }
- } }
-};
-
-static const struct common_glue_ctx twofish_dec = {
- .num_funcs = 4,
- .fpu_blocks_limit = 8,
-
- .funcs = { {
- .num_blocks = 16,
- .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_ecb_dec_16way) }
- }, {
- .num_blocks = 8,
- .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_ecb_dec_8way) }
- }, {
- .num_blocks = 3,
- .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk_3way) }
- }, {
- .num_blocks = 1,
- .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk) }
- } }
-};
-
-static const struct common_glue_ctx twofish_dec_cbc = {
- .num_funcs = 4,
- .fpu_blocks_limit = 8,
-
- .funcs = { {
- .num_blocks = 16,
- .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_cbc_dec_16way) }
- }, {
- .num_blocks = 8,
- .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_cbc_dec_8way) }
- }, {
- .num_blocks = 3,
- .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk_cbc_3way) }
- }, {
- .num_blocks = 1,
- .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk) }
- } }
-};
-
-static const struct common_glue_ctx twofish_dec_xts = {
- .num_funcs = 3,
- .fpu_blocks_limit = 8,
-
- .funcs = { {
- .num_blocks = 16,
- .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_dec_16way) }
- }, {
- .num_blocks = 8,
- .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_dec_8way) }
- }, {
- .num_blocks = 1,
- .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_dec) }
- } }
-};
-
-static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- return glue_ecb_crypt_128bit(&twofish_enc, desc, dst, src, nbytes);
-}
-
-static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- return glue_ecb_crypt_128bit(&twofish_dec, desc, dst, src, nbytes);
-}
-
-static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(twofish_enc_blk), desc,
- dst, src, nbytes);
-}
-
-static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- return glue_cbc_decrypt_128bit(&twofish_dec_cbc, desc, dst, src,
- nbytes);
-}
-
-static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- return glue_ctr_crypt_128bit(&twofish_ctr, desc, dst, src, nbytes);
-}
-
-static inline bool twofish_fpu_begin(bool fpu_enabled, unsigned int nbytes)
-{
- /* since reusing AVX functions, starts using FPU at 8 parallel blocks */
- return glue_fpu_begin(TF_BLOCK_SIZE, 8, NULL, fpu_enabled, nbytes);
-}
-
-static inline void twofish_fpu_end(bool fpu_enabled)
-{
- glue_fpu_end(fpu_enabled);
-}
-
-struct crypt_priv {
- struct twofish_ctx *ctx;
- bool fpu_enabled;
-};
-
-static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
-{
- const unsigned int bsize = TF_BLOCK_SIZE;
- struct crypt_priv *ctx = priv;
- int i;
-
- ctx->fpu_enabled = twofish_fpu_begin(ctx->fpu_enabled, nbytes);
-
- while (nbytes >= TF_AVX2_PARALLEL_BLOCKS * bsize) {
- twofish_ecb_enc_16way(ctx->ctx, srcdst, srcdst);
- srcdst += bsize * TF_AVX2_PARALLEL_BLOCKS;
- nbytes -= bsize * TF_AVX2_PARALLEL_BLOCKS;
- }
-
- while (nbytes >= 8 * bsize) {
- twofish_ecb_enc_8way(ctx->ctx, srcdst, srcdst);
- srcdst += bsize * 8;
- nbytes -= bsize * 8;
- }
-
- while (nbytes >= 3 * bsize) {
- twofish_enc_blk_3way(ctx->ctx, srcdst, srcdst);
- srcdst += bsize * 3;
- nbytes -= bsize * 3;
- }
-
- for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
- twofish_enc_blk(ctx->ctx, srcdst, srcdst);
-}
-
-static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
-{
- const unsigned int bsize = TF_BLOCK_SIZE;
- struct crypt_priv *ctx = priv;
- int i;
-
- ctx->fpu_enabled = twofish_fpu_begin(ctx->fpu_enabled, nbytes);
-
- while (nbytes >= TF_AVX2_PARALLEL_BLOCKS * bsize) {
- twofish_ecb_dec_16way(ctx->ctx, srcdst, srcdst);
- srcdst += bsize * TF_AVX2_PARALLEL_BLOCKS;
- nbytes -= bsize * TF_AVX2_PARALLEL_BLOCKS;
- }
-
- while (nbytes >= 8 * bsize) {
- twofish_ecb_dec_8way(ctx->ctx, srcdst, srcdst);
- srcdst += bsize * 8;
- nbytes -= bsize * 8;
- }
-
- while (nbytes >= 3 * bsize) {
- twofish_dec_blk_3way(ctx->ctx, srcdst, srcdst);
- srcdst += bsize * 3;
- nbytes -= bsize * 3;
- }
-
- for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
- twofish_dec_blk(ctx->ctx, srcdst, srcdst);
-}
-
-static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- be128 buf[TF_AVX2_PARALLEL_BLOCKS];
- struct crypt_priv crypt_ctx = {
- .ctx = &ctx->twofish_ctx,
- .fpu_enabled = false,
- };
- struct lrw_crypt_req req = {
- .tbuf = buf,
- .tbuflen = sizeof(buf),
-
- .table_ctx = &ctx->lrw_table,
- .crypt_ctx = &crypt_ctx,
- .crypt_fn = encrypt_callback,
- };
- int ret;
-
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
- ret = lrw_crypt(desc, dst, src, nbytes, &req);
- twofish_fpu_end(crypt_ctx.fpu_enabled);
-
- return ret;
-}
-
-static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- be128 buf[TF_AVX2_PARALLEL_BLOCKS];
- struct crypt_priv crypt_ctx = {
- .ctx = &ctx->twofish_ctx,
- .fpu_enabled = false,
- };
- struct lrw_crypt_req req = {
- .tbuf = buf,
- .tbuflen = sizeof(buf),
-
- .table_ctx = &ctx->lrw_table,
- .crypt_ctx = &crypt_ctx,
- .crypt_fn = decrypt_callback,
- };
- int ret;
-
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
- ret = lrw_crypt(desc, dst, src, nbytes, &req);
- twofish_fpu_end(crypt_ctx.fpu_enabled);
-
- return ret;
-}
-
-static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-
- return glue_xts_crypt_128bit(&twofish_enc_xts, desc, dst, src, nbytes,
- XTS_TWEAK_CAST(twofish_enc_blk),
- &ctx->tweak_ctx, &ctx->crypt_ctx);
-}
-
-static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-
- return glue_xts_crypt_128bit(&twofish_dec_xts, desc, dst, src, nbytes,
- XTS_TWEAK_CAST(twofish_enc_blk),
- &ctx->tweak_ctx, &ctx->crypt_ctx);
-}
-
-static struct crypto_alg tf_algs[10] = { {
- .cra_name = "__ecb-twofish-avx2",
- .cra_driver_name = "__driver-ecb-twofish-avx2",
- .cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = TF_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct twofish_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = TF_MIN_KEY_SIZE,
- .max_keysize = TF_MAX_KEY_SIZE,
- .setkey = twofish_setkey,
- .encrypt = ecb_encrypt,
- .decrypt = ecb_decrypt,
- },
- },
-}, {
- .cra_name = "__cbc-twofish-avx2",
- .cra_driver_name = "__driver-cbc-twofish-avx2",
- .cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = TF_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct twofish_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = TF_MIN_KEY_SIZE,
- .max_keysize = TF_MAX_KEY_SIZE,
- .setkey = twofish_setkey,
- .encrypt = cbc_encrypt,
- .decrypt = cbc_decrypt,
- },
- },
-}, {
- .cra_name = "__ctr-twofish-avx2",
- .cra_driver_name = "__driver-ctr-twofish-avx2",
- .cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct twofish_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = TF_MIN_KEY_SIZE,
- .max_keysize = TF_MAX_KEY_SIZE,
- .ivsize = TF_BLOCK_SIZE,
- .setkey = twofish_setkey,
- .encrypt = ctr_crypt,
- .decrypt = ctr_crypt,
- },
- },
-}, {
- .cra_name = "__lrw-twofish-avx2",
- .cra_driver_name = "__driver-lrw-twofish-avx2",
- .cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = TF_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct twofish_lrw_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_exit = lrw_twofish_exit_tfm,
- .cra_u = {
- .blkcipher = {
- .min_keysize = TF_MIN_KEY_SIZE +
- TF_BLOCK_SIZE,
- .max_keysize = TF_MAX_KEY_SIZE +
- TF_BLOCK_SIZE,
- .ivsize = TF_BLOCK_SIZE,
- .setkey = lrw_twofish_setkey,
- .encrypt = lrw_encrypt,
- .decrypt = lrw_decrypt,
- },
- },
-}, {
- .cra_name = "__xts-twofish-avx2",
- .cra_driver_name = "__driver-xts-twofish-avx2",
- .cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = TF_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct twofish_xts_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = TF_MIN_KEY_SIZE * 2,
- .max_keysize = TF_MAX_KEY_SIZE * 2,
- .ivsize = TF_BLOCK_SIZE,
- .setkey = xts_twofish_setkey,
- .encrypt = xts_encrypt,
- .decrypt = xts_decrypt,
- },
- },
-}, {
- .cra_name = "ecb(twofish)",
- .cra_driver_name = "ecb-twofish-avx2",
- .cra_priority = 500,
- .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
- .cra_blocksize = TF_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct async_helper_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_ablkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_init = ablk_init,
- .cra_exit = ablk_exit,
- .cra_u = {
- .ablkcipher = {
- .min_keysize = TF_MIN_KEY_SIZE,
- .max_keysize = TF_MAX_KEY_SIZE,
- .setkey = ablk_set_key,
- .encrypt = ablk_encrypt,
- .decrypt = ablk_decrypt,
- },
- },
-}, {
- .cra_name = "cbc(twofish)",
- .cra_driver_name = "cbc-twofish-avx2",
- .cra_priority = 500,
- .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
- .cra_blocksize = TF_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct async_helper_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_ablkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_init = ablk_init,
- .cra_exit = ablk_exit,
- .cra_u = {
- .ablkcipher = {
- .min_keysize = TF_MIN_KEY_SIZE,
- .max_keysize = TF_MAX_KEY_SIZE,
- .ivsize = TF_BLOCK_SIZE,
- .setkey = ablk_set_key,
- .encrypt = __ablk_encrypt,
- .decrypt = ablk_decrypt,
- },
- },
-}, {
- .cra_name = "ctr(twofish)",
- .cra_driver_name = "ctr-twofish-avx2",
- .cra_priority = 500,
- .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
- .cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct async_helper_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_ablkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_init = ablk_init,
- .cra_exit = ablk_exit,
- .cra_u = {
- .ablkcipher = {
- .min_keysize = TF_MIN_KEY_SIZE,
- .max_keysize = TF_MAX_KEY_SIZE,
- .ivsize = TF_BLOCK_SIZE,
- .setkey = ablk_set_key,
- .encrypt = ablk_encrypt,
- .decrypt = ablk_encrypt,
- .geniv = "chainiv",
- },
- },
-}, {
- .cra_name = "lrw(twofish)",
- .cra_driver_name = "lrw-twofish-avx2",
- .cra_priority = 500,
- .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
- .cra_blocksize = TF_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct async_helper_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_ablkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_init = ablk_init,
- .cra_exit = ablk_exit,
- .cra_u = {
- .ablkcipher = {
- .min_keysize = TF_MIN_KEY_SIZE +
- TF_BLOCK_SIZE,
- .max_keysize = TF_MAX_KEY_SIZE +
- TF_BLOCK_SIZE,
- .ivsize = TF_BLOCK_SIZE,
- .setkey = ablk_set_key,
- .encrypt = ablk_encrypt,
- .decrypt = ablk_decrypt,
- },
- },
-}, {
- .cra_name = "xts(twofish)",
- .cra_driver_name = "xts-twofish-avx2",
- .cra_priority = 500,
- .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
- .cra_blocksize = TF_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct async_helper_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_ablkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_init = ablk_init,
- .cra_exit = ablk_exit,
- .cra_u = {
- .ablkcipher = {
- .min_keysize = TF_MIN_KEY_SIZE * 2,
- .max_keysize = TF_MAX_KEY_SIZE * 2,
- .ivsize = TF_BLOCK_SIZE,
- .setkey = ablk_set_key,
- .encrypt = ablk_encrypt,
- .decrypt = ablk_decrypt,
- },
- },
-} };
-
-static int __init init(void)
-{
- u64 xcr0;
-
- if (!cpu_has_avx2 || !cpu_has_osxsave) {
- pr_info("AVX2 instructions are not detected.\n");
- return -ENODEV;
- }
-
- xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
- if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
- pr_info("AVX2 detected but unusable.\n");
- return -ENODEV;
- }
-
- return crypto_register_algs(tf_algs, ARRAY_SIZE(tf_algs));
-}
-
-static void __exit fini(void)
-{
- crypto_unregister_algs(tf_algs, ARRAY_SIZE(tf_algs));
-}
-
-module_init(init);
-module_exit(fini);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("Twofish Cipher Algorithm, AVX2 optimized");
-MODULE_ALIAS("twofish");
-MODULE_ALIAS("twofish-asm");
diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c
index 2047a56..a62ba54 100644
--- a/arch/x86/crypto/twofish_avx_glue.c
+++ b/arch/x86/crypto/twofish_avx_glue.c
@@ -50,26 +50,18 @@
/* 8-way parallel cipher functions */
asmlinkage void twofish_ecb_enc_8way(struct twofish_ctx *ctx, u8 *dst,
const u8 *src);
-EXPORT_SYMBOL_GPL(twofish_ecb_enc_8way);
-
asmlinkage void twofish_ecb_dec_8way(struct twofish_ctx *ctx, u8 *dst,
const u8 *src);
-EXPORT_SYMBOL_GPL(twofish_ecb_dec_8way);
asmlinkage void twofish_cbc_dec_8way(struct twofish_ctx *ctx, u8 *dst,
const u8 *src);
-EXPORT_SYMBOL_GPL(twofish_cbc_dec_8way);
-
asmlinkage void twofish_ctr_8way(struct twofish_ctx *ctx, u8 *dst,
const u8 *src, le128 *iv);
-EXPORT_SYMBOL_GPL(twofish_ctr_8way);
asmlinkage void twofish_xts_enc_8way(struct twofish_ctx *ctx, u8 *dst,
const u8 *src, le128 *iv);
-EXPORT_SYMBOL_GPL(twofish_xts_enc_8way);
asmlinkage void twofish_xts_dec_8way(struct twofish_ctx *ctx, u8 *dst,
const u8 *src, le128 *iv);
-EXPORT_SYMBOL_GPL(twofish_xts_dec_8way);
static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
const u8 *src)
@@ -77,19 +69,17 @@ static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
__twofish_enc_blk_3way(ctx, dst, src, false);
}
-void twofish_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv)
+static void twofish_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv)
{
glue_xts_crypt_128bit_one(ctx, dst, src, iv,
GLUE_FUNC_CAST(twofish_enc_blk));
}
-EXPORT_SYMBOL_GPL(twofish_xts_enc);
-void twofish_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv)
+static void twofish_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv)
{
glue_xts_crypt_128bit_one(ctx, dst, src, iv,
GLUE_FUNC_CAST(twofish_dec_blk));
}
-EXPORT_SYMBOL_GPL(twofish_xts_dec);
static const struct common_glue_ctx twofish_enc = {
diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
index 52ff81c..bae3aba 100644
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c
@@ -308,8 +308,6 @@ static int load_aout_binary(struct linux_binprm *bprm)
(current->mm->start_data = N_DATADDR(ex));
current->mm->brk = ex.a_bss +
(current->mm->start_brk = N_BSSADDR(ex));
- current->mm->free_area_cache = TASK_UNMAPPED_BASE;
- current->mm->cached_hole_size = 0;
retval = setup_arg_pages(bprm, IA32_STACK_TOP, EXSTACK_DEFAULT);
if (retval < 0) {
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index b31bf97..2dfac58 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -111,7 +111,7 @@ static inline void acpi_disable_pci(void)
}
/* Low-level suspend routine. */
-extern int acpi_suspend_lowlevel(void);
+extern int (*acpi_suspend_lowlevel)(void);
/* Physical address to resume after wakeup */
#define acpi_wakeup_address ((unsigned long)(real_mode_header->wakeup_start))
diff --git a/arch/x86/include/asm/crypto/blowfish.h b/arch/x86/include/asm/crypto/blowfish.h
deleted file mode 100644
index f097b2f..0000000
--- a/arch/x86/include/asm/crypto/blowfish.h
+++ /dev/null
@@ -1,43 +0,0 @@
-#ifndef ASM_X86_BLOWFISH_H
-#define ASM_X86_BLOWFISH_H
-
-#include <linux/crypto.h>
-#include <crypto/blowfish.h>
-
-#define BF_PARALLEL_BLOCKS 4
-
-/* regular block cipher functions */
-asmlinkage void __blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src,
- bool xor);
-asmlinkage void blowfish_dec_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src);
-
-/* 4-way parallel cipher functions */
-asmlinkage void __blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst,
- const u8 *src, bool xor);
-asmlinkage void blowfish_dec_blk_4way(struct bf_ctx *ctx, u8 *dst,
- const u8 *src);
-
-static inline void blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src)
-{
- __blowfish_enc_blk(ctx, dst, src, false);
-}
-
-static inline void blowfish_enc_blk_xor(struct bf_ctx *ctx, u8 *dst,
- const u8 *src)
-{
- __blowfish_enc_blk(ctx, dst, src, true);
-}
-
-static inline void blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst,
- const u8 *src)
-{
- __blowfish_enc_blk_4way(ctx, dst, src, false);
-}
-
-static inline void blowfish_enc_blk_xor_4way(struct bf_ctx *ctx, u8 *dst,
- const u8 *src)
-{
- __blowfish_enc_blk_4way(ctx, dst, src, true);
-}
-
-#endif
diff --git a/arch/x86/include/asm/crypto/twofish.h b/arch/x86/include/asm/crypto/twofish.h
index e655c60..878c51c 100644
--- a/arch/x86/include/asm/crypto/twofish.h
+++ b/arch/x86/include/asm/crypto/twofish.h
@@ -28,20 +28,6 @@ asmlinkage void __twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
asmlinkage void twofish_dec_blk_3way(struct twofish_ctx *ctx, u8 *dst,
const u8 *src);
-/* 8-way parallel cipher functions */
-asmlinkage void twofish_ecb_enc_8way(struct twofish_ctx *ctx, u8 *dst,
- const u8 *src);
-asmlinkage void twofish_ecb_dec_8way(struct twofish_ctx *ctx, u8 *dst,
- const u8 *src);
-asmlinkage void twofish_cbc_dec_8way(struct twofish_ctx *ctx, u8 *dst,
- const u8 *src);
-asmlinkage void twofish_ctr_8way(struct twofish_ctx *ctx, u8 *dst,
- const u8 *src, le128 *iv);
-asmlinkage void twofish_xts_enc_8way(struct twofish_ctx *ctx, u8 *dst,
- const u8 *src, le128 *iv);
-asmlinkage void twofish_xts_dec_8way(struct twofish_ctx *ctx, u8 *dst,
- const u8 *src, le128 *iv);
-
/* helpers from twofish_x86_64-3way module */
extern void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src);
extern void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src,
@@ -57,8 +43,4 @@ extern void lrw_twofish_exit_tfm(struct crypto_tfm *tfm);
extern int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
unsigned int keylen);
-/* helpers from twofish-avx module */
-extern void twofish_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv);
-extern void twofish_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv);
-
#endif /* ASM_X86_TWOFISH_H */
diff --git a/arch/x86/include/asm/emergency-restart.h b/arch/x86/include/asm/emergency-restart.h
index 75ce3f4..77a99ac 100644
--- a/arch/x86/include/asm/emergency-restart.h
+++ b/arch/x86/include/asm/emergency-restart.h
@@ -1,18 +1,6 @@
#ifndef _ASM_X86_EMERGENCY_RESTART_H
#define _ASM_X86_EMERGENCY_RESTART_H
-enum reboot_type {
- BOOT_TRIPLE = 't',
- BOOT_KBD = 'k',
- BOOT_BIOS = 'b',
- BOOT_ACPI = 'a',
- BOOT_EFI = 'e',
- BOOT_CF9 = 'p',
- BOOT_CF9_COND = 'q',
-};
-
-extern enum reboot_type reboot_type;
-
extern void machine_emergency_restart(void);
#endif /* _ASM_X86_EMERGENCY_RESTART_H */
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index d8e8eef..34f69cb 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -345,4 +345,11 @@ extern bool xen_biovec_phys_mergeable(const struct bio_vec *vec1,
#define IO_SPACE_LIMIT 0xffff
+#ifdef CONFIG_MTRR
+extern int __must_check arch_phys_wc_add(unsigned long base,
+ unsigned long size);
+extern void arch_phys_wc_del(int handle);
+#define arch_phys_wc_add arch_phys_wc_add
+#endif
+
#endif /* _ASM_X86_IO_H */
diff --git a/arch/x86/include/asm/mc146818rtc.h b/arch/x86/include/asm/mc146818rtc.h
index d354fb7..a55c7ef 100644
--- a/arch/x86/include/asm/mc146818rtc.h
+++ b/arch/x86/include/asm/mc146818rtc.h
@@ -95,8 +95,8 @@ static inline unsigned char current_lock_cmos_reg(void)
unsigned char rtc_cmos_read(unsigned char addr);
void rtc_cmos_write(unsigned char val, unsigned char addr);
-extern int mach_set_rtc_mmss(unsigned long nowtime);
-extern unsigned long mach_get_cmos_time(void);
+extern int mach_set_rtc_mmss(const struct timespec *now);
+extern void mach_get_cmos_time(struct timespec *now);
#define RTC_IRQ 8
diff --git a/arch/x86/include/asm/mrst-vrtc.h b/arch/x86/include/asm/mrst-vrtc.h
index 73668ab..1e69a75 100644
--- a/arch/x86/include/asm/mrst-vrtc.h
+++ b/arch/x86/include/asm/mrst-vrtc.h
@@ -3,7 +3,7 @@
extern unsigned char vrtc_cmos_read(unsigned char reg);
extern void vrtc_cmos_write(unsigned char val, unsigned char reg);
-extern unsigned long vrtc_get_time(void);
-extern int vrtc_set_mmss(unsigned long nowtime);
+extern void vrtc_get_time(struct timespec *now);
+extern int vrtc_set_mmss(const struct timespec *now);
#endif
diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h
index e235582..f768f62 100644
--- a/arch/x86/include/asm/mtrr.h
+++ b/arch/x86/include/asm/mtrr.h
@@ -26,7 +26,10 @@
#include <uapi/asm/mtrr.h>
-/* The following functions are for use by other drivers */
+/*
+ * The following functions are for use by other drivers that cannot use
+ * arch_phys_wc_add and arch_phys_wc_del.
+ */
# ifdef CONFIG_MTRR
extern u8 mtrr_type_lookup(u64 addr, u64 end);
extern void mtrr_save_fixed_ranges(void *);
@@ -45,6 +48,7 @@ extern void mtrr_aps_init(void);
extern void mtrr_bp_restore(void);
extern int mtrr_trim_uncached_memory(unsigned long end_pfn);
extern int amd_special_default_mtrr(void);
+extern int phys_wc_to_mtrr_index(int handle);
# else
static inline u8 mtrr_type_lookup(u64 addr, u64 end)
{
@@ -80,6 +84,10 @@ static inline int mtrr_trim_uncached_memory(unsigned long end_pfn)
static inline void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi)
{
}
+static inline int phys_wc_to_mtrr_index(int handle)
+{
+ return -1;
+}
#define mtrr_ap_init() do {} while (0)
#define mtrr_bp_init() do {} while (0)
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 5b0818b..7dc305a 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -207,7 +207,7 @@ static inline pte_t pte_mkexec(pte_t pte)
static inline pte_t pte_mkdirty(pte_t pte)
{
- return pte_set_flags(pte, _PAGE_DIRTY);
+ return pte_set_flags(pte, _PAGE_DIRTY | _PAGE_SOFT_DIRTY);
}
static inline pte_t pte_mkyoung(pte_t pte)
@@ -271,7 +271,7 @@ static inline pmd_t pmd_wrprotect(pmd_t pmd)
static inline pmd_t pmd_mkdirty(pmd_t pmd)
{
- return pmd_set_flags(pmd, _PAGE_DIRTY);
+ return pmd_set_flags(pmd, _PAGE_DIRTY | _PAGE_SOFT_DIRTY);
}
static inline pmd_t pmd_mkhuge(pmd_t pmd)
@@ -294,6 +294,26 @@ static inline pmd_t pmd_mknotpresent(pmd_t pmd)
return pmd_clear_flags(pmd, _PAGE_PRESENT);
}
+static inline int pte_soft_dirty(pte_t pte)
+{
+ return pte_flags(pte) & _PAGE_SOFT_DIRTY;
+}
+
+static inline int pmd_soft_dirty(pmd_t pmd)
+{
+ return pmd_flags(pmd) & _PAGE_SOFT_DIRTY;
+}
+
+static inline pte_t pte_mksoft_dirty(pte_t pte)
+{
+ return pte_set_flags(pte, _PAGE_SOFT_DIRTY);
+}
+
+static inline pmd_t pmd_mksoft_dirty(pmd_t pmd)
+{
+ return pmd_set_flags(pmd, _PAGE_SOFT_DIRTY);
+}
+
/*
* Mask out unsupported bits in a present pgprot. Non-present pgprots
* can use those bits for other purposes, so leave them be.
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index e642300..c98ac63 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -55,6 +55,18 @@
#define _PAGE_HIDDEN (_AT(pteval_t, 0))
#endif
+/*
+ * The same hidden bit is used by kmemcheck, but since kmemcheck
+ * works on kernel pages while soft-dirty engine on user space,
+ * they do not conflict with each other.
+ */
+
+#ifdef CONFIG_MEM_SOFT_DIRTY
+#define _PAGE_SOFT_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_HIDDEN)
+#else
+#define _PAGE_SOFT_DIRTY (_AT(pteval_t, 0))
+#endif
+
#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
#define _PAGE_NX (_AT(pteval_t, 1) << _PAGE_BIT_NX)
#else
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index a1df6e8..2781119 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -89,7 +89,6 @@ struct thread_info {
#define TIF_FORK 18 /* ret_from_fork */
#define TIF_NOHZ 19 /* in adaptive nohz mode */
#define TIF_MEMDIE 20 /* is terminating due to OOM killer */
-#define TIF_DEBUG 21 /* uses debug registers */
#define TIF_IO_BITMAP 22 /* uses I/O bitmap */
#define TIF_FORCED_TF 24 /* true if TF in eflags artificially */
#define TIF_BLOCKSTEP 25 /* set when we want DEBUGCTLMSR_BTF */
@@ -113,7 +112,6 @@ struct thread_info {
#define _TIF_IA32 (1 << TIF_IA32)
#define _TIF_FORK (1 << TIF_FORK)
#define _TIF_NOHZ (1 << TIF_NOHZ)
-#define _TIF_DEBUG (1 << TIF_DEBUG)
#define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP)
#define _TIF_FORCED_TF (1 << TIF_FORCED_TF)
#define _TIF_BLOCKSTEP (1 << TIF_BLOCKSTEP)
@@ -154,7 +152,7 @@ struct thread_info {
(_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP)
#define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
-#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG)
+#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
#define PREEMPT_ACTIVE 0x10000000
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index d8d9922..828a156 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -142,6 +142,8 @@ struct x86_cpuinit_ops {
void (*fixup_cpu_id)(struct cpuinfo_x86 *c, int node);
};
+struct timespec;
+
/**
* struct x86_platform_ops - platform specific runtime functions
* @calibrate_tsc: calibrate TSC
@@ -156,8 +158,8 @@ struct x86_cpuinit_ops {
*/
struct x86_platform_ops {
unsigned long (*calibrate_tsc)(void);
- unsigned long (*get_wallclock)(void);
- int (*set_wallclock)(unsigned long nowtime);
+ void (*get_wallclock)(struct timespec *ts);
+ int (*set_wallclock)(const struct timespec *ts);
void (*iommu_shutdown)(void);
bool (*is_untracked_pat_range)(u64 start, u64 end);
void (*nmi_init)(void);
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 230c8ea..d81a972 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -44,6 +44,7 @@
#include <asm/mpspec.h>
#include <asm/smp.h>
+#include "sleep.h" /* To include x86_acpi_suspend_lowlevel */
static int __initdata acpi_force = 0;
u32 acpi_rsdt_forced;
int acpi_disabled;
@@ -559,6 +560,12 @@ static int acpi_register_gsi_ioapic(struct device *dev, u32 gsi,
int (*__acpi_register_gsi)(struct device *dev, u32 gsi,
int trigger, int polarity) = acpi_register_gsi_pic;
+#ifdef CONFIG_ACPI_SLEEP
+int (*acpi_suspend_lowlevel)(void) = x86_acpi_suspend_lowlevel;
+#else
+int (*acpi_suspend_lowlevel)(void);
+#endif
+
/*
* success: return IRQ number (>=0)
* failure: return < 0
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index b44577b..2a34aaf 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -26,12 +26,12 @@ static char temp_stack[4096];
#endif
/**
- * acpi_suspend_lowlevel - save kernel state
+ * x86_acpi_suspend_lowlevel - save kernel state
*
* Create an identity mapped page table and copy the wakeup routine to
* low memory.
*/
-int acpi_suspend_lowlevel(void)
+int x86_acpi_suspend_lowlevel(void)
{
struct wakeup_header *header =
(struct wakeup_header *) __va(real_mode_header->wakeup_header);
diff --git a/arch/x86/kernel/acpi/sleep.h b/arch/x86/kernel/acpi/sleep.h
index 67f59f8..c9c2c98 100644
--- a/arch/x86/kernel/acpi/sleep.h
+++ b/arch/x86/kernel/acpi/sleep.h
@@ -15,3 +15,5 @@ extern unsigned long acpi_copy_wakeup_routine(unsigned long);
extern void wakeup_long64(void);
extern void do_suspend_lowlevel(void);
+
+extern int x86_acpi_suspend_lowlevel(void);
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 39cc7f7..63092af 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -25,6 +25,7 @@
#include <linux/kdebug.h>
#include <linux/delay.h>
#include <linux/crash_dump.h>
+#include <linux/reboot.h>
#include <asm/uv/uv_mmrs.h>
#include <asm/uv/uv_hub.h>
@@ -36,7 +37,6 @@
#include <asm/ipi.h>
#include <asm/smp.h>
#include <asm/x86_init.h>
-#include <asm/emergency-restart.h>
#include <asm/nmi.h>
/* BMC sets a bit this MMR non-zero before sending an NMI */
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 5013a48..c587a87 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -90,7 +90,7 @@ static void __cpuinit init_amd_k5(struct cpuinfo_x86 *c)
static void __cpuinit init_amd_k6(struct cpuinfo_x86 *c)
{
u32 l, h;
- int mbytes = num_physpages >> (20-PAGE_SHIFT);
+ int mbytes = get_num_physpages() >> (20-PAGE_SHIFT);
if (c->x86_model < 6) {
/* Based on AMD doc 20734R - June 2000 */
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index ca22b73..f961de9 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -51,9 +51,13 @@
#include <asm/e820.h>
#include <asm/mtrr.h>
#include <asm/msr.h>
+#include <asm/pat.h>
#include "mtrr.h"
+/* arch_phys_wc_add returns an MTRR register index plus this offset. */
+#define MTRR_TO_PHYS_WC_OFFSET 1000
+
u32 num_var_ranges;
unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES];
@@ -525,6 +529,73 @@ int mtrr_del(int reg, unsigned long base, unsigned long size)
}
EXPORT_SYMBOL(mtrr_del);
+/**
+ * arch_phys_wc_add - add a WC MTRR and handle errors if PAT is unavailable
+ * @base: Physical base address
+ * @size: Size of region
+ *
+ * If PAT is available, this does nothing. If PAT is unavailable, it
+ * attempts to add a WC MTRR covering size bytes starting at base and
+ * logs an error if this fails.
+ *
+ * Drivers must store the return value to pass to mtrr_del_wc_if_needed,
+ * but drivers should not try to interpret that return value.
+ */
+int arch_phys_wc_add(unsigned long base, unsigned long size)
+{
+ int ret;
+
+ if (pat_enabled)
+ return 0; /* Success! (We don't need to do anything.) */
+
+ ret = mtrr_add(base, size, MTRR_TYPE_WRCOMB, true);
+ if (ret < 0) {
+ pr_warn("Failed to add WC MTRR for [%p-%p]; performance may suffer.",
+ (void *)base, (void *)(base + size - 1));
+ return ret;
+ }
+ return ret + MTRR_TO_PHYS_WC_OFFSET;
+}
+EXPORT_SYMBOL(arch_phys_wc_add);
+
+/*
+ * arch_phys_wc_del - undoes arch_phys_wc_add
+ * @handle: Return value from arch_phys_wc_add
+ *
+ * This cleans up after mtrr_add_wc_if_needed.
+ *
+ * The API guarantees that mtrr_del_wc_if_needed(error code) and
+ * mtrr_del_wc_if_needed(0) do nothing.
+ */
+void arch_phys_wc_del(int handle)
+{
+ if (handle >= 1) {
+ WARN_ON(handle < MTRR_TO_PHYS_WC_OFFSET);
+ mtrr_del(handle - MTRR_TO_PHYS_WC_OFFSET, 0, 0);
+ }
+}
+EXPORT_SYMBOL(arch_phys_wc_del);
+
+/*
+ * phys_wc_to_mtrr_index - translates arch_phys_wc_add's return value
+ * @handle: Return value from arch_phys_wc_add
+ *
+ * This will turn the return value from arch_phys_wc_add into an mtrr
+ * index suitable for debugging.
+ *
+ * Note: There is no legitimate use for this function, except possibly
+ * in printk line. Alas there is an illegitimate use in some ancient
+ * drm ioctls.
+ */
+int phys_wc_to_mtrr_index(int handle)
+{
+ if (handle < MTRR_TO_PHYS_WC_OFFSET)
+ return -1;
+ else
+ return handle - MTRR_TO_PHYS_WC_OFFSET;
+}
+EXPORT_SYMBOL_GPL(phys_wc_to_mtrr_index);
+
/*
* HACK ALERT!
* These should be called implicitly, but we can't yet until all the initcall
diff --git a/arch/x86/kernel/cpu/perf_event_amd_iommu.c b/arch/x86/kernel/cpu/perf_event_amd_iommu.c
index 0db655e..639d128 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_iommu.c
+++ b/arch/x86/kernel/cpu/perf_event_amd_iommu.c
@@ -491,10 +491,8 @@ static struct perf_amd_iommu __perf_iommu = {
static __init int amd_iommu_pc_init(void)
{
/* Make sure the IOMMU PC resource is available */
- if (!amd_iommu_pc_supported()) {
- pr_err("perf: amd_iommu PMU not installed. No support!\n");
+ if (!amd_iommu_pc_supported())
return -ENODEV;
- }
_init_perf_amd_iommu(&__perf_iommu, "amd_iommu");
diff --git a/arch/x86/kernel/cpu/powerflags.c b/arch/x86/kernel/cpu/powerflags.c
index 7b3fe56..31f0f33 100644
--- a/arch/x86/kernel/cpu/powerflags.c
+++ b/arch/x86/kernel/cpu/powerflags.c
@@ -11,10 +11,10 @@ const char *const x86_power_flags[32] = {
"fid", /* frequency id control */
"vid", /* voltage id control */
"ttp", /* thermal trip */
- "tm",
- "stc",
- "100mhzsteps",
- "hwpstate",
+ "tm", /* hardware thermal control */
+ "stc", /* software thermal control */
+ "100mhzsteps", /* 100 MHz multiplier control */
+ "hwpstate", /* hardware P-state control */
"", /* tsc invariant mapped to constant_tsc */
"cpb", /* core performance boost */
"eff_freq_ro", /* Readonly aperf/mperf */
diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c
index b158152..4934890 100644
--- a/arch/x86/kernel/devicetree.c
+++ b/arch/x86/kernel/devicetree.c
@@ -364,9 +364,7 @@ static void dt_add_ioapic_domain(unsigned int ioapic_num,
* and assigned so we can keep the 1:1 mapping which the ioapic
* is having.
*/
- ret = irq_domain_associate_many(id, 0, 0, NR_IRQS_LEGACY);
- if (ret)
- pr_err("Error mapping legacy IRQs: %d\n", ret);
+ irq_domain_associate_many(id, 0, 0, NR_IRQS_LEGACY);
if (num > NR_IRQS_LEGACY) {
ret = irq_create_strict_mappings(id, NR_IRQS_LEGACY,
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index 02f0763..f66ff16 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -393,6 +393,9 @@ void flush_ptrace_hw_breakpoint(struct task_struct *tsk)
unregister_hw_breakpoint(t->ptrace_bps[i]);
t->ptrace_bps[i] = NULL;
}
+
+ t->debugreg6 = 0;
+ t->ptrace_dr7 = 0;
}
void hw_breakpoint_restore(void)
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 3dd37eb..1f354f4 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -48,10 +48,9 @@ static struct pvclock_wall_clock wall_clock;
* have elapsed since the hypervisor wrote the data. So we try to account for
* that with system time
*/
-static unsigned long kvm_get_wallclock(void)
+static void kvm_get_wallclock(struct timespec *now)
{
struct pvclock_vcpu_time_info *vcpu_time;
- struct timespec ts;
int low, high;
int cpu;
@@ -64,14 +63,12 @@ static unsigned long kvm_get_wallclock(void)
cpu = smp_processor_id();
vcpu_time = &hv_clock[cpu].pvti;
- pvclock_read_wallclock(&wall_clock, vcpu_time, &ts);
+ pvclock_read_wallclock(&wall_clock, vcpu_time, now);
preempt_enable();
-
- return ts.tv_sec;
}
-static int kvm_set_wallclock(unsigned long now)
+static int kvm_set_wallclock(const struct timespec *now)
{
return -1;
}
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 29a8120..7461f50 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -601,30 +601,48 @@ static unsigned long ptrace_get_dr7(struct perf_event *bp[])
return dr7;
}
-static int
-ptrace_modify_breakpoint(struct perf_event *bp, int len, int type,
- struct task_struct *tsk, int disabled)
+static int ptrace_fill_bp_fields(struct perf_event_attr *attr,
+ int len, int type, bool disabled)
+{
+ int err, bp_len, bp_type;
+
+ err = arch_bp_generic_fields(len, type, &bp_len, &bp_type);
+ if (!err) {
+ attr->bp_len = bp_len;
+ attr->bp_type = bp_type;
+ attr->disabled = disabled;
+ }
+
+ return err;
+}
+
+static struct perf_event *
+ptrace_register_breakpoint(struct task_struct *tsk, int len, int type,
+ unsigned long addr, bool disabled)
{
- int err;
- int gen_len, gen_type;
struct perf_event_attr attr;
+ int err;
- /*
- * We should have at least an inactive breakpoint at this
- * slot. It means the user is writing dr7 without having
- * written the address register first
- */
- if (!bp)
- return -EINVAL;
+ ptrace_breakpoint_init(&attr);
+ attr.bp_addr = addr;
- err = arch_bp_generic_fields(len, type, &gen_len, &gen_type);
+ err = ptrace_fill_bp_fields(&attr, len, type, disabled);
if (err)
- return err;
+ return ERR_PTR(err);
+
+ return register_user_hw_breakpoint(&attr, ptrace_triggered,
+ NULL, tsk);
+}
- attr = bp->attr;
- attr.bp_len = gen_len;
- attr.bp_type = gen_type;
- attr.disabled = disabled;
+static int ptrace_modify_breakpoint(struct perf_event *bp, int len, int type,
+ int disabled)
+{
+ struct perf_event_attr attr = bp->attr;
+ int err;
+
+ err = ptrace_fill_bp_fields(&attr, len, type, disabled);
+ if (err)
+ return err;
return modify_user_hw_breakpoint(bp, &attr);
}
@@ -634,67 +652,50 @@ ptrace_modify_breakpoint(struct perf_event *bp, int len, int type,
*/
static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data)
{
- struct thread_struct *thread = &(tsk->thread);
+ struct thread_struct *thread = &tsk->thread;
unsigned long old_dr7;
- int i, orig_ret = 0, rc = 0;
- int enabled, second_pass = 0;
- unsigned len, type;
- struct perf_event *bp;
-
- if (ptrace_get_breakpoints(tsk) < 0)
- return -ESRCH;
+ bool second_pass = false;
+ int i, rc, ret = 0;
data &= ~DR_CONTROL_RESERVED;
old_dr7 = ptrace_get_dr7(thread->ptrace_bps);
+
restore:
- /*
- * Loop through all the hardware breakpoints, making the
- * appropriate changes to each.
- */
+ rc = 0;
for (i = 0; i < HBP_NUM; i++) {
- enabled = decode_dr7(data, i, &len, &type);
- bp = thread->ptrace_bps[i];
-
- if (!enabled) {
- if (bp) {
- /*
- * Don't unregister the breakpoints right-away,
- * unless all register_user_hw_breakpoint()
- * requests have succeeded. This prevents
- * any window of opportunity for debug
- * register grabbing by other users.
- */
- if (!second_pass)
- continue;
-
- rc = ptrace_modify_breakpoint(bp, len, type,
- tsk, 1);
- if (rc)
- break;
+ unsigned len, type;
+ bool disabled = !decode_dr7(data, i, &len, &type);
+ struct perf_event *bp = thread->ptrace_bps[i];
+
+ if (!bp) {
+ if (disabled)
+ continue;
+
+ bp = ptrace_register_breakpoint(tsk,
+ len, type, 0, disabled);
+ if (IS_ERR(bp)) {
+ rc = PTR_ERR(bp);
+ break;
}
+
+ thread->ptrace_bps[i] = bp;
continue;
}
- rc = ptrace_modify_breakpoint(bp, len, type, tsk, 0);
+ rc = ptrace_modify_breakpoint(bp, len, type, disabled);
if (rc)
break;
}
- /*
- * Make a second pass to free the remaining unused breakpoints
- * or to restore the original breakpoints if an error occurred.
- */
- if (!second_pass) {
- second_pass = 1;
- if (rc < 0) {
- orig_ret = rc;
- data = old_dr7;
- }
+
+ /* Restore if the first pass failed, second_pass shouldn't fail. */
+ if (rc && !WARN_ON(second_pass)) {
+ ret = rc;
+ data = old_dr7;
+ second_pass = true;
goto restore;
}
- ptrace_put_breakpoints(tsk);
-
- return ((orig_ret < 0) ? orig_ret : rc);
+ return ret;
}
/*
@@ -702,25 +703,17 @@ restore:
*/
static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n)
{
- struct thread_struct *thread = &(tsk->thread);
+ struct thread_struct *thread = &tsk->thread;
unsigned long val = 0;
if (n < HBP_NUM) {
- struct perf_event *bp;
+ struct perf_event *bp = thread->ptrace_bps[n];
- if (ptrace_get_breakpoints(tsk) < 0)
- return -ESRCH;
-
- bp = thread->ptrace_bps[n];
- if (!bp)
- val = 0;
- else
+ if (bp)
val = bp->hw.info.address;
-
- ptrace_put_breakpoints(tsk);
} else if (n == 6) {
val = thread->debugreg6;
- } else if (n == 7) {
+ } else if (n == 7) {
val = thread->ptrace_dr7;
}
return val;
@@ -729,29 +722,14 @@ static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n)
static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr,
unsigned long addr)
{
- struct perf_event *bp;
struct thread_struct *t = &tsk->thread;
- struct perf_event_attr attr;
+ struct perf_event *bp = t->ptrace_bps[nr];
int err = 0;
- if (ptrace_get_breakpoints(tsk) < 0)
- return -ESRCH;
-
- if (!t->ptrace_bps[nr]) {
- ptrace_breakpoint_init(&attr);
- /*
- * Put stub len and type to register (reserve) an inactive but
- * correct bp
- */
- attr.bp_addr = addr;
- attr.bp_len = HW_BREAKPOINT_LEN_1;
- attr.bp_type = HW_BREAKPOINT_W;
- attr.disabled = 1;
-
- bp = register_user_hw_breakpoint(&attr, ptrace_triggered,
- NULL, tsk);
-
+ if (!bp) {
/*
+ * Put stub len and type to create an inactive but correct bp.
+ *
* CHECKME: the previous code returned -EIO if the addr wasn't
* a valid task virtual addr. The new one will return -EINVAL in
* this case.
@@ -760,22 +738,20 @@ static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr,
* writing for the user. And anyway this is the previous
* behaviour.
*/
- if (IS_ERR(bp)) {
+ bp = ptrace_register_breakpoint(tsk,
+ X86_BREAKPOINT_LEN_1, X86_BREAKPOINT_WRITE,
+ addr, true);
+ if (IS_ERR(bp))
err = PTR_ERR(bp);
- goto put;
- }
-
- t->ptrace_bps[nr] = bp;
+ else
+ t->ptrace_bps[nr] = bp;
} else {
- bp = t->ptrace_bps[nr];
+ struct perf_event_attr attr = bp->attr;
- attr = bp->attr;
attr.bp_addr = addr;
err = modify_user_hw_breakpoint(bp, &attr);
}
-put:
- ptrace_put_breakpoints(tsk);
return err;
}
@@ -785,30 +761,20 @@ put:
static int ptrace_set_debugreg(struct task_struct *tsk, int n,
unsigned long val)
{
- struct thread_struct *thread = &(tsk->thread);
- int rc = 0;
-
+ struct thread_struct *thread = &tsk->thread;
/* There are no DR4 or DR5 registers */
- if (n == 4 || n == 5)
- return -EIO;
+ int rc = -EIO;
- if (n == 6) {
- thread->debugreg6 = val;
- goto ret_path;
- }
if (n < HBP_NUM) {
rc = ptrace_set_breakpoint_addr(tsk, n, val);
- if (rc)
- return rc;
- }
- /* All that's left is DR7 */
- if (n == 7) {
+ } else if (n == 6) {
+ thread->debugreg6 = val;
+ rc = 0;
+ } else if (n == 7) {
rc = ptrace_write_dr7(tsk, val);
if (!rc)
thread->ptrace_dr7 = val;
}
-
-ret_path:
return rc;
}
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 76fa1e9..563ed91 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -36,22 +36,6 @@ void (*pm_power_off)(void);
EXPORT_SYMBOL(pm_power_off);
static const struct desc_ptr no_idt = {};
-static int reboot_mode;
-enum reboot_type reboot_type = BOOT_ACPI;
-int reboot_force;
-
-/*
- * This variable is used privately to keep track of whether or not
- * reboot_type is still set to its default value (i.e., reboot= hasn't
- * been set on the command line). This is needed so that we can
- * suppress DMI scanning for reboot quirks. Without it, it's
- * impossible to override a faulty reboot quirk without recompiling.
- */
-static int reboot_default = 1;
-
-#ifdef CONFIG_SMP
-static int reboot_cpu = -1;
-#endif
/*
* This is set if we need to go through the 'emergency' path.
@@ -64,79 +48,6 @@ static int reboot_emergency;
bool port_cf9_safe = false;
/*
- * reboot=b[ios] | s[mp] | t[riple] | k[bd] | e[fi] [, [w]arm | [c]old] | p[ci]
- * warm Don't set the cold reboot flag
- * cold Set the cold reboot flag
- * bios Reboot by jumping through the BIOS
- * smp Reboot by executing reset on BSP or other CPU
- * triple Force a triple fault (init)
- * kbd Use the keyboard controller. cold reset (default)
- * acpi Use the RESET_REG in the FADT
- * efi Use efi reset_system runtime service
- * pci Use the so-called "PCI reset register", CF9
- * force Avoid anything that could hang.
- */
-static int __init reboot_setup(char *str)
-{
- for (;;) {
- /*
- * Having anything passed on the command line via
- * reboot= will cause us to disable DMI checking
- * below.
- */
- reboot_default = 0;
-
- switch (*str) {
- case 'w':
- reboot_mode = 0x1234;
- break;
-
- case 'c':
- reboot_mode = 0;
- break;
-
-#ifdef CONFIG_SMP
- case 's':
- if (isdigit(*(str+1))) {
- reboot_cpu = (int) (*(str+1) - '0');
- if (isdigit(*(str+2)))
- reboot_cpu = reboot_cpu*10 + (int)(*(str+2) - '0');
- }
- /*
- * We will leave sorting out the final value
- * when we are ready to reboot, since we might not
- * have detected BSP APIC ID or smp_num_cpu
- */
- break;
-#endif /* CONFIG_SMP */
-
- case 'b':
- case 'a':
- case 'k':
- case 't':
- case 'e':
- case 'p':
- reboot_type = *str;
- break;
-
- case 'f':
- reboot_force = 1;
- break;
- }
-
- str = strchr(str, ',');
- if (str)
- str++;
- else
- break;
- }
- return 1;
-}
-
-__setup("reboot=", reboot_setup);
-
-
-/*
* Reboot options and system auto-detection code provided by
* Dell Inc. so their systems "just work". :-)
*/
@@ -536,6 +447,7 @@ static void native_machine_emergency_restart(void)
int i;
int attempt = 0;
int orig_reboot_type = reboot_type;
+ unsigned short mode;
if (reboot_emergency)
emergency_vmx_disable_all();
@@ -543,7 +455,8 @@ static void native_machine_emergency_restart(void)
tboot_shutdown(TB_SHUTDOWN_REBOOT);
/* Tell the BIOS if we want cold or warm reboot */
- *((unsigned short *)__va(0x472)) = reboot_mode;
+ mode = reboot_mode == REBOOT_WARM ? 0x1234 : 0;
+ *((unsigned short *)__va(0x472)) = mode;
for (;;) {
/* Could also try the reset bit in the Hammer NB */
@@ -585,7 +498,7 @@ static void native_machine_emergency_restart(void)
case BOOT_EFI:
if (efi_enabled(EFI_RUNTIME_SERVICES))
- efi.reset_system(reboot_mode ?
+ efi.reset_system(reboot_mode == REBOOT_WARM ?
EFI_RESET_WARM :
EFI_RESET_COLD,
EFI_SUCCESS, 0, NULL);
@@ -614,26 +527,10 @@ void native_machine_shutdown(void)
{
/* Stop the cpus and apics */
#ifdef CONFIG_SMP
-
- /* The boot cpu is always logical cpu 0 */
- int reboot_cpu_id = 0;
-
- /* See if there has been given a command line override */
- if ((reboot_cpu != -1) && (reboot_cpu < nr_cpu_ids) &&
- cpu_online(reboot_cpu))
- reboot_cpu_id = reboot_cpu;
-
- /* Make certain the cpu I'm about to reboot on is online */
- if (!cpu_online(reboot_cpu_id))
- reboot_cpu_id = smp_processor_id();
-
- /* Make certain I only run on the appropriate processor */
- set_cpus_allowed_ptr(current, cpumask_of(reboot_cpu_id));
-
/*
- * O.K Now that I'm on the appropriate processor, stop all of the
- * others. Also disable the local irq to not receive the per-cpu
- * timer interrupt which may trigger scheduler's load balance.
+ * Stop all of the others. Also disable the local irq to
+ * not receive the per-cpu timer interrupt which may trigger
+ * scheduler's load balance.
*/
local_irq_disable();
stop_other_cpus();
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c
index 198eb20..0aa2939 100644
--- a/arch/x86/kernel/rtc.c
+++ b/arch/x86/kernel/rtc.c
@@ -38,8 +38,9 @@ EXPORT_SYMBOL(rtc_lock);
* jump to the next second precisely 500 ms later. Check the Motorola
* MC146818A or Dallas DS12887 data sheet for details.
*/
-int mach_set_rtc_mmss(unsigned long nowtime)
+int mach_set_rtc_mmss(const struct timespec *now)
{
+ unsigned long nowtime = now->tv_sec;
struct rtc_time tm;
int retval = 0;
@@ -58,7 +59,7 @@ int mach_set_rtc_mmss(unsigned long nowtime)
return retval;
}
-unsigned long mach_get_cmos_time(void)
+void mach_get_cmos_time(struct timespec *now)
{
unsigned int status, year, mon, day, hour, min, sec, century = 0;
unsigned long flags;
@@ -107,7 +108,8 @@ unsigned long mach_get_cmos_time(void)
} else
year += CMOS_YEARS_OFFS;
- return mktime(year, mon, day, hour, min, sec);
+ now->tv_sec = mktime(year, mon, day, hour, min, sec);
+ now->tv_nsec = 0;
}
/* Routines for accessing the CMOS RAM/RTC. */
@@ -135,18 +137,13 @@ EXPORT_SYMBOL(rtc_cmos_write);
int update_persistent_clock(struct timespec now)
{
- return x86_platform.set_wallclock(now.tv_sec);
+ return x86_platform.set_wallclock(&now);
}
/* not static: needed by APM */
void read_persistent_clock(struct timespec *ts)
{
- unsigned long retval;
-
- retval = x86_platform.get_wallclock();
-
- ts->tv_sec = retval;
- ts->tv_nsec = 0;
+ x86_platform.get_wallclock(ts);
}
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 56f7fcf..e68709d 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1040,8 +1040,6 @@ void __init setup_arch(char **cmdline_p)
/* max_low_pfn get updated here */
find_low_pfn_range();
#else
- num_physpages = max_pfn;
-
check_x2apic();
/* How many end-of-memory variables you have, grandma! */
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index f4fe0b88..cdaa347 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -265,23 +265,30 @@ void smp_reschedule_interrupt(struct pt_regs *regs)
*/
}
-void smp_trace_reschedule_interrupt(struct pt_regs *regs)
+static inline void smp_entering_irq(void)
{
ack_APIC_irq();
+ irq_enter();
+}
+
+void smp_trace_reschedule_interrupt(struct pt_regs *regs)
+{
+ /*
+ * Need to call irq_enter() before calling the trace point.
+ * __smp_reschedule_interrupt() calls irq_enter/exit() too (in
+ * scheduler_ipi(). This is OK, since those functions are allowed
+ * to nest.
+ */
+ smp_entering_irq();
trace_reschedule_entry(RESCHEDULE_VECTOR);
__smp_reschedule_interrupt();
trace_reschedule_exit(RESCHEDULE_VECTOR);
+ exiting_irq();
/*
* KVM uses this interrupt to force a cpu out of guest mode
*/
}
-static inline void call_function_entering_irq(void)
-{
- ack_APIC_irq();
- irq_enter();
-}
-
static inline void __smp_call_function_interrupt(void)
{
generic_smp_call_function_interrupt();
@@ -290,14 +297,14 @@ static inline void __smp_call_function_interrupt(void)
void smp_call_function_interrupt(struct pt_regs *regs)
{
- call_function_entering_irq();
+ smp_entering_irq();
__smp_call_function_interrupt();
exiting_irq();
}
void smp_trace_call_function_interrupt(struct pt_regs *regs)
{
- call_function_entering_irq();
+ smp_entering_irq();
trace_call_function_entry(CALL_FUNCTION_VECTOR);
__smp_call_function_interrupt();
trace_call_function_exit(CALL_FUNCTION_VECTOR);
@@ -312,14 +319,14 @@ static inline void __smp_call_function_single_interrupt(void)
void smp_call_function_single_interrupt(struct pt_regs *regs)
{
- call_function_entering_irq();
+ smp_entering_irq();
__smp_call_function_single_interrupt();
exiting_irq();
}
void smp_trace_call_function_single_interrupt(struct pt_regs *regs)
{
- call_function_entering_irq();
+ smp_entering_irq();
trace_call_function_single_entry(CALL_FUNCTION_SINGLE_VECTOR);
__smp_call_function_single_interrupt();
trace_call_function_single_exit(CALL_FUNCTION_SINGLE_VECTOR);
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index d482bca..6a22c19 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -882,9 +882,9 @@ int lguest_setup_irq(unsigned int irq)
* It would be far better for everyone if the Guest had its own clock, but
* until then the Host gives us the time on every interrupt.
*/
-static unsigned long lguest_get_wallclock(void)
+static void lguest_get_wallclock(struct timespec *now)
{
- return lguest_data.time.tv_sec;
+ *now = lguest_data.time;
}
/*
diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c
index 252b8f5..4500142 100644
--- a/arch/x86/mm/highmem_32.c
+++ b/arch/x86/mm/highmem_32.c
@@ -1,6 +1,7 @@
#include <linux/highmem.h>
#include <linux/module.h>
#include <linux/swap.h> /* for totalram_pages */
+#include <linux/bootmem.h>
void *kmap(struct page *page)
{
@@ -121,6 +122,11 @@ void __init set_highmem_pages_init(void)
struct zone *zone;
int nid;
+ /*
+ * Explicitly reset zone->managed_pages because set_highmem_pages_init()
+ * is invoked before free_all_bootmem()
+ */
+ reset_all_zones_managed_pages();
for_each_zone(zone) {
unsigned long zone_start_pfn, zone_end_pfn;
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 1f34e92..2ec29ac 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -494,7 +494,6 @@ int devmem_is_allowed(unsigned long pagenr)
void free_init_pages(char *what, unsigned long begin, unsigned long end)
{
- unsigned long addr;
unsigned long begin_aligned, end_aligned;
/* Make sure boundaries are page aligned */
@@ -509,8 +508,6 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end)
if (begin >= end)
return;
- addr = begin;
-
/*
* If debugging page accesses then do not free this memory but
* mark them not present - any buggy init-section access will
@@ -529,18 +526,13 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end)
set_memory_nx(begin, (end - begin) >> PAGE_SHIFT);
set_memory_rw(begin, (end - begin) >> PAGE_SHIFT);
- printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);
-
- for (; addr < end; addr += PAGE_SIZE) {
- memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE);
- free_reserved_page(virt_to_page(addr));
- }
+ free_reserved_area((void *)begin, (void *)end, POISON_FREE_INITMEM, what);
#endif
}
void free_initmem(void)
{
- free_init_pages("unused kernel memory",
+ free_init_pages("unused kernel",
(unsigned long)(&__init_begin),
(unsigned long)(&__init_end));
}
@@ -566,7 +558,7 @@ void __init free_initrd_mem(unsigned long start, unsigned long end)
* - relocate_initrd()
* So here We can do PAGE_ALIGN() safely to get partial page to be freed
*/
- free_init_pages("initrd memory", start, PAGE_ALIGN(end));
+ free_init_pages("initrd", start, PAGE_ALIGN(end));
}
#endif
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 3ac7e31..4287f1f 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -660,10 +660,8 @@ void __init initmem_init(void)
highstart_pfn = max_low_pfn;
printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
pages_to_mb(highend_pfn - highstart_pfn));
- num_physpages = highend_pfn;
high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
#else
- num_physpages = max_low_pfn;
high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
#endif
@@ -671,7 +669,7 @@ void __init initmem_init(void)
sparse_memory_present_with_active_regions(0);
#ifdef CONFIG_FLATMEM
- max_mapnr = num_physpages;
+ max_mapnr = IS_ENABLED(CONFIG_HIGHMEM) ? highend_pfn : max_low_pfn;
#endif
__vmalloc_start_set = true;
@@ -739,9 +737,6 @@ static void __init test_wp_bit(void)
void __init mem_init(void)
{
- int codesize, reservedpages, datasize, initsize;
- int tmp;
-
pci_iommu_alloc();
#ifdef CONFIG_FLATMEM
@@ -759,32 +754,11 @@ void __init mem_init(void)
set_highmem_pages_init();
/* this will put all low memory onto the freelists */
- totalram_pages += free_all_bootmem();
-
- reservedpages = 0;
- for (tmp = 0; tmp < max_low_pfn; tmp++)
- /*
- * Only count reserved RAM pages:
- */
- if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp)))
- reservedpages++;
+ free_all_bootmem();
after_bootmem = 1;
- codesize = (unsigned long) &_etext - (unsigned long) &_text;
- datasize = (unsigned long) &_edata - (unsigned long) &_etext;
- initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin;
-
- printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, "
- "%dk reserved, %dk data, %dk init, %ldk highmem)\n",
- nr_free_pages() << (PAGE_SHIFT-10),
- num_physpages << (PAGE_SHIFT-10),
- codesize >> 10,
- reservedpages << (PAGE_SHIFT-10),
- datasize >> 10,
- initsize >> 10,
- totalhigh_pages << (PAGE_SHIFT-10));
-
+ mem_init_print_info(NULL);
printk(KERN_INFO "virtual kernel memory layout:\n"
" fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n"
#ifdef CONFIG_HIGHMEM
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index b3940b6..104d56a 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -712,36 +712,22 @@ EXPORT_SYMBOL_GPL(arch_add_memory);
static void __meminit free_pagetable(struct page *page, int order)
{
- struct zone *zone;
- bool bootmem = false;
unsigned long magic;
unsigned int nr_pages = 1 << order;
/* bootmem page has reserved flag */
if (PageReserved(page)) {
__ClearPageReserved(page);
- bootmem = true;
magic = (unsigned long)page->lru.next;
if (magic == SECTION_INFO || magic == MIX_SECTION_INFO) {
while (nr_pages--)
put_page_bootmem(page++);
} else
- __free_pages_bootmem(page, order);
+ while (nr_pages--)
+ free_reserved_page(page++);
} else
free_pages((unsigned long)page_address(page), order);
-
- /*
- * SECTION_INFO pages and MIX_SECTION_INFO pages
- * are all allocated by bootmem.
- */
- if (bootmem) {
- zone = page_zone(page);
- zone_span_writelock(zone);
- zone->present_pages += nr_pages;
- zone_span_writeunlock(zone);
- totalram_pages += nr_pages;
- }
}
static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd)
@@ -1058,9 +1044,6 @@ static void __init register_page_bootmem_info(void)
void __init mem_init(void)
{
- long codesize, reservedpages, datasize, initsize;
- unsigned long absent_pages;
-
pci_iommu_alloc();
/* clear_bss() already clear the empty_zero_page */
@@ -1068,29 +1051,14 @@ void __init mem_init(void)
register_page_bootmem_info();
/* this will put all memory onto the freelists */
- totalram_pages = free_all_bootmem();
-
- absent_pages = absent_pages_in_range(0, max_pfn);
- reservedpages = max_pfn - totalram_pages - absent_pages;
+ free_all_bootmem();
after_bootmem = 1;
- codesize = (unsigned long) &_etext - (unsigned long) &_text;
- datasize = (unsigned long) &_edata - (unsigned long) &_etext;
- initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin;
-
/* Register memory areas for /proc/kcore */
kclist_add(&kcore_vsyscall, (void *)VSYSCALL_START,
VSYSCALL_END - VSYSCALL_START, KCORE_OTHER);
- printk(KERN_INFO "Memory: %luk/%luk available (%ldk kernel code, "
- "%ldk absent, %ldk reserved, %ldk data, %ldk init)\n",
- nr_free_pages() << (PAGE_SHIFT-10),
- max_pfn << (PAGE_SHIFT-10),
- codesize >> 10,
- absent_pages << (PAGE_SHIFT-10),
- reservedpages << (PAGE_SHIFT-10),
- datasize >> 10,
- initsize >> 10);
+ mem_init_print_info(NULL);
}
#ifdef CONFIG_DEBUG_RODATA
@@ -1166,11 +1134,10 @@ void mark_rodata_ro(void)
set_memory_ro(start, (end-start) >> PAGE_SHIFT);
#endif
- free_init_pages("unused kernel memory",
+ free_init_pages("unused kernel",
(unsigned long) __va(__pa_symbol(text_end)),
(unsigned long) __va(__pa_symbol(rodata_start)));
-
- free_init_pages("unused kernel memory",
+ free_init_pages("unused kernel",
(unsigned long) __va(__pa_symbol(rodata_end)),
(unsigned long) __va(__pa_symbol(_sdata)));
}
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index 845df68..62c29a5 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -115,10 +115,8 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
if (mmap_is_legacy()) {
mm->mmap_base = mmap_legacy_base();
mm->get_unmapped_area = arch_get_unmapped_area;
- mm->unmap_area = arch_unmap_area;
} else {
mm->mmap_base = mmap_base();
mm->get_unmapped_area = arch_get_unmapped_area_topdown;
- mm->unmap_area = arch_unmap_area_topdown;
}
}
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 73a6d73..0342d27 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -83,10 +83,8 @@ void __init initmem_init(void)
highstart_pfn = max_low_pfn;
printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
pages_to_mb(highend_pfn - highstart_pfn));
- num_physpages = highend_pfn;
high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
#else
- num_physpages = max_low_pfn;
high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
#endif
printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 17fda6a..dfa537a 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -240,7 +240,6 @@ static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
static void pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmds[])
{
pud_t *pud;
- unsigned long addr;
int i;
if (PREALLOCATED_PMDS == 0) /* Work around gcc-3.4.x bug */
@@ -248,8 +247,7 @@ static void pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmds[])
pud = pud_offset(pgd, 0);
- for (addr = i = 0; i < PREALLOCATED_PMDS;
- i++, pud++, addr += PUD_SIZE) {
+ for (i = 0; i < PREALLOCATED_PMDS; i++, pud++) {
pmd_t *pmd = pmds[i];
if (i >= KERNEL_PGD_BOUNDARY)
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index f66b540..79c216a 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -12,6 +12,7 @@
#include <linux/netdevice.h>
#include <linux/filter.h>
#include <linux/if_vlan.h>
+#include <linux/random.h>
/*
* Conventions :
@@ -144,6 +145,39 @@ static int pkt_type_offset(void)
return -1;
}
+struct bpf_binary_header {
+ unsigned int pages;
+ /* Note : for security reasons, bpf code will follow a randomly
+ * sized amount of int3 instructions
+ */
+ u8 image[];
+};
+
+static struct bpf_binary_header *bpf_alloc_binary(unsigned int proglen,
+ u8 **image_ptr)
+{
+ unsigned int sz, hole;
+ struct bpf_binary_header *header;
+
+ /* Most of BPF filters are really small,
+ * but if some of them fill a page, allow at least
+ * 128 extra bytes to insert a random section of int3
+ */
+ sz = round_up(proglen + sizeof(*header) + 128, PAGE_SIZE);
+ header = module_alloc(sz);
+ if (!header)
+ return NULL;
+
+ memset(header, 0xcc, sz); /* fill whole space with int3 instructions */
+
+ header->pages = sz / PAGE_SIZE;
+ hole = sz - (proglen + sizeof(*header));
+
+ /* insert a random number of int3 instructions before BPF code */
+ *image_ptr = &header->image[prandom_u32() % hole];
+ return header;
+}
+
void bpf_jit_compile(struct sk_filter *fp)
{
u8 temp[64];
@@ -153,6 +187,7 @@ void bpf_jit_compile(struct sk_filter *fp)
int t_offset, f_offset;
u8 t_op, f_op, seen = 0, pass;
u8 *image = NULL;
+ struct bpf_binary_header *header = NULL;
u8 *func;
int pc_ret0 = -1; /* bpf index of first RET #0 instruction (if any) */
unsigned int cleanup_addr; /* epilogue code offset */
@@ -693,7 +728,7 @@ cond_branch: f_offset = addrs[i + filter[i].jf] - addrs[i];
if (unlikely(proglen + ilen > oldproglen)) {
pr_err("bpb_jit_compile fatal error\n");
kfree(addrs);
- module_free(NULL, image);
+ module_free(NULL, header);
return;
}
memcpy(image + proglen, temp, ilen);
@@ -717,10 +752,8 @@ cond_branch: f_offset = addrs[i + filter[i].jf] - addrs[i];
break;
}
if (proglen == oldproglen) {
- image = module_alloc(max_t(unsigned int,
- proglen,
- sizeof(struct work_struct)));
- if (!image)
+ header = bpf_alloc_binary(proglen, &image);
+ if (!header)
goto out;
}
oldproglen = proglen;
@@ -730,7 +763,8 @@ cond_branch: f_offset = addrs[i + filter[i].jf] - addrs[i];
bpf_jit_dump(flen, proglen, pass, image);
if (image) {
- bpf_flush_icache(image, image + proglen);
+ bpf_flush_icache(header, image + proglen);
+ set_memory_ro((unsigned long)header, header->pages);
fp->bpf_func = (void *)image;
}
out:
@@ -738,20 +772,13 @@ out:
return;
}
-static void jit_free_defer(struct work_struct *arg)
-{
- module_free(NULL, arg);
-}
-
-/* run from softirq, we must use a work_struct to call
- * module_free() from process context
- */
void bpf_jit_free(struct sk_filter *fp)
{
if (fp->bpf_func != sk_run_filter) {
- struct work_struct *work = (struct work_struct *)fp->bpf_func;
+ unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK;
+ struct bpf_binary_header *header = (void *)addr;
- INIT_WORK(work, jit_free_defer);
- schedule_work(work);
+ set_memory_rw(addr, header->pages);
+ module_free(NULL, header);
}
}
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index 3e72425..d641897 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -324,14 +324,11 @@ setup_resource(struct acpi_resource *acpi_res, void *data)
res->start = start;
res->end = end;
info->res_offset[info->res_num] = addr.translation_offset;
+ info->res_num++;
- if (!pci_use_crs) {
+ if (!pci_use_crs)
dev_printk(KERN_DEBUG, &info->bridge->dev,
"host bridge window %pR (ignored)\n", res);
- return AE_OK;
- }
-
- info->res_num++;
return AE_OK;
}
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index b410b71..c8d5577 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -274,8 +274,9 @@ static efi_status_t __init phys_efi_get_time(efi_time_t *tm,
return status;
}
-int efi_set_rtc_mmss(unsigned long nowtime)
+int efi_set_rtc_mmss(const struct timespec *now)
{
+ unsigned long nowtime = now->tv_sec;
efi_status_t status;
efi_time_t eft;
efi_time_cap_t cap;
@@ -310,7 +311,7 @@ int efi_set_rtc_mmss(unsigned long nowtime)
return 0;
}
-unsigned long efi_get_time(void)
+void efi_get_time(struct timespec *now)
{
efi_status_t status;
efi_time_t eft;
@@ -320,8 +321,9 @@ unsigned long efi_get_time(void)
if (status != EFI_SUCCESS)
pr_err("Oops: efitime: can't read time!\n");
- return mktime(eft.year, eft.month, eft.day, eft.hour,
- eft.minute, eft.second);
+ now->tv_sec = mktime(eft.year, eft.month, eft.day, eft.hour,
+ eft.minute, eft.second);
+ now->tv_nsec = 0;
}
/*
diff --git a/arch/x86/platform/mrst/vrtc.c b/arch/x86/platform/mrst/vrtc.c
index d62b0a3..5e355b1 100644
--- a/arch/x86/platform/mrst/vrtc.c
+++ b/arch/x86/platform/mrst/vrtc.c
@@ -56,7 +56,7 @@ void vrtc_cmos_write(unsigned char val, unsigned char reg)
}
EXPORT_SYMBOL_GPL(vrtc_cmos_write);
-unsigned long vrtc_get_time(void)
+void vrtc_get_time(struct timespec *now)
{
u8 sec, min, hour, mday, mon;
unsigned long flags;
@@ -82,17 +82,18 @@ unsigned long vrtc_get_time(void)
printk(KERN_INFO "vRTC: sec: %d min: %d hour: %d day: %d "
"mon: %d year: %d\n", sec, min, hour, mday, mon, year);
- return mktime(year, mon, mday, hour, min, sec);
+ now->tv_sec = mktime(year, mon, mday, hour, min, sec);
+ now->tv_nsec = 0;
}
-int vrtc_set_mmss(unsigned long nowtime)
+int vrtc_set_mmss(const struct timespec *now)
{
unsigned long flags;
struct rtc_time tm;
int year;
int retval = 0;
- rtc_time_to_tm(nowtime, &tm);
+ rtc_time_to_tm(now->tv_sec, &tm);
if (!rtc_valid_tm(&tm) && tm.tm_year >= 72) {
/*
* tm.year is the number of years since 1900, and the
@@ -110,7 +111,7 @@ int vrtc_set_mmss(unsigned long nowtime)
} else {
printk(KERN_ERR
"%s: Invalid vRTC value: write of %lx to vRTC failed\n",
- __FUNCTION__, nowtime);
+ __FUNCTION__, now->tv_sec);
retval = -EINVAL;
}
return retval;
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index a690868..ee36589 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -15,6 +15,7 @@
#include <linux/math64.h>
#include <linux/gfp.h>
#include <linux/slab.h>
+#include <linux/pvclock_gtod.h>
#include <asm/pvclock.h>
#include <asm/xen/hypervisor.h>
@@ -179,34 +180,56 @@ static void xen_read_wallclock(struct timespec *ts)
put_cpu_var(xen_vcpu);
}
-static unsigned long xen_get_wallclock(void)
+static void xen_get_wallclock(struct timespec *now)
{
- struct timespec ts;
+ xen_read_wallclock(now);
+}
- xen_read_wallclock(&ts);
- return ts.tv_sec;
+static int xen_set_wallclock(const struct timespec *now)
+{
+ return -1;
}
-static int xen_set_wallclock(unsigned long now)
+static int xen_pvclock_gtod_notify(struct notifier_block *nb,
+ unsigned long was_set, void *priv)
{
+ /* Protected by the calling core code serialization */
+ static struct timespec next_sync;
+
struct xen_platform_op op;
- int rc;
+ struct timespec now;
- /* do nothing for domU */
- if (!xen_initial_domain())
- return -1;
+ now = __current_kernel_time();
+
+ /*
+ * We only take the expensive HV call when the clock was set
+ * or when the 11 minutes RTC synchronization time elapsed.
+ */
+ if (!was_set && timespec_compare(&now, &next_sync) < 0)
+ return NOTIFY_OK;
op.cmd = XENPF_settime;
- op.u.settime.secs = now;
- op.u.settime.nsecs = 0;
+ op.u.settime.secs = now.tv_sec;
+ op.u.settime.nsecs = now.tv_nsec;
op.u.settime.system_time = xen_clocksource_read();
- rc = HYPERVISOR_dom0_op(&op);
- WARN(rc != 0, "XENPF_settime failed: now=%ld\n", now);
+ (void)HYPERVISOR_dom0_op(&op);
- return rc;
+ /*
+ * Move the next drift compensation time 11 minutes
+ * ahead. That's emulating the sync_cmos_clock() update for
+ * the hardware RTC.
+ */
+ next_sync = now;
+ next_sync.tv_sec += 11 * 60;
+
+ return NOTIFY_OK;
}
+static struct notifier_block xen_pvclock_gtod_notifier = {
+ .notifier_call = xen_pvclock_gtod_notify,
+};
+
static struct clocksource xen_clocksource __read_mostly = {
.name = "xen",
.rating = 400,
@@ -482,6 +505,9 @@ static void __init xen_time_init(void)
xen_setup_runstate_info(cpu);
xen_setup_timer(cpu);
xen_setup_cpu_clockevents();
+
+ if (xen_initial_domain())
+ pvclock_gtod_register_notifier(&xen_pvclock_gtod_notifier);
}
void __init xen_init_time_ops(void)
@@ -494,7 +520,9 @@ void __init xen_init_time_ops(void)
x86_platform.calibrate_tsc = xen_tsc_khz;
x86_platform.get_wallclock = xen_get_wallclock;
- x86_platform.set_wallclock = xen_set_wallclock;
+ /* Dom0 uses the native method to set the hardware RTC. */
+ if (!xen_initial_domain())
+ x86_platform.set_wallclock = xen_set_wallclock;
}
#ifdef CONFIG_XEN_PVHVM