From b7d5006de1afab266175288f41e5dc70e69cce33 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 27 Aug 2014 07:35:05 +0200 Subject: s390: remove unused MACHINE_FLAG_RRBM Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index 089a498..dbde7c2 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -55,7 +55,6 @@ extern void detect_memory_memblock(void); #define MACHINE_FLAG_LPP (1UL << 13) #define MACHINE_FLAG_TOPOLOGY (1UL << 14) #define MACHINE_FLAG_TE (1UL << 15) -#define MACHINE_FLAG_RRBM (1UL << 16) #define MACHINE_FLAG_TLB_LC (1UL << 17) #define MACHINE_IS_VM (S390_lowcore.machine_flags & MACHINE_FLAG_VM) @@ -78,7 +77,6 @@ extern void detect_memory_memblock(void); #define MACHINE_HAS_LPP (0) #define MACHINE_HAS_TOPOLOGY (0) #define MACHINE_HAS_TE (0) -#define MACHINE_HAS_RRBM (0) #define MACHINE_HAS_TLB_LC (0) #else /* CONFIG_64BIT */ #define MACHINE_HAS_IEEE (1) @@ -91,7 +89,6 @@ extern void detect_memory_memblock(void); #define MACHINE_HAS_LPP (S390_lowcore.machine_flags & MACHINE_FLAG_LPP) #define MACHINE_HAS_TOPOLOGY (S390_lowcore.machine_flags & MACHINE_FLAG_TOPOLOGY) #define MACHINE_HAS_TE (S390_lowcore.machine_flags & MACHINE_FLAG_TE) -#define MACHINE_HAS_RRBM (S390_lowcore.machine_flags & MACHINE_FLAG_RRBM) #define MACHINE_HAS_TLB_LC (S390_lowcore.machine_flags & MACHINE_FLAG_TLB_LC) #endif /* CONFIG_64BIT */ diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 0dff972..f6c66b5 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -390,8 +390,6 @@ static __init void detect_machine_facilities(void) S390_lowcore.machine_flags |= MACHINE_FLAG_LPP; if (test_facility(50) && test_facility(73)) S390_lowcore.machine_flags |= MACHINE_FLAG_TE; - if (test_facility(66)) - S390_lowcore.machine_flags |= MACHINE_FLAG_RRBM; if (test_facility(51)) S390_lowcore.machine_flags |= MACHINE_FLAG_TLB_LC; #endif -- cgit 
v0.10.2 From 369e8c355383143964290b6a706eb6639ae3588c Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 27 Aug 2014 07:51:05 +0200 Subject: Documentation/kprobes: add s390 to list of supported architectures s390 supports kprobes since 2006 but is missing in the list of architectures that support kprobes. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky diff --git a/Documentation/kprobes.txt b/Documentation/kprobes.txt index 4bbeca8..4227ec2 100644 --- a/Documentation/kprobes.txt +++ b/Documentation/kprobes.txt @@ -300,6 +300,7 @@ architectures: - arm - ppc - mips +- s390 3. Configuring Kprobes -- cgit v0.10.2 From 070b7be633dc33c0899e8c934b4d5fad046b06e8 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 29 Aug 2014 12:44:40 +0200 Subject: s390/vdso: replace stck with stcke If gettimeofday / clock_gettime are called multiple times in a row the STCK instruction will stall until a difference in the result is visible. This unnecessarily slows down the vdso calls, use stcke instead of stck to get rid of the stall. Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/kernel/vdso32/clock_gettime.S b/arch/s390/kernel/vdso32/clock_gettime.S index 7cf18f8..4e20a93 100644 --- a/arch/s390/kernel/vdso32/clock_gettime.S +++ b/arch/s390/kernel/vdso32/clock_gettime.S @@ -30,8 +30,8 @@ __kernel_clock_gettime: 1: l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */ tml %r4,0x0001 /* pending update ? loop */ jnz 1b - stck 24(%r15) /* Store TOD clock */ - lm %r0,%r1,24(%r15) + stcke 24(%r15) /* Store TOD clock */ + lm %r0,%r1,25(%r15) s %r0,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */ sl %r1,__VDSO_XTIME_STAMP+4(%r5) brc 3,2f @@ -72,8 +72,8 @@ __kernel_clock_gettime: 11: l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */ tml %r4,0x0001 /* pending update ? 
loop */ jnz 11b - stck 24(%r15) /* Store TOD clock */ - lm %r0,%r1,24(%r15) + stcke 24(%r15) /* Store TOD clock */ + lm %r0,%r1,25(%r15) s %r0,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */ sl %r1,__VDSO_XTIME_STAMP+4(%r5) brc 3,12f diff --git a/arch/s390/kernel/vdso32/gettimeofday.S b/arch/s390/kernel/vdso32/gettimeofday.S index fd621a9..60def5f 100644 --- a/arch/s390/kernel/vdso32/gettimeofday.S +++ b/arch/s390/kernel/vdso32/gettimeofday.S @@ -29,8 +29,8 @@ __kernel_gettimeofday: l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */ tml %r4,0x0001 /* pending update ? loop */ jnz 1b - stck 24(%r15) /* Store TOD clock */ - lm %r0,%r1,24(%r15) + stcke 24(%r15) /* Store TOD clock */ + lm %r0,%r1,25(%r15) s %r0,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */ sl %r1,__VDSO_XTIME_STAMP+4(%r5) brc 3,3f diff --git a/arch/s390/kernel/vdso64/clock_gettime.S b/arch/s390/kernel/vdso64/clock_gettime.S index 3f34e09..4add40b 100644 --- a/arch/s390/kernel/vdso64/clock_gettime.S +++ b/arch/s390/kernel/vdso64/clock_gettime.S @@ -33,10 +33,10 @@ __kernel_clock_gettime: 0: lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */ tmll %r4,0x0001 /* pending update ? loop */ jnz 0b - stck 48(%r15) /* Store TOD clock */ + stcke 48(%r15) /* Store TOD clock */ lgf %r2,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift */ lg %r0,__VDSO_WTOM_SEC(%r5) - lg %r1,48(%r15) + lg %r1,49(%r15) sg %r1,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */ msgf %r1,__VDSO_TK_MULT(%r5) /* * tk->mult */ alg %r1,__VDSO_WTOM_NSEC(%r5) @@ -58,9 +58,9 @@ __kernel_clock_gettime: 5: lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */ tmll %r4,0x0001 /* pending update ? 
loop */ jnz 5b - stck 48(%r15) /* Store TOD clock */ + stcke 48(%r15) /* Store TOD clock */ lgf %r2,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift */ - lg %r1,48(%r15) + lg %r1,49(%r15) sg %r1,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */ msgf %r1,__VDSO_TK_MULT(%r5) /* * tk->mult */ alg %r1,__VDSO_XTIME_NSEC(%r5) /* + tk->xtime_nsec */ diff --git a/arch/s390/kernel/vdso64/gettimeofday.S b/arch/s390/kernel/vdso64/gettimeofday.S index d0860d1..7a34499 100644 --- a/arch/s390/kernel/vdso64/gettimeofday.S +++ b/arch/s390/kernel/vdso64/gettimeofday.S @@ -28,8 +28,8 @@ __kernel_gettimeofday: lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */ tmll %r4,0x0001 /* pending update ? loop */ jnz 0b - stck 48(%r15) /* Store TOD clock */ - lg %r1,48(%r15) + stcke 48(%r15) /* Store TOD clock */ + lg %r1,49(%r15) sg %r1,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */ msgf %r1,__VDSO_TK_MULT(%r5) /* * tk->mult */ alg %r1,__VDSO_XTIME_NSEC(%r5) /* + tk->xtime_nsec */ -- cgit v0.10.2 From b7eacb59cd7fb5e98852186e485c0c865f862645 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 29 Aug 2014 12:31:45 +0200 Subject: s390/vdso: add vdso support for coarse clocks Add CLOCK_REALTIME_COARSE and CLOCK_MONOTONIC_COARSE optimization to the 64-bit and 31-bit vdso. Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/include/asm/vdso.h b/arch/s390/include/asm/vdso.h index bc9746a..a62526d 100644 --- a/arch/s390/include/asm/vdso.h +++ b/arch/s390/include/asm/vdso.h @@ -22,13 +22,17 @@ struct vdso_data { __u64 xtime_tod_stamp; /* TOD clock for xtime 0x08 */ __u64 xtime_clock_sec; /* Kernel time 0x10 */ __u64 xtime_clock_nsec; /* 0x18 */ - __u64 wtom_clock_sec; /* Wall to monotonic clock 0x20 */ - __u64 wtom_clock_nsec; /* 0x28 */ - __u32 tz_minuteswest; /* Minutes west of Greenwich 0x30 */ - __u32 tz_dsttime; /* Type of dst correction 0x34 */ - __u32 ectg_available; /* ECTG instruction present 0x38 */ - __u32 tk_mult; /* Mult. 
used for xtime_nsec 0x3c */ - __u32 tk_shift; /* Shift used for xtime_nsec 0x40 */ + __u64 xtime_coarse_sec; /* Coarse kernel time 0x20 */ + __u64 xtime_coarse_nsec; /* 0x28 */ + __u64 wtom_clock_sec; /* Wall to monotonic clock 0x30 */ + __u64 wtom_clock_nsec; /* 0x38 */ + __u64 wtom_coarse_sec; /* Coarse wall to monotonic 0x40 */ + __u64 wtom_coarse_nsec; /* 0x48 */ + __u32 tz_minuteswest; /* Minutes west of Greenwich 0x50 */ + __u32 tz_dsttime; /* Type of dst correction 0x54 */ + __u32 ectg_available; /* ECTG instruction present 0x58 */ + __u32 tk_mult; /* Mult. used for xtime_nsec 0x5c */ + __u32 tk_shift; /* Shift used for xtime_nsec 0x60 */ }; struct vdso_per_cpu_data { diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index afe1715..3e9e479 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -62,8 +62,12 @@ int main(void) DEFINE(__VDSO_XTIME_STAMP, offsetof(struct vdso_data, xtime_tod_stamp)); DEFINE(__VDSO_XTIME_SEC, offsetof(struct vdso_data, xtime_clock_sec)); DEFINE(__VDSO_XTIME_NSEC, offsetof(struct vdso_data, xtime_clock_nsec)); + DEFINE(__VDSO_XTIME_CRS_SEC, offsetof(struct vdso_data, xtime_coarse_sec)); + DEFINE(__VDSO_XTIME_CRS_NSEC, offsetof(struct vdso_data, xtime_coarse_nsec)); DEFINE(__VDSO_WTOM_SEC, offsetof(struct vdso_data, wtom_clock_sec)); DEFINE(__VDSO_WTOM_NSEC, offsetof(struct vdso_data, wtom_clock_nsec)); + DEFINE(__VDSO_WTOM_CRS_SEC, offsetof(struct vdso_data, wtom_coarse_sec)); + DEFINE(__VDSO_WTOM_CRS_NSEC, offsetof(struct vdso_data, wtom_coarse_nsec)); DEFINE(__VDSO_TIMEZONE, offsetof(struct vdso_data, tz_minuteswest)); DEFINE(__VDSO_ECTG_OK, offsetof(struct vdso_data, ectg_available)); DEFINE(__VDSO_TK_MULT, offsetof(struct vdso_data, tk_mult)); @@ -73,8 +77,11 @@ int main(void) /* constants used by the vdso */ DEFINE(__CLOCK_REALTIME, CLOCK_REALTIME); DEFINE(__CLOCK_MONOTONIC, CLOCK_MONOTONIC); + DEFINE(__CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE); + 
DEFINE(__CLOCK_MONOTONIC_COARSE, CLOCK_MONOTONIC_COARSE); DEFINE(__CLOCK_THREAD_CPUTIME_ID, CLOCK_THREAD_CPUTIME_ID); DEFINE(__CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC); + DEFINE(__CLOCK_COARSE_RES, LOW_RES_NSEC); BLANK(); /* idle data offsets */ DEFINE(__CLOCK_IDLE_ENTER, offsetof(struct s390_idle_data, clock_idle_enter)); diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 4cef607..69e980d 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -232,6 +232,19 @@ void update_vsyscall(struct timekeeper *tk) vdso_data->wtom_clock_nsec -= nsecps; vdso_data->wtom_clock_sec++; } + + vdso_data->xtime_coarse_sec = tk->xtime_sec; + vdso_data->xtime_coarse_nsec = + (long)(tk->tkr.xtime_nsec >> tk->tkr.shift); + vdso_data->wtom_coarse_sec = + vdso_data->xtime_coarse_sec + tk->wall_to_monotonic.tv_sec; + vdso_data->wtom_coarse_nsec = + vdso_data->xtime_coarse_nsec + tk->wall_to_monotonic.tv_nsec; + while (vdso_data->wtom_coarse_nsec >= NSEC_PER_SEC) { + vdso_data->wtom_coarse_nsec -= NSEC_PER_SEC; + vdso_data->wtom_coarse_sec++; + } + vdso_data->tk_mult = tk->tkr.mult; vdso_data->tk_shift = tk->tkr.shift; smp_wmb(); diff --git a/arch/s390/kernel/vdso32/clock_getres.S b/arch/s390/kernel/vdso32/clock_getres.S index 36aaa25..eca3f00 100644 --- a/arch/s390/kernel/vdso32/clock_getres.S +++ b/arch/s390/kernel/vdso32/clock_getres.S @@ -19,14 +19,20 @@ .type __kernel_clock_getres,@function __kernel_clock_getres: .cfi_startproc + basr %r1,0 + la %r1,4f-.(%r1) chi %r2,__CLOCK_REALTIME je 0f chi %r2,__CLOCK_MONOTONIC + je 0f + la %r1,5f-4f(%r1) + chi %r2,__CLOCK_REALTIME_COARSE + je 0f + chi %r2,__CLOCK_MONOTONIC_COARSE jne 3f 0: ltr %r3,%r3 jz 2f /* res == NULL */ - basr %r1,0 -1: l %r0,4f-1b(%r1) +1: l %r0,0(%r1) xc 0(4,%r3),0(%r3) /* set tp->tv_sec to zero */ st %r0,4(%r3) /* store tp->tv_usec */ 2: lhi %r2,0 @@ -35,5 +41,6 @@ __kernel_clock_getres: svc 0 br %r14 4: .long __CLOCK_REALTIME_RES +5: .long __CLOCK_COARSE_RES .cfi_endproc .size 
__kernel_clock_getres,.-__kernel_clock_getres diff --git a/arch/s390/kernel/vdso32/clock_gettime.S b/arch/s390/kernel/vdso32/clock_gettime.S index 4e20a93..48c2206 100644 --- a/arch/s390/kernel/vdso32/clock_gettime.S +++ b/arch/s390/kernel/vdso32/clock_gettime.S @@ -21,8 +21,12 @@ __kernel_clock_gettime: .cfi_startproc basr %r5,0 0: al %r5,21f-0b(%r5) /* get &_vdso_data */ + chi %r2,__CLOCK_REALTIME_COARSE + je 10f chi %r2,__CLOCK_REALTIME je 11f + chi %r2,__CLOCK_MONOTONIC_COARSE + je 9f chi %r2,__CLOCK_MONOTONIC jne 19f @@ -68,6 +72,26 @@ __kernel_clock_gettime: lhi %r2,0 br %r14 + /* CLOCK_MONOTONIC_COARSE */ +9: l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */ + tml %r4,0x0001 /* pending update ? loop */ + jnz 9b + l %r2,__VDSO_WTOM_CRS_SEC+4(%r5) + l %r1,__VDSO_WTOM_CRS_NSEC+4(%r5) + cl %r4,__VDSO_UPD_COUNT+4(%r5) /* check update counter */ + jne 9b + j 8b + + /* CLOCK_REALTIME_COARSE */ +10: l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */ + tml %r4,0x0001 /* pending update ? loop */ + jnz 10b + l %r2,__VDSO_XTIME_CRS_SEC+4(%r5) + l %r1,__VDSO_XTIME_CRS_NSEC+4(%r5) + cl %r4,__VDSO_UPD_COUNT+4(%r5) /* check update counter */ + jne 10b + j 17f + /* CLOCK_REALTIME */ 11: l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */ tml %r4,0x0001 /* pending update ? 
loop */ diff --git a/arch/s390/kernel/vdso64/clock_getres.S b/arch/s390/kernel/vdso64/clock_getres.S index 34deba7..c8513de 100644 --- a/arch/s390/kernel/vdso64/clock_getres.S +++ b/arch/s390/kernel/vdso64/clock_getres.S @@ -19,6 +19,12 @@ .type __kernel_clock_getres,@function __kernel_clock_getres: .cfi_startproc + larl %r1,4f + cghi %r2,__CLOCK_REALTIME_COARSE + je 0f + cghi %r2,__CLOCK_MONOTONIC_COARSE + je 0f + larl %r1,3f cghi %r2,__CLOCK_REALTIME je 0f cghi %r2,__CLOCK_MONOTONIC @@ -32,7 +38,6 @@ __kernel_clock_getres: jz 2f 0: ltgr %r3,%r3 jz 1f /* res == NULL */ - larl %r1,3f lg %r0,0(%r1) xc 0(8,%r3),0(%r3) /* set tp->tv_sec to zero */ stg %r0,8(%r3) /* store tp->tv_usec */ @@ -42,5 +47,6 @@ __kernel_clock_getres: svc 0 br %r14 3: .quad __CLOCK_REALTIME_RES +4: .quad __CLOCK_COARSE_RES .cfi_endproc .size __kernel_clock_getres,.-__kernel_clock_getres diff --git a/arch/s390/kernel/vdso64/clock_gettime.S b/arch/s390/kernel/vdso64/clock_gettime.S index 4add40b..9d9761f 100644 --- a/arch/s390/kernel/vdso64/clock_gettime.S +++ b/arch/s390/kernel/vdso64/clock_gettime.S @@ -20,12 +20,16 @@ __kernel_clock_gettime: .cfi_startproc larl %r5,_vdso_data + cghi %r2,__CLOCK_REALTIME_COARSE + je 4f cghi %r2,__CLOCK_REALTIME je 5f cghi %r2,__CLOCK_THREAD_CPUTIME_ID je 9f cghi %r2,-2 /* Per-thread CPUCLOCK with PID=0, VIRT=1 */ je 9f + cghi %r2,__CLOCK_MONOTONIC_COARSE + je 3f cghi %r2,__CLOCK_MONOTONIC jne 12f @@ -54,6 +58,26 @@ __kernel_clock_gettime: lghi %r2,0 br %r14 + /* CLOCK_MONOTONIC_COARSE */ +3: lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */ + tmll %r4,0x0001 /* pending update ? loop */ + jnz 3b + lg %r0,__VDSO_WTOM_CRS_SEC(%r5) + lg %r1,__VDSO_WTOM_CRS_NSEC(%r5) + clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */ + jne 3b + j 2b + + /* CLOCK_REALTIME_COARSE */ +4: lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */ + tmll %r4,0x0001 /* pending update ? 
loop */ + jnz 4b + lg %r0,__VDSO_XTIME_CRS_SEC(%r5) + lg %r1,__VDSO_XTIME_CRS_NSEC(%r5) + clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */ + jne 4b + j 7f + /* CLOCK_REALTIME */ 5: lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */ tmll %r4,0x0001 /* pending update ? loop */ -- cgit v0.10.2 From 0f1b1ff54b386926ef1a524e60ef89ae7738bbd5 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 14 Aug 2014 16:06:02 +0200 Subject: s390: pass march flag to assembly files as well Currently the march flag gets only passed to C files, but not to assembler files. This means that we can't add new instructions like e.g. aghik to asm files, since the assembler doesn't know of the new instructions if the appropriate march flag isn't specified. So also pass the march flag when compiling assembler files as well. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/Makefile b/arch/s390/Makefile index 874e6d6..878e679 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -35,13 +35,16 @@ endif export LD_BFD -cflags-$(CONFIG_MARCH_G5) += -march=g5 -cflags-$(CONFIG_MARCH_Z900) += -march=z900 -cflags-$(CONFIG_MARCH_Z990) += -march=z990 -cflags-$(CONFIG_MARCH_Z9_109) += -march=z9-109 -cflags-$(CONFIG_MARCH_Z10) += -march=z10 -cflags-$(CONFIG_MARCH_Z196) += -march=z196 -cflags-$(CONFIG_MARCH_ZEC12) += -march=zEC12 +mflags-$(CONFIG_MARCH_G5) := -march=g5 +mflags-$(CONFIG_MARCH_Z900) := -march=z900 +mflags-$(CONFIG_MARCH_Z990) := -march=z990 +mflags-$(CONFIG_MARCH_Z9_109) := -march=z9-109 +mflags-$(CONFIG_MARCH_Z10) := -march=z10 +mflags-$(CONFIG_MARCH_Z196) := -march=z196 +mflags-$(CONFIG_MARCH_ZEC12) := -march=zEC12 + +aflags-y += $(mflags-y) +cflags-y += $(mflags-y) cflags-$(CONFIG_MARCH_G5_TUNE) += -mtune=g5 cflags-$(CONFIG_MARCH_Z900_TUNE) += -mtune=z900 -- cgit v0.10.2 From 2481a87b0250bbf429fc8cdc78331efbc44a0221 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 15 Aug 2014 12:33:46 +0200 Subject: s390/ftrace: optimize 
function graph caller code When the function graph tracer is disabled we can skip three additional instructions. So let's just do this. So if function tracing is enabled but function graph tracing is runtime disabled, we get away with a single unconditional branch. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h index bf246da..7b8e456 100644 --- a/arch/s390/include/asm/ftrace.h +++ b/arch/s390/include/asm/ftrace.h @@ -4,6 +4,7 @@ #ifndef __ASSEMBLY__ extern void _mcount(void); +extern char ftrace_graph_caller_end; struct dyn_arch_ftrace { }; diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index 54d6493..de55efa 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -170,6 +170,29 @@ out: * directly after the instructions. To enable the call we calculate * the original offset to prepare_ftrace_return and put it back. */ + +#ifdef CONFIG_64BIT + +int ftrace_enable_ftrace_graph_caller(void) +{ + static unsigned short offset = 0x0002; + + return probe_kernel_write((void *) ftrace_graph_caller + 2, + &offset, sizeof(offset)); +} + +int ftrace_disable_ftrace_graph_caller(void) +{ + unsigned short offset; + + offset = ((void *) &ftrace_graph_caller_end - + (void *) ftrace_graph_caller) / 2; + return probe_kernel_write((void *) ftrace_graph_caller + 2, + &offset, sizeof(offset)); +} + +#else /* CONFIG_64BIT */ + int ftrace_enable_ftrace_graph_caller(void) { unsigned short offset; @@ -188,5 +211,6 @@ int ftrace_disable_ftrace_graph_caller(void) &offset, sizeof(offset)); } +#endif /* CONFIG_64BIT */ #endif /* CONFIG_DYNAMIC_FTRACE */ #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/s390/kernel/mcount64.S b/arch/s390/kernel/mcount64.S index c67a8bf..5b33c83 100644 --- a/arch/s390/kernel/mcount64.S +++ b/arch/s390/kernel/mcount64.S @@ -32,14 +32,17 @@ ENTRY(ftrace_caller) lg %r14,0(%r14) basr %r14,%r14 #ifdef CONFIG_FUNCTION_GRAPH_TRACER 
+# The j instruction gets runtime patched to a nop instruction. +# See ftrace_enable_ftrace_graph_caller. The patched instruction is: +# j .+4 +ENTRY(ftrace_graph_caller) + j ftrace_graph_caller_end lg %r2,168(%r15) lg %r3,272(%r15) -ENTRY(ftrace_graph_caller) -# The bras instruction gets runtime patched to call prepare_ftrace_return. -# See ftrace_enable_ftrace_graph_caller. The patched instruction is: -# bras %r14,prepare_ftrace_return - bras %r14,0f -0: stg %r2,168(%r15) + brasl %r14,prepare_ftrace_return + stg %r2,168(%r15) +ftrace_graph_caller_end: + .globl ftrace_graph_caller_end #endif aghi %r15,160 lmg %r2,%r5,32(%r15) -- cgit v0.10.2 From 10dec7dbd50ab0be96dda085d625d54ce800e426 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 15 Aug 2014 13:01:46 +0200 Subject: s390/ftrace: add HAVE_DYNAMIC_FTRACE_WITH_REGS support This code is based on a patch from Vojtech Pavlik. http://marc.info/?l=linux-s390&m=140438885114413&w=2 The actual implementation now differs significantly: Instead of adding a second function "ftrace_regs_caller" which would be nearly identical to the existing ftrace_caller function, the current ftrace_caller function is now an alias to ftrace_regs_caller and always passes the needed pt_regs structure and function_trace_op parameters unconditionally. Besides that also use asm offsets to correctly allocate and access the new struct pt_regs on the stack. While at it we can make use of new instructions to get rid of some indirect loads if compiled for new machines. The passed struct pt_regs can be changed by the called function and its new contents will replace the current contents. Note: to change the return address the embedded psw member of the pt_regs structure must be changed. The psw member is right now incomplete, since the mask part is missing. For all current use cases this should be sufficient. Providing and restoring a sane mask would mean we need to add an epsw/lpswe pair to the mcount code. 
Only these two instructions would cost us ~120 cycles, which currently does not seem necessary. Cc: Vojtech Pavlik Cc: Jiri Kosina Cc: Jiri Slaby Cc: Steven Rostedt Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 05c78bb..3f845fc 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -114,6 +114,7 @@ config S390 select HAVE_C_RECORDMCOUNT select HAVE_DEBUG_KMEMLEAK select HAVE_DYNAMIC_FTRACE + select HAVE_DYNAMIC_FTRACE_WITH_REGS if 64BIT select HAVE_FTRACE_MCOUNT_RECORD select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_TRACER diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h index 7b8e456..1759d73 100644 --- a/arch/s390/include/asm/ftrace.h +++ b/arch/s390/include/asm/ftrace.h @@ -24,4 +24,8 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr) #define MCOUNT_INSN_SIZE 22 #endif +#ifdef CONFIG_64BIT +#define ARCH_SUPPORTS_FTRACE_OPS 1 +#endif + #endif /* _ASM_S390_FTRACE_H */ diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index de55efa..14b6195 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -107,6 +107,13 @@ asm( #endif /* CONFIG_64BIT */ +#ifdef CONFIG_64BIT +int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, + unsigned long addr) +{ + return 0; +} +#endif int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) diff --git a/arch/s390/kernel/mcount64.S b/arch/s390/kernel/mcount64.S index 5b33c83..4a65dab 100644 --- a/arch/s390/kernel/mcount64.S +++ b/arch/s390/kernel/mcount64.S @@ -8,28 +8,47 @@ #include #include #include +#include .section .kprobes.text, "ax" ENTRY(ftrace_stub) br %r14 +#define STACK_FRAME_SIZE (STACK_FRAME_OVERHEAD + __PT_SIZE) +#define STACK_PARENT_IP (STACK_FRAME_SIZE + 8) +#define STACK_PTREGS (STACK_FRAME_OVERHEAD) +#define STACK_PTREGS_GPRS (STACK_PTREGS + __PT_GPRS) +#define STACK_PTREGS_PSW (STACK_PTREGS + __PT_PSW) + 
ENTRY(_mcount) #ifdef CONFIG_DYNAMIC_FTRACE br %r14 ENTRY(ftrace_caller) + .globl ftrace_regs_caller + .set ftrace_regs_caller,ftrace_caller #endif - stmg %r2,%r5,32(%r15) - stg %r14,112(%r15) lgr %r1,%r15 - aghi %r15,-160 + aghi %r15,-STACK_FRAME_SIZE stg %r1,__SF_BACKCHAIN(%r15) + stg %r1,(STACK_PTREGS_GPRS+15*8)(%r15) + stmg %r0,%r13,STACK_PTREGS_GPRS(%r15) + stg %r14,(STACK_PTREGS_PSW+8)(%r15) +#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES + aghik %r2,%r14,-MCOUNT_INSN_SIZE + lgrl %r4,function_trace_op + lgrl %r14,ftrace_trace_function +#else lgr %r2,%r14 - lg %r3,168(%r15) aghi %r2,-MCOUNT_INSN_SIZE + larl %r4,function_trace_op + lg %r4,0(%r4) larl %r14,ftrace_trace_function lg %r14,0(%r14) +#endif + lg %r3,STACK_PARENT_IP(%r15) + la %r5,STACK_PTREGS(%r15) basr %r14,%r14 #ifdef CONFIG_FUNCTION_GRAPH_TRACER # The j instruction gets runtime patched to a nop instruction. @@ -37,16 +56,16 @@ ENTRY(ftrace_caller) # j .+4 ENTRY(ftrace_graph_caller) j ftrace_graph_caller_end - lg %r2,168(%r15) - lg %r3,272(%r15) + lg %r2,STACK_PARENT_IP(%r15) + lg %r3,(STACK_PTREGS_PSW+8)(%r15) brasl %r14,prepare_ftrace_return - stg %r2,168(%r15) + stg %r2,STACK_PARENT_IP(%r15) ftrace_graph_caller_end: .globl ftrace_graph_caller_end #endif - aghi %r15,160 - lmg %r2,%r5,32(%r15) - lg %r14,112(%r15) + lmg %r0,%r13,STACK_PTREGS_GPRS(%r15) + lg %r14,(STACK_PTREGS_PSW+8)(%r15) + aghi %r15,STACK_FRAME_SIZE br %r14 #ifdef CONFIG_FUNCTION_GRAPH_TRACER @@ -54,10 +73,10 @@ ftrace_graph_caller_end: ENTRY(return_to_handler) stmg %r2,%r5,32(%r15) lgr %r1,%r15 - aghi %r15,-160 + aghi %r15,-STACK_FRAME_OVERHEAD stg %r1,__SF_BACKCHAIN(%r15) brasl %r14,ftrace_return_to_handler - aghi %r15,160 + aghi %r15,STACK_FRAME_OVERHEAD lgr %r14,%r2 lmg %r2,%r5,32(%r15) br %r14 -- cgit v0.10.2 From 5d6a0163494c78ad7b6de733c8793e66b5da9212 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 15 Aug 2014 13:16:09 +0200 Subject: s390/ftrace: enforce DYNAMIC_FTRACE if FUNCTION_TRACER is selected We have too many 
combinations for function tracing. Let's simply stick to the most advanced option, so we don't have to care about other combinations. This means we always select DYNAMIC_FTRACE if FUNCTION_TRACER is selected. In the s390 Makefile also remove CONFIG_FTRACE_SYSCALLS since that functionality got moved to architecture independent code in the meantime. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 3f845fc..47492fc 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -97,6 +97,7 @@ config S390 select ARCH_WANT_IPC_PARSE_VERSION select BUILDTIME_EXTABLE_SORT select CLONE_BACKWARDS2 + select DYNAMIC_FTRACE if FUNCTION_TRACER select GENERIC_CLOCKEVENTS select GENERIC_CPU_DEVICES if !SMP select GENERIC_FIND_FIRST_BIT diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index a95c4ca..d44245d 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -53,9 +53,7 @@ obj-$(CONFIG_COMPAT) += compat_wrapper.o $(compat-obj-y) obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-$(CONFIG_KPROBES) += kprobes.o obj-$(CONFIG_FUNCTION_TRACER) += $(if $(CONFIG_64BIT),mcount64.o,mcount.o) -obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o -obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o -obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o +obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o ifdef CONFIG_64BIT diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index 14b6195..f908e42 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -17,8 +17,6 @@ #include #include "entry.h" -#ifdef CONFIG_DYNAMIC_FTRACE - void ftrace_disable_code(void); void ftrace_enable_insn(void); @@ -142,8 +140,6 @@ int __init ftrace_dyn_arch_init(void) return 0; } -#endif /* CONFIG_DYNAMIC_FTRACE */ - #ifdef CONFIG_FUNCTION_GRAPH_TRACER /* * Hook the return address and push it in the stack of return addresses @@ -169,7 +165,6 @@ out: return parent; } -#ifdef 
CONFIG_DYNAMIC_FTRACE /* * Patch the kernel code at ftrace_graph_caller location. The instruction * there is branch relative and save to prepare_ftrace_return. To disable @@ -219,5 +214,4 @@ int ftrace_disable_ftrace_graph_caller(void) } #endif /* CONFIG_64BIT */ -#endif /* CONFIG_DYNAMIC_FTRACE */ #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S index 433c6db..be6dbd9 100644 --- a/arch/s390/kernel/mcount.S +++ b/arch/s390/kernel/mcount.S @@ -15,11 +15,9 @@ ENTRY(ftrace_stub) br %r14 ENTRY(_mcount) -#ifdef CONFIG_DYNAMIC_FTRACE br %r14 ENTRY(ftrace_caller) -#endif stm %r2,%r5,16(%r15) bras %r1,1f 0: .long ftrace_trace_function diff --git a/arch/s390/kernel/mcount64.S b/arch/s390/kernel/mcount64.S index 4a65dab..8cf976f 100644 --- a/arch/s390/kernel/mcount64.S +++ b/arch/s390/kernel/mcount64.S @@ -22,13 +22,11 @@ ENTRY(ftrace_stub) #define STACK_PTREGS_PSW (STACK_PTREGS + __PT_PSW) ENTRY(_mcount) -#ifdef CONFIG_DYNAMIC_FTRACE br %r14 ENTRY(ftrace_caller) .globl ftrace_regs_caller .set ftrace_regs_caller,ftrace_caller -#endif lgr %r1,%r15 aghi %r15,-STACK_FRAME_SIZE stg %r1,__SF_BACKCHAIN(%r15) -- cgit v0.10.2 From ea2f47699082b971769be8b8f38c08b49219f471 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 3 Sep 2014 10:37:40 +0200 Subject: s390/kprobes: remove unused jprobe_return_end() Even if it has a __used annotation it is actually unused. 
Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index bc71a7b..c48a00c 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -789,11 +789,6 @@ void __kprobes jprobe_return(void) asm volatile(".word 0x0002"); } -static void __used __kprobes jprobe_return_end(void) -{ - asm volatile("bcr 0,0"); -} - int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) { struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); -- cgit v0.10.2 From 3d1e220d08c6a00ffa83d39030b8162f66665b2b Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 3 Sep 2014 13:26:23 +0200 Subject: s390/ftrace: optimize mcount code Reduce the number of executed instructions within the mcount block if function tracing is enabled. We achieve that by using a non-standard C function call ABI. Since the called function is also written in assembler this is not a problem. This also allows to replace the unconditional store at the beginning of the mcount block with a larl instruction, which doesn't touch memory. In theory we could also patch the first instruction of the mcount block to enable and disable function tracing. However this would break kprobes. This could be fixed with implementing the "kprobes_on_ftrace" feature; however keeping the odd jprobes working seems not to be possible without a lot of code churn. Therefore keep the code easy and simply accept one wasted 1-cycle "larl" instruction per function prologue. 
Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h index 1759d73..d419362 100644 --- a/arch/s390/include/asm/ftrace.h +++ b/arch/s390/include/asm/ftrace.h @@ -19,7 +19,7 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr) #endif /* __ASSEMBLY__ */ #ifdef CONFIG_64BIT -#define MCOUNT_INSN_SIZE 12 +#define MCOUNT_INSN_SIZE 18 #else #define MCOUNT_INSN_SIZE 22 #endif diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index f908e42..fcb009d 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -1,7 +1,7 @@ /* * Dynamic function tracer architecture backend. * - * Copyright IBM Corp. 2009 + * Copyright IBM Corp. 2009,2014 * * Author(s): Heiko Carstens , * Martin Schwidefsky @@ -17,6 +17,7 @@ #include #include "entry.h" +void mcount_replace_code(void); void ftrace_disable_code(void); void ftrace_enable_insn(void); @@ -24,38 +25,50 @@ void ftrace_enable_insn(void); /* * The 64-bit mcount code looks like this: * stg %r14,8(%r15) # offset 0 - * > larl %r1,<&counter> # offset 6 - * > brasl %r14,_mcount # offset 12 + * larl %r1,<&counter> # offset 6 + * brasl %r14,_mcount # offset 12 * lg %r14,8(%r15) # offset 18 - * Total length is 24 bytes. The middle two instructions of the mcount - * block get overwritten by ftrace_make_nop / ftrace_make_call. + * Total length is 24 bytes. The complete mcount block initially gets replaced + * by ftrace_make_nop. Subsequent calls to ftrace_make_call / ftrace_make_nop + * only patch the jg/lg instruction within the block. + * Note: we do not patch the first instruction to an unconditional branch, + * since that would break kprobes/jprobes. It is easier to leave the larl + * instruction in and only modify the second instruction. 
* The 64-bit enabled ftrace code block looks like this: - * stg %r14,8(%r15) # offset 0 + * larl %r0,.+24 # offset 0 * > lg %r1,__LC_FTRACE_FUNC # offset 6 - * > lgr %r0,%r0 # offset 12 - * > basr %r14,%r1 # offset 16 - * lg %r14,8(%15) # offset 18 - * The return points of the mcount/ftrace function have the same offset 18. - * The 64-bit disable ftrace code block looks like this: - * stg %r14,8(%r15) # offset 0 + * br %r1 # offset 12 + * brcl 0,0 # offset 14 + * brc 0,0 # offset 20 + * The ftrace function gets called with a non-standard C function call ABI + * where r0 contains the return address. It is also expected that the called + * function only clobbers r0 and r1, but restores r2-r15. + * The return point of the ftrace function has offset 24, so execution + * continues behind the mcount block. + * larl %r0,.+24 # offset 0 * > jg .+18 # offset 6 - * > lgr %r0,%r0 # offset 12 - * > basr %r14,%r1 # offset 16 - * lg %r14,8(%15) # offset 18 + * br %r1 # offset 12 + * brcl 0,0 # offset 14 + * brc 0,0 # offset 20 * The jg instruction branches to offset 24 to skip as many instructions * as possible. 
*/ asm( " .align 4\n" + "mcount_replace_code:\n" + " larl %r0,0f\n" "ftrace_disable_code:\n" " jg 0f\n" - " lgr %r0,%r0\n" - " basr %r14,%r1\n" + " br %r1\n" + " brcl 0,0\n" + " brc 0,0\n" "0:\n" " .align 4\n" "ftrace_enable_insn:\n" " lg %r1,"__stringify(__LC_FTRACE_FUNC)"\n"); +#define MCOUNT_BLOCK_SIZE 24 +#define MCOUNT_INSN_OFFSET 6 #define FTRACE_INSN_SIZE 6 #else /* CONFIG_64BIT */ @@ -116,6 +129,16 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) { +#ifdef CONFIG_64BIT + /* Initial replacement of the whole mcount block */ + if (addr == MCOUNT_ADDR) { + if (probe_kernel_write((void *) rec->ip - MCOUNT_INSN_OFFSET, + mcount_replace_code, + MCOUNT_BLOCK_SIZE)) + return -EPERM; + return 0; + } +#endif if (probe_kernel_write((void *) rec->ip, ftrace_disable_code, MCOUNT_INSN_SIZE)) return -EPERM; diff --git a/arch/s390/kernel/mcount64.S b/arch/s390/kernel/mcount64.S index 8cf976f..07abe8d 100644 --- a/arch/s390/kernel/mcount64.S +++ b/arch/s390/kernel/mcount64.S @@ -16,7 +16,6 @@ ENTRY(ftrace_stub) br %r14 #define STACK_FRAME_SIZE (STACK_FRAME_OVERHEAD + __PT_SIZE) -#define STACK_PARENT_IP (STACK_FRAME_SIZE + 8) #define STACK_PTREGS (STACK_FRAME_OVERHEAD) #define STACK_PTREGS_GPRS (STACK_PTREGS + __PT_GPRS) #define STACK_PTREGS_PSW (STACK_PTREGS + __PT_PSW) @@ -31,40 +30,39 @@ ENTRY(ftrace_caller) aghi %r15,-STACK_FRAME_SIZE stg %r1,__SF_BACKCHAIN(%r15) stg %r1,(STACK_PTREGS_GPRS+15*8)(%r15) - stmg %r0,%r13,STACK_PTREGS_GPRS(%r15) - stg %r14,(STACK_PTREGS_PSW+8)(%r15) + stg %r0,(STACK_PTREGS_PSW+8)(%r15) + stmg %r2,%r14,(STACK_PTREGS_GPRS+2*8)(%r15) #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES - aghik %r2,%r14,-MCOUNT_INSN_SIZE + aghik %r2,%r0,-MCOUNT_INSN_SIZE lgrl %r4,function_trace_op - lgrl %r14,ftrace_trace_function + lgrl %r1,ftrace_trace_function #else - lgr %r2,%r14 + lgr %r2,%r0 aghi %r2,-MCOUNT_INSN_SIZE larl %r4,function_trace_op lg %r4,0(%r4) - larl 
%r14,ftrace_trace_function - lg %r14,0(%r14) + larl %r1,ftrace_trace_function + lg %r1,0(%r1) #endif - lg %r3,STACK_PARENT_IP(%r15) + lgr %r3,%r14 la %r5,STACK_PTREGS(%r15) - basr %r14,%r14 + basr %r14,%r1 #ifdef CONFIG_FUNCTION_GRAPH_TRACER # The j instruction gets runtime patched to a nop instruction. # See ftrace_enable_ftrace_graph_caller. The patched instruction is: # j .+4 ENTRY(ftrace_graph_caller) j ftrace_graph_caller_end - lg %r2,STACK_PARENT_IP(%r15) + lg %r2,(STACK_PTREGS_GPRS+14*8)(%r15) lg %r3,(STACK_PTREGS_PSW+8)(%r15) brasl %r14,prepare_ftrace_return - stg %r2,STACK_PARENT_IP(%r15) + stg %r2,(STACK_PTREGS_GPRS+14*8)(%r15) ftrace_graph_caller_end: .globl ftrace_graph_caller_end #endif - lmg %r0,%r13,STACK_PTREGS_GPRS(%r15) - lg %r14,(STACK_PTREGS_PSW+8)(%r15) - aghi %r15,STACK_FRAME_SIZE - br %r14 + lg %r1,(STACK_PTREGS_PSW+8)(%r15) + lmg %r2,%r15,(STACK_PTREGS_GPRS+2*8)(%r15) + br %r1 #ifdef CONFIG_FUNCTION_GRAPH_TRACER -- cgit v0.10.2 From 442302820356977237e32a76a211e7942255003a Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 8 Sep 2014 08:20:43 +0200 Subject: s390/spinlock: optimize spin_unlock code Use a memory barrier + store sequence instead of a load + compare and swap sequence to unlock a spinlock and an rw lock. For the spinlock case this saves us two memory reads and a not needed cpu serialization after the compare and swap instruction stored the new value. The kernel size (performance_defconfig) gets reduced by ~14k. Average execution time of a tight inlined spin_unlock loop drops from 5.8ns to 0.7ns on a zEC12 machine. An artificial stress test case where several counters are protected with a single spinlock and which are only incremented while holding the spinlock shows ~30% improvement on a 4 cpu machine. 
Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h index 19ff956..b5dce65 100644 --- a/arch/s390/include/asm/barrier.h +++ b/arch/s390/include/asm/barrier.h @@ -15,11 +15,13 @@ #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES /* Fast-BCR without checkpoint synchronization */ -#define mb() do { asm volatile("bcr 14,0" : : : "memory"); } while (0) +#define __ASM_BARRIER "bcr 14,0\n" #else -#define mb() do { asm volatile("bcr 15,0" : : : "memory"); } while (0) +#define __ASM_BARRIER "bcr 15,0\n" #endif +#define mb() do { asm volatile(__ASM_BARRIER : : : "memory"); } while (0) + #define rmb() mb() #define wmb() mb() #define read_barrier_depends() do { } while(0) diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h index 96879f7..d26ad2a 100644 --- a/arch/s390/include/asm/spinlock.h +++ b/arch/s390/include/asm/spinlock.h @@ -64,11 +64,6 @@ static inline int arch_spin_trylock_once(arch_spinlock_t *lp) _raw_compare_and_swap(&lp->lock, 0, SPINLOCK_LOCKVAL)); } -static inline int arch_spin_tryrelease_once(arch_spinlock_t *lp) -{ - return _raw_compare_and_swap(&lp->lock, SPINLOCK_LOCKVAL, 0); -} - static inline void arch_spin_lock(arch_spinlock_t *lp) { if (!arch_spin_trylock_once(lp)) @@ -91,7 +86,13 @@ static inline int arch_spin_trylock(arch_spinlock_t *lp) static inline void arch_spin_unlock(arch_spinlock_t *lp) { - arch_spin_tryrelease_once(lp); + typecheck(unsigned int, lp->lock); + asm volatile( + __ASM_BARRIER + "st %1,%0\n" + : "+Q" (lp->lock) + : "d" (0) + : "cc", "memory"); } static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) @@ -179,7 +180,13 @@ static inline void arch_write_lock_flags(arch_rwlock_t *rw, unsigned long flags) static inline void arch_write_unlock(arch_rwlock_t *rw) { - _raw_compare_and_swap(&rw->lock, 0x80000000, 0); + typecheck(unsigned int, rw->lock); + asm volatile( + __ASM_BARRIER + "st %1,%0\n" + : "+Q" (rw->lock) + : "d" 
(0) + : "cc", "memory"); } static inline int arch_read_trylock(arch_rwlock_t *rw) -- cgit v0.10.2 From 170387a8877b2c12fee5ae901be1ef4693d06094 Mon Sep 17 00:00:00 2001 From: Ingo Tuchscherer Date: Mon, 8 Sep 2014 13:24:13 +0200 Subject: s390/zcrypt: support for extended number of ap domains Extends the number of ap domains within the zcrypt device driver up to 256. AP domains in the range 00..255 will be detected. Signed-off-by: Ingo Tuchscherer Signed-off-by: Martin Schwidefsky diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index 4038437..51e6aa0 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -1188,6 +1188,10 @@ static int ap_select_domain(void) ap_qid_t qid; int rc, i, j; + /* IF APXA isn't installed, only 16 domains could be defined */ + if (!ap_configuration->ap_extended && (ap_domain_index > 15)) + return -EINVAL; + /* * We want to use a single domain. Either the one specified with * the "domain=" parameter or the domain with the maximum number @@ -1900,9 +1904,15 @@ static void ap_reset_all(void) { int i, j; - for (i = 0; i < AP_DOMAINS; i++) - for (j = 0; j < AP_DEVICES; j++) + for (i = 0; i < AP_DOMAINS; i++) { + if (!ap_test_config_domain(i)) + continue; + for (j = 0; j < AP_DEVICES; j++) { + if (!ap_test_config_card_id(j)) + continue; ap_reset_queue(AP_MKQID(j, i)); + } + } } static struct reset_call ap_reset_call = { diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h index 6405ae2..db92e9f 100644 --- a/drivers/s390/crypto/ap_bus.h +++ b/drivers/s390/crypto/ap_bus.h @@ -31,7 +31,7 @@ #include #define AP_DEVICES 64 /* Number of AP devices. */ -#define AP_DOMAINS 16 /* Number of AP domains. */ +#define AP_DOMAINS 256 /* Number of AP domains. */ #define AP_MAX_RESET 90 /* Maximum number of resets. */ #define AP_RESET_TIMEOUT (HZ*0.7) /* Time in ticks for reset timeouts. */ #define AP_CONFIG_TIME 30 /* Time in seconds between AP bus rescans. 
*/ @@ -45,9 +45,9 @@ extern int ap_domain_index; */ typedef unsigned int ap_qid_t; -#define AP_MKQID(_device,_queue) (((_device) & 63) << 8 | ((_queue) & 15)) +#define AP_MKQID(_device, _queue) (((_device) & 63) << 8 | ((_queue) & 255)) #define AP_QID_DEVICE(_qid) (((_qid) >> 8) & 63) -#define AP_QID_QUEUE(_qid) ((_qid) & 15) +#define AP_QID_QUEUE(_qid) ((_qid) & 255) /** * structy ap_queue_status - Holds the AP queue status. -- cgit v0.10.2 From ea61a579ab87f1620b14777afc32cf3827f07bc8 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 9 Sep 2014 12:53:12 +0200 Subject: s390/sclp: reduce dependency on event type masks The event type masks can change asynchronously. These changes are reported by SCLP to the OS by state-change events which are retrieved with the read event data command. The SCLP driver has a request queue, there is a window where the read event data request has not completed yet but the SCLP console drivers are trying to queue output requests. As the masks are not updated yet the requests are discarded. The simplest fix is to queue the console requests independent of the event type masks and rely on SCLP to return with an error code if a specific event type is not available. 
Signed-off-by: Martin Schwidefsky diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c index 1918d9d..5bd6cb1 100644 --- a/drivers/s390/char/sclp_early.c +++ b/drivers/s390/char/sclp_early.c @@ -281,7 +281,7 @@ out: static unsigned int __init sclp_con_check_linemode(struct init_sccb *sccb) { - if (!(sccb->sclp_send_mask & (EVTYP_OPCMD_MASK | EVTYP_PMSGCMD_MASK))) + if (!(sccb->sclp_send_mask & EVTYP_OPCMD_MASK)) return 0; if (!(sccb->sclp_receive_mask & (EVTYP_MSG_MASK | EVTYP_PMSGCMD_MASK))) return 0; diff --git a/drivers/s390/char/sclp_rw.c b/drivers/s390/char/sclp_rw.c index 3b13d58..35a84af 100644 --- a/drivers/s390/char/sclp_rw.c +++ b/drivers/s390/char/sclp_rw.c @@ -33,7 +33,7 @@ static void sclp_rw_pm_event(struct sclp_register *reg, /* Event type structure for write message and write priority message */ static struct sclp_register sclp_rw_event = { - .send_mask = EVTYP_MSG_MASK | EVTYP_PMSGCMD_MASK, + .send_mask = EVTYP_MSG_MASK, .pm_event_fn = sclp_rw_pm_event, }; @@ -456,14 +456,9 @@ sclp_emit_buffer(struct sclp_buffer *buffer, return -EIO; sccb = buffer->sccb; - if (sclp_rw_event.sclp_receive_mask & EVTYP_MSG_MASK) - /* Use normal write message */ - sccb->msg_buf.header.type = EVTYP_MSG; - else if (sclp_rw_event.sclp_receive_mask & EVTYP_PMSGCMD_MASK) - /* Use write priority message */ - sccb->msg_buf.header.type = EVTYP_PMSGCMD; - else - return -EOPNOTSUPP; + /* Use normal write message */ + sccb->msg_buf.header.type = EVTYP_MSG; + buffer->request.command = SCLP_CMDW_WRITE_EVENT_DATA; buffer->request.status = SCLP_REQ_FILLED; buffer->request.callback = sclp_writedata_callback; diff --git a/drivers/s390/char/sclp_vt220.c b/drivers/s390/char/sclp_vt220.c index b9a9f72..ae67386 100644 --- a/drivers/s390/char/sclp_vt220.c +++ b/drivers/s390/char/sclp_vt220.c @@ -206,10 +206,6 @@ sclp_vt220_callback(struct sclp_req *request, void *data) static int __sclp_vt220_emit(struct sclp_vt220_request *request) { - if 
(!(sclp_vt220_register.sclp_receive_mask & EVTYP_VT220MSG_MASK)) { - request->sclp_req.status = SCLP_REQ_FAILED; - return -EIO; - } request->sclp_req.command = SCLP_CMDW_WRITE_EVENT_DATA; request->sclp_req.status = SCLP_REQ_FILLED; request->sclp_req.callback = sclp_vt220_callback; -- cgit v0.10.2 From 8f933b1043e1e51f4776fc1ffe86752c7785fd4e Mon Sep 17 00:00:00 2001 From: Ralf Hoppe Date: Mon, 8 Apr 2013 09:52:57 +0200 Subject: s390/hmcdrv: HMC drive CD/DVD access This device driver allows accessing a HMC drive CD/DVD-ROM. It can be used in a LPAR and z/VM environment. Reviewed-by: Martin Schwidefsky Reviewed-by: Heiko Carstens Signed-off-by: Ralf Hoppe Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h index c4dd400..e787cc1 100644 --- a/arch/s390/include/asm/irq.h +++ b/arch/s390/include/asm/irq.h @@ -51,6 +51,7 @@ enum interruption_class { IRQEXT_CMS, IRQEXT_CMC, IRQEXT_CMR, + IRQEXT_FTP, IRQIO_CIO, IRQIO_QAI, IRQIO_DAS, diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index 8eb8244..051574e 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -70,6 +70,7 @@ static const struct irq_class irqclass_sub_desc[NR_ARCH_IRQS] = { {.irq = IRQEXT_CMS, .name = "CMS", .desc = "[EXT] CPU-Measurement: Sampling"}, {.irq = IRQEXT_CMC, .name = "CMC", .desc = "[EXT] CPU-Measurement: Counter"}, {.irq = IRQEXT_CMR, .name = "CMR", .desc = "[EXT] CPU-Measurement: RI"}, + {.irq = IRQEXT_FTP, .name = "FTP", .desc = "[EXT] HMC FTP Service"}, {.irq = IRQIO_CIO, .name = "CIO", .desc = "[I/O] Common I/O Layer Interrupt"}, {.irq = IRQIO_QAI, .name = "QAI", .desc = "[I/O] QDIO Adapter Interrupt"}, {.irq = IRQIO_DAS, .name = "DAS", .desc = "[I/O] DASD"}, diff --git a/drivers/s390/char/Kconfig b/drivers/s390/char/Kconfig index 71bf959..dc24ecf 100644 --- a/drivers/s390/char/Kconfig +++ b/drivers/s390/char/Kconfig @@ -102,6 +102,19 @@ config SCLP_ASYNC want for inform other people about your kernel panics, need 
this feature and intend to run your kernel in LPAR. +config HMC_DRV + def_tristate m + prompt "Support for file transfers from HMC drive CD/DVD-ROM" + depends on 64BIT + select CRC16 + help + This option enables support for file transfers from a Hardware + Management Console (HMC) drive CD/DVD-ROM. It is available as a + module, called 'hmcdrv', and also as kernel built-in. There is one + optional parameter for this module: cachesize=N, which modifies the + transfer cache size from its default value 0.5MB to N bytes. If N + is zero, then no caching is performed. + config S390_TAPE def_tristate m prompt "S/390 tape device support" diff --git a/drivers/s390/char/Makefile b/drivers/s390/char/Makefile index 78b6ace..6fa9364 100644 --- a/drivers/s390/char/Makefile +++ b/drivers/s390/char/Makefile @@ -33,3 +33,6 @@ obj-$(CONFIG_S390_VMUR) += vmur.o zcore_mod-objs := sclp_sdias.o zcore.o obj-$(CONFIG_CRASH_DUMP) += zcore_mod.o + +hmcdrv-objs := hmcdrv_mod.o hmcdrv_dev.o hmcdrv_ftp.o hmcdrv_cache.o diag_ftp.o sclp_ftp.o +obj-$(CONFIG_HMC_DRV) += hmcdrv.o diff --git a/drivers/s390/char/diag_ftp.c b/drivers/s390/char/diag_ftp.c new file mode 100644 index 0000000..9388963 --- /dev/null +++ b/drivers/s390/char/diag_ftp.c @@ -0,0 +1,237 @@ +/* + * DIAGNOSE X'2C4' instruction based HMC FTP services, useable on z/VM + * + * Copyright IBM Corp. 
2013 + * Author(s): Ralf Hoppe (rhoppe@de.ibm.com) + * + */ + +#define KMSG_COMPONENT "hmcdrv" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include +#include +#include +#include +#include +#include + +#include "hmcdrv_ftp.h" +#include "diag_ftp.h" + +/* DIAGNOSE X'2C4' return codes in Ry */ +#define DIAG_FTP_RET_OK 0 /* HMC FTP started successfully */ +#define DIAG_FTP_RET_EBUSY 4 /* HMC FTP service currently busy */ +#define DIAG_FTP_RET_EIO 8 /* HMC FTP service I/O error */ +/* and an artificial extension */ +#define DIAG_FTP_RET_EPERM 2 /* HMC FTP service privilege error */ + +/* FTP service status codes (after INTR at guest real location 133) */ +#define DIAG_FTP_STAT_OK 0U /* request completed successfully */ +#define DIAG_FTP_STAT_PGCC 4U /* program check condition */ +#define DIAG_FTP_STAT_PGIOE 8U /* paging I/O error */ +#define DIAG_FTP_STAT_TIMEOUT 12U /* timeout */ +#define DIAG_FTP_STAT_EBASE 16U /* base of error codes from SCLP */ +#define DIAG_FTP_STAT_LDFAIL (DIAG_FTP_STAT_EBASE + 1U) /* failed */ +#define DIAG_FTP_STAT_LDNPERM (DIAG_FTP_STAT_EBASE + 2U) /* not allowed */ +#define DIAG_FTP_STAT_LDRUNS (DIAG_FTP_STAT_EBASE + 3U) /* runs */ +#define DIAG_FTP_STAT_LDNRUNS (DIAG_FTP_STAT_EBASE + 4U) /* not runs */ + +/** + * struct diag_ftp_ldfpl - load file FTP parameter list (LDFPL) + * @bufaddr: real buffer address (at 4k boundary) + * @buflen: length of buffer + * @offset: dir/file offset + * @intparm: interruption parameter (unused) + * @transferred: bytes transferred + * @fsize: file size, filled on GET + * @failaddr: failing address + * @spare: padding + * @fident: file name - ASCII + */ +struct diag_ftp_ldfpl { + u64 bufaddr; + u64 buflen; + u64 offset; + u64 intparm; + u64 transferred; + u64 fsize; + u64 failaddr; + u64 spare; + u8 fident[HMCDRV_FTP_FIDENT_MAX]; +} __packed; + +static DECLARE_COMPLETION(diag_ftp_rx_complete); +static int diag_ftp_subcode; + +/** + * diag_ftp_handler() - FTP services IRQ handler + * @extirq: external 
interrupt (sub-) code + * @param32: 32-bit interruption parameter from &struct diag_ftp_ldfpl + * @param64: unused (for 64-bit interrupt parameters) + */ +static void diag_ftp_handler(struct ext_code extirq, + unsigned int param32, + unsigned long param64) +{ + if ((extirq.subcode >> 8) != 8) + return; /* not a FTP services sub-code */ + + inc_irq_stat(IRQEXT_FTP); + diag_ftp_subcode = extirq.subcode & 0xffU; + complete(&diag_ftp_rx_complete); +} + +/** + * diag_ftp_2c4() - DIAGNOSE X'2C4' service call + * @fpl: pointer to prepared LDFPL + * @cmd: FTP command to be executed + * + * Performs a DIAGNOSE X'2C4' call with (input/output) FTP parameter list + * @fpl and FTP function code @cmd. In case of an error the function does + * nothing and returns a (negative) error code. + * + * Notes: + * 1. This function only initiates a transfer, so the caller must wait + * for completion (asynchronous execution). + * 2. The FTP parameter list @fpl must be aligned to a double-word boundary. + * 3. fpl->bufaddr must be a real address, 4k aligned + */ +static int diag_ftp_2c4(struct diag_ftp_ldfpl *fpl, + enum hmcdrv_ftp_cmdid cmd) +{ + int rc; + + asm volatile( + " diag %[addr],%[cmd],0x2c4\n" + "0: j 2f\n" + "1: la %[rc],%[err]\n" + "2:\n" + EX_TABLE(0b, 1b) + : [rc] "=d" (rc), "+m" (*fpl) + : [cmd] "0" (cmd), [addr] "d" (virt_to_phys(fpl)), + [err] "i" (DIAG_FTP_RET_EPERM) + : "cc"); + + switch (rc) { + case DIAG_FTP_RET_OK: + return 0; + case DIAG_FTP_RET_EBUSY: + return -EBUSY; + case DIAG_FTP_RET_EPERM: + return -EPERM; + case DIAG_FTP_RET_EIO: + default: + return -EIO; + } +} + +/** + * diag_ftp_cmd() - executes a DIAG X'2C4' FTP command, targeting a HMC + * @ftp: pointer to FTP command specification + * @fsize: return of file size (or NULL if undesirable) + * + * Attention: Notice that this function is not reentrant - so the caller + * must ensure locking. 
+ * + * Return: number of bytes read/written or a (negative) error code + */ +ssize_t diag_ftp_cmd(const struct hmcdrv_ftp_cmdspec *ftp, size_t *fsize) +{ + struct diag_ftp_ldfpl *ldfpl; + ssize_t len; +#ifdef DEBUG + unsigned long start_jiffies; + + pr_debug("starting DIAG X'2C4' on '%s', requesting %zd bytes\n", + ftp->fname, ftp->len); + start_jiffies = jiffies; +#endif + init_completion(&diag_ftp_rx_complete); + + ldfpl = (void *) get_zeroed_page(GFP_KERNEL | GFP_DMA); + if (!ldfpl) { + len = -ENOMEM; + goto out; + } + + len = strlcpy(ldfpl->fident, ftp->fname, sizeof(ldfpl->fident)); + if (len >= HMCDRV_FTP_FIDENT_MAX) { + len = -EINVAL; + goto out_free; + } + + ldfpl->transferred = 0; + ldfpl->fsize = 0; + ldfpl->offset = ftp->ofs; + ldfpl->buflen = ftp->len; + ldfpl->bufaddr = virt_to_phys(ftp->buf); + + len = diag_ftp_2c4(ldfpl, ftp->id); + if (len) + goto out_free; + + /* + * There is no way to cancel the running diag X'2C4', the code + * needs to wait unconditionally until the transfer is complete. 
+ */ + wait_for_completion(&diag_ftp_rx_complete); + +#ifdef DEBUG + pr_debug("completed DIAG X'2C4' after %lu ms\n", + (jiffies - start_jiffies) * 1000 / HZ); + pr_debug("status of DIAG X'2C4' is %u, with %lld/%lld bytes\n", + diag_ftp_subcode, ldfpl->transferred, ldfpl->fsize); +#endif + + switch (diag_ftp_subcode) { + case DIAG_FTP_STAT_OK: /* success */ + len = ldfpl->transferred; + if (fsize) + *fsize = ldfpl->fsize; + break; + case DIAG_FTP_STAT_LDNPERM: + len = -EPERM; + break; + case DIAG_FTP_STAT_LDRUNS: + len = -EBUSY; + break; + case DIAG_FTP_STAT_LDFAIL: + len = -ENOENT; /* no such file or media */ + break; + default: + len = -EIO; + break; + } + +out_free: + free_page((unsigned long) ldfpl); +out: + return len; +} + +/** + * diag_ftp_startup() - startup of FTP services, when running on z/VM + * + * Return: 0 on success, else a (negative) error code + */ +int diag_ftp_startup(void) +{ + int rc; + + rc = register_external_irq(EXT_IRQ_CP_SERVICE, diag_ftp_handler); + if (rc) + return rc; + + ctl_set_bit(0, 63 - 22); + return 0; +} + +/** + * diag_ftp_shutdown() - shutdown of FTP services, when running on z/VM + */ +void diag_ftp_shutdown(void) +{ + ctl_clear_bit(0, 63 - 22); + unregister_external_irq(EXT_IRQ_CP_SERVICE, diag_ftp_handler); +} diff --git a/drivers/s390/char/diag_ftp.h b/drivers/s390/char/diag_ftp.h new file mode 100644 index 0000000..3abd261 --- /dev/null +++ b/drivers/s390/char/diag_ftp.h @@ -0,0 +1,21 @@ +/* + * DIAGNOSE X'2C4' instruction based SE/HMC FTP Services, useable on z/VM + * + * Notice that all functions exported here are not reentrant. + * So usage should be exclusive, ensured by the caller (e.g. using a + * mutex). + * + * Copyright IBM Corp. 
2013 + * Author(s): Ralf Hoppe (rhoppe@de.ibm.com) + */ + +#ifndef __DIAG_FTP_H__ +#define __DIAG_FTP_H__ + +#include "hmcdrv_ftp.h" + +int diag_ftp_startup(void); +void diag_ftp_shutdown(void); +ssize_t diag_ftp_cmd(const struct hmcdrv_ftp_cmdspec *ftp, size_t *fsize); + +#endif /* __DIAG_FTP_H__ */ diff --git a/drivers/s390/char/hmcdrv_cache.c b/drivers/s390/char/hmcdrv_cache.c new file mode 100644 index 0000000..4cda5ad --- /dev/null +++ b/drivers/s390/char/hmcdrv_cache.c @@ -0,0 +1,252 @@ +/* + * SE/HMC Drive (Read) Cache Functions + * + * Copyright IBM Corp. 2013 + * Author(s): Ralf Hoppe (rhoppe@de.ibm.com) + * + */ + +#define KMSG_COMPONENT "hmcdrv" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include +#include +#include + +#include "hmcdrv_ftp.h" +#include "hmcdrv_cache.h" + +#define HMCDRV_CACHE_TIMEOUT 30 /* aging timeout in seconds */ + +/** + * struct hmcdrv_cache_entry - file cache (only used on read/dir) + * @id: FTP command ID + * @content: kernel-space buffer, 4k aligned + * @len: size of @content cache (0 if caching disabled) + * @ofs: start of content within file (-1 if no cached content) + * @fname: file name + * @fsize: file size + * @timeout: cache timeout in jiffies + * + * Notice that the first three members (id, fname, fsize) are cached on all + * read/dir requests. But content is cached only under some preconditions. + * Uncached content is signalled by a negative value of @ofs. 
+ */ +struct hmcdrv_cache_entry { + enum hmcdrv_ftp_cmdid id; + char fname[HMCDRV_FTP_FIDENT_MAX]; + size_t fsize; + loff_t ofs; + unsigned long timeout; + void *content; + size_t len; +}; + +static int hmcdrv_cache_order; /* cache allocated page order */ + +static struct hmcdrv_cache_entry hmcdrv_cache_file = { + .fsize = SIZE_MAX, + .ofs = -1, + .len = 0, + .fname = {'\0'} +}; + +/** + * hmcdrv_cache_get() - looks for file data/content in read cache + * @ftp: pointer to FTP command specification + * + * Return: number of bytes read from cache or a negative number if nothing + * in content cache (for the file/cmd specified in @ftp) + */ +static ssize_t hmcdrv_cache_get(const struct hmcdrv_ftp_cmdspec *ftp) +{ + loff_t pos; /* position in cache (signed) */ + ssize_t len; + + if ((ftp->id != hmcdrv_cache_file.id) || + strcmp(hmcdrv_cache_file.fname, ftp->fname)) + return -1; + + if (ftp->ofs >= hmcdrv_cache_file.fsize) /* EOF ? */ + return 0; + + if ((hmcdrv_cache_file.ofs < 0) || /* has content? 
*/ + time_after(jiffies, hmcdrv_cache_file.timeout)) + return -1; + + /* there seems to be cached content - calculate the maximum number + * of bytes that can be returned (regarding file size and offset) + */ + len = hmcdrv_cache_file.fsize - ftp->ofs; + + if (len > ftp->len) + len = ftp->len; + + /* check if the requested chunk falls into our cache (which starts + * at offset 'hmcdrv_cache_file.ofs' in the file of interest) + */ + pos = ftp->ofs - hmcdrv_cache_file.ofs; + + if ((pos >= 0) && + ((pos + len) <= hmcdrv_cache_file.len)) { + + memcpy(ftp->buf, + hmcdrv_cache_file.content + pos, + len); + pr_debug("using cached content of '%s', returning %zd/%zd bytes\n", + hmcdrv_cache_file.fname, len, + hmcdrv_cache_file.fsize); + + return len; + } + + return -1; +} + +/** + * hmcdrv_cache_do() - do a HMC drive CD/DVD transfer with cache update + * @ftp: pointer to FTP command specification + * @func: FTP transfer function to be used + * + * Return: number of bytes read/written or a (negative) error code + */ +static ssize_t hmcdrv_cache_do(const struct hmcdrv_ftp_cmdspec *ftp, + hmcdrv_cache_ftpfunc func) +{ + ssize_t len; + + /* only cache content if the read/dir cache really exists + * (hmcdrv_cache_file.len > 0), is large enough to handle the + * request (hmcdrv_cache_file.len >= ftp->len) and there is a need + * to do so (ftp->len > 0) + */ + if ((ftp->len > 0) && (hmcdrv_cache_file.len >= ftp->len)) { + + /* because the cache is not located at ftp->buf, we have to + * assemble a new HMC drive FTP cmd specification (pointing + * to our cache, and using the increased size) + */ + struct hmcdrv_ftp_cmdspec cftp = *ftp; /* make a copy */ + cftp.buf = hmcdrv_cache_file.content; /* and update */ + cftp.len = hmcdrv_cache_file.len; /* buffer data */ + + len = func(&cftp, &hmcdrv_cache_file.fsize); /* now do */ + + if (len > 0) { + pr_debug("caching %zd bytes content for '%s'\n", + len, ftp->fname); + + if (len > ftp->len) + len = ftp->len; + + hmcdrv_cache_file.ofs = 
ftp->ofs; + hmcdrv_cache_file.timeout = jiffies + + HMCDRV_CACHE_TIMEOUT * HZ; + memcpy(ftp->buf, hmcdrv_cache_file.content, len); + } + } else { + len = func(ftp, &hmcdrv_cache_file.fsize); + hmcdrv_cache_file.ofs = -1; /* invalidate content */ + } + + if (len > 0) { + /* cache some file info (FTP command, file name and file + * size) unconditionally + */ + strlcpy(hmcdrv_cache_file.fname, ftp->fname, + HMCDRV_FTP_FIDENT_MAX); + hmcdrv_cache_file.id = ftp->id; + pr_debug("caching cmd %d, file size %zu for '%s'\n", + ftp->id, hmcdrv_cache_file.fsize, ftp->fname); + } + + return len; +} + +/** + * hmcdrv_cache_cmd() - perform a cached HMC drive CD/DVD transfer + * @ftp: pointer to FTP command specification + * @func: FTP transfer function to be used + * + * Attention: Notice that this function is not reentrant - so the caller + * must ensure exclusive execution. + * + * Return: number of bytes read/written or a (negative) error code + */ +ssize_t hmcdrv_cache_cmd(const struct hmcdrv_ftp_cmdspec *ftp, + hmcdrv_cache_ftpfunc func) +{ + ssize_t len; + + if ((ftp->id == HMCDRV_FTP_DIR) || /* read cache */ + (ftp->id == HMCDRV_FTP_NLIST) || + (ftp->id == HMCDRV_FTP_GET)) { + + len = hmcdrv_cache_get(ftp); + + if (len >= 0) /* got it from cache ? */ + return len; /* yes */ + + len = hmcdrv_cache_do(ftp, func); + + if (len >= 0) + return len; + + } else { + len = func(ftp, NULL); /* simply do original command */ + } + + /* invalidate the (read) cache in case there was a write operation + * or an error on read/dir + */ + hmcdrv_cache_file.id = HMCDRV_FTP_NOOP; + hmcdrv_cache_file.fsize = LLONG_MAX; + hmcdrv_cache_file.ofs = -1; + + return len; +} + +/** + * hmcdrv_cache_startup() - startup of HMC drive cache + * @cachesize: cache size + * + * Return: 0 on success, else a (negative) error code + */ +int hmcdrv_cache_startup(size_t cachesize) +{ + if (cachesize > 0) { /* perform caching ? 
*/ + hmcdrv_cache_order = get_order(cachesize); + hmcdrv_cache_file.content = + (void *) __get_free_pages(GFP_KERNEL | GFP_DMA, + hmcdrv_cache_order); + + if (!hmcdrv_cache_file.content) { + pr_err("Allocating the requested cache size of %zu bytes failed\n", + cachesize); + return -ENOMEM; + } + + pr_debug("content cache enabled, size is %zu bytes\n", + cachesize); + } + + hmcdrv_cache_file.len = cachesize; + return 0; +} + +/** + * hmcdrv_cache_shutdown() - shutdown of HMC drive cache + */ +void hmcdrv_cache_shutdown(void) +{ + if (hmcdrv_cache_file.content) { + free_pages((unsigned long) hmcdrv_cache_file.content, + hmcdrv_cache_order); + hmcdrv_cache_file.content = NULL; + } + + hmcdrv_cache_file.id = HMCDRV_FTP_NOOP; + hmcdrv_cache_file.fsize = LLONG_MAX; + hmcdrv_cache_file.ofs = -1; + hmcdrv_cache_file.len = 0; /* no cache */ +} diff --git a/drivers/s390/char/hmcdrv_cache.h b/drivers/s390/char/hmcdrv_cache.h new file mode 100644 index 0000000..a14b575 --- /dev/null +++ b/drivers/s390/char/hmcdrv_cache.h @@ -0,0 +1,24 @@ +/* + * SE/HMC Drive (Read) Cache Functions + * + * Copyright IBM Corp. 2013 + * Author(s): Ralf Hoppe (rhoppe@de.ibm.com) + */ + +#ifndef __HMCDRV_CACHE_H__ +#define __HMCDRV_CACHE_H__ + +#include +#include "hmcdrv_ftp.h" + +#define HMCDRV_CACHE_SIZE_DFLT (MAX_ORDER_NR_PAGES * PAGE_SIZE / 2UL) + +typedef ssize_t (*hmcdrv_cache_ftpfunc)(const struct hmcdrv_ftp_cmdspec *ftp, + size_t *fsize); + +ssize_t hmcdrv_cache_cmd(const struct hmcdrv_ftp_cmdspec *ftp, + hmcdrv_cache_ftpfunc func); +int hmcdrv_cache_startup(size_t cachesize); +void hmcdrv_cache_shutdown(void); + +#endif /* __HMCDRV_CACHE_H__ */ diff --git a/drivers/s390/char/hmcdrv_dev.c b/drivers/s390/char/hmcdrv_dev.c new file mode 100644 index 0000000..0c51761 --- /dev/null +++ b/drivers/s390/char/hmcdrv_dev.c @@ -0,0 +1,370 @@ +/* + * HMC Drive CD/DVD Device + * + * Copyright IBM Corp. 
2013 + * Author(s): Ralf Hoppe (rhoppe@de.ibm.com) + * + * This file provides a Linux "misc" character device for access to an + * assigned HMC drive CD/DVD-ROM. It works as follows: First create the + * device by calling hmcdrv_dev_init(). After open() a lseek(fd, 0, + * SEEK_END) indicates that a new FTP command follows (not needed on the + * first command after open). Then write() the FTP command ASCII string + * to it, e.g. "dir /" or "nls " or "get ". At the + * end read() the response. + */ + +#define KMSG_COMPONENT "hmcdrv" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "hmcdrv_dev.h" +#include "hmcdrv_ftp.h" + +/* If the following macro is defined, then the HMC device creates its own + * separated device class (and dynamically assigns a major number). If not + * defined then the HMC device is assigned to the "misc" class devices. + * +#define HMCDRV_DEV_CLASS "hmcftp" + */ + +#define HMCDRV_DEV_NAME "hmcdrv" +#define HMCDRV_DEV_BUSY_DELAY 500 /* delay between -EBUSY trials in ms */ +#define HMCDRV_DEV_BUSY_RETRIES 3 /* number of retries on -EBUSY */ + +struct hmcdrv_dev_node { + +#ifdef HMCDRV_DEV_CLASS + struct cdev dev; /* character device structure */ + umode_t mode; /* mode of device node (unused, zero) */ +#else + struct miscdevice dev; /* "misc" device structure */ +#endif + +}; + +static int hmcdrv_dev_open(struct inode *inode, struct file *fp); +static int hmcdrv_dev_release(struct inode *inode, struct file *fp); +static loff_t hmcdrv_dev_seek(struct file *fp, loff_t pos, int whence); +static ssize_t hmcdrv_dev_read(struct file *fp, char __user *ubuf, + size_t len, loff_t *pos); +static ssize_t hmcdrv_dev_write(struct file *fp, const char __user *ubuf, + size_t len, loff_t *pos); +static ssize_t hmcdrv_dev_transfer(char __kernel *cmd, loff_t offset, + char __user *buf, size_t len); + +/* + * device operations + */ +static const struct 
file_operations hmcdrv_dev_fops = { + .open = hmcdrv_dev_open, + .llseek = hmcdrv_dev_seek, + .release = hmcdrv_dev_release, + .read = hmcdrv_dev_read, + .write = hmcdrv_dev_write, +}; + +static struct hmcdrv_dev_node hmcdrv_dev; /* HMC device struct (static) */ + +#ifdef HMCDRV_DEV_CLASS + +static struct class *hmcdrv_dev_class; /* device class pointer */ +static dev_t hmcdrv_dev_no; /* device number (major/minor) */ + +/** + * hmcdrv_dev_name() - provides a naming hint for a device node in /dev + * @dev: device for which the naming/mode hint is + * @mode: file mode for device node created in /dev + * + * See: devtmpfs.c, function devtmpfs_create_node() + * + * Return: recommended device file name in /dev + */ +static char *hmcdrv_dev_name(struct device *dev, umode_t *mode) +{ + char *nodename = NULL; + const char *devname = dev_name(dev); /* kernel device name */ + + if (devname) + nodename = kasprintf(GFP_KERNEL, "%s", devname); + + /* on device destroy (rmmod) the mode pointer may be NULL + */ + if (mode) + *mode = hmcdrv_dev.mode; + + return nodename; +} + +#endif /* HMCDRV_DEV_CLASS */ + +/* + * open() + */ +static int hmcdrv_dev_open(struct inode *inode, struct file *fp) +{ + int rc; + + /* check for non-blocking access, which is really unsupported + */ + if (fp->f_flags & O_NONBLOCK) + return -EINVAL; + + /* Because it makes no sense to open this device read-only (then a + * FTP command cannot be emitted), we respond with an error. 
+ */ + if ((fp->f_flags & O_ACCMODE) == O_RDONLY) + return -EINVAL; + + /* prevent unloading this module as long as anyone holds the + * device file open - so increment the reference count here + */ + if (!try_module_get(THIS_MODULE)) + return -ENODEV; + + fp->private_data = NULL; /* no command yet */ + rc = hmcdrv_ftp_startup(); + if (rc) + module_put(THIS_MODULE); + + pr_debug("open file '/dev/%s' with return code %d\n", + fp->f_dentry->d_name.name, rc); + return rc; +} + +/* + * release() + */ +static int hmcdrv_dev_release(struct inode *inode, struct file *fp) +{ + pr_debug("closing file '/dev/%s'\n", fp->f_dentry->d_name.name); + kfree(fp->private_data); + fp->private_data = NULL; + hmcdrv_ftp_shutdown(); + module_put(THIS_MODULE); + return 0; +} + +/* + * lseek() + */ +static loff_t hmcdrv_dev_seek(struct file *fp, loff_t pos, int whence) +{ + switch (whence) { + case SEEK_CUR: /* relative to current file position */ + pos += fp->f_pos; /* new position stored in 'pos' */ + break; + + case SEEK_SET: /* absolute (relative to beginning of file) */ + break; /* SEEK_SET */ + + /* We use SEEK_END as a special indicator for a SEEK_SET + * (set absolute position), combined with a FTP command + * clear. 
+ */ + case SEEK_END: + if (fp->private_data) { + kfree(fp->private_data); + fp->private_data = NULL; + } + + break; /* SEEK_END */ + + default: /* SEEK_DATA, SEEK_HOLE: unsupported */ + return -EINVAL; + } + + if (pos < 0) + return -EINVAL; + + if (fp->f_pos != pos) + ++fp->f_version; + + fp->f_pos = pos; + return pos; +} + +/* + * transfer (helper function) + */ +static ssize_t hmcdrv_dev_transfer(char __kernel *cmd, loff_t offset, + char __user *buf, size_t len) +{ + ssize_t retlen; + unsigned trials = HMCDRV_DEV_BUSY_RETRIES; + + do { + retlen = hmcdrv_ftp_cmd(cmd, offset, buf, len); + + if (retlen != -EBUSY) + break; + + msleep(HMCDRV_DEV_BUSY_DELAY); + + } while (--trials > 0); + + return retlen; +} + +/* + * read() + */ +static ssize_t hmcdrv_dev_read(struct file *fp, char __user *ubuf, + size_t len, loff_t *pos) +{ + ssize_t retlen; + + if (((fp->f_flags & O_ACCMODE) == O_WRONLY) || + (fp->private_data == NULL)) { /* no FTP cmd defined ? */ + return -EBADF; + } + + retlen = hmcdrv_dev_transfer((char *) fp->private_data, + *pos, ubuf, len); + + pr_debug("read from file '/dev/%s' at %lld returns %zd/%zu\n", + fp->f_dentry->d_name.name, (long long) *pos, retlen, len); + + if (retlen > 0) + *pos += retlen; + + return retlen; +} + +/* + * write() + */ +static ssize_t hmcdrv_dev_write(struct file *fp, const char __user *ubuf, + size_t len, loff_t *pos) +{ + ssize_t retlen; + + pr_debug("writing file '/dev/%s' at pos. 
%lld with length %zd\n", + fp->f_dentry->d_name.name, (long long) *pos, len); + + if (!fp->private_data) { /* first expect a cmd write */ + fp->private_data = kmalloc(len + 1, GFP_KERNEL); + + if (!fp->private_data) + return -ENOMEM; + + if (!copy_from_user(fp->private_data, ubuf, len)) { + ((char *)fp->private_data)[len] = '\0'; + return len; + } + + kfree(fp->private_data); + fp->private_data = NULL; + return -EFAULT; + } + + retlen = hmcdrv_dev_transfer((char *) fp->private_data, + *pos, (char __user *) ubuf, len); + if (retlen > 0) + *pos += retlen; + + pr_debug("write to file '/dev/%s' returned %zd\n", + fp->f_dentry->d_name.name, retlen); + + return retlen; +} + +/** + * hmcdrv_dev_init() - creates a HMC drive CD/DVD device + * + * This function creates a HMC drive CD/DVD kernel device and an associated + * device under /dev, using a dynamically allocated major number. + * + * Return: 0 on success, else an error code. + */ +int hmcdrv_dev_init(void) +{ + int rc; + +#ifdef HMCDRV_DEV_CLASS + struct device *dev; + + rc = alloc_chrdev_region(&hmcdrv_dev_no, 0, 1, HMCDRV_DEV_NAME); + + if (rc) + goto out_err; + + cdev_init(&hmcdrv_dev.dev, &hmcdrv_dev_fops); + hmcdrv_dev.dev.owner = THIS_MODULE; + rc = cdev_add(&hmcdrv_dev.dev, hmcdrv_dev_no, 1); + + if (rc) + goto out_unreg; + + /* At this point the character device exists in the kernel (see + * /proc/devices), but not under /dev nor /sys/devices/virtual. So + * we have to create an associated class (see /sys/class). + */ + hmcdrv_dev_class = class_create(THIS_MODULE, HMCDRV_DEV_CLASS); + + if (IS_ERR(hmcdrv_dev_class)) { + rc = PTR_ERR(hmcdrv_dev_class); + goto out_devdel; + } + + /* Finally a device node in /dev has to be established (as 'mkdev' + * does from the command line). Notice that assignment of a device + * node name/mode function is optional (only for mode != 0600). 
+ */ + hmcdrv_dev.mode = 0; /* "unset" */ + hmcdrv_dev_class->devnode = hmcdrv_dev_name; + + dev = device_create(hmcdrv_dev_class, NULL, hmcdrv_dev_no, NULL, + "%s", HMCDRV_DEV_NAME); + if (!IS_ERR(dev)) + return 0; + + rc = PTR_ERR(dev); + class_destroy(hmcdrv_dev_class); + hmcdrv_dev_class = NULL; + +out_devdel: + cdev_del(&hmcdrv_dev.dev); + +out_unreg: + unregister_chrdev_region(hmcdrv_dev_no, 1); + +out_err: + +#else /* !HMCDRV_DEV_CLASS */ + hmcdrv_dev.dev.minor = MISC_DYNAMIC_MINOR; + hmcdrv_dev.dev.name = HMCDRV_DEV_NAME; + hmcdrv_dev.dev.fops = &hmcdrv_dev_fops; + hmcdrv_dev.dev.mode = 0; /* finally produces 0600 */ + rc = misc_register(&hmcdrv_dev.dev); +#endif /* HMCDRV_DEV_CLASS */ + + return rc; +} + +/** + * hmcdrv_dev_exit() - destroys a HMC drive CD/DVD device + */ +void hmcdrv_dev_exit(void) +{ +#ifdef HMCDRV_DEV_CLASS + if (!IS_ERR_OR_NULL(hmcdrv_dev_class)) { + device_destroy(hmcdrv_dev_class, hmcdrv_dev_no); + class_destroy(hmcdrv_dev_class); + } + + cdev_del(&hmcdrv_dev.dev); + unregister_chrdev_region(hmcdrv_dev_no, 1); +#else /* !HMCDRV_DEV_CLASS */ + misc_deregister(&hmcdrv_dev.dev); +#endif /* HMCDRV_DEV_CLASS */ +} diff --git a/drivers/s390/char/hmcdrv_dev.h b/drivers/s390/char/hmcdrv_dev.h new file mode 100644 index 0000000..cb17f07 --- /dev/null +++ b/drivers/s390/char/hmcdrv_dev.h @@ -0,0 +1,14 @@ +/* + * SE/HMC Drive FTP Device + * + * Copyright IBM Corp. 2013 + * Author(s): Ralf Hoppe (rhoppe@de.ibm.com) + */ + +#ifndef __HMCDRV_DEV_H__ +#define __HMCDRV_DEV_H__ + +int hmcdrv_dev_init(void); +void hmcdrv_dev_exit(void); + +#endif /* __HMCDRV_DEV_H__ */ diff --git a/drivers/s390/char/hmcdrv_ftp.c b/drivers/s390/char/hmcdrv_ftp.c new file mode 100644 index 0000000..4bd6332 --- /dev/null +++ b/drivers/s390/char/hmcdrv_ftp.c @@ -0,0 +1,343 @@ +/* + * HMC Drive FTP Services + * + * Copyright IBM Corp. 
2013 + * Author(s): Ralf Hoppe (rhoppe@de.ibm.com) + */ + +#define KMSG_COMPONENT "hmcdrv" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include +#include +#include +#include + +#include +#include + +#include "hmcdrv_ftp.h" +#include "hmcdrv_cache.h" +#include "sclp_ftp.h" +#include "diag_ftp.h" + +/** + * struct hmcdrv_ftp_ops - HMC drive FTP operations + * @startup: startup function + * @shutdown: shutdown function + * @transfer: FTP transfer function + */ +struct hmcdrv_ftp_ops { + int (*startup)(void); + void (*shutdown)(void); + ssize_t (*transfer)(const struct hmcdrv_ftp_cmdspec *ftp, + size_t *fsize); +}; + +static enum hmcdrv_ftp_cmdid hmcdrv_ftp_cmd_getid(const char *cmd, int len); +static int hmcdrv_ftp_parse(char *cmd, struct hmcdrv_ftp_cmdspec *ftp); + +static struct hmcdrv_ftp_ops *hmcdrv_ftp_funcs; /* current operations */ +static DEFINE_MUTEX(hmcdrv_ftp_mutex); /* mutex for hmcdrv_ftp_funcs */ +static unsigned hmcdrv_ftp_refcnt; /* start/shutdown reference counter */ + +/** + * hmcdrv_ftp_cmd_getid() - determine FTP command ID from a command string + * @cmd: FTP command string (NOT zero-terminated) + * @len: length of FTP command string in @cmd + */ +static enum hmcdrv_ftp_cmdid hmcdrv_ftp_cmd_getid(const char *cmd, int len) +{ + /* HMC FTP command descriptor */ + struct hmcdrv_ftp_cmd_desc { + const char *str; /* command string */ + enum hmcdrv_ftp_cmdid cmd; /* associated command as enum */ + }; + + /* Description of all HMC drive FTP commands + * + * Notes: + * 1. Array size should be a prime number. + * 2. Do not change the order of commands in table (because the + * index is determined by CRC % ARRAY_SIZE). + * 3. Original command 'nlist' was renamed, else the CRC would + * collide with 'append' (see point 2).
+ */ + static const struct hmcdrv_ftp_cmd_desc ftpcmds[7] = { + + {.str = "get", /* [0] get (CRC = 0x68eb) */ + .cmd = HMCDRV_FTP_GET}, + {.str = "dir", /* [1] dir (CRC = 0x6a9e) */ + .cmd = HMCDRV_FTP_DIR}, + {.str = "delete", /* [2] delete (CRC = 0x53ae) */ + .cmd = HMCDRV_FTP_DELETE}, + {.str = "nls", /* [3] nls (CRC = 0xf87c) */ + .cmd = HMCDRV_FTP_NLIST}, + {.str = "put", /* [4] put (CRC = 0xac56) */ + .cmd = HMCDRV_FTP_PUT}, + {.str = "append", /* [5] append (CRC = 0xf56e) */ + .cmd = HMCDRV_FTP_APPEND}, + {.str = NULL} /* [6] unused */ + }; + + const struct hmcdrv_ftp_cmd_desc *pdesc; + + u16 crc = 0xffffU; + + if (len == 0) + return HMCDRV_FTP_NOOP; /* error indicator */ + + crc = crc16(crc, cmd, len); + pdesc = ftpcmds + (crc % ARRAY_SIZE(ftpcmds)); + pr_debug("FTP command '%s' has CRC 0x%04x, at table pos. %lu\n", + cmd, crc, (crc % ARRAY_SIZE(ftpcmds))); + + if (!pdesc->str || strncmp(pdesc->str, cmd, len)) + return HMCDRV_FTP_NOOP; + + pr_debug("FTP command '%s' found, with ID %d\n", + pdesc->str, pdesc->cmd); + + return pdesc->cmd; +} + +/** + * hmcdrv_ftp_parse() - HMC drive FTP command parser + * @cmd: FTP command string " " + * @ftp: Pointer to FTP command specification buffer (output) + * + * Return: 0 on success, else a (negative) error code + */ +static int hmcdrv_ftp_parse(char *cmd, struct hmcdrv_ftp_cmdspec *ftp) +{ + char *start; + int argc = 0; + + ftp->id = HMCDRV_FTP_NOOP; + ftp->fname = NULL; + + while (*cmd != '\0') { + + while (isspace(*cmd)) + ++cmd; + + if (*cmd == '\0') + break; + + start = cmd; + + switch (argc) { + case 0: /* 1st argument (FTP command) */ + while ((*cmd != '\0') && !isspace(*cmd)) + ++cmd; + ftp->id = hmcdrv_ftp_cmd_getid(start, cmd - start); + break; + case 1: /* 2nd / last argument (rest of line) */ + while ((*cmd != '\0') && !iscntrl(*cmd)) + ++cmd; + ftp->fname = start; + /* fall through */ + default: + *cmd = '\0'; + break; + } /* switch */ + + ++argc; + } /* while */ + + if (!ftp->fname || (ftp->id ==
HMCDRV_FTP_NOOP)) + return -EINVAL; + + return 0; +} + +/** + * hmcdrv_ftp_do() - perform a HMC drive FTP, with data from kernel-space + * @ftp: pointer to FTP command specification + * + * Return: number of bytes read/written or a negative error code + */ +ssize_t hmcdrv_ftp_do(const struct hmcdrv_ftp_cmdspec *ftp) +{ + ssize_t len; + + mutex_lock(&hmcdrv_ftp_mutex); + + if (hmcdrv_ftp_funcs && hmcdrv_ftp_refcnt) { + pr_debug("starting transfer, cmd %d for '%s' at %lld with %zd bytes\n", + ftp->id, ftp->fname, (long long) ftp->ofs, ftp->len); + len = hmcdrv_cache_cmd(ftp, hmcdrv_ftp_funcs->transfer); + } else { + len = -ENXIO; + } + + mutex_unlock(&hmcdrv_ftp_mutex); + return len; +} +EXPORT_SYMBOL(hmcdrv_ftp_do); + +/** + * hmcdrv_ftp_probe() - probe for the HMC drive FTP service + * + * Return: 0 if service is available, else an (negative) error code + */ +int hmcdrv_ftp_probe(void) +{ + int rc; + + struct hmcdrv_ftp_cmdspec ftp = { + .id = HMCDRV_FTP_NOOP, + .ofs = 0, + .fname = "", + .len = PAGE_SIZE + }; + + ftp.buf = (void *) get_zeroed_page(GFP_KERNEL | GFP_DMA); + + if (!ftp.buf) + return -ENOMEM; + + rc = hmcdrv_ftp_startup(); + + if (rc) + return rc; + + rc = hmcdrv_ftp_do(&ftp); + free_page((unsigned long) ftp.buf); + hmcdrv_ftp_shutdown(); + + switch (rc) { + case -ENOENT: /* no such file/media or currently busy, */ + case -EBUSY: /* but service seems to be available */ + rc = 0; + break; + default: /* leave 'rc' as it is for [0, -EPERM, -E...] */ + if (rc > 0) + rc = 0; /* clear length (success) */ + break; + } /* switch */ + + return rc; +} +EXPORT_SYMBOL(hmcdrv_ftp_probe); + +/** + * hmcdrv_ftp_cmd() - Perform a HMC drive FTP, with data from user-space + * + * @cmd: FTP command string " " + * @offset: file position to read/write + * @buf: user-space buffer for read/written directory/file + * @len: size of @buf (read/dir) or number of bytes to write + * + * This function must not be called before hmcdrv_ftp_startup() was called. 
+ * + * Return: number of bytes read/written or a negative error code + */ +ssize_t hmcdrv_ftp_cmd(char __kernel *cmd, loff_t offset, + char __user *buf, size_t len) +{ + int order; + + struct hmcdrv_ftp_cmdspec ftp = {.len = len, .ofs = offset}; + ssize_t retlen = hmcdrv_ftp_parse(cmd, &ftp); + + if (retlen) + return retlen; + + order = get_order(ftp.len); + ftp.buf = (void *) __get_free_pages(GFP_KERNEL | GFP_DMA, order); + + if (!ftp.buf) + return -ENOMEM; + + switch (ftp.id) { + case HMCDRV_FTP_DIR: + case HMCDRV_FTP_NLIST: + case HMCDRV_FTP_GET: + retlen = hmcdrv_ftp_do(&ftp); + + if ((retlen >= 0) && + copy_to_user(buf, ftp.buf, retlen)) + retlen = -EFAULT; + break; + + case HMCDRV_FTP_PUT: + case HMCDRV_FTP_APPEND: + if (!copy_from_user(ftp.buf, buf, ftp.len)) + retlen = hmcdrv_ftp_do(&ftp); + else + retlen = -EFAULT; + break; + + case HMCDRV_FTP_DELETE: + retlen = hmcdrv_ftp_do(&ftp); + break; + + default: + retlen = -EOPNOTSUPP; + break; + } + + free_pages((unsigned long) ftp.buf, order); + return retlen; +} + +/** + * hmcdrv_ftp_startup() - startup of HMC drive FTP functionality for a + * dedicated (owner) instance + * + * Return: 0 on success, else an (negative) error code + */ +int hmcdrv_ftp_startup(void) +{ + static struct hmcdrv_ftp_ops hmcdrv_ftp_zvm = { + .startup = diag_ftp_startup, + .shutdown = diag_ftp_shutdown, + .transfer = diag_ftp_cmd + }; + + static struct hmcdrv_ftp_ops hmcdrv_ftp_lpar = { + .startup = sclp_ftp_startup, + .shutdown = sclp_ftp_shutdown, + .transfer = sclp_ftp_cmd + }; + + int rc = 0; + + mutex_lock(&hmcdrv_ftp_mutex); /* block transfers while start-up */ + + if (hmcdrv_ftp_refcnt == 0) { + if (MACHINE_IS_VM) + hmcdrv_ftp_funcs = &hmcdrv_ftp_zvm; + else if (MACHINE_IS_LPAR || MACHINE_IS_KVM) + hmcdrv_ftp_funcs = &hmcdrv_ftp_lpar; + else + rc = -EOPNOTSUPP; + + if (hmcdrv_ftp_funcs) + rc = hmcdrv_ftp_funcs->startup(); + } + + if (!rc) + ++hmcdrv_ftp_refcnt; + + mutex_unlock(&hmcdrv_ftp_mutex); + return rc; +} 
+EXPORT_SYMBOL(hmcdrv_ftp_startup); + +/** + * hmcdrv_ftp_shutdown() - shutdown of HMC drive FTP functionality for a + * dedicated (owner) instance + */ +void hmcdrv_ftp_shutdown(void) +{ + mutex_lock(&hmcdrv_ftp_mutex); + --hmcdrv_ftp_refcnt; + + if ((hmcdrv_ftp_refcnt == 0) && hmcdrv_ftp_funcs) + hmcdrv_ftp_funcs->shutdown(); + + mutex_unlock(&hmcdrv_ftp_mutex); +} +EXPORT_SYMBOL(hmcdrv_ftp_shutdown); diff --git a/drivers/s390/char/hmcdrv_ftp.h b/drivers/s390/char/hmcdrv_ftp.h new file mode 100644 index 0000000..f3643a7b3 --- /dev/null +++ b/drivers/s390/char/hmcdrv_ftp.h @@ -0,0 +1,63 @@ +/* + * SE/HMC Drive FTP Services + * + * Copyright IBM Corp. 2013 + * Author(s): Ralf Hoppe (rhoppe@de.ibm.com) + */ + +#ifndef __HMCDRV_FTP_H__ +#define __HMCDRV_FTP_H__ + +#include /* size_t, loff_t */ + +/* + * HMC drive FTP Service max. length of path (w/ EOS) + */ +#define HMCDRV_FTP_FIDENT_MAX 192 + +/** + * enum hmcdrv_ftp_cmdid - HMC drive FTP commands + * @HMCDRV_FTP_NOOP: do nothing (only for probing) + * @HMCDRV_FTP_GET: read a file + * @HMCDRV_FTP_PUT: (over-) write a file + * @HMCDRV_FTP_APPEND: append to a file + * @HMCDRV_FTP_DIR: list directory long (ls -l) + * @HMCDRV_FTP_NLIST: list files, no directories (name list) + * @HMCDRV_FTP_DELETE: delete a file + * @HMCDRV_FTP_CANCEL: cancel operation (SCLP/LPAR only) + */ +enum hmcdrv_ftp_cmdid { + HMCDRV_FTP_NOOP = 0, + HMCDRV_FTP_GET = 1, + HMCDRV_FTP_PUT = 2, + HMCDRV_FTP_APPEND = 3, + HMCDRV_FTP_DIR = 4, + HMCDRV_FTP_NLIST = 5, + HMCDRV_FTP_DELETE = 6, + HMCDRV_FTP_CANCEL = 7 +}; + +/** + * struct hmcdrv_ftp_cmdspec - FTP command specification + * @id: FTP command ID + * @ofs: offset in file + * @fname: filename (ASCII), null-terminated + * @buf: kernel-space transfer data buffer, 4k aligned + * @len: (max) number of bytes to transfer from/to @buf + */ +struct hmcdrv_ftp_cmdspec { + enum hmcdrv_ftp_cmdid id; + loff_t ofs; + const char *fname; + void __kernel *buf; + size_t len; +}; + +int 
hmcdrv_ftp_startup(void); +void hmcdrv_ftp_shutdown(void); +int hmcdrv_ftp_probe(void); +ssize_t hmcdrv_ftp_do(const struct hmcdrv_ftp_cmdspec *ftp); +ssize_t hmcdrv_ftp_cmd(char __kernel *cmd, loff_t offset, + char __user *buf, size_t len); + +#endif /* __HMCDRV_FTP_H__ */ diff --git a/drivers/s390/char/hmcdrv_mod.c b/drivers/s390/char/hmcdrv_mod.c new file mode 100644 index 0000000..505c6a7 --- /dev/null +++ b/drivers/s390/char/hmcdrv_mod.c @@ -0,0 +1,64 @@ +/* + * HMC Drive DVD Module + * + * Copyright IBM Corp. 2013 + * Author(s): Ralf Hoppe (rhoppe@de.ibm.com) + */ + +#define KMSG_COMPONENT "hmcdrv" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include +#include +#include +#include +#include + +#include "hmcdrv_ftp.h" +#include "hmcdrv_dev.h" +#include "hmcdrv_cache.h" + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Copyright 2013 IBM Corporation"); +MODULE_DESCRIPTION("HMC drive DVD access"); + +/* + * module parameter 'cachesize' + */ +static size_t hmcdrv_mod_cachesize = HMCDRV_CACHE_SIZE_DFLT; +module_param_named(cachesize, hmcdrv_mod_cachesize, ulong, S_IRUGO); + +/** + * hmcdrv_mod_init() - module init function + */ +static int __init hmcdrv_mod_init(void) +{ + int rc = hmcdrv_ftp_probe(); /* perform w/o cache */ + + if (rc) + return rc; + + rc = hmcdrv_cache_startup(hmcdrv_mod_cachesize); + + if (rc) + return rc; + + rc = hmcdrv_dev_init(); + + if (rc) + hmcdrv_cache_shutdown(); + + return rc; +} + +/** + * hmcdrv_mod_exit() - module exit function + */ +static void __exit hmcdrv_mod_exit(void) +{ + hmcdrv_dev_exit(); + hmcdrv_cache_shutdown(); +} + +module_init(hmcdrv_mod_init); +module_exit(hmcdrv_mod_exit); diff --git a/drivers/s390/char/sclp.h b/drivers/s390/char/sclp.h index a68b5ec..a88069f 100644 --- a/drivers/s390/char/sclp.h +++ b/drivers/s390/char/sclp.h @@ -19,6 +19,7 @@ #define EVTYP_OPCMD 0x01 #define EVTYP_MSG 0x02 +#define EVTYP_DIAG_TEST 0x07 #define EVTYP_STATECHANGE 0x08 #define EVTYP_PMSGCMD 0x09 #define EVTYP_CNTLPROGOPCMD 0x20 @@ -32,6 
+33,7 @@ #define EVTYP_OPCMD_MASK 0x80000000 #define EVTYP_MSG_MASK 0x40000000 +#define EVTYP_DIAG_TEST_MASK 0x02000000 #define EVTYP_STATECHANGE_MASK 0x01000000 #define EVTYP_PMSGCMD_MASK 0x00800000 #define EVTYP_CTLPROGOPCMD_MASK 0x00000001 diff --git a/drivers/s390/char/sclp_diag.h b/drivers/s390/char/sclp_diag.h new file mode 100644 index 0000000..59c4afa --- /dev/null +++ b/drivers/s390/char/sclp_diag.h @@ -0,0 +1,89 @@ +/* + * Copyright IBM Corp. 2013 + * Author(s): Ralf Hoppe (rhoppe@de.ibm.com) + */ + +#ifndef _SCLP_DIAG_H +#define _SCLP_DIAG_H + +#include + +/* return codes for Diagnostic Test FTP Service, as indicated in member + * sclp_diag_ftp::ldflg + */ +#define SCLP_DIAG_FTP_OK 0x80U /* success */ +#define SCLP_DIAG_FTP_LDFAIL 0x01U /* load failed */ +#define SCLP_DIAG_FTP_LDNPERM 0x02U /* not allowed */ +#define SCLP_DIAG_FTP_LDRUNS 0x03U /* LD runs */ +#define SCLP_DIAG_FTP_LDNRUNS 0x04U /* LD does not run */ + +#define SCLP_DIAG_FTP_XPCX 0x80 /* PCX communication code */ +#define SCLP_DIAG_FTP_ROUTE 4 /* routing code for new FTP service */ + +/* + * length of Diagnostic Test FTP Service event buffer + */ +#define SCLP_DIAG_FTP_EVBUF_LEN \ + (offsetof(struct sclp_diag_evbuf, mdd) + \ + sizeof(struct sclp_diag_ftp)) + +/** + * struct sclp_diag_ftp - Diagnostic Test FTP Service model-dependent data + * @pcx: code for PCX communication (should be 0x80) + * @ldflg: load flag (see defines above) + * @cmd: FTP command + * @pgsize: page size (0 = 4kB, 1 = large page size) + * @srcflg: source flag + * @spare: reserved (zeroes) + * @offset: file offset + * @fsize: file size + * @length: buffer size resp. 
bytes transferred + * @failaddr: failing address + * @bufaddr: buffer address, virtual + * @asce: region or segment table designation + * @fident: file name (ASCII, zero-terminated) + */ +struct sclp_diag_ftp { + u8 pcx; + u8 ldflg; + u8 cmd; + u8 pgsize; + u8 srcflg; + u8 spare; + u64 offset; + u64 fsize; + u64 length; + u64 failaddr; + u64 bufaddr; + u64 asce; + + u8 fident[256]; +} __packed; + +/** + * struct sclp_diag_evbuf - Diagnostic Test (ET7) Event Buffer + * @hdr: event buffer header + * @route: diagnostic route + * @mdd: model-dependent data (@route dependent) + */ +struct sclp_diag_evbuf { + struct evbuf_header hdr; + u16 route; + + union { + struct sclp_diag_ftp ftp; + } mdd; +} __packed; + +/** + * struct sclp_diag_sccb - Diagnostic Test (ET7) SCCB + * @hdr: SCCB header + * @evbuf: event buffer + */ +struct sclp_diag_sccb { + + struct sccb_header hdr; + struct sclp_diag_evbuf evbuf; +} __packed; + +#endif /* _SCLP_DIAG_H */ diff --git a/drivers/s390/char/sclp_ftp.c b/drivers/s390/char/sclp_ftp.c new file mode 100644 index 0000000..6561cc5 --- /dev/null +++ b/drivers/s390/char/sclp_ftp.c @@ -0,0 +1,275 @@ +/* + * SCLP Event Type (ET) 7 - Diagnostic Test FTP Services, useable on LPAR + * + * Copyright IBM Corp. 
2013 + * Author(s): Ralf Hoppe (rhoppe@de.ibm.com) + * + */ + +#define KMSG_COMPONENT "hmcdrv" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "sclp.h" +#include "sclp_diag.h" +#include "sclp_ftp.h" + +static DECLARE_COMPLETION(sclp_ftp_rx_complete); +static u8 sclp_ftp_ldflg; +static u64 sclp_ftp_fsize; +static u64 sclp_ftp_length; + +/** + * sclp_ftp_txcb() - Diagnostic Test FTP services SCLP command callback + */ +static void sclp_ftp_txcb(struct sclp_req *req, void *data) +{ + struct completion *completion = data; + +#ifdef DEBUG + pr_debug("SCLP (ET7) TX-IRQ, SCCB @ 0x%p: %*phN\n", + req->sccb, 24, req->sccb); +#endif + complete(completion); +} + +/** + * sclp_ftp_rxcb() - Diagnostic Test FTP services receiver event callback + */ +static void sclp_ftp_rxcb(struct evbuf_header *evbuf) +{ + struct sclp_diag_evbuf *diag = (struct sclp_diag_evbuf *) evbuf; + + /* + * Check for Diagnostic Test FTP Service + */ + if (evbuf->type != EVTYP_DIAG_TEST || + diag->route != SCLP_DIAG_FTP_ROUTE || + diag->mdd.ftp.pcx != SCLP_DIAG_FTP_XPCX || + evbuf->length < SCLP_DIAG_FTP_EVBUF_LEN) + return; + +#ifdef DEBUG + pr_debug("SCLP (ET7) RX-IRQ, Event @ 0x%p: %*phN\n", + evbuf, 24, evbuf); +#endif + + /* + * Because the event buffer is located in a page which is owned + * by the SCLP core, all data of interest must be copied. 
The + * error indication is in 'sclp_ftp_ldflg' + */ + sclp_ftp_ldflg = diag->mdd.ftp.ldflg; + sclp_ftp_fsize = diag->mdd.ftp.fsize; + sclp_ftp_length = diag->mdd.ftp.length; + + complete(&sclp_ftp_rx_complete); +} + +/** + * sclp_ftp_et7() - start a Diagnostic Test FTP Service SCLP request + * @ftp: pointer to FTP descriptor + * + * Return: 0 on success, else a (negative) error code + */ +static int sclp_ftp_et7(const struct hmcdrv_ftp_cmdspec *ftp) +{ + struct completion completion; + struct sclp_diag_sccb *sccb; + struct sclp_req *req; + size_t len; + int rc; + + req = kzalloc(sizeof(*req), GFP_KERNEL); + sccb = (void *) get_zeroed_page(GFP_KERNEL | GFP_DMA); + if (!req || !sccb) { + rc = -ENOMEM; + goto out_free; + } + + sccb->hdr.length = SCLP_DIAG_FTP_EVBUF_LEN + + sizeof(struct sccb_header); + sccb->evbuf.hdr.type = EVTYP_DIAG_TEST; + sccb->evbuf.hdr.length = SCLP_DIAG_FTP_EVBUF_LEN; + sccb->evbuf.hdr.flags = 0; /* clear processed-buffer */ + sccb->evbuf.route = SCLP_DIAG_FTP_ROUTE; + sccb->evbuf.mdd.ftp.pcx = SCLP_DIAG_FTP_XPCX; + sccb->evbuf.mdd.ftp.srcflg = 0; + sccb->evbuf.mdd.ftp.pgsize = 0; + sccb->evbuf.mdd.ftp.asce = _ASCE_REAL_SPACE; + sccb->evbuf.mdd.ftp.ldflg = SCLP_DIAG_FTP_LDFAIL; + sccb->evbuf.mdd.ftp.fsize = 0; + sccb->evbuf.mdd.ftp.cmd = ftp->id; + sccb->evbuf.mdd.ftp.offset = ftp->ofs; + sccb->evbuf.mdd.ftp.length = ftp->len; + sccb->evbuf.mdd.ftp.bufaddr = virt_to_phys(ftp->buf); + + len = strlcpy(sccb->evbuf.mdd.ftp.fident, ftp->fname, + HMCDRV_FTP_FIDENT_MAX); + if (len >= HMCDRV_FTP_FIDENT_MAX) { + rc = -EINVAL; + goto out_free; + } + + req->command = SCLP_CMDW_WRITE_EVENT_DATA; + req->sccb = sccb; + req->status = SCLP_REQ_FILLED; + req->callback = sclp_ftp_txcb; + req->callback_data = &completion; + + init_completion(&completion); + + rc = sclp_add_request(req); + if (rc) + goto out_free; + + /* Wait for end of ftp sclp command. 
*/ + wait_for_completion(&completion); + +#ifdef DEBUG + pr_debug("status of SCLP (ET7) request is 0x%04x (0x%02x)\n", + sccb->hdr.response_code, sccb->evbuf.hdr.flags); +#endif + + /* + * Check if sclp accepted the request. The data transfer runs + * asynchronously and the completion is indicated with an + * sclp ET7 event. + */ + if (req->status != SCLP_REQ_DONE || + (sccb->evbuf.hdr.flags & 0x80) == 0 || /* processed-buffer */ + (sccb->hdr.response_code & 0xffU) != 0x20U) { + rc = -EIO; + } + +out_free: + free_page((unsigned long) sccb); + kfree(req); + return rc; +} + +/** + * sclp_ftp_cmd() - executes a HMC related SCLP Diagnose (ET7) FTP command + * @ftp: pointer to FTP command specification + * @fsize: return of file size (or NULL if undesirable) + * + * Attention: Notice that this function is not reentrant - so the caller + * must ensure locking. + * + * Return: number of bytes read/written or a (negative) error code + */ +ssize_t sclp_ftp_cmd(const struct hmcdrv_ftp_cmdspec *ftp, size_t *fsize) +{ + ssize_t len; +#ifdef DEBUG + unsigned long start_jiffies; + + pr_debug("starting SCLP (ET7), cmd %d for '%s' at %lld with %zd bytes\n", + ftp->id, ftp->fname, (long long) ftp->ofs, ftp->len); + start_jiffies = jiffies; +#endif + + init_completion(&sclp_ftp_rx_complete); + + /* Start ftp sclp command. */ + len = sclp_ftp_et7(ftp); + if (len) + goto out_unlock; + + /* + * There is no way to cancel the sclp ET7 request, the code + * needs to wait unconditionally until the transfer is complete. 
+ */ + wait_for_completion(&sclp_ftp_rx_complete); + +#ifdef DEBUG + pr_debug("completed SCLP (ET7) request after %lu ms (all)\n", + (jiffies - start_jiffies) * 1000 / HZ); + pr_debug("return code of SCLP (ET7) FTP Service is 0x%02x, with %lld/%lld bytes\n", + sclp_ftp_ldflg, sclp_ftp_length, sclp_ftp_fsize); +#endif + + switch (sclp_ftp_ldflg) { + case SCLP_DIAG_FTP_OK: + len = sclp_ftp_length; + if (fsize) + *fsize = sclp_ftp_fsize; + break; + case SCLP_DIAG_FTP_LDNPERM: + len = -EPERM; + break; + case SCLP_DIAG_FTP_LDRUNS: + len = -EBUSY; + break; + case SCLP_DIAG_FTP_LDFAIL: + len = -ENOENT; + break; + default: + len = -EIO; + break; + } + +out_unlock: + return len; +} + +/* + * ET7 event listener + */ +static struct sclp_register sclp_ftp_event = { + .send_mask = EVTYP_DIAG_TEST_MASK, /* want tx events */ + .receive_mask = EVTYP_DIAG_TEST_MASK, /* want rx events */ + .receiver_fn = sclp_ftp_rxcb, /* async callback (rx) */ + .state_change_fn = NULL, + .pm_event_fn = NULL, +}; + +/** + * sclp_ftp_startup() - startup of FTP services, when running on LPAR + */ +int sclp_ftp_startup(void) +{ +#ifdef DEBUG + unsigned long info; +#endif + int rc; + + rc = sclp_register(&sclp_ftp_event); + if (rc) + return rc; + +#ifdef DEBUG + info = get_zeroed_page(GFP_KERNEL); + + if (info != 0) { + struct sysinfo_2_2_2 *info222 = (struct sysinfo_2_2_2 *)info; + + if (!stsi(info222, 2, 2, 2)) { /* get SYSIB 2.2.2 */ + info222->name[sizeof(info222->name) - 1] = '\0'; + EBCASC_500(info222->name, sizeof(info222->name) - 1); + pr_debug("SCLP (ET7) FTP Service working on LPAR %u (%s)\n", + info222->lpar_number, info222->name); + } + + free_page(info); + } +#endif /* DEBUG */ + return 0; +} + +/** + * sclp_ftp_shutdown() - shutdown of FTP services, when running on LPAR + */ +void sclp_ftp_shutdown(void) +{ + sclp_unregister(&sclp_ftp_event); +} diff --git a/drivers/s390/char/sclp_ftp.h b/drivers/s390/char/sclp_ftp.h new file mode 100644 index 0000000..98ba318 --- /dev/null +++ 
b/drivers/s390/char/sclp_ftp.h @@ -0,0 +1,21 @@ +/* + * SCLP Event Type (ET) 7 - Diagnostic Test FTP Services, useable on LPAR + * + * Notice that all functions exported here are not reentrant. + * So usage should be exclusive, ensured by the caller (e.g. using a + * mutex). + * + * Copyright IBM Corp. 2013 + * Author(s): Ralf Hoppe (rhoppe@de.ibm.com) + */ + +#ifndef __SCLP_FTP_H__ +#define __SCLP_FTP_H__ + +#include "hmcdrv_ftp.h" + +int sclp_ftp_startup(void); +void sclp_ftp_shutdown(void); +ssize_t sclp_ftp_cmd(const struct hmcdrv_ftp_cmdspec *ftp, size_t *fsize); + +#endif /* __SCLP_FTP_H__ */ -- cgit v0.10.2 From 9fc98ad0d2bf3cd71772d1bda75e7a8b4dce261b Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Tue, 16 Sep 2014 11:02:24 +0200 Subject: s390/tape: fix MTIOCGET ioctl to report blocksize Remove tape_state from status register and report the drive's current setting for block size instead as known from other tapes. Density is not supported so nothing to report here. Signed-off-by: Stefan Haberland Signed-off-by: Martin Schwidefsky diff --git a/drivers/s390/char/tape_char.c b/drivers/s390/char/tape_char.c index 6dc6072..77f9b9c 100644 --- a/drivers/s390/char/tape_char.c +++ b/drivers/s390/char/tape_char.c @@ -402,7 +402,9 @@ __tapechar_ioctl(struct tape_device *device, memset(&get, 0, sizeof(get)); get.mt_type = MT_ISUNKNOWN; get.mt_resid = 0 /* device->devstat.rescnt */; - get.mt_dsreg = device->tape_state; + get.mt_dsreg = + ((device->char_data.block_size << MT_ST_BLKSIZE_SHIFT) + & MT_ST_BLKSIZE_MASK); /* FIXME: mt_gstat, mt_erreg, mt_fileno */ get.mt_gstat = 0; get.mt_erreg = 0; -- cgit v0.10.2 From 46b05c7bd51edafb8c8da088b49bddf7f78d48f9 Mon Sep 17 00:00:00 2001 From: Ingo Tuchscherer Date: Tue, 16 Sep 2014 14:37:25 +0200 Subject: s390/zcrypt: Fixed possible race condition in zcrypt module handling Signed-off-by: Ingo Tuchscherer Signed-off-by: Martin Schwidefsky diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c index 
0e18c5d..08f1830 100644 --- a/drivers/s390/crypto/zcrypt_api.c +++ b/drivers/s390/crypto/zcrypt_api.c @@ -343,10 +343,11 @@ struct zcrypt_ops *__ops_lookup(unsigned char *name, int variant) break; } } + if (!found || !try_module_get(zops->owner)) + zops = NULL; + spin_unlock_bh(&zcrypt_ops_list_lock); - if (!found) - return NULL; return zops; } @@ -359,8 +360,6 @@ struct zcrypt_ops *zcrypt_msgtype_request(unsigned char *name, int variant) request_module("%s", name); zops = __ops_lookup(name, variant); } - if ((!zops) || (!try_module_get(zops->owner))) - return NULL; return zops; } EXPORT_SYMBOL(zcrypt_msgtype_request); -- cgit v0.10.2 From d59b93da5e572703e1a7311c13dd3472a4e56e30 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 19 Sep 2014 14:29:31 +0200 Subject: s390/rwlock: use directed yield for write-locked rwlocks Add an owner field to the arch_rwlock_t to be able to pass the timeslice of a virtual CPU with diagnose 0x9c to the lock owner in case the rwlock is write-locked. The undirected yield in case the rwlock is acquired writable but the lock is read-locked is removed. 
Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h index 4f13079..762d4f8 100644 --- a/arch/s390/include/asm/smp.h +++ b/arch/s390/include/asm/smp.h @@ -29,7 +29,6 @@ extern int smp_find_processor_id(u16 address); extern int smp_store_status(int cpu); extern int smp_vcpu_scheduled(int cpu); extern void smp_yield_cpu(int cpu); -extern void smp_yield(void); extern void smp_cpu_set_polarization(int cpu, int val); extern int smp_cpu_get_polarization(int cpu); extern void smp_fill_possible_mask(void); @@ -50,7 +49,6 @@ static inline int smp_find_processor_id(u16 address) { return 0; } static inline int smp_store_status(int cpu) { return 0; } static inline int smp_vcpu_scheduled(int cpu) { return 1; } static inline void smp_yield_cpu(int cpu) { } -static inline void smp_yield(void) { } static inline void smp_fill_possible_mask(void) { } #endif /* CONFIG_SMP */ diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h index d26ad2a..e9865416 100644 --- a/arch/s390/include/asm/spinlock.h +++ b/arch/s390/include/asm/spinlock.h @@ -37,11 +37,17 @@ _raw_compare_and_swap(unsigned int *lock, unsigned int old, unsigned int new) * (the type definitions are in asm/spinlock_types.h) */ +void arch_lock_relax(unsigned int cpu); + void arch_spin_lock_wait(arch_spinlock_t *); int arch_spin_trylock_retry(arch_spinlock_t *); -void arch_spin_relax(arch_spinlock_t *); void arch_spin_lock_wait_flags(arch_spinlock_t *, unsigned long flags); +static inline void arch_spin_relax(arch_spinlock_t *lock) +{ + arch_lock_relax(lock->lock); +} + static inline u32 arch_spin_lockval(int cpu) { return ~cpu; @@ -170,17 +176,21 @@ static inline void arch_write_lock(arch_rwlock_t *rw) { if (!arch_write_trylock_once(rw)) _raw_write_lock_wait(rw); + rw->owner = SPINLOCK_LOCKVAL; } static inline void arch_write_lock_flags(arch_rwlock_t *rw, unsigned long flags) { if (!arch_write_trylock_once(rw)) _raw_write_lock_wait_flags(rw, 
flags); + rw->owner = SPINLOCK_LOCKVAL; } static inline void arch_write_unlock(arch_rwlock_t *rw) { typecheck(unsigned int, rw->lock); + + rw->owner = 0; asm volatile( __ASM_BARRIER "st %1,%0\n" @@ -198,12 +208,20 @@ static inline int arch_read_trylock(arch_rwlock_t *rw) static inline int arch_write_trylock(arch_rwlock_t *rw) { - if (!arch_write_trylock_once(rw)) - return _raw_write_trylock_retry(rw); + if (!arch_write_trylock_once(rw) && !_raw_write_trylock_retry(rw)) + return 0; + rw->owner = SPINLOCK_LOCKVAL; return 1; } -#define arch_read_relax(lock) cpu_relax() -#define arch_write_relax(lock) cpu_relax() +static inline void arch_read_relax(arch_rwlock_t *rw) +{ + arch_lock_relax(rw->owner); +} + +static inline void arch_write_relax(arch_rwlock_t *rw) +{ + arch_lock_relax(rw->owner); +} #endif /* __ASM_SPINLOCK_H */ diff --git a/arch/s390/include/asm/spinlock_types.h b/arch/s390/include/asm/spinlock_types.h index b2cd6ff..d84b693 100644 --- a/arch/s390/include/asm/spinlock_types.h +++ b/arch/s390/include/asm/spinlock_types.h @@ -13,6 +13,7 @@ typedef struct { typedef struct { unsigned int lock; + unsigned int owner; } arch_rwlock_t; #define __ARCH_RW_LOCK_UNLOCKED { 0 } diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 243c7e5..abec97b 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -333,12 +333,6 @@ int smp_vcpu_scheduled(int cpu) return pcpu_running(pcpu_devices + cpu); } -void smp_yield(void) -{ - if (MACHINE_HAS_DIAG44) - asm volatile("diag 0,0,0x44"); -} - void smp_yield_cpu(int cpu) { if (MACHINE_HAS_DIAG9C) diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c index 5b0e445..5f63ac5 100644 --- a/arch/s390/lib/spinlock.c +++ b/arch/s390/lib/spinlock.c @@ -98,17 +98,6 @@ void arch_spin_lock_wait_flags(arch_spinlock_t *lp, unsigned long flags) } EXPORT_SYMBOL(arch_spin_lock_wait_flags); -void arch_spin_relax(arch_spinlock_t *lp) -{ - unsigned int cpu = lp->lock; - if (cpu != 0) { - if (MACHINE_IS_VM || 
MACHINE_IS_KVM || - !smp_vcpu_scheduled(~cpu)) - smp_yield_cpu(~cpu); - } -} -EXPORT_SYMBOL(arch_spin_relax); - int arch_spin_trylock_retry(arch_spinlock_t *lp) { int count; @@ -122,15 +111,18 @@ EXPORT_SYMBOL(arch_spin_trylock_retry); void _raw_read_lock_wait(arch_rwlock_t *rw) { - unsigned int old; + unsigned int owner, old; int count = spin_retry; + owner = 0; while (1) { if (count-- <= 0) { - smp_yield(); + if (owner && !smp_vcpu_scheduled(~owner)) + smp_yield_cpu(~owner); count = spin_retry; } old = ACCESS_ONCE(rw->lock); + owner = ACCESS_ONCE(rw->owner); if ((int) old < 0) continue; if (_raw_compare_and_swap(&rw->lock, old, old + 1)) @@ -141,16 +133,19 @@ EXPORT_SYMBOL(_raw_read_lock_wait); void _raw_read_lock_wait_flags(arch_rwlock_t *rw, unsigned long flags) { - unsigned int old; + unsigned int owner, old; int count = spin_retry; local_irq_restore(flags); + owner = 0; while (1) { if (count-- <= 0) { - smp_yield(); + if (owner && !smp_vcpu_scheduled(~owner)) + smp_yield_cpu(~owner); count = spin_retry; } old = ACCESS_ONCE(rw->lock); + owner = ACCESS_ONCE(rw->owner); if ((int) old < 0) continue; local_irq_disable(); @@ -179,15 +174,18 @@ EXPORT_SYMBOL(_raw_read_trylock_retry); void _raw_write_lock_wait(arch_rwlock_t *rw) { - unsigned int old; + unsigned int owner, old; int count = spin_retry; + owner = 0; while (1) { if (count-- <= 0) { - smp_yield(); + if (owner && !smp_vcpu_scheduled(~owner)) + smp_yield_cpu(~owner); count = spin_retry; } old = ACCESS_ONCE(rw->lock); + owner = ACCESS_ONCE(rw->owner); if (old) continue; if (_raw_compare_and_swap(&rw->lock, 0, 0x80000000)) @@ -198,16 +196,19 @@ EXPORT_SYMBOL(_raw_write_lock_wait); void _raw_write_lock_wait_flags(arch_rwlock_t *rw, unsigned long flags) { - unsigned int old; + unsigned int owner, old; int count = spin_retry; local_irq_restore(flags); + owner = 0; while (1) { if (count-- <= 0) { - smp_yield(); + if (owner && !smp_vcpu_scheduled(~owner)) + smp_yield_cpu(~owner); count = spin_retry; } old = 
ACCESS_ONCE(rw->lock); + owner = ACCESS_ONCE(rw->owner); if (old) continue; local_irq_disable(); @@ -233,3 +234,13 @@ int _raw_write_trylock_retry(arch_rwlock_t *rw) return 0; } EXPORT_SYMBOL(_raw_write_trylock_retry); + +void arch_lock_relax(unsigned int cpu) +{ + if (!cpu) + return; + if (MACHINE_IS_LPAR && smp_vcpu_scheduled(~cpu)) + return; + smp_yield_cpu(~cpu); +} +EXPORT_SYMBOL(arch_lock_relax); -- cgit v0.10.2 From b881dcfbf7fd89b2be801843b060b9ad77cc77e7 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Fri, 19 Sep 2014 15:46:02 +0200 Subject: s390/head.s: use zero as address for stfl The architecture suggests using address 0 as parameter for stfl, to allow for future extensions. Using __LC_STFL_FAC_LIST (0x200) shows which address is used, but might not be future proof. Signed-off-by: Christian Borntraeger Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S index e88d35d..d62eee1 100644 --- a/arch/s390/kernel/head.S +++ b/arch/s390/kernel/head.S @@ -398,7 +398,7 @@ ENTRY(startup_kdump) xc __LC_STFL_FAC_LIST(8),__LC_STFL_FAC_LIST #ifndef CONFIG_MARCH_G5 # check capabilities against MARCH_{G5,Z900,Z990,Z9_109,Z10} - .insn s,0xb2b10000,__LC_STFL_FAC_LIST # store facility list + .insn s,0xb2b10000,0 # store facilities @ __LC_STFL_FAC_LIST tm __LC_STFL_FAC_LIST,0x01 # stfle available ? jz 0f la %r0,1 -- cgit v0.10.2 From 70c9d296325b398a87c30de77cc94033a60bdad2 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sat, 20 Sep 2014 11:12:08 +0200 Subject: s390/vmemmap: remove memset call from vmemmap_populate() If the vmemmap array gets filled with large pages we allocate those pages with vmemmap_alloc_block(), which returns cleared pages. Only for single 4k pages we call our own vmem_alloc_pages() which does not return cleared pages. However we can also call vmemmap_alloc_block() to allocate the 4k pages. This way we can also make sure the vmemmap array is cleared after its population. 
Therefore we can remove the memset at the end of the function which would clear the vmemmap array a second time on machines which do support EDAT1. On very large configurations this can save us several seconds. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index fe9012a..121aff0 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -253,9 +253,9 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) pt_dir = pte_offset_kernel(pm_dir, address); if (pte_none(*pt_dir)) { - unsigned long new_page; + void *new_page; - new_page =__pa(vmem_alloc_pages(0)); + new_page = vmemmap_alloc_block(PAGE_SIZE, node); if (!new_page) goto out; pte_val(*pt_dir) = @@ -263,7 +263,6 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) } address += PAGE_SIZE; } - memset((void *)start, 0, end - start); ret = 0; out: return ret; -- cgit v0.10.2 From 6a5c1482e2b700e89f174ddb42434b37f15abccd Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 22 Sep 2014 08:50:51 +0200 Subject: s390/mm: remove change bit override support Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 5efb2fe..7c4af56 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -216,7 +216,6 @@ extern unsigned long MODULES_END; */ /* Hardware bits in the page table entry */ -#define _PAGE_CO 0x100 /* HW Change-bit override */ #define _PAGE_PROTECT 0x200 /* HW read-only bit */ #define _PAGE_INVALID 0x400 /* HW invalid bit */ #define _PAGE_LARGE 0x800 /* Bit to mark a large pte */ @@ -233,8 +232,8 @@ extern unsigned long MODULES_END; #define __HAVE_ARCH_PTE_SPECIAL /* Set of bits not changed in pte_modify */ -#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_SPECIAL | _PAGE_CO | \ - _PAGE_DIRTY | _PAGE_YOUNG) +#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_SPECIAL | 
_PAGE_DIRTY | \ + _PAGE_YOUNG) /* * handle_pte_fault uses pte_present, pte_none and pte_file to find out the @@ -353,7 +352,6 @@ extern unsigned long MODULES_END; #define _REGION3_ENTRY_LARGE 0x400 /* RTTE-format control, large page */ #define _REGION3_ENTRY_RO 0x200 /* page protection bit */ -#define _REGION3_ENTRY_CO 0x100 /* change-recording override */ /* Bits in the segment table entry */ #define _SEGMENT_ENTRY_BITS 0xfffffffffffffe33UL @@ -370,7 +368,6 @@ extern unsigned long MODULES_END; #define _SEGMENT_ENTRY_YOUNG 0x1000 /* SW segment young bit */ #define _SEGMENT_ENTRY_SPLIT 0x0800 /* THP splitting bit */ #define _SEGMENT_ENTRY_LARGE 0x0400 /* STE-format control, large page */ -#define _SEGMENT_ENTRY_CO 0x0100 /* change-recording override */ #define _SEGMENT_ENTRY_READ 0x0002 /* SW segment read bit */ #define _SEGMENT_ENTRY_WRITE 0x0001 /* SW segment write bit */ @@ -887,8 +884,6 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, pgste = pgste_set_pte(ptep, pgste, entry); pgste_set_unlock(ptep, pgste); } else { - if (!(pte_val(entry) & _PAGE_INVALID) && MACHINE_HAS_EDAT1) - pte_val(entry) |= _PAGE_CO; *ptep = entry; } } diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c index 46d517c..d46cade 100644 --- a/arch/s390/mm/dump_pagetables.c +++ b/arch/s390/mm/dump_pagetables.c @@ -54,7 +54,6 @@ static void print_prot(struct seq_file *m, unsigned int pr, int level) return; } seq_printf(m, "%s", pr & _PAGE_PROTECT ? "RO " : "RW "); - seq_printf(m, "%s", pr & _PAGE_CO ? 
"CO " : " "); seq_putc(m, '\n'); } @@ -129,7 +128,7 @@ static void walk_pte_level(struct seq_file *m, struct pg_state *st, } #ifdef CONFIG_64BIT -#define _PMD_PROT_MASK (_SEGMENT_ENTRY_PROTECT | _SEGMENT_ENTRY_CO) +#define _PMD_PROT_MASK _SEGMENT_ENTRY_PROTECT #else #define _PMD_PROT_MASK 0 #endif @@ -157,7 +156,7 @@ static void walk_pmd_level(struct seq_file *m, struct pg_state *st, } #ifdef CONFIG_64BIT -#define _PUD_PROT_MASK (_REGION3_ENTRY_RO | _REGION3_ENTRY_CO) +#define _PUD_PROT_MASK _REGION3_ENTRY_RO #else #define _PUD_PROT_MASK 0 #endif diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index 389bc17..3c80d2e 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -88,7 +88,7 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pmd_val(pmd) &= ~_SEGMENT_ENTRY_ORIGIN; pmd_val(pmd) |= pte_page(pte)[1].index; } else - pmd_val(pmd) |= _SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_CO; + pmd_val(pmd) |= _SEGMENT_ENTRY_LARGE; *(pmd_t *) ptep = pmd; } diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 121aff0..88475b8 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -236,8 +236,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) if (!new_page) goto out; pmd_val(*pm_dir) = __pa(new_page) | - _SEGMENT_ENTRY | _SEGMENT_ENTRY_LARGE | - _SEGMENT_ENTRY_CO; + _SEGMENT_ENTRY | _SEGMENT_ENTRY_LARGE; address = (address + PMD_SIZE) & PMD_MASK; continue; } -- cgit v0.10.2 From 2684e73a861fe7b2ab763f442207025a1d9bb6a6 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 22 Sep 2014 14:45:11 +0200 Subject: s390/rwlock: remove interrupt-enabling rwlock variant. 
Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h index e9865416..f9537b9 100644 --- a/arch/s390/include/asm/spinlock.h +++ b/arch/s390/include/asm/spinlock.h @@ -131,12 +131,13 @@ static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) #define arch_write_can_lock(x) ((x)->lock == 0) extern void _raw_read_lock_wait(arch_rwlock_t *lp); -extern void _raw_read_lock_wait_flags(arch_rwlock_t *lp, unsigned long flags); -extern int _raw_read_trylock_retry(arch_rwlock_t *lp); extern void _raw_write_lock_wait(arch_rwlock_t *lp); -extern void _raw_write_lock_wait_flags(arch_rwlock_t *lp, unsigned long flags); +extern int _raw_read_trylock_retry(arch_rwlock_t *lp); extern int _raw_write_trylock_retry(arch_rwlock_t *lp); +#define arch_read_lock_flags(lock, flags) arch_read_lock(lock) +#define arch_write_lock_flags(lock, flags) arch_write_lock(lock) + static inline int arch_read_trylock_once(arch_rwlock_t *rw) { unsigned int old = ACCESS_ONCE(rw->lock); @@ -157,12 +158,6 @@ static inline void arch_read_lock(arch_rwlock_t *rw) _raw_read_lock_wait(rw); } -static inline void arch_read_lock_flags(arch_rwlock_t *rw, unsigned long flags) -{ - if (!arch_read_trylock_once(rw)) - _raw_read_lock_wait_flags(rw, flags); -} - static inline void arch_read_unlock(arch_rwlock_t *rw) { unsigned int old; @@ -179,13 +174,6 @@ static inline void arch_write_lock(arch_rwlock_t *rw) rw->owner = SPINLOCK_LOCKVAL; } -static inline void arch_write_lock_flags(arch_rwlock_t *rw, unsigned long flags) -{ - if (!arch_write_trylock_once(rw)) - _raw_write_lock_wait_flags(rw, flags); - rw->owner = SPINLOCK_LOCKVAL; -} - static inline void arch_write_unlock(arch_rwlock_t *rw) { typecheck(unsigned int, rw->lock); diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c index 5f63ac5..c717f4a 100644 --- a/arch/s390/lib/spinlock.c +++ b/arch/s390/lib/spinlock.c @@ -131,31 +131,6 @@ void _raw_read_lock_wait(arch_rwlock_t *rw) } 
EXPORT_SYMBOL(_raw_read_lock_wait); -void _raw_read_lock_wait_flags(arch_rwlock_t *rw, unsigned long flags) -{ - unsigned int owner, old; - int count = spin_retry; - - local_irq_restore(flags); - owner = 0; - while (1) { - if (count-- <= 0) { - if (owner && !smp_vcpu_scheduled(~owner)) - smp_yield_cpu(~owner); - count = spin_retry; - } - old = ACCESS_ONCE(rw->lock); - owner = ACCESS_ONCE(rw->owner); - if ((int) old < 0) - continue; - local_irq_disable(); - if (_raw_compare_and_swap(&rw->lock, old, old + 1)) - return; - local_irq_restore(flags); - } -} -EXPORT_SYMBOL(_raw_read_lock_wait_flags); - int _raw_read_trylock_retry(arch_rwlock_t *rw) { unsigned int old; @@ -194,31 +169,6 @@ void _raw_write_lock_wait(arch_rwlock_t *rw) } EXPORT_SYMBOL(_raw_write_lock_wait); -void _raw_write_lock_wait_flags(arch_rwlock_t *rw, unsigned long flags) -{ - unsigned int owner, old; - int count = spin_retry; - - local_irq_restore(flags); - owner = 0; - while (1) { - if (count-- <= 0) { - if (owner && !smp_vcpu_scheduled(~owner)) - smp_yield_cpu(~owner); - count = spin_retry; - } - old = ACCESS_ONCE(rw->lock); - owner = ACCESS_ONCE(rw->owner); - if (old) - continue; - local_irq_disable(); - if (_raw_compare_and_swap(&rw->lock, 0, 0x80000000)) - return; - local_irq_restore(flags); - } -} -EXPORT_SYMBOL(_raw_write_lock_wait_flags); - int _raw_write_trylock_retry(arch_rwlock_t *rw) { unsigned int old; -- cgit v0.10.2 From 94232a4332de3bc210e7067fd43521b3eb12336a Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 22 Sep 2014 16:25:25 +0200 Subject: s390/rwlock: improve writer fairness Set the write-lock bit in the out-of-line rwlock code to indicate that a writer is waiting. Additional readers will not be able to get the lock until at least one writer got the lock. Additional writers have to wait for the first writer to release the lock again. 
Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c index c717f4a..01f29bb 100644 --- a/arch/s390/lib/spinlock.c +++ b/arch/s390/lib/spinlock.c @@ -149,9 +149,10 @@ EXPORT_SYMBOL(_raw_read_trylock_retry); void _raw_write_lock_wait(arch_rwlock_t *rw) { - unsigned int owner, old; + unsigned int owner, old, prev; int count = spin_retry; + prev = 0x80000000; owner = 0; while (1) { if (count-- <= 0) { @@ -161,10 +162,13 @@ void _raw_write_lock_wait(arch_rwlock_t *rw) } old = ACCESS_ONCE(rw->lock); owner = ACCESS_ONCE(rw->owner); - if (old) - continue; - if (_raw_compare_and_swap(&rw->lock, 0, 0x80000000)) - return; + if ((int) old >= 0 && + _raw_compare_and_swap(&rw->lock, old, old | 0x80000000)) + prev = old; + else + smp_rmb(); + if ((old & 0x7fffffff) == 0 && (int) prev >= 0) + break; } } EXPORT_SYMBOL(_raw_write_lock_wait); -- cgit v0.10.2 From bbae71bf9c2fe90dc5642d4cddbbc1994861fd92 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 22 Sep 2014 16:34:38 +0200 Subject: s390/rwlock: use the interlocked-access facility 1 instructions Make use of the load-and-add, load-and-or and load-and-and instructions to atomically update the read-write lock without a compare-and-swap loop. 
Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h index f9537b9..d6bdf90 100644 --- a/arch/s390/include/asm/spinlock.h +++ b/arch/s390/include/asm/spinlock.h @@ -130,8 +130,6 @@ static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) */ #define arch_write_can_lock(x) ((x)->lock == 0) -extern void _raw_read_lock_wait(arch_rwlock_t *lp); -extern void _raw_write_lock_wait(arch_rwlock_t *lp); extern int _raw_read_trylock_retry(arch_rwlock_t *lp); extern int _raw_write_trylock_retry(arch_rwlock_t *lp); @@ -152,6 +150,78 @@ static inline int arch_write_trylock_once(arch_rwlock_t *rw) _raw_compare_and_swap(&rw->lock, 0, 0x80000000)); } +#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES + +#define __RAW_OP_OR "lao" +#define __RAW_OP_AND "lan" +#define __RAW_OP_ADD "laa" + +#define __RAW_LOCK(ptr, op_val, op_string) \ +({ \ + unsigned int old_val; \ + \ + typecheck(unsigned int *, ptr); \ + asm volatile( \ + op_string " %0,%2,%1\n" \ + "bcr 14,0\n" \ + : "=d" (old_val), "+Q" (*ptr) \ + : "d" (op_val) \ + : "cc", "memory"); \ + old_val; \ +}) + +#define __RAW_UNLOCK(ptr, op_val, op_string) \ +({ \ + unsigned int old_val; \ + \ + typecheck(unsigned int *, ptr); \ + asm volatile( \ + "bcr 14,0\n" \ + op_string " %0,%2,%1\n" \ + : "=d" (old_val), "+Q" (*ptr) \ + : "d" (op_val) \ + : "cc", "memory"); \ + old_val; \ +}) + +extern void _raw_read_lock_wait(arch_rwlock_t *lp); +extern void _raw_write_lock_wait(arch_rwlock_t *lp, unsigned int prev); + +static inline void arch_read_lock(arch_rwlock_t *rw) +{ + unsigned int old; + + old = __RAW_LOCK(&rw->lock, 1, __RAW_OP_ADD); + if ((int) old < 0) + _raw_read_lock_wait(rw); +} + +static inline void arch_read_unlock(arch_rwlock_t *rw) +{ + __RAW_UNLOCK(&rw->lock, -1, __RAW_OP_ADD); +} + +static inline void arch_write_lock(arch_rwlock_t *rw) +{ + unsigned int old; + + old = __RAW_LOCK(&rw->lock, 0x80000000, __RAW_OP_OR); + if (old != 0) + _raw_write_lock_wait(rw, old); + 
rw->owner = SPINLOCK_LOCKVAL; +} + +static inline void arch_write_unlock(arch_rwlock_t *rw) +{ + rw->owner = 0; + __RAW_UNLOCK(&rw->lock, 0x7fffffff, __RAW_OP_AND); +} + +#else /* CONFIG_HAVE_MARCH_Z196_FEATURES */ + +extern void _raw_read_lock_wait(arch_rwlock_t *lp); +extern void _raw_write_lock_wait(arch_rwlock_t *lp); + static inline void arch_read_lock(arch_rwlock_t *rw) { if (!arch_read_trylock_once(rw)) @@ -187,6 +257,8 @@ static inline void arch_write_unlock(arch_rwlock_t *rw) : "cc", "memory"); } +#endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */ + static inline int arch_read_trylock(arch_rwlock_t *rw) { if (!arch_read_trylock_once(rw)) diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c index 01f29bb..034a35a 100644 --- a/arch/s390/lib/spinlock.c +++ b/arch/s390/lib/spinlock.c @@ -114,6 +114,9 @@ void _raw_read_lock_wait(arch_rwlock_t *rw) unsigned int owner, old; int count = spin_retry; +#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES + __RAW_LOCK(&rw->lock, -1, __RAW_OP_ADD); +#endif owner = 0; while (1) { if (count-- <= 0) { @@ -147,6 +150,35 @@ int _raw_read_trylock_retry(arch_rwlock_t *rw) } EXPORT_SYMBOL(_raw_read_trylock_retry); +#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES + +void _raw_write_lock_wait(arch_rwlock_t *rw, unsigned int prev) +{ + unsigned int owner, old; + int count = spin_retry; + + owner = 0; + while (1) { + if (count-- <= 0) { + if (owner && !smp_vcpu_scheduled(~owner)) + smp_yield_cpu(~owner); + count = spin_retry; + } + old = ACCESS_ONCE(rw->lock); + owner = ACCESS_ONCE(rw->owner); + smp_rmb(); + if ((int) old >= 0) { + prev = __RAW_LOCK(&rw->lock, 0x80000000, __RAW_OP_OR); + old = prev; + } + if ((old & 0x7fffffff) == 0 && (int) prev >= 0) + break; + } +} +EXPORT_SYMBOL(_raw_write_lock_wait); + +#else /* CONFIG_HAVE_MARCH_Z196_FEATURES */ + void _raw_write_lock_wait(arch_rwlock_t *rw) { unsigned int owner, old, prev; @@ -173,6 +205,8 @@ void _raw_write_lock_wait(arch_rwlock_t *rw) } EXPORT_SYMBOL(_raw_write_lock_wait); +#endif /* 
CONFIG_HAVE_MARCH_Z196_FEATURES */ + int _raw_write_trylock_retry(arch_rwlock_t *rw) { unsigned int old; -- cgit v0.10.2 From 975fab17399a2b29985166181ad80e5f50fa42e9 Mon Sep 17 00:00:00 2001 From: Jan Willeke Date: Mon, 22 Sep 2014 16:37:27 +0200 Subject: s390/uprobes: common library for kprobes and uprobes This patch moves common functions from kprobes.c to probes.c. Thus it's possible for uprobes to use them without enabling kprobes. Signed-off-by: Jan Willeke Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/include/asm/kprobes.h b/arch/s390/include/asm/kprobes.h index 4176dfe..9862917 100644 --- a/arch/s390/include/asm/kprobes.h +++ b/arch/s390/include/asm/kprobes.h @@ -84,6 +84,10 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr); int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *data); +int probe_is_prohibited_opcode(u16 *insn); +int probe_get_fixup_type(u16 *insn); +int probe_is_insn_relative_long(u16 *insn); + #define flush_insn_slot(p) do { } while (0) #endif /* _ASM_S390_KPROBES_H */ diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index c48a00c..27ae5433 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -58,161 +58,13 @@ struct kprobe_insn_cache kprobe_dmainsn_slots = { .insn_size = MAX_INSN_SIZE, }; -static int __kprobes is_prohibited_opcode(kprobe_opcode_t *insn) -{ - if (!is_known_insn((unsigned char *)insn)) - return -EINVAL; - switch (insn[0] >> 8) { - case 0x0c: /* bassm */ - case 0x0b: /* bsm */ - case 0x83: /* diag */ - case 0x44: /* ex */ - case 0xac: /* stnsm */ - case 0xad: /* stosm */ - return -EINVAL; - case 0xc6: - switch (insn[0] & 0x0f) { - case 0x00: /* exrl */ - return -EINVAL; - } - } - switch (insn[0]) { - case 0x0101: /* pr */ - case 0xb25a: /* bsa */ - case 0xb240: /* bakr */ - case 0xb258: /* bsg */ - case 0xb218: /* pc */ - case 0xb228: /* pt */ - case 0xb98d: /* epsw */ - return -EINVAL; - } - return 0; 
-} - -static int __kprobes get_fixup_type(kprobe_opcode_t *insn) -{ - /* default fixup method */ - int fixup = FIXUP_PSW_NORMAL; - - switch (insn[0] >> 8) { - case 0x05: /* balr */ - case 0x0d: /* basr */ - fixup = FIXUP_RETURN_REGISTER; - /* if r2 = 0, no branch will be taken */ - if ((insn[0] & 0x0f) == 0) - fixup |= FIXUP_BRANCH_NOT_TAKEN; - break; - case 0x06: /* bctr */ - case 0x07: /* bcr */ - fixup = FIXUP_BRANCH_NOT_TAKEN; - break; - case 0x45: /* bal */ - case 0x4d: /* bas */ - fixup = FIXUP_RETURN_REGISTER; - break; - case 0x47: /* bc */ - case 0x46: /* bct */ - case 0x86: /* bxh */ - case 0x87: /* bxle */ - fixup = FIXUP_BRANCH_NOT_TAKEN; - break; - case 0x82: /* lpsw */ - fixup = FIXUP_NOT_REQUIRED; - break; - case 0xb2: /* lpswe */ - if ((insn[0] & 0xff) == 0xb2) - fixup = FIXUP_NOT_REQUIRED; - break; - case 0xa7: /* bras */ - if ((insn[0] & 0x0f) == 0x05) - fixup |= FIXUP_RETURN_REGISTER; - break; - case 0xc0: - if ((insn[0] & 0x0f) == 0x05) /* brasl */ - fixup |= FIXUP_RETURN_REGISTER; - break; - case 0xeb: - switch (insn[2] & 0xff) { - case 0x44: /* bxhg */ - case 0x45: /* bxleg */ - fixup = FIXUP_BRANCH_NOT_TAKEN; - break; - } - break; - case 0xe3: /* bctg */ - if ((insn[2] & 0xff) == 0x46) - fixup = FIXUP_BRANCH_NOT_TAKEN; - break; - case 0xec: - switch (insn[2] & 0xff) { - case 0xe5: /* clgrb */ - case 0xe6: /* cgrb */ - case 0xf6: /* crb */ - case 0xf7: /* clrb */ - case 0xfc: /* cgib */ - case 0xfd: /* cglib */ - case 0xfe: /* cib */ - case 0xff: /* clib */ - fixup = FIXUP_BRANCH_NOT_TAKEN; - break; - } - break; - } - return fixup; -} - -static int __kprobes is_insn_relative_long(kprobe_opcode_t *insn) -{ - /* Check if we have a RIL-b or RIL-c format instruction which - * we need to modify in order to avoid instruction emulation. 
*/ - switch (insn[0] >> 8) { - case 0xc0: - if ((insn[0] & 0x0f) == 0x00) /* larl */ - return true; - break; - case 0xc4: - switch (insn[0] & 0x0f) { - case 0x02: /* llhrl */ - case 0x04: /* lghrl */ - case 0x05: /* lhrl */ - case 0x06: /* llghrl */ - case 0x07: /* sthrl */ - case 0x08: /* lgrl */ - case 0x0b: /* stgrl */ - case 0x0c: /* lgfrl */ - case 0x0d: /* lrl */ - case 0x0e: /* llgfrl */ - case 0x0f: /* strl */ - return true; - } - break; - case 0xc6: - switch (insn[0] & 0x0f) { - case 0x02: /* pfdrl */ - case 0x04: /* cghrl */ - case 0x05: /* chrl */ - case 0x06: /* clghrl */ - case 0x07: /* clhrl */ - case 0x08: /* cgrl */ - case 0x0a: /* clgrl */ - case 0x0c: /* cgfrl */ - case 0x0d: /* crl */ - case 0x0e: /* clgfrl */ - case 0x0f: /* clrl */ - return true; - } - break; - } - return false; -} - static void __kprobes copy_instruction(struct kprobe *p) { s64 disp, new_disp; u64 addr, new_addr; memcpy(p->ainsn.insn, p->addr, insn_length(p->opcode >> 8)); - if (!is_insn_relative_long(p->ainsn.insn)) + if (!probe_is_insn_relative_long(p->ainsn.insn)) return; /* * For pc-relative instructions in RIL-b or RIL-c format patch the @@ -276,7 +128,7 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) if ((unsigned long) p->addr & 0x01) return -EINVAL; /* Make sure the probe isn't going on a difficult instruction */ - if (is_prohibited_opcode(p->addr)) + if (probe_is_prohibited_opcode(p->addr)) return -EINVAL; if (s390_get_insn_slot(p)) return -ENOMEM; @@ -605,7 +457,7 @@ static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs) { struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); unsigned long ip = regs->psw.addr & PSW_ADDR_INSN; - int fixup = get_fixup_type(p->ainsn.insn); + int fixup = probe_get_fixup_type(p->ainsn.insn); if (fixup & FIXUP_PSW_NORMAL) ip += (unsigned long) p->addr - (unsigned long) p->ainsn.insn; diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile index c6d752e..a01df23 100644 --- a/arch/s390/lib/Makefile +++ 
b/arch/s390/lib/Makefile @@ -6,3 +6,5 @@ lib-y += delay.o string.o uaccess.o find.o obj-$(CONFIG_32BIT) += div64.o qrnnd.o ucmpdi2.o mem32.o obj-$(CONFIG_64BIT) += mem64.o lib-$(CONFIG_SMP) += spinlock.o +lib-$(CONFIG_KPROBES) += probes.o +lib-$(CONFIG_UPROBES) += probes.o diff --git a/arch/s390/lib/probes.c b/arch/s390/lib/probes.c new file mode 100644 index 0000000..c5d64a0 --- /dev/null +++ b/arch/s390/lib/probes.c @@ -0,0 +1,159 @@ +/* + * Common helper functions for kprobes and uprobes + * + * Copyright IBM Corp. 2014 + */ + +#include +#include + +int probe_is_prohibited_opcode(u16 *insn) +{ + if (!is_known_insn((unsigned char *)insn)) + return -EINVAL; + switch (insn[0] >> 8) { + case 0x0c: /* bassm */ + case 0x0b: /* bsm */ + case 0x83: /* diag */ + case 0x44: /* ex */ + case 0xac: /* stnsm */ + case 0xad: /* stosm */ + return -EINVAL; + case 0xc6: + switch (insn[0] & 0x0f) { + case 0x00: /* exrl */ + return -EINVAL; + } + } + switch (insn[0]) { + case 0x0101: /* pr */ + case 0xb25a: /* bsa */ + case 0xb240: /* bakr */ + case 0xb258: /* bsg */ + case 0xb218: /* pc */ + case 0xb228: /* pt */ + case 0xb98d: /* epsw */ + case 0xe560: /* tbegin */ + case 0xe561: /* tbeginc */ + case 0xb2f8: /* tend */ + return -EINVAL; + } + return 0; +} + +int probe_get_fixup_type(u16 *insn) +{ + /* default fixup method */ + int fixup = FIXUP_PSW_NORMAL; + + switch (insn[0] >> 8) { + case 0x05: /* balr */ + case 0x0d: /* basr */ + fixup = FIXUP_RETURN_REGISTER; + /* if r2 = 0, no branch will be taken */ + if ((insn[0] & 0x0f) == 0) + fixup |= FIXUP_BRANCH_NOT_TAKEN; + break; + case 0x06: /* bctr */ + case 0x07: /* bcr */ + fixup = FIXUP_BRANCH_NOT_TAKEN; + break; + case 0x45: /* bal */ + case 0x4d: /* bas */ + fixup = FIXUP_RETURN_REGISTER; + break; + case 0x47: /* bc */ + case 0x46: /* bct */ + case 0x86: /* bxh */ + case 0x87: /* bxle */ + fixup = FIXUP_BRANCH_NOT_TAKEN; + break; + case 0x82: /* lpsw */ + fixup = FIXUP_NOT_REQUIRED; + break; + case 0xb2: /* lpswe */ + if 
((insn[0] & 0xff) == 0xb2) + fixup = FIXUP_NOT_REQUIRED; + break; + case 0xa7: /* bras */ + if ((insn[0] & 0x0f) == 0x05) + fixup |= FIXUP_RETURN_REGISTER; + break; + case 0xc0: + if ((insn[0] & 0x0f) == 0x05) /* brasl */ + fixup |= FIXUP_RETURN_REGISTER; + break; + case 0xeb: + switch (insn[2] & 0xff) { + case 0x44: /* bxhg */ + case 0x45: /* bxleg */ + fixup = FIXUP_BRANCH_NOT_TAKEN; + break; + } + break; + case 0xe3: /* bctg */ + if ((insn[2] & 0xff) == 0x46) + fixup = FIXUP_BRANCH_NOT_TAKEN; + break; + case 0xec: + switch (insn[2] & 0xff) { + case 0xe5: /* clgrb */ + case 0xe6: /* cgrb */ + case 0xf6: /* crb */ + case 0xf7: /* clrb */ + case 0xfc: /* cgib */ + case 0xfd: /* cglib */ + case 0xfe: /* cib */ + case 0xff: /* clib */ + fixup = FIXUP_BRANCH_NOT_TAKEN; + break; + } + break; + } + return fixup; +} + +int probe_is_insn_relative_long(u16 *insn) +{ + /* Check if we have a RIL-b or RIL-c format instruction which + * we need to modify in order to avoid instruction emulation. */ + switch (insn[0] >> 8) { + case 0xc0: + if ((insn[0] & 0x0f) == 0x00) /* larl */ + return true; + break; + case 0xc4: + switch (insn[0] & 0x0f) { + case 0x02: /* llhrl */ + case 0x04: /* lghrl */ + case 0x05: /* lhrl */ + case 0x06: /* llghrl */ + case 0x07: /* sthrl */ + case 0x08: /* lgrl */ + case 0x0b: /* stgrl */ + case 0x0c: /* lgfrl */ + case 0x0d: /* lrl */ + case 0x0e: /* llgfrl */ + case 0x0f: /* strl */ + return true; + } + break; + case 0xc6: + switch (insn[0] & 0x0f) { + case 0x02: /* pfdrl */ + case 0x04: /* cghrl */ + case 0x05: /* chrl */ + case 0x06: /* clghrl */ + case 0x07: /* clhrl */ + case 0x08: /* cgrl */ + case 0x0a: /* clgrl */ + case 0x0c: /* cgfrl */ + case 0x0d: /* crl */ + case 0x0e: /* clgfrl */ + case 0x0f: /* clrl */ + return true; + } + break; + } + return false; +} -- cgit v0.10.2 From 2a0a5b2299b9bef76123fac91e68d39cb361c33e Mon Sep 17 00:00:00 2001 From: Jan Willeke Date: Mon, 22 Sep 2014 16:39:06 +0200 Subject: s390/uprobes: architecture backend 
for uprobes Signed-off-by: Jan Willeke Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 47492fc..608adfb 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -58,6 +58,9 @@ config NO_IOPORT_MAP config PCI_QUIRKS def_bool n +config ARCH_SUPPORTS_UPROBES + def_bool 64BIT + config S390 def_bool y select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h index 55d69dd..be317fe 100644 --- a/arch/s390/include/asm/ptrace.h +++ b/arch/s390/include/asm/ptrace.h @@ -161,6 +161,12 @@ static inline long regs_return_value(struct pt_regs *regs) return regs->gprs[2]; } +static inline void instruction_pointer_set(struct pt_regs *regs, + unsigned long val) +{ + regs->psw.addr = val | PSW_ADDR_AMODE; +} + int regs_query_register_offset(const char *name); const char *regs_query_register_name(unsigned int offset); unsigned long regs_get_register(struct pt_regs *regs, unsigned int offset); diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index b833e9c..4d62fd5 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -84,11 +84,13 @@ static inline struct thread_info *current_thread_info(void) #define TIF_SYSCALL_AUDIT 4 /* syscall auditing active */ #define TIF_SECCOMP 5 /* secure computing */ #define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */ +#define TIF_UPROBE 7 /* breakpointed or single-stepping */ #define TIF_31BIT 16 /* 32bit process */ #define TIF_MEMDIE 17 /* is terminating due to OOM killer */ #define TIF_RESTORE_SIGMASK 18 /* restore signal mask in do_signal() */ #define TIF_SINGLE_STEP 19 /* This task is single stepped */ #define TIF_BLOCK_STEP 20 /* This task is block stepped */ +#define TIF_UPROBE_SINGLESTEP 21 /* This task is uprobe single stepped */ #define _TIF_NOTIFY_RESUME (1< + +typedef u16 uprobe_opcode_t; + +#define 
UPROBE_XOL_SLOT_BYTES 256 /* cache aligned */ + +#define UPROBE_SWBP_INSN 0x0002 +#define UPROBE_SWBP_INSN_SIZE 2 + +struct arch_uprobe { + union{ + uprobe_opcode_t insn[3]; + uprobe_opcode_t ixol[3]; + }; + unsigned int saved_per : 1; + unsigned int saved_int_code; +}; + +struct arch_uprobe_task { +}; + +int arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm, + unsigned long addr); +int arch_uprobe_pre_xol(struct arch_uprobe *aup, struct pt_regs *regs); +int arch_uprobe_post_xol(struct arch_uprobe *aup, struct pt_regs *regs); +bool arch_uprobe_xol_was_trapped(struct task_struct *tsk); +int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, + void *data); +void arch_uprobe_abort_xol(struct arch_uprobe *ap, struct pt_regs *regs); +unsigned long arch_uretprobe_hijack_return_addr(unsigned long trampoline, + struct pt_regs *regs); +#endif /* _ASM_UPROBES_H */ diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index d44245d..3249e1f 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -55,6 +55,7 @@ obj-$(CONFIG_KPROBES) += kprobes.o obj-$(CONFIG_FUNCTION_TRACER) += $(if $(CONFIG_64BIT),mcount64.o,mcount.o) obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o +obj-$(CONFIG_UPROBES) += uprobes.o ifdef CONFIG_64BIT obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_cpum_cf.o perf_cpum_sf.o \ diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index 1aad483..5854163 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -45,6 +45,7 @@ void transaction_exception(struct pt_regs *regs); void translation_exception(struct pt_regs *regs); void do_per_trap(struct pt_regs *regs); +void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str); void syscall_trace(struct pt_regs *regs, int entryexit); void kernel_stack_overflow(struct pt_regs * regs); void do_signal(struct pt_regs *regs); diff --git a/arch/s390/kernel/entry64.S 
b/arch/s390/kernel/entry64.S index f2e674c..7b2e03a 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -42,7 +42,8 @@ STACK_SHIFT = PAGE_SHIFT + THREAD_ORDER STACK_SIZE = 1 << STACK_SHIFT STACK_INIT = STACK_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE -_TIF_WORK = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED) +_TIF_WORK = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ + _TIF_UPROBE) _TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \ _TIF_SYSCALL_TRACEPOINT) _CIF_WORK = (_CIF_MCCK_PENDING | _CIF_ASCE) @@ -265,6 +266,10 @@ sysc_work: jo sysc_mcck_pending tm __TI_flags+7(%r12),_TIF_NEED_RESCHED jo sysc_reschedule +#ifdef CONFIG_UPROBES + tm __TI_flags+7(%r12),_TIF_UPROBE + jo sysc_uprobe_notify +#endif tm __PT_FLAGS+7(%r11),_PIF_PER_TRAP jo sysc_singlestep tm __TI_flags+7(%r12),_TIF_SIGPENDING @@ -323,6 +328,16 @@ sysc_notify_resume: jg do_notify_resume # +# _TIF_UPROBE is set, call uprobe_notify_resume +# +#ifdef CONFIG_UPROBES +sysc_uprobe_notify: + lgr %r2,%r11 # pass pointer to pt_regs + larl %r14,sysc_return + jg uprobe_notify_resume +#endif + +# # _PIF_PER_TRAP is set, call do_per_trap # sysc_singlestep: diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 5dc7ad9..fe99d6b 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -84,7 +84,8 @@ void update_cr_regs(struct task_struct *task) new.end = thread->per_user.end; /* merge TIF_SINGLE_STEP into user specified PER registers. 
*/ - if (test_tsk_thread_flag(task, TIF_SINGLE_STEP)) { + if (test_tsk_thread_flag(task, TIF_SINGLE_STEP) || + test_tsk_thread_flag(task, TIF_UPROBE_SINGLESTEP)) { if (test_tsk_thread_flag(task, TIF_BLOCK_STEP)) new.control |= PER_EVENT_BRANCH; else @@ -93,6 +94,8 @@ void update_cr_regs(struct task_struct *task) new.control |= PER_CONTROL_SUSPENSION; new.control |= PER_EVENT_TRANSACTION_END; #endif + if (test_tsk_thread_flag(task, TIF_UPROBE_SINGLESTEP)) + new.control |= PER_EVENT_IFETCH; new.start = 0; new.end = PSW_ADDR_INSN; } diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index c576232..e3e06a4 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -58,15 +58,10 @@ int is_valid_bugaddr(unsigned long addr) return 1; } -static void __kprobes do_trap(struct pt_regs *regs, - int si_signo, int si_code, char *str) +void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str) { siginfo_t info; - if (notify_die(DIE_TRAP, str, regs, 0, - regs->int_code, si_signo) == NOTIFY_STOP) - return; - if (user_mode(regs)) { info.si_signo = si_signo; info.si_errno = 0; @@ -90,6 +85,15 @@ static void __kprobes do_trap(struct pt_regs *regs, } } +static void __kprobes do_trap(struct pt_regs *regs, int si_signo, int si_code, + char *str) +{ + if (notify_die(DIE_TRAP, str, regs, 0, + regs->int_code, si_signo) == NOTIFY_STOP) + return; + do_report_trap(regs, si_signo, si_code, str); +} + void __kprobes do_per_trap(struct pt_regs *regs) { siginfo_t info; @@ -178,6 +182,7 @@ void __kprobes illegal_op(struct pt_regs *regs) siginfo_t info; __u8 opcode[6]; __u16 __user *location; + int is_uprobe_insn = 0; int signal = 0; location = get_trap_ip(regs); @@ -194,6 +199,10 @@ void __kprobes illegal_op(struct pt_regs *regs) force_sig_info(SIGTRAP, &info, current); } else signal = SIGILL; +#ifdef CONFIG_UPROBES + } else if (*((__u16 *) opcode) == UPROBE_SWBP_INSN) { + is_uprobe_insn = 1; +#endif #ifdef CONFIG_MATHEMU } else if (opcode[0] == 0xb3) 
{ if (get_user(*((__u16 *) (opcode+2)), location+1)) @@ -219,11 +228,13 @@ void __kprobes illegal_op(struct pt_regs *regs) #endif } else signal = SIGILL; - } else { - /* - * If we get an illegal op in kernel mode, send it through the - * kprobes notifier. If kprobes doesn't pick it up, SIGILL - */ + } + /* + * We got either an illegal op in kernel mode, or user space trapped + * on a uprobes illegal instruction. See if kprobes or uprobes picks + * it up. If not, SIGILL. + */ + if (is_uprobe_insn || !user_mode(regs)) { if (notify_die(DIE_BPT, "bpt", regs, 0, 3, SIGTRAP) != NOTIFY_STOP) signal = SIGILL; diff --git a/arch/s390/kernel/uprobes.c b/arch/s390/kernel/uprobes.c new file mode 100644 index 0000000..956f4f7 --- /dev/null +++ b/arch/s390/kernel/uprobes.c @@ -0,0 +1,332 @@ +/* + * User-space Probes (UProbes) for s390 + * + * Copyright IBM Corp. 2014 + * Author(s): Jan Willeke, + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "entry.h" + +#define UPROBE_TRAP_NR UINT_MAX + +int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, + unsigned long addr) +{ + return probe_is_prohibited_opcode(auprobe->insn); +} + +int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + if (psw_bits(regs->psw).eaba == PSW_AMODE_24BIT) + return -EINVAL; + if (!is_compat_task() && psw_bits(regs->psw).eaba == PSW_AMODE_31BIT) + return -EINVAL; + clear_pt_regs_flag(regs, PIF_PER_TRAP); + auprobe->saved_per = psw_bits(regs->psw).r; + auprobe->saved_int_code = regs->int_code; + regs->int_code = UPROBE_TRAP_NR; + regs->psw.addr = current->utask->xol_vaddr; + set_tsk_thread_flag(current, TIF_UPROBE_SINGLESTEP); + update_cr_regs(current); + return 0; +} + +bool arch_uprobe_xol_was_trapped(struct task_struct *tsk) +{ + struct pt_regs *regs = task_pt_regs(tsk); + + if (regs->int_code != UPROBE_TRAP_NR) + return true; + return false; +} + +int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct 
pt_regs *regs) +{ + int fixup = probe_get_fixup_type(auprobe->insn); + struct uprobe_task *utask = current->utask; + + clear_tsk_thread_flag(current, TIF_UPROBE_SINGLESTEP); + update_cr_regs(current); + psw_bits(regs->psw).r = auprobe->saved_per; + regs->int_code = auprobe->saved_int_code; + + if (fixup & FIXUP_PSW_NORMAL) + regs->psw.addr += utask->vaddr - utask->xol_vaddr; + if (fixup & FIXUP_RETURN_REGISTER) { + int reg = (auprobe->insn[0] & 0xf0) >> 4; + + regs->gprs[reg] += utask->vaddr - utask->xol_vaddr; + } + if (fixup & FIXUP_BRANCH_NOT_TAKEN) { + int ilen = insn_length(auprobe->insn[0] >> 8); + + if (regs->psw.addr - utask->xol_vaddr == ilen) + regs->psw.addr = utask->vaddr + ilen; + } + /* If per tracing was active generate trap */ + if (regs->psw.mask & PSW_MASK_PER) + do_per_trap(regs); + return 0; +} + +int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, + void *data) +{ + struct die_args *args = data; + struct pt_regs *regs = args->regs; + + if (!user_mode(regs)) + return NOTIFY_DONE; + if (regs->int_code & 0x200) /* Trap during transaction */ + return NOTIFY_DONE; + switch (val) { + case DIE_BPT: + if (uprobe_pre_sstep_notifier(regs)) + return NOTIFY_STOP; + break; + case DIE_SSTEP: + if (uprobe_post_sstep_notifier(regs)) + return NOTIFY_STOP; + default: + break; + } + return NOTIFY_DONE; +} + +void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + clear_thread_flag(TIF_UPROBE_SINGLESTEP); + regs->int_code = auprobe->saved_int_code; + regs->psw.addr = current->utask->vaddr; +} + +unsigned long arch_uretprobe_hijack_return_addr(unsigned long trampoline, + struct pt_regs *regs) +{ + unsigned long orig; + + orig = regs->gprs[14]; + regs->gprs[14] = trampoline; + return orig; +} + +/* Instruction Emulation */ + +static void adjust_psw_addr(psw_t *psw, unsigned long len) +{ + psw->addr = __rewind_psw(*psw, -len); +} + +#define EMU_ILLEGAL_OP 1 +#define EMU_SPECIFICATION 2 +#define EMU_ADDRESSING 3 
+ +#define emu_load_ril(ptr, output) \ +({ \ + unsigned int mask = sizeof(*(ptr)) - 1; \ + __typeof__(*(ptr)) input; \ + int __rc = 0; \ + \ + if (!test_facility(34)) \ + __rc = EMU_ILLEGAL_OP; \ + else if ((u64 __force)ptr & mask) \ + __rc = EMU_SPECIFICATION; \ + else if (get_user(input, ptr)) \ + __rc = EMU_ADDRESSING; \ + else \ + *(output) = input; \ + __rc; \ +}) + +#define emu_store_ril(ptr, input) \ +({ \ + unsigned int mask = sizeof(*(ptr)) - 1; \ + int __rc = 0; \ + \ + if (!test_facility(34)) \ + __rc = EMU_ILLEGAL_OP; \ + else if ((u64 __force)ptr & mask) \ + __rc = EMU_SPECIFICATION; \ + else if (put_user(*(input), ptr)) \ + __rc = EMU_ADDRESSING; \ + __rc; \ +}) + +#define emu_cmp_ril(regs, ptr, cmp) \ +({ \ + unsigned int mask = sizeof(*(ptr)) - 1; \ + __typeof__(*(ptr)) input; \ + int __rc = 0; \ + \ + if (!test_facility(34)) \ + __rc = EMU_ILLEGAL_OP; \ + else if ((u64 __force)ptr & mask) \ + __rc = EMU_SPECIFICATION; \ + else if (get_user(input, ptr)) \ + __rc = EMU_ADDRESSING; \ + else if (input > *(cmp)) \ + psw_bits((regs)->psw).cc = 1; \ + else if (input < *(cmp)) \ + psw_bits((regs)->psw).cc = 2; \ + else \ + psw_bits((regs)->psw).cc = 0; \ + __rc; \ +}) + +struct insn_ril { + u8 opc0; + u8 reg : 4; + u8 opc1 : 4; + s32 disp; +} __packed; + +union split_register { + u64 u64; + u32 u32[2]; + u16 u16[4]; + s64 s64; + s32 s32[2]; + s16 s16[4]; +}; + +/* + * pc relative instructions are emulated, since parameters may not be + * accessible from the xol area due to range limitations. 
+ */ +static void handle_insn_ril(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + union split_register *rx; + struct insn_ril *insn; + unsigned int ilen; + void *uptr; + int rc = 0; + + insn = (struct insn_ril *) &auprobe->insn; + rx = (union split_register *) &regs->gprs[insn->reg]; + uptr = (void *)(regs->psw.addr + (insn->disp * 2)); + ilen = insn_length(insn->opc0); + + switch (insn->opc0) { + case 0xc0: + switch (insn->opc1) { + case 0x00: /* larl */ + rx->u64 = (unsigned long)uptr; + break; + } + break; + case 0xc4: + switch (insn->opc1) { + case 0x02: /* llhrl */ + rc = emu_load_ril((u16 __user *)uptr, &rx->u32[1]); + break; + case 0x04: /* lghrl */ + rc = emu_load_ril((s16 __user *)uptr, &rx->u64); + break; + case 0x05: /* lhrl */ + rc = emu_load_ril((s16 __user *)uptr, &rx->u32[1]); + break; + case 0x06: /* llghrl */ + rc = emu_load_ril((u16 __user *)uptr, &rx->u64); + break; + case 0x08: /* lgrl */ + rc = emu_load_ril((u64 __user *)uptr, &rx->u64); + break; + case 0x0c: /* lgfrl */ + rc = emu_load_ril((s32 __user *)uptr, &rx->u64); + break; + case 0x0d: /* lrl */ + rc = emu_load_ril((u32 __user *)uptr, &rx->u32[1]); + break; + case 0x0e: /* llgfrl */ + rc = emu_load_ril((u32 __user *)uptr, &rx->u64); + break; + case 0x07: /* sthrl */ + rc = emu_store_ril((u16 __user *)uptr, &rx->u16[3]); + break; + case 0x0b: /* stgrl */ + rc = emu_store_ril((u64 __user *)uptr, &rx->u64); + break; + case 0x0f: /* strl */ + rc = emu_store_ril((u32 __user *)uptr, &rx->u32[1]); + break; + } + break; + case 0xc6: + switch (insn->opc1) { + case 0x02: /* pfdrl */ + if (!test_facility(34)) + rc = EMU_ILLEGAL_OP; + break; + case 0x04: /* cghrl */ + rc = emu_cmp_ril(regs, (s16 __user *)uptr, &rx->s64); + break; + case 0x05: /* chrl */ + rc = emu_cmp_ril(regs, (s16 __user *)uptr, &rx->s32[1]); + break; + case 0x06: /* clghrl */ + rc = emu_cmp_ril(regs, (u16 __user *)uptr, &rx->u64); + break; + case 0x07: /* clhrl */ + rc = emu_cmp_ril(regs, (u16 __user *)uptr, &rx->u32[1]); + 
break; + case 0x08: /* cgrl */ + rc = emu_cmp_ril(regs, (s64 __user *)uptr, &rx->s64); + break; + case 0x0a: /* clgrl */ + rc = emu_cmp_ril(regs, (u64 __user *)uptr, &rx->u64); + break; + case 0x0c: /* cgfrl */ + rc = emu_cmp_ril(regs, (s32 __user *)uptr, &rx->s64); + break; + case 0x0d: /* crl */ + rc = emu_cmp_ril(regs, (s32 __user *)uptr, &rx->s32[1]); + break; + case 0x0e: /* clgfrl */ + rc = emu_cmp_ril(regs, (u32 __user *)uptr, &rx->u64); + break; + case 0x0f: /* clrl */ + rc = emu_cmp_ril(regs, (u32 __user *)uptr, &rx->u32[1]); + break; + } + break; + } + adjust_psw_addr(&regs->psw, ilen); + switch (rc) { + case EMU_ILLEGAL_OP: + regs->int_code = ilen << 16 | 0x0001; + do_report_trap(regs, SIGILL, ILL_ILLOPC, NULL); + break; + case EMU_SPECIFICATION: + regs->int_code = ilen << 16 | 0x0006; + do_report_trap(regs, SIGILL, ILL_ILLOPC , NULL); + break; + case EMU_ADDRESSING: + regs->int_code = ilen << 16 | 0x0005; + do_report_trap(regs, SIGSEGV, SEGV_MAPERR, NULL); + break; + } +} + +bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + if ((psw_bits(regs->psw).eaba == PSW_AMODE_24BIT) || + ((psw_bits(regs->psw).eaba == PSW_AMODE_31BIT) && + !is_compat_task())) { + regs->psw.addr = __rewind_psw(regs->psw, UPROBE_SWBP_INSN_SIZE); + do_report_trap(regs, SIGILL, ILL_ILLADR, NULL); + return true; + } + if (probe_is_insn_relative_long(auprobe->insn)) { + handle_insn_ril(auprobe, regs); + return true; + } + return false; +} -- cgit v0.10.2 From 48e9a6c1f54695609b709bf674aac133794ada00 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 24 Sep 2014 16:37:20 +0200 Subject: s390/topology: call set_sched_topology early The call to topology_init is too late for the set_sched_topology call. The initial scheduling domain structure has already been established with default topology array. Use the smp_cpus_done() call to get the s390 specific topology array registered early enough. 
Cc: stable@vger.kernel.org # v3.16+ Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 355a16c..b93bed7 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -464,15 +464,17 @@ static struct sched_domain_topology_level s390_topology[] = { static int __init topology_init(void) { - if (!MACHINE_HAS_TOPOLOGY) { + if (MACHINE_HAS_TOPOLOGY) + set_topology_timer(); + else topology_update_polarization_simple(); - goto out; - } - set_topology_timer(); -out: - - set_sched_topology(s390_topology); - return device_create_file(cpu_subsys.dev_root, &dev_attr_dispatching); } device_initcall(topology_init); + +static int __init early_topology_init(void) +{ + set_sched_topology(s390_topology); + return 0; +} +early_initcall(early_topology_init); -- cgit v0.10.2 From 242a112af62ea73ce507cbe76c2c944c23b6a1e3 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 26 Sep 2014 09:23:20 +0200 Subject: s390/setup: correct 4-level kernel page table detection Fix calculation to decide if a 4-level kernel page table is required. Git commit c972cc60c23f5a63 "s390/vmalloc: have separate modules area" added the separate module area which reduces the size of the vmalloc area but fails to take it into account for the 3 vs 4 level page table decision. 
Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 82bc113..cdfc060 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -452,8 +452,8 @@ static void __init setup_memory_end(void) #ifdef CONFIG_64BIT vmalloc_size = VMALLOC_END ?: (128UL << 30) - MODULES_LEN; tmp = (memory_end ?: max_physmem_end) / PAGE_SIZE; - tmp = tmp * (sizeof(struct page) + PAGE_SIZE) + vmalloc_size; - if (tmp <= (1UL << 42)) + tmp = tmp * (sizeof(struct page) + PAGE_SIZE); + if (tmp + vmalloc_size + MODULES_LEN <= (1UL << 42)) vmax = 1UL << 42; /* 3-level kernel page table */ else vmax = 1UL << 53; /* 4-level kernel page table */ -- cgit v0.10.2 From cfb0b24143b4f587ff3e3bd829f9f471285d097b Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 23 Sep 2014 21:29:20 +0200 Subject: s390/mm: make use of ipte range facility Invalidate several pte entries at once if the ipte range facility is available. Currently this works only for DEBUG_PAGEALLOC, where several ptes (up to 2 ^ MAX_ORDER) may be invalidated at once. 
Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 7c4af56..2de229c 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -1053,6 +1053,22 @@ static inline void __ptep_ipte_local(unsigned long address, pte_t *ptep) : "=m" (*ptep) : "m" (*ptep), "a" (pto), "a" (address)); } +static inline void __ptep_ipte_range(unsigned long address, int nr, pte_t *ptep) +{ + unsigned long pto = (unsigned long) ptep; + +#ifndef CONFIG_64BIT + /* pto in ESA mode must point to the start of the segment table */ + pto &= 0x7ffffc00; +#endif + /* Invalidate a range of ptes + global TLB flush of the ptes */ + do { + asm volatile( + " .insn rrf,0xb2210000,%2,%0,%1,0" + : "+a" (address), "+a" (nr) : "a" (pto) : "memory"); + } while (nr != 255); +} + static inline void ptep_flush_direct(struct mm_struct *mm, unsigned long address, pte_t *ptep) { diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index 8400f49..3fef3b2 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -103,27 +104,50 @@ int set_memory_x(unsigned long addr, int numpages) } #ifdef CONFIG_DEBUG_PAGEALLOC + +static void ipte_range(pte_t *pte, unsigned long address, int nr) +{ + int i; + + if (test_facility(13) && IS_ENABLED(CONFIG_64BIT)) { + __ptep_ipte_range(address, nr - 1, pte); + return; + } + for (i = 0; i < nr; i++) { + __ptep_ipte(address, pte); + address += PAGE_SIZE; + pte++; + } +} + void kernel_map_pages(struct page *page, int numpages, int enable) { unsigned long address; + int nr, i, j; pgd_t *pgd; pud_t *pud; pmd_t *pmd; pte_t *pte; - int i; - for (i = 0; i < numpages; i++) { + for (i = 0; i < numpages;) { address = page_to_phys(page + i); pgd = pgd_offset_k(address); pud = pud_offset(pgd, address); pmd = pmd_offset(pud, address); pte = pte_offset_kernel(pmd, address); - if (!enable) { 
- __ptep_ipte(address, pte); - pte_val(*pte) = _PAGE_INVALID; - continue; + nr = (unsigned long)pte >> ilog2(sizeof(long)); + nr = PTRS_PER_PTE - (nr & (PTRS_PER_PTE - 1)); + nr = min(numpages - i, nr); + if (enable) { + for (j = 0; j < nr; j++) { + pte_val(*pte) = __pa(address); + address += PAGE_SIZE; + pte++; + } + } else { + ipte_range(pte, address, nr); } - pte_val(*pte) = __pa(address); + i += nr; } } -- cgit v0.10.2 From 362ce84f43aac61589a8b60e5bb3fcfae9801b13 Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Wed, 1 Oct 2014 13:04:54 +0200 Subject: s390/dasd: fix infinite loop during format Error recovery requests may not be cleaned up correctly so that other needed erp requests can not be build because of insufficient memory. This would lead to an infinite loop trying to build erp requests. Signed-off-by: Stefan Haberland Signed-off-by: Martin Schwidefsky diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 5df05f2..f0895f4 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -2261,8 +2261,8 @@ static inline int _wait_for_wakeup_queue(struct list_head *ccw_queue) static int _dasd_sleep_on_queue(struct list_head *ccw_queue, int interruptible) { struct dasd_device *device; - int rc; struct dasd_ccw_req *cqr, *n; + int rc; retry: list_for_each_entry_safe(cqr, n, ccw_queue, blocklist) { @@ -2310,21 +2310,26 @@ retry: /* * for alias devices simplify error recovery and * return to upper layer + * do not skip ERP requests */ - if (cqr->startdev != cqr->basedev && + if (cqr->startdev != cqr->basedev && !cqr->refers && (cqr->status == DASD_CQR_TERMINATED || cqr->status == DASD_CQR_NEED_ERP)) return -EAGAIN; - else { - /* normal recovery for basedev IO */ - if (__dasd_sleep_on_erp(cqr)) { - if (!cqr->status == DASD_CQR_TERMINATED && - !cqr->status == DASD_CQR_NEED_ERP) - break; - rc = 1; - } + + /* normal recovery for basedev IO */ + if (__dasd_sleep_on_erp(cqr)) { + goto retry; + /* remember that ERP was needed */ + rc = 
1; + /* skip processing for active cqr */ + if (cqr->status != DASD_CQR_TERMINATED && + cqr->status != DASD_CQR_NEED_ERP) + break; } } + + /* start ERP requests in upper loop */ if (rc) goto retry; -- cgit v0.10.2 From 5db8440c36a3308649b99f65b68195394dd7fed4 Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Wed, 1 Oct 2014 14:39:47 +0200 Subject: s390/dasd: add support for control unit initiated reconfiguration Add support for Control Unit Initiated Reconfiguration (CUIR) to Linux, a storage server interface to reconcile concurrent hardware changes between storage and host. Reviewed-by: Stefan Weinhuber Signed-off-by: Stefan Haberland Signed-off-by: Martin Schwidefsky diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index f0895f4..329db99 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -1660,6 +1660,14 @@ void dasd_int_handler(struct ccw_device *cdev, unsigned long intparm, device->discipline->check_for_device_change(device, cqr, irb); dasd_put_device(device); } + + /* check for for attention message */ + if (scsw_dstat(&irb->scsw) & DEV_STAT_ATTENTION) { + device = dasd_device_from_cdev_locked(cdev); + device->discipline->check_attention(device, irb->esw.esw1.lpum); + dasd_put_device(device); + } + if (!cqr) return; diff --git a/drivers/s390/block/dasd_devmap.c b/drivers/s390/block/dasd_devmap.c index 14ba80b..8286f74 100644 --- a/drivers/s390/block/dasd_devmap.c +++ b/drivers/s390/block/dasd_devmap.c @@ -1432,6 +1432,29 @@ static ssize_t dasd_reservation_state_store(struct device *dev, static DEVICE_ATTR(last_known_reservation_state, 0644, dasd_reservation_state_show, dasd_reservation_state_store); +static ssize_t dasd_pm_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct dasd_device *device; + u8 opm, nppm, cablepm, cuirpm, hpfpm; + + device = dasd_device_from_cdev(to_ccwdev(dev)); + if (IS_ERR(device)) + return sprintf(buf, "0\n"); + + opm = device->path_data.opm; + nppm = 
device->path_data.npm; + cablepm = device->path_data.cablepm; + cuirpm = device->path_data.cuirpm; + hpfpm = device->path_data.hpfpm; + dasd_put_device(device); + + return sprintf(buf, "%02x %02x %02x %02x %02x\n", opm, nppm, + cablepm, cuirpm, hpfpm); +} + +static DEVICE_ATTR(path_masks, 0444, dasd_pm_show, NULL); + static struct attribute * dasd_attrs[] = { &dev_attr_readonly.attr, &dev_attr_discipline.attr, @@ -1450,6 +1473,7 @@ static struct attribute * dasd_attrs[] = { &dev_attr_reservation_policy.attr, &dev_attr_last_known_reservation_state.attr, &dev_attr_safe_offline.attr, + &dev_attr_path_masks.attr, NULL, }; diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 51dea7b..d47f5b9 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -29,6 +29,8 @@ #include #include #include +#include +#include #include "dasd_int.h" #include "dasd_eckd.h" @@ -112,6 +114,12 @@ struct path_verification_work_data { static struct path_verification_work_data *path_verification_worker; static DEFINE_MUTEX(dasd_path_verification_mutex); +struct check_attention_work_data { + struct work_struct worker; + struct dasd_device *device; + __u8 lpum; +}; + /* initial attempt at a probe function. 
this can be simplified once * the other detection code is gone */ static int @@ -1126,6 +1134,7 @@ static int dasd_eckd_read_conf(struct dasd_device *device) "device %s instead of %s\n", lpm, print_path_uid, print_device_uid); path_err = -EINVAL; + path_data->cablepm |= lpm; continue; } @@ -1141,6 +1150,13 @@ static int dasd_eckd_read_conf(struct dasd_device *device) break; } path_data->opm |= lpm; + /* + * if the path is used + * it should not be in one of the negative lists + */ + path_data->cablepm &= ~lpm; + path_data->hpfpm &= ~lpm; + path_data->cuirpm &= ~lpm; if (conf_data != private->conf_data) kfree(conf_data); @@ -1230,7 +1246,7 @@ static void do_path_verification_work(struct work_struct *work) struct dasd_eckd_private path_private; struct dasd_uid *uid; __u8 path_rcd_buf[DASD_ECKD_RCD_DATA_SIZE]; - __u8 lpm, opm, npm, ppm, epm; + __u8 lpm, opm, npm, ppm, epm, hpfpm, cablepm; unsigned long flags; char print_uid[60]; int rc; @@ -1248,6 +1264,9 @@ static void do_path_verification_work(struct work_struct *work) npm = 0; ppm = 0; epm = 0; + hpfpm = 0; + cablepm = 0; + for (lpm = 0x80; lpm; lpm >>= 1) { if (!(lpm & data->tbvpm)) continue; @@ -1289,6 +1308,7 @@ static void do_path_verification_work(struct work_struct *work) opm &= ~lpm; npm &= ~lpm; ppm &= ~lpm; + hpfpm |= lpm; continue; } @@ -1350,6 +1370,7 @@ static void do_path_verification_work(struct work_struct *work) opm &= ~lpm; npm &= ~lpm; ppm &= ~lpm; + cablepm |= lpm; continue; } } @@ -1364,12 +1385,21 @@ static void do_path_verification_work(struct work_struct *work) spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags); if (!device->path_data.opm && opm) { device->path_data.opm = opm; + device->path_data.cablepm &= ~opm; + device->path_data.cuirpm &= ~opm; + device->path_data.hpfpm &= ~opm; dasd_generic_path_operational(device); - } else + } else { device->path_data.opm |= opm; + device->path_data.cablepm &= ~opm; + device->path_data.cuirpm &= ~opm; + device->path_data.hpfpm &= ~opm; + } 
device->path_data.npm |= npm; device->path_data.ppm |= ppm; device->path_data.tbvpm |= epm; + device->path_data.cablepm |= cablepm; + device->path_data.hpfpm |= hpfpm; spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags); } @@ -4475,6 +4505,343 @@ out_err: return -1; } +static int dasd_eckd_read_message_buffer(struct dasd_device *device, + struct dasd_rssd_messages *messages, + __u8 lpum) +{ + struct dasd_rssd_messages *message_buf; + struct dasd_psf_prssd_data *prssdp; + struct dasd_eckd_private *private; + struct dasd_ccw_req *cqr; + struct ccw1 *ccw; + int rc; + + private = (struct dasd_eckd_private *) device->private; + cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, 1 /* PSF */ + 1 /* RSSD */, + (sizeof(struct dasd_psf_prssd_data) + + sizeof(struct dasd_rssd_messages)), + device); + if (IS_ERR(cqr)) { + DBF_EVENT_DEVID(DBF_WARNING, device->cdev, "%s", + "Could not allocate read message buffer request"); + return PTR_ERR(cqr); + } + + cqr->startdev = device; + cqr->memdev = device; + cqr->block = NULL; + cqr->retries = 256; + cqr->expires = 10 * HZ; + + /* we need to check for messages on exactly this path */ + set_bit(DASD_CQR_VERIFY_PATH, &cqr->flags); + cqr->lpm = lpum; + + /* Prepare for Read Subsystem Data */ + prssdp = (struct dasd_psf_prssd_data *) cqr->data; + memset(prssdp, 0, sizeof(struct dasd_psf_prssd_data)); + prssdp->order = PSF_ORDER_PRSSD; + prssdp->suborder = 0x03; /* Message Buffer */ + /* all other bytes of prssdp must be zero */ + + ccw = cqr->cpaddr; + ccw->cmd_code = DASD_ECKD_CCW_PSF; + ccw->count = sizeof(struct dasd_psf_prssd_data); + ccw->flags |= CCW_FLAG_CC; + ccw->flags |= CCW_FLAG_SLI; + ccw->cda = (__u32)(addr_t) prssdp; + + /* Read Subsystem Data - message buffer */ + message_buf = (struct dasd_rssd_messages *) (prssdp + 1); + memset(message_buf, 0, sizeof(struct dasd_rssd_messages)); + + ccw++; + ccw->cmd_code = DASD_ECKD_CCW_RSSD; + ccw->count = sizeof(struct dasd_rssd_messages); + ccw->flags |= CCW_FLAG_SLI; + ccw->cda = 
(__u32)(addr_t) message_buf; + + cqr->buildclk = get_tod_clock(); + cqr->status = DASD_CQR_FILLED; + rc = dasd_sleep_on_immediatly(cqr); + if (rc == 0) { + prssdp = (struct dasd_psf_prssd_data *) cqr->data; + message_buf = (struct dasd_rssd_messages *) + (prssdp + 1); + memcpy(messages, message_buf, + sizeof(struct dasd_rssd_messages)); + } else + DBF_EVENT_DEVID(DBF_WARNING, device->cdev, + "Reading messages failed with rc=%d\n" + , rc); + dasd_sfree_request(cqr, cqr->memdev); + return rc; +} + +/* + * Perform Subsystem Function - CUIR response + */ +static int +dasd_eckd_psf_cuir_response(struct dasd_device *device, int response, + __u32 message_id, + struct channel_path_desc *desc, + struct subchannel_id sch_id) +{ + struct dasd_psf_cuir_response *psf_cuir; + struct dasd_ccw_req *cqr; + struct ccw1 *ccw; + int rc; + + cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, 1 /* PSF */ , + sizeof(struct dasd_psf_cuir_response), + device); + + if (IS_ERR(cqr)) { + DBF_DEV_EVENT(DBF_WARNING, device, "%s", + "Could not allocate PSF-CUIR request"); + return PTR_ERR(cqr); + } + + psf_cuir = (struct dasd_psf_cuir_response *)cqr->data; + psf_cuir->order = PSF_ORDER_CUIR_RESPONSE; + psf_cuir->cc = response; + if (desc) + psf_cuir->chpid = desc->chpid; + psf_cuir->message_id = message_id; + psf_cuir->cssid = sch_id.cssid; + psf_cuir->ssid = sch_id.ssid; + + ccw = cqr->cpaddr; + ccw->cmd_code = DASD_ECKD_CCW_PSF; + ccw->cda = (__u32)(addr_t)psf_cuir; + ccw->count = sizeof(struct dasd_psf_cuir_response); + + cqr->startdev = device; + cqr->memdev = device; + cqr->block = NULL; + cqr->retries = 256; + cqr->expires = 10*HZ; + cqr->buildclk = get_tod_clock(); + cqr->status = DASD_CQR_FILLED; + + rc = dasd_sleep_on(cqr); + + dasd_sfree_request(cqr, cqr->memdev); + return rc; +} + +static int dasd_eckd_cuir_change_state(struct dasd_device *device, __u8 lpum) +{ + unsigned long flags; + __u8 tbcpm; + + spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags); + tbcpm = device->path_data.opm & 
~lpum; + if (tbcpm) { + device->path_data.opm = tbcpm; + device->path_data.cuirpm |= lpum; + } + spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags); + return tbcpm ? 0 : PSF_CUIR_LAST_PATH; +} + +/* + * walk through all devices and quiesce them + * if it is the last path return error + * + * if only part of the devices are quiesced and an error + * occurs no onlining necessary, the storage server will + * notify the already set offline devices again + */ +static int dasd_eckd_cuir_quiesce(struct dasd_device *device, __u8 lpum, + struct channel_path_desc *desc, + struct subchannel_id sch_id) +{ + struct alias_pav_group *pavgroup, *tempgroup; + struct dasd_eckd_private *private; + struct dasd_device *dev, *n; + int rc; + + private = (struct dasd_eckd_private *) device->private; + rc = 0; + + /* active devices */ + list_for_each_entry_safe(dev, n, + &private->lcu->active_devices, + alias_list) { + rc = dasd_eckd_cuir_change_state(dev, lpum); + if (rc) + goto out; + } + + /* inactive devices */ + list_for_each_entry_safe(dev, n, + &private->lcu->inactive_devices, + alias_list) { + rc = dasd_eckd_cuir_change_state(dev, lpum); + if (rc) + goto out; + } + + /* devices in PAV groups */ + list_for_each_entry_safe(pavgroup, tempgroup, + &private->lcu->grouplist, group) { + list_for_each_entry_safe(dev, n, &pavgroup->baselist, + alias_list) { + rc = dasd_eckd_cuir_change_state(dev, lpum); + if (rc) + goto out; + } + list_for_each_entry_safe(dev, n, &pavgroup->aliaslist, + alias_list) { + rc = dasd_eckd_cuir_change_state(dev, lpum); + if (rc) + goto out; + } + } + + pr_warn("Service on the storage server caused path %x.%02x to go offline", + sch_id.cssid, desc ? 
desc->chpid : 0); + rc = PSF_CUIR_COMPLETED; +out: + return rc; +} + +static int dasd_eckd_cuir_resume(struct dasd_device *device, __u8 lpum, + struct channel_path_desc *desc, + struct subchannel_id sch_id) +{ + struct alias_pav_group *pavgroup, *tempgroup; + struct dasd_eckd_private *private; + struct dasd_device *dev, *n; + + pr_info("Path %x.%02x is back online after service on the storage server", + sch_id.cssid, desc ? desc->chpid : 0); + private = (struct dasd_eckd_private *) device->private; + + /* + * the path may have been added through a generic path event before + * only trigger path verification if the path is not already in use + */ + + list_for_each_entry_safe(dev, n, + &private->lcu->active_devices, + alias_list) { + if (!(dev->path_data.opm & lpum)) { + dev->path_data.tbvpm |= lpum; + dasd_schedule_device_bh(dev); + } + } + + list_for_each_entry_safe(dev, n, + &private->lcu->inactive_devices, + alias_list) { + if (!(dev->path_data.opm & lpum)) { + dev->path_data.tbvpm |= lpum; + dasd_schedule_device_bh(dev); + } + } + + /* devices in PAV groups */ + list_for_each_entry_safe(pavgroup, tempgroup, + &private->lcu->grouplist, + group) { + list_for_each_entry_safe(dev, n, + &pavgroup->baselist, + alias_list) { + if (!(dev->path_data.opm & lpum)) { + dev->path_data.tbvpm |= lpum; + dasd_schedule_device_bh(dev); + } + } + list_for_each_entry_safe(dev, n, + &pavgroup->aliaslist, + alias_list) { + if (!(dev->path_data.opm & lpum)) { + dev->path_data.tbvpm |= lpum; + dasd_schedule_device_bh(dev); + } + } + } + return PSF_CUIR_COMPLETED; +} + +static void dasd_eckd_handle_cuir(struct dasd_device *device, void *messages, + __u8 lpum) +{ + struct dasd_cuir_message *cuir = messages; + struct channel_path_desc *desc; + struct subchannel_id sch_id; + int pos, response; + ccw_device_get_schid(device->cdev, &sch_id); + + /* get position of path in mask */ + pos = 8 - ffs(lpum); + /* get channel path descriptor from this position */ + desc = 
ccw_device_get_chp_desc(device->cdev, pos); + + if (cuir->code == CUIR_QUIESCE) { + /* quiesce */ + response = dasd_eckd_cuir_quiesce(device, lpum, desc, sch_id); + } else if (cuir->code == CUIR_RESUME) { + /* resume */ + response = dasd_eckd_cuir_resume(device, lpum, desc, sch_id); + } else + response = PSF_CUIR_NOT_SUPPORTED; + + dasd_eckd_psf_cuir_response(device, response, cuir->message_id, + desc, sch_id); + + /* free descriptor copy */ + kfree(desc); +} + +static void dasd_eckd_check_attention_work(struct work_struct *work) +{ + struct check_attention_work_data *data; + struct dasd_rssd_messages *messages; + struct dasd_device *device; + int rc; + + data = container_of(work, struct check_attention_work_data, worker); + device = data->device; + + messages = kzalloc(sizeof(*messages), GFP_KERNEL); + if (!messages) { + DBF_DEV_EVENT(DBF_WARNING, device, "%s", + "Could not allocate attention message buffer"); + goto out; + } + + rc = dasd_eckd_read_message_buffer(device, messages, data->lpum); + if (rc) + goto out; + + if (messages->length == ATTENTION_LENGTH_CUIR && + messages->format == ATTENTION_FORMAT_CUIR) + dasd_eckd_handle_cuir(device, messages, data->lpum); + +out: + dasd_put_device(device); + kfree(messages); + kfree(data); +} + +static int dasd_eckd_check_attention(struct dasd_device *device, __u8 lpum) +{ + struct check_attention_work_data *data; + + data = kzalloc(sizeof(*data), GFP_ATOMIC); + if (!data) + return -ENOMEM; + INIT_WORK(&data->worker, dasd_eckd_check_attention_work); + dasd_get_device(device); + data->device = device; + data->lpum = lpum; + schedule_work(&data->worker); + return 0; +} + static struct ccw_driver dasd_eckd_driver = { .driver = { .name = "dasd-eckd", @@ -4539,6 +4906,7 @@ static struct dasd_discipline dasd_eckd_discipline = { .reload = dasd_eckd_reload_device, .get_uid = dasd_eckd_get_uid, .kick_validate = dasd_eckd_kick_validate_server, + .check_attention = dasd_eckd_check_attention, }; static int __init diff --git 
a/drivers/s390/block/dasd_eckd.h b/drivers/s390/block/dasd_eckd.h index 2555e49..ddab7df 100644 --- a/drivers/s390/block/dasd_eckd.h +++ b/drivers/s390/block/dasd_eckd.h @@ -51,8 +51,35 @@ /* * Perform Subsystem Function / Sub-Orders */ -#define PSF_ORDER_PRSSD 0x18 -#define PSF_ORDER_SSC 0x1D +#define PSF_ORDER_PRSSD 0x18 +#define PSF_ORDER_CUIR_RESPONSE 0x1A +#define PSF_ORDER_SSC 0x1D + +/* + * CUIR response condition codes + */ +#define PSF_CUIR_INVALID 0x00 +#define PSF_CUIR_COMPLETED 0x01 +#define PSF_CUIR_NOT_SUPPORTED 0x02 +#define PSF_CUIR_ERROR_IN_REQ 0x03 +#define PSF_CUIR_DENIED 0x04 +#define PSF_CUIR_LAST_PATH 0x05 +#define PSF_CUIR_DEVICE_ONLINE 0x06 +#define PSF_CUIR_VARY_FAILURE 0x07 +#define PSF_CUIR_SOFTWARE_FAILURE 0x08 +#define PSF_CUIR_NOT_RECOGNIZED 0x09 + +/* + * CUIR codes + */ +#define CUIR_QUIESCE 0x01 +#define CUIR_RESUME 0x02 + +/* + * attention message definitions + */ +#define ATTENTION_LENGTH_CUIR 0x0e +#define ATTENTION_FORMAT_CUIR 0x01 /* * Size that is reportet for large volumes in the old 16-bit no_cyl field @@ -342,6 +369,38 @@ struct dasd_rssd_features { char feature[256]; } __attribute__((packed)); +struct dasd_rssd_messages { + __u16 length; + __u8 format; + __u8 code; + __u32 message_id; + __u8 flags; + char messages[4087]; +} __packed; + +struct dasd_cuir_message { + __u16 length; + __u8 format; + __u8 code; + __u32 message_id; + __u8 flags; + __u8 neq_map[3]; + __u8 ned_map; + __u8 record_selector; +} __packed; + +struct dasd_psf_cuir_response { + __u8 order; + __u8 flags; + __u8 cc; + __u8 chpid; + __u16 device_nr; + __u16 reserved; + __u32 message_id; + __u64 system_id; + __u8 cssid; + __u8 ssid; +} __packed; /* * Perform Subsystem Function - Prepare for Read Subsystem Data diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h index c201701..8b5d410 100644 --- a/drivers/s390/block/dasd_int.h +++ b/drivers/s390/block/dasd_int.h @@ -357,6 +357,7 @@ struct dasd_discipline { int (*get_uid) (struct 
dasd_device *, struct dasd_uid *); void (*kick_validate) (struct dasd_device *); + int (*check_attention)(struct dasd_device *, __u8); }; extern struct dasd_discipline *dasd_diag_discipline_pointer; @@ -382,6 +383,10 @@ struct dasd_path { __u8 tbvpm; __u8 ppm; __u8 npm; + /* paths that are not used because of a special condition */ + __u8 cablepm; /* miss-cabled */ + __u8 hpfpm; /* the HPF requirements of the other paths are not met */ + __u8 cuirpm; /* CUIR varied offline */ }; struct dasd_profile_info { @@ -501,7 +506,10 @@ struct dasd_block { struct dasd_profile profile; }; - +struct dasd_attention_data { + struct dasd_device *device; + __u8 lpum; +}; /* reasons why device (ccw_device_start) was stopped */ #define DASD_STOPPED_NOT_ACC 1 /* not accessible */ -- cgit v0.10.2 From a9b1649917f0d2058022eda06082f9d299a06354 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 1 Oct 2014 10:44:40 +0200 Subject: s390/vtime: do not reset idle data on CPU hotplug The sysfs attributes /sys/devices/system/cpu/cpu0/idle_count and /sys/devices/system/cpu/cpu0/idle_time_us are reset to zero every time a CPU is set online. The idle and iowait fields in /proc/stat corresponding to idle_time_us are not reset. To make things consistent do not reset the data for the sysfs attributes. 
Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index 2461202..32587cc 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -23,7 +23,6 @@ static DEFINE_PER_CPU(struct cpuid, cpu_id); */ void cpu_init(void) { - struct s390_idle_data *idle = &__get_cpu_var(s390_idle); struct cpuid *id = &__get_cpu_var(cpu_id); get_cpu_id(id); @@ -31,7 +30,6 @@ void cpu_init(void) current->active_mm = &init_mm; BUG_ON(current->mm); enter_lazy_tlb(&init_mm, current); - memset(idle, 0, sizeof(*idle)); } /* -- cgit v0.10.2 From fe0f49768d807a8fe6336b097feb8c4441951710 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 30 Sep 2014 17:37:52 +0200 Subject: s390/nohz: use a per-cpu flag for arch_needs_cpu Move the nohz_delay bit from the s390_idle data structure to the per-cpu flags. Clear the nohz delay flag in __cpu_disable and remove the cpu hotplug notifier that used to do this. Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h index f65bd36..01887b1 100644 --- a/arch/s390/include/asm/cputime.h +++ b/arch/s390/include/asm/cputime.h @@ -166,7 +166,6 @@ static inline clock_t cputime64_to_clock_t(cputime64_t cputime) } struct s390_idle_data { - int nohz_delay; unsigned int sequence; unsigned long long idle_count; unsigned long long idle_time; @@ -182,11 +181,4 @@ cputime64_t s390_get_idle_time(int cpu); #define arch_idle_time(cpu) s390_get_idle_time(cpu) -static inline int s390_nohz_delay(int cpu) -{ - return __get_cpu_var(s390_idle).nohz_delay != 0; -} - -#define arch_needs_cpu(cpu) s390_nohz_delay(cpu) - #endif /* _S390_CPUTIME_H */ diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index e568fc8..bc796d7 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -13,9 +13,11 @@ #define CIF_MCCK_PENDING 0 /* machine check handling is pending */ #define CIF_ASCE 1 /* 
user asce needs fixup / uaccess */ +#define CIF_NOHZ_DELAY 2 /* delay HZ disable for a tick */ #define _CIF_MCCK_PENDING (1<int_code; if (ext_code.code != EXT_IRQ_CLK_COMP) - __get_cpu_var(s390_idle).nohz_delay = 1; + set_cpu_flag(CIF_NOHZ_DELAY); index = ext_hash(ext_code.code); rcu_read_lock(); diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index abec97b..46317d6 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -720,6 +720,7 @@ int __cpu_disable(void) cregs[6] &= ~0xff000000UL; /* disable all I/O interrupts */ cregs[14] &= ~0x1f000000UL; /* disable most machine checks */ __ctl_load(cregs, 0, 15); + clear_cpu_flag(CIF_NOHZ_DELAY); return 0; } diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 8c34363..4070982 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -163,7 +163,7 @@ void __kprobes vtime_stop_cpu(void) /* Wait for external, I/O or machine check interrupt. */ psw_mask = PSW_KERNEL_BITS | PSW_MASK_WAIT | PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK; - idle->nohz_delay = 0; + clear_cpu_flag(CIF_NOHZ_DELAY); /* Call the assembler magic in entry.S */ psw_idle(idle, psw_mask); @@ -378,25 +378,8 @@ void init_cpu_vtimer(void) set_vtimer(VTIMER_MAX_SLICE); } -static int s390_nohz_notify(struct notifier_block *self, unsigned long action, - void *hcpu) -{ - struct s390_idle_data *idle; - long cpu = (long) hcpu; - - idle = &per_cpu(s390_idle, cpu); - switch (action & ~CPU_TASKS_FROZEN) { - case CPU_DYING: - idle->nohz_delay = 0; - default: - break; - } - return NOTIFY_OK; -} - void __init vtime_init(void) { /* Enable cpu timer interrupts on the boot cpu. 
*/ init_cpu_vtimer(); - cpu_notifier(s390_nohz_notify, 0); } diff --git a/drivers/s390/cio/airq.c b/drivers/s390/cio/airq.c index 00bfbee..56eb4ee 100644 --- a/drivers/s390/cio/airq.c +++ b/drivers/s390/cio/airq.c @@ -87,7 +87,7 @@ static irqreturn_t do_airq_interrupt(int irq, void *dummy) struct airq_struct *airq; struct hlist_head *head; - __this_cpu_write(s390_idle.nohz_delay, 1); + set_cpu_flag(CIF_NOHZ_DELAY); tpi_info = (struct tpi_info *) &get_irq_regs()->int_code; head = &airq_lists[tpi_info->isc]; rcu_read_lock(); diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c index 2905d8b..d5a6f28 100644 --- a/drivers/s390/cio/cio.c +++ b/drivers/s390/cio/cio.c @@ -561,7 +561,7 @@ static irqreturn_t do_cio_interrupt(int irq, void *dummy) struct subchannel *sch; struct irb *irb; - __this_cpu_write(s390_idle.nohz_delay, 1); + set_cpu_flag(CIF_NOHZ_DELAY); tpi_info = (struct tpi_info *) &get_irq_regs()->int_code; irb = &__get_cpu_var(cio_irb); sch = (struct subchannel *)(unsigned long) tpi_info->intparm; diff --git a/include/linux/tick.h b/include/linux/tick.h index 9a82c7d..e5832d0 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -108,7 +108,7 @@ extern struct tick_sched *tick_get_tick_sched(int cpu); extern void tick_irq_enter(void); extern int tick_oneshot_mode_active(void); # ifndef arch_needs_cpu -# define arch_needs_cpu(cpu) (0) +# define arch_needs_cpu() (0) # endif # else static inline void tick_clock_notify(void) { } diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index f654a8a..01d512f 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -572,7 +572,7 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, } while (read_seqretry(&jiffies_lock, seq)); if (rcu_needs_cpu(cpu, &rcu_delta_jiffies) || - arch_needs_cpu(cpu) || irq_work_needs_cpu()) { + arch_needs_cpu() || irq_work_needs_cpu()) { next_jiffies = last_jiffies + 1; delta_jiffies = 1; } else { -- cgit v0.10.2 From 
b5f87f15e20092c060f465b283b07a76af7f2e5f Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 1 Oct 2014 10:57:57 +0200 Subject: s390/idle: consolidate idle functions and definitions Move the C functions and definitions related to the idle state handling to arch/s390/include/asm/idle.h and arch/s390/kernel/idle.c. The function s390_get_idle_time is renamed to arch_cpu_idle_time and vtime_stop_cpu to enabled_wait. Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h index 01887b1..1c016a5 100644 --- a/arch/s390/include/asm/cputime.h +++ b/arch/s390/include/asm/cputime.h @@ -8,8 +8,6 @@ #define _S390_CPUTIME_H #include -#include -#include #include @@ -165,20 +163,8 @@ static inline clock_t cputime64_to_clock_t(cputime64_t cputime) return clock; } -struct s390_idle_data { - unsigned int sequence; - unsigned long long idle_count; - unsigned long long idle_time; - unsigned long long clock_idle_enter; - unsigned long long clock_idle_exit; - unsigned long long timer_idle_enter; - unsigned long long timer_idle_exit; -}; +cputime64_t arch_cpu_idle_time(int cpu); -DECLARE_PER_CPU(struct s390_idle_data, s390_idle); - -cputime64_t s390_get_idle_time(int cpu); - -#define arch_idle_time(cpu) s390_get_idle_time(cpu) +#define arch_idle_time(cpu) arch_cpu_idle_time(cpu) #endif /* _S390_CPUTIME_H */ diff --git a/arch/s390/include/asm/idle.h b/arch/s390/include/asm/idle.h new file mode 100644 index 0000000..6af037f --- /dev/null +++ b/arch/s390/include/asm/idle.h @@ -0,0 +1,26 @@ +/* + * Copyright IBM Corp. 
2014 + * + * Author: Martin Schwidefsky + */ + +#ifndef _S390_IDLE_H +#define _S390_IDLE_H + +#include +#include + +struct s390_idle_data { + unsigned int sequence; + unsigned long long idle_count; + unsigned long long idle_time; + unsigned long long clock_idle_enter; + unsigned long long clock_idle_exit; + unsigned long long timer_idle_enter; + unsigned long long timer_idle_exit; +}; + +extern struct device_attribute dev_attr_idle_count; +extern struct device_attribute dev_attr_idle_time_us; + +#endif /* _S390_IDLE_H */ diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index bc796d7..3d08710 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -289,7 +289,12 @@ static inline unsigned long __rewind_psw(psw_t psw, unsigned long ilc) return (psw.addr - ilc) & mask; #endif } - + +/* + * Function to stop a processor until the next interrupt occurs + */ +void enabled_wait(void); + /* * Function to drop a processor into disabled wait state */ diff --git a/arch/s390/include/asm/vtimer.h b/arch/s390/include/asm/vtimer.h index bfe25d5..10a179a 100644 --- a/arch/s390/include/asm/vtimer.h +++ b/arch/s390/include/asm/vtimer.h @@ -28,6 +28,4 @@ extern int del_virt_timer(struct vtimer_list *timer); extern void init_cpu_vtimer(void); extern void vtime_init(void); -extern void vtime_stop_cpu(void); - #endif /* _ASM_S390_TIMER_H */ diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 3249e1f..c249785 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -28,7 +28,7 @@ CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"' CFLAGS_sysinfo.o += -Iinclude/math-emu -Iarch/s390/math-emu -w -obj-y := traps.o time.o process.o base.o early.o setup.o vtime.o +obj-y := traps.o time.o process.o base.o early.o setup.o idle.o vtime.o obj-y += processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o obj-y += debug.o irq.o ipl.o dis.o diag.o sclp.o vdso.o obj-y += sysinfo.o 
jump_label.o lgr.o os_info.o machine_kexec.o pgm_check.o diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 3e9e479..ef279a1 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index 5854163..cd68869 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -4,7 +4,7 @@ #include #include #include -#include +#include extern void *restart_stack; extern unsigned long suspend_zero_pages; diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c new file mode 100644 index 0000000..c846aee --- /dev/null +++ b/arch/s390/kernel/idle.c @@ -0,0 +1,124 @@ +/* + * Idle functions for s390. + * + * Copyright IBM Corp. 2014 + * + * Author(s): Martin Schwidefsky + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "entry.h" + +static DEFINE_PER_CPU(struct s390_idle_data, s390_idle); + +void __kprobes enabled_wait(void) +{ + struct s390_idle_data *idle = &__get_cpu_var(s390_idle); + unsigned long long idle_time; + unsigned long psw_mask; + + trace_hardirqs_on(); + + /* Wait for external, I/O or machine check interrupt. */ + psw_mask = PSW_KERNEL_BITS | PSW_MASK_WAIT | PSW_MASK_DAT | + PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK; + clear_cpu_flag(CIF_NOHZ_DELAY); + + /* Call the assembler magic in entry.S */ + psw_idle(idle, psw_mask); + + /* Account time spent with enabled wait psw loaded as idle time. 
*/ + idle->sequence++; + smp_wmb(); + idle_time = idle->clock_idle_exit - idle->clock_idle_enter; + idle->clock_idle_enter = idle->clock_idle_exit = 0ULL; + idle->idle_time += idle_time; + idle->idle_count++; + account_idle_time(idle_time); + smp_wmb(); + idle->sequence++; +} + +static ssize_t show_idle_count(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id); + unsigned long long idle_count; + unsigned int sequence; + + do { + sequence = ACCESS_ONCE(idle->sequence); + idle_count = ACCESS_ONCE(idle->idle_count); + if (ACCESS_ONCE(idle->clock_idle_enter)) + idle_count++; + } while ((sequence & 1) || (ACCESS_ONCE(idle->sequence) != sequence)); + return sprintf(buf, "%llu\n", idle_count); +} +DEVICE_ATTR(idle_count, 0444, show_idle_count, NULL); + +static ssize_t show_idle_time(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id); + unsigned long long now, idle_time, idle_enter, idle_exit; + unsigned int sequence; + + do { + now = get_tod_clock(); + sequence = ACCESS_ONCE(idle->sequence); + idle_time = ACCESS_ONCE(idle->idle_time); + idle_enter = ACCESS_ONCE(idle->clock_idle_enter); + idle_exit = ACCESS_ONCE(idle->clock_idle_exit); + } while ((sequence & 1) || (ACCESS_ONCE(idle->sequence) != sequence)); + idle_time += idle_enter ? ((idle_exit ? : now) - idle_enter) : 0; + return sprintf(buf, "%llu\n", idle_time >> 12); +} +DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL); + +cputime64_t arch_cpu_idle_time(int cpu) +{ + struct s390_idle_data *idle = &per_cpu(s390_idle, cpu); + unsigned long long now, idle_enter, idle_exit; + unsigned int sequence; + + do { + now = get_tod_clock(); + sequence = ACCESS_ONCE(idle->sequence); + idle_enter = ACCESS_ONCE(idle->clock_idle_enter); + idle_exit = ACCESS_ONCE(idle->clock_idle_exit); + } while ((sequence & 1) || (ACCESS_ONCE(idle->sequence) != sequence)); + return idle_enter ? 
((idle_exit ?: now) - idle_enter) : 0; +} + +void arch_cpu_idle_enter(void) +{ + local_mcck_disable(); +} + +void arch_cpu_idle(void) +{ + if (!test_cpu_flag(CIF_MCCK_PENDING)) + /* Halt the cpu and keep track of cpu time accounting. */ + enabled_wait(); + local_irq_enable(); +} + +void arch_cpu_idle_exit(void) +{ + local_mcck_enable(); + if (test_cpu_flag(CIF_MCCK_PENDING)) + s390_handle_mcck(); +} + +void arch_cpu_idle_dead(void) +{ + cpu_die(); +} diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 93b9ca4..ed84cc2 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -61,30 +61,6 @@ unsigned long thread_saved_pc(struct task_struct *tsk) return sf->gprs[8]; } -void arch_cpu_idle(void) -{ - local_mcck_disable(); - if (test_cpu_flag(CIF_MCCK_PENDING)) { - local_mcck_enable(); - local_irq_enable(); - return; - } - /* Halt the cpu and keep track of cpu time accounting. */ - vtime_stop_cpu(); - local_irq_enable(); -} - -void arch_cpu_idle_exit(void) -{ - if (test_cpu_flag(CIF_MCCK_PENDING)) - s390_handle_mcck(); -} - -void arch_cpu_idle_dead(void) -{ - cpu_die(); -} - extern void __kprobes kernel_thread_starter(void); /* diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 46317d6..bba0e246 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -45,6 +45,7 @@ #include #include #include +#include #include "entry.h" enum { @@ -661,7 +662,7 @@ static void smp_start_secondary(void *cpuvoid) cpu_init(); preempt_disable(); init_cpu_timer(); - init_cpu_vtimer(); + vtime_init(); pfault_init(); notify_cpu_starting(smp_processor_id()); set_cpu_online(smp_processor_id(), true); @@ -893,42 +894,6 @@ static struct attribute_group cpu_common_attr_group = { .attrs = cpu_common_attrs, }; -static ssize_t show_idle_count(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id); - unsigned long long idle_count; - unsigned int sequence; - - do { 
- sequence = ACCESS_ONCE(idle->sequence); - idle_count = ACCESS_ONCE(idle->idle_count); - if (ACCESS_ONCE(idle->clock_idle_enter)) - idle_count++; - } while ((sequence & 1) || (ACCESS_ONCE(idle->sequence) != sequence)); - return sprintf(buf, "%llu\n", idle_count); -} -static DEVICE_ATTR(idle_count, 0444, show_idle_count, NULL); - -static ssize_t show_idle_time(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id); - unsigned long long now, idle_time, idle_enter, idle_exit; - unsigned int sequence; - - do { - now = get_tod_clock(); - sequence = ACCESS_ONCE(idle->sequence); - idle_time = ACCESS_ONCE(idle->idle_time); - idle_enter = ACCESS_ONCE(idle->clock_idle_enter); - idle_exit = ACCESS_ONCE(idle->clock_idle_exit); - } while ((sequence & 1) || (ACCESS_ONCE(idle->sequence) != sequence)); - idle_time += idle_enter ? ((idle_exit ? : now) - idle_enter) : 0; - return sprintf(buf, "%llu\n", idle_time >> 12); -} -static DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL); - static struct attribute *cpu_online_attrs[] = { &dev_attr_idle_count.attr, &dev_attr_idle_time_us.attr, diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 4070982..416f2a3 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -6,27 +6,18 @@ */ #include -#include -#include #include #include #include #include #include -#include -#include -#include #include #include #include -#include -#include "entry.h" static void virt_timer_expire(void); -DEFINE_PER_CPU(struct s390_idle_data, s390_idle); - static LIST_HEAD(virt_timer_list); static DEFINE_SPINLOCK(virt_timer_lock); static atomic64_t virt_timer_current; @@ -152,49 +143,6 @@ void vtime_account_system(struct task_struct *tsk) __attribute__((alias("vtime_account_irq_enter"))); EXPORT_SYMBOL_GPL(vtime_account_system); -void __kprobes vtime_stop_cpu(void) -{ - struct s390_idle_data *idle = &__get_cpu_var(s390_idle); - unsigned long long idle_time; 
- unsigned long psw_mask; - - trace_hardirqs_on(); - - /* Wait for external, I/O or machine check interrupt. */ - psw_mask = PSW_KERNEL_BITS | PSW_MASK_WAIT | PSW_MASK_DAT | - PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK; - clear_cpu_flag(CIF_NOHZ_DELAY); - - /* Call the assembler magic in entry.S */ - psw_idle(idle, psw_mask); - - /* Account time spent with enabled wait psw loaded as idle time. */ - idle->sequence++; - smp_wmb(); - idle_time = idle->clock_idle_exit - idle->clock_idle_enter; - idle->clock_idle_enter = idle->clock_idle_exit = 0ULL; - idle->idle_time += idle_time; - idle->idle_count++; - account_idle_time(idle_time); - smp_wmb(); - idle->sequence++; -} - -cputime64_t s390_get_idle_time(int cpu) -{ - struct s390_idle_data *idle = &per_cpu(s390_idle, cpu); - unsigned long long now, idle_enter, idle_exit; - unsigned int sequence; - - do { - now = get_tod_clock(); - sequence = ACCESS_ONCE(idle->sequence); - idle_enter = ACCESS_ONCE(idle->clock_idle_enter); - idle_exit = ACCESS_ONCE(idle->clock_idle_exit); - } while ((sequence & 1) || (ACCESS_ONCE(idle->sequence) != sequence)); - return idle_enter ? ((idle_exit ?: now) - idle_enter) : 0; -} - /* * Sorted add to a list. List is linear searched until first bigger * element is found. @@ -372,14 +320,8 @@ EXPORT_SYMBOL(del_virt_timer); /* * Start the virtual CPU timer on the current CPU. */ -void init_cpu_vtimer(void) +void vtime_init(void) { /* set initial cpu timer */ set_vtimer(VTIMER_MAX_SLICE); } - -void __init vtime_init(void) -{ - /* Enable cpu timer interrupts on the boot cpu. 
*/ - init_cpu_vtimer(); -} diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c index a9f3d00..16dc42d 100644 --- a/arch/s390/lib/delay.c +++ b/arch/s390/lib/delay.c @@ -43,7 +43,7 @@ static void __udelay_disabled(unsigned long long usecs) lockdep_off(); do { set_clock_comparator(end); - vtime_stop_cpu(); + enabled_wait(); } while (get_tod_clock_fast() < end); lockdep_on(); __ctl_load(cr0, 0, 0); @@ -62,7 +62,7 @@ static void __udelay_enabled(unsigned long long usecs) clock_saved = local_tick_disable(); set_clock_comparator(end); } - vtime_stop_cpu(); + enabled_wait(); if (clock_saved) local_tick_enable(clock_saved); } while (get_tod_clock_fast() < end); -- cgit v0.10.2 From 42f4dd613fe808676126472bbe1283e452201148 Mon Sep 17 00:00:00 2001 From: Ingo Tuchscherer Date: Thu, 2 Oct 2014 14:48:46 +0200 Subject: s390/zcrypt: Toleration of new crypto hardware The zcrypt device driver will accept the new crypto adapter in toleration mode. A new sysfs attribute 'raw_hwtype' will expose the raw hardware type. 
Signed-off-by: Ingo Tuchscherer Signed-off-by: Harald Freudenberger diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index 51e6aa0..9948541 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -664,6 +664,17 @@ static ssize_t ap_hwtype_show(struct device *dev, } static DEVICE_ATTR(hwtype, 0444, ap_hwtype_show, NULL); + +static ssize_t ap_raw_hwtype_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct ap_device *ap_dev = to_ap_dev(dev); + + return snprintf(buf, PAGE_SIZE, "%d\n", ap_dev->raw_hwtype); +} + +static DEVICE_ATTR(raw_hwtype, 0444, ap_raw_hwtype_show, NULL); + static ssize_t ap_depth_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -734,6 +745,7 @@ static DEVICE_ATTR(ap_functions, 0444, ap_functions_show, NULL); static struct attribute *ap_dev_attrs[] = { &dev_attr_hwtype.attr, + &dev_attr_raw_hwtype.attr, &dev_attr_depth.attr, &dev_attr_request_count.attr, &dev_attr_requestq_count.attr, @@ -1417,9 +1429,13 @@ static void ap_scan_bus(struct work_struct *unused) continue; } break; + case 11: + ap_dev->device_type = 10; + break; default: ap_dev->device_type = device_type; } + ap_dev->raw_hwtype = device_type; rc = ap_query_functions(qid, &device_functions); if (!rc) diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h index db92e9f..055a0f9 100644 --- a/drivers/s390/crypto/ap_bus.h +++ b/drivers/s390/crypto/ap_bus.h @@ -161,6 +161,7 @@ struct ap_device { ap_qid_t qid; /* AP queue id. */ int queue_depth; /* AP queue depth.*/ int device_type; /* AP device type. */ + int raw_hwtype; /* AP raw hardware type. */ unsigned int functions; /* AP device function bitfield. */ int unregistered; /* marks AP device as unregistered */ struct timer_list timeout; /* Timer for request timeouts. 
*/ -- cgit v0.10.2 From 8070361799ae1e3f4ef347bd10f0a508ac10acfb Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 6 Oct 2014 17:53:53 +0200 Subject: s390: add support for vector extension The vector extension introduces 32 128-bit vector registers and a set of instructions to operate on the vector registers. The kernel can control the use of vector registers for the problem state program with a bit in control register 0. Once enabled for a process the kernel needs to retain the content of the vector registers on context switch. The signal frame is extended to include the vector registers. Two new register sets NT_S390_VXRS_LOW and NT_S390_VXRS_HIGH are added to the regset interface for the debugger and core dumps. Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h index 78f4f87..27735ae 100644 --- a/arch/s390/include/asm/elf.h +++ b/arch/s390/include/asm/elf.h @@ -102,6 +102,7 @@ #define HWCAP_S390_ETF3EH 256 #define HWCAP_S390_HIGH_GPRS 512 #define HWCAP_S390_TE 1024 +#define HWCAP_S390_VXRS 2048 /* * These are used to set parameters in the core dumps. 
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 4349197..d812cf1 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -310,7 +310,10 @@ struct _lowcore { /* Extended facility list */ __u64 stfle_fac_list[32]; /* 0x0f00 */ - __u8 pad_0x1000[0x11b8-0x1000]; /* 0x1000 */ + __u8 pad_0x1000[0x11b0-0x1000]; /* 0x1000 */ + + /* Pointer to vector register save area */ + __u64 vector_save_area_addr; /* 0x11b0 */ /* 64 bit extparam used for pfault/diag 250: defined by architecture */ __u64 ext_params2; /* 0x11B8 */ @@ -334,9 +337,10 @@ struct _lowcore { /* Transaction abort diagnostic block */ __u8 pgm_tdb[256]; /* 0x1800 */ + __u8 pad_0x1900[0x1c00-0x1900]; /* 0x1900 */ - /* align to the top of the prefix area */ - __u8 pad_0x1900[0x2000-0x1900]; /* 0x1900 */ + /* Software defined save area for vector registers */ + __u8 vector_save_area[1024]; /* 0x1c00 */ } __packed; #endif /* CONFIG_32BIT */ diff --git a/arch/s390/include/asm/nmi.h b/arch/s390/include/asm/nmi.h index 35f8ec1..3027a5a 100644 --- a/arch/s390/include/asm/nmi.h +++ b/arch/s390/include/asm/nmi.h @@ -38,7 +38,7 @@ struct mci { __u32 pm : 1; /* 22 psw program mask and cc validity */ __u32 ia : 1; /* 23 psw instruction address validity */ __u32 fa : 1; /* 24 failing storage address validity */ - __u32 : 1; /* 25 */ + __u32 vr : 1; /* 25 vector register validity */ __u32 ec : 1; /* 26 external damage code validity */ __u32 fp : 1; /* 27 floating point register validity */ __u32 gr : 1; /* 28 general register validity */ diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 3d08710..d559bdb 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -117,6 +117,7 @@ struct thread_struct { int ri_signum; #ifdef CONFIG_64BIT unsigned char trap_tdb[256]; /* Transaction abort diagnose block */ + __vector128 *vxrs; /* Vector register save area */ #endif }; diff --git 
a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index dbde7c2..7736fdd 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -56,6 +56,7 @@ extern void detect_memory_memblock(void); #define MACHINE_FLAG_TOPOLOGY (1UL << 14) #define MACHINE_FLAG_TE (1UL << 15) #define MACHINE_FLAG_TLB_LC (1UL << 17) +#define MACHINE_FLAG_VX (1UL << 18) #define MACHINE_IS_VM (S390_lowcore.machine_flags & MACHINE_FLAG_VM) #define MACHINE_IS_KVM (S390_lowcore.machine_flags & MACHINE_FLAG_KVM) @@ -78,6 +79,7 @@ extern void detect_memory_memblock(void); #define MACHINE_HAS_TOPOLOGY (0) #define MACHINE_HAS_TE (0) #define MACHINE_HAS_TLB_LC (0) +#define MACHINE_HAS_VX (0) #else /* CONFIG_64BIT */ #define MACHINE_HAS_IEEE (1) #define MACHINE_HAS_CSP (1) @@ -90,6 +92,7 @@ extern void detect_memory_memblock(void); #define MACHINE_HAS_TOPOLOGY (S390_lowcore.machine_flags & MACHINE_FLAG_TOPOLOGY) #define MACHINE_HAS_TE (S390_lowcore.machine_flags & MACHINE_FLAG_TE) #define MACHINE_HAS_TLB_LC (S390_lowcore.machine_flags & MACHINE_FLAG_TLB_LC) +#define MACHINE_HAS_VX (S390_lowcore.machine_flags & MACHINE_FLAG_VX) #endif /* CONFIG_64BIT */ /* diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h index 18ea9e3..0e01095 100644 --- a/arch/s390/include/asm/switch_to.h +++ b/arch/s390/include/asm/switch_to.h @@ -103,6 +103,48 @@ static inline void restore_fp_regs(freg_t *fprs) asm volatile("ld 15,%0" : : "Q" (fprs[15])); } +static inline void save_vx_regs(__vector128 *vxrs) +{ + typedef struct { __vector128 _[__NUM_VXRS]; } addrtype; + + asm volatile( + " la 1,%0\n" + " .word 0xe70f,0x1000,0x003e\n" /* vstm 0,15,0(1) */ + " .word 0xe70f,0x1100,0x0c3e\n" /* vstm 16,31,256(1) */ + : "=Q" (*(addrtype *) vxrs) : : "1"); +} + +static inline void restore_vx_regs(__vector128 *vxrs) +{ + typedef struct { __vector128 _[__NUM_VXRS]; } addrtype; + + asm volatile( + " la 1,%0\n" + " .word 0xe70f,0x1000,0x0036\n" /* vlm 0,15,0(1) */ + " .word 
0xe70f,0x1100,0x0c36\n" /* vlm 16,31,256(1) */ + : : "Q" (*(addrtype *) vxrs) : "1"); +} + +static inline void save_fp_vx_regs(struct task_struct *task) +{ +#ifdef CONFIG_64BIT + if (task->thread.vxrs) + save_vx_regs(task->thread.vxrs); + else +#endif + save_fp_regs(task->thread.fp_regs.fprs); +} + +static inline void restore_fp_vx_regs(struct task_struct *task) +{ +#ifdef CONFIG_64BIT + if (task->thread.vxrs) + restore_vx_regs(task->thread.vxrs); + else +#endif + restore_fp_regs(task->thread.fp_regs.fprs); +} + static inline void save_access_regs(unsigned int *acrs) { typedef struct { int _[NUM_ACRS]; } acrstype; @@ -120,16 +162,16 @@ static inline void restore_access_regs(unsigned int *acrs) #define switch_to(prev,next,last) do { \ if (prev->mm) { \ save_fp_ctl(&prev->thread.fp_regs.fpc); \ - save_fp_regs(prev->thread.fp_regs.fprs); \ + save_fp_vx_regs(prev); \ save_access_regs(&prev->thread.acrs[0]); \ save_ri_cb(prev->thread.ri_cb); \ } \ if (next->mm) { \ + update_cr_regs(next); \ restore_fp_ctl(&next->thread.fp_regs.fpc); \ - restore_fp_regs(next->thread.fp_regs.fprs); \ + restore_fp_vx_regs(next); \ restore_access_regs(&next->thread.acrs[0]); \ restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb); \ - update_cr_regs(next); \ } \ prev = __switch_to(prev,next); \ } while (0) diff --git a/arch/s390/include/uapi/asm/sigcontext.h b/arch/s390/include/uapi/asm/sigcontext.h index b30de9c..5f0b8d7 100644 --- a/arch/s390/include/uapi/asm/sigcontext.h +++ b/arch/s390/include/uapi/asm/sigcontext.h @@ -7,10 +7,14 @@ #define _ASM_S390_SIGCONTEXT_H #include +#include -#define __NUM_GPRS 16 -#define __NUM_FPRS 16 -#define __NUM_ACRS 16 +#define __NUM_GPRS 16 +#define __NUM_FPRS 16 +#define __NUM_ACRS 16 +#define __NUM_VXRS 32 +#define __NUM_VXRS_LOW 16 +#define __NUM_VXRS_HIGH 16 #ifndef __s390x__ @@ -59,6 +63,16 @@ typedef struct _s390_fp_regs fpregs; } _sigregs; +typedef struct +{ +#ifndef __s390x__ + unsigned long gprs_high[__NUM_GPRS]; +#endif + unsigned long long 
vxrs_low[__NUM_VXRS_LOW]; + __vector128 vxrs_high[__NUM_VXRS_HIGH]; + unsigned char __reserved[128]; +} _sigregs_ext; + struct sigcontext { unsigned long oldmask[_SIGCONTEXT_NSIG_WORDS]; diff --git a/arch/s390/include/uapi/asm/types.h b/arch/s390/include/uapi/asm/types.h index 038f2b9..3c3951e 100644 --- a/arch/s390/include/uapi/asm/types.h +++ b/arch/s390/include/uapi/asm/types.h @@ -17,6 +17,10 @@ typedef unsigned long addr_t; typedef __signed__ long saddr_t; +typedef struct { + __u32 u[4]; +} __vector128; + #endif /* __ASSEMBLY__ */ #endif /* _UAPI_S390_TYPES_H */ diff --git a/arch/s390/include/uapi/asm/ucontext.h b/arch/s390/include/uapi/asm/ucontext.h index 3e077b2..64a69aa 100644 --- a/arch/s390/include/uapi/asm/ucontext.h +++ b/arch/s390/include/uapi/asm/ucontext.h @@ -7,10 +7,15 @@ #ifndef _ASM_S390_UCONTEXT_H #define _ASM_S390_UCONTEXT_H -#define UC_EXTENDED 0x00000001 - -#ifndef __s390x__ +#define UC_GPRS_HIGH 1 /* uc_mcontext_ext has valid high gprs */ +#define UC_VXRS 2 /* uc_mcontext_ext has valid vector regs */ +/* + * The struct ucontext_extended describes how the registers are stored + * on a rt signal frame. Please note that the structure is not fixed, + * if new CPU registers are added to the user state the size of the + * struct ucontext_extended will increase. + */ struct ucontext_extended { unsigned long uc_flags; struct ucontext *uc_link; @@ -19,11 +24,9 @@ struct ucontext_extended { sigset_t uc_sigmask; /* Allow for uc_sigmask growth. Glibc uses a 1024-bit sigset_t. 
*/ unsigned char __unused[128 - sizeof(sigset_t)]; - unsigned long uc_gprs_high[16]; + _sigregs_ext uc_mcontext_ext; }; -#endif - struct ucontext { unsigned long uc_flags; struct ucontext *uc_link; diff --git a/arch/s390/kernel/compat_linux.h b/arch/s390/kernel/compat_linux.h index 70d4b7c..a0a886c 100644 --- a/arch/s390/kernel/compat_linux.h +++ b/arch/s390/kernel/compat_linux.h @@ -50,6 +50,14 @@ typedef struct _s390_fp_regs32 fpregs; } _sigregs32; +typedef struct +{ + __u32 gprs_high[__NUM_GPRS]; + __u64 vxrs_low[__NUM_VXRS_LOW]; + __vector128 vxrs_high[__NUM_VXRS_HIGH]; + __u8 __reserved[128]; +} _sigregs_ext32; + #define _SIGCONTEXT_NSIG32 64 #define _SIGCONTEXT_NSIG_BPW32 32 #define __SIGNAL_FRAMESIZE32 96 @@ -72,6 +80,7 @@ struct ucontext32 { compat_sigset_t uc_sigmask; /* Allow for uc_sigmask growth. Glibc uses a 1024-bit sigset_t. */ unsigned char __unused[128 - sizeof(compat_sigset_t)]; + _sigregs_ext32 uc_mcontext_ext; }; struct stat64_emu31; diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c index 598b0b4..009f5eb 100644 --- a/arch/s390/kernel/compat_signal.c +++ b/arch/s390/kernel/compat_signal.c @@ -36,17 +36,16 @@ typedef struct struct sigcontext32 sc; _sigregs32 sregs; int signo; - __u32 gprs_high[NUM_GPRS]; - __u8 retcode[S390_SYSCALL_SIZE]; + _sigregs_ext32 sregs_ext; + __u16 svc_insn; /* Offset of svc_insn is NOT fixed! */ } sigframe32; typedef struct { __u8 callee_used_stack[__SIGNAL_FRAMESIZE32]; - __u8 retcode[S390_SYSCALL_SIZE]; + __u16 svc_insn; compat_siginfo_t info; struct ucontext32 uc; - __u32 gprs_high[NUM_GPRS]; } rt_sigframe32; int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from) @@ -151,6 +150,38 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) return err ? 
-EFAULT : 0; } +/* Store registers needed to create the signal frame */ +static void store_sigregs(void) +{ + int i; + + save_access_regs(current->thread.acrs); + save_fp_ctl(&current->thread.fp_regs.fpc); + if (current->thread.vxrs) { + save_vx_regs(current->thread.vxrs); + for (i = 0; i < __NUM_FPRS; i++) + current->thread.fp_regs.fprs[i] = + *(freg_t *)(current->thread.vxrs + i); + } else + save_fp_regs(current->thread.fp_regs.fprs); +} + +/* Load registers after signal return */ +static void load_sigregs(void) +{ + int i; + + restore_access_regs(current->thread.acrs); + /* restore_fp_ctl is done in restore_sigregs */ + if (current->thread.vxrs) { + for (i = 0; i < __NUM_FPRS; i++) + *(freg_t *)(current->thread.vxrs + i) = + current->thread.fp_regs.fprs[i]; + restore_vx_regs(current->thread.vxrs); + } else + restore_fp_regs(current->thread.fp_regs.fprs); +} + static int save_sigregs32(struct pt_regs *regs, _sigregs32 __user *sregs) { _sigregs32 user_sregs; @@ -163,11 +194,8 @@ static int save_sigregs32(struct pt_regs *regs, _sigregs32 __user *sregs) (__u32)(regs->psw.mask & PSW_MASK_BA); for (i = 0; i < NUM_GPRS; i++) user_sregs.regs.gprs[i] = (__u32) regs->gprs[i]; - save_access_regs(current->thread.acrs); memcpy(&user_sregs.regs.acrs, current->thread.acrs, sizeof(user_sregs.regs.acrs)); - save_fp_ctl(&current->thread.fp_regs.fpc); - save_fp_regs(current->thread.fp_regs.fprs); memcpy(&user_sregs.fpregs, &current->thread.fp_regs, sizeof(user_sregs.fpregs)); if (__copy_to_user(sregs, &user_sregs, sizeof(_sigregs32))) @@ -207,37 +235,67 @@ static int restore_sigregs32(struct pt_regs *regs,_sigregs32 __user *sregs) regs->gprs[i] = (__u64) user_sregs.regs.gprs[i]; memcpy(&current->thread.acrs, &user_sregs.regs.acrs, sizeof(current->thread.acrs)); - restore_access_regs(current->thread.acrs); memcpy(&current->thread.fp_regs, &user_sregs.fpregs, sizeof(current->thread.fp_regs)); - restore_fp_regs(current->thread.fp_regs.fprs); clear_pt_regs_flag(regs, PIF_SYSCALL); /* No longer in a system call */
return 0; } -static int save_sigregs_gprs_high(struct pt_regs *regs, __u32 __user *uregs) +static int save_sigregs_ext32(struct pt_regs *regs, + _sigregs_ext32 __user *sregs_ext) { __u32 gprs_high[NUM_GPRS]; + __u64 vxrs[__NUM_VXRS_LOW]; int i; + /* Save high gprs to signal stack */ for (i = 0; i < NUM_GPRS; i++) gprs_high[i] = regs->gprs[i] >> 32; - if (__copy_to_user(uregs, &gprs_high, sizeof(gprs_high))) + if (__copy_to_user(&sregs_ext->gprs_high, &gprs_high, + sizeof(sregs_ext->gprs_high))) return -EFAULT; + + /* Save vector registers to signal stack */ + if (current->thread.vxrs) { + for (i = 0; i < __NUM_VXRS_LOW; i++) + vxrs[i] = *((__u64 *)(current->thread.vxrs + i) + 1); + if (__copy_to_user(&sregs_ext->vxrs_low, vxrs, + sizeof(sregs_ext->vxrs_low)) || + __copy_to_user(&sregs_ext->vxrs_high, + current->thread.vxrs + __NUM_VXRS_LOW, + sizeof(sregs_ext->vxrs_high))) + return -EFAULT; + } return 0; } -static int restore_sigregs_gprs_high(struct pt_regs *regs, __u32 __user *uregs) +static int restore_sigregs_ext32(struct pt_regs *regs, + _sigregs_ext32 __user *sregs_ext) { __u32 gprs_high[NUM_GPRS]; + __u64 vxrs[__NUM_VXRS_LOW]; int i; - if (__copy_from_user(&gprs_high, uregs, sizeof(gprs_high))) + /* Restore high gprs from signal stack */ + if (__copy_from_user(&gprs_high, &sregs_ext->gprs_high, + sizeof(&sregs_ext->gprs_high))) return -EFAULT; for (i = 0; i < NUM_GPRS; i++) *(__u32 *)&regs->gprs[i] = gprs_high[i]; + + /* Restore vector registers from signal stack */ + if (current->thread.vxrs) { + if (__copy_from_user(vxrs, &sregs_ext->vxrs_low, + sizeof(sregs_ext->vxrs_low)) || + __copy_from_user(current->thread.vxrs + __NUM_VXRS_LOW, + &sregs_ext->vxrs_high, + sizeof(sregs_ext->vxrs_high))) + return -EFAULT; + for (i = 0; i < __NUM_VXRS_LOW; i++) + *((__u64 *)(current->thread.vxrs + i) + 1) = vxrs[i]; + } return 0; } @@ -252,8 +310,9 @@ COMPAT_SYSCALL_DEFINE0(sigreturn) set_current_blocked(&set); if (restore_sigregs32(regs, &frame->sregs)) goto badframe; - if
(restore_sigregs_gprs_high(regs, frame->gprs_high)) + if (restore_sigregs_ext32(regs, &frame->sregs_ext)) goto badframe; + load_sigregs(); return regs->gprs[2]; badframe: force_sig(SIGSEGV, current); @@ -269,12 +328,13 @@ COMPAT_SYSCALL_DEFINE0(rt_sigreturn) if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) goto badframe; set_current_blocked(&set); + if (compat_restore_altstack(&frame->uc.uc_stack)) + goto badframe; if (restore_sigregs32(regs, &frame->uc.uc_mcontext)) goto badframe; - if (restore_sigregs_gprs_high(regs, frame->gprs_high)) + if (restore_sigregs_ext32(regs, &frame->uc.uc_mcontext_ext)) goto badframe; - if (compat_restore_altstack(&frame->uc.uc_stack)) - goto badframe; + load_sigregs(); return regs->gprs[2]; badframe: force_sig(SIGSEGV, current); @@ -324,37 +384,64 @@ static int setup_frame32(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs) { int sig = ksig->sig; - sigframe32 __user *frame = get_sigframe(&ksig->ka, regs, sizeof(sigframe32)); - + sigframe32 __user *frame; + struct sigcontext32 sc; + unsigned long restorer; + size_t frame_size; + + /* + * gprs_high are always present for 31-bit compat tasks. + * The space for vector registers is only allocated if + * the machine supports it + */ + frame_size = sizeof(*frame) - sizeof(frame->sregs_ext.__reserved); + if (!MACHINE_HAS_VX) + frame_size -= sizeof(frame->sregs_ext.vxrs_low) + + sizeof(frame->sregs_ext.vxrs_high); + frame = get_sigframe(&ksig->ka, regs, frame_size); if (frame == (void __user *) -1UL) return -EFAULT; - if (__copy_to_user(&frame->sc.oldmask, &set->sig, _SIGMASK_COPY_SIZE32)) + /* Set up backchain. 
*/ + if (__put_user(regs->gprs[15], (unsigned int __user *) frame)) + return -EFAULT; + + /* Create struct sigcontext32 on the signal stack */ + memcpy(&sc.oldmask, &set->sig, _SIGMASK_COPY_SIZE32); + sc.sregs = (__u32)(unsigned long __force) &frame->sregs; + if (__copy_to_user(&frame->sc, &sc, sizeof(frame->sc))) return -EFAULT; + /* Store registers needed to create the signal frame */ + store_sigregs(); + + /* Create _sigregs32 on the signal stack */ if (save_sigregs32(regs, &frame->sregs)) return -EFAULT; - if (save_sigregs_gprs_high(regs, frame->gprs_high)) + + /* Place signal number on stack to allow backtrace from handler. */ + if (__put_user(regs->gprs[2], (int __force __user *) &frame->signo)) return -EFAULT; - if (__put_user((unsigned long) &frame->sregs, &frame->sc.sregs)) + + /* Create _sigregs_ext32 on the signal stack */ + if (save_sigregs_ext32(regs, &frame->sregs_ext)) return -EFAULT; /* Set up to return from userspace. If provided, use a stub already in userspace. */ if (ksig->ka.sa.sa_flags & SA_RESTORER) { - regs->gprs[14] = (__u64 __force) ksig->ka.sa.sa_restorer | PSW32_ADDR_AMODE; + restorer = (unsigned long __force) + ksig->ka.sa.sa_restorer | PSW32_ADDR_AMODE; } else { - regs->gprs[14] = (__u64 __force) frame->retcode | PSW32_ADDR_AMODE; - if (__put_user(S390_SYSCALL_OPCODE | __NR_sigreturn, - (u16 __force __user *)(frame->retcode))) + /* Signal frames without vectors registers are short ! */ + __u16 __user *svc = (void *) frame + frame_size - 2; + if (__put_user(S390_SYSCALL_OPCODE | __NR_sigreturn, svc)) return -EFAULT; + restorer = (unsigned long __force) svc | PSW32_ADDR_AMODE; } - /* Set up backchain. */ - if (__put_user(regs->gprs[15], (unsigned int __user *) frame)) - return -EFAULT; - /* Set up registers for signal handler */ + regs->gprs[14] = restorer; regs->gprs[15] = (__force __u64) frame; /* Force 31 bit amode and default user address space control. 
*/ regs->psw.mask = PSW_MASK_BA | @@ -375,50 +462,69 @@ static int setup_frame32(struct ksignal *ksig, sigset_t *set, regs->gprs[6] = task_thread_info(current)->last_break; } - /* Place signal number on stack to allow backtrace from handler. */ - if (__put_user(regs->gprs[2], (int __force __user *) &frame->signo)) - return -EFAULT; return 0; } static int setup_rt_frame32(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs) { - int err = 0; - rt_sigframe32 __user *frame = get_sigframe(&ksig->ka, regs, sizeof(rt_sigframe32)); - + rt_sigframe32 __user *frame; + unsigned long restorer; + size_t frame_size; + u32 uc_flags; + + frame_size = sizeof(*frame) - + sizeof(frame->uc.uc_mcontext_ext.__reserved); + /* + * gprs_high are always present for 31-bit compat tasks. + * The space for vector registers is only allocated if + * the machine supports it + */ + uc_flags = UC_GPRS_HIGH; + if (MACHINE_HAS_VX) { + if (current->thread.vxrs) + uc_flags |= UC_VXRS; + } else + frame_size -= sizeof(frame->uc.uc_mcontext_ext.vxrs_low) + + sizeof(frame->uc.uc_mcontext_ext.vxrs_high); + frame = get_sigframe(&ksig->ka, regs, frame_size); if (frame == (void __user *) -1UL) return -EFAULT; - if (copy_siginfo_to_user32(&frame->info, &ksig->info)) - return -EFAULT; - - /* Create the ucontext. */ - err |= __put_user(UC_EXTENDED, &frame->uc.uc_flags); - err |= __put_user(0, &frame->uc.uc_link); - err |= __compat_save_altstack(&frame->uc.uc_stack, regs->gprs[15]); - err |= save_sigregs32(regs, &frame->uc.uc_mcontext); - err |= save_sigregs_gprs_high(regs, frame->gprs_high); - err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); - if (err) + /* Set up backchain. */ + if (__put_user(regs->gprs[15], (unsigned int __force __user *) frame)) return -EFAULT; /* Set up to return from userspace. If provided, use a stub already in userspace. 
*/ if (ksig->ka.sa.sa_flags & SA_RESTORER) { - regs->gprs[14] = (__u64 __force) ksig->ka.sa.sa_restorer | PSW32_ADDR_AMODE; + restorer = (unsigned long __force) + ksig->ka.sa.sa_restorer | PSW32_ADDR_AMODE; } else { - regs->gprs[14] = (__u64 __force) frame->retcode | PSW32_ADDR_AMODE; - if (__put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn, - (u16 __force __user *)(frame->retcode))) + __u16 __user *svc = &frame->svc_insn; + if (__put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn, svc)) return -EFAULT; + restorer = (unsigned long __force) svc | PSW32_ADDR_AMODE; } - /* Set up backchain. */ - if (__put_user(regs->gprs[15], (unsigned int __force __user *) frame)) + /* Create siginfo on the signal stack */ + if (copy_siginfo_to_user32(&frame->info, &ksig->info)) + return -EFAULT; + + /* Store registers needed to create the signal frame */ + store_sigregs(); + + /* Create ucontext on the signal stack. */ + if (__put_user(uc_flags, &frame->uc.uc_flags) || + __put_user(0, &frame->uc.uc_link) || + __compat_save_altstack(&frame->uc.uc_stack, regs->gprs[15]) || + save_sigregs32(regs, &frame->uc.uc_mcontext) || + __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)) || + save_sigregs_ext32(regs, &frame->uc.uc_mcontext_ext)) return -EFAULT; /* Set up registers for signal handler */ + regs->gprs[14] = restorer; regs->gprs[15] = (__force __u64) frame; /* Force 31 bit amode and default user address space control. 
*/ regs->psw.mask = PSW_MASK_BA | diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index f6c66b5..cef2879e 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -392,6 +392,8 @@ static __init void detect_machine_facilities(void) S390_lowcore.machine_flags |= MACHINE_FLAG_TE; if (test_facility(51)) S390_lowcore.machine_flags |= MACHINE_FLAG_TLB_LC; + if (test_facility(129)) + S390_lowcore.machine_flags |= MACHINE_FLAG_VX; #endif } diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index cd68869..0554b97 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -21,6 +21,8 @@ void psw_idle(struct s390_idle_data *, unsigned long); asmlinkage long do_syscall_trace_enter(struct pt_regs *regs); asmlinkage void do_syscall_trace_exit(struct pt_regs *regs); +int alloc_vector_registers(struct task_struct *tsk); + void do_protection_exception(struct pt_regs *regs); void do_dat_exception(struct pt_regs *regs); @@ -43,6 +45,7 @@ void special_op_exception(struct pt_regs *regs); void specification_exception(struct pt_regs *regs); void transaction_exception(struct pt_regs *regs); void translation_exception(struct pt_regs *regs); +void vector_exception(struct pt_regs *regs); void do_per_trap(struct pt_regs *regs); void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str); diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index 210e128..db96b41 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -20,6 +20,7 @@ #include #include #include +#include struct mcck_struct { int kill_task; @@ -163,6 +164,21 @@ static int notrace s390_revalidate_registers(struct mci *mci) " ld 15,120(%0)\n" : : "a" (fpt_save_area)); } + +#ifdef CONFIG_64BIT + /* Revalidate vector registers */ + if (MACHINE_HAS_VX && current->thread.vxrs) { + if (!mci->vr) { + /* + * Vector registers can't be restored and therefore + * the process needs to be terminated. 
+ */ + kill_task = 1; + } + restore_vx_regs((__vector128 *) + S390_lowcore.vector_save_area_addr); + } +#endif /* Revalidate access registers */ asm volatile( " lam 0,15,0(%0)" diff --git a/arch/s390/kernel/pgm_check.S b/arch/s390/kernel/pgm_check.S index 813ec72..f6f8886 100644 --- a/arch/s390/kernel/pgm_check.S +++ b/arch/s390/kernel/pgm_check.S @@ -49,7 +49,7 @@ PGM_CHECK_DEFAULT /* 17 */ PGM_CHECK_64BIT(transaction_exception) /* 18 */ PGM_CHECK_DEFAULT /* 19 */ PGM_CHECK_DEFAULT /* 1a */ -PGM_CHECK_DEFAULT /* 1b */ +PGM_CHECK_64BIT(vector_exception) /* 1b */ PGM_CHECK(space_switch_exception) /* 1c */ PGM_CHECK(hfp_sqrt_exception) /* 1d */ PGM_CHECK_DEFAULT /* 1e */ diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index 32587cc..edefead 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -39,7 +39,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) { static const char *hwcap_str[] = { "esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp", - "edat", "etf3eh", "highgprs", "te" + "edat", "etf3eh", "highgprs", "te", "vx" }; unsigned long n = (unsigned long) v - 1; int i; diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index fe99d6b..0ecfdb3 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -38,15 +38,6 @@ #define CREATE_TRACE_POINTS #include -enum s390_regset { - REGSET_GENERAL, - REGSET_FP, - REGSET_LAST_BREAK, - REGSET_TDB, - REGSET_SYSTEM_CALL, - REGSET_GENERAL_EXTENDED, -}; - void update_cr_regs(struct task_struct *task) { struct pt_regs *regs = task_pt_regs(task); @@ -55,27 +46,39 @@ void update_cr_regs(struct task_struct *task) #ifdef CONFIG_64BIT /* Take care of the enable/disable of transactional execution. */ - if (MACHINE_HAS_TE) { + if (MACHINE_HAS_TE || MACHINE_HAS_VX) { unsigned long cr, cr_new; __ctl_store(cr, 0, 0); - /* Set or clear transaction execution TXC bit 8. 
*/ - cr_new = cr | (1UL << 55); - if (task->thread.per_flags & PER_FLAG_NO_TE) - cr_new &= ~(1UL << 55); + cr_new = cr; + if (MACHINE_HAS_TE) { + /* Set or clear transaction execution TXC bit 8. */ + cr_new |= (1UL << 55); + if (task->thread.per_flags & PER_FLAG_NO_TE) + cr_new &= ~(1UL << 55); + } + if (MACHINE_HAS_VX) { + /* Enable/disable of vector extension */ + cr_new &= ~(1UL << 17); + if (task->thread.vxrs) + cr_new |= (1UL << 17); + } if (cr_new != cr) __ctl_load(cr_new, 0, 0); - /* Set or clear transaction execution TDC bits 62 and 63. */ - __ctl_store(cr, 2, 2); - cr_new = cr & ~3UL; - if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND) { - if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND_TEND) - cr_new |= 1UL; - else - cr_new |= 2UL; + if (MACHINE_HAS_TE) { + /* Set/clear transaction execution TDC bits 62/63. */ + __ctl_store(cr, 2, 2); + cr_new = cr & ~3UL; + if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND) { + if (task->thread.per_flags & + PER_FLAG_TE_ABORT_RAND_TEND) + cr_new |= 1UL; + else + cr_new |= 2UL; + } + if (cr_new != cr) + __ctl_load(cr_new, 2, 2); } - if (cr_new != cr) - __ctl_load(cr_new, 2, 2); } #endif /* Copy user specified PER registers */ @@ -926,7 +929,15 @@ static int s390_fpregs_get(struct task_struct *target, save_fp_ctl(&target->thread.fp_regs.fpc); save_fp_regs(target->thread.fp_regs.fprs); } +#ifdef CONFIG_64BIT + else if (target->thread.vxrs) { + int i; + for (i = 0; i < __NUM_VXRS_LOW; i++) + target->thread.fp_regs.fprs[i] = + *(freg_t *)(target->thread.vxrs + i); + } +#endif return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &target->thread.fp_regs, 0, -1); } @@ -960,9 +971,20 @@ static int s390_fpregs_set(struct task_struct *target, target->thread.fp_regs.fprs, offsetof(s390_fp_regs, fprs), -1); - if (rc == 0 && target == current) { - restore_fp_ctl(&target->thread.fp_regs.fpc); - restore_fp_regs(target->thread.fp_regs.fprs); + if (rc == 0) { + if (target == current) { + 
restore_fp_ctl(&target->thread.fp_regs.fpc); + restore_fp_regs(target->thread.fp_regs.fprs); + } +#ifdef CONFIG_64BIT + else if (target->thread.vxrs) { + int i; + + for (i = 0; i < __NUM_VXRS_LOW; i++) + *(freg_t *)(target->thread.vxrs + i) = + target->thread.fp_regs.fprs[i]; + } +#endif } return rc; @@ -1018,6 +1040,95 @@ static int s390_tdb_set(struct task_struct *target, return 0; } +static int s390_vxrs_active(struct task_struct *target, + const struct user_regset *regset) +{ + return !!target->thread.vxrs; +} + +static int s390_vxrs_low_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + __u64 vxrs[__NUM_VXRS_LOW]; + int i; + + if (target->thread.vxrs) { + if (target == current) + save_vx_regs(target->thread.vxrs); + for (i = 0; i < __NUM_VXRS_LOW; i++) + vxrs[i] = *((__u64 *)(target->thread.vxrs + i) + 1); + } else + memset(vxrs, 0, sizeof(vxrs)); + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1); +} + +static int s390_vxrs_low_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + __u64 vxrs[__NUM_VXRS_LOW]; + int i, rc; + + if (!target->thread.vxrs) { + rc = alloc_vector_registers(target); + if (rc) + return rc; + } else if (target == current) + save_vx_regs(target->thread.vxrs); + + rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1); + if (rc == 0) { + for (i = 0; i < __NUM_VXRS_LOW; i++) + *((__u64 *)(target->thread.vxrs + i) + 1) = vxrs[i]; + if (target == current) + restore_vx_regs(target->thread.vxrs); + } + + return rc; +} + +static int s390_vxrs_high_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + __vector128 vxrs[__NUM_VXRS_HIGH]; + + if (target->thread.vxrs) { + if (target == current) + save_vx_regs(target->thread.vxrs); + 
memcpy(vxrs, target->thread.vxrs + __NUM_VXRS_LOW, + sizeof(vxrs)); + } else + memset(vxrs, 0, sizeof(vxrs)); + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1); +} + +static int s390_vxrs_high_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int rc; + + if (!target->thread.vxrs) { + rc = alloc_vector_registers(target); + if (rc) + return rc; + } else if (target == current) + save_vx_regs(target->thread.vxrs); + + rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + target->thread.vxrs + __NUM_VXRS_LOW, 0, -1); + if (rc == 0 && target == current) + restore_vx_regs(target->thread.vxrs); + + return rc; +} + #endif static int s390_system_call_get(struct task_struct *target, @@ -1041,7 +1152,7 @@ static int s390_system_call_set(struct task_struct *target, } static const struct user_regset s390_regsets[] = { - [REGSET_GENERAL] = { + { .core_note_type = NT_PRSTATUS, .n = sizeof(s390_regs) / sizeof(long), .size = sizeof(long), @@ -1049,7 +1160,7 @@ static const struct user_regset s390_regsets[] = { .get = s390_regs_get, .set = s390_regs_set, }, - [REGSET_FP] = { + { .core_note_type = NT_PRFPREG, .n = sizeof(s390_fp_regs) / sizeof(long), .size = sizeof(long), @@ -1057,8 +1168,16 @@ static const struct user_regset s390_regsets[] = { .get = s390_fpregs_get, .set = s390_fpregs_set, }, + { + .core_note_type = NT_S390_SYSTEM_CALL, + .n = 1, + .size = sizeof(unsigned int), + .align = sizeof(unsigned int), + .get = s390_system_call_get, + .set = s390_system_call_set, + }, #ifdef CONFIG_64BIT - [REGSET_LAST_BREAK] = { + { .core_note_type = NT_S390_LAST_BREAK, .n = 1, .size = sizeof(long), @@ -1066,7 +1185,7 @@ static const struct user_regset s390_regsets[] = { .get = s390_last_break_get, .set = s390_last_break_set, }, - [REGSET_TDB] = { + { .core_note_type = NT_S390_TDB, .n = 1, .size = 256, @@ -1074,15 +1193,25 @@ static const struct user_regset 
s390_regsets[] = { .get = s390_tdb_get, .set = s390_tdb_set, }, -#endif - [REGSET_SYSTEM_CALL] = { - .core_note_type = NT_S390_SYSTEM_CALL, - .n = 1, - .size = sizeof(unsigned int), - .align = sizeof(unsigned int), - .get = s390_system_call_get, - .set = s390_system_call_set, + { + .core_note_type = NT_S390_VXRS_LOW, + .n = __NUM_VXRS_LOW, + .size = sizeof(__u64), + .align = sizeof(__u64), + .active = s390_vxrs_active, + .get = s390_vxrs_low_get, + .set = s390_vxrs_low_set, + }, + { + .core_note_type = NT_S390_VXRS_HIGH, + .n = __NUM_VXRS_HIGH, + .size = sizeof(__vector128), + .align = sizeof(__vector128), + .active = s390_vxrs_active, + .get = s390_vxrs_high_get, + .set = s390_vxrs_high_set, }, +#endif }; static const struct user_regset_view user_s390_view = { @@ -1247,7 +1376,7 @@ static int s390_compat_last_break_set(struct task_struct *target, } static const struct user_regset s390_compat_regsets[] = { - [REGSET_GENERAL] = { + { .core_note_type = NT_PRSTATUS, .n = sizeof(s390_compat_regs) / sizeof(compat_long_t), .size = sizeof(compat_long_t), @@ -1255,7 +1384,7 @@ static const struct user_regset s390_compat_regsets[] = { .get = s390_compat_regs_get, .set = s390_compat_regs_set, }, - [REGSET_FP] = { + { .core_note_type = NT_PRFPREG, .n = sizeof(s390_fp_regs) / sizeof(compat_long_t), .size = sizeof(compat_long_t), @@ -1263,7 +1392,15 @@ static const struct user_regset s390_compat_regsets[] = { .get = s390_fpregs_get, .set = s390_fpregs_set, }, - [REGSET_LAST_BREAK] = { + { + .core_note_type = NT_S390_SYSTEM_CALL, + .n = 1, + .size = sizeof(compat_uint_t), + .align = sizeof(compat_uint_t), + .get = s390_system_call_get, + .set = s390_system_call_set, + }, + { .core_note_type = NT_S390_LAST_BREAK, .n = 1, .size = sizeof(long), @@ -1271,7 +1408,7 @@ static const struct user_regset s390_compat_regsets[] = { .get = s390_compat_last_break_get, .set = s390_compat_last_break_set, }, - [REGSET_TDB] = { + { .core_note_type = NT_S390_TDB, .n = 1, .size = 256, @@ -1279,15 
+1416,25 @@ static const struct user_regset s390_compat_regsets[] = { .get = s390_tdb_get, .set = s390_tdb_set, }, - [REGSET_SYSTEM_CALL] = { - .core_note_type = NT_S390_SYSTEM_CALL, - .n = 1, - .size = sizeof(compat_uint_t), - .align = sizeof(compat_uint_t), - .get = s390_system_call_get, - .set = s390_system_call_set, + { + .core_note_type = NT_S390_VXRS_LOW, + .n = __NUM_VXRS_LOW, + .size = sizeof(__u64), + .align = sizeof(__u64), + .active = s390_vxrs_active, + .get = s390_vxrs_low_get, + .set = s390_vxrs_low_set, + }, + { + .core_note_type = NT_S390_VXRS_HIGH, + .n = __NUM_VXRS_HIGH, + .size = sizeof(__vector128), + .align = sizeof(__vector128), + .active = s390_vxrs_active, + .get = s390_vxrs_high_get, + .set = s390_vxrs_high_set, }, - [REGSET_GENERAL_EXTENDED] = { + { .core_note_type = NT_S390_HIGH_GPRS, .n = sizeof(s390_compat_regs_high) / sizeof(compat_long_t), .size = sizeof(compat_long_t), diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index cdfc060..e80d9ff 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -343,6 +343,9 @@ static void __init setup_lowcore(void) __ctl_set_bit(14, 29); } #else + if (MACHINE_HAS_VX) + lc->vector_save_area_addr = + (unsigned long) &lc->vector_save_area; lc->vdso_per_cpu_data = (unsigned long) &lc->paste[0]; #endif lc->sync_enter_timer = S390_lowcore.sync_enter_timer; @@ -765,6 +768,12 @@ static void __init setup_hwcaps(void) */ if (test_facility(50) && test_facility(73)) elf_hwcap |= HWCAP_S390_TE; + + /* + * Vector extension HWCAP_S390_VXRS is bit 11. 
+ */ + if (test_facility(129)) + elf_hwcap |= HWCAP_S390_VXRS; #endif get_cpu_id(&cpu_id); diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index 469c4c6..0c1a0ff 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -31,30 +31,117 @@ #include #include "entry.h" -typedef struct +/* + * Layout of an old-style signal-frame: + * ----------------------------------------- + * | save area (_SIGNAL_FRAMESIZE) | + * ----------------------------------------- + * | struct sigcontext | + * | oldmask | + * | _sigregs * | + * ----------------------------------------- + * | _sigregs with | + * | _s390_regs_common | + * | _s390_fp_regs | + * ----------------------------------------- + * | int signo | + * ----------------------------------------- + * | _sigregs_ext with | + * | gprs_high 64 byte (opt) | + * | vxrs_low 128 byte (opt) | + * | vxrs_high 256 byte (opt) | + * | reserved 128 byte (opt) | + * ----------------------------------------- + * | __u16 svc_insn | + * ----------------------------------------- + * The svc_insn entry with the sigreturn system call opcode does not + * have a fixed position and moves if gprs_high or vxrs exist. + * Future extensions will be added to _sigregs_ext. + */ +struct sigframe { __u8 callee_used_stack[__SIGNAL_FRAMESIZE]; struct sigcontext sc; _sigregs sregs; int signo; - __u8 retcode[S390_SYSCALL_SIZE]; -} sigframe; + _sigregs_ext sregs_ext; + __u16 svc_insn; /* Offset of svc_insn is NOT fixed! 
*/ +}; -typedef struct +/* + * Layout of an rt signal-frame: + * ----------------------------------------- + * | save area (_SIGNAL_FRAMESIZE) | + * ----------------------------------------- + * | svc __NR_rt_sigreturn 2 byte | + * ----------------------------------------- + * | struct siginfo | + * ----------------------------------------- + * | struct ucontext_extended with | + * | unsigned long uc_flags | + * | struct ucontext *uc_link | + * | stack_t uc_stack | + * | _sigregs uc_mcontext with | + * | _s390_regs_common | + * | _s390_fp_regs | + * | sigset_t uc_sigmask | + * | _sigregs_ext uc_mcontext_ext | + * | gprs_high 64 byte (opt) | + * | vxrs_low 128 byte (opt) | + * | vxrs_high 256 byte (opt)| + * | reserved 128 byte (opt) | + * ----------------------------------------- + * Future extensions will be added to _sigregs_ext. + */ +struct rt_sigframe { __u8 callee_used_stack[__SIGNAL_FRAMESIZE]; - __u8 retcode[S390_SYSCALL_SIZE]; + __u16 svc_insn; struct siginfo info; - struct ucontext uc; -} rt_sigframe; + struct ucontext_extended uc; +}; + +/* Store registers needed to create the signal frame */ +static void store_sigregs(void) +{ + save_access_regs(current->thread.acrs); + save_fp_ctl(&current->thread.fp_regs.fpc); +#ifdef CONFIG_64BIT + if (current->thread.vxrs) { + int i; + + save_vx_regs(current->thread.vxrs); + for (i = 0; i < __NUM_FPRS; i++) + current->thread.fp_regs.fprs[i] = + *(freg_t *)(current->thread.vxrs + i); + } else +#endif + save_fp_regs(current->thread.fp_regs.fprs); +} + +/* Load registers after signal return */ +static void load_sigregs(void) +{ + restore_access_regs(current->thread.acrs); + /* restore_fp_ctl is done in restore_sigregs */ +#ifdef CONFIG_64BIT + if (current->thread.vxrs) { + int i; + + for (i = 0; i < __NUM_FPRS; i++) + *(freg_t *)(current->thread.vxrs + i) = + current->thread.fp_regs.fprs[i]; + restore_vx_regs(current->thread.vxrs); + } else +#endif + restore_fp_regs(current->thread.fp_regs.fprs); +} /* Returns non-zero on
fault. */ static int save_sigregs(struct pt_regs *regs, _sigregs __user *sregs) { _sigregs user_sregs; - save_access_regs(current->thread.acrs); - /* Copy a 'clean' PSW mask to the user to avoid leaking information about whether PER is currently on. */ user_sregs.regs.psw.mask = PSW_USER_BITS | @@ -63,12 +150,6 @@ static int save_sigregs(struct pt_regs *regs, _sigregs __user *sregs) memcpy(&user_sregs.regs.gprs, &regs->gprs, sizeof(sregs->regs.gprs)); memcpy(&user_sregs.regs.acrs, current->thread.acrs, sizeof(user_sregs.regs.acrs)); - /* - * We have to store the fp registers to current->thread.fp_regs - * to merge them with the emulated registers. - */ - save_fp_ctl(&current->thread.fp_regs.fpc); - save_fp_regs(current->thread.fp_regs.fprs); memcpy(&user_sregs.fpregs, &current->thread.fp_regs, sizeof(user_sregs.fpregs)); if (__copy_to_user(sregs, &user_sregs, sizeof(_sigregs))) @@ -107,20 +188,64 @@ static int restore_sigregs(struct pt_regs *regs, _sigregs __user *sregs) memcpy(&regs->gprs, &user_sregs.regs.gprs, sizeof(sregs->regs.gprs)); memcpy(&current->thread.acrs, &user_sregs.regs.acrs, sizeof(current->thread.acrs)); - restore_access_regs(current->thread.acrs); memcpy(&current->thread.fp_regs, &user_sregs.fpregs, sizeof(current->thread.fp_regs)); - restore_fp_regs(current->thread.fp_regs.fprs); clear_pt_regs_flag(regs, PIF_SYSCALL); /* No longer in a system call */ return 0; } +/* Returns non-zero on fault. 
*/ +static int save_sigregs_ext(struct pt_regs *regs, + _sigregs_ext __user *sregs_ext) +{ +#ifdef CONFIG_64BIT + __u64 vxrs[__NUM_VXRS_LOW]; + int i; + + /* Save vector registers to signal stack */ + if (current->thread.vxrs) { + for (i = 0; i < __NUM_VXRS_LOW; i++) + vxrs[i] = *((__u64 *)(current->thread.vxrs + i) + 1); + if (__copy_to_user(&sregs_ext->vxrs_low, vxrs, + sizeof(sregs_ext->vxrs_low)) || + __copy_to_user(&sregs_ext->vxrs_high, + current->thread.vxrs + __NUM_VXRS_LOW, + sizeof(sregs_ext->vxrs_high))) + return -EFAULT; + } +#endif + return 0; +} + +static int restore_sigregs_ext(struct pt_regs *regs, + _sigregs_ext __user *sregs_ext) +{ +#ifdef CONFIG_64BIT + __u64 vxrs[__NUM_VXRS_LOW]; + int i; + + /* Restore vector registers from signal stack */ + if (current->thread.vxrs) { + if (__copy_from_user(vxrs, &sregs_ext->vxrs_low, + sizeof(sregs_ext->vxrs_low)) || + __copy_from_user(current->thread.vxrs + __NUM_VXRS_LOW, + &sregs_ext->vxrs_high, + sizeof(sregs_ext->vxrs_high))) + return -EFAULT; + for (i = 0; i < __NUM_VXRS_LOW; i++) + *((__u64 *)(current->thread.vxrs + i) + 1) = vxrs[i]; + } +#endif + return 0; +} + SYSCALL_DEFINE0(sigreturn) { struct pt_regs *regs = task_pt_regs(current); - sigframe __user *frame = (sigframe __user *)regs->gprs[15]; + struct sigframe __user *frame = + (struct sigframe __user *) regs->gprs[15]; sigset_t set; if (__copy_from_user(&set.sig, &frame->sc.oldmask, _SIGMASK_COPY_SIZE)) @@ -128,6 +253,9 @@ SYSCALL_DEFINE0(sigreturn) set_current_blocked(&set); if (restore_sigregs(regs, &frame->sregs)) goto badframe; + if (restore_sigregs_ext(regs, &frame->sregs_ext)) + goto badframe; + load_sigregs(); return regs->gprs[2]; badframe: force_sig(SIGSEGV, current); @@ -137,16 +265,20 @@ badframe: SYSCALL_DEFINE0(rt_sigreturn) { struct pt_regs *regs = task_pt_regs(current); - rt_sigframe __user *frame = (rt_sigframe __user *)regs->gprs[15]; + struct rt_sigframe __user *frame = + (struct rt_sigframe __user *)regs->gprs[15]; sigset_t 
set; if (__copy_from_user(&set.sig, &frame->uc.uc_sigmask, sizeof(set))) goto badframe; set_current_blocked(&set); + if (restore_altstack(&frame->uc.uc_stack)) + goto badframe; if (restore_sigregs(regs, &frame->uc.uc_mcontext)) goto badframe; - if (restore_altstack(&frame->uc.uc_stack)) + if (restore_sigregs_ext(regs, &frame->uc.uc_mcontext_ext)) goto badframe; + load_sigregs(); return regs->gprs[2]; badframe: force_sig(SIGSEGV, current); @@ -154,11 +286,6 @@ badframe: } /* - * Set up a signal frame. - */ - - -/* * Determine which stack to use.. */ static inline void __user * @@ -195,39 +322,63 @@ static inline int map_signal(int sig) static int setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, struct pt_regs * regs) { - sigframe __user *frame; - - frame = get_sigframe(ka, regs, sizeof(sigframe)); + struct sigframe __user *frame; + struct sigcontext sc; + unsigned long restorer; + size_t frame_size; + /* + * gprs_high are only present for a 31-bit task running on + * a 64-bit kernel (see compat_signal.c) but the space for + * gprs_high need to be allocated if vector registers are + * included in the signal frame on a 31-bit system. + */ + frame_size = sizeof(*frame) - sizeof(frame->sregs_ext); + if (MACHINE_HAS_VX) + frame_size += sizeof(frame->sregs_ext); + frame = get_sigframe(ka, regs, frame_size); if (frame == (void __user *) -1UL) return -EFAULT; - if (__copy_to_user(&frame->sc.oldmask, &set->sig, _SIGMASK_COPY_SIZE)) + /* Set up backchain. 
*/ + if (__put_user(regs->gprs[15], (addr_t __user *) frame)) return -EFAULT; + /* Create struct sigcontext on the signal stack */ + memcpy(&sc.oldmask, &set->sig, _SIGMASK_COPY_SIZE); + sc.sregs = (_sigregs __user __force *) &frame->sregs; + if (__copy_to_user(&frame->sc, &sc, sizeof(frame->sc))) + return -EFAULT; + + /* Store registers needed to create the signal frame */ + store_sigregs(); + + /* Create _sigregs on the signal stack */ if (save_sigregs(regs, &frame->sregs)) return -EFAULT; - if (__put_user(&frame->sregs, &frame->sc.sregs)) + + /* Place signal number on stack to allow backtrace from handler. */ + if (__put_user(regs->gprs[2], (int __user *) &frame->signo)) + return -EFAULT; + + /* Create _sigregs_ext on the signal stack */ + if (save_sigregs_ext(regs, &frame->sregs_ext)) return -EFAULT; /* Set up to return from userspace. If provided, use a stub already in userspace. */ if (ka->sa.sa_flags & SA_RESTORER) { - regs->gprs[14] = (unsigned long) - ka->sa.sa_restorer | PSW_ADDR_AMODE; + restorer = (unsigned long) ka->sa.sa_restorer | PSW_ADDR_AMODE; } else { - regs->gprs[14] = (unsigned long) - frame->retcode | PSW_ADDR_AMODE; - if (__put_user(S390_SYSCALL_OPCODE | __NR_sigreturn, - (u16 __user *)(frame->retcode))) + /* Signal frame without vector registers are short ! */ + __u16 __user *svc = (void *) frame + frame_size - 2; + if (__put_user(S390_SYSCALL_OPCODE | __NR_sigreturn, svc)) return -EFAULT; + restorer = (unsigned long) svc | PSW_ADDR_AMODE; } - /* Set up backchain. */ - if (__put_user(regs->gprs[15], (addr_t __user *) frame)) - return -EFAULT; - /* Set up registers for signal handler */ + regs->gprs[14] = restorer; regs->gprs[15] = (unsigned long) frame; /* Force default amode and default user address space control. 
*/ regs->psw.mask = PSW_MASK_EA | PSW_MASK_BA | @@ -247,54 +398,69 @@ static int setup_frame(int sig, struct k_sigaction *ka, regs->gprs[5] = regs->int_parm_long; regs->gprs[6] = task_thread_info(current)->last_break; } - - /* Place signal number on stack to allow backtrace from handler. */ - if (__put_user(regs->gprs[2], (int __user *) &frame->signo)) - return -EFAULT; return 0; } static int setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs) { - int err = 0; - rt_sigframe __user *frame; - - frame = get_sigframe(&ksig->ka, regs, sizeof(rt_sigframe)); + struct rt_sigframe __user *frame; + unsigned long uc_flags, restorer; + size_t frame_size; + frame_size = sizeof(struct rt_sigframe) - sizeof(_sigregs_ext); + /* + * gprs_high are only present for a 31-bit task running on + * a 64-bit kernel (see compat_signal.c) but the space for + * gprs_high need to be allocated if vector registers are + * included in the signal frame on a 31-bit system. + */ + uc_flags = 0; +#ifdef CONFIG_64BIT + if (MACHINE_HAS_VX) { + frame_size += sizeof(_sigregs_ext); + if (current->thread.vxrs) + uc_flags |= UC_VXRS; + } +#endif + frame = get_sigframe(&ksig->ka, regs, frame_size); if (frame == (void __user *) -1UL) return -EFAULT; - if (copy_siginfo_to_user(&frame->info, &ksig->info)) - return -EFAULT; - - /* Create the ucontext. */ - err |= __put_user(0, &frame->uc.uc_flags); - err |= __put_user(NULL, &frame->uc.uc_link); - err |= __save_altstack(&frame->uc.uc_stack, regs->gprs[15]); - err |= save_sigregs(regs, &frame->uc.uc_mcontext); - err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); - if (err) + /* Set up backchain. */ + if (__put_user(regs->gprs[15], (addr_t __user *) frame)) return -EFAULT; /* Set up to return from userspace. If provided, use a stub already in userspace. 
*/ if (ksig->ka.sa.sa_flags & SA_RESTORER) { - regs->gprs[14] = (unsigned long) + restorer = (unsigned long) ksig->ka.sa.sa_restorer | PSW_ADDR_AMODE; } else { - regs->gprs[14] = (unsigned long) - frame->retcode | PSW_ADDR_AMODE; - if (__put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn, - (u16 __user *)(frame->retcode))) + __u16 __user *svc = &frame->svc_insn; + if (__put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn, svc)) return -EFAULT; + restorer = (unsigned long) svc | PSW_ADDR_AMODE; } - /* Set up backchain. */ - if (__put_user(regs->gprs[15], (addr_t __user *) frame)) + /* Create siginfo on the signal stack */ + if (copy_siginfo_to_user(&frame->info, &ksig->info)) + return -EFAULT; + + /* Store registers needed to create the signal frame */ + store_sigregs(); + + /* Create ucontext on the signal stack. */ + if (__put_user(uc_flags, &frame->uc.uc_flags) || + __put_user(NULL, &frame->uc.uc_link) || + __save_altstack(&frame->uc.uc_stack, regs->gprs[15]) || + save_sigregs(regs, &frame->uc.uc_mcontext) || + __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)) || + save_sigregs_ext(regs, &frame->uc.uc_mcontext_ext)) return -EFAULT; /* Set up registers for signal handler */ + regs->gprs[14] = restorer; regs->gprs[15] = (unsigned long) frame; /* Force default amode and default user address space control. 
*/ regs->psw.mask = PSW_MASK_EA | PSW_MASK_BA | diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index bba0e246..13cae5b 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -179,6 +179,9 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) goto out; } #else + if (MACHINE_HAS_VX) + lc->vector_save_area_addr = + (unsigned long) &lc->vector_save_area; if (vdso_alloc_per_cpu(lc)) goto out; #endif diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index e3e06a4..9ff5ecb 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -18,6 +18,8 @@ #include #include #include +#include +#include #include "entry.h" int show_unhandled_signals = 1; @@ -303,6 +305,74 @@ DO_ERROR_INFO(specification_exception, SIGILL, ILL_ILLOPN, "specification exception"); #endif +#ifdef CONFIG_64BIT +int alloc_vector_registers(struct task_struct *tsk) +{ + __vector128 *vxrs; + int i; + + /* Allocate vector register save area. */ + vxrs = kzalloc(sizeof(__vector128) * __NUM_VXRS, + GFP_KERNEL|__GFP_REPEAT); + if (!vxrs) + return -ENOMEM; + preempt_disable(); + if (tsk == current) + save_fp_regs(tsk->thread.fp_regs.fprs); + /* Copy the 16 floating point registers */ + for (i = 0; i < 16; i++) + *(freg_t *) &vxrs[i] = tsk->thread.fp_regs.fprs[i]; + tsk->thread.vxrs = vxrs; + if (tsk == current) { + __ctl_set_bit(0, 17); + restore_vx_regs(vxrs); + } + preempt_enable(); + return 0; +} + +void vector_exception(struct pt_regs *regs) +{ + int si_code, vic; + + if (!MACHINE_HAS_VX) { + do_trap(regs, SIGILL, ILL_ILLOPN, "illegal operation"); + return; + } + + /* get vector interrupt code from fpc */ + asm volatile("stfpc %0" : "=m" (current->thread.fp_regs.fpc)); + vic = (current->thread.fp_regs.fpc & 0xf00) >> 8; + switch (vic) { + case 1: /* invalid vector operation */ + si_code = FPE_FLTINV; + break; + case 2: /* division by zero */ + si_code = FPE_FLTDIV; + break; + case 3: /* overflow */ + si_code = FPE_FLTOVF; + break; + case 4: /* 
underflow */ + si_code = FPE_FLTUND; + break; + case 5: /* inexact */ + si_code = FPE_FLTRES; + break; + default: /* unknown cause */ + si_code = 0; + } + do_trap(regs, SIGFPE, si_code, "vector exception"); +} + +static int __init disable_vector_extension(char *str) +{ + S390_lowcore.machine_flags &= ~MACHINE_FLAG_VX; + return 1; +} +__setup("novx", disable_vector_extension); +#endif + void data_exception(struct pt_regs *regs) { __u16 __user *location; @@ -368,6 +438,18 @@ void data_exception(struct pt_regs *regs) } } #endif +#ifdef CONFIG_64BIT + /* Check for vector register enablement */ + if (MACHINE_HAS_VX && !current->thread.vxrs && + (current->thread.fp_regs.fpc & FPC_DXC_MASK) == 0xfe00) { + alloc_vector_registers(current); + /* Vector data exception is suppressing, rewind psw. */ + regs->psw.addr = __rewind_psw(regs->psw, regs->int_code >> 16); + clear_pt_regs_flag(regs, PIF_PER_TRAP); + return; + } +#endif + if (current->thread.fp_regs.fpc & FPC_DXC_MASK) signal = SIGFPE; else diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index ef6103b..ea9bf25 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -391,6 +391,8 @@ typedef struct elf64_shdr { #define NT_S390_LAST_BREAK 0x306 /* s390 breaking event address */ #define NT_S390_SYSTEM_CALL 0x307 /* s390 system call restart data */ #define NT_S390_TDB 0x308 /* s390 transaction diagnostic block */ +#define NT_S390_VXRS_LOW 0x309 /* s390 vector registers 0-15 upper half */ +#define NT_S390_VXRS_HIGH 0x30a /* s390 vector registers 16-31 */ #define NT_ARM_VFP 0x400 /* ARM VFP/NEON registers */ #define NT_ARM_TLS 0x401 /* ARM TLS register */ #define NT_ARM_HW_BREAK 0x402 /* ARM hardware breakpoint registers */ -- cgit v0.10.2 From 3585cb0280654acbc559a360a839c8d58bb0cb87 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 6 Oct 2014 17:54:42 +0200 Subject: s390/disassembler: add vector instructions Add the instruction introduced with the vector extension to the in-kernel 
disassembler. Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/include/asm/dis.h b/arch/s390/include/asm/dis.h index 04a83f5..60323c2 100644 --- a/arch/s390/include/asm/dis.h +++ b/arch/s390/include/asm/dis.h @@ -13,12 +13,13 @@ #define OPERAND_FPR 0x2 /* Operand printed as %fx */ #define OPERAND_AR 0x4 /* Operand printed as %ax */ #define OPERAND_CR 0x8 /* Operand printed as %cx */ -#define OPERAND_DISP 0x10 /* Operand printed as displacement */ -#define OPERAND_BASE 0x20 /* Operand printed as base register */ -#define OPERAND_INDEX 0x40 /* Operand printed as index register */ -#define OPERAND_PCREL 0x80 /* Operand printed as pc-relative symbol */ -#define OPERAND_SIGNED 0x100 /* Operand printed as signed value */ -#define OPERAND_LENGTH 0x200 /* Operand printed as length (+1) */ +#define OPERAND_VR 0x10 /* Operand printed as %vx */ +#define OPERAND_DISP 0x20 /* Operand printed as displacement */ +#define OPERAND_BASE 0x40 /* Operand printed as base register */ +#define OPERAND_INDEX 0x80 /* Operand printed as index register */ +#define OPERAND_PCREL 0x100 /* Operand printed as pc-relative symbol */ +#define OPERAND_SIGNED 0x200 /* Operand printed as signed value */ +#define OPERAND_LENGTH 0x400 /* Operand printed as length (+1) */ struct s390_operand { diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c index 993efe6..f376293 100644 --- a/arch/s390/kernel/dis.c +++ b/arch/s390/kernel/dis.c @@ -60,6 +60,11 @@ enum { A_28, /* Access reg. starting at position 28 */ C_8, /* Control reg. starting at position 8 */ C_12, /* Control reg. starting at position 12 */ + V_8, /* Vector reg. starting at position 8, extension bit at 36 */ + V_12, /* Vector reg. starting at position 12, extension bit at 37 */ + V_16, /* Vector reg. starting at position 16, extension bit at 38 */ + V_32, /* Vector reg. starting at position 32, extension bit at 39 */ + W_12, /* Vector reg. 
at bit 12, extension at bit 37, used as index */ B_16, /* Base register starting at position 16 */ B_32, /* Base register starting at position 32 */ X_12, /* Index register starting at position 12 */ @@ -82,6 +87,8 @@ enum { U8_24, /* 8 bit unsigned value starting at 24 */ U8_32, /* 8 bit unsigned value starting at 32 */ I8_8, /* 8 bit signed value starting at 8 */ + I8_16, /* 8 bit signed value starting at 16 */ + I8_24, /* 8 bit signed value starting at 24 */ I8_32, /* 8 bit signed value starting at 32 */ J12_12, /* PC relative offset at 12 */ I16_16, /* 16 bit signed value starting at 16 */ @@ -96,6 +103,9 @@ enum { U32_16, /* 32 bit unsigned value starting at 16 */ M_16, /* 4 bit optional mask starting at 16 */ M_20, /* 4 bit optional mask starting at 20 */ + M_24, /* 4 bit optional mask starting at 24 */ + M_28, /* 4 bit optional mask starting at 28 */ + M_32, /* 4 bit optional mask starting at 32 */ RO_28, /* optional GPR starting at position 28 */ }; @@ -130,7 +140,7 @@ enum { INSTR_RSY_RDRM, INSTR_RS_AARD, INSTR_RS_CCRD, INSTR_RS_R0RD, INSTR_RS_RRRD, INSTR_RS_RURD, - INSTR_RXE_FRRD, INSTR_RXE_RRRD, + INSTR_RXE_FRRD, INSTR_RXE_RRRD, INSTR_RXE_RRRDM, INSTR_RXF_FRRDF, INSTR_RXY_FRRD, INSTR_RXY_RRRD, INSTR_RXY_URRD, INSTR_RX_FRRD, INSTR_RX_RRRD, INSTR_RX_URRD, @@ -143,6 +153,17 @@ enum { INSTR_SS_L0RDRD, INSTR_SS_LIRDRD, INSTR_SS_LLRDRD, INSTR_SS_RRRDRD, INSTR_SS_RRRDRD2, INSTR_SS_RRRDRD3, INSTR_S_00, INSTR_S_RD, + INSTR_VRI_V0IM, INSTR_VRI_V0I0, INSTR_VRI_V0IIM, INSTR_VRI_VVIM, + INSTR_VRI_VVV0IM, INSTR_VRI_VVV0I0, INSTR_VRI_VVIMM, + INSTR_VRR_VV00MMM, INSTR_VRR_VV000MM, INSTR_VRR_VV0000M, + INSTR_VRR_VV00000, INSTR_VRR_VVV0M0M, INSTR_VRR_VV00M0M, + INSTR_VRR_VVV000M, INSTR_VRR_VVV000V, INSTR_VRR_VVV0000, + INSTR_VRR_VVV0MMM, INSTR_VRR_VVV00MM, INSTR_VRR_VVVMM0V, + INSTR_VRR_VVVM0MV, INSTR_VRR_VVVM00V, INSTR_VRR_VRR0000, + INSTR_VRS_VVRDM, INSTR_VRS_VVRD0, INSTR_VRS_VRRDM, INSTR_VRS_VRRD0, + INSTR_VRS_RVRDM, + INSTR_VRV_VVRDM, INSTR_VRV_VWRDM, + 
INSTR_VRX_VRRDM, INSTR_VRX_VRRD0, }; static const struct s390_operand operands[] = @@ -168,6 +189,11 @@ static const struct s390_operand operands[] = [A_28] = { 4, 28, OPERAND_AR }, [C_8] = { 4, 8, OPERAND_CR }, [C_12] = { 4, 12, OPERAND_CR }, + [V_8] = { 4, 8, OPERAND_VR }, + [V_12] = { 4, 12, OPERAND_VR }, + [V_16] = { 4, 16, OPERAND_VR }, + [V_32] = { 4, 32, OPERAND_VR }, + [W_12] = { 4, 12, OPERAND_INDEX | OPERAND_VR }, [B_16] = { 4, 16, OPERAND_BASE | OPERAND_GPR }, [B_32] = { 4, 32, OPERAND_BASE | OPERAND_GPR }, [X_12] = { 4, 12, OPERAND_INDEX | OPERAND_GPR }, @@ -190,6 +216,11 @@ static const struct s390_operand operands[] = [U8_24] = { 8, 24, 0 }, [U8_32] = { 8, 32, 0 }, [J12_12] = { 12, 12, OPERAND_PCREL }, + [I8_8] = { 8, 8, OPERAND_SIGNED }, + [I8_16] = { 8, 16, OPERAND_SIGNED }, + [I8_24] = { 8, 24, OPERAND_SIGNED }, + [I8_32] = { 8, 32, OPERAND_SIGNED }, + [I16_32] = { 16, 32, OPERAND_SIGNED }, [I16_16] = { 16, 16, OPERAND_SIGNED }, [U16_16] = { 16, 16, 0 }, [U16_32] = { 16, 32, 0 }, @@ -202,6 +233,9 @@ static const struct s390_operand operands[] = [U32_16] = { 32, 16, 0 }, [M_16] = { 4, 16, 0 }, [M_20] = { 4, 20, 0 }, + [M_24] = { 4, 24, 0 }, + [M_28] = { 4, 28, 0 }, + [M_32] = { 4, 32, 0 }, [RO_28] = { 4, 28, OPERAND_GPR } }; @@ -283,6 +317,7 @@ static const unsigned char formats[][7] = { [INSTR_RS_RURD] = { 0xff, R_8,U4_12,D_20,B_16,0,0 }, [INSTR_RXE_FRRD] = { 0xff, F_8,D_20,X_12,B_16,0,0 }, [INSTR_RXE_RRRD] = { 0xff, R_8,D_20,X_12,B_16,0,0 }, + [INSTR_RXE_RRRDM] = { 0xff, R_8,D_20,X_12,B_16,M_32,0 }, [INSTR_RXF_FRRDF] = { 0xff, F_32,F_8,D_20,X_12,B_16,0 }, [INSTR_RXY_FRRD] = { 0xff, F_8,D20_20,X_12,B_16,0,0 }, [INSTR_RXY_RRRD] = { 0xff, R_8,D20_20,X_12,B_16,0,0 }, @@ -307,6 +342,37 @@ static const unsigned char formats[][7] = { [INSTR_SS_RRRDRD] = { 0xff, D_20,R_8,B_16,D_36,B_32,R_12 }, [INSTR_S_00] = { 0xff, 0,0,0,0,0,0 }, [INSTR_S_RD] = { 0xff, D_20,B_16,0,0,0,0 }, + [INSTR_VRI_V0IM] = { 0xff, V_8,I16_16,M_32,0,0,0 }, + [INSTR_VRI_V0I0] = { 0xff, 
V_8,I16_16,0,0,0,0 }, + [INSTR_VRI_V0IIM] = { 0xff, V_8,I8_16,I8_24,M_32,0,0 }, + [INSTR_VRI_VVIM] = { 0xff, V_8,I16_16,V_12,M_32,0,0 }, + [INSTR_VRI_VVV0IM]= { 0xff, V_8,V_12,V_16,I8_24,M_32,0 }, + [INSTR_VRI_VVV0I0]= { 0xff, V_8,V_12,V_16,I8_24,0,0 }, + [INSTR_VRI_VVIMM] = { 0xff, V_8,V_12,I16_16,M_32,M_28,0 }, + [INSTR_VRR_VV00MMM]={ 0xff, V_8,V_12,M_32,M_28,M_24,0 }, + [INSTR_VRR_VV000MM]={ 0xff, V_8,V_12,M_32,M_28,0,0 }, + [INSTR_VRR_VV0000M]={ 0xff, V_8,V_12,M_32,0,0,0 }, + [INSTR_VRR_VV00000]={ 0xff, V_8,V_12,0,0,0,0 }, + [INSTR_VRR_VVV0M0M]={ 0xff, V_8,V_12,V_16,M_32,M_24,0 }, + [INSTR_VRR_VV00M0M]={ 0xff, V_8,V_12,M_32,M_24,0,0 }, + [INSTR_VRR_VVV000M]={ 0xff, V_8,V_12,V_16,M_32,0,0 }, + [INSTR_VRR_VVV000V]={ 0xff, V_8,V_12,V_16,V_32,0,0 }, + [INSTR_VRR_VVV0000]={ 0xff, V_8,V_12,V_16,0,0,0 }, + [INSTR_VRR_VVV0MMM]={ 0xff, V_8,V_12,V_16,M_32,M_28,M_24 }, + [INSTR_VRR_VVV00MM]={ 0xff, V_8,V_12,V_16,M_32,M_28,0 }, + [INSTR_VRR_VVVMM0V]={ 0xff, V_8,V_12,V_16,V_32,M_20,M_24 }, + [INSTR_VRR_VVVM0MV]={ 0xff, V_8,V_12,V_16,V_32,M_28,M_20 }, + [INSTR_VRR_VVVM00V]={ 0xff, V_8,V_12,V_16,V_32,M_20,0 }, + [INSTR_VRR_VRR0000]={ 0xff, V_8,R_12,R_16,0,0,0 }, + [INSTR_VRS_VVRDM] = { 0xff, V_8,V_12,D_20,B_16,M_32,0 }, + [INSTR_VRS_VVRD0] = { 0xff, V_8,V_12,D_20,B_16,0,0 }, + [INSTR_VRS_VRRDM] = { 0xff, V_8,R_12,D_20,B_16,M_32,0 }, + [INSTR_VRS_VRRD0] = { 0xff, V_8,R_12,D_20,B_16,0,0 }, + [INSTR_VRS_RVRDM] = { 0xff, R_8,V_12,D_20,B_16,M_32,0 }, + [INSTR_VRV_VVRDM] = { 0xff, V_8,V_12,D_20,B_16,M_32,0 }, + [INSTR_VRV_VWRDM] = { 0xff, V_8,D_20,W_12,B_16,M_32,0 }, + [INSTR_VRX_VRRDM] = { 0xff, V_8,D_20,X_12,B_16,M_32,0 }, + [INSTR_VRX_VRRD0] = { 0xff, V_8,D_20,X_12,B_16,0,0 }, }; enum { @@ -381,6 +447,11 @@ enum { LONG_INSN_MPCIFC, LONG_INSN_STPCIFC, LONG_INSN_PCISTB, + LONG_INSN_VPOPCT, + LONG_INSN_VERLLV, + LONG_INSN_VESRAV, + LONG_INSN_VESRLV, + LONG_INSN_VSBCBI }; static char *long_insn_name[] = { @@ -455,6 +526,11 @@ static char *long_insn_name[] = { [LONG_INSN_MPCIFC] = 
"mpcifc", [LONG_INSN_STPCIFC] = "stpcifc", [LONG_INSN_PCISTB] = "pcistb", + [LONG_INSN_VPOPCT] = "vpopct", + [LONG_INSN_VERLLV] = "verllv", + [LONG_INSN_VESRAV] = "vesrav", + [LONG_INSN_VESRLV] = "vesrlv", + [LONG_INSN_VSBCBI] = "vsbcbi", }; static struct s390_insn opcode[] = { @@ -1369,6 +1445,150 @@ static struct s390_insn opcode_e5[] = { { "", 0, INSTR_INVALID } }; +static struct s390_insn opcode_e7[] = { +#ifdef CONFIG_64BIT + { "lcbb", 0x27, INSTR_RXE_RRRDM }, + { "vgef", 0x13, INSTR_VRV_VVRDM }, + { "vgeg", 0x12, INSTR_VRV_VVRDM }, + { "vgbm", 0x44, INSTR_VRI_V0I0 }, + { "vgm", 0x46, INSTR_VRI_V0IIM }, + { "vl", 0x06, INSTR_VRX_VRRD0 }, + { "vlr", 0x56, INSTR_VRR_VV00000 }, + { "vlrp", 0x05, INSTR_VRX_VRRDM }, + { "vleb", 0x00, INSTR_VRX_VRRDM }, + { "vleh", 0x01, INSTR_VRX_VRRDM }, + { "vlef", 0x03, INSTR_VRX_VRRDM }, + { "vleg", 0x02, INSTR_VRX_VRRDM }, + { "vleib", 0x40, INSTR_VRI_V0IM }, + { "vleih", 0x41, INSTR_VRI_V0IM }, + { "vleif", 0x43, INSTR_VRI_V0IM }, + { "vleig", 0x42, INSTR_VRI_V0IM }, + { "vlgv", 0x21, INSTR_VRS_RVRDM }, + { "vllez", 0x04, INSTR_VRX_VRRDM }, + { "vlm", 0x36, INSTR_VRS_VVRD0 }, + { "vlbb", 0x07, INSTR_VRX_VRRDM }, + { "vlvg", 0x22, INSTR_VRS_VRRDM }, + { "vlvgp", 0x62, INSTR_VRR_VRR0000 }, + { "vll", 0x37, INSTR_VRS_VRRD0 }, + { "vmrh", 0x61, INSTR_VRR_VVV000M }, + { "vmrl", 0x60, INSTR_VRR_VVV000M }, + { "vpk", 0x94, INSTR_VRR_VVV000M }, + { "vpks", 0x97, INSTR_VRR_VVV0M0M }, + { "vpkls", 0x95, INSTR_VRR_VVV0M0M }, + { "vperm", 0x8c, INSTR_VRR_VVV000V }, + { "vpdi", 0x84, INSTR_VRR_VVV000M }, + { "vrep", 0x4d, INSTR_VRI_VVIM }, + { "vrepi", 0x45, INSTR_VRI_V0IM }, + { "vscef", 0x1b, INSTR_VRV_VWRDM }, + { "vsceg", 0x1a, INSTR_VRV_VWRDM }, + { "vsel", 0x8d, INSTR_VRR_VVV000V }, + { "vseg", 0x5f, INSTR_VRR_VV0000M }, + { "vst", 0x0e, INSTR_VRX_VRRD0 }, + { "vsteb", 0x08, INSTR_VRX_VRRDM }, + { "vsteh", 0x09, INSTR_VRX_VRRDM }, + { "vstef", 0x0b, INSTR_VRX_VRRDM }, + { "vsteg", 0x0a, INSTR_VRX_VRRDM }, + { "vstm", 0x3e, 
INSTR_VRS_VVRD0 }, + { "vstl", 0x3f, INSTR_VRS_VRRD0 }, + { "vuph", 0xd7, INSTR_VRR_VV0000M }, + { "vuplh", 0xd5, INSTR_VRR_VV0000M }, + { "vupl", 0xd6, INSTR_VRR_VV0000M }, + { "vupll", 0xd4, INSTR_VRR_VV0000M }, + { "va", 0xf3, INSTR_VRR_VVV000M }, + { "vacc", 0xf1, INSTR_VRR_VVV000M }, + { "vac", 0xbb, INSTR_VRR_VVVM00V }, + { "vaccc", 0xb9, INSTR_VRR_VVVM00V }, + { "vn", 0x68, INSTR_VRR_VVV0000 }, + { "vnc", 0x69, INSTR_VRR_VVV0000 }, + { "vavg", 0xf2, INSTR_VRR_VVV000M }, + { "vavgl", 0xf0, INSTR_VRR_VVV000M }, + { "vcksm", 0x66, INSTR_VRR_VVV0000 }, + { "vec", 0xdb, INSTR_VRR_VV0000M }, + { "vecl", 0xd9, INSTR_VRR_VV0000M }, + { "vceq", 0xf8, INSTR_VRR_VVV0M0M }, + { "vch", 0xfb, INSTR_VRR_VVV0M0M }, + { "vchl", 0xf9, INSTR_VRR_VVV0M0M }, + { "vclz", 0x53, INSTR_VRR_VV0000M }, + { "vctz", 0x52, INSTR_VRR_VV0000M }, + { "vx", 0x6d, INSTR_VRR_VVV0000 }, + { "vgfm", 0xb4, INSTR_VRR_VVV000M }, + { "vgfma", 0xbc, INSTR_VRR_VVVM00V }, + { "vlc", 0xde, INSTR_VRR_VV0000M }, + { "vlp", 0xdf, INSTR_VRR_VV0000M }, + { "vmx", 0xff, INSTR_VRR_VVV000M }, + { "vmxl", 0xfd, INSTR_VRR_VVV000M }, + { "vmn", 0xfe, INSTR_VRR_VVV000M }, + { "vmnl", 0xfc, INSTR_VRR_VVV000M }, + { "vmal", 0xaa, INSTR_VRR_VVVM00V }, + { "vmae", 0xae, INSTR_VRR_VVVM00V }, + { "vmale", 0xac, INSTR_VRR_VVVM00V }, + { "vmah", 0xab, INSTR_VRR_VVVM00V }, + { "vmalh", 0xa9, INSTR_VRR_VVVM00V }, + { "vmao", 0xaf, INSTR_VRR_VVVM00V }, + { "vmalo", 0xad, INSTR_VRR_VVVM00V }, + { "vmh", 0xa3, INSTR_VRR_VVV000M }, + { "vmlh", 0xa1, INSTR_VRR_VVV000M }, + { "vml", 0xa2, INSTR_VRR_VVV000M }, + { "vme", 0xa6, INSTR_VRR_VVV000M }, + { "vmle", 0xa4, INSTR_VRR_VVV000M }, + { "vmo", 0xa7, INSTR_VRR_VVV000M }, + { "vmlo", 0xa5, INSTR_VRR_VVV000M }, + { "vno", 0x6b, INSTR_VRR_VVV0000 }, + { "vo", 0x6a, INSTR_VRR_VVV0000 }, + { { 0, LONG_INSN_VPOPCT }, 0x50, INSTR_VRR_VV0000M }, + { { 0, LONG_INSN_VERLLV }, 0x73, INSTR_VRR_VVV000M }, + { "verll", 0x33, INSTR_VRS_VVRDM }, + { "verim", 0x72, INSTR_VRI_VVV0IM }, + { 
"veslv", 0x70, INSTR_VRR_VVV000M }, + { "vesl", 0x30, INSTR_VRS_VVRDM }, + { { 0, LONG_INSN_VESRAV }, 0x7a, INSTR_VRR_VVV000M }, + { "vesra", 0x3a, INSTR_VRS_VVRDM }, + { { 0, LONG_INSN_VESRLV }, 0x78, INSTR_VRR_VVV000M }, + { "vesrl", 0x38, INSTR_VRS_VVRDM }, + { "vsl", 0x74, INSTR_VRR_VVV0000 }, + { "vslb", 0x75, INSTR_VRR_VVV0000 }, + { "vsldb", 0x77, INSTR_VRI_VVV0I0 }, + { "vsra", 0x7e, INSTR_VRR_VVV0000 }, + { "vsrab", 0x7f, INSTR_VRR_VVV0000 }, + { "vsrl", 0x7c, INSTR_VRR_VVV0000 }, + { "vsrlb", 0x7d, INSTR_VRR_VVV0000 }, + { "vs", 0xf7, INSTR_VRR_VVV000M }, + { "vscb", 0xf5, INSTR_VRR_VVV000M }, + { "vsb", 0xbf, INSTR_VRR_VVVM00V }, + { { 0, LONG_INSN_VSBCBI }, 0xbd, INSTR_VRR_VVVM00V }, + { "vsumg", 0x65, INSTR_VRR_VVV000M }, + { "vsumq", 0x67, INSTR_VRR_VVV000M }, + { "vsum", 0x64, INSTR_VRR_VVV000M }, + { "vtm", 0xd8, INSTR_VRR_VV00000 }, + { "vfae", 0x82, INSTR_VRR_VVV0M0M }, + { "vfee", 0x80, INSTR_VRR_VVV0M0M }, + { "vfene", 0x81, INSTR_VRR_VVV0M0M }, + { "vistr", 0x5c, INSTR_VRR_VV00M0M }, + { "vstrc", 0x8a, INSTR_VRR_VVVMM0V }, + { "vfa", 0xe3, INSTR_VRR_VVV00MM }, + { "wfc", 0xcb, INSTR_VRR_VV000MM }, + { "wfk", 0xca, INSTR_VRR_VV000MM }, + { "vfce", 0xe8, INSTR_VRR_VVV0MMM }, + { "vfch", 0xeb, INSTR_VRR_VVV0MMM }, + { "vfche", 0xea, INSTR_VRR_VVV0MMM }, + { "vcdg", 0xc3, INSTR_VRR_VV00MMM }, + { "vcdlg", 0xc1, INSTR_VRR_VV00MMM }, + { "vcgd", 0xc2, INSTR_VRR_VV00MMM }, + { "vclgd", 0xc0, INSTR_VRR_VV00MMM }, + { "vfd", 0xe5, INSTR_VRR_VVV00MM }, + { "vfi", 0xc7, INSTR_VRR_VV00MMM }, + { "vlde", 0xc4, INSTR_VRR_VV000MM }, + { "vled", 0xc5, INSTR_VRR_VV00MMM }, + { "vfm", 0xe7, INSTR_VRR_VVV00MM }, + { "vfma", 0x8f, INSTR_VRR_VVVM0MV }, + { "vfms", 0x8e, INSTR_VRR_VVVM0MV }, + { "vfpso", 0xcc, INSTR_VRR_VV00MMM }, + { "vfsq", 0xce, INSTR_VRR_VV000MM }, + { "vfs", 0xe2, INSTR_VRR_VVV00MM }, + { "vftci", 0x4a, INSTR_VRI_VVIMM }, +#endif +}; + static struct s390_insn opcode_eb[] = { #ifdef CONFIG_64BIT { "lmg", 0x04, INSTR_RSY_RRRD }, @@ -1552,16 
+1772,17 @@ static struct s390_insn opcode_ed[] = { static unsigned int extract_operand(unsigned char *code, const struct s390_operand *operand) { + unsigned char *cp; unsigned int val; int bits; /* Extract fragments of the operand byte for byte. */ - code += operand->shift / 8; + cp = code + operand->shift / 8; bits = (operand->shift & 7) + operand->bits; val = 0; do { val <<= 8; - val |= (unsigned int) *code++; + val |= (unsigned int) *cp++; bits -= 8; } while (bits > 0); val >>= -bits; @@ -1571,6 +1792,18 @@ static unsigned int extract_operand(unsigned char *code, if (operand->bits == 20 && operand->shift == 20) val = (val & 0xff) << 12 | (val & 0xfff00) >> 8; + /* Check for register extensions bits for vector registers. */ + if (operand->flags & OPERAND_VR) { + if (operand->shift == 8) + val |= (code[4] & 8) << 1; + else if (operand->shift == 12) + val |= (code[4] & 4) << 2; + else if (operand->shift == 16) + val |= (code[4] & 2) << 3; + else if (operand->shift == 32) + val |= (code[4] & 1) << 4; + } + /* Sign extend value if the operand is signed or pc relative. 
*/ if ((operand->flags & (OPERAND_SIGNED | OPERAND_PCREL)) && (val & (1U << (operand->bits - 1)))) @@ -1639,6 +1872,10 @@ struct s390_insn *find_insn(unsigned char *code) case 0xe5: table = opcode_e5; break; + case 0xe7: + table = opcode_e7; + opfrag = code[5]; + break; case 0xeb: table = opcode_eb; opfrag = code[5]; @@ -1734,6 +1971,8 @@ static int print_insn(char *buffer, unsigned char *code, unsigned long addr) ptr += sprintf(ptr, "%%a%i", value); else if (operand->flags & OPERAND_CR) ptr += sprintf(ptr, "%%c%i", value); + else if (operand->flags & OPERAND_VR) + ptr += sprintf(ptr, "%%v%i", value); else if (operand->flags & OPERAND_PCREL) ptr += sprintf(ptr, "%lx", (signed int) value + addr); -- cgit v0.10.2 From a62bc0739253939d6fce40d51d92412252a9bb55 Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Mon, 6 Oct 2014 17:57:43 +0200 Subject: s390/kdump: add support for vector extension With this patch for kdump the s390 vector registers are stored into the prepared save areas in the old kernel and into the REGSET_VX_LOW and REGSET_VX_HIGH ELF notes for /proc/vmcore in the new kernel. The NT_S390_VXRS_LOW note contains the lower halves of the first 16 vector registers 0-15. The higher halves are stored in the floating point register ELF note. The NT_S390_VXRS_HIGH contains the full vector registers 16-31. The kernel provides a save area for storing vector registers in case of machine checks. A pointer to this save area is stored in the CPU lowcore at offset 0x11b0. This save area is also used to save the registers for kdump. In case of a dumped crashed kdump those areas are used to extract the registers of the production system. The vector registers for remote CPUs are stored using the "store additional status at address" SIGP. For the dump CPU the vector registers are stored with the VSTM instruction. With this patch also zfcpdump stores the vector registers. 
Reviewed-by: Heiko Carstens Signed-off-by: Michael Holzheu Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h index 27735ae..f6e43d3 100644 --- a/arch/s390/include/asm/elf.h +++ b/arch/s390/include/asm/elf.h @@ -226,6 +226,6 @@ int arch_setup_additional_pages(struct linux_binprm *, int); extern unsigned long arch_randomize_brk(struct mm_struct *mm); #define arch_randomize_brk arch_randomize_brk -void *fill_cpu_elf_notes(void *ptr, struct save_area *sa); +void *fill_cpu_elf_notes(void *ptr, struct save_area *sa, __vector128 *vxrs); #endif diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h index c81661e..ece606c 100644 --- a/arch/s390/include/asm/ipl.h +++ b/arch/s390/include/asm/ipl.h @@ -89,12 +89,12 @@ extern u32 ipl_flags; extern u32 dump_prefix_page; struct dump_save_areas { - struct save_area **areas; + struct save_area_ext **areas; int count; }; extern struct dump_save_areas dump_save_areas; -struct save_area *dump_save_area_create(int cpu); +struct save_area_ext *dump_save_area_create(int cpu); extern void do_reipl(void); extern void do_halt(void); diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index d812cf1..6cc51fe 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -11,6 +11,7 @@ #include #include #include +#include #ifdef CONFIG_32BIT @@ -31,6 +32,11 @@ struct save_area { u32 ctrl_regs[16]; } __packed; +struct save_area_ext { + struct save_area sa; + __vector128 vx_regs[32]; +}; + struct _lowcore { psw_t restart_psw; /* 0x0000 */ psw_t restart_old_psw; /* 0x0008 */ @@ -183,6 +189,11 @@ struct save_area { u64 ctrl_regs[16]; } __packed; +struct save_area_ext { + struct save_area sa; + __vector128 vx_regs[32]; +}; + struct _lowcore { __u8 pad_0x0000[0x0014-0x0000]; /* 0x0000 */ __u32 ipl_parmblock_ptr; /* 0x0014 */ diff --git a/arch/s390/include/asm/sigp.h b/arch/s390/include/asm/sigp.h index bf9c823..4957611 100644 --- 
a/arch/s390/include/asm/sigp.h +++ b/arch/s390/include/asm/sigp.h @@ -15,6 +15,7 @@ #define SIGP_SET_ARCHITECTURE 18 #define SIGP_COND_EMERGENCY_SIGNAL 19 #define SIGP_SENSE_RUNNING 21 +#define SIGP_STORE_ADDITIONAL_STATUS 23 /* SIGP condition codes */ #define SIGP_CC_ORDER_CODE_ACCEPTED 0 @@ -33,9 +34,10 @@ #ifndef __ASSEMBLY__ -static inline int __pcpu_sigp(u16 addr, u8 order, u32 parm, u32 *status) +static inline int __pcpu_sigp(u16 addr, u8 order, unsigned long parm, + u32 *status) { - register unsigned int reg1 asm ("1") = parm; + register unsigned long reg1 asm ("1") = parm; int cc; asm volatile( diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h index 0e01095..2542a7e 100644 --- a/arch/s390/include/asm/switch_to.h +++ b/arch/s390/include/asm/switch_to.h @@ -114,6 +114,19 @@ static inline void save_vx_regs(__vector128 *vxrs) : "=Q" (*(addrtype *) vxrs) : : "1"); } +static inline void save_vx_regs_safe(__vector128 *vxrs) +{ + unsigned long cr0, flags; + + flags = arch_local_irq_save(); + __ctl_store(cr0, 0, 0); + __ctl_set_bit(0, 17); + __ctl_set_bit(0, 18); + save_vx_regs(vxrs); + __ctl_load(cr0, 0, 0); + arch_local_irq_restore(flags); +} + static inline void restore_vx_regs(__vector128 *vxrs) { typedef struct { __vector128 _[__NUM_VXRS]; } addrtype; diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index a3b9150..9f73c80 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -46,9 +46,9 @@ struct dump_save_areas dump_save_areas; /* * Allocate and add a save area for a CPU */ -struct save_area *dump_save_area_create(int cpu) +struct save_area_ext *dump_save_area_create(int cpu) { - struct save_area **save_areas, *save_area; + struct save_area_ext **save_areas, *save_area; save_area = kmalloc(sizeof(*save_area), GFP_KERNEL); if (!save_area) @@ -386,9 +386,45 @@ static void *nt_s390_prefix(void *ptr, struct save_area *sa) } /* + * Initialize vxrs high note (full 128 bit VX 
registers 16-31) + */ +static void *nt_s390_vx_high(void *ptr, __vector128 *vx_regs) +{ + return nt_init(ptr, NT_S390_VXRS_HIGH, &vx_regs[16], + 16 * sizeof(__vector128), KEXEC_CORE_NOTE_NAME); +} + +/* + * Initialize vxrs low note (lower halves of VX registers 0-15) + */ +static void *nt_s390_vx_low(void *ptr, __vector128 *vx_regs) +{ + Elf64_Nhdr *note; + u64 len; + int i; + + note = (Elf64_Nhdr *)ptr; + note->n_namesz = strlen(KEXEC_CORE_NOTE_NAME) + 1; + note->n_descsz = 16 * 8; + note->n_type = NT_S390_VXRS_LOW; + len = sizeof(Elf64_Nhdr); + + memcpy(ptr + len, KEXEC_CORE_NOTE_NAME, note->n_namesz); + len = roundup(len + note->n_namesz, 4); + + ptr += len; + /* Copy lower halves of SIMD registers 0-15 */ + for (i = 0; i < 16; i++) { + memcpy(ptr, &vx_regs[i], 8); + ptr += 8; + } + return ptr; +} + +/* * Fill ELF notes for one CPU with save area registers */ -void *fill_cpu_elf_notes(void *ptr, struct save_area *sa) +void *fill_cpu_elf_notes(void *ptr, struct save_area *sa, __vector128 *vx_regs) { ptr = nt_prstatus(ptr, sa); ptr = nt_fpregset(ptr, sa); @@ -397,6 +433,10 @@ void *fill_cpu_elf_notes(void *ptr, struct save_area *sa) ptr = nt_s390_tod_preg(ptr, sa); ptr = nt_s390_ctrs(ptr, sa); ptr = nt_s390_prefix(ptr, sa); + if (MACHINE_HAS_VX && vx_regs) { + ptr = nt_s390_vx_low(ptr, vx_regs); + ptr = nt_s390_vx_high(ptr, vx_regs); + } return ptr; } @@ -484,7 +524,7 @@ static int get_cpu_cnt(void) int i, cpus = 0; for (i = 0; i < dump_save_areas.count; i++) { - if (dump_save_areas.areas[i]->pref_reg == 0) + if (dump_save_areas.areas[i]->sa.pref_reg == 0) continue; cpus++; } @@ -530,17 +570,17 @@ static void loads_init(Elf64_Phdr *phdr, u64 loads_offset) */ static void *notes_init(Elf64_Phdr *phdr, void *ptr, u64 notes_offset) { - struct save_area *sa; + struct save_area_ext *sa_ext; void *ptr_start = ptr; int i; ptr = nt_prpsinfo(ptr); for (i = 0; i < dump_save_areas.count; i++) { - sa = dump_save_areas.areas[i]; - if (sa->pref_reg == 0) + sa_ext = 
dump_save_areas.areas[i]; + if (sa_ext->sa.pref_reg == 0) continue; - ptr = fill_cpu_elf_notes(ptr, sa); + ptr = fill_cpu_elf_notes(ptr, &sa_ext->sa, sa_ext->vx_regs); } ptr = nt_vmcoreinfo(ptr); memset(phdr, 0, sizeof(*phdr)); @@ -581,7 +621,7 @@ int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size) mem_chunk_cnt = get_mem_chunk_cnt(); - alloc_size = 0x1000 + get_cpu_cnt() * 0x300 + + alloc_size = 0x1000 + get_cpu_cnt() * 0x4a0 + mem_chunk_cnt * sizeof(Elf64_Phdr); hdr = kzalloc_panic(alloc_size); /* Init elf header */ diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index 719e27b..4685337 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -25,6 +25,7 @@ #include #include #include +#include typedef void (*relocate_kernel_t)(kimage_entry_t *, unsigned long); @@ -43,7 +44,7 @@ static void add_elf_notes(int cpu) memcpy((void *) (4608UL + sa->pref_reg), sa, sizeof(*sa)); ptr = (u64 *) per_cpu_ptr(crash_notes, cpu); - ptr = fill_cpu_elf_notes(ptr, sa); + ptr = fill_cpu_elf_notes(ptr, sa, NULL); memset(ptr, 0, sizeof(struct elf_note)); } @@ -53,8 +54,11 @@ static void add_elf_notes(int cpu) static void setup_regs(void) { unsigned long sa = S390_lowcore.prefixreg_save_area + SAVE_AREA_BASE; + struct _lowcore *lc; int cpu, this_cpu; + /* Get lowcore pointer from store status of this CPU (absolute zero) */ + lc = (struct _lowcore *)(unsigned long)S390_lowcore.prefixreg_save_area; this_cpu = smp_find_processor_id(stap()); add_elf_notes(this_cpu); for_each_online_cpu(cpu) { @@ -64,6 +68,8 @@ static void setup_regs(void) continue; add_elf_notes(cpu); } + if (MACHINE_HAS_VX) + save_vx_regs_safe((void *) lc->vector_save_area_addr); /* Copy dump CPU store status info to absolute zero */ memcpy((void *) SAVE_AREA_BASE, (void *) sa, sizeof(struct save_area)); } diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 13cae5b..6fd9e60 100644 --- a/arch/s390/kernel/smp.c +++ 
b/arch/s390/kernel/smp.c @@ -83,7 +83,8 @@ DEFINE_MUTEX(smp_cpu_state_mutex); /* * Signal processor helper functions. */ -static inline int __pcpu_sigp_relax(u16 addr, u8 order, u32 parm, u32 *status) +static inline int __pcpu_sigp_relax(u16 addr, u8 order, unsigned long parm, + u32 *status) { int cc; @@ -515,35 +516,53 @@ EXPORT_SYMBOL(smp_ctl_clear_bit); static void __init smp_get_save_area(int cpu, u16 address) { void *lc = pcpu_devices[0].lowcore; - struct save_area *save_area; + struct save_area_ext *sa_ext; + unsigned long vx_sa; if (is_kdump_kernel()) return; if (!OLDMEM_BASE && (address == boot_cpu_address || ipl_info.type != IPL_TYPE_FCP_DUMP)) return; - save_area = dump_save_area_create(cpu); - if (!save_area) + sa_ext = dump_save_area_create(cpu); + if (!sa_ext) panic("could not allocate memory for save area\n"); if (address == boot_cpu_address) { /* Copy the registers of the boot cpu. */ - copy_oldmem_page(1, (void *) save_area, sizeof(*save_area), + copy_oldmem_page(1, (void *) &sa_ext->sa, sizeof(sa_ext->sa), SAVE_AREA_BASE - PAGE_SIZE, 0); + if (MACHINE_HAS_VX) + save_vx_regs_safe(sa_ext->vx_regs); return; } /* Get the registers of a non-boot cpu. 
*/ __pcpu_sigp_relax(address, SIGP_STOP_AND_STORE_STATUS, 0, NULL); - memcpy_real(save_area, lc + SAVE_AREA_BASE, sizeof(*save_area)); + memcpy_real(&sa_ext->sa, lc + SAVE_AREA_BASE, sizeof(sa_ext->sa)); + if (!MACHINE_HAS_VX) + return; + /* Get the VX registers */ + vx_sa = __get_free_page(GFP_KERNEL); + if (!vx_sa) + panic("could not allocate memory for VX save area\n"); + __pcpu_sigp_relax(address, SIGP_STORE_ADDITIONAL_STATUS, vx_sa, NULL); + memcpy(sa_ext->vx_regs, (void *) vx_sa, sizeof(sa_ext->vx_regs)); + free_page(vx_sa); } int smp_store_status(int cpu) { + unsigned long vx_sa; struct pcpu *pcpu; pcpu = pcpu_devices + cpu; if (__pcpu_sigp_relax(pcpu->address, SIGP_STOP_AND_STORE_STATUS, 0, NULL) != SIGP_CC_ORDER_CODE_ACCEPTED) return -EIO; + if (!MACHINE_HAS_VX) + return 0; + vx_sa = __pa(pcpu->lowcore->vector_save_area_addr); + __pcpu_sigp_relax(pcpu->address, SIGP_STORE_ADDITIONAL_STATUS, + vx_sa, NULL); return 0; } diff --git a/drivers/s390/char/zcore.c b/drivers/s390/char/zcore.c index 1884653..efcf484 100644 --- a/drivers/s390/char/zcore.c +++ b/drivers/s390/char/zcore.c @@ -28,6 +28,7 @@ #include #include #include +#include #include "sclp.h" #define TRACE(x...) 
debug_sprintf_event(zcore_dbf, 1, x) @@ -149,18 +150,21 @@ static int memcpy_hsa_kernel(void *dest, unsigned long src, size_t count) static int __init init_cpu_info(enum arch_id arch) { - struct save_area *sa; + struct save_area_ext *sa_ext; /* get info for boot cpu from lowcore, stored in the HSA */ - sa = dump_save_area_create(0); - if (!sa) + sa_ext = dump_save_area_create(0); + if (!sa_ext) return -ENOMEM; - if (memcpy_hsa_kernel(sa, sys_info.sa_base, sys_info.sa_size) < 0) { + if (memcpy_hsa_kernel(&sa_ext->sa, sys_info.sa_base, + sys_info.sa_size) < 0) { TRACE("could not copy from HSA\n"); - kfree(sa); + kfree(sa_ext); return -EIO; } + if (MACHINE_HAS_VX) + save_vx_regs_safe(sa_ext->vx_regs); return 0; } @@ -258,7 +262,7 @@ static int zcore_add_lc(char __user *buf, unsigned long start, size_t count) unsigned long sa_start, sa_end; /* save area range */ unsigned long prefix; unsigned long sa_off, len, buf_off; - struct save_area *save_area = dump_save_areas.areas[i]; + struct save_area *save_area = &dump_save_areas.areas[i]->sa; prefix = save_area->pref_reg; sa_start = prefix + sys_info.sa_base; @@ -612,7 +616,7 @@ static void __init zcore_header_init(int arch, struct zcore_header *hdr, hdr->tod = get_tod_clock(); get_cpu_id(&hdr->cpu_id); for (i = 0; i < dump_save_areas.count; i++) { - prefix = dump_save_areas.areas[i]->pref_reg; + prefix = dump_save_areas.areas[i]->sa.pref_reg; hdr->real_cpu_cnt++; if (!prefix) continue; -- cgit v0.10.2 From 53255c9a4dade6ff2162121430d13aaadb38a69c Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 7 Oct 2014 15:45:10 +0200 Subject: s390/ftrace: remove 31 bit ftrace support 31 bit and 64 bit diverge more and more and it is rather painful to keep both parts running. To make things simpler just remove the 31 bit support which nobody uses anyway. 
Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 608adfb..95174d2 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -117,11 +117,11 @@ config S390 select HAVE_CMPXCHG_LOCAL select HAVE_C_RECORDMCOUNT select HAVE_DEBUG_KMEMLEAK - select HAVE_DYNAMIC_FTRACE + select HAVE_DYNAMIC_FTRACE if 64BIT select HAVE_DYNAMIC_FTRACE_WITH_REGS if 64BIT select HAVE_FTRACE_MCOUNT_RECORD - select HAVE_FUNCTION_GRAPH_TRACER - select HAVE_FUNCTION_TRACER + select HAVE_FUNCTION_GRAPH_TRACER if 64BIT + select HAVE_FUNCTION_TRACER if 64BIT select HAVE_FUTEX_CMPXCHG if FUTEX select HAVE_KERNEL_BZIP2 select HAVE_KERNEL_GZIP diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h index d419362..3aef8af 100644 --- a/arch/s390/include/asm/ftrace.h +++ b/arch/s390/include/asm/ftrace.h @@ -18,14 +18,8 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr) #endif /* __ASSEMBLY__ */ -#ifdef CONFIG_64BIT #define MCOUNT_INSN_SIZE 18 -#else -#define MCOUNT_INSN_SIZE 22 -#endif -#ifdef CONFIG_64BIT #define ARCH_SUPPORTS_FTRACE_OPS 1 -#endif #endif /* _ASM_S390_FTRACE_H */ diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index c249785..204c43a 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -52,8 +52,7 @@ obj-$(CONFIG_COMPAT) += compat_wrapper.o $(compat-obj-y) obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-$(CONFIG_KPROBES) += kprobes.o -obj-$(CONFIG_FUNCTION_TRACER) += $(if $(CONFIG_64BIT),mcount64.o,mcount.o) -obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o +obj-$(CONFIG_FUNCTION_TRACER) += mcount.o ftrace.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_UPROBES) += uprobes.o diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index fcb009d..f007212 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -21,9 +21,8 @@ void mcount_replace_code(void); void ftrace_disable_code(void); void 
ftrace_enable_insn(void); -#ifdef CONFIG_64BIT /* - * The 64-bit mcount code looks like this: + * The mcount code looks like this: * stg %r14,8(%r15) # offset 0 * larl %r1,<&counter> # offset 6 * brasl %r14,_mcount # offset 12 @@ -34,7 +33,7 @@ void ftrace_enable_insn(void); * Note: we do not patch the first instruction to an unconditional branch, * since that would break kprobes/jprobes. It is easier to leave the larl * instruction in and only modify the second instruction. - * The 64-bit enabled ftrace code block looks like this: + * The enabled ftrace code block looks like this: * larl %r0,.+24 # offset 0 * > lg %r1,__LC_FTRACE_FUNC # offset 6 * br %r1 # offset 12 @@ -71,65 +70,15 @@ asm( #define MCOUNT_INSN_OFFSET 6 #define FTRACE_INSN_SIZE 6 -#else /* CONFIG_64BIT */ -/* - * The 31-bit mcount code looks like this: - * st %r14,4(%r15) # offset 0 - * > bras %r1,0f # offset 4 - * > .long _mcount # offset 8 - * > .long <&counter> # offset 12 - * > 0: l %r14,0(%r1) # offset 16 - * > l %r1,4(%r1) # offset 20 - * basr %r14,%r14 # offset 24 - * l %r14,4(%r15) # offset 26 - * Total length is 30 bytes. The twenty bytes starting from offset 4 - * to offset 24 get overwritten by ftrace_make_nop / ftrace_make_call. - * The 31-bit enabled ftrace code block looks like this: - * st %r14,4(%r15) # offset 0 - * > l %r14,__LC_FTRACE_FUNC # offset 4 - * > j 0f # offset 8 - * > .fill 12,1,0x07 # offset 12 - * 0: basr %r14,%r14 # offset 24 - * l %r14,4(%r14) # offset 26 - * The return points of the mcount/ftrace function have the same offset 26. - * The 31-bit disabled ftrace code block looks like this: - * st %r14,4(%r15) # offset 0 - * > j .+26 # offset 4 - * > j 0f # offset 8 - * > .fill 12,1,0x07 # offset 12 - * 0: basr %r14,%r14 # offset 24 - * l %r14,4(%r14) # offset 26 - * The j instruction branches to offset 30 to skip as many instructions - * as possible. 
- */ -asm( - " .align 4\n" - "ftrace_disable_code:\n" - " j 1f\n" - " j 0f\n" - " .fill 12,1,0x07\n" - "0: basr %r14,%r14\n" - "1:\n" - " .align 4\n" - "ftrace_enable_insn:\n" - " l %r14,"__stringify(__LC_FTRACE_FUNC)"\n"); - -#define FTRACE_INSN_SIZE 4 - -#endif /* CONFIG_64BIT */ - -#ifdef CONFIG_64BIT int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr) { return 0; } -#endif int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) { -#ifdef CONFIG_64BIT /* Initial replacement of the whole mcount block */ if (addr == MCOUNT_ADDR) { if (probe_kernel_write((void *) rec->ip - MCOUNT_INSN_OFFSET, @@ -138,7 +87,6 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, return -EPERM; return 0; } -#endif if (probe_kernel_write((void *) rec->ip, ftrace_disable_code, MCOUNT_INSN_SIZE)) return -EPERM; @@ -196,8 +144,6 @@ out: * the original offset to prepare_ftrace_return and put it back. */ -#ifdef CONFIG_64BIT - int ftrace_enable_ftrace_graph_caller(void) { static unsigned short offset = 0x0002; @@ -216,25 +162,4 @@ int ftrace_disable_ftrace_graph_caller(void) &offset, sizeof(offset)); } -#else /* CONFIG_64BIT */ - -int ftrace_enable_ftrace_graph_caller(void) -{ - unsigned short offset; - - offset = ((void *) prepare_ftrace_return - - (void *) ftrace_graph_caller) / 2; - return probe_kernel_write((void *) ftrace_graph_caller + 2, - &offset, sizeof(offset)); -} - -int ftrace_disable_ftrace_graph_caller(void) -{ - static unsigned short offset = 0x0002; - - return probe_kernel_write((void *) ftrace_graph_caller + 2, - &offset, sizeof(offset)); -} - -#endif /* CONFIG_64BIT */ #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S index be6dbd9..07abe8d 100644 --- a/arch/s390/kernel/mcount.S +++ b/arch/s390/kernel/mcount.S @@ -8,60 +8,73 @@ #include #include #include +#include .section .kprobes.text, "ax" ENTRY(ftrace_stub) br %r14 +#define 
STACK_FRAME_SIZE (STACK_FRAME_OVERHEAD + __PT_SIZE) +#define STACK_PTREGS (STACK_FRAME_OVERHEAD) +#define STACK_PTREGS_GPRS (STACK_PTREGS + __PT_GPRS) +#define STACK_PTREGS_PSW (STACK_PTREGS + __PT_PSW) + ENTRY(_mcount) br %r14 ENTRY(ftrace_caller) - stm %r2,%r5,16(%r15) - bras %r1,1f -0: .long ftrace_trace_function -1: st %r14,56(%r15) - lr %r0,%r15 - ahi %r15,-96 - l %r3,100(%r15) - la %r2,0(%r14) - st %r0,__SF_BACKCHAIN(%r15) - la %r3,0(%r3) - ahi %r2,-MCOUNT_INSN_SIZE - l %r14,0b-0b(%r1) - l %r14,0(%r14) - basr %r14,%r14 + .globl ftrace_regs_caller + .set ftrace_regs_caller,ftrace_caller + lgr %r1,%r15 + aghi %r15,-STACK_FRAME_SIZE + stg %r1,__SF_BACKCHAIN(%r15) + stg %r1,(STACK_PTREGS_GPRS+15*8)(%r15) + stg %r0,(STACK_PTREGS_PSW+8)(%r15) + stmg %r2,%r14,(STACK_PTREGS_GPRS+2*8)(%r15) +#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES + aghik %r2,%r0,-MCOUNT_INSN_SIZE + lgrl %r4,function_trace_op + lgrl %r1,ftrace_trace_function +#else + lgr %r2,%r0 + aghi %r2,-MCOUNT_INSN_SIZE + larl %r4,function_trace_op + lg %r4,0(%r4) + larl %r1,ftrace_trace_function + lg %r1,0(%r1) +#endif + lgr %r3,%r14 + la %r5,STACK_PTREGS(%r15) + basr %r14,%r1 #ifdef CONFIG_FUNCTION_GRAPH_TRACER - l %r2,100(%r15) - l %r3,152(%r15) -ENTRY(ftrace_graph_caller) -# The bras instruction gets runtime patched to call prepare_ftrace_return. +# The j instruction gets runtime patched to a nop instruction. # See ftrace_enable_ftrace_graph_caller. 
The patched instruction is: -# bras %r14,prepare_ftrace_return - bras %r14,0f -0: st %r2,100(%r15) +# j .+4 +ENTRY(ftrace_graph_caller) + j ftrace_graph_caller_end + lg %r2,(STACK_PTREGS_GPRS+14*8)(%r15) + lg %r3,(STACK_PTREGS_PSW+8)(%r15) + brasl %r14,prepare_ftrace_return + stg %r2,(STACK_PTREGS_GPRS+14*8)(%r15) +ftrace_graph_caller_end: + .globl ftrace_graph_caller_end #endif - ahi %r15,96 - l %r14,56(%r15) - lm %r2,%r5,16(%r15) - br %r14 + lg %r1,(STACK_PTREGS_PSW+8)(%r15) + lmg %r2,%r15,(STACK_PTREGS_GPRS+2*8)(%r15) + br %r1 #ifdef CONFIG_FUNCTION_GRAPH_TRACER ENTRY(return_to_handler) - stm %r2,%r5,16(%r15) - st %r14,56(%r15) - lr %r0,%r15 - ahi %r15,-96 - st %r0,__SF_BACKCHAIN(%r15) - bras %r1,0f - .long ftrace_return_to_handler -0: l %r2,0b-0b(%r1) - basr %r14,%r2 - lr %r14,%r2 - ahi %r15,96 - lm %r2,%r5,16(%r15) + stmg %r2,%r5,32(%r15) + lgr %r1,%r15 + aghi %r15,-STACK_FRAME_OVERHEAD + stg %r1,__SF_BACKCHAIN(%r15) + brasl %r14,ftrace_return_to_handler + aghi %r15,STACK_FRAME_OVERHEAD + lgr %r14,%r2 + lmg %r2,%r5,32(%r15) br %r14 #endif diff --git a/arch/s390/kernel/mcount64.S b/arch/s390/kernel/mcount64.S deleted file mode 100644 index 07abe8d..0000000 --- a/arch/s390/kernel/mcount64.S +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright IBM Corp. 
2008, 2009 - * - * Author(s): Heiko Carstens , - * - */ - -#include -#include -#include -#include - - .section .kprobes.text, "ax" - -ENTRY(ftrace_stub) - br %r14 - -#define STACK_FRAME_SIZE (STACK_FRAME_OVERHEAD + __PT_SIZE) -#define STACK_PTREGS (STACK_FRAME_OVERHEAD) -#define STACK_PTREGS_GPRS (STACK_PTREGS + __PT_GPRS) -#define STACK_PTREGS_PSW (STACK_PTREGS + __PT_PSW) - -ENTRY(_mcount) - br %r14 - -ENTRY(ftrace_caller) - .globl ftrace_regs_caller - .set ftrace_regs_caller,ftrace_caller - lgr %r1,%r15 - aghi %r15,-STACK_FRAME_SIZE - stg %r1,__SF_BACKCHAIN(%r15) - stg %r1,(STACK_PTREGS_GPRS+15*8)(%r15) - stg %r0,(STACK_PTREGS_PSW+8)(%r15) - stmg %r2,%r14,(STACK_PTREGS_GPRS+2*8)(%r15) -#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES - aghik %r2,%r0,-MCOUNT_INSN_SIZE - lgrl %r4,function_trace_op - lgrl %r1,ftrace_trace_function -#else - lgr %r2,%r0 - aghi %r2,-MCOUNT_INSN_SIZE - larl %r4,function_trace_op - lg %r4,0(%r4) - larl %r1,ftrace_trace_function - lg %r1,0(%r1) -#endif - lgr %r3,%r14 - la %r5,STACK_PTREGS(%r15) - basr %r14,%r1 -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -# The j instruction gets runtime patched to a nop instruction. -# See ftrace_enable_ftrace_graph_caller. 
The patched instruction is: -# j .+4 -ENTRY(ftrace_graph_caller) - j ftrace_graph_caller_end - lg %r2,(STACK_PTREGS_GPRS+14*8)(%r15) - lg %r3,(STACK_PTREGS_PSW+8)(%r15) - brasl %r14,prepare_ftrace_return - stg %r2,(STACK_PTREGS_GPRS+14*8)(%r15) -ftrace_graph_caller_end: - .globl ftrace_graph_caller_end -#endif - lg %r1,(STACK_PTREGS_PSW+8)(%r15) - lmg %r2,%r15,(STACK_PTREGS_GPRS+2*8)(%r15) - br %r1 - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - -ENTRY(return_to_handler) - stmg %r2,%r5,32(%r15) - lgr %r1,%r15 - aghi %r15,-STACK_FRAME_OVERHEAD - stg %r1,__SF_BACKCHAIN(%r15) - brasl %r14,ftrace_return_to_handler - aghi %r15,STACK_FRAME_OVERHEAD - lgr %r14,%r2 - lmg %r2,%r5,32(%r15) - br %r14 - -#endif diff --git a/scripts/recordmcount.c b/scripts/recordmcount.c index 650ecc83..001facf 100644 --- a/scripts/recordmcount.c +++ b/scripts/recordmcount.c @@ -388,10 +388,6 @@ do_file(char const *const fname) "unrecognized ET_REL file: %s\n", fname); fail_file(); } - if (w2(ehdr->e_machine) == EM_S390) { - reltype = R_390_32; - mcount_adjust_32 = -4; - } if (w2(ehdr->e_machine) == EM_MIPS) { reltype = R_MIPS_32; is_fake_mcount32 = MIPS32_is_fake_mcount; diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl index 397b6b8..d4b6656 100755 --- a/scripts/recordmcount.pl +++ b/scripts/recordmcount.pl @@ -241,13 +241,6 @@ if ($arch eq "x86_64") { $objcopy .= " -O elf32-i386"; $cc .= " -m32"; -} elsif ($arch eq "s390" && $bits == 32) { - $mcount_regex = "^\\s*([0-9a-fA-F]+):\\s*R_390_32\\s+_mcount\$"; - $mcount_adjust = -4; - $alignment = 4; - $ld .= " -m elf_s390"; - $cc .= " -m31"; - } elsif ($arch eq "s390" && $bits == 64) { $mcount_regex = "^\\s*([0-9a-fA-F]+):\\s*R_390_(PC|PLT)32DBL\\s+_mcount\\+0x2\$"; $mcount_adjust = -8; -- cgit v0.10.2 From 0cccdda8d1512af4d3f6913044e8c8e58e15ef37 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 8 Oct 2014 10:03:08 +0200 Subject: s390/ftrace: simplify enabling/disabling of ftrace_graph_caller We can simply patch the mask field 
within the branch relative on condition instruction at the beginning of the ftrace_graph_caller code block. This makes the logic even simpler and we get rid of the displacement calculation. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index f007212..51d14fe5 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -138,28 +138,24 @@ out: /* * Patch the kernel code at ftrace_graph_caller location. The instruction - * there is branch relative and save to prepare_ftrace_return. To disable - * the call to prepare_ftrace_return we patch the bras offset to point - * directly after the instructions. To enable the call we calculate - * the original offset to prepare_ftrace_return and put it back. + * there is branch relative on condition. To enable the ftrace graph code + * block, we simply patch the mask field of the instruction to zero and + * turn the instruction into a nop. + * To disable the ftrace graph code the mask field will be patched to + * all ones, which turns the instruction into an unconditional branch. 
*/ - int ftrace_enable_ftrace_graph_caller(void) { - static unsigned short offset = 0x0002; + u8 op = 0x04; /* set mask field to zero */ - return probe_kernel_write((void *) ftrace_graph_caller + 2, - &offset, sizeof(offset)); + return probe_kernel_write(__va(ftrace_graph_caller)+1, &op, sizeof(op)); } int ftrace_disable_ftrace_graph_caller(void) { - unsigned short offset; + u8 op = 0xf4; /* set mask field to all ones */ - offset = ((void *) &ftrace_graph_caller_end - - (void *) ftrace_graph_caller) / 2; - return probe_kernel_write((void *) ftrace_graph_caller + 2, - &offset, sizeof(offset)); + return probe_kernel_write(__va(ftrace_graph_caller)+1, &op, sizeof(op)); } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S index 07abe8d..4300ea3 100644 --- a/arch/s390/kernel/mcount.S +++ b/arch/s390/kernel/mcount.S @@ -49,8 +49,7 @@ ENTRY(ftrace_caller) basr %r14,%r1 #ifdef CONFIG_FUNCTION_GRAPH_TRACER # The j instruction gets runtime patched to a nop instruction. -# See ftrace_enable_ftrace_graph_caller. The patched instruction is: -# j .+4 +# See ftrace_enable_ftrace_graph_caller. ENTRY(ftrace_graph_caller) j ftrace_graph_caller_end lg %r2,(STACK_PTREGS_GPRS+14*8)(%r15) -- cgit v0.10.2