From 94038a99119c171aea27608f81c7ba359de98c4e Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 17 May 2010 10:00:00 +0200 Subject: [S390] More cleanup for struct _lowcore Remove cpu_id from lowcore and replace addr_t with __u64. Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 05527c0..f7e78c7 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -126,16 +126,15 @@ struct _lowcore { __u32 user_exec_asce; /* 0x02ac */ /* SMP info area */ - struct cpuid cpu_id; /* 0x02b0 */ - __u32 cpu_nr; /* 0x02b8 */ - __u32 softirq_pending; /* 0x02bc */ - __u32 percpu_offset; /* 0x02c0 */ - __u32 ext_call_fast; /* 0x02c4 */ - __u64 int_clock; /* 0x02c8 */ - __u64 clock_comparator; /* 0x02d0 */ - __u32 machine_flags; /* 0x02d8 */ - __u32 ftrace_func; /* 0x02dc */ - __u8 pad_0x02e0[0x0300-0x02e0]; /* 0x02e0 */ + __u32 cpu_nr; /* 0x02b0 */ + __u32 softirq_pending; /* 0x02b4 */ + __u32 percpu_offset; /* 0x02b8 */ + __u32 ext_call_fast; /* 0x02bc */ + __u64 int_clock; /* 0x02c0 */ + __u64 clock_comparator; /* 0x02c8 */ + __u32 machine_flags; /* 0x02d0 */ + __u32 ftrace_func; /* 0x02d4 */ + __u8 pad_0x02d8[0x0300-0x02d8]; /* 0x02d8 */ /* Interrupt response block */ __u8 irb[64]; /* 0x0300 */ @@ -189,14 +188,14 @@ struct _lowcore { __u32 data_exc_code; /* 0x0090 */ __u16 mon_class_num; /* 0x0094 */ __u16 per_perc_atmid; /* 0x0096 */ - addr_t per_address; /* 0x0098 */ + __u64 per_address; /* 0x0098 */ __u8 exc_access_id; /* 0x00a0 */ __u8 per_access_id; /* 0x00a1 */ __u8 op_access_id; /* 0x00a2 */ __u8 ar_access_id; /* 0x00a3 */ __u8 pad_0x00a4[0x00a8-0x00a4]; /* 0x00a4 */ - addr_t trans_exc_code; /* 0x00a8 */ - addr_t monitor_code; /* 0x00b0 */ + __u64 trans_exc_code; /* 0x00a8 */ + __u64 monitor_code; /* 0x00b0 */ __u16 subchannel_id; /* 0x00b8 */ __u16 subchannel_nr; /* 0x00ba */ __u32 io_int_parm; /* 0x00bc */ @@ -207,7 +206,7 @@ struct _lowcore { __u32 mcck_interruption_code[2]; /* 0x00e8 */ __u8 pad_0x00f0[0x00f4-0x00f0]; /* 0x00f0 */ __u32 external_damage_code; /* 0x00f4 */ - addr_t failing_storage_address; /* 0x00f8 */ + __u64 failing_storage_address; /* 0x00f8 */ __u8 pad_0x0100[0x0110-0x0100]; /* 0x0100 */ __u64 breaking_event_addr; /* 0x0110 */ __u8 pad_0x0118[0x0120-0x0118]; /* 0x0118 */ @@ -255,17 +254,16 @@ struct _lowcore { __u64 user_exec_asce; /* 0x0318 */ /* SMP info area */ - struct cpuid cpu_id; /* 0x0320 */ - __u32 cpu_nr; /* 0x0328 */ - __u32 softirq_pending; /* 0x032c */ - __u64 percpu_offset; /* 0x0330 */ - __u64 ext_call_fast; /* 0x0338 */ - __u64 int_clock; /* 0x0340 */ - __u64 clock_comparator; /* 0x0348 */ - __u64 vdso_per_cpu_data; /* 0x0350 */ - __u64 machine_flags; /* 0x0358 */ - __u64 ftrace_func; /* 0x0360 */ - __u8 pad_0x0368[0x0380-0x0368]; /* 0x0368 */ + __u32 cpu_nr; /* 0x0320 */ + __u32 softirq_pending; /* 0x0324 */ + __u64 percpu_offset; /* 0x0328 */ + __u64 ext_call_fast; /* 0x0330 */ + __u64 int_clock; /* 0x0338 */ + __u64 clock_comparator; /* 0x0340 */ + __u64 vdso_per_cpu_data; /* 0x0348 */ + __u64 machine_flags; /* 0x0350 */ + __u64 ftrace_func; /* 0x0358 */ + __u8 pad_0x0368[0x0380-0x0360]; /* 0x0360 */ /* Interrupt response block. */ __u8 irb[64]; /* 0x0380 */ diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index a094089..32b1ede 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -126,7 +126,6 @@ int main(void) DEFINE(__LC_KERNEL_ASCE, offsetof(struct _lowcore, kernel_asce)); DEFINE(__LC_USER_ASCE, offsetof(struct _lowcore, user_asce)); DEFINE(__LC_USER_EXEC_ASCE, offsetof(struct _lowcore, user_exec_asce)); - DEFINE(__LC_CPUID, offsetof(struct _lowcore, cpu_id)); DEFINE(__LC_INT_CLOCK, offsetof(struct _lowcore, int_clock)); DEFINE(__LC_MACHINE_FLAGS, offsetof(struct _lowcore, machine_flags)); DEFINE(__LC_FTRACE_FUNC, offsetof(struct _lowcore, ftrace_func)); diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S index 9d1f767..51838ad 100644 --- a/arch/s390/kernel/head.S +++ b/arch/s390/kernel/head.S @@ -328,8 +328,8 @@ iplstart: # # reset files in VM reader # - stidp __LC_CPUID # store cpuid - tm __LC_CPUID,0xff # running VM ? + stidp __LC_SAVE_AREA # store cpuid + tm __LC_SAVE_AREA,0xff # running VM ? bno .Lnoreset la %r2,.Lreset lhi %r3,26 diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index 0729f36..ecb2d02 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -18,24 +18,42 @@ #include #include +static DEFINE_PER_CPU(struct cpuid, cpu_id); + +/* + * cpu_init - initializes state that is per-CPU. + */ +void __cpuinit cpu_init(void) +{ + struct cpuid *id = &per_cpu(cpu_id, smp_processor_id()); + + get_cpu_id(id); + atomic_inc(&init_mm.mm_count); + current->active_mm = &init_mm; + BUG_ON(current->mm); + enter_lazy_tlb(&init_mm, current); +} + +/* + * print_cpu_info - print basic information about a cpu + */ void __cpuinit print_cpu_info(void) { + struct cpuid *id = &per_cpu(cpu_id, smp_processor_id()); + pr_info("Processor %d started, address %d, identification %06X\n", - S390_lowcore.cpu_nr, S390_lowcore.cpu_addr, - S390_lowcore.cpu_id.ident); + S390_lowcore.cpu_nr, S390_lowcore.cpu_addr, id->ident); } /* * show_cpuinfo - Get information on one CPU for use by procfs. */ - static int show_cpuinfo(struct seq_file *m, void *v) { static const char *hwcap_str[10] = { "esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp", "edat", "etf3eh", "highgprs" }; - struct _lowcore *lc; unsigned long n = (unsigned long) v - 1; int i; @@ -55,19 +73,12 @@ static int show_cpuinfo(struct seq_file *m, void *v) } if (cpu_online(n)) { -#ifdef CONFIG_SMP - lc = (smp_processor_id() == n) ? - &S390_lowcore : lowcore_ptr[n]; -#else - lc = &S390_lowcore; -#endif + struct cpuid *id = &per_cpu(cpu_id, n); seq_printf(m, "processor %li: " "version = %02X, " "identification = %06X, " "machine = %04X\n", - n, lc->cpu_id.version, - lc->cpu_id.ident, - lc->cpu_id.machine); + n, id->version, id->ident, id->machine); } preempt_enable(); return 0; diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 91625f7..6309276 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -113,22 +113,6 @@ static struct resource data_resource = { }; /* - * cpu_init() initializes state that is per-CPU. - */ -void __cpuinit cpu_init(void) -{ - /* - * Store processor id in lowcore (used e.g. in timer_interrupt) - */ - get_cpu_id(&S390_lowcore.cpu_id); - - atomic_inc(&init_mm.mm_count); - current->active_mm = &init_mm; - BUG_ON(current->mm); - enter_lazy_tlb(&init_mm, current); -} - -/* * condev= and conmode= setup parameter. */ @@ -695,6 +679,7 @@ static void __init setup_hwcaps(void) static const int stfl_bits[6] = { 0, 2, 7, 17, 19, 21 }; unsigned long long facility_list_extended; unsigned int facility_list; + struct cpuid cpu_id; int i; facility_list = stfl(); @@ -756,7 +741,8 @@ static void __init setup_hwcaps(void) */ elf_hwcap |= HWCAP_S390_HIGH_GPRS; - switch (S390_lowcore.cpu_id.machine) { + get_cpu_id(&cpu_id); + switch (cpu_id.machine) { case 0x9672: #if !defined(CONFIG_64BIT) default: /* Use "g5" as default for 31 bit kernels. */ diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c index 5116491..da6df04 100644 --- a/drivers/s390/cio/css.c +++ b/drivers/s390/cio/css.c @@ -648,6 +648,8 @@ static void css_process_crw(struct crw *crw0, struct crw *crw1, int overflow) static void __init css_generate_pgid(struct channel_subsystem *css, u32 tod_high) { + struct cpuid cpu_id; + if (css_general_characteristics.mcss) { css->global_pgid.pgid_high.ext_cssid.version = 0x80; css->global_pgid.pgid_high.ext_cssid.cssid = css->cssid; @@ -658,8 +660,9 @@ css_generate_pgid(struct channel_subsystem *css, u32 tod_high) css->global_pgid.pgid_high.cpu_addr = 0; #endif } - css->global_pgid.cpu_id = S390_lowcore.cpu_id.ident; - css->global_pgid.cpu_model = S390_lowcore.cpu_id.machine; + get_cpu_id(&cpu_id); + css->global_pgid.cpu_id = cpu_id.ident; + css->global_pgid.cpu_model = cpu_id.machine; css->global_pgid.tod_high = tod_high; } -- cgit v0.10.2 From 43d399d2ab7e96cb8d952d0ba4e9131587b7c8b9 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 17 May 2010 10:00:01 +0200 Subject: [S390] cleanup sysc_work and io_work code Cleanup the #ifdef mess at io_work in entry[64].S and streamline the TIF work code of the system call and io exit path. Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 6af7045..ffebfb6 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -301,31 +301,29 @@ sysc_restore_trace_psw: #endif # -# recheck if there is more work to do -# -sysc_work_loop: - tm __TI_flags+3(%r9),_TIF_WORK_SVC - bz BASED(sysc_restore) # there is no work to do -# -# One of the work bits is on. Find out which one. +# There is work to do, but first we need to check if we return to userspace. # sysc_work: tm SP_PSW+1(%r15),0x01 # returning to user ? bno BASED(sysc_restore) + +# +# One of the work bits is on. Find out which one. +# +sysc_work_loop: tm __TI_flags+3(%r9),_TIF_MCCK_PENDING bo BASED(sysc_mcck_pending) tm __TI_flags+3(%r9),_TIF_NEED_RESCHED bo BASED(sysc_reschedule) tm __TI_flags+3(%r9),_TIF_SIGPENDING - bnz BASED(sysc_sigpending) + bo BASED(sysc_sigpending) tm __TI_flags+3(%r9),_TIF_NOTIFY_RESUME - bnz BASED(sysc_notify_resume) + bo BASED(sysc_notify_resume) tm __TI_flags+3(%r9),_TIF_RESTART_SVC bo BASED(sysc_restart) tm __TI_flags+3(%r9),_TIF_SINGLE_STEP bo BASED(sysc_singlestep) - b BASED(sysc_restore) -sysc_work_done: + b BASED(sysc_return) # beware of critical section cleanup # # _TIF_NEED_RESCHED is set, call schedule @@ -386,7 +384,7 @@ sysc_singlestep: mvi SP_SVCNR+1(%r15),0xff la %r2,SP_PTREGS(%r15) # address of register-save area l %r1,BASED(.Lhandle_per) # load adr. of per handler - la %r14,BASED(sysc_return) # load adr. of system return + la %r14,BASED(sysc_work_loop) # load adr. of system return br %r1 # branch to do_single_step # @@ -636,30 +634,36 @@ io_restore_trace_psw: #endif # -# switch to kernel stack, then check the TIF bits +# There is work todo, find out in which context we have been interrupted: +# 1) if we return to user space we can do all _TIF_WORK_INT work +# 2) if we return to kernel code and preemptive scheduling is enabled check +# the preemption counter and if it is zero call preempt_schedule_irq +# Before any work can be done, a switch to the kernel stack is required. # io_work: tm SP_PSW+1(%r15),0x01 # returning to user ? -#ifndef CONFIG_PREEMPT - bno BASED(io_restore) # no-> skip resched & signal -#else - bnz BASED(io_work_user) # no -> check for preemptive scheduling + bo BASED(io_work_user) # yes -> do resched & signal +#ifdef CONFIG_PREEMPT # check for preemptive scheduling icm %r0,15,__TI_precount(%r9) bnz BASED(io_restore) # preemption disabled + # switch to kernel stack l %r1,SP_R15(%r15) s %r1,BASED(.Lc_spsize) mvc SP_PTREGS(__PT_SIZE,%r1),SP_PTREGS(%r15) xc __SF_BACKCHAIN(4,%r1),__SF_BACKCHAIN(%r1) # clear back chain lr %r15,%r1 io_resume_loop: - tm __TI_flags+3(%r9),_TIF_NEED_RESCHED - bno BASED(io_restore) l %r1,BASED(.Lpreempt_schedule_irq) la %r14,BASED(io_resume_loop) - br %r1 # call schedule + tm __TI_flags+3(%r9),_TIF_NEED_RESCHED + bor %r1 # call preempt_schedule_irq #endif + b BASED(io_restore) +# +# Need to do work before returning to userspace, switch to kernel stack +# io_work_user: l %r1,__LC_KERNEL_STACK s %r1,BASED(.Lc_spsize) @@ -668,7 +672,7 @@ io_work_user: lr %r15,%r1 # # One of the work bits is on. Find out which one. -# Checked are: _TIF_SIGPENDING, _TIF_NEED_RESCHED +# Checked are: _TIF_SIGPENDING, _TIF_NOTIFY_RESUME, _TIF_NEED_RESCHED # and _TIF_MCCK_PENDING # io_work_loop: @@ -677,11 +681,10 @@ io_work_loop: tm __TI_flags+3(%r9),_TIF_NEED_RESCHED bo BASED(io_reschedule) tm __TI_flags+3(%r9),_TIF_SIGPENDING - bnz BASED(io_sigpending) + bo BASED(io_sigpending) tm __TI_flags+3(%r9),_TIF_NOTIFY_RESUME - bnz BASED(io_notify_resume) - b BASED(io_restore) -io_work_done: + bo BASED(io_notify_resume) + b BASED(io_return) # beware of critical section cleanup # # _TIF_MCCK_PENDING is set, call handler @@ -701,8 +704,6 @@ io_reschedule: basr %r14,%r1 # call scheduler stnsm __SF_EMPTY(%r15),0xfc # disable I/O and ext. interrupts TRACE_IRQS_OFF - tm __TI_flags+3(%r9),_TIF_WORK_INT - bz BASED(io_restore) # there is no work to do b BASED(io_work_loop) # @@ -921,14 +922,10 @@ cleanup_table_sysc_return: .long sysc_return + 0x80000000, sysc_leave + 0x80000000 cleanup_table_sysc_leave: .long sysc_leave + 0x80000000, sysc_done + 0x80000000 -cleanup_table_sysc_work_loop: - .long sysc_work_loop + 0x80000000, sysc_work_done + 0x80000000 cleanup_table_io_return: .long io_return + 0x80000000, io_leave + 0x80000000 cleanup_table_io_leave: .long io_leave + 0x80000000, io_done + 0x80000000 -cleanup_table_io_work_loop: - .long io_work_loop + 0x80000000, io_work_done + 0x80000000 cleanup_critical: clc 4(4,%r12),BASED(cleanup_table_system_call) @@ -946,11 +943,6 @@ cleanup_critical: clc 4(4,%r12),BASED(cleanup_table_sysc_leave+4) bl BASED(cleanup_sysc_leave) 0: - clc 4(4,%r12),BASED(cleanup_table_sysc_work_loop) - bl BASED(0f) - clc 4(4,%r12),BASED(cleanup_table_sysc_work_loop+4) - bl BASED(cleanup_sysc_return) -0: clc 4(4,%r12),BASED(cleanup_table_io_return) bl BASED(0f) clc 4(4,%r12),BASED(cleanup_table_io_return+4) @@ -961,11 +953,6 @@ cleanup_critical: clc 4(4,%r12),BASED(cleanup_table_io_leave+4) bl BASED(cleanup_io_leave) 0: - clc 4(4,%r12),BASED(cleanup_table_io_work_loop) - bl BASED(0f) - clc 4(4,%r12),BASED(cleanup_table_io_work_loop+4) - bl BASED(cleanup_io_work_loop) -0: br %r14 cleanup_system_call: @@ -1043,12 +1030,6 @@ cleanup_io_return: la %r12,__LC_RETURN_PSW br %r14 -cleanup_io_work_loop: - mvc __LC_RETURN_PSW(4),0(%r12) - mvc __LC_RETURN_PSW+4(4),BASED(cleanup_table_io_work_loop) - la %r12,__LC_RETURN_PSW - br %r14 - cleanup_io_leave: clc 4(4,%r12),BASED(cleanup_io_leave_insn) be BASED(2f) diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index 52106d5..ca02b10 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -291,38 +291,36 @@ sysc_restore_trace_psw: #endif # -# recheck if there is more work to do -# -sysc_work_loop: - tm __TI_flags+7(%r9),_TIF_WORK_SVC - jz sysc_restore # there is no work to do -# -# One of the work bits is on. Find out which one. +# There is work to do, but first we need to check if we return to userspace. # sysc_work: tm SP_PSW+1(%r15),0x01 # returning to user ? jno sysc_restore + +# +# One of the work bits is on. Find out which one. +# +sysc_work_loop: tm __TI_flags+7(%r9),_TIF_MCCK_PENDING jo sysc_mcck_pending tm __TI_flags+7(%r9),_TIF_NEED_RESCHED jo sysc_reschedule tm __TI_flags+7(%r9),_TIF_SIGPENDING - jnz sysc_sigpending + jo sysc_sigpending tm __TI_flags+7(%r9),_TIF_NOTIFY_RESUME - jnz sysc_notify_resume + jo sysc_notify_resume tm __TI_flags+7(%r9),_TIF_RESTART_SVC jo sysc_restart tm __TI_flags+7(%r9),_TIF_SINGLE_STEP jo sysc_singlestep - j sysc_restore -sysc_work_done: + j sysc_return # beware of critical section cleanup # # _TIF_NEED_RESCHED is set, call schedule # sysc_reschedule: larl %r14,sysc_work_loop - jg schedule # return point is sysc_return + jg schedule # return point is sysc_work_loop # # _TIF_MCCK_PENDING is set, call handler @@ -369,7 +367,7 @@ sysc_singlestep: ni __TI_flags+7(%r9),255-_TIF_SINGLE_STEP # clear TIF_SINGLE_STEP xc SP_SVCNR(2,%r15),SP_SVCNR(%r15) # clear svc number la %r2,SP_PTREGS(%r15) # address of register-save area - larl %r14,sysc_return # load adr. of system return + larl %r14,sysc_work_loop # load adr. of system return jg do_single_step # branch to do_sigtrap # @@ -605,37 +603,27 @@ io_restore_trace_psw: #endif # -# There is work todo, we need to check if we return to userspace, then -# check, if we are in SIE, if yes leave it +# There is work todo, find out in which context we have been interrupted: +# 1) if we return to user space we can do all _TIF_WORK_INT work +# 2) if we return to kernel code and kvm is enabled check if we need to +# modify the psw to leave SIE +# 3) if we return to kernel code and preemptive scheduling is enabled check +# the preemption counter and if it is zero call preempt_schedule_irq +# Before any work can be done, a switch to the kernel stack is required. # io_work: tm SP_PSW+1(%r15),0x01 # returning to user ? -#ifndef CONFIG_PREEMPT -#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE) - jnz io_work_user # yes -> no need to check for SIE - la %r1, BASED(sie_opcode) # we return to kernel here - lg %r2, SP_PSW+8(%r15) - clc 0(2,%r1), 0(%r2) # is current instruction = SIE? - jne io_restore # no-> return to kernel - lg %r1, SP_PSW+8(%r15) # yes-> add 4 bytes to leave SIE - aghi %r1, 4 - stg %r1, SP_PSW+8(%r15) - j io_restore # return to kernel -#else - jno io_restore # no-> skip resched & signal -#endif -#else - jnz io_work_user # yes -> do resched & signal + jo io_work_user # yes -> do resched & signal #if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE) - la %r1, BASED(sie_opcode) - lg %r2, SP_PSW+8(%r15) - clc 0(2,%r1), 0(%r2) # is current instruction = SIE? + lg %r2,SP_PSW+8(%r15) # check if current instruction is SIE + lh %r1,0(%r2) + chi %r1,-19948 # signed 16 bit compare with 0xb214 jne 0f # no -> leave PSW alone - lg %r1, SP_PSW+8(%r15) # yes-> add 4 bytes to leave SIE - aghi %r1, 4 - stg %r1, SP_PSW+8(%r15) + aghi %r2,4 # yes-> add 4 bytes to leave SIE + stg %r2,SP_PSW+8(%r15) 0: #endif +#ifdef CONFIG_PREEMPT # check for preemptive scheduling icm %r0,15,__TI_precount(%r9) jnz io_restore # preemption is disabled @@ -646,21 +634,25 @@ io_work: xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1) # clear back chain lgr %r15,%r1 io_resume_loop: - tm __TI_flags+7(%r9),_TIF_NEED_RESCHED - jno io_restore larl %r14,io_resume_loop - jg preempt_schedule_irq + tm __TI_flags+7(%r12),_TIF_NEED_RESCHED + jgo preempt_schedule_irq #endif + j io_restore +# +# Need to do work before returning to userspace, switch to kernel stack +# io_work_user: lg %r1,__LC_KERNEL_STACK aghi %r1,-SP_SIZE mvc SP_PTREGS(__PT_SIZE,%r1),SP_PTREGS(%r15) xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1) # clear back chain lgr %r15,%r1 + # # One of the work bits is on. Find out which one. -# Checked are: _TIF_SIGPENDING, _TIF_RESTORE_SIGPENDING, _TIF_NEED_RESCHED +# Checked are: _TIF_SIGPENDING, _TIF_NOTIFY_RESUME, _TIF_NEED_RESCHED # and _TIF_MCCK_PENDING # io_work_loop: @@ -669,16 +661,10 @@ io_work_loop: tm __TI_flags+7(%r9),_TIF_NEED_RESCHED jo io_reschedule tm __TI_flags+7(%r9),_TIF_SIGPENDING - jnz io_sigpending + jo io_sigpending tm __TI_flags+7(%r9),_TIF_NOTIFY_RESUME - jnz io_notify_resume - j io_restore -io_work_done: - -#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE) -sie_opcode: - .long 0xb2140000 -#endif + jo io_notify_resume + j io_return # beware of critical section cleanup # # _TIF_MCCK_PENDING is set, call handler @@ -696,8 +682,6 @@ io_reschedule: brasl %r14,schedule # call scheduler stnsm __SF_EMPTY(%r15),0xfc # disable I/O and ext. interrupts TRACE_IRQS_OFF - tm __TI_flags+7(%r9),_TIF_WORK_INT - jz io_restore # there is no work to do j io_work_loop # @@ -903,14 +887,10 @@ cleanup_table_sysc_return: .quad sysc_return, sysc_leave cleanup_table_sysc_leave: .quad sysc_leave, sysc_done -cleanup_table_sysc_work_loop: - .quad sysc_work_loop, sysc_work_done cleanup_table_io_return: .quad io_return, io_leave cleanup_table_io_leave: .quad io_leave, io_done -cleanup_table_io_work_loop: - .quad io_work_loop, io_work_done cleanup_critical: clc 8(8,%r12),BASED(cleanup_table_system_call) @@ -928,11 +908,6 @@ cleanup_critical: clc 8(8,%r12),BASED(cleanup_table_sysc_leave+8) jl cleanup_sysc_leave 0: - clc 8(8,%r12),BASED(cleanup_table_sysc_work_loop) - jl 0f - clc 8(8,%r12),BASED(cleanup_table_sysc_work_loop+8) - jl cleanup_sysc_return -0: clc 8(8,%r12),BASED(cleanup_table_io_return) jl 0f clc 8(8,%r12),BASED(cleanup_table_io_return+8) @@ -943,11 +918,6 @@ cleanup_critical: clc 8(8,%r12),BASED(cleanup_table_io_leave+8) jl cleanup_io_leave 0: - clc 8(8,%r12),BASED(cleanup_table_io_work_loop) - jl 0f - clc 8(8,%r12),BASED(cleanup_table_io_work_loop+8) - jl cleanup_io_work_loop -0: br %r14 cleanup_system_call: @@ -1025,12 +995,6 @@ cleanup_io_return: la %r12,__LC_RETURN_PSW br %r14 -cleanup_io_work_loop: - mvc __LC_RETURN_PSW(8),0(%r12) - mvc __LC_RETURN_PSW+8(8),BASED(cleanup_table_io_work_loop) - la %r12,__LC_RETURN_PSW - br %r14 - cleanup_io_leave: clc 8(8,%r12),BASED(cleanup_io_leave_insn) je 3f -- cgit v0.10.2 From 6a2df3a87276cdc08fd87070d09ea18d1fb9d622 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 17 May 2010 10:00:02 +0200 Subject: [S390] improve irq tracing code in entry[64].S The system call path in entry[64].S is run with interrupts enabled. Remove the irq tracing check from the system call exit code. If a program check interrupted a context enabled for interrupts do a call to trace_irq_off_caller in the program check handler before branching to the system call exit code. Restructure the system call and io interrupt return code to avoid avoid the lpsw[e] to disable machine checks. Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/include/asm/system.h b/arch/s390/include/asm/system.h index 1741c15..cef6621 100644 --- a/arch/s390/include/asm/system.h +++ b/arch/s390/include/asm/system.h @@ -459,11 +459,6 @@ extern void (*_machine_power_off)(void); #define arch_align_stack(x) (x) -#ifdef CONFIG_TRACE_IRQFLAGS -extern psw_t sysc_restore_trace_psw; -extern psw_t io_restore_trace_psw; -#endif - static inline int tprot(unsigned long addr) { int rc = -EFAULT; diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index ffebfb6..07d8499 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -73,21 +73,24 @@ STACK_SIZE = 1 << STACK_SHIFT basr %r14,%r1 .endm - .macro TRACE_IRQS_CHECK - basr %r2,%r0 + .macro TRACE_IRQS_CHECK_ON tm SP_PSW(%r15),0x03 # irqs enabled? - jz 0f - l %r1,BASED(.Ltrace_irq_on_caller) - basr %r14,%r1 - j 1f -0: l %r1,BASED(.Ltrace_irq_off_caller) - basr %r14,%r1 -1: + bz BASED(0f) + TRACE_IRQS_ON +0: + .endm + + .macro TRACE_IRQS_CHECK_OFF + tm SP_PSW(%r15),0x03 # irqs enabled? + bz BASED(0f) + TRACE_IRQS_OFF +0: .endm #else #define TRACE_IRQS_ON #define TRACE_IRQS_OFF -#define TRACE_IRQS_CHECK +#define TRACE_IRQS_CHECK_ON +#define TRACE_IRQS_CHECK_OFF #endif #ifdef CONFIG_LOCKDEP @@ -273,33 +276,14 @@ sysc_do_restart: st %r2,SP_R2(%r15) # store return value (change R2 on stack) sysc_return: + LOCKDEP_SYS_EXIT +sysc_tif: tm __TI_flags+3(%r9),_TIF_WORK_SVC bnz BASED(sysc_work) # there is work to do (signals etc.) sysc_restore: -#ifdef CONFIG_TRACE_IRQFLAGS - la %r1,BASED(sysc_restore_trace_psw_addr) - l %r1,0(%r1) - lpsw 0(%r1) -sysc_restore_trace: - TRACE_IRQS_CHECK - LOCKDEP_SYS_EXIT -#endif -sysc_leave: RESTORE_ALL __LC_RETURN_PSW,1 sysc_done: -#ifdef CONFIG_TRACE_IRQFLAGS -sysc_restore_trace_psw_addr: - .long sysc_restore_trace_psw - - .section .data,"aw",@progbits - .align 8 - .globl sysc_restore_trace_psw -sysc_restore_trace_psw: - .long 0, sysc_restore_trace + 0x80000000 - .previous -#endif - # # There is work to do, but first we need to check if we return to userspace. # @@ -310,7 +294,7 @@ sysc_work: # # One of the work bits is on. Find out which one. # -sysc_work_loop: +sysc_work_tif: tm __TI_flags+3(%r9),_TIF_MCCK_PENDING bo BASED(sysc_mcck_pending) tm __TI_flags+3(%r9),_TIF_NEED_RESCHED @@ -330,7 +314,7 @@ sysc_work_loop: # sysc_reschedule: l %r1,BASED(.Lschedule) - la %r14,BASED(sysc_work_loop) + la %r14,BASED(sysc_return) br %r1 # call scheduler # @@ -338,7 +322,7 @@ sysc_reschedule: # sysc_mcck_pending: l %r1,BASED(.Ls390_handle_mcck) - la %r14,BASED(sysc_work_loop) + la %r14,BASED(sysc_return) br %r1 # TIF bit will be cleared by handler # @@ -353,7 +337,7 @@ sysc_sigpending: bo BASED(sysc_restart) tm __TI_flags+3(%r9),_TIF_SINGLE_STEP bo BASED(sysc_singlestep) - b BASED(sysc_work_loop) + b BASED(sysc_return) # # _TIF_NOTIFY_RESUME is set, call do_notify_resume @@ -361,7 +345,7 @@ sysc_sigpending: sysc_notify_resume: la %r2,SP_PTREGS(%r15) # load pt_regs l %r1,BASED(.Ldo_notify_resume) - la %r14,BASED(sysc_work_loop) + la %r14,BASED(sysc_return) br %r1 # call do_notify_resume @@ -384,7 +368,7 @@ sysc_singlestep: mvi SP_SVCNR+1(%r15),0xff la %r2,SP_PTREGS(%r15) # address of register-save area l %r1,BASED(.Lhandle_per) # load adr. of per handler - la %r14,BASED(sysc_work_loop) # load adr. of system return + la %r14,BASED(sysc_return) # load adr. of system return br %r1 # branch to do_single_step # @@ -456,11 +440,13 @@ kernel_execve: br %r14 # execve succeeded. 0: stnsm __SF_EMPTY(%r15),0xfc # disable interrupts + TRACE_IRQS_OFF l %r15,__LC_KERNEL_STACK # load ksp s %r15,BASED(.Lc_spsize) # make room for registers & psw l %r9,__LC_THREAD_INFO mvc SP_PTREGS(__PT_SIZE,%r15),0(%r12) # copy pt_regs xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) + TRACE_IRQS_ON stosm __SF_EMPTY(%r15),0x03 # reenable interrupts l %r1,BASED(.Lexecve_tail) basr %r14,%r1 @@ -497,8 +483,8 @@ pgm_check_handler: UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER pgm_no_vtime: + TRACE_IRQS_CHECK_OFF l %r9,__LC_THREAD_INFO # load pointer to thread_info struct - TRACE_IRQS_OFF l %r3,__LC_PGM_ILC # load program interruption code la %r8,0x7f nr %r8,%r3 @@ -507,8 +493,10 @@ pgm_do_call: sll %r8,2 l %r7,0(%r8,%r7) # load address of handler routine la %r2,SP_PTREGS(%r15) # address of register-save area - la %r14,BASED(sysc_return) - br %r7 # branch to interrupt-handler + basr %r14,%r7 # branch to interrupt-handler +pgm_exit: + TRACE_IRQS_CHECK_ON + b BASED(sysc_return) # # handle per exception @@ -535,19 +523,19 @@ pgm_per_std: UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER pgm_no_vtime2: + TRACE_IRQS_CHECK_OFF l %r9,__LC_THREAD_INFO # load pointer to thread_info struct - TRACE_IRQS_OFF l %r1,__TI_task(%r9) + tm SP_PSW+1(%r15),0x01 # kernel per event ? + bz BASED(kernel_per) mvc __THREAD_per+__PER_atmid(2,%r1),__LC_PER_ATMID mvc __THREAD_per+__PER_address(4,%r1),__LC_PER_ADDRESS mvc __THREAD_per+__PER_access_id(1,%r1),__LC_PER_ACCESS_ID oi __TI_flags+3(%r9),_TIF_SINGLE_STEP # set TIF_SINGLE_STEP - tm SP_PSW+1(%r15),0x01 # kernel per event ? - bz BASED(kernel_per) l %r3,__LC_PGM_ILC # load program interruption code la %r8,0x7f nr %r8,%r3 # clear per-event-bit and ilc - be BASED(sysc_return) # only per or per+check ? + be BASED(pgm_exit) # only per or per+check ? b BASED(pgm_do_call) # @@ -568,8 +556,8 @@ pgm_svcper: mvc __THREAD_per+__PER_access_id(1,%r8),__LC_PER_ACCESS_ID oi __TI_flags+3(%r9),_TIF_SINGLE_STEP # set TIF_SINGLE_STEP TRACE_IRQS_ON - lm %r2,%r6,SP_R2(%r15) # load svc arguments stosm __SF_EMPTY(%r15),0x03 # reenable interrupts + lm %r2,%r6,SP_R2(%r15) # load svc arguments b BASED(sysc_do_svc) # @@ -580,8 +568,8 @@ kernel_per: mvi SP_SVCNR+1(%r15),0xff la %r2,SP_PTREGS(%r15) # address of register-save area l %r1,BASED(.Lhandle_per) # load adr. of per handler - la %r14,BASED(sysc_restore)# load adr. of system return - br %r1 # branch to do_single_step + basr %r14,%r1 # branch to do_single_step + b BASED(pgm_exit) /* * IO interrupt handler routine @@ -600,39 +588,21 @@ io_int_handler: UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER mvc __LC_LAST_UPDATE_TIMER(8),__LC_ASYNC_ENTER_TIMER io_no_vtime: - l %r9,__LC_THREAD_INFO # load pointer to thread_info struct TRACE_IRQS_OFF + l %r9,__LC_THREAD_INFO # load pointer to thread_info struct l %r1,BASED(.Ldo_IRQ) # load address of do_IRQ la %r2,SP_PTREGS(%r15) # address of register-save area basr %r14,%r1 # branch to standard irq handler io_return: + LOCKDEP_SYS_EXIT + TRACE_IRQS_ON +io_tif: tm __TI_flags+3(%r9),_TIF_WORK_INT bnz BASED(io_work) # there is work to do (signals etc.) io_restore: -#ifdef CONFIG_TRACE_IRQFLAGS - la %r1,BASED(io_restore_trace_psw_addr) - l %r1,0(%r1) - lpsw 0(%r1) -io_restore_trace: - TRACE_IRQS_CHECK - LOCKDEP_SYS_EXIT -#endif -io_leave: RESTORE_ALL __LC_RETURN_PSW,0 io_done: -#ifdef CONFIG_TRACE_IRQFLAGS -io_restore_trace_psw_addr: - .long io_restore_trace_psw - - .section .data,"aw",@progbits - .align 8 - .globl io_restore_trace_psw -io_restore_trace_psw: - .long 0, io_restore_trace + 0x80000000 - .previous -#endif - # # There is work todo, find out in which context we have been interrupted: # 1) if we return to user space we can do all _TIF_WORK_INT work @@ -647,19 +617,23 @@ io_work: # check for preemptive scheduling icm %r0,15,__TI_precount(%r9) bnz BASED(io_restore) # preemption disabled + tm __TI_flags+3(%r9),_TIF_NEED_RESCHED + bno BASED(io_restore) # switch to kernel stack l %r1,SP_R15(%r15) s %r1,BASED(.Lc_spsize) mvc SP_PTREGS(__PT_SIZE,%r1),SP_PTREGS(%r15) xc __SF_BACKCHAIN(4,%r1),__SF_BACKCHAIN(%r1) # clear back chain lr %r15,%r1 -io_resume_loop: + # TRACE_IRQS_ON already done at io_return, call + # TRACE_IRQS_OFF to keep things symmetrical + TRACE_IRQS_OFF l %r1,BASED(.Lpreempt_schedule_irq) - la %r14,BASED(io_resume_loop) - tm __TI_flags+3(%r9),_TIF_NEED_RESCHED - bor %r1 # call preempt_schedule_irq -#endif + basr %r14,%r1 # call preempt_schedule_irq + b BASED(io_return) +#else b BASED(io_restore) +#endif # # Need to do work before returning to userspace, switch to kernel stack @@ -670,12 +644,13 @@ io_work_user: mvc SP_PTREGS(__PT_SIZE,%r1),SP_PTREGS(%r15) xc __SF_BACKCHAIN(4,%r1),__SF_BACKCHAIN(%r1) # clear back chain lr %r15,%r1 + # # One of the work bits is on. Find out which one. # Checked are: _TIF_SIGPENDING, _TIF_NOTIFY_RESUME, _TIF_NEED_RESCHED # and _TIF_MCCK_PENDING # -io_work_loop: +io_work_tif: tm __TI_flags+3(%r9),_TIF_MCCK_PENDING bo BASED(io_mcck_pending) tm __TI_flags+3(%r9),_TIF_NEED_RESCHED @@ -690,47 +665,49 @@ io_work_loop: # _TIF_MCCK_PENDING is set, call handler # io_mcck_pending: + # TRACE_IRQS_ON already done at io_return l %r1,BASED(.Ls390_handle_mcck) basr %r14,%r1 # TIF bit will be cleared by handler - b BASED(io_work_loop) + TRACE_IRQS_OFF + b BASED(io_return) # # _TIF_NEED_RESCHED is set, call schedule # io_reschedule: - TRACE_IRQS_ON + # TRACE_IRQS_ON already done at io_return l %r1,BASED(.Lschedule) stosm __SF_EMPTY(%r15),0x03 # reenable interrupts basr %r14,%r1 # call scheduler stnsm __SF_EMPTY(%r15),0xfc # disable I/O and ext. interrupts TRACE_IRQS_OFF - b BASED(io_work_loop) + b BASED(io_return) # # _TIF_SIGPENDING is set, call do_signal # io_sigpending: - TRACE_IRQS_ON + # TRACE_IRQS_ON already done at io_return stosm __SF_EMPTY(%r15),0x03 # reenable interrupts la %r2,SP_PTREGS(%r15) # load pt_regs l %r1,BASED(.Ldo_signal) basr %r14,%r1 # call do_signal stnsm __SF_EMPTY(%r15),0xfc # disable I/O and ext. interrupts TRACE_IRQS_OFF - b BASED(io_work_loop) + b BASED(io_return) # # _TIF_SIGPENDING is set, call do_signal # io_notify_resume: - TRACE_IRQS_ON + # TRACE_IRQS_ON already done at io_return stosm __SF_EMPTY(%r15),0x03 # reenable interrupts la %r2,SP_PTREGS(%r15) # load pt_regs l %r1,BASED(.Ldo_notify_resume) basr %r14,%r1 # call do_signal stnsm __SF_EMPTY(%r15),0xfc # disable I/O and ext. interrupts TRACE_IRQS_OFF - b BASED(io_work_loop) + b BASED(io_return) /* * External interrupt handler routine @@ -918,14 +895,14 @@ stack_overflow: cleanup_table_system_call: .long system_call + 0x80000000, sysc_do_svc + 0x80000000 -cleanup_table_sysc_return: - .long sysc_return + 0x80000000, sysc_leave + 0x80000000 -cleanup_table_sysc_leave: - .long sysc_leave + 0x80000000, sysc_done + 0x80000000 -cleanup_table_io_return: - .long io_return + 0x80000000, io_leave + 0x80000000 -cleanup_table_io_leave: - .long io_leave + 0x80000000, io_done + 0x80000000 +cleanup_table_sysc_tif: + .long sysc_tif + 0x80000000, sysc_restore + 0x80000000 +cleanup_table_sysc_restore: + .long sysc_restore + 0x80000000, sysc_done + 0x80000000 +cleanup_table_io_tif: + .long io_tif + 0x80000000, io_restore + 0x80000000 +cleanup_table_io_restore: + .long io_restore + 0x80000000, io_done + 0x80000000 cleanup_critical: clc 4(4,%r12),BASED(cleanup_table_system_call) @@ -933,25 +910,25 @@ cleanup_critical: clc 4(4,%r12),BASED(cleanup_table_system_call+4) bl BASED(cleanup_system_call) 0: - clc 4(4,%r12),BASED(cleanup_table_sysc_return) + clc 4(4,%r12),BASED(cleanup_table_sysc_tif) bl BASED(0f) - clc 4(4,%r12),BASED(cleanup_table_sysc_return+4) - bl BASED(cleanup_sysc_return) + clc 4(4,%r12),BASED(cleanup_table_sysc_tif+4) + bl BASED(cleanup_sysc_tif) 0: - clc 4(4,%r12),BASED(cleanup_table_sysc_leave) + clc 4(4,%r12),BASED(cleanup_table_sysc_restore) bl BASED(0f) - clc 4(4,%r12),BASED(cleanup_table_sysc_leave+4) - bl BASED(cleanup_sysc_leave) + clc 4(4,%r12),BASED(cleanup_table_sysc_restore+4) + bl BASED(cleanup_sysc_restore) 0: - clc 4(4,%r12),BASED(cleanup_table_io_return) + clc 4(4,%r12),BASED(cleanup_table_io_tif) bl BASED(0f) - clc 4(4,%r12),BASED(cleanup_table_io_return+4) - bl BASED(cleanup_io_return) + clc 4(4,%r12),BASED(cleanup_table_io_tif+4) + bl BASED(cleanup_io_tif) 0: - clc 4(4,%r12),BASED(cleanup_table_io_leave) + clc 4(4,%r12),BASED(cleanup_table_io_restore) bl BASED(0f) - clc 4(4,%r12),BASED(cleanup_table_io_leave+4) - bl BASED(cleanup_io_leave) + clc 4(4,%r12),BASED(cleanup_table_io_restore+4) + bl BASED(cleanup_io_restore) 0: br %r14 @@ -998,17 +975,17 @@ cleanup_system_call_insn: .long sysc_stime + 0x80000000 .long sysc_update + 0x80000000 -cleanup_sysc_return: +cleanup_sysc_tif: mvc __LC_RETURN_PSW(4),0(%r12) - mvc __LC_RETURN_PSW+4(4),BASED(cleanup_table_sysc_return) + mvc __LC_RETURN_PSW+4(4),BASED(cleanup_table_sysc_tif) la %r12,__LC_RETURN_PSW br %r14 -cleanup_sysc_leave: - clc 4(4,%r12),BASED(cleanup_sysc_leave_insn) +cleanup_sysc_restore: + clc 4(4,%r12),BASED(cleanup_sysc_restore_insn) be BASED(2f) mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER - clc 4(4,%r12),BASED(cleanup_sysc_leave_insn+4) + clc 4(4,%r12),BASED(cleanup_sysc_restore_insn+4) be BASED(2f) mvc __LC_RETURN_PSW(8),SP_PSW(%r15) c %r12,BASED(.Lmck_old_psw) @@ -1020,21 +997,21 @@ cleanup_sysc_leave: l %r15,SP_R15(%r15) 2: la %r12,__LC_RETURN_PSW br %r14 -cleanup_sysc_leave_insn: +cleanup_sysc_restore_insn: .long sysc_done - 4 + 0x80000000 .long sysc_done - 8 + 0x80000000 -cleanup_io_return: +cleanup_io_tif: mvc __LC_RETURN_PSW(4),0(%r12) - mvc __LC_RETURN_PSW+4(4),BASED(cleanup_table_io_return) + mvc __LC_RETURN_PSW+4(4),BASED(cleanup_table_io_tif) la %r12,__LC_RETURN_PSW br %r14 -cleanup_io_leave: - clc 4(4,%r12),BASED(cleanup_io_leave_insn) +cleanup_io_restore: + clc 4(4,%r12),BASED(cleanup_io_restore_insn) be BASED(2f) mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER - clc 4(4,%r12),BASED(cleanup_io_leave_insn+4) + clc 4(4,%r12),BASED(cleanup_io_restore_insn+4) be BASED(2f) mvc __LC_RETURN_PSW(8),SP_PSW(%r15) c %r12,BASED(.Lmck_old_psw) @@ -1046,7 +1023,7 @@ cleanup_io_leave: l %r15,SP_R15(%r15) 2: la %r12,__LC_RETURN_PSW br %r14 -cleanup_io_leave_insn: +cleanup_io_restore_insn: .long io_done - 4 + 0x80000000 .long io_done - 8 + 0x80000000 diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index ca02b10..860cea1 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -61,28 +61,33 @@ _TIF_SYSCALL = (_TIF_SYSCALL_TRACE>>8 | _TIF_SYSCALL_AUDIT>>8 | \ #ifdef CONFIG_TRACE_IRQFLAGS .macro TRACE_IRQS_ON - basr %r2,%r0 - brasl %r14,trace_hardirqs_on_caller + basr %r2,%r0 + brasl %r14,trace_hardirqs_on_caller .endm .macro TRACE_IRQS_OFF - basr %r2,%r0 - brasl %r14,trace_hardirqs_off_caller + basr %r2,%r0 + brasl %r14,trace_hardirqs_off_caller .endm - .macro TRACE_IRQS_CHECK - basr %r2,%r0 + .macro TRACE_IRQS_CHECK_ON tm SP_PSW(%r15),0x03 # irqs enabled? jz 0f - brasl %r14,trace_hardirqs_on_caller - j 1f -0: brasl %r14,trace_hardirqs_off_caller -1: + TRACE_IRQS_ON +0: + .endm + + .macro TRACE_IRQS_CHECK_OFF + tm SP_PSW(%r15),0x03 # irqs enabled? + jz 0f + TRACE_IRQS_OFF +0: .endm #else #define TRACE_IRQS_ON #define TRACE_IRQS_OFF -#define TRACE_IRQS_CHECK +#define TRACE_IRQS_CHECK_ON +#define TRACE_IRQS_CHECK_OFF #endif #ifdef CONFIG_LOCKDEP @@ -267,29 +272,14 @@ sysc_noemu: stg %r2,SP_R2(%r15) # store return value (change R2 on stack) sysc_return: + LOCKDEP_SYS_EXIT +sysc_tif: tm __TI_flags+7(%r9),_TIF_WORK_SVC jnz sysc_work # there is work to do (signals etc.) sysc_restore: -#ifdef CONFIG_TRACE_IRQFLAGS - larl %r1,sysc_restore_trace_psw - lpswe 0(%r1) -sysc_restore_trace: - TRACE_IRQS_CHECK - LOCKDEP_SYS_EXIT -#endif -sysc_leave: RESTORE_ALL __LC_RETURN_PSW,1 sysc_done: -#ifdef CONFIG_TRACE_IRQFLAGS - .section .data,"aw",@progbits - .align 8 - .globl sysc_restore_trace_psw -sysc_restore_trace_psw: - .quad 0, sysc_restore_trace - .previous -#endif - # # There is work to do, but first we need to check if we return to userspace. # @@ -300,7 +290,7 @@ sysc_work: # # One of the work bits is on. Find out which one. # -sysc_work_loop: +sysc_work_tif: tm __TI_flags+7(%r9),_TIF_MCCK_PENDING jo sysc_mcck_pending tm __TI_flags+7(%r9),_TIF_NEED_RESCHED @@ -319,14 +309,14 @@ sysc_work_loop: # _TIF_NEED_RESCHED is set, call schedule # sysc_reschedule: - larl %r14,sysc_work_loop - jg schedule # return point is sysc_work_loop + larl %r14,sysc_return + jg schedule # return point is sysc_return # # _TIF_MCCK_PENDING is set, call handler # sysc_mcck_pending: - larl %r14,sysc_work_loop + larl %r14,sysc_return jg s390_handle_mcck # TIF bit will be cleared by handler # @@ -340,14 +330,14 @@ sysc_sigpending: jo sysc_restart tm __TI_flags+7(%r9),_TIF_SINGLE_STEP jo sysc_singlestep - j sysc_work_loop + j sysc_return # # _TIF_NOTIFY_RESUME is set, call do_notify_resume # sysc_notify_resume: la %r2,SP_PTREGS(%r15) # load pt_regs - larl %r14,sysc_work_loop + larl %r14,sysc_return jg do_notify_resume # call do_notify_resume # @@ -367,7 +357,7 @@ sysc_singlestep: ni __TI_flags+7(%r9),255-_TIF_SINGLE_STEP # clear TIF_SINGLE_STEP xc SP_SVCNR(2,%r15),SP_SVCNR(%r15) # clear svc number la %r2,SP_PTREGS(%r15) # address of register-save area - larl %r14,sysc_work_loop # load adr. of system return + larl %r14,sysc_return # load adr. of system return jg do_single_step # branch to do_sigtrap # @@ -433,12 +423,14 @@ kernel_execve: br %r14 # execve succeeded. 0: stnsm __SF_EMPTY(%r15),0xfc # disable interrupts +# TRACE_IRQS_OFF lg %r15,__LC_KERNEL_STACK # load ksp aghi %r15,-SP_SIZE # make room for registers & psw lg %r13,__LC_SVC_NEW_PSW+8 lg %r9,__LC_THREAD_INFO mvc SP_PTREGS(__PT_SIZE,%r15),0(%r12) # copy pt_regs xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) +# TRACE_IRQS_ON stosm __SF_EMPTY(%r15),0x03 # reenable interrupts brasl %r14,execve_tail j sysc_return @@ -474,9 +466,9 @@ pgm_check_handler: UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER pgm_no_vtime: + TRACE_IRQS_CHECK_OFF lg %r9,__LC_THREAD_INFO # load pointer to thread_info struct mvc SP_ARGS(8,%r15),__LC_LAST_BREAK - TRACE_IRQS_OFF lgf %r3,__LC_PGM_ILC # load program interruption code lghi %r8,0x7f ngr %r8,%r3 @@ -485,8 +477,10 @@ pgm_do_call: larl %r1,pgm_check_table lg %r1,0(%r8,%r1) # load address of handler routine la %r2,SP_PTREGS(%r15) # address of register-save area - larl %r14,sysc_return - br %r1 # branch to interrupt-handler + basr %r14,%r1 # branch to interrupt-handler +pgm_exit: + TRACE_IRQS_CHECK_ON + j sysc_return # # handle per exception @@ -513,8 +507,8 @@ pgm_per_std: UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER pgm_no_vtime2: + TRACE_IRQS_CHECK_OFF lg %r9,__LC_THREAD_INFO # load pointer to thread_info struct - TRACE_IRQS_OFF lg %r1,__TI_task(%r9) tm SP_PSW+1(%r15),0x01 # kernel per event ? jz kernel_per @@ -525,7 +519,7 @@ pgm_no_vtime2: lgf %r3,__LC_PGM_ILC # load program interruption code lghi %r8,0x7f ngr %r8,%r3 # clear per-event-bit and ilc - je sysc_return + je pgm_exit j pgm_do_call # @@ -539,14 +533,15 @@ pgm_svcper: mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER llgh %r7,__LC_SVC_INT_CODE # get svc number from lowcore lg %r9,__LC_THREAD_INFO # load pointer to thread_info struct + TRACE_IRQS_OFF lg %r8,__TI_task(%r9) mvc __THREAD_per+__PER_atmid(2,%r8),__LC_PER_ATMID mvc __THREAD_per+__PER_address(8,%r8),__LC_PER_ADDRESS mvc __THREAD_per+__PER_access_id(1,%r8),__LC_PER_ACCESS_ID oi __TI_flags+7(%r9),_TIF_SINGLE_STEP # set TIF_SINGLE_STEP TRACE_IRQS_ON - lmg %r2,%r6,SP_R2(%r15) # load svc arguments stosm __SF_EMPTY(%r15),0x03 # reenable interrupts + lmg %r2,%r6,SP_R2(%r15) # load svc arguments j sysc_do_svc # @@ -555,8 +550,8 @@ pgm_svcper: kernel_per: xc SP_SVCNR(2,%r15),SP_SVCNR(%r15) # clear svc number la %r2,SP_PTREGS(%r15) # address of register-save area - larl %r14,sysc_restore # load adr. of system ret, no work - jg do_single_step # branch to do_single_step + brasl %r14,do_single_step + j pgm_exit /* * IO interrupt handler routine @@ -579,29 +574,15 @@ io_no_vtime: la %r2,SP_PTREGS(%r15) # address of register-save area brasl %r14,do_IRQ # call standard irq handler io_return: + LOCKDEP_SYS_EXIT + TRACE_IRQS_ON +io_tif: tm __TI_flags+7(%r9),_TIF_WORK_INT jnz io_work # there is work to do (signals etc.) io_restore: -#ifdef CONFIG_TRACE_IRQFLAGS - larl %r1,io_restore_trace_psw - lpswe 0(%r1) -io_restore_trace: - TRACE_IRQS_CHECK - LOCKDEP_SYS_EXIT -#endif -io_leave: RESTORE_ALL __LC_RETURN_PSW,0 io_done: -#ifdef CONFIG_TRACE_IRQFLAGS - .section .data,"aw",@progbits - .align 8 - .globl io_restore_trace_psw -io_restore_trace_psw: - .quad 0, io_restore_trace - .previous -#endif - # # There is work todo, find out in which context we have been interrupted: # 1) if we return to user space we can do all _TIF_WORK_INT work @@ -627,18 +608,22 @@ io_work: # check for preemptive scheduling icm %r0,15,__TI_precount(%r9) jnz io_restore # preemption is disabled + tm __TI_flags+7(%r12),_TIF_NEED_RESCHED + jno io_restore # switch to kernel stack lg %r1,SP_R15(%r15) aghi %r1,-SP_SIZE mvc SP_PTREGS(__PT_SIZE,%r1),SP_PTREGS(%r15) xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1) # clear back chain lgr %r15,%r1 -io_resume_loop: - larl %r14,io_resume_loop - tm __TI_flags+7(%r12),_TIF_NEED_RESCHED - jgo preempt_schedule_irq -#endif + # TRACE_IRQS_ON already done at io_return, call + # TRACE_IRQS_OFF to keep things symmetrical + TRACE_IRQS_OFF + brasl %r14,preempt_schedule_irq + j io_return +#else j io_restore +#endif # # Need to do work before returning to userspace, switch to kernel stack @@ -655,7 +640,7 @@ io_work_user: # Checked are: _TIF_SIGPENDING, _TIF_NOTIFY_RESUME, _TIF_NEED_RESCHED # and _TIF_MCCK_PENDING # -io_work_loop: +io_work_tif: tm __TI_flags+7(%r9),_TIF_MCCK_PENDING jo io_mcck_pending tm __TI_flags+7(%r9),_TIF_NEED_RESCHED @@ -670,43 +655,45 @@ io_work_loop: # _TIF_MCCK_PENDING is set, call handler # io_mcck_pending: + # TRACE_IRQS_ON already done at io_return brasl %r14,s390_handle_mcck # TIF bit will be cleared by handler - j io_work_loop + TRACE_IRQS_OFF + j io_return # # _TIF_NEED_RESCHED is set, call schedule # io_reschedule: - TRACE_IRQS_ON + # TRACE_IRQS_ON already done at io_return stosm __SF_EMPTY(%r15),0x03 # reenable interrupts brasl %r14,schedule # call scheduler stnsm __SF_EMPTY(%r15),0xfc # disable I/O and ext. interrupts TRACE_IRQS_OFF - j io_work_loop + j io_return # # _TIF_SIGPENDING or is set, call do_signal # io_sigpending: - TRACE_IRQS_ON + # TRACE_IRQS_ON already done at io_return stosm __SF_EMPTY(%r15),0x03 # reenable interrupts la %r2,SP_PTREGS(%r15) # load pt_regs brasl %r14,do_signal # call do_signal stnsm __SF_EMPTY(%r15),0xfc # disable I/O and ext. interrupts TRACE_IRQS_OFF - j io_work_loop + j io_return # # _TIF_NOTIFY_RESUME or is set, call do_notify_resume # io_notify_resume: - TRACE_IRQS_ON + # TRACE_IRQS_ON already done at io_return stosm __SF_EMPTY(%r15),0x03 # reenable interrupts la %r2,SP_PTREGS(%r15) # load pt_regs brasl %r14,do_notify_resume # call do_notify_resume stnsm __SF_EMPTY(%r15),0xfc # disable I/O and ext. interrupts TRACE_IRQS_OFF - j io_work_loop + j io_return /* * External interrupt handler routine @@ -883,14 +870,14 @@ stack_overflow: cleanup_table_system_call: .quad system_call, sysc_do_svc -cleanup_table_sysc_return: - .quad sysc_return, sysc_leave -cleanup_table_sysc_leave: - .quad sysc_leave, sysc_done -cleanup_table_io_return: - .quad io_return, io_leave -cleanup_table_io_leave: - .quad io_leave, io_done +cleanup_table_sysc_tif: + .quad sysc_tif, sysc_restore +cleanup_table_sysc_restore: + .quad sysc_restore, sysc_done +cleanup_table_io_tif: + .quad io_tif, io_restore +cleanup_table_io_restore: + .quad io_restore, io_done cleanup_critical: clc 8(8,%r12),BASED(cleanup_table_system_call) @@ -898,25 +885,25 @@ cleanup_critical: clc 8(8,%r12),BASED(cleanup_table_system_call+8) jl cleanup_system_call 0: - clc 8(8,%r12),BASED(cleanup_table_sysc_return) + clc 8(8,%r12),BASED(cleanup_table_sysc_tif) jl 0f - clc 8(8,%r12),BASED(cleanup_table_sysc_return+8) - jl cleanup_sysc_return + clc 8(8,%r12),BASED(cleanup_table_sysc_tif+8) + jl cleanup_sysc_tif 0: - clc 8(8,%r12),BASED(cleanup_table_sysc_leave) + clc 8(8,%r12),BASED(cleanup_table_sysc_restore) jl 0f - clc 8(8,%r12),BASED(cleanup_table_sysc_leave+8) - jl cleanup_sysc_leave + clc 8(8,%r12),BASED(cleanup_table_sysc_restore+8) + jl cleanup_sysc_restore 0: - clc 8(8,%r12),BASED(cleanup_table_io_return) + clc 8(8,%r12),BASED(cleanup_table_io_tif) jl 0f - clc 8(8,%r12),BASED(cleanup_table_io_return+8) - jl cleanup_io_return + clc 8(8,%r12),BASED(cleanup_table_io_tif+8) + jl cleanup_io_tif 0: - clc 8(8,%r12),BASED(cleanup_table_io_leave) + clc 8(8,%r12),BASED(cleanup_table_io_restore) jl 0f - clc 8(8,%r12),BASED(cleanup_table_io_leave+8) - jl cleanup_io_leave + clc 8(8,%r12),BASED(cleanup_table_io_restore+8) + jl cleanup_io_restore 0: br %r14 @@ -963,16 +950,16 @@ cleanup_system_call_insn: .quad sysc_stime .quad sysc_update -cleanup_sysc_return: +cleanup_sysc_tif: mvc __LC_RETURN_PSW(8),0(%r12) - mvc __LC_RETURN_PSW+8(8),BASED(cleanup_table_sysc_return) + mvc __LC_RETURN_PSW+8(8),BASED(cleanup_table_sysc_tif) la %r12,__LC_RETURN_PSW br %r14 -cleanup_sysc_leave: - clc 8(8,%r12),BASED(cleanup_sysc_leave_insn) +cleanup_sysc_restore: + clc 8(8,%r12),BASED(cleanup_sysc_restore_insn) je 3f - clc 8(8,%r12),BASED(cleanup_sysc_leave_insn+8) + clc 8(8,%r12),BASED(cleanup_sysc_restore_insn+8) jhe 0f mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER 0: mvc __LC_RETURN_PSW(16),SP_PSW(%r15) @@ -985,20 +972,20 @@ cleanup_sysc_leave: lg %r15,SP_R15(%r15) 3: la %r12,__LC_RETURN_PSW br %r14 -cleanup_sysc_leave_insn: +cleanup_sysc_restore_insn: .quad sysc_done - 4 .quad sysc_done - 16 -cleanup_io_return: +cleanup_io_tif: mvc __LC_RETURN_PSW(8),0(%r12) - mvc __LC_RETURN_PSW+8(8),BASED(cleanup_table_io_return) + mvc __LC_RETURN_PSW+8(8),BASED(cleanup_table_io_tif) la %r12,__LC_RETURN_PSW br %r14 -cleanup_io_leave: - clc 8(8,%r12),BASED(cleanup_io_leave_insn) +cleanup_io_restore: + clc 8(8,%r12),BASED(cleanup_io_restore_insn) je 3f - clc 8(8,%r12),BASED(cleanup_io_leave_insn+8) + clc 8(8,%r12),BASED(cleanup_io_restore_insn+8) jhe 0f mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER 0: mvc __LC_RETURN_PSW(16),SP_PSW(%r15) @@ -1011,7 +998,7 @@ cleanup_io_leave: lg %r15,SP_R15(%r15) 3: la %r12,__LC_RETURN_PSW br %r14 -cleanup_io_leave_insn: +cleanup_io_restore_insn: .quad io_done - 4 .quad io_done - 16 diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 6309276..5987524 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -369,10 +369,6 @@ static void setup_addressing_mode(void) pr_info("Address spaces switched, " "mvcos not available\n"); } -#ifdef CONFIG_TRACE_IRQFLAGS - sysc_restore_trace_psw.mask = psw_kernel_bits & ~PSW_MASK_MCHECK; - io_restore_trace_psw.mask = psw_kernel_bits & ~PSW_MASK_MCHECK; -#endif } static void __init -- cgit v0.10.2 From 6377981faf1a4425b0531e577736ef03df97c8f6 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 17 May 2010 10:00:03 +0200 Subject: [S390] idle time accounting vs. machine checks A machine check can interrupt the i/o and external interrupt handler anytime. If the machine check occurs while the interrupt handler is waking up from idle vtime_start_cpu can get executed a second time and the int_clock / async_enter_timer values in the lowcore get clobbered. This can confuse the cpu time accounting. To fix this problem two changes are needed. First the machine check handler has to use its own copies of int_clock and async_enter_timer, named mcck_clock and mcck_enter_timer. Second the nested execution of vtime_start_cpu has to be prevented. This is done in s390_idle_check by checking the wait bit in the program status word. Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h index 258ba88..8b1a52a 100644 --- a/arch/s390/include/asm/cputime.h +++ b/arch/s390/include/asm/cputime.h @@ -188,15 +188,16 @@ struct s390_idle_data { DECLARE_PER_CPU(struct s390_idle_data, s390_idle); -void vtime_start_cpu(void); +void vtime_start_cpu(__u64 int_clock, __u64 enter_timer); cputime64_t s390_get_idle_time(int cpu); #define arch_idle_time(cpu) s390_get_idle_time(cpu) -static inline void s390_idle_check(void) +static inline void s390_idle_check(struct pt_regs *regs, __u64 int_clock, + __u64 enter_timer) { - if ((&__get_cpu_var(s390_idle))->idle_enter != 0ULL) - vtime_start_cpu(); + if (regs->psw.mask & PSW_MASK_WAIT) + vtime_start_cpu(int_clock, enter_timer); } static inline int s390_nohz_delay(int cpu) diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index f7e78c7..2c02d46 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -104,37 +104,39 @@ struct _lowcore { /* CPU time accounting values */ __u64 sync_enter_timer; /* 0x0250 */ __u64 async_enter_timer; /* 0x0258 */ - __u64 exit_timer; /* 0x0260 */ - __u64 user_timer; /* 0x0268 */ - __u64 system_timer; /* 0x0270 */ - __u64 steal_timer; /* 0x0278 */ - __u64 last_update_timer; /* 0x0280 */ - __u64 last_update_clock; /* 0x0288 */ + __u64 mcck_enter_timer; /* 0x0260 */ + __u64 exit_timer; /* 0x0268 */ + __u64 user_timer; /* 0x0270 */ + __u64 system_timer; /* 0x0278 */ + __u64 steal_timer; /* 0x0280 */ + __u64 last_update_timer; /* 0x0288 */ + __u64 last_update_clock; /* 0x0290 */ /* Current process. */ - __u32 current_task; /* 0x0290 */ - __u32 thread_info; /* 0x0294 */ - __u32 kernel_stack; /* 0x0298 */ + __u32 current_task; /* 0x0298 */ + __u32 thread_info; /* 0x029c */ + __u32 kernel_stack; /* 0x02a0 */ /* Interrupt and panic stack. */ - __u32 async_stack; /* 0x029c */ - __u32 panic_stack; /* 0x02a0 */ + __u32 async_stack; /* 0x02a4 */ + __u32 panic_stack; /* 0x02a8 */ /* Address space pointer. */ - __u32 kernel_asce; /* 0x02a4 */ - __u32 user_asce; /* 0x02a8 */ - __u32 user_exec_asce; /* 0x02ac */ + __u32 kernel_asce; /* 0x02ac */ + __u32 user_asce; /* 0x02b0 */ + __u32 user_exec_asce; /* 0x02b4 */ /* SMP info area */ - __u32 cpu_nr; /* 0x02b0 */ - __u32 softirq_pending; /* 0x02b4 */ - __u32 percpu_offset; /* 0x02b8 */ - __u32 ext_call_fast; /* 0x02bc */ - __u64 int_clock; /* 0x02c0 */ - __u64 clock_comparator; /* 0x02c8 */ - __u32 machine_flags; /* 0x02d0 */ - __u32 ftrace_func; /* 0x02d4 */ - __u8 pad_0x02d8[0x0300-0x02d8]; /* 0x02d8 */ + __u32 cpu_nr; /* 0x02b8 */ + __u32 softirq_pending; /* 0x02bc */ + __u32 percpu_offset; /* 0x02c0 */ + __u32 ext_call_fast; /* 0x02c4 */ + __u64 int_clock; /* 0x02c8 */ + __u64 mcck_clock; /* 0x02d0 */ + __u64 clock_comparator; /* 0x02d8 */ + __u32 machine_flags; /* 0x02e0 */ + __u32 ftrace_func; /* 0x02e4 */ + __u8 pad_0x02e8[0x0300-0x02e8]; /* 0x02e8 */ /* Interrupt response block */ __u8 irb[64]; /* 0x0300 */ @@ -232,38 +234,40 @@ struct _lowcore { /* CPU accounting and timing values. */ __u64 sync_enter_timer; /* 0x02a0 */ __u64 async_enter_timer; /* 0x02a8 */ - __u64 exit_timer; /* 0x02b0 */ - __u64 user_timer; /* 0x02b8 */ - __u64 system_timer; /* 0x02c0 */ - __u64 steal_timer; /* 0x02c8 */ - __u64 last_update_timer; /* 0x02d0 */ - __u64 last_update_clock; /* 0x02d8 */ + __u64 mcck_enter_timer; /* 0x02b0 */ + __u64 exit_timer; /* 0x02b8 */ + __u64 user_timer; /* 0x02c0 */ + __u64 system_timer; /* 0x02c8 */ + __u64 steal_timer; /* 0x02d0 */ + __u64 last_update_timer; /* 0x02d8 */ + __u64 last_update_clock; /* 0x02e0 */ /* Current process. */ - __u64 current_task; /* 0x02e0 */ - __u64 thread_info; /* 0x02e8 */ - __u64 kernel_stack; /* 0x02f0 */ + __u64 current_task; /* 0x02e8 */ + __u64 thread_info; /* 0x02f0 */ + __u64 kernel_stack; /* 0x02f8 */ /* Interrupt and panic stack. */ - __u64 async_stack; /* 0x02f8 */ - __u64 panic_stack; /* 0x0300 */ + __u64 async_stack; /* 0x0300 */ + __u64 panic_stack; /* 0x0308 */ /* Address space pointer. */ - __u64 kernel_asce; /* 0x0308 */ - __u64 user_asce; /* 0x0310 */ - __u64 user_exec_asce; /* 0x0318 */ + __u64 kernel_asce; /* 0x0310 */ + __u64 user_asce; /* 0x0318 */ + __u64 user_exec_asce; /* 0x0320 */ /* SMP info area */ - __u32 cpu_nr; /* 0x0320 */ - __u32 softirq_pending; /* 0x0324 */ - __u64 percpu_offset; /* 0x0328 */ - __u64 ext_call_fast; /* 0x0330 */ - __u64 int_clock; /* 0x0338 */ - __u64 clock_comparator; /* 0x0340 */ - __u64 vdso_per_cpu_data; /* 0x0348 */ - __u64 machine_flags; /* 0x0350 */ - __u64 ftrace_func; /* 0x0358 */ - __u8 pad_0x0368[0x0380-0x0360]; /* 0x0360 */ + __u32 cpu_nr; /* 0x0328 */ + __u32 softirq_pending; /* 0x032c */ + __u64 percpu_offset; /* 0x0330 */ + __u64 ext_call_fast; /* 0x0338 */ + __u64 int_clock; /* 0x0340 */ + __u64 mcck_clock; /* 0x0348 */ + __u64 clock_comparator; /* 0x0350 */ + __u64 vdso_per_cpu_data; /* 0x0358 */ + __u64 machine_flags; /* 0x0360 */ + __u64 ftrace_func; /* 0x0368 */ + __u8 pad_0x0370[0x0380-0x0370]; /* 0x0370 */ /* Interrupt response block. */ __u8 irb[64]; /* 0x0380 */ diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 32b1ede..816d81f 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -112,6 +112,7 @@ int main(void) DEFINE(__LC_RETURN_MCCK_PSW, offsetof(struct _lowcore, return_mcck_psw)); DEFINE(__LC_SYNC_ENTER_TIMER, offsetof(struct _lowcore, sync_enter_timer)); DEFINE(__LC_ASYNC_ENTER_TIMER, offsetof(struct _lowcore, async_enter_timer)); + DEFINE(__LC_MCCK_ENTER_TIMER, offsetof(struct _lowcore, mcck_enter_timer)); DEFINE(__LC_EXIT_TIMER, offsetof(struct _lowcore, exit_timer)); DEFINE(__LC_USER_TIMER, offsetof(struct _lowcore, user_timer)); DEFINE(__LC_SYSTEM_TIMER, offsetof(struct _lowcore, system_timer)); @@ -127,6 +128,7 @@ int main(void) DEFINE(__LC_USER_ASCE, offsetof(struct _lowcore, user_asce)); DEFINE(__LC_USER_EXEC_ASCE, offsetof(struct _lowcore, user_exec_asce)); DEFINE(__LC_INT_CLOCK, offsetof(struct _lowcore, int_clock)); + DEFINE(__LC_MCCK_CLOCK, offsetof(struct _lowcore, mcck_clock)); DEFINE(__LC_MACHINE_FLAGS, offsetof(struct _lowcore, machine_flags)); DEFINE(__LC_FTRACE_FUNC, offsetof(struct _lowcore, ftrace_func)); DEFINE(__LC_IRB, offsetof(struct _lowcore, irb)); diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 07d8499..0e2b162 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -742,15 +742,14 @@ __critical_end: .globl mcck_int_handler mcck_int_handler: - stck __LC_INT_CLOCK + stck __LC_MCCK_CLOCK spt __LC_CPU_TIMER_SAVE_AREA # revalidate cpu timer lm %r0,%r15,__LC_GPREGS_SAVE_AREA # revalidate gprs SAVE_ALL_BASE __LC_SAVE_AREA+32 la %r12,__LC_MCK_OLD_PSW tm __LC_MCCK_CODE,0x80 # system damage? bo BASED(mcck_int_main) # yes -> rest of mcck code invalid - mvc __LC_SAVE_AREA+52(8),__LC_ASYNC_ENTER_TIMER - mvc __LC_ASYNC_ENTER_TIMER(8),__LC_CPU_TIMER_SAVE_AREA + mvc __LC_MCCK_ENTER_TIMER(8),__LC_CPU_TIMER_SAVE_AREA tm __LC_MCCK_CODE+5,0x02 # stored cpu timer value valid? bo BASED(1f) la %r14,__LC_SYNC_ENTER_TIMER @@ -764,7 +763,7 @@ mcck_int_handler: bl BASED(0f) la %r14,__LC_LAST_UPDATE_TIMER 0: spt 0(%r14) - mvc __LC_ASYNC_ENTER_TIMER(8),0(%r14) + mvc __LC_MCCK_ENTER_TIMER(8),0(%r14) 1: tm __LC_MCCK_CODE+2,0x09 # mwp + ia of old psw valid? bno BASED(mcck_int_main) # no -> skip cleanup critical tm __LC_MCK_OLD_PSW+1,0x01 # test problem state bit @@ -786,9 +785,9 @@ mcck_int_main: bno BASED(mcck_no_vtime) # no -> skip cleanup critical tm SP_PSW+1(%r15),0x01 # interrupting from user ? bz BASED(mcck_no_vtime) - UPDATE_VTIME __LC_EXIT_TIMER,__LC_ASYNC_ENTER_TIMER,__LC_USER_TIMER + UPDATE_VTIME __LC_EXIT_TIMER,__LC_MCCK_ENTER_TIMER,__LC_USER_TIMER UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER - mvc __LC_LAST_UPDATE_TIMER(8),__LC_ASYNC_ENTER_TIMER + mvc __LC_LAST_UPDATE_TIMER(8),__LC_MCCK_ENTER_TIMER mcck_no_vtime: l %r9,__LC_THREAD_INFO # load pointer to thread_info struct la %r2,SP_PTREGS(%r15) # load pt_regs @@ -811,7 +810,6 @@ mcck_no_vtime: mcck_return: mvc __LC_RETURN_MCCK_PSW(8),SP_PSW(%r15) # move return PSW ni __LC_RETURN_MCCK_PSW+1,0xfd # clear wait state bit - mvc __LC_ASYNC_ENTER_TIMER(8),__LC_SAVE_AREA+52 tm __LC_RETURN_MCCK_PSW+1,0x01 # returning to user ? bno BASED(0f) lm %r0,%r15,SP_R0(%r15) # load gprs 0-15 @@ -934,15 +932,16 @@ cleanup_critical: cleanup_system_call: mvc __LC_RETURN_PSW(8),0(%r12) - c %r12,BASED(.Lmck_old_psw) - be BASED(0f) - la %r12,__LC_SAVE_AREA+16 - b BASED(1f) -0: la %r12,__LC_SAVE_AREA+32 -1: clc __LC_RETURN_PSW+4(4),BASED(cleanup_system_call_insn+4) bh BASED(0f) + mvc __LC_SYNC_ENTER_TIMER(8),__LC_MCCK_ENTER_TIMER + c %r12,BASED(.Lmck_old_psw) + be BASED(0f) mvc __LC_SYNC_ENTER_TIMER(8),__LC_ASYNC_ENTER_TIMER +0: c %r12,BASED(.Lmck_old_psw) + la %r12,__LC_SAVE_AREA+32 + be BASED(0f) + la %r12,__LC_SAVE_AREA+16 0: clc __LC_RETURN_PSW+4(4),BASED(cleanup_system_call_insn+8) bhe BASED(cleanup_vtime) clc __LC_RETURN_PSW+4(4),BASED(cleanup_system_call_insn) @@ -984,16 +983,19 @@ cleanup_sysc_tif: cleanup_sysc_restore: clc 4(4,%r12),BASED(cleanup_sysc_restore_insn) be BASED(2f) + mvc __LC_EXIT_TIMER(8),__LC_MCCK_ENTER_TIMER + c %r12,BASED(.Lmck_old_psw) + be BASED(0f) mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER - clc 4(4,%r12),BASED(cleanup_sysc_restore_insn+4) +0: clc 4(4,%r12),BASED(cleanup_sysc_restore_insn+4) be BASED(2f) mvc __LC_RETURN_PSW(8),SP_PSW(%r15) c %r12,BASED(.Lmck_old_psw) - bne BASED(0f) - mvc __LC_SAVE_AREA+32(16),SP_R12(%r15) - b BASED(1f) -0: mvc __LC_SAVE_AREA+16(16),SP_R12(%r15) -1: lm %r0,%r11,SP_R0(%r15) + la %r12,__LC_SAVE_AREA+32 + be BASED(1f) + la %r12,__LC_SAVE_AREA+16 +1: mvc 0(16,%r12),SP_R12(%r15) + lm %r0,%r11,SP_R0(%r15) l %r15,SP_R15(%r15) 2: la %r12,__LC_RETURN_PSW br %r14 @@ -1009,19 +1011,15 @@ cleanup_io_tif: cleanup_io_restore: clc 4(4,%r12),BASED(cleanup_io_restore_insn) - be BASED(2f) - mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER + be BASED(1f) + mvc __LC_EXIT_TIMER(8),__LC_MCCK_ENTER_TIMER clc 4(4,%r12),BASED(cleanup_io_restore_insn+4) - be BASED(2f) + be BASED(1f) mvc __LC_RETURN_PSW(8),SP_PSW(%r15) - c %r12,BASED(.Lmck_old_psw) - bne BASED(0f) mvc __LC_SAVE_AREA+32(16),SP_R12(%r15) - b BASED(1f) -0: mvc __LC_SAVE_AREA+16(16),SP_R12(%r15) -1: lm %r0,%r11,SP_R0(%r15) + lm %r0,%r11,SP_R0(%r15) l %r15,SP_R15(%r15) -2: la %r12,__LC_RETURN_PSW +1: la %r12,__LC_RETURN_PSW br %r14 cleanup_io_restore_insn: .long io_done - 4 + 0x80000000 diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index 860cea1..6536f5c 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -725,7 +725,7 @@ __critical_end: */ .globl mcck_int_handler mcck_int_handler: - stck __LC_INT_CLOCK + stck __LC_MCCK_CLOCK la %r1,4095 # revalidate r1 spt __LC_CPU_TIMER_SAVE_AREA-4095(%r1) # revalidate cpu timer lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)# revalidate gprs @@ -734,8 +734,7 @@ mcck_int_handler: tm __LC_MCCK_CODE,0x80 # system damage? jo mcck_int_main # yes -> rest of mcck code invalid la %r14,4095 - mvc __LC_SAVE_AREA+104(8),__LC_ASYNC_ENTER_TIMER - mvc __LC_ASYNC_ENTER_TIMER(8),__LC_CPU_TIMER_SAVE_AREA-4095(%r14) + mvc __LC_MCCK_ENTER_TIMER(8),__LC_CPU_TIMER_SAVE_AREA-4095(%r14) tm __LC_MCCK_CODE+5,0x02 # stored cpu timer value valid? jo 1f la %r14,__LC_SYNC_ENTER_TIMER @@ -749,7 +748,7 @@ mcck_int_handler: jl 0f la %r14,__LC_LAST_UPDATE_TIMER 0: spt 0(%r14) - mvc __LC_ASYNC_ENTER_TIMER(8),0(%r14) + mvc __LC_MCCK_ENTER_TIMER(8),0(%r14) 1: tm __LC_MCCK_CODE+2,0x09 # mwp + ia of old psw valid? jno mcck_int_main # no -> skip cleanup critical tm __LC_MCK_OLD_PSW+1,0x01 # test problem state bit @@ -770,9 +769,9 @@ mcck_int_main: jno mcck_no_vtime # no -> no timer update tm SP_PSW+1(%r15),0x01 # interrupting from user ? jz mcck_no_vtime - UPDATE_VTIME __LC_EXIT_TIMER,__LC_ASYNC_ENTER_TIMER,__LC_USER_TIMER + UPDATE_VTIME __LC_EXIT_TIMER,__LC_MCCK_ENTER_TIMER,__LC_USER_TIMER UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER - mvc __LC_LAST_UPDATE_TIMER(8),__LC_ASYNC_ENTER_TIMER + mvc __LC_LAST_UPDATE_TIMER(8),__LC_MCCK_ENTER_TIMER mcck_no_vtime: lg %r9,__LC_THREAD_INFO # load pointer to thread_info struct la %r2,SP_PTREGS(%r15) # load pt_regs @@ -794,7 +793,6 @@ mcck_return: mvc __LC_RETURN_MCCK_PSW(16),SP_PSW(%r15) # move return PSW ni __LC_RETURN_MCCK_PSW+1,0xfd # clear wait state bit lmg %r0,%r15,SP_R0(%r15) # load gprs 0-15 - mvc __LC_ASYNC_ENTER_TIMER(8),__LC_SAVE_AREA+104 tm __LC_RETURN_MCCK_PSW+1,0x01 # returning to user ? jno 0f stpt __LC_EXIT_TIMER @@ -909,15 +907,16 @@ cleanup_critical: cleanup_system_call: mvc __LC_RETURN_PSW(16),0(%r12) - cghi %r12,__LC_MCK_OLD_PSW - je 0f - la %r12,__LC_SAVE_AREA+32 - j 1f -0: la %r12,__LC_SAVE_AREA+64 -1: clc __LC_RETURN_PSW+8(8),BASED(cleanup_system_call_insn+8) jh 0f + mvc __LC_SYNC_ENTER_TIMER(8),__LC_MCCK_ENTER_TIMER + cghi %r12,__LC_MCK_OLD_PSW + je 0f mvc __LC_SYNC_ENTER_TIMER(8),__LC_ASYNC_ENTER_TIMER +0: cghi %r12,__LC_MCK_OLD_PSW + la %r12,__LC_SAVE_AREA+64 + je 0f + la %r12,__LC_SAVE_AREA+32 0: clc __LC_RETURN_PSW+8(8),BASED(cleanup_system_call_insn+16) jhe cleanup_vtime clc __LC_RETURN_PSW+8(8),BASED(cleanup_system_call_insn) @@ -958,19 +957,22 @@ cleanup_sysc_tif: cleanup_sysc_restore: clc 8(8,%r12),BASED(cleanup_sysc_restore_insn) - je 3f + je 2f clc 8(8,%r12),BASED(cleanup_sysc_restore_insn+8) jhe 0f + mvc __LC_EXIT_TIMER(8),__LC_MCCK_ENTER_TIMER + cghi %r12,__LC_MCK_OLD_PSW + je 0f mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER 0: mvc __LC_RETURN_PSW(16),SP_PSW(%r15) cghi %r12,__LC_MCK_OLD_PSW - jne 1f - mvc __LC_SAVE_AREA+64(32),SP_R12(%r15) - j 2f -1: mvc __LC_SAVE_AREA+32(32),SP_R12(%r15) -2: lmg %r0,%r11,SP_R0(%r15) + la %r12,__LC_SAVE_AREA+64 + je 1f + la %r12,__LC_SAVE_AREA+32 +1: mvc 0(32,%r12),SP_R12(%r15) + lmg %r0,%r11,SP_R0(%r15) lg %r15,SP_R15(%r15) -3: la %r12,__LC_RETURN_PSW +2: la %r12,__LC_RETURN_PSW br %r14 cleanup_sysc_restore_insn: .quad sysc_done - 4 @@ -984,19 +986,15 @@ cleanup_io_tif: cleanup_io_restore: clc 8(8,%r12),BASED(cleanup_io_restore_insn) - je 3f + je 1f clc 8(8,%r12),BASED(cleanup_io_restore_insn+8) jhe 0f - mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER + mvc __LC_EXIT_TIMER(8),__LC_MCCK_ENTER_TIMER 0: mvc __LC_RETURN_PSW(16),SP_PSW(%r15) - cghi %r12,__LC_MCK_OLD_PSW - jne 1f mvc __LC_SAVE_AREA+64(32),SP_R12(%r15) - j 2f -1: mvc __LC_SAVE_AREA+32(32),SP_R12(%r15) -2: lmg %r0,%r11,SP_R0(%r15) + lmg %r0,%r11,SP_R0(%r15) lg %r15,SP_R15(%r15) -3: la %r12,__LC_RETURN_PSW +1: la %r12,__LC_RETURN_PSW br %r14 cleanup_io_restore_insn: .quad io_done - 4 diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index 015e27d..ac15139 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -255,7 +255,8 @@ void notrace s390_do_machine_check(struct pt_regs *regs) int umode; nmi_enter(); - s390_idle_check(); + s390_idle_check(regs, S390_lowcore.mcck_clock, + S390_lowcore.mcck_enter_timer); mci = (struct mci *) &S390_lowcore.mcck_interruption_code; mcck = &__get_cpu_var(cpu_mcck); diff --git a/arch/s390/kernel/s390_ext.c b/arch/s390/kernel/s390_ext.c index 59618bc..9ce641b 100644 --- a/arch/s390/kernel/s390_ext.c +++ b/arch/s390/kernel/s390_ext.c @@ -120,7 +120,8 @@ void __irq_entry do_extint(struct pt_regs *regs, unsigned short code) struct pt_regs *old_regs; old_regs = set_irq_regs(regs); - s390_idle_check(); + s390_idle_check(regs, S390_lowcore.int_clock, + S390_lowcore.async_enter_timer); irq_enter(); if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator) /* Serve timer interrupts first. */ diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index b59a812..3479f1b 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -121,32 +121,35 @@ void account_system_vtime(struct task_struct *tsk) } EXPORT_SYMBOL_GPL(account_system_vtime); -void vtime_start_cpu(void) +void vtime_start_cpu(__u64 int_clock, __u64 enter_timer) { struct s390_idle_data *idle = &__get_cpu_var(s390_idle); struct vtimer_queue *vq = &__get_cpu_var(virt_cpu_timer); __u64 idle_time, expires; + if (idle->idle_enter == 0ULL) + return; + /* Account time spent with enabled wait psw loaded as idle time. */ - idle_time = S390_lowcore.int_clock - idle->idle_enter; + idle_time = int_clock - idle->idle_enter; account_idle_time(idle_time); S390_lowcore.steal_timer += idle->idle_enter - S390_lowcore.last_update_clock; - S390_lowcore.last_update_clock = S390_lowcore.int_clock; + S390_lowcore.last_update_clock = int_clock; /* Account system time spent going idle. */ S390_lowcore.system_timer += S390_lowcore.last_update_timer - vq->idle; - S390_lowcore.last_update_timer = S390_lowcore.async_enter_timer; + S390_lowcore.last_update_timer = enter_timer; /* Restart vtime CPU timer */ if (vq->do_spt) { /* Program old expire value but first save progress. */ - expires = vq->idle - S390_lowcore.async_enter_timer; + expires = vq->idle - enter_timer; expires += get_vtimer(); set_vtimer(expires); } else { /* Don't account the CPU timer delta while the cpu was idle. */ - vq->elapsed -= vq->idle - S390_lowcore.async_enter_timer; + vq->elapsed -= vq->idle - enter_timer; } idle->sequence++; diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c index 5feea1a..f4e6cf3 100644 --- a/drivers/s390/cio/cio.c +++ b/drivers/s390/cio/cio.c @@ -616,7 +616,8 @@ void __irq_entry do_IRQ(struct pt_regs *regs) struct pt_regs *old_regs; old_regs = set_irq_regs(regs); - s390_idle_check(); + s390_idle_check(regs, S390_lowcore.int_clock, + S390_lowcore.async_enter_timer); irq_enter(); __get_cpu_var(s390_idle).nohz_delay = 1; if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator) -- cgit v0.10.2 From cd3b70f5d4d82f85d1e1d6e822f38ae098cf7c72 Mon Sep 17 00:00:00 2001 From: Carsten Otte Date: Mon, 17 May 2010 10:00:04 +0200 Subject: [S390] virtualization aware cpu measurement Use the SPP instruction to set a tag on entry to / exit of the virtual machine context. This allows the cpu measurement facility to distinguish the samples from the host and the different guests. Signed-off-by: Carsten Otte diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 2c02d46..0f97ef2 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -267,7 +267,8 @@ struct _lowcore { __u64 vdso_per_cpu_data; /* 0x0358 */ __u64 machine_flags; /* 0x0360 */ __u64 ftrace_func; /* 0x0368 */ - __u8 pad_0x0370[0x0380-0x0370]; /* 0x0370 */ + __u64 sie_hook; /* 0x0370 */ + __u64 cmf_hpp; /* 0x0378 */ /* Interrupt response block. */ __u8 irb[64]; /* 0x0380 */ diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index 9ab6bd3..25e831d 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -2,7 +2,7 @@ * include/asm-s390/setup.h * * S390 version - * Copyright IBM Corp. 1999,2006 + * Copyright IBM Corp. 1999,2010 */ #ifndef _ASM_S390_SETUP_H @@ -72,6 +72,7 @@ extern unsigned int user_mode; #define MACHINE_FLAG_HPAGE (1UL << 10) #define MACHINE_FLAG_PFMF (1UL << 11) #define MACHINE_FLAG_LPAR (1UL << 12) +#define MACHINE_FLAG_SPP (1UL << 13) #define MACHINE_IS_VM (S390_lowcore.machine_flags & MACHINE_FLAG_VM) #define MACHINE_IS_KVM (S390_lowcore.machine_flags & MACHINE_FLAG_KVM) @@ -88,6 +89,7 @@ extern unsigned int user_mode; #define MACHINE_HAS_MVCOS (0) #define MACHINE_HAS_HPAGE (0) #define MACHINE_HAS_PFMF (0) +#define MACHINE_HAS_SPP (0) #else /* __s390x__ */ #define MACHINE_HAS_IEEE (1) #define MACHINE_HAS_CSP (1) @@ -97,6 +99,7 @@ extern unsigned int user_mode; #define MACHINE_HAS_MVCOS (S390_lowcore.machine_flags & MACHINE_FLAG_MVCOS) #define MACHINE_HAS_HPAGE (S390_lowcore.machine_flags & MACHINE_FLAG_HPAGE) #define MACHINE_HAS_PFMF (S390_lowcore.machine_flags & MACHINE_FLAG_PFMF) +#define MACHINE_HAS_SPP (S390_lowcore.machine_flags & MACHINE_FLAG_SPP) #endif /* __s390x__ */ #define ZFCPDUMP_HSA_SIZE (32UL<<20) diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 816d81f..44a4336 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -131,6 +131,8 @@ int main(void) DEFINE(__LC_MCCK_CLOCK, offsetof(struct _lowcore, mcck_clock)); DEFINE(__LC_MACHINE_FLAGS, offsetof(struct _lowcore, machine_flags)); DEFINE(__LC_FTRACE_FUNC, offsetof(struct _lowcore, ftrace_func)); + DEFINE(__LC_SIE_HOOK, offsetof(struct _lowcore, sie_hook)); + DEFINE(__LC_CMF_HPP, offsetof(struct _lowcore, cmf_hpp)); DEFINE(__LC_IRB, offsetof(struct _lowcore, irb)); DEFINE(__LC_CPU_TIMER_SAVE_AREA, offsetof(struct _lowcore, cpu_timer_save_area)); DEFINE(__LC_CLOCK_COMP_SAVE_AREA, offsetof(struct _lowcore, clock_comp_save_area)); diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 2d92c2c..c00856a 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -356,6 +356,7 @@ static __init void detect_machine_facilities(void) { #ifdef CONFIG_64BIT unsigned int facilities; + unsigned long long facility_bits; facilities = stfl(); if (facilities & (1 << 28)) @@ -364,6 +365,9 @@ static __init void detect_machine_facilities(void) S390_lowcore.machine_flags |= MACHINE_FLAG_PFMF; if (facilities & (1 << 4)) S390_lowcore.machine_flags |= MACHINE_FLAG_MVCOS; + if ((stfle(&facility_bits, 1) > 0) && + (facility_bits & (1ULL << (63 - 40)))) + S390_lowcore.machine_flags |= MACHINE_FLAG_SPP; #endif } diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index 6536f5c..829b759 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -2,7 +2,7 @@ * arch/s390/kernel/entry64.S * S390 low-level entry points. * - * Copyright (C) IBM Corp. 1999,2006 + * Copyright (C) IBM Corp. 1999,2010 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), * Hartmut Penner (hp@de.ibm.com), * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com), @@ -59,6 +59,16 @@ _TIF_SYSCALL = (_TIF_SYSCALL_TRACE>>8 | _TIF_SYSCALL_AUDIT>>8 | \ #define BASED(name) name-system_call(%r13) + .macro HANDLE_SIE_INTERCEPT +#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE) + lg %r3,__LC_SIE_HOOK + ltgr %r3,%r3 + jz 0f + basr %r14,%r3 + 0: +#endif + .endm + #ifdef CONFIG_TRACE_IRQFLAGS .macro TRACE_IRQS_ON basr %r2,%r0 @@ -466,6 +476,7 @@ pgm_check_handler: UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER pgm_no_vtime: + HANDLE_SIE_INTERCEPT TRACE_IRQS_CHECK_OFF lg %r9,__LC_THREAD_INFO # load pointer to thread_info struct mvc SP_ARGS(8,%r15),__LC_LAST_BREAK @@ -507,6 +518,7 @@ pgm_per_std: UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER pgm_no_vtime2: + HANDLE_SIE_INTERCEPT TRACE_IRQS_CHECK_OFF lg %r9,__LC_THREAD_INFO # load pointer to thread_info struct lg %r1,__TI_task(%r9) @@ -570,6 +582,7 @@ io_int_handler: mvc __LC_LAST_UPDATE_TIMER(8),__LC_ASYNC_ENTER_TIMER io_no_vtime: lg %r9,__LC_THREAD_INFO # load pointer to thread_info struct + HANDLE_SIE_INTERCEPT TRACE_IRQS_OFF la %r2,SP_PTREGS(%r15) # address of register-save area brasl %r14,do_IRQ # call standard irq handler @@ -595,15 +608,6 @@ io_done: io_work: tm SP_PSW+1(%r15),0x01 # returning to user ? jo io_work_user # yes -> do resched & signal -#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE) - lg %r2,SP_PSW+8(%r15) # check if current instruction is SIE - lh %r1,0(%r2) - chi %r1,-19948 # signed 16 bit compare with 0xb214 - jne 0f # no -> leave PSW alone - aghi %r2,4 # yes-> add 4 bytes to leave SIE - stg %r2,SP_PSW+8(%r15) -0: -#endif #ifdef CONFIG_PREEMPT # check for preemptive scheduling icm %r0,15,__TI_precount(%r9) @@ -712,6 +716,7 @@ ext_int_handler: mvc __LC_LAST_UPDATE_TIMER(8),__LC_ASYNC_ENTER_TIMER ext_no_vtime: lg %r9,__LC_THREAD_INFO # load pointer to thread_info struct + HANDLE_SIE_INTERCEPT TRACE_IRQS_OFF la %r2,SP_PTREGS(%r15) # address of register-save area llgh %r3,__LC_EXT_INT_CODE # get interruption code @@ -786,6 +791,7 @@ mcck_no_vtime: stosm __SF_EMPTY(%r15),0x04 # turn dat on tm __TI_flags+7(%r9),_TIF_MCCK_PENDING jno mcck_return + HANDLE_SIE_INTERCEPT TRACE_IRQS_OFF brasl %r14,s390_handle_mcck TRACE_IRQS_ON diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 5987524..7d89324 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -2,7 +2,7 @@ * arch/s390/kernel/setup.c * * S390 version - * Copyright (C) 1999,2000 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Copyright (C) IBM Corp. 1999,2010 * Author(s): Hartmut Penner (hp@de.ibm.com), * Martin Schwidefsky (schwidefsky@de.ibm.com) * @@ -401,6 +401,7 @@ setup_lowcore(void) lc->io_new_psw.mask = psw_kernel_bits; lc->io_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) io_int_handler; lc->clock_comparator = -1ULL; + lc->cmf_hpp = -1ULL; lc->kernel_stack = ((unsigned long) &init_thread_union) + THREAD_SIZE; lc->async_stack = (unsigned long) __alloc_bootmem(ASYNC_SIZE, ASYNC_SIZE, 0) + ASYNC_SIZE; diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig index a725158..2f4b687c 100644 --- a/arch/s390/kvm/Kconfig +++ b/arch/s390/kvm/Kconfig @@ -33,6 +33,17 @@ config KVM If unsure, say N. +config KVM_AWARE_CMF + depends on KVM + bool "KVM aware sampling" + ---help--- + This option enhances the sampling data from the CPU Measurement + Facility with additional information, that allows to distinguish + guest(s) and host when using the kernel based virtual machine + functionality. + + If unsure, say N. + # OK, it's a little counter-intuitive to do this, but it puts it neatly under # the virtualization menu. source drivers/vhost/Kconfig diff --git a/arch/s390/kvm/sie64a.S b/arch/s390/kvm/sie64a.S index 934fd6a..31646bd 100644 --- a/arch/s390/kvm/sie64a.S +++ b/arch/s390/kvm/sie64a.S @@ -1,20 +1,60 @@ /* * sie64a.S - low level sie call * - * Copyright IBM Corp. 2008 + * Copyright IBM Corp. 2008,2010 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License (version 2 only) * as published by the Free Software Foundation. * * Author(s): Heiko Carstens + * Christian Ehrhardt */ #include #include +#include +#include +#include +#include + +_TIF_EXIT_SIE = (_TIF_SIGPENDING | _TIF_NEED_RESCHED | _TIF_MCCK_PENDING) + +/* + * offsets into stackframe + * SP_ = offsets into stack sie64 is called with + * SPI_ = offsets into irq stack + */ +SP_GREGS = __SF_EMPTY +SP_HOOK = __SF_EMPTY+8 +SP_GPP = __SF_EMPTY+16 +SPI_PSW = STACK_FRAME_OVERHEAD + __PT_PSW + -SP_R5 = 5 * 8 # offset into stackframe -SP_R6 = 6 * 8 + .macro SPP newpp +#ifdef CONFIG_KVM_AWARE_CMF + tm __LC_MACHINE_FLAGS+6,0x20 # MACHINE_FLAG_SPP + jz 0f + .insn s,0xb2800000,\newpp + 0: +#endif + .endm + +sie_irq_handler: + SPP __LC_CMF_HPP # set host id + larl %r2,sie_inst + clg %r2,SPI_PSW+8(0,%r15) # intercepted sie + jne 1f + xc __LC_SIE_HOOK(8),__LC_SIE_HOOK + lg %r2,__LC_THREAD_INFO # pointer thread_info struct + tm __TI_flags+7(%r2),_TIF_EXIT_SIE + jz 0f + larl %r2,sie_exit # work pending, leave sie + stg %r2,__LC_RETURN_PSW+8 + br %r14 +0: larl %r2,sie_reenter # re-enter with guest id + stg %r2,__LC_RETURN_PSW+8 +1: br %r14 /* * sie64a calling convention: @@ -23,23 +63,34 @@ SP_R6 = 6 * 8 */ .globl sie64a sie64a: - lgr %r5,%r3 - stmg %r5,%r14,SP_R5(%r15) # save register on entry - lgr %r14,%r2 # pointer to sie control block - lmg %r0,%r13,0(%r3) # load guest gprs 0-13 + stg %r3,SP_GREGS(%r15) # save guest register save area + stmg %r6,%r14,__SF_GPRS(%r15) # save registers on entry + lgr %r14,%r2 # pointer to sie control block + larl %r5,sie_irq_handler + stg %r2,SP_GPP(%r15) + stg %r5,SP_HOOK(%r15) # save hook target + lmg %r0,%r13,0(%r3) # load guest gprs 0-13 +sie_reenter: + mvc __LC_SIE_HOOK(8),SP_HOOK(%r15) + SPP SP_GPP(%r15) # set guest id sie_inst: sie 0(%r14) - lg %r14,SP_R5(%r15) - stmg %r0,%r13,0(%r14) # save guest gprs 0-13 + xc __LC_SIE_HOOK(8),__LC_SIE_HOOK + SPP __LC_CMF_HPP # set host id +sie_exit: + lg %r14,SP_GREGS(%r15) + stmg %r0,%r13,0(%r14) # save guest gprs 0-13 lghi %r2,0 - lmg %r6,%r14,SP_R6(%r15) + lmg %r6,%r14,__SF_GPRS(%r15) br %r14 sie_err: - lg %r14,SP_R5(%r15) - stmg %r0,%r13,0(%r14) # save guest gprs 0-13 + xc __LC_SIE_HOOK(8),__LC_SIE_HOOK + SPP __LC_CMF_HPP # set host id + lg %r14,SP_GREGS(%r15) + stmg %r0,%r13,0(%r14) # save guest gprs 0-13 lghi %r2,-EFAULT - lmg %r6,%r14,SP_R6(%r15) + lmg %r6,%r14,__SF_GPRS(%r15) br %r14 .section __ex_table,"a" -- cgit v0.10.2 From 86f2552bbd0e17b19bb5e9881042533eaea553c7 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 17 May 2010 10:00:05 +0200 Subject: [S390] add breaking event address for user space Copy the last breaking event address from the lowcore to a new field in the thread_struct on each system entry. Add a new ptrace request PTRACE_GET_LAST_BREAK and a new utrace regset REGSET_LAST_BREAK to query the last breaking event. This is useful for debugging wild branches in user space code. Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h index fef9b33..e2c218d 100644 --- a/arch/s390/include/asm/ptrace.h +++ b/arch/s390/include/asm/ptrace.h @@ -328,8 +328,8 @@ struct pt_regs psw_t psw; unsigned long gprs[NUM_GPRS]; unsigned long orig_gpr2; - unsigned short svcnr; unsigned short ilc; + unsigned short svcnr; }; #endif @@ -436,6 +436,7 @@ typedef struct #define PTRACE_PEEKDATA_AREA 0x5003 #define PTRACE_POKETEXT_AREA 0x5004 #define PTRACE_POKEDATA_AREA 0x5005 +#define PTRACE_GET_LAST_BREAK 0x5006 /* * PT_PROT definition is loosely based on hppa bsd definition in diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index 34f0873..be3d3f9 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -50,6 +50,7 @@ struct thread_info { struct restart_block restart_block; __u64 user_timer; __u64 system_timer; + unsigned long last_break; /* last breaking-event-address. */ }; /* diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 44a4336..d9b490a 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -39,6 +39,7 @@ int main(void) DEFINE(__TI_precount, offsetof(struct thread_info, preempt_count)); DEFINE(__TI_user_timer, offsetof(struct thread_info, user_timer)); DEFINE(__TI_system_timer, offsetof(struct thread_info, system_timer)); + DEFINE(__TI_last_break, offsetof(struct thread_info, last_break)); BLANK(); DEFINE(__PT_ARGS, offsetof(struct pt_regs, args)); DEFINE(__PT_PSW, offsetof(struct pt_regs, psw)); diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 0e2b162..d5e3e60 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -180,9 +180,9 @@ STACK_SIZE = 1 << STACK_SHIFT s %r15,BASED(.Lc_spsize) # make room for registers & psw mvc SP_PSW(8,%r15),0(%r12) # move user PSW to stack st %r2,SP_ORIG_R2(%r15) # store original content of gpr 2 - icm %r12,3,__LC_SVC_ILC + icm %r12,12,__LC_SVC_ILC stm %r0,%r11,SP_R0(%r15) # store gprs %r0-%r11 to kernel stack - st %r12,SP_SVCNR(%r15) + st %r12,SP_ILC(%r15) mvc SP_R12(16,%r15),\savearea # move %r12-%r15 to stack la %r12,0 st %r12,__SF_BACKCHAIN(%r15) # clear back chain diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index 829b759..178d925 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -126,31 +126,35 @@ _TIF_SYSCALL = (_TIF_SYSCALL_TRACE>>8 | _TIF_SYSCALL_AUDIT>>8 | \ * R15 - kernel stack pointer */ - .macro SAVE_ALL_BASE savearea - stmg %r12,%r15,\savearea - larl %r13,system_call - .endm - .macro SAVE_ALL_SVC psworg,savearea - la %r12,\psworg + stmg %r11,%r15,\savearea lg %r15,__LC_KERNEL_STACK # problem state -> load ksp + aghi %r15,-SP_SIZE # make room for registers & psw + lg %r11,__LC_LAST_BREAK .endm - .macro SAVE_ALL_SYNC psworg,savearea - la %r12,\psworg + .macro SAVE_ALL_PGM psworg,savearea + stmg %r11,%r15,\savearea tm \psworg+1,0x01 # test problem state bit - jz 2f # skip stack setup save - lg %r15,__LC_KERNEL_STACK # problem state -> load ksp #ifdef CONFIG_CHECK_STACK - j 3f -2: tml %r15,STACK_SIZE - CONFIG_STACK_GUARD - jz stack_overflow -3: + jnz 1f + tml %r15,STACK_SIZE - CONFIG_STACK_GUARD + jnz 2f + la %r12,\psworg + j stack_overflow +#else + jz 2f #endif -2: +1: lg %r15,__LC_KERNEL_STACK # problem state -> load ksp +2: aghi %r15,-SP_SIZE # make room for registers & psw + larl %r13,system_call + lg %r11,__LC_LAST_BREAK .endm .macro SAVE_ALL_ASYNC psworg,savearea + stmg %r11,%r15,\savearea + larl %r13,system_call + lg %r11,__LC_LAST_BREAK la %r12,\psworg tm \psworg+1,0x01 # test problem state bit jnz 1f # from user -> load kernel stack @@ -164,27 +168,23 @@ _TIF_SYSCALL = (_TIF_SYSCALL_TRACE>>8 | _TIF_SYSCALL_AUDIT>>8 | \ 0: lg %r14,__LC_ASYNC_STACK # are we already on the async. stack ? slgr %r14,%r15 srag %r14,%r14,STACK_SHIFT - jz 2f -1: lg %r15,__LC_ASYNC_STACK # load async stack #ifdef CONFIG_CHECK_STACK - j 3f -2: tml %r15,STACK_SIZE - CONFIG_STACK_GUARD - jz stack_overflow -3: + jnz 1f + tml %r15,STACK_SIZE - CONFIG_STACK_GUARD + jnz 2f + j stack_overflow +#else + jz 2f #endif -2: +1: lg %r15,__LC_ASYNC_STACK # load async stack +2: aghi %r15,-SP_SIZE # make room for registers & psw .endm - .macro CREATE_STACK_FRAME psworg,savearea - aghi %r15,-SP_SIZE # make room for registers & psw - mvc SP_PSW(16,%r15),0(%r12) # move user PSW to stack + .macro CREATE_STACK_FRAME savearea + xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) stg %r2,SP_ORIG_R2(%r15) # store original content of gpr 2 - icm %r12,3,__LC_SVC_ILC - stmg %r0,%r11,SP_R0(%r15) # store gprs %r0-%r11 to kernel stack - st %r12,SP_SVCNR(%r15) - mvc SP_R12(32,%r15),\savearea # move %r12-%r15 to stack - la %r12,0 - stg %r12,__SF_BACKCHAIN(%r15) + mvc SP_R11(40,%r15),\savearea # move %r11-%r15 to stack + stmg %r0,%r10,SP_R0(%r15) # store gprs %r0-%r10 to kernel stack .endm .macro RESTORE_ALL psworg,sync @@ -200,6 +200,13 @@ _TIF_SYSCALL = (_TIF_SYSCALL_TRACE>>8 | _TIF_SYSCALL_AUDIT>>8 | \ lpswe \psworg # back to caller .endm + .macro LAST_BREAK + srag %r10,%r11,23 + jz 0f + stg %r11,__TI_last_break(%r12) +0: + .endm + /* * Scheduler resume function, called by switch_to * gpr2 = (task_struct *) prev @@ -245,37 +252,38 @@ __critical_start: system_call: stpt __LC_SYNC_ENTER_TIMER sysc_saveall: - SAVE_ALL_BASE __LC_SAVE_AREA SAVE_ALL_SVC __LC_SVC_OLD_PSW,__LC_SAVE_AREA - CREATE_STACK_FRAME __LC_SVC_OLD_PSW,__LC_SAVE_AREA - llgh %r7,__LC_SVC_INT_CODE # get svc number from lowcore + CREATE_STACK_FRAME __LC_SAVE_AREA + mvc SP_PSW(16,%r15),__LC_SVC_OLD_PSW + mvc SP_ILC(4,%r15),__LC_SVC_ILC + stg %r7,SP_ARGS(%r15) + lg %r12,__LC_THREAD_INFO # load pointer to thread_info struct sysc_vtime: UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER sysc_stime: UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER sysc_update: mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER + LAST_BREAK sysc_do_svc: - lg %r9,__LC_THREAD_INFO # load pointer to thread_info struct - ltgr %r7,%r7 # test for svc 0 + llgh %r7,SP_SVCNR(%r15) + slag %r7,%r7,2 # shift and test for svc 0 jnz sysc_nr_ok # svc 0: system call number in %r1 - cl %r1,BASED(.Lnr_syscalls) + llgfr %r1,%r1 # clear high word in r1 + cghi %r1,NR_syscalls jnl sysc_nr_ok - lgfr %r7,%r1 # clear high word in r1 + sth %r1,SP_SVCNR(%r15) + slag %r7,%r1,2 # shift and test for svc 0 sysc_nr_ok: - mvc SP_ARGS(8,%r15),SP_R7(%r15) -sysc_do_restart: - sth %r7,SP_SVCNR(%r15) - sllg %r7,%r7,2 # svc number * 4 larl %r10,sys_call_table #ifdef CONFIG_COMPAT - tm __TI_flags+5(%r9),(_TIF_31BIT>>16) # running in 31 bit mode ? + tm __TI_flags+5(%r12),(_TIF_31BIT>>16) # running in 31 bit mode ? jno sysc_noemu larl %r10,sys_call_table_emu # use 31 bit emulation system calls sysc_noemu: #endif - tm __TI_flags+6(%r9),_TIF_SYSCALL + tm __TI_flags+6(%r12),_TIF_SYSCALL lgf %r8,0(%r7,%r10) # load address of system call routine jnz sysc_tracesys basr %r14,%r8 # call sys_xxxx @@ -284,7 +292,7 @@ sysc_noemu: sysc_return: LOCKDEP_SYS_EXIT sysc_tif: - tm __TI_flags+7(%r9),_TIF_WORK_SVC + tm __TI_flags+7(%r12),_TIF_WORK_SVC jnz sysc_work # there is work to do (signals etc.) sysc_restore: RESTORE_ALL __LC_RETURN_PSW,1 @@ -301,17 +309,17 @@ sysc_work: # One of the work bits is on. Find out which one. # sysc_work_tif: - tm __TI_flags+7(%r9),_TIF_MCCK_PENDING + tm __TI_flags+7(%r12),_TIF_MCCK_PENDING jo sysc_mcck_pending - tm __TI_flags+7(%r9),_TIF_NEED_RESCHED + tm __TI_flags+7(%r12),_TIF_NEED_RESCHED jo sysc_reschedule - tm __TI_flags+7(%r9),_TIF_SIGPENDING + tm __TI_flags+7(%r12),_TIF_SIGPENDING jo sysc_sigpending - tm __TI_flags+7(%r9),_TIF_NOTIFY_RESUME + tm __TI_flags+7(%r12),_TIF_NOTIFY_RESUME jo sysc_notify_resume - tm __TI_flags+7(%r9),_TIF_RESTART_SVC + tm __TI_flags+7(%r12),_TIF_RESTART_SVC jo sysc_restart - tm __TI_flags+7(%r9),_TIF_SINGLE_STEP + tm __TI_flags+7(%r12),_TIF_SINGLE_STEP jo sysc_singlestep j sysc_return # beware of critical section cleanup @@ -333,12 +341,12 @@ sysc_mcck_pending: # _TIF_SIGPENDING is set, call do_signal # sysc_sigpending: - ni __TI_flags+7(%r9),255-_TIF_SINGLE_STEP # clear TIF_SINGLE_STEP + ni __TI_flags+7(%r12),255-_TIF_SINGLE_STEP # clear TIF_SINGLE_STEP la %r2,SP_PTREGS(%r15) # load pt_regs brasl %r14,do_signal # call do_signal - tm __TI_flags+7(%r9),_TIF_RESTART_SVC + tm __TI_flags+7(%r12),_TIF_RESTART_SVC jo sysc_restart - tm __TI_flags+7(%r9),_TIF_SINGLE_STEP + tm __TI_flags+7(%r12),_TIF_SINGLE_STEP jo sysc_singlestep j sysc_return @@ -354,17 +362,19 @@ sysc_notify_resume: # _TIF_RESTART_SVC is set, set up registers and restart svc # sysc_restart: - ni __TI_flags+7(%r9),255-_TIF_RESTART_SVC # clear TIF_RESTART_SVC + ni __TI_flags+7(%r12),255-_TIF_RESTART_SVC # clear TIF_RESTART_SVC lg %r7,SP_R2(%r15) # load new svc number mvc SP_R2(8,%r15),SP_ORIG_R2(%r15) # restore first argument lmg %r2,%r6,SP_R2(%r15) # load svc arguments - j sysc_do_restart # restart svc + sth %r7,SP_SVCNR(%r15) + slag %r7,%r7,2 + j sysc_nr_ok # restart svc # # _TIF_SINGLE_STEP is set, call do_single_step # sysc_singlestep: - ni __TI_flags+7(%r9),255-_TIF_SINGLE_STEP # clear TIF_SINGLE_STEP + ni __TI_flags+7(%r12),255-_TIF_SINGLE_STEP # clear TIF_SINGLE_STEP xc SP_SVCNR(2,%r15),SP_SVCNR(%r15) # clear svc number la %r2,SP_PTREGS(%r15) # address of register-save area larl %r14,sysc_return # load adr. of system return @@ -377,8 +387,8 @@ sysc_singlestep: sysc_tracesys: la %r2,SP_PTREGS(%r15) # load pt_regs la %r3,0 - srl %r7,2 - stg %r7,SP_R2(%r15) + llgh %r0,SP_SVCNR(%r15) + stg %r0,SP_R2(%r15) brasl %r14,do_syscall_trace_enter lghi %r0,NR_syscalls clgr %r0,%r2 @@ -391,7 +401,7 @@ sysc_tracego: basr %r14,%r8 # call sys_xxx stg %r2,SP_R2(%r15) # store return value sysc_tracenogo: - tm __TI_flags+6(%r9),_TIF_SYSCALL + tm __TI_flags+6(%r12),_TIF_SYSCALL jz sysc_return la %r2,SP_PTREGS(%r15) # load pt_regs larl %r14,sysc_return # return point is sysc_return @@ -403,7 +413,7 @@ sysc_tracenogo: .globl ret_from_fork ret_from_fork: lg %r13,__LC_SVC_NEW_PSW+8 - lg %r9,__LC_THREAD_INFO # load pointer to thread_info struct + lg %r12,__LC_THREAD_INFO # load pointer to thread_info struct tm SP_PSW+1(%r15),0x01 # forking a kernel thread ? jo 0f stg %r15,SP_R15(%r15) # store stack pointer for new kthread @@ -437,8 +447,8 @@ kernel_execve: lg %r15,__LC_KERNEL_STACK # load ksp aghi %r15,-SP_SIZE # make room for registers & psw lg %r13,__LC_SVC_NEW_PSW+8 - lg %r9,__LC_THREAD_INFO mvc SP_PTREGS(__PT_SIZE,%r15),0(%r12) # copy pt_regs + lg %r12,__LC_THREAD_INFO xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) # TRACE_IRQS_ON stosm __SF_EMPTY(%r15),0x03 # reenable interrupts @@ -465,21 +475,23 @@ pgm_check_handler: * for LPSW?). */ stpt __LC_SYNC_ENTER_TIMER - SAVE_ALL_BASE __LC_SAVE_AREA tm __LC_PGM_INT_CODE+1,0x80 # check whether we got a per exception jnz pgm_per # got per exception -> special case - SAVE_ALL_SYNC __LC_PGM_OLD_PSW,__LC_SAVE_AREA - CREATE_STACK_FRAME __LC_PGM_OLD_PSW,__LC_SAVE_AREA + SAVE_ALL_PGM __LC_PGM_OLD_PSW,__LC_SAVE_AREA + CREATE_STACK_FRAME __LC_SAVE_AREA + xc SP_ILC(4,%r15),SP_ILC(%r15) + mvc SP_PSW(16,%r15),__LC_PGM_OLD_PSW + lg %r12,__LC_THREAD_INFO # load pointer to thread_info struct tm SP_PSW+1(%r15),0x01 # interrupting from user ? jz pgm_no_vtime UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER + LAST_BREAK pgm_no_vtime: HANDLE_SIE_INTERCEPT TRACE_IRQS_CHECK_OFF - lg %r9,__LC_THREAD_INFO # load pointer to thread_info struct - mvc SP_ARGS(8,%r15),__LC_LAST_BREAK + stg %r11,SP_ARGS(%r15) lgf %r3,__LC_PGM_ILC # load program interruption code lghi %r8,0x7f ngr %r8,%r3 @@ -503,31 +515,32 @@ pgm_per: clc __LC_PGM_OLD_PSW(16),__LC_SVC_NEW_PSW je pgm_svcper # no interesting special case, ignore PER event - lmg %r12,%r15,__LC_SAVE_AREA lpswe __LC_PGM_OLD_PSW # # Normal per exception # pgm_per_std: - SAVE_ALL_SYNC __LC_PGM_OLD_PSW,__LC_SAVE_AREA - CREATE_STACK_FRAME __LC_PGM_OLD_PSW,__LC_SAVE_AREA + SAVE_ALL_PGM __LC_PGM_OLD_PSW,__LC_SAVE_AREA + CREATE_STACK_FRAME __LC_SAVE_AREA + mvc SP_PSW(16,%r15),__LC_PGM_OLD_PSW + lg %r12,__LC_THREAD_INFO # load pointer to thread_info struct tm SP_PSW+1(%r15),0x01 # interrupting from user ? jz pgm_no_vtime2 UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER + LAST_BREAK pgm_no_vtime2: HANDLE_SIE_INTERCEPT TRACE_IRQS_CHECK_OFF - lg %r9,__LC_THREAD_INFO # load pointer to thread_info struct - lg %r1,__TI_task(%r9) + lg %r1,__TI_task(%r12) tm SP_PSW+1(%r15),0x01 # kernel per event ? jz kernel_per mvc __THREAD_per+__PER_atmid(2,%r1),__LC_PER_ATMID mvc __THREAD_per+__PER_address(8,%r1),__LC_PER_ADDRESS mvc __THREAD_per+__PER_access_id(1,%r1),__LC_PER_ACCESS_ID - oi __TI_flags+7(%r9),_TIF_SINGLE_STEP # set TIF_SINGLE_STEP + oi __TI_flags+7(%r12),_TIF_SINGLE_STEP # set TIF_SINGLE_STEP lgf %r3,__LC_PGM_ILC # load program interruption code lghi %r8,0x7f ngr %r8,%r3 # clear per-event-bit and ilc @@ -538,19 +551,21 @@ pgm_no_vtime2: # it was a single stepped SVC that is causing all the trouble # pgm_svcper: - SAVE_ALL_SYNC __LC_SVC_OLD_PSW,__LC_SAVE_AREA - CREATE_STACK_FRAME __LC_SVC_OLD_PSW,__LC_SAVE_AREA + SAVE_ALL_PGM __LC_SVC_OLD_PSW,__LC_SAVE_AREA + CREATE_STACK_FRAME __LC_SAVE_AREA + mvc SP_PSW(16,%r15),__LC_SVC_OLD_PSW + mvc SP_ILC(4,%r15),__LC_SVC_ILC + lg %r12,__LC_THREAD_INFO # load pointer to thread_info struct UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER - llgh %r7,__LC_SVC_INT_CODE # get svc number from lowcore - lg %r9,__LC_THREAD_INFO # load pointer to thread_info struct + LAST_BREAK TRACE_IRQS_OFF - lg %r8,__TI_task(%r9) + lg %r8,__TI_task(%r12) mvc __THREAD_per+__PER_atmid(2,%r8),__LC_PER_ATMID mvc __THREAD_per+__PER_address(8,%r8),__LC_PER_ADDRESS mvc __THREAD_per+__PER_access_id(1,%r8),__LC_PER_ACCESS_ID - oi __TI_flags+7(%r9),_TIF_SINGLE_STEP # set TIF_SINGLE_STEP + oi __TI_flags+7(%r12),_TIF_SINGLE_STEP # set TIF_SINGLE_STEP TRACE_IRQS_ON stosm __SF_EMPTY(%r15),0x03 # reenable interrupts lmg %r2,%r6,SP_R2(%r15) # load svc arguments @@ -572,16 +587,17 @@ kernel_per: io_int_handler: stck __LC_INT_CLOCK stpt __LC_ASYNC_ENTER_TIMER - SAVE_ALL_BASE __LC_SAVE_AREA+32 - SAVE_ALL_ASYNC __LC_IO_OLD_PSW,__LC_SAVE_AREA+32 - CREATE_STACK_FRAME __LC_IO_OLD_PSW,__LC_SAVE_AREA+32 + SAVE_ALL_ASYNC __LC_IO_OLD_PSW,__LC_SAVE_AREA+40 + CREATE_STACK_FRAME __LC_SAVE_AREA+40 + mvc SP_PSW(16,%r15),0(%r12) # move user PSW to stack + lg %r12,__LC_THREAD_INFO # load pointer to thread_info struct tm SP_PSW+1(%r15),0x01 # interrupting from user ? jz io_no_vtime UPDATE_VTIME __LC_EXIT_TIMER,__LC_ASYNC_ENTER_TIMER,__LC_USER_TIMER UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER mvc __LC_LAST_UPDATE_TIMER(8),__LC_ASYNC_ENTER_TIMER + LAST_BREAK io_no_vtime: - lg %r9,__LC_THREAD_INFO # load pointer to thread_info struct HANDLE_SIE_INTERCEPT TRACE_IRQS_OFF la %r2,SP_PTREGS(%r15) # address of register-save area @@ -590,7 +606,7 @@ io_return: LOCKDEP_SYS_EXIT TRACE_IRQS_ON io_tif: - tm __TI_flags+7(%r9),_TIF_WORK_INT + tm __TI_flags+7(%r12),_TIF_WORK_INT jnz io_work # there is work to do (signals etc.) io_restore: RESTORE_ALL __LC_RETURN_PSW,0 @@ -610,7 +626,7 @@ io_work: jo io_work_user # yes -> do resched & signal #ifdef CONFIG_PREEMPT # check for preemptive scheduling - icm %r0,15,__TI_precount(%r9) + icm %r0,15,__TI_precount(%r12) jnz io_restore # preemption is disabled tm __TI_flags+7(%r12),_TIF_NEED_RESCHED jno io_restore @@ -645,13 +661,13 @@ io_work_user: # and _TIF_MCCK_PENDING # io_work_tif: - tm __TI_flags+7(%r9),_TIF_MCCK_PENDING + tm __TI_flags+7(%r12),_TIF_MCCK_PENDING jo io_mcck_pending - tm __TI_flags+7(%r9),_TIF_NEED_RESCHED + tm __TI_flags+7(%r12),_TIF_NEED_RESCHED jo io_reschedule - tm __TI_flags+7(%r9),_TIF_SIGPENDING + tm __TI_flags+7(%r12),_TIF_SIGPENDING jo io_sigpending - tm __TI_flags+7(%r9),_TIF_NOTIFY_RESUME + tm __TI_flags+7(%r12),_TIF_NOTIFY_RESUME jo io_notify_resume j io_return # beware of critical section cleanup @@ -706,16 +722,17 @@ io_notify_resume: ext_int_handler: stck __LC_INT_CLOCK stpt __LC_ASYNC_ENTER_TIMER - SAVE_ALL_BASE __LC_SAVE_AREA+32 - SAVE_ALL_ASYNC __LC_EXT_OLD_PSW,__LC_SAVE_AREA+32 - CREATE_STACK_FRAME __LC_EXT_OLD_PSW,__LC_SAVE_AREA+32 + SAVE_ALL_ASYNC __LC_EXT_OLD_PSW,__LC_SAVE_AREA+40 + CREATE_STACK_FRAME __LC_SAVE_AREA+40 + mvc SP_PSW(16,%r15),0(%r12) # move user PSW to stack + lg %r12,__LC_THREAD_INFO # load pointer to thread_info struct tm SP_PSW+1(%r15),0x01 # interrupting from user ? jz ext_no_vtime UPDATE_VTIME __LC_EXIT_TIMER,__LC_ASYNC_ENTER_TIMER,__LC_USER_TIMER UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER mvc __LC_LAST_UPDATE_TIMER(8),__LC_ASYNC_ENTER_TIMER + LAST_BREAK ext_no_vtime: - lg %r9,__LC_THREAD_INFO # load pointer to thread_info struct HANDLE_SIE_INTERCEPT TRACE_IRQS_OFF la %r2,SP_PTREGS(%r15) # address of register-save area @@ -734,7 +751,9 @@ mcck_int_handler: la %r1,4095 # revalidate r1 spt __LC_CPU_TIMER_SAVE_AREA-4095(%r1) # revalidate cpu timer lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)# revalidate gprs - SAVE_ALL_BASE __LC_SAVE_AREA+64 + stmg %r11,%r15,__LC_SAVE_AREA+80 + larl %r13,system_call + lg %r11,__LC_LAST_BREAK la %r12,__LC_MCK_OLD_PSW tm __LC_MCCK_CODE,0x80 # system damage? jo mcck_int_main # yes -> rest of mcck code invalid @@ -769,7 +788,10 @@ mcck_int_main: srag %r14,%r14,PAGE_SHIFT jz 0f lg %r15,__LC_PANIC_STACK # load panic stack -0: CREATE_STACK_FRAME __LC_MCK_OLD_PSW,__LC_SAVE_AREA+64 +0: aghi %r15,-SP_SIZE # make room for registers & psw + CREATE_STACK_FRAME __LC_SAVE_AREA+80 + mvc SP_PSW(16,%r15),0(%r12) + lg %r12,__LC_THREAD_INFO # load pointer to thread_info struct tm __LC_MCCK_CODE+2,0x08 # mwp of old psw valid? jno mcck_no_vtime # no -> no timer update tm SP_PSW+1(%r15),0x01 # interrupting from user ? @@ -777,8 +799,8 @@ mcck_int_main: UPDATE_VTIME __LC_EXIT_TIMER,__LC_MCCK_ENTER_TIMER,__LC_USER_TIMER UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER mvc __LC_LAST_UPDATE_TIMER(8),__LC_MCCK_ENTER_TIMER + LAST_BREAK mcck_no_vtime: - lg %r9,__LC_THREAD_INFO # load pointer to thread_info struct la %r2,SP_PTREGS(%r15) # load pt_regs brasl %r14,s390_do_machine_check tm SP_PSW+1(%r15),0x01 # returning to user ? @@ -789,7 +811,7 @@ mcck_no_vtime: xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1) # clear back chain lgr %r15,%r1 stosm __SF_EMPTY(%r15),0x04 # turn dat on - tm __TI_flags+7(%r9),_TIF_MCCK_PENDING + tm __TI_flags+7(%r12),_TIF_MCCK_PENDING jno mcck_return HANDLE_SIE_INTERCEPT TRACE_IRQS_OFF @@ -803,6 +825,7 @@ mcck_return: jno 0f stpt __LC_EXIT_TIMER 0: lpswe __LC_RETURN_MCCK_PSW # back to caller +mcck_done: /* * Restart interruption handler, kick starter for additional CPUs @@ -858,14 +881,14 @@ stack_overflow: lg %r15,__LC_PANIC_STACK # change to panic stack aghi %r15,-SP_SIZE mvc SP_PSW(16,%r15),0(%r12) # move user PSW to stack - stmg %r0,%r11,SP_R0(%r15) # store gprs %r0-%r11 to kernel stack + stmg %r0,%r10,SP_R0(%r15) # store gprs %r0-%r10 to kernel stack la %r1,__LC_SAVE_AREA chi %r12,__LC_SVC_OLD_PSW je 0f chi %r12,__LC_PGM_OLD_PSW je 0f - la %r1,__LC_SAVE_AREA+32 -0: mvc SP_R12(32,%r15),0(%r1) # move %r12-%r15 to stack + la %r1,__LC_SAVE_AREA+40 +0: mvc SP_R11(40,%r15),0(%r1) # move %r11-%r15 to stack mvc SP_ARGS(8,%r15),__LC_LAST_BREAK xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) # clear back chain la %r2,SP_PTREGS(%r15) # load pt_regs @@ -920,21 +943,23 @@ cleanup_system_call: je 0f mvc __LC_SYNC_ENTER_TIMER(8),__LC_ASYNC_ENTER_TIMER 0: cghi %r12,__LC_MCK_OLD_PSW - la %r12,__LC_SAVE_AREA+64 + la %r12,__LC_SAVE_AREA+80 je 0f - la %r12,__LC_SAVE_AREA+32 + la %r12,__LC_SAVE_AREA+40 0: clc __LC_RETURN_PSW+8(8),BASED(cleanup_system_call_insn+16) jhe cleanup_vtime clc __LC_RETURN_PSW+8(8),BASED(cleanup_system_call_insn) jh 0f - mvc __LC_SAVE_AREA(32),0(%r12) -0: stg %r13,8(%r12) - stg %r12,__LC_SAVE_AREA+96 # argh - SAVE_ALL_SYNC __LC_SVC_OLD_PSW,__LC_SAVE_AREA - CREATE_STACK_FRAME __LC_SVC_OLD_PSW,__LC_SAVE_AREA - lg %r12,__LC_SAVE_AREA+96 # argh - stg %r15,24(%r12) - llgh %r7,__LC_SVC_INT_CODE + mvc __LC_SAVE_AREA(40),0(%r12) +0: lg %r15,__LC_KERNEL_STACK # problem state -> load ksp + aghi %r15,-SP_SIZE # make room for registers & psw + stg %r15,32(%r12) + stg %r11,0(%r12) + CREATE_STACK_FRAME __LC_SAVE_AREA + mvc SP_PSW(16,%r15),__LC_SVC_OLD_PSW + mvc SP_ILC(4,%r15),__LC_SVC_ILC + stg %r7,SP_ARGS(%r15) + mvc 8(8,%r12),__LC_THREAD_INFO cleanup_vtime: clc __LC_RETURN_PSW+8(8),BASED(cleanup_system_call_insn+24) jhe cleanup_stime @@ -945,7 +970,11 @@ cleanup_stime: UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER cleanup_update: mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER - mvc __LC_RETURN_PSW+8(8),BASED(cleanup_table_system_call+8) + srag %r12,%r11,23 + lg %r12,__LC_THREAD_INFO + jz 0f + stg %r11,__TI_last_break(%r12) +0: mvc __LC_RETURN_PSW+8(8),BASED(cleanup_table_system_call+8) la %r12,__LC_RETURN_PSW br %r14 cleanup_system_call_insn: @@ -972,11 +1001,11 @@ cleanup_sysc_restore: mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER 0: mvc __LC_RETURN_PSW(16),SP_PSW(%r15) cghi %r12,__LC_MCK_OLD_PSW - la %r12,__LC_SAVE_AREA+64 + la %r12,__LC_SAVE_AREA+80 je 1f - la %r12,__LC_SAVE_AREA+32 -1: mvc 0(32,%r12),SP_R12(%r15) - lmg %r0,%r11,SP_R0(%r15) + la %r12,__LC_SAVE_AREA+40 +1: mvc 0(40,%r12),SP_R11(%r15) + lmg %r0,%r10,SP_R0(%r15) lg %r15,SP_R15(%r15) 2: la %r12,__LC_RETURN_PSW br %r14 @@ -997,8 +1026,8 @@ cleanup_io_restore: jhe 0f mvc __LC_EXIT_TIMER(8),__LC_MCCK_ENTER_TIMER 0: mvc __LC_RETURN_PSW(16),SP_PSW(%r15) - mvc __LC_SAVE_AREA+64(32),SP_R12(%r15) - lmg %r0,%r11,SP_R0(%r15) + mvc __LC_SAVE_AREA+80(40),SP_R11(%r15) + lmg %r0,%r10,SP_R0(%r15) lg %r15,SP_R15(%r15) 1: la %r12,__LC_RETURN_PSW br %r14 @@ -1010,13 +1039,6 @@ cleanup_io_restore_insn: * Integer constants */ .align 4 -.Lconst: -.Lnr_syscalls: .long NR_syscalls -.L0x0130: .short 0x130 -.L0x0140: .short 0x140 -.L0x0150: .short 0x150 -.L0x0160: .short 0x160 -.L0x0170: .short 0x170 .Lcritical_start: .quad __critical_start .Lcritical_end: diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 9f654da..83339d3 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -57,6 +57,7 @@ enum s390_regset { REGSET_GENERAL, REGSET_FP, + REGSET_LAST_BREAK, REGSET_GENERAL_EXTENDED, }; @@ -381,6 +382,10 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) copied += sizeof(unsigned long); } return 0; + case PTRACE_GET_LAST_BREAK: + put_user(task_thread_info(child)->last_break, + (unsigned long __user *) data); + return 0; default: /* Removing high order bit from addr (only for 31 bit). */ addr &= PSW_ADDR_INSN; @@ -633,6 +638,10 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, copied += sizeof(unsigned int); } return 0; + case PTRACE_GET_LAST_BREAK: + put_user(task_thread_info(child)->last_break, + (unsigned int __user *) data); + return 0; } return compat_ptrace_request(child, request, addr, data); } @@ -797,6 +806,28 @@ static int s390_fpregs_set(struct task_struct *target, return rc; } +#ifdef CONFIG_64BIT + +static int s390_last_break_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + if (count > 0) { + if (kbuf) { + unsigned long *k = kbuf; + *k = task_thread_info(target)->last_break; + } else { + unsigned long __user *u = ubuf; + if (__put_user(task_thread_info(target)->last_break, u)) + return -EFAULT; + } + } + return 0; +} + +#endif + static const struct user_regset s390_regsets[] = { [REGSET_GENERAL] = { .core_note_type = NT_PRSTATUS, @@ -814,6 +845,15 @@ static const struct user_regset s390_regsets[] = { .get = s390_fpregs_get, .set = s390_fpregs_set, }, +#ifdef CONFIG_64BIT + [REGSET_LAST_BREAK] = { + .core_note_type = NT_S390_LAST_BREAK, + .n = 1, + .size = sizeof(long), + .align = sizeof(long), + .get = s390_last_break_get, + }, +#endif }; static const struct user_regset_view user_s390_view = { @@ -948,6 +988,27 @@ static int s390_compat_regs_high_set(struct task_struct *target, return rc; } +static int s390_compat_last_break_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + compat_ulong_t last_break; + + if (count > 0) { + last_break = task_thread_info(target)->last_break; + if (kbuf) { + unsigned long *k = kbuf; + *k = last_break; + } else { + unsigned long __user *u = ubuf; + if (__put_user(last_break, u)) + return -EFAULT; + } + } + return 0; +} + static const struct user_regset s390_compat_regsets[] = { [REGSET_GENERAL] = { .core_note_type = NT_PRSTATUS, @@ -965,6 +1026,13 @@ static const struct user_regset s390_compat_regsets[] = { .get = s390_fpregs_get, .set = s390_fpregs_set, }, + [REGSET_LAST_BREAK] = { + .core_note_type = NT_S390_LAST_BREAK, + .n = 1, + .size = sizeof(long), + .align = sizeof(long), + .get = s390_compat_last_break_get, + }, [REGSET_GENERAL_EXTENDED] = { .core_note_type = NT_S390_HIGH_GPRS, .n = sizeof(s390_compat_regs_high) / sizeof(compat_long_t), diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index 6289945..ee7ac8b 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -313,6 +313,7 @@ static int setup_frame(int sig, struct k_sigaction *ka, To avoid breaking binary compatibility, they are passed as args. */ regs->gprs[4] = current->thread.trap_no; regs->gprs[5] = current->thread.prot_addr; + regs->gprs[6] = task_thread_info(current)->last_break; /* Place signal number on stack to allow backtrace from handler. */ if (__put_user(regs->gprs[2], (int __user *) &frame->signo)) @@ -376,6 +377,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, regs->gprs[2] = map_signal(sig); regs->gprs[3] = (unsigned long) &frame->info; regs->gprs[4] = (unsigned long) &frame->uc; + regs->gprs[5] = task_thread_info(current)->last_break; return 0; give_sigsegv: diff --git a/include/linux/elf.h b/include/linux/elf.h index 5978584..4d60801 100644 --- a/include/linux/elf.h +++ b/include/linux/elf.h @@ -394,6 +394,7 @@ typedef struct elf64_shdr { #define NT_S390_TODPREG 0x303 /* s390 TOD programmable register */ #define NT_S390_CTRS 0x304 /* s390 control registers */ #define NT_S390_PREFIX 0x305 /* s390 prefix register */ +#define NT_S390_LAST_BREAK 0x306 /* s390 breaking event address */ /* Note header in a PT_NOTE section */ -- cgit v0.10.2 From f73a2b03c59b95a3ee8eebcc127350c77c950e87 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 17 May 2010 10:00:06 +0200 Subject: [S390] vmcp: disallow modular build Change the tristate Kbuild option into a bool option so that the module is either builtin or not available at all. There have been too many cases where people were missing the 'vmcp' device node and unable to send z/VM CP commands. So let's make sure that on distros it will always be present. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky diff --git a/drivers/s390/char/Kconfig b/drivers/s390/char/Kconfig index 4e34d36..40834f1 100644 --- a/drivers/s390/char/Kconfig +++ b/drivers/s390/char/Kconfig @@ -148,13 +148,12 @@ config VMLOGRDR This driver depends on the IUCV support driver. config VMCP - tristate "Support for the z/VM CP interface (VM only)" + bool "Support for the z/VM CP interface" depends on S390 help Select this option if you want to be able to interact with the control program on z/VM - config MONREADER tristate "API for reading z/VM monitor service records" depends on IUCV diff --git a/drivers/s390/char/vmcp.c b/drivers/s390/char/vmcp.c index 5bb59d3..04e532e 100644 --- a/drivers/s390/char/vmcp.c +++ b/drivers/s390/char/vmcp.c @@ -1,24 +1,20 @@ /* - * Copyright IBM Corp. 2004,2007 + * Copyright IBM Corp. 2004,2010 * Interface implementation for communication with the z/VM control program - * Author(s): Christian Borntraeger * + * Author(s): Christian Borntraeger * * z/VMs CP offers the possibility to issue commands via the diagnose code 8 * this driver implements a character device that issues these commands and * returns the answer of CP. - + * * The idea of this driver is based on cpint from Neale Ferguson and #CP in CMS */ -#define KMSG_COMPONENT "vmcp" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt - #include #include #include #include -#include #include #include #include @@ -26,10 +22,6 @@ #include #include "vmcp.h" -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Christian Borntraeger "); -MODULE_DESCRIPTION("z/VM CP interface"); - static debug_info_t *vmcp_debug; static int vmcp_open(struct inode *inode, struct file *file) @@ -197,11 +189,8 @@ static int __init vmcp_init(void) { int ret; - if (!MACHINE_IS_VM) { - pr_warning("The z/VM CP interface device driver cannot be " - "loaded without z/VM\n"); - return -ENODEV; - } + if (!MACHINE_IS_VM) + return 0; vmcp_debug = debug_register("vmcp", 1, 1, 240); if (!vmcp_debug) @@ -214,19 +203,8 @@ static int __init vmcp_init(void) } ret = misc_register(&vmcp_dev); - if (ret) { + if (ret) debug_unregister(vmcp_debug); - return ret; - } - - return 0; -} - -static void __exit vmcp_exit(void) -{ - misc_deregister(&vmcp_dev); - debug_unregister(vmcp_debug); + return ret; } - -module_init(vmcp_init); -module_exit(vmcp_exit); +device_initcall(vmcp_init); -- cgit v0.10.2 From 58ea91c05346f7c6336e6248b743aa9a8e1c19a9 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 17 May 2010 10:00:07 +0200 Subject: [S390] avoid default_llseek in s390 drivers Use nonseekable_open for a couple of s390 device drivers. This avoids the use of default_llseek function which has a dependency on the BKL. Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index c53f8ac..95c1aaa 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -145,7 +145,7 @@ static int hypfs_open(struct inode *inode, struct file *filp) } mutex_unlock(&fs_info->lock); } - return 0; + return nonseekable_open(inode, filp); } static ssize_t hypfs_aio_read(struct kiocb *iocb, const struct iovec *iov, diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index 0168472..9819226 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -655,6 +655,7 @@ found: p_info->act_entry_offset = 0; file->private_data = p_info; debug_info_get(debug_info); + nonseekable_open(inode, file); out: mutex_unlock(&debug_mutex); return rc; diff --git a/drivers/s390/char/fs3270.c b/drivers/s390/char/fs3270.c index 0eabcca..857dfcb 100644 --- a/drivers/s390/char/fs3270.c +++ b/drivers/s390/char/fs3270.c @@ -484,6 +484,7 @@ fs3270_open(struct inode *inode, struct file *filp) raw3270_del_view(&fp->view); goto out; } + nonseekable_open(inode, filp); filp->private_data = fp; out: mutex_unlock(&fs3270_mutex); diff --git a/drivers/s390/char/zcore.c b/drivers/s390/char/zcore.c index 7217966..f5ea338 100644 --- a/drivers/s390/char/zcore.c +++ b/drivers/s390/char/zcore.c @@ -445,7 +445,7 @@ static int zcore_memmap_open(struct inode *inode, struct file *filp) } kfree(chunk_array); filp->private_data = buf; - return 0; + return nonseekable_open(inode, filp); } static int zcore_memmap_release(struct inode *inode, struct file *filp) @@ -473,7 +473,7 @@ static ssize_t zcore_reipl_write(struct file *filp, const char __user *buf, static int zcore_reipl_open(struct inode *inode, struct file *filp) { - return 0; + return nonseekable_open(inode, filp); } static int zcore_reipl_release(struct inode *inode, struct file *filp) diff --git a/drivers/s390/cio/chsc_sch.c b/drivers/s390/cio/chsc_sch.c index 3b6f4ad..a83877c 100644 --- a/drivers/s390/cio/chsc_sch.c +++ b/drivers/s390/cio/chsc_sch.c @@ -803,6 +803,7 @@ static long chsc_ioctl(struct file *filp, unsigned int cmd, static const struct file_operations chsc_fops = { .owner = THIS_MODULE, + .open = nonseekable_open, .unlocked_ioctl = chsc_ioctl, .compat_ioctl = chsc_ioctl, }; diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c index da6df04..ac94ac7 100644 --- a/drivers/s390/cio/css.c +++ b/drivers/s390/cio/css.c @@ -1065,6 +1065,7 @@ static ssize_t cio_settle_write(struct file *file, const char __user *buf, } static const struct file_operations cio_settle_proc_fops = { + .open = nonseekable_open, .write = cio_settle_write, }; diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c index 304caf5..41e0aae 100644 --- a/drivers/s390/crypto/zcrypt_api.c +++ b/drivers/s390/crypto/zcrypt_api.c @@ -302,7 +302,7 @@ static ssize_t zcrypt_write(struct file *filp, const char __user *buf, static int zcrypt_open(struct inode *inode, struct file *filp) { atomic_inc(&zcrypt_open_count); - return 0; + return nonseekable_open(inode, filp); } /** diff --git a/drivers/s390/scsi/zfcp_cfdc.c b/drivers/s390/scsi/zfcp_cfdc.c index 25d9e0a..1a2db0a 100644 --- a/drivers/s390/scsi/zfcp_cfdc.c +++ b/drivers/s390/scsi/zfcp_cfdc.c @@ -254,6 +254,7 @@ static long zfcp_cfdc_dev_ioctl(struct file *file, unsigned int command, } static const struct file_operations zfcp_cfdc_fops = { + .open = nonseekable_open, .unlocked_ioctl = zfcp_cfdc_dev_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = zfcp_cfdc_dev_ioctl -- cgit v0.10.2 From a78f1c4c1dc4b9dedd17c46dd414f26c7bf1e503 Mon Sep 17 00:00:00 2001 From: Christoph Egger Date: Mon, 17 May 2010 10:00:08 +0200 Subject: [S390] vdso: remove redundant check for CONFIG_64BIT This is a check for CONFIG_64BIT inside a block that is only active when CONFIG_64BIT is set. So the check is actually useless and potentially irritating. Signed-off-by: Christoph Egger Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index 6bc9c19..6b83870 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -102,11 +102,7 @@ static void vdso_init_per_cpu_data(int cpu, struct vdso_per_cpu_data *vpcd) /* * Allocate/free per cpu vdso data. */ -#ifdef CONFIG_64BIT #define SEGMENT_ORDER 2 -#else -#define SEGMENT_ORDER 1 -#endif int vdso_alloc_per_cpu(int cpu, struct _lowcore *lowcore) { -- cgit v0.10.2 From f3cb31e495668eae568c584c666631e26c68bdea Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Mon, 17 May 2010 10:00:09 +0200 Subject: [S390] vdso: add missing vdso_install target Add missing vdso_install target to install the unstripped vdso images into $(MODLIB)/vdso/. These files are helpful when containing additional debugging information. Signed-off-by: Hendrik Brueckner Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/Makefile b/arch/s390/Makefile index 0da1074..30c5f01 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -116,6 +116,12 @@ image bzImage: vmlinux zfcpdump: $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ +vdso_install: +ifeq ($(CONFIG_64BIT),y) + $(Q)$(MAKE) $(build)=arch/$(ARCH)/kernel/vdso64 $@ +endif + $(Q)$(MAKE) $(build)=arch/$(ARCH)/kernel/vdso32 $@ + archclean: $(Q)$(MAKE) $(clean)=$(boot) -- cgit v0.10.2 From 501183f2ed74434e30a1b039b2f3af30f1f3f461 Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Mon, 17 May 2010 10:00:10 +0200 Subject: [S390] dasd: add dynamic pav toleration For base Parallel Access Volume (PAV) there is a fixed mapping of base and alias devices. With dynamic PAV this mapping can be changed so that an alias device is used with another base device. This patch enables the DASD device driver to tolerate dynamic PAV changes. Signed-off-by: Stefan Haberland Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index fa2339c..0e86247 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -65,6 +65,7 @@ static void dasd_device_tasklet(struct dasd_device *); static void dasd_block_tasklet(struct dasd_block *); static void do_kick_device(struct work_struct *); static void do_restore_device(struct work_struct *); +static void do_reload_device(struct work_struct *); static void dasd_return_cqr_cb(struct dasd_ccw_req *, void *); static void dasd_device_timeout(unsigned long); static void dasd_block_timeout(unsigned long); @@ -115,6 +116,7 @@ struct dasd_device *dasd_alloc_device(void) device->timer.data = (unsigned long) device; INIT_WORK(&device->kick_work, do_kick_device); INIT_WORK(&device->restore_device, do_restore_device); + INIT_WORK(&device->reload_device, do_reload_device); device->state = DASD_STATE_NEW; device->target = DASD_STATE_NEW; mutex_init(&device->state_mutex); @@ -521,6 +523,26 @@ void dasd_kick_device(struct dasd_device *device) } /* + * dasd_reload_device will schedule a call do do_reload_device to the kernel + * event daemon. + */ +static void do_reload_device(struct work_struct *work) +{ + struct dasd_device *device = container_of(work, struct dasd_device, + reload_device); + device->discipline->reload(device); + dasd_put_device(device); +} + +void dasd_reload_device(struct dasd_device *device) +{ + dasd_get_device(device); + /* queue call to dasd_reload_device to the kernel event daemon. */ + schedule_work(&device->reload_device); +} +EXPORT_SYMBOL(dasd_reload_device); + +/* * dasd_restore_device will schedule a call do do_restore_device to the kernel * event daemon. */ diff --git a/drivers/s390/block/dasd_3990_erp.c b/drivers/s390/block/dasd_3990_erp.c index 6632649..85bfd87 100644 --- a/drivers/s390/block/dasd_3990_erp.c +++ b/drivers/s390/block/dasd_3990_erp.c @@ -1418,9 +1418,29 @@ static struct dasd_ccw_req *dasd_3990_erp_inspect_alias( struct dasd_ccw_req *erp) { struct dasd_ccw_req *cqr = erp->refers; + char *sense; if (cqr->block && (cqr->block->base != cqr->startdev)) { + + sense = dasd_get_sense(&erp->refers->irb); + /* + * dynamic pav may have changed base alias mapping + */ + if (!test_bit(DASD_FLAG_OFFLINE, &cqr->startdev->flags) && sense + && (sense[0] == 0x10) && (sense[7] == 0x0F) + && (sense[8] == 0x67)) { + /* + * remove device from alias handling to prevent new + * requests from being scheduled on the + * wrong alias device + */ + dasd_alias_remove_device(cqr->startdev); + + /* schedule worker to reload device */ + dasd_reload_device(cqr->startdev); + } + if (cqr->startdev->features & DASD_FEATURE_ERPLOG) { DBF_DEV_EVENT(DBF_ERR, cqr->startdev, "ERP on alias device for request %p," diff --git a/drivers/s390/block/dasd_alias.c b/drivers/s390/block/dasd_alias.c index 8c48142..a564b99 100644 --- a/drivers/s390/block/dasd_alias.c +++ b/drivers/s390/block/dasd_alias.c @@ -642,6 +642,14 @@ int dasd_alias_add_device(struct dasd_device *device) return rc; } +int dasd_alias_update_add_device(struct dasd_device *device) +{ + struct dasd_eckd_private *private; + private = (struct dasd_eckd_private *) device->private; + private->lcu->flags |= UPDATE_PENDING; + return dasd_alias_add_device(device); +} + int dasd_alias_remove_device(struct dasd_device *device) { struct dasd_eckd_private *private; diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 0cb2331..4305c23 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -1451,6 +1451,7 @@ static int dasd_eckd_ready_to_online(struct dasd_device *device) static int dasd_eckd_online_to_ready(struct dasd_device *device) { + cancel_work_sync(&device->reload_device); return dasd_alias_remove_device(device); }; @@ -1709,10 +1710,27 @@ static void dasd_eckd_handle_unsolicited_interrupt(struct dasd_device *device, { char mask; char *sense = NULL; + struct dasd_eckd_private *private; + private = (struct dasd_eckd_private *) device->private; /* first of all check for state change pending interrupt */ mask = DEV_STAT_ATTENTION | DEV_STAT_DEV_END | DEV_STAT_UNIT_EXCEP; if ((scsw_dstat(&irb->scsw) & mask) == mask) { + /* for alias only and not in offline processing*/ + if (!device->block && private->lcu && + !test_bit(DASD_FLAG_OFFLINE, &device->flags)) { + /* + * the state change could be caused by an alias + * reassignment remove device from alias handling + * to prevent new requests from being scheduled on + * the wrong alias device + */ + dasd_alias_remove_device(device); + + /* schedule worker to reload device */ + dasd_reload_device(device); + } + dasd_generic_handle_state_change(device); return; } @@ -3259,7 +3277,7 @@ static void dasd_eckd_dump_sense(struct dasd_device *device, dasd_eckd_dump_sense_ccw(device, req, irb); } -int dasd_eckd_pm_freeze(struct dasd_device *device) +static int dasd_eckd_pm_freeze(struct dasd_device *device) { /* * the device should be disconnected from our LCU structure @@ -3272,7 +3290,7 @@ int dasd_eckd_pm_freeze(struct dasd_device *device) return 0; } -int dasd_eckd_restore_device(struct dasd_device *device) +static int dasd_eckd_restore_device(struct dasd_device *device) { struct dasd_eckd_private *private; struct dasd_eckd_characteristics temp_rdc_data; @@ -3336,6 +3354,53 @@ out_err: return -1; } +static int dasd_eckd_reload_device(struct dasd_device *device) +{ + struct dasd_eckd_private *private; + int rc, old_base; + char uid[60]; + + private = (struct dasd_eckd_private *) device->private; + old_base = private->uid.base_unit_addr; + /* Read Configuration Data */ + rc = dasd_eckd_read_conf(device); + if (rc) + goto out_err; + + rc = dasd_eckd_generate_uid(device, &private->uid); + if (rc) + goto out_err; + + dasd_set_uid(device->cdev, &private->uid); + + /* + * update unit address configuration and + * add device to alias management + */ + dasd_alias_update_add_device(device); + + if (old_base != private->uid.base_unit_addr) { + if (strlen(private->uid.vduit) > 0) + snprintf(uid, 60, "%s.%s.%04x.%02x.%s", + private->uid.vendor, private->uid.serial, + private->uid.ssid, private->uid.base_unit_addr, + private->uid.vduit); + else + snprintf(uid, 60, "%s.%s.%04x.%02x", + private->uid.vendor, private->uid.serial, + private->uid.ssid, + private->uid.base_unit_addr); + + dev_info(&device->cdev->dev, + "An Alias device was reassigned to a new base device " + "with UID: %s\n", uid); + } + return 0; + +out_err: + return -1; +} + static struct ccw_driver dasd_eckd_driver = { .name = "dasd-eckd", .owner = THIS_MODULE, @@ -3389,6 +3454,7 @@ static struct dasd_discipline dasd_eckd_discipline = { .ioctl = dasd_eckd_ioctl, .freeze = dasd_eckd_pm_freeze, .restore = dasd_eckd_restore_device, + .reload = dasd_eckd_reload_device, }; static int __init diff --git a/drivers/s390/block/dasd_eckd.h b/drivers/s390/block/dasd_eckd.h index 864d53c..dd6385a 100644 --- a/drivers/s390/block/dasd_eckd.h +++ b/drivers/s390/block/dasd_eckd.h @@ -426,7 +426,6 @@ struct alias_pav_group { struct dasd_device *next; }; - struct dasd_eckd_private { struct dasd_eckd_characteristics rdc_data; u8 *conf_data; @@ -463,4 +462,5 @@ void dasd_alias_handle_summary_unit_check(struct dasd_device *, struct irb *); void dasd_eckd_reset_ccw_to_base_io(struct dasd_ccw_req *); void dasd_alias_lcu_setup_complete(struct dasd_device *); void dasd_alias_wait_for_lcu_setup(struct dasd_device *); +int dasd_alias_update_add_device(struct dasd_device *); #endif /* DASD_ECKD_H */ diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h index a91d4a9..1ae7b12 100644 --- a/drivers/s390/block/dasd_int.h +++ b/drivers/s390/block/dasd_int.h @@ -312,6 +312,9 @@ struct dasd_discipline { /* suspend/resume functions */ int (*freeze) (struct dasd_device *); int (*restore) (struct dasd_device *); + + /* reload device after state change */ + int (*reload) (struct dasd_device *); }; extern struct dasd_discipline *dasd_diag_discipline_pointer; @@ -386,6 +389,7 @@ struct dasd_device { struct tasklet_struct tasklet; struct work_struct kick_work; struct work_struct restore_device; + struct work_struct reload_device; struct timer_list timer; debug_info_t *debug_area; @@ -582,6 +586,7 @@ void dasd_enable_device(struct dasd_device *); void dasd_set_target_state(struct dasd_device *, int); void dasd_kick_device(struct dasd_device *); void dasd_restore_device(struct dasd_device *); +void dasd_reload_device(struct dasd_device *); void dasd_add_request_head(struct dasd_ccw_req *); void dasd_add_request_tail(struct dasd_ccw_req *); -- cgit v0.10.2 From 2dedf0d9eadf39660f2e1686b5d36e4a7515803f Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Mon, 17 May 2010 10:00:11 +0200 Subject: [S390] dasd: remove uid from devmap Remove the duplicate of the DASD uid from the devmap structure. Use the uid from the device private structure instead. This also removes a lockdep warning complaining about a possible SOFTIRQ-safe -> SOFTIRQ-unsafe lock order. Signed-off-by: Stefan Haberland Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky diff --git a/drivers/s390/block/dasd_alias.c b/drivers/s390/block/dasd_alias.c index a564b99..4155805 100644 --- a/drivers/s390/block/dasd_alias.c +++ b/drivers/s390/block/dasd_alias.c @@ -190,20 +190,21 @@ int dasd_alias_make_device_known_to_lcu(struct dasd_device *device) struct alias_server *server, *newserver; struct alias_lcu *lcu, *newlcu; int is_lcu_known; - struct dasd_uid *uid; + struct dasd_uid uid; private = (struct dasd_eckd_private *) device->private; - uid = &private->uid; + + device->discipline->get_uid(device, &uid); spin_lock_irqsave(&aliastree.lock, flags); is_lcu_known = 1; - server = _find_server(uid); + server = _find_server(&uid); if (!server) { spin_unlock_irqrestore(&aliastree.lock, flags); - newserver = _allocate_server(uid); + newserver = _allocate_server(&uid); if (IS_ERR(newserver)) return PTR_ERR(newserver); spin_lock_irqsave(&aliastree.lock, flags); - server = _find_server(uid); + server = _find_server(&uid); if (!server) { list_add(&newserver->server, &aliastree.serverlist); server = newserver; @@ -214,14 +215,14 @@ int dasd_alias_make_device_known_to_lcu(struct dasd_device *device) } } - lcu = _find_lcu(server, uid); + lcu = _find_lcu(server, &uid); if (!lcu) { spin_unlock_irqrestore(&aliastree.lock, flags); - newlcu = _allocate_lcu(uid); + newlcu = _allocate_lcu(&uid); if (IS_ERR(newlcu)) return PTR_ERR(newlcu); spin_lock_irqsave(&aliastree.lock, flags); - lcu = _find_lcu(server, uid); + lcu = _find_lcu(server, &uid); if (!lcu) { list_add(&newlcu->lcu, &server->lculist); lcu = newlcu; @@ -256,20 +257,20 @@ void dasd_alias_lcu_setup_complete(struct dasd_device *device) unsigned long flags; struct alias_server *server; struct alias_lcu *lcu; - struct dasd_uid *uid; + struct dasd_uid uid; private = (struct dasd_eckd_private *) device->private; - uid = &private->uid; + device->discipline->get_uid(device, &uid); lcu = NULL; spin_lock_irqsave(&aliastree.lock, flags); - server = _find_server(uid); + server = _find_server(&uid); if (server) - lcu = _find_lcu(server, uid); + lcu = _find_lcu(server, &uid); spin_unlock_irqrestore(&aliastree.lock, flags); if (!lcu) { DBF_EVENT_DEVID(DBF_ERR, device->cdev, "could not find lcu for %04x %02x", - uid->ssid, uid->real_unit_addr); + uid.ssid, uid.real_unit_addr); WARN_ON(1); return; } @@ -282,20 +283,20 @@ void dasd_alias_wait_for_lcu_setup(struct dasd_device *device) unsigned long flags; struct alias_server *server; struct alias_lcu *lcu; - struct dasd_uid *uid; + struct dasd_uid uid; private = (struct dasd_eckd_private *) device->private; - uid = &private->uid; + device->discipline->get_uid(device, &uid); lcu = NULL; spin_lock_irqsave(&aliastree.lock, flags); - server = _find_server(uid); + server = _find_server(&uid); if (server) - lcu = _find_lcu(server, uid); + lcu = _find_lcu(server, &uid); spin_unlock_irqrestore(&aliastree.lock, flags); if (!lcu) { DBF_EVENT_DEVID(DBF_ERR, device->cdev, "could not find lcu for %04x %02x", - uid->ssid, uid->real_unit_addr); + uid.ssid, uid.real_unit_addr); WARN_ON(1); return; } @@ -314,9 +315,11 @@ void dasd_alias_disconnect_device_from_lcu(struct dasd_device *device) struct alias_lcu *lcu; struct alias_server *server; int was_pending; + struct dasd_uid uid; private = (struct dasd_eckd_private *) device->private; lcu = private->lcu; + device->discipline->get_uid(device, &uid); spin_lock_irqsave(&lcu->lock, flags); list_del_init(&device->alias_list); /* make sure that the workers don't use this device */ @@ -353,7 +356,7 @@ void dasd_alias_disconnect_device_from_lcu(struct dasd_device *device) _schedule_lcu_update(lcu, NULL); spin_unlock(&lcu->lock); } - server = _find_server(&private->uid); + server = _find_server(&uid); if (server && list_empty(&server->lculist)) { list_del(&server->server); _free_server(server); @@ -366,19 +369,30 @@ void dasd_alias_disconnect_device_from_lcu(struct dasd_device *device) * in the lcu is up to date and will update the device uid before * adding it to a pav group. */ + static int _add_device_to_lcu(struct alias_lcu *lcu, - struct dasd_device *device) + struct dasd_device *device, + struct dasd_device *pos) { struct dasd_eckd_private *private; struct alias_pav_group *group; - struct dasd_uid *uid; + struct dasd_uid uid; + unsigned long flags; private = (struct dasd_eckd_private *) device->private; - uid = &private->uid; - uid->type = lcu->uac->unit[uid->real_unit_addr].ua_type; - uid->base_unit_addr = lcu->uac->unit[uid->real_unit_addr].base_ua; - dasd_set_uid(device->cdev, &private->uid); + + /* only lock if not already locked */ + if (device != pos) + spin_lock_irqsave_nested(get_ccwdev_lock(device->cdev), flags, + CDEV_NESTED_SECOND); + private->uid.type = lcu->uac->unit[private->uid.real_unit_addr].ua_type; + private->uid.base_unit_addr = + lcu->uac->unit[private->uid.real_unit_addr].base_ua; + uid = private->uid; + + if (device != pos) + spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags); /* if we have no PAV anyway, we don't need to bother with PAV groups */ if (lcu->pav == NO_PAV) { @@ -386,25 +400,25 @@ static int _add_device_to_lcu(struct alias_lcu *lcu, return 0; } - group = _find_group(lcu, uid); + group = _find_group(lcu, &uid); if (!group) { group = kzalloc(sizeof(*group), GFP_ATOMIC); if (!group) return -ENOMEM; - memcpy(group->uid.vendor, uid->vendor, sizeof(uid->vendor)); - memcpy(group->uid.serial, uid->serial, sizeof(uid->serial)); - group->uid.ssid = uid->ssid; - if (uid->type == UA_BASE_DEVICE) - group->uid.base_unit_addr = uid->real_unit_addr; + memcpy(group->uid.vendor, uid.vendor, sizeof(uid.vendor)); + memcpy(group->uid.serial, uid.serial, sizeof(uid.serial)); + group->uid.ssid = uid.ssid; + if (uid.type == UA_BASE_DEVICE) + group->uid.base_unit_addr = uid.real_unit_addr; else - group->uid.base_unit_addr = uid->base_unit_addr; - memcpy(group->uid.vduit, uid->vduit, sizeof(uid->vduit)); + group->uid.base_unit_addr = uid.base_unit_addr; + memcpy(group->uid.vduit, uid.vduit, sizeof(uid.vduit)); INIT_LIST_HEAD(&group->group); INIT_LIST_HEAD(&group->baselist); INIT_LIST_HEAD(&group->aliaslist); list_add(&group->group, &lcu->grouplist); } - if (uid->type == UA_BASE_DEVICE) + if (uid.type == UA_BASE_DEVICE) list_move(&device->alias_list, &group->baselist); else list_move(&device->alias_list, &group->aliaslist); @@ -525,7 +539,10 @@ static int _lcu_update(struct dasd_device *refdev, struct alias_lcu *lcu) if (rc) return rc; - spin_lock_irqsave(&lcu->lock, flags); + /* need to take cdev lock before lcu lock */ + spin_lock_irqsave_nested(get_ccwdev_lock(refdev->cdev), flags, + CDEV_NESTED_FIRST); + spin_lock(&lcu->lock); lcu->pav = NO_PAV; for (i = 0; i < MAX_DEVICES_PER_LCU; ++i) { switch (lcu->uac->unit[i].ua_type) { @@ -542,9 +559,10 @@ static int _lcu_update(struct dasd_device *refdev, struct alias_lcu *lcu) list_for_each_entry_safe(device, tempdev, &lcu->active_devices, alias_list) { - _add_device_to_lcu(lcu, device); + _add_device_to_lcu(lcu, device, refdev); } - spin_unlock_irqrestore(&lcu->lock, flags); + spin_unlock(&lcu->lock); + spin_unlock_irqrestore(get_ccwdev_lock(refdev->cdev), flags); return 0; } @@ -628,9 +646,12 @@ int dasd_alias_add_device(struct dasd_device *device) private = (struct dasd_eckd_private *) device->private; lcu = private->lcu; rc = 0; - spin_lock_irqsave(&lcu->lock, flags); + + /* need to take cdev lock before lcu lock */ + spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags); + spin_lock(&lcu->lock); if (!(lcu->flags & UPDATE_PENDING)) { - rc = _add_device_to_lcu(lcu, device); + rc = _add_device_to_lcu(lcu, device, device); if (rc) lcu->flags |= UPDATE_PENDING; } @@ -638,7 +659,8 @@ int dasd_alias_add_device(struct dasd_device *device) list_move(&device->alias_list, &lcu->active_devices); _schedule_lcu_update(lcu, device); } - spin_unlock_irqrestore(&lcu->lock, flags); + spin_unlock(&lcu->lock); + spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags); return rc; } @@ -748,19 +770,30 @@ static void _restart_all_base_devices_on_lcu(struct alias_lcu *lcu) struct alias_pav_group *pavgroup; struct dasd_device *device; struct dasd_eckd_private *private; + unsigned long flags; /* active and inactive list can contain alias as well as base devices */ list_for_each_entry(device, &lcu->active_devices, alias_list) { private = (struct dasd_eckd_private *) device->private; - if (private->uid.type != UA_BASE_DEVICE) + spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags); + if (private->uid.type != UA_BASE_DEVICE) { + spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), + flags); continue; + } + spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags); dasd_schedule_block_bh(device->block); dasd_schedule_device_bh(device); } list_for_each_entry(device, &lcu->inactive_devices, alias_list) { private = (struct dasd_eckd_private *) device->private; - if (private->uid.type != UA_BASE_DEVICE) + spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags); + if (private->uid.type != UA_BASE_DEVICE) { + spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), + flags); continue; + } + spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags); dasd_schedule_block_bh(device->block); dasd_schedule_device_bh(device); } diff --git a/drivers/s390/block/dasd_devmap.c b/drivers/s390/block/dasd_devmap.c index eff9c81..34d51dd 100644 --- a/drivers/s390/block/dasd_devmap.c +++ b/drivers/s390/block/dasd_devmap.c @@ -49,7 +49,6 @@ struct dasd_devmap { unsigned int devindex; unsigned short features; struct dasd_device *device; - struct dasd_uid uid; }; /* @@ -936,42 +935,46 @@ dasd_device_status_show(struct device *dev, struct device_attribute *attr, static DEVICE_ATTR(status, 0444, dasd_device_status_show, NULL); -static ssize_t -dasd_alias_show(struct device *dev, struct device_attribute *attr, char *buf) +static ssize_t dasd_alias_show(struct device *dev, + struct device_attribute *attr, char *buf) { - struct dasd_devmap *devmap; - int alias; + struct dasd_device *device; + struct dasd_uid uid; - devmap = dasd_find_busid(dev_name(dev)); - spin_lock(&dasd_devmap_lock); - if (IS_ERR(devmap) || strlen(devmap->uid.vendor) == 0) { - spin_unlock(&dasd_devmap_lock); + device = dasd_device_from_cdev(to_ccwdev(dev)); + if (IS_ERR(device)) return sprintf(buf, "0\n"); + + if (device->discipline && device->discipline->get_uid && + !device->discipline->get_uid(device, &uid)) { + if (uid.type == UA_BASE_PAV_ALIAS || + uid.type == UA_HYPER_PAV_ALIAS) + return sprintf(buf, "1\n"); } - if (devmap->uid.type == UA_BASE_PAV_ALIAS || - devmap->uid.type == UA_HYPER_PAV_ALIAS) - alias = 1; - else - alias = 0; - spin_unlock(&dasd_devmap_lock); - return sprintf(buf, alias ? "1\n" : "0\n"); + dasd_put_device(device); + + return sprintf(buf, "0\n"); } static DEVICE_ATTR(alias, 0444, dasd_alias_show, NULL); -static ssize_t -dasd_vendor_show(struct device *dev, struct device_attribute *attr, char *buf) +static ssize_t dasd_vendor_show(struct device *dev, + struct device_attribute *attr, char *buf) { - struct dasd_devmap *devmap; + struct dasd_device *device; + struct dasd_uid uid; char *vendor; - devmap = dasd_find_busid(dev_name(dev)); - spin_lock(&dasd_devmap_lock); - if (!IS_ERR(devmap) && strlen(devmap->uid.vendor) > 0) - vendor = devmap->uid.vendor; - else - vendor = ""; - spin_unlock(&dasd_devmap_lock); + device = dasd_device_from_cdev(to_ccwdev(dev)); + vendor = ""; + if (IS_ERR(device)) + return snprintf(buf, PAGE_SIZE, "%s\n", vendor); + + if (device->discipline && device->discipline->get_uid && + !device->discipline->get_uid(device, &uid)) + vendor = uid.vendor; + + dasd_put_device(device); return snprintf(buf, PAGE_SIZE, "%s\n", vendor); } @@ -985,48 +988,51 @@ static DEVICE_ATTR(vendor, 0444, dasd_vendor_show, NULL); static ssize_t dasd_uid_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct dasd_devmap *devmap; + struct dasd_device *device; + struct dasd_uid uid; char uid_string[UID_STRLEN]; char ua_string[3]; - struct dasd_uid *uid; - devmap = dasd_find_busid(dev_name(dev)); - spin_lock(&dasd_devmap_lock); - if (IS_ERR(devmap) || strlen(devmap->uid.vendor) == 0) { - spin_unlock(&dasd_devmap_lock); - return sprintf(buf, "\n"); - } - uid = &devmap->uid; - switch (uid->type) { - case UA_BASE_DEVICE: - sprintf(ua_string, "%02x", uid->real_unit_addr); - break; - case UA_BASE_PAV_ALIAS: - sprintf(ua_string, "%02x", uid->base_unit_addr); - break; - case UA_HYPER_PAV_ALIAS: - sprintf(ua_string, "xx"); - break; - default: - /* should not happen, treat like base device */ - sprintf(ua_string, "%02x", uid->real_unit_addr); - break; + device = dasd_device_from_cdev(to_ccwdev(dev)); + uid_string[0] = 0; + if (IS_ERR(device)) + return snprintf(buf, PAGE_SIZE, "%s\n", uid_string); + + if (device->discipline && device->discipline->get_uid && + !device->discipline->get_uid(device, &uid)) { + switch (uid.type) { + case UA_BASE_DEVICE: + snprintf(ua_string, sizeof(ua_string), "%02x", + uid.real_unit_addr); + break; + case UA_BASE_PAV_ALIAS: + snprintf(ua_string, sizeof(ua_string), "%02x", + uid.base_unit_addr); + break; + case UA_HYPER_PAV_ALIAS: + snprintf(ua_string, sizeof(ua_string), "xx"); + break; + default: + /* should not happen, treat like base device */ + snprintf(ua_string, sizeof(ua_string), "%02x", + uid.real_unit_addr); + break; + } + + if (strlen(uid.vduit) > 0) + snprintf(uid_string, sizeof(uid_string), + "%s.%s.%04x.%s.%s", + uid.vendor, uid.serial, uid.ssid, ua_string, + uid.vduit); + else + snprintf(uid_string, sizeof(uid_string), + "%s.%s.%04x.%s", + uid.vendor, uid.serial, uid.ssid, ua_string); } - if (strlen(uid->vduit) > 0) - snprintf(uid_string, sizeof(uid_string), - "%s.%s.%04x.%s.%s", - uid->vendor, uid->serial, - uid->ssid, ua_string, - uid->vduit); - else - snprintf(uid_string, sizeof(uid_string), - "%s.%s.%04x.%s", - uid->vendor, uid->serial, - uid->ssid, ua_string); - spin_unlock(&dasd_devmap_lock); + dasd_put_device(device); + return snprintf(buf, PAGE_SIZE, "%s\n", uid_string); } - static DEVICE_ATTR(uid, 0444, dasd_uid_show, NULL); /* @@ -1094,50 +1100,6 @@ static struct attribute_group dasd_attr_group = { }; /* - * Return copy of the device unique identifier. - */ -int -dasd_get_uid(struct ccw_device *cdev, struct dasd_uid *uid) -{ - struct dasd_devmap *devmap; - - devmap = dasd_find_busid(dev_name(&cdev->dev)); - if (IS_ERR(devmap)) - return PTR_ERR(devmap); - spin_lock(&dasd_devmap_lock); - *uid = devmap->uid; - spin_unlock(&dasd_devmap_lock); - return 0; -} -EXPORT_SYMBOL_GPL(dasd_get_uid); - -/* - * Register the given device unique identifier into devmap struct. - * In addition check if the related storage server subsystem ID is already - * contained in the dasd_server_ssid_list. If subsystem ID is not contained, - * create new entry. - * Return 0 if server was already in serverlist, - * 1 if the server was added successful - * <0 in case of error. - */ -int -dasd_set_uid(struct ccw_device *cdev, struct dasd_uid *uid) -{ - struct dasd_devmap *devmap; - - devmap = dasd_find_busid(dev_name(&cdev->dev)); - if (IS_ERR(devmap)) - return PTR_ERR(devmap); - - spin_lock(&dasd_devmap_lock); - devmap->uid = *uid; - spin_unlock(&dasd_devmap_lock); - - return 0; -} -EXPORT_SYMBOL_GPL(dasd_set_uid); - -/* * Return value of the specified feature. */ int diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 4305c23..5b1cd8d 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -692,18 +692,20 @@ dasd_eckd_cdl_reclen(int recid) /* * Generate device unique id that specifies the physical device. */ -static int dasd_eckd_generate_uid(struct dasd_device *device, - struct dasd_uid *uid) +static int dasd_eckd_generate_uid(struct dasd_device *device) { struct dasd_eckd_private *private; + struct dasd_uid *uid; int count; + unsigned long flags; private = (struct dasd_eckd_private *) device->private; if (!private) return -ENODEV; if (!private->ned || !private->gneq) return -ENODEV; - + uid = &private->uid; + spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags); memset(uid, 0, sizeof(struct dasd_uid)); memcpy(uid->vendor, private->ned->HDA_manufacturer, sizeof(uid->vendor) - 1); @@ -726,9 +728,25 @@ static int dasd_eckd_generate_uid(struct dasd_device *device, private->vdsneq->uit[count]); } } + spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags); return 0; } +static int dasd_eckd_get_uid(struct dasd_device *device, struct dasd_uid *uid) +{ + struct dasd_eckd_private *private; + unsigned long flags; + + if (device->private) { + private = (struct dasd_eckd_private *)device->private; + spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags); + *uid = private->uid; + spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags); + return 0; + } + return -EINVAL; +} + static struct dasd_ccw_req *dasd_eckd_build_rcd_lpm(struct dasd_device *device, void *rcd_buffer, struct ciw *ciw, __u8 lpm) @@ -1088,6 +1106,7 @@ dasd_eckd_check_characteristics(struct dasd_device *device) { struct dasd_eckd_private *private; struct dasd_block *block; + struct dasd_uid temp_uid; int is_known, rc; int readonly; @@ -1124,13 +1143,13 @@ dasd_eckd_check_characteristics(struct dasd_device *device) if (rc) goto out_err1; - /* Generate device unique id and register in devmap */ - rc = dasd_eckd_generate_uid(device, &private->uid); + /* Generate device unique id */ + rc = dasd_eckd_generate_uid(device); if (rc) goto out_err1; - dasd_set_uid(device->cdev, &private->uid); - if (private->uid.type == UA_BASE_DEVICE) { + dasd_eckd_get_uid(device, &temp_uid); + if (temp_uid.type == UA_BASE_DEVICE) { block = dasd_alloc_block(); if (IS_ERR(block)) { DBF_EVENT_DEVID(DBF_WARNING, device->cdev, "%s", @@ -3305,15 +3324,16 @@ static int dasd_eckd_restore_device(struct dasd_device *device) if (rc) goto out_err; - /* Generate device unique id and register in devmap */ - rc = dasd_eckd_generate_uid(device, &private->uid); - dasd_get_uid(device->cdev, &temp_uid); + dasd_eckd_get_uid(device, &temp_uid); + /* Generate device unique id */ + rc = dasd_eckd_generate_uid(device); + spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags); if (memcmp(&private->uid, &temp_uid, sizeof(struct dasd_uid)) != 0) dev_err(&device->cdev->dev, "The UID of the DASD has " "changed\n"); + spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags); if (rc) goto out_err; - dasd_set_uid(device->cdev, &private->uid); /* register lcu with alias handling, enable PAV if this is a new lcu */ is_known = dasd_alias_make_device_known_to_lcu(device); @@ -3358,42 +3378,45 @@ static int dasd_eckd_reload_device(struct dasd_device *device) { struct dasd_eckd_private *private; int rc, old_base; - char uid[60]; + char print_uid[60]; + struct dasd_uid uid; + unsigned long flags; private = (struct dasd_eckd_private *) device->private; + + spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags); old_base = private->uid.base_unit_addr; + spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags); + /* Read Configuration Data */ rc = dasd_eckd_read_conf(device); if (rc) goto out_err; - rc = dasd_eckd_generate_uid(device, &private->uid); + rc = dasd_eckd_generate_uid(device); if (rc) goto out_err; - - dasd_set_uid(device->cdev, &private->uid); - /* * update unit address configuration and * add device to alias management */ dasd_alias_update_add_device(device); - if (old_base != private->uid.base_unit_addr) { - if (strlen(private->uid.vduit) > 0) - snprintf(uid, 60, "%s.%s.%04x.%02x.%s", - private->uid.vendor, private->uid.serial, - private->uid.ssid, private->uid.base_unit_addr, - private->uid.vduit); + dasd_eckd_get_uid(device, &uid); + + if (old_base != uid.base_unit_addr) { + if (strlen(uid.vduit) > 0) + snprintf(print_uid, sizeof(print_uid), + "%s.%s.%04x.%02x.%s", uid.vendor, uid.serial, + uid.ssid, uid.base_unit_addr, uid.vduit); else - snprintf(uid, 60, "%s.%s.%04x.%02x", - private->uid.vendor, private->uid.serial, - private->uid.ssid, - private->uid.base_unit_addr); + snprintf(print_uid, sizeof(print_uid), + "%s.%s.%04x.%02x", uid.vendor, uid.serial, + uid.ssid, uid.base_unit_addr); dev_info(&device->cdev->dev, "An Alias device was reassigned to a new base device " - "with UID: %s\n", uid); + "with UID: %s\n", print_uid); } return 0; @@ -3455,6 +3478,7 @@ static struct dasd_discipline dasd_eckd_discipline = { .freeze = dasd_eckd_pm_freeze, .restore = dasd_eckd_restore_device, .reload = dasd_eckd_reload_device, + .get_uid = dasd_eckd_get_uid, }; static int __init diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h index 1ae7b12..32fac18 100644 --- a/drivers/s390/block/dasd_int.h +++ b/drivers/s390/block/dasd_int.h @@ -81,6 +81,10 @@ struct dasd_block; #define DASD_SIM_MSG_TO_OP 0x03 #define DASD_SIM_LOG 0x0C +/* lock class for nested cdev lock */ +#define CDEV_NESTED_FIRST 1 +#define CDEV_NESTED_SECOND 2 + /* * SECTION: MACROs for klogd and s390 debug feature (dbf) */ @@ -229,6 +233,24 @@ struct dasd_ccw_req { typedef struct dasd_ccw_req *(*dasd_erp_fn_t) (struct dasd_ccw_req *); /* + * Unique identifier for dasd device. + */ +#define UA_NOT_CONFIGURED 0x00 +#define UA_BASE_DEVICE 0x01 +#define UA_BASE_PAV_ALIAS 0x02 +#define UA_HYPER_PAV_ALIAS 0x03 + +struct dasd_uid { + __u8 type; + char vendor[4]; + char serial[15]; + __u16 ssid; + __u8 real_unit_addr; + __u8 base_unit_addr; + char vduit[33]; +}; + +/* * the struct dasd_discipline is * sth like a table of virtual functions, if you think of dasd_eckd * inheriting dasd... @@ -315,29 +337,13 @@ struct dasd_discipline { /* reload device after state change */ int (*reload) (struct dasd_device *); + + int (*get_uid) (struct dasd_device *, struct dasd_uid *); }; extern struct dasd_discipline *dasd_diag_discipline_pointer; /* - * Unique identifier for dasd device. - */ -#define UA_NOT_CONFIGURED 0x00 -#define UA_BASE_DEVICE 0x01 -#define UA_BASE_PAV_ALIAS 0x02 -#define UA_HYPER_PAV_ALIAS 0x03 - -struct dasd_uid { - __u8 type; - char vendor[4]; - char serial[15]; - __u16 ssid; - __u8 real_unit_addr; - __u8 base_unit_addr; - char vduit[33]; -}; - -/* * Notification numbers for extended error reporting notifications: * The DASD_EER_DISABLE notification is sent before a dasd_device (and it's * eer pointer) is freed. The error reporting module needs to do all necessary @@ -634,8 +640,6 @@ void dasd_devmap_exit(void); struct dasd_device *dasd_create_device(struct ccw_device *); void dasd_delete_device(struct dasd_device *); -int dasd_get_uid(struct ccw_device *, struct dasd_uid *); -int dasd_set_uid(struct ccw_device *, struct dasd_uid *); int dasd_get_feature(struct ccw_device *, int); int dasd_set_feature(struct ccw_device *, int, int); -- cgit v0.10.2 From 10d3858950557cd3cc05f647ede597114c610177 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 17 May 2010 10:00:12 +0200 Subject: [S390] topology: expose core identifier Provide a topology_core_id define which makes sure that the contents of /sys/devices/system/cpu/cpuX/topology/core_id indeed do contain the core id and not always 0. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h index 6e7211a..dc8a672 100644 --- a/arch/s390/include/asm/topology.h +++ b/arch/s390/include/asm/topology.h @@ -7,8 +7,10 @@ const struct cpumask *cpu_coregroup_mask(unsigned int cpu); +extern unsigned char cpu_core_id[NR_CPUS]; extern cpumask_t cpu_core_map[NR_CPUS]; +#define topology_core_id(cpu) (cpu_core_id[cpu]) #define topology_core_cpumask(cpu) (&cpu_core_map[cpu]) int topology_set_cpu_management(int fc); diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 247b4c2..bcef007 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -37,7 +37,8 @@ struct tl_cpu { }; struct tl_container { - unsigned char reserved[8]; + unsigned char reserved[7]; + unsigned char id; }; union tl_entry { @@ -58,6 +59,7 @@ struct tl_info { struct core_info { struct core_info *next; + unsigned char id; cpumask_t mask; }; @@ -73,6 +75,7 @@ static DECLARE_WORK(topology_work, topology_work_fn); static DEFINE_SPINLOCK(topology_lock); cpumask_t cpu_core_map[NR_CPUS]; +unsigned char cpu_core_id[NR_CPUS]; static cpumask_t cpu_coregroup_map(unsigned int cpu) { @@ -116,6 +119,7 @@ static void add_cpus_to_core(struct tl_cpu *tl_cpu, struct core_info *core) for_each_present_cpu(lcpu) { if (cpu_logical_map(lcpu) == rcpu) { cpu_set(lcpu, core->mask); + cpu_core_id[lcpu] = core->id; smp_cpu_polarization[lcpu] = tl_cpu->pp; } } @@ -158,6 +162,7 @@ static void tl_to_cores(struct tl_info *info) break; case 1: core = core->next; + core->id = tle->container.id; break; case 0: add_cpus_to_core(&tle->cpu, core); -- cgit v0.10.2 From 6ea50968274673ea7b58695efec2eeb55ca2f5dd Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 17 May 2010 10:00:13 +0200 Subject: [S390] user space fault: report fault before calling do_exit Report user space faults before calling do_exit, since do_exit does not return and therefore we will never see the fault message on the console. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index 6e7ad63..e605f07 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -390,8 +390,8 @@ static void default_trap_handler(struct pt_regs * regs, long interruption_code) { if (regs->psw.mask & PSW_MASK_PSTATE) { local_irq_enable(); - do_exit(SIGSEGV); report_user_fault(interruption_code, regs); + do_exit(SIGSEGV); } else die("Unknown program exception", regs, interruption_code); } -- cgit v0.10.2 From 09a308f384c4ad2fb45959f5da9918e812207c50 Mon Sep 17 00:00:00 2001 From: Jan Glauber Date: Mon, 17 May 2010 10:00:14 +0200 Subject: [S390] qdio: count number of qdio interrupts Add missing increment for the qdio interrupt counter. Signed-off-by: Signed-off-by: Jan Glauber Signed-off-by: Martin Schwidefsky diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index 88be7b9..1fc8126 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -955,6 +955,9 @@ void qdio_int_handler(struct ccw_device *cdev, unsigned long intparm, return; } + if (irq_ptr->perf_stat_enabled) + irq_ptr->perf_stat.qdio_int++; + if (IS_ERR(irb)) { switch (PTR_ERR(irb)) { case -EIO: -- cgit v0.10.2 From f3eb20fafdc10aea0fb13b113ac3b9a3dc9a5dc6 Mon Sep 17 00:00:00 2001 From: Jan Glauber Date: Mon, 17 May 2010 10:00:15 +0200 Subject: [S390] qdio: prevent starvation on PCI devices If adapter interrupts are not available and traditional IO interrupts are used for qdio the inbound tasklet continued to run if new data arrived. That could possibly block other tasklets scheduled on the same CPU. If new data arrives schedule the tasklet again instead of directly processing the new data. Signed-off-by: Jan Glauber Signed-off-by: Martin Schwidefsky diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index 1fc8126..f4fd6cd 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -606,7 +606,7 @@ static void qdio_kick_handler(struct qdio_q *q) static void __qdio_inbound_processing(struct qdio_q *q) { qperf_inc(q, tasklet_inbound); -again: + if (!qdio_inbound_q_moved(q)) return; @@ -615,7 +615,10 @@ again: if (!qdio_inbound_q_done(q)) { /* means poll time is not yet over */ qperf_inc(q, tasklet_inbound_resched); - goto again; + if (likely(q->irq_ptr->state != QDIO_IRQ_STATE_STOPPED)) { + tasklet_schedule(&q->tasklet); + return; + } } qdio_stop_polling(q); @@ -625,7 +628,8 @@ again: */ if (!qdio_inbound_q_done(q)) { qperf_inc(q, tasklet_inbound_resched2); - goto again; + if (likely(q->irq_ptr->state != QDIO_IRQ_STATE_STOPPED)) + tasklet_schedule(&q->tasklet); } } -- cgit v0.10.2 From 5382fe11d90e3ed9602ce655e523852e3dbf3e35 Mon Sep 17 00:00:00 2001 From: Jan Glauber Date: Mon, 17 May 2010 10:00:16 +0200 Subject: [S390] qdio: remove memset hack Remove memset hack that relied on the layout of struct qdio_q to avoid deletion of the slib pointer. Signed-off-by: Jan Glauber Signed-off-by: Martin Schwidefsky diff --git a/drivers/s390/cio/qdio.h b/drivers/s390/cio/qdio.h index 48aa064..da80e9b 100644 --- a/drivers/s390/cio/qdio.h +++ b/drivers/s390/cio/qdio.h @@ -296,10 +296,8 @@ struct qdio_q { struct qdio_irq *irq_ptr; struct sl *sl; /* - * Warning: Leave this member at the end so it won't be cleared in - * qdio_fill_qs. A page is allocated under this pointer and used for - * slib and sl. slib is 2048 bytes big and sl points to offset - * PAGE_SIZE / 2. + * A page is allocated under this pointer and used for slib and sl. + * slib is 2048 bytes big and sl points to offset PAGE_SIZE / 2. */ struct slib *slib; } __attribute__ ((aligned(256))); diff --git a/drivers/s390/cio/qdio_setup.c b/drivers/s390/cio/qdio_setup.c index 7f4a754..6326b67 100644 --- a/drivers/s390/cio/qdio_setup.c +++ b/drivers/s390/cio/qdio_setup.c @@ -106,10 +106,12 @@ int qdio_allocate_qs(struct qdio_irq *irq_ptr, int nr_input_qs, int nr_output_qs static void setup_queues_misc(struct qdio_q *q, struct qdio_irq *irq_ptr, qdio_handler_t *handler, int i) { - /* must be cleared by every qdio_establish */ - memset(q, 0, ((char *)&q->slib) - ((char *)q)); - memset(q->slib, 0, PAGE_SIZE); + struct slib *slib = q->slib; + /* queue must be cleared for qdio_establish */ + memset(q, 0, sizeof(*q)); + memset(slib, 0, PAGE_SIZE); + q->slib = slib; q->irq_ptr = irq_ptr; q->mask = 1 << (31 - i); q->nr = i; -- cgit v0.10.2 From 3a601bfef33c82537511c7d970d8010c38aaecd1 Mon Sep 17 00:00:00 2001 From: Jan Glauber Date: Mon, 17 May 2010 10:00:17 +0200 Subject: [S390] qdio: dont convert timestamps to microseconds Don't convert timestamps to microseconds, use timestamps returned by get_clock() directly. Signed-off-by: Jan Glauber Signed-off-by: Martin Schwidefsky diff --git a/drivers/s390/cio/qdio.h b/drivers/s390/cio/qdio.h index da80e9b..f0037ee 100644 --- a/drivers/s390/cio/qdio.h +++ b/drivers/s390/cio/qdio.h @@ -13,8 +13,8 @@ #include #include "chsc.h" -#define QDIO_BUSY_BIT_PATIENCE 100 /* 100 microseconds */ -#define QDIO_INPUT_THRESHOLD 500 /* 500 microseconds */ +#define QDIO_BUSY_BIT_PATIENCE (100 << 12) /* 100 microseconds */ +#define QDIO_INPUT_THRESHOLD (500 << 12) /* 500 microseconds */ /* * if an asynchronous HiperSockets queue runs full, the 10 seconds timer wait @@ -370,11 +370,6 @@ static inline int multicast_outbound(struct qdio_q *q) (q->nr == q->irq_ptr->nr_output_qs - 1); } -static inline unsigned long long get_usecs(void) -{ - return monotonic_clock() >> 12; -} - #define pci_out_supported(q) \ (q->irq_ptr->qib.ac & QIB_AC_OUTBOUND_PCI_SUPPORTED) #define is_qebsm(q) (q->irq_ptr->sch_token != 0) diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index f4fd6cd..dafb3e5 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -336,10 +336,10 @@ again: WARN_ON(queue_type(q) != QDIO_IQDIO_QFMT || cc != 2); if (!start_time) { - start_time = get_usecs(); + start_time = get_clock(); goto again; } - if ((get_usecs() - start_time) < QDIO_BUSY_BIT_PATIENCE) + if ((get_clock() - start_time) < QDIO_BUSY_BIT_PATIENCE) goto again; } return cc; @@ -536,7 +536,7 @@ static int qdio_inbound_q_moved(struct qdio_q *q) if ((bufnr != q->last_move) || q->qdio_error) { q->last_move = bufnr; if (!is_thinint_irq(q->irq_ptr) && MACHINE_IS_LPAR) - q->u.in.timestamp = get_usecs(); + q->u.in.timestamp = get_clock(); return 1; } else return 0; @@ -567,7 +567,7 @@ static inline int qdio_inbound_q_done(struct qdio_q *q) * At this point we know, that inbound first_to_check * has (probably) not moved (see qdio_inbound_processing). */ - if (get_usecs() > q->u.in.timestamp + QDIO_INPUT_THRESHOLD) { + if (get_clock() > q->u.in.timestamp + QDIO_INPUT_THRESHOLD) { DBF_DEV_EVENT(DBF_INFO, q->irq_ptr, "in done:%02x", q->first_to_check); return 1; -- cgit v0.10.2 From d0c9d4a89fff4352b20ae8704b84cd99a8372f66 Mon Sep 17 00:00:00 2001 From: Jan Glauber Date: Mon, 17 May 2010 10:00:18 +0200 Subject: [S390] qdio: set correct bit in dsci The state change indicator is bit 7 not bit 0 of the dsci. Use the correct bit for setting the indicator. Signed-off-by: Jan Glauber Signed-off-by: Martin Schwidefsky diff --git a/drivers/s390/cio/qdio_thinint.c b/drivers/s390/cio/qdio_thinint.c index ce5f891..8daf1b9 100644 --- a/drivers/s390/cio/qdio_thinint.c +++ b/drivers/s390/cio/qdio_thinint.c @@ -95,7 +95,7 @@ void tiqdio_add_input_queues(struct qdio_irq *irq_ptr) for_each_input_queue(irq_ptr, q, i) list_add_rcu(&q->entry, &tiq_list); mutex_unlock(&tiq_list_lock); - xchg(irq_ptr->dsci, 1); + xchg(irq_ptr->dsci, 1 << 7); } void tiqdio_remove_input_queues(struct qdio_irq *irq_ptr) @@ -173,7 +173,7 @@ static void tiqdio_thinint_handler(void *ind, void *drv_data) /* prevent racing */ if (*tiqdio_alsi) - xchg(&q_indicators[TIQDIO_SHARED_IND].ind, 1); + xchg(&q_indicators[TIQDIO_SHARED_IND].ind, 1 << 7); } } -- cgit v0.10.2 From cc961d400e06e78c7aa39aeab1f001eb8f76ef90 Mon Sep 17 00:00:00 2001 From: Jan Glauber Date: Mon, 17 May 2010 10:00:19 +0200 Subject: [S390] qdio: remove API wrappers Remove qdio API wrappers used by qeth and replace them by calling the appropriate functions directly. Signed-off-by: Jan Glauber Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h index 9b04b11..0eaae626 100644 --- a/arch/s390/include/asm/qdio.h +++ b/arch/s390/include/asm/qdio.h @@ -368,14 +368,12 @@ struct qdio_initialize { #define QDIO_FLAG_SYNC_OUTPUT 0x02 #define QDIO_FLAG_PCI_OUT 0x10 -extern int qdio_initialize(struct qdio_initialize *); extern int qdio_allocate(struct qdio_initialize *); extern int qdio_establish(struct qdio_initialize *); extern int qdio_activate(struct ccw_device *); extern int do_QDIO(struct ccw_device *cdev, unsigned int callflags, int q_nr, unsigned int bufnr, unsigned int count); -extern int qdio_cleanup(struct ccw_device*, int); extern int qdio_shutdown(struct ccw_device*, int); extern int qdio_free(struct ccw_device *); extern int qdio_get_ssqd_desc(struct ccw_device *dev, struct qdio_ssqd_desc*); diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index dafb3e5..00520f9 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -1023,30 +1023,6 @@ int qdio_get_ssqd_desc(struct ccw_device *cdev, } EXPORT_SYMBOL_GPL(qdio_get_ssqd_desc); -/** - * qdio_cleanup - shutdown queues and free data structures - * @cdev: associated ccw device - * @how: use halt or clear to shutdown - * - * This function calls qdio_shutdown() for @cdev with method @how. - * and qdio_free(). The qdio_free() return value is ignored since - * !irq_ptr is already checked. - */ -int qdio_cleanup(struct ccw_device *cdev, int how) -{ - struct qdio_irq *irq_ptr = cdev->private->qdio_data; - int rc; - - if (!irq_ptr) - return -ENODEV; - - rc = qdio_shutdown(cdev, how); - - qdio_free(cdev); - return rc; -} -EXPORT_SYMBOL_GPL(qdio_cleanup); - static void qdio_shutdown_queues(struct ccw_device *cdev) { struct qdio_irq *irq_ptr = cdev->private->qdio_data; @@ -1164,28 +1140,6 @@ int qdio_free(struct ccw_device *cdev) EXPORT_SYMBOL_GPL(qdio_free); /** - * qdio_initialize - allocate and establish queues for a qdio subchannel - * @init_data: initialization data - * - * This function first allocates queues via qdio_allocate() and on success - * establishes them via qdio_establish(). - */ -int qdio_initialize(struct qdio_initialize *init_data) -{ - int rc; - - rc = qdio_allocate(init_data); - if (rc) - return rc; - - rc = qdio_establish(init_data); - if (rc) - qdio_free(init_data->cdev); - return rc; -} -EXPORT_SYMBOL_GPL(qdio_initialize); - -/** * qdio_allocate - allocate qdio queues and associated data * @init_data: initialization data */ diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 3ba738b..28f7134 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -1292,13 +1292,14 @@ int qeth_qdio_clear_card(struct qeth_card *card, int use_halt) QETH_QDIO_CLEANING)) { case QETH_QDIO_ESTABLISHED: if (card->info.type == QETH_CARD_TYPE_IQD) - rc = qdio_cleanup(CARD_DDEV(card), + rc = qdio_shutdown(CARD_DDEV(card), QDIO_FLAG_CLEANUP_USING_HALT); else - rc = qdio_cleanup(CARD_DDEV(card), + rc = qdio_shutdown(CARD_DDEV(card), QDIO_FLAG_CLEANUP_USING_CLEAR); if (rc) QETH_DBF_TEXT_(TRACE, 3, "1err%d", rc); + qdio_free(CARD_DDEV(card)); atomic_set(&card->qdio.state, QETH_QDIO_ALLOCATED); break; case QETH_QDIO_CLEANING: @@ -3810,10 +3811,18 @@ static int qeth_qdio_establish(struct qeth_card *card) if (atomic_cmpxchg(&card->qdio.state, QETH_QDIO_ALLOCATED, QETH_QDIO_ESTABLISHED) == QETH_QDIO_ALLOCATED) { - rc = qdio_initialize(&init_data); - if (rc) + rc = qdio_allocate(&init_data); + if (rc) { + atomic_set(&card->qdio.state, QETH_QDIO_ALLOCATED); + goto out; + } + rc = qdio_establish(&init_data); + if (rc) { atomic_set(&card->qdio.state, QETH_QDIO_ALLOCATED); + qdio_free(CARD_DDEV(card)); + } } +out: kfree(out_sbal_ptrs); kfree(in_sbal_ptrs); kfree(qib_param_field); -- cgit v0.10.2 From 57b28f66316d287b9dbf7b28358ca90257230769 Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Mon, 17 May 2010 10:00:20 +0200 Subject: [S390] s390_hypfs: Add new attributes In order to access the data of the hypfs diagnose calls from user space also in binary form, this patch adds two new attributes in debugfs: * z/VM: s390_hypfs/d2fc_bin * LPAR: s390_hypfs/d204_bin Signed-off-by: Michael Holzheu Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/hypfs/hypfs.h b/arch/s390/hypfs/hypfs.h index aea5720..fa487d4 100644 --- a/arch/s390/hypfs/hypfs.h +++ b/arch/s390/hypfs/hypfs.h @@ -11,6 +11,7 @@ #include #include +#include #define REG_FILE_MODE 0440 #define UPDATE_FILE_MODE 0220 @@ -34,6 +35,9 @@ extern int hypfs_diag_create_files(struct super_block *sb, struct dentry *root); /* VM Hypervisor */ extern int hypfs_vm_init(void); +extern void hypfs_vm_exit(void); extern int hypfs_vm_create_files(struct super_block *sb, struct dentry *root); +/* Directory for debugfs files */ +extern struct dentry *hypfs_dbfs_dir; #endif /* _HYPFS_H_ */ diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c index 5b1acdb..1211bb1 100644 --- a/arch/s390/hypfs/hypfs_diag.c +++ b/arch/s390/hypfs/hypfs_diag.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include "hypfs.h" @@ -22,6 +23,8 @@ #define CPU_NAME_LEN 16 /* type name len of cpus in diag224 name table */ #define TMP_SIZE 64 /* size of temporary buffers */ +#define DBFS_D204_HDR_VERSION 0 + /* diag 204 subcodes */ enum diag204_sc { SUBC_STIB4 = 4, @@ -47,6 +50,8 @@ static void *diag204_buf; /* 4K aligned buffer for diag204 data */ static void *diag204_buf_vmalloc; /* vmalloc pointer for diag204 data */ static int diag204_buf_pages; /* number of pages for diag204 data */ +static struct dentry *dbfs_d204_file; + /* * DIAG 204 data structures and member access functions. * @@ -364,18 +369,21 @@ static void diag204_free_buffer(void) } else { free_pages((unsigned long) diag204_buf, 0); } - diag204_buf_pages = 0; diag204_buf = NULL; } +static void *page_align_ptr(void *ptr) +{ + return (void *) PAGE_ALIGN((unsigned long) ptr); +} + static void *diag204_alloc_vbuf(int pages) { /* The buffer has to be page aligned! */ diag204_buf_vmalloc = vmalloc(PAGE_SIZE * (pages + 1)); if (!diag204_buf_vmalloc) return ERR_PTR(-ENOMEM); - diag204_buf = (void*)((unsigned long)diag204_buf_vmalloc - & ~0xfffUL) + 0x1000; + diag204_buf = page_align_ptr(diag204_buf_vmalloc); diag204_buf_pages = pages; return diag204_buf; } @@ -468,17 +476,26 @@ fail_alloc: return rc; } +static int diag204_do_store(void *buf, int pages) +{ + int rc; + + rc = diag204((unsigned long) diag204_store_sc | + (unsigned long) diag204_info_type, pages, buf); + return rc < 0 ? -ENOSYS : 0; +} + static void *diag204_store(void) { void *buf; - int pages; + int pages, rc; buf = diag204_get_buffer(diag204_info_type, &pages); if (IS_ERR(buf)) goto out; - if (diag204((unsigned long)diag204_store_sc | - (unsigned long)diag204_info_type, pages, buf) < 0) - return ERR_PTR(-ENOSYS); + rc = diag204_do_store(buf, pages); + if (rc) + return ERR_PTR(rc); out: return buf; } @@ -526,6 +543,92 @@ static int diag224_idx2name(int index, char *name) return 0; } +struct dbfs_d204_hdr { + u64 len; /* Length of d204 buffer without header */ + u16 version; /* Version of header */ + u8 sc; /* Used subcode */ + char reserved[53]; +} __attribute__ ((packed)); + +struct dbfs_d204 { + struct dbfs_d204_hdr hdr; /* 64 byte header */ + char buf[]; /* d204 buffer */ +} __attribute__ ((packed)); + +struct dbfs_d204_private { + struct dbfs_d204 *d204; /* Aligned d204 data with header */ + void *base; /* Base pointer (needed for vfree) */ +}; + +static int dbfs_d204_open(struct inode *inode, struct file *file) +{ + struct dbfs_d204_private *data; + struct dbfs_d204 *d204; + int rc, buf_size; + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + buf_size = PAGE_SIZE * (diag204_buf_pages + 1) + sizeof(d204->hdr); + data->base = vmalloc(buf_size); + if (!data->base) { + rc = -ENOMEM; + goto fail_kfree_data; + } + memset(data->base, 0, buf_size); + d204 = page_align_ptr(data->base + sizeof(d204->hdr)) + - sizeof(d204->hdr); + rc = diag204_do_store(&d204->buf, diag204_buf_pages); + if (rc) + goto fail_vfree_base; + d204->hdr.version = DBFS_D204_HDR_VERSION; + d204->hdr.len = PAGE_SIZE * diag204_buf_pages; + d204->hdr.sc = diag204_store_sc; + data->d204 = d204; + file->private_data = data; + return nonseekable_open(inode, file); + +fail_vfree_base: + vfree(data->base); +fail_kfree_data: + kfree(data); + return rc; +} + +static int dbfs_d204_release(struct inode *inode, struct file *file) +{ + struct dbfs_d204_private *data = file->private_data; + + vfree(data->base); + kfree(data); + return 0; +} + +static ssize_t dbfs_d204_read(struct file *file, char __user *buf, + size_t size, loff_t *ppos) +{ + struct dbfs_d204_private *data = file->private_data; + + return simple_read_from_buffer(buf, size, ppos, data->d204, + data->d204->hdr.len + + sizeof(data->d204->hdr)); +} + +static const struct file_operations dbfs_d204_ops = { + .open = dbfs_d204_open, + .read = dbfs_d204_read, + .release = dbfs_d204_release, +}; + +static int hypfs_dbfs_init(void) +{ + dbfs_d204_file = debugfs_create_file("diag_204", 0400, hypfs_dbfs_dir, + NULL, &dbfs_d204_ops); + if (IS_ERR(dbfs_d204_file)) + return PTR_ERR(dbfs_d204_file); + return 0; +} + __init int hypfs_diag_init(void) { int rc; @@ -540,11 +643,17 @@ __init int hypfs_diag_init(void) pr_err("The hardware system does not provide all " "functions required by hypfs\n"); } + if (diag204_info_type == INFO_EXT) { + rc = hypfs_dbfs_init(); + if (rc) + diag204_free_buffer(); + } return rc; } void hypfs_diag_exit(void) { + debugfs_remove(dbfs_d204_file); diag224_delete_name_table(); diag204_free_buffer(); } diff --git a/arch/s390/hypfs/hypfs_vm.c b/arch/s390/hypfs/hypfs_vm.c index f0b0d31..ee5ab1a 100644 --- a/arch/s390/hypfs/hypfs_vm.c +++ b/arch/s390/hypfs/hypfs_vm.c @@ -10,14 +10,18 @@ #include #include #include +#include #include "hypfs.h" #define NAME_LEN 8 +#define DBFS_D2FC_HDR_VERSION 0 static char local_guest[] = " "; static char all_guests[] = "* "; static char *guest_query; +static struct dentry *dbfs_d2fc_file; + struct diag2fc_data { __u32 version; __u32 flags; @@ -76,23 +80,26 @@ static int diag2fc(int size, char* query, void *addr) return -residual_cnt; } -static struct diag2fc_data *diag2fc_store(char *query, int *count) +/* + * Allocate buffer for "query" and store diag 2fc at "offset" + */ +static void *diag2fc_store(char *query, unsigned int *count, int offset) { + void *data; int size; - struct diag2fc_data *data; do { size = diag2fc(0, query, NULL); if (size < 0) return ERR_PTR(-EACCES); - data = vmalloc(size); + data = vmalloc(size + offset); if (!data) return ERR_PTR(-ENOMEM); - if (diag2fc(size, query, data) == 0) + if (diag2fc(size, query, data + offset) == 0) break; vfree(data); } while (1); - *count = (size / sizeof(*data)); + *count = (size / sizeof(struct diag2fc_data)); return data; } @@ -168,9 +175,10 @@ int hypfs_vm_create_files(struct super_block *sb, struct dentry *root) { struct dentry *dir, *file; struct diag2fc_data *data; - int rc, i, count = 0; + unsigned int count = 0; + int rc, i; - data = diag2fc_store(guest_query, &count); + data = diag2fc_store(guest_query, &count, 0); if (IS_ERR(data)) return PTR_ERR(data); @@ -218,8 +226,61 @@ failed: return rc; } +struct dbfs_d2fc_hdr { + u64 len; /* Length of d2fc buffer without header */ + u16 version; /* Version of header */ + char tod_ext[16]; /* TOD clock for d2fc */ + u64 count; /* Number of VM guests in d2fc buffer */ + char reserved[30]; +} __attribute__ ((packed)); + +struct dbfs_d2fc { + struct dbfs_d2fc_hdr hdr; /* 64 byte header */ + char buf[]; /* d2fc buffer */ +} __attribute__ ((packed)); + +static int dbfs_d2fc_open(struct inode *inode, struct file *file) +{ + struct dbfs_d2fc *data; + unsigned int count; + + data = diag2fc_store(guest_query, &count, sizeof(data->hdr)); + if (IS_ERR(data)) + return PTR_ERR(data); + get_clock_ext(data->hdr.tod_ext); + data->hdr.len = count * sizeof(struct diag2fc_data); + data->hdr.version = DBFS_D2FC_HDR_VERSION; + data->hdr.count = count; + memset(&data->hdr.reserved, 0, sizeof(data->hdr.reserved)); + file->private_data = data; + return nonseekable_open(inode, file); +} + +static int dbfs_d2fc_release(struct inode *inode, struct file *file) +{ + diag2fc_free(file->private_data); + return 0; +} + +static ssize_t dbfs_d2fc_read(struct file *file, char __user *buf, + size_t size, loff_t *ppos) +{ + struct dbfs_d2fc *data = file->private_data; + + return simple_read_from_buffer(buf, size, ppos, data, data->hdr.len + + sizeof(struct dbfs_d2fc_hdr)); +} + +static const struct file_operations dbfs_d2fc_ops = { + .open = dbfs_d2fc_open, + .read = dbfs_d2fc_read, + .release = dbfs_d2fc_release, +}; + int hypfs_vm_init(void) { + if (!MACHINE_IS_VM) + return 0; if (diag2fc(0, all_guests, NULL) > 0) guest_query = all_guests; else if (diag2fc(0, local_guest, NULL) > 0) @@ -227,5 +288,17 @@ int hypfs_vm_init(void) else return -EACCES; + dbfs_d2fc_file = debugfs_create_file("diag_2fc", 0400, hypfs_dbfs_dir, + NULL, &dbfs_d2fc_ops); + if (IS_ERR(dbfs_d2fc_file)) + return PTR_ERR(dbfs_d2fc_file); + return 0; } + +void hypfs_vm_exit(void) +{ + if (!MACHINE_IS_VM) + return; + debugfs_remove(dbfs_d2fc_file); +} diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index 95c1aaa..6b120f0 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -46,6 +46,8 @@ static const struct super_operations hypfs_s_ops; /* start of list of all dentries, which have to be deleted on update */ static struct dentry *hypfs_last_dentry; +struct dentry *hypfs_dbfs_dir; + static void hypfs_update_update(struct super_block *sb) { struct hypfs_sb_info *sb_info = sb->s_fs_info; @@ -468,20 +470,22 @@ static int __init hypfs_init(void) { int rc; - if (MACHINE_IS_VM) { - if (hypfs_vm_init()) - /* no diag 2fc, just exit */ - return -ENODATA; - } else { - if (hypfs_diag_init()) { - rc = -ENODATA; - goto fail_diag; - } + hypfs_dbfs_dir = debugfs_create_dir("s390_hypfs", NULL); + if (IS_ERR(hypfs_dbfs_dir)) + return PTR_ERR(hypfs_dbfs_dir); + + if (hypfs_diag_init()) { + rc = -ENODATA; + goto fail_debugfs_remove; + } + if (hypfs_vm_init()) { + rc = -ENODATA; + goto fail_hypfs_diag_exit; } s390_kobj = kobject_create_and_add("s390", hypervisor_kobj); if (!s390_kobj) { rc = -ENOMEM; - goto fail_sysfs; + goto fail_hypfs_vm_exit; } rc = register_filesystem(&hypfs_type); if (rc) @@ -490,18 +494,22 @@ static int __init hypfs_init(void) fail_filesystem: kobject_put(s390_kobj); -fail_sysfs: - if (!MACHINE_IS_VM) - hypfs_diag_exit(); -fail_diag: +fail_hypfs_vm_exit: + hypfs_vm_exit(); +fail_hypfs_diag_exit: + hypfs_diag_exit(); +fail_debugfs_remove: + debugfs_remove(hypfs_dbfs_dir); + pr_err("Initialization of hypfs failed with rc=%i\n", rc); return rc; } static void __exit hypfs_exit(void) { - if (!MACHINE_IS_VM) - hypfs_diag_exit(); + hypfs_diag_exit(); + hypfs_vm_exit(); + debugfs_remove(hypfs_dbfs_dir); unregister_filesystem(&hypfs_type); kobject_put(s390_kobj); } diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index f174bda..09d345a 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -61,11 +61,15 @@ static inline unsigned long long get_clock (void) return clk; } +static inline void get_clock_ext(char *clk) +{ + asm volatile("stcke %0" : "=Q" (*clk) : : "cc"); +} + static inline unsigned long long get_clock_xt(void) { unsigned char clk[16]; - - asm volatile("stcke %0" : "=Q" (clk) : : "cc"); + get_clock_ext(clk); return *((unsigned long long *)&clk[1]); } -- cgit v0.10.2 From ab3c68ee5fd329ba48094d3417fd60e30ea14a87 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 17 May 2010 10:00:21 +0200 Subject: [S390] debug: enable exception-trace debug facility The exception-trace facility on x86 and other architectures prints traces to dmesg whenever a user space application crashes. s390 has such a feature since ages however it is called userprocess_debug and is enabled differently. This patch makes sure that whenever one of the two procfs files /proc/sys/kernel/userprocess_debug /proc/sys/debug/exception-trace is modified the contents of the second one changes as well. That way we keep backwards compatibilty but also support the same interface like other architectures do. Besides that the output of the traces is improved since it will now also contain the corresponding filename of the vma (when available) where the process caused a fault or trap. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 0d8cd9b..79d0ca0 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -444,13 +444,6 @@ config FORCE_MAX_ZONEORDER int default "9" -config PROCESS_DEBUG - bool "Show crashed user process info" - help - Say Y to print all process fault locations to the console. This is - a debugging option; you probably do not want to set it unless you - are an S390 port maintainer. - config PFAULT bool "Pseudo page fault support" help diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index e605f07..5d8f0f3 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -46,13 +46,7 @@ pgm_check_handler_t *pgm_check_table[128]; -#ifdef CONFIG_SYSCTL -#ifdef CONFIG_PROCESS_DEBUG -int sysctl_userprocess_debug = 1; -#else -int sysctl_userprocess_debug = 0; -#endif -#endif +int show_unhandled_signals; extern pgm_check_handler_t do_protection_exception; extern pgm_check_handler_t do_dat_exception; @@ -315,18 +309,19 @@ void die(const char * str, struct pt_regs * regs, long err) do_exit(SIGSEGV); } -static void inline -report_user_fault(long interruption_code, struct pt_regs *regs) +static void inline report_user_fault(struct pt_regs *regs, long int_code, + int signr) { -#if defined(CONFIG_SYSCTL) - if (!sysctl_userprocess_debug) + if ((task_pid_nr(current) > 1) && !show_unhandled_signals) return; -#endif -#if defined(CONFIG_SYSCTL) || defined(CONFIG_PROCESS_DEBUG) - printk("User process fault: interruption code 0x%lX\n", - interruption_code); + if (!unhandled_signal(current, signr)) + return; + if (!printk_ratelimit()) + return; + printk("User process fault: interruption code 0x%lX ", int_code); + print_vma_addr("in ", regs->psw.addr & PSW_ADDR_INSN); + printk("\n"); show_regs(regs); -#endif } int is_valid_bugaddr(unsigned long addr) @@ -354,7 +349,7 @@ static void __kprobes inline do_trap(long interruption_code, int signr, tsk->thread.trap_no = interruption_code & 0xffff; force_sig_info(signr, info, tsk); - report_user_fault(interruption_code, regs); + report_user_fault(regs, interruption_code, signr); } else { const struct exception_table_entry *fixup; fixup = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN); @@ -390,7 +385,7 @@ static void default_trap_handler(struct pt_regs * regs, long interruption_code) { if (regs->psw.mask & PSW_MASK_PSTATE) { local_irq_enable(); - report_user_fault(interruption_code, regs); + report_user_fault(regs, interruption_code, SIGSEGV); do_exit(SIGSEGV); } else die("Unknown program exception", regs, interruption_code); diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 3040d7c..2505b2e 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -48,10 +48,6 @@ #define __PF_RES_FIELD 0x8000000000000000ULL #endif /* CONFIG_64BIT */ -#ifdef CONFIG_SYSCTL -extern int sysctl_userprocess_debug; -#endif - #define VM_FAULT_BADCONTEXT 0x010000 #define VM_FAULT_BADMAP 0x020000 #define VM_FAULT_BADACCESS 0x040000 @@ -120,6 +116,22 @@ static inline int user_space_fault(unsigned long trans_exc_code) return trans_exc_code != 3; } +static inline void report_user_fault(struct pt_regs *regs, long int_code, + int signr, unsigned long address) +{ + if ((task_pid_nr(current) > 1) && !show_unhandled_signals) + return; + if (!unhandled_signal(current, signr)) + return; + if (!printk_ratelimit()) + return; + printk("User process fault: interruption code 0x%lX ", int_code); + print_vma_addr(KERN_CONT "in ", regs->psw.addr & PSW_ADDR_INSN); + printk("\n"); + printk("failing address: %lX\n", address); + show_regs(regs); +} + /* * Send SIGSEGV to task. This is an external routine * to keep the stack usage of do_page_fault small. @@ -133,17 +145,7 @@ static noinline void do_sigsegv(struct pt_regs *regs, long int_code, address = trans_exc_code & __FAIL_ADDR_MASK; current->thread.prot_addr = address; current->thread.trap_no = int_code; -#if defined(CONFIG_SYSCTL) || defined(CONFIG_PROCESS_DEBUG) -#if defined(CONFIG_SYSCTL) - if (sysctl_userprocess_debug) -#endif - { - printk("User process fault: interruption code 0x%lX\n", - int_code); - printk("failing address: %lX\n", address); - show_regs(regs); - } -#endif + report_user_fault(regs, int_code, SIGSEGV, address); si.si_signo = SIGSEGV; si.si_code = si_code; si.si_addr = (void __user *) address; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 8686b0f..90f536d 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -621,7 +621,7 @@ static struct ctl_table kern_table[] = { #endif { .procname = "userprocess_debug", - .data = &sysctl_userprocess_debug, + .data = &show_unhandled_signals, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, @@ -1431,7 +1431,8 @@ static struct ctl_table fs_table[] = { }; static struct ctl_table debug_table[] = { -#if defined(CONFIG_X86) || defined(CONFIG_PPC) || defined(CONFIG_SPARC) +#if defined(CONFIG_X86) || defined(CONFIG_PPC) || defined(CONFIG_SPARC) || \ + defined(CONFIG_S390) { .procname = "exception-trace", .data = &show_unhandled_signals, -- cgit v0.10.2 From 777a5510093a6d6443351160c6969a0e66f3ba8a Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Mon, 17 May 2010 10:00:22 +0200 Subject: [S390] drivers/s390/char: Use kstrdup Use kstrdup when the goal of an allocation is copy a string into the allocated region. Additionally drop the now unused variable len. The semantic patch that makes this change is as follows: (http://coccinelle.lip6.fr/) // @@ expression from,to; expression flag,E1,E2; statement S; @@ - to = kmalloc(strlen(from) + 1,flag); + to = kstrdup(from, flag); ... when != \(from = E1 \| to = E1 \) if (to==NULL || ...) S ... when != \(from = E2 \| to = E2 \) - strcpy(to, from); // Signed-off-by: Julia Lawall Signed-off-by: Martin Schwidefsky diff --git a/drivers/s390/char/keyboard.c b/drivers/s390/char/keyboard.c index cb6bffe..24021fd 100644 --- a/drivers/s390/char/keyboard.c +++ b/drivers/s390/char/keyboard.c @@ -49,7 +49,7 @@ static unsigned char ret_diacr[NR_DEAD] = { struct kbd_data * kbd_alloc(void) { struct kbd_data *kbd; - int i, len; + int i; kbd = kzalloc(sizeof(struct kbd_data), GFP_KERNEL); if (!kbd) @@ -72,11 +72,10 @@ kbd_alloc(void) { goto out_maps; for (i = 0; i < ARRAY_SIZE(func_table); i++) { if (func_table[i]) { - len = strlen(func_table[i]) + 1; - kbd->func_table[i] = kmalloc(len, GFP_KERNEL); + kbd->func_table[i] = kstrdup(func_table[i], + GFP_KERNEL); if (!kbd->func_table[i]) goto out_func; - memcpy(kbd->func_table[i], func_table[i], len); } } kbd->fn_handler = -- cgit v0.10.2 From 939e379e9e183ae6291ac7caa4a5e1dfadae4ccc Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Mon, 17 May 2010 10:00:23 +0200 Subject: [S390] drivers/s390/char: Use kmemdup Use kmemdup when some other buffer is immediately copied into the allocated region. A simplified version of the semantic patch that makes this change is as follows: (http://coccinelle.lip6.fr/) // @@ expression from,to,size,flag; statement S; @@ - to = \(kmalloc\|kzalloc\)(size,flag); + to = kmemdup(from,size,flag); if (to==NULL || ...) S - memcpy(to, from, size); // Signed-off-by: Julia Lawall Signed-off-by: Martin Schwidefsky diff --git a/drivers/s390/char/keyboard.c b/drivers/s390/char/keyboard.c index 24021fd..18d9a49 100644 --- a/drivers/s390/char/keyboard.c +++ b/drivers/s390/char/keyboard.c @@ -59,12 +59,11 @@ kbd_alloc(void) { goto out_kbd; for (i = 0; i < ARRAY_SIZE(key_maps); i++) { if (key_maps[i]) { - kbd->key_maps[i] = - kmalloc(sizeof(u_short)*NR_KEYS, GFP_KERNEL); + kbd->key_maps[i] = kmemdup(key_maps[i], + sizeof(u_short) * NR_KEYS, + GFP_KERNEL); if (!kbd->key_maps[i]) goto out_maps; - memcpy(kbd->key_maps[i], key_maps[i], - sizeof(u_short)*NR_KEYS); } } kbd->func_table = kzalloc(sizeof(func_table), GFP_KERNEL); @@ -82,12 +81,11 @@ kbd_alloc(void) { kzalloc(sizeof(fn_handler_fn *) * NR_FN_HANDLER, GFP_KERNEL); if (!kbd->fn_handler) goto out_func; - kbd->accent_table = - kmalloc(sizeof(struct kbdiacruc)*MAX_DIACR, GFP_KERNEL); + kbd->accent_table = kmemdup(accent_table, + sizeof(struct kbdiacruc) * MAX_DIACR, + GFP_KERNEL); if (!kbd->accent_table) goto out_fn_handler; - memcpy(kbd->accent_table, accent_table, - sizeof(struct kbdiacruc)*MAX_DIACR); kbd->accent_table_size = accent_table_size; return kbd; -- cgit v0.10.2