From 449666dd1efacaf3f0eb020cc617dd702356575c Mon Sep 17 00:00:00 2001 From: Peter Oberparleiter Date: Tue, 26 Nov 2013 14:55:56 +0100 Subject: s390/cio: More efficient handling of CHPID availability events The CIO layer processes hardware events that indicate that a channel path has become available by performing a scan of available subchannels using the Store Subchannel (STSCH) instruction. Performing too many STSCH instructions in a tight loop can cause high Hypervisor overhead which can negatively impact the performance of the virtual machine as a whole. This patch reduces the number of STSCH instructions performed while processing a resource accessibility event and while varying a CHPID online. In both cases, Linux first performs a STSCH instruction on each unused subchannel to see if the subchannel has become available. If the STSCH instruction indicates that the subchannel is available, a full evaluation of this subchannel is scheduled. Since the full evaluation includes performing a STSCH instruction, the initial STSCH is unnecessary and can be removed. Signed-off-by: Peter Oberparleiter Reviewed-by: Sebastian Ott Signed-off-by: Martin Schwidefsky diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c index 13299f9..eee70cb 100644 --- a/drivers/s390/cio/chsc.c +++ b/drivers/s390/cio/chsc.c @@ -237,26 +237,6 @@ void chsc_chp_offline(struct chp_id chpid) for_each_subchannel_staged(s390_subchannel_remove_chpid, NULL, &link); } -static int s390_process_res_acc_new_sch(struct subchannel_id schid, void *data) -{ - struct schib schib; - /* - * We don't know the device yet, but since a path - * may be available now to the device we'll have - * to do recognition again. - * Since we don't have any idea about which chpid - * that beast may be on we'll have to do a stsch - * on all devices, grr... - */ - if (stsch_err(schid, &schib)) - /* We're through */ - return -ENXIO; - - /* Put it on the slow path. */ - css_schedule_eval(schid); - return 0; -} - static int __s390_process_res_acc(struct subchannel *sch, void *data) { spin_lock_irq(sch->lock); @@ -287,8 +267,8 @@ static void s390_process_res_acc(struct chp_link *link) * The more information we have (info), the less scanning * will we have to do. */ - for_each_subchannel_staged(__s390_process_res_acc, - s390_process_res_acc_new_sch, link); + for_each_subchannel_staged(__s390_process_res_acc, NULL, link); + css_schedule_reprobe(); } static int @@ -663,19 +643,6 @@ static int s390_subchannel_vary_chpid_on(struct subchannel *sch, void *data) return 0; } -static int -__s390_vary_chpid_on(struct subchannel_id schid, void *data) -{ - struct schib schib; - - if (stsch_err(schid, &schib)) - /* We're through */ - return -ENXIO; - /* Put it on the slow path. */ - css_schedule_eval(schid); - return 0; -} - /** * chsc_chp_vary - propagate channel-path vary operation to subchannels * @chpid: channl-path ID @@ -694,7 +661,8 @@ int chsc_chp_vary(struct chp_id chpid, int on) /* Try to update the channel path description. */ chp_update_desc(chp); for_each_subchannel_staged(s390_subchannel_vary_chpid_on, - __s390_vary_chpid_on, &chpid); + NULL, &chpid); + css_schedule_reprobe(); } else for_each_subchannel_staged(s390_subchannel_vary_chpid_off, NULL, &chpid); -- cgit v0.10.2 From b207f5a8f9a60fda4e995151af7d8670015e261d Mon Sep 17 00:00:00 2001 From: Peter Oberparleiter Date: Tue, 26 Nov 2013 14:57:13 +0100 Subject: s390/cio: Relax subchannel scan loop The CIO layer scans for newly available I/O devices by performing a scan of available subchannels using the Store Subchannel (STSCH) instruction. This processing can take a significant amount of time during which no other task can run on the same CPU (unless CONFIG_PREEMPT has been enabled). As a result, scheduling latencies for other tasks are increased noticeably, especially on a single-CPU system. Fix this problem by explicitly allowing other tasks to be scheduled each time a subchannel has been processed. Signed-off-by: Peter Oberparleiter Reviewed-by: Sebastian Ott Signed-off-by: Martin Schwidefsky diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c index 8c2cb87..c1f9cc7 100644 --- a/drivers/s390/cio/css.c +++ b/drivers/s390/cio/css.c @@ -553,6 +553,9 @@ static int slow_eval_unknown_fn(struct subchannel_id schid, void *data) default: rc = 0; } + /* Allow scheduling here since the containing loop might + * take a while. */ + cond_resched(); } return rc; } -- cgit v0.10.2 From 175746eb06a8925274558793814d8c802dc48276 Mon Sep 17 00:00:00 2001 From: Peter Oberparleiter Date: Tue, 26 Nov 2013 14:58:08 +0100 Subject: s390/cio: Delay scan for newly available I/O devices The CIO layer scans for newly available I/O devices by performing a scan of available subchannels using the Store Subchannel (STSCH) instruction. Performing too many STSCH instructions in a tight loop can cause high Hypervisor overhead which can negatively impact the performance of the virtual machine as a whole. A subchannel scan is triggered for example during a hardware event that indicates that a channel path has become available. It is also triggered by the DASD device driver for each device that is set online. This patch reduces the number of STSCH instructions being performed by delaying the start of the actual subchannel scan by 1 second. Multiple scan requests that are scheduled during this time will be merged into a single scan loop. The trade-off consists of a short delay that is introduced between the time that the event is processed and a newly available device becoming usable. This delay should be acceptable since it only affects devices that have not been in use before. Signed-off-by: Peter Oberparleiter Reviewed-by: Sebastian Ott Signed-off-by: Martin Schwidefsky diff --git a/drivers/s390/cio/blacklist.c b/drivers/s390/cio/blacklist.c index a9fe3de..77ee4da 100644 --- a/drivers/s390/cio/blacklist.c +++ b/drivers/s390/cio/blacklist.c @@ -269,7 +269,7 @@ static int blacklist_parse_proc_parameters(char *buf) else return -EINVAL; - css_schedule_reprobe(); + css_schedule_eval_all_unreg(0); return rc; } diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c index c1f9cc7..604333e 100644 --- a/drivers/s390/cio/css.c +++ b/drivers/s390/cio/css.c @@ -575,7 +575,7 @@ static void css_slow_path_func(struct work_struct *unused) spin_unlock_irqrestore(&slow_subchannel_lock, flags); } -static DECLARE_WORK(slow_path_work, css_slow_path_func); +static DECLARE_DELAYED_WORK(slow_path_work, css_slow_path_func); struct workqueue_struct *cio_work_q; void css_schedule_eval(struct subchannel_id schid) @@ -585,7 +585,7 @@ void css_schedule_eval(struct subchannel_id schid) spin_lock_irqsave(&slow_subchannel_lock, flags); idset_sch_add(slow_subchannel_set, schid); atomic_set(&css_eval_scheduled, 1); - queue_work(cio_work_q, &slow_path_work); + queue_delayed_work(cio_work_q, &slow_path_work, 0); spin_unlock_irqrestore(&slow_subchannel_lock, flags); } @@ -596,7 +596,7 @@ void css_schedule_eval_all(void) spin_lock_irqsave(&slow_subchannel_lock, flags); idset_fill(slow_subchannel_set); atomic_set(&css_eval_scheduled, 1); - queue_work(cio_work_q, &slow_path_work); + queue_delayed_work(cio_work_q, &slow_path_work, 0); spin_unlock_irqrestore(&slow_subchannel_lock, flags); } @@ -609,7 +609,7 @@ static int __unset_registered(struct device *dev, void *data) return 0; } -static void css_schedule_eval_all_unreg(void) +void css_schedule_eval_all_unreg(unsigned long delay) { unsigned long flags; struct idset *unreg_set; @@ -627,7 +627,7 @@ static void css_schedule_eval_all_unreg(void) spin_lock_irqsave(&slow_subchannel_lock, flags); idset_add_set(slow_subchannel_set, unreg_set); atomic_set(&css_eval_scheduled, 1); - queue_work(cio_work_q, &slow_path_work); + queue_delayed_work(cio_work_q, &slow_path_work, delay); spin_unlock_irqrestore(&slow_subchannel_lock, flags); idset_free(unreg_set); } @@ -640,7 +640,8 @@ void css_wait_for_slow_path(void) /* Schedule reprobing of all unregistered subchannels. */ void css_schedule_reprobe(void) { - css_schedule_eval_all_unreg(); + /* Schedule with a delay to allow merging of subsequent calls. */ + css_schedule_eval_all_unreg(1 * HZ); } EXPORT_SYMBOL_GPL(css_schedule_reprobe); diff --git a/drivers/s390/cio/css.h b/drivers/s390/cio/css.h index 2935132..2c9107e 100644 --- a/drivers/s390/cio/css.h +++ b/drivers/s390/cio/css.h @@ -133,6 +133,7 @@ extern struct channel_subsystem *channel_subsystems[]; /* Helper functions to build lists for the slow path. */ void css_schedule_eval(struct subchannel_id schid); void css_schedule_eval_all(void); +void css_schedule_eval_all_unreg(unsigned long delay); int css_complete_work(void); int sch_is_pseudo_sch(struct subchannel *); -- cgit v0.10.2 From 47d30674d6c79197fa7bfb0d09d5857f61e0277b Mon Sep 17 00:00:00 2001 From: Peter Oberparleiter Date: Tue, 26 Nov 2013 14:59:21 +0100 Subject: s390/css: Prevent unnecessary allocation in subchannel loop Subchannel looping function for_each_subchannel_staged() allocates a subchannel-ID-bitmap to efficiently iterate over the list of known and unknown subchannels. Since this function is also used to iterate over known-subchannels only, optimize that case by not requiring the ID-bitmap allocation and falling back to simple bus_for_each_dev() looping. Signed-off-by: Peter Oberparleiter Reviewed-by: Sebastian Ott Signed-off-by: Martin Schwidefsky diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c index 604333e..0268e5f 100644 --- a/drivers/s390/cio/css.c +++ b/drivers/s390/cio/css.c @@ -69,7 +69,8 @@ static int call_fn_known_sch(struct device *dev, void *data) struct cb_data *cb = data; int rc = 0; - idset_sch_del(cb->set, sch->schid); + if (cb->set) + idset_sch_del(cb->set, sch->schid); if (cb->fn_known_sch) rc = cb->fn_known_sch(sch, cb->data); return rc; @@ -115,6 +116,13 @@ int for_each_subchannel_staged(int (*fn_known)(struct subchannel *, void *), cb.fn_known_sch = fn_known; cb.fn_unknown_sch = fn_unknown; + if (fn_known && !fn_unknown) { + /* Skip idset allocation in case of known-only loop. */ + cb.set = NULL; + return bus_for_each_dev(&css_bus_type, NULL, &cb, + call_fn_known_sch); + } + cb.set = idset_sch_new(); if (!cb.set) /* fall back to brute force scanning in case of oom */ -- cgit v0.10.2 From e6b2551425d961e68e70e806e22ec40787f7ed67 Mon Sep 17 00:00:00 2001 From: Peter Oberparleiter Date: Tue, 26 Nov 2013 15:00:37 +0100 Subject: s390/blacklist: Perform subchannel scan only when needed Move scheduling of a subchannel scan to those instances where new devices may actually have become available. This reduces unnecessary scan work in case devices were added to the blacklist. Signed-off-by: Peter Oberparleiter Reviewed-by: Sebastian Ott Signed-off-by: Martin Schwidefsky diff --git a/drivers/s390/cio/blacklist.c b/drivers/s390/cio/blacklist.c index 77ee4da..b3f791b 100644 --- a/drivers/s390/cio/blacklist.c +++ b/drivers/s390/cio/blacklist.c @@ -260,16 +260,16 @@ static int blacklist_parse_proc_parameters(char *buf) parm = strsep(&buf, " "); - if (strcmp("free", parm) == 0) + if (strcmp("free", parm) == 0) { rc = blacklist_parse_parameters(buf, free, 0); - else if (strcmp("add", parm) == 0) + css_schedule_eval_all_unreg(0); + } else if (strcmp("add", parm) == 0) rc = blacklist_parse_parameters(buf, add, 0); else if (strcmp("purge", parm) == 0) return ccw_purge_blacklisted(); else return -EINVAL; - css_schedule_eval_all_unreg(0); return rc; } -- cgit v0.10.2 From dea24190fbfb340dea5224e32161955514bef31b Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 3 Dec 2013 10:06:29 +0100 Subject: s390/smp: only send external call ipi if needed If the per cpu ec_mask bit of the receiving cpu is already set there is no need to send an ipi, since a different cpu has already sent an ipi and the receiving cpu has not yet executed the external call ipi handler. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index dc4a534..86b2913 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -159,9 +159,9 @@ static void pcpu_ec_call(struct pcpu *pcpu, int ec_bit) { int order; - set_bit(ec_bit, &pcpu->ec_mask); - order = pcpu_running(pcpu) ? - SIGP_EXTERNAL_CALL : SIGP_EMERGENCY_SIGNAL; + if (test_and_set_bit(ec_bit, &pcpu->ec_mask)) + return; + order = pcpu_running(pcpu) ? SIGP_EXTERNAL_CALL : SIGP_EMERGENCY_SIGNAL; pcpu_sigp_retry(pcpu, order, 0); } -- cgit v0.10.2 From 1c182a628075af7431edb8155601740867b5ae51 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 3 Dec 2013 11:09:10 +0100 Subject: s390/ptrace: simplify enable/disable single step The user_enable_single_step() and user_disable_sindle_step() functions are always called on the inferior, never for the currently active process. Remove the unnecessary check for the current process and the update_cr_regs() call from the enable/disable functions. Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index e65c91c..c369a26 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -107,15 +107,11 @@ void update_cr_regs(struct task_struct *task) void user_enable_single_step(struct task_struct *task) { set_tsk_thread_flag(task, TIF_SINGLE_STEP); - if (task == current) - update_cr_regs(task); } void user_disable_single_step(struct task_struct *task) { clear_tsk_thread_flag(task, TIF_SINGLE_STEP); - if (task == current) - update_cr_regs(task); } /* -- cgit v0.10.2 From c63badebfebacdba827ab1cc1d420fc81bd8d818 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 3 Dec 2013 14:57:18 +0100 Subject: s390: optimize control register update It is less expensive to update control registers 0 and 2 with two individual stctg/lctlg instructions as with a single one that spans control register 0, 1 and 2. Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index c369a26..f6be608 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -56,25 +56,26 @@ void update_cr_regs(struct task_struct *task) #ifdef CONFIG_64BIT /* Take care of the enable/disable of transactional execution. */ if (MACHINE_HAS_TE) { - unsigned long cr[3], cr_new[3]; + unsigned long cr, cr_new; - __ctl_store(cr, 0, 2); - cr_new[1] = cr[1]; + __ctl_store(cr, 0, 0); /* Set or clear transaction execution TXC bit 8. */ + cr_new = cr | (1UL << 55); if (task->thread.per_flags & PER_FLAG_NO_TE) - cr_new[0] = cr[0] & ~(1UL << 55); - else - cr_new[0] = cr[0] | (1UL << 55); + cr_new &= ~(1UL << 55); + if (cr_new != cr) + __ctl_load(cr, 0, 0); /* Set or clear transaction execution TDC bits 62 and 63. */ - cr_new[2] = cr[2] & ~3UL; + __ctl_store(cr, 2, 2); + cr_new = cr & ~3UL; if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND) { if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND_TEND) - cr_new[2] |= 1UL; + cr_new |= 1UL; else - cr_new[2] |= 2UL; + cr_new |= 2UL; } - if (memcmp(&cr_new, &cr, sizeof(cr))) - __ctl_load(cr_new, 0, 2); + if (cr_new != cr) + __ctl_load(cr_new, 2, 2); } #endif /* Copy user specified PER registers */ -- cgit v0.10.2 From 03439e7d0a7ab3d77a74523b9ba64736c0fc28de Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 4 Dec 2013 14:29:11 +0100 Subject: s390/3270: fix use after free of tty3270_screen structure The deactivation and freeing of the tty view of the 3270 device can race with a tty3270_update invocation via the update timer. To fix this move the del_timer_sync call for the update timer from tty3270_free_view to tty3270_free prior to the tty3270_free_screen call. Signed-off-by: Martin Schwidefsky diff --git a/drivers/s390/char/tty3270.c b/drivers/s390/char/tty3270.c index 3f4ca4e..07cf182 100644 --- a/drivers/s390/char/tty3270.c +++ b/drivers/s390/char/tty3270.c @@ -125,10 +125,7 @@ static void tty3270_resize_work(struct work_struct *work); */ static void tty3270_set_timer(struct tty3270 *tp, int expires) { - if (expires == 0) - del_timer(&tp->timer); - else - mod_timer(&tp->timer, jiffies + expires); + mod_timer(&tp->timer, jiffies + expires); } /* @@ -744,7 +741,6 @@ tty3270_free_view(struct tty3270 *tp) { int pages; - del_timer_sync(&tp->timer); kbd_free(tp->kbd); raw3270_request_free(tp->kreset); raw3270_request_free(tp->read); @@ -877,6 +873,7 @@ tty3270_free(struct raw3270_view *view) { struct tty3270 *tp = container_of(view, struct tty3270, view); + del_timer_sync(&tp->timer); tty3270_free_screen(tp->screen, tp->view.rows); tty3270_free_view(tp); } -- cgit v0.10.2 From 96619fc1b3d06703113ab4c5cd8c15f35a42dc99 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 5 Dec 2013 12:42:09 +0100 Subject: s390/smp: reduce memory consumption of pcpu_devices array Remove the embedded struct cpu from struct pcpu and replace it with a pointer instead. The struct cpu now gets allocated when a new cpu gets detected. The size of the pcpu_devices array (NR_CPUS * sizeof(struct pcpu)) gets reduced by nearly 120KB. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 86b2913..8ceefc9 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -59,7 +59,7 @@ enum { }; struct pcpu { - struct cpu cpu; + struct cpu *cpu; struct _lowcore *lowcore; /* lowcore page(s) for the cpu */ unsigned long async_stack; /* async stack for the cpu */ unsigned long panic_stack; /* panic stack for the cpu */ @@ -958,7 +958,7 @@ static int smp_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { unsigned int cpu = (unsigned int)(long)hcpu; - struct cpu *c = &pcpu_devices[cpu].cpu; + struct cpu *c = pcpu_devices[cpu].cpu; struct device *s = &c->dev; int err = 0; @@ -975,10 +975,15 @@ static int smp_cpu_notify(struct notifier_block *self, unsigned long action, static int smp_add_present_cpu(int cpu) { - struct cpu *c = &pcpu_devices[cpu].cpu; - struct device *s = &c->dev; + struct device *s; + struct cpu *c; int rc; + c = kzalloc(sizeof(*c), GFP_KERNEL); + if (!c) + return -ENOMEM; + pcpu_devices[cpu].cpu = c; + s = &c->dev; c->hotpluggable = 1; rc = register_cpu(c, cpu); if (rc) -- cgit v0.10.2 From 41932bc1c86e527f866acfcd26506da3bd20509b Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 10 Dec 2013 16:18:07 +0100 Subject: s390/compat: correct check for EFAULT in rt-signal frame creation The return code of the __put_user call to store the rt_sigreturn system call to the user stack if not properly checked, the err variable is only checked before to the __put_user. Use an if statement instead. Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c index 95e7ba0..8b84bc3 100644 --- a/arch/s390/kernel/compat_signal.c +++ b/arch/s390/kernel/compat_signal.c @@ -412,8 +412,9 @@ static int setup_rt_frame32(int sig, struct k_sigaction *ka, siginfo_t *info, regs->gprs[14] = (__u64 __force) ka->sa.sa_restorer | PSW32_ADDR_AMODE; } else { regs->gprs[14] = (__u64 __force) frame->retcode | PSW32_ADDR_AMODE; - err |= __put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn, - (u16 __force __user *)(frame->retcode)); + if (__put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn, + (u16 __force __user *)(frame->retcode))) + goto give_sigsegv; } /* Set up backchain. */ -- cgit v0.10.2 From 333cce91f384409fb6a3e656dc11e21e872c5ea2 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Thu, 5 Dec 2013 18:46:51 +0100 Subject: s390/sclp_early: Get rid of sclp_early_read_info_sccb_valid The early sclp detect functions gather the available SCLP facility information. The sclp_early_read_info_sccb_valid indicates whether the early sclp request was valid. However, one external reference to it checks for particular sclp facility bits and this should be sufficient. Another occurance is in the sclp_get_ipl_info() function that is called later. Because all information are available at the early stage, save the ipl information when detecting the sclp facilities. Hence, no more checks for sclp_early_read_info_sccb_valid are required. Signed-off-by: Hendrik Brueckner Reviewed-by: Michael Holzheu Reviewed-by: Heiko Carstens Signed-off-by: Martin Schwidefsky diff --git a/drivers/s390/char/sclp.h b/drivers/s390/char/sclp.h index 6fbe096..fea76ae 100644 --- a/drivers/s390/char/sclp.h +++ b/drivers/s390/char/sclp.h @@ -183,7 +183,6 @@ extern unsigned long sclp_console_full; extern u8 sclp_fac84; extern unsigned long long sclp_rzm; extern unsigned long long sclp_rnmax; -extern __initdata int sclp_early_read_info_sccb_valid; /* useful inlines */ diff --git a/drivers/s390/char/sclp_cmd.c b/drivers/s390/char/sclp_cmd.c index eaa21d5..cb3c4e0 100644 --- a/drivers/s390/char/sclp_cmd.c +++ b/drivers/s390/char/sclp_cmd.c @@ -455,8 +455,6 @@ static int __init sclp_detect_standby_memory(void) if (OLDMEM_BASE) /* No standby memory in kdump mode */ return 0; - if (!sclp_early_read_info_sccb_valid) - return 0; if ((sclp_facilities & 0xe00000000000ULL) != 0xe00000000000ULL) return 0; rc = -ENOMEM; diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c index 1465e95..e4e5b32 100644 --- a/drivers/s390/char/sclp_early.c +++ b/drivers/s390/char/sclp_early.c @@ -38,8 +38,8 @@ struct read_info_sccb { static __initdata struct read_info_sccb early_read_info_sccb; static __initdata char sccb_early[PAGE_SIZE] __aligned(PAGE_SIZE); static unsigned long sclp_hsa_size; +static struct sclp_ipl_info sclp_ipl_info; -__initdata int sclp_early_read_info_sccb_valid; u64 sclp_facilities; u8 sclp_fac84; unsigned long long sclp_rzm; @@ -63,10 +63,9 @@ out: return rc; } -static void __init sclp_read_info_early(void) +static int __init sclp_read_info_early(void) { - int rc; - int i; + int rc, i; struct read_info_sccb *sccb; sclp_cmdw_t commands[] = {SCLP_CMDW_READ_SCP_INFO_FORCED, SCLP_CMDW_READ_SCP_INFO}; @@ -83,21 +82,19 @@ static void __init sclp_read_info_early(void) if (rc) break; - if (sccb->header.response_code == 0x10) { - sclp_early_read_info_sccb_valid = 1; - break; - } + if (sccb->header.response_code == 0x10) + return 0; if (sccb->header.response_code != 0x1f0) break; } + return -EIO; } static void __init sclp_facilities_detect(void) { struct read_info_sccb *sccb; - sclp_read_info_early(); - if (!sclp_early_read_info_sccb_valid) + if (sclp_read_info_early()) return; sccb = &early_read_info_sccb; @@ -108,6 +105,12 @@ static void __init sclp_facilities_detect(void) sclp_rnmax = sccb->rnmax ? sccb->rnmax : sccb->rnmax2; sclp_rzm = sccb->rnsize ? sccb->rnsize : sccb->rnsize2; sclp_rzm <<= 20; + + /* Save IPL information */ + sclp_ipl_info.is_valid = 1; + if (sccb->flags & 0x2) + sclp_ipl_info.has_dump = 1; + memcpy(&sclp_ipl_info.loadparm, &sccb->loadparm, LOADPARM_LEN); } bool __init sclp_has_linemode(void) @@ -146,19 +149,12 @@ unsigned long long sclp_get_rzm(void) /* * This function will be called after sclp_facilities_detect(), which gets - * called from early.c code. Therefore the sccb should have valid contents. + * called from early.c code. The sclp_facilities_detect() function retrieves + * and saves the IPL information. */ void __init sclp_get_ipl_info(struct sclp_ipl_info *info) { - struct read_info_sccb *sccb; - - if (!sclp_early_read_info_sccb_valid) - return; - sccb = &early_read_info_sccb; - info->is_valid = 1; - if (sccb->flags & 0x2) - info->has_dump = 1; - memcpy(&info->loadparm, &sccb->loadparm, LOADPARM_LEN); + *info = sclp_ipl_info; } static int __init sclp_cmd_early(sclp_cmdw_t cmd, void *sccb) -- cgit v0.10.2 From 56e57a84a7856130f45009ce1713dc8ec8e59887 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Thu, 5 Dec 2013 19:03:50 +0100 Subject: s390/sclp_early: Replace early_read_info_sccb with sccb_early Replace early_read_info_sccb and use sccb_early instead. Also saves some memory. Signed-off-by: Hendrik Brueckner Reviewed-by: Michael Holzheu Reviewed-by: Heiko Carstens Signed-off-by: Martin Schwidefsky diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c index e4e5b32..dc4ed04 100644 --- a/drivers/s390/char/sclp_early.c +++ b/drivers/s390/char/sclp_early.c @@ -35,8 +35,7 @@ struct read_info_sccb { u8 _reserved5[4096 - 112]; /* 112-4095 */ } __packed __aligned(PAGE_SIZE); -static __initdata struct read_info_sccb early_read_info_sccb; -static __initdata char sccb_early[PAGE_SIZE] __aligned(PAGE_SIZE); +static char sccb_early[PAGE_SIZE] __aligned(PAGE_SIZE) __initdata; static unsigned long sclp_hsa_size; static struct sclp_ipl_info sclp_ipl_info; @@ -63,14 +62,12 @@ out: return rc; } -static int __init sclp_read_info_early(void) +static int __init sclp_read_info_early(struct read_info_sccb *sccb) { int rc, i; - struct read_info_sccb *sccb; sclp_cmdw_t commands[] = {SCLP_CMDW_READ_SCP_INFO_FORCED, SCLP_CMDW_READ_SCP_INFO}; - sccb = &early_read_info_sccb; for (i = 0; i < ARRAY_SIZE(commands); i++) { do { memset(sccb, 0, sizeof(*sccb)); @@ -92,12 +89,11 @@ static int __init sclp_read_info_early(void) static void __init sclp_facilities_detect(void) { - struct read_info_sccb *sccb; + struct read_info_sccb *sccb = (void *) &sccb_early; - if (sclp_read_info_early()) + if (sclp_read_info_early(sccb)) return; - sccb = &early_read_info_sccb; sclp_facilities = sccb->facilities; sclp_fac84 = sccb->fac84; if (sccb->fac85 & 0x02) -- cgit v0.10.2 From 5d5de1a068efffb0dcc03235e6fa258201096f02 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Thu, 5 Dec 2013 19:13:36 +0100 Subject: s390/sclp_early: Pass sccb pointer to every *_detect() function Add a sccb pointer parameter to *_detect() functions instead of accessing the global sccb_early variable directly. Signed-off-by: Hendrik Brueckner Reviewed-by: Michael Holzheu Reviewed-by: Heiko Carstens Signed-off-by: Martin Schwidefsky diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c index dc4ed04..1af3555 100644 --- a/drivers/s390/char/sclp_early.c +++ b/drivers/s390/char/sclp_early.c @@ -87,10 +87,8 @@ static int __init sclp_read_info_early(struct read_info_sccb *sccb) return -EIO; } -static void __init sclp_facilities_detect(void) +static void __init sclp_facilities_detect(struct read_info_sccb *sccb) { - struct read_info_sccb *sccb = (void *) &sccb_early; - if (sclp_read_info_early(sccb)) return; @@ -181,11 +179,10 @@ static void __init sccb_init_eq_size(struct sdias_sccb *sccb) sccb->evbuf.dbs = 1; } -static int __init sclp_set_event_mask(unsigned long receive_mask, +static int __init sclp_set_event_mask(struct init_sccb *sccb, + unsigned long receive_mask, unsigned long send_mask) { - struct init_sccb *sccb = (void *) &sccb_early; - memset(sccb, 0, sizeof(*sccb)); sccb->header.length = sizeof(*sccb); sccb->mask_length = sizeof(sccb_mask_t); @@ -194,10 +191,8 @@ static int __init sclp_set_event_mask(unsigned long receive_mask, return sclp_cmd_early(SCLP_CMDW_WRITE_EVENT_MASK, sccb); } -static long __init sclp_hsa_size_init(void) +static long __init sclp_hsa_size_init(struct sdias_sccb *sccb) { - struct sdias_sccb *sccb = (void *) &sccb_early; - sccb_init_eq_size(sccb); if (sclp_cmd_early(SCLP_CMDW_WRITE_EVENT_DATA, sccb)) return -EIO; @@ -206,10 +201,8 @@ static long __init sclp_hsa_size_init(void) return 0; } -static long __init sclp_hsa_copy_wait(void) +static long __init sclp_hsa_copy_wait(struct sccb_header *sccb) { - struct sccb_header *sccb = (void *) &sccb_early; - memset(sccb, 0, PAGE_SIZE); sccb->length = PAGE_SIZE; if (sclp_cmd_early(SCLP_CMDW_READ_EVENT_DATA, sccb)) @@ -222,25 +215,25 @@ unsigned long sclp_get_hsa_size(void) return sclp_hsa_size; } -static void __init sclp_hsa_size_detect(void) +static void __init sclp_hsa_size_detect(void *sccb) { long size; /* First try synchronous interface (LPAR) */ - if (sclp_set_event_mask(0, 0x40000010)) + if (sclp_set_event_mask(sccb, 0, 0x40000010)) return; - size = sclp_hsa_size_init(); + size = sclp_hsa_size_init(sccb); if (size < 0) return; if (size != 0) goto out; /* Then try asynchronous interface (z/VM) */ - if (sclp_set_event_mask(0x00000010, 0x40000010)) + if (sclp_set_event_mask(sccb, 0x00000010, 0x40000010)) return; - size = sclp_hsa_size_init(); + size = sclp_hsa_size_init(sccb); if (size < 0) return; - size = sclp_hsa_copy_wait(); + size = sclp_hsa_copy_wait(sccb); if (size < 0) return; out: @@ -249,7 +242,9 @@ out: void __init sclp_early_detect(void) { - sclp_facilities_detect(); - sclp_hsa_size_detect(); - sclp_set_event_mask(0, 0); + void *sccb = &sccb_early; + + sclp_facilities_detect(sccb); + sclp_hsa_size_detect(sccb); + sclp_set_event_mask(sccb, 0, 0); } -- cgit v0.10.2 From 52733e0152dad719ed6374b56fd1c33e784e44b3 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Thu, 5 Dec 2013 19:28:39 +0100 Subject: s390/sclp_early: Add function to detect sclp console capabilities Add SCLP console detect functions to encapsulate detection of SCLP console capabilities, for example, VT220 support. Reuse the sclp_send/receive masks that were stored by the most recent sclp_set_event_mask() call to prevent unnecessary SCLP calls. Signed-off-by: Hendrik Brueckner Reviewed-by: Michael Holzheu Reviewed-by: Heiko Carstens Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index 2f39095..220e171 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -52,8 +52,8 @@ int sclp_chp_configure(struct chp_id chpid); int sclp_chp_deconfigure(struct chp_id chpid); int sclp_chp_read_info(struct sclp_chp_info *info); void sclp_get_ipl_info(struct sclp_ipl_info *info); -bool sclp_has_linemode(void); -bool sclp_has_vt220(void); +bool __init sclp_has_linemode(void); +bool __init sclp_has_vt220(void); int sclp_pci_configure(u32 fid); int sclp_pci_deconfigure(u32 fid); int memcpy_hsa(void *dest, unsigned long src, size_t count, int mode); diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c index 1af3555..82f2c38 100644 --- a/drivers/s390/char/sclp_early.c +++ b/drivers/s390/char/sclp_early.c @@ -36,6 +36,8 @@ struct read_info_sccb { } __packed __aligned(PAGE_SIZE); static char sccb_early[PAGE_SIZE] __aligned(PAGE_SIZE) __initdata; +static unsigned int sclp_con_has_vt220 __initdata; +static unsigned int sclp_con_has_linemode __initdata; static unsigned long sclp_hsa_size; static struct sclp_ipl_info sclp_ipl_info; @@ -109,26 +111,12 @@ static void __init sclp_facilities_detect(struct read_info_sccb *sccb) bool __init sclp_has_linemode(void) { - struct init_sccb *sccb = (void *) &sccb_early; - - if (sccb->header.response_code != 0x20) - return 0; - if (!(sccb->sclp_send_mask & (EVTYP_OPCMD_MASK | EVTYP_PMSGCMD_MASK))) - return 0; - if (!(sccb->sclp_receive_mask & (EVTYP_MSG_MASK | EVTYP_PMSGCMD_MASK))) - return 0; - return 1; + return !!sclp_con_has_linemode; } bool __init sclp_has_vt220(void) { - struct init_sccb *sccb = (void *) &sccb_early; - - if (sccb->header.response_code != 0x20) - return 0; - if (sccb->sclp_send_mask & EVTYP_VT220MSG_MASK) - return 1; - return 0; + return !!sclp_con_has_vt220; } unsigned long long sclp_get_rnmax(void) @@ -240,11 +228,37 @@ out: sclp_hsa_size = size; } +static unsigned int __init sclp_con_check_linemode(struct init_sccb *sccb) +{ + if (!(sccb->sclp_send_mask & (EVTYP_OPCMD_MASK | EVTYP_PMSGCMD_MASK))) + return 0; + if (!(sccb->sclp_receive_mask & (EVTYP_MSG_MASK | EVTYP_PMSGCMD_MASK))) + return 0; + return 1; +} + +static void __init sclp_console_detect(struct init_sccb *sccb) +{ + if (sccb->header.response_code != 0x20) + return; + + if (sccb->sclp_send_mask & EVTYP_VT220MSG_MASK) + sclp_con_has_vt220 = 1; + + if (sclp_con_check_linemode(sccb)) + sclp_con_has_linemode = 1; +} + void __init sclp_early_detect(void) { void *sccb = &sccb_early; sclp_facilities_detect(sccb); sclp_hsa_size_detect(sccb); + + /* Turn off SCLP event notifications. Also save remote masks in the + * sccb. These are sufficient to detect sclp console capabilities. + */ sclp_set_event_mask(sccb, 0, 0); + sclp_console_detect(sccb); } -- cgit v0.10.2 From cf48ad83278aad39d4a0158cd085b6038b2941e6 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Wed, 11 Dec 2013 12:15:52 +0100 Subject: s390/oprofile: move hwsampler interfaces to cpu_mf.h Extract and move the oprofile hwsampler data structures and interfaces to the cpu_mf.h header file which contains common interface definitions for the various CPU-measurement facilities. This change is necessary for a new perf PMU. Few interface names have been revised to fit to the latest CPU-measurement facilities documentation. Also declare the data structures as __packed and correct checkpatch findings. Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h index c879fad..f6dddea 100644 --- a/arch/s390/include/asm/cpu_mf.h +++ b/arch/s390/include/asm/cpu_mf.h @@ -56,6 +56,78 @@ struct cpumf_ctr_info { u32 reserved2[12]; } __packed; +/* QUERY SAMPLING INFORMATION block */ +struct hws_qsi_info_block { /* Bit(s) */ + unsigned int b0_13:14; /* 0-13: zeros */ + unsigned int as:1; /* 14: sampling authorisation control*/ + unsigned int b15_21:7; /* 15-21: zeros */ + unsigned int es:1; /* 22: sampling enable control */ + unsigned int b23_29:7; /* 23-29: zeros */ + unsigned int cs:1; /* 30: sampling activation control */ + unsigned int:1; /* 31: reserved */ + unsigned int bsdes:16; /* 4-5: size of basic sampling entry */ + unsigned int dsdes:16; /* 6-7: size of diagnostic sampling entry */ + unsigned long min_sampl_rate; /* 8-15: minimum sampling interval */ + unsigned long max_sampl_rate; /* 16-23: maximum sampling interval*/ + unsigned long tear; /* 24-31: TEAR contents */ + unsigned long dear; /* 32-39: DEAR contents */ + unsigned int rsvrd0; /* 40-43: reserved */ + unsigned int cpu_speed; /* 44-47: CPU speed */ + unsigned long long rsvrd1; /* 48-55: reserved */ + unsigned long long rsvrd2; /* 56-63: reserved */ +} __packed; + +/* SET SAMPLING CONTROLS request block */ +struct hws_lsctl_request_block { + unsigned int s:1; /* 0: maximum buffer indicator */ + unsigned int h:1; /* 1: part. level reserved for VM use*/ + unsigned long long b2_53:52;/* 2-53: zeros */ + unsigned int es:1; /* 54: sampling enable control */ + unsigned int b55_61:7; /* 55-61: - zeros */ + unsigned int cs:1; /* 62: sampling activation control */ + unsigned int b63:1; /* 63: zero */ + unsigned long interval; /* 8-15: sampling interval */ + unsigned long tear; /* 16-23: TEAR contents */ + unsigned long dear; /* 24-31: DEAR contents */ + /* 32-63: */ + unsigned long rsvrd1; /* reserved */ + unsigned long rsvrd2; /* reserved */ + unsigned long rsvrd3; /* reserved */ + unsigned long rsvrd4; /* reserved */ +} __packed; + + +struct hws_data_entry { + unsigned int def:16; /* 0-15 Data Entry Format */ + unsigned int R:4; /* 16-19 reserved */ + unsigned int U:4; /* 20-23 Number of unique instruct. */ + unsigned int z:2; /* zeros */ + unsigned int T:1; /* 26 PSW DAT mode */ + unsigned int W:1; /* 27 PSW wait state */ + unsigned int P:1; /* 28 PSW Problem state */ + unsigned int AS:2; /* 29-30 PSW address-space control */ + unsigned int I:1; /* 31 entry valid or invalid */ + unsigned int:16; + unsigned int prim_asn:16; /* primary ASN */ + unsigned long long ia; /* Instruction Address */ + unsigned long long gpp; /* Guest Program Parameter */ + unsigned long long hpp; /* Host Program Parameter */ +} __packed; + +struct hws_trailer_entry { + unsigned int f:1; /* 0 - Block Full Indicator */ + unsigned int a:1; /* 1 - Alert request control */ + unsigned int t:1; /* 2 - Timestamp format */ + unsigned long long:61; /* 3 - 63: Reserved */ + unsigned long long overflow; /* 64 - sample Overflow count */ + unsigned long long timestamp; /* 16 - time-stamp */ + unsigned long long timestamp1; /* */ + unsigned long long reserved1; /* 32 -Reserved */ + unsigned long long reserved2; /* */ + unsigned long long progusage1; /* 48 - reserved for programming use */ + unsigned long long progusage2; /* */ +} __packed; + /* Query counter information */ static inline int qctri(struct cpumf_ctr_info *info) { @@ -99,4 +171,70 @@ static inline int ecctr(u64 ctr, u64 *val) return cc; } +/* Query sampling information */ +static inline int qsi(struct hws_qsi_info_block *info) +{ + int cc; + cc = 1; + + asm volatile( + "0: .insn s,0xb2860000,0(%1)\n" + "1: lhi %0,0\n" + "2:\n" + EX_TABLE(0b, 2b) EX_TABLE(1b, 2b) + : "=d" (cc), "+a" (info) + : "m" (*info) + : "cc", "memory"); + + return cc ? -EINVAL : 0; +} + +/* Load sampling controls */ +static inline int lsctl(struct hws_lsctl_request_block *req) +{ + int cc; + + cc = 1; + asm volatile( + "0: .insn s,0xb2870000,0(%1)\n" + "1: ipm %0\n" + " srl %0,28\n" + "2:\n" + EX_TABLE(0b, 2b) EX_TABLE(1b, 2b) + : "+d" (cc), "+a" (req) + : "m" (*req) + : "cc", "memory"); + + return cc ? -EINVAL : 0; +} + +/* Sampling control helper functions */ + +#define SDB_TE_ALERT_REQ_MASK 0x4000000000000000UL +#define SDB_TE_BUFFER_FULL_MASK 0x8000000000000000UL + +/* Return pointer to trailer entry of an sample data block */ +static inline unsigned long *trailer_entry_ptr(unsigned long v) +{ + void *ret; + + ret = (void *) v; + ret += PAGE_SIZE; + ret -= sizeof(struct hws_trailer_entry); + + return (unsigned long *) ret; +} + +/* Return if the entry in the sample data block table (sdbt) + * is a link to the next sdbt */ +static inline int is_link_entry(unsigned long *s) +{ + return *s & 0x1ul ? 1 : 0; +} + +/* Return pointer to the linked sdbt */ +static inline unsigned long *get_next_sdbt(unsigned long *s) +{ + return (unsigned long *) (*s & ~0x1ul); +} #endif /* _ASM_S390_CPU_MF_H */ diff --git a/arch/s390/oprofile/hwsampler.c b/arch/s390/oprofile/hwsampler.c index 231ceca..bbca76a 100644 --- a/arch/s390/oprofile/hwsampler.c +++ b/arch/s390/oprofile/hwsampler.c @@ -26,9 +26,6 @@ #define MAX_NUM_SDB 511 #define MIN_NUM_SDB 1 -#define ALERT_REQ_MASK 0x4000000000000000ul -#define BUFFER_FULL_MASK 0x8000000000000000ul - DECLARE_PER_CPU(struct hws_cpu_buffer, sampler_cpu_buffer); struct hws_execute_parms { @@ -65,43 +62,6 @@ static unsigned long interval; static unsigned long min_sampler_rate; static unsigned long max_sampler_rate; -static int ssctl(void *buffer) -{ - int cc; - - /* set in order to detect a program check */ - cc = 1; - - asm volatile( - "0: .insn s,0xB2870000,0(%1)\n" - "1: ipm %0\n" - " srl %0,28\n" - "2:\n" - EX_TABLE(0b, 2b) EX_TABLE(1b, 2b) - : "+d" (cc), "+a" (buffer) - : "m" (*((struct hws_ssctl_request_block *)buffer)) - : "cc", "memory"); - - return cc ? -EINVAL : 0 ; -} - -static int qsi(void *buffer) -{ - int cc; - cc = 1; - - asm volatile( - "0: .insn s,0xB2860000,0(%1)\n" - "1: lhi %0,0\n" - "2:\n" - EX_TABLE(0b, 2b) EX_TABLE(1b, 2b) - : "=d" (cc), "+a" (buffer) - : "m" (*((struct hws_qsi_info_block *)buffer)) - : "cc", "memory"); - - return cc ? -EINVAL : 0; -} - static void execute_qsi(void *parms) { struct hws_execute_parms *ep = parms; @@ -113,7 +73,7 @@ static void execute_ssctl(void *parms) { struct hws_execute_parms *ep = parms; - ep->rc = ssctl(ep->buffer); + ep->rc = lsctl(ep->buffer); } static int smp_ctl_ssctl_stop(int cpu) @@ -214,17 +174,6 @@ static int smp_ctl_qsi(int cpu) return ep.rc; } -static inline unsigned long *trailer_entry_ptr(unsigned long v) -{ - void *ret; - - ret = (void *)v; - ret += PAGE_SIZE; - ret -= sizeof(struct hws_trailer_entry); - - return (unsigned long *) ret; -} - static void hws_ext_handler(struct ext_code ext_code, unsigned int param32, unsigned long param64) { @@ -256,16 +205,6 @@ static void init_all_cpu_buffers(void) } } -static int is_link_entry(unsigned long *s) -{ - return *s & 0x1ul ? 1 : 0; -} - -static unsigned long *get_next_sdbt(unsigned long *s) -{ - return (unsigned long *) (*s & ~0x1ul); -} - static int prepare_cpu_buffers(void) { int cpu; @@ -353,7 +292,7 @@ static int allocate_sdbt(int cpu) } *sdbt = sdb; trailer = trailer_entry_ptr(*sdbt); - *trailer = ALERT_REQ_MASK; + *trailer = SDB_TE_ALERT_REQ_MASK; sdbt++; mutex_unlock(&hws_sem_oom); } @@ -829,7 +768,7 @@ static void worker_on_interrupt(unsigned int cpu) trailer = trailer_entry_ptr(*sdbt); /* leave loop if no more work to do */ - if (!(*trailer & BUFFER_FULL_MASK)) { + if (!(*trailer & SDB_TE_BUFFER_FULL_MASK)) { done = 1; if (!hws_flush_all) continue; diff --git a/arch/s390/oprofile/hwsampler.h b/arch/s390/oprofile/hwsampler.h index 0022e1e..a483d06 100644 --- a/arch/s390/oprofile/hwsampler.h +++ b/arch/s390/oprofile/hwsampler.h @@ -9,27 +9,7 @@ #define HWSAMPLER_H_ #include - -struct hws_qsi_info_block /* QUERY SAMPLING information block */ -{ /* Bit(s) */ - unsigned int b0_13:14; /* 0-13: zeros */ - unsigned int as:1; /* 14: sampling authorisation control*/ - unsigned int b15_21:7; /* 15-21: zeros */ - unsigned int es:1; /* 22: sampling enable control */ - unsigned int b23_29:7; /* 23-29: zeros */ - unsigned int cs:1; /* 30: sampling activation control */ - unsigned int:1; /* 31: reserved */ - unsigned int bsdes:16; /* 4-5: size of sampling entry */ - unsigned int:16; /* 6-7: reserved */ - unsigned long min_sampl_rate; /* 8-15: minimum sampling interval */ - unsigned long max_sampl_rate; /* 16-23: maximum sampling interval*/ - unsigned long tear; /* 24-31: TEAR contents */ - unsigned long dear; /* 32-39: DEAR contents */ - unsigned int rsvrd0; /* 40-43: reserved */ - unsigned int cpu_speed; /* 44-47: CPU speed */ - unsigned long long rsvrd1; /* 48-55: reserved */ - unsigned long long rsvrd2; /* 56-63: reserved */ -}; +#include struct hws_ssctl_request_block /* SET SAMPLING CONTROLS req block */ { /* bytes 0 - 7 Bit(s) */ @@ -68,36 +48,6 @@ struct hws_cpu_buffer { unsigned int stop_mode:1; }; -struct hws_data_entry { - unsigned int def:16; /* 0-15 Data Entry Format */ - unsigned int R:4; /* 16-19 reserved */ - unsigned int U:4; /* 20-23 Number of unique instruct. */ - unsigned int z:2; /* zeros */ - unsigned int T:1; /* 26 PSW DAT mode */ - unsigned int W:1; /* 27 PSW wait state */ - unsigned int P:1; /* 28 PSW Problem state */ - unsigned int AS:2; /* 29-30 PSW address-space control */ - unsigned int I:1; /* 31 entry valid or invalid */ - unsigned int:16; - unsigned int prim_asn:16; /* primary ASN */ - unsigned long long ia; /* Instruction Address */ - unsigned long long gpp; /* Guest Program Parameter */ - unsigned long long hpp; /* Host Program Parameter */ -}; - -struct hws_trailer_entry { - unsigned int f:1; /* 0 - Block Full Indicator */ - unsigned int a:1; /* 1 - Alert request control */ - unsigned long:62; /* 2 - 63: Reserved */ - unsigned long overflow; /* 64 - sample Overflow count */ - unsigned long timestamp; /* 16 - time-stamp */ - unsigned long timestamp1; /* */ - unsigned long reserved1; /* 32 -Reserved */ - unsigned long reserved2; /* */ - unsigned long progusage1; /* 48 - reserved for programming use */ - unsigned long progusage2; /* */ -}; - int hwsampler_setup(void); int hwsampler_shutdown(void); int hwsampler_allocate(unsigned long sdbt, unsigned long sdb); -- cgit v0.10.2 From c716832513f30430179b60ac5ffd203c53f7eb40 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Wed, 11 Dec 2013 12:44:40 +0100 Subject: s390/cpum_cf: Export event names in sysfs Provide PMU event attributes for supported counters and export their symbolic names to the sysfs "events" directory. See the /sys/devices/cpum_cf/events/ directory for a list of available counters. Note that you might require counter set authorizations for the LPAR to use them. Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h index 1141fb3..3418502 100644 --- a/arch/s390/include/asm/perf_event.h +++ b/arch/s390/include/asm/perf_event.h @@ -1,11 +1,18 @@ /* * Performance event support - s390 specific definitions. * - * Copyright IBM Corp. 2009, 2012 + * Copyright IBM Corp. 2009, 2013 * Author(s): Martin Schwidefsky * Hendrik Brueckner */ +#ifndef _ASM_S390_PERF_EVENT_H +#define _ASM_S390_PERF_EVENT_H + +#ifdef CONFIG_64BIT + +#include +#include #include /* CPU-measurement counter facility */ @@ -15,7 +22,18 @@ #define PMU_F_RESERVED 0x1000 #define PMU_F_ENABLED 0x2000 -#ifdef CONFIG_64BIT +/* Perf defintions for PMU event attributes in sysfs */ +extern __init const struct attribute_group **cpumf_cf_event_group(void); +extern ssize_t cpumf_events_sysfs_show(struct device *dev, + struct device_attribute *attr, + char *page); +#define EVENT_VAR(_cat, _name) event_attr_##_cat##_##_name +#define EVENT_PTR(_cat, _name) (&EVENT_VAR(_cat, _name).attr.attr) + +#define CPUMF_EVENT_ATTR(cat, name, id) \ + PMU_EVENT_ATTR(name, EVENT_VAR(cat, name), id, cpumf_events_sysfs_show) +#define CPUMF_EVENT_PTR(cat, name) EVENT_PTR(cat, name) + /* Perf callbacks */ struct pt_regs; @@ -24,3 +42,4 @@ extern unsigned long perf_misc_flags(struct pt_regs *regs); #define perf_misc_flags(regs) perf_misc_flags(regs) #endif /* CONFIG_64BIT */ +#endif /* _ASM_S390_PERF_EVENT_H */ diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 2403303..9f1e2ad 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -60,7 +60,8 @@ obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o ifdef CONFIG_64BIT -obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_cpum_cf.o +obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_cpum_cf.o \ + perf_cpum_cf_events.o obj-y += runtime_instr.o cache.o endif diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 1105502..f51214c 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -680,6 +680,7 @@ static int __init cpumf_pmu_init(void) goto out; } + cpumf_pmu.attr_groups = cpumf_cf_event_group(); rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", PERF_TYPE_RAW); if (rc) { pr_err("Registering the cpum_cf PMU failed with rc=%i\n", rc); diff --git a/arch/s390/kernel/perf_cpum_cf_events.c b/arch/s390/kernel/perf_cpum_cf_events.c new file mode 100644 index 0000000..4554a4b --- /dev/null +++ b/arch/s390/kernel/perf_cpum_cf_events.c @@ -0,0 +1,322 @@ +/* + * Perf PMU sysfs events attributes for available CPU-measurement counters + * + */ + +#include +#include + + +/* BEGIN: CPUM_CF COUNTER DEFINITIONS =================================== */ + +CPUMF_EVENT_ATTR(cf, CPU_CYCLES, 0x0000); +CPUMF_EVENT_ATTR(cf, INSTRUCTIONS, 0x0001); +CPUMF_EVENT_ATTR(cf, L1I_DIR_WRITES, 0x0002); +CPUMF_EVENT_ATTR(cf, L1I_PENALTY_CYCLES, 0x0003); +CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_CPU_CYCLES, 0x0020); +CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_INSTRUCTIONS, 0x0021); +CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_L1I_DIR_WRITES, 0x0022); +CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_L1I_PENALTY_CYCLES, 0x0023); +CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_L1D_DIR_WRITES, 0x0024); +CPUMF_EVENT_ATTR(cf, PROBLEM_STATE_L1D_PENALTY_CYCLES, 0x0025); +CPUMF_EVENT_ATTR(cf, L1D_DIR_WRITES, 0x0004); +CPUMF_EVENT_ATTR(cf, L1D_PENALTY_CYCLES, 0x0005); +CPUMF_EVENT_ATTR(cf, PRNG_FUNCTIONS, 0x0040); +CPUMF_EVENT_ATTR(cf, PRNG_CYCLES, 0x0041); +CPUMF_EVENT_ATTR(cf, PRNG_BLOCKED_FUNCTIONS, 0x0042); +CPUMF_EVENT_ATTR(cf, PRNG_BLOCKED_CYCLES, 0x0043); +CPUMF_EVENT_ATTR(cf, SHA_FUNCTIONS, 0x0044); +CPUMF_EVENT_ATTR(cf, SHA_CYCLES, 0x0045); +CPUMF_EVENT_ATTR(cf, SHA_BLOCKED_FUNCTIONS, 0x0046); +CPUMF_EVENT_ATTR(cf, SHA_BLOCKED_CYCLES, 0x0047); +CPUMF_EVENT_ATTR(cf, DEA_FUNCTIONS, 0x0048); +CPUMF_EVENT_ATTR(cf, DEA_CYCLES, 0x0049); +CPUMF_EVENT_ATTR(cf, DEA_BLOCKED_FUNCTIONS, 0x004a); +CPUMF_EVENT_ATTR(cf, DEA_BLOCKED_CYCLES, 0x004b); +CPUMF_EVENT_ATTR(cf, AES_FUNCTIONS, 0x004c); +CPUMF_EVENT_ATTR(cf, AES_CYCLES, 0x004d); +CPUMF_EVENT_ATTR(cf, AES_BLOCKED_FUNCTIONS, 0x004e); +CPUMF_EVENT_ATTR(cf, AES_BLOCKED_CYCLES, 0x004f); +CPUMF_EVENT_ATTR(cf_z10, L1I_L2_SOURCED_WRITES, 0x0080); +CPUMF_EVENT_ATTR(cf_z10, L1D_L2_SOURCED_WRITES, 0x0081); +CPUMF_EVENT_ATTR(cf_z10, L1I_L3_LOCAL_WRITES, 0x0082); +CPUMF_EVENT_ATTR(cf_z10, L1D_L3_LOCAL_WRITES, 0x0083); +CPUMF_EVENT_ATTR(cf_z10, L1I_L3_REMOTE_WRITES, 0x0084); +CPUMF_EVENT_ATTR(cf_z10, L1D_L3_REMOTE_WRITES, 0x0085); +CPUMF_EVENT_ATTR(cf_z10, L1D_LMEM_SOURCED_WRITES, 0x0086); +CPUMF_EVENT_ATTR(cf_z10, L1I_LMEM_SOURCED_WRITES, 0x0087); +CPUMF_EVENT_ATTR(cf_z10, L1D_RO_EXCL_WRITES, 0x0088); +CPUMF_EVENT_ATTR(cf_z10, L1I_CACHELINE_INVALIDATES, 0x0089); +CPUMF_EVENT_ATTR(cf_z10, ITLB1_WRITES, 0x008a); +CPUMF_EVENT_ATTR(cf_z10, DTLB1_WRITES, 0x008b); +CPUMF_EVENT_ATTR(cf_z10, TLB2_PTE_WRITES, 0x008c); +CPUMF_EVENT_ATTR(cf_z10, TLB2_CRSTE_WRITES, 0x008d); +CPUMF_EVENT_ATTR(cf_z10, TLB2_CRSTE_HPAGE_WRITES, 0x008e); +CPUMF_EVENT_ATTR(cf_z10, ITLB1_MISSES, 0x0091); +CPUMF_EVENT_ATTR(cf_z10, DTLB1_MISSES, 0x0092); +CPUMF_EVENT_ATTR(cf_z10, L2C_STORES_SENT, 0x0093); +CPUMF_EVENT_ATTR(cf_z196, L1D_L2_SOURCED_WRITES, 0x0080); +CPUMF_EVENT_ATTR(cf_z196, L1I_L2_SOURCED_WRITES, 0x0081); +CPUMF_EVENT_ATTR(cf_z196, DTLB1_MISSES, 0x0082); +CPUMF_EVENT_ATTR(cf_z196, ITLB1_MISSES, 0x0083); +CPUMF_EVENT_ATTR(cf_z196, L2C_STORES_SENT, 0x0085); +CPUMF_EVENT_ATTR(cf_z196, L1D_OFFBOOK_L3_SOURCED_WRITES, 0x0086); +CPUMF_EVENT_ATTR(cf_z196, L1D_ONBOOK_L4_SOURCED_WRITES, 0x0087); +CPUMF_EVENT_ATTR(cf_z196, L1I_ONBOOK_L4_SOURCED_WRITES, 0x0088); +CPUMF_EVENT_ATTR(cf_z196, L1D_RO_EXCL_WRITES, 0x0089); +CPUMF_EVENT_ATTR(cf_z196, L1D_OFFBOOK_L4_SOURCED_WRITES, 0x008a); +CPUMF_EVENT_ATTR(cf_z196, L1I_OFFBOOK_L4_SOURCED_WRITES, 0x008b); +CPUMF_EVENT_ATTR(cf_z196, DTLB1_HPAGE_WRITES, 0x008c); +CPUMF_EVENT_ATTR(cf_z196, L1D_LMEM_SOURCED_WRITES, 0x008d); +CPUMF_EVENT_ATTR(cf_z196, L1I_LMEM_SOURCED_WRITES, 0x008e); +CPUMF_EVENT_ATTR(cf_z196, L1I_OFFBOOK_L3_SOURCED_WRITES, 0x008f); +CPUMF_EVENT_ATTR(cf_z196, DTLB1_WRITES, 0x0090); +CPUMF_EVENT_ATTR(cf_z196, ITLB1_WRITES, 0x0091); +CPUMF_EVENT_ATTR(cf_z196, TLB2_PTE_WRITES, 0x0092); +CPUMF_EVENT_ATTR(cf_z196, TLB2_CRSTE_HPAGE_WRITES, 0x0093); +CPUMF_EVENT_ATTR(cf_z196, TLB2_CRSTE_WRITES, 0x0094); +CPUMF_EVENT_ATTR(cf_z196, L1D_ONCHIP_L3_SOURCED_WRITES, 0x0096); +CPUMF_EVENT_ATTR(cf_z196, L1D_OFFCHIP_L3_SOURCED_WRITES, 0x0098); +CPUMF_EVENT_ATTR(cf_z196, L1I_ONCHIP_L3_SOURCED_WRITES, 0x0099); +CPUMF_EVENT_ATTR(cf_z196, L1I_OFFCHIP_L3_SOURCED_WRITES, 0x009b); +CPUMF_EVENT_ATTR(cf_zec12, DTLB1_MISSES, 0x0080); +CPUMF_EVENT_ATTR(cf_zec12, ITLB1_MISSES, 0x0081); +CPUMF_EVENT_ATTR(cf_zec12, L1D_L2I_SOURCED_WRITES, 0x0082); +CPUMF_EVENT_ATTR(cf_zec12, L1I_L2I_SOURCED_WRITES, 0x0083); +CPUMF_EVENT_ATTR(cf_zec12, L1D_L2D_SOURCED_WRITES, 0x0084); +CPUMF_EVENT_ATTR(cf_zec12, DTLB1_WRITES, 0x0085); +CPUMF_EVENT_ATTR(cf_zec12, L1D_LMEM_SOURCED_WRITES, 0x0087); +CPUMF_EVENT_ATTR(cf_zec12, L1I_LMEM_SOURCED_WRITES, 0x0089); +CPUMF_EVENT_ATTR(cf_zec12, L1D_RO_EXCL_WRITES, 0x008a); +CPUMF_EVENT_ATTR(cf_zec12, DTLB1_HPAGE_WRITES, 0x008b); +CPUMF_EVENT_ATTR(cf_zec12, ITLB1_WRITES, 0x008c); +CPUMF_EVENT_ATTR(cf_zec12, TLB2_PTE_WRITES, 0x008d); +CPUMF_EVENT_ATTR(cf_zec12, TLB2_CRSTE_HPAGE_WRITES, 0x008e); +CPUMF_EVENT_ATTR(cf_zec12, TLB2_CRSTE_WRITES, 0x008f); +CPUMF_EVENT_ATTR(cf_zec12, L1D_ONCHIP_L3_SOURCED_WRITES, 0x0090); +CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFCHIP_L3_SOURCED_WRITES, 0x0091); +CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFBOOK_L3_SOURCED_WRITES, 0x0092); +CPUMF_EVENT_ATTR(cf_zec12, L1D_ONBOOK_L4_SOURCED_WRITES, 0x0093); +CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFBOOK_L4_SOURCED_WRITES, 0x0094); +CPUMF_EVENT_ATTR(cf_zec12, TX_NC_TEND, 0x0095); +CPUMF_EVENT_ATTR(cf_zec12, L1D_ONCHIP_L3_SOURCED_WRITES_IV, 0x0096); +CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFCHIP_L3_SOURCED_WRITES_IV, 0x0097); +CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFBOOK_L3_SOURCED_WRITES_IV, 0x0098); +CPUMF_EVENT_ATTR(cf_zec12, L1I_ONCHIP_L3_SOURCED_WRITES, 0x0099); +CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFCHIP_L3_SOURCED_WRITES, 0x009a); +CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES, 0x009b); +CPUMF_EVENT_ATTR(cf_zec12, L1I_ONBOOK_L4_SOURCED_WRITES, 0x009c); +CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFBOOK_L4_SOURCED_WRITES, 0x009d); +CPUMF_EVENT_ATTR(cf_zec12, TX_C_TEND, 0x009e); +CPUMF_EVENT_ATTR(cf_zec12, L1I_ONCHIP_L3_SOURCED_WRITES_IV, 0x009f); +CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFCHIP_L3_SOURCED_WRITES_IV, 0x00a0); +CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES_IV, 0x00a1); +CPUMF_EVENT_ATTR(cf_zec12, TX_NC_TABORT, 0x00b1); +CPUMF_EVENT_ATTR(cf_zec12, TX_C_TABORT_NO_SPECIAL, 0x00b2); +CPUMF_EVENT_ATTR(cf_zec12, TX_C_TABORT_SPECIAL, 0x00b3); + +static struct attribute *cpumcf_pmu_event_attr[] = { + CPUMF_EVENT_PTR(cf, CPU_CYCLES), + CPUMF_EVENT_PTR(cf, INSTRUCTIONS), + CPUMF_EVENT_PTR(cf, L1I_DIR_WRITES), + CPUMF_EVENT_PTR(cf, L1I_PENALTY_CYCLES), + CPUMF_EVENT_PTR(cf, PROBLEM_STATE_CPU_CYCLES), + CPUMF_EVENT_PTR(cf, PROBLEM_STATE_INSTRUCTIONS), + CPUMF_EVENT_PTR(cf, PROBLEM_STATE_L1I_DIR_WRITES), + CPUMF_EVENT_PTR(cf, PROBLEM_STATE_L1I_PENALTY_CYCLES), + CPUMF_EVENT_PTR(cf, PROBLEM_STATE_L1D_DIR_WRITES), + CPUMF_EVENT_PTR(cf, PROBLEM_STATE_L1D_PENALTY_CYCLES), + CPUMF_EVENT_PTR(cf, L1D_DIR_WRITES), + CPUMF_EVENT_PTR(cf, L1D_PENALTY_CYCLES), + CPUMF_EVENT_PTR(cf, PRNG_FUNCTIONS), + CPUMF_EVENT_PTR(cf, PRNG_CYCLES), + CPUMF_EVENT_PTR(cf, PRNG_BLOCKED_FUNCTIONS), + CPUMF_EVENT_PTR(cf, PRNG_BLOCKED_CYCLES), + CPUMF_EVENT_PTR(cf, SHA_FUNCTIONS), + CPUMF_EVENT_PTR(cf, SHA_CYCLES), + CPUMF_EVENT_PTR(cf, SHA_BLOCKED_FUNCTIONS), + CPUMF_EVENT_PTR(cf, SHA_BLOCKED_CYCLES), + CPUMF_EVENT_PTR(cf, DEA_FUNCTIONS), + CPUMF_EVENT_PTR(cf, DEA_CYCLES), + CPUMF_EVENT_PTR(cf, DEA_BLOCKED_FUNCTIONS), + CPUMF_EVENT_PTR(cf, DEA_BLOCKED_CYCLES), + CPUMF_EVENT_PTR(cf, AES_FUNCTIONS), + CPUMF_EVENT_PTR(cf, AES_CYCLES), + CPUMF_EVENT_PTR(cf, AES_BLOCKED_FUNCTIONS), + CPUMF_EVENT_PTR(cf, AES_BLOCKED_CYCLES), + NULL, +}; + +static struct attribute *cpumcf_z10_pmu_event_attr[] __initdata = { + CPUMF_EVENT_PTR(cf_z10, L1I_L2_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z10, L1D_L2_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z10, L1I_L3_LOCAL_WRITES), + CPUMF_EVENT_PTR(cf_z10, L1D_L3_LOCAL_WRITES), + CPUMF_EVENT_PTR(cf_z10, L1I_L3_REMOTE_WRITES), + CPUMF_EVENT_PTR(cf_z10, L1D_L3_REMOTE_WRITES), + CPUMF_EVENT_PTR(cf_z10, L1D_LMEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z10, L1I_LMEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z10, L1D_RO_EXCL_WRITES), + CPUMF_EVENT_PTR(cf_z10, L1I_CACHELINE_INVALIDATES), + CPUMF_EVENT_PTR(cf_z10, ITLB1_WRITES), + CPUMF_EVENT_PTR(cf_z10, DTLB1_WRITES), + CPUMF_EVENT_PTR(cf_z10, TLB2_PTE_WRITES), + CPUMF_EVENT_PTR(cf_z10, TLB2_CRSTE_WRITES), + CPUMF_EVENT_PTR(cf_z10, TLB2_CRSTE_HPAGE_WRITES), + CPUMF_EVENT_PTR(cf_z10, ITLB1_MISSES), + CPUMF_EVENT_PTR(cf_z10, DTLB1_MISSES), + CPUMF_EVENT_PTR(cf_z10, L2C_STORES_SENT), + NULL, +}; + +static struct attribute *cpumcf_z196_pmu_event_attr[] __initdata = { + CPUMF_EVENT_PTR(cf_z196, L1D_L2_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1I_L2_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, DTLB1_MISSES), + CPUMF_EVENT_PTR(cf_z196, ITLB1_MISSES), + CPUMF_EVENT_PTR(cf_z196, L2C_STORES_SENT), + CPUMF_EVENT_PTR(cf_z196, L1D_OFFBOOK_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1D_ONBOOK_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1I_ONBOOK_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1D_RO_EXCL_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1D_OFFBOOK_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1I_OFFBOOK_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, DTLB1_HPAGE_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1D_LMEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1I_LMEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1I_OFFBOOK_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, DTLB1_WRITES), + CPUMF_EVENT_PTR(cf_z196, ITLB1_WRITES), + CPUMF_EVENT_PTR(cf_z196, TLB2_PTE_WRITES), + CPUMF_EVENT_PTR(cf_z196, TLB2_CRSTE_HPAGE_WRITES), + CPUMF_EVENT_PTR(cf_z196, TLB2_CRSTE_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1D_ONCHIP_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1D_OFFCHIP_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1I_ONCHIP_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1I_OFFCHIP_L3_SOURCED_WRITES), + NULL, +}; + +static struct attribute *cpumcf_zec12_pmu_event_attr[] __initdata = { + CPUMF_EVENT_PTR(cf_zec12, DTLB1_MISSES), + CPUMF_EVENT_PTR(cf_zec12, ITLB1_MISSES), + CPUMF_EVENT_PTR(cf_zec12, L1D_L2I_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1I_L2I_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1D_L2D_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, DTLB1_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1D_LMEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1I_LMEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1D_RO_EXCL_WRITES), + CPUMF_EVENT_PTR(cf_zec12, DTLB1_HPAGE_WRITES), + CPUMF_EVENT_PTR(cf_zec12, ITLB1_WRITES), + CPUMF_EVENT_PTR(cf_zec12, TLB2_PTE_WRITES), + CPUMF_EVENT_PTR(cf_zec12, TLB2_CRSTE_HPAGE_WRITES), + CPUMF_EVENT_PTR(cf_zec12, TLB2_CRSTE_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1D_ONCHIP_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1D_OFFCHIP_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1D_OFFBOOK_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1D_ONBOOK_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1D_OFFBOOK_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, TX_NC_TEND), + CPUMF_EVENT_PTR(cf_zec12, L1D_ONCHIP_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_zec12, L1D_OFFCHIP_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_zec12, L1D_OFFBOOK_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_zec12, L1I_ONCHIP_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1I_OFFCHIP_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1I_ONBOOK_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1I_OFFBOOK_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, TX_C_TEND), + CPUMF_EVENT_PTR(cf_zec12, L1I_ONCHIP_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_zec12, L1I_OFFCHIP_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_zec12, TX_NC_TABORT), + CPUMF_EVENT_PTR(cf_zec12, TX_C_TABORT_NO_SPECIAL), + CPUMF_EVENT_PTR(cf_zec12, TX_C_TABORT_SPECIAL), + NULL, +}; + +/* END: CPUM_CF COUNTER DEFINITIONS ===================================== */ + +static struct attribute_group cpumsf_pmu_events_group = { + .name = "events", + .attrs = cpumcf_pmu_event_attr, +}; + +PMU_FORMAT_ATTR(event, "config:0-63"); + +static struct attribute *cpumsf_pmu_format_attr[] = { + &format_attr_event.attr, + NULL, +}; + +static struct attribute_group cpumsf_pmu_format_group = { + .name = "format", + .attrs = cpumsf_pmu_format_attr, +}; + +static const struct attribute_group *cpumsf_pmu_attr_groups[] = { + &cpumsf_pmu_events_group, + &cpumsf_pmu_format_group, + NULL, +}; + + +static __init struct attribute **merge_attr(struct attribute **a, + struct attribute **b) +{ + struct attribute **new; + int j, i; + + for (j = 0; a[j]; j++) + ; + for (i = 0; b[i]; i++) + j++; + j++; + + new = kmalloc(sizeof(struct attribute *) * j, GFP_KERNEL); + if (!new) + return NULL; + j = 0; + for (i = 0; a[i]; i++) + new[j++] = a[i]; + for (i = 0; b[i]; i++) + new[j++] = b[i]; + new[j] = NULL; + + return new; +} + +__init const struct attribute_group **cpumf_cf_event_group(void) +{ + struct attribute **combined, **model; + struct cpuid cpu_id; + + get_cpu_id(&cpu_id); + switch (cpu_id.machine) { + case 0x2097: + case 0x2098: + model = cpumcf_z10_pmu_event_attr; + break; + case 0x2817: + case 0x2818: + model = cpumcf_z196_pmu_event_attr; + break; + case 0x2827: + case 0x2828: + model = cpumcf_zec12_pmu_event_attr; + break; + default: + model = NULL; + break; + }; + + if (!model) + goto out; + + combined = merge_attr(cpumcf_pmu_event_attr, model); + if (combined) + cpumsf_pmu_events_group.attrs = combined; +out: + return cpumsf_pmu_attr_groups; +} diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c index 2343c21..4c1d336 100644 --- a/arch/s390/kernel/perf_event.c +++ b/arch/s390/kernel/perf_event.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -172,3 +173,14 @@ void perf_callchain_kernel(struct perf_callchain_entry *entry, __store_trace(entry, head, S390_lowcore.thread_info, S390_lowcore.thread_info + THREAD_SIZE); } + +/* Perf defintions for PMU event attributes in sysfs */ +ssize_t cpumf_events_sysfs_show(struct device *dev, + struct device_attribute *attr, char *page) +{ + struct perf_pmu_events_attr *pmu_attr; + + pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr); + return sprintf(page, "event=0x%04llx,name=%s\n", + pmu_attr->id, attr->attr.name); +} -- cgit v0.10.2 From 8c069ff4bd6063a3f15e606c882e03f75c7e7711 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Thu, 12 Dec 2013 16:32:47 +0100 Subject: s390/perf: add support for the CPU-Measurement Sampling Facility Introduce a perf PMU, "cpum_sf", to support the CPU-Measurement Sampling Facility. You can control the sampling facility through this perf PMU interfaces. Perf sampling events are created for hardware samples. For details about the CPU-Measurement Sampling Facility, see "The Load-Program-Parameter and the CPU-Measurement Facilities" (SA23-2260). Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h index f6dddea..d707abc 100644 --- a/arch/s390/include/asm/cpu_mf.h +++ b/arch/s390/include/asm/cpu_mf.h @@ -210,6 +210,20 @@ static inline int lsctl(struct hws_lsctl_request_block *req) /* Sampling control helper functions */ +#include + +static inline unsigned long freq_to_sample_rate(struct hws_qsi_info_block *qsi, + unsigned long freq) +{ + return (USEC_PER_SEC / freq) * qsi->cpu_speed; +} + +static inline unsigned long sample_rate_to_freq(struct hws_qsi_info_block *qsi, + unsigned long rate) +{ + return USEC_PER_SEC * qsi->cpu_speed / rate; +} + #define SDB_TE_ALERT_REQ_MASK 0x4000000000000000UL #define SDB_TE_BUFFER_FULL_MASK 0x8000000000000000UL diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h index 3418502..b4eea25 100644 --- a/arch/s390/include/asm/perf_event.h +++ b/arch/s390/include/asm/perf_event.h @@ -15,12 +15,13 @@ #include #include -/* CPU-measurement counter facility */ -#define PERF_CPUM_CF_MAX_CTR 256 - /* Per-CPU flags for PMU states */ #define PMU_F_RESERVED 0x1000 #define PMU_F_ENABLED 0x2000 +#define PMU_F_IN_USE 0x4000 +#define PMU_F_ERR_IBE 0x0100 +#define PMU_F_ERR_LSDA 0x0200 +#define PMU_F_ERR_MASK (PMU_F_ERR_IBE|PMU_F_ERR_LSDA) /* Perf defintions for PMU event attributes in sysfs */ extern __init const struct attribute_group **cpumf_cf_event_group(void); @@ -41,5 +42,15 @@ extern unsigned long perf_instruction_pointer(struct pt_regs *regs); extern unsigned long perf_misc_flags(struct pt_regs *regs); #define perf_misc_flags(regs) perf_misc_flags(regs) +/* Perf PMU definitions for the counter facility */ +#define PERF_CPUM_CF_MAX_CTR 256 + +/* Perf PMU definitions for the sampling facility */ +#define PERF_CPUM_SF_MAX_CTR 1 +#define PERF_EVENT_CPUM_SF 0xB0000UL /* Raw event ID */ + +#define TEAR_REG(hwc) ((hwc)->last_tag) +#define SAMPL_RATE(hwc) ((hwc)->event_base) + #endif /* CONFIG_64BIT */ #endif /* _ASM_S390_PERF_EVENT_H */ diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 9f1e2ad..1b3ac09 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -60,7 +60,7 @@ obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o ifdef CONFIG_64BIT -obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_cpum_cf.o \ +obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_cpum_cf.o perf_cpum_sf.o \ perf_cpum_cf_events.o obj-y += runtime_instr.o cache.o endif diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c new file mode 100644 index 0000000..141eca0 --- /dev/null +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -0,0 +1,1024 @@ +/* + * Performance event support for the System z CPU-measurement Sampling Facility + * + * Copyright IBM Corp. 2013 + * Author(s): Hendrik Brueckner + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + */ +#define KMSG_COMPONENT "cpum_sf" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Minimum number of sample-data-block-tables: + * At least one table is required for the sampling buffer structure. + * A single table contains up to 511 pointers to sample-data-blocks. + */ +#define CPUM_SF_MIN_SDBT 1 + +/* Minimum number of sample-data-blocks: + * The minimum designates a single page for sample-data-block, i.e., + * up to 126 sample-data-blocks with a size of 32 bytes (bsdes). + */ +#define CPUM_SF_MIN_SDB 126 + +/* Maximum number of sample-data-blocks: + * The maximum number designates approx. 256K per CPU including + * the given number of sample-data-blocks and taking the number + * of sample-data-block tables into account. + * + * Later, this number can be increased for extending the sampling + * buffer, for example, by factor 2 (512K) or 4 (1M). + */ +#define CPUM_SF_MAX_SDB 6471 + +struct sf_buffer { + unsigned long sdbt; /* Sample-data-block-table origin */ + /* buffer characteristics (required for buffer increments) */ + unsigned long num_sdb; /* Number of sample-data-blocks */ + unsigned long tail; /* last sample-data-block-table */ +}; + +struct cpu_hw_sf { + /* CPU-measurement sampling information block */ + struct hws_qsi_info_block qsi; + struct hws_lsctl_request_block lsctl; + struct sf_buffer sfb; /* Sampling buffer */ + unsigned int flags; /* Status flags */ + struct perf_event *event; /* Scheduled perf event */ +}; +static DEFINE_PER_CPU(struct cpu_hw_sf, cpu_hw_sf); + +/* Debug feature */ +static debug_info_t *sfdbg; + +/* + * sf_buffer_available() - Check for an allocated sampling buffer + */ +static int sf_buffer_available(struct cpu_hw_sf *cpuhw) +{ + return (cpuhw->sfb.sdbt) ? 1 : 0; +} + +/* + * deallocate sampling facility buffer + */ +static void free_sampling_buffer(struct sf_buffer *sfb) +{ + unsigned long sdbt, *curr; + + if (!sfb->sdbt) + return; + + sdbt = sfb->sdbt; + curr = (unsigned long *) sdbt; + + /* we'll free the SDBT after all SDBs are processed... */ + while (1) { + if (!*curr || !sdbt) + break; + + /* watch for link entry reset if found */ + if (is_link_entry(curr)) { + curr = get_next_sdbt(curr); + if (sdbt) + free_page(sdbt); + + /* we are done if we reach the origin */ + if ((unsigned long) curr == sfb->sdbt) + break; + else + sdbt = (unsigned long) curr; + } else { + /* process SDB pointer */ + if (*curr) { + free_page(*curr); + curr++; + } + } + } + + debug_sprintf_event(sfdbg, 5, + "free_sampling_buffer: freed sdbt=%0lx\n", sfb->sdbt); + memset(sfb, 0, sizeof(*sfb)); +} + +/* + * allocate_sampling_buffer() - allocate sampler memory + * + * Allocates and initializes a sampling buffer structure using the + * specified number of sample-data-blocks (SDB). For each allocation, + * a 4K page is used. The number of sample-data-block-tables (SDBT) + * are calculated from SDBs. + * Also set the ALERT_REQ mask in each SDBs trailer. + * + * Returns zero on success, non-zero otherwise. + */ +static int alloc_sampling_buffer(struct sf_buffer *sfb, unsigned long num_sdb) +{ + int j, k, rc; + unsigned long *sdbt, *tail, *trailer; + unsigned long sdb; + unsigned long num_sdbt, sdb_per_table; + + if (sfb->sdbt) + return -EINVAL; + sfb->num_sdb = 0; + + /* Compute the number of required sample-data-block-tables (SDBT) */ + num_sdbt = num_sdb / ((PAGE_SIZE - 8) / 8); + if (num_sdbt < CPUM_SF_MIN_SDBT) + num_sdbt = CPUM_SF_MIN_SDBT; + sdb_per_table = (PAGE_SIZE - 8) / 8; + + debug_sprintf_event(sfdbg, 4, "alloc_sampling_buffer: num_sdbt=%lu " + "num_sdb=%lu sdb_per_table=%lu\n", + num_sdbt, num_sdb, sdb_per_table); + sdbt = NULL; + tail = sdbt; + + for (j = 0; j < num_sdbt; j++) { + sdbt = (unsigned long *) get_zeroed_page(GFP_KERNEL); + if (!sdbt) { + rc = -ENOMEM; + goto allocate_sdbt_error; + } + + /* save origin of sample-data-block-table */ + if (!sfb->sdbt) + sfb->sdbt = (unsigned long) sdbt; + + /* link current page to tail of chain */ + if (tail) + *tail = (unsigned long)(void *) sdbt + 1; + + for (k = 0; k < num_sdb && k < sdb_per_table; k++) { + /* get and set SDB page */ + sdb = get_zeroed_page(GFP_KERNEL); + if (!sdb) { + rc = -ENOMEM; + goto allocate_sdbt_error; + } + *sdbt = sdb; + trailer = trailer_entry_ptr(*sdbt); + *trailer = SDB_TE_ALERT_REQ_MASK; + sdbt++; + } + num_sdb -= k; + sfb->num_sdb += k; /* count allocated sdb's */ + tail = sdbt; + } + + rc = 0; + if (tail) + *tail = sfb->sdbt + 1; + sfb->tail = (unsigned long) (void *)tail; + +allocate_sdbt_error: + if (rc) + free_sampling_buffer(sfb); + else + debug_sprintf_event(sfdbg, 4, + "alloc_sampling_buffer: tear=%0lx dear=%0lx\n", + sfb->sdbt, *(unsigned long *) sfb->sdbt); + return rc; +} + +static int allocate_sdbt(struct cpu_hw_sf *cpuhw, const struct hw_perf_event *hwc) +{ + unsigned long n_sdb, freq; + unsigned long factor; + + /* Calculate sampling buffers using 4K pages + * + * 1. Use frequency as input. The samping buffer is designed for + * a complete second. This can be adjusted through the "factor" + * variable. + * In any case, alloc_sampling_buffer() sets the Alert Request + * Control indicator to trigger measurement-alert to harvest + * sample-data-blocks (sdb). + * + * 2. Compute the number of sample-data-blocks and ensure a minimum + * of CPUM_SF_MIN_SDB. Also ensure the upper limit does not + * exceed CPUM_SF_MAX_SDB. See also the remarks for these + * symbolic constants. + * + * 3. Compute number of pages used for the sample-data-block-table + * and ensure a minimum of CPUM_SF_MIN_SDBT (at minimum one table + * to manage up to 511 sample-data-blocks). + */ + freq = sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc)); + factor = 1; + n_sdb = DIV_ROUND_UP(freq, factor * ((PAGE_SIZE-64) / cpuhw->qsi.bsdes)); + if (n_sdb < CPUM_SF_MIN_SDB) + n_sdb = CPUM_SF_MIN_SDB; + + /* Return if there is already a sampling buffer allocated. + * XXX Remove this later and check number of available and + * required sdb's and, if necessary, increase the sampling buffer. + */ + if (sf_buffer_available(cpuhw)) + return 0; + + debug_sprintf_event(sfdbg, 3, + "allocate_sdbt: rate=%lu f=%lu sdb=%lu/%i cpuhw=%p\n", + SAMPL_RATE(hwc), freq, n_sdb, CPUM_SF_MAX_SDB, cpuhw); + + return alloc_sampling_buffer(&cpuhw->sfb, + min_t(unsigned long, n_sdb, CPUM_SF_MAX_SDB)); +} + + +/* Number of perf events counting hardware events */ +static atomic_t num_events; +/* Used to avoid races in calling reserve/release_cpumf_hardware */ +static DEFINE_MUTEX(pmc_reserve_mutex); + +/* + * sf_disable() - Switch off sampling facility + */ +static int sf_disable(void) +{ + struct hws_lsctl_request_block sreq; + + memset(&sreq, 0, sizeof(sreq)); + return lsctl(&sreq); +} + + +#define PMC_INIT 0 +#define PMC_RELEASE 1 +static void setup_pmc_cpu(void *flags) +{ + int err; + struct cpu_hw_sf *cpusf = &__get_cpu_var(cpu_hw_sf); + + /* XXX Improve error handling and pass a flag in the *flags + * variable to indicate failures. Alternatively, ignore + * (print) errors here and let the PMU functions fail if + * the per-cpu PMU_F_RESERVED flag is not. + */ + err = 0; + switch (*((int *) flags)) { + case PMC_INIT: + memset(cpusf, 0, sizeof(*cpusf)); + err = qsi(&cpusf->qsi); + if (err) + break; + cpusf->flags |= PMU_F_RESERVED; + err = sf_disable(); + if (err) + pr_err("Switching off the sampling facility failed " + "with rc=%i\n", err); + debug_sprintf_event(sfdbg, 5, + "setup_pmc_cpu: initialized: cpuhw=%p\n", cpusf); + break; + case PMC_RELEASE: + cpusf->flags &= ~PMU_F_RESERVED; + err = sf_disable(); + if (err) { + pr_err("Switching off the sampling facility failed " + "with rc=%i\n", err); + } else { + if (cpusf->sfb.sdbt) + free_sampling_buffer(&cpusf->sfb); + } + debug_sprintf_event(sfdbg, 5, + "setup_pmc_cpu: released: cpuhw=%p\n", cpusf); + break; + } +} + +static void release_pmc_hardware(void) +{ + int flags = PMC_RELEASE; + + irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT); + on_each_cpu(setup_pmc_cpu, &flags, 1); +} + +static int reserve_pmc_hardware(void) +{ + int flags = PMC_INIT; + + on_each_cpu(setup_pmc_cpu, &flags, 1); + irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT); + + return 0; +} + +static void hw_perf_event_destroy(struct perf_event *event) +{ + /* Release PMC if this is the last perf event */ + if (!atomic_add_unless(&num_events, -1, 1)) { + mutex_lock(&pmc_reserve_mutex); + if (atomic_dec_return(&num_events) == 0) + release_pmc_hardware(); + mutex_unlock(&pmc_reserve_mutex); + } +} + +static void hw_init_period(struct hw_perf_event *hwc, u64 period) +{ + hwc->sample_period = period; + hwc->last_period = hwc->sample_period; + local64_set(&hwc->period_left, hwc->sample_period); +} + +static void hw_reset_registers(struct hw_perf_event *hwc, + unsigned long sdbt_origin) +{ + TEAR_REG(hwc) = sdbt_origin; /* (re)set to first sdb table */ +} + +static unsigned long hw_limit_rate(const struct hws_qsi_info_block *si, + unsigned long rate) +{ + if (rate < si->min_sampl_rate) + return si->min_sampl_rate; + if (rate > si->max_sampl_rate) + return si->max_sampl_rate; + return rate; +} + +static int __hw_perf_event_init(struct perf_event *event) +{ + struct cpu_hw_sf *cpuhw; + struct hws_qsi_info_block si; + struct perf_event_attr *attr = &event->attr; + struct hw_perf_event *hwc = &event->hw; + unsigned long rate; + int cpu, err; + + /* Reserve CPU-measurement sampling facility */ + err = 0; + if (!atomic_inc_not_zero(&num_events)) { + mutex_lock(&pmc_reserve_mutex); + if (atomic_read(&num_events) == 0 && reserve_pmc_hardware()) + err = -EBUSY; + else + atomic_inc(&num_events); + mutex_unlock(&pmc_reserve_mutex); + } + event->destroy = hw_perf_event_destroy; + + if (err) + goto out; + + /* Access per-CPU sampling information (query sampling info) */ + /* + * The event->cpu value can be -1 to count on every CPU, for example, + * when attaching to a task. If this is specified, use the query + * sampling info from the current CPU, otherwise use event->cpu to + * retrieve the per-CPU information. + * Later, cpuhw indicates whether to allocate sampling buffers for a + * particular CPU (cpuhw!=NULL) or each online CPU (cpuw==NULL). + */ + memset(&si, 0, sizeof(si)); + cpuhw = NULL; + if (event->cpu == -1) + qsi(&si); + else { + /* Event is pinned to a particular CPU, retrieve the per-CPU + * sampling structure for accessing the CPU-specific QSI. + */ + cpuhw = &per_cpu(cpu_hw_sf, event->cpu); + si = cpuhw->qsi; + } + + /* Check sampling facility authorization and, if not authorized, + * fall back to other PMUs. It is safe to check any CPU because + * the authorization is identical for all configured CPUs. + */ + if (!si.as) { + err = -ENOENT; + goto out; + } + + /* The sampling information (si) contains information about the + * min/max sampling intervals and the CPU speed. So calculate the + * correct sampling interval and avoid the whole period adjust + * feedback loop. + */ + rate = 0; + if (attr->freq) { + rate = freq_to_sample_rate(&si, attr->sample_freq); + rate = hw_limit_rate(&si, rate); + attr->freq = 0; + attr->sample_period = rate; + } else { + /* The min/max sampling rates specifies the valid range + * of sample periods. If the specified sample period is + * out of range, limit the period to the range boundary. + */ + rate = hw_limit_rate(&si, hwc->sample_period); + + /* The perf core maintains a maximum sample rate that is + * configurable through the sysctl interface. Ensure the + * sampling rate does not exceed this value. This also helps + * to avoid throttling when pushing samples with + * perf_event_overflow(). + */ + if (sample_rate_to_freq(&si, rate) > + sysctl_perf_event_sample_rate) { + err = -EINVAL; + debug_sprintf_event(sfdbg, 1, "Sampling rate exceeds maximum perf sample rate\n"); + goto out; + } + } + SAMPL_RATE(hwc) = rate; + hw_init_period(hwc, SAMPL_RATE(hwc)); + + /* Allocate the per-CPU sampling buffer using the CPU information + * from the event. If the event is not pinned to a particular + * CPU (event->cpu == -1; or cpuhw == NULL), allocate sampling + * buffers for each online CPU. + */ + if (cpuhw) + /* Event is pinned to a particular CPU */ + err = allocate_sdbt(cpuhw, hwc); + else { + /* Event is not pinned, allocate sampling buffer on + * each online CPU + */ + for_each_online_cpu(cpu) { + cpuhw = &per_cpu(cpu_hw_sf, cpu); + err = allocate_sdbt(cpuhw, hwc); + if (err) + break; + } + } +out: + return err; +} + +static int cpumsf_pmu_event_init(struct perf_event *event) +{ + int err; + + if (event->attr.type != PERF_TYPE_RAW) + return -ENOENT; + + if (event->attr.config != PERF_EVENT_CPUM_SF) + return -ENOENT; + + if (event->cpu >= nr_cpumask_bits || + (event->cpu >= 0 && !cpu_online(event->cpu))) + return -ENODEV; + + err = __hw_perf_event_init(event); + if (unlikely(err)) + if (event->destroy) + event->destroy(event); + return err; +} + +static void cpumsf_pmu_enable(struct pmu *pmu) +{ + struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf); + int err; + + if (cpuhw->flags & PMU_F_ENABLED) + return; + + if (cpuhw->flags & PMU_F_ERR_MASK) + return; + + cpuhw->flags |= PMU_F_ENABLED; + barrier(); + + err = lsctl(&cpuhw->lsctl); + if (err) { + cpuhw->flags &= ~PMU_F_ENABLED; + pr_err("Loading sampling controls failed: op=%i err=%i\n", + 1, err); + return; + } + + debug_sprintf_event(sfdbg, 6, "pmu_enable: es=%i cs=%i tear=%p dear=%p\n", + cpuhw->lsctl.es, cpuhw->lsctl.cs, + (void *) cpuhw->lsctl.tear, (void *) cpuhw->lsctl.dear); +} + +static void cpumsf_pmu_disable(struct pmu *pmu) +{ + struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf); + struct hws_lsctl_request_block inactive; + struct hws_qsi_info_block si; + int err; + + if (!(cpuhw->flags & PMU_F_ENABLED)) + return; + + if (cpuhw->flags & PMU_F_ERR_MASK) + return; + + /* Switch off sampling activation control */ + inactive = cpuhw->lsctl; + inactive.cs = 0; + + err = lsctl(&inactive); + if (err) { + pr_err("Loading sampling controls failed: op=%i err=%i\n", + 2, err); + return; + } + + /* Save state of TEAR and DEAR register contents */ + if (!qsi(&si)) { + /* TEAR/DEAR values are valid only if the sampling facility is + * enabled. Note that cpumsf_pmu_disable() might be called even + * for a disabled sampling facility because cpumsf_pmu_enable() + * controls the enable/disable state. + */ + if (si.es) { + cpuhw->lsctl.tear = si.tear; + cpuhw->lsctl.dear = si.dear; + } + } else + debug_sprintf_event(sfdbg, 3, "cpumsf_pmu_disable: " + "qsi() failed with err=%i\n", err); + + cpuhw->flags &= ~PMU_F_ENABLED; +} + +/* perf_push_sample() - Push samples to perf + * @event: The perf event + * @sample: Hardware sample data + * + * Use the hardware sample data to create perf event sample. The sample + * is the pushed to the event subsystem and the function checks for + * possible event overflows. If an event overflow occurs, the PMU is + * stopped. + * + * Return non-zero if an event overflow occurred. + */ +static int perf_push_sample(struct perf_event *event, + struct hws_data_entry *sample) +{ + int overflow; + struct pt_regs regs; + struct perf_sample_data data; + + /* Skip samples that are invalid or for which the instruction address + * is not predictable. For the latter, the wait-state bit is set. + */ + if (sample->I || sample->W) + return 0; + + perf_sample_data_init(&data, 0, event->hw.last_period); + + memset(®s, 0, sizeof(regs)); + regs.psw.addr = sample->ia; + if (sample->T) + regs.psw.mask |= PSW_MASK_DAT; + if (sample->W) + regs.psw.mask |= PSW_MASK_WAIT; + if (sample->P) + regs.psw.mask |= PSW_MASK_PSTATE; + switch (sample->AS) { + case 0x0: + regs.psw.mask |= PSW_ASC_PRIMARY; + break; + case 0x1: + regs.psw.mask |= PSW_ASC_ACCREG; + break; + case 0x2: + regs.psw.mask |= PSW_ASC_SECONDARY; + break; + case 0x3: + regs.psw.mask |= PSW_ASC_HOME; + break; + } + + overflow = 0; + if (perf_event_overflow(event, &data, ®s)) { + overflow = 1; + event->pmu->stop(event, 0); + debug_sprintf_event(sfdbg, 4, "perf_push_sample: PMU stopped" + " because of an event overflow\n"); + } + perf_event_update_userpage(event); + + return overflow; +} + +static void perf_event_count_update(struct perf_event *event, u64 count) +{ + local64_add(count, &event->count); +} + +/* hw_collect_samples() - Walk through a sample-data-block and collect samples + * @event: The perf event + * @sdbt: Sample-data-block table + * @overflow: Event overflow counter + * + * Walks through a sample-data-block and collects hardware sample-data that is + * pushed to the perf event subsystem. The overflow reports the number of + * samples that has been discarded due to an event overflow. + */ +static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt, + unsigned long long *overflow) +{ + struct hws_data_entry *sample; + unsigned long *trailer; + + trailer = trailer_entry_ptr(*sdbt); + sample = (struct hws_data_entry *) *sdbt; + while ((unsigned long *) sample < trailer) { + /* Check for an empty sample */ + if (!sample->def) + break; + + /* Update perf event period */ + perf_event_count_update(event, SAMPL_RATE(&event->hw)); + + /* Check for basic sampling mode */ + if (sample->def == 0x0001) { + /* If an event overflow occurred, the PMU is stopped to + * throttle event delivery. Remaining sample data is + * discarded. + */ + if (!*overflow) + *overflow = perf_push_sample(event, sample); + else + /* Count discarded samples */ + *overflow += 1; + } else + /* Sample slot is not yet written or other record */ + debug_sprintf_event(sfdbg, 5, "hw_collect_samples: " + "Unknown sample data entry format:" + " %i\n", sample->def); + + /* Reset sample slot and advance to next sample */ + sample->def = 0; + sample++; + } +} + +/* hw_perf_event_update() - Process sampling buffer + * @event: The perf event + * @flush_all: Flag to also flush partially filled sample-data-blocks + * + * Processes the sampling buffer and create perf event samples. + * The sampling buffer position are retrieved and saved in the TEAR_REG + * register of the specified perf event. + * + * Only full sample-data-blocks are processed. Specify the flash_all flag + * to also walk through partially filled sample-data-blocks. + * + */ +static void hw_perf_event_update(struct perf_event *event, int flush_all) +{ + struct hw_perf_event *hwc = &event->hw; + struct hws_trailer_entry *te; + unsigned long *sdbt; + unsigned long long event_overflow, sampl_overflow; + int done; + + sdbt = (unsigned long *) TEAR_REG(hwc); + done = event_overflow = sampl_overflow = 0; + while (!done) { + /* Get the trailer entry of the sample-data-block */ + te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt); + + /* Leave loop if no more work to do (block full indicator) */ + if (!te->f) { + done = 1; + if (!flush_all) + break; + } + + /* Check sample overflow count */ + if (te->overflow) { + /* Increment sample overflow counter */ + sampl_overflow += te->overflow; + + /* XXX: If an sample overflow occurs, increase the + * sampling buffer. Set a "realloc" flag because + * the sampler must be re-enabled for changing + * the sample-data-block-table content. + */ + } + + /* Timestamps are valid for full sample-data-blocks only */ + debug_sprintf_event(sfdbg, 6, "hw_perf_event_update: sdbt=%p " + "overflow=%llu timestamp=0x%llx\n", + sdbt, te->overflow, + (te->f) ? te->timestamp : 0ULL); + + /* Collect all samples from a single sample-data-block and + * flag if an (perf) event overflow happened. If so, the PMU + * is stopped and remaining samples will be discarded. + */ + hw_collect_samples(event, sdbt, &event_overflow); + + /* Reset trailer */ + xchg(&te->overflow, 0); + xchg((unsigned char *) te, 0x40); + + /* Advance to next sample-data-block */ + sdbt++; + if (is_link_entry(sdbt)) + sdbt = get_next_sdbt(sdbt); + + /* Update event hardware registers */ + TEAR_REG(hwc) = (unsigned long) sdbt; + + /* Stop processing sample-data if all samples of the current + * sample-data-block were flushed even if it was not full. + */ + if (flush_all && done) + break; + + /* If an event overflow happened, discard samples by + * processing any remaining sample-data-blocks. + */ + if (event_overflow) + flush_all = 1; + } + + if (sampl_overflow || event_overflow) + debug_sprintf_event(sfdbg, 4, "hw_perf_event_update: " + "overflow stats: sample=%llu event=%llu\n", + sampl_overflow, event_overflow); +} + +static void cpumsf_pmu_read(struct perf_event *event) +{ + /* Nothing to do ... updates are interrupt-driven */ +} + +/* Activate sampling control. + * Next call of pmu_enable() starts sampling. + */ +static void cpumsf_pmu_start(struct perf_event *event, int flags) +{ + struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf); + + if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) + return; + + if (flags & PERF_EF_RELOAD) + WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); + + perf_pmu_disable(event->pmu); + event->hw.state = 0; + cpuhw->lsctl.cs = 1; + perf_pmu_enable(event->pmu); +} + +/* Deactivate sampling control. + * Next call of pmu_enable() stops sampling. + */ +static void cpumsf_pmu_stop(struct perf_event *event, int flags) +{ + struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf); + + if (event->hw.state & PERF_HES_STOPPED) + return; + + perf_pmu_disable(event->pmu); + cpuhw->lsctl.cs = 0; + event->hw.state |= PERF_HES_STOPPED; + + if ((flags & PERF_EF_UPDATE) && !(event->hw.state & PERF_HES_UPTODATE)) { + hw_perf_event_update(event, 1); + event->hw.state |= PERF_HES_UPTODATE; + } + perf_pmu_enable(event->pmu); +} + +static int cpumsf_pmu_add(struct perf_event *event, int flags) +{ + struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf); + int err; + + if (cpuhw->flags & PMU_F_IN_USE) + return -EAGAIN; + + if (!cpuhw->sfb.sdbt) + return -EINVAL; + + err = 0; + perf_pmu_disable(event->pmu); + + event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED; + + /* Set up sampling controls. Always program the sampling register + * using the SDB-table start. Reset TEAR_REG event hardware register + * that is used by hw_perf_event_update() to store the sampling buffer + * position after samples have been flushed. + */ + cpuhw->lsctl.s = 0; + cpuhw->lsctl.h = 1; + cpuhw->lsctl.tear = cpuhw->sfb.sdbt; + cpuhw->lsctl.dear = *(unsigned long *) cpuhw->sfb.sdbt; + cpuhw->lsctl.interval = SAMPL_RATE(&event->hw); + hw_reset_registers(&event->hw, cpuhw->sfb.sdbt); + + /* Ensure sampling functions are in the disabled state. If disabled, + * switch on sampling enable control. */ + if (WARN_ON_ONCE(cpuhw->lsctl.es == 1)) { + err = -EAGAIN; + goto out; + } + cpuhw->lsctl.es = 1; + + /* Set in_use flag and store event */ + event->hw.idx = 0; /* only one sampling event per CPU supported */ + cpuhw->event = event; + cpuhw->flags |= PMU_F_IN_USE; + + if (flags & PERF_EF_START) + cpumsf_pmu_start(event, PERF_EF_RELOAD); +out: + perf_event_update_userpage(event); + perf_pmu_enable(event->pmu); + return err; +} + +static void cpumsf_pmu_del(struct perf_event *event, int flags) +{ + struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf); + + perf_pmu_disable(event->pmu); + cpumsf_pmu_stop(event, PERF_EF_UPDATE); + + cpuhw->lsctl.es = 0; + cpuhw->flags &= ~PMU_F_IN_USE; + cpuhw->event = NULL; + + perf_event_update_userpage(event); + perf_pmu_enable(event->pmu); +} + +static int cpumsf_pmu_event_idx(struct perf_event *event) +{ + return event->hw.idx; +} + +CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC, PERF_EVENT_CPUM_SF); + +static struct attribute *cpumsf_pmu_events_attr[] = { + CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC), + NULL, +}; + +PMU_FORMAT_ATTR(event, "config:0-63"); + +static struct attribute *cpumsf_pmu_format_attr[] = { + &format_attr_event.attr, + NULL, +}; + +static struct attribute_group cpumsf_pmu_events_group = { + .name = "events", + .attrs = cpumsf_pmu_events_attr, +}; +static struct attribute_group cpumsf_pmu_format_group = { + .name = "format", + .attrs = cpumsf_pmu_format_attr, +}; +static const struct attribute_group *cpumsf_pmu_attr_groups[] = { + &cpumsf_pmu_events_group, + &cpumsf_pmu_format_group, + NULL, +}; + +static struct pmu cpumf_sampling = { + .pmu_enable = cpumsf_pmu_enable, + .pmu_disable = cpumsf_pmu_disable, + + .event_init = cpumsf_pmu_event_init, + .add = cpumsf_pmu_add, + .del = cpumsf_pmu_del, + + .start = cpumsf_pmu_start, + .stop = cpumsf_pmu_stop, + .read = cpumsf_pmu_read, + + .event_idx = cpumsf_pmu_event_idx, + .attr_groups = cpumsf_pmu_attr_groups, +}; + +static void cpumf_measurement_alert(struct ext_code ext_code, + unsigned int alert, unsigned long unused) +{ + struct cpu_hw_sf *cpuhw; + + if (!(alert & CPU_MF_INT_SF_MASK)) + return; + inc_irq_stat(IRQEXT_CMS); + cpuhw = &__get_cpu_var(cpu_hw_sf); + + /* Measurement alerts are shared and might happen when the PMU + * is not reserved. Ignore these alerts in this case. */ + if (!(cpuhw->flags & PMU_F_RESERVED)) + return; + + /* The processing below must take care of multiple alert events that + * might be indicated concurrently. */ + + /* Program alert request */ + if (alert & CPU_MF_INT_SF_PRA) { + if (cpuhw->flags & PMU_F_IN_USE) + hw_perf_event_update(cpuhw->event, 0); + else + WARN_ON_ONCE(!(cpuhw->flags & PMU_F_IN_USE)); + } + + /* Report measurement alerts only for non-PRA codes */ + if (alert != CPU_MF_INT_SF_PRA) + debug_sprintf_event(sfdbg, 6, "measurement alert: 0x%x\n", alert); + + /* Sampling authorization change request */ + if (alert & CPU_MF_INT_SF_SACA) + qsi(&cpuhw->qsi); + + /* Loss of sample data due to high-priority machine activities */ + if (alert & CPU_MF_INT_SF_LSDA) { + pr_err("Sample data was lost\n"); + cpuhw->flags |= PMU_F_ERR_LSDA; + sf_disable(); + } + + /* Invalid sampling buffer entry */ + if (alert & (CPU_MF_INT_SF_IAE|CPU_MF_INT_SF_ISE)) { + pr_err("A sampling buffer entry is incorrect (alert=0x%x)\n", + alert); + cpuhw->flags |= PMU_F_ERR_IBE; + sf_disable(); + } +} + +static int __cpuinit cpumf_pmu_notifier(struct notifier_block *self, + unsigned long action, void *hcpu) +{ + unsigned int cpu = (long) hcpu; + int flags; + + /* Ignore the notification if no events are scheduled on the PMU. + * This might be racy... + */ + if (!atomic_read(&num_events)) + return NOTIFY_OK; + + switch (action & ~CPU_TASKS_FROZEN) { + case CPU_ONLINE: + case CPU_ONLINE_FROZEN: + flags = PMC_INIT; + smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1); + break; + case CPU_DOWN_PREPARE: + flags = PMC_RELEASE; + smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1); + break; + default: + break; + } + + return NOTIFY_OK; +} + +static int __init init_cpum_sampling_pmu(void) +{ + int err; + + if (!cpum_sf_avail()) + return -ENODEV; + + sfdbg = debug_register(KMSG_COMPONENT, 2, 1, 80); + if (!sfdbg) + pr_err("Registering for s390dbf failed\n"); + debug_register_view(sfdbg, &debug_sprintf_view); + + err = register_external_interrupt(0x1407, cpumf_measurement_alert); + if (err) { + pr_err("Failed to register for CPU-measurement alerts\n"); + goto out; + } + + err = perf_pmu_register(&cpumf_sampling, "cpum_sf", PERF_TYPE_RAW); + if (err) { + pr_err("Failed to register cpum_sf pmu\n"); + unregister_external_interrupt(0x1407, cpumf_measurement_alert); + goto out; + } + perf_cpu_notifier(cpumf_pmu_notifier); +out: + return err; +} +arch_initcall(init_cpum_sampling_pmu); diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c index 4c1d336..b9843ba 100644 --- a/arch/s390/kernel/perf_event.c +++ b/arch/s390/kernel/perf_event.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -36,6 +37,8 @@ int perf_num_counters(void) if (cpum_cf_avail()) num += PERF_CPUM_CF_MAX_CTR; + if (cpum_sf_avail()) + num += PERF_CPUM_SF_MAX_CTR; return num; } @@ -93,24 +96,45 @@ unsigned long perf_misc_flags(struct pt_regs *regs) : PERF_RECORD_MISC_KERNEL; } -void perf_event_print_debug(void) +void print_debug_cf(void) { struct cpumf_ctr_info cf_info; - unsigned long flags; - int cpu; - - if (!cpum_cf_avail()) - return; - - local_irq_save(flags); + int cpu = smp_processor_id(); - cpu = smp_processor_id(); memset(&cf_info, 0, sizeof(cf_info)); if (!qctri(&cf_info)) pr_info("CPU[%i] CPUM_CF: ver=%u.%u A=%04x E=%04x C=%04x\n", cpu, cf_info.cfvn, cf_info.csvn, cf_info.auth_ctl, cf_info.enable_ctl, cf_info.act_ctl); +} + +static void print_debug_sf(void) +{ + struct hws_qsi_info_block si; + int cpu = smp_processor_id(); + + memset(&si, 0, sizeof(si)); + if (qsi(&si)) { + pr_err("CPU[%i]: CPM_SF: qsi failed\n"); + return; + } + + pr_info("CPU[%i]: CPM_SF: as=%i es=%i cs=%i bsdes=%i dsdes=%i" + " min=%i max=%i cpu_speed=%i tear=%p dear=%p\n", + cpu, si.as, si.es, si.cs, si.bsdes, si.dsdes, + si.min_sampl_rate, si.max_sampl_rate, si.cpu_speed, + si.tear, si.dear); +} + +void perf_event_print_debug(void) +{ + unsigned long flags; + local_irq_save(flags); + if (cpum_cf_avail()) + print_debug_cf(); + if (cpum_sf_avail()) + print_debug_sf(); local_irq_restore(flags); } -- cgit v0.10.2 From 55baa2f831ae4a41da9617ab9e7cef5ebc991ec9 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Thu, 12 Dec 2013 16:47:00 +0100 Subject: s390/perf: Improve PMU selection for PERF_COUNT_HW_CPU_CYCLES events The cpum_cf (counter facility) PMU does not support sampling events. With cpum_sf (sampling facility), a PMU for sampling CPU cycles is available. Make cpum_sf the "default" PMU for PERF_COUNT_HW_CPU_CYCLES sampling events but use the more precise cpum_cf PMU for non-sampling events. Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 141eca0..52bf36e 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -468,11 +468,29 @@ static int cpumsf_pmu_event_init(struct perf_event *event) { int err; - if (event->attr.type != PERF_TYPE_RAW) - return -ENOENT; - - if (event->attr.config != PERF_EVENT_CPUM_SF) + /* No support for taken branch sampling */ + if (has_branch_stack(event)) + return -EOPNOTSUPP; + + switch (event->attr.type) { + case PERF_TYPE_RAW: + if (event->attr.config != PERF_EVENT_CPUM_SF) + return -ENOENT; + break; + case PERF_TYPE_HARDWARE: + /* Support sampling of CPU cycles in addition to the + * counter facility. However, the counter facility + * is more precise and, hence, restrict this PMU to + * sampling events only. + */ + if (event->attr.config != PERF_COUNT_HW_CPU_CYCLES) + return -ENOENT; + if (!is_sampling_event(event)) + return -ENOENT; + break; + default: return -ENOENT; + } if (event->cpu >= nr_cpumask_bits || (event->cpu >= 0 && !cpu_online(event->cpu))) -- cgit v0.10.2 From e28bb79d9935293a8eea5f3c771fde89db645ba7 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Thu, 12 Dec 2013 16:52:48 +0100 Subject: s390/perf,oprofile: Share sampling facility Introduce reserve/release functions to share the sampling facility between perf and oprofile. Also improve error handling for the sampling facility support in perf. Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h index b4eea25..23d2dfa 100644 --- a/arch/s390/include/asm/perf_event.h +++ b/arch/s390/include/asm/perf_event.h @@ -52,5 +52,9 @@ extern unsigned long perf_misc_flags(struct pt_regs *regs); #define TEAR_REG(hwc) ((hwc)->last_tag) #define SAMPL_RATE(hwc) ((hwc)->event_base) +/* Perf hardware reserve and release functions */ +int perf_reserve_sampling(void); +void perf_release_sampling(void); + #endif /* CONFIG_64BIT */ #endif /* _ASM_S390_PERF_EVENT_H */ diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 52bf36e..ae5e019 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -260,16 +260,12 @@ static int sf_disable(void) #define PMC_INIT 0 #define PMC_RELEASE 1 +#define PMC_FAILURE 2 static void setup_pmc_cpu(void *flags) { int err; struct cpu_hw_sf *cpusf = &__get_cpu_var(cpu_hw_sf); - /* XXX Improve error handling and pass a flag in the *flags - * variable to indicate failures. Alternatively, ignore - * (print) errors here and let the PMU functions fail if - * the per-cpu PMU_F_RESERVED flag is not. - */ err = 0; switch (*((int *) flags)) { case PMC_INIT: @@ -299,6 +295,8 @@ static void setup_pmc_cpu(void *flags) "setup_pmc_cpu: released: cpuhw=%p\n", cpusf); break; } + if (err) + *((int *) flags) |= PMC_FAILURE; } static void release_pmc_hardware(void) @@ -307,13 +305,22 @@ static void release_pmc_hardware(void) irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT); on_each_cpu(setup_pmc_cpu, &flags, 1); + perf_release_sampling(); } static int reserve_pmc_hardware(void) { int flags = PMC_INIT; + int err; + err = perf_reserve_sampling(); + if (err) + return err; on_each_cpu(setup_pmc_cpu, &flags, 1); + if (flags & PMC_FAILURE) { + release_pmc_hardware(); + return -ENODEV; + } irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT); return 0; diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c index b9843ba..4edcdfa 100644 --- a/arch/s390/kernel/perf_event.c +++ b/arch/s390/kernel/perf_event.c @@ -208,3 +208,33 @@ ssize_t cpumf_events_sysfs_show(struct device *dev, return sprintf(page, "event=0x%04llx,name=%s\n", pmu_attr->id, attr->attr.name); } + +/* Reserve/release functions for sharing perf hardware */ +static DEFINE_SPINLOCK(perf_hw_owner_lock); +static void *perf_sampling_owner; + +int perf_reserve_sampling(void) +{ + int err; + + err = 0; + spin_lock(&perf_hw_owner_lock); + if (perf_sampling_owner) { + pr_warn("The sampling facility is already reserved by %p\n", + perf_sampling_owner); + err = -EBUSY; + } else + perf_sampling_owner = __builtin_return_address(0); + spin_unlock(&perf_hw_owner_lock); + return err; +} +EXPORT_SYMBOL(perf_reserve_sampling); + +void perf_release_sampling(void) +{ + spin_lock(&perf_hw_owner_lock); + WARN_ON(!perf_sampling_owner); + perf_sampling_owner = NULL; + spin_unlock(&perf_hw_owner_lock); +} +EXPORT_SYMBOL(perf_release_sampling); diff --git a/arch/s390/oprofile/hwsampler.c b/arch/s390/oprofile/hwsampler.c index bbca76a..eb09587 100644 --- a/arch/s390/oprofile/hwsampler.c +++ b/arch/s390/oprofile/hwsampler.c @@ -41,6 +41,7 @@ static DEFINE_MUTEX(hws_sem_oom); static unsigned char hws_flush_all; static unsigned int hws_oom; +static unsigned int hws_alert; static struct workqueue_struct *hws_wq; static unsigned int hws_state; @@ -182,6 +183,9 @@ static void hws_ext_handler(struct ext_code ext_code, if (!(param32 & CPU_MF_INT_SF_MASK)) return; + if (!hws_alert) + return; + inc_irq_stat(IRQEXT_CMS); atomic_xchg(&cb->ext_params, atomic_read(&cb->ext_params) | param32); @@ -941,6 +945,7 @@ int hwsampler_deallocate(void) goto deallocate_exit; irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT); + hws_alert = 0; deallocate_sdbt(); hws_state = HWS_DEALLOCATED; @@ -1055,6 +1060,7 @@ int hwsampler_shutdown(void) if (hws_state == HWS_STOPPED) { irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT); + hws_alert = 0; deallocate_sdbt(); } if (hws_wq) { @@ -1129,6 +1135,7 @@ start_all_exit: hws_oom = 1; hws_flush_all = 0; /* now let them in, 1407 CPUMF external interrupts */ + hws_alert = 1; irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT); return 0; diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c index 04e1b6a..9ffe645 100644 --- a/arch/s390/oprofile/init.c +++ b/arch/s390/oprofile/init.c @@ -10,6 +10,7 @@ */ #include +#include #include #include #include @@ -67,6 +68,21 @@ module_param_call(cpu_type, set_cpu_type, NULL, NULL, 0); MODULE_PARM_DESC(cpu_type, "Force legacy basic mode sampling" "(report cpu_type \"timer\""); +static int __oprofile_hwsampler_start(void) +{ + int retval; + + retval = hwsampler_allocate(oprofile_sdbt_blocks, oprofile_sdb_blocks); + if (retval) + return retval; + + retval = hwsampler_start_all(oprofile_hw_interval); + if (retval) + hwsampler_deallocate(); + + return retval; +} + static int oprofile_hwsampler_start(void) { int retval; @@ -76,13 +92,13 @@ static int oprofile_hwsampler_start(void) if (!hwsampler_running) return timer_ops.start(); - retval = hwsampler_allocate(oprofile_sdbt_blocks, oprofile_sdb_blocks); + retval = perf_reserve_sampling(); if (retval) return retval; - retval = hwsampler_start_all(oprofile_hw_interval); + retval = __oprofile_hwsampler_start(); if (retval) - hwsampler_deallocate(); + perf_release_sampling(); return retval; } @@ -96,6 +112,7 @@ static void oprofile_hwsampler_stop(void) hwsampler_stop_all(); hwsampler_deallocate(); + perf_release_sampling(); return; } -- cgit v0.10.2 From 99c64b6679c41d8238b154c1a462724d7101765c Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Fri, 13 Dec 2013 12:38:39 +0100 Subject: s390/perf: Add service level information for CPU-Measurement Facilities Register a service level handler to report information about available CPU-Measurement facilities. Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c index 4edcdfa..3bd2bf0 100644 --- a/arch/s390/kernel/perf_event.c +++ b/arch/s390/kernel/perf_event.c @@ -16,17 +16,19 @@ #include #include #include +#include #include #include #include #include #include #include +#include const char *perf_pmu_name(void) { if (cpum_cf_avail() || cpum_sf_avail()) - return "CPU-measurement facilities (CPUMF)"; + return "CPU-Measurement Facilities (CPU-MF)"; return "pmu"; } EXPORT_SYMBOL(perf_pmu_name); @@ -138,6 +140,60 @@ void perf_event_print_debug(void) local_irq_restore(flags); } +/* Service level infrastructure */ +static void sl_print_counter(struct seq_file *m) +{ + struct cpumf_ctr_info ci; + + memset(&ci, 0, sizeof(ci)); + if (qctri(&ci)) + return; + + seq_printf(m, "CPU-MF: Counter facility: version=%u.%u " + "authorization=%04x\n", ci.cfvn, ci.csvn, ci.auth_ctl); +} + +static void sl_print_sampling(struct seq_file *m) +{ + struct hws_qsi_info_block si; + + memset(&si, 0, sizeof(si)); + if (qsi(&si)) + return; + + if (!si.as && !si.ad) + return; + + seq_printf(m, "CPU-MF: Sampling facility: min_rate=%lu max_rate=%lu" + " cpu_speed=%u\n", si.min_sampl_rate, si.max_sampl_rate, + si.cpu_speed); + if (si.as) + seq_printf(m, "CPU-MF: Sampling facility: mode=basic" + " sample_size=%u\n", si.bsdes); + if (si.ad) + seq_printf(m, "CPU-MF: Sampling facility: mode=diagnostic" + " sample_size=%u\n", si.dsdes); +} + +static void service_level_perf_print(struct seq_file *m, + struct service_level *sl) +{ + if (cpum_cf_avail()) + sl_print_counter(m); + if (cpum_sf_avail()) + sl_print_sampling(m); +} + +static struct service_level service_level_perf = { + .seq_print = service_level_perf_print, +}; + +static int __init service_level_perf_register(void) +{ + return register_service_level(&service_level_perf); +} +arch_initcall(service_level_perf_register); + /* See also arch/s390/kernel/traps.c */ static unsigned long __store_trace(struct perf_callchain_entry *entry, unsigned long sp, -- cgit v0.10.2 From aa3b7c296732b4351dfdbfe70be6b38a0882be14 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Thu, 12 Dec 2013 17:48:32 +0100 Subject: s390/pci: prevent inadvertently triggered bus scans Initialization and scanning of the pci bus is omitted on older machines without pci support or if pci=off was specified. Remember the fact that we ran without pci support and prevent further bus scans during resume from hibernate or after receiving hotplug notifications. Reported-by: Stefan Haberland Reviewed-by: Gerald Schaefer Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index c129ab2..2583466 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -144,6 +144,7 @@ int clp_disable_fh(struct zpci_dev *); void zpci_event_error(void *); void zpci_event_availability(void *); void zpci_rescan(void); +bool zpci_is_enabled(void); #else /* CONFIG_PCI */ static inline void zpci_event_error(void *e) {} static inline void zpci_event_availability(void *e) {} diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index bf7c73d..e3265b5 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -920,6 +920,7 @@ static void zpci_mem_exit(void) } static unsigned int s390_pci_probe; +static unsigned int s390_pci_initialized; char * __init pcibios_setup(char *str) { @@ -930,6 +931,11 @@ char * __init pcibios_setup(char *str) return str; } +bool zpci_is_enabled(void) +{ + return s390_pci_initialized; +} + static int __init pci_base_init(void) { int rc; @@ -961,6 +967,7 @@ static int __init pci_base_init(void) if (rc) goto out_find; + s390_pci_initialized = 1; return 0; out_find: @@ -978,5 +985,6 @@ subsys_initcall_sync(pci_base_init); void zpci_rescan(void) { - clp_rescan_pci_devices_simple(); + if (zpci_is_enabled()) + clp_rescan_pci_devices_simple(); } diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index 800f064..228787a 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -43,9 +43,8 @@ struct zpci_ccdf_avail { u16 pec; /* PCI event code */ } __packed; -void zpci_event_error(void *data) +static void __zpci_event_error(struct zpci_ccdf_err *ccdf) { - struct zpci_ccdf_err *ccdf = data; struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid); zpci_err("error CCDF:\n"); @@ -58,9 +57,14 @@ void zpci_event_error(void *data) pci_name(zdev->pdev), ccdf->pec, ccdf->fid); } -void zpci_event_availability(void *data) +void zpci_event_error(void *data) +{ + if (zpci_is_enabled()) + __zpci_event_error(data); +} + +static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) { - struct zpci_ccdf_avail *ccdf = data; struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid); struct pci_dev *pdev = zdev ? zdev->pdev : NULL; int ret; @@ -115,3 +119,9 @@ void zpci_event_availability(void *data) break; } } + +void zpci_event_availability(void *data) +{ + if (zpci_is_enabled()) + __zpci_event_availability(data); +} -- cgit v0.10.2 From 704268925d32a0457202371a61580af76b94c53a Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Thu, 12 Dec 2013 17:50:53 +0100 Subject: s390/pci: fix removal of nonexistent pci bus If we remove a pci bus after receiving a hotplug notification we need to check if the bus is actually present (creation of the pci bus during an earlier notification may have been failed). Reviewed-by: Gerald Schaefer Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index 228787a..65ea105 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -112,6 +112,8 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) clp_rescan_pci_devices(); break; case 0x0308: /* Standby -> Reserved */ + if (!zdev) + break; pci_stop_root_bus(zdev->bus); pci_remove_root_bus(zdev->bus); break; -- cgit v0.10.2 From 0c0c2776926018e7560b99e921467aea1115d03b Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Thu, 12 Dec 2013 17:53:11 +0100 Subject: s390/pci: set error state for unavailable functions If we receive a notification that a pci function became unavailable we clean up by removing the pci device. This can confuse the driver since the function is already unaccessible. Improve this situation by setting an appropriate error_state. Reviewed-by: Gerald Schaefer Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index 65ea105..7fc4c2c 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -102,8 +102,12 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) break; case 0x0304: /* Configured -> Standby */ - if (pdev) + if (pdev) { + /* Give the driver a hint that the function is + * already unusable. */ + pdev->error_state = pci_channel_io_perm_failure; pci_stop_and_remove_bus_device(pdev); + } zpci_disable_device(zdev); zdev->state = ZPCI_FN_STATE_STANDBY; -- cgit v0.10.2 From f7038b7c3f4924b18390c51c1ec1e49287cc87db Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Thu, 12 Dec 2013 17:53:59 +0100 Subject: s390/pci/dma: fix accounting of allocated_pages allocated_pages sometimes are increased even if s390_dma_alloc fails also this value is never decreased even if s390_dma_free is called. This patch fixes these bugs. Also remove the atomic64_t casts (the members are already of this type). Reviewed-by: Gerald Schaefer Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index 9b83d08..60c11a6 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -285,7 +285,7 @@ static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page, flags |= ZPCI_TABLE_PROTECTED; if (!dma_update_trans(zdev, pa, dma_addr, size, flags)) { - atomic64_add(nr_pages, (atomic64_t *) &zdev->fmb->mapped_pages); + atomic64_add(nr_pages, &zdev->fmb->mapped_pages); return dma_addr + (offset & ~PAGE_MASK); } @@ -313,7 +313,7 @@ static void s390_dma_unmap_pages(struct device *dev, dma_addr_t dma_addr, zpci_err_hex(&dma_addr, sizeof(dma_addr)); } - atomic64_add(npages, (atomic64_t *) &zdev->fmb->unmapped_pages); + atomic64_add(npages, &zdev->fmb->unmapped_pages); iommu_page_index = (dma_addr - zdev->start_dma) >> PAGE_SHIFT; dma_free_iommu(zdev, iommu_page_index, npages); } @@ -332,7 +332,6 @@ static void *s390_dma_alloc(struct device *dev, size_t size, if (!page) return NULL; - atomic64_add(size / PAGE_SIZE, (atomic64_t *) &zdev->fmb->allocated_pages); pa = page_to_phys(page); memset((void *) pa, 0, size); @@ -343,6 +342,7 @@ static void *s390_dma_alloc(struct device *dev, size_t size, return NULL; } + atomic64_add(size / PAGE_SIZE, &zdev->fmb->allocated_pages); if (dma_handle) *dma_handle = map; return (void *) pa; @@ -352,8 +352,11 @@ static void s390_dma_free(struct device *dev, size_t size, void *pa, dma_addr_t dma_handle, struct dma_attrs *attrs) { - s390_dma_unmap_pages(dev, dma_handle, PAGE_ALIGN(size), - DMA_BIDIRECTIONAL, NULL); + struct zpci_dev *zdev = get_zdev(to_pci_dev(dev)); + + size = PAGE_ALIGN(size); + atomic64_sub(size / PAGE_SIZE, &zdev->fmb->allocated_pages); + s390_dma_unmap_pages(dev, dma_handle, size, DMA_BIDIRECTIONAL, NULL); free_pages((unsigned long) pa, get_order(size)); } -- cgit v0.10.2 From 257608fb4112b4cabefd9e33a4fc2df6b64dca6a Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Thu, 12 Dec 2013 17:55:22 +0100 Subject: s390/pci: reenable per default HW, FW and Linux support is in a better shape now - let's reenable pci bus probing per default. Acked-by: Gerald Schaefer Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index e3265b5..0820362 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -919,13 +919,13 @@ static void zpci_mem_exit(void) kmem_cache_destroy(zdev_fmb_cache); } -static unsigned int s390_pci_probe; +static unsigned int s390_pci_probe = 1; static unsigned int s390_pci_initialized; char * __init pcibios_setup(char *str) { - if (!strcmp(str, "on")) { - s390_pci_probe = 1; + if (!strcmp(str, "off")) { + s390_pci_probe = 0; return NULL; } return str; -- cgit v0.10.2 From 69f239ed335a4b03265cae3ca930f3f166e42e35 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Thu, 12 Dec 2013 17:03:48 +0100 Subject: s390/cpum_sf: Dynamically extend the sampling buffer if overflows occur Improve the sampling buffer allocation and add a function to reallocate and increase the sampling buffer structure. The number of allocated buffer elements (sample-data-blocks) are accounted. You can control the minimum and maximum number these sample-data-blocks through the cpum_sfb_size kernel parameter. The number hardware sample overflows (if any) are also accounted and stored per perf event. During the PMU disable/enable calls, the accumulated overflow counter is analyzed and, if necessary, the sampling buffer is dynamically increased. Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h index 23d2dfa..99d7f4e 100644 --- a/arch/s390/include/asm/perf_event.h +++ b/arch/s390/include/asm/perf_event.h @@ -49,6 +49,10 @@ extern unsigned long perf_misc_flags(struct pt_regs *regs); #define PERF_CPUM_SF_MAX_CTR 1 #define PERF_EVENT_CPUM_SF 0xB0000UL /* Raw event ID */ +#define REG_NONE 0 +#define REG_OVERFLOW 1 +#define OVERFLOW_REG(hwc) ((hwc)->extra_reg.config) +#define SFB_ALLOC_REG(hwc) ((hwc)->extra_reg.alloc) #define TEAR_REG(hwc) ((hwc)->last_tag) #define SAMPL_RATE(hwc) ((hwc)->event_base) diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index ae5e019..ea16560 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -17,6 +17,8 @@ #include #include #include +#include +#include #include #include #include @@ -26,34 +28,54 @@ * At least one table is required for the sampling buffer structure. * A single table contains up to 511 pointers to sample-data-blocks. */ -#define CPUM_SF_MIN_SDBT 1 +#define CPUM_SF_MIN_SDBT 1 -/* Minimum number of sample-data-blocks: - * The minimum designates a single page for sample-data-block, i.e., - * up to 126 sample-data-blocks with a size of 32 bytes (bsdes). +/* Number of sample-data-blocks per sample-data-block-table (SDBT): + * The table contains SDB origin (8 bytes) and one SDBT origin that + * points to the next table. */ -#define CPUM_SF_MIN_SDB 126 +#define CPUM_SF_SDB_PER_TABLE ((PAGE_SIZE - 8) / 8) -/* Maximum number of sample-data-blocks: - * The maximum number designates approx. 256K per CPU including - * the given number of sample-data-blocks and taking the number - * of sample-data-block tables into account. +/* Maximum page offset for an SDBT table-link entry: + * If this page offset is reached, a table-link entry to the next SDBT + * must be added. + */ +#define CPUM_SF_SDBT_TL_OFFSET (CPUM_SF_SDB_PER_TABLE * 8) +static inline int require_table_link(const void *sdbt) +{ + return ((unsigned long) sdbt & ~PAGE_MASK) == CPUM_SF_SDBT_TL_OFFSET; +} + +/* Minimum and maximum sampling buffer sizes: + * + * This number represents the maximum size of the sampling buffer + * taking the number of sample-data-block-tables into account. * - * Later, this number can be increased for extending the sampling - * buffer, for example, by factor 2 (512K) or 4 (1M). + * Sampling buffer size Buffer characteristics + * --------------------------------------------------- + * 64KB == 16 pages (4KB per page) + * 1 page for SDB-tables + * 15 pages for SDBs + * + * 32MB == 8192 pages (4KB per page) + * 16 pages for SDB-tables + * 8176 pages for SDBs */ -#define CPUM_SF_MAX_SDB 6471 +static unsigned long __read_mostly CPUM_SF_MIN_SDB = 15; +static unsigned long __read_mostly CPUM_SF_MAX_SDB = 8176; struct sf_buffer { - unsigned long sdbt; /* Sample-data-block-table origin */ + unsigned long *sdbt; /* Sample-data-block-table origin */ /* buffer characteristics (required for buffer increments) */ - unsigned long num_sdb; /* Number of sample-data-blocks */ - unsigned long tail; /* last sample-data-block-table */ + unsigned long num_sdb; /* Number of sample-data-blocks */ + unsigned long num_sdbt; /* Number of sample-data-block-tables */ + unsigned long *tail; /* last sample-data-block-table */ }; struct cpu_hw_sf { /* CPU-measurement sampling information block */ struct hws_qsi_info_block qsi; + /* CPU-measurement sampling control block */ struct hws_lsctl_request_block lsctl; struct sf_buffer sfb; /* Sampling buffer */ unsigned int flags; /* Status flags */ @@ -65,11 +87,22 @@ static DEFINE_PER_CPU(struct cpu_hw_sf, cpu_hw_sf); static debug_info_t *sfdbg; /* + * sf_disable() - Switch off sampling facility + */ +static int sf_disable(void) +{ + struct hws_lsctl_request_block sreq; + + memset(&sreq, 0, sizeof(sreq)); + return lsctl(&sreq); +} + +/* * sf_buffer_available() - Check for an allocated sampling buffer */ static int sf_buffer_available(struct cpu_hw_sf *cpuhw) { - return (cpuhw->sfb.sdbt) ? 1 : 0; + return !!cpuhw->sfb.sdbt; } /* @@ -77,32 +110,32 @@ static int sf_buffer_available(struct cpu_hw_sf *cpuhw) */ static void free_sampling_buffer(struct sf_buffer *sfb) { - unsigned long sdbt, *curr; + unsigned long *sdbt, *curr; if (!sfb->sdbt) return; sdbt = sfb->sdbt; - curr = (unsigned long *) sdbt; + curr = sdbt; - /* we'll free the SDBT after all SDBs are processed... */ + /* Free the SDBT after all SDBs are processed... */ while (1) { if (!*curr || !sdbt) break; - /* watch for link entry reset if found */ + /* Process table-link entries */ if (is_link_entry(curr)) { curr = get_next_sdbt(curr); if (sdbt) - free_page(sdbt); + free_page((unsigned long) sdbt); - /* we are done if we reach the origin */ - if ((unsigned long) curr == sfb->sdbt) + /* If the origin is reached, sampling buffer is freed */ + if (curr == sfb->sdbt) break; else - sdbt = (unsigned long) curr; + sdbt = curr; } else { - /* process SDB pointer */ + /* Process SDB pointer */ if (*curr) { free_page(*curr); curr++; @@ -111,10 +144,106 @@ static void free_sampling_buffer(struct sf_buffer *sfb) } debug_sprintf_event(sfdbg, 5, - "free_sampling_buffer: freed sdbt=%0lx\n", sfb->sdbt); + "free_sampling_buffer: freed sdbt=%p\n", sfb->sdbt); memset(sfb, 0, sizeof(*sfb)); } +static int alloc_sample_data_block(unsigned long *sdbt, gfp_t gfp_flags) +{ + unsigned long sdb, *trailer; + + /* Allocate and initialize sample-data-block */ + sdb = get_zeroed_page(gfp_flags); + if (!sdb) + return -ENOMEM; + trailer = trailer_entry_ptr(sdb); + *trailer = SDB_TE_ALERT_REQ_MASK; + + /* Link SDB into the sample-data-block-table */ + *sdbt = sdb; + + return 0; +} + +/* + * realloc_sampling_buffer() - extend sampler memory + * + * Allocates new sample-data-blocks and adds them to the specified sampling + * buffer memory. + * + * Important: This modifies the sampling buffer and must be called when the + * sampling facility is disabled. + * + * Returns zero on success, non-zero otherwise. + */ +static int realloc_sampling_buffer(struct sf_buffer *sfb, + unsigned long num_sdb, gfp_t gfp_flags) +{ + int i, rc; + unsigned long *new, *tail; + + if (!sfb->sdbt || !sfb->tail) + return -EINVAL; + + if (!is_link_entry(sfb->tail)) + return -EINVAL; + + /* Append to the existing sampling buffer, overwriting the table-link + * register. + * The tail variables always points to the "tail" (last and table-link) + * entry in an SDB-table. + */ + tail = sfb->tail; + + /* Do a sanity check whether the table-link entry points to + * the sampling buffer origin. + */ + if (sfb->sdbt != get_next_sdbt(tail)) { + debug_sprintf_event(sfdbg, 3, "realloc_sampling_buffer: " + "sampling buffer is not linked: origin=%p" + "tail=%p\n", + (void *) sfb->sdbt, (void *) tail); + return -EINVAL; + } + + /* Allocate remaining SDBs */ + rc = 0; + for (i = 0; i < num_sdb; i++) { + /* Allocate a new SDB-table if it is full. */ + if (require_table_link(tail)) { + new = (unsigned long *) get_zeroed_page(gfp_flags); + if (!new) { + rc = -ENOMEM; + break; + } + sfb->num_sdbt++; + /* Link current page to tail of chain */ + *tail = (unsigned long)(void *) new + 1; + tail = new; + } + + /* Allocate a new sample-data-block. + * If there is not enough memory, stop the realloc process + * and simply use what was allocated. If this is a temporary + * issue, a new realloc call (if required) might succeed. + */ + rc = alloc_sample_data_block(tail, gfp_flags); + if (rc) + break; + sfb->num_sdb++; + tail++; + } + + /* Link sampling buffer to its origin */ + *tail = (unsigned long) sfb->sdbt + 1; + sfb->tail = tail; + + debug_sprintf_event(sfdbg, 4, "realloc_sampling_buffer: new buffer" + " settings: sdbt=%lu sdb=%lu\n", + sfb->num_sdbt, sfb->num_sdb); + return rc; +} + /* * allocate_sampling_buffer() - allocate sampler memory * @@ -128,75 +257,74 @@ static void free_sampling_buffer(struct sf_buffer *sfb) */ static int alloc_sampling_buffer(struct sf_buffer *sfb, unsigned long num_sdb) { - int j, k, rc; - unsigned long *sdbt, *tail, *trailer; - unsigned long sdb; - unsigned long num_sdbt, sdb_per_table; + int rc; if (sfb->sdbt) return -EINVAL; + + /* Allocate the sample-data-block-table origin */ + sfb->sdbt = (unsigned long *) get_zeroed_page(GFP_KERNEL); + if (!sfb->sdbt) + return -ENOMEM; sfb->num_sdb = 0; + sfb->num_sdbt = 1; - /* Compute the number of required sample-data-block-tables (SDBT) */ - num_sdbt = num_sdb / ((PAGE_SIZE - 8) / 8); - if (num_sdbt < CPUM_SF_MIN_SDBT) - num_sdbt = CPUM_SF_MIN_SDBT; - sdb_per_table = (PAGE_SIZE - 8) / 8; - - debug_sprintf_event(sfdbg, 4, "alloc_sampling_buffer: num_sdbt=%lu " - "num_sdb=%lu sdb_per_table=%lu\n", - num_sdbt, num_sdb, sdb_per_table); - sdbt = NULL; - tail = sdbt; - - for (j = 0; j < num_sdbt; j++) { - sdbt = (unsigned long *) get_zeroed_page(GFP_KERNEL); - if (!sdbt) { - rc = -ENOMEM; - goto allocate_sdbt_error; - } + /* Link the table origin to point to itself to prepare for + * realloc_sampling_buffer() invocation. + */ + sfb->tail = sfb->sdbt; + *sfb->tail = (unsigned long)(void *) sfb->sdbt + 1; - /* save origin of sample-data-block-table */ - if (!sfb->sdbt) - sfb->sdbt = (unsigned long) sdbt; + /* Allocate requested number of sample-data-blocks */ + rc = realloc_sampling_buffer(sfb, num_sdb, GFP_KERNEL); + if (rc) { + free_sampling_buffer(sfb); + debug_sprintf_event(sfdbg, 4, "alloc_sampling_buffer: " + "realloc_sampling_buffer failed with rc=%i\n", rc); + } else + debug_sprintf_event(sfdbg, 4, + "alloc_sampling_buffer: tear=%p dear=%p\n", + sfb->sdbt, (void *) *sfb->sdbt); + return rc; +} - /* link current page to tail of chain */ - if (tail) - *tail = (unsigned long)(void *) sdbt + 1; +static void sfb_set_limits(unsigned long min, unsigned long max) +{ + CPUM_SF_MIN_SDB = min; + CPUM_SF_MAX_SDB = max; +} - for (k = 0; k < num_sdb && k < sdb_per_table; k++) { - /* get and set SDB page */ - sdb = get_zeroed_page(GFP_KERNEL); - if (!sdb) { - rc = -ENOMEM; - goto allocate_sdbt_error; - } - *sdbt = sdb; - trailer = trailer_entry_ptr(*sdbt); - *trailer = SDB_TE_ALERT_REQ_MASK; - sdbt++; - } - num_sdb -= k; - sfb->num_sdb += k; /* count allocated sdb's */ - tail = sdbt; - } +static unsigned long sfb_pending_allocs(struct sf_buffer *sfb, + struct hw_perf_event *hwc) +{ + if (!sfb->sdbt) + return SFB_ALLOC_REG(hwc); + if (SFB_ALLOC_REG(hwc) > sfb->num_sdb) + return SFB_ALLOC_REG(hwc) - sfb->num_sdb; + return 0; +} - rc = 0; - if (tail) - *tail = sfb->sdbt + 1; - sfb->tail = (unsigned long) (void *)tail; +static int sfb_has_pending_allocs(struct sf_buffer *sfb, + struct hw_perf_event *hwc) +{ + return sfb_pending_allocs(sfb, hwc) > 0; +} -allocate_sdbt_error: - if (rc) - free_sampling_buffer(sfb); - else - debug_sprintf_event(sfdbg, 4, - "alloc_sampling_buffer: tear=%0lx dear=%0lx\n", - sfb->sdbt, *(unsigned long *) sfb->sdbt); - return rc; +static void sfb_account_allocs(unsigned long num, struct hw_perf_event *hwc) +{ + /* Limit the number SDBs to not exceed the maximum */ + num = min_t(unsigned long, num, CPUM_SF_MAX_SDB - SFB_ALLOC_REG(hwc)); + if (num) + SFB_ALLOC_REG(hwc) += num; } -static int allocate_sdbt(struct cpu_hw_sf *cpuhw, const struct hw_perf_event *hwc) +static void sfb_init_allocs(unsigned long num, struct hw_perf_event *hwc) +{ + SFB_ALLOC_REG(hwc) = 0; + sfb_account_allocs(num, hwc); +} + +static int allocate_sdbt(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc) { unsigned long n_sdb, freq; unsigned long factor; @@ -225,39 +353,133 @@ static int allocate_sdbt(struct cpu_hw_sf *cpuhw, const struct hw_perf_event *hw if (n_sdb < CPUM_SF_MIN_SDB) n_sdb = CPUM_SF_MIN_SDB; - /* Return if there is already a sampling buffer allocated. - * XXX Remove this later and check number of available and - * required sdb's and, if necessary, increase the sampling buffer. + /* If there is already a sampling buffer allocated, it is very likely + * that the sampling facility is enabled too. If the event to be + * initialized requires a greater sampling buffer, the allocation must + * be postponed. Changing the sampling buffer requires the sampling + * facility to be in the disabled state. So, account the number of + * required SDBs and let cpumsf_pmu_enable() resize the buffer just + * before the event is started. */ + sfb_init_allocs(n_sdb, hwc); if (sf_buffer_available(cpuhw)) return 0; debug_sprintf_event(sfdbg, 3, - "allocate_sdbt: rate=%lu f=%lu sdb=%lu/%i cpuhw=%p\n", + "allocate_sdbt: rate=%lu f=%lu sdb=%lu/%lu cpuhw=%p\n", SAMPL_RATE(hwc), freq, n_sdb, CPUM_SF_MAX_SDB, cpuhw); return alloc_sampling_buffer(&cpuhw->sfb, - min_t(unsigned long, n_sdb, CPUM_SF_MAX_SDB)); + sfb_pending_allocs(&cpuhw->sfb, hwc)); } +static unsigned long min_percent(unsigned int percent, unsigned long base, + unsigned long min) +{ + return min_t(unsigned long, min, DIV_ROUND_UP(percent * base, 100)); +} -/* Number of perf events counting hardware events */ -static atomic_t num_events; -/* Used to avoid races in calling reserve/release_cpumf_hardware */ -static DEFINE_MUTEX(pmc_reserve_mutex); +static unsigned long compute_sfb_extent(unsigned long ratio, unsigned long base) +{ + /* Use a percentage-based approach to extend the sampling facility + * buffer. Accept up to 5% sample data loss. + * Vary the extents between 1% to 5% of the current number of + * sample-data-blocks. + */ + if (ratio <= 5) + return 0; + if (ratio <= 25) + return min_percent(1, base, 1); + if (ratio <= 50) + return min_percent(1, base, 1); + if (ratio <= 75) + return min_percent(2, base, 2); + if (ratio <= 100) + return min_percent(3, base, 3); + if (ratio <= 250) + return min_percent(4, base, 4); + + return min_percent(5, base, 8); +} -/* - * sf_disable() - Switch off sampling facility +static void sfb_account_overflows(struct cpu_hw_sf *cpuhw, + struct hw_perf_event *hwc) +{ + unsigned long ratio, num; + + if (!OVERFLOW_REG(hwc)) + return; + + /* The sample_overflow contains the average number of sample data + * that has been lost because sample-data-blocks were full. + * + * Calculate the total number of sample data entries that has been + * discarded. Then calculate the ratio of lost samples to total samples + * per second in percent. + */ + ratio = DIV_ROUND_UP(100 * OVERFLOW_REG(hwc) * cpuhw->sfb.num_sdb, + sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc))); + + /* Compute number of sample-data-blocks */ + num = compute_sfb_extent(ratio, cpuhw->sfb.num_sdb); + if (num) + sfb_account_allocs(num, hwc); + + debug_sprintf_event(sfdbg, 5, "sfb: overflow: overflow=%llu ratio=%lu" + " num=%lu\n", OVERFLOW_REG(hwc), ratio, num); + OVERFLOW_REG(hwc) = 0; +} + +/* extend_sampling_buffer() - Extend sampling buffer + * @sfb: Sampling buffer structure (for local CPU) + * @hwc: Perf event hardware structure + * + * Use this function to extend the sampling buffer based on the overflow counter + * and postponed allocation extents stored in the specified Perf event hardware. + * + * Important: This function disables the sampling facility in order to safely + * change the sampling buffer structure. Do not call this function + * when the PMU is active. */ -static int sf_disable(void) +static void extend_sampling_buffer(struct sf_buffer *sfb, + struct hw_perf_event *hwc) { - struct hws_lsctl_request_block sreq; + unsigned long num, num_old; + int rc; - memset(&sreq, 0, sizeof(sreq)); - return lsctl(&sreq); + num = sfb_pending_allocs(sfb, hwc); + if (!num) + return; + num_old = sfb->num_sdb; + + /* Disable the sampling facility to reset any states and also + * clear pending measurement alerts. + */ + sf_disable(); + + /* Extend the sampling buffer. + * This memory allocation typically happens in an atomic context when + * called by perf. Because this is a reallocation, it is fine if the + * new SDB-request cannot be satisfied immediately. + */ + rc = realloc_sampling_buffer(sfb, num, GFP_ATOMIC); + if (rc) + debug_sprintf_event(sfdbg, 5, "sfb: extend: realloc " + "failed with rc=%i\n", rc); + + if (sfb_has_pending_allocs(sfb, hwc)) + debug_sprintf_event(sfdbg, 5, "sfb: extend: " + "req=%lu alloc=%lu remaining=%lu\n", + num, sfb->num_sdb - num_old, + sfb_pending_allocs(sfb, hwc)); } +/* Number of perf events counting hardware events */ +static atomic_t num_events; +/* Used to avoid races in calling reserve/release_cpumf_hardware */ +static DEFINE_MUTEX(pmc_reserve_mutex); + #define PMC_INIT 0 #define PMC_RELEASE 1 #define PMC_FAILURE 2 @@ -345,19 +567,17 @@ static void hw_init_period(struct hw_perf_event *hwc, u64 period) } static void hw_reset_registers(struct hw_perf_event *hwc, - unsigned long sdbt_origin) + unsigned long *sdbt_origin) { - TEAR_REG(hwc) = sdbt_origin; /* (re)set to first sdb table */ + /* (Re)set to first sample-data-block-table */ + TEAR_REG(hwc) = (unsigned long) sdbt_origin; } static unsigned long hw_limit_rate(const struct hws_qsi_info_block *si, unsigned long rate) { - if (rate < si->min_sampl_rate) - return si->min_sampl_rate; - if (rate > si->max_sampl_rate) - return si->max_sampl_rate; - return rate; + return clamp_t(unsigned long, rate, + si->min_sampl_rate, si->max_sampl_rate); } static int __hw_perf_event_init(struct perf_event *event) @@ -448,6 +668,10 @@ static int __hw_perf_event_init(struct perf_event *event) SAMPL_RATE(hwc) = rate; hw_init_period(hwc, SAMPL_RATE(hwc)); + /* Initialize sample data overflow accounting */ + hwc->extra_reg.reg = REG_OVERFLOW; + OVERFLOW_REG(hwc) = 0; + /* Allocate the per-CPU sampling buffer using the CPU information * from the event. If the event is not pinned to a particular * CPU (event->cpu == -1; or cpuhw == NULL), allocate sampling @@ -513,6 +737,7 @@ static int cpumsf_pmu_event_init(struct perf_event *event) static void cpumsf_pmu_enable(struct pmu *pmu) { struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf); + struct hw_perf_event *hwc; int err; if (cpuhw->flags & PMU_F_ENABLED) @@ -521,6 +746,26 @@ static void cpumsf_pmu_enable(struct pmu *pmu) if (cpuhw->flags & PMU_F_ERR_MASK) return; + /* Check whether to extent the sampling buffer. + * + * Two conditions trigger an increase of the sampling buffer for a + * perf event: + * 1. Postponed buffer allocations from the event initialization. + * 2. Sampling overflows that contribute to pending allocations. + * + * Note that the extend_sampling_buffer() function disables the sampling + * facility, but it can be fully re-enabled using sampling controls that + * have been saved in cpumsf_pmu_disable(). + */ + if (cpuhw->event) { + hwc = &cpuhw->event->hw; + /* Account number of overflow-designated buffer extents */ + sfb_account_overflows(cpuhw, hwc); + if (sfb_has_pending_allocs(&cpuhw->sfb, hwc)) + extend_sampling_buffer(&cpuhw->sfb, hwc); + } + + /* (Re)enable the PMU and sampling facility */ cpuhw->flags |= PMU_F_ENABLED; barrier(); @@ -632,8 +877,6 @@ static int perf_push_sample(struct perf_event *event, if (perf_event_overflow(event, &data, ®s)) { overflow = 1; event->pmu->stop(event, 0); - debug_sprintf_event(sfdbg, 4, "perf_push_sample: PMU stopped" - " because of an event overflow\n"); } perf_event_update_userpage(event); @@ -710,11 +953,11 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all) struct hw_perf_event *hwc = &event->hw; struct hws_trailer_entry *te; unsigned long *sdbt; - unsigned long long event_overflow, sampl_overflow; + unsigned long long event_overflow, sampl_overflow, num_sdb; int done; sdbt = (unsigned long *) TEAR_REG(hwc); - done = event_overflow = sampl_overflow = 0; + done = event_overflow = sampl_overflow = num_sdb = 0; while (!done) { /* Get the trailer entry of the sample-data-block */ te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt); @@ -726,17 +969,13 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all) break; } - /* Check sample overflow count */ - if (te->overflow) { - /* Increment sample overflow counter */ - sampl_overflow += te->overflow; - - /* XXX: If an sample overflow occurs, increase the - * sampling buffer. Set a "realloc" flag because - * the sampler must be re-enabled for changing - * the sample-data-block-table content. + /* Check the sample overflow count */ + if (te->overflow) + /* Account sample overflows and, if a particular limit + * is reached, extend the sampling buffer. + * For details, see sfb_account_overflows(). */ - } + sampl_overflow += te->overflow; /* Timestamps are valid for full sample-data-blocks only */ debug_sprintf_event(sfdbg, 6, "hw_perf_event_update: sdbt=%p " @@ -749,6 +988,7 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all) * is stopped and remaining samples will be discarded. */ hw_collect_samples(event, sdbt, &event_overflow); + num_sdb++; /* Reset trailer */ xchg(&te->overflow, 0); @@ -775,6 +1015,10 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all) flush_all = 1; } + /* Account sample overflows in the event hardware structure */ + if (sampl_overflow) + OVERFLOW_REG(hwc) = DIV_ROUND_UP(OVERFLOW_REG(hwc) + + sampl_overflow, 1 + num_sdb); if (sampl_overflow || event_overflow) debug_sprintf_event(sfdbg, 4, "hw_perf_event_update: " "overflow stats: sample=%llu event=%llu\n", @@ -849,7 +1093,7 @@ static int cpumsf_pmu_add(struct perf_event *event, int flags) */ cpuhw->lsctl.s = 0; cpuhw->lsctl.h = 1; - cpuhw->lsctl.tear = cpuhw->sfb.sdbt; + cpuhw->lsctl.tear = (unsigned long) cpuhw->sfb.sdbt; cpuhw->lsctl.dear = *(unsigned long *) cpuhw->sfb.sdbt; cpuhw->lsctl.interval = SAMPL_RATE(&event->hw); hw_reset_registers(&event->hw, cpuhw->sfb.sdbt); @@ -1018,6 +1262,48 @@ static int __cpuinit cpumf_pmu_notifier(struct notifier_block *self, return NOTIFY_OK; } +static int param_get_sfb_size(char *buffer, const struct kernel_param *kp) +{ + if (!cpum_sf_avail()) + return -ENODEV; + return sprintf(buffer, "%lu,%lu", CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB); +} + +static int param_set_sfb_size(const char *val, const struct kernel_param *kp) +{ + int rc; + unsigned long min, max; + + if (!cpum_sf_avail()) + return -ENODEV; + if (!val || !strlen(val)) + return -EINVAL; + + /* Valid parameter values: "min,max" or "max" */ + min = CPUM_SF_MIN_SDB; + max = CPUM_SF_MAX_SDB; + if (strchr(val, ',')) + rc = (sscanf(val, "%lu,%lu", &min, &max) == 2) ? 0 : -EINVAL; + else + rc = kstrtoul(val, 10, &max); + + if (min < 2 || min >= max || max > get_num_physpages()) + rc = -EINVAL; + if (rc) + return rc; + + sfb_set_limits(min, max); + pr_info("Changed sampling buffer settings: min=%lu max=%lu\n", + CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB); + return 0; +} + +#define param_check_sfb_size(name, p) __param_check(name, p, void) +static struct kernel_param_ops param_ops_sfb_size = { + .set = param_set_sfb_size, + .get = param_get_sfb_size, +}; + static int __init init_cpum_sampling_pmu(void) { int err; @@ -1047,3 +1333,4 @@ out: return err; } arch_initcall(init_cpum_sampling_pmu); +core_param(cpum_sfb_size, CPUM_SF_MAX_SDB, sfb_size, 0640); -- cgit v0.10.2 From fcc77f507333776eaa336ab4ff49c23422f53703 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Thu, 12 Dec 2013 17:26:51 +0100 Subject: s390/cpum_sf: Atomically reset trailer entry fields of sample-data-blocks Ensure to reset the sample-data-block full indicator and the overflow counter at the same time. This must be done atomically because the sampling hardware is still active while full sample-data-block is processed. Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h index d707abc..b0b3059 100644 --- a/arch/s390/include/asm/cpu_mf.h +++ b/arch/s390/include/asm/cpu_mf.h @@ -115,10 +115,15 @@ struct hws_data_entry { } __packed; struct hws_trailer_entry { - unsigned int f:1; /* 0 - Block Full Indicator */ - unsigned int a:1; /* 1 - Alert request control */ - unsigned int t:1; /* 2 - Timestamp format */ - unsigned long long:61; /* 3 - 63: Reserved */ + union { + struct { + unsigned int f:1; /* 0 - Block Full Indicator */ + unsigned int a:1; /* 1 - Alert request control */ + unsigned int t:1; /* 2 - Timestamp format */ + unsigned long long:61; /* 3 - 63: Reserved */ + }; + unsigned long long flags; /* 0 - 63: All indicators */ + }; unsigned long long overflow; /* 64 - sample Overflow count */ unsigned long long timestamp; /* 16 - time-stamp */ unsigned long long timestamp1; /* */ diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index ea16560..9202f28 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -953,7 +953,7 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all) struct hw_perf_event *hwc = &event->hw; struct hws_trailer_entry *te; unsigned long *sdbt; - unsigned long long event_overflow, sampl_overflow, num_sdb; + unsigned long long event_overflow, sampl_overflow, num_sdb, te_flags; int done; sdbt = (unsigned long *) TEAR_REG(hwc); @@ -990,9 +990,13 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all) hw_collect_samples(event, sdbt, &event_overflow); num_sdb++; - /* Reset trailer */ - xchg(&te->overflow, 0); - xchg((unsigned char *) te, 0x40); + /* Reset trailer (using compare-double-and-swap) */ + do { + te_flags = te->flags & ~SDB_TE_BUFFER_FULL_MASK; + te_flags |= SDB_TE_ALERT_REQ_MASK; + } while (!cmpxchg_double(&te->flags, &te->overflow, + te->flags, te->overflow, + te_flags, 0ULL)); /* Advance to next sample-data-block */ sdbt++; -- cgit v0.10.2 From 443d4beb823d4dccaaf964b59df9dd38b4d6aae7 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Thu, 12 Dec 2013 17:38:50 +0100 Subject: s390/cpum_sf: Add helper to read TOD from trailer entries The trailer entry contains a timestamp of the time when the sample-data-block became full. The timestamp specifies a TOD (time-of-day) value in either the STCK or STCKE format. Provide a helper function to return the TOD value depending on the setting of time format indicator. Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h index b0b3059..09dc5fa 100644 --- a/arch/s390/include/asm/cpu_mf.h +++ b/arch/s390/include/asm/cpu_mf.h @@ -125,8 +125,7 @@ struct hws_trailer_entry { unsigned long long flags; /* 0 - 63: All indicators */ }; unsigned long long overflow; /* 64 - sample Overflow count */ - unsigned long long timestamp; /* 16 - time-stamp */ - unsigned long long timestamp1; /* */ + unsigned char timestamp[16]; /* 16 - 31 timestamp */ unsigned long long reserved1; /* 32 -Reserved */ unsigned long long reserved2; /* */ unsigned long long progusage1; /* 48 - reserved for programming use */ @@ -232,6 +231,17 @@ static inline unsigned long sample_rate_to_freq(struct hws_qsi_info_block *qsi, #define SDB_TE_ALERT_REQ_MASK 0x4000000000000000UL #define SDB_TE_BUFFER_FULL_MASK 0x8000000000000000UL +/* Return TOD timestamp contained in an trailer entry */ +static inline unsigned long long trailer_timestamp(struct hws_trailer_entry *te) +{ + /* TOD in STCKE format */ + if (te->t) + return *((unsigned long long *) &te->timestamp[1]); + + /* TOD in STCK format */ + return *((unsigned long long *) &te->timestamp[0]); +} + /* Return pointer to trailer entry of an sample data block */ static inline unsigned long *trailer_entry_ptr(unsigned long v) { diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 9202f28..3ab7e67 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -981,7 +981,7 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all) debug_sprintf_event(sfdbg, 6, "hw_perf_event_update: sdbt=%p " "overflow=%llu timestamp=0x%llx\n", sdbt, te->overflow, - (te->f) ? te->timestamp : 0ULL); + (te->f) ? trailer_timestamp(te) : 0ULL); /* Collect all samples from a single sample-data-block and * flag if an (perf) event overflow happened. If so, the PMU -- cgit v0.10.2 From 443e802bab16916f9a51a34f2213f4dee6e8762c Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Thu, 12 Dec 2013 17:54:57 +0100 Subject: s390/cpum_sf: Detect KVM guest samples The host-program-parameter (hpp) value of basic sample-data-entries designates a SIE control block that is set by the LPP instruction in sie64a(). Non-zero values indicate guest samples, a value of zero indicates a host sample. For perf samples, host and guest samples are distinguished using particular PERF_MISC_* flags. The perf layer calls perf_misc_flags() to set the flags based on the pt_regs content. For each sample-data-entry, the cpum_sf PMU creates a pt_regs structure with the sample-data information. An additional flag structure is added to easily distinguish between host and guest samples. Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h index 99d7f4e..7667bde 100644 --- a/arch/s390/include/asm/perf_event.h +++ b/arch/s390/include/asm/perf_event.h @@ -42,6 +42,12 @@ extern unsigned long perf_instruction_pointer(struct pt_regs *regs); extern unsigned long perf_misc_flags(struct pt_regs *regs); #define perf_misc_flags(regs) perf_misc_flags(regs) +/* Perf pt_regs extension for sample-data-entry indicators */ +struct perf_sf_sde_regs { + unsigned char in_guest:1; /* guest sample */ + unsigned long reserved:63; /* reserved */ +}; + /* Perf PMU definitions for the counter facility */ #define PERF_CPUM_CF_MAX_CTR 256 diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 3ab7e67..d611fac 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -840,6 +840,7 @@ static int perf_push_sample(struct perf_event *event, { int overflow; struct pt_regs regs; + struct perf_sf_sde_regs *sde_regs; struct perf_sample_data data; /* Skip samples that are invalid or for which the instruction address @@ -850,7 +851,16 @@ static int perf_push_sample(struct perf_event *event, perf_sample_data_init(&data, 0, event->hw.last_period); + /* Setup pt_regs to look like an CPU-measurement external interrupt + * using the Program Request Alert code. The regs.int_parm_long + * field which is unused contains additional sample-data-entry related + * indicators. + */ memset(®s, 0, sizeof(regs)); + regs.int_code = 0x1407; + regs.int_parm = CPU_MF_INT_SF_PRA; + sde_regs = (struct perf_sf_sde_regs *) ®s.int_parm_long; + regs.psw.addr = sample->ia; if (sample->T) regs.psw.mask |= PSW_MASK_DAT; @@ -873,6 +883,16 @@ static int perf_push_sample(struct perf_event *event, break; } + /* The host-program-parameter (hpp) contains the sie control + * block that is set by sie64a() in entry64.S. Check if hpp + * refers to a valid control block and set sde_regs flags + * accordingly. This would allow to use hpp values for other + * purposes too. + * For now, simply use a non-zero value as guest indicator. + */ + if (sample->hpp) + sde_regs->in_guest = 1; + overflow = 0; if (perf_event_overflow(event, &data, ®s)) { overflow = 1; diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c index 3bd2bf0..60a6826 100644 --- a/arch/s390/kernel/perf_event.c +++ b/arch/s390/kernel/perf_event.c @@ -1,7 +1,7 @@ /* * Performance event support for s390x * - * Copyright IBM Corp. 2012 + * Copyright IBM Corp. 2012, 2013 * Author(s): Hendrik Brueckner * * This program is free software; you can redistribute it and/or modify @@ -89,8 +89,31 @@ static unsigned long perf_misc_guest_flags(struct pt_regs *regs) : PERF_RECORD_MISC_GUEST_KERNEL; } +static unsigned long perf_misc_flags_sf(struct pt_regs *regs) +{ + struct perf_sf_sde_regs *sde_regs; + unsigned long flags; + + sde_regs = (struct perf_sf_sde_regs *) ®s->int_parm_long; + if (sde_regs->in_guest) + flags = user_mode(regs) ? PERF_RECORD_MISC_GUEST_USER + : PERF_RECORD_MISC_GUEST_KERNEL; + else + flags = user_mode(regs) ? PERF_RECORD_MISC_USER + : PERF_RECORD_MISC_KERNEL; + return flags; +} + unsigned long perf_misc_flags(struct pt_regs *regs) { + /* Check if the cpum_sf PMU has created the pt_regs structure. + * In this case, perf misc flags can be easily extracted. Otherwise, + * do regular checks on the pt_regs content. + */ + if (regs->int_code == 0x1407 && regs->int_parm == CPU_MF_INT_SF_PRA) + if (!regs->gprs[15]) + return perf_misc_flags_sf(regs); + if (is_in_guest(regs)) return perf_misc_guest_flags(regs); -- cgit v0.10.2 From dd127b3b977b81eab58d1d7ee037195cf0bbeba7 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Thu, 12 Dec 2013 18:05:20 +0100 Subject: s390/cpum_sf: Filter perf events based event->attr.exclude_* settings Introduce the perf_exclude_event() function to filter perf samples according to event->attr.exclude_* settings. During event initialization, reset event exclude settings that are not supported. Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index d611fac..28fa2f2 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -723,10 +723,19 @@ static int cpumsf_pmu_event_init(struct perf_event *event) return -ENOENT; } + /* Check online status of the CPU to which the event is pinned */ if (event->cpu >= nr_cpumask_bits || (event->cpu >= 0 && !cpu_online(event->cpu))) return -ENODEV; + /* Force reset of idle/hv excludes regardless of what the + * user requested. + */ + if (event->attr.exclude_hv) + event->attr.exclude_hv = 0; + if (event->attr.exclude_idle) + event->attr.exclude_idle = 0; + err = __hw_perf_event_init(event); if (unlikely(err)) if (event->destroy) @@ -824,6 +833,29 @@ static void cpumsf_pmu_disable(struct pmu *pmu) cpuhw->flags &= ~PMU_F_ENABLED; } +/* perf_exclude_event() - Filter event + * @event: The perf event + * @regs: pt_regs structure + * @sde_regs: Sample-data-entry (sde) regs structure + * + * Filter perf events according to their exclude specification. + * + * Return non-zero if the event shall be excluded. + */ +static int perf_exclude_event(struct perf_event *event, struct pt_regs *regs, + struct perf_sf_sde_regs *sde_regs) +{ + if (event->attr.exclude_user && user_mode(regs)) + return 1; + if (event->attr.exclude_kernel && !user_mode(regs)) + return 1; + if (event->attr.exclude_guest && sde_regs->in_guest) + return 1; + if (event->attr.exclude_host && !sde_regs->in_guest) + return 1; + return 0; +} + /* perf_push_sample() - Push samples to perf * @event: The perf event * @sample: Hardware sample data @@ -894,12 +926,14 @@ static int perf_push_sample(struct perf_event *event, sde_regs->in_guest = 1; overflow = 0; + if (perf_exclude_event(event, ®s, sde_regs)) + goto out; if (perf_event_overflow(event, &data, ®s)) { overflow = 1; event->pmu->stop(event, 0); } perf_event_update_userpage(event); - +out: return overflow; } -- cgit v0.10.2 From 7e75fc3ff4cffd90684816d69838f8730ac3e072 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Fri, 13 Dec 2013 11:42:44 +0100 Subject: s390/cpum_sf: Add raw data sampling to support the diagnostic-sampling function Also support the diagnostic-sampling function in addition to the basic-sampling function. Diagnostic-sampling data entries contain hardware model specific sampling data and additional programs are required to analyze the data. To deliver diagnostic-sampling, as well, as basis-sampling data entries to user space, introduce support for sampling "raw data". If this particular perf sampling type (PERF_SAMPLE_RAW) is used, sampling data entries are copied to user space. External programs can then analyze these data. Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h index 09dc5fa..cb700d5 100644 --- a/arch/s390/include/asm/cpu_mf.h +++ b/arch/s390/include/asm/cpu_mf.h @@ -59,13 +59,15 @@ struct cpumf_ctr_info { /* QUERY SAMPLING INFORMATION block */ struct hws_qsi_info_block { /* Bit(s) */ unsigned int b0_13:14; /* 0-13: zeros */ - unsigned int as:1; /* 14: sampling authorisation control*/ - unsigned int b15_21:7; /* 15-21: zeros */ - unsigned int es:1; /* 22: sampling enable control */ - unsigned int b23_29:7; /* 23-29: zeros */ - unsigned int cs:1; /* 30: sampling activation control */ - unsigned int:1; /* 31: reserved */ - unsigned int bsdes:16; /* 4-5: size of basic sampling entry */ + unsigned int as:1; /* 14: basic-sampling authorization */ + unsigned int ad:1; /* 15: diag-sampling authorization */ + unsigned int b16_21:6; /* 16-21: zeros */ + unsigned int es:1; /* 22: basic-sampling enable control */ + unsigned int ed:1; /* 23: diag-sampling enable control */ + unsigned int b24_29:6; /* 24-29: zeros */ + unsigned int cs:1; /* 30: basic-sampling activation control */ + unsigned int cd:1; /* 31: diag-sampling activation control */ + unsigned int bsdes:16; /* 4-5: size of basic sampling entry */ unsigned int dsdes:16; /* 6-7: size of diagnostic sampling entry */ unsigned long min_sampl_rate; /* 8-15: minimum sampling interval */ unsigned long max_sampl_rate; /* 16-23: maximum sampling interval*/ @@ -82,10 +84,11 @@ struct hws_lsctl_request_block { unsigned int s:1; /* 0: maximum buffer indicator */ unsigned int h:1; /* 1: part. level reserved for VM use*/ unsigned long long b2_53:52;/* 2-53: zeros */ - unsigned int es:1; /* 54: sampling enable control */ - unsigned int b55_61:7; /* 55-61: - zeros */ - unsigned int cs:1; /* 62: sampling activation control */ - unsigned int b63:1; /* 63: zero */ + unsigned int es:1; /* 54: basic-sampling enable control */ + unsigned int ed:1; /* 55: diag-sampling enable control */ + unsigned int b56_61:6; /* 56-61: - zeros */ + unsigned int cs:1; /* 62: basic-sampling activation control */ + unsigned int cd:1; /* 63: diag-sampling activation control */ unsigned long interval; /* 8-15: sampling interval */ unsigned long tear; /* 16-23: TEAR contents */ unsigned long dear; /* 24-31: DEAR contents */ @@ -96,8 +99,7 @@ struct hws_lsctl_request_block { unsigned long rsvrd4; /* reserved */ } __packed; - -struct hws_data_entry { +struct hws_basic_entry { unsigned int def:16; /* 0-15 Data Entry Format */ unsigned int R:4; /* 16-19 reserved */ unsigned int U:4; /* 20-23 Number of unique instruct. */ @@ -114,6 +116,18 @@ struct hws_data_entry { unsigned long long hpp; /* Host Program Parameter */ } __packed; +struct hws_diag_entry { + unsigned int def:16; /* 0-15 Data Entry Format */ + unsigned int R:14; /* 16-19 and 20-30 reserved */ + unsigned int I:1; /* 31 entry valid or invalid */ + u8 data[]; /* Machine-dependent sample data */ +} __packed; + +struct hws_combined_entry { + struct hws_basic_entry basic; /* Basic-sampling data entry */ + struct hws_diag_entry diag; /* Diagnostic-sampling data entry */ +} __packed; + struct hws_trailer_entry { union { struct { diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h index 7667bde..bd4573f 100644 --- a/arch/s390/include/asm/perf_event.h +++ b/arch/s390/include/asm/perf_event.h @@ -52,15 +52,39 @@ struct perf_sf_sde_regs { #define PERF_CPUM_CF_MAX_CTR 256 /* Perf PMU definitions for the sampling facility */ -#define PERF_CPUM_SF_MAX_CTR 1 -#define PERF_EVENT_CPUM_SF 0xB0000UL /* Raw event ID */ +#define PERF_CPUM_SF_MAX_CTR 2 +#define PERF_EVENT_CPUM_SF 0xB0000UL /* Event: Basic-sampling */ +#define PERF_EVENT_CPUM_SF_DIAG 0xBD000UL /* Event: Combined-sampling */ +#define PERF_CPUM_SF_BASIC_MODE 0x0001 /* Basic-sampling flag */ +#define PERF_CPUM_SF_DIAG_MODE 0x0002 /* Diagnostic-sampling flag */ +#define PERF_CPUM_SF_MODE_MASK (PERF_CPUM_SF_BASIC_MODE| \ + PERF_CPUM_SF_DIAG_MODE) #define REG_NONE 0 #define REG_OVERFLOW 1 #define OVERFLOW_REG(hwc) ((hwc)->extra_reg.config) #define SFB_ALLOC_REG(hwc) ((hwc)->extra_reg.alloc) +#define RAWSAMPLE_REG(hwc) ((hwc)->config) #define TEAR_REG(hwc) ((hwc)->last_tag) #define SAMPL_RATE(hwc) ((hwc)->event_base) +#define SAMPL_FLAGS(hwc) ((hwc)->config_base) +#define SAMPL_DIAG_MODE(hwc) (SAMPL_FLAGS(hwc) & PERF_CPUM_SF_DIAG_MODE) + +/* Structure for sampling data entries to be passed as perf raw sample data + * to user space. Note that raw sample data must be aligned and, thus, might + * be padded with zeros. + */ +struct sf_raw_sample { +#define SF_RAW_SAMPLE_BASIC PERF_CPUM_SF_BASIC_MODE +#define SF_RAW_SAMPLE_DIAG PERF_CPUM_SF_DIAG_MODE + u64 format; + u32 size; /* Size of sf_raw_sample */ + u16 bsdes; /* Basic-sampling data entry size */ + u16 dsdes; /* Diagnostic-sampling data entry size */ + struct hws_basic_entry basic; /* Basic-sampling data entry */ + struct hws_diag_entry diag; /* Diagnostic-sampling data entry */ + u8 padding[]; /* Padding to next multiple of 8 */ +} __packed; /* Perf hardware reserve and release functions */ int perf_reserve_sampling(void); diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 28fa2f2..b4ec058 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -31,8 +32,8 @@ #define CPUM_SF_MIN_SDBT 1 /* Number of sample-data-blocks per sample-data-block-table (SDBT): - * The table contains SDB origin (8 bytes) and one SDBT origin that - * points to the next table. + * A table contains SDB pointers (8 bytes) and one table-link entry + * that points to the origin of the next SDBT. */ #define CPUM_SF_SDB_PER_TABLE ((PAGE_SIZE - 8) / 8) @@ -48,8 +49,11 @@ static inline int require_table_link(const void *sdbt) /* Minimum and maximum sampling buffer sizes: * - * This number represents the maximum size of the sampling buffer - * taking the number of sample-data-block-tables into account. + * This number represents the maximum size of the sampling buffer taking + * the number of sample-data-block-tables into account. Note that these + * numbers apply to the basic-sampling function only. + * The maximum number of SDBs is increased by CPUM_SF_SDB_DIAG_FACTOR if + * the diagnostic-sampling function is active. * * Sampling buffer size Buffer characteristics * --------------------------------------------------- @@ -63,6 +67,7 @@ static inline int require_table_link(const void *sdbt) */ static unsigned long __read_mostly CPUM_SF_MIN_SDB = 15; static unsigned long __read_mostly CPUM_SF_MAX_SDB = 8176; +static unsigned long __read_mostly CPUM_SF_SDB_DIAG_FACTOR = 1; struct sf_buffer { unsigned long *sdbt; /* Sample-data-block-table origin */ @@ -290,8 +295,20 @@ static int alloc_sampling_buffer(struct sf_buffer *sfb, unsigned long num_sdb) static void sfb_set_limits(unsigned long min, unsigned long max) { + struct hws_qsi_info_block si; + CPUM_SF_MIN_SDB = min; CPUM_SF_MAX_SDB = max; + + memset(&si, 0, sizeof(si)); + if (!qsi(&si)) + CPUM_SF_SDB_DIAG_FACTOR = DIV_ROUND_UP(si.dsdes, si.bsdes); +} + +static unsigned long sfb_max_limit(struct hw_perf_event *hwc) +{ + return SAMPL_DIAG_MODE(hwc) ? CPUM_SF_MAX_SDB * CPUM_SF_SDB_DIAG_FACTOR + : CPUM_SF_MAX_SDB; } static unsigned long sfb_pending_allocs(struct sf_buffer *sfb, @@ -312,8 +329,8 @@ static int sfb_has_pending_allocs(struct sf_buffer *sfb, static void sfb_account_allocs(unsigned long num, struct hw_perf_event *hwc) { - /* Limit the number SDBs to not exceed the maximum */ - num = min_t(unsigned long, num, CPUM_SF_MAX_SDB - SFB_ALLOC_REG(hwc)); + /* Limit the number of SDBs to not exceed the maximum */ + num = min_t(unsigned long, num, sfb_max_limit(hwc) - SFB_ALLOC_REG(hwc)); if (num) SFB_ALLOC_REG(hwc) += num; } @@ -324,32 +341,89 @@ static void sfb_init_allocs(unsigned long num, struct hw_perf_event *hwc) sfb_account_allocs(num, hwc); } -static int allocate_sdbt(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc) +static size_t event_sample_size(struct hw_perf_event *hwc) +{ + struct sf_raw_sample *sfr = (struct sf_raw_sample *) RAWSAMPLE_REG(hwc); + size_t sample_size; + + /* The sample size depends on the sampling function: The basic-sampling + * function must be always enabled, diagnostic-sampling function is + * optional. + */ + sample_size = sfr->bsdes; + if (SAMPL_DIAG_MODE(hwc)) + sample_size += sfr->dsdes; + + return sample_size; +} + +static void deallocate_buffers(struct cpu_hw_sf *cpuhw) +{ + if (cpuhw->sfb.sdbt) + free_sampling_buffer(&cpuhw->sfb); +} + +static int allocate_buffers(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc) { - unsigned long n_sdb, freq; - unsigned long factor; + unsigned long n_sdb, freq, factor; + size_t sfr_size, sample_size; + struct sf_raw_sample *sfr; + + /* Allocate raw sample buffer + * + * The raw sample buffer is used to temporarily store sampling data + * entries for perf raw sample processing. The buffer size mainly + * depends on the size of diagnostic-sampling data entries which is + * machine-specific. The exact size calculation includes: + * 1. The first 4 bytes of diagnostic-sampling data entries are + * already reflected in the sf_raw_sample structure. Subtract + * these bytes. + * 2. The perf raw sample data must be 8-byte aligned (u64) and + * perf's internal data size must be considered too. So add + * an additional u32 for correct alignment and subtract before + * allocating the buffer. + * 3. Store the raw sample buffer pointer in the perf event + * hardware structure. + */ + sfr_size = ALIGN((sizeof(*sfr) - sizeof(sfr->diag) + cpuhw->qsi.dsdes) + + sizeof(u32), sizeof(u64)); + sfr_size -= sizeof(u32); + sfr = kzalloc(sfr_size, GFP_KERNEL); + if (!sfr) + return -ENOMEM; + sfr->size = sfr_size; + sfr->bsdes = cpuhw->qsi.bsdes; + sfr->dsdes = cpuhw->qsi.dsdes; + RAWSAMPLE_REG(hwc) = (unsigned long) sfr; /* Calculate sampling buffers using 4K pages * - * 1. Use frequency as input. The samping buffer is designed for - * a complete second. This can be adjusted through the "factor" - * variable. + * 1. Determine the sample data size which depends on the used + * sampling functions, for example, basic-sampling or + * basic-sampling with diagnostic-sampling. + * + * 2. Use the sampling frequency as input. The sampling buffer is + * designed for almost one second. This can be adjusted through + * the "factor" variable. * In any case, alloc_sampling_buffer() sets the Alert Request - * Control indicator to trigger measurement-alert to harvest + * Control indicator to trigger a measurement-alert to harvest * sample-data-blocks (sdb). * - * 2. Compute the number of sample-data-blocks and ensure a minimum + * 3. Compute the number of sample-data-blocks and ensure a minimum * of CPUM_SF_MIN_SDB. Also ensure the upper limit does not - * exceed CPUM_SF_MAX_SDB. See also the remarks for these - * symbolic constants. + * exceed a "calculated" maximum. The symbolic maximum is + * designed for basic-sampling only and needs to be increased if + * diagnostic-sampling is active. + * See also the remarks for these symbolic constants. * - * 3. Compute number of pages used for the sample-data-block-table - * and ensure a minimum of CPUM_SF_MIN_SDBT (at minimum one table - * to manage up to 511 sample-data-blocks). + * 4. Compute the number of sample-data-block-tables (SDBT) and + * ensure a minimum of CPUM_SF_MIN_SDBT (one table can manage up + * to 511 SDBs). */ + sample_size = event_sample_size(hwc); freq = sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc)); factor = 1; - n_sdb = DIV_ROUND_UP(freq, factor * ((PAGE_SIZE-64) / cpuhw->qsi.bsdes)); + n_sdb = DIV_ROUND_UP(freq, factor * ((PAGE_SIZE-64) / sample_size)); if (n_sdb < CPUM_SF_MIN_SDB) n_sdb = CPUM_SF_MIN_SDB; @@ -366,8 +440,10 @@ static int allocate_sdbt(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc) return 0; debug_sprintf_event(sfdbg, 3, - "allocate_sdbt: rate=%lu f=%lu sdb=%lu/%lu cpuhw=%p\n", - SAMPL_RATE(hwc), freq, n_sdb, CPUM_SF_MAX_SDB, cpuhw); + "allocate_buffers: rate=%lu f=%lu sdb=%lu/%lu" + " sample_size=%lu cpuhw=%p\n", + SAMPL_RATE(hwc), freq, n_sdb, sfb_max_limit(hwc), + sample_size, cpuhw); return alloc_sampling_buffer(&cpuhw->sfb, sfb_pending_allocs(&cpuhw->sfb, hwc)); @@ -509,10 +585,8 @@ static void setup_pmc_cpu(void *flags) if (err) { pr_err("Switching off the sampling facility failed " "with rc=%i\n", err); - } else { - if (cpusf->sfb.sdbt) - free_sampling_buffer(&cpusf->sfb); - } + } else + deallocate_buffers(cpusf); debug_sprintf_event(sfdbg, 5, "setup_pmc_cpu: released: cpuhw=%p\n", cpusf); break; @@ -550,6 +624,10 @@ static int reserve_pmc_hardware(void) static void hw_perf_event_destroy(struct perf_event *event) { + /* Free raw sample buffer */ + if (RAWSAMPLE_REG(&event->hw)) + kfree((void *) RAWSAMPLE_REG(&event->hw)); + /* Release PMC if this is the last perf event */ if (!atomic_add_unless(&num_events, -1, 1)) { mutex_lock(&pmc_reserve_mutex); @@ -569,8 +647,15 @@ static void hw_init_period(struct hw_perf_event *hwc, u64 period) static void hw_reset_registers(struct hw_perf_event *hwc, unsigned long *sdbt_origin) { + struct sf_raw_sample *sfr; + /* (Re)set to first sample-data-block-table */ TEAR_REG(hwc) = (unsigned long) sdbt_origin; + + /* (Re)set raw sampling buffer register */ + sfr = (struct sf_raw_sample *) RAWSAMPLE_REG(hwc); + memset(&sfr->basic, 0, sizeof(sfr->basic)); + memset(&sfr->diag, 0, sfr->dsdes); } static unsigned long hw_limit_rate(const struct hws_qsi_info_block *si, @@ -634,6 +719,20 @@ static int __hw_perf_event_init(struct perf_event *event) goto out; } + /* Always enable basic sampling */ + SAMPL_FLAGS(hwc) = PERF_CPUM_SF_BASIC_MODE; + + /* Check if diagnostic sampling is requested. Deny if the required + * sampling authorization is missing. + */ + if (attr->config == PERF_EVENT_CPUM_SF_DIAG) { + if (!si.ad) { + err = -EPERM; + goto out; + } + SAMPL_FLAGS(hwc) |= PERF_CPUM_SF_DIAG_MODE; + } + /* The sampling information (si) contains information about the * min/max sampling intervals and the CPU speed. So calculate the * correct sampling interval and avoid the whole period adjust @@ -679,14 +778,14 @@ static int __hw_perf_event_init(struct perf_event *event) */ if (cpuhw) /* Event is pinned to a particular CPU */ - err = allocate_sdbt(cpuhw, hwc); + err = allocate_buffers(cpuhw, hwc); else { /* Event is not pinned, allocate sampling buffer on * each online CPU */ for_each_online_cpu(cpu) { cpuhw = &per_cpu(cpu_hw_sf, cpu); - err = allocate_sdbt(cpuhw, hwc); + err = allocate_buffers(cpuhw, hwc); if (err) break; } @@ -705,7 +804,8 @@ static int cpumsf_pmu_event_init(struct perf_event *event) switch (event->attr.type) { case PERF_TYPE_RAW: - if (event->attr.config != PERF_EVENT_CPUM_SF) + if ((event->attr.config != PERF_EVENT_CPUM_SF) && + (event->attr.config != PERF_EVENT_CPUM_SF_DIAG)) return -ENOENT; break; case PERF_TYPE_HARDWARE: @@ -786,8 +886,9 @@ static void cpumsf_pmu_enable(struct pmu *pmu) return; } - debug_sprintf_event(sfdbg, 6, "pmu_enable: es=%i cs=%i tear=%p dear=%p\n", - cpuhw->lsctl.es, cpuhw->lsctl.cs, + debug_sprintf_event(sfdbg, 6, "pmu_enable: es=%i cs=%i ed=%i cd=%i " + "tear=%p dear=%p\n", cpuhw->lsctl.es, cpuhw->lsctl.cs, + cpuhw->lsctl.ed, cpuhw->lsctl.cd, (void *) cpuhw->lsctl.tear, (void *) cpuhw->lsctl.dear); } @@ -807,6 +908,7 @@ static void cpumsf_pmu_disable(struct pmu *pmu) /* Switch off sampling activation control */ inactive = cpuhw->lsctl; inactive.cs = 0; + inactive.cd = 0; err = lsctl(&inactive); if (err) { @@ -867,21 +969,19 @@ static int perf_exclude_event(struct perf_event *event, struct pt_regs *regs, * * Return non-zero if an event overflow occurred. */ -static int perf_push_sample(struct perf_event *event, - struct hws_data_entry *sample) +static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr) { int overflow; struct pt_regs regs; struct perf_sf_sde_regs *sde_regs; struct perf_sample_data data; + struct perf_raw_record raw; - /* Skip samples that are invalid or for which the instruction address - * is not predictable. For the latter, the wait-state bit is set. - */ - if (sample->I || sample->W) - return 0; - + /* Setup perf sample */ perf_sample_data_init(&data, 0, event->hw.last_period); + raw.size = sfr->size; + raw.data = sfr; + data.raw = &raw; /* Setup pt_regs to look like an CPU-measurement external interrupt * using the Program Request Alert code. The regs.int_parm_long @@ -893,14 +993,14 @@ static int perf_push_sample(struct perf_event *event, regs.int_parm = CPU_MF_INT_SF_PRA; sde_regs = (struct perf_sf_sde_regs *) ®s.int_parm_long; - regs.psw.addr = sample->ia; - if (sample->T) + regs.psw.addr = sfr->basic.ia; + if (sfr->basic.T) regs.psw.mask |= PSW_MASK_DAT; - if (sample->W) + if (sfr->basic.W) regs.psw.mask |= PSW_MASK_WAIT; - if (sample->P) + if (sfr->basic.P) regs.psw.mask |= PSW_MASK_PSTATE; - switch (sample->AS) { + switch (sfr->basic.AS) { case 0x0: regs.psw.mask |= PSW_ASC_PRIMARY; break; @@ -922,7 +1022,7 @@ static int perf_push_sample(struct perf_event *event, * purposes too. * For now, simply use a non-zero value as guest indicator. */ - if (sample->hpp) + if (sfr->basic.hpp) sde_regs->in_guest = 1; overflow = 0; @@ -942,51 +1042,155 @@ static void perf_event_count_update(struct perf_event *event, u64 count) local64_add(count, &event->count); } +static int sample_format_is_valid(struct hws_combined_entry *sample, + unsigned int flags) +{ + if (likely(flags & PERF_CPUM_SF_BASIC_MODE)) + /* Only basic-sampling data entries with data-entry-format + * version of 0x0001 can be processed. + */ + if (sample->basic.def != 0x0001) + return 0; + if (flags & PERF_CPUM_SF_DIAG_MODE) + /* The data-entry-format number of diagnostic-sampling data + * entries can vary. Because diagnostic data is just passed + * through, do only a sanity check on the DEF. + */ + if (sample->diag.def < 0x8001) + return 0; + return 1; +} + +static int sample_is_consistent(struct hws_combined_entry *sample, + unsigned long flags) +{ + /* This check applies only to basic-sampling data entries of potentially + * combined-sampling data entries. Invalid entries cannot be processed + * by the PMU and, thus, do not deliver an associated + * diagnostic-sampling data entry. + */ + if (unlikely(!(flags & PERF_CPUM_SF_BASIC_MODE))) + return 0; + /* + * Samples are skipped, if they are invalid or for which the + * instruction address is not predictable, i.e., the wait-state bit is + * set. + */ + if (sample->basic.I || sample->basic.W) + return 0; + return 1; +} + +static void reset_sample_slot(struct hws_combined_entry *sample, + unsigned long flags) +{ + if (likely(flags & PERF_CPUM_SF_BASIC_MODE)) + sample->basic.def = 0; + if (flags & PERF_CPUM_SF_DIAG_MODE) + sample->diag.def = 0; +} + +static void sfr_store_sample(struct sf_raw_sample *sfr, + struct hws_combined_entry *sample) +{ + if (likely(sfr->format & PERF_CPUM_SF_BASIC_MODE)) + sfr->basic = sample->basic; + if (sfr->format & PERF_CPUM_SF_DIAG_MODE) + memcpy(&sfr->diag, &sample->diag, sfr->dsdes); +} + +static void debug_sample_entry(struct hws_combined_entry *sample, + struct hws_trailer_entry *te, + unsigned long flags) +{ + debug_sprintf_event(sfdbg, 4, "hw_collect_samples: Found unknown " + "sampling data entry: te->f=%i basic.def=%04x (%p)" + " diag.def=%04x (%p)\n", te->f, + sample->basic.def, &sample->basic, + (flags & PERF_CPUM_SF_DIAG_MODE) + ? sample->diag.def : 0xFFFF, + (flags & PERF_CPUM_SF_DIAG_MODE) + ? &sample->diag : NULL); +} + /* hw_collect_samples() - Walk through a sample-data-block and collect samples * @event: The perf event * @sdbt: Sample-data-block table * @overflow: Event overflow counter * - * Walks through a sample-data-block and collects hardware sample-data that is - * pushed to the perf event subsystem. The overflow reports the number of - * samples that has been discarded due to an event overflow. + * Walks through a sample-data-block and collects sampling data entries that are + * then pushed to the perf event subsystem. Depending on the sampling function, + * there can be either basic-sampling or combined-sampling data entries. A + * combined-sampling data entry consists of a basic- and a diagnostic-sampling + * data entry. The sampling function is determined by the flags in the perf + * event hardware structure. The function always works with a combined-sampling + * data entry but ignores the the diagnostic portion if it is not available. + * + * Note that the implementation focuses on basic-sampling data entries and, if + * such an entry is not valid, the entire combined-sampling data entry is + * ignored. + * + * The overflow variables counts the number of samples that has been discarded + * due to a perf event overflow. */ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt, unsigned long long *overflow) { - struct hws_data_entry *sample; - unsigned long *trailer; + unsigned long flags = SAMPL_FLAGS(&event->hw); + struct hws_combined_entry *sample; + struct hws_trailer_entry *te; + struct sf_raw_sample *sfr; + size_t sample_size; + + /* Prepare and initialize raw sample data */ + sfr = (struct sf_raw_sample *) RAWSAMPLE_REG(&event->hw); + sfr->format = flags & PERF_CPUM_SF_MODE_MASK; - trailer = trailer_entry_ptr(*sdbt); - sample = (struct hws_data_entry *) *sdbt; - while ((unsigned long *) sample < trailer) { + sample_size = event_sample_size(&event->hw); + te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt); + sample = (struct hws_combined_entry *) *sdbt; + while ((unsigned long *) sample < (unsigned long *) te) { /* Check for an empty sample */ - if (!sample->def) + if (!sample->basic.def) break; /* Update perf event period */ perf_event_count_update(event, SAMPL_RATE(&event->hw)); - /* Check for basic sampling mode */ - if (sample->def == 0x0001) { + /* Check sampling data entry */ + if (sample_format_is_valid(sample, flags)) { /* If an event overflow occurred, the PMU is stopped to * throttle event delivery. Remaining sample data is * discarded. */ - if (!*overflow) - *overflow = perf_push_sample(event, sample); - else + if (!*overflow) { + if (sample_is_consistent(sample, flags)) { + /* Deliver sample data to perf */ + sfr_store_sample(sfr, sample); + *overflow = perf_push_sample(event, sfr); + } + } else /* Count discarded samples */ *overflow += 1; - } else - /* Sample slot is not yet written or other record */ - debug_sprintf_event(sfdbg, 5, "hw_collect_samples: " - "Unknown sample data entry format:" - " %i\n", sample->def); + } else { + debug_sample_entry(sample, te, flags); + /* Sample slot is not yet written or other record. + * + * This condition can occur if the buffer was reused + * from a combined basic- and diagnostic-sampling. + * If only basic-sampling is then active, entries are + * written into the larger diagnostic entries. + * This is typically the case for sample-data-blocks + * that are not full. Stop processing if the first + * invalid format was detected. + */ + if (!te->f) + break; + } /* Reset sample slot and advance to next sample */ - sample->def = 0; - sample++; + reset_sample_slot(sample, flags); + sample += sample_size; } } @@ -1104,6 +1308,8 @@ static void cpumsf_pmu_start(struct perf_event *event, int flags) perf_pmu_disable(event->pmu); event->hw.state = 0; cpuhw->lsctl.cs = 1; + if (SAMPL_DIAG_MODE(&event->hw)) + cpuhw->lsctl.cd = 1; perf_pmu_enable(event->pmu); } @@ -1119,6 +1325,7 @@ static void cpumsf_pmu_stop(struct perf_event *event, int flags) perf_pmu_disable(event->pmu); cpuhw->lsctl.cs = 0; + cpuhw->lsctl.cd = 0; event->hw.state |= PERF_HES_STOPPED; if ((flags & PERF_EF_UPDATE) && !(event->hw.state & PERF_HES_UPTODATE)) { @@ -1158,11 +1365,13 @@ static int cpumsf_pmu_add(struct perf_event *event, int flags) /* Ensure sampling functions are in the disabled state. If disabled, * switch on sampling enable control. */ - if (WARN_ON_ONCE(cpuhw->lsctl.es == 1)) { + if (WARN_ON_ONCE(cpuhw->lsctl.es == 1 || cpuhw->lsctl.ed == 1)) { err = -EAGAIN; goto out; } cpuhw->lsctl.es = 1; + if (SAMPL_DIAG_MODE(&event->hw)) + cpuhw->lsctl.ed = 1; /* Set in_use flag and store event */ event->hw.idx = 0; /* only one sampling event per CPU supported */ @@ -1185,6 +1394,7 @@ static void cpumsf_pmu_del(struct perf_event *event, int flags) cpumsf_pmu_stop(event, PERF_EF_UPDATE); cpuhw->lsctl.es = 0; + cpuhw->lsctl.ed = 0; cpuhw->flags &= ~PMU_F_IN_USE; cpuhw->event = NULL; @@ -1198,9 +1408,11 @@ static int cpumsf_pmu_event_idx(struct perf_event *event) } CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC, PERF_EVENT_CPUM_SF); +CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC_DIAG, PERF_EVENT_CPUM_SF_DIAG); static struct attribute *cpumsf_pmu_events_attr[] = { CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC), + CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC_DIAG), NULL, }; @@ -1351,8 +1563,9 @@ static int param_set_sfb_size(const char *val, const struct kernel_param *kp) return rc; sfb_set_limits(min, max); - pr_info("Changed sampling buffer settings: min=%lu max=%lu\n", - CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB); + pr_info("The sampling buffer limits have changed to: " + "min=%lu max=%lu (diag=x%lu)\n", + CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB, CPUM_SF_SDB_DIAG_FACTOR); return 0; } @@ -1362,13 +1575,38 @@ static struct kernel_param_ops param_ops_sfb_size = { .get = param_get_sfb_size, }; +#define RS_INIT_FAILURE_QSI 0x0001 +#define RS_INIT_FAILURE_BSDES 0x0002 +#define RS_INIT_FAILURE_ALRT 0x0003 +#define RS_INIT_FAILURE_PERF 0x0004 +static void __init pr_cpumsf_err(unsigned int reason) +{ + pr_err("Sampling facility support for perf is not available: " + "reason=%04x\n", reason); +} + static int __init init_cpum_sampling_pmu(void) { + struct hws_qsi_info_block si; int err; if (!cpum_sf_avail()) return -ENODEV; + memset(&si, 0, sizeof(si)); + if (qsi(&si)) { + pr_cpumsf_err(RS_INIT_FAILURE_QSI); + return -ENODEV; + } + + if (si.bsdes != sizeof(struct hws_basic_entry)) { + pr_cpumsf_err(RS_INIT_FAILURE_BSDES); + return -EINVAL; + } + + if (si.ad) + sfb_set_limits(CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB); + sfdbg = debug_register(KMSG_COMPONENT, 2, 1, 80); if (!sfdbg) pr_err("Registering for s390dbf failed\n"); @@ -1376,13 +1614,13 @@ static int __init init_cpum_sampling_pmu(void) err = register_external_interrupt(0x1407, cpumf_measurement_alert); if (err) { - pr_err("Failed to register for CPU-measurement alerts\n"); + pr_cpumsf_err(RS_INIT_FAILURE_ALRT); goto out; } err = perf_pmu_register(&cpumf_sampling, "cpum_sf", PERF_TYPE_RAW); if (err) { - pr_err("Failed to register cpum_sf pmu\n"); + pr_cpumsf_err(RS_INIT_FAILURE_PERF); unregister_external_interrupt(0x1407, cpumf_measurement_alert); goto out; } diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c index 60a6826..91aa215 100644 --- a/arch/s390/kernel/perf_event.c +++ b/arch/s390/kernel/perf_event.c @@ -139,16 +139,21 @@ static void print_debug_sf(void) int cpu = smp_processor_id(); memset(&si, 0, sizeof(si)); - if (qsi(&si)) { - pr_err("CPU[%i]: CPM_SF: qsi failed\n"); + if (qsi(&si)) return; - } - pr_info("CPU[%i]: CPM_SF: as=%i es=%i cs=%i bsdes=%i dsdes=%i" - " min=%i max=%i cpu_speed=%i tear=%p dear=%p\n", - cpu, si.as, si.es, si.cs, si.bsdes, si.dsdes, - si.min_sampl_rate, si.max_sampl_rate, si.cpu_speed, - si.tear, si.dear); + pr_info("CPU[%i] CPUM_SF: basic=%i diag=%i min=%i max=%i cpu_speed=%i\n", + cpu, si.as, si.ad, si.min_sampl_rate, si.max_sampl_rate, + si.cpu_speed); + + if (si.as) + pr_info("CPU[%i] CPUM_SF: Basic-sampling: a=%i e=%i c=%i" + " bsdes=%i tear=%p dear=%p\n", cpu, + si.as, si.es, si.cs, si.bsdes, si.tear, si.dear); + if (si.ad) + pr_info("CPU[%i] CPUM_SF: Diagnostic-sampling: a=%i e=%i c=%i" + " dsdes=%i tear=%p dear=%p\n", cpu, + si.ad, si.ed, si.cd, si.dsdes, si.tear, si.dear); } void perf_event_print_debug(void) diff --git a/arch/s390/oprofile/hwsampler.c b/arch/s390/oprofile/hwsampler.c index eb09587..a32c967 100644 --- a/arch/s390/oprofile/hwsampler.c +++ b/arch/s390/oprofile/hwsampler.c @@ -799,7 +799,7 @@ static void worker_on_interrupt(unsigned int cpu) static void add_samples_to_oprofile(unsigned int cpu, unsigned long *sdbt, unsigned long *dear) { - struct hws_data_entry *sample_data_ptr; + struct hws_basic_entry *sample_data_ptr; unsigned long *trailer; trailer = trailer_entry_ptr(*sdbt); @@ -809,7 +809,7 @@ static void add_samples_to_oprofile(unsigned int cpu, unsigned long *sdbt, trailer = dear; } - sample_data_ptr = (struct hws_data_entry *)(*sdbt); + sample_data_ptr = (struct hws_basic_entry *)(*sdbt); while ((unsigned long *)sample_data_ptr < trailer) { struct pt_regs *regs = NULL; -- cgit v0.10.2 From d7528862cf035994972c2c6f42c927db78f2f3a2 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Fri, 13 Dec 2013 12:45:01 +0100 Subject: s390/cpum_sf: Add flag to process full SDBs only Add the PERF_CPUM_SF_FULL_BLOCKS flag to process only sample-data-blocks that have the block-full-indicator bit set. Sample-data-blocks that are partially filled are discarded. Use this flag if the sampling buffer is likely to be shared among perf events that use different sampling modes. In such environments, flushing sample-data-blocks that are not completely filled, might cause invalid-data-formats. Setting PERF_CPUM_SF_FULL_BLOCKS prevents potentially invalid sampling data to be processed but, in contrast, also discards valid samples in partially filled sample-data-blocks. Note that sample-data-blocks might not become full for small sampling frequencies or for workload that is scheduled for tiny intervals. To sample with the PERF_CPUM_SF_FULL_BLOCKS flag, set the perf->attr.config1 to 0x0004. For example: perf record -e cpum_sf/config=0xB000,config1=0x0004/ Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h index bd4573f..159a8ec 100644 --- a/arch/s390/include/asm/perf_event.h +++ b/arch/s390/include/asm/perf_event.h @@ -59,6 +59,7 @@ struct perf_sf_sde_regs { #define PERF_CPUM_SF_DIAG_MODE 0x0002 /* Diagnostic-sampling flag */ #define PERF_CPUM_SF_MODE_MASK (PERF_CPUM_SF_BASIC_MODE| \ PERF_CPUM_SF_DIAG_MODE) +#define PERF_CPUM_SF_FULL_BLOCKS 0x0004 /* Process full SDBs only */ #define REG_NONE 0 #define REG_OVERFLOW 1 @@ -69,6 +70,7 @@ struct perf_sf_sde_regs { #define SAMPL_RATE(hwc) ((hwc)->event_base) #define SAMPL_FLAGS(hwc) ((hwc)->config_base) #define SAMPL_DIAG_MODE(hwc) (SAMPL_FLAGS(hwc) & PERF_CPUM_SF_DIAG_MODE) +#define SDB_FULL_BLOCKS(hwc) (SAMPL_FLAGS(hwc) & PERF_CPUM_SF_FULL_BLOCKS) /* Structure for sampling data entries to be passed as perf raw sample data * to user space. Note that raw sample data must be aligned and, thus, might diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index b4ec058..3c3bc8d 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -733,6 +733,10 @@ static int __hw_perf_event_init(struct perf_event *event) SAMPL_FLAGS(hwc) |= PERF_CPUM_SF_DIAG_MODE; } + /* Check and set other sampling flags */ + if (attr->config1 & PERF_CPUM_SF_FULL_BLOCKS) + SAMPL_FLAGS(hwc) |= PERF_CPUM_SF_FULL_BLOCKS; + /* The sampling information (si) contains information about the * min/max sampling intervals and the CPU speed. So calculate the * correct sampling interval and avoid the whole period adjust @@ -1203,8 +1207,10 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt, * register of the specified perf event. * * Only full sample-data-blocks are processed. Specify the flash_all flag - * to also walk through partially filled sample-data-blocks. - * + * to also walk through partially filled sample-data-blocks. It is ignored + * if PERF_CPUM_SF_FULL_BLOCKS is set. The PERF_CPUM_SF_FULL_BLOCKS flag + * enforces the processing of full sample-data-blocks only (trailer entries + * with the block-full-indicator bit set). */ static void hw_perf_event_update(struct perf_event *event, int flush_all) { @@ -1214,6 +1220,9 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all) unsigned long long event_overflow, sampl_overflow, num_sdb, te_flags; int done; + if (flush_all && SDB_FULL_BLOCKS(hwc)) + flush_all = 0; + sdbt = (unsigned long *) TEAR_REG(hwc); done = event_overflow = sampl_overflow = num_sdb = 0; while (!done) { -- cgit v0.10.2 From 74bd0d859dc3536b01c892abfa9f87620e4bca2f Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Mon, 16 Dec 2013 10:51:54 +0100 Subject: s390/cio: fix unlocked access of online member Make sure that access to the online member of a ccw device is guarded by the ccwlock. Reported-by: Cornelia Huck Reviewed-by: Peter Oberparleiter Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c index e4a7ab2..4a0734f 100644 --- a/drivers/s390/cio/device.c +++ b/drivers/s390/cio/device.c @@ -333,9 +333,9 @@ int ccw_device_set_offline(struct ccw_device *cdev) if (ret != 0) return ret; } - cdev->online = 0; spin_lock_irq(cdev->ccwlock); sch = to_subchannel(cdev->dev.parent); + cdev->online = 0; /* Wait until a final state or DISCONNECTED is reached */ while (!dev_fsm_final_state(cdev) && cdev->private->state != DEV_STATE_DISCONNECTED) { @@ -446,7 +446,10 @@ int ccw_device_set_online(struct ccw_device *cdev) ret = cdev->drv->set_online(cdev); if (ret) goto rollback; + + spin_lock_irq(cdev->ccwlock); cdev->online = 1; + spin_unlock_irq(cdev->ccwlock); return 0; rollback: @@ -1745,8 +1748,7 @@ ccw_device_probe (struct device *dev) return 0; } -static int -ccw_device_remove (struct device *dev) +static int ccw_device_remove(struct device *dev) { struct ccw_device *cdev = to_ccwdev(dev); struct ccw_driver *cdrv = cdev->drv; @@ -1754,9 +1756,10 @@ ccw_device_remove (struct device *dev) if (cdrv->remove) cdrv->remove(cdev); + + spin_lock_irq(cdev->ccwlock); if (cdev->online) { cdev->online = 0; - spin_lock_irq(cdev->ccwlock); ret = ccw_device_offline(cdev); spin_unlock_irq(cdev->ccwlock); if (ret == 0) @@ -1769,10 +1772,12 @@ ccw_device_remove (struct device *dev) cdev->private->dev_id.devno); /* Give up reference obtained in ccw_device_set_online(). */ put_device(&cdev->dev); + spin_lock_irq(cdev->ccwlock); } ccw_device_set_timeout(cdev, 0); cdev->drv = NULL; cdev->private->int_class = IRQIO_CIO; + spin_unlock_irq(cdev->ccwlock); return 0; } -- cgit v0.10.2 From 00381eeb47ef6b3b70d2a17c89a8205b6f1aa00a Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Mon, 16 Dec 2013 10:54:13 +0100 Subject: s390/cio: use device_lock to synchronize calls to the ccw driver When calling set_{on,off}line of a ccw device driver we hold the module reference of the owner. This is pretty useless - we don't want to prevent module unloading but driver unbinding. Use the driver core's device_lock instead. Reviewed-by: Peter Oberparleiter Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c index 4a0734f..e9d7835 100644 --- a/drivers/s390/cio/device.c +++ b/drivers/s390/cio/device.c @@ -549,17 +549,12 @@ static ssize_t online_store (struct device *dev, struct device_attribute *attr, if (!dev_fsm_final_state(cdev) && cdev->private->state != DEV_STATE_DISCONNECTED) { ret = -EAGAIN; - goto out_onoff; + goto out; } /* Prevent conflict between pending work and on-/offline processing.*/ if (work_pending(&cdev->private->todo_work)) { ret = -EAGAIN; - goto out_onoff; - } - - if (cdev->drv && !try_module_get(cdev->drv->driver.owner)) { - ret = -EINVAL; - goto out_onoff; + goto out; } if (!strncmp(buf, "force\n", count)) { force = 1; @@ -571,6 +566,8 @@ static ssize_t online_store (struct device *dev, struct device_attribute *attr, } if (ret) goto out; + + device_lock(dev); switch (i) { case 0: ret = online_store_handle_offline(cdev); @@ -581,10 +578,9 @@ static ssize_t online_store (struct device *dev, struct device_attribute *attr, default: ret = -EINVAL; } + device_unlock(dev); + out: - if (cdev->drv) - module_put(cdev->drv->driver.owner); -out_onoff: atomic_set(&cdev->private->onoff, 0); return (ret < 0) ? ret : count; } -- cgit v0.10.2 From d5b877ffb9aa04a6fb6b6eae6dc19ca163d568a8 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Mon, 16 Dec 2013 10:56:46 +0100 Subject: s390/cio: use device_lock to synchronize calls to the ccwgroup driver When calling set_{on,off}line of a ccwgroup device driver we hold the module reference of the owner. This is pretty useless - we don't want to prevent module unloading but driver unbinding. Use the driver core's device_lock instead. Reviewed-by: Peter Oberparleiter Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky diff --git a/drivers/s390/cio/ccwgroup.c b/drivers/s390/cio/ccwgroup.c index 959135a..fd3367a1 100644 --- a/drivers/s390/cio/ccwgroup.c +++ b/drivers/s390/cio/ccwgroup.c @@ -128,14 +128,14 @@ static ssize_t ccwgroup_online_store(struct device *dev, const char *buf, size_t count) { struct ccwgroup_device *gdev = to_ccwgroupdev(dev); - struct ccwgroup_driver *gdrv = to_ccwgroupdrv(dev->driver); unsigned long value; int ret; - if (!dev->driver) - return -EINVAL; - if (!try_module_get(gdrv->driver.owner)) - return -EINVAL; + device_lock(dev); + if (!dev->driver) { + ret = -EINVAL; + goto out; + } ret = kstrtoul(buf, 0, &value); if (ret) @@ -148,7 +148,7 @@ static ssize_t ccwgroup_online_store(struct device *dev, else ret = -EINVAL; out: - module_put(gdrv->driver.owner); + device_unlock(dev); return (ret == 0) ? count : ret; } -- cgit v0.10.2 From 61aa4884b70cdf3b2d373e18ebbbada43789eade Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 1 Nov 2013 10:08:20 +0100 Subject: s390: use IS_ENABLED to check if a CONFIG is set to y or m This is shorter and should be used instead of the longer form which checks for both possible config options. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index e5b43c9..9532fe2 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -74,7 +74,7 @@ _TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \ .endm .macro LPP newpp -#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE) +#if IS_ENABLED(CONFIG_KVM) tm __LC_MACHINE_FLAGS+6,0x20 # MACHINE_FLAG_LPP jz .+8 .insn s,0xb2800000,\newpp @@ -82,7 +82,7 @@ _TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \ .endm .macro HANDLE_SIE_INTERCEPT scratch,reason -#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE) +#if IS_ENABLED(CONFIG_KVM) tmhh %r8,0x0001 # interrupting from user ? jnz .+62 lgr \scratch,%r9 @@ -946,7 +946,7 @@ cleanup_idle_insn: .quad __critical_end - __critical_start -#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE) +#if IS_ENABLED(CONFIG_KVM) /* * sie64a calling convention: * %r2 pointer to sie control block diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c index 91aa215..a76d602 100644 --- a/arch/s390/kernel/perf_event.c +++ b/arch/s390/kernel/perf_event.c @@ -60,7 +60,7 @@ static bool is_in_guest(struct pt_regs *regs) { if (user_mode(regs)) return false; -#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE) +#if IS_ENABLED(CONFIG_KVM) return instruction_pointer(regs) == (unsigned long) &sie_exit; #else return false; diff --git a/arch/s390/kernel/s390_ksyms.c b/arch/s390/kernel/s390_ksyms.c index 3bac589..9f60467 100644 --- a/arch/s390/kernel/s390_ksyms.c +++ b/arch/s390/kernel/s390_ksyms.c @@ -5,7 +5,7 @@ #ifdef CONFIG_FUNCTION_TRACER EXPORT_SYMBOL(_mcount); #endif -#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE) +#if IS_ENABLED(CONFIG_KVM) EXPORT_SYMBOL(sie64a); EXPORT_SYMBOL(sie_exit); #endif -- cgit v0.10.2 From 9efe4f2992025c3a4027c60bf36ae9d710ca3781 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 17 Dec 2013 13:41:31 +0100 Subject: s390/mm: optimize randomize_et_dyn for !PF_RANDOMIZE Skip the call to brk_rnd() if the PF_RANDOMIZE flag is not set for the process. This avoids the costly get_random_int() call. Modify arch_randomize_brk() as well to make it look like randomize_et_dyn(). Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 7ed0d4e..dd14532 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -261,20 +261,18 @@ static inline unsigned long brk_rnd(void) unsigned long arch_randomize_brk(struct mm_struct *mm) { - unsigned long ret = PAGE_ALIGN(mm->brk + brk_rnd()); + unsigned long ret; - if (ret < mm->brk) - return mm->brk; - return ret; + ret = PAGE_ALIGN(mm->brk + brk_rnd()); + return (ret > mm->brk) ? ret : mm->brk; } unsigned long randomize_et_dyn(unsigned long base) { - unsigned long ret = PAGE_ALIGN(base + brk_rnd()); + unsigned long ret; if (!(current->flags & PF_RANDOMIZE)) return base; - if (ret < base) - return base; - return ret; + ret = PAGE_ALIGN(base + brk_rnd()); + return (ret > base) ? ret : base; } -- cgit v0.10.2 From 91f3e3eaba4413e76ce8e12e3ef10525a889142f Mon Sep 17 00:00:00 2001 From: Ingo Tuchscherer Date: Wed, 20 Nov 2013 10:47:13 +0100 Subject: s390/zcrypt: add support for EP11 coprocessor cards This feature extends the generic cryptographic device driver (zcrypt) with a new capability to service EP11 requests for the Crypto Express4S card in EP11 (Enterprise PKCS#11 mode) coprocessor mode. Signed-off-by: Ingo Tuchscherer Signed-off-by: Martin Schwidefsky diff --git a/Documentation/kmsg/s390/zcrypt b/Documentation/kmsg/s390/zcrypt new file mode 100644 index 0000000..7fb2087 --- /dev/null +++ b/Documentation/kmsg/s390/zcrypt @@ -0,0 +1,20 @@ +/*? + * Text: "Cryptographic device %x failed and was set offline\n" + * Severity: Error + * Parameter: + * @1: device index + * Description: + * A cryptographic device failed to process a cryptographic request. + * The cryptographic device driver could not correct the error and + * set the device offline. The application that issued the + * request received an indication that the request has failed. + * User action: + * Use the lszcrypt command to confirm that the cryptographic + * hardware is still configured to your LPAR or z/VM guest virtual + * machine. If the device is available to your Linux instance the + * command output contains a line that begins with 'card', + * where is the two-digit decimal number in the message text. + * After ensuring that the device is available, use the chzcrypt command to + * set it online again. + * If the error persists, contact your support organization. + */ diff --git a/arch/s390/include/uapi/asm/zcrypt.h b/arch/s390/include/uapi/asm/zcrypt.h index e83fc11..f2b18ea 100644 --- a/arch/s390/include/uapi/asm/zcrypt.h +++ b/arch/s390/include/uapi/asm/zcrypt.h @@ -154,6 +154,67 @@ struct ica_xcRB { unsigned short priority_window; unsigned int status; } __attribute__((packed)); + +/** + * struct ep11_cprb - EP11 connectivity programming request block + * @cprb_len: CPRB header length [0x0020] + * @cprb_ver_id: CPRB version id. [0x04] + * @pad_000: Alignment pad bytes + * @flags: Admin cmd [0x80] or functional cmd [0x00] + * @func_id: Function id / subtype [0x5434] + * @source_id: Source id [originator id] + * @target_id: Target id [usage/ctrl domain id] + * @ret_code: Return code + * @reserved1: Reserved + * @reserved2: Reserved + * @payload_len: Payload length + */ +struct ep11_cprb { + uint16_t cprb_len; + unsigned char cprb_ver_id; + unsigned char pad_000[2]; + unsigned char flags; + unsigned char func_id[2]; + uint32_t source_id; + uint32_t target_id; + uint32_t ret_code; + uint32_t reserved1; + uint32_t reserved2; + uint32_t payload_len; +} __attribute__((packed)); + +/** + * struct ep11_target_dev - EP11 target device list + * @ap_id: AP device id + * @dom_id: Usage domain id + */ +struct ep11_target_dev { + uint16_t ap_id; + uint16_t dom_id; +}; + +/** + * struct ep11_urb - EP11 user request block + * @targets_num: Number of target adapters + * @targets: Addr to target adapter list + * @weight: Level of request priority + * @req_no: Request id/number + * @req_len: Request length + * @req: Addr to request block + * @resp_len: Response length + * @resp: Addr to response block + */ +struct ep11_urb { + uint16_t targets_num; + uint64_t targets; + uint64_t weight; + uint64_t req_no; + uint64_t req_len; + uint64_t req; + uint64_t resp_len; + uint64_t resp; +} __attribute__((packed)); + #define AUTOSELECT ((unsigned int)0xFFFFFFFF) #define ZCRYPT_IOCTL_MAGIC 'z' @@ -183,6 +244,9 @@ struct ica_xcRB { * ZSECSENDCPRB * Send an arbitrary CPRB to a crypto card. * + * ZSENDEP11CPRB + * Send an arbitrary EP11 CPRB to an EP11 coprocessor crypto card. + * * Z90STAT_STATUS_MASK * Return an 64 element array of unsigned chars for the status of * all devices. @@ -256,6 +320,7 @@ struct ica_xcRB { #define ICARSAMODEXPO _IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x05, 0) #define ICARSACRT _IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x06, 0) #define ZSECSENDCPRB _IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x81, 0) +#define ZSENDEP11CPRB _IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x04, 0) /* New status calls */ #define Z90STAT_TOTALCOUNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x40, int) diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index 02300dc..ab3baa7 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -591,7 +591,13 @@ static int ap_init_queue(ap_qid_t qid) if (rc != -ENODEV && rc != -EBUSY) break; if (i < AP_MAX_RESET - 1) { - udelay(5); + /* Time we are waiting until we give up (0.7sec * 90). + * Since the actual request (in progress) will not + * interrupted immediately for the reset command, + * we have to be patient. In worst case we have to + * wait 60sec + reset time (some msec). + */ + schedule_timeout(AP_RESET_TIMEOUT); status = ap_test_queue(qid, &dummy, &dummy); } } @@ -992,6 +998,28 @@ static ssize_t ap_domain_show(struct bus_type *bus, char *buf) static BUS_ATTR(ap_domain, 0444, ap_domain_show, NULL); +static ssize_t ap_control_domain_mask_show(struct bus_type *bus, char *buf) +{ + if (ap_configuration != NULL) { /* QCI not supported */ + if (test_facility(76)) { /* format 1 - 256 bit domain field */ + return snprintf(buf, PAGE_SIZE, + "0x%08x%08x%08x%08x%08x%08x%08x%08x\n", + ap_configuration->adm[0], ap_configuration->adm[1], + ap_configuration->adm[2], ap_configuration->adm[3], + ap_configuration->adm[4], ap_configuration->adm[5], + ap_configuration->adm[6], ap_configuration->adm[7]); + } else { /* format 0 - 16 bit domain field */ + return snprintf(buf, PAGE_SIZE, "%08x%08x\n", + ap_configuration->adm[0], ap_configuration->adm[1]); + } + } else { + return snprintf(buf, PAGE_SIZE, "not supported\n"); + } +} + +static BUS_ATTR(ap_control_domain_mask, 0444, + ap_control_domain_mask_show, NULL); + static ssize_t ap_config_time_show(struct bus_type *bus, char *buf) { return snprintf(buf, PAGE_SIZE, "%d\n", ap_config_time); @@ -1077,6 +1105,7 @@ static BUS_ATTR(poll_timeout, 0644, poll_timeout_show, poll_timeout_store); static struct bus_attribute *const ap_bus_attrs[] = { &bus_attr_ap_domain, + &bus_attr_ap_control_domain_mask, &bus_attr_config_time, &bus_attr_poll_thread, &bus_attr_ap_interrupts, diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h index 685f6cc0..6405ae2 100644 --- a/drivers/s390/crypto/ap_bus.h +++ b/drivers/s390/crypto/ap_bus.h @@ -33,7 +33,7 @@ #define AP_DEVICES 64 /* Number of AP devices. */ #define AP_DOMAINS 16 /* Number of AP domains. */ #define AP_MAX_RESET 90 /* Maximum number of resets. */ -#define AP_RESET_TIMEOUT (HZ/2) /* Time in ticks for reset timeouts. */ +#define AP_RESET_TIMEOUT (HZ*0.7) /* Time in ticks for reset timeouts. */ #define AP_CONFIG_TIME 30 /* Time in seconds between AP bus rescans. */ #define AP_POLL_TIME 1 /* Time in ticks between receive polls. */ @@ -125,6 +125,8 @@ static inline int ap_test_bit(unsigned int *ptr, unsigned int nr) #define AP_FUNC_CRT4K 2 #define AP_FUNC_COPRO 3 #define AP_FUNC_ACCEL 4 +#define AP_FUNC_EP11 5 +#define AP_FUNC_APXA 6 /* * AP reset flag states diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c index 31cfaa5..4b824b1 100644 --- a/drivers/s390/crypto/zcrypt_api.c +++ b/drivers/s390/crypto/zcrypt_api.c @@ -44,6 +44,8 @@ #include "zcrypt_debug.h" #include "zcrypt_api.h" +#include "zcrypt_msgtype6.h" + /* * Module description. */ @@ -554,9 +556,9 @@ static long zcrypt_send_cprb(struct ica_xcRB *xcRB) spin_lock_bh(&zcrypt_device_lock); list_for_each_entry(zdev, &zcrypt_device_list, list) { if (!zdev->online || !zdev->ops->send_cprb || - (xcRB->user_defined != AUTOSELECT && - AP_QID_DEVICE(zdev->ap_dev->qid) != xcRB->user_defined) - ) + (zdev->ops->variant == MSGTYPE06_VARIANT_EP11) || + (xcRB->user_defined != AUTOSELECT && + AP_QID_DEVICE(zdev->ap_dev->qid) != xcRB->user_defined)) continue; zcrypt_device_get(zdev); get_device(&zdev->ap_dev->device); @@ -581,6 +583,90 @@ static long zcrypt_send_cprb(struct ica_xcRB *xcRB) return -ENODEV; } +struct ep11_target_dev_list { + unsigned short targets_num; + struct ep11_target_dev *targets; +}; + +static bool is_desired_ep11dev(unsigned int dev_qid, + struct ep11_target_dev_list dev_list) +{ + int n; + + for (n = 0; n < dev_list.targets_num; n++, dev_list.targets++) { + if ((AP_QID_DEVICE(dev_qid) == dev_list.targets->ap_id) && + (AP_QID_QUEUE(dev_qid) == dev_list.targets->dom_id)) { + return true; + } + } + return false; +} + +static long zcrypt_send_ep11_cprb(struct ep11_urb *xcrb) +{ + struct zcrypt_device *zdev; + bool autoselect = false; + int rc; + struct ep11_target_dev_list ep11_dev_list = { + .targets_num = 0x00, + .targets = NULL, + }; + + ep11_dev_list.targets_num = (unsigned short) xcrb->targets_num; + + /* empty list indicates autoselect (all available targets) */ + if (ep11_dev_list.targets_num == 0) + autoselect = true; + else { + ep11_dev_list.targets = kcalloc((unsigned short) + xcrb->targets_num, + sizeof(struct ep11_target_dev), + GFP_KERNEL); + if (!ep11_dev_list.targets) + return -ENOMEM; + + if (copy_from_user(ep11_dev_list.targets, + (struct ep11_target_dev *)xcrb->targets, + xcrb->targets_num * + sizeof(struct ep11_target_dev))) + return -EFAULT; + } + + spin_lock_bh(&zcrypt_device_lock); + list_for_each_entry(zdev, &zcrypt_device_list, list) { + /* check if device is eligible */ + if (!zdev->online || + zdev->ops->variant != MSGTYPE06_VARIANT_EP11) + continue; + + /* check if device is selected as valid target */ + if (!is_desired_ep11dev(zdev->ap_dev->qid, ep11_dev_list) && + !autoselect) + continue; + + zcrypt_device_get(zdev); + get_device(&zdev->ap_dev->device); + zdev->request_count++; + __zcrypt_decrease_preference(zdev); + if (try_module_get(zdev->ap_dev->drv->driver.owner)) { + spin_unlock_bh(&zcrypt_device_lock); + rc = zdev->ops->send_ep11_cprb(zdev, xcrb); + spin_lock_bh(&zcrypt_device_lock); + module_put(zdev->ap_dev->drv->driver.owner); + } else { + rc = -EAGAIN; + } + zdev->request_count--; + __zcrypt_increase_preference(zdev); + put_device(&zdev->ap_dev->device); + zcrypt_device_put(zdev); + spin_unlock_bh(&zcrypt_device_lock); + return rc; + } + spin_unlock_bh(&zcrypt_device_lock); + return -ENODEV; +} + static long zcrypt_rng(char *buffer) { struct zcrypt_device *zdev; @@ -784,6 +870,23 @@ static long zcrypt_unlocked_ioctl(struct file *filp, unsigned int cmd, return -EFAULT; return rc; } + case ZSENDEP11CPRB: { + struct ep11_urb __user *uxcrb = (void __user *)arg; + struct ep11_urb xcrb; + if (copy_from_user(&xcrb, uxcrb, sizeof(xcrb))) + return -EFAULT; + do { + rc = zcrypt_send_ep11_cprb(&xcrb); + } while (rc == -EAGAIN); + /* on failure: retry once again after a requested rescan */ + if ((rc == -ENODEV) && (zcrypt_process_rescan())) + do { + rc = zcrypt_send_ep11_cprb(&xcrb); + } while (rc == -EAGAIN); + if (copy_to_user(uxcrb, &xcrb, sizeof(xcrb))) + return -EFAULT; + return rc; + } case Z90STAT_STATUS_MASK: { char status[AP_DEVICES]; zcrypt_status_mask(status); diff --git a/drivers/s390/crypto/zcrypt_api.h b/drivers/s390/crypto/zcrypt_api.h index 8963291..b3d496b 100644 --- a/drivers/s390/crypto/zcrypt_api.h +++ b/drivers/s390/crypto/zcrypt_api.h @@ -74,6 +74,7 @@ struct ica_z90_status { #define ZCRYPT_CEX2A 6 #define ZCRYPT_CEX3C 7 #define ZCRYPT_CEX3A 8 +#define ZCRYPT_CEX4 10 /** * Large random numbers are pulled in 4096 byte chunks from the crypto cards @@ -89,6 +90,7 @@ struct zcrypt_ops { long (*rsa_modexpo_crt)(struct zcrypt_device *, struct ica_rsa_modexpo_crt *); long (*send_cprb)(struct zcrypt_device *, struct ica_xcRB *); + long (*send_ep11_cprb)(struct zcrypt_device *, struct ep11_urb *); long (*rng)(struct zcrypt_device *, char *); struct list_head list; /* zcrypt ops list. */ struct module *owner; diff --git a/drivers/s390/crypto/zcrypt_cex4.c b/drivers/s390/crypto/zcrypt_cex4.c index ce12263..569f8b1 100644 --- a/drivers/s390/crypto/zcrypt_cex4.c +++ b/drivers/s390/crypto/zcrypt_cex4.c @@ -30,7 +30,12 @@ #define CEX4A_MAX_MESSAGE_SIZE MSGTYPE50_CRB3_MAX_MSG_SIZE #define CEX4C_MAX_MESSAGE_SIZE MSGTYPE06_MAX_MSG_SIZE -#define CEX4_CLEANUP_TIME (15*HZ) +/* Waiting time for requests to be processed. + * Currently there are some types of request which are not deterministic. + * But the maximum time limit managed by the stomper code is set to 60sec. + * Hence we have to wait at least that time period. + */ +#define CEX4_CLEANUP_TIME (61*HZ) static struct ap_device_id zcrypt_cex4_ids[] = { { AP_DEVICE(AP_DEVICE_TYPE_CEX4) }, @@ -101,6 +106,19 @@ static int zcrypt_cex4_probe(struct ap_device *ap_dev) zdev->speed_rating = CEX4C_SPEED_RATING; zdev->ops = zcrypt_msgtype_request(MSGTYPE06_NAME, MSGTYPE06_VARIANT_DEFAULT); + } else if (ap_test_bit(&ap_dev->functions, AP_FUNC_EP11)) { + zdev = zcrypt_device_alloc(CEX4C_MAX_MESSAGE_SIZE); + if (!zdev) + return -ENOMEM; + zdev->type_string = "CEX4P"; + zdev->user_space_type = ZCRYPT_CEX4; + zdev->min_mod_size = CEX4C_MIN_MOD_SIZE; + zdev->max_mod_size = CEX4C_MAX_MOD_SIZE; + zdev->max_exp_bit_length = CEX4C_MAX_MOD_SIZE; + zdev->short_crt = 0; + zdev->speed_rating = CEX4C_SPEED_RATING; + zdev->ops = zcrypt_msgtype_request(MSGTYPE06_NAME, + MSGTYPE06_VARIANT_EP11); } break; } diff --git a/drivers/s390/crypto/zcrypt_error.h b/drivers/s390/crypto/zcrypt_error.h index 0079b66..7b23f43 100644 --- a/drivers/s390/crypto/zcrypt_error.h +++ b/drivers/s390/crypto/zcrypt_error.h @@ -106,15 +106,15 @@ static inline int convert_error(struct zcrypt_device *zdev, // REP88_ERROR_MESSAGE_TYPE // '20' CEX2A /* * To sent a message of the wrong type is a bug in the - * device driver. Warn about it, disable the device + * device driver. Send error msg, disable the device * and then repeat the request. */ - WARN_ON(1); atomic_set(&zcrypt_rescan_req, 1); zdev->online = 0; + pr_err("Cryptographic device %x failed and was set offline\n", + zdev->ap_dev->qid); ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%drc%d", - zdev->ap_dev->qid, - zdev->online, ehdr->reply_code); + zdev->ap_dev->qid, zdev->online, ehdr->reply_code); return -EAGAIN; case REP82_ERROR_TRANSPORT_FAIL: case REP82_ERROR_MACHINE_FAILURE: @@ -122,15 +122,17 @@ static inline int convert_error(struct zcrypt_device *zdev, /* If a card fails disable it and repeat the request. */ atomic_set(&zcrypt_rescan_req, 1); zdev->online = 0; + pr_err("Cryptographic device %x failed and was set offline\n", + zdev->ap_dev->qid); ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%drc%d", - zdev->ap_dev->qid, - zdev->online, ehdr->reply_code); + zdev->ap_dev->qid, zdev->online, ehdr->reply_code); return -EAGAIN; default: zdev->online = 0; + pr_err("Cryptographic device %x failed and was set offline\n", + zdev->ap_dev->qid); ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%drc%d", - zdev->ap_dev->qid, - zdev->online, ehdr->reply_code); + zdev->ap_dev->qid, zdev->online, ehdr->reply_code); return -EAGAIN; /* repeat the request on a different device. */ } } diff --git a/drivers/s390/crypto/zcrypt_msgtype50.c b/drivers/s390/crypto/zcrypt_msgtype50.c index 7c522f3..334e282 100644 --- a/drivers/s390/crypto/zcrypt_msgtype50.c +++ b/drivers/s390/crypto/zcrypt_msgtype50.c @@ -25,6 +25,9 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +#define KMSG_COMPONENT "zcrypt" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + #include #include #include @@ -332,6 +335,11 @@ static int convert_type80(struct zcrypt_device *zdev, if (t80h->len < sizeof(*t80h) + outputdatalength) { /* The result is too short, the CEX2A card may not do that.. */ zdev->online = 0; + pr_err("Cryptographic device %x failed and was set offline\n", + zdev->ap_dev->qid); + ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%drc%d", + zdev->ap_dev->qid, zdev->online, t80h->code); + return -EAGAIN; /* repeat the request on a different device. */ } if (zdev->user_space_type == ZCRYPT_CEX2A) @@ -359,6 +367,10 @@ static int convert_response(struct zcrypt_device *zdev, outputdata, outputdatalength); default: /* Unknown response type, this should NEVER EVER happen */ zdev->online = 0; + pr_err("Cryptographic device %x failed and was set offline\n", + zdev->ap_dev->qid); + ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%dfail", + zdev->ap_dev->qid, zdev->online); return -EAGAIN; /* repeat the request on a different device. */ } } diff --git a/drivers/s390/crypto/zcrypt_msgtype6.c b/drivers/s390/crypto/zcrypt_msgtype6.c index 7d97fa5..57bfda1 100644 --- a/drivers/s390/crypto/zcrypt_msgtype6.c +++ b/drivers/s390/crypto/zcrypt_msgtype6.c @@ -25,6 +25,9 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +#define KMSG_COMPONENT "zcrypt" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + #include #include #include @@ -50,6 +53,7 @@ struct response_type { }; #define PCIXCC_RESPONSE_TYPE_ICA 0 #define PCIXCC_RESPONSE_TYPE_XCRB 1 +#define PCIXCC_RESPONSE_TYPE_EP11 2 MODULE_AUTHOR("IBM Corporation"); MODULE_DESCRIPTION("Cryptographic Coprocessor (message type 6), " \ @@ -358,6 +362,91 @@ static int XCRB_msg_to_type6CPRB_msgX(struct zcrypt_device *zdev, return 0; } +static int xcrb_msg_to_type6_ep11cprb_msgx(struct zcrypt_device *zdev, + struct ap_message *ap_msg, + struct ep11_urb *xcRB) +{ + unsigned int lfmt; + + static struct type6_hdr static_type6_ep11_hdr = { + .type = 0x06, + .rqid = {0x00, 0x01}, + .function_code = {0x00, 0x00}, + .agent_id[0] = 0x58, /* {'X'} */ + .agent_id[1] = 0x43, /* {'C'} */ + .offset1 = 0x00000058, + }; + + struct { + struct type6_hdr hdr; + struct ep11_cprb cprbx; + unsigned char pld_tag; /* fixed value 0x30 */ + unsigned char pld_lenfmt; /* payload length format */ + } __packed * msg = ap_msg->message; + + struct pld_hdr { + unsigned char func_tag; /* fixed value 0x4 */ + unsigned char func_len; /* fixed value 0x4 */ + unsigned int func_val; /* function ID */ + unsigned char dom_tag; /* fixed value 0x4 */ + unsigned char dom_len; /* fixed value 0x4 */ + unsigned int dom_val; /* domain id */ + } __packed * payload_hdr; + + /* length checks */ + ap_msg->length = sizeof(struct type6_hdr) + xcRB->req_len; + if (CEIL4(xcRB->req_len) > MSGTYPE06_MAX_MSG_SIZE - + (sizeof(struct type6_hdr))) + return -EINVAL; + + if (CEIL4(xcRB->resp_len) > MSGTYPE06_MAX_MSG_SIZE - + (sizeof(struct type86_fmt2_msg))) + return -EINVAL; + + /* prepare type6 header */ + msg->hdr = static_type6_ep11_hdr; + msg->hdr.ToCardLen1 = xcRB->req_len; + msg->hdr.FromCardLen1 = xcRB->resp_len; + + /* Import CPRB data from the ioctl input parameter */ + if (copy_from_user(&(msg->cprbx.cprb_len), + (char *)xcRB->req, xcRB->req_len)) { + return -EFAULT; + } + + /* + The target domain field within the cprb body/payload block will be + replaced by the usage domain for non-management commands only. + Therefore we check the first bit of the 'flags' parameter for + management command indication. + 0 - non managment command + 1 - management command + */ + if (!((msg->cprbx.flags & 0x80) == 0x80)) { + msg->cprbx.target_id = (unsigned int) + AP_QID_QUEUE(zdev->ap_dev->qid); + + if ((msg->pld_lenfmt & 0x80) == 0x80) { /*ext.len.fmt 2 or 3*/ + switch (msg->pld_lenfmt & 0x03) { + case 1: + lfmt = 2; + break; + case 2: + lfmt = 3; + break; + default: + return -EINVAL; + } + } else { + lfmt = 1; /* length format #1 */ + } + payload_hdr = (struct pld_hdr *)((&(msg->pld_lenfmt))+lfmt); + payload_hdr->dom_val = (unsigned int) + AP_QID_QUEUE(zdev->ap_dev->qid); + } + return 0; +} + /** * Copy results from a type 86 ICA reply message back to user space. * @@ -377,6 +466,12 @@ struct type86x_reply { char text[0]; } __packed; +struct type86_ep11_reply { + struct type86_hdr hdr; + struct type86_fmt2_ext fmt2; + struct ep11_cprb cprbx; +} __packed; + static int convert_type86_ica(struct zcrypt_device *zdev, struct ap_message *reply, char __user *outputdata, @@ -440,6 +535,11 @@ static int convert_type86_ica(struct zcrypt_device *zdev, if (service_rc == 8 && service_rs == 72) return -EINVAL; zdev->online = 0; + pr_err("Cryptographic device %x failed and was set offline\n", + zdev->ap_dev->qid); + ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%drc%d", + zdev->ap_dev->qid, zdev->online, + msg->hdr.reply_code); return -EAGAIN; /* repeat the request on a different device. */ } data = msg->text; @@ -503,6 +603,33 @@ static int convert_type86_xcrb(struct zcrypt_device *zdev, return 0; } +/** + * Copy results from a type 86 EP11 XCRB reply message back to user space. + * + * @zdev: crypto device pointer + * @reply: reply AP message. + * @xcRB: pointer to EP11 user request block + * + * Returns 0 on success or -EINVAL, -EFAULT, -EAGAIN in case of an error. + */ +static int convert_type86_ep11_xcrb(struct zcrypt_device *zdev, + struct ap_message *reply, + struct ep11_urb *xcRB) +{ + struct type86_fmt2_msg *msg = reply->message; + char *data = reply->message; + + if (xcRB->resp_len < msg->fmt2.count1) + return -EINVAL; + + /* Copy response CPRB to user */ + if (copy_to_user((char *)xcRB->resp, + data + msg->fmt2.offset1, msg->fmt2.count1)) + return -EFAULT; + xcRB->resp_len = msg->fmt2.count1; + return 0; +} + static int convert_type86_rng(struct zcrypt_device *zdev, struct ap_message *reply, char *buffer) @@ -551,6 +678,10 @@ static int convert_response_ica(struct zcrypt_device *zdev, * response */ default: /* Unknown response type, this should NEVER EVER happen */ zdev->online = 0; + pr_err("Cryptographic device %x failed and was set offline\n", + zdev->ap_dev->qid); + ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%dfail", + zdev->ap_dev->qid, zdev->online); return -EAGAIN; /* repeat the request on a different device. */ } } @@ -579,10 +710,40 @@ static int convert_response_xcrb(struct zcrypt_device *zdev, default: /* Unknown response type, this should NEVER EVER happen */ xcRB->status = 0x0008044DL; /* HDD_InvalidParm */ zdev->online = 0; + pr_err("Cryptographic device %x failed and was set offline\n", + zdev->ap_dev->qid); + ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%dfail", + zdev->ap_dev->qid, zdev->online); return -EAGAIN; /* repeat the request on a different device. */ } } +static int convert_response_ep11_xcrb(struct zcrypt_device *zdev, + struct ap_message *reply, struct ep11_urb *xcRB) +{ + struct type86_ep11_reply *msg = reply->message; + + /* Response type byte is the second byte in the response. */ + switch (((unsigned char *)reply->message)[1]) { + case TYPE82_RSP_CODE: + case TYPE87_RSP_CODE: + return convert_error(zdev, reply); + case TYPE86_RSP_CODE: + if (msg->hdr.reply_code) + return convert_error(zdev, reply); + if (msg->cprbx.cprb_ver_id == 0x04) + return convert_type86_ep11_xcrb(zdev, reply, xcRB); + /* Fall through, no break, incorrect cprb version is an unknown resp.*/ + default: /* Unknown response type, this should NEVER EVER happen */ + zdev->online = 0; + pr_err("Cryptographic device %x failed and was set offline\n", + zdev->ap_dev->qid); + ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%dfail", + zdev->ap_dev->qid, zdev->online); + return -EAGAIN; /* repeat the request on a different device. */ + } +} + static int convert_response_rng(struct zcrypt_device *zdev, struct ap_message *reply, char *data) @@ -602,6 +763,10 @@ static int convert_response_rng(struct zcrypt_device *zdev, * response */ default: /* Unknown response type, this should NEVER EVER happen */ zdev->online = 0; + pr_err("Cryptographic device %x failed and was set offline\n", + zdev->ap_dev->qid); + ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%dfail", + zdev->ap_dev->qid, zdev->online); return -EAGAIN; /* repeat the request on a different device. */ } } @@ -657,6 +822,51 @@ out: complete(&(resp_type->work)); } +/** + * This function is called from the AP bus code after a crypto request + * "msg" has finished with the reply message "reply". + * It is called from tasklet context. + * @ap_dev: pointer to the AP device + * @msg: pointer to the AP message + * @reply: pointer to the AP reply message + */ +static void zcrypt_msgtype6_receive_ep11(struct ap_device *ap_dev, + struct ap_message *msg, + struct ap_message *reply) +{ + static struct error_hdr error_reply = { + .type = TYPE82_RSP_CODE, + .reply_code = REP82_ERROR_MACHINE_FAILURE, + }; + struct response_type *resp_type = + (struct response_type *)msg->private; + struct type86_ep11_reply *t86r; + int length; + + /* Copy the reply message to the request message buffer. */ + if (IS_ERR(reply)) { + memcpy(msg->message, &error_reply, sizeof(error_reply)); + goto out; + } + t86r = reply->message; + if (t86r->hdr.type == TYPE86_RSP_CODE && + t86r->cprbx.cprb_ver_id == 0x04) { + switch (resp_type->type) { + case PCIXCC_RESPONSE_TYPE_EP11: + length = t86r->fmt2.offset1 + t86r->fmt2.count1; + length = min(MSGTYPE06_MAX_MSG_SIZE, length); + memcpy(msg->message, reply->message, length); + break; + default: + memcpy(msg->message, &error_reply, sizeof(error_reply)); + } + } else { + memcpy(msg->message, reply->message, sizeof(error_reply)); + } +out: + complete(&(resp_type->work)); +} + static atomic_t zcrypt_step = ATOMIC_INIT(0); /** @@ -782,6 +992,46 @@ out_free: } /** + * The request distributor calls this function if it picked the CEX4P + * device to handle a send_ep11_cprb request. + * @zdev: pointer to zcrypt_device structure that identifies the + * CEX4P device to the request distributor + * @xcRB: pointer to the ep11 user request block + */ +static long zcrypt_msgtype6_send_ep11_cprb(struct zcrypt_device *zdev, + struct ep11_urb *xcrb) +{ + struct ap_message ap_msg; + struct response_type resp_type = { + .type = PCIXCC_RESPONSE_TYPE_EP11, + }; + int rc; + + ap_init_message(&ap_msg); + ap_msg.message = kmalloc(MSGTYPE06_MAX_MSG_SIZE, GFP_KERNEL); + if (!ap_msg.message) + return -ENOMEM; + ap_msg.receive = zcrypt_msgtype6_receive_ep11; + ap_msg.psmid = (((unsigned long long) current->pid) << 32) + + atomic_inc_return(&zcrypt_step); + ap_msg.private = &resp_type; + rc = xcrb_msg_to_type6_ep11cprb_msgx(zdev, &ap_msg, xcrb); + if (rc) + goto out_free; + init_completion(&resp_type.work); + ap_queue_message(zdev->ap_dev, &ap_msg); + rc = wait_for_completion_interruptible(&resp_type.work); + if (rc == 0) + rc = convert_response_ep11_xcrb(zdev, &ap_msg, xcrb); + else /* Signal pending. */ + ap_cancel_message(zdev->ap_dev, &ap_msg); + +out_free: + kzfree(ap_msg.message); + return rc; +} + +/** * The request distributor calls this function if it picked the PCIXCC/CEX2C * device to generate random data. * @zdev: pointer to zcrypt_device structure that identifies the @@ -839,10 +1089,19 @@ static struct zcrypt_ops zcrypt_msgtype6_ops = { .rng = zcrypt_msgtype6_rng, }; +static struct zcrypt_ops zcrypt_msgtype6_ep11_ops = { + .owner = THIS_MODULE, + .variant = MSGTYPE06_VARIANT_EP11, + .rsa_modexpo = NULL, + .rsa_modexpo_crt = NULL, + .send_ep11_cprb = zcrypt_msgtype6_send_ep11_cprb, +}; + int __init zcrypt_msgtype6_init(void) { zcrypt_msgtype_register(&zcrypt_msgtype6_norng_ops); zcrypt_msgtype_register(&zcrypt_msgtype6_ops); + zcrypt_msgtype_register(&zcrypt_msgtype6_ep11_ops); return 0; } @@ -850,6 +1109,7 @@ void __exit zcrypt_msgtype6_exit(void) { zcrypt_msgtype_unregister(&zcrypt_msgtype6_norng_ops); zcrypt_msgtype_unregister(&zcrypt_msgtype6_ops); + zcrypt_msgtype_unregister(&zcrypt_msgtype6_ep11_ops); } module_init(zcrypt_msgtype6_init); diff --git a/drivers/s390/crypto/zcrypt_msgtype6.h b/drivers/s390/crypto/zcrypt_msgtype6.h index 1e500d3..2072475 100644 --- a/drivers/s390/crypto/zcrypt_msgtype6.h +++ b/drivers/s390/crypto/zcrypt_msgtype6.h @@ -32,6 +32,7 @@ #define MSGTYPE06_NAME "zcrypt_msgtype6" #define MSGTYPE06_VARIANT_DEFAULT 0 #define MSGTYPE06_VARIANT_NORNG 1 +#define MSGTYPE06_VARIANT_EP11 2 #define MSGTYPE06_MAX_MSG_SIZE (12*1024) @@ -99,6 +100,7 @@ struct type86_hdr { } __packed; #define TYPE86_RSP_CODE 0x86 +#define TYPE87_RSP_CODE 0x87 #define TYPE86_FMT2 0x02 struct type86_fmt2_ext { diff --git a/drivers/s390/crypto/zcrypt_pcica.c b/drivers/s390/crypto/zcrypt_pcica.c index f2b71d8..7a743f4 100644 --- a/drivers/s390/crypto/zcrypt_pcica.c +++ b/drivers/s390/crypto/zcrypt_pcica.c @@ -24,6 +24,9 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +#define KMSG_COMPONENT "zcrypt" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + #include #include #include @@ -199,6 +202,10 @@ static int convert_type84(struct zcrypt_device *zdev, if (t84h->len < sizeof(*t84h) + outputdatalength) { /* The result is too short, the PCICA card may not do that.. */ zdev->online = 0; + pr_err("Cryptographic device %x failed and was set offline\n", + zdev->ap_dev->qid); + ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%drc%d", + zdev->ap_dev->qid, zdev->online, t84h->code); return -EAGAIN; /* repeat the request on a different device. */ } BUG_ON(t84h->len > PCICA_MAX_RESPONSE_SIZE); @@ -223,6 +230,10 @@ static int convert_response(struct zcrypt_device *zdev, outputdata, outputdatalength); default: /* Unknown response type, this should NEVER EVER happen */ zdev->online = 0; + pr_err("Cryptographic device %x failed and was set offline\n", + zdev->ap_dev->qid); + ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%dfail", + zdev->ap_dev->qid, zdev->online); return -EAGAIN; /* repeat the request on a different device. */ } } diff --git a/drivers/s390/crypto/zcrypt_pcicc.c b/drivers/s390/crypto/zcrypt_pcicc.c index 0d90a43..4d14c04 100644 --- a/drivers/s390/crypto/zcrypt_pcicc.c +++ b/drivers/s390/crypto/zcrypt_pcicc.c @@ -24,6 +24,9 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +#define KMSG_COMPONENT "zcrypt" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + #include #include #include @@ -372,6 +375,11 @@ static int convert_type86(struct zcrypt_device *zdev, if (service_rc == 8 && service_rs == 72) return -EINVAL; zdev->online = 0; + pr_err("Cryptographic device %x failed and was set offline\n", + zdev->ap_dev->qid); + ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%drc%d", + zdev->ap_dev->qid, zdev->online, + msg->hdr.reply_code); return -EAGAIN; /* repeat the request on a different device. */ } data = msg->text; @@ -425,6 +433,10 @@ static int convert_response(struct zcrypt_device *zdev, /* no break, incorrect cprb version is an unknown response */ default: /* Unknown response type, this should NEVER EVER happen */ zdev->online = 0; + pr_err("Cryptographic device %x failed and was set offline\n", + zdev->ap_dev->qid); + ZCRYPT_DBF_DEV(DBF_ERR, zdev, "dev%04xo%dfail", + zdev->ap_dev->qid, zdev->online); return -EAGAIN; /* repeat the request on a different device. */ } } -- cgit v0.10.2 From 075dfd82102d2048e43e1cbf48d558d915c50072 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 13 Jan 2014 13:35:16 +0100 Subject: s390/compat: fix PSW32_USER_BITS definition PSW32_USER_BITS should define the primary address space for user space instead of the home address space. Symptom of this bug is that gdb doesn't work in compat mode. The bug was introduced with e258d719ff28 "s390/uaccess: always run the kernel in home space" and f26946d7ecad "s390/compat: make psw32_user_bits a constant value again". Cc: stable@vger.kernel.org # v3.13+ Reported-by: Andreas Arnez Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h index 4bf9da0..5d7e8cf 100644 --- a/arch/s390/include/asm/compat.h +++ b/arch/s390/include/asm/compat.h @@ -38,7 +38,8 @@ #define PSW32_USER_BITS (PSW32_MASK_DAT | PSW32_MASK_IO | PSW32_MASK_EXT | \ PSW32_DEFAULT_KEY | PSW32_MASK_BASE | \ - PSW32_MASK_MCHECK | PSW32_MASK_PSTATE | PSW32_ASC_HOME) + PSW32_MASK_MCHECK | PSW32_MASK_PSTATE | \ + PSW32_ASC_PRIMARY) #define COMPAT_USER_HZ 100 #define COMPAT_UTS_MACHINE "s390\0\0\0\0" -- cgit v0.10.2 From 28aa39b853fc889e672b73f69ce591d15e6306b0 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Mon, 13 Jan 2014 16:02:28 +0100 Subject: s390: delete new instances of __cpuinit usage The patch "s390/perf: add support for the CPU-Measurement Sampling Facility" added a new instance of the __cpuinit macro usage. We removed this a couple versions ago; we now want to remove the compat no-op stubs. Introducing new users is not what we want to see at this point in time, as it will break once the stubs are gone. Cc: Hendrik Brueckner Signed-off-by: Paul Gortmaker Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 3c3bc8d..6c0d298 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -1512,8 +1512,8 @@ static void cpumf_measurement_alert(struct ext_code ext_code, } } -static int __cpuinit cpumf_pmu_notifier(struct notifier_block *self, - unsigned long action, void *hcpu) +static int cpumf_pmu_notifier(struct notifier_block *self, + unsigned long action, void *hcpu) { unsigned int cpu = (long) hcpu; int flags; -- cgit v0.10.2 From 1c59a861d6982edf3f9905ad2098575336ae904d Mon Sep 17 00:00:00 2001 From: Eugene Crosser Date: Wed, 24 Apr 2013 12:00:23 +0200 Subject: s390/qdio: bridgeport support - CHSC part Introduce function for the "Perform network-subchannel operation" CHSC command with operation code "bridgeport information", and bit definitions for "characteristics" pertaning to this command. Signed-off-by: Eugene Crosser Reviewed-by: Sebastian Ott Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/include/asm/css_chars.h b/arch/s390/include/asm/css_chars.h index 7e1c917..09d1dd4 100644 --- a/arch/s390/include/asm/css_chars.h +++ b/arch/s390/include/asm/css_chars.h @@ -29,6 +29,8 @@ struct css_general_char { u32 fcx : 1; /* bit 88 */ u32 : 19; u32 alt_ssi : 1; /* bit 108 */ + u32:1; + u32 narf:1; /* bit 110 */ } __packed; extern struct css_general_char css_general_characteristics; diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h index 57d0d7e..0a1abf1 100644 --- a/arch/s390/include/asm/qdio.h +++ b/arch/s390/include/asm/qdio.h @@ -378,6 +378,34 @@ struct qdio_initialize { struct qdio_outbuf_state *output_sbal_state_array; }; +/** + * enum qdio_brinfo_entry_type - type of address entry for qdio_brinfo_desc() + * @l3_ipv6_addr: entry contains IPv6 address + * @l3_ipv4_addr: entry contains IPv4 address + * @l2_addr_lnid: entry contains MAC address and VLAN ID + */ +enum qdio_brinfo_entry_type {l3_ipv6_addr, l3_ipv4_addr, l2_addr_lnid}; + +/** + * struct qdio_brinfo_entry_XXX - Address entry for qdio_brinfo_desc() + * @nit: Network interface token + * @addr: Address of one of the three types + * + * The struct is passed to the callback function by qdio_brinfo_desc() + */ +struct qdio_brinfo_entry_l3_ipv6 { + u64 nit; + struct { unsigned char _s6_addr[16]; } addr; +} __packed; +struct qdio_brinfo_entry_l3_ipv4 { + u64 nit; + struct { uint32_t _s_addr; } addr; +} __packed; +struct qdio_brinfo_entry_l2 { + u64 nit; + struct { u8 mac[6]; u16 lnid; } addr_lnid; +} __packed; + #define QDIO_STATE_INACTIVE 0x00000002 /* after qdio_cleanup */ #define QDIO_STATE_ESTABLISHED 0x00000004 /* after qdio_establish */ #define QDIO_STATE_ACTIVE 0x00000008 /* after qdio_activate */ @@ -399,5 +427,10 @@ extern int qdio_get_next_buffers(struct ccw_device *, int, int *, int *); extern int qdio_shutdown(struct ccw_device *, int); extern int qdio_free(struct ccw_device *); extern int qdio_get_ssqd_desc(struct ccw_device *, struct qdio_ssqd_desc *); +extern int qdio_pnso_brinfo(struct subchannel_id schid, + int cnc, u16 *response, + void (*cb)(void *priv, enum qdio_brinfo_entry_type type, + void *entry), + void *priv); #endif /* __QDIO_H__ */ diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c index eee70cb..f6b9188 100644 --- a/drivers/s390/cio/chsc.c +++ b/drivers/s390/cio/chsc.c @@ -55,6 +55,7 @@ int chsc_error_from_response(int response) case 0x0004: return -EOPNOTSUPP; case 0x000b: + case 0x0107: /* "Channel busy" for the op 0x003d */ return -EBUSY; case 0x0100: case 0x0102: @@ -1202,3 +1203,35 @@ out: return ret; } EXPORT_SYMBOL_GPL(chsc_scm_info); + +/** + * chsc_pnso_brinfo() - Perform Network-Subchannel Operation, Bridge Info. + * @schid: id of the subchannel on which PNSO is performed + * @brinfo_area: request and response block for the operation + * @resume_token: resume token for multiblock response + * @cnc: Boolean change-notification control + * + * brinfo_area must be allocated by the caller with get_zeroed_page(GFP_KERNEL) + * + * Returns 0 on success. + */ +int chsc_pnso_brinfo(struct subchannel_id schid, + struct chsc_pnso_area *brinfo_area, + struct chsc_brinfo_resume_token resume_token, + int cnc) +{ + memset(brinfo_area, 0, sizeof(*brinfo_area)); + brinfo_area->request.length = 0x0030; + brinfo_area->request.code = 0x003d; /* network-subchannel operation */ + brinfo_area->m = schid.m; + brinfo_area->ssid = schid.ssid; + brinfo_area->sch = schid.sch_no; + brinfo_area->cssid = schid.cssid; + brinfo_area->oc = 0; /* Store-network-bridging-information list */ + brinfo_area->resume_token = resume_token; + brinfo_area->n = (cnc != 0); + if (chsc(brinfo_area)) + return -EIO; + return chsc_error_from_response(brinfo_area->response.code); +} +EXPORT_SYMBOL_GPL(chsc_pnso_brinfo); diff --git a/drivers/s390/cio/chsc.h b/drivers/s390/cio/chsc.h index 23d072e..7e53a9c 100644 --- a/drivers/s390/cio/chsc.h +++ b/drivers/s390/cio/chsc.h @@ -61,7 +61,9 @@ struct css_chsc_char { u32 : 20; u32 scssc : 1; /* bit 107 */ u32 scsscf : 1; /* bit 108 */ - u32 : 19; + u32:7; + u32 pnso:1; /* bit 116 */ + u32:11; }__attribute__((packed)); extern struct css_chsc_char css_chsc_characteristics; @@ -188,6 +190,53 @@ struct chsc_scm_info { int chsc_scm_info(struct chsc_scm_info *scm_area, u64 token); +struct chsc_brinfo_resume_token { + u64 t1; + u64 t2; +} __packed; + +struct chsc_brinfo_naihdr { + struct chsc_brinfo_resume_token resume_token; + u32:32; + u32 instance; + u32:24; + u8 naids; + u32 reserved[3]; +} __packed; + +struct chsc_pnso_area { + struct chsc_header request; + u8:2; + u8 m:1; + u8:5; + u8:2; + u8 ssid:2; + u8 fmt:4; + u16 sch; + u8:8; + u8 cssid; + u16:16; + u8 oc; + u32:24; + struct chsc_brinfo_resume_token resume_token; + u32 n:1; + u32:31; + u32 reserved[3]; + struct chsc_header response; + u32:32; + struct chsc_brinfo_naihdr naihdr; + union { + struct qdio_brinfo_entry_l3_ipv6 l3_ipv6[0]; + struct qdio_brinfo_entry_l3_ipv4 l3_ipv4[0]; + struct qdio_brinfo_entry_l2 l2[0]; + } entries; +} __packed; + +int chsc_pnso_brinfo(struct subchannel_id schid, + struct chsc_pnso_area *brinfo_area, + struct chsc_brinfo_resume_token resume_token, + int cnc); + #ifdef CONFIG_SCM_BUS int scm_update_information(void); int scm_process_availability_information(void); diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index 3e602e8..c883a08 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -1752,6 +1752,97 @@ int qdio_stop_irq(struct ccw_device *cdev, int nr) } EXPORT_SYMBOL(qdio_stop_irq); +/** + * qdio_pnso_brinfo() - perform network subchannel op #0 - bridge info. + * @schid: Subchannel ID. + * @cnc: Boolean Change-Notification Control + * @response: Response code will be stored at this address + * @cb: Callback function will be executed for each element + * of the address list + * @priv: Pointer passed from the caller to qdio_pnso_brinfo() + * @type: Type of the address entry passed to the callback + * @entry: Entry containg the address of the specified type + * @priv: Pointer to pass to the callback function. + * + * Performs "Store-network-bridging-information list" operation and calls + * the callback function for every entry in the list. If "change- + * notification-control" is set, further changes in the address list + * will be reported via the IPA command. + */ +int qdio_pnso_brinfo(struct subchannel_id schid, + int cnc, u16 *response, + void (*cb)(void *priv, enum qdio_brinfo_entry_type type, + void *entry), + void *priv) +{ + struct chsc_pnso_area *rr; + int rc; + u32 prev_instance = 0; + int isfirstblock = 1; + int i, size, elems; + + rr = (struct chsc_pnso_area *)get_zeroed_page(GFP_KERNEL); + if (rr == NULL) + return -ENOMEM; + do { + /* on the first iteration, naihdr.resume_token will be zero */ + rc = chsc_pnso_brinfo(schid, rr, rr->naihdr.resume_token, cnc); + if (rc != 0 && rc != -EBUSY) + goto out; + if (rr->response.code != 1) { + rc = -EIO; + continue; + } else + rc = 0; + + if (cb == NULL) + continue; + + size = rr->naihdr.naids; + elems = (rr->response.length - + sizeof(struct chsc_header) - + sizeof(struct chsc_brinfo_naihdr)) / + size; + + if (!isfirstblock && (rr->naihdr.instance != prev_instance)) { + /* Inform the caller that they need to scrap */ + /* the data that was already reported via cb */ + rc = -EAGAIN; + break; + } + isfirstblock = 0; + prev_instance = rr->naihdr.instance; + for (i = 0; i < elems; i++) + switch (size) { + case sizeof(struct qdio_brinfo_entry_l3_ipv6): + (*cb)(priv, l3_ipv6_addr, + &rr->entries.l3_ipv6[i]); + break; + case sizeof(struct qdio_brinfo_entry_l3_ipv4): + (*cb)(priv, l3_ipv4_addr, + &rr->entries.l3_ipv4[i]); + break; + case sizeof(struct qdio_brinfo_entry_l2): + (*cb)(priv, l2_addr_lnid, + &rr->entries.l2[i]); + break; + default: + WARN_ON_ONCE(1); + rc = -EIO; + goto out; + } + } while (rr->response.code == 0x0107 || /* channel busy */ + (rr->response.code == 1 && /* list stored */ + /* resume token is non-zero => list incomplete */ + (rr->naihdr.resume_token.t1 || rr->naihdr.resume_token.t2))); + (*response) = rr->response.code; + +out: + free_page((unsigned long)rr); + return rc; +} +EXPORT_SYMBOL_GPL(qdio_pnso_brinfo); + static int __init init_QDIO(void) { int rc; -- cgit v0.10.2 From b4a960159e6f5254ac3c95dd183789f402431977 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Fri, 13 Dec 2013 12:53:42 +0100 Subject: s390: Fix misspellings using 'codespell' tool Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h index 0a1abf1..d786c63 100644 --- a/arch/s390/include/asm/qdio.h +++ b/arch/s390/include/asm/qdio.h @@ -336,7 +336,7 @@ typedef void qdio_handler_t(struct ccw_device *, unsigned int, int, #define QDIO_FLAG_CLEANUP_USING_HALT 0x02 /** - * struct qdio_initialize - qdio initalization data + * struct qdio_initialize - qdio initialization data * @cdev: associated ccw device * @q_format: queue format * @adapter_name: name for the adapter diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index 9532fe2..384e609 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -975,7 +975,7 @@ sie_done: lctlg %c1,%c1,__LC_USER_ASCE # load primary asce # some program checks are suppressing. C code (e.g. do_protection_exception) # will rewind the PSW by the ILC, which is 4 bytes in case of SIE. Other -# instructions beween sie64a and sie_done should not cause program +# instructions between sie64a and sie_done should not cause program # interrupts. So lets use a nop (47 00 00 00) as a landing pad. # See also HANDLE_SIE_INTERCEPT rewind_pad: diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 4444875..36e81d7 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -373,7 +373,7 @@ static void __init setup_lowcore(void) /* * Set up PSW restart to call ipl.c:do_restart(). Copy the relevant - * restart data to the absolute zero lowcore. This is necesary if + * restart data to the absolute zero lowcore. This is necessary if * PSW restart is done on an offline CPU that has lowcore zero. */ lc->restart_stack = (unsigned long) restart_stack; diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 2440602..d101dae 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -275,7 +275,7 @@ static int handle_io_inst(struct kvm_vcpu *vcpu) return -EOPNOTSUPP; } else { /* - * Set condition code 3 to stop the guest from issueing channel + * Set condition code 3 to stop the guest from issuing channel * I/O instructions. */ kvm_s390_set_psw_cc(vcpu, 3); diff --git a/arch/s390/lib/uaccess_pt.c b/arch/s390/lib/uaccess_pt.c index dbdab3e..0632dc5 100644 --- a/arch/s390/lib/uaccess_pt.c +++ b/arch/s390/lib/uaccess_pt.c @@ -74,8 +74,8 @@ static size_t copy_in_kernel(size_t count, void __user *to, /* * Returns kernel address for user virtual address. If the returned address is - * >= -4095 (IS_ERR_VALUE(x) returns true), a fault has occured and the address - * contains the (negative) exception code. + * >= -4095 (IS_ERR_VALUE(x) returns true), a fault has occurred and the + * address contains the (negative) exception code. */ #ifdef CONFIG_64BIT diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index e794c88..3584ed9 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -293,7 +293,7 @@ static int gmap_alloc_table(struct gmap *gmap, * @addr: address in the guest address space * @len: length of the memory area to unmap * - * Returns 0 if the unmap succeded, -EINVAL if not. + * Returns 0 if the unmap succeeded, -EINVAL if not. */ int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len) { @@ -344,7 +344,7 @@ EXPORT_SYMBOL_GPL(gmap_unmap_segment); * @from: source address in the parent address space * @to: target address in the guest address space * - * Returns 0 if the mmap succeded, -EINVAL or -ENOMEM if not. + * Returns 0 if the mmap succeeded, -EINVAL or -ENOMEM if not. */ int gmap_map_segment(struct gmap *gmap, unsigned long from, unsigned long to, unsigned long len) diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index f302efa9..1eef0f5 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -3386,7 +3386,7 @@ int dasd_generic_set_offline(struct ccw_device *cdev) if (test_bit(DASD_FLAG_SAFE_OFFLINE_RUNNING, &device->flags)) { /* - * safe offline allready running + * safe offline already running * could only be called by normal offline so safe_offline flag * needs to be removed to run normal offline and kill all I/O */ diff --git a/drivers/s390/crypto/zcrypt_msgtype6.c b/drivers/s390/crypto/zcrypt_msgtype6.c index 57bfda1..dc542e0 100644 --- a/drivers/s390/crypto/zcrypt_msgtype6.c +++ b/drivers/s390/crypto/zcrypt_msgtype6.c @@ -419,7 +419,7 @@ static int xcrb_msg_to_type6_ep11cprb_msgx(struct zcrypt_device *zdev, replaced by the usage domain for non-management commands only. Therefore we check the first bit of the 'flags' parameter for management command indication. - 0 - non managment command + 0 - non management command 1 - management command */ if (!((msg->cprbx.flags & 0x80) == 0x80)) { -- cgit v0.10.2 From f85168e4d96b31b09ecf09a679820b031224e69e Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Wed, 8 Jan 2014 16:45:39 +0100 Subject: s390/cpum_sf: fix printk format warnings Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c index a76d602..5d2dfa3 100644 --- a/arch/s390/kernel/perf_event.c +++ b/arch/s390/kernel/perf_event.c @@ -142,17 +142,17 @@ static void print_debug_sf(void) if (qsi(&si)) return; - pr_info("CPU[%i] CPUM_SF: basic=%i diag=%i min=%i max=%i cpu_speed=%i\n", + pr_info("CPU[%i] CPUM_SF: basic=%i diag=%i min=%lu max=%lu cpu_speed=%u\n", cpu, si.as, si.ad, si.min_sampl_rate, si.max_sampl_rate, si.cpu_speed); if (si.as) pr_info("CPU[%i] CPUM_SF: Basic-sampling: a=%i e=%i c=%i" - " bsdes=%i tear=%p dear=%p\n", cpu, + " bsdes=%i tear=%016lx dear=%016lx\n", cpu, si.as, si.es, si.cs, si.bsdes, si.tear, si.dear); if (si.ad) pr_info("CPU[%i] CPUM_SF: Diagnostic-sampling: a=%i e=%i c=%i" - " dsdes=%i tear=%p dear=%p\n", cpu, + " dsdes=%i tear=%016lx dear=%016lx\n", cpu, si.ad, si.ed, si.cd, si.dsdes, si.tear, si.dear); } -- cgit v0.10.2