From 488ca7d72d974e3c00ae73ed9f947590680bdf00 Mon Sep 17 00:00:00 2001 From: Tim Chen Date: Fri, 21 Aug 2015 14:56:46 -0700 Subject: x86/cpufeatures: Enable cpuid for Intel SHA extensions Add Intel CPUID for Intel Secure Hash Algorithm Extensions. This feature provides new instructions for accelerated computation of SHA-1 and SHA-256. This allows the feature to be shown in the /proc/cpuinfo for cpus that support it. Refer to SHA extension programming guide in chapter 8.2 of the Intel Architecture Instruction Set Extensions Programming reference for definition of this feature's cpuid: CPUID.(EAX=07H, ECX=0):EBX.SHA [bit 29] = 1 https://software.intel.com/sites/default/files/managed/07/b7/319433-023.pdf Originally-by: Chandramouli Narayanan Signed-off-by: Tim Chen Cc: Borislav Petkov Cc: Dave Hansen Cc: Herbert Xu Link: http://lkml.kernel.org/r/1440194206.3940.6.camel@schen9-mobl2 Signed-off-by: Thomas Gleixner diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 3d6606f..a94f83d 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -239,6 +239,7 @@ #define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */ #define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */ #define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */ +#define X86_FEATURE_SHA_NI ( 9*32+29) /* SHA1/SHA256 Instruction Extensions */ /* Extended state features, CPUID level 0x0000000d:1 (eax), word 10 */ #define X86_FEATURE_XSAVEOPT (10*32+ 0) /* XSAVEOPT */ -- cgit v0.10.2 From 31e09b18c863718939e3e9c30eee55f9011d85ee Mon Sep 17 00:00:00 2001 From: Linda Knippers Date: Tue, 1 Sep 2015 15:41:55 -0400 Subject: x86/mm/srat: Print non-volatile flag in SRAT With the addition of NVDIMM support, a question came up as to whether NVDIMM ranges should be in the SRAT with this bit set. I think the consensus was no because the ranges are in the NFIT with proximity domain information there. 
ACPI is not clear on the meaning of this bit in the SRAT. If someone is setting it, we might want to ask them what they expect to happen with it. Right now this bit is only printed if all the ACPI debug information is turned on. Signed-off-by: Linda Knippers Acked-by: Thomas Gleixner Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20150901194154.GA4939@ljkz400 Signed-off-by: Ingo Molnar diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c index 66338a6..c2aea63 100644 --- a/arch/x86/mm/srat.c +++ b/arch/x86/mm/srat.c @@ -192,10 +192,11 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) node_set(node, numa_nodes_parsed); - pr_info("SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx]%s\n", + pr_info("SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx]%s%s\n", node, pxm, (unsigned long long) start, (unsigned long long) end - 1, - hotpluggable ? " hotplug" : ""); + hotpluggable ? " hotplug" : "", + ma->flags & ACPI_SRAT_MEM_NON_VOLATILE ? " non-volatile" : ""); /* Mark hotplug range in memblock. */ if (hotpluggable && memblock_mark_hotplug(start, ma->length)) -- cgit v0.10.2 From 9642d18eee2cd169b60c6ac0f20bda745b5a3d1e Mon Sep 17 00:00:00 2001 From: Vatika Harlalka Date: Tue, 1 Sep 2015 16:50:59 +0200 Subject: nohz: Affine unpinned timers to housekeepers The problem addressed in this patch is about affining unpinned timers. Adaptive or Full Dynticks CPUs are currently disturbed by unnecessary jitter due to firing of such timers on them. This patch will affine timers to online CPUs which are not full dynticks in NOHZ_FULL configured systems. It should not introduce overhead in nohz full off case due to static keys. Signed-off-by: Vatika Harlalka Signed-off-by: Frederic Weisbecker Reviewed-by: Preeti U Murthy Acked-by: Thomas Gleixner Cc: Chris Metcalf Cc: Christoph Lameter Cc: Linus Torvalds Cc: Paul E. 
McKenney Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1441119060-2230-2-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar diff --git a/include/linux/tick.h b/include/linux/tick.h index 48d901f..e312219 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -147,11 +147,20 @@ static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask) cpumask_or(mask, mask, tick_nohz_full_mask); } +static inline int housekeeping_any_cpu(void) +{ + return cpumask_any_and(housekeeping_mask, cpu_online_mask); +} + extern void tick_nohz_full_kick(void); extern void tick_nohz_full_kick_cpu(int cpu); extern void tick_nohz_full_kick_all(void); extern void __tick_nohz_task_switch(void); #else +static inline int housekeeping_any_cpu(void) +{ + return smp_processor_id(); +} static inline bool tick_nohz_full_enabled(void) { return false; } static inline bool tick_nohz_full_cpu(int cpu) { return false; } static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask) { } diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 8b864ec..0902e4d 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -623,18 +623,21 @@ int get_nohz_timer_target(void) int i, cpu = smp_processor_id(); struct sched_domain *sd; - if (!idle_cpu(cpu)) + if (!idle_cpu(cpu) && is_housekeeping_cpu(cpu)) return cpu; rcu_read_lock(); for_each_domain(cpu, sd) { for_each_cpu(i, sched_domain_span(sd)) { - if (!idle_cpu(i)) { + if (!idle_cpu(i) && is_housekeeping_cpu(cpu)) { cpu = i; goto unlock; } } } + + if (!is_housekeeping_cpu(cpu)) + cpu = housekeeping_any_cpu(); unlock: rcu_read_unlock(); return cpu; -- cgit v0.10.2 From 7c8bb6cb95061b3143759459ed6c6b0c73bcfecb Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 1 Sep 2015 16:51:00 +0200 Subject: nohz: Assert existing housekeepers when nohz full enabled The code ensures that when nohz full is running, at least the boot CPU serves as a housekeeper and it can't be later offlined. 
Let's assert this assumption to make sure that we have CPUs to handle unbound jobs like workqueues and timers while nohz full CPUs run undisturbed. Also improve the comments on housekeeper offlining prevention. Signed-off-by: Frederic Weisbecker Acked-by: Thomas Gleixner Cc: Chris Metcalf Cc: Christoph Lameter Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Preeti U Murthy Cc: Vatika Harlalka Link: http://lkml.kernel.org/r/1441119060-2230-3-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 3319e16..7c7ec45 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -290,16 +290,17 @@ static int __init tick_nohz_full_setup(char *str) __setup("nohz_full=", tick_nohz_full_setup); static int tick_nohz_cpu_down_callback(struct notifier_block *nfb, - unsigned long action, - void *hcpu) + unsigned long action, + void *hcpu) { unsigned int cpu = (unsigned long)hcpu; switch (action & ~CPU_TASKS_FROZEN) { case CPU_DOWN_PREPARE: /* - * If we handle the timekeeping duty for full dynticks CPUs, - * we can't safely shutdown that CPU. + * The boot CPU handles housekeeping duty (unbound timers, + * workqueues, timekeeping, ...) on behalf of full dynticks + * CPUs. It must remain online when nohz full is enabled. */ if (tick_nohz_full_running && tick_do_timer_cpu == cpu) return NOTIFY_BAD; @@ -370,6 +371,12 @@ void __init tick_nohz_init(void) cpu_notifier(tick_nohz_cpu_down_callback, 0); pr_info("NO_HZ: Full dynticks CPUs: %*pbl.\n", cpumask_pr_args(tick_nohz_full_mask)); + + /* + * We need at least one CPU to handle housekeeping work such + * as timekeeping, unbound timers, workqueues, ... 
+ */ + WARN_ON_ONCE(cpumask_empty(housekeeping_mask)); } #endif -- cgit v0.10.2 From 5ad4da4302712fba10624d28cb6c269fee592b69 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 2 Sep 2015 09:56:31 +0200 Subject: perf tools: Fix parse_events_add_pmu caller Following commit changed parse_events_add_pmu interface: 36adec85a86f perf tools: Change parse_events_add_pmu interface but forgot to change one caller. Because of the relaxed compilation rules for the bison parser, the compiler did not warn on that. Signed-off-by: Jiri Olsa Cc: Raphael Beamonte Cc: David Ahern Cc: Matt Fleming Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Steven Rostedt Fixes: 36adec85a86f ("perf tools: Change parse_events_add_pmu interface") Link: http://lkml.kernel.org/r/1441180605-24737-2-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index 591905a..9cd7081 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -255,7 +255,7 @@ PE_PMU_EVENT_PRE '-' PE_PMU_EVENT_SUF sep_dc list_add_tail(&term->list, head); ALLOC_LIST(list); - ABORT_ON(parse_events_add_pmu(list, &data->idx, "cpu", head)); + ABORT_ON(parse_events_add_pmu(data, list, "cpu", head)); parse_events__free_terms(head); $$ = list; } -- cgit v0.10.2 From 53ff6bc37be449f546158a39c528d7814dfb15a1 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 18 Aug 2015 12:07:05 +0300 Subject: perf tools: Fix use of wrong event when processing exit events In a couple of cases the 'comm' member of 'union event' has been used instead of the correct member ('fork') when processing exit events. In the cases where it has been used incorrectly, only the 'pid' and 'tid' are affected. The 'pid' value would be correct anyway because it is in the same position in 'comm' and 'fork' events, but the 'tid' would have been incorrectly assigned from 'ppid'.
However, for exit events, the kernel puts the current task in the 'ppid' and 'ptid' which is the same as the exiting task. That is 'ppid' == 'pid' and if the task is not multi-threaded, 'pid' == 'tid' i.e. the data goes wrong only when tracing multi-threaded programs. It is hard to find an example of how this would produce an error in practice. There are 3 occurrences of the fix: 1. perf script is only affected if !sample_id_all which only happens on old kernels. 2. intel_pt is only affected when decoding without timestamps and would probably still decode correctly - the exit event is only used to flush out data which anyway gets flushed at the end of the session 3. intel_bts also uses the exit event to flush data which would probably not cause errors as it would get flushed at the end of the session instead Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1439888825-27708-1-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index eb51325..284a76e 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -768,8 +768,8 @@ static int process_exit_event(struct perf_tool *tool, if (!evsel->attr.sample_id_all) { sample->cpu = 0; sample->time = 0; - sample->tid = event->comm.tid; - sample->pid = event->comm.pid; + sample->tid = event->fork.tid; + sample->pid = event->fork.pid; } print_sample_start(sample, thread, evsel); perf_event__fprintf(event, stdout); diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c index ea76862..eb0e7f8 100644 --- a/tools/perf/util/intel-bts.c +++ b/tools/perf/util/intel-bts.c @@ -623,7 +623,7 @@ static int intel_bts_process_event(struct perf_session *session, if (err) return err; if (event->header.type == PERF_RECORD_EXIT) { - err = intel_bts_process_tid_exit(bts, event->comm.tid); + err = intel_bts_process_tid_exit(bts, event->fork.tid); if (err) return err; } diff --git
a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index bb41c20..535d86f 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -1494,7 +1494,7 @@ static int intel_pt_process_event(struct perf_session *session, if (pt->timeless_decoding) { if (event->header.type == PERF_RECORD_EXIT) { err = intel_pt_process_timeless_queues(pt, - event->comm.tid, + event->fork.tid, sample->time); } } else if (timestamp) { -- cgit v0.10.2 From ac0e137ab0da80e8fc0db2027598e2f7f82a5a02 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Mon, 31 Aug 2015 20:27:32 -0700 Subject: clk: h8s2678: Fix compile error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Recent cleanup removed some include files without checking if the cleaned up code still compiles. This results in the following compile error. drivers/clk/h8300/clk-h8s2678.c: In function ‘h8s2678_pll_clk_setup’: drivers/clk/h8300/clk-h8s2678.c:99:14: error: implicit declaration of function ‘kzalloc’ drivers/clk/h8300/clk-h8s2678.c:138:2: error: implicit declaration of function ‘kfree’ Cc: Yoshinori Sato Signed-off-by: Guenter Roeck Signed-off-by: Stephen Boyd diff --git a/drivers/clk/h8300/clk-h8s2678.c b/drivers/clk/h8300/clk-h8s2678.c index 2a38eb4..6cf38dc 100644 --- a/drivers/clk/h8300/clk-h8s2678.c +++ b/drivers/clk/h8300/clk-h8s2678.c @@ -8,6 +8,7 @@ #include #include #include +#include static DEFINE_SPINLOCK(clklock); -- cgit v0.10.2 From 9f42a89da6b4dc015631e01ba990d3db2cae2a1b Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Wed, 2 Sep 2015 10:57:47 +0800 Subject: clk: Hi6220: separately build stub clock driver The previous code, kernel builds Hi6220's common clock driver and stub clock driver together. Stub clock driver has introduced the dependency with CONFIG_MAILBOX, so kernel will not build Hi6220's common clock driver due ARM64's defconfig have not enabled CONFIG_MAILBOX by default. 
So separately build stub clock driver and common clock driver for Hi6220; and only let stub clock driver has the dependency with CONFIG_MAILBOX. Signed-off-by: Leo Yan Tested-by: Kevin Hilman Signed-off-by: Stephen Boyd diff --git a/drivers/clk/hisilicon/Kconfig b/drivers/clk/hisilicon/Kconfig index 2c16807..e434854 100644 --- a/drivers/clk/hisilicon/Kconfig +++ b/drivers/clk/hisilicon/Kconfig @@ -1,6 +1,12 @@ config COMMON_CLK_HI6220 bool "Hi6220 Clock Driver" - depends on (ARCH_HISI || COMPILE_TEST) && MAILBOX + depends on ARCH_HISI || COMPILE_TEST default ARCH_HISI help Build the Hisilicon Hi6220 clock driver based on the common clock framework. + +config STUB_CLK_HI6220 + bool "Hi6220 Stub Clock Driver" + depends on COMMON_CLK_HI6220 && MAILBOX + help + Build the Hisilicon Hi6220 stub clock driver. diff --git a/drivers/clk/hisilicon/Makefile b/drivers/clk/hisilicon/Makefile index 4a1001a..74dba31 100644 --- a/drivers/clk/hisilicon/Makefile +++ b/drivers/clk/hisilicon/Makefile @@ -7,4 +7,5 @@ obj-y += clk.o clkgate-separated.o clkdivider-hi6220.o obj-$(CONFIG_ARCH_HI3xxx) += clk-hi3620.o obj-$(CONFIG_ARCH_HIP04) += clk-hip04.o obj-$(CONFIG_ARCH_HIX5HD2) += clk-hix5hd2.o -obj-$(CONFIG_COMMON_CLK_HI6220) += clk-hi6220.o clk-hi6220-stub.o +obj-$(CONFIG_COMMON_CLK_HI6220) += clk-hi6220.o +obj-$(CONFIG_STUB_CLK_HI6220) += clk-hi6220-stub.o -- cgit v0.10.2 From 66c117d7fa2ae429911e60d84bf31a90b2b96189 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 3 Sep 2015 12:34:55 +0200 Subject: x86/alternatives: Make optimize_nops() interrupt safe and synced Richard reported the following crash: [ 0.036000] BUG: unable to handle kernel paging request at 55501e06 [ 0.036000] IP: [] common_interrupt+0xb/0x38 [ 0.036000] Call Trace: [ 0.036000] [] ? 
add_nops+0x90/0xa0 [ 0.036000] [] apply_alternatives+0x274/0x630 Chuck decoded: " 0: 8d 90 90 83 04 24 lea 0x24048390(%eax),%edx 6: 80 fc 0f cmp $0xf,%ah 9: a8 0f test $0xf,%al >> b: a0 06 1e 50 55 mov 0x55501e06,%al 10: 57 push %edi 11: 56 push %esi Interrupt 0x30 occurred while the alternatives code was replacing the initial 0x90,0x90,0x90 NOPs (from the ASM_CLAC macro) with the optimized version, 0x8d,0x76,0x00. Only the first byte has been replaced so far, and it makes a mess out of the insn decoding." optimize_nops() is buggy in two aspects: - It's not disabling interrupts across the modification - It's lacking a sync_core() call Add both. Fixes: 4fd4b6e5537c 'x86/alternatives: Use optimized NOPs for padding' Reported-and-tested-by: "Richard W.M. Jones" Signed-off-by: Thomas Gleixner Cc: Richard W.M. Jones Cc: Chuck Ebbert Cc: Borislav Petkov Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/alpine.DEB.2.11.1509031232340.15006@nanos Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index c42827e..25f9093 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -338,10 +338,15 @@ done: static void __init_or_module optimize_nops(struct alt_instr *a, u8 *instr) { + unsigned long flags; + if (instr[0] != 0x90) return; + local_irq_save(flags); add_nops(instr + (a->instrlen - a->padlen), a->padlen); + sync_core(); + local_irq_restore(flags); DUMP_BYTES(instr, a->instrlen, "%p: [%d:%d) optimized NOPs: ", instr, a->instrlen - a->padlen, a->padlen); -- cgit v0.10.2 From dc3a04d551b5d21f1badbb39bfe8e5bc1289b184 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 2 Sep 2015 17:11:22 -0700 Subject: security/device_cgroup: Fix RCU_LOCKDEP_WARN() condition f78f5b90c4ff ("rcu: Rename rcu_lockdep_assert() to RCU_LOCKDEP_WARN()") introduced a bug by incorrectly inverting the condition when moving from rcu_lockdep_assert() to RCU_LOCKDEP_WARN(). 
This commit therefore fixes the inversion. Reported-by: Felipe Balbi Reported-by: Tejun Heo Signed-off-by: Paul E. McKenney Acked-by: Serge Hallyn Tested-by: Josh Boyer diff --git a/security/device_cgroup.c b/security/device_cgroup.c index 73455089..03c1652 100644 --- a/security/device_cgroup.c +++ b/security/device_cgroup.c @@ -401,7 +401,7 @@ static bool verify_new_ex(struct dev_cgroup *dev_cgroup, bool match = false; RCU_LOCKDEP_WARN(!rcu_read_lock_held() && - lockdep_is_held(&devcgroup_mutex), + !lockdep_is_held(&devcgroup_mutex), "device_cgroup:verify_new_ex called without proper synchronization"); if (dev_cgroup->behavior == DEVCG_DEFAULT_ALLOW) { -- cgit v0.10.2 From 76fc5e7b2355af167dea1a32e93c57fc37900a5b Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Fri, 4 Sep 2015 17:00:43 -0700 Subject: x86/vm86: Block non-root vm86(old) if mmap_min_addr != 0 vm86 exposes an interesting attack surface against the entry code. Since vm86 is mostly useless anyway if mmap_min_addr != 0, just turn it off in that case. There are some reports that vbetool can work despite setting mmap_min_addr to zero. This shouldn't break that use case, as CAP_SYS_RAWIO already overrides mmap_min_addr. 
Suggested-by: Linus Torvalds Signed-off-by: Andy Lutomirski Cc: Arjan van de Ven Cc: Austin S Hemmelgarn Cc: Borislav Petkov Cc: Brian Gerst Cc: Josh Boyer Cc: Kees Cook Cc: Matthew Garrett Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Stas Sergeev Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index abd8b856..5246193 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include @@ -232,6 +233,32 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus) struct pt_regs *regs = current_pt_regs(); unsigned long err = 0; + err = security_mmap_addr(0); + if (err) { + /* + * vm86 cannot virtualize the address space, so vm86 users + * need to manage the low 1MB themselves using mmap. Given + * that BIOS places important data in the first page, vm86 + * is essentially useless if mmap_min_addr != 0. DOSEMU, + * for example, won't even bother trying to use vm86 if it + * can't map a page at virtual address 0. + * + * To reduce the available kernel attack surface, simply + * disallow vm86(old) for users who cannot mmap at va 0. + * + * The implementation of security_mmap_addr will allow + * suitably privileged users to map va 0 even if + * vm.mmap_min_addr is set above 0, and we want this + * behavior for vm86 as well, as it ensures that legacy + * tools like vbetool will not fail just because of + * vm.mmap_min_addr. + */ + pr_info_once("Denied a call to vm86(old) from %s[%d] (uid: %d). 
Set the vm.mmap_min_addr sysctl to 0 and/or adjust LSM mmap_min_addr policy to enable vm86 if you are using a vm86-based DOS emulator.\n", + current->comm, task_pid_nr(current), + from_kuid_munged(&init_user_ns, current_uid())); + return -EPERM; + } + if (!vm86) { if (!(vm86 = kzalloc(sizeof(*vm86), GFP_KERNEL))) return -ENOMEM; diff --git a/tools/testing/selftests/x86/entry_from_vm86.c b/tools/testing/selftests/x86/entry_from_vm86.c index 9a43a59..421c607 100644 --- a/tools/testing/selftests/x86/entry_from_vm86.c +++ b/tools/testing/selftests/x86/entry_from_vm86.c @@ -116,8 +116,9 @@ static bool do_test(struct vm86plus_struct *v86, unsigned long eip, v86->regs.eip = eip; ret = vm86(VM86_ENTER, v86); - if (ret == -1 && errno == ENOSYS) { - printf("[SKIP]\tvm86 not supported\n"); + if (ret == -1 && (errno == ENOSYS || errno == EPERM)) { + printf("[SKIP]\tvm86 %s\n", + errno == ENOSYS ? "not supported" : "not allowed"); return false; } -- cgit v0.10.2 From 8901c18b6cafa51f7985f1031968bbfe9dc47735 Mon Sep 17 00:00:00 2001 From: Daniel Drake Date: Wed, 2 Sep 2015 16:10:34 -0600 Subject: asus-nb-wmi: Add wapf=4 quirk for X456UA/X456UF These laptops boot with wifi as hard-blocked, with no obvious way to enable it. Using a quirk to set wapf=4 solves the problem. Signed-off-by: Daniel Drake Acked-by: Corentin Chary Signed-off-by: Darren Hart diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c index abdaed3..131fee2 100644 --- a/drivers/platform/x86/asus-nb-wmi.c +++ b/drivers/platform/x86/asus-nb-wmi.c @@ -128,6 +128,24 @@ static const struct dmi_system_id asus_quirks[] = { }, { .callback = dmi_matched, + .ident = "ASUSTeK COMPUTER INC. X456UA", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "X456UA"), + }, + .driver_data = &quirk_asus_wapf4, + }, + { + .callback = dmi_matched, + .ident = "ASUSTeK COMPUTER INC. 
X456UF", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "X456UF"), + }, + .driver_data = &quirk_asus_wapf4, + }, + { + .callback = dmi_matched, .ident = "ASUSTeK COMPUTER INC. X501U", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), -- cgit v0.10.2 From a077224fd35b2f7fbc93f14cf67074fc792fbac2 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 3 Sep 2015 13:24:40 +0100 Subject: ARM: 8429/1: disable GCC SRA optimization While working on the 32-bit ARM port of UEFI, I noticed a strange corruption in the kernel log. The following snprintf() statement (in drivers/firmware/efi/efi.c:efi_md_typeattr_format()) snprintf(pos, size, "|%3s|%2s|%2s|%2s|%3s|%2s|%2s|%2s|%2s]", was producing the following output in the log: | | | | | |WB|WT|WC|UC] | | | | | |WB|WT|WC|UC] | | | | | |WB|WT|WC|UC] |RUN| | | | |WB|WT|WC|UC]* |RUN| | | | |WB|WT|WC|UC]* | | | | | |WB|WT|WC|UC] |RUN| | | | |WB|WT|WC|UC]* | | | | | |WB|WT|WC|UC] |RUN| | | | | | | |UC] |RUN| | | | | | | |UC] As it turns out, this is caused by incorrect code being emitted for the string() function in lib/vsprintf.c. The following code if (!(spec.flags & LEFT)) { while (len < spec.field_width--) { if (buf < end) *buf = ' '; ++buf; } } for (i = 0; i < len; ++i) { if (buf < end) *buf = *s; ++buf; ++s; } while (len < spec.field_width--) { if (buf < end) *buf = ' '; ++buf; } when called with len == 0, triggers an issue in the GCC SRA optimization pass (Scalar Replacement of Aggregates), which handles promotion of signed struct members incorrectly. This is a known but as yet unresolved issue. (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65932). In this particular case, it is causing the second while loop to be executed erroneously a single time, causing the additional space characters to be printed. So disable the optimization by passing -fno-ipa-sra. 
Cc: Acked-by: Nicolas Pitre Signed-off-by: Ard Biesheuvel Signed-off-by: Russell King diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 7451b44..2c2b28e 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -54,6 +54,14 @@ AS += -EL LD += -EL endif +# +# The Scalar Replacement of Aggregates (SRA) optimization pass in GCC 4.9 and +# later may result in code being generated that handles signed short and signed +# char struct members incorrectly. So disable it. +# (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65932) +# +KBUILD_CFLAGS += $(call cc-option,-fno-ipa-sra) + # This selects which instruction set is used. # Note that GCC does not numerically define an architecture version # macro, but instead defines a whole series of macros which makes -- cgit v0.10.2 From 39dc53deff30d9b239ac36cfeb0ef2022d03a449 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 7 Sep 2015 00:29:15 +0100 Subject: ARM: swpan: fix nwfpe for uaccess changes NWFPE needs to access userspace to check whether the next instruction is another FP instruction. Allow userspace access for this read. Signed-off-by: Russell King diff --git a/arch/arm/nwfpe/entry.S b/arch/arm/nwfpe/entry.S index 71df435..39c20af 100644 --- a/arch/arm/nwfpe/entry.S +++ b/arch/arm/nwfpe/entry.S @@ -95,9 +95,10 @@ emulate: reteq r4 @ no, return failure next: + uaccess_enable r3 .Lx1: ldrt r6, [r5], #4 @ get the next instruction and @ increment PC - + uaccess_disable r3 and r2, r6, #0x0F000000 @ test for FP insns teq r2, #0x0C000000 teqne r2, #0x0D000000 -- cgit v0.10.2 From 296254f3223d201f2aa53f5f717eedfdc63f3db8 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 7 Sep 2015 00:30:06 +0100 Subject: ARM: uaccess: remove unneeded uaccess_save_and_disable macro This macro is never referenced, remove it. 
Signed-off-by: Russell King diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index 3ae0eda..9007c51 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -487,11 +487,6 @@ THUMB( orr \reg , \reg , #PSR_T_BIT ) #endif .endm - .macro uaccess_save_and_disable, tmp - uaccess_save \tmp - uaccess_disable \tmp - .endm - .irp c,,eq,ne,cs,cc,mi,pl,vs,vc,hi,ls,ge,lt,gt,le,hs,lo .macro ret\c, reg #if __LINUX_ARM_ARCH__ < 6 -- cgit v0.10.2 From af4cb25df93d2e7a97d65db2bfacaa4400988dea Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 9 Sep 2015 21:19:49 +0100 Subject: ARM: uaccess: fix undefined instruction on ARMv7M/noMMU The use of get_domain() in copy_thread() results in an oops on ARMv7M/noMMU systems. The thread cpu_domain value is only used when CONFIG_CPU_USE_DOMAINS is enabled, so there's no need to save the value in copy_thread() except when this is enabled, and this option will never be enabled on these platforms. Unhandled exception: IPSR = 00000006 LR = fffffff1 CPU: 0 PID: 0 Comm: swapper Not tainted 4.2.0-next-20150909-00001-gb8ec5ad #41 Hardware name: NXP LPC18xx/43xx (Device Tree) task: 2823fbe0 ti: 2823c000 task.ti: 2823c000 PC is at copy_thread+0x18/0x92 LR is at copy_thread+0x19/0x92 pc : [<2800a46e>] lr : [<2800a46f>] psr: 4100000b sp : 2823df00 ip : 00000000 fp : 287c81c0 r10: 00000000 r9 : 00800300 r8 : 287c8000 r7 : 287c8000 r6 : 2818908d r5 : 00000000 r4 : 287ca000 r3 : 00000000 r2 : 00000000 r1 : fffffff0 r0 : 287ca048 xPSR: 4100000b Reported-by: Ariel D'Alessandro Signed-off-by: Russell King diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index 3f18098..e550a45 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -233,6 +233,7 @@ copy_thread(unsigned long clone_flags, unsigned long stack_start, memset(&thread->cpu_context, 0, sizeof(struct cpu_context_save)); +#ifdef CONFIG_CPU_USE_DOMAINS /* * Copy the initial value of the domain access 
control register * from the current thread: thread->addr_limit will have been @@ -240,6 +241,7 @@ copy_thread(unsigned long clone_flags, unsigned long stack_start, * kernel/fork.c */ thread->cpu_domain = get_domain(); +#endif if (likely(!(p->flags & PF_KTHREAD))) { *childregs = *current_pt_regs(); -- cgit v0.10.2 From f1ab428711358fbb747ba392c3448462494e6c6a Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 9 Sep 2015 18:22:35 +1000 Subject: crypto: vmx - VMX crypto should depend on CONFIG_VSX This code uses FP (floating point), Altivec and VSX (Vector-Scalar Extension). It can just depend on CONFIG_VSX though, because that already depends on FP and Altivec. Otherwise we get lots of link errors such as: drivers/built-in.o: In function `.p8_aes_setkey': aes.c:(.text+0x2d325c): undefined reference to `.enable_kernel_altivec' aes.c:(.text+0x2d326c): undefined reference to `.enable_kernel_vsx' Signed-off-by: Michael Ellerman Signed-off-by: Herbert Xu diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index 07bc7aa..d234719 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -461,7 +461,7 @@ config CRYPTO_DEV_QCE config CRYPTO_DEV_VMX bool "Support for VMX cryptographic acceleration instructions" - depends on PPC64 + depends on PPC64 && VSX help Support for VMX cryptographic acceleration instructions. -- cgit v0.10.2 From 9da75de030bb6e49475ef37c8495d07e98cfeb33 Mon Sep 17 00:00:00 2001 From: LABBE Corentin Date: Wed, 9 Sep 2015 14:27:07 +0200 Subject: crypto: sunxi-ss - Fix a possible driver hang with ciphers The sun4i_ss_opti_poll function ciphers data until the output miter has a length of 0. If the crypto API client gives more SGs than necessary, this could result in an infinite loop. Fix it by checking for remaining bytes, just like sun4i_ss_cipher_poll().
Signed-off-by: LABBE Corentin Signed-off-by: Herbert Xu diff --git a/drivers/crypto/sunxi-ss/sun4i-ss-cipher.c b/drivers/crypto/sunxi-ss/sun4i-ss-cipher.c index e070c31..a19ee12 100644 --- a/drivers/crypto/sunxi-ss/sun4i-ss-cipher.c +++ b/drivers/crypto/sunxi-ss/sun4i-ss-cipher.c @@ -104,7 +104,7 @@ static int sun4i_ss_opti_poll(struct ablkcipher_request *areq) sg_miter_next(&mo); oo = 0; } - } while (mo.length > 0); + } while (oleft > 0); if (areq->info) { for (i = 0; i < 4 && i < ivsize / 4; i++) { -- cgit v0.10.2 From 85b4e4eb2f9ac29ee8ec47f1f055f251cb251a3c Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 10 Sep 2015 00:08:45 +0200 Subject: wmi: Remove private %pUL implementation The work performed by wmi_gtoa is equivalent to simply sprintf(out, "%pUL", in), so one could replace its body by this. However, most users feed the result directly as a %s argument to some other function which also understands the %p extensions (they all ultimately use vsnprintf), so we can eliminate some stack buffers and quite a bit of code by just using %pUL directly. In wmi_dev_uevent I'm not sure whether there's room for a nul-terminator in env->buf, so I've just replaced wmi_gtoa with the equivalent sprintf call. 
Signed-off-by: Rasmus Villemoes Signed-off-by: Darren Hart diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c index aac4757..eb391a2 100644 --- a/drivers/platform/x86/wmi.c +++ b/drivers/platform/x86/wmi.c @@ -194,34 +194,6 @@ static bool wmi_parse_guid(const u8 *src, u8 *dest) return true; } -/* - * Convert a raw GUID to the ACII string representation - */ -static int wmi_gtoa(const char *in, char *out) -{ - int i; - - for (i = 3; i >= 0; i--) - out += sprintf(out, "%02X", in[i] & 0xFF); - - out += sprintf(out, "-"); - out += sprintf(out, "%02X", in[5] & 0xFF); - out += sprintf(out, "%02X", in[4] & 0xFF); - out += sprintf(out, "-"); - out += sprintf(out, "%02X", in[7] & 0xFF); - out += sprintf(out, "%02X", in[6] & 0xFF); - out += sprintf(out, "-"); - out += sprintf(out, "%02X", in[8] & 0xFF); - out += sprintf(out, "%02X", in[9] & 0xFF); - out += sprintf(out, "-"); - - for (i = 10; i <= 15; i++) - out += sprintf(out, "%02X", in[i] & 0xFF); - - *out = '\0'; - return 0; -} - static bool find_guid(const char *guid_string, struct wmi_block **out) { char tmp[16], guid_input[16]; @@ -457,11 +429,7 @@ EXPORT_SYMBOL_GPL(wmi_set_block); static void wmi_dump_wdg(const struct guid_block *g) { - char guid_string[37]; - - wmi_gtoa(g->guid, guid_string); - - pr_info("%s:\n", guid_string); + pr_info("%pUL:\n", g->guid); pr_info("\tobject_id: %c%c\n", g->object_id[0], g->object_id[1]); pr_info("\tnotify_id: %02X\n", g->notify_id); pr_info("\treserved: %02X\n", g->reserved); @@ -661,7 +629,6 @@ EXPORT_SYMBOL_GPL(wmi_has_guid); static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, char *buf) { - char guid_string[37]; struct wmi_block *wblock; wblock = dev_get_drvdata(dev); @@ -670,9 +637,7 @@ static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, return strlen(buf); } - wmi_gtoa(wblock->gblock.guid, guid_string); - - return sprintf(buf, "wmi:%s\n", guid_string); + return sprintf(buf, "wmi:%pUL\n", 
wblock->gblock.guid); } static DEVICE_ATTR_RO(modalias); @@ -695,7 +660,7 @@ static int wmi_dev_uevent(struct device *dev, struct kobj_uevent_env *env) if (!wblock) return -ENOMEM; - wmi_gtoa(wblock->gblock.guid, guid_string); + sprintf(guid_string, "%pUL", wblock->gblock.guid); strcpy(&env->buf[env->buflen - 1], "wmi:"); memcpy(&env->buf[env->buflen - 1 + 4], guid_string, 36); @@ -721,12 +686,9 @@ static struct class wmi_class = { static int wmi_create_device(const struct guid_block *gblock, struct wmi_block *wblock, acpi_handle handle) { - char guid_string[37]; - wblock->dev.class = &wmi_class; - wmi_gtoa(gblock->guid, guid_string); - dev_set_name(&wblock->dev, "%s", guid_string); + dev_set_name(&wblock->dev, "%pUL", gblock->guid); dev_set_drvdata(&wblock->dev, wblock); @@ -877,7 +839,6 @@ static void acpi_wmi_notify(struct acpi_device *device, u32 event) struct guid_block *block; struct wmi_block *wblock; struct list_head *p; - char guid_string[37]; list_for_each(p, &wmi_block_list) { wblock = list_entry(p, struct wmi_block, list); @@ -888,8 +849,8 @@ static void acpi_wmi_notify(struct acpi_device *device, u32 event) if (wblock->handler) wblock->handler(event, wblock->handler_data); if (debug_event) { - wmi_gtoa(wblock->gblock.guid, guid_string); - pr_info("DEBUG Event GUID: %s\n", guid_string); + pr_info("DEBUG Event GUID: %pUL\n", + wblock->gblock.guid); } acpi_bus_generate_netlink_event( -- cgit v0.10.2 From aefc574bbbbe74bb891ba392d98f2d59a417c774 Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Thu, 10 Sep 2015 10:00:55 +0200 Subject: Revert "twl4030_charger: correctly handle -EPROBE_DEFER from devm_usb_get_phy_by_node" Revert commit 3fc3895e4fe17ee92ae1d1bb9f04da6069e8c370, since it introduced a boot failure on some OMAP platforms. 
Reported-by: Kevin Hilman Signed-off-by: Sebastian Reichel diff --git a/drivers/power/twl4030_charger.c b/drivers/power/twl4030_charger.c index f4f2c1f..54b0325 100644 --- a/drivers/power/twl4030_charger.c +++ b/drivers/power/twl4030_charger.c @@ -1057,13 +1057,9 @@ static int twl4030_bci_probe(struct platform_device *pdev) phynode = of_find_compatible_node(bci->dev->of_node->parent, NULL, "ti,twl4030-usb"); - if (phynode) { + if (phynode) bci->transceiver = devm_usb_get_phy_by_node( bci->dev, phynode, &bci->usb_nb); - if (IS_ERR(bci->transceiver) && - PTR_ERR(bci->transceiver) == -EPROBE_DEFER) - return -EPROBE_DEFER; - } } /* Enable interrupts now. */ -- cgit v0.10.2 From e11fc21e756e662e465cac3da6547d438d0b1446 Mon Sep 17 00:00:00 2001 From: Grazvydas Ignotas Date: Sat, 5 Sep 2015 02:32:34 +0300 Subject: twl4030_charger: fix another compile error When CONFIG_CHARGER_TWL4030=y and CONFIG_TWL4030_MADC=m we get a compile error: drivers/built-in.o: In function `twl4030_charger_update_current': twl4030_charger.c:(.text+0x504681): undefined reference to `twl4030_get_madc_conversion' Use IS_REACHABLE to fix it. Cc: NeilBrown Reported-by: Randy Dunlap Signed-off-by: Grazvydas Ignotas Acked-by: Tony Lindgren Signed-off-by: Sebastian Reichel diff --git a/drivers/power/twl4030_charger.c b/drivers/power/twl4030_charger.c index 54b0325..74f2d3f 100644 --- a/drivers/power/twl4030_charger.c +++ b/drivers/power/twl4030_charger.c @@ -91,7 +91,7 @@ #define TWL4030_MSTATEC_COMPLETE1 0x0b #define TWL4030_MSTATEC_COMPLETE4 0x0e -#if IS_ENABLED(CONFIG_TWL4030_MADC) +#if IS_REACHABLE(CONFIG_TWL4030_MADC) /* * If AC (Accessory Charger) voltage exceeds 4.5V (MADC 11) * then AC is available. 
-- cgit v0.10.2 From bb0f73616396e7929b68d3bdea70064003599d33 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Fri, 21 Aug 2015 19:51:41 +0200 Subject: clk: rockchip: handle critical clocks after registering all clocks Currently the registration of critical clocks is done in the function shared between rk3066 and rk3188 clock trees. That results in them getting handled maybe before all of them are registered. Therefore move the critical clock handling down to the end of the soc- specific clock registration function, so that all clocks are registered before they're maybe handled as critical clock. Signed-off-by: Heiko Stuebner Tested-by: Michael Niewoehner Signed-off-by: Stephen Boyd diff --git a/drivers/clk/rockchip/clk-rk3188.c b/drivers/clk/rockchip/clk-rk3188.c index ed02bbc..f26e3ed 100644 --- a/drivers/clk/rockchip/clk-rk3188.c +++ b/drivers/clk/rockchip/clk-rk3188.c @@ -744,8 +744,6 @@ static void __init rk3188_common_clk_init(struct device_node *np) rockchip_clk_register_branches(common_clk_branches, ARRAY_SIZE(common_clk_branches)); - rockchip_clk_protect_critical(rk3188_critical_clocks, - ARRAY_SIZE(rk3188_critical_clocks)); rockchip_register_softrst(np, 9, reg_base + RK2928_SOFTRST_CON(0), ROCKCHIP_SOFTRST_HIWORD_MASK); @@ -765,6 +763,8 @@ static void __init rk3066a_clk_init(struct device_node *np) mux_armclk_p, ARRAY_SIZE(mux_armclk_p), &rk3066_cpuclk_data, rk3066_cpuclk_rates, ARRAY_SIZE(rk3066_cpuclk_rates)); + rockchip_clk_protect_critical(rk3188_critical_clocks, + ARRAY_SIZE(rk3188_critical_clocks)); } CLK_OF_DECLARE(rk3066a_cru, "rockchip,rk3066a-cru", rk3066a_clk_init); @@ -801,6 +801,9 @@ static void __init rk3188a_clk_init(struct device_node *np) pr_warn("%s: missing clocks to reparent aclk_cpu_pre to gpll\n", __func__); } + + rockchip_clk_protect_critical(rk3188_critical_clocks, + ARRAY_SIZE(rk3188_critical_clocks)); } CLK_OF_DECLARE(rk3188a_cru, "rockchip,rk3188a-cru", rk3188a_clk_init); -- cgit v0.10.2 From 
1166160ab531198f7abc773992c0e04d0f9b7600 Mon Sep 17 00:00:00 2001 From: Michael Niewoehner Date: Tue, 25 Aug 2015 22:22:07 +0200 Subject: clk: rockchip: add pclk_cpu to the list of rk3188 critical clocks pclk_cpu needs to keep running because it is needed for devices like the act8865 regulator but with the recent gpio clock handling this is not always the case anymore. So add it to the list of critical clocks. Signed-off-by: Michael Niewoehner Reviewed-by: Heiko Stuebner Signed-off-by: Stephen Boyd diff --git a/drivers/clk/rockchip/clk-rk3188.c b/drivers/clk/rockchip/clk-rk3188.c index f26e3ed..fa2f36b 100644 --- a/drivers/clk/rockchip/clk-rk3188.c +++ b/drivers/clk/rockchip/clk-rk3188.c @@ -716,6 +716,7 @@ static const char *const rk3188_critical_clocks[] __initconst = { "aclk_cpu", "aclk_peri", "hclk_peri", + "pclk_cpu", }; static void __init rk3188_common_clk_init(struct device_node *np) -- cgit v0.10.2 From 3bba75a2ec32bd5fa7024a4de3b8cf9ee113a76a Mon Sep 17 00:00:00 2001 From: Romain Perier Date: Sun, 23 Aug 2015 11:32:37 +0200 Subject: clk: rockchip: Add pclk_peri to critical clocks on RK3066/RK3188 Now that the rockchip clock subsystem does clock gating with GPIO banks, these are no longer enabled once during probe and no longer stay enabled for eternity. When all these clocks are disabled, the parent clock pclk_peri might be disabled too, as no other child claims it. So, we need to add pclk_peri to the critical clocks. 
Signed-off-by: Romain Perier Tested-by: Michael Niewoehner Signed-off-by: Stephen Boyd diff --git a/drivers/clk/rockchip/clk-rk3188.c b/drivers/clk/rockchip/clk-rk3188.c index fa2f36b..abb4760 100644 --- a/drivers/clk/rockchip/clk-rk3188.c +++ b/drivers/clk/rockchip/clk-rk3188.c @@ -717,6 +717,7 @@ static const char *const rk3188_critical_clocks[] __initconst = { "aclk_peri", "hclk_peri", "pclk_cpu", + "pclk_peri", }; static void __init rk3188_common_clk_init(struct device_node *np) -- cgit v0.10.2 From cf680eae34d26bd474c2ed3bd7d3aff59054aed5 Mon Sep 17 00:00:00 2001 From: Azael Avalos Date: Wed, 9 Sep 2015 11:25:44 -0600 Subject: toshiba_acpi: Fix USB Sleep and Music always disabled Commit e1a949c1b988 ("toshiba_acpi: Refactor *{get, set} functions return value") made changes on the return type of the HCI/SCI functions, but a typo on the USB Sleep and Music code is always reporting non existent support for such feature. This patch corrects the typo, changing an assignment to a comparison, making the laptops with actual support for such feature to work again. Signed-off-by: Azael Avalos Signed-off-by: Darren Hart diff --git a/drivers/platform/x86/toshiba_acpi.c b/drivers/platform/x86/toshiba_acpi.c index 6740c51..c434b53 100644 --- a/drivers/platform/x86/toshiba_acpi.c +++ b/drivers/platform/x86/toshiba_acpi.c @@ -938,7 +938,7 @@ static int toshiba_usb_sleep_music_get(struct toshiba_acpi_dev *dev, u32 *state) else if (result == TOS_NOT_SUPPORTED) return -ENODEV; - return result = TOS_SUCCESS ? 0 : -EIO; + return result == TOS_SUCCESS ? 
0 : -EIO; } static int toshiba_usb_sleep_music_set(struct toshiba_acpi_dev *dev, u32 state) -- cgit v0.10.2 From 53147b6cabee5e8d1997b5682fcc0c3b72ddf9c2 Mon Sep 17 00:00:00 2001 From: Azael Avalos Date: Wed, 9 Sep 2015 11:25:45 -0600 Subject: toshiba_acpi: Fix hotkeys registration on some toshiba models Commit a2b3471b5b13 ("toshiba_acpi: Use the Hotkey Event Type function for keymap choosing") changed the *setup_keyboard function to query for the Hotkey Event Type to help choose the correct keymap, but it turns out that there are certain Toshiba models out there not implementing this feature, and thus, failing to continue the input device registration and leaving such laptops without hotkey support. This patch changes such check, and instead of returning an error if the Hotkey Event Type is not present, we simply inform userspace about it, changing the message printed from err to notice, making the function responsible for registering the input device to continue. This issue was found on a Toshiba Portege Z30-B, but there might be some other models out there affected by this regression as well. 
Cc: # 4.1+ Signed-off-by: Azael Avalos Signed-off-by: Darren Hart diff --git a/drivers/platform/x86/toshiba_acpi.c b/drivers/platform/x86/toshiba_acpi.c index c434b53..f2372f4 100644 --- a/drivers/platform/x86/toshiba_acpi.c +++ b/drivers/platform/x86/toshiba_acpi.c @@ -2398,11 +2398,9 @@ static int toshiba_acpi_setup_keyboard(struct toshiba_acpi_dev *dev) if (error) return error; - error = toshiba_hotkey_event_type_get(dev, &events_type); - if (error) { - pr_err("Unable to query Hotkey Event Type\n"); - return error; - } + if (toshiba_hotkey_event_type_get(dev, &events_type)) + pr_notice("Unable to query Hotkey Event Type\n"); + dev->hotkey_event_type = events_type; dev->hotkey_dev = input_allocate_device(); -- cgit v0.10.2 From 5473e0cc37c03c576adbda7591a6cc8e37c1bb7f Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Fri, 28 Aug 2015 14:55:56 +0800 Subject: sched: 'Annotate' migrate_tasks() Kernel testing triggered this warning: | WARNING: CPU: 0 PID: 13 at kernel/sched/core.c:1156 do_set_cpus_allowed+0x7e/0x80() | Modules linked in: | CPU: 0 PID: 13 Comm: migration/0 Not tainted 4.2.0-rc1-00049-g25834c7 #2 | Call Trace: | dump_stack+0x4b/0x75 | warn_slowpath_common+0x8b/0xc0 | warn_slowpath_null+0x22/0x30 | do_set_cpus_allowed+0x7e/0x80 | cpuset_cpus_allowed_fallback+0x7c/0x170 | select_fallback_rq+0x221/0x280 | migration_call+0xe3/0x250 | notifier_call_chain+0x53/0x70 | __raw_notifier_call_chain+0x1e/0x30 | cpu_notify+0x28/0x50 | take_cpu_down+0x22/0x40 | multi_cpu_stop+0xd5/0x140 | cpu_stopper_thread+0xbc/0x170 | smpboot_thread_fn+0x174/0x2f0 | kthread+0xc4/0xe0 | ret_from_kernel_thread+0x21/0x30 As Peterz pointed out: | So the normal rules for changing task_struct::cpus_allowed are holding | both pi_lock and rq->lock, such that holding either stabilizes the mask. | | This is so that wakeup can happen without rq->lock and load-balance | without pi_lock. 
| | From this we already get the relaxation that we can omit acquiring | rq->lock if the task is not on the rq, because in that case | load-balancing will not apply to it. | | ** these are the rules currently tested in do_set_cpus_allowed() ** | | Now, since __set_cpus_allowed_ptr() uses task_rq_lock() which | unconditionally acquires both locks, we could get away with holding just | rq->lock when on_rq for modification because that'd still exclude | __set_cpus_allowed_ptr(), it would also work against | __kthread_bind_mask() because that assumes !on_rq. | | That said, this is all somewhat fragile. | | Now, I don't think dropping rq->lock is quite as disastrous as it | usually is because !cpu_active at this point, which means load-balance | will not interfere, but that too is somewhat fragile. | | So we end up with a choice of two fragile.. This patch fixes it by following the rules for changing task_struct::cpus_allowed with both pi_lock and rq->lock held. Reported-by: kernel test robot Reported-by: Sasha Levin Signed-off-by: Wanpeng Li [ Modified changelog and patch. ] Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/BLU436-SMTP1660820490DE202E3934ED3806E0@phx.gbl Signed-off-by: Ingo Molnar diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 0902e4d..9b78670 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5183,24 +5183,47 @@ static void migrate_tasks(struct rq *dead_rq) break; /* - * Ensure rq->lock covers the entire task selection - * until the migration. + * pick_next_task assumes pinned rq->lock. */ lockdep_pin_lock(&rq->lock); next = pick_next_task(rq, &fake_task); BUG_ON(!next); next->sched_class->put_prev_task(rq, next); + /* + * Rules for changing task_struct::cpus_allowed are holding + * both pi_lock and rq->lock, such that holding either + * stabilizes the mask. 
+ * + * Drop rq->lock is not quite as disastrous as it usually is + * because !cpu_active at this point, which means load-balance + * will not interfere. Also, stop-machine. + */ + lockdep_unpin_lock(&rq->lock); + raw_spin_unlock(&rq->lock); + raw_spin_lock(&next->pi_lock); + raw_spin_lock(&rq->lock); + + /* + * Since we're inside stop-machine, _nothing_ should have + * changed the task, WARN if weird stuff happened, because in + * that case the above rq->lock drop is a fail too. + */ + if (WARN_ON(task_rq(next) != rq || !task_on_rq_queued(next))) { + raw_spin_unlock(&next->pi_lock); + continue; + } + /* Find suitable destination for @next, with force if needed. */ dest_cpu = select_fallback_rq(dead_rq->cpu, next); - lockdep_unpin_lock(&rq->lock); rq = __migrate_task(rq, next, dest_cpu); if (rq != dead_rq) { raw_spin_unlock(&rq->lock); rq = dead_rq; raw_spin_lock(&rq->lock); } + raw_spin_unlock(&next->pi_lock); } rq->stop = stop; -- cgit v0.10.2 From d249872939bfa86c9cce44a56a8cbdbc7086519b Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Fri, 17 Jul 2015 16:34:10 +0300 Subject: perf/x86/intel/bts: Set event->hw.itrace_started in pmu::start to match the new logic Since event->hw.itrace_started is now set in pmu::start() to signal the beginning of the trace, do so also in the intel_bts driver. 
Signed-off-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: acme@infradead.org Cc: adrian.hunter@intel.com Cc: hpa@zytor.com Link: http://lkml.kernel.org/r/1437140050-23363-4-git-send-email-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_event_intel_bts.c b/arch/x86/kernel/cpu/perf_event_intel_bts.c index 54690e8..d1c0f25 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_bts.c +++ b/arch/x86/kernel/cpu/perf_event_intel_bts.c @@ -222,6 +222,7 @@ static void __bts_event_start(struct perf_event *event) if (!buf || bts_buffer_is_full(buf, bts)) return; + event->hw.itrace_started = 1; event->hw.state = 0; if (!buf->snapshot) -- cgit v0.10.2 From 84cba178a3b88efe2668a9039f70abda072faa21 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Thu, 10 Sep 2015 13:11:55 +0300 Subject: crypto: testmgr - don't copy from source IV too much While the destination buffer 'iv' is MAX_IVLEN size, the source 'template[i].iv' could be smaller, thus memcpy may read invalid memory. Use crypto_skcipher_ivsize() to get real ivsize and pass it to memcpy. 
Signed-off-by: Andrey Ryabinin Signed-off-by: Herbert Xu diff --git a/crypto/testmgr.c b/crypto/testmgr.c index 35c2de1..fa18753 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -940,6 +940,7 @@ static int __test_skcipher(struct crypto_skcipher *tfm, int enc, char *xbuf[XBUFSIZE]; char *xoutbuf[XBUFSIZE]; int ret = -ENOMEM; + unsigned int ivsize = crypto_skcipher_ivsize(tfm); if (testmgr_alloc_buf(xbuf)) goto out_nobuf; @@ -975,7 +976,7 @@ static int __test_skcipher(struct crypto_skcipher *tfm, int enc, continue; if (template[i].iv) - memcpy(iv, template[i].iv, MAX_IVLEN); + memcpy(iv, template[i].iv, ivsize); else memset(iv, 0, MAX_IVLEN); @@ -1051,7 +1052,7 @@ static int __test_skcipher(struct crypto_skcipher *tfm, int enc, continue; if (template[i].iv) - memcpy(iv, template[i].iv, MAX_IVLEN); + memcpy(iv, template[i].iv, ivsize); else memset(iv, 0, MAX_IVLEN); -- cgit v0.10.2 From 4c17a6d56bb0cad3066a714e94f7185a24b40f49 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Fri, 11 Sep 2015 16:27:27 +0200 Subject: CIFS: fix type confusion in copy offload ioctl This might lead to local privilege escalation (code execution as kernel) for systems where the following conditions are met: - CONFIG_CIFS_SMB2 and CONFIG_CIFS_POSIX are enabled - a cifs filesystem is mounted where: - the mount option "vers" was used and set to a value >=2.0 - the attacker has write access to at least one file on the filesystem To attack this, an attacker would have to guess the target_tcon pointer (but guessing wrong doesn't cause a crash, it just returns an error code) and win a narrow race. 
CC: Stable Signed-off-by: Jann Horn Signed-off-by: Steve French diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c index c63f522..28a77bf 100644 --- a/fs/cifs/ioctl.c +++ b/fs/cifs/ioctl.c @@ -67,6 +67,12 @@ static long cifs_ioctl_clone(unsigned int xid, struct file *dst_file, goto out_drop_write; } + if (src_file.file->f_op->unlocked_ioctl != cifs_ioctl) { + rc = -EBADF; + cifs_dbg(VFS, "src file seems to be from a different filesystem type\n"); + goto out_fput; + } + if ((!src_file.file->private_data) || (!dst_file->private_data)) { rc = -EBADF; cifs_dbg(VFS, "missing cifsFileInfo on copy range src file\n"); -- cgit v0.10.2 From 716ff1921a86c637b8875c7bb312fc6755fa9300 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 11 Sep 2015 08:17:39 +0100 Subject: ARM: domains: thread_info.h no longer needs asm/domains.h As of 1eef5d2f1b46 ("ARM: domains: switch to keeping domain value in register") we no longer need to include asm/domains.h into asm/thread_info.h. Remove it. Tested-by: Robert Jarzmik Signed-off-by: Russell King diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h index 0a0aec4..ae02e68 100644 --- a/arch/arm/include/asm/thread_info.h +++ b/arch/arm/include/asm/thread_info.h @@ -25,7 +25,6 @@ struct task_struct; #include -#include typedef unsigned long mm_segment_t; -- cgit v0.10.2 From 6e8f580d1fcc18e290713984c379cb97131c015a Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 11 Sep 2015 08:34:52 +0100 Subject: ARM: domains: add memory dependencies to get_domain/set_domain We need to have memory dependencies on get_domain/set_domain to avoid the compiler over-optimising these inline assembly instructions. Loads/stores must not be reordered across a set_domain(), so introduce a compiler barrier for that assembly. The value of get_domain() must not be cached across a set_domain(), but we still want to allow the compiler to optimise it away. 
Introduce a dependency on current_thread_info()->cpu_domain to avoid this; the new memory clobber in set_domain() should therefore cause the compiler to re-load this. The other advantage of using this is we should have its address in the register set already, or very soon after at most call sites. Tested-by: Robert Jarzmik Signed-off-by: Russell King diff --git a/arch/arm/include/asm/domain.h b/arch/arm/include/asm/domain.h index e878129..fc8ba16 100644 --- a/arch/arm/include/asm/domain.h +++ b/arch/arm/include/asm/domain.h @@ -12,6 +12,7 @@ #ifndef __ASSEMBLY__ #include +#include #endif /* @@ -89,7 +90,8 @@ static inline unsigned int get_domain(void) asm( "mrc p15, 0, %0, c3, c0 @ get domain" - : "=r" (domain)); + : "=r" (domain) + : "m" (current_thread_info()->cpu_domain)); return domain; } @@ -98,7 +100,7 @@ static inline void set_domain(unsigned val) { asm volatile( "mcr p15, 0, %0, c3, c0 @ set domain" - : : "r" (val)); + : : "r" (val) : "memory"); isb(); } -- cgit v0.10.2 From 0b61f2c0f37983c98ed4207f3f5e265938371b68 Mon Sep 17 00:00:00 2001 From: Julien Grall Date: Fri, 11 Sep 2015 17:25:59 +0100 Subject: arm/xen: Enable user access to the kernel before issuing a privcmd call When Xen is copying data to/from the guest it will check if the kernel has the right to do the access. If not, the hypercall will return an error. After the commit a5e090acbf545c0a3b04080f8a488b17ec41fe02 "ARM: software-based privileged-no-access support", the kernel can no longer access the user space by default. This causes every hypercall made by the userspace (i.e. via privcmd) to fail. We have to enable the userspace access and then restore the correct permission every time the privcmd is used to make a hypercall. I didn't find generic helpers to do these operations, so the change is only arm32 specific. 
Reported-by: Riku Voipio Signed-off-by: Julien Grall Signed-off-by: Russell King diff --git a/arch/arm/xen/hypercall.S b/arch/arm/xen/hypercall.S index f00e080..10fd99c 100644 --- a/arch/arm/xen/hypercall.S +++ b/arch/arm/xen/hypercall.S @@ -98,8 +98,23 @@ ENTRY(privcmd_call) mov r1, r2 mov r2, r3 ldr r3, [sp, #8] + /* + * Privcmd calls are issued by the userspace. We need to allow the + * kernel to access the userspace memory before issuing the hypercall. + */ + uaccess_enable r4 + + /* r4 is loaded now as we use it as scratch register before */ ldr r4, [sp, #4] __HVC(XEN_IMM) + + /* + * Disable userspace access from kernel. This is fine to do it + * unconditionally as no set_fs(KERNEL_DS)/set_fs(get_ds()) is + * called before. + */ + uaccess_disable r4 + ldm sp!, {r4} ret lr ENDPROC(privcmd_call); -- cgit v0.10.2 From a4a5a7379e4ca03c192b732d61e446994eb67bbc Mon Sep 17 00:00:00 2001 From: Robert Jarzmik Date: Fri, 11 Sep 2015 17:12:27 +0100 Subject: ARM: 8431/1: fix alignment of __bug_table section entries On old ARM chips, unaligned accesses to memory are not trapped and fixed. On module load, symbols are relocated, and the relocation of __bug_table symbols is done on a u32 basis. Yet the section is not aligned to an address that is a multiple of 4, but to a multiple of 2. 
This triggers an Oops on pxa architecture, where address 0xbf0021ea is the first relocation in the __bug_table section : apply_relocate(): pxa3xx_nand: section 13 reloc 0 sym '' Unable to handle kernel paging request at virtual address bf0021ea pgd = e1cd0000 [bf0021ea] *pgd=c1cce851, *pte=c1cde04f, *ppte=c1cde01f Internal error: Oops: 23 [#1] ARM Modules linked in: CPU: 0 PID: 606 Comm: insmod Not tainted 4.2.0-rc8-next-20150828-cm-x300+ #887 Hardware name: CM-X300 module task: e1c68700 ti: e1c3e000 task.ti: e1c3e000 PC is at apply_relocate+0x2f4/0x3d4 LR is at 0xbf0021ea pc : [] lr : [] psr: 80000013 sp : e1c3fe30 ip : 60000013 fp : e49e8c60 r10: e49e8fa8 r9 : 00000000 r8 : e49e7c58 r7 : e49e8c38 r6 : e49e8a58 r5 : e49e8920 r4 : e49e8918 r3 : bf0021ea r2 : bf007034 r1 : 00000000 r0 : bf000000 Flags: Nzcv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none Control: 0000397f Table: c1cd0018 DAC: 00000051 Process insmod (pid: 606, stack limit = 0xe1c3e198) [] (apply_relocate) from [] (load_module+0x1248/0x1f5c) [] (load_module) from [] (SyS_init_module+0xe4/0x170) [] (SyS_init_module) from [] (ret_fast_syscall+0x0/0x38) Fix this by ensuring entries in __bug_table are all aligned to at least a multiple of 4. 
This transforms a module section __bug_table as : - [12] __bug_table PROGBITS 00000000 002232 000018 00 A 0 0 1 + [12] __bug_table PROGBITS 00000000 002232 000018 00 A 0 0 4 Signed-off-by: Robert Jarzmik Reviewed-by: Dave Martin Signed-off-by: Russell King diff --git a/arch/arm/include/asm/bug.h b/arch/arm/include/asm/bug.h index b274bde..e7335a9 100644 --- a/arch/arm/include/asm/bug.h +++ b/arch/arm/include/asm/bug.h @@ -40,6 +40,7 @@ do { \ "2:\t.asciz " #__file "\n" \ ".popsection\n" \ ".pushsection __bug_table,\"a\"\n" \ + ".align 2\n" \ "3:\t.word 1b, 2b\n" \ "\t.hword " #__line ", 0\n" \ ".popsection"); \ -- cgit v0.10.2 From 447e9a4d27484175a84daaa8e03d35c650f443b7 Mon Sep 17 00:00:00 2001 From: Doug Ledford Date: Fri, 11 Sep 2015 12:52:26 -0400 Subject: IB/ehca: Deprecate driver, move to staging, schedule deletion The ehca driver is only supported on IBM machines with a custom EBus. As they have opted to build their newer machines using more industry standard technology and haven't really been pushing EBus capable machines for a while, this driver can now safely be moved to the staging area and scheduled for eventual removal. This plan was brought to IBM's attention and received their sign-off. 
Cc: alexs@linux.vnet.ibm.com Cc: hnguyen@de.ibm.com Cc: raisch@de.ibm.com Cc: stefan.roscher@de.ibm.com Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig index da4c697..aa26f3c 100644 --- a/drivers/infiniband/Kconfig +++ b/drivers/infiniband/Kconfig @@ -56,7 +56,6 @@ config INFINIBAND_ADDR_TRANS source "drivers/infiniband/hw/mthca/Kconfig" source "drivers/infiniband/hw/qib/Kconfig" -source "drivers/infiniband/hw/ehca/Kconfig" source "drivers/infiniband/hw/cxgb3/Kconfig" source "drivers/infiniband/hw/cxgb4/Kconfig" source "drivers/infiniband/hw/mlx4/Kconfig" diff --git a/drivers/infiniband/hw/Makefile b/drivers/infiniband/hw/Makefile index 1bdb999..aded2a5 100644 --- a/drivers/infiniband/hw/Makefile +++ b/drivers/infiniband/hw/Makefile @@ -1,6 +1,5 @@ obj-$(CONFIG_INFINIBAND_MTHCA) += mthca/ obj-$(CONFIG_INFINIBAND_QIB) += qib/ -obj-$(CONFIG_INFINIBAND_EHCA) += ehca/ obj-$(CONFIG_INFINIBAND_CXGB3) += cxgb3/ obj-$(CONFIG_INFINIBAND_CXGB4) += cxgb4/ obj-$(CONFIG_MLX4_INFINIBAND) += mlx4/ diff --git a/drivers/infiniband/hw/ehca/Kconfig b/drivers/infiniband/hw/ehca/Kconfig deleted file mode 100644 index 59f807d..0000000 --- a/drivers/infiniband/hw/ehca/Kconfig +++ /dev/null @@ -1,9 +0,0 @@ -config INFINIBAND_EHCA - tristate "eHCA support" - depends on IBMEBUS - ---help--- - This driver supports the IBM pSeries eHCA InfiniBand adapter. - - To compile the driver as a module, choose M here. The module - will be called ib_ehca. - diff --git a/drivers/infiniband/hw/ehca/Makefile b/drivers/infiniband/hw/ehca/Makefile deleted file mode 100644 index 74d284e..0000000 --- a/drivers/infiniband/hw/ehca/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -# Authors: Heiko J Schick -# Christoph Raisch -# Joachim Fenkes -# -# Copyright (c) 2005 IBM Corporation -# -# All rights reserved. -# -# This source code is distributed under a dual license of GPL v2.0 and OpenIB BSD. 
- -obj-$(CONFIG_INFINIBAND_EHCA) += ib_ehca.o - -ib_ehca-objs = ehca_main.o ehca_hca.o ehca_mcast.o ehca_pd.o ehca_av.o ehca_eq.o \ - ehca_cq.o ehca_qp.o ehca_sqp.o ehca_mrmw.o ehca_reqs.o ehca_irq.o \ - ehca_uverbs.o ipz_pt_fn.o hcp_if.o hcp_phyp.o - diff --git a/drivers/infiniband/hw/ehca/ehca_av.c b/drivers/infiniband/hw/ehca/ehca_av.c deleted file mode 100644 index 4659263..0000000 --- a/drivers/infiniband/hw/ehca/ehca_av.c +++ /dev/null @@ -1,277 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * address vector functions - * - * Authors: Hoang-Nam Nguyen - * Khadija Souissi - * Reinhard Ernst - * Christoph Raisch - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include - -#include "ehca_tools.h" -#include "ehca_iverbs.h" -#include "hcp_if.h" - -static struct kmem_cache *av_cache; - -int ehca_calc_ipd(struct ehca_shca *shca, int port, - enum ib_rate path_rate, u32 *ipd) -{ - int path = ib_rate_to_mult(path_rate); - int link, ret; - struct ib_port_attr pa; - - if (path_rate == IB_RATE_PORT_CURRENT) { - *ipd = 0; - return 0; - } - - if (unlikely(path < 0)) { - ehca_err(&shca->ib_device, "Invalid static rate! 
path_rate=%x", - path_rate); - return -EINVAL; - } - - ret = ehca_query_port(&shca->ib_device, port, &pa); - if (unlikely(ret < 0)) { - ehca_err(&shca->ib_device, "Failed to query port ret=%i", ret); - return ret; - } - - link = ib_width_enum_to_int(pa.active_width) * pa.active_speed; - - if (path >= link) - /* no need to throttle if path faster than link */ - *ipd = 0; - else - /* IPD = round((link / path) - 1) */ - *ipd = ((link + (path >> 1)) / path) - 1; - - return 0; -} - -struct ib_ah *ehca_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) -{ - int ret; - struct ehca_av *av; - struct ehca_shca *shca = container_of(pd->device, struct ehca_shca, - ib_device); - - av = kmem_cache_alloc(av_cache, GFP_KERNEL); - if (!av) { - ehca_err(pd->device, "Out of memory pd=%p ah_attr=%p", - pd, ah_attr); - return ERR_PTR(-ENOMEM); - } - - av->av.sl = ah_attr->sl; - av->av.dlid = ah_attr->dlid; - av->av.slid_path_bits = ah_attr->src_path_bits; - - if (ehca_static_rate < 0) { - u32 ipd; - if (ehca_calc_ipd(shca, ah_attr->port_num, - ah_attr->static_rate, &ipd)) { - ret = -EINVAL; - goto create_ah_exit1; - } - av->av.ipd = ipd; - } else - av->av.ipd = ehca_static_rate; - - av->av.lnh = ah_attr->ah_flags; - av->av.grh.word_0 = EHCA_BMASK_SET(GRH_IPVERSION_MASK, 6); - av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_TCLASS_MASK, - ah_attr->grh.traffic_class); - av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_FLOWLABEL_MASK, - ah_attr->grh.flow_label); - av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_HOPLIMIT_MASK, - ah_attr->grh.hop_limit); - av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_NEXTHEADER_MASK, 0x1B); - /* set sgid in grh.word_1 */ - if (ah_attr->ah_flags & IB_AH_GRH) { - int rc; - struct ib_port_attr port_attr; - union ib_gid gid; - memset(&port_attr, 0, sizeof(port_attr)); - rc = ehca_query_port(pd->device, ah_attr->port_num, - &port_attr); - if (rc) { /* invalid port number */ - ret = -EINVAL; - ehca_err(pd->device, "Invalid port number " - "ehca_query_port() returned %x " - "pd=%p 
ah_attr=%p", rc, pd, ah_attr); - goto create_ah_exit1; - } - memset(&gid, 0, sizeof(gid)); - rc = ehca_query_gid(pd->device, - ah_attr->port_num, - ah_attr->grh.sgid_index, &gid); - if (rc) { - ret = -EINVAL; - ehca_err(pd->device, "Failed to retrieve sgid " - "ehca_query_gid() returned %x " - "pd=%p ah_attr=%p", rc, pd, ah_attr); - goto create_ah_exit1; - } - memcpy(&av->av.grh.word_1, &gid, sizeof(gid)); - } - av->av.pmtu = shca->max_mtu; - - /* dgid comes in grh.word_3 */ - memcpy(&av->av.grh.word_3, &ah_attr->grh.dgid, - sizeof(ah_attr->grh.dgid)); - - return &av->ib_ah; - -create_ah_exit1: - kmem_cache_free(av_cache, av); - - return ERR_PTR(ret); -} - -int ehca_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr) -{ - struct ehca_av *av; - struct ehca_ud_av new_ehca_av; - struct ehca_shca *shca = container_of(ah->pd->device, struct ehca_shca, - ib_device); - - memset(&new_ehca_av, 0, sizeof(new_ehca_av)); - new_ehca_av.sl = ah_attr->sl; - new_ehca_av.dlid = ah_attr->dlid; - new_ehca_av.slid_path_bits = ah_attr->src_path_bits; - new_ehca_av.ipd = ah_attr->static_rate; - new_ehca_av.lnh = EHCA_BMASK_SET(GRH_FLAG_MASK, - (ah_attr->ah_flags & IB_AH_GRH) > 0); - new_ehca_av.grh.word_0 = EHCA_BMASK_SET(GRH_TCLASS_MASK, - ah_attr->grh.traffic_class); - new_ehca_av.grh.word_0 |= EHCA_BMASK_SET(GRH_FLOWLABEL_MASK, - ah_attr->grh.flow_label); - new_ehca_av.grh.word_0 |= EHCA_BMASK_SET(GRH_HOPLIMIT_MASK, - ah_attr->grh.hop_limit); - new_ehca_av.grh.word_0 |= EHCA_BMASK_SET(GRH_NEXTHEADER_MASK, 0x1b); - - /* set sgid in grh.word_1 */ - if (ah_attr->ah_flags & IB_AH_GRH) { - int rc; - struct ib_port_attr port_attr; - union ib_gid gid; - memset(&port_attr, 0, sizeof(port_attr)); - rc = ehca_query_port(ah->device, ah_attr->port_num, - &port_attr); - if (rc) { /* invalid port number */ - ehca_err(ah->device, "Invalid port number " - "ehca_query_port() returned %x " - "ah=%p ah_attr=%p port_num=%x", - rc, ah, ah_attr, ah_attr->port_num); - return -EINVAL; - } - memset(&gid, 
0, sizeof(gid)); - rc = ehca_query_gid(ah->device, - ah_attr->port_num, - ah_attr->grh.sgid_index, &gid); - if (rc) { - ehca_err(ah->device, "Failed to retrieve sgid " - "ehca_query_gid() returned %x " - "ah=%p ah_attr=%p port_num=%x " - "sgid_index=%x", - rc, ah, ah_attr, ah_attr->port_num, - ah_attr->grh.sgid_index); - return -EINVAL; - } - memcpy(&new_ehca_av.grh.word_1, &gid, sizeof(gid)); - } - - new_ehca_av.pmtu = shca->max_mtu; - - memcpy(&new_ehca_av.grh.word_3, &ah_attr->grh.dgid, - sizeof(ah_attr->grh.dgid)); - - av = container_of(ah, struct ehca_av, ib_ah); - av->av = new_ehca_av; - - return 0; -} - -int ehca_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr) -{ - struct ehca_av *av = container_of(ah, struct ehca_av, ib_ah); - - memcpy(&ah_attr->grh.dgid, &av->av.grh.word_3, - sizeof(ah_attr->grh.dgid)); - ah_attr->sl = av->av.sl; - - ah_attr->dlid = av->av.dlid; - - ah_attr->src_path_bits = av->av.slid_path_bits; - ah_attr->static_rate = av->av.ipd; - ah_attr->ah_flags = EHCA_BMASK_GET(GRH_FLAG_MASK, av->av.lnh); - ah_attr->grh.traffic_class = EHCA_BMASK_GET(GRH_TCLASS_MASK, - av->av.grh.word_0); - ah_attr->grh.hop_limit = EHCA_BMASK_GET(GRH_HOPLIMIT_MASK, - av->av.grh.word_0); - ah_attr->grh.flow_label = EHCA_BMASK_GET(GRH_FLOWLABEL_MASK, - av->av.grh.word_0); - - return 0; -} - -int ehca_destroy_ah(struct ib_ah *ah) -{ - kmem_cache_free(av_cache, container_of(ah, struct ehca_av, ib_ah)); - - return 0; -} - -int ehca_init_av_cache(void) -{ - av_cache = kmem_cache_create("ehca_cache_av", - sizeof(struct ehca_av), 0, - SLAB_HWCACHE_ALIGN, - NULL); - if (!av_cache) - return -ENOMEM; - return 0; -} - -void ehca_cleanup_av_cache(void) -{ - if (av_cache) - kmem_cache_destroy(av_cache); -} diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h deleted file mode 100644 index bd45e0f..0000000 --- a/drivers/infiniband/hw/ehca/ehca_classes.h +++ /dev/null @@ -1,482 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device 
driver for Linux on POWER - * - * Struct definition for eHCA internal structures - * - * Authors: Heiko J Schick - * Christoph Raisch - * Joachim Fenkes - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef __EHCA_CLASSES_H__ -#define __EHCA_CLASSES_H__ - -struct ehca_module; -struct ehca_qp; -struct ehca_cq; -struct ehca_eq; -struct ehca_mr; -struct ehca_mw; -struct ehca_pd; -struct ehca_av; - -#include -#include - -#include -#include - -#ifdef CONFIG_PPC64 -#include "ehca_classes_pSeries.h" -#endif -#include "ipz_pt_fn.h" -#include "ehca_qes.h" -#include "ehca_irq.h" - -#define EHCA_EQE_CACHE_SIZE 20 -#define EHCA_MAX_NUM_QUEUES 0xffff - -struct ehca_eqe_cache_entry { - struct ehca_eqe *eqe; - struct ehca_cq *cq; -}; - -struct ehca_eq { - u32 length; - struct ipz_queue ipz_queue; - struct ipz_eq_handle ipz_eq_handle; - struct work_struct work; - struct h_galpas galpas; - int is_initialized; - struct ehca_pfeq pf; - spinlock_t spinlock; - struct tasklet_struct interrupt_task; - u32 ist; - spinlock_t irq_spinlock; - struct ehca_eqe_cache_entry eqe_cache[EHCA_EQE_CACHE_SIZE]; -}; - -struct ehca_sma_attr { - u16 lid, lmc, sm_sl, sm_lid; - u16 pkey_tbl_len, pkeys[16]; -}; - -struct ehca_sport { - struct ib_cq *ibcq_aqp1; - struct ib_qp *ibqp_sqp[2]; - /* lock to serialze modify_qp() calls for sqp in normal - * and irq path (when event PORT_ACTIVE is received first time) - */ - spinlock_t mod_sqp_lock; - enum ib_port_state port_state; - struct ehca_sma_attr saved_attr; - u32 pma_qp_nr; -}; - -#define HCA_CAP_MR_PGSIZE_4K 0x80000000 -#define HCA_CAP_MR_PGSIZE_64K 0x40000000 -#define HCA_CAP_MR_PGSIZE_1M 0x20000000 -#define HCA_CAP_MR_PGSIZE_16M 0x10000000 - -struct ehca_shca { - struct ib_device ib_device; - struct platform_device *ofdev; - u8 num_ports; - int hw_level; - struct list_head shca_list; - struct ipz_adapter_handle ipz_hca_handle; - struct ehca_sport sport[2]; - struct ehca_eq eq; - struct ehca_eq neq; - struct ehca_mr *maxmr; - struct ehca_pd *pd; - struct h_galpas galpas; - struct mutex modify_mutex; - u64 hca_cap; - /* MR pgsize: bit 0-3 means 4K, 64K, 1M, 16M respectively */ - u32 hca_cap_mr_pgsize; - int max_mtu; - int max_num_qps; - int 
max_num_cqs; - atomic_t num_cqs; - atomic_t num_qps; -}; - -struct ehca_pd { - struct ib_pd ib_pd; - struct ipz_pd fw_pd; - /* small queue mgmt */ - struct mutex lock; - struct list_head free[2]; - struct list_head full[2]; -}; - -enum ehca_ext_qp_type { - EQPT_NORMAL = 0, - EQPT_LLQP = 1, - EQPT_SRQBASE = 2, - EQPT_SRQ = 3, -}; - -/* struct to cache modify_qp()'s parms for GSI/SMI qp */ -struct ehca_mod_qp_parm { - int mask; - struct ib_qp_attr attr; -}; - -#define EHCA_MOD_QP_PARM_MAX 4 - -#define QMAP_IDX_MASK 0xFFFFULL - -/* struct for tracking if cqes have been reported to the application */ -struct ehca_qmap_entry { - u16 app_wr_id; - u8 reported; - u8 cqe_req; -}; - -struct ehca_queue_map { - struct ehca_qmap_entry *map; - unsigned int entries; - unsigned int tail; - unsigned int left_to_poll; - unsigned int next_wqe_idx; /* Idx to first wqe to be flushed */ -}; - -/* function to calculate the next index for the qmap */ -static inline unsigned int next_index(unsigned int cur_index, unsigned int limit) -{ - unsigned int temp = cur_index + 1; - return (temp == limit) ? 
0 : temp; -} - -struct ehca_qp { - union { - struct ib_qp ib_qp; - struct ib_srq ib_srq; - }; - u32 qp_type; - enum ehca_ext_qp_type ext_type; - enum ib_qp_state state; - struct ipz_queue ipz_squeue; - struct ehca_queue_map sq_map; - struct ipz_queue ipz_rqueue; - struct ehca_queue_map rq_map; - struct h_galpas galpas; - u32 qkey; - u32 real_qp_num; - u32 token; - spinlock_t spinlock_s; - spinlock_t spinlock_r; - u32 sq_max_inline_data_size; - struct ipz_qp_handle ipz_qp_handle; - struct ehca_pfqp pf; - struct ib_qp_init_attr init_attr; - struct ehca_cq *send_cq; - struct ehca_cq *recv_cq; - unsigned int sqerr_purgeflag; - struct hlist_node list_entries; - /* array to cache modify_qp()'s parms for GSI/SMI qp */ - struct ehca_mod_qp_parm *mod_qp_parm; - int mod_qp_parm_idx; - /* mmap counter for resources mapped into user space */ - u32 mm_count_squeue; - u32 mm_count_rqueue; - u32 mm_count_galpa; - /* unsolicited ack circumvention */ - int unsol_ack_circ; - int mtu_shift; - u32 message_count; - u32 packet_count; - atomic_t nr_events; /* events seen */ - wait_queue_head_t wait_completion; - int mig_armed; - struct list_head sq_err_node; - struct list_head rq_err_node; -}; - -#define IS_SRQ(qp) (qp->ext_type == EQPT_SRQ) -#define HAS_SQ(qp) (qp->ext_type != EQPT_SRQ) -#define HAS_RQ(qp) (qp->ext_type != EQPT_SRQBASE) - -/* must be power of 2 */ -#define QP_HASHTAB_LEN 8 - -struct ehca_cq { - struct ib_cq ib_cq; - struct ipz_queue ipz_queue; - struct h_galpas galpas; - spinlock_t spinlock; - u32 cq_number; - u32 token; - u32 nr_of_entries; - struct ipz_cq_handle ipz_cq_handle; - struct ehca_pfcq pf; - spinlock_t cb_lock; - struct hlist_head qp_hashtab[QP_HASHTAB_LEN]; - struct list_head entry; - u32 nr_callbacks; /* #events assigned to cpu by scaling code */ - atomic_t nr_events; /* #events seen */ - wait_queue_head_t wait_completion; - spinlock_t task_lock; - /* mmap counter for resources mapped into user space */ - u32 mm_count_queue; - u32 mm_count_galpa; - struct 
list_head sqp_err_list; - struct list_head rqp_err_list; -}; - -enum ehca_mr_flag { - EHCA_MR_FLAG_FMR = 0x80000000, /* FMR, created with ehca_alloc_fmr */ - EHCA_MR_FLAG_MAXMR = 0x40000000, /* max-MR */ -}; - -struct ehca_mr { - union { - struct ib_mr ib_mr; /* must always be first in ehca_mr */ - struct ib_fmr ib_fmr; /* must always be first in ehca_mr */ - } ib; - struct ib_umem *umem; - spinlock_t mrlock; - - enum ehca_mr_flag flags; - u32 num_kpages; /* number of kernel pages */ - u32 num_hwpages; /* number of hw pages to form MR */ - u64 hwpage_size; /* hw page size used for this MR */ - int acl; /* ACL (stored here for usage in reregister) */ - u64 *start; /* virtual start address (stored here for */ - /* usage in reregister) */ - u64 size; /* size (stored here for usage in reregister) */ - u32 fmr_page_size; /* page size for FMR */ - u32 fmr_max_pages; /* max pages for FMR */ - u32 fmr_max_maps; /* max outstanding maps for FMR */ - u32 fmr_map_cnt; /* map counter for FMR */ - /* fw specific data */ - struct ipz_mrmw_handle ipz_mr_handle; /* MR handle for h-calls */ - struct h_galpas galpas; -}; - -struct ehca_mw { - struct ib_mw ib_mw; /* gen2 mw, must always be first in ehca_mw */ - spinlock_t mwlock; - - u8 never_bound; /* indication MW was never bound */ - struct ipz_mrmw_handle ipz_mw_handle; /* MW handle for h-calls */ - struct h_galpas galpas; -}; - -enum ehca_mr_pgi_type { - EHCA_MR_PGI_PHYS = 1, /* type of ehca_reg_phys_mr, - * ehca_rereg_phys_mr, - * ehca_reg_internal_maxmr */ - EHCA_MR_PGI_USER = 2, /* type of ehca_reg_user_mr */ - EHCA_MR_PGI_FMR = 3 /* type of ehca_map_phys_fmr */ -}; - -struct ehca_mr_pginfo { - enum ehca_mr_pgi_type type; - u64 num_kpages; - u64 kpage_cnt; - u64 hwpage_size; /* hw page size used for this MR */ - u64 num_hwpages; /* number of hw pages */ - u64 hwpage_cnt; /* counter for hw pages */ - u64 next_hwpage; /* next hw page in buffer/chunk/listelem */ - - union { - struct { /* type EHCA_MR_PGI_PHYS section */ - int 
num_phys_buf; - struct ib_phys_buf *phys_buf_array; - u64 next_buf; - } phy; - struct { /* type EHCA_MR_PGI_USER section */ - struct ib_umem *region; - struct scatterlist *next_sg; - u64 next_nmap; - } usr; - struct { /* type EHCA_MR_PGI_FMR section */ - u64 fmr_pgsize; - u64 *page_list; - u64 next_listelem; - } fmr; - } u; -}; - -/* output parameters for MR/FMR hipz calls */ -struct ehca_mr_hipzout_parms { - struct ipz_mrmw_handle handle; - u32 lkey; - u32 rkey; - u64 len; - u64 vaddr; - u32 acl; -}; - -/* output parameters for MW hipz calls */ -struct ehca_mw_hipzout_parms { - struct ipz_mrmw_handle handle; - u32 rkey; -}; - -struct ehca_av { - struct ib_ah ib_ah; - struct ehca_ud_av av; -}; - -struct ehca_ucontext { - struct ib_ucontext ib_ucontext; -}; - -int ehca_init_pd_cache(void); -void ehca_cleanup_pd_cache(void); -int ehca_init_cq_cache(void); -void ehca_cleanup_cq_cache(void); -int ehca_init_qp_cache(void); -void ehca_cleanup_qp_cache(void); -int ehca_init_av_cache(void); -void ehca_cleanup_av_cache(void); -int ehca_init_mrmw_cache(void); -void ehca_cleanup_mrmw_cache(void); -int ehca_init_small_qp_cache(void); -void ehca_cleanup_small_qp_cache(void); - -extern rwlock_t ehca_qp_idr_lock; -extern rwlock_t ehca_cq_idr_lock; -extern struct idr ehca_qp_idr; -extern struct idr ehca_cq_idr; -extern spinlock_t shca_list_lock; - -extern int ehca_static_rate; -extern int ehca_port_act_time; -extern bool ehca_use_hp_mr; -extern bool ehca_scaling_code; -extern int ehca_lock_hcalls; -extern int ehca_nr_ports; -extern int ehca_max_cq; -extern int ehca_max_qp; - -struct ipzu_queue_resp { - u32 qe_size; /* queue entry size */ - u32 act_nr_of_sg; - u32 queue_length; /* queue length allocated in bytes */ - u32 pagesize; - u32 toggle_state; - u32 offset; /* save offset within a page for small_qp */ -}; - -struct ehca_create_cq_resp { - u32 cq_number; - u32 token; - struct ipzu_queue_resp ipz_queue; - u32 fw_handle_ofs; - u32 dummy; -}; - -struct ehca_create_qp_resp { - 
u32 qp_num; - u32 token; - u32 qp_type; - u32 ext_type; - u32 qkey; - /* qp_num assigned by ehca: sqp0/1 may have got different numbers */ - u32 real_qp_num; - u32 fw_handle_ofs; - u32 dummy; - struct ipzu_queue_resp ipz_squeue; - struct ipzu_queue_resp ipz_rqueue; -}; - -struct ehca_alloc_cq_parms { - u32 nr_cqe; - u32 act_nr_of_entries; - u32 act_pages; - struct ipz_eq_handle eq_handle; -}; - -enum ehca_service_type { - ST_RC = 0, - ST_UC = 1, - ST_RD = 2, - ST_UD = 3, -}; - -enum ehca_ll_comp_flags { - LLQP_SEND_COMP = 0x20, - LLQP_RECV_COMP = 0x40, - LLQP_COMP_MASK = 0x60, -}; - -struct ehca_alloc_queue_parms { - /* input parameters */ - int max_wr; - int max_sge; - int page_size; - int is_small; - - /* output parameters */ - u16 act_nr_wqes; - u8 act_nr_sges; - u32 queue_size; /* bytes for small queues, pages otherwise */ -}; - -struct ehca_alloc_qp_parms { - struct ehca_alloc_queue_parms squeue; - struct ehca_alloc_queue_parms rqueue; - - /* input parameters */ - enum ehca_service_type servicetype; - int qp_storage; - int sigtype; - enum ehca_ext_qp_type ext_type; - enum ehca_ll_comp_flags ll_comp_flags; - int ud_av_l_key_ctl; - - u32 token; - struct ipz_eq_handle eq_handle; - struct ipz_pd pd; - struct ipz_cq_handle send_cq_handle, recv_cq_handle; - - u32 srq_qpn, srq_token, srq_limit; - - /* output parameters */ - u32 real_qp_num; - struct ipz_qp_handle qp_handle; - struct h_galpas galpas; -}; - -int ehca_cq_assign_qp(struct ehca_cq *cq, struct ehca_qp *qp); -int ehca_cq_unassign_qp(struct ehca_cq *cq, unsigned int qp_num); -struct ehca_qp *ehca_cq_get_qp(struct ehca_cq *cq, int qp_num); - -#endif diff --git a/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h b/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h deleted file mode 100644 index 689c357..0000000 --- a/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h +++ /dev/null @@ -1,208 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * pSeries interface definitions - * - * 
Authors: Waleri Fomin - * Christoph Raisch - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef __EHCA_CLASSES_PSERIES_H__ -#define __EHCA_CLASSES_PSERIES_H__ - -#include "hcp_phyp.h" -#include "ipz_pt_fn.h" - - -struct ehca_pfqp { - struct ipz_qpt sqpt; - struct ipz_qpt rqpt; -}; - -struct ehca_pfcq { - struct ipz_qpt qpt; - u32 cqnr; -}; - -struct ehca_pfeq { - struct ipz_qpt qpt; - struct h_galpa galpa; - u32 eqnr; -}; - -struct ipz_adapter_handle { - u64 handle; -}; - -struct ipz_cq_handle { - u64 handle; -}; - -struct ipz_eq_handle { - u64 handle; -}; - -struct ipz_qp_handle { - u64 handle; -}; -struct ipz_mrmw_handle { - u64 handle; -}; - -struct ipz_pd { - u32 value; -}; - -struct hcp_modify_qp_control_block { - u32 qkey; /* 00 */ - u32 rdd; /* reliable datagram domain */ - u32 send_psn; /* 02 */ - u32 receive_psn; /* 03 */ - u32 prim_phys_port; /* 04 */ - u32 alt_phys_port; /* 05 */ - u32 prim_p_key_idx; /* 06 */ - u32 alt_p_key_idx; /* 07 */ - u32 rdma_atomic_ctrl; /* 08 */ - u32 qp_state; /* 09 */ - u32 reserved_10; /* 10 */ - u32 rdma_nr_atomic_resp_res; /* 11 */ - u32 path_migration_state; /* 12 */ - u32 rdma_atomic_outst_dest_qp; /* 13 */ - u32 dest_qp_nr; /* 14 */ - u32 min_rnr_nak_timer_field; /* 15 */ - u32 service_level; /* 16 */ - u32 send_grh_flag; /* 17 */ - u32 retry_count; /* 18 */ - u32 timeout; /* 19 */ - u32 path_mtu; /* 20 */ - u32 max_static_rate; /* 21 */ - u32 dlid; /* 22 */ - u32 rnr_retry_count; /* 23 */ - u32 source_path_bits; /* 24 */ - u32 traffic_class; /* 25 */ - u32 hop_limit; /* 26 */ - u32 source_gid_idx; /* 27 */ - u32 flow_label; /* 28 */ - u32 reserved_29; /* 29 */ - union { /* 30 */ - u64 dw[2]; - u8 byte[16]; - } dest_gid; - u32 service_level_al; /* 34 */ - u32 send_grh_flag_al; /* 35 */ - u32 retry_count_al; /* 36 */ - u32 timeout_al; /* 37 */ - u32 max_static_rate_al; /* 38 */ - u32 dlid_al; /* 39 */ - u32 rnr_retry_count_al; /* 40 */ - u32 source_path_bits_al; /* 41 */ - u32 traffic_class_al; /* 42 */ - u32 hop_limit_al; /* 43 */ - u32 source_gid_idx_al; /* 44 */ - u32 flow_label_al; /* 45 */ - 
u32 reserved_46; /* 46 */ - u32 reserved_47; /* 47 */ - union { /* 48 */ - u64 dw[2]; - u8 byte[16]; - } dest_gid_al; - u32 max_nr_outst_send_wr; /* 52 */ - u32 max_nr_outst_recv_wr; /* 53 */ - u32 disable_ete_credit_check; /* 54 */ - u32 qp_number; /* 55 */ - u64 send_queue_handle; /* 56 */ - u64 recv_queue_handle; /* 58 */ - u32 actual_nr_sges_in_sq_wqe; /* 60 */ - u32 actual_nr_sges_in_rq_wqe; /* 61 */ - u32 qp_enable; /* 62 */ - u32 curr_srq_limit; /* 63 */ - u64 qp_aff_asyn_ev_log_reg; /* 64 */ - u64 shared_rq_hndl; /* 66 */ - u64 trigg_doorbell_qp_hndl; /* 68 */ - u32 reserved_70_127[58]; /* 70 */ -}; - -#define MQPCB_MASK_QKEY EHCA_BMASK_IBM( 0, 0) -#define MQPCB_MASK_SEND_PSN EHCA_BMASK_IBM( 2, 2) -#define MQPCB_MASK_RECEIVE_PSN EHCA_BMASK_IBM( 3, 3) -#define MQPCB_MASK_PRIM_PHYS_PORT EHCA_BMASK_IBM( 4, 4) -#define MQPCB_PRIM_PHYS_PORT EHCA_BMASK_IBM(24, 31) -#define MQPCB_MASK_ALT_PHYS_PORT EHCA_BMASK_IBM( 5, 5) -#define MQPCB_MASK_PRIM_P_KEY_IDX EHCA_BMASK_IBM( 6, 6) -#define MQPCB_PRIM_P_KEY_IDX EHCA_BMASK_IBM(24, 31) -#define MQPCB_MASK_ALT_P_KEY_IDX EHCA_BMASK_IBM( 7, 7) -#define MQPCB_MASK_RDMA_ATOMIC_CTRL EHCA_BMASK_IBM( 8, 8) -#define MQPCB_MASK_QP_STATE EHCA_BMASK_IBM( 9, 9) -#define MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES EHCA_BMASK_IBM(11, 11) -#define MQPCB_MASK_PATH_MIGRATION_STATE EHCA_BMASK_IBM(12, 12) -#define MQPCB_MASK_RDMA_ATOMIC_OUTST_DEST_QP EHCA_BMASK_IBM(13, 13) -#define MQPCB_MASK_DEST_QP_NR EHCA_BMASK_IBM(14, 14) -#define MQPCB_MASK_MIN_RNR_NAK_TIMER_FIELD EHCA_BMASK_IBM(15, 15) -#define MQPCB_MASK_SERVICE_LEVEL EHCA_BMASK_IBM(16, 16) -#define MQPCB_MASK_SEND_GRH_FLAG EHCA_BMASK_IBM(17, 17) -#define MQPCB_MASK_RETRY_COUNT EHCA_BMASK_IBM(18, 18) -#define MQPCB_MASK_TIMEOUT EHCA_BMASK_IBM(19, 19) -#define MQPCB_MASK_PATH_MTU EHCA_BMASK_IBM(20, 20) -#define MQPCB_MASK_MAX_STATIC_RATE EHCA_BMASK_IBM(21, 21) -#define MQPCB_MASK_DLID EHCA_BMASK_IBM(22, 22) -#define MQPCB_MASK_RNR_RETRY_COUNT EHCA_BMASK_IBM(23, 23) -#define 
MQPCB_MASK_SOURCE_PATH_BITS EHCA_BMASK_IBM(24, 24) -#define MQPCB_MASK_TRAFFIC_CLASS EHCA_BMASK_IBM(25, 25) -#define MQPCB_MASK_HOP_LIMIT EHCA_BMASK_IBM(26, 26) -#define MQPCB_MASK_SOURCE_GID_IDX EHCA_BMASK_IBM(27, 27) -#define MQPCB_MASK_FLOW_LABEL EHCA_BMASK_IBM(28, 28) -#define MQPCB_MASK_DEST_GID EHCA_BMASK_IBM(30, 30) -#define MQPCB_MASK_SERVICE_LEVEL_AL EHCA_BMASK_IBM(31, 31) -#define MQPCB_MASK_SEND_GRH_FLAG_AL EHCA_BMASK_IBM(32, 32) -#define MQPCB_MASK_RETRY_COUNT_AL EHCA_BMASK_IBM(33, 33) -#define MQPCB_MASK_TIMEOUT_AL EHCA_BMASK_IBM(34, 34) -#define MQPCB_MASK_MAX_STATIC_RATE_AL EHCA_BMASK_IBM(35, 35) -#define MQPCB_MASK_DLID_AL EHCA_BMASK_IBM(36, 36) -#define MQPCB_MASK_RNR_RETRY_COUNT_AL EHCA_BMASK_IBM(37, 37) -#define MQPCB_MASK_SOURCE_PATH_BITS_AL EHCA_BMASK_IBM(38, 38) -#define MQPCB_MASK_TRAFFIC_CLASS_AL EHCA_BMASK_IBM(39, 39) -#define MQPCB_MASK_HOP_LIMIT_AL EHCA_BMASK_IBM(40, 40) -#define MQPCB_MASK_SOURCE_GID_IDX_AL EHCA_BMASK_IBM(41, 41) -#define MQPCB_MASK_FLOW_LABEL_AL EHCA_BMASK_IBM(42, 42) -#define MQPCB_MASK_DEST_GID_AL EHCA_BMASK_IBM(44, 44) -#define MQPCB_MASK_MAX_NR_OUTST_SEND_WR EHCA_BMASK_IBM(45, 45) -#define MQPCB_MASK_MAX_NR_OUTST_RECV_WR EHCA_BMASK_IBM(46, 46) -#define MQPCB_MASK_DISABLE_ETE_CREDIT_CHECK EHCA_BMASK_IBM(47, 47) -#define MQPCB_MASK_QP_ENABLE EHCA_BMASK_IBM(48, 48) -#define MQPCB_MASK_CURR_SRQ_LIMIT EHCA_BMASK_IBM(49, 49) -#define MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG EHCA_BMASK_IBM(50, 50) -#define MQPCB_MASK_SHARED_RQ_HNDL EHCA_BMASK_IBM(51, 51) - -#endif /* __EHCA_CLASSES_PSERIES_H__ */ diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c deleted file mode 100644 index 9b68b17..0000000 --- a/drivers/infiniband/hw/ehca/ehca_cq.c +++ /dev/null @@ -1,397 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * Completion queue handling - * - * Authors: Waleri Fomin - * Khadija Souissi - * Reinhard Ernst - * Heiko J Schick - * Hoang-Nam Nguyen - * - * - * 
Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include - -#include "ehca_iverbs.h" -#include "ehca_classes.h" -#include "ehca_irq.h" -#include "hcp_if.h" - -static struct kmem_cache *cq_cache; - -int ehca_cq_assign_qp(struct ehca_cq *cq, struct ehca_qp *qp) -{ - unsigned int qp_num = qp->real_qp_num; - unsigned int key = qp_num & (QP_HASHTAB_LEN-1); - unsigned long flags; - - spin_lock_irqsave(&cq->spinlock, flags); - hlist_add_head(&qp->list_entries, &cq->qp_hashtab[key]); - spin_unlock_irqrestore(&cq->spinlock, flags); - - ehca_dbg(cq->ib_cq.device, "cq_num=%x real_qp_num=%x", - cq->cq_number, qp_num); - - return 0; -} - -int ehca_cq_unassign_qp(struct ehca_cq *cq, unsigned int real_qp_num) -{ - int ret = -EINVAL; - unsigned int key = real_qp_num & (QP_HASHTAB_LEN-1); - struct hlist_node *iter; - struct ehca_qp *qp; - unsigned long flags; - - spin_lock_irqsave(&cq->spinlock, flags); - hlist_for_each(iter, &cq->qp_hashtab[key]) { - qp = hlist_entry(iter, struct ehca_qp, list_entries); - if (qp->real_qp_num == real_qp_num) { - hlist_del(iter); - ehca_dbg(cq->ib_cq.device, - "removed qp from cq .cq_num=%x real_qp_num=%x", - cq->cq_number, real_qp_num); - ret = 0; - break; - } - } - spin_unlock_irqrestore(&cq->spinlock, flags); - if (ret) - ehca_err(cq->ib_cq.device, - "qp not found cq_num=%x real_qp_num=%x", - cq->cq_number, real_qp_num); - - return ret; -} - -struct ehca_qp *ehca_cq_get_qp(struct ehca_cq *cq, int real_qp_num) -{ - struct ehca_qp *ret = NULL; - unsigned int key = real_qp_num & (QP_HASHTAB_LEN-1); - struct hlist_node *iter; - struct ehca_qp *qp; - hlist_for_each(iter, &cq->qp_hashtab[key]) { - qp = hlist_entry(iter, struct ehca_qp, list_entries); - if (qp->real_qp_num == real_qp_num) { - ret = qp; - break; - } - } - return ret; -} - -struct ib_cq *ehca_create_cq(struct ib_device *device, - const struct ib_cq_init_attr *attr, - struct ib_ucontext *context, - struct ib_udata *udata) -{ - int cqe = attr->cqe; - static const u32 additional_cqe = 20; - struct ib_cq *cq; - struct ehca_cq 
*my_cq; - struct ehca_shca *shca = - container_of(device, struct ehca_shca, ib_device); - struct ipz_adapter_handle adapter_handle; - struct ehca_alloc_cq_parms param; /* h_call's out parameters */ - struct h_galpa gal; - void *vpage; - u32 counter; - u64 rpage, cqx_fec, h_ret; - int ipz_rc, i; - unsigned long flags; - - if (attr->flags) - return ERR_PTR(-EINVAL); - - if (cqe >= 0xFFFFFFFF - 64 - additional_cqe) - return ERR_PTR(-EINVAL); - - if (!atomic_add_unless(&shca->num_cqs, 1, shca->max_num_cqs)) { - ehca_err(device, "Unable to create CQ, max number of %i " - "CQs reached.", shca->max_num_cqs); - ehca_err(device, "To increase the maximum number of CQs " - "use the number_of_cqs module parameter.\n"); - return ERR_PTR(-ENOSPC); - } - - my_cq = kmem_cache_zalloc(cq_cache, GFP_KERNEL); - if (!my_cq) { - ehca_err(device, "Out of memory for ehca_cq struct device=%p", - device); - atomic_dec(&shca->num_cqs); - return ERR_PTR(-ENOMEM); - } - - memset(&param, 0, sizeof(struct ehca_alloc_cq_parms)); - - spin_lock_init(&my_cq->spinlock); - spin_lock_init(&my_cq->cb_lock); - spin_lock_init(&my_cq->task_lock); - atomic_set(&my_cq->nr_events, 0); - init_waitqueue_head(&my_cq->wait_completion); - - cq = &my_cq->ib_cq; - - adapter_handle = shca->ipz_hca_handle; - param.eq_handle = shca->eq.ipz_eq_handle; - - idr_preload(GFP_KERNEL); - write_lock_irqsave(&ehca_cq_idr_lock, flags); - my_cq->token = idr_alloc(&ehca_cq_idr, my_cq, 0, 0x2000000, GFP_NOWAIT); - write_unlock_irqrestore(&ehca_cq_idr_lock, flags); - idr_preload_end(); - - if (my_cq->token < 0) { - cq = ERR_PTR(-ENOMEM); - ehca_err(device, "Can't allocate new idr entry. device=%p", - device); - goto create_cq_exit1; - } - - /* - * CQs maximum depth is 4GB-64, but we need additional 20 as buffer - * for receiving errors CQEs. 
- */ - param.nr_cqe = cqe + additional_cqe; - h_ret = hipz_h_alloc_resource_cq(adapter_handle, my_cq, &param); - - if (h_ret != H_SUCCESS) { - ehca_err(device, "hipz_h_alloc_resource_cq() failed " - "h_ret=%lli device=%p", h_ret, device); - cq = ERR_PTR(ehca2ib_return_code(h_ret)); - goto create_cq_exit2; - } - - ipz_rc = ipz_queue_ctor(NULL, &my_cq->ipz_queue, param.act_pages, - EHCA_PAGESIZE, sizeof(struct ehca_cqe), 0, 0); - if (!ipz_rc) { - ehca_err(device, "ipz_queue_ctor() failed ipz_rc=%i device=%p", - ipz_rc, device); - cq = ERR_PTR(-EINVAL); - goto create_cq_exit3; - } - - for (counter = 0; counter < param.act_pages; counter++) { - vpage = ipz_qpageit_get_inc(&my_cq->ipz_queue); - if (!vpage) { - ehca_err(device, "ipz_qpageit_get_inc() " - "returns NULL device=%p", device); - cq = ERR_PTR(-EAGAIN); - goto create_cq_exit4; - } - rpage = __pa(vpage); - - h_ret = hipz_h_register_rpage_cq(adapter_handle, - my_cq->ipz_cq_handle, - &my_cq->pf, - 0, - 0, - rpage, - 1, - my_cq->galpas. - kernel); - - if (h_ret < H_SUCCESS) { - ehca_err(device, "hipz_h_register_rpage_cq() failed " - "ehca_cq=%p cq_num=%x h_ret=%lli counter=%i " - "act_pages=%i", my_cq, my_cq->cq_number, - h_ret, counter, param.act_pages); - cq = ERR_PTR(-EINVAL); - goto create_cq_exit4; - } - - if (counter == (param.act_pages - 1)) { - vpage = ipz_qpageit_get_inc(&my_cq->ipz_queue); - if ((h_ret != H_SUCCESS) || vpage) { - ehca_err(device, "Registration of pages not " - "complete ehca_cq=%p cq_num=%x " - "h_ret=%lli", my_cq, my_cq->cq_number, - h_ret); - cq = ERR_PTR(-EAGAIN); - goto create_cq_exit4; - } - } else { - if (h_ret != H_PAGE_REGISTERED) { - ehca_err(device, "Registration of page failed " - "ehca_cq=%p cq_num=%x h_ret=%lli " - "counter=%i act_pages=%i", - my_cq, my_cq->cq_number, - h_ret, counter, param.act_pages); - cq = ERR_PTR(-ENOMEM); - goto create_cq_exit4; - } - } - } - - ipz_qeit_reset(&my_cq->ipz_queue); - - gal = my_cq->galpas.kernel; - cqx_fec = hipz_galpa_load(gal, 
CQTEMM_OFFSET(cqx_fec)); - ehca_dbg(device, "ehca_cq=%p cq_num=%x CQX_FEC=%llx", - my_cq, my_cq->cq_number, cqx_fec); - - my_cq->ib_cq.cqe = my_cq->nr_of_entries = - param.act_nr_of_entries - additional_cqe; - my_cq->cq_number = (my_cq->ipz_cq_handle.handle) & 0xffff; - - for (i = 0; i < QP_HASHTAB_LEN; i++) - INIT_HLIST_HEAD(&my_cq->qp_hashtab[i]); - - INIT_LIST_HEAD(&my_cq->sqp_err_list); - INIT_LIST_HEAD(&my_cq->rqp_err_list); - - if (context) { - struct ipz_queue *ipz_queue = &my_cq->ipz_queue; - struct ehca_create_cq_resp resp; - memset(&resp, 0, sizeof(resp)); - resp.cq_number = my_cq->cq_number; - resp.token = my_cq->token; - resp.ipz_queue.qe_size = ipz_queue->qe_size; - resp.ipz_queue.act_nr_of_sg = ipz_queue->act_nr_of_sg; - resp.ipz_queue.queue_length = ipz_queue->queue_length; - resp.ipz_queue.pagesize = ipz_queue->pagesize; - resp.ipz_queue.toggle_state = ipz_queue->toggle_state; - resp.fw_handle_ofs = (u32) - (my_cq->galpas.user.fw_handle & (PAGE_SIZE - 1)); - if (ib_copy_to_udata(udata, &resp, sizeof(resp))) { - ehca_err(device, "Copy to udata failed."); - cq = ERR_PTR(-EFAULT); - goto create_cq_exit4; - } - } - - return cq; - -create_cq_exit4: - ipz_queue_dtor(NULL, &my_cq->ipz_queue); - -create_cq_exit3: - h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 1); - if (h_ret != H_SUCCESS) - ehca_err(device, "hipz_h_destroy_cq() failed ehca_cq=%p " - "cq_num=%x h_ret=%lli", my_cq, my_cq->cq_number, h_ret); - -create_cq_exit2: - write_lock_irqsave(&ehca_cq_idr_lock, flags); - idr_remove(&ehca_cq_idr, my_cq->token); - write_unlock_irqrestore(&ehca_cq_idr_lock, flags); - -create_cq_exit1: - kmem_cache_free(cq_cache, my_cq); - - atomic_dec(&shca->num_cqs); - return cq; -} - -int ehca_destroy_cq(struct ib_cq *cq) -{ - u64 h_ret; - struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); - int cq_num = my_cq->cq_number; - struct ib_device *device = cq->device; - struct ehca_shca *shca = container_of(device, struct ehca_shca, - ib_device); - struct 
ipz_adapter_handle adapter_handle = shca->ipz_hca_handle; - unsigned long flags; - - if (cq->uobject) { - if (my_cq->mm_count_galpa || my_cq->mm_count_queue) { - ehca_err(device, "Resources still referenced in " - "user space cq_num=%x", my_cq->cq_number); - return -EINVAL; - } - } - - /* - * remove the CQ from the idr first to make sure - * no more interrupt tasklets will touch this CQ - */ - write_lock_irqsave(&ehca_cq_idr_lock, flags); - idr_remove(&ehca_cq_idr, my_cq->token); - write_unlock_irqrestore(&ehca_cq_idr_lock, flags); - - /* now wait until all pending events have completed */ - wait_event(my_cq->wait_completion, !atomic_read(&my_cq->nr_events)); - - /* nobody's using our CQ any longer -- we can destroy it */ - h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 0); - if (h_ret == H_R_STATE) { - /* cq in err: read err data and destroy it forcibly */ - ehca_dbg(device, "ehca_cq=%p cq_num=%x resource=%llx in err " - "state. Try to delete it forcibly.", - my_cq, cq_num, my_cq->ipz_cq_handle.handle); - ehca_error_data(shca, my_cq, my_cq->ipz_cq_handle.handle); - h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 1); - if (h_ret == H_SUCCESS) - ehca_dbg(device, "cq_num=%x deleted successfully.", - cq_num); - } - if (h_ret != H_SUCCESS) { - ehca_err(device, "hipz_h_destroy_cq() failed h_ret=%lli " - "ehca_cq=%p cq_num=%x", h_ret, my_cq, cq_num); - return ehca2ib_return_code(h_ret); - } - ipz_queue_dtor(NULL, &my_cq->ipz_queue); - kmem_cache_free(cq_cache, my_cq); - - atomic_dec(&shca->num_cqs); - return 0; -} - -int ehca_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata) -{ - /* TODO: proper resize needs to be done */ - ehca_err(cq->device, "not implemented yet"); - - return -EFAULT; -} - -int ehca_init_cq_cache(void) -{ - cq_cache = kmem_cache_create("ehca_cache_cq", - sizeof(struct ehca_cq), 0, - SLAB_HWCACHE_ALIGN, - NULL); - if (!cq_cache) - return -ENOMEM; - return 0; -} - -void ehca_cleanup_cq_cache(void) -{ - if (cq_cache) - 
kmem_cache_destroy(cq_cache); -} diff --git a/drivers/infiniband/hw/ehca/ehca_eq.c b/drivers/infiniband/hw/ehca/ehca_eq.c deleted file mode 100644 index 90da674..0000000 --- a/drivers/infiniband/hw/ehca/ehca_eq.c +++ /dev/null @@ -1,189 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * Event queue handling - * - * Authors: Waleri Fomin - * Khadija Souissi - * Reinhard Ernst - * Heiko J Schick - * Hoang-Nam Nguyen - * - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include "ehca_classes.h" -#include "ehca_irq.h" -#include "ehca_iverbs.h" -#include "ehca_qes.h" -#include "hcp_if.h" -#include "ipz_pt_fn.h" - -int ehca_create_eq(struct ehca_shca *shca, - struct ehca_eq *eq, - const enum ehca_eq_type type, const u32 length) -{ - int ret; - u64 h_ret; - u32 nr_pages; - u32 i; - void *vpage; - struct ib_device *ib_dev = &shca->ib_device; - - spin_lock_init(&eq->spinlock); - spin_lock_init(&eq->irq_spinlock); - eq->is_initialized = 0; - - if (type != EHCA_EQ && type != EHCA_NEQ) { - ehca_err(ib_dev, "Invalid EQ type %x. eq=%p", type, eq); - return -EINVAL; - } - if (!length) { - ehca_err(ib_dev, "EQ length must not be zero. eq=%p", eq); - return -EINVAL; - } - - h_ret = hipz_h_alloc_resource_eq(shca->ipz_hca_handle, - &eq->pf, - type, - length, - &eq->ipz_eq_handle, - &eq->length, - &nr_pages, &eq->ist); - - if (h_ret != H_SUCCESS) { - ehca_err(ib_dev, "Can't allocate EQ/NEQ. eq=%p", eq); - return -EINVAL; - } - - ret = ipz_queue_ctor(NULL, &eq->ipz_queue, nr_pages, - EHCA_PAGESIZE, sizeof(struct ehca_eqe), 0, 0); - if (!ret) { - ehca_err(ib_dev, "Can't allocate EQ pages eq=%p", eq); - goto create_eq_exit1; - } - - for (i = 0; i < nr_pages; i++) { - u64 rpage; - - vpage = ipz_qpageit_get_inc(&eq->ipz_queue); - if (!vpage) - goto create_eq_exit2; - - rpage = __pa(vpage); - h_ret = hipz_h_register_rpage_eq(shca->ipz_hca_handle, - eq->ipz_eq_handle, - &eq->pf, - 0, 0, rpage, 1); - - if (i == (nr_pages - 1)) { - /* last page */ - vpage = ipz_qpageit_get_inc(&eq->ipz_queue); - if (h_ret != H_SUCCESS || vpage) - goto create_eq_exit2; - } else { - if (h_ret != H_PAGE_REGISTERED) - goto create_eq_exit2; - } - } - - ipz_qeit_reset(&eq->ipz_queue); - - /* register interrupt handlers and initialize work queues */ - if (type == EHCA_EQ) { - tasklet_init(&eq->interrupt_task, ehca_tasklet_eq, (long)shca); - - ret = ibmebus_request_irq(eq->ist, ehca_interrupt_eq, - 0, "ehca_eq", - (void *)shca); - if (ret < 0) - ehca_err(ib_dev, "Can't 
map interrupt handler."); - } else if (type == EHCA_NEQ) { - tasklet_init(&eq->interrupt_task, ehca_tasklet_neq, (long)shca); - - ret = ibmebus_request_irq(eq->ist, ehca_interrupt_neq, - 0, "ehca_neq", - (void *)shca); - if (ret < 0) - ehca_err(ib_dev, "Can't map interrupt handler."); - } - - eq->is_initialized = 1; - - return 0; - -create_eq_exit2: - ipz_queue_dtor(NULL, &eq->ipz_queue); - -create_eq_exit1: - hipz_h_destroy_eq(shca->ipz_hca_handle, eq); - - return -EINVAL; -} - -void *ehca_poll_eq(struct ehca_shca *shca, struct ehca_eq *eq) -{ - unsigned long flags; - void *eqe; - - spin_lock_irqsave(&eq->spinlock, flags); - eqe = ipz_eqit_eq_get_inc_valid(&eq->ipz_queue); - spin_unlock_irqrestore(&eq->spinlock, flags); - - return eqe; -} - -int ehca_destroy_eq(struct ehca_shca *shca, struct ehca_eq *eq) -{ - unsigned long flags; - u64 h_ret; - - ibmebus_free_irq(eq->ist, (void *)shca); - - spin_lock_irqsave(&shca_list_lock, flags); - eq->is_initialized = 0; - spin_unlock_irqrestore(&shca_list_lock, flags); - - tasklet_kill(&eq->interrupt_task); - - h_ret = hipz_h_destroy_eq(shca->ipz_hca_handle, eq); - - if (h_ret != H_SUCCESS) { - ehca_err(&shca->ib_device, "Can't free EQ resources."); - return -EINVAL; - } - ipz_queue_dtor(NULL, &eq->ipz_queue); - - return 0; -} diff --git a/drivers/infiniband/hw/ehca/ehca_hca.c b/drivers/infiniband/hw/ehca/ehca_hca.c deleted file mode 100644 index e8b1bb6..0000000 --- a/drivers/infiniband/hw/ehca/ehca_hca.c +++ /dev/null @@ -1,414 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * HCA query functions - * - * Authors: Heiko J Schick - * Christoph Raisch - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. 
- * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include - -#include "ehca_tools.h" -#include "ehca_iverbs.h" -#include "hcp_if.h" - -static unsigned int limit_uint(unsigned int value) -{ - return min_t(unsigned int, value, INT_MAX); -} - -int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props, - struct ib_udata *uhw) -{ - int i, ret = 0; - struct ehca_shca *shca = container_of(ibdev, struct ehca_shca, - ib_device); - struct hipz_query_hca *rblock; - - static const u32 cap_mapping[] = { - IB_DEVICE_RESIZE_MAX_WR, HCA_CAP_WQE_RESIZE, - IB_DEVICE_BAD_PKEY_CNTR, HCA_CAP_BAD_P_KEY_CTR, - IB_DEVICE_BAD_QKEY_CNTR, HCA_CAP_Q_KEY_VIOL_CTR, - IB_DEVICE_RAW_MULTI, HCA_CAP_RAW_PACKET_MCAST, - IB_DEVICE_AUTO_PATH_MIG, HCA_CAP_AUTO_PATH_MIG, - IB_DEVICE_CHANGE_PHY_PORT, HCA_CAP_SQD_RTS_PORT_CHANGE, - IB_DEVICE_UD_AV_PORT_ENFORCE, HCA_CAP_AH_PORT_NR_CHECK, - IB_DEVICE_CURR_QP_STATE_MOD, HCA_CAP_CUR_QP_STATE_MOD, - IB_DEVICE_SHUTDOWN_PORT, HCA_CAP_SHUTDOWN_PORT, - IB_DEVICE_INIT_TYPE, HCA_CAP_INIT_TYPE, - IB_DEVICE_PORT_ACTIVE_EVENT, HCA_CAP_PORT_ACTIVE_EVENT, - }; - - if (uhw->inlen || uhw->outlen) - return -EINVAL; - - rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); - if (!rblock) { - ehca_err(&shca->ib_device, "Can't allocate rblock memory."); - return -ENOMEM; - } - - if (hipz_h_query_hca(shca->ipz_hca_handle, rblock) != H_SUCCESS) { - ehca_err(&shca->ib_device, "Can't query device properties"); - ret = -EINVAL; - goto query_device1; - } - - memset(props, 0, sizeof(struct ib_device_attr)); - props->page_size_cap = shca->hca_cap_mr_pgsize; - props->fw_ver = rblock->hw_ver; - props->max_mr_size = rblock->max_mr_size; - props->vendor_id = rblock->vendor_id >> 8; - props->vendor_part_id = rblock->vendor_part_id >> 16; - props->hw_ver = rblock->hw_ver; - props->max_qp = limit_uint(rblock->max_qp); - props->max_qp_wr = limit_uint(rblock->max_wqes_wq); - props->max_sge = limit_uint(rblock->max_sge); - props->max_sge_rd = limit_uint(rblock->max_sge_rd); - props->max_cq = limit_uint(rblock->max_cq); - 
props->max_cqe = limit_uint(rblock->max_cqe); - props->max_mr = limit_uint(rblock->max_mr); - props->max_mw = limit_uint(rblock->max_mw); - props->max_pd = limit_uint(rblock->max_pd); - props->max_ah = limit_uint(rblock->max_ah); - props->max_ee = limit_uint(rblock->max_rd_ee_context); - props->max_rdd = limit_uint(rblock->max_rd_domain); - props->max_fmr = limit_uint(rblock->max_mr); - props->max_qp_rd_atom = limit_uint(rblock->max_rr_qp); - props->max_ee_rd_atom = limit_uint(rblock->max_rr_ee_context); - props->max_res_rd_atom = limit_uint(rblock->max_rr_hca); - props->max_qp_init_rd_atom = limit_uint(rblock->max_act_wqs_qp); - props->max_ee_init_rd_atom = limit_uint(rblock->max_act_wqs_ee_context); - - if (EHCA_BMASK_GET(HCA_CAP_SRQ, shca->hca_cap)) { - props->max_srq = limit_uint(props->max_qp); - props->max_srq_wr = limit_uint(props->max_qp_wr); - props->max_srq_sge = 3; - } - - props->max_pkeys = 16; - /* Some FW versions say 0 here; insert sensible value in that case */ - props->local_ca_ack_delay = rblock->local_ca_ack_delay ? 
- min_t(u8, rblock->local_ca_ack_delay, 255) : 12; - props->max_raw_ipv6_qp = limit_uint(rblock->max_raw_ipv6_qp); - props->max_raw_ethy_qp = limit_uint(rblock->max_raw_ethy_qp); - props->max_mcast_grp = limit_uint(rblock->max_mcast_grp); - props->max_mcast_qp_attach = limit_uint(rblock->max_mcast_qp_attach); - props->max_total_mcast_qp_attach - = limit_uint(rblock->max_total_mcast_qp_attach); - - /* translate device capabilities */ - props->device_cap_flags = IB_DEVICE_SYS_IMAGE_GUID | - IB_DEVICE_RC_RNR_NAK_GEN | IB_DEVICE_N_NOTIFY_CQ; - for (i = 0; i < ARRAY_SIZE(cap_mapping); i += 2) - if (rblock->hca_cap_indicators & cap_mapping[i + 1]) - props->device_cap_flags |= cap_mapping[i]; - -query_device1: - ehca_free_fw_ctrlblock(rblock); - - return ret; -} - -static enum ib_mtu map_mtu(struct ehca_shca *shca, u32 fw_mtu) -{ - switch (fw_mtu) { - case 0x1: - return IB_MTU_256; - case 0x2: - return IB_MTU_512; - case 0x3: - return IB_MTU_1024; - case 0x4: - return IB_MTU_2048; - case 0x5: - return IB_MTU_4096; - default: - ehca_err(&shca->ib_device, "Unknown MTU size: %x.", - fw_mtu); - return 0; - } -} - -static u8 map_number_of_vls(struct ehca_shca *shca, u32 vl_cap) -{ - switch (vl_cap) { - case 0x1: - return 1; - case 0x2: - return 2; - case 0x3: - return 4; - case 0x4: - return 8; - case 0x5: - return 15; - default: - ehca_err(&shca->ib_device, "invalid Vl Capability: %x.", - vl_cap); - return 0; - } -} - -int ehca_query_port(struct ib_device *ibdev, - u8 port, struct ib_port_attr *props) -{ - int ret = 0; - u64 h_ret; - struct ehca_shca *shca = container_of(ibdev, struct ehca_shca, - ib_device); - struct hipz_query_port *rblock; - - rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); - if (!rblock) { - ehca_err(&shca->ib_device, "Can't allocate rblock memory."); - return -ENOMEM; - } - - h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock); - if (h_ret != H_SUCCESS) { - ehca_err(&shca->ib_device, "Can't query port properties"); - ret = -EINVAL; - goto 
query_port1; - } - - memset(props, 0, sizeof(struct ib_port_attr)); - - props->active_mtu = props->max_mtu = map_mtu(shca, rblock->max_mtu); - props->port_cap_flags = rblock->capability_mask; - props->gid_tbl_len = rblock->gid_tbl_len; - if (rblock->max_msg_sz) - props->max_msg_sz = rblock->max_msg_sz; - else - props->max_msg_sz = 0x1 << 31; - props->bad_pkey_cntr = rblock->bad_pkey_cntr; - props->qkey_viol_cntr = rblock->qkey_viol_cntr; - props->pkey_tbl_len = rblock->pkey_tbl_len; - props->lid = rblock->lid; - props->sm_lid = rblock->sm_lid; - props->lmc = rblock->lmc; - props->sm_sl = rblock->sm_sl; - props->subnet_timeout = rblock->subnet_timeout; - props->init_type_reply = rblock->init_type_reply; - props->max_vl_num = map_number_of_vls(shca, rblock->vl_cap); - - if (rblock->state && rblock->phys_width) { - props->phys_state = rblock->phys_pstate; - props->state = rblock->phys_state; - props->active_width = rblock->phys_width; - props->active_speed = rblock->phys_speed; - } else { - /* old firmware releases don't report physical - * port info, so use default values - */ - props->phys_state = 5; - props->state = rblock->state; - props->active_width = IB_WIDTH_12X; - props->active_speed = IB_SPEED_SDR; - } - -query_port1: - ehca_free_fw_ctrlblock(rblock); - - return ret; -} - -int ehca_query_sma_attr(struct ehca_shca *shca, - u8 port, struct ehca_sma_attr *attr) -{ - int ret = 0; - u64 h_ret; - struct hipz_query_port *rblock; - - rblock = ehca_alloc_fw_ctrlblock(GFP_ATOMIC); - if (!rblock) { - ehca_err(&shca->ib_device, "Can't allocate rblock memory."); - return -ENOMEM; - } - - h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock); - if (h_ret != H_SUCCESS) { - ehca_err(&shca->ib_device, "Can't query port properties"); - ret = -EINVAL; - goto query_sma_attr1; - } - - memset(attr, 0, sizeof(struct ehca_sma_attr)); - - attr->lid = rblock->lid; - attr->lmc = rblock->lmc; - attr->sm_sl = rblock->sm_sl; - attr->sm_lid = rblock->sm_lid; - - attr->pkey_tbl_len 
= rblock->pkey_tbl_len; - memcpy(attr->pkeys, rblock->pkey_entries, sizeof(attr->pkeys)); - -query_sma_attr1: - ehca_free_fw_ctrlblock(rblock); - - return ret; -} - -int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey) -{ - int ret = 0; - u64 h_ret; - struct ehca_shca *shca; - struct hipz_query_port *rblock; - - shca = container_of(ibdev, struct ehca_shca, ib_device); - if (index > 16) { - ehca_err(&shca->ib_device, "Invalid index: %x.", index); - return -EINVAL; - } - - rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); - if (!rblock) { - ehca_err(&shca->ib_device, "Can't allocate rblock memory."); - return -ENOMEM; - } - - h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock); - if (h_ret != H_SUCCESS) { - ehca_err(&shca->ib_device, "Can't query port properties"); - ret = -EINVAL; - goto query_pkey1; - } - - memcpy(pkey, &rblock->pkey_entries + index, sizeof(u16)); - -query_pkey1: - ehca_free_fw_ctrlblock(rblock); - - return ret; -} - -int ehca_query_gid(struct ib_device *ibdev, u8 port, - int index, union ib_gid *gid) -{ - int ret = 0; - u64 h_ret; - struct ehca_shca *shca = container_of(ibdev, struct ehca_shca, - ib_device); - struct hipz_query_port *rblock; - - if (index < 0 || index > 255) { - ehca_err(&shca->ib_device, "Invalid index: %x.", index); - return -EINVAL; - } - - rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); - if (!rblock) { - ehca_err(&shca->ib_device, "Can't allocate rblock memory."); - return -ENOMEM; - } - - h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock); - if (h_ret != H_SUCCESS) { - ehca_err(&shca->ib_device, "Can't query port properties"); - ret = -EINVAL; - goto query_gid1; - } - - memcpy(&gid->raw[0], &rblock->gid_prefix, sizeof(u64)); - memcpy(&gid->raw[8], &rblock->guid_entries[index], sizeof(u64)); - -query_gid1: - ehca_free_fw_ctrlblock(rblock); - - return ret; -} - -static const u32 allowed_port_caps = ( - IB_PORT_SM | IB_PORT_LED_INFO_SUP | IB_PORT_CM_SUP | - IB_PORT_SNMP_TUNNEL_SUP | 
IB_PORT_DEVICE_MGMT_SUP | - IB_PORT_VENDOR_CLASS_SUP); - -int ehca_modify_port(struct ib_device *ibdev, - u8 port, int port_modify_mask, - struct ib_port_modify *props) -{ - int ret = 0; - struct ehca_shca *shca; - struct hipz_query_port *rblock; - u32 cap; - u64 hret; - - shca = container_of(ibdev, struct ehca_shca, ib_device); - if ((props->set_port_cap_mask | props->clr_port_cap_mask) - & ~allowed_port_caps) { - ehca_err(&shca->ib_device, "Non-changeable bits set in masks " - "set=%x clr=%x allowed=%x", props->set_port_cap_mask, - props->clr_port_cap_mask, allowed_port_caps); - return -EINVAL; - } - - if (mutex_lock_interruptible(&shca->modify_mutex)) - return -ERESTARTSYS; - - rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); - if (!rblock) { - ehca_err(&shca->ib_device, "Can't allocate rblock memory."); - ret = -ENOMEM; - goto modify_port1; - } - - hret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock); - if (hret != H_SUCCESS) { - ehca_err(&shca->ib_device, "Can't query port properties"); - ret = -EINVAL; - goto modify_port2; - } - - cap = (rblock->capability_mask | props->set_port_cap_mask) - & ~props->clr_port_cap_mask; - - hret = hipz_h_modify_port(shca->ipz_hca_handle, port, - cap, props->init_type, port_modify_mask); - if (hret != H_SUCCESS) { - ehca_err(&shca->ib_device, "Modify port failed h_ret=%lli", - hret); - ret = -EINVAL; - } - -modify_port2: - ehca_free_fw_ctrlblock(rblock); - -modify_port1: - mutex_unlock(&shca->modify_mutex); - - return ret; -} diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c deleted file mode 100644 index 8615d7c..0000000 --- a/drivers/infiniband/hw/ehca/ehca_irq.c +++ /dev/null @@ -1,870 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * Functions for EQs, NEQs and interrupts - * - * Authors: Heiko J Schick - * Khadija Souissi - * Hoang-Nam Nguyen - * Joachim Fenkes - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. 
- * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include -#include - -#include "ehca_classes.h" -#include "ehca_irq.h" -#include "ehca_iverbs.h" -#include "ehca_tools.h" -#include "hcp_if.h" -#include "hipz_fns.h" -#include "ipz_pt_fn.h" - -#define EQE_COMPLETION_EVENT EHCA_BMASK_IBM( 1, 1) -#define EQE_CQ_QP_NUMBER EHCA_BMASK_IBM( 8, 31) -#define EQE_EE_IDENTIFIER EHCA_BMASK_IBM( 2, 7) -#define EQE_CQ_NUMBER EHCA_BMASK_IBM( 8, 31) -#define EQE_QP_NUMBER EHCA_BMASK_IBM( 8, 31) -#define EQE_QP_TOKEN EHCA_BMASK_IBM(32, 63) -#define EQE_CQ_TOKEN EHCA_BMASK_IBM(32, 63) - -#define NEQE_COMPLETION_EVENT EHCA_BMASK_IBM( 1, 1) -#define NEQE_EVENT_CODE EHCA_BMASK_IBM( 2, 7) -#define NEQE_PORT_NUMBER EHCA_BMASK_IBM( 8, 15) -#define NEQE_PORT_AVAILABILITY EHCA_BMASK_IBM(16, 16) -#define NEQE_DISRUPTIVE EHCA_BMASK_IBM(16, 16) -#define NEQE_SPECIFIC_EVENT EHCA_BMASK_IBM(16, 23) - -#define ERROR_DATA_LENGTH EHCA_BMASK_IBM(52, 63) -#define ERROR_DATA_TYPE EHCA_BMASK_IBM( 0, 7) - -static void queue_comp_task(struct ehca_cq *__cq); - -static struct ehca_comp_pool *pool; - -static inline void comp_event_callback(struct ehca_cq *cq) -{ - if (!cq->ib_cq.comp_handler) - return; - - spin_lock(&cq->cb_lock); - cq->ib_cq.comp_handler(&cq->ib_cq, cq->ib_cq.cq_context); - spin_unlock(&cq->cb_lock); - - return; -} - -static void print_error_data(struct ehca_shca *shca, void *data, - u64 *rblock, int length) -{ - u64 type = EHCA_BMASK_GET(ERROR_DATA_TYPE, rblock[2]); - u64 resource = rblock[1]; - - switch (type) { - case 0x1: /* Queue Pair */ - { - struct ehca_qp *qp = (struct ehca_qp *)data; - - /* only print error data if AER is set */ - if (rblock[6] == 0) - return; - - ehca_err(&shca->ib_device, - "QP 0x%x (resource=%llx) has errors.", - qp->ib_qp.qp_num, resource); - break; - } - case 0x4: /* Completion Queue */ - { - struct ehca_cq *cq = (struct ehca_cq *)data; - - ehca_err(&shca->ib_device, - "CQ 0x%x (resource=%llx) has errors.", - cq->cq_number, resource); - break; - } - default: - ehca_err(&shca->ib_device, - "Unknown 
error type: %llx on %s.", - type, shca->ib_device.name); - break; - } - - ehca_err(&shca->ib_device, "Error data is available: %llx.", resource); - ehca_err(&shca->ib_device, "EHCA ----- error data begin " - "---------------------------------------------------"); - ehca_dmp(rblock, length, "resource=%llx", resource); - ehca_err(&shca->ib_device, "EHCA ----- error data end " - "----------------------------------------------------"); - - return; -} - -int ehca_error_data(struct ehca_shca *shca, void *data, - u64 resource) -{ - - unsigned long ret; - u64 *rblock; - unsigned long block_count; - - rblock = ehca_alloc_fw_ctrlblock(GFP_ATOMIC); - if (!rblock) { - ehca_err(&shca->ib_device, "Cannot allocate rblock memory."); - ret = -ENOMEM; - goto error_data1; - } - - /* rblock must be 4K aligned and should be 4K large */ - ret = hipz_h_error_data(shca->ipz_hca_handle, - resource, - rblock, - &block_count); - - if (ret == H_R_STATE) - ehca_err(&shca->ib_device, - "No error data is available: %llx.", resource); - else if (ret == H_SUCCESS) { - int length; - - length = EHCA_BMASK_GET(ERROR_DATA_LENGTH, rblock[0]); - - if (length > EHCA_PAGESIZE) - length = EHCA_PAGESIZE; - - print_error_data(shca, data, rblock, length); - } else - ehca_err(&shca->ib_device, - "Error data could not be fetched: %llx", resource); - - ehca_free_fw_ctrlblock(rblock); - -error_data1: - return ret; - -} - -static void dispatch_qp_event(struct ehca_shca *shca, struct ehca_qp *qp, - enum ib_event_type event_type) -{ - struct ib_event event; - - /* PATH_MIG without the QP ever having been armed is false alarm */ - if (event_type == IB_EVENT_PATH_MIG && !qp->mig_armed) - return; - - event.device = &shca->ib_device; - event.event = event_type; - - if (qp->ext_type == EQPT_SRQ) { - if (!qp->ib_srq.event_handler) - return; - - event.element.srq = &qp->ib_srq; - qp->ib_srq.event_handler(&event, qp->ib_srq.srq_context); - } else { - if (!qp->ib_qp.event_handler) - return; - - event.element.qp = &qp->ib_qp; 
- qp->ib_qp.event_handler(&event, qp->ib_qp.qp_context); - } -} - -static void qp_event_callback(struct ehca_shca *shca, u64 eqe, - enum ib_event_type event_type, int fatal) -{ - struct ehca_qp *qp; - u32 token = EHCA_BMASK_GET(EQE_QP_TOKEN, eqe); - - read_lock(&ehca_qp_idr_lock); - qp = idr_find(&ehca_qp_idr, token); - if (qp) - atomic_inc(&qp->nr_events); - read_unlock(&ehca_qp_idr_lock); - - if (!qp) - return; - - if (fatal) - ehca_error_data(shca, qp, qp->ipz_qp_handle.handle); - - dispatch_qp_event(shca, qp, fatal && qp->ext_type == EQPT_SRQ ? - IB_EVENT_SRQ_ERR : event_type); - - /* - * eHCA only processes one WQE at a time for SRQ base QPs, - * so the last WQE has been processed as soon as the QP enters - * error state. - */ - if (fatal && qp->ext_type == EQPT_SRQBASE) - dispatch_qp_event(shca, qp, IB_EVENT_QP_LAST_WQE_REACHED); - - if (atomic_dec_and_test(&qp->nr_events)) - wake_up(&qp->wait_completion); - return; -} - -static void cq_event_callback(struct ehca_shca *shca, - u64 eqe) -{ - struct ehca_cq *cq; - u32 token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe); - - read_lock(&ehca_cq_idr_lock); - cq = idr_find(&ehca_cq_idr, token); - if (cq) - atomic_inc(&cq->nr_events); - read_unlock(&ehca_cq_idr_lock); - - if (!cq) - return; - - ehca_error_data(shca, cq, cq->ipz_cq_handle.handle); - - if (atomic_dec_and_test(&cq->nr_events)) - wake_up(&cq->wait_completion); - - return; -} - -static void parse_identifier(struct ehca_shca *shca, u64 eqe) -{ - u8 identifier = EHCA_BMASK_GET(EQE_EE_IDENTIFIER, eqe); - - switch (identifier) { - case 0x02: /* path migrated */ - qp_event_callback(shca, eqe, IB_EVENT_PATH_MIG, 0); - break; - case 0x03: /* communication established */ - qp_event_callback(shca, eqe, IB_EVENT_COMM_EST, 0); - break; - case 0x04: /* send queue drained */ - qp_event_callback(shca, eqe, IB_EVENT_SQ_DRAINED, 0); - break; - case 0x05: /* QP error */ - case 0x06: /* QP error */ - qp_event_callback(shca, eqe, IB_EVENT_QP_FATAL, 1); - break; - case 0x07: /* CQ 
error */ - case 0x08: /* CQ error */ - cq_event_callback(shca, eqe); - break; - case 0x09: /* MRMWPTE error */ - ehca_err(&shca->ib_device, "MRMWPTE error."); - break; - case 0x0A: /* port event */ - ehca_err(&shca->ib_device, "Port event."); - break; - case 0x0B: /* MR access error */ - ehca_err(&shca->ib_device, "MR access error."); - break; - case 0x0C: /* EQ error */ - ehca_err(&shca->ib_device, "EQ error."); - break; - case 0x0D: /* P/Q_Key mismatch */ - ehca_err(&shca->ib_device, "P/Q_Key mismatch."); - break; - case 0x10: /* sampling complete */ - ehca_err(&shca->ib_device, "Sampling complete."); - break; - case 0x11: /* unaffiliated access error */ - ehca_err(&shca->ib_device, "Unaffiliated access error."); - break; - case 0x12: /* path migrating */ - ehca_err(&shca->ib_device, "Path migrating."); - break; - case 0x13: /* interface trace stopped */ - ehca_err(&shca->ib_device, "Interface trace stopped."); - break; - case 0x14: /* first error capture info available */ - ehca_info(&shca->ib_device, "First error capture available"); - break; - case 0x15: /* SRQ limit reached */ - qp_event_callback(shca, eqe, IB_EVENT_SRQ_LIMIT_REACHED, 0); - break; - default: - ehca_err(&shca->ib_device, "Unknown identifier: %x on %s.", - identifier, shca->ib_device.name); - break; - } - - return; -} - -static void dispatch_port_event(struct ehca_shca *shca, int port_num, - enum ib_event_type type, const char *msg) -{ - struct ib_event event; - - ehca_info(&shca->ib_device, "port %d %s.", port_num, msg); - event.device = &shca->ib_device; - event.event = type; - event.element.port_num = port_num; - ib_dispatch_event(&event); -} - -static void notify_port_conf_change(struct ehca_shca *shca, int port_num) -{ - struct ehca_sma_attr new_attr; - struct ehca_sma_attr *old_attr = &shca->sport[port_num - 1].saved_attr; - - ehca_query_sma_attr(shca, port_num, &new_attr); - - if (new_attr.sm_sl != old_attr->sm_sl || - new_attr.sm_lid != old_attr->sm_lid) - dispatch_port_event(shca, 
port_num, IB_EVENT_SM_CHANGE, - "SM changed"); - - if (new_attr.lid != old_attr->lid || - new_attr.lmc != old_attr->lmc) - dispatch_port_event(shca, port_num, IB_EVENT_LID_CHANGE, - "LID changed"); - - if (new_attr.pkey_tbl_len != old_attr->pkey_tbl_len || - memcmp(new_attr.pkeys, old_attr->pkeys, - sizeof(u16) * new_attr.pkey_tbl_len)) - dispatch_port_event(shca, port_num, IB_EVENT_PKEY_CHANGE, - "P_Key changed"); - - *old_attr = new_attr; -} - -/* replay modify_qp for sqps -- return 0 if all is well, 1 if AQP1 destroyed */ -static int replay_modify_qp(struct ehca_sport *sport) -{ - int aqp1_destroyed; - unsigned long flags; - - spin_lock_irqsave(&sport->mod_sqp_lock, flags); - - aqp1_destroyed = !sport->ibqp_sqp[IB_QPT_GSI]; - - if (sport->ibqp_sqp[IB_QPT_SMI]) - ehca_recover_sqp(sport->ibqp_sqp[IB_QPT_SMI]); - if (!aqp1_destroyed) - ehca_recover_sqp(sport->ibqp_sqp[IB_QPT_GSI]); - - spin_unlock_irqrestore(&sport->mod_sqp_lock, flags); - - return aqp1_destroyed; -} - -static void parse_ec(struct ehca_shca *shca, u64 eqe) -{ - u8 ec = EHCA_BMASK_GET(NEQE_EVENT_CODE, eqe); - u8 port = EHCA_BMASK_GET(NEQE_PORT_NUMBER, eqe); - u8 spec_event; - struct ehca_sport *sport = &shca->sport[port - 1]; - - switch (ec) { - case 0x30: /* port availability change */ - if (EHCA_BMASK_GET(NEQE_PORT_AVAILABILITY, eqe)) { - /* only replay modify_qp calls in autodetect mode; - * if AQP1 was destroyed, the port is already down - * again and we can drop the event. 
- */ - if (ehca_nr_ports < 0) - if (replay_modify_qp(sport)) - break; - - sport->port_state = IB_PORT_ACTIVE; - dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE, - "is active"); - ehca_query_sma_attr(shca, port, &sport->saved_attr); - } else { - sport->port_state = IB_PORT_DOWN; - dispatch_port_event(shca, port, IB_EVENT_PORT_ERR, - "is inactive"); - } - break; - case 0x31: - /* port configuration change - * disruptive change is caused by - * LID, PKEY or SM change - */ - if (EHCA_BMASK_GET(NEQE_DISRUPTIVE, eqe)) { - ehca_warn(&shca->ib_device, "disruptive port " - "%d configuration change", port); - - sport->port_state = IB_PORT_DOWN; - dispatch_port_event(shca, port, IB_EVENT_PORT_ERR, - "is inactive"); - - sport->port_state = IB_PORT_ACTIVE; - dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE, - "is active"); - ehca_query_sma_attr(shca, port, - &sport->saved_attr); - } else - notify_port_conf_change(shca, port); - break; - case 0x32: /* adapter malfunction */ - ehca_err(&shca->ib_device, "Adapter malfunction."); - break; - case 0x33: /* trace stopped */ - ehca_err(&shca->ib_device, "Traced stopped."); - break; - case 0x34: /* util async event */ - spec_event = EHCA_BMASK_GET(NEQE_SPECIFIC_EVENT, eqe); - if (spec_event == 0x80) /* client reregister required */ - dispatch_port_event(shca, port, - IB_EVENT_CLIENT_REREGISTER, - "client reregister req."); - else - ehca_warn(&shca->ib_device, "Unknown util async " - "event %x on port %x", spec_event, port); - break; - default: - ehca_err(&shca->ib_device, "Unknown event code: %x on %s.", - ec, shca->ib_device.name); - break; - } - - return; -} - -static inline void reset_eq_pending(struct ehca_cq *cq) -{ - u64 CQx_EP; - struct h_galpa gal = cq->galpas.kernel; - - hipz_galpa_store_cq(gal, cqx_ep, 0x0); - CQx_EP = hipz_galpa_load(gal, CQTEMM_OFFSET(cqx_ep)); - - return; -} - -irqreturn_t ehca_interrupt_neq(int irq, void *dev_id) -{ - struct ehca_shca *shca = (struct ehca_shca*)dev_id; - - 
tasklet_hi_schedule(&shca->neq.interrupt_task); - - return IRQ_HANDLED; -} - -void ehca_tasklet_neq(unsigned long data) -{ - struct ehca_shca *shca = (struct ehca_shca*)data; - struct ehca_eqe *eqe; - u64 ret; - - eqe = ehca_poll_eq(shca, &shca->neq); - - while (eqe) { - if (!EHCA_BMASK_GET(NEQE_COMPLETION_EVENT, eqe->entry)) - parse_ec(shca, eqe->entry); - - eqe = ehca_poll_eq(shca, &shca->neq); - } - - ret = hipz_h_reset_event(shca->ipz_hca_handle, - shca->neq.ipz_eq_handle, 0xFFFFFFFFFFFFFFFFL); - - if (ret != H_SUCCESS) - ehca_err(&shca->ib_device, "Can't clear notification events."); - - return; -} - -irqreturn_t ehca_interrupt_eq(int irq, void *dev_id) -{ - struct ehca_shca *shca = (struct ehca_shca*)dev_id; - - tasklet_hi_schedule(&shca->eq.interrupt_task); - - return IRQ_HANDLED; -} - - -static inline void process_eqe(struct ehca_shca *shca, struct ehca_eqe *eqe) -{ - u64 eqe_value; - u32 token; - struct ehca_cq *cq; - - eqe_value = eqe->entry; - ehca_dbg(&shca->ib_device, "eqe_value=%llx", eqe_value); - if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) { - ehca_dbg(&shca->ib_device, "Got completion event"); - token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value); - read_lock(&ehca_cq_idr_lock); - cq = idr_find(&ehca_cq_idr, token); - if (cq) - atomic_inc(&cq->nr_events); - read_unlock(&ehca_cq_idr_lock); - if (cq == NULL) { - ehca_err(&shca->ib_device, - "Invalid eqe for non-existing cq token=%x", - token); - return; - } - reset_eq_pending(cq); - if (ehca_scaling_code) - queue_comp_task(cq); - else { - comp_event_callback(cq); - if (atomic_dec_and_test(&cq->nr_events)) - wake_up(&cq->wait_completion); - } - } else { - ehca_dbg(&shca->ib_device, "Got non completion event"); - parse_identifier(shca, eqe_value); - } -} - -void ehca_process_eq(struct ehca_shca *shca, int is_irq) -{ - struct ehca_eq *eq = &shca->eq; - struct ehca_eqe_cache_entry *eqe_cache = eq->eqe_cache; - u64 eqe_value, ret; - int eqe_cnt, i; - int eq_empty = 0; - - 
spin_lock(&eq->irq_spinlock); - if (is_irq) { - const int max_query_cnt = 100; - int query_cnt = 0; - int int_state = 1; - do { - int_state = hipz_h_query_int_state( - shca->ipz_hca_handle, eq->ist); - query_cnt++; - iosync(); - } while (int_state && query_cnt < max_query_cnt); - if (unlikely((query_cnt == max_query_cnt))) - ehca_dbg(&shca->ib_device, "int_state=%x query_cnt=%x", - int_state, query_cnt); - } - - /* read out all eqes */ - eqe_cnt = 0; - do { - u32 token; - eqe_cache[eqe_cnt].eqe = ehca_poll_eq(shca, eq); - if (!eqe_cache[eqe_cnt].eqe) - break; - eqe_value = eqe_cache[eqe_cnt].eqe->entry; - if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) { - token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value); - read_lock(&ehca_cq_idr_lock); - eqe_cache[eqe_cnt].cq = idr_find(&ehca_cq_idr, token); - if (eqe_cache[eqe_cnt].cq) - atomic_inc(&eqe_cache[eqe_cnt].cq->nr_events); - read_unlock(&ehca_cq_idr_lock); - if (!eqe_cache[eqe_cnt].cq) { - ehca_err(&shca->ib_device, - "Invalid eqe for non-existing cq " - "token=%x", token); - continue; - } - } else - eqe_cache[eqe_cnt].cq = NULL; - eqe_cnt++; - } while (eqe_cnt < EHCA_EQE_CACHE_SIZE); - if (!eqe_cnt) { - if (is_irq) - ehca_dbg(&shca->ib_device, - "No eqe found for irq event"); - goto unlock_irq_spinlock; - } else if (!is_irq) { - ret = hipz_h_eoi(eq->ist); - if (ret != H_SUCCESS) - ehca_err(&shca->ib_device, - "bad return code EOI -rc = %lld\n", ret); - ehca_dbg(&shca->ib_device, "deadman found %x eqe", eqe_cnt); - } - if (unlikely(eqe_cnt == EHCA_EQE_CACHE_SIZE)) - ehca_dbg(&shca->ib_device, "too many eqes for one irq event"); - /* enable irq for new packets */ - for (i = 0; i < eqe_cnt; i++) { - if (eq->eqe_cache[i].cq) - reset_eq_pending(eq->eqe_cache[i].cq); - } - /* check eq */ - spin_lock(&eq->spinlock); - eq_empty = (!ipz_eqit_eq_peek_valid(&shca->eq.ipz_queue)); - spin_unlock(&eq->spinlock); - /* call completion handler for cached eqes */ - for (i = 0; i < eqe_cnt; i++) - if (eq->eqe_cache[i].cq) { - if 
(ehca_scaling_code) - queue_comp_task(eq->eqe_cache[i].cq); - else { - struct ehca_cq *cq = eq->eqe_cache[i].cq; - comp_event_callback(cq); - if (atomic_dec_and_test(&cq->nr_events)) - wake_up(&cq->wait_completion); - } - } else { - ehca_dbg(&shca->ib_device, "Got non completion event"); - parse_identifier(shca, eq->eqe_cache[i].eqe->entry); - } - /* poll eq if not empty */ - if (eq_empty) - goto unlock_irq_spinlock; - do { - struct ehca_eqe *eqe; - eqe = ehca_poll_eq(shca, &shca->eq); - if (!eqe) - break; - process_eqe(shca, eqe); - } while (1); - -unlock_irq_spinlock: - spin_unlock(&eq->irq_spinlock); -} - -void ehca_tasklet_eq(unsigned long data) -{ - ehca_process_eq((struct ehca_shca*)data, 1); -} - -static int find_next_online_cpu(struct ehca_comp_pool *pool) -{ - int cpu; - unsigned long flags; - - WARN_ON_ONCE(!in_interrupt()); - if (ehca_debug_level >= 3) - ehca_dmp(cpu_online_mask, cpumask_size(), ""); - - spin_lock_irqsave(&pool->last_cpu_lock, flags); - do { - cpu = cpumask_next(pool->last_cpu, cpu_online_mask); - if (cpu >= nr_cpu_ids) - cpu = cpumask_first(cpu_online_mask); - pool->last_cpu = cpu; - } while (!per_cpu_ptr(pool->cpu_comp_tasks, cpu)->active); - spin_unlock_irqrestore(&pool->last_cpu_lock, flags); - - return cpu; -} - -static void __queue_comp_task(struct ehca_cq *__cq, - struct ehca_cpu_comp_task *cct, - struct task_struct *thread) -{ - unsigned long flags; - - spin_lock_irqsave(&cct->task_lock, flags); - spin_lock(&__cq->task_lock); - - if (__cq->nr_callbacks == 0) { - __cq->nr_callbacks++; - list_add_tail(&__cq->entry, &cct->cq_list); - cct->cq_jobs++; - wake_up_process(thread); - } else - __cq->nr_callbacks++; - - spin_unlock(&__cq->task_lock); - spin_unlock_irqrestore(&cct->task_lock, flags); -} - -static void queue_comp_task(struct ehca_cq *__cq) -{ - int cpu_id; - struct ehca_cpu_comp_task *cct; - struct task_struct *thread; - int cq_jobs; - unsigned long flags; - - cpu_id = find_next_online_cpu(pool); - 
BUG_ON(!cpu_online(cpu_id)); - - cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id); - thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu_id); - BUG_ON(!cct || !thread); - - spin_lock_irqsave(&cct->task_lock, flags); - cq_jobs = cct->cq_jobs; - spin_unlock_irqrestore(&cct->task_lock, flags); - if (cq_jobs > 0) { - cpu_id = find_next_online_cpu(pool); - cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id); - thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu_id); - BUG_ON(!cct || !thread); - } - __queue_comp_task(__cq, cct, thread); -} - -static void run_comp_task(struct ehca_cpu_comp_task *cct) -{ - struct ehca_cq *cq; - - while (!list_empty(&cct->cq_list)) { - cq = list_entry(cct->cq_list.next, struct ehca_cq, entry); - spin_unlock_irq(&cct->task_lock); - - comp_event_callback(cq); - if (atomic_dec_and_test(&cq->nr_events)) - wake_up(&cq->wait_completion); - - spin_lock_irq(&cct->task_lock); - spin_lock(&cq->task_lock); - cq->nr_callbacks--; - if (!cq->nr_callbacks) { - list_del_init(cct->cq_list.next); - cct->cq_jobs--; - } - spin_unlock(&cq->task_lock); - } -} - -static void comp_task_park(unsigned int cpu) -{ - struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); - struct ehca_cpu_comp_task *target; - struct task_struct *thread; - struct ehca_cq *cq, *tmp; - LIST_HEAD(list); - - spin_lock_irq(&cct->task_lock); - cct->cq_jobs = 0; - cct->active = 0; - list_splice_init(&cct->cq_list, &list); - spin_unlock_irq(&cct->task_lock); - - cpu = find_next_online_cpu(pool); - target = per_cpu_ptr(pool->cpu_comp_tasks, cpu); - thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu); - spin_lock_irq(&target->task_lock); - list_for_each_entry_safe(cq, tmp, &list, entry) { - list_del(&cq->entry); - __queue_comp_task(cq, target, thread); - } - spin_unlock_irq(&target->task_lock); -} - -static void comp_task_stop(unsigned int cpu, bool online) -{ - struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); - - spin_lock_irq(&cct->task_lock); - cct->cq_jobs = 
0; - cct->active = 0; - WARN_ON(!list_empty(&cct->cq_list)); - spin_unlock_irq(&cct->task_lock); -} - -static int comp_task_should_run(unsigned int cpu) -{ - struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); - - return cct->cq_jobs; -} - -static void comp_task(unsigned int cpu) -{ - struct ehca_cpu_comp_task *cct = this_cpu_ptr(pool->cpu_comp_tasks); - int cql_empty; - - spin_lock_irq(&cct->task_lock); - cql_empty = list_empty(&cct->cq_list); - if (!cql_empty) { - __set_current_state(TASK_RUNNING); - run_comp_task(cct); - } - spin_unlock_irq(&cct->task_lock); -} - -static struct smp_hotplug_thread comp_pool_threads = { - .thread_should_run = comp_task_should_run, - .thread_fn = comp_task, - .thread_comm = "ehca_comp/%u", - .cleanup = comp_task_stop, - .park = comp_task_park, -}; - -int ehca_create_comp_pool(void) -{ - int cpu, ret = -ENOMEM; - - if (!ehca_scaling_code) - return 0; - - pool = kzalloc(sizeof(struct ehca_comp_pool), GFP_KERNEL); - if (pool == NULL) - return -ENOMEM; - - spin_lock_init(&pool->last_cpu_lock); - pool->last_cpu = cpumask_any(cpu_online_mask); - - pool->cpu_comp_tasks = alloc_percpu(struct ehca_cpu_comp_task); - if (!pool->cpu_comp_tasks) - goto out_pool; - - pool->cpu_comp_threads = alloc_percpu(struct task_struct *); - if (!pool->cpu_comp_threads) - goto out_tasks; - - for_each_present_cpu(cpu) { - struct ehca_cpu_comp_task *cct; - - cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); - spin_lock_init(&cct->task_lock); - INIT_LIST_HEAD(&cct->cq_list); - } - - comp_pool_threads.store = pool->cpu_comp_threads; - ret = smpboot_register_percpu_thread(&comp_pool_threads); - if (ret) - goto out_threads; - - pr_info("eHCA scaling code enabled\n"); - return ret; - -out_threads: - free_percpu(pool->cpu_comp_threads); -out_tasks: - free_percpu(pool->cpu_comp_tasks); -out_pool: - kfree(pool); - return ret; -} - -void ehca_destroy_comp_pool(void) -{ - if (!ehca_scaling_code) - return; - - 
smpboot_unregister_percpu_thread(&comp_pool_threads); - - free_percpu(pool->cpu_comp_threads); - free_percpu(pool->cpu_comp_tasks); - kfree(pool); -} diff --git a/drivers/infiniband/hw/ehca/ehca_irq.h b/drivers/infiniband/hw/ehca/ehca_irq.h deleted file mode 100644 index 5370199..0000000 --- a/drivers/infiniband/hw/ehca/ehca_irq.h +++ /dev/null @@ -1,77 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * Function definitions and structs for EQs, NEQs and interrupts - * - * Authors: Heiko J Schick - * Khadija Souissi - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef __EHCA_IRQ_H -#define __EHCA_IRQ_H - - -struct ehca_shca; - -#include -#include - -int ehca_error_data(struct ehca_shca *shca, void *data, u64 resource); - -irqreturn_t ehca_interrupt_neq(int irq, void *dev_id); -void ehca_tasklet_neq(unsigned long data); - -irqreturn_t ehca_interrupt_eq(int irq, void *dev_id); -void ehca_tasklet_eq(unsigned long data); -void ehca_process_eq(struct ehca_shca *shca, int is_irq); - -struct ehca_cpu_comp_task { - struct list_head cq_list; - spinlock_t task_lock; - int cq_jobs; - int active; -}; - -struct ehca_comp_pool { - struct ehca_cpu_comp_task __percpu *cpu_comp_tasks; - struct task_struct * __percpu *cpu_comp_threads; - int last_cpu; - spinlock_t last_cpu_lock; -}; - -int ehca_create_comp_pool(void); -void ehca_destroy_comp_pool(void); - -#endif diff --git a/drivers/infiniband/hw/ehca/ehca_iverbs.h b/drivers/infiniband/hw/ehca/ehca_iverbs.h deleted file mode 100644 index 80e6a3d..0000000 --- a/drivers/infiniband/hw/ehca/ehca_iverbs.h +++ /dev/null @@ -1,218 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * Function definitions for internal functions - * - * Authors: Heiko J Schick - * Dietmar Decker - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. 
- * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef __EHCA_IVERBS_H__ -#define __EHCA_IVERBS_H__ - -#include "ehca_classes.h" - -int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props, - struct ib_udata *uhw); - -int ehca_query_port(struct ib_device *ibdev, u8 port, - struct ib_port_attr *props); - -enum rdma_protocol_type -ehca_query_protocol(struct ib_device *device, u8 port_num); - -int ehca_query_sma_attr(struct ehca_shca *shca, u8 port, - struct ehca_sma_attr *attr); - -int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 * pkey); - -int ehca_query_gid(struct ib_device *ibdev, u8 port, int index, - union ib_gid *gid); - -int ehca_modify_port(struct ib_device *ibdev, u8 port, int port_modify_mask, - struct ib_port_modify *props); - -struct ib_pd *ehca_alloc_pd(struct ib_device *device, - struct ib_ucontext *context, - struct ib_udata *udata); - -int ehca_dealloc_pd(struct ib_pd *pd); - -struct ib_ah *ehca_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr); - -int ehca_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr); - -int ehca_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr); - -int ehca_destroy_ah(struct ib_ah *ah); - -struct ib_mr *ehca_get_dma_mr(struct ib_pd *pd, int mr_access_flags); - -struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd, - struct ib_phys_buf *phys_buf_array, - int num_phys_buf, - int mr_access_flags, u64 *iova_start); - -struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, - u64 virt, int mr_access_flags, - struct ib_udata *udata); - -int ehca_rereg_phys_mr(struct ib_mr *mr, - int mr_rereg_mask, - struct ib_pd *pd, - struct ib_phys_buf *phys_buf_array, - int num_phys_buf, int mr_access_flags, u64 *iova_start); - -int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr); - -int ehca_dereg_mr(struct ib_mr *mr); - -struct ib_mw *ehca_alloc_mw(struct ib_pd *pd, enum ib_mw_type type); - -int ehca_bind_mw(struct ib_qp *qp, struct ib_mw *mw, - struct ib_mw_bind *mw_bind); - -int ehca_dealloc_mw(struct 
ib_mw *mw); - -struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd, - int mr_access_flags, - struct ib_fmr_attr *fmr_attr); - -int ehca_map_phys_fmr(struct ib_fmr *fmr, - u64 *page_list, int list_len, u64 iova); - -int ehca_unmap_fmr(struct list_head *fmr_list); - -int ehca_dealloc_fmr(struct ib_fmr *fmr); - -enum ehca_eq_type { - EHCA_EQ = 0, /* Event Queue */ - EHCA_NEQ /* Notification Event Queue */ -}; - -int ehca_create_eq(struct ehca_shca *shca, struct ehca_eq *eq, - enum ehca_eq_type type, const u32 length); - -int ehca_destroy_eq(struct ehca_shca *shca, struct ehca_eq *eq); - -void *ehca_poll_eq(struct ehca_shca *shca, struct ehca_eq *eq); - - -struct ib_cq *ehca_create_cq(struct ib_device *device, - const struct ib_cq_init_attr *attr, - struct ib_ucontext *context, - struct ib_udata *udata); - -int ehca_destroy_cq(struct ib_cq *cq); - -int ehca_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata); - -int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc); - -int ehca_peek_cq(struct ib_cq *cq, int wc_cnt); - -int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags notify_flags); - -struct ib_qp *ehca_create_qp(struct ib_pd *pd, - struct ib_qp_init_attr *init_attr, - struct ib_udata *udata); - -int ehca_destroy_qp(struct ib_qp *qp); - -int ehca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, - struct ib_udata *udata); - -int ehca_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, - int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr); - -int ehca_post_send(struct ib_qp *qp, struct ib_send_wr *send_wr, - struct ib_send_wr **bad_send_wr); - -int ehca_post_recv(struct ib_qp *qp, struct ib_recv_wr *recv_wr, - struct ib_recv_wr **bad_recv_wr); - -int ehca_post_srq_recv(struct ib_srq *srq, - struct ib_recv_wr *recv_wr, - struct ib_recv_wr **bad_recv_wr); - -struct ib_srq *ehca_create_srq(struct ib_pd *pd, - struct ib_srq_init_attr *init_attr, - struct ib_udata *udata); - -int ehca_modify_srq(struct ib_srq 
*srq, struct ib_srq_attr *attr, - enum ib_srq_attr_mask attr_mask, struct ib_udata *udata); - -int ehca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr); - -int ehca_destroy_srq(struct ib_srq *srq); - -u64 ehca_define_sqp(struct ehca_shca *shca, struct ehca_qp *ibqp, - struct ib_qp_init_attr *qp_init_attr); - -int ehca_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid); - -int ehca_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid); - -struct ib_ucontext *ehca_alloc_ucontext(struct ib_device *device, - struct ib_udata *udata); - -int ehca_dealloc_ucontext(struct ib_ucontext *context); - -int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); - -int ehca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, - const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const struct ib_mad_hdr *in, size_t in_mad_size, - struct ib_mad_hdr *out, size_t *out_mad_size, - u16 *out_mad_pkey_index); - -void ehca_poll_eqs(unsigned long data); - -int ehca_calc_ipd(struct ehca_shca *shca, int port, - enum ib_rate path_rate, u32 *ipd); - -void ehca_add_to_err_list(struct ehca_qp *qp, int on_sq); - -#ifdef CONFIG_PPC_64K_PAGES -void *ehca_alloc_fw_ctrlblock(gfp_t flags); -void ehca_free_fw_ctrlblock(void *ptr); -#else -#define ehca_alloc_fw_ctrlblock(flags) ((void *)get_zeroed_page(flags)) -#define ehca_free_fw_ctrlblock(ptr) free_page((unsigned long)(ptr)) -#endif - -void ehca_recover_sqp(struct ib_qp *sqp); - -#endif diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c deleted file mode 100644 index 8246418..0000000 --- a/drivers/infiniband/hw/ehca/ehca_main.c +++ /dev/null @@ -1,1123 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * module start stop, hca detection - * - * Authors: Heiko J Schick - * Hoang-Nam Nguyen - * Joachim Fenkes - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. 
- * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifdef CONFIG_PPC_64K_PAGES -#include -#endif - -#include -#include -#include -#include "ehca_classes.h" -#include "ehca_iverbs.h" -#include "ehca_mrmw.h" -#include "ehca_tools.h" -#include "hcp_if.h" - -#define HCAD_VERSION "0029" - -MODULE_LICENSE("Dual BSD/GPL"); -MODULE_AUTHOR("Christoph Raisch "); -MODULE_DESCRIPTION("IBM eServer HCA InfiniBand Device Driver"); -MODULE_VERSION(HCAD_VERSION); - -static bool ehca_open_aqp1 = 0; -static int ehca_hw_level = 0; -static bool ehca_poll_all_eqs = 1; - -int ehca_debug_level = 0; -int ehca_nr_ports = -1; -bool ehca_use_hp_mr = 0; -int ehca_port_act_time = 30; -int ehca_static_rate = -1; -bool ehca_scaling_code = 0; -int ehca_lock_hcalls = -1; -int ehca_max_cq = -1; -int ehca_max_qp = -1; - -module_param_named(open_aqp1, ehca_open_aqp1, bool, S_IRUGO); -module_param_named(debug_level, ehca_debug_level, int, S_IRUGO); -module_param_named(hw_level, ehca_hw_level, int, S_IRUGO); -module_param_named(nr_ports, ehca_nr_ports, int, S_IRUGO); -module_param_named(use_hp_mr, ehca_use_hp_mr, bool, S_IRUGO); -module_param_named(port_act_time, ehca_port_act_time, int, S_IRUGO); -module_param_named(poll_all_eqs, ehca_poll_all_eqs, bool, S_IRUGO); -module_param_named(static_rate, ehca_static_rate, int, S_IRUGO); -module_param_named(scaling_code, ehca_scaling_code, bool, S_IRUGO); -module_param_named(lock_hcalls, ehca_lock_hcalls, bint, S_IRUGO); -module_param_named(number_of_cqs, ehca_max_cq, int, S_IRUGO); -module_param_named(number_of_qps, ehca_max_qp, int, S_IRUGO); - -MODULE_PARM_DESC(open_aqp1, - "Open AQP1 on startup (default: no)"); -MODULE_PARM_DESC(debug_level, - "Amount of debug output (0: none (default), 1: traces, " - "2: some dumps, 3: lots)"); -MODULE_PARM_DESC(hw_level, - "Hardware level (0: autosensing (default), " - "0x10..0x14: eHCA, 0x20..0x23: eHCA2)"); -MODULE_PARM_DESC(nr_ports, - "number of connected ports (-1: autodetect (default), " - "1: port one only, 2: two ports)"); -MODULE_PARM_DESC(use_hp_mr, - 
"Use high performance MRs (default: no)"); -MODULE_PARM_DESC(port_act_time, - "Time to wait for port activation (default: 30 sec)"); -MODULE_PARM_DESC(poll_all_eqs, - "Poll all event queues periodically (default: yes)"); -MODULE_PARM_DESC(static_rate, - "Set permanent static rate (default: no static rate)"); -MODULE_PARM_DESC(scaling_code, - "Enable scaling code (default: no)"); -MODULE_PARM_DESC(lock_hcalls, - "Serialize all hCalls made by the driver " - "(default: autodetect)"); -MODULE_PARM_DESC(number_of_cqs, - "Max number of CQs which can be allocated " - "(default: autodetect)"); -MODULE_PARM_DESC(number_of_qps, - "Max number of QPs which can be allocated " - "(default: autodetect)"); - -DEFINE_RWLOCK(ehca_qp_idr_lock); -DEFINE_RWLOCK(ehca_cq_idr_lock); -DEFINE_IDR(ehca_qp_idr); -DEFINE_IDR(ehca_cq_idr); - -static LIST_HEAD(shca_list); /* list of all registered ehcas */ -DEFINE_SPINLOCK(shca_list_lock); - -static struct timer_list poll_eqs_timer; - -#ifdef CONFIG_PPC_64K_PAGES -static struct kmem_cache *ctblk_cache; - -void *ehca_alloc_fw_ctrlblock(gfp_t flags) -{ - void *ret = kmem_cache_zalloc(ctblk_cache, flags); - if (!ret) - ehca_gen_err("Out of memory for ctblk"); - return ret; -} - -void ehca_free_fw_ctrlblock(void *ptr) -{ - if (ptr) - kmem_cache_free(ctblk_cache, ptr); - -} -#endif - -int ehca2ib_return_code(u64 ehca_rc) -{ - switch (ehca_rc) { - case H_SUCCESS: - return 0; - case H_RESOURCE: /* Resource in use */ - case H_BUSY: - return -EBUSY; - case H_NOT_ENOUGH_RESOURCES: /* insufficient resources */ - case H_CONSTRAINED: /* resource constraint */ - case H_NO_MEM: - return -ENOMEM; - default: - return -EINVAL; - } -} - -static int ehca_create_slab_caches(void) -{ - int ret; - - ret = ehca_init_pd_cache(); - if (ret) { - ehca_gen_err("Cannot create PD SLAB cache."); - return ret; - } - - ret = ehca_init_cq_cache(); - if (ret) { - ehca_gen_err("Cannot create CQ SLAB cache."); - goto create_slab_caches2; - } - - ret = ehca_init_qp_cache(); - if 
(ret) { - ehca_gen_err("Cannot create QP SLAB cache."); - goto create_slab_caches3; - } - - ret = ehca_init_av_cache(); - if (ret) { - ehca_gen_err("Cannot create AV SLAB cache."); - goto create_slab_caches4; - } - - ret = ehca_init_mrmw_cache(); - if (ret) { - ehca_gen_err("Cannot create MR&MW SLAB cache."); - goto create_slab_caches5; - } - - ret = ehca_init_small_qp_cache(); - if (ret) { - ehca_gen_err("Cannot create small queue SLAB cache."); - goto create_slab_caches6; - } - -#ifdef CONFIG_PPC_64K_PAGES - ctblk_cache = kmem_cache_create("ehca_cache_ctblk", - EHCA_PAGESIZE, H_CB_ALIGNMENT, - SLAB_HWCACHE_ALIGN, - NULL); - if (!ctblk_cache) { - ehca_gen_err("Cannot create ctblk SLAB cache."); - ehca_cleanup_small_qp_cache(); - ret = -ENOMEM; - goto create_slab_caches6; - } -#endif - return 0; - -create_slab_caches6: - ehca_cleanup_mrmw_cache(); - -create_slab_caches5: - ehca_cleanup_av_cache(); - -create_slab_caches4: - ehca_cleanup_qp_cache(); - -create_slab_caches3: - ehca_cleanup_cq_cache(); - -create_slab_caches2: - ehca_cleanup_pd_cache(); - - return ret; -} - -static void ehca_destroy_slab_caches(void) -{ - ehca_cleanup_small_qp_cache(); - ehca_cleanup_mrmw_cache(); - ehca_cleanup_av_cache(); - ehca_cleanup_qp_cache(); - ehca_cleanup_cq_cache(); - ehca_cleanup_pd_cache(); -#ifdef CONFIG_PPC_64K_PAGES - if (ctblk_cache) - kmem_cache_destroy(ctblk_cache); -#endif -} - -#define EHCA_HCAAVER EHCA_BMASK_IBM(32, 39) -#define EHCA_REVID EHCA_BMASK_IBM(40, 63) - -static struct cap_descr { - u64 mask; - char *descr; -} hca_cap_descr[] = { - { HCA_CAP_AH_PORT_NR_CHECK, "HCA_CAP_AH_PORT_NR_CHECK" }, - { HCA_CAP_ATOMIC, "HCA_CAP_ATOMIC" }, - { HCA_CAP_AUTO_PATH_MIG, "HCA_CAP_AUTO_PATH_MIG" }, - { HCA_CAP_BAD_P_KEY_CTR, "HCA_CAP_BAD_P_KEY_CTR" }, - { HCA_CAP_SQD_RTS_PORT_CHANGE, "HCA_CAP_SQD_RTS_PORT_CHANGE" }, - { HCA_CAP_CUR_QP_STATE_MOD, "HCA_CAP_CUR_QP_STATE_MOD" }, - { HCA_CAP_INIT_TYPE, "HCA_CAP_INIT_TYPE" }, - { HCA_CAP_PORT_ACTIVE_EVENT, 
"HCA_CAP_PORT_ACTIVE_EVENT" }, - { HCA_CAP_Q_KEY_VIOL_CTR, "HCA_CAP_Q_KEY_VIOL_CTR" }, - { HCA_CAP_WQE_RESIZE, "HCA_CAP_WQE_RESIZE" }, - { HCA_CAP_RAW_PACKET_MCAST, "HCA_CAP_RAW_PACKET_MCAST" }, - { HCA_CAP_SHUTDOWN_PORT, "HCA_CAP_SHUTDOWN_PORT" }, - { HCA_CAP_RC_LL_QP, "HCA_CAP_RC_LL_QP" }, - { HCA_CAP_SRQ, "HCA_CAP_SRQ" }, - { HCA_CAP_UD_LL_QP, "HCA_CAP_UD_LL_QP" }, - { HCA_CAP_RESIZE_MR, "HCA_CAP_RESIZE_MR" }, - { HCA_CAP_MINI_QP, "HCA_CAP_MINI_QP" }, - { HCA_CAP_H_ALLOC_RES_SYNC, "HCA_CAP_H_ALLOC_RES_SYNC" }, -}; - -static int ehca_sense_attributes(struct ehca_shca *shca) -{ - int i, ret = 0; - u64 h_ret; - struct hipz_query_hca *rblock; - struct hipz_query_port *port; - const char *loc_code; - - static const u32 pgsize_map[] = { - HCA_CAP_MR_PGSIZE_4K, 0x1000, - HCA_CAP_MR_PGSIZE_64K, 0x10000, - HCA_CAP_MR_PGSIZE_1M, 0x100000, - HCA_CAP_MR_PGSIZE_16M, 0x1000000, - }; - - ehca_gen_dbg("Probing adapter %s...", - shca->ofdev->dev.of_node->full_name); - loc_code = of_get_property(shca->ofdev->dev.of_node, "ibm,loc-code", - NULL); - if (loc_code) - ehca_gen_dbg(" ... location lode=%s", loc_code); - - rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); - if (!rblock) { - ehca_gen_err("Cannot allocate rblock memory."); - return -ENOMEM; - } - - h_ret = hipz_h_query_hca(shca->ipz_hca_handle, rblock); - if (h_ret != H_SUCCESS) { - ehca_gen_err("Cannot query device properties. h_ret=%lli", - h_ret); - ret = -EPERM; - goto sense_attributes1; - } - - if (ehca_nr_ports == 1) - shca->num_ports = 1; - else - shca->num_ports = (u8)rblock->num_ports; - - ehca_gen_dbg(" ... found %x ports", rblock->num_ports); - - if (ehca_hw_level == 0) { - u32 hcaaver; - u32 revid; - - hcaaver = EHCA_BMASK_GET(EHCA_HCAAVER, rblock->hw_ver); - revid = EHCA_BMASK_GET(EHCA_REVID, rblock->hw_ver); - - ehca_gen_dbg(" ... 
hardware version=%x:%x", hcaaver, revid); - - if (hcaaver == 1) { - if (revid <= 3) - shca->hw_level = 0x10 | (revid + 1); - else - shca->hw_level = 0x14; - } else if (hcaaver == 2) { - if (revid == 0) - shca->hw_level = 0x21; - else if (revid == 0x10) - shca->hw_level = 0x22; - else if (revid == 0x20 || revid == 0x21) - shca->hw_level = 0x23; - } - - if (!shca->hw_level) { - ehca_gen_warn("unknown hardware version" - " - assuming default level"); - shca->hw_level = 0x22; - } - } else - shca->hw_level = ehca_hw_level; - ehca_gen_dbg(" ... hardware level=%x", shca->hw_level); - - shca->hca_cap = rblock->hca_cap_indicators; - ehca_gen_dbg(" ... HCA capabilities:"); - for (i = 0; i < ARRAY_SIZE(hca_cap_descr); i++) - if (EHCA_BMASK_GET(hca_cap_descr[i].mask, shca->hca_cap)) - ehca_gen_dbg(" %s", hca_cap_descr[i].descr); - - /* Autodetect hCall locking -- the "H_ALLOC_RESOURCE synced" flag is - * a firmware property, so it's valid across all adapters - */ - if (ehca_lock_hcalls == -1) - ehca_lock_hcalls = !EHCA_BMASK_GET(HCA_CAP_H_ALLOC_RES_SYNC, - shca->hca_cap); - - /* translate supported MR page sizes; always support 4K */ - shca->hca_cap_mr_pgsize = EHCA_PAGESIZE; - for (i = 0; i < ARRAY_SIZE(pgsize_map); i += 2) - if (rblock->memory_page_size_supported & pgsize_map[i]) - shca->hca_cap_mr_pgsize |= pgsize_map[i + 1]; - - /* Set maximum number of CQs and QPs to calculate EQ size */ - if (shca->max_num_qps == -1) - shca->max_num_qps = min_t(int, rblock->max_qp, - EHCA_MAX_NUM_QUEUES); - else if (shca->max_num_qps < 1 || shca->max_num_qps > rblock->max_qp) { - ehca_gen_warn("The requested number of QPs is out of range " - "(1 - %i) specified by HW. 
Value is set to %i", - rblock->max_qp, rblock->max_qp); - shca->max_num_qps = rblock->max_qp; - } - - if (shca->max_num_cqs == -1) - shca->max_num_cqs = min_t(int, rblock->max_cq, - EHCA_MAX_NUM_QUEUES); - else if (shca->max_num_cqs < 1 || shca->max_num_cqs > rblock->max_cq) { - ehca_gen_warn("The requested number of CQs is out of range " - "(1 - %i) specified by HW. Value is set to %i", - rblock->max_cq, rblock->max_cq); - } - - /* query max MTU from first port -- it's the same for all ports */ - port = (struct hipz_query_port *)rblock; - h_ret = hipz_h_query_port(shca->ipz_hca_handle, 1, port); - if (h_ret != H_SUCCESS) { - ehca_gen_err("Cannot query port properties. h_ret=%lli", - h_ret); - ret = -EPERM; - goto sense_attributes1; - } - - shca->max_mtu = port->max_mtu; - -sense_attributes1: - ehca_free_fw_ctrlblock(rblock); - return ret; -} - -static int init_node_guid(struct ehca_shca *shca) -{ - int ret = 0; - struct hipz_query_hca *rblock; - - rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); - if (!rblock) { - ehca_err(&shca->ib_device, "Can't allocate rblock memory."); - return -ENOMEM; - } - - if (hipz_h_query_hca(shca->ipz_hca_handle, rblock) != H_SUCCESS) { - ehca_err(&shca->ib_device, "Can't query device properties"); - ret = -EINVAL; - goto init_node_guid1; - } - - memcpy(&shca->ib_device.node_guid, &rblock->node_guid, sizeof(u64)); - -init_node_guid1: - ehca_free_fw_ctrlblock(rblock); - return ret; -} - -static int ehca_port_immutable(struct ib_device *ibdev, u8 port_num, - struct ib_port_immutable *immutable) -{ - struct ib_port_attr attr; - int err; - - err = ehca_query_port(ibdev, port_num, &attr); - if (err) - return err; - - immutable->pkey_tbl_len = attr.pkey_tbl_len; - immutable->gid_tbl_len = attr.gid_tbl_len; - immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB; - immutable->max_mad_size = IB_MGMT_MAD_SIZE; - - return 0; -} - -static int ehca_init_device(struct ehca_shca *shca) -{ - int ret; - - ret = init_node_guid(shca); - if (ret) - return ret; 
- - strlcpy(shca->ib_device.name, "ehca%d", IB_DEVICE_NAME_MAX); - shca->ib_device.owner = THIS_MODULE; - - shca->ib_device.uverbs_abi_ver = 8; - shca->ib_device.uverbs_cmd_mask = - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_REG_MR) | - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_CMD_QUERY_QP) | - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | - (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | - (1ull << IB_USER_VERBS_CMD_DETACH_MCAST); - - shca->ib_device.node_type = RDMA_NODE_IB_CA; - shca->ib_device.phys_port_cnt = shca->num_ports; - shca->ib_device.num_comp_vectors = 1; - shca->ib_device.dma_device = &shca->ofdev->dev; - shca->ib_device.query_device = ehca_query_device; - shca->ib_device.query_port = ehca_query_port; - shca->ib_device.query_gid = ehca_query_gid; - shca->ib_device.query_pkey = ehca_query_pkey; - /* shca->in_device.modify_device = ehca_modify_device */ - shca->ib_device.modify_port = ehca_modify_port; - shca->ib_device.alloc_ucontext = ehca_alloc_ucontext; - shca->ib_device.dealloc_ucontext = ehca_dealloc_ucontext; - shca->ib_device.alloc_pd = ehca_alloc_pd; - shca->ib_device.dealloc_pd = ehca_dealloc_pd; - shca->ib_device.create_ah = ehca_create_ah; - /* shca->ib_device.modify_ah = ehca_modify_ah; */ - shca->ib_device.query_ah = ehca_query_ah; - shca->ib_device.destroy_ah = ehca_destroy_ah; - shca->ib_device.create_qp = ehca_create_qp; - shca->ib_device.modify_qp = ehca_modify_qp; - shca->ib_device.query_qp = ehca_query_qp; - shca->ib_device.destroy_qp = ehca_destroy_qp; - shca->ib_device.post_send = ehca_post_send; - 
shca->ib_device.post_recv = ehca_post_recv; - shca->ib_device.create_cq = ehca_create_cq; - shca->ib_device.destroy_cq = ehca_destroy_cq; - shca->ib_device.resize_cq = ehca_resize_cq; - shca->ib_device.poll_cq = ehca_poll_cq; - /* shca->ib_device.peek_cq = ehca_peek_cq; */ - shca->ib_device.req_notify_cq = ehca_req_notify_cq; - /* shca->ib_device.req_ncomp_notif = ehca_req_ncomp_notif; */ - shca->ib_device.get_dma_mr = ehca_get_dma_mr; - shca->ib_device.reg_phys_mr = ehca_reg_phys_mr; - shca->ib_device.reg_user_mr = ehca_reg_user_mr; - shca->ib_device.query_mr = ehca_query_mr; - shca->ib_device.dereg_mr = ehca_dereg_mr; - shca->ib_device.rereg_phys_mr = ehca_rereg_phys_mr; - shca->ib_device.alloc_mw = ehca_alloc_mw; - shca->ib_device.bind_mw = ehca_bind_mw; - shca->ib_device.dealloc_mw = ehca_dealloc_mw; - shca->ib_device.alloc_fmr = ehca_alloc_fmr; - shca->ib_device.map_phys_fmr = ehca_map_phys_fmr; - shca->ib_device.unmap_fmr = ehca_unmap_fmr; - shca->ib_device.dealloc_fmr = ehca_dealloc_fmr; - shca->ib_device.attach_mcast = ehca_attach_mcast; - shca->ib_device.detach_mcast = ehca_detach_mcast; - shca->ib_device.process_mad = ehca_process_mad; - shca->ib_device.mmap = ehca_mmap; - shca->ib_device.dma_ops = &ehca_dma_mapping_ops; - shca->ib_device.get_port_immutable = ehca_port_immutable; - - if (EHCA_BMASK_GET(HCA_CAP_SRQ, shca->hca_cap)) { - shca->ib_device.uverbs_cmd_mask |= - (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | - (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | - (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ); - - shca->ib_device.create_srq = ehca_create_srq; - shca->ib_device.modify_srq = ehca_modify_srq; - shca->ib_device.query_srq = ehca_query_srq; - shca->ib_device.destroy_srq = ehca_destroy_srq; - shca->ib_device.post_srq_recv = ehca_post_srq_recv; - } - - return ret; -} - -static int ehca_create_aqp1(struct ehca_shca *shca, u32 port) -{ - struct ehca_sport *sport = &shca->sport[port - 1]; - struct ib_cq *ibcq; - struct 
ib_qp *ibqp; - struct ib_qp_init_attr qp_init_attr; - struct ib_cq_init_attr cq_attr = {}; - int ret; - - if (sport->ibcq_aqp1) { - ehca_err(&shca->ib_device, "AQP1 CQ is already created."); - return -EPERM; - } - - cq_attr.cqe = 10; - ibcq = ib_create_cq(&shca->ib_device, NULL, NULL, (void *)(-1), - &cq_attr); - if (IS_ERR(ibcq)) { - ehca_err(&shca->ib_device, "Cannot create AQP1 CQ."); - return PTR_ERR(ibcq); - } - sport->ibcq_aqp1 = ibcq; - - if (sport->ibqp_sqp[IB_QPT_GSI]) { - ehca_err(&shca->ib_device, "AQP1 QP is already created."); - ret = -EPERM; - goto create_aqp1; - } - - memset(&qp_init_attr, 0, sizeof(struct ib_qp_init_attr)); - qp_init_attr.send_cq = ibcq; - qp_init_attr.recv_cq = ibcq; - qp_init_attr.sq_sig_type = IB_SIGNAL_ALL_WR; - qp_init_attr.cap.max_send_wr = 100; - qp_init_attr.cap.max_recv_wr = 100; - qp_init_attr.cap.max_send_sge = 2; - qp_init_attr.cap.max_recv_sge = 1; - qp_init_attr.qp_type = IB_QPT_GSI; - qp_init_attr.port_num = port; - qp_init_attr.qp_context = NULL; - qp_init_attr.event_handler = NULL; - qp_init_attr.srq = NULL; - - ibqp = ib_create_qp(&shca->pd->ib_pd, &qp_init_attr); - if (IS_ERR(ibqp)) { - ehca_err(&shca->ib_device, "Cannot create AQP1 QP."); - ret = PTR_ERR(ibqp); - goto create_aqp1; - } - sport->ibqp_sqp[IB_QPT_GSI] = ibqp; - - return 0; - -create_aqp1: - ib_destroy_cq(sport->ibcq_aqp1); - return ret; -} - -static int ehca_destroy_aqp1(struct ehca_sport *sport) -{ - int ret; - - ret = ib_destroy_qp(sport->ibqp_sqp[IB_QPT_GSI]); - if (ret) { - ehca_gen_err("Cannot destroy AQP1 QP. ret=%i", ret); - return ret; - } - - ret = ib_destroy_cq(sport->ibcq_aqp1); - if (ret) - ehca_gen_err("Cannot destroy AQP1 CQ. 
ret=%i", ret); - - return ret; -} - -static ssize_t ehca_show_debug_level(struct device_driver *ddp, char *buf) -{ - return snprintf(buf, PAGE_SIZE, "%d\n", ehca_debug_level); -} - -static ssize_t ehca_store_debug_level(struct device_driver *ddp, - const char *buf, size_t count) -{ - int value = (*buf) - '0'; - if (value >= 0 && value <= 9) - ehca_debug_level = value; - return 1; -} - -static DRIVER_ATTR(debug_level, S_IRUSR | S_IWUSR, - ehca_show_debug_level, ehca_store_debug_level); - -static struct attribute *ehca_drv_attrs[] = { - &driver_attr_debug_level.attr, - NULL -}; - -static struct attribute_group ehca_drv_attr_grp = { - .attrs = ehca_drv_attrs -}; - -static const struct attribute_group *ehca_drv_attr_groups[] = { - &ehca_drv_attr_grp, - NULL, -}; - -#define EHCA_RESOURCE_ATTR(name) \ -static ssize_t ehca_show_##name(struct device *dev, \ - struct device_attribute *attr, \ - char *buf) \ -{ \ - struct ehca_shca *shca; \ - struct hipz_query_hca *rblock; \ - int data; \ - \ - shca = dev_get_drvdata(dev); \ - \ - rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); \ - if (!rblock) { \ - dev_err(dev, "Can't allocate rblock memory.\n"); \ - return 0; \ - } \ - \ - if (hipz_h_query_hca(shca->ipz_hca_handle, rblock) != H_SUCCESS) { \ - dev_err(dev, "Can't query device properties\n"); \ - ehca_free_fw_ctrlblock(rblock); \ - return 0; \ - } \ - \ - data = rblock->name; \ - ehca_free_fw_ctrlblock(rblock); \ - \ - if ((strcmp(#name, "num_ports") == 0) && (ehca_nr_ports == 1)) \ - return snprintf(buf, 256, "1\n"); \ - else \ - return snprintf(buf, 256, "%d\n", data); \ - \ -} \ -static DEVICE_ATTR(name, S_IRUGO, ehca_show_##name, NULL); - -EHCA_RESOURCE_ATTR(num_ports); -EHCA_RESOURCE_ATTR(hw_ver); -EHCA_RESOURCE_ATTR(max_eq); -EHCA_RESOURCE_ATTR(cur_eq); -EHCA_RESOURCE_ATTR(max_cq); -EHCA_RESOURCE_ATTR(cur_cq); -EHCA_RESOURCE_ATTR(max_qp); -EHCA_RESOURCE_ATTR(cur_qp); -EHCA_RESOURCE_ATTR(max_mr); -EHCA_RESOURCE_ATTR(cur_mr); -EHCA_RESOURCE_ATTR(max_mw); 
-EHCA_RESOURCE_ATTR(cur_mw); -EHCA_RESOURCE_ATTR(max_pd); -EHCA_RESOURCE_ATTR(max_ah); - -static ssize_t ehca_show_adapter_handle(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct ehca_shca *shca = dev_get_drvdata(dev); - - return sprintf(buf, "%llx\n", shca->ipz_hca_handle.handle); - -} -static DEVICE_ATTR(adapter_handle, S_IRUGO, ehca_show_adapter_handle, NULL); - -static struct attribute *ehca_dev_attrs[] = { - &dev_attr_adapter_handle.attr, - &dev_attr_num_ports.attr, - &dev_attr_hw_ver.attr, - &dev_attr_max_eq.attr, - &dev_attr_cur_eq.attr, - &dev_attr_max_cq.attr, - &dev_attr_cur_cq.attr, - &dev_attr_max_qp.attr, - &dev_attr_cur_qp.attr, - &dev_attr_max_mr.attr, - &dev_attr_cur_mr.attr, - &dev_attr_max_mw.attr, - &dev_attr_cur_mw.attr, - &dev_attr_max_pd.attr, - &dev_attr_max_ah.attr, - NULL -}; - -static struct attribute_group ehca_dev_attr_grp = { - .attrs = ehca_dev_attrs -}; - -static int ehca_probe(struct platform_device *dev) -{ - struct ehca_shca *shca; - const u64 *handle; - struct ib_pd *ibpd; - int ret, i, eq_size; - unsigned long flags; - - handle = of_get_property(dev->dev.of_node, "ibm,hca-handle", NULL); - if (!handle) { - ehca_gen_err("Cannot get eHCA handle for adapter: %s.", - dev->dev.of_node->full_name); - return -ENODEV; - } - - if (!(*handle)) { - ehca_gen_err("Wrong eHCA handle for adapter: %s.", - dev->dev.of_node->full_name); - return -ENODEV; - } - - shca = (struct ehca_shca *)ib_alloc_device(sizeof(*shca)); - if (!shca) { - ehca_gen_err("Cannot allocate shca memory."); - return -ENOMEM; - } - - mutex_init(&shca->modify_mutex); - atomic_set(&shca->num_cqs, 0); - atomic_set(&shca->num_qps, 0); - shca->max_num_qps = ehca_max_qp; - shca->max_num_cqs = ehca_max_cq; - - for (i = 0; i < ARRAY_SIZE(shca->sport); i++) - spin_lock_init(&shca->sport[i].mod_sqp_lock); - - shca->ofdev = dev; - shca->ipz_hca_handle.handle = *handle; - dev_set_drvdata(&dev->dev, shca); - - ret = ehca_sense_attributes(shca); - if (ret < 
0) { - ehca_gen_err("Cannot sense eHCA attributes."); - goto probe1; - } - - ret = ehca_init_device(shca); - if (ret) { - ehca_gen_err("Cannot init ehca device struct"); - goto probe1; - } - - eq_size = 2 * shca->max_num_cqs + 4 * shca->max_num_qps; - /* create event queues */ - ret = ehca_create_eq(shca, &shca->eq, EHCA_EQ, eq_size); - if (ret) { - ehca_err(&shca->ib_device, "Cannot create EQ."); - goto probe1; - } - - ret = ehca_create_eq(shca, &shca->neq, EHCA_NEQ, 513); - if (ret) { - ehca_err(&shca->ib_device, "Cannot create NEQ."); - goto probe3; - } - - /* create internal protection domain */ - ibpd = ehca_alloc_pd(&shca->ib_device, (void *)(-1), NULL); - if (IS_ERR(ibpd)) { - ehca_err(&shca->ib_device, "Cannot create internal PD."); - ret = PTR_ERR(ibpd); - goto probe4; - } - - shca->pd = container_of(ibpd, struct ehca_pd, ib_pd); - shca->pd->ib_pd.device = &shca->ib_device; - - /* create internal max MR */ - ret = ehca_reg_internal_maxmr(shca, shca->pd, &shca->maxmr); - - if (ret) { - ehca_err(&shca->ib_device, "Cannot create internal MR ret=%i", - ret); - goto probe5; - } - - ret = ib_register_device(&shca->ib_device, NULL); - if (ret) { - ehca_err(&shca->ib_device, - "ib_register_device() failed ret=%i", ret); - goto probe6; - } - - /* create AQP1 for port 1 */ - if (ehca_open_aqp1 == 1) { - shca->sport[0].port_state = IB_PORT_DOWN; - ret = ehca_create_aqp1(shca, 1); - if (ret) { - ehca_err(&shca->ib_device, - "Cannot create AQP1 for port 1."); - goto probe7; - } - } - - /* create AQP1 for port 2 */ - if ((ehca_open_aqp1 == 1) && (shca->num_ports == 2)) { - shca->sport[1].port_state = IB_PORT_DOWN; - ret = ehca_create_aqp1(shca, 2); - if (ret) { - ehca_err(&shca->ib_device, - "Cannot create AQP1 for port 2."); - goto probe8; - } - } - - ret = sysfs_create_group(&dev->dev.kobj, &ehca_dev_attr_grp); - if (ret) /* only complain; we can live without attributes */ - ehca_err(&shca->ib_device, - "Cannot create device attributes ret=%d", ret); - - 
spin_lock_irqsave(&shca_list_lock, flags); - list_add(&shca->shca_list, &shca_list); - spin_unlock_irqrestore(&shca_list_lock, flags); - - return 0; - -probe8: - ret = ehca_destroy_aqp1(&shca->sport[0]); - if (ret) - ehca_err(&shca->ib_device, - "Cannot destroy AQP1 for port 1. ret=%i", ret); - -probe7: - ib_unregister_device(&shca->ib_device); - -probe6: - ret = ehca_dereg_internal_maxmr(shca); - if (ret) - ehca_err(&shca->ib_device, - "Cannot destroy internal MR. ret=%x", ret); - -probe5: - ret = ehca_dealloc_pd(&shca->pd->ib_pd); - if (ret) - ehca_err(&shca->ib_device, - "Cannot destroy internal PD. ret=%x", ret); - -probe4: - ret = ehca_destroy_eq(shca, &shca->neq); - if (ret) - ehca_err(&shca->ib_device, - "Cannot destroy NEQ. ret=%x", ret); - -probe3: - ret = ehca_destroy_eq(shca, &shca->eq); - if (ret) - ehca_err(&shca->ib_device, - "Cannot destroy EQ. ret=%x", ret); - -probe1: - ib_dealloc_device(&shca->ib_device); - - return -EINVAL; -} - -static int ehca_remove(struct platform_device *dev) -{ - struct ehca_shca *shca = dev_get_drvdata(&dev->dev); - unsigned long flags; - int ret; - - sysfs_remove_group(&dev->dev.kobj, &ehca_dev_attr_grp); - - if (ehca_open_aqp1 == 1) { - int i; - for (i = 0; i < shca->num_ports; i++) { - ret = ehca_destroy_aqp1(&shca->sport[i]); - if (ret) - ehca_err(&shca->ib_device, - "Cannot destroy AQP1 for port %x " - "ret=%i", ret, i); - } - } - - ib_unregister_device(&shca->ib_device); - - ret = ehca_dereg_internal_maxmr(shca); - if (ret) - ehca_err(&shca->ib_device, - "Cannot destroy internal MR. ret=%i", ret); - - ret = ehca_dealloc_pd(&shca->pd->ib_pd); - if (ret) - ehca_err(&shca->ib_device, - "Cannot destroy internal PD. ret=%i", ret); - - ret = ehca_destroy_eq(shca, &shca->eq); - if (ret) - ehca_err(&shca->ib_device, "Cannot destroy EQ. ret=%i", ret); - - ret = ehca_destroy_eq(shca, &shca->neq); - if (ret) - ehca_err(&shca->ib_device, "Canot destroy NEQ. 
ret=%i", ret); - - ib_dealloc_device(&shca->ib_device); - - spin_lock_irqsave(&shca_list_lock, flags); - list_del(&shca->shca_list); - spin_unlock_irqrestore(&shca_list_lock, flags); - - return ret; -} - -static struct of_device_id ehca_device_table[] = -{ - { - .name = "lhca", - .compatible = "IBM,lhca", - }, - {}, -}; -MODULE_DEVICE_TABLE(of, ehca_device_table); - -static struct platform_driver ehca_driver = { - .probe = ehca_probe, - .remove = ehca_remove, - .driver = { - .name = "ehca", - .owner = THIS_MODULE, - .groups = ehca_drv_attr_groups, - .of_match_table = ehca_device_table, - }, -}; - -void ehca_poll_eqs(unsigned long data) -{ - struct ehca_shca *shca; - - spin_lock(&shca_list_lock); - list_for_each_entry(shca, &shca_list, shca_list) { - if (shca->eq.is_initialized) { - /* call deadman proc only if eq ptr does not change */ - struct ehca_eq *eq = &shca->eq; - int max = 3; - volatile u64 q_ofs, q_ofs2; - unsigned long flags; - spin_lock_irqsave(&eq->spinlock, flags); - q_ofs = eq->ipz_queue.current_q_offset; - spin_unlock_irqrestore(&eq->spinlock, flags); - do { - spin_lock_irqsave(&eq->spinlock, flags); - q_ofs2 = eq->ipz_queue.current_q_offset; - spin_unlock_irqrestore(&eq->spinlock, flags); - max--; - } while (q_ofs == q_ofs2 && max > 0); - if (q_ofs == q_ofs2) - ehca_process_eq(shca, 0); - } - } - mod_timer(&poll_eqs_timer, round_jiffies(jiffies + HZ)); - spin_unlock(&shca_list_lock); -} - -static int ehca_mem_notifier(struct notifier_block *nb, - unsigned long action, void *data) -{ - static unsigned long ehca_dmem_warn_time; - unsigned long flags; - - switch (action) { - case MEM_CANCEL_OFFLINE: - case MEM_CANCEL_ONLINE: - case MEM_ONLINE: - case MEM_OFFLINE: - return NOTIFY_OK; - case MEM_GOING_ONLINE: - case MEM_GOING_OFFLINE: - /* only ok if no hca is attached to the lpar */ - spin_lock_irqsave(&shca_list_lock, flags); - if (list_empty(&shca_list)) { - spin_unlock_irqrestore(&shca_list_lock, flags); - return NOTIFY_OK; - } else { - 
spin_unlock_irqrestore(&shca_list_lock, flags); - if (printk_timed_ratelimit(&ehca_dmem_warn_time, - 30 * 1000)) - ehca_gen_err("DMEM operations are not allowed" - "in conjunction with eHCA"); - return NOTIFY_BAD; - } - } - return NOTIFY_OK; -} - -static struct notifier_block ehca_mem_nb = { - .notifier_call = ehca_mem_notifier, -}; - -static int __init ehca_module_init(void) -{ - int ret; - - printk(KERN_INFO "eHCA Infiniband Device Driver " - "(Version " HCAD_VERSION ")\n"); - - ret = ehca_create_comp_pool(); - if (ret) { - ehca_gen_err("Cannot create comp pool."); - return ret; - } - - ret = ehca_create_slab_caches(); - if (ret) { - ehca_gen_err("Cannot create SLAB caches"); - ret = -ENOMEM; - goto module_init1; - } - - ret = ehca_create_busmap(); - if (ret) { - ehca_gen_err("Cannot create busmap."); - goto module_init2; - } - - ret = ibmebus_register_driver(&ehca_driver); - if (ret) { - ehca_gen_err("Cannot register eHCA device driver"); - ret = -EINVAL; - goto module_init3; - } - - ret = register_memory_notifier(&ehca_mem_nb); - if (ret) { - ehca_gen_err("Failed registering memory add/remove notifier"); - goto module_init4; - } - - if (ehca_poll_all_eqs != 1) { - ehca_gen_err("WARNING!!!"); - ehca_gen_err("It is possible to lose interrupts."); - } else { - init_timer(&poll_eqs_timer); - poll_eqs_timer.function = ehca_poll_eqs; - poll_eqs_timer.expires = jiffies + HZ; - add_timer(&poll_eqs_timer); - } - - return 0; - -module_init4: - ibmebus_unregister_driver(&ehca_driver); - -module_init3: - ehca_destroy_busmap(); - -module_init2: - ehca_destroy_slab_caches(); - -module_init1: - ehca_destroy_comp_pool(); - return ret; -}; - -static void __exit ehca_module_exit(void) -{ - if (ehca_poll_all_eqs == 1) - del_timer_sync(&poll_eqs_timer); - - ibmebus_unregister_driver(&ehca_driver); - - unregister_memory_notifier(&ehca_mem_nb); - - ehca_destroy_busmap(); - - ehca_destroy_slab_caches(); - - ehca_destroy_comp_pool(); - - idr_destroy(&ehca_cq_idr); - 
idr_destroy(&ehca_qp_idr); -}; - -module_init(ehca_module_init); -module_exit(ehca_module_exit); diff --git a/drivers/infiniband/hw/ehca/ehca_mcast.c b/drivers/infiniband/hw/ehca/ehca_mcast.c deleted file mode 100644 index cec1815..0000000 --- a/drivers/infiniband/hw/ehca/ehca_mcast.c +++ /dev/null @@ -1,131 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * mcast functions - * - * Authors: Khadija Souissi - * Waleri Fomin - * Reinhard Ernst - * Hoang-Nam Nguyen - * Heiko J Schick - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include -#include -#include "ehca_classes.h" -#include "ehca_tools.h" -#include "ehca_qes.h" -#include "ehca_iverbs.h" -#include "hcp_if.h" - -#define MAX_MC_LID 0xFFFE -#define MIN_MC_LID 0xC000 /* Multicast limits */ -#define EHCA_VALID_MULTICAST_GID(gid) ((gid)[0] == 0xFF) -#define EHCA_VALID_MULTICAST_LID(lid) \ - (((lid) >= MIN_MC_LID) && ((lid) <= MAX_MC_LID)) - -int ehca_attach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) -{ - struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp); - struct ehca_shca *shca = container_of(ibqp->device, struct ehca_shca, - ib_device); - union ib_gid my_gid; - u64 subnet_prefix, interface_id, h_ret; - - if (ibqp->qp_type != IB_QPT_UD) { - ehca_err(ibqp->device, "invalid qp_type=%x", ibqp->qp_type); - return -EINVAL; - } - - if (!(EHCA_VALID_MULTICAST_GID(gid->raw))) { - ehca_err(ibqp->device, "invalid mulitcast gid"); - return -EINVAL; - } else if ((lid < MIN_MC_LID) || (lid > MAX_MC_LID)) { - ehca_err(ibqp->device, "invalid mulitcast lid=%x", lid); - return -EINVAL; - } - - memcpy(&my_gid, gid->raw, sizeof(union ib_gid)); - - subnet_prefix = be64_to_cpu(my_gid.global.subnet_prefix); - interface_id = be64_to_cpu(my_gid.global.interface_id); - h_ret = hipz_h_attach_mcqp(shca->ipz_hca_handle, - my_qp->ipz_qp_handle, - my_qp->galpas.kernel, - lid, subnet_prefix, interface_id); - if (h_ret != H_SUCCESS) - ehca_err(ibqp->device, - "ehca_qp=%p qp_num=%x hipz_h_attach_mcqp() 
failed " - "h_ret=%lli", my_qp, ibqp->qp_num, h_ret); - - return ehca2ib_return_code(h_ret); -} - -int ehca_detach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) -{ - struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp); - struct ehca_shca *shca = container_of(ibqp->pd->device, - struct ehca_shca, ib_device); - union ib_gid my_gid; - u64 subnet_prefix, interface_id, h_ret; - - if (ibqp->qp_type != IB_QPT_UD) { - ehca_err(ibqp->device, "invalid qp_type %x", ibqp->qp_type); - return -EINVAL; - } - - if (!(EHCA_VALID_MULTICAST_GID(gid->raw))) { - ehca_err(ibqp->device, "invalid mulitcast gid"); - return -EINVAL; - } else if ((lid < MIN_MC_LID) || (lid > MAX_MC_LID)) { - ehca_err(ibqp->device, "invalid mulitcast lid=%x", lid); - return -EINVAL; - } - - memcpy(&my_gid, gid->raw, sizeof(union ib_gid)); - - subnet_prefix = be64_to_cpu(my_gid.global.subnet_prefix); - interface_id = be64_to_cpu(my_gid.global.interface_id); - h_ret = hipz_h_detach_mcqp(shca->ipz_hca_handle, - my_qp->ipz_qp_handle, - my_qp->galpas.kernel, - lid, subnet_prefix, interface_id); - if (h_ret != H_SUCCESS) - ehca_err(ibqp->device, - "ehca_qp=%p qp_num=%x hipz_h_detach_mcqp() failed " - "h_ret=%lli", my_qp, ibqp->qp_num, h_ret); - - return ehca2ib_return_code(h_ret); -} diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c deleted file mode 100644 index f914b30..0000000 --- a/drivers/infiniband/hw/ehca/ehca_mrmw.c +++ /dev/null @@ -1,2593 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * MR/MW functions - * - * Authors: Dietmar Decker - * Christoph Raisch - * Hoang-Nam Nguyen - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. 
- * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include -#include - -#include "ehca_iverbs.h" -#include "ehca_mrmw.h" -#include "hcp_if.h" -#include "hipz_hw.h" - -#define NUM_CHUNKS(length, chunk_size) \ - (((length) + (chunk_size - 1)) / (chunk_size)) - -/* max number of rpages (per hcall register_rpages) */ -#define MAX_RPAGES 512 - -/* DMEM toleration management */ -#define EHCA_SECTSHIFT SECTION_SIZE_BITS -#define EHCA_SECTSIZE (1UL << EHCA_SECTSHIFT) -#define EHCA_HUGEPAGESHIFT 34 -#define EHCA_HUGEPAGE_SIZE (1UL << EHCA_HUGEPAGESHIFT) -#define EHCA_HUGEPAGE_PFN_MASK ((EHCA_HUGEPAGE_SIZE - 1) >> PAGE_SHIFT) -#define EHCA_INVAL_ADDR 0xFFFFFFFFFFFFFFFFULL -#define EHCA_DIR_INDEX_SHIFT 13 /* 8k Entries in 64k block */ -#define EHCA_TOP_INDEX_SHIFT (EHCA_DIR_INDEX_SHIFT * 2) -#define EHCA_MAP_ENTRIES (1 << EHCA_DIR_INDEX_SHIFT) -#define EHCA_TOP_MAP_SIZE (0x10000) /* currently fixed map size */ -#define EHCA_DIR_MAP_SIZE (0x10000) -#define EHCA_ENT_MAP_SIZE (0x10000) -#define EHCA_INDEX_MASK (EHCA_MAP_ENTRIES - 1) - -static unsigned long ehca_mr_len; - -/* - * Memory map data structures - */ -struct ehca_dir_bmap { - u64 ent[EHCA_MAP_ENTRIES]; -}; -struct ehca_top_bmap { - struct ehca_dir_bmap *dir[EHCA_MAP_ENTRIES]; -}; -struct ehca_bmap { - struct ehca_top_bmap *top[EHCA_MAP_ENTRIES]; -}; - -static struct ehca_bmap *ehca_bmap; - -static struct kmem_cache *mr_cache; -static struct kmem_cache *mw_cache; - -enum ehca_mr_pgsize { - EHCA_MR_PGSIZE4K = 0x1000L, - EHCA_MR_PGSIZE64K = 0x10000L, - EHCA_MR_PGSIZE1M = 0x100000L, - EHCA_MR_PGSIZE16M = 0x1000000L -}; - -#define EHCA_MR_PGSHIFT4K 12 -#define EHCA_MR_PGSHIFT64K 16 -#define EHCA_MR_PGSHIFT1M 20 -#define EHCA_MR_PGSHIFT16M 24 - -static u64 ehca_map_vaddr(void *caddr); - -static u32 ehca_encode_hwpage_size(u32 pgsize) -{ - int log = ilog2(pgsize); - WARN_ON(log < 12 || log > 24 || log & 3); - return (log - 12) / 4; -} - -static u64 ehca_get_max_hwpage_size(struct ehca_shca *shca) -{ - return rounddown_pow_of_two(shca->hca_cap_mr_pgsize); -} - -static 
struct ehca_mr *ehca_mr_new(void) -{ - struct ehca_mr *me; - - me = kmem_cache_zalloc(mr_cache, GFP_KERNEL); - if (me) - spin_lock_init(&me->mrlock); - else - ehca_gen_err("alloc failed"); - - return me; -} - -static void ehca_mr_delete(struct ehca_mr *me) -{ - kmem_cache_free(mr_cache, me); -} - -static struct ehca_mw *ehca_mw_new(void) -{ - struct ehca_mw *me; - - me = kmem_cache_zalloc(mw_cache, GFP_KERNEL); - if (me) - spin_lock_init(&me->mwlock); - else - ehca_gen_err("alloc failed"); - - return me; -} - -static void ehca_mw_delete(struct ehca_mw *me) -{ - kmem_cache_free(mw_cache, me); -} - -/*----------------------------------------------------------------------*/ - -struct ib_mr *ehca_get_dma_mr(struct ib_pd *pd, int mr_access_flags) -{ - struct ib_mr *ib_mr; - int ret; - struct ehca_mr *e_maxmr; - struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd); - struct ehca_shca *shca = - container_of(pd->device, struct ehca_shca, ib_device); - - if (shca->maxmr) { - e_maxmr = ehca_mr_new(); - if (!e_maxmr) { - ehca_err(&shca->ib_device, "out of memory"); - ib_mr = ERR_PTR(-ENOMEM); - goto get_dma_mr_exit0; - } - - ret = ehca_reg_maxmr(shca, e_maxmr, - (void *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START)), - mr_access_flags, e_pd, - &e_maxmr->ib.ib_mr.lkey, - &e_maxmr->ib.ib_mr.rkey); - if (ret) { - ehca_mr_delete(e_maxmr); - ib_mr = ERR_PTR(ret); - goto get_dma_mr_exit0; - } - ib_mr = &e_maxmr->ib.ib_mr; - } else { - ehca_err(&shca->ib_device, "no internal max-MR exist!"); - ib_mr = ERR_PTR(-EINVAL); - goto get_dma_mr_exit0; - } - -get_dma_mr_exit0: - if (IS_ERR(ib_mr)) - ehca_err(&shca->ib_device, "h_ret=%li pd=%p mr_access_flags=%x", - PTR_ERR(ib_mr), pd, mr_access_flags); - return ib_mr; -} /* end ehca_get_dma_mr() */ - -/*----------------------------------------------------------------------*/ - -struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd, - struct ib_phys_buf *phys_buf_array, - int num_phys_buf, - int mr_access_flags, - u64 *iova_start) 
-{ - struct ib_mr *ib_mr; - int ret; - struct ehca_mr *e_mr; - struct ehca_shca *shca = - container_of(pd->device, struct ehca_shca, ib_device); - struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd); - - u64 size; - - if ((num_phys_buf <= 0) || !phys_buf_array) { - ehca_err(pd->device, "bad input values: num_phys_buf=%x " - "phys_buf_array=%p", num_phys_buf, phys_buf_array); - ib_mr = ERR_PTR(-EINVAL); - goto reg_phys_mr_exit0; - } - if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) && - !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) || - ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) && - !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) { - /* - * Remote Write Access requires Local Write Access - * Remote Atomic Access requires Local Write Access - */ - ehca_err(pd->device, "bad input values: mr_access_flags=%x", - mr_access_flags); - ib_mr = ERR_PTR(-EINVAL); - goto reg_phys_mr_exit0; - } - - /* check physical buffer list and calculate size */ - ret = ehca_mr_chk_buf_and_calc_size(phys_buf_array, num_phys_buf, - iova_start, &size); - if (ret) { - ib_mr = ERR_PTR(ret); - goto reg_phys_mr_exit0; - } - if ((size == 0) || - (((u64)iova_start + size) < (u64)iova_start)) { - ehca_err(pd->device, "bad input values: size=%llx iova_start=%p", - size, iova_start); - ib_mr = ERR_PTR(-EINVAL); - goto reg_phys_mr_exit0; - } - - e_mr = ehca_mr_new(); - if (!e_mr) { - ehca_err(pd->device, "out of memory"); - ib_mr = ERR_PTR(-ENOMEM); - goto reg_phys_mr_exit0; - } - - /* register MR on HCA */ - if (ehca_mr_is_maxmr(size, iova_start)) { - e_mr->flags |= EHCA_MR_FLAG_MAXMR; - ret = ehca_reg_maxmr(shca, e_mr, iova_start, mr_access_flags, - e_pd, &e_mr->ib.ib_mr.lkey, - &e_mr->ib.ib_mr.rkey); - if (ret) { - ib_mr = ERR_PTR(ret); - goto reg_phys_mr_exit1; - } - } else { - struct ehca_mr_pginfo pginfo; - u32 num_kpages; - u32 num_hwpages; - u64 hw_pgsize; - - num_kpages = NUM_CHUNKS(((u64)iova_start % PAGE_SIZE) + size, - PAGE_SIZE); - /* for kernel space we try most possible pgsize 
*/ - hw_pgsize = ehca_get_max_hwpage_size(shca); - num_hwpages = NUM_CHUNKS(((u64)iova_start % hw_pgsize) + size, - hw_pgsize); - memset(&pginfo, 0, sizeof(pginfo)); - pginfo.type = EHCA_MR_PGI_PHYS; - pginfo.num_kpages = num_kpages; - pginfo.hwpage_size = hw_pgsize; - pginfo.num_hwpages = num_hwpages; - pginfo.u.phy.num_phys_buf = num_phys_buf; - pginfo.u.phy.phys_buf_array = phys_buf_array; - pginfo.next_hwpage = - ((u64)iova_start & ~PAGE_MASK) / hw_pgsize; - - ret = ehca_reg_mr(shca, e_mr, iova_start, size, mr_access_flags, - e_pd, &pginfo, &e_mr->ib.ib_mr.lkey, - &e_mr->ib.ib_mr.rkey, EHCA_REG_MR); - if (ret) { - ib_mr = ERR_PTR(ret); - goto reg_phys_mr_exit1; - } - } - - /* successful registration of all pages */ - return &e_mr->ib.ib_mr; - -reg_phys_mr_exit1: - ehca_mr_delete(e_mr); -reg_phys_mr_exit0: - if (IS_ERR(ib_mr)) - ehca_err(pd->device, "h_ret=%li pd=%p phys_buf_array=%p " - "num_phys_buf=%x mr_access_flags=%x iova_start=%p", - PTR_ERR(ib_mr), pd, phys_buf_array, - num_phys_buf, mr_access_flags, iova_start); - return ib_mr; -} /* end ehca_reg_phys_mr() */ - -/*----------------------------------------------------------------------*/ - -struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, - u64 virt, int mr_access_flags, - struct ib_udata *udata) -{ - struct ib_mr *ib_mr; - struct ehca_mr *e_mr; - struct ehca_shca *shca = - container_of(pd->device, struct ehca_shca, ib_device); - struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd); - struct ehca_mr_pginfo pginfo; - int ret, page_shift; - u32 num_kpages; - u32 num_hwpages; - u64 hwpage_size; - - if (!pd) { - ehca_gen_err("bad pd=%p", pd); - return ERR_PTR(-EFAULT); - } - - if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) && - !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) || - ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) && - !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) { - /* - * Remote Write Access requires Local Write Access - * Remote Atomic Access requires Local Write 
Access - */ - ehca_err(pd->device, "bad input values: mr_access_flags=%x", - mr_access_flags); - ib_mr = ERR_PTR(-EINVAL); - goto reg_user_mr_exit0; - } - - if (length == 0 || virt + length < virt) { - ehca_err(pd->device, "bad input values: length=%llx " - "virt_base=%llx", length, virt); - ib_mr = ERR_PTR(-EINVAL); - goto reg_user_mr_exit0; - } - - e_mr = ehca_mr_new(); - if (!e_mr) { - ehca_err(pd->device, "out of memory"); - ib_mr = ERR_PTR(-ENOMEM); - goto reg_user_mr_exit0; - } - - e_mr->umem = ib_umem_get(pd->uobject->context, start, length, - mr_access_flags, 0); - if (IS_ERR(e_mr->umem)) { - ib_mr = (void *)e_mr->umem; - goto reg_user_mr_exit1; - } - - if (e_mr->umem->page_size != PAGE_SIZE) { - ehca_err(pd->device, "page size not supported, " - "e_mr->umem->page_size=%x", e_mr->umem->page_size); - ib_mr = ERR_PTR(-EINVAL); - goto reg_user_mr_exit2; - } - - /* determine number of MR pages */ - num_kpages = NUM_CHUNKS((virt % PAGE_SIZE) + length, PAGE_SIZE); - /* select proper hw_pgsize */ - page_shift = PAGE_SHIFT; - if (e_mr->umem->hugetlb) { - /* determine page_shift, clamp between 4K and 16M */ - page_shift = (fls64(length - 1) + 3) & ~3; - page_shift = min(max(page_shift, EHCA_MR_PGSHIFT4K), - EHCA_MR_PGSHIFT16M); - } - hwpage_size = 1UL << page_shift; - - /* now that we have the desired page size, shift until it's - * supported, too. 4K is always supported, so this terminates. 
- */ - while (!(hwpage_size & shca->hca_cap_mr_pgsize)) - hwpage_size >>= 4; - -reg_user_mr_fallback: - num_hwpages = NUM_CHUNKS((virt % hwpage_size) + length, hwpage_size); - /* register MR on HCA */ - memset(&pginfo, 0, sizeof(pginfo)); - pginfo.type = EHCA_MR_PGI_USER; - pginfo.hwpage_size = hwpage_size; - pginfo.num_kpages = num_kpages; - pginfo.num_hwpages = num_hwpages; - pginfo.u.usr.region = e_mr->umem; - pginfo.next_hwpage = ib_umem_offset(e_mr->umem) / hwpage_size; - pginfo.u.usr.next_sg = pginfo.u.usr.region->sg_head.sgl; - ret = ehca_reg_mr(shca, e_mr, (u64 *)virt, length, mr_access_flags, - e_pd, &pginfo, &e_mr->ib.ib_mr.lkey, - &e_mr->ib.ib_mr.rkey, EHCA_REG_MR); - if (ret == -EINVAL && pginfo.hwpage_size > PAGE_SIZE) { - ehca_warn(pd->device, "failed to register mr " - "with hwpage_size=%llx", hwpage_size); - ehca_info(pd->device, "try to register mr with " - "kpage_size=%lx", PAGE_SIZE); - /* - * this means kpages are not contiguous for a hw page - * try kernel page size as fallback solution - */ - hwpage_size = PAGE_SIZE; - goto reg_user_mr_fallback; - } - if (ret) { - ib_mr = ERR_PTR(ret); - goto reg_user_mr_exit2; - } - - /* successful registration of all pages */ - return &e_mr->ib.ib_mr; - -reg_user_mr_exit2: - ib_umem_release(e_mr->umem); -reg_user_mr_exit1: - ehca_mr_delete(e_mr); -reg_user_mr_exit0: - if (IS_ERR(ib_mr)) - ehca_err(pd->device, "rc=%li pd=%p mr_access_flags=%x udata=%p", - PTR_ERR(ib_mr), pd, mr_access_flags, udata); - return ib_mr; -} /* end ehca_reg_user_mr() */ - -/*----------------------------------------------------------------------*/ - -int ehca_rereg_phys_mr(struct ib_mr *mr, - int mr_rereg_mask, - struct ib_pd *pd, - struct ib_phys_buf *phys_buf_array, - int num_phys_buf, - int mr_access_flags, - u64 *iova_start) -{ - int ret; - - struct ehca_shca *shca = - container_of(mr->device, struct ehca_shca, ib_device); - struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr); - u64 new_size; - u64 *new_start; - u32 
new_acl; - struct ehca_pd *new_pd; - u32 tmp_lkey, tmp_rkey; - unsigned long sl_flags; - u32 num_kpages = 0; - u32 num_hwpages = 0; - struct ehca_mr_pginfo pginfo; - - if (!(mr_rereg_mask & IB_MR_REREG_TRANS)) { - /* TODO not supported, because PHYP rereg hCall needs pages */ - ehca_err(mr->device, "rereg without IB_MR_REREG_TRANS not " - "supported yet, mr_rereg_mask=%x", mr_rereg_mask); - ret = -EINVAL; - goto rereg_phys_mr_exit0; - } - - if (mr_rereg_mask & IB_MR_REREG_PD) { - if (!pd) { - ehca_err(mr->device, "rereg with bad pd, pd=%p " - "mr_rereg_mask=%x", pd, mr_rereg_mask); - ret = -EINVAL; - goto rereg_phys_mr_exit0; - } - } - - if ((mr_rereg_mask & - ~(IB_MR_REREG_TRANS | IB_MR_REREG_PD | IB_MR_REREG_ACCESS)) || - (mr_rereg_mask == 0)) { - ret = -EINVAL; - goto rereg_phys_mr_exit0; - } - - /* check other parameters */ - if (e_mr == shca->maxmr) { - /* should be impossible, however reject to be sure */ - ehca_err(mr->device, "rereg internal max-MR impossible, mr=%p " - "shca->maxmr=%p mr->lkey=%x", - mr, shca->maxmr, mr->lkey); - ret = -EINVAL; - goto rereg_phys_mr_exit0; - } - if (mr_rereg_mask & IB_MR_REREG_TRANS) { /* transl., i.e. 
addr/size */ - if (e_mr->flags & EHCA_MR_FLAG_FMR) { - ehca_err(mr->device, "not supported for FMR, mr=%p " - "flags=%x", mr, e_mr->flags); - ret = -EINVAL; - goto rereg_phys_mr_exit0; - } - if (!phys_buf_array || num_phys_buf <= 0) { - ehca_err(mr->device, "bad input values mr_rereg_mask=%x" - " phys_buf_array=%p num_phys_buf=%x", - mr_rereg_mask, phys_buf_array, num_phys_buf); - ret = -EINVAL; - goto rereg_phys_mr_exit0; - } - } - if ((mr_rereg_mask & IB_MR_REREG_ACCESS) && /* change ACL */ - (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) && - !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) || - ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) && - !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)))) { - /* - * Remote Write Access requires Local Write Access - * Remote Atomic Access requires Local Write Access - */ - ehca_err(mr->device, "bad input values: mr_rereg_mask=%x " - "mr_access_flags=%x", mr_rereg_mask, mr_access_flags); - ret = -EINVAL; - goto rereg_phys_mr_exit0; - } - - /* set requested values dependent on rereg request */ - spin_lock_irqsave(&e_mr->mrlock, sl_flags); - new_start = e_mr->start; - new_size = e_mr->size; - new_acl = e_mr->acl; - new_pd = container_of(mr->pd, struct ehca_pd, ib_pd); - - if (mr_rereg_mask & IB_MR_REREG_TRANS) { - u64 hw_pgsize = ehca_get_max_hwpage_size(shca); - - new_start = iova_start; /* change address */ - /* check physical buffer list and calculate size */ - ret = ehca_mr_chk_buf_and_calc_size(phys_buf_array, - num_phys_buf, iova_start, - &new_size); - if (ret) - goto rereg_phys_mr_exit1; - if ((new_size == 0) || - (((u64)iova_start + new_size) < (u64)iova_start)) { - ehca_err(mr->device, "bad input values: new_size=%llx " - "iova_start=%p", new_size, iova_start); - ret = -EINVAL; - goto rereg_phys_mr_exit1; - } - num_kpages = NUM_CHUNKS(((u64)new_start % PAGE_SIZE) + - new_size, PAGE_SIZE); - num_hwpages = NUM_CHUNKS(((u64)new_start % hw_pgsize) + - new_size, hw_pgsize); - memset(&pginfo, 0, sizeof(pginfo)); - pginfo.type = 
EHCA_MR_PGI_PHYS; - pginfo.num_kpages = num_kpages; - pginfo.hwpage_size = hw_pgsize; - pginfo.num_hwpages = num_hwpages; - pginfo.u.phy.num_phys_buf = num_phys_buf; - pginfo.u.phy.phys_buf_array = phys_buf_array; - pginfo.next_hwpage = - ((u64)iova_start & ~PAGE_MASK) / hw_pgsize; - } - if (mr_rereg_mask & IB_MR_REREG_ACCESS) - new_acl = mr_access_flags; - if (mr_rereg_mask & IB_MR_REREG_PD) - new_pd = container_of(pd, struct ehca_pd, ib_pd); - - ret = ehca_rereg_mr(shca, e_mr, new_start, new_size, new_acl, - new_pd, &pginfo, &tmp_lkey, &tmp_rkey); - if (ret) - goto rereg_phys_mr_exit1; - - /* successful reregistration */ - if (mr_rereg_mask & IB_MR_REREG_PD) - mr->pd = pd; - mr->lkey = tmp_lkey; - mr->rkey = tmp_rkey; - -rereg_phys_mr_exit1: - spin_unlock_irqrestore(&e_mr->mrlock, sl_flags); -rereg_phys_mr_exit0: - if (ret) - ehca_err(mr->device, "ret=%i mr=%p mr_rereg_mask=%x pd=%p " - "phys_buf_array=%p num_phys_buf=%x mr_access_flags=%x " - "iova_start=%p", - ret, mr, mr_rereg_mask, pd, phys_buf_array, - num_phys_buf, mr_access_flags, iova_start); - return ret; -} /* end ehca_rereg_phys_mr() */ - -/*----------------------------------------------------------------------*/ - -int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr) -{ - int ret = 0; - u64 h_ret; - struct ehca_shca *shca = - container_of(mr->device, struct ehca_shca, ib_device); - struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr); - unsigned long sl_flags; - struct ehca_mr_hipzout_parms hipzout; - - if ((e_mr->flags & EHCA_MR_FLAG_FMR)) { - ehca_err(mr->device, "not supported for FMR, mr=%p e_mr=%p " - "e_mr->flags=%x", mr, e_mr, e_mr->flags); - ret = -EINVAL; - goto query_mr_exit0; - } - - memset(mr_attr, 0, sizeof(struct ib_mr_attr)); - spin_lock_irqsave(&e_mr->mrlock, sl_flags); - - h_ret = hipz_h_query_mr(shca->ipz_hca_handle, e_mr, &hipzout); - if (h_ret != H_SUCCESS) { - ehca_err(mr->device, "hipz_mr_query failed, h_ret=%lli mr=%p " - "hca_hndl=%llx mr_hndl=%llx 
lkey=%x", - h_ret, mr, shca->ipz_hca_handle.handle, - e_mr->ipz_mr_handle.handle, mr->lkey); - ret = ehca2ib_return_code(h_ret); - goto query_mr_exit1; - } - mr_attr->pd = mr->pd; - mr_attr->device_virt_addr = hipzout.vaddr; - mr_attr->size = hipzout.len; - mr_attr->lkey = hipzout.lkey; - mr_attr->rkey = hipzout.rkey; - ehca_mrmw_reverse_map_acl(&hipzout.acl, &mr_attr->mr_access_flags); - -query_mr_exit1: - spin_unlock_irqrestore(&e_mr->mrlock, sl_flags); -query_mr_exit0: - if (ret) - ehca_err(mr->device, "ret=%i mr=%p mr_attr=%p", - ret, mr, mr_attr); - return ret; -} /* end ehca_query_mr() */ - -/*----------------------------------------------------------------------*/ - -int ehca_dereg_mr(struct ib_mr *mr) -{ - int ret = 0; - u64 h_ret; - struct ehca_shca *shca = - container_of(mr->device, struct ehca_shca, ib_device); - struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr); - - if ((e_mr->flags & EHCA_MR_FLAG_FMR)) { - ehca_err(mr->device, "not supported for FMR, mr=%p e_mr=%p " - "e_mr->flags=%x", mr, e_mr, e_mr->flags); - ret = -EINVAL; - goto dereg_mr_exit0; - } else if (e_mr == shca->maxmr) { - /* should be impossible, however reject to be sure */ - ehca_err(mr->device, "dereg internal max-MR impossible, mr=%p " - "shca->maxmr=%p mr->lkey=%x", - mr, shca->maxmr, mr->lkey); - ret = -EINVAL; - goto dereg_mr_exit0; - } - - /* TODO: BUSY: MR still has bound window(s) */ - h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr); - if (h_ret != H_SUCCESS) { - ehca_err(mr->device, "hipz_free_mr failed, h_ret=%lli shca=%p " - "e_mr=%p hca_hndl=%llx mr_hndl=%llx mr->lkey=%x", - h_ret, shca, e_mr, shca->ipz_hca_handle.handle, - e_mr->ipz_mr_handle.handle, mr->lkey); - ret = ehca2ib_return_code(h_ret); - goto dereg_mr_exit0; - } - - if (e_mr->umem) - ib_umem_release(e_mr->umem); - - /* successful deregistration */ - ehca_mr_delete(e_mr); - -dereg_mr_exit0: - if (ret) - ehca_err(mr->device, "ret=%i mr=%p", ret, mr); - return ret; -} /* end 
ehca_dereg_mr() */ - -/*----------------------------------------------------------------------*/ - -struct ib_mw *ehca_alloc_mw(struct ib_pd *pd, enum ib_mw_type type) -{ - struct ib_mw *ib_mw; - u64 h_ret; - struct ehca_mw *e_mw; - struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd); - struct ehca_shca *shca = - container_of(pd->device, struct ehca_shca, ib_device); - struct ehca_mw_hipzout_parms hipzout; - - if (type != IB_MW_TYPE_1) - return ERR_PTR(-EINVAL); - - e_mw = ehca_mw_new(); - if (!e_mw) { - ib_mw = ERR_PTR(-ENOMEM); - goto alloc_mw_exit0; - } - - h_ret = hipz_h_alloc_resource_mw(shca->ipz_hca_handle, e_mw, - e_pd->fw_pd, &hipzout); - if (h_ret != H_SUCCESS) { - ehca_err(pd->device, "hipz_mw_allocate failed, h_ret=%lli " - "shca=%p hca_hndl=%llx mw=%p", - h_ret, shca, shca->ipz_hca_handle.handle, e_mw); - ib_mw = ERR_PTR(ehca2ib_return_code(h_ret)); - goto alloc_mw_exit1; - } - /* successful MW allocation */ - e_mw->ipz_mw_handle = hipzout.handle; - e_mw->ib_mw.rkey = hipzout.rkey; - return &e_mw->ib_mw; - -alloc_mw_exit1: - ehca_mw_delete(e_mw); -alloc_mw_exit0: - if (IS_ERR(ib_mw)) - ehca_err(pd->device, "h_ret=%li pd=%p", PTR_ERR(ib_mw), pd); - return ib_mw; -} /* end ehca_alloc_mw() */ - -/*----------------------------------------------------------------------*/ - -int ehca_bind_mw(struct ib_qp *qp, - struct ib_mw *mw, - struct ib_mw_bind *mw_bind) -{ - /* TODO: not supported up to now */ - ehca_gen_err("bind MW currently not supported by HCAD"); - - return -EPERM; -} /* end ehca_bind_mw() */ - -/*----------------------------------------------------------------------*/ - -int ehca_dealloc_mw(struct ib_mw *mw) -{ - u64 h_ret; - struct ehca_shca *shca = - container_of(mw->device, struct ehca_shca, ib_device); - struct ehca_mw *e_mw = container_of(mw, struct ehca_mw, ib_mw); - - h_ret = hipz_h_free_resource_mw(shca->ipz_hca_handle, e_mw); - if (h_ret != H_SUCCESS) { - ehca_err(mw->device, "hipz_free_mw failed, h_ret=%lli shca=%p " - "mw=%p 
rkey=%x hca_hndl=%llx mw_hndl=%llx", - h_ret, shca, mw, mw->rkey, shca->ipz_hca_handle.handle, - e_mw->ipz_mw_handle.handle); - return ehca2ib_return_code(h_ret); - } - /* successful deallocation */ - ehca_mw_delete(e_mw); - return 0; -} /* end ehca_dealloc_mw() */ - -/*----------------------------------------------------------------------*/ - -struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd, - int mr_access_flags, - struct ib_fmr_attr *fmr_attr) -{ - struct ib_fmr *ib_fmr; - struct ehca_shca *shca = - container_of(pd->device, struct ehca_shca, ib_device); - struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd); - struct ehca_mr *e_fmr; - int ret; - u32 tmp_lkey, tmp_rkey; - struct ehca_mr_pginfo pginfo; - u64 hw_pgsize; - - /* check other parameters */ - if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) && - !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) || - ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) && - !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) { - /* - * Remote Write Access requires Local Write Access - * Remote Atomic Access requires Local Write Access - */ - ehca_err(pd->device, "bad input values: mr_access_flags=%x", - mr_access_flags); - ib_fmr = ERR_PTR(-EINVAL); - goto alloc_fmr_exit0; - } - if (mr_access_flags & IB_ACCESS_MW_BIND) { - ehca_err(pd->device, "bad input values: mr_access_flags=%x", - mr_access_flags); - ib_fmr = ERR_PTR(-EINVAL); - goto alloc_fmr_exit0; - } - if ((fmr_attr->max_pages == 0) || (fmr_attr->max_maps == 0)) { - ehca_err(pd->device, "bad input values: fmr_attr->max_pages=%x " - "fmr_attr->max_maps=%x fmr_attr->page_shift=%x", - fmr_attr->max_pages, fmr_attr->max_maps, - fmr_attr->page_shift); - ib_fmr = ERR_PTR(-EINVAL); - goto alloc_fmr_exit0; - } - - hw_pgsize = 1 << fmr_attr->page_shift; - if (!(hw_pgsize & shca->hca_cap_mr_pgsize)) { - ehca_err(pd->device, "unsupported fmr_attr->page_shift=%x", - fmr_attr->page_shift); - ib_fmr = ERR_PTR(-EINVAL); - goto alloc_fmr_exit0; - } - - e_fmr = ehca_mr_new(); - if (!e_fmr) { 
- ib_fmr = ERR_PTR(-ENOMEM); - goto alloc_fmr_exit0; - } - e_fmr->flags |= EHCA_MR_FLAG_FMR; - - /* register MR on HCA */ - memset(&pginfo, 0, sizeof(pginfo)); - pginfo.hwpage_size = hw_pgsize; - /* - * pginfo.num_hwpages==0, ie register_rpages() will not be called - * but deferred to map_phys_fmr() - */ - ret = ehca_reg_mr(shca, e_fmr, NULL, - fmr_attr->max_pages * (1 << fmr_attr->page_shift), - mr_access_flags, e_pd, &pginfo, - &tmp_lkey, &tmp_rkey, EHCA_REG_MR); - if (ret) { - ib_fmr = ERR_PTR(ret); - goto alloc_fmr_exit1; - } - - /* successful */ - e_fmr->hwpage_size = hw_pgsize; - e_fmr->fmr_page_size = 1 << fmr_attr->page_shift; - e_fmr->fmr_max_pages = fmr_attr->max_pages; - e_fmr->fmr_max_maps = fmr_attr->max_maps; - e_fmr->fmr_map_cnt = 0; - return &e_fmr->ib.ib_fmr; - -alloc_fmr_exit1: - ehca_mr_delete(e_fmr); -alloc_fmr_exit0: - return ib_fmr; -} /* end ehca_alloc_fmr() */ - -/*----------------------------------------------------------------------*/ - -int ehca_map_phys_fmr(struct ib_fmr *fmr, - u64 *page_list, - int list_len, - u64 iova) -{ - int ret; - struct ehca_shca *shca = - container_of(fmr->device, struct ehca_shca, ib_device); - struct ehca_mr *e_fmr = container_of(fmr, struct ehca_mr, ib.ib_fmr); - struct ehca_pd *e_pd = container_of(fmr->pd, struct ehca_pd, ib_pd); - struct ehca_mr_pginfo pginfo; - u32 tmp_lkey, tmp_rkey; - - if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) { - ehca_err(fmr->device, "not a FMR, e_fmr=%p e_fmr->flags=%x", - e_fmr, e_fmr->flags); - ret = -EINVAL; - goto map_phys_fmr_exit0; - } - ret = ehca_fmr_check_page_list(e_fmr, page_list, list_len); - if (ret) - goto map_phys_fmr_exit0; - if (iova % e_fmr->fmr_page_size) { - /* only whole-numbered pages */ - ehca_err(fmr->device, "bad iova, iova=%llx fmr_page_size=%x", - iova, e_fmr->fmr_page_size); - ret = -EINVAL; - goto map_phys_fmr_exit0; - } - if (e_fmr->fmr_map_cnt >= e_fmr->fmr_max_maps) { - /* HCAD does not limit the maps, however trace this anyway */ - 
ehca_info(fmr->device, "map limit exceeded, fmr=%p " - "e_fmr->fmr_map_cnt=%x e_fmr->fmr_max_maps=%x", - fmr, e_fmr->fmr_map_cnt, e_fmr->fmr_max_maps); - } - - memset(&pginfo, 0, sizeof(pginfo)); - pginfo.type = EHCA_MR_PGI_FMR; - pginfo.num_kpages = list_len; - pginfo.hwpage_size = e_fmr->hwpage_size; - pginfo.num_hwpages = - list_len * e_fmr->fmr_page_size / pginfo.hwpage_size; - pginfo.u.fmr.page_list = page_list; - pginfo.next_hwpage = - (iova & (e_fmr->fmr_page_size-1)) / pginfo.hwpage_size; - pginfo.u.fmr.fmr_pgsize = e_fmr->fmr_page_size; - - ret = ehca_rereg_mr(shca, e_fmr, (u64 *)iova, - list_len * e_fmr->fmr_page_size, - e_fmr->acl, e_pd, &pginfo, &tmp_lkey, &tmp_rkey); - if (ret) - goto map_phys_fmr_exit0; - - /* successful reregistration */ - e_fmr->fmr_map_cnt++; - e_fmr->ib.ib_fmr.lkey = tmp_lkey; - e_fmr->ib.ib_fmr.rkey = tmp_rkey; - return 0; - -map_phys_fmr_exit0: - if (ret) - ehca_err(fmr->device, "ret=%i fmr=%p page_list=%p list_len=%x " - "iova=%llx", ret, fmr, page_list, list_len, iova); - return ret; -} /* end ehca_map_phys_fmr() */ - -/*----------------------------------------------------------------------*/ - -int ehca_unmap_fmr(struct list_head *fmr_list) -{ - int ret = 0; - struct ib_fmr *ib_fmr; - struct ehca_shca *shca = NULL; - struct ehca_shca *prev_shca; - struct ehca_mr *e_fmr; - u32 num_fmr = 0; - u32 unmap_fmr_cnt = 0; - - /* check all FMR belong to same SHCA, and check internal flag */ - list_for_each_entry(ib_fmr, fmr_list, list) { - prev_shca = shca; - shca = container_of(ib_fmr->device, struct ehca_shca, - ib_device); - e_fmr = container_of(ib_fmr, struct ehca_mr, ib.ib_fmr); - if ((shca != prev_shca) && prev_shca) { - ehca_err(&shca->ib_device, "SHCA mismatch, shca=%p " - "prev_shca=%p e_fmr=%p", - shca, prev_shca, e_fmr); - ret = -EINVAL; - goto unmap_fmr_exit0; - } - if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) { - ehca_err(&shca->ib_device, "not a FMR, e_fmr=%p " - "e_fmr->flags=%x", e_fmr, e_fmr->flags); - ret = -EINVAL; - goto 
unmap_fmr_exit0; - } - num_fmr++; - } - - /* loop over all FMRs to unmap */ - list_for_each_entry(ib_fmr, fmr_list, list) { - unmap_fmr_cnt++; - e_fmr = container_of(ib_fmr, struct ehca_mr, ib.ib_fmr); - shca = container_of(ib_fmr->device, struct ehca_shca, - ib_device); - ret = ehca_unmap_one_fmr(shca, e_fmr); - if (ret) { - /* unmap failed, stop unmapping of rest of FMRs */ - ehca_err(&shca->ib_device, "unmap of one FMR failed, " - "stop rest, e_fmr=%p num_fmr=%x " - "unmap_fmr_cnt=%x lkey=%x", e_fmr, num_fmr, - unmap_fmr_cnt, e_fmr->ib.ib_fmr.lkey); - goto unmap_fmr_exit0; - } - } - -unmap_fmr_exit0: - if (ret) - ehca_gen_err("ret=%i fmr_list=%p num_fmr=%x unmap_fmr_cnt=%x", - ret, fmr_list, num_fmr, unmap_fmr_cnt); - return ret; -} /* end ehca_unmap_fmr() */ - -/*----------------------------------------------------------------------*/ - -int ehca_dealloc_fmr(struct ib_fmr *fmr) -{ - int ret; - u64 h_ret; - struct ehca_shca *shca = - container_of(fmr->device, struct ehca_shca, ib_device); - struct ehca_mr *e_fmr = container_of(fmr, struct ehca_mr, ib.ib_fmr); - - if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) { - ehca_err(fmr->device, "not a FMR, e_fmr=%p e_fmr->flags=%x", - e_fmr, e_fmr->flags); - ret = -EINVAL; - goto free_fmr_exit0; - } - - h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr); - if (h_ret != H_SUCCESS) { - ehca_err(fmr->device, "hipz_free_mr failed, h_ret=%lli e_fmr=%p " - "hca_hndl=%llx fmr_hndl=%llx fmr->lkey=%x", - h_ret, e_fmr, shca->ipz_hca_handle.handle, - e_fmr->ipz_mr_handle.handle, fmr->lkey); - ret = ehca2ib_return_code(h_ret); - goto free_fmr_exit0; - } - /* successful deregistration */ - ehca_mr_delete(e_fmr); - return 0; - -free_fmr_exit0: - if (ret) - ehca_err(&shca->ib_device, "ret=%i fmr=%p", ret, fmr); - return ret; -} /* end ehca_dealloc_fmr() */ - -/*----------------------------------------------------------------------*/ - -static int ehca_reg_bmap_mr_rpages(struct ehca_shca *shca, - struct ehca_mr *e_mr, - struct 
ehca_mr_pginfo *pginfo); - -int ehca_reg_mr(struct ehca_shca *shca, - struct ehca_mr *e_mr, - u64 *iova_start, - u64 size, - int acl, - struct ehca_pd *e_pd, - struct ehca_mr_pginfo *pginfo, - u32 *lkey, /*OUT*/ - u32 *rkey, /*OUT*/ - enum ehca_reg_type reg_type) -{ - int ret; - u64 h_ret; - u32 hipz_acl; - struct ehca_mr_hipzout_parms hipzout; - - ehca_mrmw_map_acl(acl, &hipz_acl); - ehca_mrmw_set_pgsize_hipz_acl(pginfo->hwpage_size, &hipz_acl); - if (ehca_use_hp_mr == 1) - hipz_acl |= 0x00000001; - - h_ret = hipz_h_alloc_resource_mr(shca->ipz_hca_handle, e_mr, - (u64)iova_start, size, hipz_acl, - e_pd->fw_pd, &hipzout); - if (h_ret != H_SUCCESS) { - ehca_err(&shca->ib_device, "hipz_alloc_mr failed, h_ret=%lli " - "hca_hndl=%llx", h_ret, shca->ipz_hca_handle.handle); - ret = ehca2ib_return_code(h_ret); - goto ehca_reg_mr_exit0; - } - - e_mr->ipz_mr_handle = hipzout.handle; - - if (reg_type == EHCA_REG_BUSMAP_MR) - ret = ehca_reg_bmap_mr_rpages(shca, e_mr, pginfo); - else if (reg_type == EHCA_REG_MR) - ret = ehca_reg_mr_rpages(shca, e_mr, pginfo); - else - ret = -EINVAL; - - if (ret) - goto ehca_reg_mr_exit1; - - /* successful registration */ - e_mr->num_kpages = pginfo->num_kpages; - e_mr->num_hwpages = pginfo->num_hwpages; - e_mr->hwpage_size = pginfo->hwpage_size; - e_mr->start = iova_start; - e_mr->size = size; - e_mr->acl = acl; - *lkey = hipzout.lkey; - *rkey = hipzout.rkey; - return 0; - -ehca_reg_mr_exit1: - h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr); - if (h_ret != H_SUCCESS) { - ehca_err(&shca->ib_device, "h_ret=%lli shca=%p e_mr=%p " - "iova_start=%p size=%llx acl=%x e_pd=%p lkey=%x " - "pginfo=%p num_kpages=%llx num_hwpages=%llx ret=%i", - h_ret, shca, e_mr, iova_start, size, acl, e_pd, - hipzout.lkey, pginfo, pginfo->num_kpages, - pginfo->num_hwpages, ret); - ehca_err(&shca->ib_device, "internal error in ehca_reg_mr, " - "not recoverable"); - } -ehca_reg_mr_exit0: - if (ret) - ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p " - 
"iova_start=%p size=%llx acl=%x e_pd=%p pginfo=%p " - "num_kpages=%llx num_hwpages=%llx", - ret, shca, e_mr, iova_start, size, acl, e_pd, pginfo, - pginfo->num_kpages, pginfo->num_hwpages); - return ret; -} /* end ehca_reg_mr() */ - -/*----------------------------------------------------------------------*/ - -int ehca_reg_mr_rpages(struct ehca_shca *shca, - struct ehca_mr *e_mr, - struct ehca_mr_pginfo *pginfo) -{ - int ret = 0; - u64 h_ret; - u32 rnum; - u64 rpage; - u32 i; - u64 *kpage; - - if (!pginfo->num_hwpages) /* in case of fmr */ - return 0; - - kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL); - if (!kpage) { - ehca_err(&shca->ib_device, "kpage alloc failed"); - ret = -ENOMEM; - goto ehca_reg_mr_rpages_exit0; - } - - /* max MAX_RPAGES ehca mr pages per register call */ - for (i = 0; i < NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES); i++) { - - if (i == NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES) - 1) { - rnum = pginfo->num_hwpages % MAX_RPAGES; /* last shot */ - if (rnum == 0) - rnum = MAX_RPAGES; /* last shot is full */ - } else - rnum = MAX_RPAGES; - - ret = ehca_set_pagebuf(pginfo, rnum, kpage); - if (ret) { - ehca_err(&shca->ib_device, "ehca_set_pagebuf " - "bad rc, ret=%i rnum=%x kpage=%p", - ret, rnum, kpage); - goto ehca_reg_mr_rpages_exit1; - } - - if (rnum > 1) { - rpage = __pa(kpage); - if (!rpage) { - ehca_err(&shca->ib_device, "kpage=%p i=%x", - kpage, i); - ret = -EFAULT; - goto ehca_reg_mr_rpages_exit1; - } - } else - rpage = *kpage; - - h_ret = hipz_h_register_rpage_mr( - shca->ipz_hca_handle, e_mr, - ehca_encode_hwpage_size(pginfo->hwpage_size), - 0, rpage, rnum); - - if (i == NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES) - 1) { - /* - * check for 'registration complete'==H_SUCCESS - * and for 'page registered'==H_PAGE_REGISTERED - */ - if (h_ret != H_SUCCESS) { - ehca_err(&shca->ib_device, "last " - "hipz_reg_rpage_mr failed, h_ret=%lli " - "e_mr=%p i=%x hca_hndl=%llx mr_hndl=%llx" - " lkey=%x", h_ret, e_mr, i, - shca->ipz_hca_handle.handle, - 
e_mr->ipz_mr_handle.handle, - e_mr->ib.ib_mr.lkey); - ret = ehca2ib_return_code(h_ret); - break; - } else - ret = 0; - } else if (h_ret != H_PAGE_REGISTERED) { - ehca_err(&shca->ib_device, "hipz_reg_rpage_mr failed, " - "h_ret=%lli e_mr=%p i=%x lkey=%x hca_hndl=%llx " - "mr_hndl=%llx", h_ret, e_mr, i, - e_mr->ib.ib_mr.lkey, - shca->ipz_hca_handle.handle, - e_mr->ipz_mr_handle.handle); - ret = ehca2ib_return_code(h_ret); - break; - } else - ret = 0; - } /* end for(i) */ - - -ehca_reg_mr_rpages_exit1: - ehca_free_fw_ctrlblock(kpage); -ehca_reg_mr_rpages_exit0: - if (ret) - ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p pginfo=%p " - "num_kpages=%llx num_hwpages=%llx", ret, shca, e_mr, - pginfo, pginfo->num_kpages, pginfo->num_hwpages); - return ret; -} /* end ehca_reg_mr_rpages() */ - -/*----------------------------------------------------------------------*/ - -inline int ehca_rereg_mr_rereg1(struct ehca_shca *shca, - struct ehca_mr *e_mr, - u64 *iova_start, - u64 size, - u32 acl, - struct ehca_pd *e_pd, - struct ehca_mr_pginfo *pginfo, - u32 *lkey, /*OUT*/ - u32 *rkey) /*OUT*/ -{ - int ret; - u64 h_ret; - u32 hipz_acl; - u64 *kpage; - u64 rpage; - struct ehca_mr_pginfo pginfo_save; - struct ehca_mr_hipzout_parms hipzout; - - ehca_mrmw_map_acl(acl, &hipz_acl); - ehca_mrmw_set_pgsize_hipz_acl(pginfo->hwpage_size, &hipz_acl); - - kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL); - if (!kpage) { - ehca_err(&shca->ib_device, "kpage alloc failed"); - ret = -ENOMEM; - goto ehca_rereg_mr_rereg1_exit0; - } - - pginfo_save = *pginfo; - ret = ehca_set_pagebuf(pginfo, pginfo->num_hwpages, kpage); - if (ret) { - ehca_err(&shca->ib_device, "set pagebuf failed, e_mr=%p " - "pginfo=%p type=%x num_kpages=%llx num_hwpages=%llx " - "kpage=%p", e_mr, pginfo, pginfo->type, - pginfo->num_kpages, pginfo->num_hwpages, kpage); - goto ehca_rereg_mr_rereg1_exit1; - } - rpage = __pa(kpage); - if (!rpage) { - ehca_err(&shca->ib_device, "kpage=%p", kpage); - ret = -EFAULT; - goto 
ehca_rereg_mr_rereg1_exit1; - } - h_ret = hipz_h_reregister_pmr(shca->ipz_hca_handle, e_mr, - (u64)iova_start, size, hipz_acl, - e_pd->fw_pd, rpage, &hipzout); - if (h_ret != H_SUCCESS) { - /* - * reregistration unsuccessful, try it again with the 3 hCalls, - * e.g. this is required in case H_MR_CONDITION - * (MW bound or MR is shared) - */ - ehca_warn(&shca->ib_device, "hipz_h_reregister_pmr failed " - "(Rereg1), h_ret=%lli e_mr=%p", h_ret, e_mr); - *pginfo = pginfo_save; - ret = -EAGAIN; - } else if ((u64 *)hipzout.vaddr != iova_start) { - ehca_err(&shca->ib_device, "PHYP changed iova_start in " - "rereg_pmr, iova_start=%p iova_start_out=%llx e_mr=%p " - "mr_handle=%llx lkey=%x lkey_out=%x", iova_start, - hipzout.vaddr, e_mr, e_mr->ipz_mr_handle.handle, - e_mr->ib.ib_mr.lkey, hipzout.lkey); - ret = -EFAULT; - } else { - /* - * successful reregistration - * note: start and start_out are identical for eServer HCAs - */ - e_mr->num_kpages = pginfo->num_kpages; - e_mr->num_hwpages = pginfo->num_hwpages; - e_mr->hwpage_size = pginfo->hwpage_size; - e_mr->start = iova_start; - e_mr->size = size; - e_mr->acl = acl; - *lkey = hipzout.lkey; - *rkey = hipzout.rkey; - } - -ehca_rereg_mr_rereg1_exit1: - ehca_free_fw_ctrlblock(kpage); -ehca_rereg_mr_rereg1_exit0: - if ( ret && (ret != -EAGAIN) ) - ehca_err(&shca->ib_device, "ret=%i lkey=%x rkey=%x " - "pginfo=%p num_kpages=%llx num_hwpages=%llx", - ret, *lkey, *rkey, pginfo, pginfo->num_kpages, - pginfo->num_hwpages); - return ret; -} /* end ehca_rereg_mr_rereg1() */ - -/*----------------------------------------------------------------------*/ - -int ehca_rereg_mr(struct ehca_shca *shca, - struct ehca_mr *e_mr, - u64 *iova_start, - u64 size, - int acl, - struct ehca_pd *e_pd, - struct ehca_mr_pginfo *pginfo, - u32 *lkey, - u32 *rkey) -{ - int ret = 0; - u64 h_ret; - int rereg_1_hcall = 1; /* 1: use hipz_h_reregister_pmr directly */ - int rereg_3_hcall = 0; /* 1: use 3 hipz calls for reregistration */ - - /* first determine 
reregistration hCall(s) */ - if ((pginfo->num_hwpages > MAX_RPAGES) || - (e_mr->num_hwpages > MAX_RPAGES) || - (pginfo->num_hwpages > e_mr->num_hwpages)) { - ehca_dbg(&shca->ib_device, "Rereg3 case, " - "pginfo->num_hwpages=%llx e_mr->num_hwpages=%x", - pginfo->num_hwpages, e_mr->num_hwpages); - rereg_1_hcall = 0; - rereg_3_hcall = 1; - } - - if (e_mr->flags & EHCA_MR_FLAG_MAXMR) { /* check for max-MR */ - rereg_1_hcall = 0; - rereg_3_hcall = 1; - e_mr->flags &= ~EHCA_MR_FLAG_MAXMR; - ehca_err(&shca->ib_device, "Rereg MR for max-MR! e_mr=%p", - e_mr); - } - - if (rereg_1_hcall) { - ret = ehca_rereg_mr_rereg1(shca, e_mr, iova_start, size, - acl, e_pd, pginfo, lkey, rkey); - if (ret) { - if (ret == -EAGAIN) - rereg_3_hcall = 1; - else - goto ehca_rereg_mr_exit0; - } - } - - if (rereg_3_hcall) { - struct ehca_mr save_mr; - - /* first deregister old MR */ - h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr); - if (h_ret != H_SUCCESS) { - ehca_err(&shca->ib_device, "hipz_free_mr failed, " - "h_ret=%lli e_mr=%p hca_hndl=%llx mr_hndl=%llx " - "mr->lkey=%x", - h_ret, e_mr, shca->ipz_hca_handle.handle, - e_mr->ipz_mr_handle.handle, - e_mr->ib.ib_mr.lkey); - ret = ehca2ib_return_code(h_ret); - goto ehca_rereg_mr_exit0; - } - /* clean ehca_mr_t, without changing struct ib_mr and lock */ - save_mr = *e_mr; - ehca_mr_deletenew(e_mr); - - /* set some MR values */ - e_mr->flags = save_mr.flags; - e_mr->hwpage_size = save_mr.hwpage_size; - e_mr->fmr_page_size = save_mr.fmr_page_size; - e_mr->fmr_max_pages = save_mr.fmr_max_pages; - e_mr->fmr_max_maps = save_mr.fmr_max_maps; - e_mr->fmr_map_cnt = save_mr.fmr_map_cnt; - - ret = ehca_reg_mr(shca, e_mr, iova_start, size, acl, - e_pd, pginfo, lkey, rkey, EHCA_REG_MR); - if (ret) { - u32 offset = (u64)(&e_mr->flags) - (u64)e_mr; - memcpy(&e_mr->flags, &(save_mr.flags), - sizeof(struct ehca_mr) - offset); - goto ehca_rereg_mr_exit0; - } - } - -ehca_rereg_mr_exit0: - if (ret) - ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p " 
- "iova_start=%p size=%llx acl=%x e_pd=%p pginfo=%p " - "num_kpages=%llx lkey=%x rkey=%x rereg_1_hcall=%x " - "rereg_3_hcall=%x", ret, shca, e_mr, iova_start, size, - acl, e_pd, pginfo, pginfo->num_kpages, *lkey, *rkey, - rereg_1_hcall, rereg_3_hcall); - return ret; -} /* end ehca_rereg_mr() */ - -/*----------------------------------------------------------------------*/ - -int ehca_unmap_one_fmr(struct ehca_shca *shca, - struct ehca_mr *e_fmr) -{ - int ret = 0; - u64 h_ret; - struct ehca_pd *e_pd = - container_of(e_fmr->ib.ib_fmr.pd, struct ehca_pd, ib_pd); - struct ehca_mr save_fmr; - u32 tmp_lkey, tmp_rkey; - struct ehca_mr_pginfo pginfo; - struct ehca_mr_hipzout_parms hipzout; - struct ehca_mr save_mr; - - if (e_fmr->fmr_max_pages <= MAX_RPAGES) { - /* - * note: after using rereg hcall with len=0, - * rereg hcall must be used again for registering pages - */ - h_ret = hipz_h_reregister_pmr(shca->ipz_hca_handle, e_fmr, 0, - 0, 0, e_pd->fw_pd, 0, &hipzout); - if (h_ret == H_SUCCESS) { - /* successful reregistration */ - e_fmr->start = NULL; - e_fmr->size = 0; - tmp_lkey = hipzout.lkey; - tmp_rkey = hipzout.rkey; - return 0; - } - /* - * should not happen, because length checked above, - * FMRs are not shared and no MW bound to FMRs - */ - ehca_err(&shca->ib_device, "hipz_reregister_pmr failed " - "(Rereg1), h_ret=%lli e_fmr=%p hca_hndl=%llx " - "mr_hndl=%llx lkey=%x lkey_out=%x", - h_ret, e_fmr, shca->ipz_hca_handle.handle, - e_fmr->ipz_mr_handle.handle, - e_fmr->ib.ib_fmr.lkey, hipzout.lkey); - /* try free and rereg */ - } - - /* first free old FMR */ - h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr); - if (h_ret != H_SUCCESS) { - ehca_err(&shca->ib_device, "hipz_free_mr failed, " - "h_ret=%lli e_fmr=%p hca_hndl=%llx mr_hndl=%llx " - "lkey=%x", - h_ret, e_fmr, shca->ipz_hca_handle.handle, - e_fmr->ipz_mr_handle.handle, - e_fmr->ib.ib_fmr.lkey); - ret = ehca2ib_return_code(h_ret); - goto ehca_unmap_one_fmr_exit0; - } - /* clean ehca_mr_t, without 
changing lock */ - save_fmr = *e_fmr; - ehca_mr_deletenew(e_fmr); - - /* set some MR values */ - e_fmr->flags = save_fmr.flags; - e_fmr->hwpage_size = save_fmr.hwpage_size; - e_fmr->fmr_page_size = save_fmr.fmr_page_size; - e_fmr->fmr_max_pages = save_fmr.fmr_max_pages; - e_fmr->fmr_max_maps = save_fmr.fmr_max_maps; - e_fmr->fmr_map_cnt = save_fmr.fmr_map_cnt; - e_fmr->acl = save_fmr.acl; - - memset(&pginfo, 0, sizeof(pginfo)); - pginfo.type = EHCA_MR_PGI_FMR; - ret = ehca_reg_mr(shca, e_fmr, NULL, - (e_fmr->fmr_max_pages * e_fmr->fmr_page_size), - e_fmr->acl, e_pd, &pginfo, &tmp_lkey, - &tmp_rkey, EHCA_REG_MR); - if (ret) { - u32 offset = (u64)(&e_fmr->flags) - (u64)e_fmr; - memcpy(&e_fmr->flags, &(save_mr.flags), - sizeof(struct ehca_mr) - offset); - } - -ehca_unmap_one_fmr_exit0: - if (ret) - ehca_err(&shca->ib_device, "ret=%i tmp_lkey=%x tmp_rkey=%x " - "fmr_max_pages=%x", - ret, tmp_lkey, tmp_rkey, e_fmr->fmr_max_pages); - return ret; -} /* end ehca_unmap_one_fmr() */ - -/*----------------------------------------------------------------------*/ - -int ehca_reg_smr(struct ehca_shca *shca, - struct ehca_mr *e_origmr, - struct ehca_mr *e_newmr, - u64 *iova_start, - int acl, - struct ehca_pd *e_pd, - u32 *lkey, /*OUT*/ - u32 *rkey) /*OUT*/ -{ - int ret = 0; - u64 h_ret; - u32 hipz_acl; - struct ehca_mr_hipzout_parms hipzout; - - ehca_mrmw_map_acl(acl, &hipz_acl); - ehca_mrmw_set_pgsize_hipz_acl(e_origmr->hwpage_size, &hipz_acl); - - h_ret = hipz_h_register_smr(shca->ipz_hca_handle, e_newmr, e_origmr, - (u64)iova_start, hipz_acl, e_pd->fw_pd, - &hipzout); - if (h_ret != H_SUCCESS) { - ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%lli " - "shca=%p e_origmr=%p e_newmr=%p iova_start=%p acl=%x " - "e_pd=%p hca_hndl=%llx mr_hndl=%llx lkey=%x", - h_ret, shca, e_origmr, e_newmr, iova_start, acl, e_pd, - shca->ipz_hca_handle.handle, - e_origmr->ipz_mr_handle.handle, - e_origmr->ib.ib_mr.lkey); - ret = ehca2ib_return_code(h_ret); - goto ehca_reg_smr_exit0; - } - 
/* successful registration */ - e_newmr->num_kpages = e_origmr->num_kpages; - e_newmr->num_hwpages = e_origmr->num_hwpages; - e_newmr->hwpage_size = e_origmr->hwpage_size; - e_newmr->start = iova_start; - e_newmr->size = e_origmr->size; - e_newmr->acl = acl; - e_newmr->ipz_mr_handle = hipzout.handle; - *lkey = hipzout.lkey; - *rkey = hipzout.rkey; - return 0; - -ehca_reg_smr_exit0: - if (ret) - ehca_err(&shca->ib_device, "ret=%i shca=%p e_origmr=%p " - "e_newmr=%p iova_start=%p acl=%x e_pd=%p", - ret, shca, e_origmr, e_newmr, iova_start, acl, e_pd); - return ret; -} /* end ehca_reg_smr() */ - -/*----------------------------------------------------------------------*/ -static inline void *ehca_calc_sectbase(int top, int dir, int idx) -{ - unsigned long ret = idx; - ret |= dir << EHCA_DIR_INDEX_SHIFT; - ret |= top << EHCA_TOP_INDEX_SHIFT; - return __va(ret << SECTION_SIZE_BITS); -} - -#define ehca_bmap_valid(entry) \ - ((u64)entry != (u64)EHCA_INVAL_ADDR) - -static u64 ehca_reg_mr_section(int top, int dir, int idx, u64 *kpage, - struct ehca_shca *shca, struct ehca_mr *mr, - struct ehca_mr_pginfo *pginfo) -{ - u64 h_ret = 0; - unsigned long page = 0; - u64 rpage = __pa(kpage); - int page_count; - - void *sectbase = ehca_calc_sectbase(top, dir, idx); - if ((unsigned long)sectbase & (pginfo->hwpage_size - 1)) { - ehca_err(&shca->ib_device, "reg_mr_section will probably fail:" - "hwpage_size does not fit to " - "section start address"); - } - page_count = EHCA_SECTSIZE / pginfo->hwpage_size; - - while (page < page_count) { - u64 rnum; - for (rnum = 0; (rnum < MAX_RPAGES) && (page < page_count); - rnum++) { - void *pg = sectbase + ((page++) * pginfo->hwpage_size); - kpage[rnum] = __pa(pg); - } - - h_ret = hipz_h_register_rpage_mr(shca->ipz_hca_handle, mr, - ehca_encode_hwpage_size(pginfo->hwpage_size), - 0, rpage, rnum); - - if ((h_ret != H_SUCCESS) && (h_ret != H_PAGE_REGISTERED)) { - ehca_err(&shca->ib_device, "register_rpage_mr failed"); - return h_ret; - } - } - 
return h_ret; -} - -static u64 ehca_reg_mr_sections(int top, int dir, u64 *kpage, - struct ehca_shca *shca, struct ehca_mr *mr, - struct ehca_mr_pginfo *pginfo) -{ - u64 hret = H_SUCCESS; - int idx; - - for (idx = 0; idx < EHCA_MAP_ENTRIES; idx++) { - if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir]->ent[idx])) - continue; - - hret = ehca_reg_mr_section(top, dir, idx, kpage, shca, mr, - pginfo); - if ((hret != H_SUCCESS) && (hret != H_PAGE_REGISTERED)) - return hret; - } - return hret; -} - -static u64 ehca_reg_mr_dir_sections(int top, u64 *kpage, struct ehca_shca *shca, - struct ehca_mr *mr, - struct ehca_mr_pginfo *pginfo) -{ - u64 hret = H_SUCCESS; - int dir; - - for (dir = 0; dir < EHCA_MAP_ENTRIES; dir++) { - if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir])) - continue; - - hret = ehca_reg_mr_sections(top, dir, kpage, shca, mr, pginfo); - if ((hret != H_SUCCESS) && (hret != H_PAGE_REGISTERED)) - return hret; - } - return hret; -} - -/* register internal max-MR to internal SHCA */ -int ehca_reg_internal_maxmr( - struct ehca_shca *shca, - struct ehca_pd *e_pd, - struct ehca_mr **e_maxmr) /*OUT*/ -{ - int ret; - struct ehca_mr *e_mr; - u64 *iova_start; - u64 size_maxmr; - struct ehca_mr_pginfo pginfo; - struct ib_phys_buf ib_pbuf; - u32 num_kpages; - u32 num_hwpages; - u64 hw_pgsize; - - if (!ehca_bmap) { - ret = -EFAULT; - goto ehca_reg_internal_maxmr_exit0; - } - - e_mr = ehca_mr_new(); - if (!e_mr) { - ehca_err(&shca->ib_device, "out of memory"); - ret = -ENOMEM; - goto ehca_reg_internal_maxmr_exit0; - } - e_mr->flags |= EHCA_MR_FLAG_MAXMR; - - /* register internal max-MR on HCA */ - size_maxmr = ehca_mr_len; - iova_start = (u64 *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START)); - ib_pbuf.addr = 0; - ib_pbuf.size = size_maxmr; - num_kpages = NUM_CHUNKS(((u64)iova_start % PAGE_SIZE) + size_maxmr, - PAGE_SIZE); - hw_pgsize = ehca_get_max_hwpage_size(shca); - num_hwpages = NUM_CHUNKS(((u64)iova_start % hw_pgsize) + size_maxmr, - hw_pgsize); - - 
memset(&pginfo, 0, sizeof(pginfo)); - pginfo.type = EHCA_MR_PGI_PHYS; - pginfo.num_kpages = num_kpages; - pginfo.num_hwpages = num_hwpages; - pginfo.hwpage_size = hw_pgsize; - pginfo.u.phy.num_phys_buf = 1; - pginfo.u.phy.phys_buf_array = &ib_pbuf; - - ret = ehca_reg_mr(shca, e_mr, iova_start, size_maxmr, 0, e_pd, - &pginfo, &e_mr->ib.ib_mr.lkey, - &e_mr->ib.ib_mr.rkey, EHCA_REG_BUSMAP_MR); - if (ret) { - ehca_err(&shca->ib_device, "reg of internal max MR failed, " - "e_mr=%p iova_start=%p size_maxmr=%llx num_kpages=%x " - "num_hwpages=%x", e_mr, iova_start, size_maxmr, - num_kpages, num_hwpages); - goto ehca_reg_internal_maxmr_exit1; - } - - /* successful registration of all pages */ - e_mr->ib.ib_mr.device = e_pd->ib_pd.device; - e_mr->ib.ib_mr.pd = &e_pd->ib_pd; - e_mr->ib.ib_mr.uobject = NULL; - atomic_inc(&(e_pd->ib_pd.usecnt)); - atomic_set(&(e_mr->ib.ib_mr.usecnt), 0); - *e_maxmr = e_mr; - return 0; - -ehca_reg_internal_maxmr_exit1: - ehca_mr_delete(e_mr); -ehca_reg_internal_maxmr_exit0: - if (ret) - ehca_err(&shca->ib_device, "ret=%i shca=%p e_pd=%p e_maxmr=%p", - ret, shca, e_pd, e_maxmr); - return ret; -} /* end ehca_reg_internal_maxmr() */ - -/*----------------------------------------------------------------------*/ - -int ehca_reg_maxmr(struct ehca_shca *shca, - struct ehca_mr *e_newmr, - u64 *iova_start, - int acl, - struct ehca_pd *e_pd, - u32 *lkey, - u32 *rkey) -{ - u64 h_ret; - struct ehca_mr *e_origmr = shca->maxmr; - u32 hipz_acl; - struct ehca_mr_hipzout_parms hipzout; - - ehca_mrmw_map_acl(acl, &hipz_acl); - ehca_mrmw_set_pgsize_hipz_acl(e_origmr->hwpage_size, &hipz_acl); - - h_ret = hipz_h_register_smr(shca->ipz_hca_handle, e_newmr, e_origmr, - (u64)iova_start, hipz_acl, e_pd->fw_pd, - &hipzout); - if (h_ret != H_SUCCESS) { - ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%lli " - "e_origmr=%p hca_hndl=%llx mr_hndl=%llx lkey=%x", - h_ret, e_origmr, shca->ipz_hca_handle.handle, - e_origmr->ipz_mr_handle.handle, - 
e_origmr->ib.ib_mr.lkey); - return ehca2ib_return_code(h_ret); - } - /* successful registration */ - e_newmr->num_kpages = e_origmr->num_kpages; - e_newmr->num_hwpages = e_origmr->num_hwpages; - e_newmr->hwpage_size = e_origmr->hwpage_size; - e_newmr->start = iova_start; - e_newmr->size = e_origmr->size; - e_newmr->acl = acl; - e_newmr->ipz_mr_handle = hipzout.handle; - *lkey = hipzout.lkey; - *rkey = hipzout.rkey; - return 0; -} /* end ehca_reg_maxmr() */ - -/*----------------------------------------------------------------------*/ - -int ehca_dereg_internal_maxmr(struct ehca_shca *shca) -{ - int ret; - struct ehca_mr *e_maxmr; - struct ib_pd *ib_pd; - - if (!shca->maxmr) { - ehca_err(&shca->ib_device, "bad call, shca=%p", shca); - ret = -EINVAL; - goto ehca_dereg_internal_maxmr_exit0; - } - - e_maxmr = shca->maxmr; - ib_pd = e_maxmr->ib.ib_mr.pd; - shca->maxmr = NULL; /* remove internal max-MR indication from SHCA */ - - ret = ehca_dereg_mr(&e_maxmr->ib.ib_mr); - if (ret) { - ehca_err(&shca->ib_device, "dereg internal max-MR failed, " - "ret=%i e_maxmr=%p shca=%p lkey=%x", - ret, e_maxmr, shca, e_maxmr->ib.ib_mr.lkey); - shca->maxmr = e_maxmr; - goto ehca_dereg_internal_maxmr_exit0; - } - - atomic_dec(&ib_pd->usecnt); - -ehca_dereg_internal_maxmr_exit0: - if (ret) - ehca_err(&shca->ib_device, "ret=%i shca=%p shca->maxmr=%p", - ret, shca, shca->maxmr); - return ret; -} /* end ehca_dereg_internal_maxmr() */ - -/*----------------------------------------------------------------------*/ - -/* - * check physical buffer array of MR verbs for validness and - * calculates MR size - */ -int ehca_mr_chk_buf_and_calc_size(struct ib_phys_buf *phys_buf_array, - int num_phys_buf, - u64 *iova_start, - u64 *size) -{ - struct ib_phys_buf *pbuf = phys_buf_array; - u64 size_count = 0; - u32 i; - - if (num_phys_buf == 0) { - ehca_gen_err("bad phys buf array len, num_phys_buf=0"); - return -EINVAL; - } - /* check first buffer */ - if (((u64)iova_start & ~PAGE_MASK) != (pbuf->addr & 
~PAGE_MASK)) { - ehca_gen_err("iova_start/addr mismatch, iova_start=%p " - "pbuf->addr=%llx pbuf->size=%llx", - iova_start, pbuf->addr, pbuf->size); - return -EINVAL; - } - if (((pbuf->addr + pbuf->size) % PAGE_SIZE) && - (num_phys_buf > 1)) { - ehca_gen_err("addr/size mismatch in 1st buf, pbuf->addr=%llx " - "pbuf->size=%llx", pbuf->addr, pbuf->size); - return -EINVAL; - } - - for (i = 0; i < num_phys_buf; i++) { - if ((i > 0) && (pbuf->addr % PAGE_SIZE)) { - ehca_gen_err("bad address, i=%x pbuf->addr=%llx " - "pbuf->size=%llx", - i, pbuf->addr, pbuf->size); - return -EINVAL; - } - if (((i > 0) && /* not 1st */ - (i < (num_phys_buf - 1)) && /* not last */ - (pbuf->size % PAGE_SIZE)) || (pbuf->size == 0)) { - ehca_gen_err("bad size, i=%x pbuf->size=%llx", - i, pbuf->size); - return -EINVAL; - } - size_count += pbuf->size; - pbuf++; - } - - *size = size_count; - return 0; -} /* end ehca_mr_chk_buf_and_calc_size() */ - -/*----------------------------------------------------------------------*/ - -/* check page list of map FMR verb for validness */ -int ehca_fmr_check_page_list(struct ehca_mr *e_fmr, - u64 *page_list, - int list_len) -{ - u32 i; - u64 *page; - - if ((list_len == 0) || (list_len > e_fmr->fmr_max_pages)) { - ehca_gen_err("bad list_len, list_len=%x " - "e_fmr->fmr_max_pages=%x fmr=%p", - list_len, e_fmr->fmr_max_pages, e_fmr); - return -EINVAL; - } - - /* each page must be aligned */ - page = page_list; - for (i = 0; i < list_len; i++) { - if (*page % e_fmr->fmr_page_size) { - ehca_gen_err("bad page, i=%x *page=%llx page=%p fmr=%p " - "fmr_page_size=%x", i, *page, page, e_fmr, - e_fmr->fmr_page_size); - return -EINVAL; - } - page++; - } - - return 0; -} /* end ehca_fmr_check_page_list() */ - -/*----------------------------------------------------------------------*/ - -/* PAGE_SIZE >= pginfo->hwpage_size */ -static int ehca_set_pagebuf_user1(struct ehca_mr_pginfo *pginfo, - u32 number, - u64 *kpage) -{ - int ret = 0; - u64 pgaddr; - u32 j = 0; - int 
hwpages_per_kpage = PAGE_SIZE / pginfo->hwpage_size; - struct scatterlist **sg = &pginfo->u.usr.next_sg; - - while (*sg != NULL) { - pgaddr = page_to_pfn(sg_page(*sg)) - << PAGE_SHIFT; - *kpage = pgaddr + (pginfo->next_hwpage * - pginfo->hwpage_size); - if (!(*kpage)) { - ehca_gen_err("pgaddr=%llx " - "sg_dma_address=%llx " - "entry=%llx next_hwpage=%llx", - pgaddr, (u64)sg_dma_address(*sg), - pginfo->u.usr.next_nmap, - pginfo->next_hwpage); - return -EFAULT; - } - (pginfo->hwpage_cnt)++; - (pginfo->next_hwpage)++; - kpage++; - if (pginfo->next_hwpage % hwpages_per_kpage == 0) { - (pginfo->kpage_cnt)++; - (pginfo->u.usr.next_nmap)++; - pginfo->next_hwpage = 0; - *sg = sg_next(*sg); - } - j++; - if (j >= number) - break; - } - - return ret; -} - -/* - * check given pages for contiguous layout - * last page addr is returned in prev_pgaddr for further check - */ -static int ehca_check_kpages_per_ate(struct scatterlist **sg, - int num_pages, - u64 *prev_pgaddr) -{ - for (; *sg && num_pages > 0; *sg = sg_next(*sg), num_pages--) { - u64 pgaddr = page_to_pfn(sg_page(*sg)) << PAGE_SHIFT; - if (ehca_debug_level >= 3) - ehca_gen_dbg("chunk_page=%llx value=%016llx", pgaddr, - *(u64 *)__va(pgaddr)); - if (pgaddr - PAGE_SIZE != *prev_pgaddr) { - ehca_gen_err("uncontiguous page found pgaddr=%llx " - "prev_pgaddr=%llx entries_left_in_hwpage=%x", - pgaddr, *prev_pgaddr, num_pages); - return -EINVAL; - } - *prev_pgaddr = pgaddr; - } - return 0; -} - -/* PAGE_SIZE < pginfo->hwpage_size */ -static int ehca_set_pagebuf_user2(struct ehca_mr_pginfo *pginfo, - u32 number, - u64 *kpage) -{ - int ret = 0; - u64 pgaddr, prev_pgaddr; - u32 j = 0; - int kpages_per_hwpage = pginfo->hwpage_size / PAGE_SIZE; - int nr_kpages = kpages_per_hwpage; - struct scatterlist **sg = &pginfo->u.usr.next_sg; - - while (*sg != NULL) { - - if (nr_kpages == kpages_per_hwpage) { - pgaddr = (page_to_pfn(sg_page(*sg)) - << PAGE_SHIFT); - *kpage = pgaddr; - if (!(*kpage)) { - ehca_gen_err("pgaddr=%llx entry=%llx", 
- pgaddr, pginfo->u.usr.next_nmap); - ret = -EFAULT; - return ret; - } - /* - * The first page in a hwpage must be aligned; - * the first MR page is exempt from this rule. - */ - if (pgaddr & (pginfo->hwpage_size - 1)) { - if (pginfo->hwpage_cnt) { - ehca_gen_err( - "invalid alignment " - "pgaddr=%llx entry=%llx " - "mr_pgsize=%llx", - pgaddr, pginfo->u.usr.next_nmap, - pginfo->hwpage_size); - ret = -EFAULT; - return ret; - } - /* first MR page */ - pginfo->kpage_cnt = - (pgaddr & - (pginfo->hwpage_size - 1)) >> - PAGE_SHIFT; - nr_kpages -= pginfo->kpage_cnt; - *kpage = pgaddr & - ~(pginfo->hwpage_size - 1); - } - if (ehca_debug_level >= 3) { - u64 val = *(u64 *)__va(pgaddr); - ehca_gen_dbg("kpage=%llx page=%llx " - "value=%016llx", - *kpage, pgaddr, val); - } - prev_pgaddr = pgaddr; - *sg = sg_next(*sg); - pginfo->kpage_cnt++; - pginfo->u.usr.next_nmap++; - nr_kpages--; - if (!nr_kpages) - goto next_kpage; - continue; - } - - ret = ehca_check_kpages_per_ate(sg, nr_kpages, - &prev_pgaddr); - if (ret) - return ret; - pginfo->kpage_cnt += nr_kpages; - pginfo->u.usr.next_nmap += nr_kpages; - -next_kpage: - nr_kpages = kpages_per_hwpage; - (pginfo->hwpage_cnt)++; - kpage++; - j++; - if (j >= number) - break; - } - - return ret; -} - -static int ehca_set_pagebuf_phys(struct ehca_mr_pginfo *pginfo, - u32 number, u64 *kpage) -{ - int ret = 0; - struct ib_phys_buf *pbuf; - u64 num_hw, offs_hw; - u32 i = 0; - - /* loop over desired phys_buf_array entries */ - while (i < number) { - pbuf = pginfo->u.phy.phys_buf_array + pginfo->u.phy.next_buf; - num_hw = NUM_CHUNKS((pbuf->addr % pginfo->hwpage_size) + - pbuf->size, pginfo->hwpage_size); - offs_hw = (pbuf->addr & ~(pginfo->hwpage_size - 1)) / - pginfo->hwpage_size; - while (pginfo->next_hwpage < offs_hw + num_hw) { - /* sanity check */ - if ((pginfo->kpage_cnt >= pginfo->num_kpages) || - (pginfo->hwpage_cnt >= pginfo->num_hwpages)) { - ehca_gen_err("kpage_cnt >= num_kpages, " - "kpage_cnt=%llx num_kpages=%llx " - 
"hwpage_cnt=%llx " - "num_hwpages=%llx i=%x", - pginfo->kpage_cnt, - pginfo->num_kpages, - pginfo->hwpage_cnt, - pginfo->num_hwpages, i); - return -EFAULT; - } - *kpage = (pbuf->addr & ~(pginfo->hwpage_size - 1)) + - (pginfo->next_hwpage * pginfo->hwpage_size); - if ( !(*kpage) && pbuf->addr ) { - ehca_gen_err("pbuf->addr=%llx pbuf->size=%llx " - "next_hwpage=%llx", pbuf->addr, - pbuf->size, pginfo->next_hwpage); - return -EFAULT; - } - (pginfo->hwpage_cnt)++; - (pginfo->next_hwpage)++; - if (PAGE_SIZE >= pginfo->hwpage_size) { - if (pginfo->next_hwpage % - (PAGE_SIZE / pginfo->hwpage_size) == 0) - (pginfo->kpage_cnt)++; - } else - pginfo->kpage_cnt += pginfo->hwpage_size / - PAGE_SIZE; - kpage++; - i++; - if (i >= number) break; - } - if (pginfo->next_hwpage >= offs_hw + num_hw) { - (pginfo->u.phy.next_buf)++; - pginfo->next_hwpage = 0; - } - } - return ret; -} - -static int ehca_set_pagebuf_fmr(struct ehca_mr_pginfo *pginfo, - u32 number, u64 *kpage) -{ - int ret = 0; - u64 *fmrlist; - u32 i; - - /* loop over desired page_list entries */ - fmrlist = pginfo->u.fmr.page_list + pginfo->u.fmr.next_listelem; - for (i = 0; i < number; i++) { - *kpage = (*fmrlist & ~(pginfo->hwpage_size - 1)) + - pginfo->next_hwpage * pginfo->hwpage_size; - if ( !(*kpage) ) { - ehca_gen_err("*fmrlist=%llx fmrlist=%p " - "next_listelem=%llx next_hwpage=%llx", - *fmrlist, fmrlist, - pginfo->u.fmr.next_listelem, - pginfo->next_hwpage); - return -EFAULT; - } - (pginfo->hwpage_cnt)++; - if (pginfo->u.fmr.fmr_pgsize >= pginfo->hwpage_size) { - if (pginfo->next_hwpage % - (pginfo->u.fmr.fmr_pgsize / - pginfo->hwpage_size) == 0) { - (pginfo->kpage_cnt)++; - (pginfo->u.fmr.next_listelem)++; - fmrlist++; - pginfo->next_hwpage = 0; - } else - (pginfo->next_hwpage)++; - } else { - unsigned int cnt_per_hwpage = pginfo->hwpage_size / - pginfo->u.fmr.fmr_pgsize; - unsigned int j; - u64 prev = *kpage; - /* check if adrs are contiguous */ - for (j = 1; j < cnt_per_hwpage; j++) { - u64 p = fmrlist[j] & 
~(pginfo->hwpage_size - 1); - if (prev + pginfo->u.fmr.fmr_pgsize != p) { - ehca_gen_err("uncontiguous fmr pages " - "found prev=%llx p=%llx " - "idx=%x", prev, p, i + j); - return -EINVAL; - } - prev = p; - } - pginfo->kpage_cnt += cnt_per_hwpage; - pginfo->u.fmr.next_listelem += cnt_per_hwpage; - fmrlist += cnt_per_hwpage; - } - kpage++; - } - return ret; -} - -/* setup page buffer from page info */ -int ehca_set_pagebuf(struct ehca_mr_pginfo *pginfo, - u32 number, - u64 *kpage) -{ - int ret; - - switch (pginfo->type) { - case EHCA_MR_PGI_PHYS: - ret = ehca_set_pagebuf_phys(pginfo, number, kpage); - break; - case EHCA_MR_PGI_USER: - ret = PAGE_SIZE >= pginfo->hwpage_size ? - ehca_set_pagebuf_user1(pginfo, number, kpage) : - ehca_set_pagebuf_user2(pginfo, number, kpage); - break; - case EHCA_MR_PGI_FMR: - ret = ehca_set_pagebuf_fmr(pginfo, number, kpage); - break; - default: - ehca_gen_err("bad pginfo->type=%x", pginfo->type); - ret = -EFAULT; - break; - } - return ret; -} /* end ehca_set_pagebuf() */ - -/*----------------------------------------------------------------------*/ - -/* - * check MR if it is a max-MR, i.e. uses whole memory - * in case it's a max-MR 1 is returned, else 0 - */ -int ehca_mr_is_maxmr(u64 size, - u64 *iova_start) -{ - /* a MR is treated as max-MR only if it fits following: */ - if ((size == ehca_mr_len) && - (iova_start == (void *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START)))) { - ehca_gen_dbg("this is a max-MR"); - return 1; - } else - return 0; -} /* end ehca_mr_is_maxmr() */ - -/*----------------------------------------------------------------------*/ - -/* map access control for MR/MW. This routine is used for MR and MW. 
*/ -void ehca_mrmw_map_acl(int ib_acl, - u32 *hipz_acl) -{ - *hipz_acl = 0; - if (ib_acl & IB_ACCESS_REMOTE_READ) - *hipz_acl |= HIPZ_ACCESSCTRL_R_READ; - if (ib_acl & IB_ACCESS_REMOTE_WRITE) - *hipz_acl |= HIPZ_ACCESSCTRL_R_WRITE; - if (ib_acl & IB_ACCESS_REMOTE_ATOMIC) - *hipz_acl |= HIPZ_ACCESSCTRL_R_ATOMIC; - if (ib_acl & IB_ACCESS_LOCAL_WRITE) - *hipz_acl |= HIPZ_ACCESSCTRL_L_WRITE; - if (ib_acl & IB_ACCESS_MW_BIND) - *hipz_acl |= HIPZ_ACCESSCTRL_MW_BIND; -} /* end ehca_mrmw_map_acl() */ - -/*----------------------------------------------------------------------*/ - -/* sets page size in hipz access control for MR/MW. */ -void ehca_mrmw_set_pgsize_hipz_acl(u32 pgsize, u32 *hipz_acl) /*INOUT*/ -{ - *hipz_acl |= (ehca_encode_hwpage_size(pgsize) << 24); -} /* end ehca_mrmw_set_pgsize_hipz_acl() */ - -/*----------------------------------------------------------------------*/ - -/* - * reverse map access control for MR/MW. - * This routine is used for MR and MW. - */ -void ehca_mrmw_reverse_map_acl(const u32 *hipz_acl, - int *ib_acl) /*OUT*/ -{ - *ib_acl = 0; - if (*hipz_acl & HIPZ_ACCESSCTRL_R_READ) - *ib_acl |= IB_ACCESS_REMOTE_READ; - if (*hipz_acl & HIPZ_ACCESSCTRL_R_WRITE) - *ib_acl |= IB_ACCESS_REMOTE_WRITE; - if (*hipz_acl & HIPZ_ACCESSCTRL_R_ATOMIC) - *ib_acl |= IB_ACCESS_REMOTE_ATOMIC; - if (*hipz_acl & HIPZ_ACCESSCTRL_L_WRITE) - *ib_acl |= IB_ACCESS_LOCAL_WRITE; - if (*hipz_acl & HIPZ_ACCESSCTRL_MW_BIND) - *ib_acl |= IB_ACCESS_MW_BIND; -} /* end ehca_mrmw_reverse_map_acl() */ - - -/*----------------------------------------------------------------------*/ - -/* - * MR destructor and constructor - * used in Reregister MR verb, sets all fields in ehca_mr_t to 0, - * except struct ib_mr and spinlock - */ -void ehca_mr_deletenew(struct ehca_mr *mr) -{ - mr->flags = 0; - mr->num_kpages = 0; - mr->num_hwpages = 0; - mr->acl = 0; - mr->start = NULL; - mr->fmr_page_size = 0; - mr->fmr_max_pages = 0; - mr->fmr_max_maps = 0; - mr->fmr_map_cnt = 0; - 
memset(&mr->ipz_mr_handle, 0, sizeof(mr->ipz_mr_handle)); - memset(&mr->galpas, 0, sizeof(mr->galpas)); -} /* end ehca_mr_deletenew() */ - -int ehca_init_mrmw_cache(void) -{ - mr_cache = kmem_cache_create("ehca_cache_mr", - sizeof(struct ehca_mr), 0, - SLAB_HWCACHE_ALIGN, - NULL); - if (!mr_cache) - return -ENOMEM; - mw_cache = kmem_cache_create("ehca_cache_mw", - sizeof(struct ehca_mw), 0, - SLAB_HWCACHE_ALIGN, - NULL); - if (!mw_cache) { - kmem_cache_destroy(mr_cache); - mr_cache = NULL; - return -ENOMEM; - } - return 0; -} - -void ehca_cleanup_mrmw_cache(void) -{ - if (mr_cache) - kmem_cache_destroy(mr_cache); - if (mw_cache) - kmem_cache_destroy(mw_cache); -} - -static inline int ehca_init_top_bmap(struct ehca_top_bmap *ehca_top_bmap, - int dir) -{ - if (!ehca_bmap_valid(ehca_top_bmap->dir[dir])) { - ehca_top_bmap->dir[dir] = - kmalloc(sizeof(struct ehca_dir_bmap), GFP_KERNEL); - if (!ehca_top_bmap->dir[dir]) - return -ENOMEM; - /* Set map block to 0xFF according to EHCA_INVAL_ADDR */ - memset(ehca_top_bmap->dir[dir], 0xFF, EHCA_ENT_MAP_SIZE); - } - return 0; -} - -static inline int ehca_init_bmap(struct ehca_bmap *ehca_bmap, int top, int dir) -{ - if (!ehca_bmap_valid(ehca_bmap->top[top])) { - ehca_bmap->top[top] = - kmalloc(sizeof(struct ehca_top_bmap), GFP_KERNEL); - if (!ehca_bmap->top[top]) - return -ENOMEM; - /* Set map block to 0xFF according to EHCA_INVAL_ADDR */ - memset(ehca_bmap->top[top], 0xFF, EHCA_DIR_MAP_SIZE); - } - return ehca_init_top_bmap(ehca_bmap->top[top], dir); -} - -static inline int ehca_calc_index(unsigned long i, unsigned long s) -{ - return (i >> s) & EHCA_INDEX_MASK; -} - -void ehca_destroy_busmap(void) -{ - int top, dir; - - if (!ehca_bmap) - return; - - for (top = 0; top < EHCA_MAP_ENTRIES; top++) { - if (!ehca_bmap_valid(ehca_bmap->top[top])) - continue; - for (dir = 0; dir < EHCA_MAP_ENTRIES; dir++) { - if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir])) - continue; - - kfree(ehca_bmap->top[top]->dir[dir]); - } - - 
kfree(ehca_bmap->top[top]); - } - - kfree(ehca_bmap); - ehca_bmap = NULL; -} - -static int ehca_update_busmap(unsigned long pfn, unsigned long nr_pages) -{ - unsigned long i, start_section, end_section; - int top, dir, idx; - - if (!nr_pages) - return 0; - - if (!ehca_bmap) { - ehca_bmap = kmalloc(sizeof(struct ehca_bmap), GFP_KERNEL); - if (!ehca_bmap) - return -ENOMEM; - /* Set map block to 0xFF according to EHCA_INVAL_ADDR */ - memset(ehca_bmap, 0xFF, EHCA_TOP_MAP_SIZE); - } - - start_section = (pfn * PAGE_SIZE) / EHCA_SECTSIZE; - end_section = ((pfn + nr_pages) * PAGE_SIZE) / EHCA_SECTSIZE; - for (i = start_section; i < end_section; i++) { - int ret; - top = ehca_calc_index(i, EHCA_TOP_INDEX_SHIFT); - dir = ehca_calc_index(i, EHCA_DIR_INDEX_SHIFT); - idx = i & EHCA_INDEX_MASK; - - ret = ehca_init_bmap(ehca_bmap, top, dir); - if (ret) { - ehca_destroy_busmap(); - return ret; - } - ehca_bmap->top[top]->dir[dir]->ent[idx] = ehca_mr_len; - ehca_mr_len += EHCA_SECTSIZE; - } - return 0; -} - -static int ehca_is_hugepage(unsigned long pfn) -{ - int page_order; - - if (pfn & EHCA_HUGEPAGE_PFN_MASK) - return 0; - - page_order = compound_order(pfn_to_page(pfn)); - if (page_order + PAGE_SHIFT != EHCA_HUGEPAGESHIFT) - return 0; - - return 1; -} - -static int ehca_create_busmap_callback(unsigned long initial_pfn, - unsigned long total_nr_pages, void *arg) -{ - int ret; - unsigned long pfn, start_pfn, end_pfn, nr_pages; - - if ((total_nr_pages * PAGE_SIZE) < EHCA_HUGEPAGE_SIZE) - return ehca_update_busmap(initial_pfn, total_nr_pages); - - /* Given chunk is >= 16GB -> check for hugepages */ - start_pfn = initial_pfn; - end_pfn = initial_pfn + total_nr_pages; - pfn = start_pfn; - - while (pfn < end_pfn) { - if (ehca_is_hugepage(pfn)) { - /* Add mem found in front of the hugepage */ - nr_pages = pfn - start_pfn; - ret = ehca_update_busmap(start_pfn, nr_pages); - if (ret) - return ret; - /* Skip the hugepage */ - pfn += (EHCA_HUGEPAGE_SIZE / PAGE_SIZE); - start_pfn = pfn; - } 
else - pfn += (EHCA_SECTSIZE / PAGE_SIZE); - } - - /* Add mem found behind the hugepage(s) */ - nr_pages = pfn - start_pfn; - return ehca_update_busmap(start_pfn, nr_pages); -} - -int ehca_create_busmap(void) -{ - int ret; - - ehca_mr_len = 0; - ret = walk_system_ram_range(0, 1ULL << MAX_PHYSMEM_BITS, NULL, - ehca_create_busmap_callback); - return ret; -} - -static int ehca_reg_bmap_mr_rpages(struct ehca_shca *shca, - struct ehca_mr *e_mr, - struct ehca_mr_pginfo *pginfo) -{ - int top; - u64 hret, *kpage; - - kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL); - if (!kpage) { - ehca_err(&shca->ib_device, "kpage alloc failed"); - return -ENOMEM; - } - for (top = 0; top < EHCA_MAP_ENTRIES; top++) { - if (!ehca_bmap_valid(ehca_bmap->top[top])) - continue; - hret = ehca_reg_mr_dir_sections(top, kpage, shca, e_mr, pginfo); - if ((hret != H_PAGE_REGISTERED) && (hret != H_SUCCESS)) - break; - } - - ehca_free_fw_ctrlblock(kpage); - - if (hret == H_SUCCESS) - return 0; /* Everything is fine */ - else { - ehca_err(&shca->ib_device, "ehca_reg_bmap_mr_rpages failed, " - "h_ret=%lli e_mr=%p top=%x lkey=%x " - "hca_hndl=%llx mr_hndl=%llx", hret, e_mr, top, - e_mr->ib.ib_mr.lkey, - shca->ipz_hca_handle.handle, - e_mr->ipz_mr_handle.handle); - return ehca2ib_return_code(hret); - } -} - -static u64 ehca_map_vaddr(void *caddr) -{ - int top, dir, idx; - unsigned long abs_addr, offset; - u64 entry; - - if (!ehca_bmap) - return EHCA_INVAL_ADDR; - - abs_addr = __pa(caddr); - top = ehca_calc_index(abs_addr, EHCA_TOP_INDEX_SHIFT + EHCA_SECTSHIFT); - if (!ehca_bmap_valid(ehca_bmap->top[top])) - return EHCA_INVAL_ADDR; - - dir = ehca_calc_index(abs_addr, EHCA_DIR_INDEX_SHIFT + EHCA_SECTSHIFT); - if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir])) - return EHCA_INVAL_ADDR; - - idx = ehca_calc_index(abs_addr, EHCA_SECTSHIFT); - - entry = ehca_bmap->top[top]->dir[dir]->ent[idx]; - if (ehca_bmap_valid(entry)) { - offset = (unsigned long)caddr & (EHCA_SECTSIZE - 1); - return entry | offset; - } else - 
return EHCA_INVAL_ADDR; -} - -static int ehca_dma_mapping_error(struct ib_device *dev, u64 dma_addr) -{ - return dma_addr == EHCA_INVAL_ADDR; -} - -static u64 ehca_dma_map_single(struct ib_device *dev, void *cpu_addr, - size_t size, enum dma_data_direction direction) -{ - if (cpu_addr) - return ehca_map_vaddr(cpu_addr); - else - return EHCA_INVAL_ADDR; -} - -static void ehca_dma_unmap_single(struct ib_device *dev, u64 addr, size_t size, - enum dma_data_direction direction) -{ - /* This is only a stub; nothing to be done here */ -} - -static u64 ehca_dma_map_page(struct ib_device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction direction) -{ - u64 addr; - - if (offset + size > PAGE_SIZE) - return EHCA_INVAL_ADDR; - - addr = ehca_map_vaddr(page_address(page)); - if (!ehca_dma_mapping_error(dev, addr)) - addr += offset; - - return addr; -} - -static void ehca_dma_unmap_page(struct ib_device *dev, u64 addr, size_t size, - enum dma_data_direction direction) -{ - /* This is only a stub; nothing to be done here */ -} - -static int ehca_dma_map_sg(struct ib_device *dev, struct scatterlist *sgl, - int nents, enum dma_data_direction direction) -{ - struct scatterlist *sg; - int i; - - for_each_sg(sgl, sg, nents, i) { - u64 addr; - addr = ehca_map_vaddr(sg_virt(sg)); - if (ehca_dma_mapping_error(dev, addr)) - return 0; - - sg->dma_address = addr; - sg->dma_length = sg->length; - } - return nents; -} - -static void ehca_dma_unmap_sg(struct ib_device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction direction) -{ - /* This is only a stub; nothing to be done here */ -} - -static void ehca_dma_sync_single_for_cpu(struct ib_device *dev, u64 addr, - size_t size, - enum dma_data_direction dir) -{ - dma_sync_single_for_cpu(dev->dma_device, addr, size, dir); -} - -static void ehca_dma_sync_single_for_device(struct ib_device *dev, u64 addr, - size_t size, - enum dma_data_direction dir) -{ - 
dma_sync_single_for_device(dev->dma_device, addr, size, dir); -} - -static void *ehca_dma_alloc_coherent(struct ib_device *dev, size_t size, - u64 *dma_handle, gfp_t flag) -{ - struct page *p; - void *addr = NULL; - u64 dma_addr; - - p = alloc_pages(flag, get_order(size)); - if (p) { - addr = page_address(p); - dma_addr = ehca_map_vaddr(addr); - if (ehca_dma_mapping_error(dev, dma_addr)) { - free_pages((unsigned long)addr, get_order(size)); - return NULL; - } - if (dma_handle) - *dma_handle = dma_addr; - return addr; - } - return NULL; -} - -static void ehca_dma_free_coherent(struct ib_device *dev, size_t size, - void *cpu_addr, u64 dma_handle) -{ - if (cpu_addr && size) - free_pages((unsigned long)cpu_addr, get_order(size)); -} - - -struct ib_dma_mapping_ops ehca_dma_mapping_ops = { - .mapping_error = ehca_dma_mapping_error, - .map_single = ehca_dma_map_single, - .unmap_single = ehca_dma_unmap_single, - .map_page = ehca_dma_map_page, - .unmap_page = ehca_dma_unmap_page, - .map_sg = ehca_dma_map_sg, - .unmap_sg = ehca_dma_unmap_sg, - .sync_single_for_cpu = ehca_dma_sync_single_for_cpu, - .sync_single_for_device = ehca_dma_sync_single_for_device, - .alloc_coherent = ehca_dma_alloc_coherent, - .free_coherent = ehca_dma_free_coherent, -}; diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.h b/drivers/infiniband/hw/ehca/ehca_mrmw.h deleted file mode 100644 index 50d8b51..0000000 --- a/drivers/infiniband/hw/ehca/ehca_mrmw.h +++ /dev/null @@ -1,132 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * MR/MW declarations and inline functions - * - * Authors: Dietmar Decker - * Christoph Raisch - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. 
- * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef _EHCA_MRMW_H_ -#define _EHCA_MRMW_H_ - -enum ehca_reg_type { - EHCA_REG_MR, - EHCA_REG_BUSMAP_MR -}; - -int ehca_reg_mr(struct ehca_shca *shca, - struct ehca_mr *e_mr, - u64 *iova_start, - u64 size, - int acl, - struct ehca_pd *e_pd, - struct ehca_mr_pginfo *pginfo, - u32 *lkey, - u32 *rkey, - enum ehca_reg_type reg_type); - -int ehca_reg_mr_rpages(struct ehca_shca *shca, - struct ehca_mr *e_mr, - struct ehca_mr_pginfo *pginfo); - -int ehca_rereg_mr(struct ehca_shca *shca, - struct ehca_mr *e_mr, - u64 *iova_start, - u64 size, - int mr_access_flags, - struct ehca_pd *e_pd, - struct ehca_mr_pginfo *pginfo, - u32 *lkey, - u32 *rkey); - -int ehca_unmap_one_fmr(struct ehca_shca *shca, - struct ehca_mr *e_fmr); - -int ehca_reg_smr(struct ehca_shca *shca, - struct ehca_mr *e_origmr, - struct ehca_mr *e_newmr, - u64 *iova_start, - int acl, - struct ehca_pd *e_pd, - u32 *lkey, - u32 *rkey); - -int ehca_reg_internal_maxmr(struct ehca_shca *shca, - struct ehca_pd *e_pd, - struct ehca_mr **maxmr); - -int ehca_reg_maxmr(struct ehca_shca *shca, - struct ehca_mr *e_newmr, - u64 *iova_start, - int acl, - struct ehca_pd *e_pd, - u32 *lkey, - u32 *rkey); - -int ehca_dereg_internal_maxmr(struct ehca_shca *shca); - -int ehca_mr_chk_buf_and_calc_size(struct ib_phys_buf *phys_buf_array, - int num_phys_buf, - u64 *iova_start, - u64 *size); - -int ehca_fmr_check_page_list(struct ehca_mr *e_fmr, - u64 *page_list, - int list_len); - -int ehca_set_pagebuf(struct ehca_mr_pginfo *pginfo, - u32 number, - u64 *kpage); - -int ehca_mr_is_maxmr(u64 size, - u64 *iova_start); - -void ehca_mrmw_map_acl(int ib_acl, - u32 *hipz_acl); - -void ehca_mrmw_set_pgsize_hipz_acl(u32 pgsize, u32 *hipz_acl); - -void ehca_mrmw_reverse_map_acl(const u32 *hipz_acl, - int *ib_acl); - -void ehca_mr_deletenew(struct ehca_mr *mr); - -int ehca_create_busmap(void); - -void ehca_destroy_busmap(void); - -extern struct ib_dma_mapping_ops ehca_dma_mapping_ops; -#endif /*_EHCA_MRMW_H_*/ diff --git 
a/drivers/infiniband/hw/ehca/ehca_pd.c b/drivers/infiniband/hw/ehca/ehca_pd.c deleted file mode 100644 index 351577a..0000000 --- a/drivers/infiniband/hw/ehca/ehca_pd.c +++ /dev/null @@ -1,124 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * PD functions - * - * Authors: Christoph Raisch - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include - -#include "ehca_tools.h" -#include "ehca_iverbs.h" - -static struct kmem_cache *pd_cache; - -struct ib_pd *ehca_alloc_pd(struct ib_device *device, - struct ib_ucontext *context, struct ib_udata *udata) -{ - struct ehca_pd *pd; - int i; - - pd = kmem_cache_zalloc(pd_cache, GFP_KERNEL); - if (!pd) { - ehca_err(device, "device=%p context=%p out of memory", - device, context); - return ERR_PTR(-ENOMEM); - } - - for (i = 0; i < 2; i++) { - INIT_LIST_HEAD(&pd->free[i]); - INIT_LIST_HEAD(&pd->full[i]); - } - mutex_init(&pd->lock); - - /* - * Kernel PD: when device = -1, 0 - * User PD: when context != -1 - */ - if (!context) { - /* - * Kernel PDs after init reuses always - * the one created in ehca_shca_reopen() - */ - struct ehca_shca *shca = container_of(device, struct ehca_shca, - ib_device); - pd->fw_pd.value = shca->pd->fw_pd.value; - } else - pd->fw_pd.value = (u64)pd; - - return &pd->ib_pd; -} - -int ehca_dealloc_pd(struct ib_pd *pd) -{ - struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd); - int i, leftovers = 0; - struct ipz_small_queue_page *page, *tmp; - - for (i = 0; i < 2; i++) { - list_splice(&my_pd->full[i], &my_pd->free[i]); - list_for_each_entry_safe(page, tmp, &my_pd->free[i], list) { - leftovers = 1; - free_page(page->page); - kmem_cache_free(small_qp_cache, page); - } - } - - if (leftovers) - ehca_warn(pd->device, - "Some small queue pages were not freed"); - - kmem_cache_free(pd_cache, my_pd); - - return 0; -} - -int ehca_init_pd_cache(void) -{ - pd_cache = kmem_cache_create("ehca_cache_pd", - sizeof(struct ehca_pd), 0, - SLAB_HWCACHE_ALIGN, - NULL); - if (!pd_cache) - return -ENOMEM; - return 0; -} - -void ehca_cleanup_pd_cache(void) -{ - if (pd_cache) - kmem_cache_destroy(pd_cache); -} diff --git a/drivers/infiniband/hw/ehca/ehca_qes.h b/drivers/infiniband/hw/ehca/ehca_qes.h deleted file mode 100644 index 90c4efa..0000000 --- a/drivers/infiniband/hw/ehca/ehca_qes.h +++ /dev/null @@ -1,260 +0,0 @@ -/* - * IBM eServer 
eHCA Infiniband device driver for Linux on POWER - * - * Hardware request structures - * - * Authors: Waleri Fomin - * Reinhard Ernst - * Christoph Raisch - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - - -#ifndef _EHCA_QES_H_ -#define _EHCA_QES_H_ - -#include "ehca_tools.h" - -/* virtual scatter gather entry to specify remote addresses with length */ -struct ehca_vsgentry { - u64 vaddr; - u32 lkey; - u32 length; -}; - -#define GRH_FLAG_MASK EHCA_BMASK_IBM( 7, 7) -#define GRH_IPVERSION_MASK EHCA_BMASK_IBM( 0, 3) -#define GRH_TCLASS_MASK EHCA_BMASK_IBM( 4, 12) -#define GRH_FLOWLABEL_MASK EHCA_BMASK_IBM(13, 31) -#define GRH_PAYLEN_MASK EHCA_BMASK_IBM(32, 47) -#define GRH_NEXTHEADER_MASK EHCA_BMASK_IBM(48, 55) -#define GRH_HOPLIMIT_MASK EHCA_BMASK_IBM(56, 63) - -/* - * Unreliable Datagram Address Vector Format - * see IBTA Vol1 chapter 8.3 Global Routing Header - */ -struct ehca_ud_av { - u8 sl; - u8 lnh; - u16 dlid; - u8 reserved1; - u8 reserved2; - u8 reserved3; - u8 slid_path_bits; - u8 reserved4; - u8 ipd; - u8 reserved5; - u8 pmtu; - u32 reserved6; - u64 reserved7; - union { - struct { - u64 word_0; /* always set to 6 */ - /*should be 0x1B for IB transport */ - u64 word_1; - u64 word_2; - u64 word_3; - u64 word_4; - } grh; - struct { - u32 wd_0; - u32 wd_1; - /* DWord_1 --> SGID */ - - u32 sgid_wd3; - u32 sgid_wd2; - - u32 sgid_wd1; - u32 sgid_wd0; - /* DWord_3 --> DGID */ - - u32 dgid_wd3; - u32 dgid_wd2; - - u32 dgid_wd1; - u32 dgid_wd0; - } grh_l; - }; -}; - -/* maximum number of sg entries allowed in a WQE */ -#define MAX_WQE_SG_ENTRIES 252 - -#define WQE_OPTYPE_SEND 0x80 -#define WQE_OPTYPE_RDMAREAD 0x40 -#define WQE_OPTYPE_RDMAWRITE 0x20 -#define WQE_OPTYPE_CMPSWAP 0x10 -#define WQE_OPTYPE_FETCHADD 0x08 -#define WQE_OPTYPE_BIND 0x04 - -#define WQE_WRFLAG_REQ_SIGNAL_COM 0x80 -#define WQE_WRFLAG_FENCE 0x40 -#define WQE_WRFLAG_IMM_DATA_PRESENT 0x20 -#define WQE_WRFLAG_SOLIC_EVENT 0x10 - -#define WQEF_CACHE_HINT 0x80 -#define WQEF_CACHE_HINT_RD_WR 0x40 -#define WQEF_TIMED_WQE 0x20 -#define WQEF_PURGE 0x08 -#define WQEF_HIGH_NIBBLE 0xF0 - -#define MW_BIND_ACCESSCTRL_R_WRITE 0x40 -#define MW_BIND_ACCESSCTRL_R_READ 0x20 -#define MW_BIND_ACCESSCTRL_R_ATOMIC 
0x10 - -struct ehca_wqe { - u64 work_request_id; - u8 optype; - u8 wr_flag; - u16 pkeyi; - u8 wqef; - u8 nr_of_data_seg; - u16 wqe_provided_slid; - u32 destination_qp_number; - u32 resync_psn_sqp; - u32 local_ee_context_qkey; - u32 immediate_data; - union { - struct { - u64 remote_virtual_address; - u32 rkey; - u32 reserved; - u64 atomic_1st_op_dma_len; - u64 atomic_2nd_op; - struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES]; - - } nud; - struct { - u64 ehca_ud_av_ptr; - u64 reserved1; - u64 reserved2; - u64 reserved3; - struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES]; - } ud_avp; - struct { - struct ehca_ud_av ud_av; - struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES - - 2]; - } ud_av; - struct { - u64 reserved0; - u64 reserved1; - u64 reserved2; - u64 reserved3; - struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES]; - } all_rcv; - - struct { - u64 reserved; - u32 rkey; - u32 old_rkey; - u64 reserved1; - u64 reserved2; - u64 virtual_address; - u32 reserved3; - u32 length; - u32 reserved4; - u16 reserved5; - u8 reserved6; - u8 lr_ctl; - u32 lkey; - u32 reserved7; - u64 reserved8; - u64 reserved9; - u64 reserved10; - u64 reserved11; - } bind; - struct { - u64 reserved12; - u64 reserved13; - u32 size; - u32 start; - } inline_data; - } u; - -}; - -#define WC_SEND_RECEIVE EHCA_BMASK_IBM(0, 0) -#define WC_IMM_DATA EHCA_BMASK_IBM(1, 1) -#define WC_GRH_PRESENT EHCA_BMASK_IBM(2, 2) -#define WC_SE_BIT EHCA_BMASK_IBM(3, 3) -#define WC_STATUS_ERROR_BIT 0x80000000 -#define WC_STATUS_REMOTE_ERROR_FLAGS 0x0000F800 -#define WC_STATUS_PURGE_BIT 0x10 -#define WC_SEND_RECEIVE_BIT 0x80 - -struct ehca_cqe { - u64 work_request_id; - u8 optype; - u8 w_completion_flags; - u16 reserved1; - u32 nr_bytes_transferred; - u32 immediate_data; - u32 local_qp_number; - u8 freed_resource_count; - u8 service_level; - u16 wqe_count; - u32 qp_token; - u32 qkey_ee_token; - u32 remote_qp_number; - u16 dlid; - u16 rlid; - u16 reserved2; - u16 pkey_index; - u32 cqe_timestamp; - u32 wqe_timestamp; - u8 
wqe_timestamp_valid; - u8 reserved3; - u8 reserved4; - u8 cqe_flags; - u32 status; -}; - -struct ehca_eqe { - u64 entry; -}; - -struct ehca_mrte { - u64 starting_va; - u64 length; /* length of memory region in bytes*/ - u32 pd; - u8 key_instance; - u8 pagesize; - u8 mr_control; - u8 local_remote_access_ctrl; - u8 reserved[0x20 - 0x18]; - u64 at_pointer[4]; -}; -#endif /*_EHCA_QES_H_*/ diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c deleted file mode 100644 index 2e89356..0000000 --- a/drivers/infiniband/hw/ehca/ehca_qp.c +++ /dev/null @@ -1,2257 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * QP functions - * - * Authors: Joachim Fenkes - * Stefan Roscher - * Waleri Fomin - * Hoang-Nam Nguyen - * Reinhard Ernst - * Heiko J Schick - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include - -#include "ehca_classes.h" -#include "ehca_tools.h" -#include "ehca_qes.h" -#include "ehca_iverbs.h" -#include "hcp_if.h" -#include "hipz_fns.h" - -static struct kmem_cache *qp_cache; - -/* - * attributes not supported by query qp - */ -#define QP_ATTR_QUERY_NOT_SUPPORTED (IB_QP_ACCESS_FLAGS | \ - IB_QP_EN_SQD_ASYNC_NOTIFY) - -/* - * ehca (internal) qp state values - */ -enum ehca_qp_state { - EHCA_QPS_RESET = 1, - EHCA_QPS_INIT = 2, - EHCA_QPS_RTR = 3, - EHCA_QPS_RTS = 5, - EHCA_QPS_SQD = 6, - EHCA_QPS_SQE = 8, - EHCA_QPS_ERR = 128 -}; - -/* - * qp state transitions as defined by IB Arch Rel 1.1 page 431 - */ -enum ib_qp_statetrans { - IB_QPST_ANY2RESET, - IB_QPST_ANY2ERR, - IB_QPST_RESET2INIT, - IB_QPST_INIT2RTR, - IB_QPST_INIT2INIT, - IB_QPST_RTR2RTS, - IB_QPST_RTS2SQD, - IB_QPST_RTS2RTS, - IB_QPST_SQD2RTS, - IB_QPST_SQE2RTS, - IB_QPST_SQD2SQD, - IB_QPST_MAX /* nr of transitions, this must be last!!! 
*/ -}; - -/* - * ib2ehca_qp_state maps IB to ehca qp_state - * returns ehca qp state corresponding to given ib qp state - */ -static inline enum ehca_qp_state ib2ehca_qp_state(enum ib_qp_state ib_qp_state) -{ - switch (ib_qp_state) { - case IB_QPS_RESET: - return EHCA_QPS_RESET; - case IB_QPS_INIT: - return EHCA_QPS_INIT; - case IB_QPS_RTR: - return EHCA_QPS_RTR; - case IB_QPS_RTS: - return EHCA_QPS_RTS; - case IB_QPS_SQD: - return EHCA_QPS_SQD; - case IB_QPS_SQE: - return EHCA_QPS_SQE; - case IB_QPS_ERR: - return EHCA_QPS_ERR; - default: - ehca_gen_err("invalid ib_qp_state=%x", ib_qp_state); - return -EINVAL; - } -} - -/* - * ehca2ib_qp_state maps ehca to IB qp_state - * returns ib qp state corresponding to given ehca qp state - */ -static inline enum ib_qp_state ehca2ib_qp_state(enum ehca_qp_state - ehca_qp_state) -{ - switch (ehca_qp_state) { - case EHCA_QPS_RESET: - return IB_QPS_RESET; - case EHCA_QPS_INIT: - return IB_QPS_INIT; - case EHCA_QPS_RTR: - return IB_QPS_RTR; - case EHCA_QPS_RTS: - return IB_QPS_RTS; - case EHCA_QPS_SQD: - return IB_QPS_SQD; - case EHCA_QPS_SQE: - return IB_QPS_SQE; - case EHCA_QPS_ERR: - return IB_QPS_ERR; - default: - ehca_gen_err("invalid ehca_qp_state=%x", ehca_qp_state); - return -EINVAL; - } -} - -/* - * ehca_qp_type used as index for req_attr and opt_attr of - * struct ehca_modqp_statetrans - */ -enum ehca_qp_type { - QPT_RC = 0, - QPT_UC = 1, - QPT_UD = 2, - QPT_SQP = 3, - QPT_MAX -}; - -/* - * ib2ehcaqptype maps Ib to ehca qp_type - * returns ehca qp type corresponding to ib qp type - */ -static inline enum ehca_qp_type ib2ehcaqptype(enum ib_qp_type ibqptype) -{ - switch (ibqptype) { - case IB_QPT_SMI: - case IB_QPT_GSI: - return QPT_SQP; - case IB_QPT_RC: - return QPT_RC; - case IB_QPT_UC: - return QPT_UC; - case IB_QPT_UD: - return QPT_UD; - default: - ehca_gen_err("Invalid ibqptype=%x", ibqptype); - return -EINVAL; - } -} - -static inline enum ib_qp_statetrans get_modqp_statetrans(int ib_fromstate, - int ib_tostate) -{ - 
int index = -EINVAL; - switch (ib_tostate) { - case IB_QPS_RESET: - index = IB_QPST_ANY2RESET; - break; - case IB_QPS_INIT: - switch (ib_fromstate) { - case IB_QPS_RESET: - index = IB_QPST_RESET2INIT; - break; - case IB_QPS_INIT: - index = IB_QPST_INIT2INIT; - break; - } - break; - case IB_QPS_RTR: - if (ib_fromstate == IB_QPS_INIT) - index = IB_QPST_INIT2RTR; - break; - case IB_QPS_RTS: - switch (ib_fromstate) { - case IB_QPS_RTR: - index = IB_QPST_RTR2RTS; - break; - case IB_QPS_RTS: - index = IB_QPST_RTS2RTS; - break; - case IB_QPS_SQD: - index = IB_QPST_SQD2RTS; - break; - case IB_QPS_SQE: - index = IB_QPST_SQE2RTS; - break; - } - break; - case IB_QPS_SQD: - if (ib_fromstate == IB_QPS_RTS) - index = IB_QPST_RTS2SQD; - break; - case IB_QPS_SQE: - break; - case IB_QPS_ERR: - index = IB_QPST_ANY2ERR; - break; - default: - break; - } - return index; -} - -/* - * ibqptype2servicetype returns hcp service type corresponding to given - * ib qp type used by create_qp() - */ -static inline int ibqptype2servicetype(enum ib_qp_type ibqptype) -{ - switch (ibqptype) { - case IB_QPT_SMI: - case IB_QPT_GSI: - return ST_UD; - case IB_QPT_RC: - return ST_RC; - case IB_QPT_UC: - return ST_UC; - case IB_QPT_UD: - return ST_UD; - case IB_QPT_RAW_IPV6: - return -EINVAL; - case IB_QPT_RAW_ETHERTYPE: - return -EINVAL; - default: - ehca_gen_err("Invalid ibqptype=%x", ibqptype); - return -EINVAL; - } -} - -/* - * init userspace queue info from ipz_queue data - */ -static inline void queue2resp(struct ipzu_queue_resp *resp, - struct ipz_queue *queue) -{ - resp->qe_size = queue->qe_size; - resp->act_nr_of_sg = queue->act_nr_of_sg; - resp->queue_length = queue->queue_length; - resp->pagesize = queue->pagesize; - resp->toggle_state = queue->toggle_state; - resp->offset = queue->offset; -} - -/* - * init_qp_queue initializes/constructs r/squeue and registers queue pages. 
- */ -static inline int init_qp_queue(struct ehca_shca *shca, - struct ehca_pd *pd, - struct ehca_qp *my_qp, - struct ipz_queue *queue, - int q_type, - u64 expected_hret, - struct ehca_alloc_queue_parms *parms, - int wqe_size) -{ - int ret, cnt, ipz_rc, nr_q_pages; - void *vpage; - u64 rpage, h_ret; - struct ib_device *ib_dev = &shca->ib_device; - struct ipz_adapter_handle ipz_hca_handle = shca->ipz_hca_handle; - - if (!parms->queue_size) - return 0; - - if (parms->is_small) { - nr_q_pages = 1; - ipz_rc = ipz_queue_ctor(pd, queue, nr_q_pages, - 128 << parms->page_size, - wqe_size, parms->act_nr_sges, 1); - } else { - nr_q_pages = parms->queue_size; - ipz_rc = ipz_queue_ctor(pd, queue, nr_q_pages, - EHCA_PAGESIZE, wqe_size, - parms->act_nr_sges, 0); - } - - if (!ipz_rc) { - ehca_err(ib_dev, "Cannot allocate page for queue. ipz_rc=%i", - ipz_rc); - return -EBUSY; - } - - /* register queue pages */ - for (cnt = 0; cnt < nr_q_pages; cnt++) { - vpage = ipz_qpageit_get_inc(queue); - if (!vpage) { - ehca_err(ib_dev, "ipz_qpageit_get_inc() " - "failed p_vpage= %p", vpage); - ret = -EINVAL; - goto init_qp_queue1; - } - rpage = __pa(vpage); - - h_ret = hipz_h_register_rpage_qp(ipz_hca_handle, - my_qp->ipz_qp_handle, - NULL, 0, q_type, - rpage, parms->is_small ? 0 : 1, - my_qp->galpas.kernel); - if (cnt == (nr_q_pages - 1)) { /* last page! 
*/ - if (h_ret != expected_hret) { - ehca_err(ib_dev, "hipz_qp_register_rpage() " - "h_ret=%lli", h_ret); - ret = ehca2ib_return_code(h_ret); - goto init_qp_queue1; - } - vpage = ipz_qpageit_get_inc(&my_qp->ipz_rqueue); - if (vpage) { - ehca_err(ib_dev, "ipz_qpageit_get_inc() " - "should not succeed vpage=%p", vpage); - ret = -EINVAL; - goto init_qp_queue1; - } - } else { - if (h_ret != H_PAGE_REGISTERED) { - ehca_err(ib_dev, "hipz_qp_register_rpage() " - "h_ret=%lli", h_ret); - ret = ehca2ib_return_code(h_ret); - goto init_qp_queue1; - } - } - } - - ipz_qeit_reset(queue); - - return 0; - -init_qp_queue1: - ipz_queue_dtor(pd, queue); - return ret; -} - -static inline int ehca_calc_wqe_size(int act_nr_sge, int is_llqp) -{ - if (is_llqp) - return 128 << act_nr_sge; - else - return offsetof(struct ehca_wqe, - u.nud.sg_list[act_nr_sge]); -} - -static void ehca_determine_small_queue(struct ehca_alloc_queue_parms *queue, - int req_nr_sge, int is_llqp) -{ - u32 wqe_size, q_size; - int act_nr_sge = req_nr_sge; - - if (!is_llqp) - /* round up #SGEs so WQE size is a power of 2 */ - for (act_nr_sge = 4; act_nr_sge <= 252; - act_nr_sge = 4 + 2 * act_nr_sge) - if (act_nr_sge >= req_nr_sge) - break; - - wqe_size = ehca_calc_wqe_size(act_nr_sge, is_llqp); - q_size = wqe_size * (queue->max_wr + 1); - - if (q_size <= 512) - queue->page_size = 2; - else if (q_size <= 1024) - queue->page_size = 3; - else - queue->page_size = 0; - - queue->is_small = (queue->page_size != 0); -} - -/* needs to be called with cq->spinlock held */ -void ehca_add_to_err_list(struct ehca_qp *qp, int on_sq) -{ - struct list_head *list, *node; - - /* TODO: support low latency QPs */ - if (qp->ext_type == EQPT_LLQP) - return; - - if (on_sq) { - list = &qp->send_cq->sqp_err_list; - node = &qp->sq_err_node; - } else { - list = &qp->recv_cq->rqp_err_list; - node = &qp->rq_err_node; - } - - if (list_empty(node)) - list_add_tail(node, list); - - return; -} - -static void del_from_err_list(struct ehca_cq *cq, 
struct list_head *node) -{ - unsigned long flags; - - spin_lock_irqsave(&cq->spinlock, flags); - - if (!list_empty(node)) - list_del_init(node); - - spin_unlock_irqrestore(&cq->spinlock, flags); -} - -static void reset_queue_map(struct ehca_queue_map *qmap) -{ - int i; - - qmap->tail = qmap->entries - 1; - qmap->left_to_poll = 0; - qmap->next_wqe_idx = 0; - for (i = 0; i < qmap->entries; i++) { - qmap->map[i].reported = 1; - qmap->map[i].cqe_req = 0; - } -} - -/* - * Create an ib_qp struct that is either a QP or an SRQ, depending on - * the value of the is_srq parameter. If init_attr and srq_init_attr share - * fields, the field out of init_attr is used. - */ -static struct ehca_qp *internal_create_qp( - struct ib_pd *pd, - struct ib_qp_init_attr *init_attr, - struct ib_srq_init_attr *srq_init_attr, - struct ib_udata *udata, int is_srq) -{ - struct ehca_qp *my_qp, *my_srq = NULL; - struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd); - struct ehca_shca *shca = container_of(pd->device, struct ehca_shca, - ib_device); - struct ib_ucontext *context = NULL; - u64 h_ret; - int is_llqp = 0, has_srq = 0, is_user = 0; - int qp_type, max_send_sge, max_recv_sge, ret; - - /* h_call's out parameters */ - struct ehca_alloc_qp_parms parms; - u32 swqe_size = 0, rwqe_size = 0, ib_qp_num; - unsigned long flags; - - if (!atomic_add_unless(&shca->num_qps, 1, shca->max_num_qps)) { - ehca_err(pd->device, "Unable to create QP, max number of %i " - "QPs reached.", shca->max_num_qps); - ehca_err(pd->device, "To increase the maximum number of QPs " - "use the number_of_qps module parameter.\n"); - return ERR_PTR(-ENOSPC); - } - - if (init_attr->create_flags) { - atomic_dec(&shca->num_qps); - return ERR_PTR(-EINVAL); - } - - memset(&parms, 0, sizeof(parms)); - qp_type = init_attr->qp_type; - - if (init_attr->sq_sig_type != IB_SIGNAL_REQ_WR && - init_attr->sq_sig_type != IB_SIGNAL_ALL_WR) { - ehca_err(pd->device, "init_attr->sg_sig_type=%x not allowed", - init_attr->sq_sig_type); 
- atomic_dec(&shca->num_qps); - return ERR_PTR(-EINVAL); - } - - /* save LLQP info */ - if (qp_type & 0x80) { - is_llqp = 1; - parms.ext_type = EQPT_LLQP; - parms.ll_comp_flags = qp_type & LLQP_COMP_MASK; - } - qp_type &= 0x1F; - init_attr->qp_type &= 0x1F; - - /* handle SRQ base QPs */ - if (init_attr->srq) { - my_srq = container_of(init_attr->srq, struct ehca_qp, ib_srq); - - if (qp_type == IB_QPT_UC) { - ehca_err(pd->device, "UC with SRQ not supported"); - atomic_dec(&shca->num_qps); - return ERR_PTR(-EINVAL); - } - - has_srq = 1; - parms.ext_type = EQPT_SRQBASE; - parms.srq_qpn = my_srq->real_qp_num; - } - - if (is_llqp && has_srq) { - ehca_err(pd->device, "LLQPs can't have an SRQ"); - atomic_dec(&shca->num_qps); - return ERR_PTR(-EINVAL); - } - - /* handle SRQs */ - if (is_srq) { - parms.ext_type = EQPT_SRQ; - parms.srq_limit = srq_init_attr->attr.srq_limit; - if (init_attr->cap.max_recv_sge > 3) { - ehca_err(pd->device, "no more than three SGEs " - "supported for SRQ pd=%p max_sge=%x", - pd, init_attr->cap.max_recv_sge); - atomic_dec(&shca->num_qps); - return ERR_PTR(-EINVAL); - } - } - - /* check QP type */ - if (qp_type != IB_QPT_UD && - qp_type != IB_QPT_UC && - qp_type != IB_QPT_RC && - qp_type != IB_QPT_SMI && - qp_type != IB_QPT_GSI) { - ehca_err(pd->device, "wrong QP Type=%x", qp_type); - atomic_dec(&shca->num_qps); - return ERR_PTR(-EINVAL); - } - - if (is_llqp) { - switch (qp_type) { - case IB_QPT_RC: - if ((init_attr->cap.max_send_wr > 255) || - (init_attr->cap.max_recv_wr > 255)) { - ehca_err(pd->device, - "Invalid Number of max_sq_wr=%x " - "or max_rq_wr=%x for RC LLQP", - init_attr->cap.max_send_wr, - init_attr->cap.max_recv_wr); - atomic_dec(&shca->num_qps); - return ERR_PTR(-EINVAL); - } - break; - case IB_QPT_UD: - if (!EHCA_BMASK_GET(HCA_CAP_UD_LL_QP, shca->hca_cap)) { - ehca_err(pd->device, "UD LLQP not supported " - "by this adapter"); - atomic_dec(&shca->num_qps); - return ERR_PTR(-ENOSYS); - } - if (!(init_attr->cap.max_send_sge <= 5 - && 
init_attr->cap.max_send_sge >= 1 - && init_attr->cap.max_recv_sge <= 5 - && init_attr->cap.max_recv_sge >= 1)) { - ehca_err(pd->device, - "Invalid Number of max_send_sge=%x " - "or max_recv_sge=%x for UD LLQP", - init_attr->cap.max_send_sge, - init_attr->cap.max_recv_sge); - atomic_dec(&shca->num_qps); - return ERR_PTR(-EINVAL); - } else if (init_attr->cap.max_send_wr > 255) { - ehca_err(pd->device, - "Invalid Number of " - "max_send_wr=%x for UD QP_TYPE=%x", - init_attr->cap.max_send_wr, qp_type); - atomic_dec(&shca->num_qps); - return ERR_PTR(-EINVAL); - } - break; - default: - ehca_err(pd->device, "unsupported LL QP Type=%x", - qp_type); - atomic_dec(&shca->num_qps); - return ERR_PTR(-EINVAL); - } - } else { - int max_sge = (qp_type == IB_QPT_UD || qp_type == IB_QPT_SMI - || qp_type == IB_QPT_GSI) ? 250 : 252; - - if (init_attr->cap.max_send_sge > max_sge - || init_attr->cap.max_recv_sge > max_sge) { - ehca_err(pd->device, "Invalid number of SGEs requested " - "send_sge=%x recv_sge=%x max_sge=%x", - init_attr->cap.max_send_sge, - init_attr->cap.max_recv_sge, max_sge); - atomic_dec(&shca->num_qps); - return ERR_PTR(-EINVAL); - } - } - - my_qp = kmem_cache_zalloc(qp_cache, GFP_KERNEL); - if (!my_qp) { - ehca_err(pd->device, "pd=%p not enough memory to alloc qp", pd); - atomic_dec(&shca->num_qps); - return ERR_PTR(-ENOMEM); - } - - if (pd->uobject && udata) { - is_user = 1; - context = pd->uobject->context; - } - - atomic_set(&my_qp->nr_events, 0); - init_waitqueue_head(&my_qp->wait_completion); - spin_lock_init(&my_qp->spinlock_s); - spin_lock_init(&my_qp->spinlock_r); - my_qp->qp_type = qp_type; - my_qp->ext_type = parms.ext_type; - my_qp->state = IB_QPS_RESET; - - if (init_attr->recv_cq) - my_qp->recv_cq = - container_of(init_attr->recv_cq, struct ehca_cq, ib_cq); - if (init_attr->send_cq) - my_qp->send_cq = - container_of(init_attr->send_cq, struct ehca_cq, ib_cq); - - idr_preload(GFP_KERNEL); - write_lock_irqsave(&ehca_qp_idr_lock, flags); - - ret = 
idr_alloc(&ehca_qp_idr, my_qp, 0, 0x2000000, GFP_NOWAIT); - if (ret >= 0) - my_qp->token = ret; - - write_unlock_irqrestore(&ehca_qp_idr_lock, flags); - idr_preload_end(); - if (ret < 0) { - if (ret == -ENOSPC) { - ret = -EINVAL; - ehca_err(pd->device, "Invalid number of qp"); - } else { - ret = -ENOMEM; - ehca_err(pd->device, "Can't allocate new idr entry."); - } - goto create_qp_exit0; - } - - if (has_srq) - parms.srq_token = my_qp->token; - - parms.servicetype = ibqptype2servicetype(qp_type); - if (parms.servicetype < 0) { - ret = -EINVAL; - ehca_err(pd->device, "Invalid qp_type=%x", qp_type); - goto create_qp_exit1; - } - - /* Always signal by WQE so we can hide circ. WQEs */ - parms.sigtype = HCALL_SIGT_BY_WQE; - - /* UD_AV CIRCUMVENTION */ - max_send_sge = init_attr->cap.max_send_sge; - max_recv_sge = init_attr->cap.max_recv_sge; - if (parms.servicetype == ST_UD && !is_llqp) { - max_send_sge += 2; - max_recv_sge += 2; - } - - parms.token = my_qp->token; - parms.eq_handle = shca->eq.ipz_eq_handle; - parms.pd = my_pd->fw_pd; - if (my_qp->send_cq) - parms.send_cq_handle = my_qp->send_cq->ipz_cq_handle; - if (my_qp->recv_cq) - parms.recv_cq_handle = my_qp->recv_cq->ipz_cq_handle; - - parms.squeue.max_wr = init_attr->cap.max_send_wr; - parms.rqueue.max_wr = init_attr->cap.max_recv_wr; - parms.squeue.max_sge = max_send_sge; - parms.rqueue.max_sge = max_recv_sge; - - /* RC QPs need one more SWQE for unsolicited ack circumvention */ - if (qp_type == IB_QPT_RC) - parms.squeue.max_wr++; - - if (EHCA_BMASK_GET(HCA_CAP_MINI_QP, shca->hca_cap)) { - if (HAS_SQ(my_qp)) - ehca_determine_small_queue( - &parms.squeue, max_send_sge, is_llqp); - if (HAS_RQ(my_qp)) - ehca_determine_small_queue( - &parms.rqueue, max_recv_sge, is_llqp); - parms.qp_storage = - (parms.squeue.is_small || parms.rqueue.is_small); - } - - h_ret = hipz_h_alloc_resource_qp(shca->ipz_hca_handle, &parms, is_user); - if (h_ret != H_SUCCESS) { - ehca_err(pd->device, "h_alloc_resource_qp() failed h_ret=%lli", - 
h_ret); - ret = ehca2ib_return_code(h_ret); - goto create_qp_exit1; - } - - ib_qp_num = my_qp->real_qp_num = parms.real_qp_num; - my_qp->ipz_qp_handle = parms.qp_handle; - my_qp->galpas = parms.galpas; - - swqe_size = ehca_calc_wqe_size(parms.squeue.act_nr_sges, is_llqp); - rwqe_size = ehca_calc_wqe_size(parms.rqueue.act_nr_sges, is_llqp); - - switch (qp_type) { - case IB_QPT_RC: - if (is_llqp) { - parms.squeue.act_nr_sges = 1; - parms.rqueue.act_nr_sges = 1; - } - /* hide the extra WQE */ - parms.squeue.act_nr_wqes--; - break; - case IB_QPT_UD: - case IB_QPT_GSI: - case IB_QPT_SMI: - /* UD circumvention */ - if (is_llqp) { - parms.squeue.act_nr_sges = 1; - parms.rqueue.act_nr_sges = 1; - } else { - parms.squeue.act_nr_sges -= 2; - parms.rqueue.act_nr_sges -= 2; - } - - if (IB_QPT_GSI == qp_type || IB_QPT_SMI == qp_type) { - parms.squeue.act_nr_wqes = init_attr->cap.max_send_wr; - parms.rqueue.act_nr_wqes = init_attr->cap.max_recv_wr; - parms.squeue.act_nr_sges = init_attr->cap.max_send_sge; - parms.rqueue.act_nr_sges = init_attr->cap.max_recv_sge; - ib_qp_num = (qp_type == IB_QPT_SMI) ? 0 : 1; - } - - break; - - default: - break; - } - - /* initialize r/squeue and register queue pages */ - if (HAS_SQ(my_qp)) { - ret = init_qp_queue( - shca, my_pd, my_qp, &my_qp->ipz_squeue, 0, - HAS_RQ(my_qp) ? 
H_PAGE_REGISTERED : H_SUCCESS, - &parms.squeue, swqe_size); - if (ret) { - ehca_err(pd->device, "Couldn't initialize squeue " - "and pages ret=%i", ret); - goto create_qp_exit2; - } - - if (!is_user) { - my_qp->sq_map.entries = my_qp->ipz_squeue.queue_length / - my_qp->ipz_squeue.qe_size; - my_qp->sq_map.map = vmalloc(my_qp->sq_map.entries * - sizeof(struct ehca_qmap_entry)); - if (!my_qp->sq_map.map) { - ehca_err(pd->device, "Couldn't allocate squeue " - "map ret=%i", ret); - goto create_qp_exit3; - } - INIT_LIST_HEAD(&my_qp->sq_err_node); - /* to avoid the generation of bogus flush CQEs */ - reset_queue_map(&my_qp->sq_map); - } - } - - if (HAS_RQ(my_qp)) { - ret = init_qp_queue( - shca, my_pd, my_qp, &my_qp->ipz_rqueue, 1, - H_SUCCESS, &parms.rqueue, rwqe_size); - if (ret) { - ehca_err(pd->device, "Couldn't initialize rqueue " - "and pages ret=%i", ret); - goto create_qp_exit4; - } - if (!is_user) { - my_qp->rq_map.entries = my_qp->ipz_rqueue.queue_length / - my_qp->ipz_rqueue.qe_size; - my_qp->rq_map.map = vmalloc(my_qp->rq_map.entries * - sizeof(struct ehca_qmap_entry)); - if (!my_qp->rq_map.map) { - ehca_err(pd->device, "Couldn't allocate squeue " - "map ret=%i", ret); - goto create_qp_exit5; - } - INIT_LIST_HEAD(&my_qp->rq_err_node); - /* to avoid the generation of bogus flush CQEs */ - reset_queue_map(&my_qp->rq_map); - } - } else if (init_attr->srq && !is_user) { - /* this is a base QP, use the queue map of the SRQ */ - my_qp->rq_map = my_srq->rq_map; - INIT_LIST_HEAD(&my_qp->rq_err_node); - - my_qp->ipz_rqueue = my_srq->ipz_rqueue; - } - - if (is_srq) { - my_qp->ib_srq.pd = &my_pd->ib_pd; - my_qp->ib_srq.device = my_pd->ib_pd.device; - - my_qp->ib_srq.srq_context = init_attr->qp_context; - my_qp->ib_srq.event_handler = init_attr->event_handler; - } else { - my_qp->ib_qp.qp_num = ib_qp_num; - my_qp->ib_qp.pd = &my_pd->ib_pd; - my_qp->ib_qp.device = my_pd->ib_pd.device; - - my_qp->ib_qp.recv_cq = init_attr->recv_cq; - my_qp->ib_qp.send_cq = 
init_attr->send_cq; - - my_qp->ib_qp.qp_type = qp_type; - my_qp->ib_qp.srq = init_attr->srq; - - my_qp->ib_qp.qp_context = init_attr->qp_context; - my_qp->ib_qp.event_handler = init_attr->event_handler; - } - - init_attr->cap.max_inline_data = 0; /* not supported yet */ - init_attr->cap.max_recv_sge = parms.rqueue.act_nr_sges; - init_attr->cap.max_recv_wr = parms.rqueue.act_nr_wqes; - init_attr->cap.max_send_sge = parms.squeue.act_nr_sges; - init_attr->cap.max_send_wr = parms.squeue.act_nr_wqes; - my_qp->init_attr = *init_attr; - - if (qp_type == IB_QPT_SMI || qp_type == IB_QPT_GSI) { - shca->sport[init_attr->port_num - 1].ibqp_sqp[qp_type] = - &my_qp->ib_qp; - if (ehca_nr_ports < 0) { - /* alloc array to cache subsequent modify qp parms - * for autodetect mode - */ - my_qp->mod_qp_parm = - kzalloc(EHCA_MOD_QP_PARM_MAX * - sizeof(*my_qp->mod_qp_parm), - GFP_KERNEL); - if (!my_qp->mod_qp_parm) { - ehca_err(pd->device, - "Could not alloc mod_qp_parm"); - goto create_qp_exit5; - } - } - } - - /* NOTE: define_apq0() not supported yet */ - if (qp_type == IB_QPT_GSI) { - h_ret = ehca_define_sqp(shca, my_qp, init_attr); - if (h_ret != H_SUCCESS) { - kfree(my_qp->mod_qp_parm); - my_qp->mod_qp_parm = NULL; - /* the QP pointer is no longer valid */ - shca->sport[init_attr->port_num - 1].ibqp_sqp[qp_type] = - NULL; - ret = ehca2ib_return_code(h_ret); - goto create_qp_exit6; - } - } - - if (my_qp->send_cq) { - ret = ehca_cq_assign_qp(my_qp->send_cq, my_qp); - if (ret) { - ehca_err(pd->device, - "Couldn't assign qp to send_cq ret=%i", ret); - goto create_qp_exit7; - } - } - - /* copy queues, galpa data to user space */ - if (context && udata) { - struct ehca_create_qp_resp resp; - memset(&resp, 0, sizeof(resp)); - - resp.qp_num = my_qp->real_qp_num; - resp.token = my_qp->token; - resp.qp_type = my_qp->qp_type; - resp.ext_type = my_qp->ext_type; - resp.qkey = my_qp->qkey; - resp.real_qp_num = my_qp->real_qp_num; - - if (HAS_SQ(my_qp)) - queue2resp(&resp.ipz_squeue, 
&my_qp->ipz_squeue); - if (HAS_RQ(my_qp)) - queue2resp(&resp.ipz_rqueue, &my_qp->ipz_rqueue); - resp.fw_handle_ofs = (u32) - (my_qp->galpas.user.fw_handle & (PAGE_SIZE - 1)); - - if (ib_copy_to_udata(udata, &resp, sizeof resp)) { - ehca_err(pd->device, "Copy to udata failed"); - ret = -EINVAL; - goto create_qp_exit8; - } - } - - return my_qp; - -create_qp_exit8: - ehca_cq_unassign_qp(my_qp->send_cq, my_qp->real_qp_num); - -create_qp_exit7: - kfree(my_qp->mod_qp_parm); - -create_qp_exit6: - if (HAS_RQ(my_qp) && !is_user) - vfree(my_qp->rq_map.map); - -create_qp_exit5: - if (HAS_RQ(my_qp)) - ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue); - -create_qp_exit4: - if (HAS_SQ(my_qp) && !is_user) - vfree(my_qp->sq_map.map); - -create_qp_exit3: - if (HAS_SQ(my_qp)) - ipz_queue_dtor(my_pd, &my_qp->ipz_squeue); - -create_qp_exit2: - hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp); - -create_qp_exit1: - write_lock_irqsave(&ehca_qp_idr_lock, flags); - idr_remove(&ehca_qp_idr, my_qp->token); - write_unlock_irqrestore(&ehca_qp_idr_lock, flags); - -create_qp_exit0: - kmem_cache_free(qp_cache, my_qp); - atomic_dec(&shca->num_qps); - return ERR_PTR(ret); -} - -struct ib_qp *ehca_create_qp(struct ib_pd *pd, - struct ib_qp_init_attr *qp_init_attr, - struct ib_udata *udata) -{ - struct ehca_qp *ret; - - ret = internal_create_qp(pd, qp_init_attr, NULL, udata, 0); - return IS_ERR(ret) ? 
(struct ib_qp *)ret : &ret->ib_qp; -} - -static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp, - struct ib_uobject *uobject); - -struct ib_srq *ehca_create_srq(struct ib_pd *pd, - struct ib_srq_init_attr *srq_init_attr, - struct ib_udata *udata) -{ - struct ib_qp_init_attr qp_init_attr; - struct ehca_qp *my_qp; - struct ib_srq *ret; - struct ehca_shca *shca = container_of(pd->device, struct ehca_shca, - ib_device); - struct hcp_modify_qp_control_block *mqpcb; - u64 hret, update_mask; - - if (srq_init_attr->srq_type != IB_SRQT_BASIC) - return ERR_PTR(-ENOSYS); - - /* For common attributes, internal_create_qp() takes its info - * out of qp_init_attr, so copy all common attrs there. - */ - memset(&qp_init_attr, 0, sizeof(qp_init_attr)); - qp_init_attr.event_handler = srq_init_attr->event_handler; - qp_init_attr.qp_context = srq_init_attr->srq_context; - qp_init_attr.sq_sig_type = IB_SIGNAL_ALL_WR; - qp_init_attr.qp_type = IB_QPT_RC; - qp_init_attr.cap.max_recv_wr = srq_init_attr->attr.max_wr; - qp_init_attr.cap.max_recv_sge = srq_init_attr->attr.max_sge; - - my_qp = internal_create_qp(pd, &qp_init_attr, srq_init_attr, udata, 1); - if (IS_ERR(my_qp)) - return (struct ib_srq *)my_qp; - - /* copy back return values */ - srq_init_attr->attr.max_wr = qp_init_attr.cap.max_recv_wr; - srq_init_attr->attr.max_sge = 3; - - /* drive SRQ into RTR state */ - mqpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL); - if (!mqpcb) { - ehca_err(pd->device, "Could not get zeroed page for mqpcb " - "ehca_qp=%p qp_num=%x ", my_qp, my_qp->real_qp_num); - ret = ERR_PTR(-ENOMEM); - goto create_srq1; - } - - mqpcb->qp_state = EHCA_QPS_INIT; - mqpcb->prim_phys_port = 1; - update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_STATE, 1); - hret = hipz_h_modify_qp(shca->ipz_hca_handle, - my_qp->ipz_qp_handle, - &my_qp->pf, - update_mask, - mqpcb, my_qp->galpas.kernel); - if (hret != H_SUCCESS) { - ehca_err(pd->device, "Could not modify SRQ to INIT " - "ehca_qp=%p qp_num=%x h_ret=%lli", - my_qp, 
my_qp->real_qp_num, hret); - goto create_srq2; - } - - mqpcb->qp_enable = 1; - update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_ENABLE, 1); - hret = hipz_h_modify_qp(shca->ipz_hca_handle, - my_qp->ipz_qp_handle, - &my_qp->pf, - update_mask, - mqpcb, my_qp->galpas.kernel); - if (hret != H_SUCCESS) { - ehca_err(pd->device, "Could not enable SRQ " - "ehca_qp=%p qp_num=%x h_ret=%lli", - my_qp, my_qp->real_qp_num, hret); - goto create_srq2; - } - - mqpcb->qp_state = EHCA_QPS_RTR; - update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_STATE, 1); - hret = hipz_h_modify_qp(shca->ipz_hca_handle, - my_qp->ipz_qp_handle, - &my_qp->pf, - update_mask, - mqpcb, my_qp->galpas.kernel); - if (hret != H_SUCCESS) { - ehca_err(pd->device, "Could not modify SRQ to RTR " - "ehca_qp=%p qp_num=%x h_ret=%lli", - my_qp, my_qp->real_qp_num, hret); - goto create_srq2; - } - - ehca_free_fw_ctrlblock(mqpcb); - - return &my_qp->ib_srq; - -create_srq2: - ret = ERR_PTR(ehca2ib_return_code(hret)); - ehca_free_fw_ctrlblock(mqpcb); - -create_srq1: - internal_destroy_qp(pd->device, my_qp, my_qp->ib_srq.uobject); - - return ret; -} - -/* - * prepare_sqe_rts called by internal_modify_qp() at trans sqe -> rts - * set purge bit of bad wqe and subsequent wqes to avoid reentering sqe - * returns total number of bad wqes in bad_wqe_cnt - */ -static int prepare_sqe_rts(struct ehca_qp *my_qp, struct ehca_shca *shca, - int *bad_wqe_cnt) -{ - u64 h_ret; - struct ipz_queue *squeue; - void *bad_send_wqe_p, *bad_send_wqe_v; - u64 q_ofs; - struct ehca_wqe *wqe; - int qp_num = my_qp->ib_qp.qp_num; - - /* get send wqe pointer */ - h_ret = hipz_h_disable_and_get_wqe(shca->ipz_hca_handle, - my_qp->ipz_qp_handle, &my_qp->pf, - &bad_send_wqe_p, NULL, 2); - if (h_ret != H_SUCCESS) { - ehca_err(&shca->ib_device, "hipz_h_disable_and_get_wqe() failed" - " ehca_qp=%p qp_num=%x h_ret=%lli", - my_qp, qp_num, h_ret); - return ehca2ib_return_code(h_ret); - } - bad_send_wqe_p = (void *)((u64)bad_send_wqe_p & (~(1L << 63))); - 
ehca_dbg(&shca->ib_device, "qp_num=%x bad_send_wqe_p=%p", - qp_num, bad_send_wqe_p); - /* convert wqe pointer to vadr */ - bad_send_wqe_v = __va((u64)bad_send_wqe_p); - if (ehca_debug_level >= 2) - ehca_dmp(bad_send_wqe_v, 32, "qp_num=%x bad_wqe", qp_num); - squeue = &my_qp->ipz_squeue; - if (ipz_queue_abs_to_offset(squeue, (u64)bad_send_wqe_p, &q_ofs)) { - ehca_err(&shca->ib_device, "failed to get wqe offset qp_num=%x" - " bad_send_wqe_p=%p", qp_num, bad_send_wqe_p); - return -EFAULT; - } - - /* loop sets wqe's purge bit */ - wqe = (struct ehca_wqe *)ipz_qeit_calc(squeue, q_ofs); - *bad_wqe_cnt = 0; - while (wqe->optype != 0xff && wqe->wqef != 0xff) { - if (ehca_debug_level >= 2) - ehca_dmp(wqe, 32, "qp_num=%x wqe", qp_num); - wqe->nr_of_data_seg = 0; /* suppress data access */ - wqe->wqef = WQEF_PURGE; /* WQE to be purged */ - q_ofs = ipz_queue_advance_offset(squeue, q_ofs); - wqe = (struct ehca_wqe *)ipz_qeit_calc(squeue, q_ofs); - *bad_wqe_cnt = (*bad_wqe_cnt)+1; - } - /* - * bad wqe will be reprocessed and ignored when pol_cq() is called, - * i.e. 
nr of wqes with flush error status is one less - */ - ehca_dbg(&shca->ib_device, "qp_num=%x flusherr_wqe_cnt=%x", - qp_num, (*bad_wqe_cnt)-1); - wqe->wqef = 0; - - return 0; -} - -static int calc_left_cqes(u64 wqe_p, struct ipz_queue *ipz_queue, - struct ehca_queue_map *qmap) -{ - void *wqe_v; - u64 q_ofs; - u32 wqe_idx; - unsigned int tail_idx; - - /* convert real to abs address */ - wqe_p = wqe_p & (~(1UL << 63)); - - wqe_v = __va(wqe_p); - - if (ipz_queue_abs_to_offset(ipz_queue, wqe_p, &q_ofs)) { - ehca_gen_err("Invalid offset for calculating left cqes " - "wqe_p=%#llx wqe_v=%p\n", wqe_p, wqe_v); - return -EFAULT; - } - - tail_idx = next_index(qmap->tail, qmap->entries); - wqe_idx = q_ofs / ipz_queue->qe_size; - - /* check all processed wqes, whether a cqe is requested or not */ - while (tail_idx != wqe_idx) { - if (qmap->map[tail_idx].cqe_req) - qmap->left_to_poll++; - tail_idx = next_index(tail_idx, qmap->entries); - } - /* save index in queue, where we have to start flushing */ - qmap->next_wqe_idx = wqe_idx; - return 0; -} - -static int check_for_left_cqes(struct ehca_qp *my_qp, struct ehca_shca *shca) -{ - u64 h_ret; - void *send_wqe_p, *recv_wqe_p; - int ret; - unsigned long flags; - int qp_num = my_qp->ib_qp.qp_num; - - /* this hcall is not supported on base QPs */ - if (my_qp->ext_type != EQPT_SRQBASE) { - /* get send and receive wqe pointer */ - h_ret = hipz_h_disable_and_get_wqe(shca->ipz_hca_handle, - my_qp->ipz_qp_handle, &my_qp->pf, - &send_wqe_p, &recv_wqe_p, 4); - if (h_ret != H_SUCCESS) { - ehca_err(&shca->ib_device, "disable_and_get_wqe() " - "failed ehca_qp=%p qp_num=%x h_ret=%lli", - my_qp, qp_num, h_ret); - return ehca2ib_return_code(h_ret); - } - - /* - * acquire lock to ensure that nobody is polling the cq which - * could mean that the qmap->tail pointer is in an - * inconsistent state. 
- */ - spin_lock_irqsave(&my_qp->send_cq->spinlock, flags); - ret = calc_left_cqes((u64)send_wqe_p, &my_qp->ipz_squeue, - &my_qp->sq_map); - spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags); - if (ret) - return ret; - - - spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags); - ret = calc_left_cqes((u64)recv_wqe_p, &my_qp->ipz_rqueue, - &my_qp->rq_map); - spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, flags); - if (ret) - return ret; - } else { - spin_lock_irqsave(&my_qp->send_cq->spinlock, flags); - my_qp->sq_map.left_to_poll = 0; - my_qp->sq_map.next_wqe_idx = next_index(my_qp->sq_map.tail, - my_qp->sq_map.entries); - spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags); - - spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags); - my_qp->rq_map.left_to_poll = 0; - my_qp->rq_map.next_wqe_idx = next_index(my_qp->rq_map.tail, - my_qp->rq_map.entries); - spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, flags); - } - - /* this assures flush cqes being generated only for pending wqes */ - if ((my_qp->sq_map.left_to_poll == 0) && - (my_qp->rq_map.left_to_poll == 0)) { - spin_lock_irqsave(&my_qp->send_cq->spinlock, flags); - ehca_add_to_err_list(my_qp, 1); - spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags); - - if (HAS_RQ(my_qp)) { - spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags); - ehca_add_to_err_list(my_qp, 0); - spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, - flags); - } - } - - return 0; -} - -/* - * internal_modify_qp with circumvention to handle aqp0 properly - * smi_reset2init indicates if this is an internal reset-to-init-call for - * smi. This flag must always be zero if called from ehca_modify_qp()! - * This internal func was intorduced to avoid recursion of ehca_modify_qp()! 
- */ -static int internal_modify_qp(struct ib_qp *ibqp, - struct ib_qp_attr *attr, - int attr_mask, int smi_reset2init) -{ - enum ib_qp_state qp_cur_state, qp_new_state; - int cnt, qp_attr_idx, ret = 0; - enum ib_qp_statetrans statetrans; - struct hcp_modify_qp_control_block *mqpcb; - struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp); - struct ehca_shca *shca = - container_of(ibqp->pd->device, struct ehca_shca, ib_device); - u64 update_mask; - u64 h_ret; - int bad_wqe_cnt = 0; - int is_user = 0; - int squeue_locked = 0; - unsigned long flags = 0; - - /* do query_qp to obtain current attr values */ - mqpcb = ehca_alloc_fw_ctrlblock(GFP_ATOMIC); - if (!mqpcb) { - ehca_err(ibqp->device, "Could not get zeroed page for mqpcb " - "ehca_qp=%p qp_num=%x ", my_qp, ibqp->qp_num); - return -ENOMEM; - } - - h_ret = hipz_h_query_qp(shca->ipz_hca_handle, - my_qp->ipz_qp_handle, - &my_qp->pf, - mqpcb, my_qp->galpas.kernel); - if (h_ret != H_SUCCESS) { - ehca_err(ibqp->device, "hipz_h_query_qp() failed " - "ehca_qp=%p qp_num=%x h_ret=%lli", - my_qp, ibqp->qp_num, h_ret); - ret = ehca2ib_return_code(h_ret); - goto modify_qp_exit1; - } - if (ibqp->uobject) - is_user = 1; - - qp_cur_state = ehca2ib_qp_state(mqpcb->qp_state); - - if (qp_cur_state == -EINVAL) { /* invalid qp state */ - ret = -EINVAL; - ehca_err(ibqp->device, "Invalid current ehca_qp_state=%x " - "ehca_qp=%p qp_num=%x", - mqpcb->qp_state, my_qp, ibqp->qp_num); - goto modify_qp_exit1; - } - /* - * circumvention to set aqp0 initial state to init - * as expected by IB spec - */ - if (smi_reset2init == 0 && - ibqp->qp_type == IB_QPT_SMI && - qp_cur_state == IB_QPS_RESET && - (attr_mask & IB_QP_STATE) && - attr->qp_state == IB_QPS_INIT) { /* RESET -> INIT */ - struct ib_qp_attr smiqp_attr = { - .qp_state = IB_QPS_INIT, - .port_num = my_qp->init_attr.port_num, - .pkey_index = 0, - .qkey = 0 - }; - int smiqp_attr_mask = IB_QP_STATE | IB_QP_PORT | - IB_QP_PKEY_INDEX | IB_QP_QKEY; - int smirc = 
internal_modify_qp( - ibqp, &smiqp_attr, smiqp_attr_mask, 1); - if (smirc) { - ehca_err(ibqp->device, "SMI RESET -> INIT failed. " - "ehca_modify_qp() rc=%i", smirc); - ret = H_PARAMETER; - goto modify_qp_exit1; - } - qp_cur_state = IB_QPS_INIT; - ehca_dbg(ibqp->device, "SMI RESET -> INIT succeeded"); - } - /* is transmitted current state equal to "real" current state */ - if ((attr_mask & IB_QP_CUR_STATE) && - qp_cur_state != attr->cur_qp_state) { - ret = -EINVAL; - ehca_err(ibqp->device, - "Invalid IB_QP_CUR_STATE attr->curr_qp_state=%x <>" - " actual cur_qp_state=%x. ehca_qp=%p qp_num=%x", - attr->cur_qp_state, qp_cur_state, my_qp, ibqp->qp_num); - goto modify_qp_exit1; - } - - ehca_dbg(ibqp->device, "ehca_qp=%p qp_num=%x current qp_state=%x " - "new qp_state=%x attribute_mask=%x", - my_qp, ibqp->qp_num, qp_cur_state, attr->qp_state, attr_mask); - - qp_new_state = attr_mask & IB_QP_STATE ? attr->qp_state : qp_cur_state; - if (!smi_reset2init && - !ib_modify_qp_is_ok(qp_cur_state, qp_new_state, ibqp->qp_type, - attr_mask, IB_LINK_LAYER_UNSPECIFIED)) { - ret = -EINVAL; - ehca_err(ibqp->device, - "Invalid qp transition new_state=%x cur_state=%x " - "ehca_qp=%p qp_num=%x attr_mask=%x", qp_new_state, - qp_cur_state, my_qp, ibqp->qp_num, attr_mask); - goto modify_qp_exit1; - } - - mqpcb->qp_state = ib2ehca_qp_state(qp_new_state); - if (mqpcb->qp_state) - update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_STATE, 1); - else { - ret = -EINVAL; - ehca_err(ibqp->device, "Invalid new qp state=%x " - "ehca_qp=%p qp_num=%x", - qp_new_state, my_qp, ibqp->qp_num); - goto modify_qp_exit1; - } - - /* retrieve state transition struct to get req and opt attrs */ - statetrans = get_modqp_statetrans(qp_cur_state, qp_new_state); - if (statetrans < 0) { - ret = -EINVAL; - ehca_err(ibqp->device, " qp_cur_state=%x " - "new_qp_state=%x State_xsition=%x ehca_qp=%p " - "qp_num=%x", qp_cur_state, qp_new_state, - statetrans, my_qp, ibqp->qp_num); - goto modify_qp_exit1; - } - - qp_attr_idx = 
ib2ehcaqptype(ibqp->qp_type); - - if (qp_attr_idx < 0) { - ret = qp_attr_idx; - ehca_err(ibqp->device, - "Invalid QP type=%x ehca_qp=%p qp_num=%x", - ibqp->qp_type, my_qp, ibqp->qp_num); - goto modify_qp_exit1; - } - - ehca_dbg(ibqp->device, - "ehca_qp=%p qp_num=%x qp_state_xsit=%x", - my_qp, ibqp->qp_num, statetrans); - - /* eHCA2 rev2 and higher require the SEND_GRH_FLAG to be set - * in non-LL UD QPs. - */ - if ((my_qp->qp_type == IB_QPT_UD) && - (my_qp->ext_type != EQPT_LLQP) && - (statetrans == IB_QPST_INIT2RTR) && - (shca->hw_level >= 0x22)) { - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG, 1); - mqpcb->send_grh_flag = 1; - } - - /* sqe -> rts: set purge bit of bad wqe before actual trans */ - if ((my_qp->qp_type == IB_QPT_UD || - my_qp->qp_type == IB_QPT_GSI || - my_qp->qp_type == IB_QPT_SMI) && - statetrans == IB_QPST_SQE2RTS) { - /* mark next free wqe if kernel */ - if (!ibqp->uobject) { - struct ehca_wqe *wqe; - /* lock send queue */ - spin_lock_irqsave(&my_qp->spinlock_s, flags); - squeue_locked = 1; - /* mark next free wqe */ - wqe = (struct ehca_wqe *) - ipz_qeit_get(&my_qp->ipz_squeue); - wqe->optype = wqe->wqef = 0xff; - ehca_dbg(ibqp->device, "qp_num=%x next_free_wqe=%p", - ibqp->qp_num, wqe); - } - ret = prepare_sqe_rts(my_qp, shca, &bad_wqe_cnt); - if (ret) { - ehca_err(ibqp->device, "prepare_sqe_rts() failed " - "ehca_qp=%p qp_num=%x ret=%i", - my_qp, ibqp->qp_num, ret); - goto modify_qp_exit2; - } - } - - /* - * enable RDMA_Atomic_Control if reset->init und reliable con - * this is necessary since gen2 does not provide that flag, - * but pHyp requires it - */ - if (statetrans == IB_QPST_RESET2INIT && - (ibqp->qp_type == IB_QPT_RC || ibqp->qp_type == IB_QPT_UC)) { - mqpcb->rdma_atomic_ctrl = 3; - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RDMA_ATOMIC_CTRL, 1); - } - /* circ. 
pHyp requires #RDMA/Atomic Resp Res for UC INIT -> RTR */ - if (statetrans == IB_QPST_INIT2RTR && - (ibqp->qp_type == IB_QPT_UC) && - !(attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)) { - mqpcb->rdma_nr_atomic_resp_res = 1; /* default to 1 */ - update_mask |= - EHCA_BMASK_SET(MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES, 1); - } - - if (attr_mask & IB_QP_PKEY_INDEX) { - if (attr->pkey_index >= 16) { - ret = -EINVAL; - ehca_err(ibqp->device, "Invalid pkey_index=%x. " - "ehca_qp=%p qp_num=%x max_pkey_index=f", - attr->pkey_index, my_qp, ibqp->qp_num); - goto modify_qp_exit2; - } - mqpcb->prim_p_key_idx = attr->pkey_index; - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PRIM_P_KEY_IDX, 1); - } - if (attr_mask & IB_QP_PORT) { - struct ehca_sport *sport; - struct ehca_qp *aqp1; - if (attr->port_num < 1 || attr->port_num > shca->num_ports) { - ret = -EINVAL; - ehca_err(ibqp->device, "Invalid port=%x. " - "ehca_qp=%p qp_num=%x num_ports=%x", - attr->port_num, my_qp, ibqp->qp_num, - shca->num_ports); - goto modify_qp_exit2; - } - sport = &shca->sport[attr->port_num - 1]; - if (!sport->ibqp_sqp[IB_QPT_GSI]) { - /* should not occur */ - ret = -EFAULT; - ehca_err(ibqp->device, "AQP1 was not created for " - "port=%x", attr->port_num); - goto modify_qp_exit2; - } - aqp1 = container_of(sport->ibqp_sqp[IB_QPT_GSI], - struct ehca_qp, ib_qp); - if (ibqp->qp_type != IB_QPT_GSI && - ibqp->qp_type != IB_QPT_SMI && - aqp1->mod_qp_parm) { - /* - * firmware will reject this modify_qp() because - * port is not activated/initialized fully - */ - ret = -EFAULT; - ehca_warn(ibqp->device, "Couldn't modify qp port=%x: " - "either port is being activated (try again) " - "or cabling issue", attr->port_num); - goto modify_qp_exit2; - } - mqpcb->prim_phys_port = attr->port_num; - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PRIM_PHYS_PORT, 1); - } - if (attr_mask & IB_QP_QKEY) { - mqpcb->qkey = attr->qkey; - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_QKEY, 1); - } - if (attr_mask & IB_QP_AV) { - mqpcb->dlid = 
attr->ah_attr.dlid; - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DLID, 1); - mqpcb->source_path_bits = attr->ah_attr.src_path_bits; - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SOURCE_PATH_BITS, 1); - mqpcb->service_level = attr->ah_attr.sl; - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SERVICE_LEVEL, 1); - - if (ehca_calc_ipd(shca, mqpcb->prim_phys_port, - attr->ah_attr.static_rate, - &mqpcb->max_static_rate)) { - ret = -EINVAL; - goto modify_qp_exit2; - } - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_MAX_STATIC_RATE, 1); - - /* - * Always supply the GRH flag, even if it's zero, to give the - * hypervisor a clear "yes" or "no" instead of a "perhaps" - */ - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG, 1); - - /* - * only if GRH is TRUE we might consider SOURCE_GID_IDX - * and DEST_GID otherwise phype will return H_ATTR_PARM!!! - */ - if (attr->ah_attr.ah_flags == IB_AH_GRH) { - mqpcb->send_grh_flag = 1; - - mqpcb->source_gid_idx = attr->ah_attr.grh.sgid_index; - update_mask |= - EHCA_BMASK_SET(MQPCB_MASK_SOURCE_GID_IDX, 1); - - for (cnt = 0; cnt < 16; cnt++) - mqpcb->dest_gid.byte[cnt] = - attr->ah_attr.grh.dgid.raw[cnt]; - - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DEST_GID, 1); - mqpcb->flow_label = attr->ah_attr.grh.flow_label; - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_FLOW_LABEL, 1); - mqpcb->hop_limit = attr->ah_attr.grh.hop_limit; - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_HOP_LIMIT, 1); - mqpcb->traffic_class = attr->ah_attr.grh.traffic_class; - update_mask |= - EHCA_BMASK_SET(MQPCB_MASK_TRAFFIC_CLASS, 1); - } - } - - if (attr_mask & IB_QP_PATH_MTU) { - /* store ld(MTU) */ - my_qp->mtu_shift = attr->path_mtu + 7; - mqpcb->path_mtu = attr->path_mtu; - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PATH_MTU, 1); - } - if (attr_mask & IB_QP_TIMEOUT) { - mqpcb->timeout = attr->timeout; - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_TIMEOUT, 1); - } - if (attr_mask & IB_QP_RETRY_CNT) { - mqpcb->retry_count = attr->retry_cnt; - update_mask |= 
EHCA_BMASK_SET(MQPCB_MASK_RETRY_COUNT, 1); - } - if (attr_mask & IB_QP_RNR_RETRY) { - mqpcb->rnr_retry_count = attr->rnr_retry; - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RNR_RETRY_COUNT, 1); - } - if (attr_mask & IB_QP_RQ_PSN) { - mqpcb->receive_psn = attr->rq_psn; - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RECEIVE_PSN, 1); - } - if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) { - mqpcb->rdma_nr_atomic_resp_res = attr->max_dest_rd_atomic < 3 ? - attr->max_dest_rd_atomic : 2; - update_mask |= - EHCA_BMASK_SET(MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES, 1); - } - if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) { - mqpcb->rdma_atomic_outst_dest_qp = attr->max_rd_atomic < 3 ? - attr->max_rd_atomic : 2; - update_mask |= - EHCA_BMASK_SET - (MQPCB_MASK_RDMA_ATOMIC_OUTST_DEST_QP, 1); - } - if (attr_mask & IB_QP_ALT_PATH) { - if (attr->alt_port_num < 1 - || attr->alt_port_num > shca->num_ports) { - ret = -EINVAL; - ehca_err(ibqp->device, "Invalid alt_port=%x. " - "ehca_qp=%p qp_num=%x num_ports=%x", - attr->alt_port_num, my_qp, ibqp->qp_num, - shca->num_ports); - goto modify_qp_exit2; - } - mqpcb->alt_phys_port = attr->alt_port_num; - - if (attr->alt_pkey_index >= 16) { - ret = -EINVAL; - ehca_err(ibqp->device, "Invalid alt_pkey_index=%x. 
" - "ehca_qp=%p qp_num=%x max_pkey_index=f", - attr->pkey_index, my_qp, ibqp->qp_num); - goto modify_qp_exit2; - } - mqpcb->alt_p_key_idx = attr->alt_pkey_index; - - mqpcb->timeout_al = attr->alt_timeout; - mqpcb->dlid_al = attr->alt_ah_attr.dlid; - mqpcb->source_path_bits_al = attr->alt_ah_attr.src_path_bits; - mqpcb->service_level_al = attr->alt_ah_attr.sl; - - if (ehca_calc_ipd(shca, mqpcb->alt_phys_port, - attr->alt_ah_attr.static_rate, - &mqpcb->max_static_rate_al)) { - ret = -EINVAL; - goto modify_qp_exit2; - } - - /* OpenIB doesn't support alternate retry counts - copy them */ - mqpcb->retry_count_al = mqpcb->retry_count; - mqpcb->rnr_retry_count_al = mqpcb->rnr_retry_count; - - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_ALT_PHYS_PORT, 1) - | EHCA_BMASK_SET(MQPCB_MASK_ALT_P_KEY_IDX, 1) - | EHCA_BMASK_SET(MQPCB_MASK_TIMEOUT_AL, 1) - | EHCA_BMASK_SET(MQPCB_MASK_DLID_AL, 1) - | EHCA_BMASK_SET(MQPCB_MASK_SOURCE_PATH_BITS_AL, 1) - | EHCA_BMASK_SET(MQPCB_MASK_SERVICE_LEVEL_AL, 1) - | EHCA_BMASK_SET(MQPCB_MASK_MAX_STATIC_RATE_AL, 1) - | EHCA_BMASK_SET(MQPCB_MASK_RETRY_COUNT_AL, 1) - | EHCA_BMASK_SET(MQPCB_MASK_RNR_RETRY_COUNT_AL, 1); - - /* - * Always supply the GRH flag, even if it's zero, to give the - * hypervisor a clear "yes" or "no" instead of a "perhaps" - */ - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG_AL, 1); - - /* - * only if GRH is TRUE we might consider SOURCE_GID_IDX - * and DEST_GID otherwise phype will return H_ATTR_PARM!!! 
- */ - if (attr->alt_ah_attr.ah_flags == IB_AH_GRH) { - mqpcb->send_grh_flag_al = 1; - - for (cnt = 0; cnt < 16; cnt++) - mqpcb->dest_gid_al.byte[cnt] = - attr->alt_ah_attr.grh.dgid.raw[cnt]; - mqpcb->source_gid_idx_al = - attr->alt_ah_attr.grh.sgid_index; - mqpcb->flow_label_al = attr->alt_ah_attr.grh.flow_label; - mqpcb->hop_limit_al = attr->alt_ah_attr.grh.hop_limit; - mqpcb->traffic_class_al = - attr->alt_ah_attr.grh.traffic_class; - - update_mask |= - EHCA_BMASK_SET(MQPCB_MASK_SOURCE_GID_IDX_AL, 1) - | EHCA_BMASK_SET(MQPCB_MASK_DEST_GID_AL, 1) - | EHCA_BMASK_SET(MQPCB_MASK_FLOW_LABEL_AL, 1) - | EHCA_BMASK_SET(MQPCB_MASK_HOP_LIMIT_AL, 1) | - EHCA_BMASK_SET(MQPCB_MASK_TRAFFIC_CLASS_AL, 1); - } - } - - if (attr_mask & IB_QP_MIN_RNR_TIMER) { - mqpcb->min_rnr_nak_timer_field = attr->min_rnr_timer; - update_mask |= - EHCA_BMASK_SET(MQPCB_MASK_MIN_RNR_NAK_TIMER_FIELD, 1); - } - - if (attr_mask & IB_QP_SQ_PSN) { - mqpcb->send_psn = attr->sq_psn; - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_PSN, 1); - } - - if (attr_mask & IB_QP_DEST_QPN) { - mqpcb->dest_qp_nr = attr->dest_qp_num; - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DEST_QP_NR, 1); - } - - if (attr_mask & IB_QP_PATH_MIG_STATE) { - if (attr->path_mig_state != IB_MIG_REARM - && attr->path_mig_state != IB_MIG_MIGRATED) { - ret = -EINVAL; - ehca_err(ibqp->device, "Invalid mig_state=%x", - attr->path_mig_state); - goto modify_qp_exit2; - } - mqpcb->path_migration_state = attr->path_mig_state + 1; - if (attr->path_mig_state == IB_MIG_REARM) - my_qp->mig_armed = 1; - update_mask |= - EHCA_BMASK_SET(MQPCB_MASK_PATH_MIGRATION_STATE, 1); - } - - if (attr_mask & IB_QP_CAP) { - mqpcb->max_nr_outst_send_wr = attr->cap.max_send_wr+1; - update_mask |= - EHCA_BMASK_SET(MQPCB_MASK_MAX_NR_OUTST_SEND_WR, 1); - mqpcb->max_nr_outst_recv_wr = attr->cap.max_recv_wr+1; - update_mask |= - EHCA_BMASK_SET(MQPCB_MASK_MAX_NR_OUTST_RECV_WR, 1); - /* no support for max_send/recv_sge yet */ - } - - if (ehca_debug_level >= 2) - 
ehca_dmp(mqpcb, 4*70, "qp_num=%x", ibqp->qp_num); - - h_ret = hipz_h_modify_qp(shca->ipz_hca_handle, - my_qp->ipz_qp_handle, - &my_qp->pf, - update_mask, - mqpcb, my_qp->galpas.kernel); - - if (h_ret != H_SUCCESS) { - ret = ehca2ib_return_code(h_ret); - ehca_err(ibqp->device, "hipz_h_modify_qp() failed h_ret=%lli " - "ehca_qp=%p qp_num=%x", h_ret, my_qp, ibqp->qp_num); - goto modify_qp_exit2; - } - - if ((my_qp->qp_type == IB_QPT_UD || - my_qp->qp_type == IB_QPT_GSI || - my_qp->qp_type == IB_QPT_SMI) && - statetrans == IB_QPST_SQE2RTS) { - /* doorbell to reprocessing wqes */ - iosync(); /* serialize GAL register access */ - hipz_update_sqa(my_qp, bad_wqe_cnt-1); - ehca_gen_dbg("doorbell for %x wqes", bad_wqe_cnt); - } - - if (statetrans == IB_QPST_RESET2INIT || - statetrans == IB_QPST_INIT2INIT) { - mqpcb->qp_enable = 1; - mqpcb->qp_state = EHCA_QPS_INIT; - update_mask = 0; - update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_ENABLE, 1); - - h_ret = hipz_h_modify_qp(shca->ipz_hca_handle, - my_qp->ipz_qp_handle, - &my_qp->pf, - update_mask, - mqpcb, - my_qp->galpas.kernel); - - if (h_ret != H_SUCCESS) { - ret = ehca2ib_return_code(h_ret); - ehca_err(ibqp->device, "ENABLE in context of " - "RESET_2_INIT failed! 
Maybe you didn't get " - "a LID h_ret=%lli ehca_qp=%p qp_num=%x", - h_ret, my_qp, ibqp->qp_num); - goto modify_qp_exit2; - } - } - if ((qp_new_state == IB_QPS_ERR) && (qp_cur_state != IB_QPS_ERR) - && !is_user) { - ret = check_for_left_cqes(my_qp, shca); - if (ret) - goto modify_qp_exit2; - } - - if (statetrans == IB_QPST_ANY2RESET) { - ipz_qeit_reset(&my_qp->ipz_rqueue); - ipz_qeit_reset(&my_qp->ipz_squeue); - - if (qp_cur_state == IB_QPS_ERR && !is_user) { - del_from_err_list(my_qp->send_cq, &my_qp->sq_err_node); - - if (HAS_RQ(my_qp)) - del_from_err_list(my_qp->recv_cq, - &my_qp->rq_err_node); - } - if (!is_user) - reset_queue_map(&my_qp->sq_map); - - if (HAS_RQ(my_qp) && !is_user) - reset_queue_map(&my_qp->rq_map); - } - - if (attr_mask & IB_QP_QKEY) - my_qp->qkey = attr->qkey; - -modify_qp_exit2: - if (squeue_locked) { /* this means: sqe -> rts */ - spin_unlock_irqrestore(&my_qp->spinlock_s, flags); - my_qp->sqerr_purgeflag = 1; - } - -modify_qp_exit1: - ehca_free_fw_ctrlblock(mqpcb); - - return ret; -} - -int ehca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, - struct ib_udata *udata) -{ - int ret = 0; - - struct ehca_shca *shca = container_of(ibqp->device, struct ehca_shca, - ib_device); - struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp); - - /* The if-block below caches qp_attr to be modified for GSI and SMI - * qps during the initialization by ib_mad. When the respective port - * is activated, ie we got an event PORT_ACTIVE, we'll replay the - * cached modify calls sequence, see ehca_recover_sqs() below. - * Why that is required: - * 1) If one port is connected, older code requires that port one - * to be connected and module option nr_ports=1 to be given by - * user, which is very inconvenient for end user. - * 2) Firmware accepts modify_qp() only if respective port has become - * active. Older code had a wait loop of 30sec create_qp()/ - * define_aqp1(), which is not appropriate in practice. 
This - * code now removes that wait loop, see define_aqp1(), and always - * reports all ports to ib_mad resp. users. Only activated ports - * will then usable for the users. - */ - if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI) { - int port = my_qp->init_attr.port_num; - struct ehca_sport *sport = &shca->sport[port - 1]; - unsigned long flags; - spin_lock_irqsave(&sport->mod_sqp_lock, flags); - /* cache qp_attr only during init */ - if (my_qp->mod_qp_parm) { - struct ehca_mod_qp_parm *p; - if (my_qp->mod_qp_parm_idx >= EHCA_MOD_QP_PARM_MAX) { - ehca_err(&shca->ib_device, - "mod_qp_parm overflow state=%x port=%x" - " type=%x", attr->qp_state, - my_qp->init_attr.port_num, - ibqp->qp_type); - spin_unlock_irqrestore(&sport->mod_sqp_lock, - flags); - return -EINVAL; - } - p = &my_qp->mod_qp_parm[my_qp->mod_qp_parm_idx]; - p->mask = attr_mask; - p->attr = *attr; - my_qp->mod_qp_parm_idx++; - ehca_dbg(&shca->ib_device, - "Saved qp_attr for state=%x port=%x type=%x", - attr->qp_state, my_qp->init_attr.port_num, - ibqp->qp_type); - spin_unlock_irqrestore(&sport->mod_sqp_lock, flags); - goto out; - } - spin_unlock_irqrestore(&sport->mod_sqp_lock, flags); - } - - ret = internal_modify_qp(ibqp, attr, attr_mask, 0); - -out: - if ((ret == 0) && (attr_mask & IB_QP_STATE)) - my_qp->state = attr->qp_state; - - return ret; -} - -void ehca_recover_sqp(struct ib_qp *sqp) -{ - struct ehca_qp *my_sqp = container_of(sqp, struct ehca_qp, ib_qp); - int port = my_sqp->init_attr.port_num; - struct ib_qp_attr attr; - struct ehca_mod_qp_parm *qp_parm; - int i, qp_parm_idx, ret; - unsigned long flags, wr_cnt; - - if (!my_sqp->mod_qp_parm) - return; - ehca_dbg(sqp->device, "SQP port=%x qp_num=%x", port, sqp->qp_num); - - qp_parm = my_sqp->mod_qp_parm; - qp_parm_idx = my_sqp->mod_qp_parm_idx; - for (i = 0; i < qp_parm_idx; i++) { - attr = qp_parm[i].attr; - ret = internal_modify_qp(sqp, &attr, qp_parm[i].mask, 0); - if (ret) { - ehca_err(sqp->device, "Could not modify SQP port=%x 
" - "qp_num=%x ret=%x", port, sqp->qp_num, ret); - goto free_qp_parm; - } - ehca_dbg(sqp->device, "SQP port=%x qp_num=%x in state=%x", - port, sqp->qp_num, attr.qp_state); - } - - /* re-trigger posted recv wrs */ - wr_cnt = my_sqp->ipz_rqueue.current_q_offset / - my_sqp->ipz_rqueue.qe_size; - if (wr_cnt) { - spin_lock_irqsave(&my_sqp->spinlock_r, flags); - hipz_update_rqa(my_sqp, wr_cnt); - spin_unlock_irqrestore(&my_sqp->spinlock_r, flags); - ehca_dbg(sqp->device, "doorbell port=%x qp_num=%x wr_cnt=%lx", - port, sqp->qp_num, wr_cnt); - } - -free_qp_parm: - kfree(qp_parm); - /* this prevents subsequent calls to modify_qp() to cache qp_attr */ - my_sqp->mod_qp_parm = NULL; -} - -int ehca_query_qp(struct ib_qp *qp, - struct ib_qp_attr *qp_attr, - int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr) -{ - struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp); - struct ehca_shca *shca = container_of(qp->device, struct ehca_shca, - ib_device); - struct ipz_adapter_handle adapter_handle = shca->ipz_hca_handle; - struct hcp_modify_qp_control_block *qpcb; - int cnt, ret = 0; - u64 h_ret; - - if (qp_attr_mask & QP_ATTR_QUERY_NOT_SUPPORTED) { - ehca_err(qp->device, "Invalid attribute mask " - "ehca_qp=%p qp_num=%x qp_attr_mask=%x ", - my_qp, qp->qp_num, qp_attr_mask); - return -EINVAL; - } - - qpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL); - if (!qpcb) { - ehca_err(qp->device, "Out of memory for qpcb " - "ehca_qp=%p qp_num=%x", my_qp, qp->qp_num); - return -ENOMEM; - } - - h_ret = hipz_h_query_qp(adapter_handle, - my_qp->ipz_qp_handle, - &my_qp->pf, - qpcb, my_qp->galpas.kernel); - - if (h_ret != H_SUCCESS) { - ret = ehca2ib_return_code(h_ret); - ehca_err(qp->device, "hipz_h_query_qp() failed " - "ehca_qp=%p qp_num=%x h_ret=%lli", - my_qp, qp->qp_num, h_ret); - goto query_qp_exit1; - } - - qp_attr->cur_qp_state = ehca2ib_qp_state(qpcb->qp_state); - qp_attr->qp_state = qp_attr->cur_qp_state; - - if (qp_attr->cur_qp_state == -EINVAL) { - ret = -EINVAL; - 
ehca_err(qp->device, "Got invalid ehca_qp_state=%x " - "ehca_qp=%p qp_num=%x", - qpcb->qp_state, my_qp, qp->qp_num); - goto query_qp_exit1; - } - - if (qp_attr->qp_state == IB_QPS_SQD) - qp_attr->sq_draining = 1; - - qp_attr->qkey = qpcb->qkey; - qp_attr->path_mtu = qpcb->path_mtu; - qp_attr->path_mig_state = qpcb->path_migration_state - 1; - qp_attr->rq_psn = qpcb->receive_psn; - qp_attr->sq_psn = qpcb->send_psn; - qp_attr->min_rnr_timer = qpcb->min_rnr_nak_timer_field; - qp_attr->cap.max_send_wr = qpcb->max_nr_outst_send_wr-1; - qp_attr->cap.max_recv_wr = qpcb->max_nr_outst_recv_wr-1; - /* UD_AV CIRCUMVENTION */ - if (my_qp->qp_type == IB_QPT_UD) { - qp_attr->cap.max_send_sge = - qpcb->actual_nr_sges_in_sq_wqe - 2; - qp_attr->cap.max_recv_sge = - qpcb->actual_nr_sges_in_rq_wqe - 2; - } else { - qp_attr->cap.max_send_sge = - qpcb->actual_nr_sges_in_sq_wqe; - qp_attr->cap.max_recv_sge = - qpcb->actual_nr_sges_in_rq_wqe; - } - - qp_attr->cap.max_inline_data = my_qp->sq_max_inline_data_size; - qp_attr->dest_qp_num = qpcb->dest_qp_nr; - - qp_attr->pkey_index = qpcb->prim_p_key_idx; - qp_attr->port_num = qpcb->prim_phys_port; - qp_attr->timeout = qpcb->timeout; - qp_attr->retry_cnt = qpcb->retry_count; - qp_attr->rnr_retry = qpcb->rnr_retry_count; - - qp_attr->alt_pkey_index = qpcb->alt_p_key_idx; - qp_attr->alt_port_num = qpcb->alt_phys_port; - qp_attr->alt_timeout = qpcb->timeout_al; - - qp_attr->max_dest_rd_atomic = qpcb->rdma_nr_atomic_resp_res; - qp_attr->max_rd_atomic = qpcb->rdma_atomic_outst_dest_qp; - - /* primary av */ - qp_attr->ah_attr.sl = qpcb->service_level; - - if (qpcb->send_grh_flag) { - qp_attr->ah_attr.ah_flags = IB_AH_GRH; - } - - qp_attr->ah_attr.static_rate = qpcb->max_static_rate; - qp_attr->ah_attr.dlid = qpcb->dlid; - qp_attr->ah_attr.src_path_bits = qpcb->source_path_bits; - qp_attr->ah_attr.port_num = qp_attr->port_num; - - /* primary GRH */ - qp_attr->ah_attr.grh.traffic_class = qpcb->traffic_class; - qp_attr->ah_attr.grh.hop_limit = 
qpcb->hop_limit; - qp_attr->ah_attr.grh.sgid_index = qpcb->source_gid_idx; - qp_attr->ah_attr.grh.flow_label = qpcb->flow_label; - - for (cnt = 0; cnt < 16; cnt++) - qp_attr->ah_attr.grh.dgid.raw[cnt] = - qpcb->dest_gid.byte[cnt]; - - /* alternate AV */ - qp_attr->alt_ah_attr.sl = qpcb->service_level_al; - if (qpcb->send_grh_flag_al) { - qp_attr->alt_ah_attr.ah_flags = IB_AH_GRH; - } - - qp_attr->alt_ah_attr.static_rate = qpcb->max_static_rate_al; - qp_attr->alt_ah_attr.dlid = qpcb->dlid_al; - qp_attr->alt_ah_attr.src_path_bits = qpcb->source_path_bits_al; - - /* alternate GRH */ - qp_attr->alt_ah_attr.grh.traffic_class = qpcb->traffic_class_al; - qp_attr->alt_ah_attr.grh.hop_limit = qpcb->hop_limit_al; - qp_attr->alt_ah_attr.grh.sgid_index = qpcb->source_gid_idx_al; - qp_attr->alt_ah_attr.grh.flow_label = qpcb->flow_label_al; - - for (cnt = 0; cnt < 16; cnt++) - qp_attr->alt_ah_attr.grh.dgid.raw[cnt] = - qpcb->dest_gid_al.byte[cnt]; - - /* return init attributes given in ehca_create_qp */ - if (qp_init_attr) - *qp_init_attr = my_qp->init_attr; - - if (ehca_debug_level >= 2) - ehca_dmp(qpcb, 4*70, "qp_num=%x", qp->qp_num); - -query_qp_exit1: - ehca_free_fw_ctrlblock(qpcb); - - return ret; -} - -int ehca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, - enum ib_srq_attr_mask attr_mask, struct ib_udata *udata) -{ - struct ehca_qp *my_qp = - container_of(ibsrq, struct ehca_qp, ib_srq); - struct ehca_shca *shca = - container_of(ibsrq->pd->device, struct ehca_shca, ib_device); - struct hcp_modify_qp_control_block *mqpcb; - u64 update_mask; - u64 h_ret; - int ret = 0; - - mqpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL); - if (!mqpcb) { - ehca_err(ibsrq->device, "Could not get zeroed page for mqpcb " - "ehca_qp=%p qp_num=%x ", my_qp, my_qp->real_qp_num); - return -ENOMEM; - } - - update_mask = 0; - if (attr_mask & IB_SRQ_LIMIT) { - attr_mask &= ~IB_SRQ_LIMIT; - update_mask |= - EHCA_BMASK_SET(MQPCB_MASK_CURR_SRQ_LIMIT, 1) - | 
EHCA_BMASK_SET(MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG, 1); - mqpcb->curr_srq_limit = attr->srq_limit; - mqpcb->qp_aff_asyn_ev_log_reg = - EHCA_BMASK_SET(QPX_AAELOG_RESET_SRQ_LIMIT, 1); - } - - /* by now, all bits in attr_mask should have been cleared */ - if (attr_mask) { - ehca_err(ibsrq->device, "invalid attribute mask bits set " - "attr_mask=%x", attr_mask); - ret = -EINVAL; - goto modify_srq_exit0; - } - - if (ehca_debug_level >= 2) - ehca_dmp(mqpcb, 4*70, "qp_num=%x", my_qp->real_qp_num); - - h_ret = hipz_h_modify_qp(shca->ipz_hca_handle, my_qp->ipz_qp_handle, - NULL, update_mask, mqpcb, - my_qp->galpas.kernel); - - if (h_ret != H_SUCCESS) { - ret = ehca2ib_return_code(h_ret); - ehca_err(ibsrq->device, "hipz_h_modify_qp() failed h_ret=%lli " - "ehca_qp=%p qp_num=%x", - h_ret, my_qp, my_qp->real_qp_num); - } - -modify_srq_exit0: - ehca_free_fw_ctrlblock(mqpcb); - - return ret; -} - -int ehca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr) -{ - struct ehca_qp *my_qp = container_of(srq, struct ehca_qp, ib_srq); - struct ehca_shca *shca = container_of(srq->device, struct ehca_shca, - ib_device); - struct ipz_adapter_handle adapter_handle = shca->ipz_hca_handle; - struct hcp_modify_qp_control_block *qpcb; - int ret = 0; - u64 h_ret; - - qpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL); - if (!qpcb) { - ehca_err(srq->device, "Out of memory for qpcb " - "ehca_qp=%p qp_num=%x", my_qp, my_qp->real_qp_num); - return -ENOMEM; - } - - h_ret = hipz_h_query_qp(adapter_handle, my_qp->ipz_qp_handle, - NULL, qpcb, my_qp->galpas.kernel); - - if (h_ret != H_SUCCESS) { - ret = ehca2ib_return_code(h_ret); - ehca_err(srq->device, "hipz_h_query_qp() failed " - "ehca_qp=%p qp_num=%x h_ret=%lli", - my_qp, my_qp->real_qp_num, h_ret); - goto query_srq_exit1; - } - - srq_attr->max_wr = qpcb->max_nr_outst_recv_wr - 1; - srq_attr->max_sge = 3; - srq_attr->srq_limit = qpcb->curr_srq_limit; - - if (ehca_debug_level >= 2) - ehca_dmp(qpcb, 4*70, "qp_num=%x", my_qp->real_qp_num); - 
-query_srq_exit1: - ehca_free_fw_ctrlblock(qpcb); - - return ret; -} - -static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp, - struct ib_uobject *uobject) -{ - struct ehca_shca *shca = container_of(dev, struct ehca_shca, ib_device); - struct ehca_pd *my_pd = container_of(my_qp->ib_qp.pd, struct ehca_pd, - ib_pd); - struct ehca_sport *sport = &shca->sport[my_qp->init_attr.port_num - 1]; - u32 qp_num = my_qp->real_qp_num; - int ret; - u64 h_ret; - u8 port_num; - int is_user = 0; - enum ib_qp_type qp_type; - unsigned long flags; - - if (uobject) { - is_user = 1; - if (my_qp->mm_count_galpa || - my_qp->mm_count_rqueue || my_qp->mm_count_squeue) { - ehca_err(dev, "Resources still referenced in " - "user space qp_num=%x", qp_num); - return -EINVAL; - } - } - - if (my_qp->send_cq) { - ret = ehca_cq_unassign_qp(my_qp->send_cq, qp_num); - if (ret) { - ehca_err(dev, "Couldn't unassign qp from " - "send_cq ret=%i qp_num=%x cq_num=%x", ret, - qp_num, my_qp->send_cq->cq_number); - return ret; - } - } - - write_lock_irqsave(&ehca_qp_idr_lock, flags); - idr_remove(&ehca_qp_idr, my_qp->token); - write_unlock_irqrestore(&ehca_qp_idr_lock, flags); - - /* - * SRQs will never get into an error list and do not have a recv_cq, - * so we need to skip them here. 
- */ - if (HAS_RQ(my_qp) && !IS_SRQ(my_qp) && !is_user) - del_from_err_list(my_qp->recv_cq, &my_qp->rq_err_node); - - if (HAS_SQ(my_qp) && !is_user) - del_from_err_list(my_qp->send_cq, &my_qp->sq_err_node); - - /* now wait until all pending events have completed */ - wait_event(my_qp->wait_completion, !atomic_read(&my_qp->nr_events)); - - h_ret = hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp); - if (h_ret != H_SUCCESS) { - ehca_err(dev, "hipz_h_destroy_qp() failed h_ret=%lli " - "ehca_qp=%p qp_num=%x", h_ret, my_qp, qp_num); - return ehca2ib_return_code(h_ret); - } - - port_num = my_qp->init_attr.port_num; - qp_type = my_qp->init_attr.qp_type; - - if (qp_type == IB_QPT_SMI || qp_type == IB_QPT_GSI) { - spin_lock_irqsave(&sport->mod_sqp_lock, flags); - kfree(my_qp->mod_qp_parm); - my_qp->mod_qp_parm = NULL; - shca->sport[port_num - 1].ibqp_sqp[qp_type] = NULL; - spin_unlock_irqrestore(&sport->mod_sqp_lock, flags); - } - - /* no support for IB_QPT_SMI yet */ - if (qp_type == IB_QPT_GSI) { - struct ib_event event; - ehca_info(dev, "device %s: port %x is inactive.", - shca->ib_device.name, port_num); - event.device = &shca->ib_device; - event.event = IB_EVENT_PORT_ERR; - event.element.port_num = port_num; - shca->sport[port_num - 1].port_state = IB_PORT_DOWN; - ib_dispatch_event(&event); - } - - if (HAS_RQ(my_qp)) { - ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue); - if (!is_user) - vfree(my_qp->rq_map.map); - } - if (HAS_SQ(my_qp)) { - ipz_queue_dtor(my_pd, &my_qp->ipz_squeue); - if (!is_user) - vfree(my_qp->sq_map.map); - } - kmem_cache_free(qp_cache, my_qp); - atomic_dec(&shca->num_qps); - return 0; -} - -int ehca_destroy_qp(struct ib_qp *qp) -{ - return internal_destroy_qp(qp->device, - container_of(qp, struct ehca_qp, ib_qp), - qp->uobject); -} - -int ehca_destroy_srq(struct ib_srq *srq) -{ - return internal_destroy_qp(srq->device, - container_of(srq, struct ehca_qp, ib_srq), - srq->uobject); -} - -int ehca_init_qp_cache(void) -{ - qp_cache = 
kmem_cache_create("ehca_cache_qp", - sizeof(struct ehca_qp), 0, - SLAB_HWCACHE_ALIGN, - NULL); - if (!qp_cache) - return -ENOMEM; - return 0; -} - -void ehca_cleanup_qp_cache(void) -{ - if (qp_cache) - kmem_cache_destroy(qp_cache); -} diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c deleted file mode 100644 index 47f9498..0000000 --- a/drivers/infiniband/hw/ehca/ehca_reqs.c +++ /dev/null @@ -1,953 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * post_send/recv, poll_cq, req_notify - * - * Authors: Hoang-Nam Nguyen - * Waleri Fomin - * Joachim Fenkes - * Reinhard Ernst - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - - -#include "ehca_classes.h" -#include "ehca_tools.h" -#include "ehca_qes.h" -#include "ehca_iverbs.h" -#include "hcp_if.h" -#include "hipz_fns.h" - -/* in RC traffic, insert an empty RDMA READ every this many packets */ -#define ACK_CIRC_THRESHOLD 2000000 - -static u64 replace_wr_id(u64 wr_id, u16 idx) -{ - u64 ret; - - ret = wr_id & ~QMAP_IDX_MASK; - ret |= idx & QMAP_IDX_MASK; - - return ret; -} - -static u16 get_app_wr_id(u64 wr_id) -{ - return wr_id & QMAP_IDX_MASK; -} - -static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue, - struct ehca_wqe *wqe_p, - struct ib_recv_wr *recv_wr, - u32 rq_map_idx) -{ - u8 cnt_ds; - if (unlikely((recv_wr->num_sge < 0) || - (recv_wr->num_sge > ipz_rqueue->act_nr_of_sg))) { - ehca_gen_err("Invalid number of WQE SGE. 
" - "num_sqe=%x max_nr_of_sg=%x", - recv_wr->num_sge, ipz_rqueue->act_nr_of_sg); - return -EINVAL; /* invalid SG list length */ - } - - /* clear wqe header until sglist */ - memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list)); - - wqe_p->work_request_id = replace_wr_id(recv_wr->wr_id, rq_map_idx); - wqe_p->nr_of_data_seg = recv_wr->num_sge; - - for (cnt_ds = 0; cnt_ds < recv_wr->num_sge; cnt_ds++) { - wqe_p->u.all_rcv.sg_list[cnt_ds].vaddr = - recv_wr->sg_list[cnt_ds].addr; - wqe_p->u.all_rcv.sg_list[cnt_ds].lkey = - recv_wr->sg_list[cnt_ds].lkey; - wqe_p->u.all_rcv.sg_list[cnt_ds].length = - recv_wr->sg_list[cnt_ds].length; - } - - if (ehca_debug_level >= 3) { - ehca_gen_dbg("RECEIVE WQE written into ipz_rqueue=%p", - ipz_rqueue); - ehca_dmp(wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "recv wqe"); - } - - return 0; -} - -#if defined(DEBUG_GSI_SEND_WR) - -/* need ib_mad struct */ -#include - -static void trace_send_wr_ud(const struct ib_send_wr *send_wr) -{ - int idx; - int j; - while (send_wr) { - struct ib_mad_hdr *mad_hdr = send_wr->wr.ud.mad_hdr; - struct ib_sge *sge = send_wr->sg_list; - ehca_gen_dbg("send_wr#%x wr_id=%lx num_sge=%x " - "send_flags=%x opcode=%x", idx, send_wr->wr_id, - send_wr->num_sge, send_wr->send_flags, - send_wr->opcode); - if (mad_hdr) { - ehca_gen_dbg("send_wr#%x mad_hdr base_version=%x " - "mgmt_class=%x class_version=%x method=%x " - "status=%x class_specific=%x tid=%lx " - "attr_id=%x resv=%x attr_mod=%x", - idx, mad_hdr->base_version, - mad_hdr->mgmt_class, - mad_hdr->class_version, mad_hdr->method, - mad_hdr->status, mad_hdr->class_specific, - mad_hdr->tid, mad_hdr->attr_id, - mad_hdr->resv, - mad_hdr->attr_mod); - } - for (j = 0; j < send_wr->num_sge; j++) { - u8 *data = __va(sge->addr); - ehca_gen_dbg("send_wr#%x sge#%x addr=%p length=%x " - "lkey=%x", - idx, j, data, sge->length, sge->lkey); - /* assume length is n*16 */ - ehca_dmp(data, sge->length, "send_wr#%x sge#%x", - idx, j); - sge++; - } /* eof for j */ - idx++; - 
send_wr = send_wr->next; - } /* eof while send_wr */ -} - -#endif /* DEBUG_GSI_SEND_WR */ - -static inline int ehca_write_swqe(struct ehca_qp *qp, - struct ehca_wqe *wqe_p, - const struct ib_send_wr *send_wr, - u32 sq_map_idx, - int hidden) -{ - u32 idx; - u64 dma_length; - struct ehca_av *my_av; - u32 remote_qkey = send_wr->wr.ud.remote_qkey; - struct ehca_qmap_entry *qmap_entry = &qp->sq_map.map[sq_map_idx]; - - if (unlikely((send_wr->num_sge < 0) || - (send_wr->num_sge > qp->ipz_squeue.act_nr_of_sg))) { - ehca_gen_err("Invalid number of WQE SGE. " - "num_sqe=%x max_nr_of_sg=%x", - send_wr->num_sge, qp->ipz_squeue.act_nr_of_sg); - return -EINVAL; /* invalid SG list length */ - } - - /* clear wqe header until sglist */ - memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list)); - - wqe_p->work_request_id = replace_wr_id(send_wr->wr_id, sq_map_idx); - - qmap_entry->app_wr_id = get_app_wr_id(send_wr->wr_id); - qmap_entry->reported = 0; - qmap_entry->cqe_req = 0; - - switch (send_wr->opcode) { - case IB_WR_SEND: - case IB_WR_SEND_WITH_IMM: - wqe_p->optype = WQE_OPTYPE_SEND; - break; - case IB_WR_RDMA_WRITE: - case IB_WR_RDMA_WRITE_WITH_IMM: - wqe_p->optype = WQE_OPTYPE_RDMAWRITE; - break; - case IB_WR_RDMA_READ: - wqe_p->optype = WQE_OPTYPE_RDMAREAD; - break; - default: - ehca_gen_err("Invalid opcode=%x", send_wr->opcode); - return -EINVAL; /* invalid opcode */ - } - - wqe_p->wqef = (send_wr->opcode) & WQEF_HIGH_NIBBLE; - - wqe_p->wr_flag = 0; - - if ((send_wr->send_flags & IB_SEND_SIGNALED || - qp->init_attr.sq_sig_type == IB_SIGNAL_ALL_WR) - && !hidden) { - wqe_p->wr_flag |= WQE_WRFLAG_REQ_SIGNAL_COM; - qmap_entry->cqe_req = 1; - } - - if (send_wr->opcode == IB_WR_SEND_WITH_IMM || - send_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) { - /* this might not work as long as HW does not support it */ - wqe_p->immediate_data = be32_to_cpu(send_wr->ex.imm_data); - wqe_p->wr_flag |= WQE_WRFLAG_IMM_DATA_PRESENT; - } - - wqe_p->nr_of_data_seg = send_wr->num_sge; - - switch 
(qp->qp_type) { - case IB_QPT_SMI: - case IB_QPT_GSI: - /* no break is intential here */ - case IB_QPT_UD: - /* IB 1.2 spec C10-15 compliance */ - if (send_wr->wr.ud.remote_qkey & 0x80000000) - remote_qkey = qp->qkey; - - wqe_p->destination_qp_number = send_wr->wr.ud.remote_qpn << 8; - wqe_p->local_ee_context_qkey = remote_qkey; - if (unlikely(!send_wr->wr.ud.ah)) { - ehca_gen_err("wr.ud.ah is NULL. qp=%p", qp); - return -EINVAL; - } - if (unlikely(send_wr->wr.ud.remote_qpn == 0)) { - ehca_gen_err("dest QP# is 0. qp=%x", qp->real_qp_num); - return -EINVAL; - } - my_av = container_of(send_wr->wr.ud.ah, struct ehca_av, ib_ah); - wqe_p->u.ud_av.ud_av = my_av->av; - - /* - * omitted check of IB_SEND_INLINE - * since HW does not support it - */ - for (idx = 0; idx < send_wr->num_sge; idx++) { - wqe_p->u.ud_av.sg_list[idx].vaddr = - send_wr->sg_list[idx].addr; - wqe_p->u.ud_av.sg_list[idx].lkey = - send_wr->sg_list[idx].lkey; - wqe_p->u.ud_av.sg_list[idx].length = - send_wr->sg_list[idx].length; - } /* eof for idx */ - if (qp->qp_type == IB_QPT_SMI || - qp->qp_type == IB_QPT_GSI) - wqe_p->u.ud_av.ud_av.pmtu = 1; - if (qp->qp_type == IB_QPT_GSI) { - wqe_p->pkeyi = send_wr->wr.ud.pkey_index; -#ifdef DEBUG_GSI_SEND_WR - trace_send_wr_ud(send_wr); -#endif /* DEBUG_GSI_SEND_WR */ - } - break; - - case IB_QPT_UC: - if (send_wr->send_flags & IB_SEND_FENCE) - wqe_p->wr_flag |= WQE_WRFLAG_FENCE; - /* no break is intentional here */ - case IB_QPT_RC: - /* TODO: atomic not implemented */ - wqe_p->u.nud.remote_virtual_address = - send_wr->wr.rdma.remote_addr; - wqe_p->u.nud.rkey = send_wr->wr.rdma.rkey; - - /* - * omitted checking of IB_SEND_INLINE - * since HW does not support it - */ - dma_length = 0; - for (idx = 0; idx < send_wr->num_sge; idx++) { - wqe_p->u.nud.sg_list[idx].vaddr = - send_wr->sg_list[idx].addr; - wqe_p->u.nud.sg_list[idx].lkey = - send_wr->sg_list[idx].lkey; - wqe_p->u.nud.sg_list[idx].length = - send_wr->sg_list[idx].length; - dma_length += 
send_wr->sg_list[idx].length; - } /* eof idx */ - wqe_p->u.nud.atomic_1st_op_dma_len = dma_length; - - /* unsolicited ack circumvention */ - if (send_wr->opcode == IB_WR_RDMA_READ) { - /* on RDMA read, switch on and reset counters */ - qp->message_count = qp->packet_count = 0; - qp->unsol_ack_circ = 1; - } else - /* else estimate #packets */ - qp->packet_count += (dma_length >> qp->mtu_shift) + 1; - - break; - - default: - ehca_gen_err("Invalid qptype=%x", qp->qp_type); - return -EINVAL; - } - - if (ehca_debug_level >= 3) { - ehca_gen_dbg("SEND WQE written into queue qp=%p ", qp); - ehca_dmp( wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "send wqe"); - } - return 0; -} - -/* map_ib_wc_status converts raw cqe_status to ib_wc_status */ -static inline void map_ib_wc_status(u32 cqe_status, - enum ib_wc_status *wc_status) -{ - if (unlikely(cqe_status & WC_STATUS_ERROR_BIT)) { - switch (cqe_status & 0x3F) { - case 0x01: - case 0x21: - *wc_status = IB_WC_LOC_LEN_ERR; - break; - case 0x02: - case 0x22: - *wc_status = IB_WC_LOC_QP_OP_ERR; - break; - case 0x03: - case 0x23: - *wc_status = IB_WC_LOC_EEC_OP_ERR; - break; - case 0x04: - case 0x24: - *wc_status = IB_WC_LOC_PROT_ERR; - break; - case 0x05: - case 0x25: - *wc_status = IB_WC_WR_FLUSH_ERR; - break; - case 0x06: - *wc_status = IB_WC_MW_BIND_ERR; - break; - case 0x07: /* remote error - look into bits 20:24 */ - switch ((cqe_status - & WC_STATUS_REMOTE_ERROR_FLAGS) >> 11) { - case 0x0: - /* - * PSN Sequence Error! - * couldn't find a matching status! 
- */ - *wc_status = IB_WC_GENERAL_ERR; - break; - case 0x1: - *wc_status = IB_WC_REM_INV_REQ_ERR; - break; - case 0x2: - *wc_status = IB_WC_REM_ACCESS_ERR; - break; - case 0x3: - *wc_status = IB_WC_REM_OP_ERR; - break; - case 0x4: - *wc_status = IB_WC_REM_INV_RD_REQ_ERR; - break; - } - break; - case 0x08: - *wc_status = IB_WC_RETRY_EXC_ERR; - break; - case 0x09: - *wc_status = IB_WC_RNR_RETRY_EXC_ERR; - break; - case 0x0A: - case 0x2D: - *wc_status = IB_WC_REM_ABORT_ERR; - break; - case 0x0B: - case 0x2E: - *wc_status = IB_WC_INV_EECN_ERR; - break; - case 0x0C: - case 0x2F: - *wc_status = IB_WC_INV_EEC_STATE_ERR; - break; - case 0x0D: - *wc_status = IB_WC_BAD_RESP_ERR; - break; - case 0x10: - /* WQE purged */ - *wc_status = IB_WC_WR_FLUSH_ERR; - break; - default: - *wc_status = IB_WC_FATAL_ERR; - - } - } else - *wc_status = IB_WC_SUCCESS; -} - -static inline int post_one_send(struct ehca_qp *my_qp, - struct ib_send_wr *cur_send_wr, - int hidden) -{ - struct ehca_wqe *wqe_p; - int ret; - u32 sq_map_idx; - u64 start_offset = my_qp->ipz_squeue.current_q_offset; - - /* get pointer next to free WQE */ - wqe_p = ipz_qeit_get_inc(&my_qp->ipz_squeue); - if (unlikely(!wqe_p)) { - /* too many posted work requests: queue overflow */ - ehca_err(my_qp->ib_qp.device, "Too many posted WQEs " - "qp_num=%x", my_qp->ib_qp.qp_num); - return -ENOMEM; - } - - /* - * Get the index of the WQE in the send queue. The same index is used - * for writing into the sq_map. 
- */ - sq_map_idx = start_offset / my_qp->ipz_squeue.qe_size; - - /* write a SEND WQE into the QUEUE */ - ret = ehca_write_swqe(my_qp, wqe_p, cur_send_wr, sq_map_idx, hidden); - /* - * if something failed, - * reset the free entry pointer to the start value - */ - if (unlikely(ret)) { - my_qp->ipz_squeue.current_q_offset = start_offset; - ehca_err(my_qp->ib_qp.device, "Could not write WQE " - "qp_num=%x", my_qp->ib_qp.qp_num); - return -EINVAL; - } - - return 0; -} - -int ehca_post_send(struct ib_qp *qp, - struct ib_send_wr *send_wr, - struct ib_send_wr **bad_send_wr) -{ - struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp); - int wqe_cnt = 0; - int ret = 0; - unsigned long flags; - - /* Reject WR if QP is in RESET, INIT or RTR state */ - if (unlikely(my_qp->state < IB_QPS_RTS)) { - ehca_err(qp->device, "Invalid QP state qp_state=%d qpn=%x", - my_qp->state, qp->qp_num); - ret = -EINVAL; - goto out; - } - - /* LOCK the QUEUE */ - spin_lock_irqsave(&my_qp->spinlock_s, flags); - - /* Send an empty extra RDMA read if: - * 1) there has been an RDMA read on this connection before - * 2) no RDMA read occurred for ACK_CIRC_THRESHOLD link packets - * 3) we can be sure that any previous extra RDMA read has been - * processed so we don't overflow the SQ - */ - if (unlikely(my_qp->unsol_ack_circ && - my_qp->packet_count > ACK_CIRC_THRESHOLD && - my_qp->message_count > my_qp->init_attr.cap.max_send_wr)) { - /* insert an empty RDMA READ to fix up the remote QP state */ - struct ib_send_wr circ_wr; - memset(&circ_wr, 0, sizeof(circ_wr)); - circ_wr.opcode = IB_WR_RDMA_READ; - post_one_send(my_qp, &circ_wr, 1); /* ignore retcode */ - wqe_cnt++; - ehca_dbg(qp->device, "posted circ wr qp_num=%x", qp->qp_num); - my_qp->message_count = my_qp->packet_count = 0; - } - - /* loop processes list of send reqs */ - while (send_wr) { - ret = post_one_send(my_qp, send_wr, 0); - if (unlikely(ret)) { - goto post_send_exit0; - } - wqe_cnt++; - send_wr = send_wr->next; - } - 
-post_send_exit0: - iosync(); /* serialize GAL register access */ - hipz_update_sqa(my_qp, wqe_cnt); - if (unlikely(ret || ehca_debug_level >= 2)) - ehca_dbg(qp->device, "ehca_qp=%p qp_num=%x wqe_cnt=%d ret=%i", - my_qp, qp->qp_num, wqe_cnt, ret); - my_qp->message_count += wqe_cnt; - spin_unlock_irqrestore(&my_qp->spinlock_s, flags); - -out: - if (ret) - *bad_send_wr = send_wr; - return ret; -} - -static int internal_post_recv(struct ehca_qp *my_qp, - struct ib_device *dev, - struct ib_recv_wr *recv_wr, - struct ib_recv_wr **bad_recv_wr) -{ - struct ehca_wqe *wqe_p; - int wqe_cnt = 0; - int ret = 0; - u32 rq_map_idx; - unsigned long flags; - struct ehca_qmap_entry *qmap_entry; - - if (unlikely(!HAS_RQ(my_qp))) { - ehca_err(dev, "QP has no RQ ehca_qp=%p qp_num=%x ext_type=%d", - my_qp, my_qp->real_qp_num, my_qp->ext_type); - ret = -ENODEV; - goto out; - } - - /* LOCK the QUEUE */ - spin_lock_irqsave(&my_qp->spinlock_r, flags); - - /* loop processes list of recv reqs */ - while (recv_wr) { - u64 start_offset = my_qp->ipz_rqueue.current_q_offset; - /* get pointer next to free WQE */ - wqe_p = ipz_qeit_get_inc(&my_qp->ipz_rqueue); - if (unlikely(!wqe_p)) { - /* too many posted work requests: queue overflow */ - ret = -ENOMEM; - ehca_err(dev, "Too many posted WQEs " - "qp_num=%x", my_qp->real_qp_num); - goto post_recv_exit0; - } - /* - * Get the index of the WQE in the recv queue. The same index - * is used for writing into the rq_map. 
- */ - rq_map_idx = start_offset / my_qp->ipz_rqueue.qe_size; - - /* write a RECV WQE into the QUEUE */ - ret = ehca_write_rwqe(&my_qp->ipz_rqueue, wqe_p, recv_wr, - rq_map_idx); - /* - * if something failed, - * reset the free entry pointer to the start value - */ - if (unlikely(ret)) { - my_qp->ipz_rqueue.current_q_offset = start_offset; - ret = -EINVAL; - ehca_err(dev, "Could not write WQE " - "qp_num=%x", my_qp->real_qp_num); - goto post_recv_exit0; - } - - qmap_entry = &my_qp->rq_map.map[rq_map_idx]; - qmap_entry->app_wr_id = get_app_wr_id(recv_wr->wr_id); - qmap_entry->reported = 0; - qmap_entry->cqe_req = 1; - - wqe_cnt++; - recv_wr = recv_wr->next; - } /* eof for recv_wr */ - -post_recv_exit0: - iosync(); /* serialize GAL register access */ - hipz_update_rqa(my_qp, wqe_cnt); - if (unlikely(ret || ehca_debug_level >= 2)) - ehca_dbg(dev, "ehca_qp=%p qp_num=%x wqe_cnt=%d ret=%i", - my_qp, my_qp->real_qp_num, wqe_cnt, ret); - spin_unlock_irqrestore(&my_qp->spinlock_r, flags); - -out: - if (ret) - *bad_recv_wr = recv_wr; - - return ret; -} - -int ehca_post_recv(struct ib_qp *qp, - struct ib_recv_wr *recv_wr, - struct ib_recv_wr **bad_recv_wr) -{ - struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp); - - /* Reject WR if QP is in RESET state */ - if (unlikely(my_qp->state == IB_QPS_RESET)) { - ehca_err(qp->device, "Invalid QP state qp_state=%d qpn=%x", - my_qp->state, qp->qp_num); - *bad_recv_wr = recv_wr; - return -EINVAL; - } - - return internal_post_recv(my_qp, qp->device, recv_wr, bad_recv_wr); -} - -int ehca_post_srq_recv(struct ib_srq *srq, - struct ib_recv_wr *recv_wr, - struct ib_recv_wr **bad_recv_wr) -{ - return internal_post_recv(container_of(srq, struct ehca_qp, ib_srq), - srq->device, recv_wr, bad_recv_wr); -} - -/* - * ib_wc_opcode table converts ehca wc opcode to ib - * Since we use zero to indicate invalid opcode, the actual ib opcode must - * be decremented!!! 
- */ -static const u8 ib_wc_opcode[255] = { - [0x01] = IB_WC_RECV+1, - [0x02] = IB_WC_RECV_RDMA_WITH_IMM+1, - [0x04] = IB_WC_BIND_MW+1, - [0x08] = IB_WC_FETCH_ADD+1, - [0x10] = IB_WC_COMP_SWAP+1, - [0x20] = IB_WC_RDMA_WRITE+1, - [0x40] = IB_WC_RDMA_READ+1, - [0x80] = IB_WC_SEND+1 -}; - -/* internal function to poll one entry of cq */ -static inline int ehca_poll_cq_one(struct ib_cq *cq, struct ib_wc *wc) -{ - int ret = 0, qmap_tail_idx; - struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); - struct ehca_cqe *cqe; - struct ehca_qp *my_qp; - struct ehca_qmap_entry *qmap_entry; - struct ehca_queue_map *qmap; - int cqe_count = 0, is_error; - -repoll: - cqe = (struct ehca_cqe *) - ipz_qeit_get_inc_valid(&my_cq->ipz_queue); - if (!cqe) { - ret = -EAGAIN; - if (ehca_debug_level >= 3) - ehca_dbg(cq->device, "Completion queue is empty " - "my_cq=%p cq_num=%x", my_cq, my_cq->cq_number); - goto poll_cq_one_exit0; - } - - /* prevents loads being reordered across this point */ - rmb(); - - cqe_count++; - if (unlikely(cqe->status & WC_STATUS_PURGE_BIT)) { - struct ehca_qp *qp; - int purgeflag; - unsigned long flags; - - qp = ehca_cq_get_qp(my_cq, cqe->local_qp_number); - if (!qp) { - ehca_err(cq->device, "cq_num=%x qp_num=%x " - "could not find qp -> ignore cqe", - my_cq->cq_number, cqe->local_qp_number); - ehca_dmp(cqe, 64, "cq_num=%x qp_num=%x", - my_cq->cq_number, cqe->local_qp_number); - /* ignore this purged cqe */ - goto repoll; - } - spin_lock_irqsave(&qp->spinlock_s, flags); - purgeflag = qp->sqerr_purgeflag; - spin_unlock_irqrestore(&qp->spinlock_s, flags); - - if (purgeflag) { - ehca_dbg(cq->device, - "Got CQE with purged bit qp_num=%x src_qp=%x", - cqe->local_qp_number, cqe->remote_qp_number); - if (ehca_debug_level >= 2) - ehca_dmp(cqe, 64, "qp_num=%x src_qp=%x", - cqe->local_qp_number, - cqe->remote_qp_number); - /* - * ignore this to avoid double cqes of bad wqe - * that caused sqe and turn off purge flag - */ - qp->sqerr_purgeflag = 0; - goto repoll; 
- } - } - - is_error = cqe->status & WC_STATUS_ERROR_BIT; - - /* trace error CQEs if debug_level >= 1, trace all CQEs if >= 3 */ - if (unlikely(ehca_debug_level >= 3 || (ehca_debug_level && is_error))) { - ehca_dbg(cq->device, - "Received %sCOMPLETION ehca_cq=%p cq_num=%x -----", - is_error ? "ERROR " : "", my_cq, my_cq->cq_number); - ehca_dmp(cqe, 64, "ehca_cq=%p cq_num=%x", - my_cq, my_cq->cq_number); - ehca_dbg(cq->device, - "ehca_cq=%p cq_num=%x -------------------------", - my_cq, my_cq->cq_number); - } - - read_lock(&ehca_qp_idr_lock); - my_qp = idr_find(&ehca_qp_idr, cqe->qp_token); - read_unlock(&ehca_qp_idr_lock); - if (!my_qp) - goto repoll; - wc->qp = &my_qp->ib_qp; - - qmap_tail_idx = get_app_wr_id(cqe->work_request_id); - if (!(cqe->w_completion_flags & WC_SEND_RECEIVE_BIT)) - /* We got a send completion. */ - qmap = &my_qp->sq_map; - else - /* We got a receive completion. */ - qmap = &my_qp->rq_map; - - /* advance the tail pointer */ - qmap->tail = qmap_tail_idx; - - if (is_error) { - /* - * set left_to_poll to 0 because in error state, we will not - * get any additional CQEs - */ - my_qp->sq_map.next_wqe_idx = next_index(my_qp->sq_map.tail, - my_qp->sq_map.entries); - my_qp->sq_map.left_to_poll = 0; - ehca_add_to_err_list(my_qp, 1); - - my_qp->rq_map.next_wqe_idx = next_index(my_qp->rq_map.tail, - my_qp->rq_map.entries); - my_qp->rq_map.left_to_poll = 0; - if (HAS_RQ(my_qp)) - ehca_add_to_err_list(my_qp, 0); - } - - qmap_entry = &qmap->map[qmap_tail_idx]; - if (qmap_entry->reported) { - ehca_warn(cq->device, "Double cqe on qp_num=%#x", - my_qp->real_qp_num); - /* found a double cqe, discard it and read next one */ - goto repoll; - } - - wc->wr_id = replace_wr_id(cqe->work_request_id, qmap_entry->app_wr_id); - qmap_entry->reported = 1; - - /* if left_to_poll is decremented to 0, add the QP to the error list */ - if (qmap->left_to_poll > 0) { - qmap->left_to_poll--; - if ((my_qp->sq_map.left_to_poll == 0) && - (my_qp->rq_map.left_to_poll == 0)) { - 
ehca_add_to_err_list(my_qp, 1); - if (HAS_RQ(my_qp)) - ehca_add_to_err_list(my_qp, 0); - } - } - - /* eval ib_wc_opcode */ - wc->opcode = ib_wc_opcode[cqe->optype]-1; - if (unlikely(wc->opcode == -1)) { - ehca_err(cq->device, "Invalid cqe->OPType=%x cqe->status=%x " - "ehca_cq=%p cq_num=%x", - cqe->optype, cqe->status, my_cq, my_cq->cq_number); - /* dump cqe for other infos */ - ehca_dmp(cqe, 64, "ehca_cq=%p cq_num=%x", - my_cq, my_cq->cq_number); - /* update also queue adder to throw away this entry!!! */ - goto repoll; - } - - /* eval ib_wc_status */ - if (unlikely(is_error)) { - /* complete with errors */ - map_ib_wc_status(cqe->status, &wc->status); - wc->vendor_err = wc->status; - } else - wc->status = IB_WC_SUCCESS; - - wc->byte_len = cqe->nr_bytes_transferred; - wc->pkey_index = cqe->pkey_index; - wc->slid = cqe->rlid; - wc->dlid_path_bits = cqe->dlid; - wc->src_qp = cqe->remote_qp_number; - /* - * HW has "Immed data present" and "GRH present" in bits 6 and 5. - * SW defines those in bits 1 and 0, so we can just shift and mask. 
- */ - wc->wc_flags = (cqe->w_completion_flags >> 5) & 3; - wc->ex.imm_data = cpu_to_be32(cqe->immediate_data); - wc->sl = cqe->service_level; - -poll_cq_one_exit0: - if (cqe_count > 0) - hipz_update_feca(my_cq, cqe_count); - - return ret; -} - -static int generate_flush_cqes(struct ehca_qp *my_qp, struct ib_cq *cq, - struct ib_wc *wc, int num_entries, - struct ipz_queue *ipz_queue, int on_sq) -{ - int nr = 0; - struct ehca_wqe *wqe; - u64 offset; - struct ehca_queue_map *qmap; - struct ehca_qmap_entry *qmap_entry; - - if (on_sq) - qmap = &my_qp->sq_map; - else - qmap = &my_qp->rq_map; - - qmap_entry = &qmap->map[qmap->next_wqe_idx]; - - while ((nr < num_entries) && (qmap_entry->reported == 0)) { - /* generate flush CQE */ - - memset(wc, 0, sizeof(*wc)); - - offset = qmap->next_wqe_idx * ipz_queue->qe_size; - wqe = (struct ehca_wqe *)ipz_qeit_calc(ipz_queue, offset); - if (!wqe) { - ehca_err(cq->device, "Invalid wqe offset=%#llx on " - "qp_num=%#x", offset, my_qp->real_qp_num); - return nr; - } - - wc->wr_id = replace_wr_id(wqe->work_request_id, - qmap_entry->app_wr_id); - - if (on_sq) { - switch (wqe->optype) { - case WQE_OPTYPE_SEND: - wc->opcode = IB_WC_SEND; - break; - case WQE_OPTYPE_RDMAWRITE: - wc->opcode = IB_WC_RDMA_WRITE; - break; - case WQE_OPTYPE_RDMAREAD: - wc->opcode = IB_WC_RDMA_READ; - break; - default: - ehca_err(cq->device, "Invalid optype=%x", - wqe->optype); - return nr; - } - } else - wc->opcode = IB_WC_RECV; - - if (wqe->wr_flag & WQE_WRFLAG_IMM_DATA_PRESENT) { - wc->ex.imm_data = wqe->immediate_data; - wc->wc_flags |= IB_WC_WITH_IMM; - } - - wc->status = IB_WC_WR_FLUSH_ERR; - - wc->qp = &my_qp->ib_qp; - - /* mark as reported and advance next_wqe pointer */ - qmap_entry->reported = 1; - qmap->next_wqe_idx = next_index(qmap->next_wqe_idx, - qmap->entries); - qmap_entry = &qmap->map[qmap->next_wqe_idx]; - - wc++; nr++; - } - - return nr; - -} - -int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc) -{ - struct ehca_cq *my_cq = 
container_of(cq, struct ehca_cq, ib_cq); - int nr; - struct ehca_qp *err_qp; - struct ib_wc *current_wc = wc; - int ret = 0; - unsigned long flags; - int entries_left = num_entries; - - if (num_entries < 1) { - ehca_err(cq->device, "Invalid num_entries=%d ehca_cq=%p " - "cq_num=%x", num_entries, my_cq, my_cq->cq_number); - ret = -EINVAL; - goto poll_cq_exit0; - } - - spin_lock_irqsave(&my_cq->spinlock, flags); - - /* generate flush cqes for send queues */ - list_for_each_entry(err_qp, &my_cq->sqp_err_list, sq_err_node) { - nr = generate_flush_cqes(err_qp, cq, current_wc, entries_left, - &err_qp->ipz_squeue, 1); - entries_left -= nr; - current_wc += nr; - - if (entries_left == 0) - break; - } - - /* generate flush cqes for receive queues */ - list_for_each_entry(err_qp, &my_cq->rqp_err_list, rq_err_node) { - nr = generate_flush_cqes(err_qp, cq, current_wc, entries_left, - &err_qp->ipz_rqueue, 0); - entries_left -= nr; - current_wc += nr; - - if (entries_left == 0) - break; - } - - for (nr = 0; nr < entries_left; nr++) { - ret = ehca_poll_cq_one(cq, current_wc); - if (ret) - break; - current_wc++; - } /* eof for nr */ - entries_left -= nr; - - spin_unlock_irqrestore(&my_cq->spinlock, flags); - if (ret == -EAGAIN || !ret) - ret = num_entries - entries_left; - -poll_cq_exit0: - return ret; -} - -int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags notify_flags) -{ - struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); - int ret = 0; - - switch (notify_flags & IB_CQ_SOLICITED_MASK) { - case IB_CQ_SOLICITED: - hipz_set_cqx_n0(my_cq, 1); - break; - case IB_CQ_NEXT_COMP: - hipz_set_cqx_n1(my_cq, 1); - break; - default: - return -EINVAL; - } - - if (notify_flags & IB_CQ_REPORT_MISSED_EVENTS) { - unsigned long spl_flags; - spin_lock_irqsave(&my_cq->spinlock, spl_flags); - ret = ipz_qeit_is_valid(&my_cq->ipz_queue); - spin_unlock_irqrestore(&my_cq->spinlock, spl_flags); - } - - return ret; -} diff --git a/drivers/infiniband/hw/ehca/ehca_sqp.c 
b/drivers/infiniband/hw/ehca/ehca_sqp.c deleted file mode 100644 index 376b031..0000000 --- a/drivers/infiniband/hw/ehca/ehca_sqp.c +++ /dev/null @@ -1,245 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * SQP functions - * - * Authors: Khadija Souissi - * Heiko J Schick - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include - -#include "ehca_classes.h" -#include "ehca_tools.h" -#include "ehca_iverbs.h" -#include "hcp_if.h" - -#define IB_MAD_STATUS_REDIRECT cpu_to_be16(0x0002) -#define IB_MAD_STATUS_UNSUP_VERSION cpu_to_be16(0x0004) -#define IB_MAD_STATUS_UNSUP_METHOD cpu_to_be16(0x0008) - -#define IB_PMA_CLASS_PORT_INFO cpu_to_be16(0x0001) - -/** - * ehca_define_sqp - Defines special queue pair 1 (GSI QP). When special queue - * pair is created successfully, the corresponding port gets active. - * - * Define Special Queue pair 0 (SMI QP) is still not supported. - * - * @qp_init_attr: Queue pair init attributes with port and queue pair type - */ - -u64 ehca_define_sqp(struct ehca_shca *shca, - struct ehca_qp *ehca_qp, - struct ib_qp_init_attr *qp_init_attr) -{ - u32 pma_qp_nr, bma_qp_nr; - u64 ret; - u8 port = qp_init_attr->port_num; - int counter; - - shca->sport[port - 1].port_state = IB_PORT_DOWN; - - switch (qp_init_attr->qp_type) { - case IB_QPT_SMI: - /* function not supported yet */ - break; - case IB_QPT_GSI: - ret = hipz_h_define_aqp1(shca->ipz_hca_handle, - ehca_qp->ipz_qp_handle, - ehca_qp->galpas.kernel, - (u32) qp_init_attr->port_num, - &pma_qp_nr, &bma_qp_nr); - - if (ret != H_SUCCESS) { - ehca_err(&shca->ib_device, - "Can't define AQP1 for port %x. h_ret=%lli", - port, ret); - return ret; - } - shca->sport[port - 1].pma_qp_nr = pma_qp_nr; - ehca_dbg(&shca->ib_device, "port=%x pma_qp_nr=%x", - port, pma_qp_nr); - break; - default: - ehca_err(&shca->ib_device, "invalid qp_type=%x", - qp_init_attr->qp_type); - return H_PARAMETER; - } - - if (ehca_nr_ports < 0) /* autodetect mode */ - return H_SUCCESS; - - for (counter = 0; - shca->sport[port - 1].port_state != IB_PORT_ACTIVE && - counter < ehca_port_act_time; - counter++) { - ehca_dbg(&shca->ib_device, "... 
wait until port %x is active", - port); - msleep_interruptible(1000); - } - - if (counter == ehca_port_act_time) { - ehca_err(&shca->ib_device, "Port %x is not active.", port); - return H_HARDWARE; - } - - return H_SUCCESS; -} - -struct ib_perf { - struct ib_mad_hdr mad_hdr; - u8 reserved[40]; - u8 data[192]; -} __attribute__ ((packed)); - -/* TC/SL/FL packed into 32 bits, as in ClassPortInfo */ -struct tcslfl { - u32 tc:8; - u32 sl:4; - u32 fl:20; -} __attribute__ ((packed)); - -/* IP Version/TC/FL packed into 32 bits, as in GRH */ -struct vertcfl { - u32 ver:4; - u32 tc:8; - u32 fl:20; -} __attribute__ ((packed)); - -static int ehca_process_perf(struct ib_device *ibdev, u8 port_num, - const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const struct ib_mad *in_mad, struct ib_mad *out_mad) -{ - const struct ib_perf *in_perf = (const struct ib_perf *)in_mad; - struct ib_perf *out_perf = (struct ib_perf *)out_mad; - struct ib_class_port_info *poi = - (struct ib_class_port_info *)out_perf->data; - struct tcslfl *tcslfl = - (struct tcslfl *)&poi->redirect_tcslfl; - struct ehca_shca *shca = - container_of(ibdev, struct ehca_shca, ib_device); - struct ehca_sport *sport = &shca->sport[port_num - 1]; - - ehca_dbg(ibdev, "method=%x", in_perf->mad_hdr.method); - - *out_mad = *in_mad; - - if (in_perf->mad_hdr.class_version != 1) { - ehca_warn(ibdev, "Unsupported class_version=%x", - in_perf->mad_hdr.class_version); - out_perf->mad_hdr.status = IB_MAD_STATUS_UNSUP_VERSION; - goto perf_reply; - } - - switch (in_perf->mad_hdr.method) { - case IB_MGMT_METHOD_GET: - case IB_MGMT_METHOD_SET: - /* set class port info for redirection */ - out_perf->mad_hdr.attr_id = IB_PMA_CLASS_PORT_INFO; - out_perf->mad_hdr.status = IB_MAD_STATUS_REDIRECT; - memset(poi, 0, sizeof(*poi)); - poi->base_version = 1; - poi->class_version = 1; - poi->resp_time_value = 18; - - /* copy local routing information from WC where applicable */ - tcslfl->sl = in_wc->sl; - poi->redirect_lid = - 
sport->saved_attr.lid | in_wc->dlid_path_bits; - poi->redirect_qp = sport->pma_qp_nr; - poi->redirect_qkey = IB_QP1_QKEY; - - ehca_query_pkey(ibdev, port_num, in_wc->pkey_index, - &poi->redirect_pkey); - - /* if request was globally routed, copy route info */ - if (in_grh) { - const struct vertcfl *vertcfl = - (const struct vertcfl *)&in_grh->version_tclass_flow; - memcpy(poi->redirect_gid, in_grh->dgid.raw, - sizeof(poi->redirect_gid)); - tcslfl->tc = vertcfl->tc; - tcslfl->fl = vertcfl->fl; - } else - /* else only fill in default GID */ - ehca_query_gid(ibdev, port_num, 0, - (union ib_gid *)&poi->redirect_gid); - - ehca_dbg(ibdev, "ehca_pma_lid=%x ehca_pma_qp=%x", - sport->saved_attr.lid, sport->pma_qp_nr); - break; - - case IB_MGMT_METHOD_GET_RESP: - return IB_MAD_RESULT_FAILURE; - - default: - out_perf->mad_hdr.status = IB_MAD_STATUS_UNSUP_METHOD; - break; - } - -perf_reply: - out_perf->mad_hdr.method = IB_MGMT_METHOD_GET_RESP; - - return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; -} - -int ehca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, - const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const struct ib_mad_hdr *in, size_t in_mad_size, - struct ib_mad_hdr *out, size_t *out_mad_size, - u16 *out_mad_pkey_index) -{ - int ret; - const struct ib_mad *in_mad = (const struct ib_mad *)in; - struct ib_mad *out_mad = (struct ib_mad *)out; - - if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) || - *out_mad_size != sizeof(*out_mad))) - return IB_MAD_RESULT_FAILURE; - - if (!port_num || port_num > ibdev->phys_port_cnt || !in_wc) - return IB_MAD_RESULT_FAILURE; - - /* accept only pma request */ - if (in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_PERF_MGMT) - return IB_MAD_RESULT_SUCCESS; - - ehca_dbg(ibdev, "port_num=%x src_qp=%x", port_num, in_wc->src_qp); - ret = ehca_process_perf(ibdev, port_num, in_wc, in_grh, - in_mad, out_mad); - - return ret; -} diff --git a/drivers/infiniband/hw/ehca/ehca_tools.h b/drivers/infiniband/hw/ehca/ehca_tools.h 
deleted file mode 100644 index d280b12..0000000 --- a/drivers/infiniband/hw/ehca/ehca_tools.h +++ /dev/null @@ -1,155 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * auxiliary functions - * - * Authors: Christoph Raisch - * Hoang-Nam Nguyen - * Khadija Souissi - * Waleri Fomin - * Heiko J Schick - * - * Copyright (c) 2005 IBM Corporation - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - - -#ifndef EHCA_TOOLS_H -#define EHCA_TOOLS_H - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -extern int ehca_debug_level; - -#define ehca_dbg(ib_dev, format, arg...) \ - do { \ - if (unlikely(ehca_debug_level)) \ - dev_printk(KERN_DEBUG, (ib_dev)->dma_device, \ - "PU%04x EHCA_DBG:%s " format "\n", \ - raw_smp_processor_id(), __func__, \ - ## arg); \ - } while (0) - -#define ehca_info(ib_dev, format, arg...) \ - dev_info((ib_dev)->dma_device, "PU%04x EHCA_INFO:%s " format "\n", \ - raw_smp_processor_id(), __func__, ## arg) - -#define ehca_warn(ib_dev, format, arg...) \ - dev_warn((ib_dev)->dma_device, "PU%04x EHCA_WARN:%s " format "\n", \ - raw_smp_processor_id(), __func__, ## arg) - -#define ehca_err(ib_dev, format, arg...) \ - dev_err((ib_dev)->dma_device, "PU%04x EHCA_ERR:%s " format "\n", \ - raw_smp_processor_id(), __func__, ## arg) - -/* use this one only if no ib_dev available */ -#define ehca_gen_dbg(format, arg...) \ - do { \ - if (unlikely(ehca_debug_level)) \ - printk(KERN_DEBUG "PU%04x EHCA_DBG:%s " format "\n", \ - raw_smp_processor_id(), __func__, ## arg); \ - } while (0) - -#define ehca_gen_warn(format, arg...) \ - printk(KERN_INFO "PU%04x EHCA_WARN:%s " format "\n", \ - raw_smp_processor_id(), __func__, ## arg) - -#define ehca_gen_err(format, arg...) \ - printk(KERN_ERR "PU%04x EHCA_ERR:%s " format "\n", \ - raw_smp_processor_id(), __func__, ## arg) - -/** - * ehca_dmp - printk a memory block, whose length is n*8 bytes. - * Each line has the following layout: - * adr=X ofs=Y <8 bytes hex> <8 bytes hex> - */ -#define ehca_dmp(adr, len, format, args...) 
\ - do { \ - unsigned int x; \ - unsigned int l = (unsigned int)(len); \ - unsigned char *deb = (unsigned char *)(adr); \ - for (x = 0; x < l; x += 16) { \ - printk(KERN_INFO "EHCA_DMP:%s " format \ - " adr=%p ofs=%04x %016llx %016llx\n", \ - __func__, ##args, deb, x, \ - *((u64 *)&deb[0]), *((u64 *)&deb[8])); \ - deb += 16; \ - } \ - } while (0) - -/* define a bitmask, little endian version */ -#define EHCA_BMASK(pos, length) (((pos) << 16) + (length)) - -/* define a bitmask, the ibm way... */ -#define EHCA_BMASK_IBM(from, to) (((63 - to) << 16) + ((to) - (from) + 1)) - -/* internal function, don't use */ -#define EHCA_BMASK_SHIFTPOS(mask) (((mask) >> 16) & 0xffff) - -/* internal function, don't use */ -#define EHCA_BMASK_MASK(mask) (~0ULL >> ((64 - (mask)) & 0xffff)) - -/** - * EHCA_BMASK_SET - return value shifted and masked by mask - * variable|=EHCA_BMASK_SET(MY_MASK,0x4711) ORs the bits in variable - * variable&=~EHCA_BMASK_SET(MY_MASK,-1) clears the bits from the mask - * in variable - */ -#define EHCA_BMASK_SET(mask, value) \ - ((EHCA_BMASK_MASK(mask) & ((u64)(value))) << EHCA_BMASK_SHIFTPOS(mask)) - -/** - * EHCA_BMASK_GET - extract a parameter from value by mask - */ -#define EHCA_BMASK_GET(mask, value) \ - (EHCA_BMASK_MASK(mask) & (((u64)(value)) >> EHCA_BMASK_SHIFTPOS(mask))) - -/* Converts ehca to ib return code */ -int ehca2ib_return_code(u64 ehca_rc); - -#endif /* EHCA_TOOLS_H */ diff --git a/drivers/infiniband/hw/ehca/ehca_uverbs.c b/drivers/infiniband/hw/ehca/ehca_uverbs.c deleted file mode 100644 index 1a1d5d9..0000000 --- a/drivers/infiniband/hw/ehca/ehca_uverbs.c +++ /dev/null @@ -1,309 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * userspace support verbs - * - * Authors: Christoph Raisch - * Hoang-Nam Nguyen - * Heiko J Schick - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. 
- * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include - -#include "ehca_classes.h" -#include "ehca_iverbs.h" -#include "ehca_mrmw.h" -#include "ehca_tools.h" -#include "hcp_if.h" - -struct ib_ucontext *ehca_alloc_ucontext(struct ib_device *device, - struct ib_udata *udata) -{ - struct ehca_ucontext *my_context; - - my_context = kzalloc(sizeof *my_context, GFP_KERNEL); - if (!my_context) { - ehca_err(device, "Out of memory device=%p", device); - return ERR_PTR(-ENOMEM); - } - - return &my_context->ib_ucontext; -} - -int ehca_dealloc_ucontext(struct ib_ucontext *context) -{ - kfree(container_of(context, struct ehca_ucontext, ib_ucontext)); - return 0; -} - -static void ehca_mm_open(struct vm_area_struct *vma) -{ - u32 *count = (u32 *)vma->vm_private_data; - if (!count) { - ehca_gen_err("Invalid vma struct vm_start=%lx vm_end=%lx", - vma->vm_start, vma->vm_end); - return; - } - (*count)++; - if (!(*count)) - ehca_gen_err("Use count overflow vm_start=%lx vm_end=%lx", - vma->vm_start, vma->vm_end); - ehca_gen_dbg("vm_start=%lx vm_end=%lx count=%x", - vma->vm_start, vma->vm_end, *count); -} - -static void ehca_mm_close(struct vm_area_struct *vma) -{ - u32 *count = (u32 *)vma->vm_private_data; - if (!count) { - ehca_gen_err("Invalid vma struct vm_start=%lx vm_end=%lx", - vma->vm_start, vma->vm_end); - return; - } - (*count)--; - ehca_gen_dbg("vm_start=%lx vm_end=%lx count=%x", - vma->vm_start, vma->vm_end, *count); -} - -static const struct vm_operations_struct vm_ops = { - .open = ehca_mm_open, - .close = ehca_mm_close, -}; - -static int ehca_mmap_fw(struct vm_area_struct *vma, struct h_galpas *galpas, - u32 *mm_count) -{ - int ret; - u64 vsize, physical; - - vsize = vma->vm_end - vma->vm_start; - if (vsize < EHCA_PAGESIZE) { - ehca_gen_err("invalid vsize=%lx", vma->vm_end - vma->vm_start); - return -EINVAL; - } - - physical = galpas->user.fw_handle; - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - ehca_gen_dbg("vsize=%llx physical=%llx", vsize, physical); - /* VM_IO | VM_DONTEXPAND | 
VM_DONTDUMP are set by remap_pfn_range() */ - ret = remap_4k_pfn(vma, vma->vm_start, physical >> EHCA_PAGESHIFT, - vma->vm_page_prot); - if (unlikely(ret)) { - ehca_gen_err("remap_pfn_range() failed ret=%i", ret); - return -ENOMEM; - } - - vma->vm_private_data = mm_count; - (*mm_count)++; - vma->vm_ops = &vm_ops; - - return 0; -} - -static int ehca_mmap_queue(struct vm_area_struct *vma, struct ipz_queue *queue, - u32 *mm_count) -{ - int ret; - u64 start, ofs; - struct page *page; - - vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; - start = vma->vm_start; - for (ofs = 0; ofs < queue->queue_length; ofs += PAGE_SIZE) { - u64 virt_addr = (u64)ipz_qeit_calc(queue, ofs); - page = virt_to_page(virt_addr); - ret = vm_insert_page(vma, start, page); - if (unlikely(ret)) { - ehca_gen_err("vm_insert_page() failed rc=%i", ret); - return ret; - } - start += PAGE_SIZE; - } - vma->vm_private_data = mm_count; - (*mm_count)++; - vma->vm_ops = &vm_ops; - - return 0; -} - -static int ehca_mmap_cq(struct vm_area_struct *vma, struct ehca_cq *cq, - u32 rsrc_type) -{ - int ret; - - switch (rsrc_type) { - case 0: /* galpa fw handle */ - ehca_dbg(cq->ib_cq.device, "cq_num=%x fw", cq->cq_number); - ret = ehca_mmap_fw(vma, &cq->galpas, &cq->mm_count_galpa); - if (unlikely(ret)) { - ehca_err(cq->ib_cq.device, - "ehca_mmap_fw() failed rc=%i cq_num=%x", - ret, cq->cq_number); - return ret; - } - break; - - case 1: /* cq queue_addr */ - ehca_dbg(cq->ib_cq.device, "cq_num=%x queue", cq->cq_number); - ret = ehca_mmap_queue(vma, &cq->ipz_queue, &cq->mm_count_queue); - if (unlikely(ret)) { - ehca_err(cq->ib_cq.device, - "ehca_mmap_queue() failed rc=%i cq_num=%x", - ret, cq->cq_number); - return ret; - } - break; - - default: - ehca_err(cq->ib_cq.device, "bad resource type=%x cq_num=%x", - rsrc_type, cq->cq_number); - return -EINVAL; - } - - return 0; -} - -static int ehca_mmap_qp(struct vm_area_struct *vma, struct ehca_qp *qp, - u32 rsrc_type) -{ - int ret; - - switch (rsrc_type) { - case 0: /* galpa 
fw handle */ - ehca_dbg(qp->ib_qp.device, "qp_num=%x fw", qp->ib_qp.qp_num); - ret = ehca_mmap_fw(vma, &qp->galpas, &qp->mm_count_galpa); - if (unlikely(ret)) { - ehca_err(qp->ib_qp.device, - "remap_pfn_range() failed ret=%i qp_num=%x", - ret, qp->ib_qp.qp_num); - return -ENOMEM; - } - break; - - case 1: /* qp rqueue_addr */ - ehca_dbg(qp->ib_qp.device, "qp_num=%x rq", qp->ib_qp.qp_num); - ret = ehca_mmap_queue(vma, &qp->ipz_rqueue, - &qp->mm_count_rqueue); - if (unlikely(ret)) { - ehca_err(qp->ib_qp.device, - "ehca_mmap_queue(rq) failed rc=%i qp_num=%x", - ret, qp->ib_qp.qp_num); - return ret; - } - break; - - case 2: /* qp squeue_addr */ - ehca_dbg(qp->ib_qp.device, "qp_num=%x sq", qp->ib_qp.qp_num); - ret = ehca_mmap_queue(vma, &qp->ipz_squeue, - &qp->mm_count_squeue); - if (unlikely(ret)) { - ehca_err(qp->ib_qp.device, - "ehca_mmap_queue(sq) failed rc=%i qp_num=%x", - ret, qp->ib_qp.qp_num); - return ret; - } - break; - - default: - ehca_err(qp->ib_qp.device, "bad resource type=%x qp=num=%x", - rsrc_type, qp->ib_qp.qp_num); - return -EINVAL; - } - - return 0; -} - -int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) -{ - u64 fileoffset = vma->vm_pgoff; - u32 idr_handle = fileoffset & 0x1FFFFFF; - u32 q_type = (fileoffset >> 27) & 0x1; /* CQ, QP,... 
*/ - u32 rsrc_type = (fileoffset >> 25) & 0x3; /* sq,rq,cmnd_window */ - u32 ret; - struct ehca_cq *cq; - struct ehca_qp *qp; - struct ib_uobject *uobject; - - switch (q_type) { - case 0: /* CQ */ - read_lock(&ehca_cq_idr_lock); - cq = idr_find(&ehca_cq_idr, idr_handle); - read_unlock(&ehca_cq_idr_lock); - - /* make sure this mmap really belongs to the authorized user */ - if (!cq) - return -EINVAL; - - if (!cq->ib_cq.uobject || cq->ib_cq.uobject->context != context) - return -EINVAL; - - ret = ehca_mmap_cq(vma, cq, rsrc_type); - if (unlikely(ret)) { - ehca_err(cq->ib_cq.device, - "ehca_mmap_cq() failed rc=%i cq_num=%x", - ret, cq->cq_number); - return ret; - } - break; - - case 1: /* QP */ - read_lock(&ehca_qp_idr_lock); - qp = idr_find(&ehca_qp_idr, idr_handle); - read_unlock(&ehca_qp_idr_lock); - - /* make sure this mmap really belongs to the authorized user */ - if (!qp) - return -EINVAL; - - uobject = IS_SRQ(qp) ? qp->ib_srq.uobject : qp->ib_qp.uobject; - if (!uobject || uobject->context != context) - return -EINVAL; - - ret = ehca_mmap_qp(vma, qp, rsrc_type); - if (unlikely(ret)) { - ehca_err(qp->ib_qp.device, - "ehca_mmap_qp() failed rc=%i qp_num=%x", - ret, qp->ib_qp.qp_num); - return ret; - } - break; - - default: - ehca_gen_err("bad queue type %x", q_type); - return -EINVAL; - } - - return 0; -} diff --git a/drivers/infiniband/hw/ehca/hcp_if.c b/drivers/infiniband/hw/ehca/hcp_if.c deleted file mode 100644 index 89517ff..0000000 --- a/drivers/infiniband/hw/ehca/hcp_if.c +++ /dev/null @@ -1,949 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * Firmware Infiniband Interface code for POWER - * - * Authors: Christoph Raisch - * Hoang-Nam Nguyen - * Joachim Fenkes - * Gerd Bayer - * Waleri Fomin - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. 
- * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include -#include "ehca_tools.h" -#include "hcp_if.h" -#include "hcp_phyp.h" -#include "hipz_fns.h" -#include "ipz_pt_fn.h" - -#define H_ALL_RES_QP_ENHANCED_OPS EHCA_BMASK_IBM(9, 11) -#define H_ALL_RES_QP_PTE_PIN EHCA_BMASK_IBM(12, 12) -#define H_ALL_RES_QP_SERVICE_TYPE EHCA_BMASK_IBM(13, 15) -#define H_ALL_RES_QP_STORAGE EHCA_BMASK_IBM(16, 17) -#define H_ALL_RES_QP_LL_RQ_CQE_POSTING EHCA_BMASK_IBM(18, 18) -#define H_ALL_RES_QP_LL_SQ_CQE_POSTING EHCA_BMASK_IBM(19, 21) -#define H_ALL_RES_QP_SIGNALING_TYPE EHCA_BMASK_IBM(22, 23) -#define H_ALL_RES_QP_UD_AV_LKEY_CTRL EHCA_BMASK_IBM(31, 31) -#define H_ALL_RES_QP_SMALL_SQ_PAGE_SIZE EHCA_BMASK_IBM(32, 35) -#define H_ALL_RES_QP_SMALL_RQ_PAGE_SIZE EHCA_BMASK_IBM(36, 39) -#define H_ALL_RES_QP_RESOURCE_TYPE EHCA_BMASK_IBM(56, 63) - -#define H_ALL_RES_QP_MAX_OUTST_SEND_WR EHCA_BMASK_IBM(0, 15) -#define H_ALL_RES_QP_MAX_OUTST_RECV_WR EHCA_BMASK_IBM(16, 31) -#define H_ALL_RES_QP_MAX_SEND_SGE EHCA_BMASK_IBM(32, 39) -#define H_ALL_RES_QP_MAX_RECV_SGE EHCA_BMASK_IBM(40, 47) - -#define H_ALL_RES_QP_UD_AV_LKEY EHCA_BMASK_IBM(32, 63) -#define H_ALL_RES_QP_SRQ_QP_TOKEN EHCA_BMASK_IBM(0, 31) -#define H_ALL_RES_QP_SRQ_QP_HANDLE EHCA_BMASK_IBM(0, 64) -#define H_ALL_RES_QP_SRQ_LIMIT EHCA_BMASK_IBM(48, 63) -#define H_ALL_RES_QP_SRQ_QPN EHCA_BMASK_IBM(40, 63) - -#define H_ALL_RES_QP_ACT_OUTST_SEND_WR EHCA_BMASK_IBM(16, 31) -#define H_ALL_RES_QP_ACT_OUTST_RECV_WR EHCA_BMASK_IBM(48, 63) -#define H_ALL_RES_QP_ACT_SEND_SGE EHCA_BMASK_IBM(8, 15) -#define H_ALL_RES_QP_ACT_RECV_SGE EHCA_BMASK_IBM(24, 31) - -#define H_ALL_RES_QP_SQUEUE_SIZE_PAGES EHCA_BMASK_IBM(0, 31) -#define H_ALL_RES_QP_RQUEUE_SIZE_PAGES EHCA_BMASK_IBM(32, 63) - -#define H_MP_INIT_TYPE EHCA_BMASK_IBM(44, 47) -#define H_MP_SHUTDOWN EHCA_BMASK_IBM(48, 48) -#define H_MP_RESET_QKEY_CTR EHCA_BMASK_IBM(49, 49) - -#define HCALL4_REGS_FORMAT "r4=%lx r5=%lx r6=%lx r7=%lx" -#define HCALL7_REGS_FORMAT HCALL4_REGS_FORMAT " r8=%lx r9=%lx r10=%lx" -#define HCALL9_REGS_FORMAT 
HCALL7_REGS_FORMAT " r11=%lx r12=%lx" - -static DEFINE_SPINLOCK(hcall_lock); - -static long ehca_plpar_hcall_norets(unsigned long opcode, - unsigned long arg1, - unsigned long arg2, - unsigned long arg3, - unsigned long arg4, - unsigned long arg5, - unsigned long arg6, - unsigned long arg7) -{ - long ret; - int i, sleep_msecs; - unsigned long flags = 0; - - if (unlikely(ehca_debug_level >= 2)) - ehca_gen_dbg("opcode=%lx " HCALL7_REGS_FORMAT, - opcode, arg1, arg2, arg3, arg4, arg5, arg6, arg7); - - for (i = 0; i < 5; i++) { - /* serialize hCalls to work around firmware issue */ - if (ehca_lock_hcalls) - spin_lock_irqsave(&hcall_lock, flags); - - ret = plpar_hcall_norets(opcode, arg1, arg2, arg3, arg4, - arg5, arg6, arg7); - - if (ehca_lock_hcalls) - spin_unlock_irqrestore(&hcall_lock, flags); - - if (H_IS_LONG_BUSY(ret)) { - sleep_msecs = get_longbusy_msecs(ret); - msleep_interruptible(sleep_msecs); - continue; - } - - if (ret < H_SUCCESS) - ehca_gen_err("opcode=%lx ret=%li " HCALL7_REGS_FORMAT, - opcode, ret, arg1, arg2, arg3, - arg4, arg5, arg6, arg7); - else - if (unlikely(ehca_debug_level >= 2)) - ehca_gen_dbg("opcode=%lx ret=%li", opcode, ret); - - return ret; - } - - return H_BUSY; -} - -static long ehca_plpar_hcall9(unsigned long opcode, - unsigned long *outs, /* array of 9 outputs */ - unsigned long arg1, - unsigned long arg2, - unsigned long arg3, - unsigned long arg4, - unsigned long arg5, - unsigned long arg6, - unsigned long arg7, - unsigned long arg8, - unsigned long arg9) -{ - long ret; - int i, sleep_msecs; - unsigned long flags = 0; - - if (unlikely(ehca_debug_level >= 2)) - ehca_gen_dbg("INPUT -- opcode=%lx " HCALL9_REGS_FORMAT, opcode, - arg1, arg2, arg3, arg4, arg5, - arg6, arg7, arg8, arg9); - - for (i = 0; i < 5; i++) { - /* serialize hCalls to work around firmware issue */ - if (ehca_lock_hcalls) - spin_lock_irqsave(&hcall_lock, flags); - - ret = plpar_hcall9(opcode, outs, - arg1, arg2, arg3, arg4, arg5, - arg6, arg7, arg8, arg9); - - if 
(ehca_lock_hcalls) - spin_unlock_irqrestore(&hcall_lock, flags); - - if (H_IS_LONG_BUSY(ret)) { - sleep_msecs = get_longbusy_msecs(ret); - msleep_interruptible(sleep_msecs); - continue; - } - - if (ret < H_SUCCESS) { - ehca_gen_err("INPUT -- opcode=%lx " HCALL9_REGS_FORMAT, - opcode, arg1, arg2, arg3, arg4, arg5, - arg6, arg7, arg8, arg9); - ehca_gen_err("OUTPUT -- ret=%li " HCALL9_REGS_FORMAT, - ret, outs[0], outs[1], outs[2], outs[3], - outs[4], outs[5], outs[6], outs[7], - outs[8]); - } else if (unlikely(ehca_debug_level >= 2)) - ehca_gen_dbg("OUTPUT -- ret=%li " HCALL9_REGS_FORMAT, - ret, outs[0], outs[1], outs[2], outs[3], - outs[4], outs[5], outs[6], outs[7], - outs[8]); - return ret; - } - - return H_BUSY; -} - -u64 hipz_h_alloc_resource_eq(const struct ipz_adapter_handle adapter_handle, - struct ehca_pfeq *pfeq, - const u32 neq_control, - const u32 number_of_entries, - struct ipz_eq_handle *eq_handle, - u32 *act_nr_of_entries, - u32 *act_pages, - u32 *eq_ist) -{ - u64 ret; - unsigned long outs[PLPAR_HCALL9_BUFSIZE]; - u64 allocate_controls; - - /* resource type */ - allocate_controls = 3ULL; - - /* ISN is associated */ - if (neq_control != 1) - allocate_controls = (1ULL << (63 - 7)) | allocate_controls; - else /* notification event queue */ - allocate_controls = (1ULL << 63) | allocate_controls; - - ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs, - adapter_handle.handle, /* r4 */ - allocate_controls, /* r5 */ - number_of_entries, /* r6 */ - 0, 0, 0, 0, 0, 0); - eq_handle->handle = outs[0]; - *act_nr_of_entries = (u32)outs[3]; - *act_pages = (u32)outs[4]; - *eq_ist = (u32)outs[5]; - - if (ret == H_NOT_ENOUGH_RESOURCES) - ehca_gen_err("Not enough resource - ret=%lli ", ret); - - return ret; -} - -u64 hipz_h_reset_event(const struct ipz_adapter_handle adapter_handle, - struct ipz_eq_handle eq_handle, - const u64 event_mask) -{ - return ehca_plpar_hcall_norets(H_RESET_EVENTS, - adapter_handle.handle, /* r4 */ - eq_handle.handle, /* r5 */ - event_mask, /* r6 */ 
- 0, 0, 0, 0); -} - -u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle, - struct ehca_cq *cq, - struct ehca_alloc_cq_parms *param) -{ - int rc; - u64 ret; - unsigned long outs[PLPAR_HCALL9_BUFSIZE]; - - ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs, - adapter_handle.handle, /* r4 */ - 2, /* r5 */ - param->eq_handle.handle, /* r6 */ - cq->token, /* r7 */ - param->nr_cqe, /* r8 */ - 0, 0, 0, 0); - cq->ipz_cq_handle.handle = outs[0]; - param->act_nr_of_entries = (u32)outs[3]; - param->act_pages = (u32)outs[4]; - - if (ret == H_SUCCESS) { - rc = hcp_galpas_ctor(&cq->galpas, 0, outs[5], outs[6]); - if (rc) { - ehca_gen_err("Could not establish HW access. rc=%d paddr=%#lx", - rc, outs[5]); - - ehca_plpar_hcall_norets(H_FREE_RESOURCE, - adapter_handle.handle, /* r4 */ - cq->ipz_cq_handle.handle, /* r5 */ - 0, 0, 0, 0, 0); - ret = H_NO_MEM; - } - } - - if (ret == H_NOT_ENOUGH_RESOURCES) - ehca_gen_err("Not enough resources. ret=%lli", ret); - - return ret; -} - -u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle, - struct ehca_alloc_qp_parms *parms, int is_user) -{ - int rc; - u64 ret; - u64 allocate_controls, max_r10_reg, r11, r12; - unsigned long outs[PLPAR_HCALL9_BUFSIZE]; - - allocate_controls = - EHCA_BMASK_SET(H_ALL_RES_QP_ENHANCED_OPS, parms->ext_type) - | EHCA_BMASK_SET(H_ALL_RES_QP_PTE_PIN, 0) - | EHCA_BMASK_SET(H_ALL_RES_QP_SERVICE_TYPE, parms->servicetype) - | EHCA_BMASK_SET(H_ALL_RES_QP_SIGNALING_TYPE, parms->sigtype) - | EHCA_BMASK_SET(H_ALL_RES_QP_STORAGE, parms->qp_storage) - | EHCA_BMASK_SET(H_ALL_RES_QP_SMALL_SQ_PAGE_SIZE, - parms->squeue.page_size) - | EHCA_BMASK_SET(H_ALL_RES_QP_SMALL_RQ_PAGE_SIZE, - parms->rqueue.page_size) - | EHCA_BMASK_SET(H_ALL_RES_QP_LL_RQ_CQE_POSTING, - !!(parms->ll_comp_flags & LLQP_RECV_COMP)) - | EHCA_BMASK_SET(H_ALL_RES_QP_LL_SQ_CQE_POSTING, - !!(parms->ll_comp_flags & LLQP_SEND_COMP)) - | EHCA_BMASK_SET(H_ALL_RES_QP_UD_AV_LKEY_CTRL, - parms->ud_av_l_key_ctl) - | 
EHCA_BMASK_SET(H_ALL_RES_QP_RESOURCE_TYPE, 1); - - max_r10_reg = - EHCA_BMASK_SET(H_ALL_RES_QP_MAX_OUTST_SEND_WR, - parms->squeue.max_wr + 1) - | EHCA_BMASK_SET(H_ALL_RES_QP_MAX_OUTST_RECV_WR, - parms->rqueue.max_wr + 1) - | EHCA_BMASK_SET(H_ALL_RES_QP_MAX_SEND_SGE, - parms->squeue.max_sge) - | EHCA_BMASK_SET(H_ALL_RES_QP_MAX_RECV_SGE, - parms->rqueue.max_sge); - - r11 = EHCA_BMASK_SET(H_ALL_RES_QP_SRQ_QP_TOKEN, parms->srq_token); - - if (parms->ext_type == EQPT_SRQ) - r12 = EHCA_BMASK_SET(H_ALL_RES_QP_SRQ_LIMIT, parms->srq_limit); - else - r12 = EHCA_BMASK_SET(H_ALL_RES_QP_SRQ_QPN, parms->srq_qpn); - - ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs, - adapter_handle.handle, /* r4 */ - allocate_controls, /* r5 */ - parms->send_cq_handle.handle, - parms->recv_cq_handle.handle, - parms->eq_handle.handle, - ((u64)parms->token << 32) | parms->pd.value, - max_r10_reg, r11, r12); - - parms->qp_handle.handle = outs[0]; - parms->real_qp_num = (u32)outs[1]; - parms->squeue.act_nr_wqes = - (u16)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_OUTST_SEND_WR, outs[2]); - parms->rqueue.act_nr_wqes = - (u16)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_OUTST_RECV_WR, outs[2]); - parms->squeue.act_nr_sges = - (u8)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_SEND_SGE, outs[3]); - parms->rqueue.act_nr_sges = - (u8)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_RECV_SGE, outs[3]); - parms->squeue.queue_size = - (u32)EHCA_BMASK_GET(H_ALL_RES_QP_SQUEUE_SIZE_PAGES, outs[4]); - parms->rqueue.queue_size = - (u32)EHCA_BMASK_GET(H_ALL_RES_QP_RQUEUE_SIZE_PAGES, outs[4]); - - if (ret == H_SUCCESS) { - rc = hcp_galpas_ctor(&parms->galpas, is_user, outs[6], outs[6]); - if (rc) { - ehca_gen_err("Could not establish HW access. rc=%d paddr=%#lx", - rc, outs[6]); - - ehca_plpar_hcall_norets(H_FREE_RESOURCE, - adapter_handle.handle, /* r4 */ - parms->qp_handle.handle, /* r5 */ - 0, 0, 0, 0, 0); - ret = H_NO_MEM; - } - } - - if (ret == H_NOT_ENOUGH_RESOURCES) - ehca_gen_err("Not enough resources. 
ret=%lli", ret); - - return ret; -} - -u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle, - const u8 port_id, - struct hipz_query_port *query_port_response_block) -{ - u64 ret; - u64 r_cb = __pa(query_port_response_block); - - if (r_cb & (EHCA_PAGESIZE-1)) { - ehca_gen_err("response block not page aligned"); - return H_PARAMETER; - } - - ret = ehca_plpar_hcall_norets(H_QUERY_PORT, - adapter_handle.handle, /* r4 */ - port_id, /* r5 */ - r_cb, /* r6 */ - 0, 0, 0, 0); - - if (ehca_debug_level >= 2) - ehca_dmp(query_port_response_block, 64, "response_block"); - - return ret; -} - -u64 hipz_h_modify_port(const struct ipz_adapter_handle adapter_handle, - const u8 port_id, const u32 port_cap, - const u8 init_type, const int modify_mask) -{ - u64 port_attributes = port_cap; - - if (modify_mask & IB_PORT_SHUTDOWN) - port_attributes |= EHCA_BMASK_SET(H_MP_SHUTDOWN, 1); - if (modify_mask & IB_PORT_INIT_TYPE) - port_attributes |= EHCA_BMASK_SET(H_MP_INIT_TYPE, init_type); - if (modify_mask & IB_PORT_RESET_QKEY_CNTR) - port_attributes |= EHCA_BMASK_SET(H_MP_RESET_QKEY_CTR, 1); - - return ehca_plpar_hcall_norets(H_MODIFY_PORT, - adapter_handle.handle, /* r4 */ - port_id, /* r5 */ - port_attributes, /* r6 */ - 0, 0, 0, 0); -} - -u64 hipz_h_query_hca(const struct ipz_adapter_handle adapter_handle, - struct hipz_query_hca *query_hca_rblock) -{ - u64 r_cb = __pa(query_hca_rblock); - - if (r_cb & (EHCA_PAGESIZE-1)) { - ehca_gen_err("response_block=%p not page aligned", - query_hca_rblock); - return H_PARAMETER; - } - - return ehca_plpar_hcall_norets(H_QUERY_HCA, - adapter_handle.handle, /* r4 */ - r_cb, /* r5 */ - 0, 0, 0, 0, 0); -} - -u64 hipz_h_register_rpage(const struct ipz_adapter_handle adapter_handle, - const u8 pagesize, - const u8 queue_type, - const u64 resource_handle, - const u64 logical_address_of_page, - u64 count) -{ - return ehca_plpar_hcall_norets(H_REGISTER_RPAGES, - adapter_handle.handle, /* r4 */ - (u64)queue_type | ((u64)pagesize) << 8, - /* r5 
*/ - resource_handle, /* r6 */ - logical_address_of_page, /* r7 */ - count, /* r8 */ - 0, 0); -} - -u64 hipz_h_register_rpage_eq(const struct ipz_adapter_handle adapter_handle, - const struct ipz_eq_handle eq_handle, - struct ehca_pfeq *pfeq, - const u8 pagesize, - const u8 queue_type, - const u64 logical_address_of_page, - const u64 count) -{ - if (count != 1) { - ehca_gen_err("Ppage counter=%llx", count); - return H_PARAMETER; - } - return hipz_h_register_rpage(adapter_handle, - pagesize, - queue_type, - eq_handle.handle, - logical_address_of_page, count); -} - -u64 hipz_h_query_int_state(const struct ipz_adapter_handle adapter_handle, - u32 ist) -{ - u64 ret; - ret = ehca_plpar_hcall_norets(H_QUERY_INT_STATE, - adapter_handle.handle, /* r4 */ - ist, /* r5 */ - 0, 0, 0, 0, 0); - - if (ret != H_SUCCESS && ret != H_BUSY) - ehca_gen_err("Could not query interrupt state."); - - return ret; -} - -u64 hipz_h_register_rpage_cq(const struct ipz_adapter_handle adapter_handle, - const struct ipz_cq_handle cq_handle, - struct ehca_pfcq *pfcq, - const u8 pagesize, - const u8 queue_type, - const u64 logical_address_of_page, - const u64 count, - const struct h_galpa gal) -{ - if (count != 1) { - ehca_gen_err("Page counter=%llx", count); - return H_PARAMETER; - } - - return hipz_h_register_rpage(adapter_handle, pagesize, queue_type, - cq_handle.handle, logical_address_of_page, - count); -} - -u64 hipz_h_register_rpage_qp(const struct ipz_adapter_handle adapter_handle, - const struct ipz_qp_handle qp_handle, - struct ehca_pfqp *pfqp, - const u8 pagesize, - const u8 queue_type, - const u64 logical_address_of_page, - const u64 count, - const struct h_galpa galpa) -{ - if (count > 1) { - ehca_gen_err("Page counter=%llx", count); - return H_PARAMETER; - } - - return hipz_h_register_rpage(adapter_handle, pagesize, queue_type, - qp_handle.handle, logical_address_of_page, - count); -} - -u64 hipz_h_disable_and_get_wqe(const struct ipz_adapter_handle adapter_handle, - const struct 
ipz_qp_handle qp_handle, - struct ehca_pfqp *pfqp, - void **log_addr_next_sq_wqe2processed, - void **log_addr_next_rq_wqe2processed, - int dis_and_get_function_code) -{ - u64 ret; - unsigned long outs[PLPAR_HCALL9_BUFSIZE]; - - ret = ehca_plpar_hcall9(H_DISABLE_AND_GETC, outs, - adapter_handle.handle, /* r4 */ - dis_and_get_function_code, /* r5 */ - qp_handle.handle, /* r6 */ - 0, 0, 0, 0, 0, 0); - if (log_addr_next_sq_wqe2processed) - *log_addr_next_sq_wqe2processed = (void *)outs[0]; - if (log_addr_next_rq_wqe2processed) - *log_addr_next_rq_wqe2processed = (void *)outs[1]; - - return ret; -} - -u64 hipz_h_modify_qp(const struct ipz_adapter_handle adapter_handle, - const struct ipz_qp_handle qp_handle, - struct ehca_pfqp *pfqp, - const u64 update_mask, - struct hcp_modify_qp_control_block *mqpcb, - struct h_galpa gal) -{ - u64 ret; - unsigned long outs[PLPAR_HCALL9_BUFSIZE]; - ret = ehca_plpar_hcall9(H_MODIFY_QP, outs, - adapter_handle.handle, /* r4 */ - qp_handle.handle, /* r5 */ - update_mask, /* r6 */ - __pa(mqpcb), /* r7 */ - 0, 0, 0, 0, 0); - - if (ret == H_NOT_ENOUGH_RESOURCES) - ehca_gen_err("Insufficient resources ret=%lli", ret); - - return ret; -} - -u64 hipz_h_query_qp(const struct ipz_adapter_handle adapter_handle, - const struct ipz_qp_handle qp_handle, - struct ehca_pfqp *pfqp, - struct hcp_modify_qp_control_block *qqpcb, - struct h_galpa gal) -{ - return ehca_plpar_hcall_norets(H_QUERY_QP, - adapter_handle.handle, /* r4 */ - qp_handle.handle, /* r5 */ - __pa(qqpcb), /* r6 */ - 0, 0, 0, 0); -} - -u64 hipz_h_destroy_qp(const struct ipz_adapter_handle adapter_handle, - struct ehca_qp *qp) -{ - u64 ret; - unsigned long outs[PLPAR_HCALL9_BUFSIZE]; - - ret = hcp_galpas_dtor(&qp->galpas); - if (ret) { - ehca_gen_err("Could not destruct qp->galpas"); - return H_RESOURCE; - } - ret = ehca_plpar_hcall9(H_DISABLE_AND_GETC, outs, - adapter_handle.handle, /* r4 */ - /* function code */ - 1, /* r5 */ - qp->ipz_qp_handle.handle, /* r6 */ - 0, 0, 0, 0, 0, 0); - if 
(ret == H_HARDWARE) - ehca_gen_err("HCA not operational. ret=%lli", ret); - - ret = ehca_plpar_hcall_norets(H_FREE_RESOURCE, - adapter_handle.handle, /* r4 */ - qp->ipz_qp_handle.handle, /* r5 */ - 0, 0, 0, 0, 0); - - if (ret == H_RESOURCE) - ehca_gen_err("Resource still in use. ret=%lli", ret); - - return ret; -} - -u64 hipz_h_define_aqp0(const struct ipz_adapter_handle adapter_handle, - const struct ipz_qp_handle qp_handle, - struct h_galpa gal, - u32 port) -{ - return ehca_plpar_hcall_norets(H_DEFINE_AQP0, - adapter_handle.handle, /* r4 */ - qp_handle.handle, /* r5 */ - port, /* r6 */ - 0, 0, 0, 0); -} - -u64 hipz_h_define_aqp1(const struct ipz_adapter_handle adapter_handle, - const struct ipz_qp_handle qp_handle, - struct h_galpa gal, - u32 port, u32 * pma_qp_nr, - u32 * bma_qp_nr) -{ - u64 ret; - unsigned long outs[PLPAR_HCALL9_BUFSIZE]; - - ret = ehca_plpar_hcall9(H_DEFINE_AQP1, outs, - adapter_handle.handle, /* r4 */ - qp_handle.handle, /* r5 */ - port, /* r6 */ - 0, 0, 0, 0, 0, 0); - *pma_qp_nr = (u32)outs[0]; - *bma_qp_nr = (u32)outs[1]; - - if (ret == H_ALIAS_EXIST) - ehca_gen_err("AQP1 already exists. ret=%lli", ret); - - return ret; -} - -u64 hipz_h_attach_mcqp(const struct ipz_adapter_handle adapter_handle, - const struct ipz_qp_handle qp_handle, - struct h_galpa gal, - u16 mcg_dlid, - u64 subnet_prefix, u64 interface_id) -{ - u64 ret; - - ret = ehca_plpar_hcall_norets(H_ATTACH_MCQP, - adapter_handle.handle, /* r4 */ - qp_handle.handle, /* r5 */ - mcg_dlid, /* r6 */ - interface_id, /* r7 */ - subnet_prefix, /* r8 */ - 0, 0); - - if (ret == H_NOT_ENOUGH_RESOURCES) - ehca_gen_err("Not enough resources. 
ret=%lli", ret); - - return ret; -} - -u64 hipz_h_detach_mcqp(const struct ipz_adapter_handle adapter_handle, - const struct ipz_qp_handle qp_handle, - struct h_galpa gal, - u16 mcg_dlid, - u64 subnet_prefix, u64 interface_id) -{ - return ehca_plpar_hcall_norets(H_DETACH_MCQP, - adapter_handle.handle, /* r4 */ - qp_handle.handle, /* r5 */ - mcg_dlid, /* r6 */ - interface_id, /* r7 */ - subnet_prefix, /* r8 */ - 0, 0); -} - -u64 hipz_h_destroy_cq(const struct ipz_adapter_handle adapter_handle, - struct ehca_cq *cq, - u8 force_flag) -{ - u64 ret; - - ret = hcp_galpas_dtor(&cq->galpas); - if (ret) { - ehca_gen_err("Could not destruct cp->galpas"); - return H_RESOURCE; - } - - ret = ehca_plpar_hcall_norets(H_FREE_RESOURCE, - adapter_handle.handle, /* r4 */ - cq->ipz_cq_handle.handle, /* r5 */ - force_flag != 0 ? 1L : 0L, /* r6 */ - 0, 0, 0, 0); - - if (ret == H_RESOURCE) - ehca_gen_err("H_FREE_RESOURCE failed ret=%lli ", ret); - - return ret; -} - -u64 hipz_h_destroy_eq(const struct ipz_adapter_handle adapter_handle, - struct ehca_eq *eq) -{ - u64 ret; - - ret = hcp_galpas_dtor(&eq->galpas); - if (ret) { - ehca_gen_err("Could not destruct eq->galpas"); - return H_RESOURCE; - } - - ret = ehca_plpar_hcall_norets(H_FREE_RESOURCE, - adapter_handle.handle, /* r4 */ - eq->ipz_eq_handle.handle, /* r5 */ - 0, 0, 0, 0, 0); - - if (ret == H_RESOURCE) - ehca_gen_err("Resource in use. 
ret=%lli ", ret); - - return ret; -} - -u64 hipz_h_alloc_resource_mr(const struct ipz_adapter_handle adapter_handle, - const struct ehca_mr *mr, - const u64 vaddr, - const u64 length, - const u32 access_ctrl, - const struct ipz_pd pd, - struct ehca_mr_hipzout_parms *outparms) -{ - u64 ret; - unsigned long outs[PLPAR_HCALL9_BUFSIZE]; - - ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs, - adapter_handle.handle, /* r4 */ - 5, /* r5 */ - vaddr, /* r6 */ - length, /* r7 */ - (((u64)access_ctrl) << 32ULL), /* r8 */ - pd.value, /* r9 */ - 0, 0, 0); - outparms->handle.handle = outs[0]; - outparms->lkey = (u32)outs[2]; - outparms->rkey = (u32)outs[3]; - - return ret; -} - -u64 hipz_h_register_rpage_mr(const struct ipz_adapter_handle adapter_handle, - const struct ehca_mr *mr, - const u8 pagesize, - const u8 queue_type, - const u64 logical_address_of_page, - const u64 count) -{ - u64 ret; - - if (unlikely(ehca_debug_level >= 3)) { - if (count > 1) { - u64 *kpage; - int i; - kpage = __va(logical_address_of_page); - for (i = 0; i < count; i++) - ehca_gen_dbg("kpage[%d]=%p", - i, (void *)kpage[i]); - } else - ehca_gen_dbg("kpage=%p", - (void *)logical_address_of_page); - } - - if ((count > 1) && (logical_address_of_page & (EHCA_PAGESIZE-1))) { - ehca_gen_err("logical_address_of_page not on a 4k boundary " - "adapter_handle=%llx mr=%p mr_handle=%llx " - "pagesize=%x queue_type=%x " - "logical_address_of_page=%llx count=%llx", - adapter_handle.handle, mr, - mr->ipz_mr_handle.handle, pagesize, queue_type, - logical_address_of_page, count); - ret = H_PARAMETER; - } else - ret = hipz_h_register_rpage(adapter_handle, pagesize, - queue_type, - mr->ipz_mr_handle.handle, - logical_address_of_page, count); - return ret; -} - -u64 hipz_h_query_mr(const struct ipz_adapter_handle adapter_handle, - const struct ehca_mr *mr, - struct ehca_mr_hipzout_parms *outparms) -{ - u64 ret; - unsigned long outs[PLPAR_HCALL9_BUFSIZE]; - - ret = ehca_plpar_hcall9(H_QUERY_MR, outs, - adapter_handle.handle, 
/* r4 */ - mr->ipz_mr_handle.handle, /* r5 */ - 0, 0, 0, 0, 0, 0, 0); - outparms->len = outs[0]; - outparms->vaddr = outs[1]; - outparms->acl = outs[4] >> 32; - outparms->lkey = (u32)(outs[5] >> 32); - outparms->rkey = (u32)(outs[5] & (0xffffffff)); - - return ret; -} - -u64 hipz_h_free_resource_mr(const struct ipz_adapter_handle adapter_handle, - const struct ehca_mr *mr) -{ - return ehca_plpar_hcall_norets(H_FREE_RESOURCE, - adapter_handle.handle, /* r4 */ - mr->ipz_mr_handle.handle, /* r5 */ - 0, 0, 0, 0, 0); -} - -u64 hipz_h_reregister_pmr(const struct ipz_adapter_handle adapter_handle, - const struct ehca_mr *mr, - const u64 vaddr_in, - const u64 length, - const u32 access_ctrl, - const struct ipz_pd pd, - const u64 mr_addr_cb, - struct ehca_mr_hipzout_parms *outparms) -{ - u64 ret; - unsigned long outs[PLPAR_HCALL9_BUFSIZE]; - - ret = ehca_plpar_hcall9(H_REREGISTER_PMR, outs, - adapter_handle.handle, /* r4 */ - mr->ipz_mr_handle.handle, /* r5 */ - vaddr_in, /* r6 */ - length, /* r7 */ - /* r8 */ - ((((u64)access_ctrl) << 32ULL) | pd.value), - mr_addr_cb, /* r9 */ - 0, 0, 0); - outparms->vaddr = outs[1]; - outparms->lkey = (u32)outs[2]; - outparms->rkey = (u32)outs[3]; - - return ret; -} - -u64 hipz_h_register_smr(const struct ipz_adapter_handle adapter_handle, - const struct ehca_mr *mr, - const struct ehca_mr *orig_mr, - const u64 vaddr_in, - const u32 access_ctrl, - const struct ipz_pd pd, - struct ehca_mr_hipzout_parms *outparms) -{ - u64 ret; - unsigned long outs[PLPAR_HCALL9_BUFSIZE]; - - ret = ehca_plpar_hcall9(H_REGISTER_SMR, outs, - adapter_handle.handle, /* r4 */ - orig_mr->ipz_mr_handle.handle, /* r5 */ - vaddr_in, /* r6 */ - (((u64)access_ctrl) << 32ULL), /* r7 */ - pd.value, /* r8 */ - 0, 0, 0, 0); - outparms->handle.handle = outs[0]; - outparms->lkey = (u32)outs[2]; - outparms->rkey = (u32)outs[3]; - - return ret; -} - -u64 hipz_h_alloc_resource_mw(const struct ipz_adapter_handle adapter_handle, - const struct ehca_mw *mw, - const struct ipz_pd 
pd, - struct ehca_mw_hipzout_parms *outparms) -{ - u64 ret; - unsigned long outs[PLPAR_HCALL9_BUFSIZE]; - - ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs, - adapter_handle.handle, /* r4 */ - 6, /* r5 */ - pd.value, /* r6 */ - 0, 0, 0, 0, 0, 0); - outparms->handle.handle = outs[0]; - outparms->rkey = (u32)outs[3]; - - return ret; -} - -u64 hipz_h_query_mw(const struct ipz_adapter_handle adapter_handle, - const struct ehca_mw *mw, - struct ehca_mw_hipzout_parms *outparms) -{ - u64 ret; - unsigned long outs[PLPAR_HCALL9_BUFSIZE]; - - ret = ehca_plpar_hcall9(H_QUERY_MW, outs, - adapter_handle.handle, /* r4 */ - mw->ipz_mw_handle.handle, /* r5 */ - 0, 0, 0, 0, 0, 0, 0); - outparms->rkey = (u32)outs[3]; - - return ret; -} - -u64 hipz_h_free_resource_mw(const struct ipz_adapter_handle adapter_handle, - const struct ehca_mw *mw) -{ - return ehca_plpar_hcall_norets(H_FREE_RESOURCE, - adapter_handle.handle, /* r4 */ - mw->ipz_mw_handle.handle, /* r5 */ - 0, 0, 0, 0, 0); -} - -u64 hipz_h_error_data(const struct ipz_adapter_handle adapter_handle, - const u64 ressource_handle, - void *rblock, - unsigned long *byte_count) -{ - u64 r_cb = __pa(rblock); - - if (r_cb & (EHCA_PAGESIZE-1)) { - ehca_gen_err("rblock not page aligned."); - return H_PARAMETER; - } - - return ehca_plpar_hcall_norets(H_ERROR_DATA, - adapter_handle.handle, - ressource_handle, - r_cb, - 0, 0, 0, 0); -} - -u64 hipz_h_eoi(int irq) -{ - unsigned long xirr; - - iosync(); - xirr = (0xffULL << 24) | irq; - - return plpar_hcall_norets(H_EOI, xirr); -} diff --git a/drivers/infiniband/hw/ehca/hcp_if.h b/drivers/infiniband/hw/ehca/hcp_if.h deleted file mode 100644 index a46e514..0000000 --- a/drivers/infiniband/hw/ehca/hcp_if.h +++ /dev/null @@ -1,265 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * Firmware Infiniband Interface code for POWER - * - * Authors: Christoph Raisch - * Hoang-Nam Nguyen - * Gerd Bayer - * Waleri Fomin - * - * Copyright (c) 2005 IBM Corporation - * - * 
All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef __HCP_IF_H__ -#define __HCP_IF_H__ - -#include "ehca_classes.h" -#include "ehca_tools.h" -#include "hipz_hw.h" - -/* - * hipz_h_alloc_resource_eq allocates EQ resources in HW and FW, initialize - * resources, create the empty EQPT (ring). 
- */ -u64 hipz_h_alloc_resource_eq(const struct ipz_adapter_handle adapter_handle, - struct ehca_pfeq *pfeq, - const u32 neq_control, - const u32 number_of_entries, - struct ipz_eq_handle *eq_handle, - u32 * act_nr_of_entries, - u32 * act_pages, - u32 * eq_ist); - -u64 hipz_h_reset_event(const struct ipz_adapter_handle adapter_handle, - struct ipz_eq_handle eq_handle, - const u64 event_mask); -/* - * hipz_h_allocate_resource_cq allocates CQ resources in HW and FW, initialize - * resources, create the empty CQPT (ring). - */ -u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle, - struct ehca_cq *cq, - struct ehca_alloc_cq_parms *param); - - -/* - * hipz_h_alloc_resource_qp allocates QP resources in HW and FW, - * initialize resources, create empty QPPTs (2 rings). - */ -u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle, - struct ehca_alloc_qp_parms *parms, int is_user); - -u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle, - const u8 port_id, - struct hipz_query_port *query_port_response_block); - -u64 hipz_h_modify_port(const struct ipz_adapter_handle adapter_handle, - const u8 port_id, const u32 port_cap, - const u8 init_type, const int modify_mask); - -u64 hipz_h_query_hca(const struct ipz_adapter_handle adapter_handle, - struct hipz_query_hca *query_hca_rblock); - -/* - * hipz_h_register_rpage internal function in hcp_if.h for all - * hcp_H_REGISTER_RPAGE calls. 
- */ -u64 hipz_h_register_rpage(const struct ipz_adapter_handle adapter_handle, - const u8 pagesize, - const u8 queue_type, - const u64 resource_handle, - const u64 logical_address_of_page, - u64 count); - -u64 hipz_h_register_rpage_eq(const struct ipz_adapter_handle adapter_handle, - const struct ipz_eq_handle eq_handle, - struct ehca_pfeq *pfeq, - const u8 pagesize, - const u8 queue_type, - const u64 logical_address_of_page, - const u64 count); - -u64 hipz_h_query_int_state(const struct ipz_adapter_handle - hcp_adapter_handle, - u32 ist); - -u64 hipz_h_register_rpage_cq(const struct ipz_adapter_handle adapter_handle, - const struct ipz_cq_handle cq_handle, - struct ehca_pfcq *pfcq, - const u8 pagesize, - const u8 queue_type, - const u64 logical_address_of_page, - const u64 count, - const struct h_galpa gal); - -u64 hipz_h_register_rpage_qp(const struct ipz_adapter_handle adapter_handle, - const struct ipz_qp_handle qp_handle, - struct ehca_pfqp *pfqp, - const u8 pagesize, - const u8 queue_type, - const u64 logical_address_of_page, - const u64 count, - const struct h_galpa galpa); - -u64 hipz_h_disable_and_get_wqe(const struct ipz_adapter_handle adapter_handle, - const struct ipz_qp_handle qp_handle, - struct ehca_pfqp *pfqp, - void **log_addr_next_sq_wqe_tb_processed, - void **log_addr_next_rq_wqe_tb_processed, - int dis_and_get_function_code); -enum hcall_sigt { - HCALL_SIGT_NO_CQE = 0, - HCALL_SIGT_BY_WQE = 1, - HCALL_SIGT_EVERY = 2 -}; - -u64 hipz_h_modify_qp(const struct ipz_adapter_handle adapter_handle, - const struct ipz_qp_handle qp_handle, - struct ehca_pfqp *pfqp, - const u64 update_mask, - struct hcp_modify_qp_control_block *mqpcb, - struct h_galpa gal); - -u64 hipz_h_query_qp(const struct ipz_adapter_handle adapter_handle, - const struct ipz_qp_handle qp_handle, - struct ehca_pfqp *pfqp, - struct hcp_modify_qp_control_block *qqpcb, - struct h_galpa gal); - -u64 hipz_h_destroy_qp(const struct ipz_adapter_handle adapter_handle, - struct ehca_qp *qp); - 
-u64 hipz_h_define_aqp0(const struct ipz_adapter_handle adapter_handle, - const struct ipz_qp_handle qp_handle, - struct h_galpa gal, - u32 port); - -u64 hipz_h_define_aqp1(const struct ipz_adapter_handle adapter_handle, - const struct ipz_qp_handle qp_handle, - struct h_galpa gal, - u32 port, u32 * pma_qp_nr, - u32 * bma_qp_nr); - -u64 hipz_h_attach_mcqp(const struct ipz_adapter_handle adapter_handle, - const struct ipz_qp_handle qp_handle, - struct h_galpa gal, - u16 mcg_dlid, - u64 subnet_prefix, u64 interface_id); - -u64 hipz_h_detach_mcqp(const struct ipz_adapter_handle adapter_handle, - const struct ipz_qp_handle qp_handle, - struct h_galpa gal, - u16 mcg_dlid, - u64 subnet_prefix, u64 interface_id); - -u64 hipz_h_destroy_cq(const struct ipz_adapter_handle adapter_handle, - struct ehca_cq *cq, - u8 force_flag); - -u64 hipz_h_destroy_eq(const struct ipz_adapter_handle adapter_handle, - struct ehca_eq *eq); - -/* - * hipz_h_alloc_resource_mr allocates MR resources in HW and FW, initialize - * resources. 
- */ -u64 hipz_h_alloc_resource_mr(const struct ipz_adapter_handle adapter_handle, - const struct ehca_mr *mr, - const u64 vaddr, - const u64 length, - const u32 access_ctrl, - const struct ipz_pd pd, - struct ehca_mr_hipzout_parms *outparms); - -/* hipz_h_register_rpage_mr registers MR resource pages in HW and FW */ -u64 hipz_h_register_rpage_mr(const struct ipz_adapter_handle adapter_handle, - const struct ehca_mr *mr, - const u8 pagesize, - const u8 queue_type, - const u64 logical_address_of_page, - const u64 count); - -/* hipz_h_query_mr queries MR in HW and FW */ -u64 hipz_h_query_mr(const struct ipz_adapter_handle adapter_handle, - const struct ehca_mr *mr, - struct ehca_mr_hipzout_parms *outparms); - -/* hipz_h_free_resource_mr frees MR resources in HW and FW */ -u64 hipz_h_free_resource_mr(const struct ipz_adapter_handle adapter_handle, - const struct ehca_mr *mr); - -/* hipz_h_reregister_pmr reregisters MR in HW and FW */ -u64 hipz_h_reregister_pmr(const struct ipz_adapter_handle adapter_handle, - const struct ehca_mr *mr, - const u64 vaddr_in, - const u64 length, - const u32 access_ctrl, - const struct ipz_pd pd, - const u64 mr_addr_cb, - struct ehca_mr_hipzout_parms *outparms); - -/* hipz_h_register_smr register shared MR in HW and FW */ -u64 hipz_h_register_smr(const struct ipz_adapter_handle adapter_handle, - const struct ehca_mr *mr, - const struct ehca_mr *orig_mr, - const u64 vaddr_in, - const u32 access_ctrl, - const struct ipz_pd pd, - struct ehca_mr_hipzout_parms *outparms); - -/* - * hipz_h_alloc_resource_mw allocates MW resources in HW and FW, initialize - * resources. 
- */ -u64 hipz_h_alloc_resource_mw(const struct ipz_adapter_handle adapter_handle, - const struct ehca_mw *mw, - const struct ipz_pd pd, - struct ehca_mw_hipzout_parms *outparms); - -/* hipz_h_query_mw queries MW in HW and FW */ -u64 hipz_h_query_mw(const struct ipz_adapter_handle adapter_handle, - const struct ehca_mw *mw, - struct ehca_mw_hipzout_parms *outparms); - -/* hipz_h_free_resource_mw frees MW resources in HW and FW */ -u64 hipz_h_free_resource_mw(const struct ipz_adapter_handle adapter_handle, - const struct ehca_mw *mw); - -u64 hipz_h_error_data(const struct ipz_adapter_handle adapter_handle, - const u64 ressource_handle, - void *rblock, - unsigned long *byte_count); -u64 hipz_h_eoi(int irq); - -#endif /* __HCP_IF_H__ */ diff --git a/drivers/infiniband/hw/ehca/hcp_phyp.c b/drivers/infiniband/hw/ehca/hcp_phyp.c deleted file mode 100644 index 077376f..0000000 --- a/drivers/infiniband/hw/ehca/hcp_phyp.c +++ /dev/null @@ -1,82 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * load store abstraction for ehca register access with tracing - * - * Authors: Christoph Raisch - * Hoang-Nam Nguyen - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "ehca_classes.h" -#include "hipz_hw.h" - -u64 hcall_map_page(u64 physaddr) -{ - return (u64)ioremap(physaddr, EHCA_PAGESIZE); -} - -int hcall_unmap_page(u64 mapaddr) -{ - iounmap((volatile void __iomem *) mapaddr); - return 0; -} - -int hcp_galpas_ctor(struct h_galpas *galpas, int is_user, - u64 paddr_kernel, u64 paddr_user) -{ - if (!is_user) { - galpas->kernel.fw_handle = hcall_map_page(paddr_kernel); - if (!galpas->kernel.fw_handle) - return -ENOMEM; - } else - galpas->kernel.fw_handle = 0; - - galpas->user.fw_handle = paddr_user; - - return 0; -} - -int hcp_galpas_dtor(struct h_galpas *galpas) -{ - if (galpas->kernel.fw_handle) { - int ret = hcall_unmap_page(galpas->kernel.fw_handle); - if (ret) - return ret; - } - - galpas->user.fw_handle = galpas->kernel.fw_handle = 0; - - return 0; -} diff --git a/drivers/infiniband/hw/ehca/hcp_phyp.h b/drivers/infiniband/hw/ehca/hcp_phyp.h deleted file mode 100644 index d1b0299..0000000 --- a/drivers/infiniband/hw/ehca/hcp_phyp.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * Firmware calls - * - * Authors: Christoph Raisch - * Hoang-Nam Nguyen - * Waleri 
Fomin - * Gerd Bayer - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef __HCP_PHYP_H__ -#define __HCP_PHYP_H__ - - -/* - * eHCA page (mapped into memory) - * resource to access eHCA register pages in CPU address space -*/ -struct h_galpa { - u64 fw_handle; - /* for pSeries this is a 64bit memory address where - I/O memory is mapped into CPU address space (kv) */ -}; - -/* - * resource to access eHCA address space registers, all types - */ -struct h_galpas { - u32 pid; /*PID of userspace galpa checking */ - struct h_galpa user; /* user space accessible resource, - set to 0 if unused */ - struct h_galpa kernel; /* kernel space accessible resource, - set to 0 if unused */ -}; - -static inline u64 hipz_galpa_load(struct h_galpa galpa, u32 offset) -{ - u64 addr = galpa.fw_handle + offset; - return *(volatile u64 __force *)addr; -} - -static inline void hipz_galpa_store(struct h_galpa galpa, u32 offset, u64 value) -{ - u64 addr = galpa.fw_handle + offset; - *(volatile u64 __force *)addr = value; -} - -int hcp_galpas_ctor(struct h_galpas *galpas, int is_user, - u64 paddr_kernel, u64 paddr_user); - -int hcp_galpas_dtor(struct h_galpas *galpas); - -u64 hcall_map_page(u64 physaddr); - -int hcall_unmap_page(u64 mapaddr); - -#endif diff --git a/drivers/infiniband/hw/ehca/hipz_fns.h b/drivers/infiniband/hw/ehca/hipz_fns.h deleted file mode 100644 index 9dac93d..0000000 --- a/drivers/infiniband/hw/ehca/hipz_fns.h +++ /dev/null @@ -1,68 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * HW abstraction register functions - * - * Authors: Christoph Raisch - * Reinhard Ernst - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. 
- * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef __HIPZ_FNS_H__ -#define __HIPZ_FNS_H__ - -#include "ehca_classes.h" -#include "hipz_hw.h" - -#include "hipz_fns_core.h" - -#define hipz_galpa_store_eq(gal, offset, value) \ - hipz_galpa_store(gal, EQTEMM_OFFSET(offset), value) - -#define hipz_galpa_load_eq(gal, offset) \ - hipz_galpa_load(gal, EQTEMM_OFFSET(offset)) - -#define hipz_galpa_store_qped(gal, offset, value) \ - hipz_galpa_store(gal, QPEDMM_OFFSET(offset), value) - -#define hipz_galpa_load_qped(gal, offset) \ - hipz_galpa_load(gal, QPEDMM_OFFSET(offset)) - -#define hipz_galpa_store_mrmw(gal, offset, value) \ - hipz_galpa_store(gal, MRMWMM_OFFSET(offset), value) - -#define hipz_galpa_load_mrmw(gal, offset) \ - hipz_galpa_load(gal, MRMWMM_OFFSET(offset)) - -#endif diff --git a/drivers/infiniband/hw/ehca/hipz_fns_core.h b/drivers/infiniband/hw/ehca/hipz_fns_core.h deleted file mode 100644 index 868735f..0000000 --- a/drivers/infiniband/hw/ehca/hipz_fns_core.h +++ /dev/null @@ -1,100 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * HW abstraction register functions - * - * Authors: Christoph Raisch - * Heiko J Schick - * Hoang-Nam Nguyen - * Reinhard Ernst - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef __HIPZ_FNS_CORE_H__ -#define __HIPZ_FNS_CORE_H__ - -#include "hcp_phyp.h" -#include "hipz_hw.h" - -#define hipz_galpa_store_cq(gal, offset, value) \ - hipz_galpa_store(gal, CQTEMM_OFFSET(offset), value) - -#define hipz_galpa_load_cq(gal, offset) \ - hipz_galpa_load(gal, CQTEMM_OFFSET(offset)) - -#define hipz_galpa_store_qp(gal, offset, value) \ - hipz_galpa_store(gal, QPTEMM_OFFSET(offset), value) -#define hipz_galpa_load_qp(gal, offset) \ - hipz_galpa_load(gal, QPTEMM_OFFSET(offset)) - -static inline void hipz_update_sqa(struct ehca_qp *qp, u16 nr_wqes) -{ - /* ringing doorbell :-) */ - hipz_galpa_store_qp(qp->galpas.kernel, qpx_sqa, - EHCA_BMASK_SET(QPX_SQADDER, nr_wqes)); -} - -static inline void hipz_update_rqa(struct ehca_qp *qp, u16 nr_wqes) -{ - /* ringing doorbell :-) */ - hipz_galpa_store_qp(qp->galpas.kernel, qpx_rqa, - EHCA_BMASK_SET(QPX_RQADDER, nr_wqes)); -} - -static inline void hipz_update_feca(struct ehca_cq *cq, u32 nr_cqes) -{ - hipz_galpa_store_cq(cq->galpas.kernel, cqx_feca, - EHCA_BMASK_SET(CQX_FECADDER, nr_cqes)); -} - -static inline void hipz_set_cqx_n0(struct ehca_cq *cq, u32 value) -{ - u64 cqx_n0_reg; - - 
hipz_galpa_store_cq(cq->galpas.kernel, cqx_n0, - EHCA_BMASK_SET(CQX_N0_GENERATE_SOLICITED_COMP_EVENT, - value)); - cqx_n0_reg = hipz_galpa_load_cq(cq->galpas.kernel, cqx_n0); -} - -static inline void hipz_set_cqx_n1(struct ehca_cq *cq, u32 value) -{ - u64 cqx_n1_reg; - - hipz_galpa_store_cq(cq->galpas.kernel, cqx_n1, - EHCA_BMASK_SET(CQX_N1_GENERATE_COMP_EVENT, value)); - cqx_n1_reg = hipz_galpa_load_cq(cq->galpas.kernel, cqx_n1); -} - -#endif /* __HIPZ_FNC_CORE_H__ */ diff --git a/drivers/infiniband/hw/ehca/hipz_hw.h b/drivers/infiniband/hw/ehca/hipz_hw.h deleted file mode 100644 index bf996c7..0000000 --- a/drivers/infiniband/hw/ehca/hipz_hw.h +++ /dev/null @@ -1,414 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * eHCA register definitions - * - * Authors: Waleri Fomin - * Christoph Raisch - * Reinhard Ernst - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef __HIPZ_HW_H__ -#define __HIPZ_HW_H__ - -#include "ehca_tools.h" - -#define EHCA_MAX_MTU 4 - -/* QP Table Entry Memory Map */ -struct hipz_qptemm { - u64 qpx_hcr; - u64 qpx_c; - u64 qpx_herr; - u64 qpx_aer; -/* 0x20*/ - u64 qpx_sqa; - u64 qpx_sqc; - u64 qpx_rqa; - u64 qpx_rqc; -/* 0x40*/ - u64 qpx_st; - u64 qpx_pmstate; - u64 qpx_pmfa; - u64 qpx_pkey; -/* 0x60*/ - u64 qpx_pkeya; - u64 qpx_pkeyb; - u64 qpx_pkeyc; - u64 qpx_pkeyd; -/* 0x80*/ - u64 qpx_qkey; - u64 qpx_dqp; - u64 qpx_dlidp; - u64 qpx_portp; -/* 0xa0*/ - u64 qpx_slidp; - u64 qpx_slidpp; - u64 qpx_dlida; - u64 qpx_porta; -/* 0xc0*/ - u64 qpx_slida; - u64 qpx_slidpa; - u64 qpx_slvl; - u64 qpx_ipd; -/* 0xe0*/ - u64 qpx_mtu; - u64 qpx_lato; - u64 qpx_rlimit; - u64 qpx_rnrlimit; -/* 0x100*/ - u64 qpx_t; - u64 qpx_sqhp; - u64 qpx_sqptp; - u64 qpx_nspsn; -/* 0x120*/ - u64 qpx_nspsnhwm; - u64 reserved1; - u64 qpx_sdsi; - u64 qpx_sdsbc; -/* 0x140*/ - u64 qpx_sqwsize; - u64 qpx_sqwts; - u64 qpx_lsn; - u64 qpx_nssn; -/* 0x160 */ - u64 qpx_mor; - u64 qpx_cor; - u64 qpx_sqsize; - u64 qpx_erc; -/* 0x180*/ - u64 qpx_rnrrc; - u64 qpx_ernrwt; - u64 qpx_rnrresp; - u64 qpx_lmsna; -/* 0x1a0 */ - u64 qpx_sqhpc; - u64 qpx_sqcptp; - u64 qpx_sigt; - u64 qpx_wqecnt; -/* 0x1c0*/ - u64 qpx_rqhp; - u64 qpx_rqptp; - u64 qpx_rqsize; - u64 qpx_nrr; -/* 0x1e0*/ - u64 qpx_rdmac; - u64 qpx_nrpsn; - u64 qpx_lapsn; - u64 qpx_lcr; -/* 0x200*/ - u64 qpx_rwc; - u64 qpx_rwva; - u64 qpx_rdsi; - 
u64 qpx_rdsbc; -/* 0x220*/ - u64 qpx_rqwsize; - u64 qpx_crmsn; - u64 qpx_rdd; - u64 qpx_larpsn; -/* 0x240*/ - u64 qpx_pd; - u64 qpx_scqn; - u64 qpx_rcqn; - u64 qpx_aeqn; -/* 0x260*/ - u64 qpx_aaelog; - u64 qpx_ram; - u64 qpx_rdmaqe0; - u64 qpx_rdmaqe1; -/* 0x280*/ - u64 qpx_rdmaqe2; - u64 qpx_rdmaqe3; - u64 qpx_nrpsnhwm; -/* 0x298*/ - u64 reserved[(0x400 - 0x298) / 8]; -/* 0x400 extended data */ - u64 reserved_ext[(0x500 - 0x400) / 8]; -/* 0x500 */ - u64 reserved2[(0x1000 - 0x500) / 8]; -/* 0x1000 */ -}; - -#define QPX_SQADDER EHCA_BMASK_IBM(48, 63) -#define QPX_RQADDER EHCA_BMASK_IBM(48, 63) -#define QPX_AAELOG_RESET_SRQ_LIMIT EHCA_BMASK_IBM(3, 3) - -#define QPTEMM_OFFSET(x) offsetof(struct hipz_qptemm, x) - -/* MRMWPT Entry Memory Map */ -struct hipz_mrmwmm { - /* 0x00 */ - u64 mrx_hcr; - - u64 mrx_c; - u64 mrx_herr; - u64 mrx_aer; - /* 0x20 */ - u64 mrx_pp; - u64 reserved1; - u64 reserved2; - u64 reserved3; - /* 0x40 */ - u64 reserved4[(0x200 - 0x40) / 8]; - /* 0x200 */ - u64 mrx_ctl[64]; - -}; - -#define MRMWMM_OFFSET(x) offsetof(struct hipz_mrmwmm, x) - -struct hipz_qpedmm { - /* 0x00 */ - u64 reserved0[(0x400) / 8]; - /* 0x400 */ - u64 qpedx_phh; - u64 qpedx_ppsgp; - /* 0x410 */ - u64 qpedx_ppsgu; - u64 qpedx_ppdgp; - /* 0x420 */ - u64 qpedx_ppdgu; - u64 qpedx_aph; - /* 0x430 */ - u64 qpedx_apsgp; - u64 qpedx_apsgu; - /* 0x440 */ - u64 qpedx_apdgp; - u64 qpedx_apdgu; - /* 0x450 */ - u64 qpedx_apav; - u64 qpedx_apsav; - /* 0x460 */ - u64 qpedx_hcr; - u64 reserved1[4]; - /* 0x488 */ - u64 qpedx_rrl0; - /* 0x490 */ - u64 qpedx_rrrkey0; - u64 qpedx_rrva0; - /* 0x4a0 */ - u64 reserved2; - u64 qpedx_rrl1; - /* 0x4b0 */ - u64 qpedx_rrrkey1; - u64 qpedx_rrva1; - /* 0x4c0 */ - u64 reserved3; - u64 qpedx_rrl2; - /* 0x4d0 */ - u64 qpedx_rrrkey2; - u64 qpedx_rrva2; - /* 0x4e0 */ - u64 reserved4; - u64 qpedx_rrl3; - /* 0x4f0 */ - u64 qpedx_rrrkey3; - u64 qpedx_rrva3; -}; - -#define QPEDMM_OFFSET(x) offsetof(struct hipz_qpedmm, x) - -/* CQ Table Entry Memory Map */ -struct 
hipz_cqtemm { - u64 cqx_hcr; - u64 cqx_c; - u64 cqx_herr; - u64 cqx_aer; -/* 0x20 */ - u64 cqx_ptp; - u64 cqx_tp; - u64 cqx_fec; - u64 cqx_feca; -/* 0x40 */ - u64 cqx_ep; - u64 cqx_eq; -/* 0x50 */ - u64 reserved1; - u64 cqx_n0; -/* 0x60 */ - u64 cqx_n1; - u64 reserved2[(0x1000 - 0x60) / 8]; -/* 0x1000 */ -}; - -#define CQX_FEC_CQE_CNT EHCA_BMASK_IBM(32, 63) -#define CQX_FECADDER EHCA_BMASK_IBM(32, 63) -#define CQX_N0_GENERATE_SOLICITED_COMP_EVENT EHCA_BMASK_IBM(0, 0) -#define CQX_N1_GENERATE_COMP_EVENT EHCA_BMASK_IBM(0, 0) - -#define CQTEMM_OFFSET(x) offsetof(struct hipz_cqtemm, x) - -/* EQ Table Entry Memory Map */ -struct hipz_eqtemm { - u64 eqx_hcr; - u64 eqx_c; - - u64 eqx_herr; - u64 eqx_aer; -/* 0x20 */ - u64 eqx_ptp; - u64 eqx_tp; - u64 eqx_ssba; - u64 eqx_psba; - -/* 0x40 */ - u64 eqx_cec; - u64 eqx_meql; - u64 eqx_xisbi; - u64 eqx_xisc; -/* 0x60 */ - u64 eqx_it; - -}; - -#define EQTEMM_OFFSET(x) offsetof(struct hipz_eqtemm, x) - -/* access control defines for MR/MW */ -#define HIPZ_ACCESSCTRL_L_WRITE 0x00800000 -#define HIPZ_ACCESSCTRL_R_WRITE 0x00400000 -#define HIPZ_ACCESSCTRL_R_READ 0x00200000 -#define HIPZ_ACCESSCTRL_R_ATOMIC 0x00100000 -#define HIPZ_ACCESSCTRL_MW_BIND 0x00080000 - -/* query hca response block */ -struct hipz_query_hca { - u32 cur_reliable_dg; - u32 cur_qp; - u32 cur_cq; - u32 cur_eq; - u32 cur_mr; - u32 cur_mw; - u32 cur_ee_context; - u32 cur_mcast_grp; - u32 cur_qp_attached_mcast_grp; - u32 reserved1; - u32 cur_ipv6_qp; - u32 cur_eth_qp; - u32 cur_hp_mr; - u32 reserved2[3]; - u32 max_rd_domain; - u32 max_qp; - u32 max_cq; - u32 max_eq; - u32 max_mr; - u32 max_hp_mr; - u32 max_mw; - u32 max_mrwpte; - u32 max_special_mrwpte; - u32 max_rd_ee_context; - u32 max_mcast_grp; - u32 max_total_mcast_qp_attach; - u32 max_mcast_qp_attach; - u32 max_raw_ipv6_qp; - u32 max_raw_ethy_qp; - u32 internal_clock_frequency; - u32 max_pd; - u32 max_ah; - u32 max_cqe; - u32 max_wqes_wq; - u32 max_partitions; - u32 max_rr_ee_context; - u32 max_rr_qp; - u32 
max_rr_hca; - u32 max_act_wqs_ee_context; - u32 max_act_wqs_qp; - u32 max_sge; - u32 max_sge_rd; - u32 memory_page_size_supported; - u64 max_mr_size; - u32 local_ca_ack_delay; - u32 num_ports; - u32 vendor_id; - u32 vendor_part_id; - u32 hw_ver; - u64 node_guid; - u64 hca_cap_indicators; - u32 data_counter_register_size; - u32 max_shared_rq; - u32 max_isns_eq; - u32 max_neq; -} __attribute__ ((packed)); - -#define HCA_CAP_AH_PORT_NR_CHECK EHCA_BMASK_IBM( 0, 0) -#define HCA_CAP_ATOMIC EHCA_BMASK_IBM( 1, 1) -#define HCA_CAP_AUTO_PATH_MIG EHCA_BMASK_IBM( 2, 2) -#define HCA_CAP_BAD_P_KEY_CTR EHCA_BMASK_IBM( 3, 3) -#define HCA_CAP_SQD_RTS_PORT_CHANGE EHCA_BMASK_IBM( 4, 4) -#define HCA_CAP_CUR_QP_STATE_MOD EHCA_BMASK_IBM( 5, 5) -#define HCA_CAP_INIT_TYPE EHCA_BMASK_IBM( 6, 6) -#define HCA_CAP_PORT_ACTIVE_EVENT EHCA_BMASK_IBM( 7, 7) -#define HCA_CAP_Q_KEY_VIOL_CTR EHCA_BMASK_IBM( 8, 8) -#define HCA_CAP_WQE_RESIZE EHCA_BMASK_IBM( 9, 9) -#define HCA_CAP_RAW_PACKET_MCAST EHCA_BMASK_IBM(10, 10) -#define HCA_CAP_SHUTDOWN_PORT EHCA_BMASK_IBM(11, 11) -#define HCA_CAP_RC_LL_QP EHCA_BMASK_IBM(12, 12) -#define HCA_CAP_SRQ EHCA_BMASK_IBM(13, 13) -#define HCA_CAP_UD_LL_QP EHCA_BMASK_IBM(16, 16) -#define HCA_CAP_RESIZE_MR EHCA_BMASK_IBM(17, 17) -#define HCA_CAP_MINI_QP EHCA_BMASK_IBM(18, 18) -#define HCA_CAP_H_ALLOC_RES_SYNC EHCA_BMASK_IBM(19, 19) - -/* query port response block */ -struct hipz_query_port { - u32 state; - u32 bad_pkey_cntr; - u32 lmc; - u32 lid; - u32 subnet_timeout; - u32 qkey_viol_cntr; - u32 sm_sl; - u32 sm_lid; - u32 capability_mask; - u32 init_type_reply; - u32 pkey_tbl_len; - u32 gid_tbl_len; - u64 gid_prefix; - u32 port_nr; - u16 pkey_entries[16]; - u8 reserved1[32]; - u32 trent_size; - u32 trbuf_size; - u64 max_msg_sz; - u32 max_mtu; - u32 vl_cap; - u32 phys_pstate; - u32 phys_state; - u32 phys_speed; - u32 phys_width; - u8 reserved2[1884]; - u64 guid_entries[255]; -} __attribute__ ((packed)); - -#endif diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.c 
b/drivers/infiniband/hw/ehca/ipz_pt_fn.c deleted file mode 100644 index 7ffc748..0000000 --- a/drivers/infiniband/hw/ehca/ipz_pt_fn.c +++ /dev/null @@ -1,289 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * internal queue handling - * - * Authors: Waleri Fomin - * Reinhard Ernst - * Christoph Raisch - * - * Copyright (c) 2005 IBM Corporation - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include - -#include "ehca_tools.h" -#include "ipz_pt_fn.h" -#include "ehca_classes.h" - -#define PAGES_PER_KPAGE (PAGE_SIZE >> EHCA_PAGESHIFT) - -struct kmem_cache *small_qp_cache; - -void *ipz_qpageit_get_inc(struct ipz_queue *queue) -{ - void *ret = ipz_qeit_get(queue); - queue->current_q_offset += queue->pagesize; - if (queue->current_q_offset > queue->queue_length) { - queue->current_q_offset -= queue->pagesize; - ret = NULL; - } - if (((u64)ret) % queue->pagesize) { - ehca_gen_err("ERROR!! not at PAGE-Boundary"); - return NULL; - } - return ret; -} - -void *ipz_qeit_eq_get_inc(struct ipz_queue *queue) -{ - void *ret = ipz_qeit_get(queue); - u64 last_entry_in_q = queue->queue_length - queue->qe_size; - - queue->current_q_offset += queue->qe_size; - if (queue->current_q_offset > last_entry_in_q) { - queue->current_q_offset = 0; - queue->toggle_state = (~queue->toggle_state) & 1; - } - - return ret; -} - -int ipz_queue_abs_to_offset(struct ipz_queue *queue, u64 addr, u64 *q_offset) -{ - int i; - for (i = 0; i < queue->queue_length / queue->pagesize; i++) { - u64 page = __pa(queue->queue_pages[i]); - if (addr >= page && addr < page + queue->pagesize) { - *q_offset = addr - page + i * queue->pagesize; - return 0; - } - } - return -EINVAL; -} - -#if PAGE_SHIFT < EHCA_PAGESHIFT -#error Kernel pages must be at least as large than eHCA pages (4K) ! 
-#endif - -/* - * allocate pages for queue: - * outer loop allocates whole kernel pages (page aligned) and - * inner loop divides a kernel page into smaller hca queue pages - */ -static int alloc_queue_pages(struct ipz_queue *queue, const u32 nr_of_pages) -{ - int k, f = 0; - u8 *kpage; - - while (f < nr_of_pages) { - kpage = (u8 *)get_zeroed_page(GFP_KERNEL); - if (!kpage) - goto out; - - for (k = 0; k < PAGES_PER_KPAGE && f < nr_of_pages; k++) { - queue->queue_pages[f] = (struct ipz_page *)kpage; - kpage += EHCA_PAGESIZE; - f++; - } - } - return 1; - -out: - for (f = 0; f < nr_of_pages && queue->queue_pages[f]; - f += PAGES_PER_KPAGE) - free_page((unsigned long)(queue->queue_pages)[f]); - return 0; -} - -static int alloc_small_queue_page(struct ipz_queue *queue, struct ehca_pd *pd) -{ - int order = ilog2(queue->pagesize) - 9; - struct ipz_small_queue_page *page; - unsigned long bit; - - mutex_lock(&pd->lock); - - if (!list_empty(&pd->free[order])) - page = list_entry(pd->free[order].next, - struct ipz_small_queue_page, list); - else { - page = kmem_cache_zalloc(small_qp_cache, GFP_KERNEL); - if (!page) - goto out; - - page->page = get_zeroed_page(GFP_KERNEL); - if (!page->page) { - kmem_cache_free(small_qp_cache, page); - goto out; - } - - list_add(&page->list, &pd->free[order]); - } - - bit = find_first_zero_bit(page->bitmap, IPZ_SPAGE_PER_KPAGE >> order); - __set_bit(bit, page->bitmap); - page->fill++; - - if (page->fill == IPZ_SPAGE_PER_KPAGE >> order) - list_move(&page->list, &pd->full[order]); - - mutex_unlock(&pd->lock); - - queue->queue_pages[0] = (void *)(page->page | (bit << (order + 9))); - queue->small_page = page; - queue->offset = bit << (order + 9); - return 1; - -out: - ehca_err(pd->ib_pd.device, "failed to allocate small queue page"); - mutex_unlock(&pd->lock); - return 0; -} - -static void free_small_queue_page(struct ipz_queue *queue, struct ehca_pd *pd) -{ - int order = ilog2(queue->pagesize) - 9; - struct ipz_small_queue_page *page = 
queue->small_page; - unsigned long bit; - int free_page = 0; - - bit = ((unsigned long)queue->queue_pages[0] & ~PAGE_MASK) - >> (order + 9); - - mutex_lock(&pd->lock); - - __clear_bit(bit, page->bitmap); - page->fill--; - - if (page->fill == 0) { - list_del(&page->list); - free_page = 1; - } - - if (page->fill == (IPZ_SPAGE_PER_KPAGE >> order) - 1) - /* the page was full until we freed the chunk */ - list_move_tail(&page->list, &pd->free[order]); - - mutex_unlock(&pd->lock); - - if (free_page) { - free_page(page->page); - kmem_cache_free(small_qp_cache, page); - } -} - -int ipz_queue_ctor(struct ehca_pd *pd, struct ipz_queue *queue, - const u32 nr_of_pages, const u32 pagesize, - const u32 qe_size, const u32 nr_of_sg, - int is_small) -{ - if (pagesize > PAGE_SIZE) { - ehca_gen_err("FATAL ERROR: pagesize=%x " - "is greater than kernel page size", pagesize); - return 0; - } - - /* init queue fields */ - queue->queue_length = nr_of_pages * pagesize; - queue->pagesize = pagesize; - queue->qe_size = qe_size; - queue->act_nr_of_sg = nr_of_sg; - queue->current_q_offset = 0; - queue->toggle_state = 1; - queue->small_page = NULL; - - /* allocate queue page pointers */ - queue->queue_pages = kzalloc(nr_of_pages * sizeof(void *), - GFP_KERNEL | __GFP_NOWARN); - if (!queue->queue_pages) { - queue->queue_pages = vzalloc(nr_of_pages * sizeof(void *)); - if (!queue->queue_pages) { - ehca_gen_err("Couldn't allocate queue page list"); - return 0; - } - } - - /* allocate actual queue pages */ - if (is_small) { - if (!alloc_small_queue_page(queue, pd)) - goto ipz_queue_ctor_exit0; - } else - if (!alloc_queue_pages(queue, nr_of_pages)) - goto ipz_queue_ctor_exit0; - - return 1; - -ipz_queue_ctor_exit0: - ehca_gen_err("Couldn't alloc pages queue=%p " - "nr_of_pages=%x", queue, nr_of_pages); - kvfree(queue->queue_pages); - - return 0; -} - -int ipz_queue_dtor(struct ehca_pd *pd, struct ipz_queue *queue) -{ - int i, nr_pages; - - if (!queue || !queue->queue_pages) { - ehca_gen_dbg("queue 
or queue_pages is NULL"); - return 0; - } - - if (queue->small_page) - free_small_queue_page(queue, pd); - else { - nr_pages = queue->queue_length / queue->pagesize; - for (i = 0; i < nr_pages; i += PAGES_PER_KPAGE) - free_page((unsigned long)queue->queue_pages[i]); - } - - kvfree(queue->queue_pages); - - return 1; -} - -int ehca_init_small_qp_cache(void) -{ - small_qp_cache = kmem_cache_create("ehca_cache_small_qp", - sizeof(struct ipz_small_queue_page), - 0, SLAB_HWCACHE_ALIGN, NULL); - if (!small_qp_cache) - return -ENOMEM; - - return 0; -} - -void ehca_cleanup_small_qp_cache(void) -{ - kmem_cache_destroy(small_qp_cache); -} diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.h b/drivers/infiniband/hw/ehca/ipz_pt_fn.h deleted file mode 100644 index a801274..0000000 --- a/drivers/infiniband/hw/ehca/ipz_pt_fn.h +++ /dev/null @@ -1,289 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * internal queue handling - * - * Authors: Waleri Fomin - * Reinhard Ernst - * Christoph Raisch - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef __IPZ_PT_FN_H__ -#define __IPZ_PT_FN_H__ - -#define EHCA_PAGESHIFT 12 -#define EHCA_PAGESIZE 4096UL -#define EHCA_PAGEMASK (~(EHCA_PAGESIZE-1)) -#define EHCA_PT_ENTRIES 512UL - -#include "ehca_tools.h" -#include "ehca_qes.h" - -struct ehca_pd; -struct ipz_small_queue_page; - -extern struct kmem_cache *small_qp_cache; - -/* struct generic ehca page */ -struct ipz_page { - u8 entries[EHCA_PAGESIZE]; -}; - -#define IPZ_SPAGE_PER_KPAGE (PAGE_SIZE / 512) - -struct ipz_small_queue_page { - unsigned long page; - unsigned long bitmap[IPZ_SPAGE_PER_KPAGE / BITS_PER_LONG]; - int fill; - void *mapped_addr; - u32 mmap_count; - struct list_head list; -}; - -/* struct generic queue in linux kernel virtual memory (kv) */ -struct ipz_queue { - u64 current_q_offset; /* current queue entry */ - - struct ipz_page **queue_pages; /* array of pages belonging to queue */ - u32 qe_size; /* queue entry size */ - u32 act_nr_of_sg; - u32 queue_length; /* queue length allocated in bytes */ - u32 pagesize; - u32 toggle_state; /* toggle flag - per page */ - u32 offset; /* save offset within page for small_qp */ - struct ipz_small_queue_page *small_page; -}; - -/* - * return current Queue Entry for a certain q_offset - * returns address (kv) of Queue Entry - */ -static inline void *ipz_qeit_calc(struct ipz_queue *queue, u64 q_offset) -{ - struct ipz_page *current_page; - if (q_offset >= queue->queue_length) - return NULL; - current_page = 
(queue->queue_pages)[q_offset >> EHCA_PAGESHIFT]; - return &current_page->entries[q_offset & (EHCA_PAGESIZE - 1)]; -} - -/* - * return current Queue Entry - * returns address (kv) of Queue Entry - */ -static inline void *ipz_qeit_get(struct ipz_queue *queue) -{ - return ipz_qeit_calc(queue, queue->current_q_offset); -} - -/* - * return current Queue Page , increment Queue Page iterator from - * page to page in struct ipz_queue, last increment will return 0! and - * NOT wrap - * returns address (kv) of Queue Page - * warning don't use in parallel with ipz_QE_get_inc() - */ -void *ipz_qpageit_get_inc(struct ipz_queue *queue); - -/* - * return current Queue Entry, increment Queue Entry iterator by one - * step in struct ipz_queue, will wrap in ringbuffer - * returns address (kv) of Queue Entry BEFORE increment - * warning don't use in parallel with ipz_qpageit_get_inc() - */ -static inline void *ipz_qeit_get_inc(struct ipz_queue *queue) -{ - void *ret = ipz_qeit_get(queue); - queue->current_q_offset += queue->qe_size; - if (queue->current_q_offset >= queue->queue_length) { - queue->current_q_offset = 0; - /* toggle the valid flag */ - queue->toggle_state = (~queue->toggle_state) & 1; - } - - return ret; -} - -/* - * return a bool indicating whether current Queue Entry is valid - */ -static inline int ipz_qeit_is_valid(struct ipz_queue *queue) -{ - struct ehca_cqe *cqe = ipz_qeit_get(queue); - return ((cqe->cqe_flags >> 7) == (queue->toggle_state & 1)); -} - -/* - * return current Queue Entry, increment Queue Entry iterator by one - * step in struct ipz_queue, will wrap in ringbuffer - * returns address (kv) of Queue Entry BEFORE increment - * returns 0 and does not increment, if wrong valid state - * warning don't use in parallel with ipz_qpageit_get_inc() - */ -static inline void *ipz_qeit_get_inc_valid(struct ipz_queue *queue) -{ - return ipz_qeit_is_valid(queue) ? 
ipz_qeit_get_inc(queue) : NULL; -} - -/* - * returns and resets Queue Entry iterator - * returns address (kv) of first Queue Entry - */ -static inline void *ipz_qeit_reset(struct ipz_queue *queue) -{ - queue->current_q_offset = 0; - return ipz_qeit_get(queue); -} - -/* - * return the q_offset corresponding to an absolute address - */ -int ipz_queue_abs_to_offset(struct ipz_queue *queue, u64 addr, u64 *q_offset); - -/* - * return the next queue offset. don't modify the queue. - */ -static inline u64 ipz_queue_advance_offset(struct ipz_queue *queue, u64 offset) -{ - offset += queue->qe_size; - if (offset >= queue->queue_length) offset = 0; - return offset; -} - -/* struct generic page table */ -struct ipz_pt { - u64 entries[EHCA_PT_ENTRIES]; -}; - -/* struct page table for a queue, only to be used in pf */ -struct ipz_qpt { - /* queue page tables (kv), use u64 because we know the element length */ - u64 *qpts; - u32 n_qpts; - u32 n_ptes; /* number of page table entries */ - u64 *current_pte_addr; -}; - -/* - * constructor for a ipz_queue_t, placement new for ipz_queue_t, - * new for all dependent datastructors - * all QP Tables are the same - * flow: - * allocate+pin queue - * see ipz_qpt_ctor() - * returns true if ok, false if out of memory - */ -int ipz_queue_ctor(struct ehca_pd *pd, struct ipz_queue *queue, - const u32 nr_of_pages, const u32 pagesize, - const u32 qe_size, const u32 nr_of_sg, - int is_small); - -/* - * destructor for a ipz_queue_t - * -# free queue - * see ipz_queue_ctor() - * returns true if ok, false if queue was NULL-ptr of free failed - */ -int ipz_queue_dtor(struct ehca_pd *pd, struct ipz_queue *queue); - -/* - * constructor for a ipz_qpt_t, - * placement new for struct ipz_queue, new for all dependent datastructors - * all QP Tables are the same, - * flow: - * -# allocate+pin queue - * -# initialise ptcb - * -# allocate+pin PTs - * -# link PTs to a ring, according to HCA Arch, set bit62 id needed - * -# the ring must have room for exactly 
nr_of_PTEs - * see ipz_qpt_ctor() - */ -void ipz_qpt_ctor(struct ipz_qpt *qpt, - const u32 nr_of_qes, - const u32 pagesize, - const u32 qe_size, - const u8 lowbyte, const u8 toggle, - u32 * act_nr_of_QEs, u32 * act_nr_of_pages); - -/* - * return current Queue Entry, increment Queue Entry iterator by one - * step in struct ipz_queue, will wrap in ringbuffer - * returns address (kv) of Queue Entry BEFORE increment - * warning don't use in parallel with ipz_qpageit_get_inc() - * warning unpredictable results may occur if steps>act_nr_of_queue_entries - * fix EQ page problems - */ -void *ipz_qeit_eq_get_inc(struct ipz_queue *queue); - -/* - * return current Event Queue Entry, increment Queue Entry iterator - * by one step in struct ipz_queue if valid, will wrap in ringbuffer - * returns address (kv) of Queue Entry BEFORE increment - * returns 0 and does not increment, if wrong valid state - * warning don't use in parallel with ipz_queue_QPageit_get_inc() - * warning unpredictable results may occur if steps>act_nr_of_queue_entries - */ -static inline void *ipz_eqit_eq_get_inc_valid(struct ipz_queue *queue) -{ - void *ret = ipz_qeit_get(queue); - u32 qe = *(u8 *)ret; - if ((qe >> 7) != (queue->toggle_state & 1)) - return NULL; - ipz_qeit_eq_get_inc(queue); /* this is a good one */ - return ret; -} - -static inline void *ipz_eqit_eq_peek_valid(struct ipz_queue *queue) -{ - void *ret = ipz_qeit_get(queue); - u32 qe = *(u8 *)ret; - if ((qe >> 7) != (queue->toggle_state & 1)) - return NULL; - return ret; -} - -/* returns address (GX) of first queue entry */ -static inline u64 ipz_qpt_get_firstpage(struct ipz_qpt *qpt) -{ - return be64_to_cpu(qpt->qpts[0]); -} - -/* returns address (kv) of first page of queue page table */ -static inline void *ipz_qpt_get_qpt(struct ipz_qpt *qpt) -{ - return qpt->qpts; -} - -#endif /* __IPZ_PT_FN_H__ */ diff --git a/drivers/staging/rdma/Kconfig b/drivers/staging/rdma/Kconfig index cf5fe9b..d7f6235 100644 --- a/drivers/staging/rdma/Kconfig +++ 
b/drivers/staging/rdma/Kconfig @@ -24,6 +24,8 @@ if STAGING_RDMA source "drivers/staging/rdma/amso1100/Kconfig" +source "drivers/staging/rdma/ehca/Kconfig" + source "drivers/staging/rdma/hfi1/Kconfig" source "drivers/staging/rdma/ipath/Kconfig" diff --git a/drivers/staging/rdma/Makefile b/drivers/staging/rdma/Makefile index cbd915a..139d78e 100644 --- a/drivers/staging/rdma/Makefile +++ b/drivers/staging/rdma/Makefile @@ -1,4 +1,5 @@ # Entries for RDMA_STAGING tree obj-$(CONFIG_INFINIBAND_AMSO1100) += amso1100/ +obj-$(CONFIG_INFINIBAND_EHCA) += ehca/ obj-$(CONFIG_INFINIBAND_HFI1) += hfi1/ obj-$(CONFIG_INFINIBAND_IPATH) += ipath/ diff --git a/drivers/staging/rdma/ehca/Kconfig b/drivers/staging/rdma/ehca/Kconfig new file mode 100644 index 0000000..3fadd2a --- /dev/null +++ b/drivers/staging/rdma/ehca/Kconfig @@ -0,0 +1,10 @@ +config INFINIBAND_EHCA + tristate "eHCA support" + depends on IBMEBUS + ---help--- + This driver supports the deprecated IBM pSeries eHCA InfiniBand + adapter. + + To compile the driver as a module, choose M here. The module + will be called ib_ehca. + diff --git a/drivers/staging/rdma/ehca/Makefile b/drivers/staging/rdma/ehca/Makefile new file mode 100644 index 0000000..74d284e --- /dev/null +++ b/drivers/staging/rdma/ehca/Makefile @@ -0,0 +1,16 @@ +# Authors: Heiko J Schick +# Christoph Raisch +# Joachim Fenkes +# +# Copyright (c) 2005 IBM Corporation +# +# All rights reserved. +# +# This source code is distributed under a dual license of GPL v2.0 and OpenIB BSD. 
+ +obj-$(CONFIG_INFINIBAND_EHCA) += ib_ehca.o + +ib_ehca-objs = ehca_main.o ehca_hca.o ehca_mcast.o ehca_pd.o ehca_av.o ehca_eq.o \ + ehca_cq.o ehca_qp.o ehca_sqp.o ehca_mrmw.o ehca_reqs.o ehca_irq.o \ + ehca_uverbs.o ipz_pt_fn.o hcp_if.o hcp_phyp.o + diff --git a/drivers/staging/rdma/ehca/TODO b/drivers/staging/rdma/ehca/TODO new file mode 100644 index 0000000..199a4a6 --- /dev/null +++ b/drivers/staging/rdma/ehca/TODO @@ -0,0 +1,4 @@ +9/2015 + +The ehca driver has been deprecated and moved to drivers/staging/rdma. +It will be removed in the 4.6 merge window. diff --git a/drivers/staging/rdma/ehca/ehca_av.c b/drivers/staging/rdma/ehca/ehca_av.c new file mode 100644 index 0000000..4659263 --- /dev/null +++ b/drivers/staging/rdma/ehca/ehca_av.c @@ -0,0 +1,277 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * address vector functions + * + * Authors: Hoang-Nam Nguyen + * Khadija Souissi + * Reinhard Ernst + * Christoph Raisch + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +#include "ehca_tools.h" +#include "ehca_iverbs.h" +#include "hcp_if.h" + +static struct kmem_cache *av_cache; + +int ehca_calc_ipd(struct ehca_shca *shca, int port, + enum ib_rate path_rate, u32 *ipd) +{ + int path = ib_rate_to_mult(path_rate); + int link, ret; + struct ib_port_attr pa; + + if (path_rate == IB_RATE_PORT_CURRENT) { + *ipd = 0; + return 0; + } + + if (unlikely(path < 0)) { + ehca_err(&shca->ib_device, "Invalid static rate! 
path_rate=%x", + path_rate); + return -EINVAL; + } + + ret = ehca_query_port(&shca->ib_device, port, &pa); + if (unlikely(ret < 0)) { + ehca_err(&shca->ib_device, "Failed to query port ret=%i", ret); + return ret; + } + + link = ib_width_enum_to_int(pa.active_width) * pa.active_speed; + + if (path >= link) + /* no need to throttle if path faster than link */ + *ipd = 0; + else + /* IPD = round((link / path) - 1) */ + *ipd = ((link + (path >> 1)) / path) - 1; + + return 0; +} + +struct ib_ah *ehca_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) +{ + int ret; + struct ehca_av *av; + struct ehca_shca *shca = container_of(pd->device, struct ehca_shca, + ib_device); + + av = kmem_cache_alloc(av_cache, GFP_KERNEL); + if (!av) { + ehca_err(pd->device, "Out of memory pd=%p ah_attr=%p", + pd, ah_attr); + return ERR_PTR(-ENOMEM); + } + + av->av.sl = ah_attr->sl; + av->av.dlid = ah_attr->dlid; + av->av.slid_path_bits = ah_attr->src_path_bits; + + if (ehca_static_rate < 0) { + u32 ipd; + if (ehca_calc_ipd(shca, ah_attr->port_num, + ah_attr->static_rate, &ipd)) { + ret = -EINVAL; + goto create_ah_exit1; + } + av->av.ipd = ipd; + } else + av->av.ipd = ehca_static_rate; + + av->av.lnh = ah_attr->ah_flags; + av->av.grh.word_0 = EHCA_BMASK_SET(GRH_IPVERSION_MASK, 6); + av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_TCLASS_MASK, + ah_attr->grh.traffic_class); + av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_FLOWLABEL_MASK, + ah_attr->grh.flow_label); + av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_HOPLIMIT_MASK, + ah_attr->grh.hop_limit); + av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_NEXTHEADER_MASK, 0x1B); + /* set sgid in grh.word_1 */ + if (ah_attr->ah_flags & IB_AH_GRH) { + int rc; + struct ib_port_attr port_attr; + union ib_gid gid; + memset(&port_attr, 0, sizeof(port_attr)); + rc = ehca_query_port(pd->device, ah_attr->port_num, + &port_attr); + if (rc) { /* invalid port number */ + ret = -EINVAL; + ehca_err(pd->device, "Invalid port number " + "ehca_query_port() returned %x " + "pd=%p 
ah_attr=%p", rc, pd, ah_attr); + goto create_ah_exit1; + } + memset(&gid, 0, sizeof(gid)); + rc = ehca_query_gid(pd->device, + ah_attr->port_num, + ah_attr->grh.sgid_index, &gid); + if (rc) { + ret = -EINVAL; + ehca_err(pd->device, "Failed to retrieve sgid " + "ehca_query_gid() returned %x " + "pd=%p ah_attr=%p", rc, pd, ah_attr); + goto create_ah_exit1; + } + memcpy(&av->av.grh.word_1, &gid, sizeof(gid)); + } + av->av.pmtu = shca->max_mtu; + + /* dgid comes in grh.word_3 */ + memcpy(&av->av.grh.word_3, &ah_attr->grh.dgid, + sizeof(ah_attr->grh.dgid)); + + return &av->ib_ah; + +create_ah_exit1: + kmem_cache_free(av_cache, av); + + return ERR_PTR(ret); +} + +int ehca_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr) +{ + struct ehca_av *av; + struct ehca_ud_av new_ehca_av; + struct ehca_shca *shca = container_of(ah->pd->device, struct ehca_shca, + ib_device); + + memset(&new_ehca_av, 0, sizeof(new_ehca_av)); + new_ehca_av.sl = ah_attr->sl; + new_ehca_av.dlid = ah_attr->dlid; + new_ehca_av.slid_path_bits = ah_attr->src_path_bits; + new_ehca_av.ipd = ah_attr->static_rate; + new_ehca_av.lnh = EHCA_BMASK_SET(GRH_FLAG_MASK, + (ah_attr->ah_flags & IB_AH_GRH) > 0); + new_ehca_av.grh.word_0 = EHCA_BMASK_SET(GRH_TCLASS_MASK, + ah_attr->grh.traffic_class); + new_ehca_av.grh.word_0 |= EHCA_BMASK_SET(GRH_FLOWLABEL_MASK, + ah_attr->grh.flow_label); + new_ehca_av.grh.word_0 |= EHCA_BMASK_SET(GRH_HOPLIMIT_MASK, + ah_attr->grh.hop_limit); + new_ehca_av.grh.word_0 |= EHCA_BMASK_SET(GRH_NEXTHEADER_MASK, 0x1b); + + /* set sgid in grh.word_1 */ + if (ah_attr->ah_flags & IB_AH_GRH) { + int rc; + struct ib_port_attr port_attr; + union ib_gid gid; + memset(&port_attr, 0, sizeof(port_attr)); + rc = ehca_query_port(ah->device, ah_attr->port_num, + &port_attr); + if (rc) { /* invalid port number */ + ehca_err(ah->device, "Invalid port number " + "ehca_query_port() returned %x " + "ah=%p ah_attr=%p port_num=%x", + rc, ah, ah_attr, ah_attr->port_num); + return -EINVAL; + } + memset(&gid, 
0, sizeof(gid)); + rc = ehca_query_gid(ah->device, + ah_attr->port_num, + ah_attr->grh.sgid_index, &gid); + if (rc) { + ehca_err(ah->device, "Failed to retrieve sgid " + "ehca_query_gid() returned %x " + "ah=%p ah_attr=%p port_num=%x " + "sgid_index=%x", + rc, ah, ah_attr, ah_attr->port_num, + ah_attr->grh.sgid_index); + return -EINVAL; + } + memcpy(&new_ehca_av.grh.word_1, &gid, sizeof(gid)); + } + + new_ehca_av.pmtu = shca->max_mtu; + + memcpy(&new_ehca_av.grh.word_3, &ah_attr->grh.dgid, + sizeof(ah_attr->grh.dgid)); + + av = container_of(ah, struct ehca_av, ib_ah); + av->av = new_ehca_av; + + return 0; +} + +int ehca_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr) +{ + struct ehca_av *av = container_of(ah, struct ehca_av, ib_ah); + + memcpy(&ah_attr->grh.dgid, &av->av.grh.word_3, + sizeof(ah_attr->grh.dgid)); + ah_attr->sl = av->av.sl; + + ah_attr->dlid = av->av.dlid; + + ah_attr->src_path_bits = av->av.slid_path_bits; + ah_attr->static_rate = av->av.ipd; + ah_attr->ah_flags = EHCA_BMASK_GET(GRH_FLAG_MASK, av->av.lnh); + ah_attr->grh.traffic_class = EHCA_BMASK_GET(GRH_TCLASS_MASK, + av->av.grh.word_0); + ah_attr->grh.hop_limit = EHCA_BMASK_GET(GRH_HOPLIMIT_MASK, + av->av.grh.word_0); + ah_attr->grh.flow_label = EHCA_BMASK_GET(GRH_FLOWLABEL_MASK, + av->av.grh.word_0); + + return 0; +} + +int ehca_destroy_ah(struct ib_ah *ah) +{ + kmem_cache_free(av_cache, container_of(ah, struct ehca_av, ib_ah)); + + return 0; +} + +int ehca_init_av_cache(void) +{ + av_cache = kmem_cache_create("ehca_cache_av", + sizeof(struct ehca_av), 0, + SLAB_HWCACHE_ALIGN, + NULL); + if (!av_cache) + return -ENOMEM; + return 0; +} + +void ehca_cleanup_av_cache(void) +{ + if (av_cache) + kmem_cache_destroy(av_cache); +} diff --git a/drivers/staging/rdma/ehca/ehca_classes.h b/drivers/staging/rdma/ehca/ehca_classes.h new file mode 100644 index 0000000..bd45e0f --- /dev/null +++ b/drivers/staging/rdma/ehca/ehca_classes.h @@ -0,0 +1,482 @@ +/* + * IBM eServer eHCA Infiniband device driver 
for Linux on POWER + * + * Struct definition for eHCA internal structures + * + * Authors: Heiko J Schick + * Christoph Raisch + * Joachim Fenkes + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef __EHCA_CLASSES_H__ +#define __EHCA_CLASSES_H__ + +struct ehca_module; +struct ehca_qp; +struct ehca_cq; +struct ehca_eq; +struct ehca_mr; +struct ehca_mw; +struct ehca_pd; +struct ehca_av; + +#include +#include + +#include +#include + +#ifdef CONFIG_PPC64 +#include "ehca_classes_pSeries.h" +#endif +#include "ipz_pt_fn.h" +#include "ehca_qes.h" +#include "ehca_irq.h" + +#define EHCA_EQE_CACHE_SIZE 20 +#define EHCA_MAX_NUM_QUEUES 0xffff + +struct ehca_eqe_cache_entry { + struct ehca_eqe *eqe; + struct ehca_cq *cq; +}; + +struct ehca_eq { + u32 length; + struct ipz_queue ipz_queue; + struct ipz_eq_handle ipz_eq_handle; + struct work_struct work; + struct h_galpas galpas; + int is_initialized; + struct ehca_pfeq pf; + spinlock_t spinlock; + struct tasklet_struct interrupt_task; + u32 ist; + spinlock_t irq_spinlock; + struct ehca_eqe_cache_entry eqe_cache[EHCA_EQE_CACHE_SIZE]; +}; + +struct ehca_sma_attr { + u16 lid, lmc, sm_sl, sm_lid; + u16 pkey_tbl_len, pkeys[16]; +}; + +struct ehca_sport { + struct ib_cq *ibcq_aqp1; + struct ib_qp *ibqp_sqp[2]; + /* lock to serialize modify_qp() calls for sqp in normal + * and irq path (when event PORT_ACTIVE is received first time) + */ + spinlock_t mod_sqp_lock; + enum ib_port_state port_state; + struct ehca_sma_attr saved_attr; + u32 pma_qp_nr; +}; + +#define HCA_CAP_MR_PGSIZE_4K 0x80000000 +#define HCA_CAP_MR_PGSIZE_64K 0x40000000 +#define HCA_CAP_MR_PGSIZE_1M 0x20000000 +#define HCA_CAP_MR_PGSIZE_16M 0x10000000 + +struct ehca_shca { + struct ib_device ib_device; + struct platform_device *ofdev; + u8 num_ports; + int hw_level; + struct list_head shca_list; + struct ipz_adapter_handle ipz_hca_handle; + struct ehca_sport sport[2]; + struct ehca_eq eq; + struct ehca_eq neq; + struct ehca_mr *maxmr; + struct ehca_pd *pd; + struct h_galpas galpas; + struct mutex modify_mutex; + u64 hca_cap; + /* MR pgsize: bit 0-3 means 4K, 64K, 1M, 16M respectively */ + u32 hca_cap_mr_pgsize; + int max_mtu; + int max_num_qps; + int
max_num_cqs; + atomic_t num_cqs; + atomic_t num_qps; +}; + +struct ehca_pd { + struct ib_pd ib_pd; + struct ipz_pd fw_pd; + /* small queue mgmt */ + struct mutex lock; + struct list_head free[2]; + struct list_head full[2]; +}; + +enum ehca_ext_qp_type { + EQPT_NORMAL = 0, + EQPT_LLQP = 1, + EQPT_SRQBASE = 2, + EQPT_SRQ = 3, +}; + +/* struct to cache modify_qp()'s parms for GSI/SMI qp */ +struct ehca_mod_qp_parm { + int mask; + struct ib_qp_attr attr; +}; + +#define EHCA_MOD_QP_PARM_MAX 4 + +#define QMAP_IDX_MASK 0xFFFFULL + +/* struct for tracking if cqes have been reported to the application */ +struct ehca_qmap_entry { + u16 app_wr_id; + u8 reported; + u8 cqe_req; +}; + +struct ehca_queue_map { + struct ehca_qmap_entry *map; + unsigned int entries; + unsigned int tail; + unsigned int left_to_poll; + unsigned int next_wqe_idx; /* Idx to first wqe to be flushed */ +}; + +/* function to calculate the next index for the qmap */ +static inline unsigned int next_index(unsigned int cur_index, unsigned int limit) +{ + unsigned int temp = cur_index + 1; + return (temp == limit) ? 
0 : temp; +} + +struct ehca_qp { + union { + struct ib_qp ib_qp; + struct ib_srq ib_srq; + }; + u32 qp_type; + enum ehca_ext_qp_type ext_type; + enum ib_qp_state state; + struct ipz_queue ipz_squeue; + struct ehca_queue_map sq_map; + struct ipz_queue ipz_rqueue; + struct ehca_queue_map rq_map; + struct h_galpas galpas; + u32 qkey; + u32 real_qp_num; + u32 token; + spinlock_t spinlock_s; + spinlock_t spinlock_r; + u32 sq_max_inline_data_size; + struct ipz_qp_handle ipz_qp_handle; + struct ehca_pfqp pf; + struct ib_qp_init_attr init_attr; + struct ehca_cq *send_cq; + struct ehca_cq *recv_cq; + unsigned int sqerr_purgeflag; + struct hlist_node list_entries; + /* array to cache modify_qp()'s parms for GSI/SMI qp */ + struct ehca_mod_qp_parm *mod_qp_parm; + int mod_qp_parm_idx; + /* mmap counter for resources mapped into user space */ + u32 mm_count_squeue; + u32 mm_count_rqueue; + u32 mm_count_galpa; + /* unsolicited ack circumvention */ + int unsol_ack_circ; + int mtu_shift; + u32 message_count; + u32 packet_count; + atomic_t nr_events; /* events seen */ + wait_queue_head_t wait_completion; + int mig_armed; + struct list_head sq_err_node; + struct list_head rq_err_node; +}; + +#define IS_SRQ(qp) (qp->ext_type == EQPT_SRQ) +#define HAS_SQ(qp) (qp->ext_type != EQPT_SRQ) +#define HAS_RQ(qp) (qp->ext_type != EQPT_SRQBASE) + +/* must be power of 2 */ +#define QP_HASHTAB_LEN 8 + +struct ehca_cq { + struct ib_cq ib_cq; + struct ipz_queue ipz_queue; + struct h_galpas galpas; + spinlock_t spinlock; + u32 cq_number; + u32 token; + u32 nr_of_entries; + struct ipz_cq_handle ipz_cq_handle; + struct ehca_pfcq pf; + spinlock_t cb_lock; + struct hlist_head qp_hashtab[QP_HASHTAB_LEN]; + struct list_head entry; + u32 nr_callbacks; /* #events assigned to cpu by scaling code */ + atomic_t nr_events; /* #events seen */ + wait_queue_head_t wait_completion; + spinlock_t task_lock; + /* mmap counter for resources mapped into user space */ + u32 mm_count_queue; + u32 mm_count_galpa; + struct 
list_head sqp_err_list; + struct list_head rqp_err_list; +}; + +enum ehca_mr_flag { + EHCA_MR_FLAG_FMR = 0x80000000, /* FMR, created with ehca_alloc_fmr */ + EHCA_MR_FLAG_MAXMR = 0x40000000, /* max-MR */ +}; + +struct ehca_mr { + union { + struct ib_mr ib_mr; /* must always be first in ehca_mr */ + struct ib_fmr ib_fmr; /* must always be first in ehca_mr */ + } ib; + struct ib_umem *umem; + spinlock_t mrlock; + + enum ehca_mr_flag flags; + u32 num_kpages; /* number of kernel pages */ + u32 num_hwpages; /* number of hw pages to form MR */ + u64 hwpage_size; /* hw page size used for this MR */ + int acl; /* ACL (stored here for usage in reregister) */ + u64 *start; /* virtual start address (stored here for */ + /* usage in reregister) */ + u64 size; /* size (stored here for usage in reregister) */ + u32 fmr_page_size; /* page size for FMR */ + u32 fmr_max_pages; /* max pages for FMR */ + u32 fmr_max_maps; /* max outstanding maps for FMR */ + u32 fmr_map_cnt; /* map counter for FMR */ + /* fw specific data */ + struct ipz_mrmw_handle ipz_mr_handle; /* MR handle for h-calls */ + struct h_galpas galpas; +}; + +struct ehca_mw { + struct ib_mw ib_mw; /* gen2 mw, must always be first in ehca_mw */ + spinlock_t mwlock; + + u8 never_bound; /* indication MW was never bound */ + struct ipz_mrmw_handle ipz_mw_handle; /* MW handle for h-calls */ + struct h_galpas galpas; +}; + +enum ehca_mr_pgi_type { + EHCA_MR_PGI_PHYS = 1, /* type of ehca_reg_phys_mr, + * ehca_rereg_phys_mr, + * ehca_reg_internal_maxmr */ + EHCA_MR_PGI_USER = 2, /* type of ehca_reg_user_mr */ + EHCA_MR_PGI_FMR = 3 /* type of ehca_map_phys_fmr */ +}; + +struct ehca_mr_pginfo { + enum ehca_mr_pgi_type type; + u64 num_kpages; + u64 kpage_cnt; + u64 hwpage_size; /* hw page size used for this MR */ + u64 num_hwpages; /* number of hw pages */ + u64 hwpage_cnt; /* counter for hw pages */ + u64 next_hwpage; /* next hw page in buffer/chunk/listelem */ + + union { + struct { /* type EHCA_MR_PGI_PHYS section */ + int 
num_phys_buf; + struct ib_phys_buf *phys_buf_array; + u64 next_buf; + } phy; + struct { /* type EHCA_MR_PGI_USER section */ + struct ib_umem *region; + struct scatterlist *next_sg; + u64 next_nmap; + } usr; + struct { /* type EHCA_MR_PGI_FMR section */ + u64 fmr_pgsize; + u64 *page_list; + u64 next_listelem; + } fmr; + } u; +}; + +/* output parameters for MR/FMR hipz calls */ +struct ehca_mr_hipzout_parms { + struct ipz_mrmw_handle handle; + u32 lkey; + u32 rkey; + u64 len; + u64 vaddr; + u32 acl; +}; + +/* output parameters for MW hipz calls */ +struct ehca_mw_hipzout_parms { + struct ipz_mrmw_handle handle; + u32 rkey; +}; + +struct ehca_av { + struct ib_ah ib_ah; + struct ehca_ud_av av; +}; + +struct ehca_ucontext { + struct ib_ucontext ib_ucontext; +}; + +int ehca_init_pd_cache(void); +void ehca_cleanup_pd_cache(void); +int ehca_init_cq_cache(void); +void ehca_cleanup_cq_cache(void); +int ehca_init_qp_cache(void); +void ehca_cleanup_qp_cache(void); +int ehca_init_av_cache(void); +void ehca_cleanup_av_cache(void); +int ehca_init_mrmw_cache(void); +void ehca_cleanup_mrmw_cache(void); +int ehca_init_small_qp_cache(void); +void ehca_cleanup_small_qp_cache(void); + +extern rwlock_t ehca_qp_idr_lock; +extern rwlock_t ehca_cq_idr_lock; +extern struct idr ehca_qp_idr; +extern struct idr ehca_cq_idr; +extern spinlock_t shca_list_lock; + +extern int ehca_static_rate; +extern int ehca_port_act_time; +extern bool ehca_use_hp_mr; +extern bool ehca_scaling_code; +extern int ehca_lock_hcalls; +extern int ehca_nr_ports; +extern int ehca_max_cq; +extern int ehca_max_qp; + +struct ipzu_queue_resp { + u32 qe_size; /* queue entry size */ + u32 act_nr_of_sg; + u32 queue_length; /* queue length allocated in bytes */ + u32 pagesize; + u32 toggle_state; + u32 offset; /* save offset within a page for small_qp */ +}; + +struct ehca_create_cq_resp { + u32 cq_number; + u32 token; + struct ipzu_queue_resp ipz_queue; + u32 fw_handle_ofs; + u32 dummy; +}; + +struct ehca_create_qp_resp { + 
u32 qp_num; + u32 token; + u32 qp_type; + u32 ext_type; + u32 qkey; + /* qp_num assigned by ehca: sqp0/1 may have got different numbers */ + u32 real_qp_num; + u32 fw_handle_ofs; + u32 dummy; + struct ipzu_queue_resp ipz_squeue; + struct ipzu_queue_resp ipz_rqueue; +}; + +struct ehca_alloc_cq_parms { + u32 nr_cqe; + u32 act_nr_of_entries; + u32 act_pages; + struct ipz_eq_handle eq_handle; +}; + +enum ehca_service_type { + ST_RC = 0, + ST_UC = 1, + ST_RD = 2, + ST_UD = 3, +}; + +enum ehca_ll_comp_flags { + LLQP_SEND_COMP = 0x20, + LLQP_RECV_COMP = 0x40, + LLQP_COMP_MASK = 0x60, +}; + +struct ehca_alloc_queue_parms { + /* input parameters */ + int max_wr; + int max_sge; + int page_size; + int is_small; + + /* output parameters */ + u16 act_nr_wqes; + u8 act_nr_sges; + u32 queue_size; /* bytes for small queues, pages otherwise */ +}; + +struct ehca_alloc_qp_parms { + struct ehca_alloc_queue_parms squeue; + struct ehca_alloc_queue_parms rqueue; + + /* input parameters */ + enum ehca_service_type servicetype; + int qp_storage; + int sigtype; + enum ehca_ext_qp_type ext_type; + enum ehca_ll_comp_flags ll_comp_flags; + int ud_av_l_key_ctl; + + u32 token; + struct ipz_eq_handle eq_handle; + struct ipz_pd pd; + struct ipz_cq_handle send_cq_handle, recv_cq_handle; + + u32 srq_qpn, srq_token, srq_limit; + + /* output parameters */ + u32 real_qp_num; + struct ipz_qp_handle qp_handle; + struct h_galpas galpas; +}; + +int ehca_cq_assign_qp(struct ehca_cq *cq, struct ehca_qp *qp); +int ehca_cq_unassign_qp(struct ehca_cq *cq, unsigned int qp_num); +struct ehca_qp *ehca_cq_get_qp(struct ehca_cq *cq, int qp_num); + +#endif diff --git a/drivers/staging/rdma/ehca/ehca_classes_pSeries.h b/drivers/staging/rdma/ehca/ehca_classes_pSeries.h new file mode 100644 index 0000000..689c357 --- /dev/null +++ b/drivers/staging/rdma/ehca/ehca_classes_pSeries.h @@ -0,0 +1,208 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * pSeries interface definitions + * + * 
Authors: Waleri Fomin + * Christoph Raisch + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef __EHCA_CLASSES_PSERIES_H__ +#define __EHCA_CLASSES_PSERIES_H__ + +#include "hcp_phyp.h" +#include "ipz_pt_fn.h" + + +struct ehca_pfqp { + struct ipz_qpt sqpt; + struct ipz_qpt rqpt; +}; + +struct ehca_pfcq { + struct ipz_qpt qpt; + u32 cqnr; +}; + +struct ehca_pfeq { + struct ipz_qpt qpt; + struct h_galpa galpa; + u32 eqnr; +}; + +struct ipz_adapter_handle { + u64 handle; +}; + +struct ipz_cq_handle { + u64 handle; +}; + +struct ipz_eq_handle { + u64 handle; +}; + +struct ipz_qp_handle { + u64 handle; +}; +struct ipz_mrmw_handle { + u64 handle; +}; + +struct ipz_pd { + u32 value; +}; + +struct hcp_modify_qp_control_block { + u32 qkey; /* 00 */ + u32 rdd; /* reliable datagram domain */ + u32 send_psn; /* 02 */ + u32 receive_psn; /* 03 */ + u32 prim_phys_port; /* 04 */ + u32 alt_phys_port; /* 05 */ + u32 prim_p_key_idx; /* 06 */ + u32 alt_p_key_idx; /* 07 */ + u32 rdma_atomic_ctrl; /* 08 */ + u32 qp_state; /* 09 */ + u32 reserved_10; /* 10 */ + u32 rdma_nr_atomic_resp_res; /* 11 */ + u32 path_migration_state; /* 12 */ + u32 rdma_atomic_outst_dest_qp; /* 13 */ + u32 dest_qp_nr; /* 14 */ + u32 min_rnr_nak_timer_field; /* 15 */ + u32 service_level; /* 16 */ + u32 send_grh_flag; /* 17 */ + u32 retry_count; /* 18 */ + u32 timeout; /* 19 */ + u32 path_mtu; /* 20 */ + u32 max_static_rate; /* 21 */ + u32 dlid; /* 22 */ + u32 rnr_retry_count; /* 23 */ + u32 source_path_bits; /* 24 */ + u32 traffic_class; /* 25 */ + u32 hop_limit; /* 26 */ + u32 source_gid_idx; /* 27 */ + u32 flow_label; /* 28 */ + u32 reserved_29; /* 29 */ + union { /* 30 */ + u64 dw[2]; + u8 byte[16]; + } dest_gid; + u32 service_level_al; /* 34 */ + u32 send_grh_flag_al; /* 35 */ + u32 retry_count_al; /* 36 */ + u32 timeout_al; /* 37 */ + u32 max_static_rate_al; /* 38 */ + u32 dlid_al; /* 39 */ + u32 rnr_retry_count_al; /* 40 */ + u32 source_path_bits_al; /* 41 */ + u32 traffic_class_al; /* 42 */ + u32 hop_limit_al; /* 43 */ + u32 source_gid_idx_al; /* 44 */ + u32 flow_label_al; /* 45 */ + 
u32 reserved_46; /* 46 */ + u32 reserved_47; /* 47 */ + union { /* 48 */ + u64 dw[2]; + u8 byte[16]; + } dest_gid_al; + u32 max_nr_outst_send_wr; /* 52 */ + u32 max_nr_outst_recv_wr; /* 53 */ + u32 disable_ete_credit_check; /* 54 */ + u32 qp_number; /* 55 */ + u64 send_queue_handle; /* 56 */ + u64 recv_queue_handle; /* 58 */ + u32 actual_nr_sges_in_sq_wqe; /* 60 */ + u32 actual_nr_sges_in_rq_wqe; /* 61 */ + u32 qp_enable; /* 62 */ + u32 curr_srq_limit; /* 63 */ + u64 qp_aff_asyn_ev_log_reg; /* 64 */ + u64 shared_rq_hndl; /* 66 */ + u64 trigg_doorbell_qp_hndl; /* 68 */ + u32 reserved_70_127[58]; /* 70 */ +}; + +#define MQPCB_MASK_QKEY EHCA_BMASK_IBM( 0, 0) +#define MQPCB_MASK_SEND_PSN EHCA_BMASK_IBM( 2, 2) +#define MQPCB_MASK_RECEIVE_PSN EHCA_BMASK_IBM( 3, 3) +#define MQPCB_MASK_PRIM_PHYS_PORT EHCA_BMASK_IBM( 4, 4) +#define MQPCB_PRIM_PHYS_PORT EHCA_BMASK_IBM(24, 31) +#define MQPCB_MASK_ALT_PHYS_PORT EHCA_BMASK_IBM( 5, 5) +#define MQPCB_MASK_PRIM_P_KEY_IDX EHCA_BMASK_IBM( 6, 6) +#define MQPCB_PRIM_P_KEY_IDX EHCA_BMASK_IBM(24, 31) +#define MQPCB_MASK_ALT_P_KEY_IDX EHCA_BMASK_IBM( 7, 7) +#define MQPCB_MASK_RDMA_ATOMIC_CTRL EHCA_BMASK_IBM( 8, 8) +#define MQPCB_MASK_QP_STATE EHCA_BMASK_IBM( 9, 9) +#define MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES EHCA_BMASK_IBM(11, 11) +#define MQPCB_MASK_PATH_MIGRATION_STATE EHCA_BMASK_IBM(12, 12) +#define MQPCB_MASK_RDMA_ATOMIC_OUTST_DEST_QP EHCA_BMASK_IBM(13, 13) +#define MQPCB_MASK_DEST_QP_NR EHCA_BMASK_IBM(14, 14) +#define MQPCB_MASK_MIN_RNR_NAK_TIMER_FIELD EHCA_BMASK_IBM(15, 15) +#define MQPCB_MASK_SERVICE_LEVEL EHCA_BMASK_IBM(16, 16) +#define MQPCB_MASK_SEND_GRH_FLAG EHCA_BMASK_IBM(17, 17) +#define MQPCB_MASK_RETRY_COUNT EHCA_BMASK_IBM(18, 18) +#define MQPCB_MASK_TIMEOUT EHCA_BMASK_IBM(19, 19) +#define MQPCB_MASK_PATH_MTU EHCA_BMASK_IBM(20, 20) +#define MQPCB_MASK_MAX_STATIC_RATE EHCA_BMASK_IBM(21, 21) +#define MQPCB_MASK_DLID EHCA_BMASK_IBM(22, 22) +#define MQPCB_MASK_RNR_RETRY_COUNT EHCA_BMASK_IBM(23, 23) +#define 
MQPCB_MASK_SOURCE_PATH_BITS EHCA_BMASK_IBM(24, 24) +#define MQPCB_MASK_TRAFFIC_CLASS EHCA_BMASK_IBM(25, 25) +#define MQPCB_MASK_HOP_LIMIT EHCA_BMASK_IBM(26, 26) +#define MQPCB_MASK_SOURCE_GID_IDX EHCA_BMASK_IBM(27, 27) +#define MQPCB_MASK_FLOW_LABEL EHCA_BMASK_IBM(28, 28) +#define MQPCB_MASK_DEST_GID EHCA_BMASK_IBM(30, 30) +#define MQPCB_MASK_SERVICE_LEVEL_AL EHCA_BMASK_IBM(31, 31) +#define MQPCB_MASK_SEND_GRH_FLAG_AL EHCA_BMASK_IBM(32, 32) +#define MQPCB_MASK_RETRY_COUNT_AL EHCA_BMASK_IBM(33, 33) +#define MQPCB_MASK_TIMEOUT_AL EHCA_BMASK_IBM(34, 34) +#define MQPCB_MASK_MAX_STATIC_RATE_AL EHCA_BMASK_IBM(35, 35) +#define MQPCB_MASK_DLID_AL EHCA_BMASK_IBM(36, 36) +#define MQPCB_MASK_RNR_RETRY_COUNT_AL EHCA_BMASK_IBM(37, 37) +#define MQPCB_MASK_SOURCE_PATH_BITS_AL EHCA_BMASK_IBM(38, 38) +#define MQPCB_MASK_TRAFFIC_CLASS_AL EHCA_BMASK_IBM(39, 39) +#define MQPCB_MASK_HOP_LIMIT_AL EHCA_BMASK_IBM(40, 40) +#define MQPCB_MASK_SOURCE_GID_IDX_AL EHCA_BMASK_IBM(41, 41) +#define MQPCB_MASK_FLOW_LABEL_AL EHCA_BMASK_IBM(42, 42) +#define MQPCB_MASK_DEST_GID_AL EHCA_BMASK_IBM(44, 44) +#define MQPCB_MASK_MAX_NR_OUTST_SEND_WR EHCA_BMASK_IBM(45, 45) +#define MQPCB_MASK_MAX_NR_OUTST_RECV_WR EHCA_BMASK_IBM(46, 46) +#define MQPCB_MASK_DISABLE_ETE_CREDIT_CHECK EHCA_BMASK_IBM(47, 47) +#define MQPCB_MASK_QP_ENABLE EHCA_BMASK_IBM(48, 48) +#define MQPCB_MASK_CURR_SRQ_LIMIT EHCA_BMASK_IBM(49, 49) +#define MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG EHCA_BMASK_IBM(50, 50) +#define MQPCB_MASK_SHARED_RQ_HNDL EHCA_BMASK_IBM(51, 51) + +#endif /* __EHCA_CLASSES_PSERIES_H__ */ diff --git a/drivers/staging/rdma/ehca/ehca_cq.c b/drivers/staging/rdma/ehca/ehca_cq.c new file mode 100644 index 0000000..9b68b17 --- /dev/null +++ b/drivers/staging/rdma/ehca/ehca_cq.c @@ -0,0 +1,397 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * Completion queue handling + * + * Authors: Waleri Fomin + * Khadija Souissi + * Reinhard Ernst + * Heiko J Schick + * Hoang-Nam Nguyen + * + * + * Copyright 
(c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include + +#include "ehca_iverbs.h" +#include "ehca_classes.h" +#include "ehca_irq.h" +#include "hcp_if.h" + +static struct kmem_cache *cq_cache; + +int ehca_cq_assign_qp(struct ehca_cq *cq, struct ehca_qp *qp) +{ + unsigned int qp_num = qp->real_qp_num; + unsigned int key = qp_num & (QP_HASHTAB_LEN-1); + unsigned long flags; + + spin_lock_irqsave(&cq->spinlock, flags); + hlist_add_head(&qp->list_entries, &cq->qp_hashtab[key]); + spin_unlock_irqrestore(&cq->spinlock, flags); + + ehca_dbg(cq->ib_cq.device, "cq_num=%x real_qp_num=%x", + cq->cq_number, qp_num); + + return 0; +} + +int ehca_cq_unassign_qp(struct ehca_cq *cq, unsigned int real_qp_num) +{ + int ret = -EINVAL; + unsigned int key = real_qp_num & (QP_HASHTAB_LEN-1); + struct hlist_node *iter; + struct ehca_qp *qp; + unsigned long flags; + + spin_lock_irqsave(&cq->spinlock, flags); + hlist_for_each(iter, &cq->qp_hashtab[key]) { + qp = hlist_entry(iter, struct ehca_qp, list_entries); + if (qp->real_qp_num == real_qp_num) { + hlist_del(iter); + ehca_dbg(cq->ib_cq.device, + "removed qp from cq .cq_num=%x real_qp_num=%x", + cq->cq_number, real_qp_num); + ret = 0; + break; + } + } + spin_unlock_irqrestore(&cq->spinlock, flags); + if (ret) + ehca_err(cq->ib_cq.device, + "qp not found cq_num=%x real_qp_num=%x", + cq->cq_number, real_qp_num); + + return ret; +} + +struct ehca_qp *ehca_cq_get_qp(struct ehca_cq *cq, int real_qp_num) +{ + struct ehca_qp *ret = NULL; + unsigned int key = real_qp_num & (QP_HASHTAB_LEN-1); + struct hlist_node *iter; + struct ehca_qp *qp; + hlist_for_each(iter, &cq->qp_hashtab[key]) { + qp = hlist_entry(iter, struct ehca_qp, list_entries); + if (qp->real_qp_num == real_qp_num) { + ret = qp; + break; + } + } + return ret; +} + +struct ib_cq *ehca_create_cq(struct ib_device *device, + const struct ib_cq_init_attr *attr, + struct ib_ucontext *context, + struct ib_udata *udata) +{ + int cqe = attr->cqe; + static const u32 additional_cqe = 20; + struct ib_cq *cq; + struct ehca_cq 
*my_cq; + struct ehca_shca *shca = + container_of(device, struct ehca_shca, ib_device); + struct ipz_adapter_handle adapter_handle; + struct ehca_alloc_cq_parms param; /* h_call's out parameters */ + struct h_galpa gal; + void *vpage; + u32 counter; + u64 rpage, cqx_fec, h_ret; + int ipz_rc, i; + unsigned long flags; + + if (attr->flags) + return ERR_PTR(-EINVAL); + + if (cqe >= 0xFFFFFFFF - 64 - additional_cqe) + return ERR_PTR(-EINVAL); + + if (!atomic_add_unless(&shca->num_cqs, 1, shca->max_num_cqs)) { + ehca_err(device, "Unable to create CQ, max number of %i " + "CQs reached.", shca->max_num_cqs); + ehca_err(device, "To increase the maximum number of CQs " + "use the number_of_cqs module parameter.\n"); + return ERR_PTR(-ENOSPC); + } + + my_cq = kmem_cache_zalloc(cq_cache, GFP_KERNEL); + if (!my_cq) { + ehca_err(device, "Out of memory for ehca_cq struct device=%p", + device); + atomic_dec(&shca->num_cqs); + return ERR_PTR(-ENOMEM); + } + + memset(&param, 0, sizeof(struct ehca_alloc_cq_parms)); + + spin_lock_init(&my_cq->spinlock); + spin_lock_init(&my_cq->cb_lock); + spin_lock_init(&my_cq->task_lock); + atomic_set(&my_cq->nr_events, 0); + init_waitqueue_head(&my_cq->wait_completion); + + cq = &my_cq->ib_cq; + + adapter_handle = shca->ipz_hca_handle; + param.eq_handle = shca->eq.ipz_eq_handle; + + idr_preload(GFP_KERNEL); + write_lock_irqsave(&ehca_cq_idr_lock, flags); + my_cq->token = idr_alloc(&ehca_cq_idr, my_cq, 0, 0x2000000, GFP_NOWAIT); + write_unlock_irqrestore(&ehca_cq_idr_lock, flags); + idr_preload_end(); + + if (my_cq->token < 0) { + cq = ERR_PTR(-ENOMEM); + ehca_err(device, "Can't allocate new idr entry. device=%p", + device); + goto create_cq_exit1; + } + + /* + * CQs maximum depth is 4GB-64, but we need additional 20 as buffer + * for receiving errors CQEs.
+ */ + param.nr_cqe = cqe + additional_cqe; + h_ret = hipz_h_alloc_resource_cq(adapter_handle, my_cq, ¶m); + + if (h_ret != H_SUCCESS) { + ehca_err(device, "hipz_h_alloc_resource_cq() failed " + "h_ret=%lli device=%p", h_ret, device); + cq = ERR_PTR(ehca2ib_return_code(h_ret)); + goto create_cq_exit2; + } + + ipz_rc = ipz_queue_ctor(NULL, &my_cq->ipz_queue, param.act_pages, + EHCA_PAGESIZE, sizeof(struct ehca_cqe), 0, 0); + if (!ipz_rc) { + ehca_err(device, "ipz_queue_ctor() failed ipz_rc=%i device=%p", + ipz_rc, device); + cq = ERR_PTR(-EINVAL); + goto create_cq_exit3; + } + + for (counter = 0; counter < param.act_pages; counter++) { + vpage = ipz_qpageit_get_inc(&my_cq->ipz_queue); + if (!vpage) { + ehca_err(device, "ipz_qpageit_get_inc() " + "returns NULL device=%p", device); + cq = ERR_PTR(-EAGAIN); + goto create_cq_exit4; + } + rpage = __pa(vpage); + + h_ret = hipz_h_register_rpage_cq(adapter_handle, + my_cq->ipz_cq_handle, + &my_cq->pf, + 0, + 0, + rpage, + 1, + my_cq->galpas. + kernel); + + if (h_ret < H_SUCCESS) { + ehca_err(device, "hipz_h_register_rpage_cq() failed " + "ehca_cq=%p cq_num=%x h_ret=%lli counter=%i " + "act_pages=%i", my_cq, my_cq->cq_number, + h_ret, counter, param.act_pages); + cq = ERR_PTR(-EINVAL); + goto create_cq_exit4; + } + + if (counter == (param.act_pages - 1)) { + vpage = ipz_qpageit_get_inc(&my_cq->ipz_queue); + if ((h_ret != H_SUCCESS) || vpage) { + ehca_err(device, "Registration of pages not " + "complete ehca_cq=%p cq_num=%x " + "h_ret=%lli", my_cq, my_cq->cq_number, + h_ret); + cq = ERR_PTR(-EAGAIN); + goto create_cq_exit4; + } + } else { + if (h_ret != H_PAGE_REGISTERED) { + ehca_err(device, "Registration of page failed " + "ehca_cq=%p cq_num=%x h_ret=%lli " + "counter=%i act_pages=%i", + my_cq, my_cq->cq_number, + h_ret, counter, param.act_pages); + cq = ERR_PTR(-ENOMEM); + goto create_cq_exit4; + } + } + } + + ipz_qeit_reset(&my_cq->ipz_queue); + + gal = my_cq->galpas.kernel; + cqx_fec = hipz_galpa_load(gal, 
CQTEMM_OFFSET(cqx_fec)); + ehca_dbg(device, "ehca_cq=%p cq_num=%x CQX_FEC=%llx", + my_cq, my_cq->cq_number, cqx_fec); + + my_cq->ib_cq.cqe = my_cq->nr_of_entries = + param.act_nr_of_entries - additional_cqe; + my_cq->cq_number = (my_cq->ipz_cq_handle.handle) & 0xffff; + + for (i = 0; i < QP_HASHTAB_LEN; i++) + INIT_HLIST_HEAD(&my_cq->qp_hashtab[i]); + + INIT_LIST_HEAD(&my_cq->sqp_err_list); + INIT_LIST_HEAD(&my_cq->rqp_err_list); + + if (context) { + struct ipz_queue *ipz_queue = &my_cq->ipz_queue; + struct ehca_create_cq_resp resp; + memset(&resp, 0, sizeof(resp)); + resp.cq_number = my_cq->cq_number; + resp.token = my_cq->token; + resp.ipz_queue.qe_size = ipz_queue->qe_size; + resp.ipz_queue.act_nr_of_sg = ipz_queue->act_nr_of_sg; + resp.ipz_queue.queue_length = ipz_queue->queue_length; + resp.ipz_queue.pagesize = ipz_queue->pagesize; + resp.ipz_queue.toggle_state = ipz_queue->toggle_state; + resp.fw_handle_ofs = (u32) + (my_cq->galpas.user.fw_handle & (PAGE_SIZE - 1)); + if (ib_copy_to_udata(udata, &resp, sizeof(resp))) { + ehca_err(device, "Copy to udata failed."); + cq = ERR_PTR(-EFAULT); + goto create_cq_exit4; + } + } + + return cq; + +create_cq_exit4: + ipz_queue_dtor(NULL, &my_cq->ipz_queue); + +create_cq_exit3: + h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 1); + if (h_ret != H_SUCCESS) + ehca_err(device, "hipz_h_destroy_cq() failed ehca_cq=%p " + "cq_num=%x h_ret=%lli", my_cq, my_cq->cq_number, h_ret); + +create_cq_exit2: + write_lock_irqsave(&ehca_cq_idr_lock, flags); + idr_remove(&ehca_cq_idr, my_cq->token); + write_unlock_irqrestore(&ehca_cq_idr_lock, flags); + +create_cq_exit1: + kmem_cache_free(cq_cache, my_cq); + + atomic_dec(&shca->num_cqs); + return cq; +} + +int ehca_destroy_cq(struct ib_cq *cq) +{ + u64 h_ret; + struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); + int cq_num = my_cq->cq_number; + struct ib_device *device = cq->device; + struct ehca_shca *shca = container_of(device, struct ehca_shca, + ib_device); + struct 
ipz_adapter_handle adapter_handle = shca->ipz_hca_handle; + unsigned long flags; + + if (cq->uobject) { + if (my_cq->mm_count_galpa || my_cq->mm_count_queue) { + ehca_err(device, "Resources still referenced in " + "user space cq_num=%x", my_cq->cq_number); + return -EINVAL; + } + } + + /* + * remove the CQ from the idr first to make sure + * no more interrupt tasklets will touch this CQ + */ + write_lock_irqsave(&ehca_cq_idr_lock, flags); + idr_remove(&ehca_cq_idr, my_cq->token); + write_unlock_irqrestore(&ehca_cq_idr_lock, flags); + + /* now wait until all pending events have completed */ + wait_event(my_cq->wait_completion, !atomic_read(&my_cq->nr_events)); + + /* nobody's using our CQ any longer -- we can destroy it */ + h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 0); + if (h_ret == H_R_STATE) { + /* cq in err: read err data and destroy it forcibly */ + ehca_dbg(device, "ehca_cq=%p cq_num=%x resource=%llx in err " + "state. Try to delete it forcibly.", + my_cq, cq_num, my_cq->ipz_cq_handle.handle); + ehca_error_data(shca, my_cq, my_cq->ipz_cq_handle.handle); + h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 1); + if (h_ret == H_SUCCESS) + ehca_dbg(device, "cq_num=%x deleted successfully.", + cq_num); + } + if (h_ret != H_SUCCESS) { + ehca_err(device, "hipz_h_destroy_cq() failed h_ret=%lli " + "ehca_cq=%p cq_num=%x", h_ret, my_cq, cq_num); + return ehca2ib_return_code(h_ret); + } + ipz_queue_dtor(NULL, &my_cq->ipz_queue); + kmem_cache_free(cq_cache, my_cq); + + atomic_dec(&shca->num_cqs); + return 0; +} + +int ehca_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata) +{ + /* TODO: proper resize needs to be done */ + ehca_err(cq->device, "not implemented yet"); + + return -EFAULT; +} + +int ehca_init_cq_cache(void) +{ + cq_cache = kmem_cache_create("ehca_cache_cq", + sizeof(struct ehca_cq), 0, + SLAB_HWCACHE_ALIGN, + NULL); + if (!cq_cache) + return -ENOMEM; + return 0; +} + +void ehca_cleanup_cq_cache(void) +{ + if (cq_cache) + 
kmem_cache_destroy(cq_cache); +} diff --git a/drivers/staging/rdma/ehca/ehca_eq.c b/drivers/staging/rdma/ehca/ehca_eq.c new file mode 100644 index 0000000..90da674 --- /dev/null +++ b/drivers/staging/rdma/ehca/ehca_eq.c @@ -0,0 +1,189 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * Event queue handling + * + * Authors: Waleri Fomin + * Khadija Souissi + * Reinhard Ernst + * Heiko J Schick + * Hoang-Nam Nguyen + * + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "ehca_classes.h" +#include "ehca_irq.h" +#include "ehca_iverbs.h" +#include "ehca_qes.h" +#include "hcp_if.h" +#include "ipz_pt_fn.h" + +int ehca_create_eq(struct ehca_shca *shca, + struct ehca_eq *eq, + const enum ehca_eq_type type, const u32 length) +{ + int ret; + u64 h_ret; + u32 nr_pages; + u32 i; + void *vpage; + struct ib_device *ib_dev = &shca->ib_device; + + spin_lock_init(&eq->spinlock); + spin_lock_init(&eq->irq_spinlock); + eq->is_initialized = 0; + + if (type != EHCA_EQ && type != EHCA_NEQ) { + ehca_err(ib_dev, "Invalid EQ type %x. eq=%p", type, eq); + return -EINVAL; + } + if (!length) { + ehca_err(ib_dev, "EQ length must not be zero. eq=%p", eq); + return -EINVAL; + } + + h_ret = hipz_h_alloc_resource_eq(shca->ipz_hca_handle, + &eq->pf, + type, + length, + &eq->ipz_eq_handle, + &eq->length, + &nr_pages, &eq->ist); + + if (h_ret != H_SUCCESS) { + ehca_err(ib_dev, "Can't allocate EQ/NEQ. eq=%p", eq); + return -EINVAL; + } + + ret = ipz_queue_ctor(NULL, &eq->ipz_queue, nr_pages, + EHCA_PAGESIZE, sizeof(struct ehca_eqe), 0, 0); + if (!ret) { + ehca_err(ib_dev, "Can't allocate EQ pages eq=%p", eq); + goto create_eq_exit1; + } + + for (i = 0; i < nr_pages; i++) { + u64 rpage; + + vpage = ipz_qpageit_get_inc(&eq->ipz_queue); + if (!vpage) + goto create_eq_exit2; + + rpage = __pa(vpage); + h_ret = hipz_h_register_rpage_eq(shca->ipz_hca_handle, + eq->ipz_eq_handle, + &eq->pf, + 0, 0, rpage, 1); + + if (i == (nr_pages - 1)) { + /* last page */ + vpage = ipz_qpageit_get_inc(&eq->ipz_queue); + if (h_ret != H_SUCCESS || vpage) + goto create_eq_exit2; + } else { + if (h_ret != H_PAGE_REGISTERED) + goto create_eq_exit2; + } + } + + ipz_qeit_reset(&eq->ipz_queue); + + /* register interrupt handlers and initialize work queues */ + if (type == EHCA_EQ) { + tasklet_init(&eq->interrupt_task, ehca_tasklet_eq, (long)shca); + + ret = ibmebus_request_irq(eq->ist, ehca_interrupt_eq, + 0, "ehca_eq", + (void *)shca); + if (ret < 0) + ehca_err(ib_dev, "Can't 
map interrupt handler."); + } else if (type == EHCA_NEQ) { + tasklet_init(&eq->interrupt_task, ehca_tasklet_neq, (long)shca); + + ret = ibmebus_request_irq(eq->ist, ehca_interrupt_neq, + 0, "ehca_neq", + (void *)shca); + if (ret < 0) + ehca_err(ib_dev, "Can't map interrupt handler."); + } + + eq->is_initialized = 1; + + return 0; + +create_eq_exit2: + ipz_queue_dtor(NULL, &eq->ipz_queue); + +create_eq_exit1: + hipz_h_destroy_eq(shca->ipz_hca_handle, eq); + + return -EINVAL; +} + +void *ehca_poll_eq(struct ehca_shca *shca, struct ehca_eq *eq) +{ + unsigned long flags; + void *eqe; + + spin_lock_irqsave(&eq->spinlock, flags); + eqe = ipz_eqit_eq_get_inc_valid(&eq->ipz_queue); + spin_unlock_irqrestore(&eq->spinlock, flags); + + return eqe; +} + +int ehca_destroy_eq(struct ehca_shca *shca, struct ehca_eq *eq) +{ + unsigned long flags; + u64 h_ret; + + ibmebus_free_irq(eq->ist, (void *)shca); + + spin_lock_irqsave(&shca_list_lock, flags); + eq->is_initialized = 0; + spin_unlock_irqrestore(&shca_list_lock, flags); + + tasklet_kill(&eq->interrupt_task); + + h_ret = hipz_h_destroy_eq(shca->ipz_hca_handle, eq); + + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "Can't free EQ resources."); + return -EINVAL; + } + ipz_queue_dtor(NULL, &eq->ipz_queue); + + return 0; +} diff --git a/drivers/staging/rdma/ehca/ehca_hca.c b/drivers/staging/rdma/ehca/ehca_hca.c new file mode 100644 index 0000000..e8b1bb6 --- /dev/null +++ b/drivers/staging/rdma/ehca/ehca_hca.c @@ -0,0 +1,414 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * HCA query functions + * + * Authors: Heiko J Schick + * Christoph Raisch + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. 
+ * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
/*
 * limit_uint() - clamp an unsigned value so that it fits into a signed int.
 *
 * Return: @value unchanged when it is at most INT_MAX, INT_MAX otherwise.
 */
static unsigned int limit_uint(unsigned int value)
{
	if (value > (unsigned int)INT_MAX)
		return INT_MAX;

	return value;
}
props->max_cqe = limit_uint(rblock->max_cqe); + props->max_mr = limit_uint(rblock->max_mr); + props->max_mw = limit_uint(rblock->max_mw); + props->max_pd = limit_uint(rblock->max_pd); + props->max_ah = limit_uint(rblock->max_ah); + props->max_ee = limit_uint(rblock->max_rd_ee_context); + props->max_rdd = limit_uint(rblock->max_rd_domain); + props->max_fmr = limit_uint(rblock->max_mr); + props->max_qp_rd_atom = limit_uint(rblock->max_rr_qp); + props->max_ee_rd_atom = limit_uint(rblock->max_rr_ee_context); + props->max_res_rd_atom = limit_uint(rblock->max_rr_hca); + props->max_qp_init_rd_atom = limit_uint(rblock->max_act_wqs_qp); + props->max_ee_init_rd_atom = limit_uint(rblock->max_act_wqs_ee_context); + + if (EHCA_BMASK_GET(HCA_CAP_SRQ, shca->hca_cap)) { + props->max_srq = limit_uint(props->max_qp); + props->max_srq_wr = limit_uint(props->max_qp_wr); + props->max_srq_sge = 3; + } + + props->max_pkeys = 16; + /* Some FW versions say 0 here; insert sensible value in that case */ + props->local_ca_ack_delay = rblock->local_ca_ack_delay ? 
+ min_t(u8, rblock->local_ca_ack_delay, 255) : 12; + props->max_raw_ipv6_qp = limit_uint(rblock->max_raw_ipv6_qp); + props->max_raw_ethy_qp = limit_uint(rblock->max_raw_ethy_qp); + props->max_mcast_grp = limit_uint(rblock->max_mcast_grp); + props->max_mcast_qp_attach = limit_uint(rblock->max_mcast_qp_attach); + props->max_total_mcast_qp_attach + = limit_uint(rblock->max_total_mcast_qp_attach); + + /* translate device capabilities */ + props->device_cap_flags = IB_DEVICE_SYS_IMAGE_GUID | + IB_DEVICE_RC_RNR_NAK_GEN | IB_DEVICE_N_NOTIFY_CQ; + for (i = 0; i < ARRAY_SIZE(cap_mapping); i += 2) + if (rblock->hca_cap_indicators & cap_mapping[i + 1]) + props->device_cap_flags |= cap_mapping[i]; + +query_device1: + ehca_free_fw_ctrlblock(rblock); + + return ret; +} + +static enum ib_mtu map_mtu(struct ehca_shca *shca, u32 fw_mtu) +{ + switch (fw_mtu) { + case 0x1: + return IB_MTU_256; + case 0x2: + return IB_MTU_512; + case 0x3: + return IB_MTU_1024; + case 0x4: + return IB_MTU_2048; + case 0x5: + return IB_MTU_4096; + default: + ehca_err(&shca->ib_device, "Unknown MTU size: %x.", + fw_mtu); + return 0; + } +} + +static u8 map_number_of_vls(struct ehca_shca *shca, u32 vl_cap) +{ + switch (vl_cap) { + case 0x1: + return 1; + case 0x2: + return 2; + case 0x3: + return 4; + case 0x4: + return 8; + case 0x5: + return 15; + default: + ehca_err(&shca->ib_device, "invalid Vl Capability: %x.", + vl_cap); + return 0; + } +} + +int ehca_query_port(struct ib_device *ibdev, + u8 port, struct ib_port_attr *props) +{ + int ret = 0; + u64 h_ret; + struct ehca_shca *shca = container_of(ibdev, struct ehca_shca, + ib_device); + struct hipz_query_port *rblock; + + rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); + if (!rblock) { + ehca_err(&shca->ib_device, "Can't allocate rblock memory."); + return -ENOMEM; + } + + h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock); + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "Can't query port properties"); + ret = -EINVAL; + goto 
query_port1; + } + + memset(props, 0, sizeof(struct ib_port_attr)); + + props->active_mtu = props->max_mtu = map_mtu(shca, rblock->max_mtu); + props->port_cap_flags = rblock->capability_mask; + props->gid_tbl_len = rblock->gid_tbl_len; + if (rblock->max_msg_sz) + props->max_msg_sz = rblock->max_msg_sz; + else + props->max_msg_sz = 0x1 << 31; + props->bad_pkey_cntr = rblock->bad_pkey_cntr; + props->qkey_viol_cntr = rblock->qkey_viol_cntr; + props->pkey_tbl_len = rblock->pkey_tbl_len; + props->lid = rblock->lid; + props->sm_lid = rblock->sm_lid; + props->lmc = rblock->lmc; + props->sm_sl = rblock->sm_sl; + props->subnet_timeout = rblock->subnet_timeout; + props->init_type_reply = rblock->init_type_reply; + props->max_vl_num = map_number_of_vls(shca, rblock->vl_cap); + + if (rblock->state && rblock->phys_width) { + props->phys_state = rblock->phys_pstate; + props->state = rblock->phys_state; + props->active_width = rblock->phys_width; + props->active_speed = rblock->phys_speed; + } else { + /* old firmware releases don't report physical + * port info, so use default values + */ + props->phys_state = 5; + props->state = rblock->state; + props->active_width = IB_WIDTH_12X; + props->active_speed = IB_SPEED_SDR; + } + +query_port1: + ehca_free_fw_ctrlblock(rblock); + + return ret; +} + +int ehca_query_sma_attr(struct ehca_shca *shca, + u8 port, struct ehca_sma_attr *attr) +{ + int ret = 0; + u64 h_ret; + struct hipz_query_port *rblock; + + rblock = ehca_alloc_fw_ctrlblock(GFP_ATOMIC); + if (!rblock) { + ehca_err(&shca->ib_device, "Can't allocate rblock memory."); + return -ENOMEM; + } + + h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock); + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "Can't query port properties"); + ret = -EINVAL; + goto query_sma_attr1; + } + + memset(attr, 0, sizeof(struct ehca_sma_attr)); + + attr->lid = rblock->lid; + attr->lmc = rblock->lmc; + attr->sm_sl = rblock->sm_sl; + attr->sm_lid = rblock->sm_lid; + + attr->pkey_tbl_len 
= rblock->pkey_tbl_len; + memcpy(attr->pkeys, rblock->pkey_entries, sizeof(attr->pkeys)); + +query_sma_attr1: + ehca_free_fw_ctrlblock(rblock); + + return ret; +} + +int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey) +{ + int ret = 0; + u64 h_ret; + struct ehca_shca *shca; + struct hipz_query_port *rblock; + + shca = container_of(ibdev, struct ehca_shca, ib_device); + if (index > 16) { + ehca_err(&shca->ib_device, "Invalid index: %x.", index); + return -EINVAL; + } + + rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); + if (!rblock) { + ehca_err(&shca->ib_device, "Can't allocate rblock memory."); + return -ENOMEM; + } + + h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock); + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "Can't query port properties"); + ret = -EINVAL; + goto query_pkey1; + } + + memcpy(pkey, &rblock->pkey_entries + index, sizeof(u16)); + +query_pkey1: + ehca_free_fw_ctrlblock(rblock); + + return ret; +} + +int ehca_query_gid(struct ib_device *ibdev, u8 port, + int index, union ib_gid *gid) +{ + int ret = 0; + u64 h_ret; + struct ehca_shca *shca = container_of(ibdev, struct ehca_shca, + ib_device); + struct hipz_query_port *rblock; + + if (index < 0 || index > 255) { + ehca_err(&shca->ib_device, "Invalid index: %x.", index); + return -EINVAL; + } + + rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); + if (!rblock) { + ehca_err(&shca->ib_device, "Can't allocate rblock memory."); + return -ENOMEM; + } + + h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock); + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "Can't query port properties"); + ret = -EINVAL; + goto query_gid1; + } + + memcpy(&gid->raw[0], &rblock->gid_prefix, sizeof(u64)); + memcpy(&gid->raw[8], &rblock->guid_entries[index], sizeof(u64)); + +query_gid1: + ehca_free_fw_ctrlblock(rblock); + + return ret; +} + +static const u32 allowed_port_caps = ( + IB_PORT_SM | IB_PORT_LED_INFO_SUP | IB_PORT_CM_SUP | + IB_PORT_SNMP_TUNNEL_SUP | 
IB_PORT_DEVICE_MGMT_SUP | + IB_PORT_VENDOR_CLASS_SUP); + +int ehca_modify_port(struct ib_device *ibdev, + u8 port, int port_modify_mask, + struct ib_port_modify *props) +{ + int ret = 0; + struct ehca_shca *shca; + struct hipz_query_port *rblock; + u32 cap; + u64 hret; + + shca = container_of(ibdev, struct ehca_shca, ib_device); + if ((props->set_port_cap_mask | props->clr_port_cap_mask) + & ~allowed_port_caps) { + ehca_err(&shca->ib_device, "Non-changeable bits set in masks " + "set=%x clr=%x allowed=%x", props->set_port_cap_mask, + props->clr_port_cap_mask, allowed_port_caps); + return -EINVAL; + } + + if (mutex_lock_interruptible(&shca->modify_mutex)) + return -ERESTARTSYS; + + rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); + if (!rblock) { + ehca_err(&shca->ib_device, "Can't allocate rblock memory."); + ret = -ENOMEM; + goto modify_port1; + } + + hret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock); + if (hret != H_SUCCESS) { + ehca_err(&shca->ib_device, "Can't query port properties"); + ret = -EINVAL; + goto modify_port2; + } + + cap = (rblock->capability_mask | props->set_port_cap_mask) + & ~props->clr_port_cap_mask; + + hret = hipz_h_modify_port(shca->ipz_hca_handle, port, + cap, props->init_type, port_modify_mask); + if (hret != H_SUCCESS) { + ehca_err(&shca->ib_device, "Modify port failed h_ret=%lli", + hret); + ret = -EINVAL; + } + +modify_port2: + ehca_free_fw_ctrlblock(rblock); + +modify_port1: + mutex_unlock(&shca->modify_mutex); + + return ret; +} diff --git a/drivers/staging/rdma/ehca/ehca_irq.c b/drivers/staging/rdma/ehca/ehca_irq.c new file mode 100644 index 0000000..8615d7c --- /dev/null +++ b/drivers/staging/rdma/ehca/ehca_irq.c @@ -0,0 +1,870 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * Functions for EQs, NEQs and interrupts + * + * Authors: Heiko J Schick + * Khadija Souissi + * Hoang-Nam Nguyen + * Joachim Fenkes + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. 
+ * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include + +#include "ehca_classes.h" +#include "ehca_irq.h" +#include "ehca_iverbs.h" +#include "ehca_tools.h" +#include "hcp_if.h" +#include "hipz_fns.h" +#include "ipz_pt_fn.h" + +#define EQE_COMPLETION_EVENT EHCA_BMASK_IBM( 1, 1) +#define EQE_CQ_QP_NUMBER EHCA_BMASK_IBM( 8, 31) +#define EQE_EE_IDENTIFIER EHCA_BMASK_IBM( 2, 7) +#define EQE_CQ_NUMBER EHCA_BMASK_IBM( 8, 31) +#define EQE_QP_NUMBER EHCA_BMASK_IBM( 8, 31) +#define EQE_QP_TOKEN EHCA_BMASK_IBM(32, 63) +#define EQE_CQ_TOKEN EHCA_BMASK_IBM(32, 63) + +#define NEQE_COMPLETION_EVENT EHCA_BMASK_IBM( 1, 1) +#define NEQE_EVENT_CODE EHCA_BMASK_IBM( 2, 7) +#define NEQE_PORT_NUMBER EHCA_BMASK_IBM( 8, 15) +#define NEQE_PORT_AVAILABILITY EHCA_BMASK_IBM(16, 16) +#define NEQE_DISRUPTIVE EHCA_BMASK_IBM(16, 16) +#define NEQE_SPECIFIC_EVENT EHCA_BMASK_IBM(16, 23) + +#define ERROR_DATA_LENGTH EHCA_BMASK_IBM(52, 63) +#define ERROR_DATA_TYPE EHCA_BMASK_IBM( 0, 7) + +static void queue_comp_task(struct ehca_cq *__cq); + +static struct ehca_comp_pool *pool; + +static inline void comp_event_callback(struct ehca_cq *cq) +{ + if (!cq->ib_cq.comp_handler) + return; + + spin_lock(&cq->cb_lock); + cq->ib_cq.comp_handler(&cq->ib_cq, cq->ib_cq.cq_context); + spin_unlock(&cq->cb_lock); + + return; +} + +static void print_error_data(struct ehca_shca *shca, void *data, + u64 *rblock, int length) +{ + u64 type = EHCA_BMASK_GET(ERROR_DATA_TYPE, rblock[2]); + u64 resource = rblock[1]; + + switch (type) { + case 0x1: /* Queue Pair */ + { + struct ehca_qp *qp = (struct ehca_qp *)data; + + /* only print error data if AER is set */ + if (rblock[6] == 0) + return; + + ehca_err(&shca->ib_device, + "QP 0x%x (resource=%llx) has errors.", + qp->ib_qp.qp_num, resource); + break; + } + case 0x4: /* Completion Queue */ + { + struct ehca_cq *cq = (struct ehca_cq *)data; + + ehca_err(&shca->ib_device, + "CQ 0x%x (resource=%llx) has errors.", + cq->cq_number, resource); + break; + } + default: + ehca_err(&shca->ib_device, + "Unknown 
error type: %llx on %s.", + type, shca->ib_device.name); + break; + } + + ehca_err(&shca->ib_device, "Error data is available: %llx.", resource); + ehca_err(&shca->ib_device, "EHCA ----- error data begin " + "---------------------------------------------------"); + ehca_dmp(rblock, length, "resource=%llx", resource); + ehca_err(&shca->ib_device, "EHCA ----- error data end " + "----------------------------------------------------"); + + return; +} + +int ehca_error_data(struct ehca_shca *shca, void *data, + u64 resource) +{ + + unsigned long ret; + u64 *rblock; + unsigned long block_count; + + rblock = ehca_alloc_fw_ctrlblock(GFP_ATOMIC); + if (!rblock) { + ehca_err(&shca->ib_device, "Cannot allocate rblock memory."); + ret = -ENOMEM; + goto error_data1; + } + + /* rblock must be 4K aligned and should be 4K large */ + ret = hipz_h_error_data(shca->ipz_hca_handle, + resource, + rblock, + &block_count); + + if (ret == H_R_STATE) + ehca_err(&shca->ib_device, + "No error data is available: %llx.", resource); + else if (ret == H_SUCCESS) { + int length; + + length = EHCA_BMASK_GET(ERROR_DATA_LENGTH, rblock[0]); + + if (length > EHCA_PAGESIZE) + length = EHCA_PAGESIZE; + + print_error_data(shca, data, rblock, length); + } else + ehca_err(&shca->ib_device, + "Error data could not be fetched: %llx", resource); + + ehca_free_fw_ctrlblock(rblock); + +error_data1: + return ret; + +} + +static void dispatch_qp_event(struct ehca_shca *shca, struct ehca_qp *qp, + enum ib_event_type event_type) +{ + struct ib_event event; + + /* PATH_MIG without the QP ever having been armed is false alarm */ + if (event_type == IB_EVENT_PATH_MIG && !qp->mig_armed) + return; + + event.device = &shca->ib_device; + event.event = event_type; + + if (qp->ext_type == EQPT_SRQ) { + if (!qp->ib_srq.event_handler) + return; + + event.element.srq = &qp->ib_srq; + qp->ib_srq.event_handler(&event, qp->ib_srq.srq_context); + } else { + if (!qp->ib_qp.event_handler) + return; + + event.element.qp = &qp->ib_qp; 
+ qp->ib_qp.event_handler(&event, qp->ib_qp.qp_context); + } +} + +static void qp_event_callback(struct ehca_shca *shca, u64 eqe, + enum ib_event_type event_type, int fatal) +{ + struct ehca_qp *qp; + u32 token = EHCA_BMASK_GET(EQE_QP_TOKEN, eqe); + + read_lock(&ehca_qp_idr_lock); + qp = idr_find(&ehca_qp_idr, token); + if (qp) + atomic_inc(&qp->nr_events); + read_unlock(&ehca_qp_idr_lock); + + if (!qp) + return; + + if (fatal) + ehca_error_data(shca, qp, qp->ipz_qp_handle.handle); + + dispatch_qp_event(shca, qp, fatal && qp->ext_type == EQPT_SRQ ? + IB_EVENT_SRQ_ERR : event_type); + + /* + * eHCA only processes one WQE at a time for SRQ base QPs, + * so the last WQE has been processed as soon as the QP enters + * error state. + */ + if (fatal && qp->ext_type == EQPT_SRQBASE) + dispatch_qp_event(shca, qp, IB_EVENT_QP_LAST_WQE_REACHED); + + if (atomic_dec_and_test(&qp->nr_events)) + wake_up(&qp->wait_completion); + return; +} + +static void cq_event_callback(struct ehca_shca *shca, + u64 eqe) +{ + struct ehca_cq *cq; + u32 token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe); + + read_lock(&ehca_cq_idr_lock); + cq = idr_find(&ehca_cq_idr, token); + if (cq) + atomic_inc(&cq->nr_events); + read_unlock(&ehca_cq_idr_lock); + + if (!cq) + return; + + ehca_error_data(shca, cq, cq->ipz_cq_handle.handle); + + if (atomic_dec_and_test(&cq->nr_events)) + wake_up(&cq->wait_completion); + + return; +} + +static void parse_identifier(struct ehca_shca *shca, u64 eqe) +{ + u8 identifier = EHCA_BMASK_GET(EQE_EE_IDENTIFIER, eqe); + + switch (identifier) { + case 0x02: /* path migrated */ + qp_event_callback(shca, eqe, IB_EVENT_PATH_MIG, 0); + break; + case 0x03: /* communication established */ + qp_event_callback(shca, eqe, IB_EVENT_COMM_EST, 0); + break; + case 0x04: /* send queue drained */ + qp_event_callback(shca, eqe, IB_EVENT_SQ_DRAINED, 0); + break; + case 0x05: /* QP error */ + case 0x06: /* QP error */ + qp_event_callback(shca, eqe, IB_EVENT_QP_FATAL, 1); + break; + case 0x07: /* CQ 
error */ + case 0x08: /* CQ error */ + cq_event_callback(shca, eqe); + break; + case 0x09: /* MRMWPTE error */ + ehca_err(&shca->ib_device, "MRMWPTE error."); + break; + case 0x0A: /* port event */ + ehca_err(&shca->ib_device, "Port event."); + break; + case 0x0B: /* MR access error */ + ehca_err(&shca->ib_device, "MR access error."); + break; + case 0x0C: /* EQ error */ + ehca_err(&shca->ib_device, "EQ error."); + break; + case 0x0D: /* P/Q_Key mismatch */ + ehca_err(&shca->ib_device, "P/Q_Key mismatch."); + break; + case 0x10: /* sampling complete */ + ehca_err(&shca->ib_device, "Sampling complete."); + break; + case 0x11: /* unaffiliated access error */ + ehca_err(&shca->ib_device, "Unaffiliated access error."); + break; + case 0x12: /* path migrating */ + ehca_err(&shca->ib_device, "Path migrating."); + break; + case 0x13: /* interface trace stopped */ + ehca_err(&shca->ib_device, "Interface trace stopped."); + break; + case 0x14: /* first error capture info available */ + ehca_info(&shca->ib_device, "First error capture available"); + break; + case 0x15: /* SRQ limit reached */ + qp_event_callback(shca, eqe, IB_EVENT_SRQ_LIMIT_REACHED, 0); + break; + default: + ehca_err(&shca->ib_device, "Unknown identifier: %x on %s.", + identifier, shca->ib_device.name); + break; + } + + return; +} + +static void dispatch_port_event(struct ehca_shca *shca, int port_num, + enum ib_event_type type, const char *msg) +{ + struct ib_event event; + + ehca_info(&shca->ib_device, "port %d %s.", port_num, msg); + event.device = &shca->ib_device; + event.event = type; + event.element.port_num = port_num; + ib_dispatch_event(&event); +} + +static void notify_port_conf_change(struct ehca_shca *shca, int port_num) +{ + struct ehca_sma_attr new_attr; + struct ehca_sma_attr *old_attr = &shca->sport[port_num - 1].saved_attr; + + ehca_query_sma_attr(shca, port_num, &new_attr); + + if (new_attr.sm_sl != old_attr->sm_sl || + new_attr.sm_lid != old_attr->sm_lid) + dispatch_port_event(shca, 
port_num, IB_EVENT_SM_CHANGE, + "SM changed"); + + if (new_attr.lid != old_attr->lid || + new_attr.lmc != old_attr->lmc) + dispatch_port_event(shca, port_num, IB_EVENT_LID_CHANGE, + "LID changed"); + + if (new_attr.pkey_tbl_len != old_attr->pkey_tbl_len || + memcmp(new_attr.pkeys, old_attr->pkeys, + sizeof(u16) * new_attr.pkey_tbl_len)) + dispatch_port_event(shca, port_num, IB_EVENT_PKEY_CHANGE, + "P_Key changed"); + + *old_attr = new_attr; +} + +/* replay modify_qp for sqps -- return 0 if all is well, 1 if AQP1 destroyed */ +static int replay_modify_qp(struct ehca_sport *sport) +{ + int aqp1_destroyed; + unsigned long flags; + + spin_lock_irqsave(&sport->mod_sqp_lock, flags); + + aqp1_destroyed = !sport->ibqp_sqp[IB_QPT_GSI]; + + if (sport->ibqp_sqp[IB_QPT_SMI]) + ehca_recover_sqp(sport->ibqp_sqp[IB_QPT_SMI]); + if (!aqp1_destroyed) + ehca_recover_sqp(sport->ibqp_sqp[IB_QPT_GSI]); + + spin_unlock_irqrestore(&sport->mod_sqp_lock, flags); + + return aqp1_destroyed; +} + +static void parse_ec(struct ehca_shca *shca, u64 eqe) +{ + u8 ec = EHCA_BMASK_GET(NEQE_EVENT_CODE, eqe); + u8 port = EHCA_BMASK_GET(NEQE_PORT_NUMBER, eqe); + u8 spec_event; + struct ehca_sport *sport = &shca->sport[port - 1]; + + switch (ec) { + case 0x30: /* port availability change */ + if (EHCA_BMASK_GET(NEQE_PORT_AVAILABILITY, eqe)) { + /* only replay modify_qp calls in autodetect mode; + * if AQP1 was destroyed, the port is already down + * again and we can drop the event. 
+ */ + if (ehca_nr_ports < 0) + if (replay_modify_qp(sport)) + break; + + sport->port_state = IB_PORT_ACTIVE; + dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE, + "is active"); + ehca_query_sma_attr(shca, port, &sport->saved_attr); + } else { + sport->port_state = IB_PORT_DOWN; + dispatch_port_event(shca, port, IB_EVENT_PORT_ERR, + "is inactive"); + } + break; + case 0x31: + /* port configuration change + * disruptive change is caused by + * LID, PKEY or SM change + */ + if (EHCA_BMASK_GET(NEQE_DISRUPTIVE, eqe)) { + ehca_warn(&shca->ib_device, "disruptive port " + "%d configuration change", port); + + sport->port_state = IB_PORT_DOWN; + dispatch_port_event(shca, port, IB_EVENT_PORT_ERR, + "is inactive"); + + sport->port_state = IB_PORT_ACTIVE; + dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE, + "is active"); + ehca_query_sma_attr(shca, port, + &sport->saved_attr); + } else + notify_port_conf_change(shca, port); + break; + case 0x32: /* adapter malfunction */ + ehca_err(&shca->ib_device, "Adapter malfunction."); + break; + case 0x33: /* trace stopped */ + ehca_err(&shca->ib_device, "Traced stopped."); + break; + case 0x34: /* util async event */ + spec_event = EHCA_BMASK_GET(NEQE_SPECIFIC_EVENT, eqe); + if (spec_event == 0x80) /* client reregister required */ + dispatch_port_event(shca, port, + IB_EVENT_CLIENT_REREGISTER, + "client reregister req."); + else + ehca_warn(&shca->ib_device, "Unknown util async " + "event %x on port %x", spec_event, port); + break; + default: + ehca_err(&shca->ib_device, "Unknown event code: %x on %s.", + ec, shca->ib_device.name); + break; + } + + return; +} + +static inline void reset_eq_pending(struct ehca_cq *cq) +{ + u64 CQx_EP; + struct h_galpa gal = cq->galpas.kernel; + + hipz_galpa_store_cq(gal, cqx_ep, 0x0); + CQx_EP = hipz_galpa_load(gal, CQTEMM_OFFSET(cqx_ep)); + + return; +} + +irqreturn_t ehca_interrupt_neq(int irq, void *dev_id) +{ + struct ehca_shca *shca = (struct ehca_shca*)dev_id; + + 
tasklet_hi_schedule(&shca->neq.interrupt_task); + + return IRQ_HANDLED; +} + +void ehca_tasklet_neq(unsigned long data) +{ + struct ehca_shca *shca = (struct ehca_shca*)data; + struct ehca_eqe *eqe; + u64 ret; + + eqe = ehca_poll_eq(shca, &shca->neq); + + while (eqe) { + if (!EHCA_BMASK_GET(NEQE_COMPLETION_EVENT, eqe->entry)) + parse_ec(shca, eqe->entry); + + eqe = ehca_poll_eq(shca, &shca->neq); + } + + ret = hipz_h_reset_event(shca->ipz_hca_handle, + shca->neq.ipz_eq_handle, 0xFFFFFFFFFFFFFFFFL); + + if (ret != H_SUCCESS) + ehca_err(&shca->ib_device, "Can't clear notification events."); + + return; +} + +irqreturn_t ehca_interrupt_eq(int irq, void *dev_id) +{ + struct ehca_shca *shca = (struct ehca_shca*)dev_id; + + tasklet_hi_schedule(&shca->eq.interrupt_task); + + return IRQ_HANDLED; +} + + +static inline void process_eqe(struct ehca_shca *shca, struct ehca_eqe *eqe) +{ + u64 eqe_value; + u32 token; + struct ehca_cq *cq; + + eqe_value = eqe->entry; + ehca_dbg(&shca->ib_device, "eqe_value=%llx", eqe_value); + if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) { + ehca_dbg(&shca->ib_device, "Got completion event"); + token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value); + read_lock(&ehca_cq_idr_lock); + cq = idr_find(&ehca_cq_idr, token); + if (cq) + atomic_inc(&cq->nr_events); + read_unlock(&ehca_cq_idr_lock); + if (cq == NULL) { + ehca_err(&shca->ib_device, + "Invalid eqe for non-existing cq token=%x", + token); + return; + } + reset_eq_pending(cq); + if (ehca_scaling_code) + queue_comp_task(cq); + else { + comp_event_callback(cq); + if (atomic_dec_and_test(&cq->nr_events)) + wake_up(&cq->wait_completion); + } + } else { + ehca_dbg(&shca->ib_device, "Got non completion event"); + parse_identifier(shca, eqe_value); + } +} + +void ehca_process_eq(struct ehca_shca *shca, int is_irq) +{ + struct ehca_eq *eq = &shca->eq; + struct ehca_eqe_cache_entry *eqe_cache = eq->eqe_cache; + u64 eqe_value, ret; + int eqe_cnt, i; + int eq_empty = 0; + + 
spin_lock(&eq->irq_spinlock); + if (is_irq) { + const int max_query_cnt = 100; + int query_cnt = 0; + int int_state = 1; + do { + int_state = hipz_h_query_int_state( + shca->ipz_hca_handle, eq->ist); + query_cnt++; + iosync(); + } while (int_state && query_cnt < max_query_cnt); + if (unlikely((query_cnt == max_query_cnt))) + ehca_dbg(&shca->ib_device, "int_state=%x query_cnt=%x", + int_state, query_cnt); + } + + /* read out all eqes */ + eqe_cnt = 0; + do { + u32 token; + eqe_cache[eqe_cnt].eqe = ehca_poll_eq(shca, eq); + if (!eqe_cache[eqe_cnt].eqe) + break; + eqe_value = eqe_cache[eqe_cnt].eqe->entry; + if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) { + token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value); + read_lock(&ehca_cq_idr_lock); + eqe_cache[eqe_cnt].cq = idr_find(&ehca_cq_idr, token); + if (eqe_cache[eqe_cnt].cq) + atomic_inc(&eqe_cache[eqe_cnt].cq->nr_events); + read_unlock(&ehca_cq_idr_lock); + if (!eqe_cache[eqe_cnt].cq) { + ehca_err(&shca->ib_device, + "Invalid eqe for non-existing cq " + "token=%x", token); + continue; + } + } else + eqe_cache[eqe_cnt].cq = NULL; + eqe_cnt++; + } while (eqe_cnt < EHCA_EQE_CACHE_SIZE); + if (!eqe_cnt) { + if (is_irq) + ehca_dbg(&shca->ib_device, + "No eqe found for irq event"); + goto unlock_irq_spinlock; + } else if (!is_irq) { + ret = hipz_h_eoi(eq->ist); + if (ret != H_SUCCESS) + ehca_err(&shca->ib_device, + "bad return code EOI -rc = %lld\n", ret); + ehca_dbg(&shca->ib_device, "deadman found %x eqe", eqe_cnt); + } + if (unlikely(eqe_cnt == EHCA_EQE_CACHE_SIZE)) + ehca_dbg(&shca->ib_device, "too many eqes for one irq event"); + /* enable irq for new packets */ + for (i = 0; i < eqe_cnt; i++) { + if (eq->eqe_cache[i].cq) + reset_eq_pending(eq->eqe_cache[i].cq); + } + /* check eq */ + spin_lock(&eq->spinlock); + eq_empty = (!ipz_eqit_eq_peek_valid(&shca->eq.ipz_queue)); + spin_unlock(&eq->spinlock); + /* call completion handler for cached eqes */ + for (i = 0; i < eqe_cnt; i++) + if (eq->eqe_cache[i].cq) { + if 
(ehca_scaling_code) + queue_comp_task(eq->eqe_cache[i].cq); + else { + struct ehca_cq *cq = eq->eqe_cache[i].cq; + comp_event_callback(cq); + if (atomic_dec_and_test(&cq->nr_events)) + wake_up(&cq->wait_completion); + } + } else { + ehca_dbg(&shca->ib_device, "Got non completion event"); + parse_identifier(shca, eq->eqe_cache[i].eqe->entry); + } + /* poll eq if not empty */ + if (eq_empty) + goto unlock_irq_spinlock; + do { + struct ehca_eqe *eqe; + eqe = ehca_poll_eq(shca, &shca->eq); + if (!eqe) + break; + process_eqe(shca, eqe); + } while (1); + +unlock_irq_spinlock: + spin_unlock(&eq->irq_spinlock); +} + +void ehca_tasklet_eq(unsigned long data) +{ + ehca_process_eq((struct ehca_shca*)data, 1); +} + +static int find_next_online_cpu(struct ehca_comp_pool *pool) +{ + int cpu; + unsigned long flags; + + WARN_ON_ONCE(!in_interrupt()); + if (ehca_debug_level >= 3) + ehca_dmp(cpu_online_mask, cpumask_size(), ""); + + spin_lock_irqsave(&pool->last_cpu_lock, flags); + do { + cpu = cpumask_next(pool->last_cpu, cpu_online_mask); + if (cpu >= nr_cpu_ids) + cpu = cpumask_first(cpu_online_mask); + pool->last_cpu = cpu; + } while (!per_cpu_ptr(pool->cpu_comp_tasks, cpu)->active); + spin_unlock_irqrestore(&pool->last_cpu_lock, flags); + + return cpu; +} + +static void __queue_comp_task(struct ehca_cq *__cq, + struct ehca_cpu_comp_task *cct, + struct task_struct *thread) +{ + unsigned long flags; + + spin_lock_irqsave(&cct->task_lock, flags); + spin_lock(&__cq->task_lock); + + if (__cq->nr_callbacks == 0) { + __cq->nr_callbacks++; + list_add_tail(&__cq->entry, &cct->cq_list); + cct->cq_jobs++; + wake_up_process(thread); + } else + __cq->nr_callbacks++; + + spin_unlock(&__cq->task_lock); + spin_unlock_irqrestore(&cct->task_lock, flags); +} + +static void queue_comp_task(struct ehca_cq *__cq) +{ + int cpu_id; + struct ehca_cpu_comp_task *cct; + struct task_struct *thread; + int cq_jobs; + unsigned long flags; + + cpu_id = find_next_online_cpu(pool); + 
BUG_ON(!cpu_online(cpu_id)); + + cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id); + thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu_id); + BUG_ON(!cct || !thread); + + spin_lock_irqsave(&cct->task_lock, flags); + cq_jobs = cct->cq_jobs; + spin_unlock_irqrestore(&cct->task_lock, flags); + if (cq_jobs > 0) { + cpu_id = find_next_online_cpu(pool); + cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id); + thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu_id); + BUG_ON(!cct || !thread); + } + __queue_comp_task(__cq, cct, thread); +} + +static void run_comp_task(struct ehca_cpu_comp_task *cct) +{ + struct ehca_cq *cq; + + while (!list_empty(&cct->cq_list)) { + cq = list_entry(cct->cq_list.next, struct ehca_cq, entry); + spin_unlock_irq(&cct->task_lock); + + comp_event_callback(cq); + if (atomic_dec_and_test(&cq->nr_events)) + wake_up(&cq->wait_completion); + + spin_lock_irq(&cct->task_lock); + spin_lock(&cq->task_lock); + cq->nr_callbacks--; + if (!cq->nr_callbacks) { + list_del_init(cct->cq_list.next); + cct->cq_jobs--; + } + spin_unlock(&cq->task_lock); + } +} + +static void comp_task_park(unsigned int cpu) +{ + struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); + struct ehca_cpu_comp_task *target; + struct task_struct *thread; + struct ehca_cq *cq, *tmp; + LIST_HEAD(list); + + spin_lock_irq(&cct->task_lock); + cct->cq_jobs = 0; + cct->active = 0; + list_splice_init(&cct->cq_list, &list); + spin_unlock_irq(&cct->task_lock); + + cpu = find_next_online_cpu(pool); + target = per_cpu_ptr(pool->cpu_comp_tasks, cpu); + thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu); + spin_lock_irq(&target->task_lock); + list_for_each_entry_safe(cq, tmp, &list, entry) { + list_del(&cq->entry); + __queue_comp_task(cq, target, thread); + } + spin_unlock_irq(&target->task_lock); +} + +static void comp_task_stop(unsigned int cpu, bool online) +{ + struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); + + spin_lock_irq(&cct->task_lock); + cct->cq_jobs = 
0; + cct->active = 0; + WARN_ON(!list_empty(&cct->cq_list)); + spin_unlock_irq(&cct->task_lock); +} + +static int comp_task_should_run(unsigned int cpu) +{ + struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); + + return cct->cq_jobs; +} + +static void comp_task(unsigned int cpu) +{ + struct ehca_cpu_comp_task *cct = this_cpu_ptr(pool->cpu_comp_tasks); + int cql_empty; + + spin_lock_irq(&cct->task_lock); + cql_empty = list_empty(&cct->cq_list); + if (!cql_empty) { + __set_current_state(TASK_RUNNING); + run_comp_task(cct); + } + spin_unlock_irq(&cct->task_lock); +} + +static struct smp_hotplug_thread comp_pool_threads = { + .thread_should_run = comp_task_should_run, + .thread_fn = comp_task, + .thread_comm = "ehca_comp/%u", + .cleanup = comp_task_stop, + .park = comp_task_park, +}; + +int ehca_create_comp_pool(void) +{ + int cpu, ret = -ENOMEM; + + if (!ehca_scaling_code) + return 0; + + pool = kzalloc(sizeof(struct ehca_comp_pool), GFP_KERNEL); + if (pool == NULL) + return -ENOMEM; + + spin_lock_init(&pool->last_cpu_lock); + pool->last_cpu = cpumask_any(cpu_online_mask); + + pool->cpu_comp_tasks = alloc_percpu(struct ehca_cpu_comp_task); + if (!pool->cpu_comp_tasks) + goto out_pool; + + pool->cpu_comp_threads = alloc_percpu(struct task_struct *); + if (!pool->cpu_comp_threads) + goto out_tasks; + + for_each_present_cpu(cpu) { + struct ehca_cpu_comp_task *cct; + + cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); + spin_lock_init(&cct->task_lock); + INIT_LIST_HEAD(&cct->cq_list); + } + + comp_pool_threads.store = pool->cpu_comp_threads; + ret = smpboot_register_percpu_thread(&comp_pool_threads); + if (ret) + goto out_threads; + + pr_info("eHCA scaling code enabled\n"); + return ret; + +out_threads: + free_percpu(pool->cpu_comp_threads); +out_tasks: + free_percpu(pool->cpu_comp_tasks); +out_pool: + kfree(pool); + return ret; +} + +void ehca_destroy_comp_pool(void) +{ + if (!ehca_scaling_code) + return; + + 
smpboot_unregister_percpu_thread(&comp_pool_threads); + + free_percpu(pool->cpu_comp_threads); + free_percpu(pool->cpu_comp_tasks); + kfree(pool); +} diff --git a/drivers/staging/rdma/ehca/ehca_irq.h b/drivers/staging/rdma/ehca/ehca_irq.h new file mode 100644 index 0000000..5370199 --- /dev/null +++ b/drivers/staging/rdma/ehca/ehca_irq.h @@ -0,0 +1,77 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * Function definitions and structs for EQs, NEQs and interrupts + * + * Authors: Heiko J Schick + * Khadija Souissi + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __EHCA_IRQ_H +#define __EHCA_IRQ_H + + +struct ehca_shca; + +#include +#include + +int ehca_error_data(struct ehca_shca *shca, void *data, u64 resource); + +irqreturn_t ehca_interrupt_neq(int irq, void *dev_id); +void ehca_tasklet_neq(unsigned long data); + +irqreturn_t ehca_interrupt_eq(int irq, void *dev_id); +void ehca_tasklet_eq(unsigned long data); +void ehca_process_eq(struct ehca_shca *shca, int is_irq); + /* Per-CPU work state of one completion thread. */ +struct ehca_cpu_comp_task { + struct list_head cq_list; /* CQs queued for completion callback processing */ + spinlock_t task_lock; /* held around cq_list and cq_jobs updates */ + int cq_jobs; /* raised per CQ added to cq_list, lowered on removal */ + int active; /* find_next_online_cpu() only picks CPUs where this is nonzero */ +}; + /* Pool of per-CPU completion threads used by the scaling code. */ +struct ehca_comp_pool { + struct ehca_cpu_comp_task __percpu *cpu_comp_tasks; /* per-CPU work state */ + struct task_struct * __percpu *cpu_comp_threads; /* per-CPU thread pointers, handed to smpboot as .store */ + int last_cpu; /* CPU most recently chosen by find_next_online_cpu() */ + spinlock_t last_cpu_lock; /* held while last_cpu is advanced */ +}; + +int ehca_create_comp_pool(void); +void ehca_destroy_comp_pool(void); + +#endif diff --git a/drivers/staging/rdma/ehca/ehca_iverbs.h b/drivers/staging/rdma/ehca/ehca_iverbs.h new file mode 100644 index 0000000..80e6a3d --- /dev/null +++ b/drivers/staging/rdma/ehca/ehca_iverbs.h @@ -0,0 +1,218 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * Function definitions for internal functions + * + * Authors: Heiko J Schick + * Dietmar Decker + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD.
+ * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef __EHCA_IVERBS_H__ +#define __EHCA_IVERBS_H__ + +#include "ehca_classes.h" + +int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props, + struct ib_udata *uhw); + +int ehca_query_port(struct ib_device *ibdev, u8 port, + struct ib_port_attr *props); + +enum rdma_protocol_type +ehca_query_protocol(struct ib_device *device, u8 port_num); + +int ehca_query_sma_attr(struct ehca_shca *shca, u8 port, + struct ehca_sma_attr *attr); + +int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 * pkey); + +int ehca_query_gid(struct ib_device *ibdev, u8 port, int index, + union ib_gid *gid); + +int ehca_modify_port(struct ib_device *ibdev, u8 port, int port_modify_mask, + struct ib_port_modify *props); + +struct ib_pd *ehca_alloc_pd(struct ib_device *device, + struct ib_ucontext *context, + struct ib_udata *udata); + +int ehca_dealloc_pd(struct ib_pd *pd); + +struct ib_ah *ehca_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr); + +int ehca_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr); + +int ehca_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr); + +int ehca_destroy_ah(struct ib_ah *ah); + +struct ib_mr *ehca_get_dma_mr(struct ib_pd *pd, int mr_access_flags); + +struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd, + struct ib_phys_buf *phys_buf_array, + int num_phys_buf, + int mr_access_flags, u64 *iova_start); + +struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, + u64 virt, int mr_access_flags, + struct ib_udata *udata); + +int ehca_rereg_phys_mr(struct ib_mr *mr, + int mr_rereg_mask, + struct ib_pd *pd, + struct ib_phys_buf *phys_buf_array, + int num_phys_buf, int mr_access_flags, u64 *iova_start); + +int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr); + +int ehca_dereg_mr(struct ib_mr *mr); + +struct ib_mw *ehca_alloc_mw(struct ib_pd *pd, enum ib_mw_type type); + +int ehca_bind_mw(struct ib_qp *qp, struct ib_mw *mw, + struct ib_mw_bind *mw_bind); + +int ehca_dealloc_mw(struct 
ib_mw *mw); + +struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd, + int mr_access_flags, + struct ib_fmr_attr *fmr_attr); + +int ehca_map_phys_fmr(struct ib_fmr *fmr, + u64 *page_list, int list_len, u64 iova); + +int ehca_unmap_fmr(struct list_head *fmr_list); + +int ehca_dealloc_fmr(struct ib_fmr *fmr); + +enum ehca_eq_type { + EHCA_EQ = 0, /* Event Queue */ + EHCA_NEQ /* Notification Event Queue */ +}; + +int ehca_create_eq(struct ehca_shca *shca, struct ehca_eq *eq, + enum ehca_eq_type type, const u32 length); + +int ehca_destroy_eq(struct ehca_shca *shca, struct ehca_eq *eq); + +void *ehca_poll_eq(struct ehca_shca *shca, struct ehca_eq *eq); + + +struct ib_cq *ehca_create_cq(struct ib_device *device, + const struct ib_cq_init_attr *attr, + struct ib_ucontext *context, + struct ib_udata *udata); + +int ehca_destroy_cq(struct ib_cq *cq); + +int ehca_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata); + +int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc); + +int ehca_peek_cq(struct ib_cq *cq, int wc_cnt); + +int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags notify_flags); + +struct ib_qp *ehca_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata); + +int ehca_destroy_qp(struct ib_qp *qp); + +int ehca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, + struct ib_udata *udata); + +int ehca_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, + int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr); + +int ehca_post_send(struct ib_qp *qp, struct ib_send_wr *send_wr, + struct ib_send_wr **bad_send_wr); + +int ehca_post_recv(struct ib_qp *qp, struct ib_recv_wr *recv_wr, + struct ib_recv_wr **bad_recv_wr); + +int ehca_post_srq_recv(struct ib_srq *srq, + struct ib_recv_wr *recv_wr, + struct ib_recv_wr **bad_recv_wr); + +struct ib_srq *ehca_create_srq(struct ib_pd *pd, + struct ib_srq_init_attr *init_attr, + struct ib_udata *udata); + +int ehca_modify_srq(struct ib_srq 
*srq, struct ib_srq_attr *attr, + enum ib_srq_attr_mask attr_mask, struct ib_udata *udata); + +int ehca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr); + +int ehca_destroy_srq(struct ib_srq *srq); + +u64 ehca_define_sqp(struct ehca_shca *shca, struct ehca_qp *ibqp, + struct ib_qp_init_attr *qp_init_attr); + +int ehca_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid); + +int ehca_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid); + +struct ib_ucontext *ehca_alloc_ucontext(struct ib_device *device, + struct ib_udata *udata); + +int ehca_dealloc_ucontext(struct ib_ucontext *context); + +int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); + +int ehca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, + const struct ib_wc *in_wc, const struct ib_grh *in_grh, + const struct ib_mad_hdr *in, size_t in_mad_size, + struct ib_mad_hdr *out, size_t *out_mad_size, + u16 *out_mad_pkey_index); + +void ehca_poll_eqs(unsigned long data); + +int ehca_calc_ipd(struct ehca_shca *shca, int port, + enum ib_rate path_rate, u32 *ipd); + +void ehca_add_to_err_list(struct ehca_qp *qp, int on_sq); + +#ifdef CONFIG_PPC_64K_PAGES +void *ehca_alloc_fw_ctrlblock(gfp_t flags); +void ehca_free_fw_ctrlblock(void *ptr); +#else +#define ehca_alloc_fw_ctrlblock(flags) ((void *)get_zeroed_page(flags)) +#define ehca_free_fw_ctrlblock(ptr) free_page((unsigned long)(ptr)) +#endif + +void ehca_recover_sqp(struct ib_qp *sqp); + +#endif diff --git a/drivers/staging/rdma/ehca/ehca_main.c b/drivers/staging/rdma/ehca/ehca_main.c new file mode 100644 index 0000000..8246418 --- /dev/null +++ b/drivers/staging/rdma/ehca/ehca_main.c @@ -0,0 +1,1123 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * module start stop, hca detection + * + * Authors: Heiko J Schick + * Hoang-Nam Nguyen + * Joachim Fenkes + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. 
+ * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#ifdef CONFIG_PPC_64K_PAGES
+#include
+#endif
+
+#include
+#include
+#include
+#include "ehca_classes.h"
+#include "ehca_iverbs.h"
+#include "ehca_mrmw.h"
+#include "ehca_tools.h"
+#include "hcp_if.h"
+
+#define HCAD_VERSION "0029"
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Christoph Raisch ");
+MODULE_DESCRIPTION("IBM eServer HCA InfiniBand Device Driver");
+MODULE_VERSION(HCAD_VERSION);
+
+/* Module parameters; the defaults are described by MODULE_PARM_DESC below. */
+static bool ehca_open_aqp1 = 0;
+static int ehca_hw_level = 0;
+static bool ehca_poll_all_eqs = 1;
+
+int ehca_debug_level = 0;
+int ehca_nr_ports = -1;
+bool ehca_use_hp_mr = 0;
+int ehca_port_act_time = 30;
+int ehca_static_rate = -1;
+bool ehca_scaling_code = 0;
+int ehca_lock_hcalls = -1;
+int ehca_max_cq = -1;
+int ehca_max_qp = -1;
+
+module_param_named(open_aqp1, ehca_open_aqp1, bool, S_IRUGO);
+module_param_named(debug_level, ehca_debug_level, int, S_IRUGO);
+module_param_named(hw_level, ehca_hw_level, int, S_IRUGO);
+module_param_named(nr_ports, ehca_nr_ports, int, S_IRUGO);
+module_param_named(use_hp_mr, ehca_use_hp_mr, bool, S_IRUGO);
+module_param_named(port_act_time, ehca_port_act_time, int, S_IRUGO);
+module_param_named(poll_all_eqs, ehca_poll_all_eqs, bool, S_IRUGO);
+module_param_named(static_rate, ehca_static_rate, int, S_IRUGO);
+module_param_named(scaling_code, ehca_scaling_code, bool, S_IRUGO);
+module_param_named(lock_hcalls, ehca_lock_hcalls, bint, S_IRUGO);
+module_param_named(number_of_cqs, ehca_max_cq, int, S_IRUGO);
+module_param_named(number_of_qps, ehca_max_qp, int, S_IRUGO);
+
+MODULE_PARM_DESC(open_aqp1,
+		 "Open AQP1 on startup (default: no)");
+MODULE_PARM_DESC(debug_level,
+		 "Amount of debug output (0: none (default), 1: traces, "
+		 "2: some dumps, 3: lots)");
+MODULE_PARM_DESC(hw_level,
+		 "Hardware level (0: autosensing (default), "
+		 "0x10..0x14: eHCA, 0x20..0x23: eHCA2)");
+MODULE_PARM_DESC(nr_ports,
+		 "number of connected ports (-1: autodetect (default), "
+		 "1: port one only, 2: two ports)");
+MODULE_PARM_DESC(use_hp_mr,
+		 "Use high performance MRs (default: no)");
+MODULE_PARM_DESC(port_act_time,
+		 "Time to wait for port activation (default: 30 sec)");
+MODULE_PARM_DESC(poll_all_eqs,
+		 "Poll all event queues periodically (default: yes)");
+MODULE_PARM_DESC(static_rate,
+		 "Set permanent static rate (default: no static rate)");
+MODULE_PARM_DESC(scaling_code,
+		 "Enable scaling code (default: no)");
+MODULE_PARM_DESC(lock_hcalls,
+		 "Serialize all hCalls made by the driver "
+		 "(default: autodetect)");
+MODULE_PARM_DESC(number_of_cqs,
+		 "Max number of CQs which can be allocated "
+		 "(default: autodetect)");
+MODULE_PARM_DESC(number_of_qps,
+		 "Max number of QPs which can be allocated "
+		 "(default: autodetect)");
+
+DEFINE_RWLOCK(ehca_qp_idr_lock);
+DEFINE_RWLOCK(ehca_cq_idr_lock);
+DEFINE_IDR(ehca_qp_idr);
+DEFINE_IDR(ehca_cq_idr);
+
+static LIST_HEAD(shca_list); /* list of all registered ehcas */
+DEFINE_SPINLOCK(shca_list_lock);
+
+static struct timer_list poll_eqs_timer;
+
+#ifdef CONFIG_PPC_64K_PAGES
+static struct kmem_cache *ctblk_cache;
+
+/*
+ * Allocate a zeroed firmware control block from the ctblk slab cache.
+ * Logs an error and returns NULL if the allocation fails.
+ */
+void *ehca_alloc_fw_ctrlblock(gfp_t flags)
+{
+	void *ret = kmem_cache_zalloc(ctblk_cache, flags);
+	if (!ret)
+		ehca_gen_err("Out of memory for ctblk");
+	return ret;
+}
+
+/* Return a control block to the ctblk slab cache; NULL is ignored. */
+void ehca_free_fw_ctrlblock(void *ptr)
+{
+	if (ptr)
+		kmem_cache_free(ctblk_cache, ptr);
+
+}
+#endif
+
+/*
+ * Map an hCall return code to an errno value: 0 for H_SUCCESS, -EBUSY for
+ * busy/in-use codes, -ENOMEM for resource shortage, -EINVAL for the rest.
+ */
+int ehca2ib_return_code(u64 ehca_rc)
+{
+	switch (ehca_rc) {
+	case H_SUCCESS:
+		return 0;
+	case H_RESOURCE: /* Resource in use */
+	case H_BUSY:
+		return -EBUSY;
+	case H_NOT_ENOUGH_RESOURCES: /* insufficient resources */
+	case H_CONSTRAINED: /* resource constraint */
+	case H_NO_MEM:
+		return -ENOMEM;
+	default:
+		return -EINVAL;
+	}
+}
+
+/*
+ * Create all slab caches used by the driver (PD, CQ, QP, AV, MR/MW, small
+ * queue and, with 64K pages, firmware control blocks).
+ *
+ * Returns 0 on success.  On failure the caches created so far are torn
+ * down in reverse order and the error is returned.
+ */
+static int ehca_create_slab_caches(void)
+{
+	int ret;
+
+	ret = ehca_init_pd_cache();
+	if (ret) {
+		ehca_gen_err("Cannot create PD SLAB cache.");
+		return ret;
+	}
+
+	ret = ehca_init_cq_cache();
+	if (ret) {
+		ehca_gen_err("Cannot create CQ SLAB cache.");
+		goto create_slab_caches2;
+	}
+
+	ret = ehca_init_qp_cache();
+	if (ret) {
+		ehca_gen_err("Cannot create QP SLAB cache.");
+		goto create_slab_caches3;
+	}
+
+	ret = ehca_init_av_cache();
+	if (ret) {
+		ehca_gen_err("Cannot create AV SLAB cache.");
+		goto create_slab_caches4;
+	}
+
+	ret = ehca_init_mrmw_cache();
+	if (ret) {
+		ehca_gen_err("Cannot create MR&MW SLAB cache.");
+		goto create_slab_caches5;
+	}
+
+	ret = ehca_init_small_qp_cache();
+	if (ret) {
+		ehca_gen_err("Cannot create small queue SLAB cache.");
+		goto create_slab_caches6;
+	}
+
+#ifdef CONFIG_PPC_64K_PAGES
+	ctblk_cache = kmem_cache_create("ehca_cache_ctblk",
+					EHCA_PAGESIZE, H_CB_ALIGNMENT,
+					SLAB_HWCACHE_ALIGN,
+					NULL);
+	if (!ctblk_cache) {
+		ehca_gen_err("Cannot create ctblk SLAB cache.");
+		/* no label undoes the small queue cache, so do it here */
+		ehca_cleanup_small_qp_cache();
+		ret = -ENOMEM;
+		goto create_slab_caches6;
+	}
+#endif
+	return 0;
+
+create_slab_caches6:
+	ehca_cleanup_mrmw_cache();
+
+create_slab_caches5:
+	ehca_cleanup_av_cache();
+
+create_slab_caches4:
+	ehca_cleanup_qp_cache();
+
+create_slab_caches3:
+	ehca_cleanup_cq_cache();
+
+create_slab_caches2:
+	ehca_cleanup_pd_cache();
+
+	return ret;
+}
+
+/* Destroy every slab cache created by ehca_create_slab_caches(). */
+static void ehca_destroy_slab_caches(void)
+{
+	ehca_cleanup_small_qp_cache();
+	ehca_cleanup_mrmw_cache();
+	ehca_cleanup_av_cache();
+	ehca_cleanup_qp_cache();
+	ehca_cleanup_cq_cache();
+	ehca_cleanup_pd_cache();
+#ifdef CONFIG_PPC_64K_PAGES
+	if (ctblk_cache)
+		kmem_cache_destroy(ctblk_cache);
+#endif
+}
+
+#define EHCA_HCAAVER EHCA_BMASK_IBM(32, 39)
+#define EHCA_REVID EHCA_BMASK_IBM(40, 63)
+
+/* Capability bit masks and the names printed for them while probing. */
+static struct cap_descr {
+	u64 mask;
+	char *descr;
+} hca_cap_descr[] = {
+	{ HCA_CAP_AH_PORT_NR_CHECK, "HCA_CAP_AH_PORT_NR_CHECK" },
+	{ HCA_CAP_ATOMIC, "HCA_CAP_ATOMIC" },
+	{ HCA_CAP_AUTO_PATH_MIG, "HCA_CAP_AUTO_PATH_MIG" },
+	{ HCA_CAP_BAD_P_KEY_CTR, "HCA_CAP_BAD_P_KEY_CTR" },
+	{ HCA_CAP_SQD_RTS_PORT_CHANGE, "HCA_CAP_SQD_RTS_PORT_CHANGE" },
+	{ HCA_CAP_CUR_QP_STATE_MOD, "HCA_CAP_CUR_QP_STATE_MOD" },
+	{ HCA_CAP_INIT_TYPE, "HCA_CAP_INIT_TYPE" },
+	{ HCA_CAP_PORT_ACTIVE_EVENT, "HCA_CAP_PORT_ACTIVE_EVENT" },
+	{ HCA_CAP_Q_KEY_VIOL_CTR, "HCA_CAP_Q_KEY_VIOL_CTR" },
+	{ HCA_CAP_WQE_RESIZE, "HCA_CAP_WQE_RESIZE" },
+	{ HCA_CAP_RAW_PACKET_MCAST, "HCA_CAP_RAW_PACKET_MCAST" },
+	{ HCA_CAP_SHUTDOWN_PORT, "HCA_CAP_SHUTDOWN_PORT" },
+	{ HCA_CAP_RC_LL_QP, "HCA_CAP_RC_LL_QP" },
+	{ HCA_CAP_SRQ, "HCA_CAP_SRQ" },
+	{ HCA_CAP_UD_LL_QP, "HCA_CAP_UD_LL_QP" },
+	{ HCA_CAP_RESIZE_MR, "HCA_CAP_RESIZE_MR" },
+	{ HCA_CAP_MINI_QP, "HCA_CAP_MINI_QP" },
+	{ HCA_CAP_H_ALLOC_RES_SYNC, "HCA_CAP_H_ALLOC_RES_SYNC" },
+};
+
+/*
+ * Query the adapter and fill in the derived fields of @shca: number of
+ * ports, hardware level, capability flags, supported MR page sizes, QP and
+ * CQ limits and the maximum MTU (taken from port 1).  Also autodetects
+ * ehca_lock_hcalls when it is still -1.
+ *
+ * Returns 0 on success, -ENOMEM if no control block could be allocated,
+ * or -EPERM if one of the firmware queries fails.
+ */
+static int ehca_sense_attributes(struct ehca_shca *shca)
+{
+	int i, ret = 0;
+	u64 h_ret;
+	struct hipz_query_hca *rblock;
+	struct hipz_query_port *port;
+	const char *loc_code;
+
+	/* pairs of (capability flag, page size in bytes) */
+	static const u32 pgsize_map[] = {
+		HCA_CAP_MR_PGSIZE_4K, 0x1000,
+		HCA_CAP_MR_PGSIZE_64K, 0x10000,
+		HCA_CAP_MR_PGSIZE_1M, 0x100000,
+		HCA_CAP_MR_PGSIZE_16M, 0x1000000,
+	};
+
+	ehca_gen_dbg("Probing adapter %s...",
+		     shca->ofdev->dev.of_node->full_name);
+	loc_code = of_get_property(shca->ofdev->dev.of_node, "ibm,loc-code",
+				   NULL);
+	if (loc_code)
+		ehca_gen_dbg(" ... location lode=%s", loc_code);
+
+	rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
+	if (!rblock) {
+		ehca_gen_err("Cannot allocate rblock memory.");
+		return -ENOMEM;
+	}
+
+	h_ret = hipz_h_query_hca(shca->ipz_hca_handle, rblock);
+	if (h_ret != H_SUCCESS) {
+		ehca_gen_err("Cannot query device properties. h_ret=%lli",
+			     h_ret);
+		ret = -EPERM;
+		goto sense_attributes1;
+	}
+
+	if (ehca_nr_ports == 1)
+		shca->num_ports = 1;
+	else
+		shca->num_ports = (u8)rblock->num_ports;
+
+	ehca_gen_dbg(" ... found %x ports", rblock->num_ports);
+
+	if (ehca_hw_level == 0) {
+		u32 hcaaver;
+		u32 revid;
+
+		hcaaver = EHCA_BMASK_GET(EHCA_HCAAVER, rblock->hw_ver);
+		revid = EHCA_BMASK_GET(EHCA_REVID, rblock->hw_ver);
+
+		ehca_gen_dbg(" ... hardware version=%x:%x", hcaaver, revid);
+
+		if (hcaaver == 1) {
+			if (revid <= 3)
+				shca->hw_level = 0x10 | (revid + 1);
+			else
+				shca->hw_level = 0x14;
+		} else if (hcaaver == 2) {
+			if (revid == 0)
+				shca->hw_level = 0x21;
+			else if (revid == 0x10)
+				shca->hw_level = 0x22;
+			else if (revid == 0x20 || revid == 0x21)
+				shca->hw_level = 0x23;
+		}
+
+		if (!shca->hw_level) {
+			ehca_gen_warn("unknown hardware version"
+				      " - assuming default level");
+			shca->hw_level = 0x22;
+		}
+	} else
+		shca->hw_level = ehca_hw_level;
+	ehca_gen_dbg(" ... hardware level=%x", shca->hw_level);
+
+	shca->hca_cap = rblock->hca_cap_indicators;
+	ehca_gen_dbg(" ... HCA capabilities:");
+	for (i = 0; i < ARRAY_SIZE(hca_cap_descr); i++)
+		if (EHCA_BMASK_GET(hca_cap_descr[i].mask, shca->hca_cap))
+			ehca_gen_dbg(" %s", hca_cap_descr[i].descr);
+
+	/* Autodetect hCall locking -- the "H_ALLOC_RESOURCE synced" flag is
+	 * a firmware property, so it's valid across all adapters
+	 */
+	if (ehca_lock_hcalls == -1)
+		ehca_lock_hcalls = !EHCA_BMASK_GET(HCA_CAP_H_ALLOC_RES_SYNC,
+						   shca->hca_cap);
+
+	/* translate supported MR page sizes; always support 4K */
+	shca->hca_cap_mr_pgsize = EHCA_PAGESIZE;
+	for (i = 0; i < ARRAY_SIZE(pgsize_map); i += 2)
+		if (rblock->memory_page_size_supported & pgsize_map[i])
+			shca->hca_cap_mr_pgsize |= pgsize_map[i + 1];
+
+	/* Set maximum number of CQs and QPs to calculate EQ size */
+	if (shca->max_num_qps == -1)
+		shca->max_num_qps = min_t(int, rblock->max_qp,
+					  EHCA_MAX_NUM_QUEUES);
+	else if (shca->max_num_qps < 1 || shca->max_num_qps > rblock->max_qp) {
+		ehca_gen_warn("The requested number of QPs is out of range "
+			      "(1 - %i) specified by HW. Value is set to %i",
+			      rblock->max_qp, rblock->max_qp);
+		shca->max_num_qps = rblock->max_qp;
+	}
+
+	if (shca->max_num_cqs == -1)
+		shca->max_num_cqs = min_t(int, rblock->max_cq,
+					  EHCA_MAX_NUM_QUEUES);
+	else if (shca->max_num_cqs < 1 || shca->max_num_cqs > rblock->max_cq) {
+		ehca_gen_warn("The requested number of CQs is out of range "
+			      "(1 - %i) specified by HW. Value is set to %i",
+			      rblock->max_cq, rblock->max_cq);
+		/* actually apply the limit the warning announces */
+		shca->max_num_cqs = rblock->max_cq;
+	}
+
+	/* query max MTU from first port -- it's the same for all ports */
+	port = (struct hipz_query_port *)rblock;
+	h_ret = hipz_h_query_port(shca->ipz_hca_handle, 1, port);
+	if (h_ret != H_SUCCESS) {
+		ehca_gen_err("Cannot query port properties. h_ret=%lli",
+			     h_ret);
+		ret = -EPERM;
+		goto sense_attributes1;
+	}
+
+	shca->max_mtu = port->max_mtu;
+
+sense_attributes1:
+	ehca_free_fw_ctrlblock(rblock);
+	return ret;
+}
+
+/*
+ * Read the node GUID from the adapter into shca->ib_device.node_guid.
+ * Returns 0 on success, -ENOMEM if no control block could be allocated,
+ * or -EINVAL if the adapter query fails.
+ */
+static int init_node_guid(struct ehca_shca *shca)
+{
+	int ret = 0;
+	struct hipz_query_hca *rblock;
+
+	rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
+	if (!rblock) {
+		ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
+		return -ENOMEM;
+	}
+
+	if (hipz_h_query_hca(shca->ipz_hca_handle, rblock) != H_SUCCESS) {
+		ehca_err(&shca->ib_device, "Can't query device properties");
+		ret = -EINVAL;
+		goto init_node_guid1;
+	}
+
+	memcpy(&shca->ib_device.node_guid, &rblock->node_guid, sizeof(u64));
+
+init_node_guid1:
+	ehca_free_fw_ctrlblock(rblock);
+	return ret;
+}
+
+/*
+ * Fill in the immutable attributes of @port_num: table lengths come from
+ * ehca_query_port(), the port is reported as InfiniBand with the standard
+ * MAD size.  Returns the ehca_query_port() error, or 0 on success.
+ */
+static int ehca_port_immutable(struct ib_device *ibdev, u8 port_num,
+			       struct ib_port_immutable *immutable)
+{
+	struct ib_port_attr attr;
+	int err;
+
+	err = ehca_query_port(ibdev, port_num, &attr);
+	if (err)
+		return err;
+
+	immutable->pkey_tbl_len = attr.pkey_tbl_len;
+	immutable->gid_tbl_len = attr.gid_tbl_len;
+	immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
+	immutable->max_mad_size = IB_MGMT_MAD_SIZE;
+
+	return 0;
+}
+
+static int ehca_init_device(struct ehca_shca *shca)
+{
+	int ret;
+
+	ret = init_node_guid(shca);
+	if (ret)
+		return ret;
+ + strlcpy(shca->ib_device.name, "ehca%d", IB_DEVICE_NAME_MAX); + shca->ib_device.owner = THIS_MODULE; + + shca->ib_device.uverbs_abi_ver = 8; + shca->ib_device.uverbs_cmd_mask = + (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | + (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | + (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | + (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_REG_MR) | + (1ull << IB_USER_VERBS_CMD_DEREG_MR) | + (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | + (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | + (1ull << IB_USER_VERBS_CMD_CREATE_QP) | + (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | + (1ull << IB_USER_VERBS_CMD_QUERY_QP) | + (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | + (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | + (1ull << IB_USER_VERBS_CMD_DETACH_MCAST); + + shca->ib_device.node_type = RDMA_NODE_IB_CA; + shca->ib_device.phys_port_cnt = shca->num_ports; + shca->ib_device.num_comp_vectors = 1; + shca->ib_device.dma_device = &shca->ofdev->dev; + shca->ib_device.query_device = ehca_query_device; + shca->ib_device.query_port = ehca_query_port; + shca->ib_device.query_gid = ehca_query_gid; + shca->ib_device.query_pkey = ehca_query_pkey; + /* shca->in_device.modify_device = ehca_modify_device */ + shca->ib_device.modify_port = ehca_modify_port; + shca->ib_device.alloc_ucontext = ehca_alloc_ucontext; + shca->ib_device.dealloc_ucontext = ehca_dealloc_ucontext; + shca->ib_device.alloc_pd = ehca_alloc_pd; + shca->ib_device.dealloc_pd = ehca_dealloc_pd; + shca->ib_device.create_ah = ehca_create_ah; + /* shca->ib_device.modify_ah = ehca_modify_ah; */ + shca->ib_device.query_ah = ehca_query_ah; + shca->ib_device.destroy_ah = ehca_destroy_ah; + shca->ib_device.create_qp = ehca_create_qp; + shca->ib_device.modify_qp = ehca_modify_qp; + shca->ib_device.query_qp = ehca_query_qp; + shca->ib_device.destroy_qp = ehca_destroy_qp; + shca->ib_device.post_send = ehca_post_send; + 
shca->ib_device.post_recv = ehca_post_recv; + shca->ib_device.create_cq = ehca_create_cq; + shca->ib_device.destroy_cq = ehca_destroy_cq; + shca->ib_device.resize_cq = ehca_resize_cq; + shca->ib_device.poll_cq = ehca_poll_cq; + /* shca->ib_device.peek_cq = ehca_peek_cq; */ + shca->ib_device.req_notify_cq = ehca_req_notify_cq; + /* shca->ib_device.req_ncomp_notif = ehca_req_ncomp_notif; */ + shca->ib_device.get_dma_mr = ehca_get_dma_mr; + shca->ib_device.reg_phys_mr = ehca_reg_phys_mr; + shca->ib_device.reg_user_mr = ehca_reg_user_mr; + shca->ib_device.query_mr = ehca_query_mr; + shca->ib_device.dereg_mr = ehca_dereg_mr; + shca->ib_device.rereg_phys_mr = ehca_rereg_phys_mr; + shca->ib_device.alloc_mw = ehca_alloc_mw; + shca->ib_device.bind_mw = ehca_bind_mw; + shca->ib_device.dealloc_mw = ehca_dealloc_mw; + shca->ib_device.alloc_fmr = ehca_alloc_fmr; + shca->ib_device.map_phys_fmr = ehca_map_phys_fmr; + shca->ib_device.unmap_fmr = ehca_unmap_fmr; + shca->ib_device.dealloc_fmr = ehca_dealloc_fmr; + shca->ib_device.attach_mcast = ehca_attach_mcast; + shca->ib_device.detach_mcast = ehca_detach_mcast; + shca->ib_device.process_mad = ehca_process_mad; + shca->ib_device.mmap = ehca_mmap; + shca->ib_device.dma_ops = &ehca_dma_mapping_ops; + shca->ib_device.get_port_immutable = ehca_port_immutable; + + if (EHCA_BMASK_GET(HCA_CAP_SRQ, shca->hca_cap)) { + shca->ib_device.uverbs_cmd_mask |= + (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | + (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | + (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ); + + shca->ib_device.create_srq = ehca_create_srq; + shca->ib_device.modify_srq = ehca_modify_srq; + shca->ib_device.query_srq = ehca_query_srq; + shca->ib_device.destroy_srq = ehca_destroy_srq; + shca->ib_device.post_srq_recv = ehca_post_srq_recv; + } + + return ret; +} + +static int ehca_create_aqp1(struct ehca_shca *shca, u32 port) +{ + struct ehca_sport *sport = &shca->sport[port - 1]; + struct ib_cq *ibcq; + struct 
ib_qp *ibqp; + struct ib_qp_init_attr qp_init_attr; + struct ib_cq_init_attr cq_attr = {}; + int ret; + + if (sport->ibcq_aqp1) { + ehca_err(&shca->ib_device, "AQP1 CQ is already created."); + return -EPERM; + } + + cq_attr.cqe = 10; + ibcq = ib_create_cq(&shca->ib_device, NULL, NULL, (void *)(-1), + &cq_attr); + if (IS_ERR(ibcq)) { + ehca_err(&shca->ib_device, "Cannot create AQP1 CQ."); + return PTR_ERR(ibcq); + } + sport->ibcq_aqp1 = ibcq; + + if (sport->ibqp_sqp[IB_QPT_GSI]) { + ehca_err(&shca->ib_device, "AQP1 QP is already created."); + ret = -EPERM; + goto create_aqp1; + } + + memset(&qp_init_attr, 0, sizeof(struct ib_qp_init_attr)); + qp_init_attr.send_cq = ibcq; + qp_init_attr.recv_cq = ibcq; + qp_init_attr.sq_sig_type = IB_SIGNAL_ALL_WR; + qp_init_attr.cap.max_send_wr = 100; + qp_init_attr.cap.max_recv_wr = 100; + qp_init_attr.cap.max_send_sge = 2; + qp_init_attr.cap.max_recv_sge = 1; + qp_init_attr.qp_type = IB_QPT_GSI; + qp_init_attr.port_num = port; + qp_init_attr.qp_context = NULL; + qp_init_attr.event_handler = NULL; + qp_init_attr.srq = NULL; + + ibqp = ib_create_qp(&shca->pd->ib_pd, &qp_init_attr); + if (IS_ERR(ibqp)) { + ehca_err(&shca->ib_device, "Cannot create AQP1 QP."); + ret = PTR_ERR(ibqp); + goto create_aqp1; + } + sport->ibqp_sqp[IB_QPT_GSI] = ibqp; + + return 0; + +create_aqp1: + ib_destroy_cq(sport->ibcq_aqp1); + return ret; +} + +static int ehca_destroy_aqp1(struct ehca_sport *sport) +{ + int ret; + + ret = ib_destroy_qp(sport->ibqp_sqp[IB_QPT_GSI]); + if (ret) { + ehca_gen_err("Cannot destroy AQP1 QP. ret=%i", ret); + return ret; + } + + ret = ib_destroy_cq(sport->ibcq_aqp1); + if (ret) + ehca_gen_err("Cannot destroy AQP1 CQ. 
ret=%i", ret); + + return ret; +} + +static ssize_t ehca_show_debug_level(struct device_driver *ddp, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%d\n", ehca_debug_level); +} + +static ssize_t ehca_store_debug_level(struct device_driver *ddp, + const char *buf, size_t count) +{ + int value = (*buf) - '0'; + if (value >= 0 && value <= 9) + ehca_debug_level = value; + return 1; +} + +static DRIVER_ATTR(debug_level, S_IRUSR | S_IWUSR, + ehca_show_debug_level, ehca_store_debug_level); + +static struct attribute *ehca_drv_attrs[] = { + &driver_attr_debug_level.attr, + NULL +}; + +static struct attribute_group ehca_drv_attr_grp = { + .attrs = ehca_drv_attrs +}; + +static const struct attribute_group *ehca_drv_attr_groups[] = { + &ehca_drv_attr_grp, + NULL, +}; + +#define EHCA_RESOURCE_ATTR(name) \ +static ssize_t ehca_show_##name(struct device *dev, \ + struct device_attribute *attr, \ + char *buf) \ +{ \ + struct ehca_shca *shca; \ + struct hipz_query_hca *rblock; \ + int data; \ + \ + shca = dev_get_drvdata(dev); \ + \ + rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); \ + if (!rblock) { \ + dev_err(dev, "Can't allocate rblock memory.\n"); \ + return 0; \ + } \ + \ + if (hipz_h_query_hca(shca->ipz_hca_handle, rblock) != H_SUCCESS) { \ + dev_err(dev, "Can't query device properties\n"); \ + ehca_free_fw_ctrlblock(rblock); \ + return 0; \ + } \ + \ + data = rblock->name; \ + ehca_free_fw_ctrlblock(rblock); \ + \ + if ((strcmp(#name, "num_ports") == 0) && (ehca_nr_ports == 1)) \ + return snprintf(buf, 256, "1\n"); \ + else \ + return snprintf(buf, 256, "%d\n", data); \ + \ +} \ +static DEVICE_ATTR(name, S_IRUGO, ehca_show_##name, NULL); + +EHCA_RESOURCE_ATTR(num_ports); +EHCA_RESOURCE_ATTR(hw_ver); +EHCA_RESOURCE_ATTR(max_eq); +EHCA_RESOURCE_ATTR(cur_eq); +EHCA_RESOURCE_ATTR(max_cq); +EHCA_RESOURCE_ATTR(cur_cq); +EHCA_RESOURCE_ATTR(max_qp); +EHCA_RESOURCE_ATTR(cur_qp); +EHCA_RESOURCE_ATTR(max_mr); +EHCA_RESOURCE_ATTR(cur_mr); +EHCA_RESOURCE_ATTR(max_mw); 
+EHCA_RESOURCE_ATTR(cur_mw); +EHCA_RESOURCE_ATTR(max_pd); +EHCA_RESOURCE_ATTR(max_ah); + +static ssize_t ehca_show_adapter_handle(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct ehca_shca *shca = dev_get_drvdata(dev); + + return sprintf(buf, "%llx\n", shca->ipz_hca_handle.handle); + +} +static DEVICE_ATTR(adapter_handle, S_IRUGO, ehca_show_adapter_handle, NULL); + +static struct attribute *ehca_dev_attrs[] = { + &dev_attr_adapter_handle.attr, + &dev_attr_num_ports.attr, + &dev_attr_hw_ver.attr, + &dev_attr_max_eq.attr, + &dev_attr_cur_eq.attr, + &dev_attr_max_cq.attr, + &dev_attr_cur_cq.attr, + &dev_attr_max_qp.attr, + &dev_attr_cur_qp.attr, + &dev_attr_max_mr.attr, + &dev_attr_cur_mr.attr, + &dev_attr_max_mw.attr, + &dev_attr_cur_mw.attr, + &dev_attr_max_pd.attr, + &dev_attr_max_ah.attr, + NULL +}; + +static struct attribute_group ehca_dev_attr_grp = { + .attrs = ehca_dev_attrs +}; + +static int ehca_probe(struct platform_device *dev) +{ + struct ehca_shca *shca; + const u64 *handle; + struct ib_pd *ibpd; + int ret, i, eq_size; + unsigned long flags; + + handle = of_get_property(dev->dev.of_node, "ibm,hca-handle", NULL); + if (!handle) { + ehca_gen_err("Cannot get eHCA handle for adapter: %s.", + dev->dev.of_node->full_name); + return -ENODEV; + } + + if (!(*handle)) { + ehca_gen_err("Wrong eHCA handle for adapter: %s.", + dev->dev.of_node->full_name); + return -ENODEV; + } + + shca = (struct ehca_shca *)ib_alloc_device(sizeof(*shca)); + if (!shca) { + ehca_gen_err("Cannot allocate shca memory."); + return -ENOMEM; + } + + mutex_init(&shca->modify_mutex); + atomic_set(&shca->num_cqs, 0); + atomic_set(&shca->num_qps, 0); + shca->max_num_qps = ehca_max_qp; + shca->max_num_cqs = ehca_max_cq; + + for (i = 0; i < ARRAY_SIZE(shca->sport); i++) + spin_lock_init(&shca->sport[i].mod_sqp_lock); + + shca->ofdev = dev; + shca->ipz_hca_handle.handle = *handle; + dev_set_drvdata(&dev->dev, shca); + + ret = ehca_sense_attributes(shca); + if (ret < 
0) { + ehca_gen_err("Cannot sense eHCA attributes."); + goto probe1; + } + + ret = ehca_init_device(shca); + if (ret) { + ehca_gen_err("Cannot init ehca device struct"); + goto probe1; + } + + eq_size = 2 * shca->max_num_cqs + 4 * shca->max_num_qps; + /* create event queues */ + ret = ehca_create_eq(shca, &shca->eq, EHCA_EQ, eq_size); + if (ret) { + ehca_err(&shca->ib_device, "Cannot create EQ."); + goto probe1; + } + + ret = ehca_create_eq(shca, &shca->neq, EHCA_NEQ, 513); + if (ret) { + ehca_err(&shca->ib_device, "Cannot create NEQ."); + goto probe3; + } + + /* create internal protection domain */ + ibpd = ehca_alloc_pd(&shca->ib_device, (void *)(-1), NULL); + if (IS_ERR(ibpd)) { + ehca_err(&shca->ib_device, "Cannot create internal PD."); + ret = PTR_ERR(ibpd); + goto probe4; + } + + shca->pd = container_of(ibpd, struct ehca_pd, ib_pd); + shca->pd->ib_pd.device = &shca->ib_device; + + /* create internal max MR */ + ret = ehca_reg_internal_maxmr(shca, shca->pd, &shca->maxmr); + + if (ret) { + ehca_err(&shca->ib_device, "Cannot create internal MR ret=%i", + ret); + goto probe5; + } + + ret = ib_register_device(&shca->ib_device, NULL); + if (ret) { + ehca_err(&shca->ib_device, + "ib_register_device() failed ret=%i", ret); + goto probe6; + } + + /* create AQP1 for port 1 */ + if (ehca_open_aqp1 == 1) { + shca->sport[0].port_state = IB_PORT_DOWN; + ret = ehca_create_aqp1(shca, 1); + if (ret) { + ehca_err(&shca->ib_device, + "Cannot create AQP1 for port 1."); + goto probe7; + } + } + + /* create AQP1 for port 2 */ + if ((ehca_open_aqp1 == 1) && (shca->num_ports == 2)) { + shca->sport[1].port_state = IB_PORT_DOWN; + ret = ehca_create_aqp1(shca, 2); + if (ret) { + ehca_err(&shca->ib_device, + "Cannot create AQP1 for port 2."); + goto probe8; + } + } + + ret = sysfs_create_group(&dev->dev.kobj, &ehca_dev_attr_grp); + if (ret) /* only complain; we can live without attributes */ + ehca_err(&shca->ib_device, + "Cannot create device attributes ret=%d", ret); + + 
spin_lock_irqsave(&shca_list_lock, flags); + list_add(&shca->shca_list, &shca_list); + spin_unlock_irqrestore(&shca_list_lock, flags); + + return 0; + +probe8: + ret = ehca_destroy_aqp1(&shca->sport[0]); + if (ret) + ehca_err(&shca->ib_device, + "Cannot destroy AQP1 for port 1. ret=%i", ret); + +probe7: + ib_unregister_device(&shca->ib_device); + +probe6: + ret = ehca_dereg_internal_maxmr(shca); + if (ret) + ehca_err(&shca->ib_device, + "Cannot destroy internal MR. ret=%x", ret); + +probe5: + ret = ehca_dealloc_pd(&shca->pd->ib_pd); + if (ret) + ehca_err(&shca->ib_device, + "Cannot destroy internal PD. ret=%x", ret); + +probe4: + ret = ehca_destroy_eq(shca, &shca->neq); + if (ret) + ehca_err(&shca->ib_device, + "Cannot destroy NEQ. ret=%x", ret); + +probe3: + ret = ehca_destroy_eq(shca, &shca->eq); + if (ret) + ehca_err(&shca->ib_device, + "Cannot destroy EQ. ret=%x", ret); + +probe1: + ib_dealloc_device(&shca->ib_device); + + return -EINVAL; +} + +static int ehca_remove(struct platform_device *dev) +{ + struct ehca_shca *shca = dev_get_drvdata(&dev->dev); + unsigned long flags; + int ret; + + sysfs_remove_group(&dev->dev.kobj, &ehca_dev_attr_grp); + + if (ehca_open_aqp1 == 1) { + int i; + for (i = 0; i < shca->num_ports; i++) { + ret = ehca_destroy_aqp1(&shca->sport[i]); + if (ret) + ehca_err(&shca->ib_device, + "Cannot destroy AQP1 for port %x " + "ret=%i", ret, i); + } + } + + ib_unregister_device(&shca->ib_device); + + ret = ehca_dereg_internal_maxmr(shca); + if (ret) + ehca_err(&shca->ib_device, + "Cannot destroy internal MR. ret=%i", ret); + + ret = ehca_dealloc_pd(&shca->pd->ib_pd); + if (ret) + ehca_err(&shca->ib_device, + "Cannot destroy internal PD. ret=%i", ret); + + ret = ehca_destroy_eq(shca, &shca->eq); + if (ret) + ehca_err(&shca->ib_device, "Cannot destroy EQ. ret=%i", ret); + + ret = ehca_destroy_eq(shca, &shca->neq); + if (ret) + ehca_err(&shca->ib_device, "Canot destroy NEQ. 
ret=%i", ret); + + ib_dealloc_device(&shca->ib_device); + + spin_lock_irqsave(&shca_list_lock, flags); + list_del(&shca->shca_list); + spin_unlock_irqrestore(&shca_list_lock, flags); + + return ret; +} + +static struct of_device_id ehca_device_table[] = +{ + { + .name = "lhca", + .compatible = "IBM,lhca", + }, + {}, +}; +MODULE_DEVICE_TABLE(of, ehca_device_table); + +static struct platform_driver ehca_driver = { + .probe = ehca_probe, + .remove = ehca_remove, + .driver = { + .name = "ehca", + .owner = THIS_MODULE, + .groups = ehca_drv_attr_groups, + .of_match_table = ehca_device_table, + }, +}; + +void ehca_poll_eqs(unsigned long data) +{ + struct ehca_shca *shca; + + spin_lock(&shca_list_lock); + list_for_each_entry(shca, &shca_list, shca_list) { + if (shca->eq.is_initialized) { + /* call deadman proc only if eq ptr does not change */ + struct ehca_eq *eq = &shca->eq; + int max = 3; + volatile u64 q_ofs, q_ofs2; + unsigned long flags; + spin_lock_irqsave(&eq->spinlock, flags); + q_ofs = eq->ipz_queue.current_q_offset; + spin_unlock_irqrestore(&eq->spinlock, flags); + do { + spin_lock_irqsave(&eq->spinlock, flags); + q_ofs2 = eq->ipz_queue.current_q_offset; + spin_unlock_irqrestore(&eq->spinlock, flags); + max--; + } while (q_ofs == q_ofs2 && max > 0); + if (q_ofs == q_ofs2) + ehca_process_eq(shca, 0); + } + } + mod_timer(&poll_eqs_timer, round_jiffies(jiffies + HZ)); + spin_unlock(&shca_list_lock); +} + +static int ehca_mem_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + static unsigned long ehca_dmem_warn_time; + unsigned long flags; + + switch (action) { + case MEM_CANCEL_OFFLINE: + case MEM_CANCEL_ONLINE: + case MEM_ONLINE: + case MEM_OFFLINE: + return NOTIFY_OK; + case MEM_GOING_ONLINE: + case MEM_GOING_OFFLINE: + /* only ok if no hca is attached to the lpar */ + spin_lock_irqsave(&shca_list_lock, flags); + if (list_empty(&shca_list)) { + spin_unlock_irqrestore(&shca_list_lock, flags); + return NOTIFY_OK; + } else { + 
spin_unlock_irqrestore(&shca_list_lock, flags); + if (printk_timed_ratelimit(&ehca_dmem_warn_time, + 30 * 1000)) + ehca_gen_err("DMEM operations are not allowed" + "in conjunction with eHCA"); + return NOTIFY_BAD; + } + } + return NOTIFY_OK; +} + +static struct notifier_block ehca_mem_nb = { + .notifier_call = ehca_mem_notifier, +}; + +static int __init ehca_module_init(void) +{ + int ret; + + printk(KERN_INFO "eHCA Infiniband Device Driver " + "(Version " HCAD_VERSION ")\n"); + + ret = ehca_create_comp_pool(); + if (ret) { + ehca_gen_err("Cannot create comp pool."); + return ret; + } + + ret = ehca_create_slab_caches(); + if (ret) { + ehca_gen_err("Cannot create SLAB caches"); + ret = -ENOMEM; + goto module_init1; + } + + ret = ehca_create_busmap(); + if (ret) { + ehca_gen_err("Cannot create busmap."); + goto module_init2; + } + + ret = ibmebus_register_driver(&ehca_driver); + if (ret) { + ehca_gen_err("Cannot register eHCA device driver"); + ret = -EINVAL; + goto module_init3; + } + + ret = register_memory_notifier(&ehca_mem_nb); + if (ret) { + ehca_gen_err("Failed registering memory add/remove notifier"); + goto module_init4; + } + + if (ehca_poll_all_eqs != 1) { + ehca_gen_err("WARNING!!!"); + ehca_gen_err("It is possible to lose interrupts."); + } else { + init_timer(&poll_eqs_timer); + poll_eqs_timer.function = ehca_poll_eqs; + poll_eqs_timer.expires = jiffies + HZ; + add_timer(&poll_eqs_timer); + } + + return 0; + +module_init4: + ibmebus_unregister_driver(&ehca_driver); + +module_init3: + ehca_destroy_busmap(); + +module_init2: + ehca_destroy_slab_caches(); + +module_init1: + ehca_destroy_comp_pool(); + return ret; +}; + +static void __exit ehca_module_exit(void) +{ + if (ehca_poll_all_eqs == 1) + del_timer_sync(&poll_eqs_timer); + + ibmebus_unregister_driver(&ehca_driver); + + unregister_memory_notifier(&ehca_mem_nb); + + ehca_destroy_busmap(); + + ehca_destroy_slab_caches(); + + ehca_destroy_comp_pool(); + + idr_destroy(&ehca_cq_idr); + 
idr_destroy(&ehca_qp_idr); +}; + +module_init(ehca_module_init); +module_exit(ehca_module_exit); diff --git a/drivers/staging/rdma/ehca/ehca_mcast.c b/drivers/staging/rdma/ehca/ehca_mcast.c new file mode 100644 index 0000000..cec1815 --- /dev/null +++ b/drivers/staging/rdma/ehca/ehca_mcast.c @@ -0,0 +1,131 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * mcast functions + * + * Authors: Khadija Souissi + * Waleri Fomin + * Reinhard Ernst + * Hoang-Nam Nguyen + * Heiko J Schick + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include "ehca_classes.h" +#include "ehca_tools.h" +#include "ehca_qes.h" +#include "ehca_iverbs.h" +#include "hcp_if.h" + +#define MAX_MC_LID 0xFFFE +#define MIN_MC_LID 0xC000 /* Multicast limits */ +#define EHCA_VALID_MULTICAST_GID(gid) ((gid)[0] == 0xFF) +#define EHCA_VALID_MULTICAST_LID(lid) \ + (((lid) >= MIN_MC_LID) && ((lid) <= MAX_MC_LID)) + +int ehca_attach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) +{ + struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp); + struct ehca_shca *shca = container_of(ibqp->device, struct ehca_shca, + ib_device); + union ib_gid my_gid; + u64 subnet_prefix, interface_id, h_ret; + + if (ibqp->qp_type != IB_QPT_UD) { + ehca_err(ibqp->device, "invalid qp_type=%x", ibqp->qp_type); + return -EINVAL; + } + + if (!(EHCA_VALID_MULTICAST_GID(gid->raw))) { + ehca_err(ibqp->device, "invalid mulitcast gid"); + return -EINVAL; + } else if ((lid < MIN_MC_LID) || (lid > MAX_MC_LID)) { + ehca_err(ibqp->device, "invalid mulitcast lid=%x", lid); + return -EINVAL; + } + + memcpy(&my_gid, gid->raw, sizeof(union ib_gid)); + + subnet_prefix = be64_to_cpu(my_gid.global.subnet_prefix); + interface_id = be64_to_cpu(my_gid.global.interface_id); + h_ret = hipz_h_attach_mcqp(shca->ipz_hca_handle, + my_qp->ipz_qp_handle, + my_qp->galpas.kernel, + lid, subnet_prefix, interface_id); + if (h_ret != H_SUCCESS) + ehca_err(ibqp->device, + "ehca_qp=%p qp_num=%x hipz_h_attach_mcqp() 
failed " + "h_ret=%lli", my_qp, ibqp->qp_num, h_ret); + + return ehca2ib_return_code(h_ret); +} + +int ehca_detach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) +{ + struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp); + struct ehca_shca *shca = container_of(ibqp->pd->device, + struct ehca_shca, ib_device); + union ib_gid my_gid; + u64 subnet_prefix, interface_id, h_ret; + + if (ibqp->qp_type != IB_QPT_UD) { + ehca_err(ibqp->device, "invalid qp_type %x", ibqp->qp_type); + return -EINVAL; + } + + if (!(EHCA_VALID_MULTICAST_GID(gid->raw))) { + ehca_err(ibqp->device, "invalid mulitcast gid"); + return -EINVAL; + } else if ((lid < MIN_MC_LID) || (lid > MAX_MC_LID)) { + ehca_err(ibqp->device, "invalid mulitcast lid=%x", lid); + return -EINVAL; + } + + memcpy(&my_gid, gid->raw, sizeof(union ib_gid)); + + subnet_prefix = be64_to_cpu(my_gid.global.subnet_prefix); + interface_id = be64_to_cpu(my_gid.global.interface_id); + h_ret = hipz_h_detach_mcqp(shca->ipz_hca_handle, + my_qp->ipz_qp_handle, + my_qp->galpas.kernel, + lid, subnet_prefix, interface_id); + if (h_ret != H_SUCCESS) + ehca_err(ibqp->device, + "ehca_qp=%p qp_num=%x hipz_h_detach_mcqp() failed " + "h_ret=%lli", my_qp, ibqp->qp_num, h_ret); + + return ehca2ib_return_code(h_ret); +} diff --git a/drivers/staging/rdma/ehca/ehca_mrmw.c b/drivers/staging/rdma/ehca/ehca_mrmw.c new file mode 100644 index 0000000..f914b30 --- /dev/null +++ b/drivers/staging/rdma/ehca/ehca_mrmw.c @@ -0,0 +1,2593 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * MR/MW functions + * + * Authors: Dietmar Decker + * Christoph Raisch + * Hoang-Nam Nguyen + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. 
+ * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include + +#include "ehca_iverbs.h" +#include "ehca_mrmw.h" +#include "hcp_if.h" +#include "hipz_hw.h" + +#define NUM_CHUNKS(length, chunk_size) \ + (((length) + (chunk_size - 1)) / (chunk_size)) + +/* max number of rpages (per hcall register_rpages) */ +#define MAX_RPAGES 512 + +/* DMEM toleration management */ +#define EHCA_SECTSHIFT SECTION_SIZE_BITS +#define EHCA_SECTSIZE (1UL << EHCA_SECTSHIFT) +#define EHCA_HUGEPAGESHIFT 34 +#define EHCA_HUGEPAGE_SIZE (1UL << EHCA_HUGEPAGESHIFT) +#define EHCA_HUGEPAGE_PFN_MASK ((EHCA_HUGEPAGE_SIZE - 1) >> PAGE_SHIFT) +#define EHCA_INVAL_ADDR 0xFFFFFFFFFFFFFFFFULL +#define EHCA_DIR_INDEX_SHIFT 13 /* 8k Entries in 64k block */ +#define EHCA_TOP_INDEX_SHIFT (EHCA_DIR_INDEX_SHIFT * 2) +#define EHCA_MAP_ENTRIES (1 << EHCA_DIR_INDEX_SHIFT) +#define EHCA_TOP_MAP_SIZE (0x10000) /* currently fixed map size */ +#define EHCA_DIR_MAP_SIZE (0x10000) +#define EHCA_ENT_MAP_SIZE (0x10000) +#define EHCA_INDEX_MASK (EHCA_MAP_ENTRIES - 1) + +static unsigned long ehca_mr_len; + +/* + * Memory map data structures + */ +struct ehca_dir_bmap { + u64 ent[EHCA_MAP_ENTRIES]; +}; +struct ehca_top_bmap { + struct ehca_dir_bmap *dir[EHCA_MAP_ENTRIES]; +}; +struct ehca_bmap { + struct ehca_top_bmap *top[EHCA_MAP_ENTRIES]; +}; + +static struct ehca_bmap *ehca_bmap; + +static struct kmem_cache *mr_cache; +static struct kmem_cache *mw_cache; + +enum ehca_mr_pgsize { + EHCA_MR_PGSIZE4K = 0x1000L, + EHCA_MR_PGSIZE64K = 0x10000L, + EHCA_MR_PGSIZE1M = 0x100000L, + EHCA_MR_PGSIZE16M = 0x1000000L +}; + +#define EHCA_MR_PGSHIFT4K 12 +#define EHCA_MR_PGSHIFT64K 16 +#define EHCA_MR_PGSHIFT1M 20 +#define EHCA_MR_PGSHIFT16M 24 + +static u64 ehca_map_vaddr(void *caddr); + +static u32 ehca_encode_hwpage_size(u32 pgsize) +{ + int log = ilog2(pgsize); + WARN_ON(log < 12 || log > 24 || log & 3); + return (log - 12) / 4; +} + +static u64 ehca_get_max_hwpage_size(struct ehca_shca *shca) +{ + return rounddown_pow_of_two(shca->hca_cap_mr_pgsize); +} + +static 
struct ehca_mr *ehca_mr_new(void) +{ + struct ehca_mr *me; + + me = kmem_cache_zalloc(mr_cache, GFP_KERNEL); + if (me) + spin_lock_init(&me->mrlock); + else + ehca_gen_err("alloc failed"); + + return me; +} + +static void ehca_mr_delete(struct ehca_mr *me) +{ + kmem_cache_free(mr_cache, me); +} + +static struct ehca_mw *ehca_mw_new(void) +{ + struct ehca_mw *me; + + me = kmem_cache_zalloc(mw_cache, GFP_KERNEL); + if (me) + spin_lock_init(&me->mwlock); + else + ehca_gen_err("alloc failed"); + + return me; +} + +static void ehca_mw_delete(struct ehca_mw *me) +{ + kmem_cache_free(mw_cache, me); +} + +/*----------------------------------------------------------------------*/ + +struct ib_mr *ehca_get_dma_mr(struct ib_pd *pd, int mr_access_flags) +{ + struct ib_mr *ib_mr; + int ret; + struct ehca_mr *e_maxmr; + struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd); + struct ehca_shca *shca = + container_of(pd->device, struct ehca_shca, ib_device); + + if (shca->maxmr) { + e_maxmr = ehca_mr_new(); + if (!e_maxmr) { + ehca_err(&shca->ib_device, "out of memory"); + ib_mr = ERR_PTR(-ENOMEM); + goto get_dma_mr_exit0; + } + + ret = ehca_reg_maxmr(shca, e_maxmr, + (void *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START)), + mr_access_flags, e_pd, + &e_maxmr->ib.ib_mr.lkey, + &e_maxmr->ib.ib_mr.rkey); + if (ret) { + ehca_mr_delete(e_maxmr); + ib_mr = ERR_PTR(ret); + goto get_dma_mr_exit0; + } + ib_mr = &e_maxmr->ib.ib_mr; + } else { + ehca_err(&shca->ib_device, "no internal max-MR exist!"); + ib_mr = ERR_PTR(-EINVAL); + goto get_dma_mr_exit0; + } + +get_dma_mr_exit0: + if (IS_ERR(ib_mr)) + ehca_err(&shca->ib_device, "h_ret=%li pd=%p mr_access_flags=%x", + PTR_ERR(ib_mr), pd, mr_access_flags); + return ib_mr; +} /* end ehca_get_dma_mr() */ + +/*----------------------------------------------------------------------*/ + +struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd, + struct ib_phys_buf *phys_buf_array, + int num_phys_buf, + int mr_access_flags, + u64 *iova_start) 
+{ + struct ib_mr *ib_mr; + int ret; + struct ehca_mr *e_mr; + struct ehca_shca *shca = + container_of(pd->device, struct ehca_shca, ib_device); + struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd); + + u64 size; + + if ((num_phys_buf <= 0) || !phys_buf_array) { + ehca_err(pd->device, "bad input values: num_phys_buf=%x " + "phys_buf_array=%p", num_phys_buf, phys_buf_array); + ib_mr = ERR_PTR(-EINVAL); + goto reg_phys_mr_exit0; + } + if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) && + !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) || + ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) && + !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) { + /* + * Remote Write Access requires Local Write Access + * Remote Atomic Access requires Local Write Access + */ + ehca_err(pd->device, "bad input values: mr_access_flags=%x", + mr_access_flags); + ib_mr = ERR_PTR(-EINVAL); + goto reg_phys_mr_exit0; + } + + /* check physical buffer list and calculate size */ + ret = ehca_mr_chk_buf_and_calc_size(phys_buf_array, num_phys_buf, + iova_start, &size); + if (ret) { + ib_mr = ERR_PTR(ret); + goto reg_phys_mr_exit0; + } + if ((size == 0) || + (((u64)iova_start + size) < (u64)iova_start)) { + ehca_err(pd->device, "bad input values: size=%llx iova_start=%p", + size, iova_start); + ib_mr = ERR_PTR(-EINVAL); + goto reg_phys_mr_exit0; + } + + e_mr = ehca_mr_new(); + if (!e_mr) { + ehca_err(pd->device, "out of memory"); + ib_mr = ERR_PTR(-ENOMEM); + goto reg_phys_mr_exit0; + } + + /* register MR on HCA */ + if (ehca_mr_is_maxmr(size, iova_start)) { + e_mr->flags |= EHCA_MR_FLAG_MAXMR; + ret = ehca_reg_maxmr(shca, e_mr, iova_start, mr_access_flags, + e_pd, &e_mr->ib.ib_mr.lkey, + &e_mr->ib.ib_mr.rkey); + if (ret) { + ib_mr = ERR_PTR(ret); + goto reg_phys_mr_exit1; + } + } else { + struct ehca_mr_pginfo pginfo; + u32 num_kpages; + u32 num_hwpages; + u64 hw_pgsize; + + num_kpages = NUM_CHUNKS(((u64)iova_start % PAGE_SIZE) + size, + PAGE_SIZE); + /* for kernel space we try most possible pgsize 
*/ + hw_pgsize = ehca_get_max_hwpage_size(shca); + num_hwpages = NUM_CHUNKS(((u64)iova_start % hw_pgsize) + size, + hw_pgsize); + memset(&pginfo, 0, sizeof(pginfo)); + pginfo.type = EHCA_MR_PGI_PHYS; + pginfo.num_kpages = num_kpages; + pginfo.hwpage_size = hw_pgsize; + pginfo.num_hwpages = num_hwpages; + pginfo.u.phy.num_phys_buf = num_phys_buf; + pginfo.u.phy.phys_buf_array = phys_buf_array; + pginfo.next_hwpage = + ((u64)iova_start & ~PAGE_MASK) / hw_pgsize; + + ret = ehca_reg_mr(shca, e_mr, iova_start, size, mr_access_flags, + e_pd, &pginfo, &e_mr->ib.ib_mr.lkey, + &e_mr->ib.ib_mr.rkey, EHCA_REG_MR); + if (ret) { + ib_mr = ERR_PTR(ret); + goto reg_phys_mr_exit1; + } + } + + /* successful registration of all pages */ + return &e_mr->ib.ib_mr; + +reg_phys_mr_exit1: + ehca_mr_delete(e_mr); +reg_phys_mr_exit0: + if (IS_ERR(ib_mr)) + ehca_err(pd->device, "h_ret=%li pd=%p phys_buf_array=%p " + "num_phys_buf=%x mr_access_flags=%x iova_start=%p", + PTR_ERR(ib_mr), pd, phys_buf_array, + num_phys_buf, mr_access_flags, iova_start); + return ib_mr; +} /* end ehca_reg_phys_mr() */
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * ehca_reg_user_mr() - register a user memory region on the HCA
+ * @pd:              protection domain the MR belongs to (must not be NULL)
+ * @start:           user address handed to ib_umem_get()
+ * @length:          length of the region in bytes (must be non-zero)
+ * @virt:            I/O virtual address of the region; virt + length must
+ *                   not wrap
+ * @mr_access_flags: IB_ACCESS_* flags; remote write/atomic require local write
+ * @udata:           only reported in the error trace
+ *
+ * Allocates an ehca_mr, obtains the umem for [start, start + length) and
+ * registers it through ehca_reg_mr().  For hugetlb-backed umems a larger
+ * hardware page size (up to 16M) is tried first; if that registration
+ * fails with -EINVAL it is retried once with PAGE_SIZE.
+ *
+ * Returns the embedded ib_mr on success or an ERR_PTR() on failure.
+ */
+struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+			       u64 virt, int mr_access_flags,
+			       struct ib_udata *udata)
+{
+	struct ib_mr *ib_mr;
+	struct ehca_mr *e_mr;
+	struct ehca_shca *shca;
+	struct ehca_pd *e_pd;
+	struct ehca_mr_pginfo pginfo;
+	int ret, page_shift;
+	u32 num_kpages;
+	u32 num_hwpages;
+	u64 hwpage_size;
+
+	if (!pd) {
+		ehca_gen_err("bad pd=%p", pd);
+		return ERR_PTR(-EFAULT);
+	}
+
+	/*
+	 * Resolve the driver objects only after pd has been validated:
+	 * reading pd->device before the check would dereference a NULL pd
+	 * and make the check above meaningless.
+	 */
+	shca = container_of(pd->device, struct ehca_shca, ib_device);
+	e_pd = container_of(pd, struct ehca_pd, ib_pd);
+
+	if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
+	     !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
+	    ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
+	     !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) {
+		/*
+		 * Remote Write Access requires Local Write Access
+		 * Remote Atomic Access requires Local Write Access
+		 */
+		ehca_err(pd->device, "bad input values: mr_access_flags=%x",
+			 mr_access_flags);
+		ib_mr = ERR_PTR(-EINVAL);
+		goto reg_user_mr_exit0;
+	}
+
+	/* reject empty regions and regions whose end address wraps */
+	if (length == 0 || virt + length < virt) {
+		ehca_err(pd->device, "bad input values: length=%llx "
+			 "virt_base=%llx", length, virt);
+		ib_mr = ERR_PTR(-EINVAL);
+		goto reg_user_mr_exit0;
+	}
+
+	e_mr = ehca_mr_new();
+	if (!e_mr) {
+		ehca_err(pd->device, "out of memory");
+		ib_mr = ERR_PTR(-ENOMEM);
+		goto reg_user_mr_exit0;
+	}
+
+	e_mr->umem = ib_umem_get(pd->uobject->context, start, length,
+				 mr_access_flags, 0);
+	if (IS_ERR(e_mr->umem)) {
+		/* hand the umem error code back to the caller unchanged */
+		ib_mr = (void *)e_mr->umem;
+		goto reg_user_mr_exit1;
+	}
+
+	if (e_mr->umem->page_size != PAGE_SIZE) {
+		ehca_err(pd->device, "page size not supported, "
+			 "e_mr->umem->page_size=%x", e_mr->umem->page_size);
+		ib_mr = ERR_PTR(-EINVAL);
+		goto reg_user_mr_exit2;
+	}
+
+	/* determine number of MR pages */
+	num_kpages = NUM_CHUNKS((virt % PAGE_SIZE) + length, PAGE_SIZE);
+	/* select proper hw_pgsize */
+	page_shift = PAGE_SHIFT;
+	if (e_mr->umem->hugetlb) {
+		/* determine page_shift, clamp between 4K and 16M */
+		page_shift = (fls64(length - 1) + 3) & ~3;
+		page_shift = min(max(page_shift, EHCA_MR_PGSHIFT4K),
+				 EHCA_MR_PGSHIFT16M);
+	}
+	hwpage_size = 1UL << page_shift;
+
+	/* now that we have the desired page size, shift until it's
+	 * supported, too. 4K is always supported, so this terminates.
+	 */
+	while (!(hwpage_size & shca->hca_cap_mr_pgsize))
+		hwpage_size >>= 4;
+
+reg_user_mr_fallback:
+	num_hwpages = NUM_CHUNKS((virt % hwpage_size) + length, hwpage_size);
+	/* register MR on HCA */
+	memset(&pginfo, 0, sizeof(pginfo));
+	pginfo.type = EHCA_MR_PGI_USER;
+	pginfo.hwpage_size = hwpage_size;
+	pginfo.num_kpages = num_kpages;
+	pginfo.num_hwpages = num_hwpages;
+	pginfo.u.usr.region = e_mr->umem;
+	pginfo.next_hwpage = ib_umem_offset(e_mr->umem) / hwpage_size;
+	pginfo.u.usr.next_sg = pginfo.u.usr.region->sg_head.sgl;
+	ret = ehca_reg_mr(shca, e_mr, (u64 *)virt, length, mr_access_flags,
+			  e_pd, &pginfo, &e_mr->ib.ib_mr.lkey,
+			  &e_mr->ib.ib_mr.rkey, EHCA_REG_MR);
+	if (ret == -EINVAL && pginfo.hwpage_size > PAGE_SIZE) {
+		ehca_warn(pd->device, "failed to register mr "
+			  "with hwpage_size=%llx", hwpage_size);
+		ehca_info(pd->device, "try to register mr with "
+			  "kpage_size=%lx", PAGE_SIZE);
+		/*
+		 * this means kpages are not contiguous for a hw page
+		 * try kernel page size as fallback solution
+		 */
+		hwpage_size = PAGE_SIZE;
+		goto reg_user_mr_fallback;
+	}
+	if (ret) {
+		ib_mr = ERR_PTR(ret);
+		goto reg_user_mr_exit2;
+	}
+
+	/* successful registration of all pages */
+	return &e_mr->ib.ib_mr;
+
+reg_user_mr_exit2:
+	ib_umem_release(e_mr->umem);
+reg_user_mr_exit1:
+	ehca_mr_delete(e_mr);
+reg_user_mr_exit0:
+	if (IS_ERR(ib_mr))
+		ehca_err(pd->device, "rc=%li pd=%p mr_access_flags=%x udata=%p",
+			 PTR_ERR(ib_mr), pd, mr_access_flags, udata);
+	return ib_mr;
+} /* end ehca_reg_user_mr() */
+ +/*----------------------------------------------------------------------*/ + +int ehca_rereg_phys_mr(struct ib_mr *mr, + int mr_rereg_mask, + struct ib_pd *pd, + struct ib_phys_buf *phys_buf_array, + int num_phys_buf, + int mr_access_flags, + u64 *iova_start) +{ + int ret; + + struct ehca_shca *shca = + container_of(mr->device, struct ehca_shca, ib_device); + struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr); + u64 new_size; + u64 *new_start; + u32
new_acl; + struct ehca_pd *new_pd; + u32 tmp_lkey, tmp_rkey; + unsigned long sl_flags; + u32 num_kpages = 0; + u32 num_hwpages = 0; + struct ehca_mr_pginfo pginfo; + + if (!(mr_rereg_mask & IB_MR_REREG_TRANS)) { + /* TODO not supported, because PHYP rereg hCall needs pages */ + ehca_err(mr->device, "rereg without IB_MR_REREG_TRANS not " + "supported yet, mr_rereg_mask=%x", mr_rereg_mask); + ret = -EINVAL; + goto rereg_phys_mr_exit0; + } + + if (mr_rereg_mask & IB_MR_REREG_PD) { + if (!pd) { + ehca_err(mr->device, "rereg with bad pd, pd=%p " + "mr_rereg_mask=%x", pd, mr_rereg_mask); + ret = -EINVAL; + goto rereg_phys_mr_exit0; + } + } + + if ((mr_rereg_mask & + ~(IB_MR_REREG_TRANS | IB_MR_REREG_PD | IB_MR_REREG_ACCESS)) || + (mr_rereg_mask == 0)) { + ret = -EINVAL; + goto rereg_phys_mr_exit0; + } + + /* check other parameters */ + if (e_mr == shca->maxmr) { + /* should be impossible, however reject to be sure */ + ehca_err(mr->device, "rereg internal max-MR impossible, mr=%p " + "shca->maxmr=%p mr->lkey=%x", + mr, shca->maxmr, mr->lkey); + ret = -EINVAL; + goto rereg_phys_mr_exit0; + } + if (mr_rereg_mask & IB_MR_REREG_TRANS) { /* transl., i.e. 
addr/size */ + if (e_mr->flags & EHCA_MR_FLAG_FMR) { + ehca_err(mr->device, "not supported for FMR, mr=%p " + "flags=%x", mr, e_mr->flags); + ret = -EINVAL; + goto rereg_phys_mr_exit0; + } + if (!phys_buf_array || num_phys_buf <= 0) { + ehca_err(mr->device, "bad input values mr_rereg_mask=%x" + " phys_buf_array=%p num_phys_buf=%x", + mr_rereg_mask, phys_buf_array, num_phys_buf); + ret = -EINVAL; + goto rereg_phys_mr_exit0; + } + } + if ((mr_rereg_mask & IB_MR_REREG_ACCESS) && /* change ACL */ + (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) && + !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) || + ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) && + !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)))) { + /* + * Remote Write Access requires Local Write Access + * Remote Atomic Access requires Local Write Access + */ + ehca_err(mr->device, "bad input values: mr_rereg_mask=%x " + "mr_access_flags=%x", mr_rereg_mask, mr_access_flags); + ret = -EINVAL; + goto rereg_phys_mr_exit0; + } + + /* set requested values dependent on rereg request */ + spin_lock_irqsave(&e_mr->mrlock, sl_flags); + new_start = e_mr->start; + new_size = e_mr->size; + new_acl = e_mr->acl; + new_pd = container_of(mr->pd, struct ehca_pd, ib_pd); + + if (mr_rereg_mask & IB_MR_REREG_TRANS) { + u64 hw_pgsize = ehca_get_max_hwpage_size(shca); + + new_start = iova_start; /* change address */ + /* check physical buffer list and calculate size */ + ret = ehca_mr_chk_buf_and_calc_size(phys_buf_array, + num_phys_buf, iova_start, + &new_size); + if (ret) + goto rereg_phys_mr_exit1; + if ((new_size == 0) || + (((u64)iova_start + new_size) < (u64)iova_start)) { + ehca_err(mr->device, "bad input values: new_size=%llx " + "iova_start=%p", new_size, iova_start); + ret = -EINVAL; + goto rereg_phys_mr_exit1; + } + num_kpages = NUM_CHUNKS(((u64)new_start % PAGE_SIZE) + + new_size, PAGE_SIZE); + num_hwpages = NUM_CHUNKS(((u64)new_start % hw_pgsize) + + new_size, hw_pgsize); + memset(&pginfo, 0, sizeof(pginfo)); + pginfo.type = 
EHCA_MR_PGI_PHYS; + pginfo.num_kpages = num_kpages; + pginfo.hwpage_size = hw_pgsize; + pginfo.num_hwpages = num_hwpages; + pginfo.u.phy.num_phys_buf = num_phys_buf; + pginfo.u.phy.phys_buf_array = phys_buf_array; + pginfo.next_hwpage = + ((u64)iova_start & ~PAGE_MASK) / hw_pgsize; + } + if (mr_rereg_mask & IB_MR_REREG_ACCESS) + new_acl = mr_access_flags; + if (mr_rereg_mask & IB_MR_REREG_PD) + new_pd = container_of(pd, struct ehca_pd, ib_pd); + + ret = ehca_rereg_mr(shca, e_mr, new_start, new_size, new_acl, + new_pd, &pginfo, &tmp_lkey, &tmp_rkey); + if (ret) + goto rereg_phys_mr_exit1; + + /* successful reregistration */ + if (mr_rereg_mask & IB_MR_REREG_PD) + mr->pd = pd; + mr->lkey = tmp_lkey; + mr->rkey = tmp_rkey; + +rereg_phys_mr_exit1: + spin_unlock_irqrestore(&e_mr->mrlock, sl_flags); +rereg_phys_mr_exit0: + if (ret) + ehca_err(mr->device, "ret=%i mr=%p mr_rereg_mask=%x pd=%p " + "phys_buf_array=%p num_phys_buf=%x mr_access_flags=%x " + "iova_start=%p", + ret, mr, mr_rereg_mask, pd, phys_buf_array, + num_phys_buf, mr_access_flags, iova_start); + return ret; +} /* end ehca_rereg_phys_mr() */ + +/*----------------------------------------------------------------------*/ + +int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr) +{ + int ret = 0; + u64 h_ret; + struct ehca_shca *shca = + container_of(mr->device, struct ehca_shca, ib_device); + struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr); + unsigned long sl_flags; + struct ehca_mr_hipzout_parms hipzout; + + if ((e_mr->flags & EHCA_MR_FLAG_FMR)) { + ehca_err(mr->device, "not supported for FMR, mr=%p e_mr=%p " + "e_mr->flags=%x", mr, e_mr, e_mr->flags); + ret = -EINVAL; + goto query_mr_exit0; + } + + memset(mr_attr, 0, sizeof(struct ib_mr_attr)); + spin_lock_irqsave(&e_mr->mrlock, sl_flags); + + h_ret = hipz_h_query_mr(shca->ipz_hca_handle, e_mr, &hipzout); + if (h_ret != H_SUCCESS) { + ehca_err(mr->device, "hipz_mr_query failed, h_ret=%lli mr=%p " + "hca_hndl=%llx mr_hndl=%llx 
lkey=%x", + h_ret, mr, shca->ipz_hca_handle.handle, + e_mr->ipz_mr_handle.handle, mr->lkey); + ret = ehca2ib_return_code(h_ret); + goto query_mr_exit1; + } + mr_attr->pd = mr->pd; + mr_attr->device_virt_addr = hipzout.vaddr; + mr_attr->size = hipzout.len; + mr_attr->lkey = hipzout.lkey; + mr_attr->rkey = hipzout.rkey; + ehca_mrmw_reverse_map_acl(&hipzout.acl, &mr_attr->mr_access_flags); + +query_mr_exit1: + spin_unlock_irqrestore(&e_mr->mrlock, sl_flags); +query_mr_exit0: + if (ret) + ehca_err(mr->device, "ret=%i mr=%p mr_attr=%p", + ret, mr, mr_attr); + return ret; +} /* end ehca_query_mr() */ + +/*----------------------------------------------------------------------*/ + +int ehca_dereg_mr(struct ib_mr *mr) +{ + int ret = 0; + u64 h_ret; + struct ehca_shca *shca = + container_of(mr->device, struct ehca_shca, ib_device); + struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr); + + if ((e_mr->flags & EHCA_MR_FLAG_FMR)) { + ehca_err(mr->device, "not supported for FMR, mr=%p e_mr=%p " + "e_mr->flags=%x", mr, e_mr, e_mr->flags); + ret = -EINVAL; + goto dereg_mr_exit0; + } else if (e_mr == shca->maxmr) { + /* should be impossible, however reject to be sure */ + ehca_err(mr->device, "dereg internal max-MR impossible, mr=%p " + "shca->maxmr=%p mr->lkey=%x", + mr, shca->maxmr, mr->lkey); + ret = -EINVAL; + goto dereg_mr_exit0; + } + + /* TODO: BUSY: MR still has bound window(s) */ + h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr); + if (h_ret != H_SUCCESS) { + ehca_err(mr->device, "hipz_free_mr failed, h_ret=%lli shca=%p " + "e_mr=%p hca_hndl=%llx mr_hndl=%llx mr->lkey=%x", + h_ret, shca, e_mr, shca->ipz_hca_handle.handle, + e_mr->ipz_mr_handle.handle, mr->lkey); + ret = ehca2ib_return_code(h_ret); + goto dereg_mr_exit0; + } + + if (e_mr->umem) + ib_umem_release(e_mr->umem); + + /* successful deregistration */ + ehca_mr_delete(e_mr); + +dereg_mr_exit0: + if (ret) + ehca_err(mr->device, "ret=%i mr=%p", ret, mr); + return ret; +} /* end 
ehca_dereg_mr() */ + +/*----------------------------------------------------------------------*/ + +struct ib_mw *ehca_alloc_mw(struct ib_pd *pd, enum ib_mw_type type) +{ + struct ib_mw *ib_mw; + u64 h_ret; + struct ehca_mw *e_mw; + struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd); + struct ehca_shca *shca = + container_of(pd->device, struct ehca_shca, ib_device); + struct ehca_mw_hipzout_parms hipzout; + + if (type != IB_MW_TYPE_1) + return ERR_PTR(-EINVAL); + + e_mw = ehca_mw_new(); + if (!e_mw) { + ib_mw = ERR_PTR(-ENOMEM); + goto alloc_mw_exit0; + } + + h_ret = hipz_h_alloc_resource_mw(shca->ipz_hca_handle, e_mw, + e_pd->fw_pd, &hipzout); + if (h_ret != H_SUCCESS) { + ehca_err(pd->device, "hipz_mw_allocate failed, h_ret=%lli " + "shca=%p hca_hndl=%llx mw=%p", + h_ret, shca, shca->ipz_hca_handle.handle, e_mw); + ib_mw = ERR_PTR(ehca2ib_return_code(h_ret)); + goto alloc_mw_exit1; + } + /* successful MW allocation */ + e_mw->ipz_mw_handle = hipzout.handle; + e_mw->ib_mw.rkey = hipzout.rkey; + return &e_mw->ib_mw; + +alloc_mw_exit1: + ehca_mw_delete(e_mw); +alloc_mw_exit0: + if (IS_ERR(ib_mw)) + ehca_err(pd->device, "h_ret=%li pd=%p", PTR_ERR(ib_mw), pd); + return ib_mw; +} /* end ehca_alloc_mw() */ + +/*----------------------------------------------------------------------*/ + +int ehca_bind_mw(struct ib_qp *qp, + struct ib_mw *mw, + struct ib_mw_bind *mw_bind) +{ + /* TODO: not supported up to now */ + ehca_gen_err("bind MW currently not supported by HCAD"); + + return -EPERM; +} /* end ehca_bind_mw() */ + +/*----------------------------------------------------------------------*/ + +int ehca_dealloc_mw(struct ib_mw *mw) +{ + u64 h_ret; + struct ehca_shca *shca = + container_of(mw->device, struct ehca_shca, ib_device); + struct ehca_mw *e_mw = container_of(mw, struct ehca_mw, ib_mw); + + h_ret = hipz_h_free_resource_mw(shca->ipz_hca_handle, e_mw); + if (h_ret != H_SUCCESS) { + ehca_err(mw->device, "hipz_free_mw failed, h_ret=%lli shca=%p " + "mw=%p 
rkey=%x hca_hndl=%llx mw_hndl=%llx", + h_ret, shca, mw, mw->rkey, shca->ipz_hca_handle.handle, + e_mw->ipz_mw_handle.handle); + return ehca2ib_return_code(h_ret); + } + /* successful deallocation */ + ehca_mw_delete(e_mw); + return 0; +} /* end ehca_dealloc_mw() */ + +/*----------------------------------------------------------------------*/ + +struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd, + int mr_access_flags, + struct ib_fmr_attr *fmr_attr) +{ + struct ib_fmr *ib_fmr; + struct ehca_shca *shca = + container_of(pd->device, struct ehca_shca, ib_device); + struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd); + struct ehca_mr *e_fmr; + int ret; + u32 tmp_lkey, tmp_rkey; + struct ehca_mr_pginfo pginfo; + u64 hw_pgsize; + + /* check other parameters */ + if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) && + !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) || + ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) && + !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) { + /* + * Remote Write Access requires Local Write Access + * Remote Atomic Access requires Local Write Access + */ + ehca_err(pd->device, "bad input values: mr_access_flags=%x", + mr_access_flags); + ib_fmr = ERR_PTR(-EINVAL); + goto alloc_fmr_exit0; + } + if (mr_access_flags & IB_ACCESS_MW_BIND) { + ehca_err(pd->device, "bad input values: mr_access_flags=%x", + mr_access_flags); + ib_fmr = ERR_PTR(-EINVAL); + goto alloc_fmr_exit0; + } + if ((fmr_attr->max_pages == 0) || (fmr_attr->max_maps == 0)) { + ehca_err(pd->device, "bad input values: fmr_attr->max_pages=%x " + "fmr_attr->max_maps=%x fmr_attr->page_shift=%x", + fmr_attr->max_pages, fmr_attr->max_maps, + fmr_attr->page_shift); + ib_fmr = ERR_PTR(-EINVAL); + goto alloc_fmr_exit0; + } + + hw_pgsize = 1 << fmr_attr->page_shift; + if (!(hw_pgsize & shca->hca_cap_mr_pgsize)) { + ehca_err(pd->device, "unsupported fmr_attr->page_shift=%x", + fmr_attr->page_shift); + ib_fmr = ERR_PTR(-EINVAL); + goto alloc_fmr_exit0; + } + + e_fmr = ehca_mr_new(); + if (!e_fmr) { 
+ ib_fmr = ERR_PTR(-ENOMEM); + goto alloc_fmr_exit0; + } + e_fmr->flags |= EHCA_MR_FLAG_FMR; + + /* register MR on HCA */ + memset(&pginfo, 0, sizeof(pginfo)); + pginfo.hwpage_size = hw_pgsize; + /* + * pginfo.num_hwpages==0, ie register_rpages() will not be called + * but deferred to map_phys_fmr() + */ + ret = ehca_reg_mr(shca, e_fmr, NULL, + fmr_attr->max_pages * (1 << fmr_attr->page_shift), + mr_access_flags, e_pd, &pginfo, + &tmp_lkey, &tmp_rkey, EHCA_REG_MR); + if (ret) { + ib_fmr = ERR_PTR(ret); + goto alloc_fmr_exit1; + } + + /* successful */ + e_fmr->hwpage_size = hw_pgsize; + e_fmr->fmr_page_size = 1 << fmr_attr->page_shift; + e_fmr->fmr_max_pages = fmr_attr->max_pages; + e_fmr->fmr_max_maps = fmr_attr->max_maps; + e_fmr->fmr_map_cnt = 0; + return &e_fmr->ib.ib_fmr; + +alloc_fmr_exit1: + ehca_mr_delete(e_fmr); +alloc_fmr_exit0: + return ib_fmr; +} /* end ehca_alloc_fmr() */ + +/*----------------------------------------------------------------------*/ + +int ehca_map_phys_fmr(struct ib_fmr *fmr, + u64 *page_list, + int list_len, + u64 iova) +{ + int ret; + struct ehca_shca *shca = + container_of(fmr->device, struct ehca_shca, ib_device); + struct ehca_mr *e_fmr = container_of(fmr, struct ehca_mr, ib.ib_fmr); + struct ehca_pd *e_pd = container_of(fmr->pd, struct ehca_pd, ib_pd); + struct ehca_mr_pginfo pginfo; + u32 tmp_lkey, tmp_rkey; + + if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) { + ehca_err(fmr->device, "not a FMR, e_fmr=%p e_fmr->flags=%x", + e_fmr, e_fmr->flags); + ret = -EINVAL; + goto map_phys_fmr_exit0; + } + ret = ehca_fmr_check_page_list(e_fmr, page_list, list_len); + if (ret) + goto map_phys_fmr_exit0; + if (iova % e_fmr->fmr_page_size) { + /* only whole-numbered pages */ + ehca_err(fmr->device, "bad iova, iova=%llx fmr_page_size=%x", + iova, e_fmr->fmr_page_size); + ret = -EINVAL; + goto map_phys_fmr_exit0; + } + if (e_fmr->fmr_map_cnt >= e_fmr->fmr_max_maps) { + /* HCAD does not limit the maps, however trace this anyway */ + 
ehca_info(fmr->device, "map limit exceeded, fmr=%p " + "e_fmr->fmr_map_cnt=%x e_fmr->fmr_max_maps=%x", + fmr, e_fmr->fmr_map_cnt, e_fmr->fmr_max_maps); + } + + memset(&pginfo, 0, sizeof(pginfo)); + pginfo.type = EHCA_MR_PGI_FMR; + pginfo.num_kpages = list_len; + pginfo.hwpage_size = e_fmr->hwpage_size; + pginfo.num_hwpages = + list_len * e_fmr->fmr_page_size / pginfo.hwpage_size; + pginfo.u.fmr.page_list = page_list; + pginfo.next_hwpage = + (iova & (e_fmr->fmr_page_size-1)) / pginfo.hwpage_size; + pginfo.u.fmr.fmr_pgsize = e_fmr->fmr_page_size; + + ret = ehca_rereg_mr(shca, e_fmr, (u64 *)iova, + list_len * e_fmr->fmr_page_size, + e_fmr->acl, e_pd, &pginfo, &tmp_lkey, &tmp_rkey); + if (ret) + goto map_phys_fmr_exit0; + + /* successful reregistration */ + e_fmr->fmr_map_cnt++; + e_fmr->ib.ib_fmr.lkey = tmp_lkey; + e_fmr->ib.ib_fmr.rkey = tmp_rkey; + return 0; + +map_phys_fmr_exit0: + if (ret) + ehca_err(fmr->device, "ret=%i fmr=%p page_list=%p list_len=%x " + "iova=%llx", ret, fmr, page_list, list_len, iova); + return ret; +} /* end ehca_map_phys_fmr() */ + +/*----------------------------------------------------------------------*/ + +int ehca_unmap_fmr(struct list_head *fmr_list) +{ + int ret = 0; + struct ib_fmr *ib_fmr; + struct ehca_shca *shca = NULL; + struct ehca_shca *prev_shca; + struct ehca_mr *e_fmr; + u32 num_fmr = 0; + u32 unmap_fmr_cnt = 0; + + /* check all FMR belong to same SHCA, and check internal flag */ + list_for_each_entry(ib_fmr, fmr_list, list) { + prev_shca = shca; + shca = container_of(ib_fmr->device, struct ehca_shca, + ib_device); + e_fmr = container_of(ib_fmr, struct ehca_mr, ib.ib_fmr); + if ((shca != prev_shca) && prev_shca) { + ehca_err(&shca->ib_device, "SHCA mismatch, shca=%p " + "prev_shca=%p e_fmr=%p", + shca, prev_shca, e_fmr); + ret = -EINVAL; + goto unmap_fmr_exit0; + } + if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) { + ehca_err(&shca->ib_device, "not a FMR, e_fmr=%p " + "e_fmr->flags=%x", e_fmr, e_fmr->flags); + ret = -EINVAL; + goto 
unmap_fmr_exit0; + } + num_fmr++; + } + + /* loop over all FMRs to unmap */ + list_for_each_entry(ib_fmr, fmr_list, list) { + unmap_fmr_cnt++; + e_fmr = container_of(ib_fmr, struct ehca_mr, ib.ib_fmr); + shca = container_of(ib_fmr->device, struct ehca_shca, + ib_device); + ret = ehca_unmap_one_fmr(shca, e_fmr); + if (ret) { + /* unmap failed, stop unmapping of rest of FMRs */ + ehca_err(&shca->ib_device, "unmap of one FMR failed, " + "stop rest, e_fmr=%p num_fmr=%x " + "unmap_fmr_cnt=%x lkey=%x", e_fmr, num_fmr, + unmap_fmr_cnt, e_fmr->ib.ib_fmr.lkey); + goto unmap_fmr_exit0; + } + } + +unmap_fmr_exit0: + if (ret) + ehca_gen_err("ret=%i fmr_list=%p num_fmr=%x unmap_fmr_cnt=%x", + ret, fmr_list, num_fmr, unmap_fmr_cnt); + return ret; +} /* end ehca_unmap_fmr() */ + +/*----------------------------------------------------------------------*/ + +int ehca_dealloc_fmr(struct ib_fmr *fmr) +{ + int ret; + u64 h_ret; + struct ehca_shca *shca = + container_of(fmr->device, struct ehca_shca, ib_device); + struct ehca_mr *e_fmr = container_of(fmr, struct ehca_mr, ib.ib_fmr); + + if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) { + ehca_err(fmr->device, "not a FMR, e_fmr=%p e_fmr->flags=%x", + e_fmr, e_fmr->flags); + ret = -EINVAL; + goto free_fmr_exit0; + } + + h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr); + if (h_ret != H_SUCCESS) { + ehca_err(fmr->device, "hipz_free_mr failed, h_ret=%lli e_fmr=%p " + "hca_hndl=%llx fmr_hndl=%llx fmr->lkey=%x", + h_ret, e_fmr, shca->ipz_hca_handle.handle, + e_fmr->ipz_mr_handle.handle, fmr->lkey); + ret = ehca2ib_return_code(h_ret); + goto free_fmr_exit0; + } + /* successful deregistration */ + ehca_mr_delete(e_fmr); + return 0; + +free_fmr_exit0: + if (ret) + ehca_err(&shca->ib_device, "ret=%i fmr=%p", ret, fmr); + return ret; +} /* end ehca_dealloc_fmr() */ + +/*----------------------------------------------------------------------*/ + +static int ehca_reg_bmap_mr_rpages(struct ehca_shca *shca, + struct ehca_mr *e_mr, + struct 
ehca_mr_pginfo *pginfo); + +int ehca_reg_mr(struct ehca_shca *shca, + struct ehca_mr *e_mr, + u64 *iova_start, + u64 size, + int acl, + struct ehca_pd *e_pd, + struct ehca_mr_pginfo *pginfo, + u32 *lkey, /*OUT*/ + u32 *rkey, /*OUT*/ + enum ehca_reg_type reg_type) +{ + int ret; + u64 h_ret; + u32 hipz_acl; + struct ehca_mr_hipzout_parms hipzout; + + ehca_mrmw_map_acl(acl, &hipz_acl); + ehca_mrmw_set_pgsize_hipz_acl(pginfo->hwpage_size, &hipz_acl); + if (ehca_use_hp_mr == 1) + hipz_acl |= 0x00000001; + + h_ret = hipz_h_alloc_resource_mr(shca->ipz_hca_handle, e_mr, + (u64)iova_start, size, hipz_acl, + e_pd->fw_pd, &hipzout); + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "hipz_alloc_mr failed, h_ret=%lli " + "hca_hndl=%llx", h_ret, shca->ipz_hca_handle.handle); + ret = ehca2ib_return_code(h_ret); + goto ehca_reg_mr_exit0; + } + + e_mr->ipz_mr_handle = hipzout.handle; + + if (reg_type == EHCA_REG_BUSMAP_MR) + ret = ehca_reg_bmap_mr_rpages(shca, e_mr, pginfo); + else if (reg_type == EHCA_REG_MR) + ret = ehca_reg_mr_rpages(shca, e_mr, pginfo); + else + ret = -EINVAL; + + if (ret) + goto ehca_reg_mr_exit1; + + /* successful registration */ + e_mr->num_kpages = pginfo->num_kpages; + e_mr->num_hwpages = pginfo->num_hwpages; + e_mr->hwpage_size = pginfo->hwpage_size; + e_mr->start = iova_start; + e_mr->size = size; + e_mr->acl = acl; + *lkey = hipzout.lkey; + *rkey = hipzout.rkey; + return 0; + +ehca_reg_mr_exit1: + h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr); + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "h_ret=%lli shca=%p e_mr=%p " + "iova_start=%p size=%llx acl=%x e_pd=%p lkey=%x " + "pginfo=%p num_kpages=%llx num_hwpages=%llx ret=%i", + h_ret, shca, e_mr, iova_start, size, acl, e_pd, + hipzout.lkey, pginfo, pginfo->num_kpages, + pginfo->num_hwpages, ret); + ehca_err(&shca->ib_device, "internal error in ehca_reg_mr, " + "not recoverable"); + } +ehca_reg_mr_exit0: + if (ret) + ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p " + 
"iova_start=%p size=%llx acl=%x e_pd=%p pginfo=%p " + "num_kpages=%llx num_hwpages=%llx", + ret, shca, e_mr, iova_start, size, acl, e_pd, pginfo, + pginfo->num_kpages, pginfo->num_hwpages); + return ret; +} /* end ehca_reg_mr() */ + +/*----------------------------------------------------------------------*/ + +int ehca_reg_mr_rpages(struct ehca_shca *shca, + struct ehca_mr *e_mr, + struct ehca_mr_pginfo *pginfo) +{ + int ret = 0; + u64 h_ret; + u32 rnum; + u64 rpage; + u32 i; + u64 *kpage; + + if (!pginfo->num_hwpages) /* in case of fmr */ + return 0; + + kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL); + if (!kpage) { + ehca_err(&shca->ib_device, "kpage alloc failed"); + ret = -ENOMEM; + goto ehca_reg_mr_rpages_exit0; + } + + /* max MAX_RPAGES ehca mr pages per register call */ + for (i = 0; i < NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES); i++) { + + if (i == NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES) - 1) { + rnum = pginfo->num_hwpages % MAX_RPAGES; /* last shot */ + if (rnum == 0) + rnum = MAX_RPAGES; /* last shot is full */ + } else + rnum = MAX_RPAGES; + + ret = ehca_set_pagebuf(pginfo, rnum, kpage); + if (ret) { + ehca_err(&shca->ib_device, "ehca_set_pagebuf " + "bad rc, ret=%i rnum=%x kpage=%p", + ret, rnum, kpage); + goto ehca_reg_mr_rpages_exit1; + } + + if (rnum > 1) { + rpage = __pa(kpage); + if (!rpage) { + ehca_err(&shca->ib_device, "kpage=%p i=%x", + kpage, i); + ret = -EFAULT; + goto ehca_reg_mr_rpages_exit1; + } + } else + rpage = *kpage; + + h_ret = hipz_h_register_rpage_mr( + shca->ipz_hca_handle, e_mr, + ehca_encode_hwpage_size(pginfo->hwpage_size), + 0, rpage, rnum); + + if (i == NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES) - 1) { + /* + * check for 'registration complete'==H_SUCCESS + * and for 'page registered'==H_PAGE_REGISTERED + */ + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "last " + "hipz_reg_rpage_mr failed, h_ret=%lli " + "e_mr=%p i=%x hca_hndl=%llx mr_hndl=%llx" + " lkey=%x", h_ret, e_mr, i, + shca->ipz_hca_handle.handle, + 
e_mr->ipz_mr_handle.handle, + e_mr->ib.ib_mr.lkey); + ret = ehca2ib_return_code(h_ret); + break; + } else + ret = 0; + } else if (h_ret != H_PAGE_REGISTERED) { + ehca_err(&shca->ib_device, "hipz_reg_rpage_mr failed, " + "h_ret=%lli e_mr=%p i=%x lkey=%x hca_hndl=%llx " + "mr_hndl=%llx", h_ret, e_mr, i, + e_mr->ib.ib_mr.lkey, + shca->ipz_hca_handle.handle, + e_mr->ipz_mr_handle.handle); + ret = ehca2ib_return_code(h_ret); + break; + } else + ret = 0; + } /* end for(i) */ + + +ehca_reg_mr_rpages_exit1: + ehca_free_fw_ctrlblock(kpage); +ehca_reg_mr_rpages_exit0: + if (ret) + ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p pginfo=%p " + "num_kpages=%llx num_hwpages=%llx", ret, shca, e_mr, + pginfo, pginfo->num_kpages, pginfo->num_hwpages); + return ret; +} /* end ehca_reg_mr_rpages() */
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * ehca_rereg_mr_rereg1() - reregister an MR with one hipz_h_reregister_pmr call
+ *
+ * Fills a firmware control block with the page list described by @pginfo
+ * and asks the hypervisor to reregister @e_mr in place.
+ *
+ * Returns:
+ *   0        on success; @e_mr is updated and *lkey / *rkey are written
+ *   -EAGAIN  if the hCall itself failed; *pginfo is restored to its state
+ *            on entry so the caller can retry by another path
+ *   -ENOMEM, -EFAULT or the ehca_set_pagebuf() error otherwise
+ *
+ * *lkey and *rkey are written on success only.
+ */
+inline int ehca_rereg_mr_rereg1(struct ehca_shca *shca,
+				struct ehca_mr *e_mr,
+				u64 *iova_start,
+				u64 size,
+				u32 acl,
+				struct ehca_pd *e_pd,
+				struct ehca_mr_pginfo *pginfo,
+				u32 *lkey, /*OUT*/
+				u32 *rkey) /*OUT*/
+{
+	int ret;
+	u64 h_ret;
+	u32 hipz_acl;
+	u64 *kpage;
+	u64 rpage;
+	struct ehca_mr_pginfo pginfo_save;
+	struct ehca_mr_hipzout_parms hipzout;
+
+	ehca_mrmw_map_acl(acl, &hipz_acl);
+	ehca_mrmw_set_pgsize_hipz_acl(pginfo->hwpage_size, &hipz_acl);
+
+	kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
+	if (!kpage) {
+		ehca_err(&shca->ib_device, "kpage alloc failed");
+		ret = -ENOMEM;
+		goto ehca_rereg_mr_rereg1_exit0;
+	}
+
+	/* ehca_set_pagebuf() advances pginfo; keep a copy for the retry case */
+	pginfo_save = *pginfo;
+	ret = ehca_set_pagebuf(pginfo, pginfo->num_hwpages, kpage);
+	if (ret) {
+		ehca_err(&shca->ib_device, "set pagebuf failed, e_mr=%p "
+			 "pginfo=%p type=%x num_kpages=%llx num_hwpages=%llx "
+			 "kpage=%p", e_mr, pginfo, pginfo->type,
+			 pginfo->num_kpages, pginfo->num_hwpages, kpage);
+		goto ehca_rereg_mr_rereg1_exit1;
+	}
+	rpage = __pa(kpage);
+	if (!rpage) {
+		ehca_err(&shca->ib_device, "kpage=%p", kpage);
+		ret = -EFAULT;
+		goto ehca_rereg_mr_rereg1_exit1;
+	}
+	h_ret = hipz_h_reregister_pmr(shca->ipz_hca_handle, e_mr,
+				      (u64)iova_start, size, hipz_acl,
+				      e_pd->fw_pd, rpage, &hipzout);
+	if (h_ret != H_SUCCESS) {
+		/*
+		 * reregistration unsuccessful, try it again with the 3 hCalls,
+		 * e.g. this is required in case H_MR_CONDITION
+		 * (MW bound or MR is shared)
+		 */
+		ehca_warn(&shca->ib_device, "hipz_h_reregister_pmr failed "
+			  "(Rereg1), h_ret=%lli e_mr=%p", h_ret, e_mr);
+		*pginfo = pginfo_save;
+		ret = -EAGAIN;
+	} else if ((u64 *)hipzout.vaddr != iova_start) {
+		ehca_err(&shca->ib_device, "PHYP changed iova_start in "
+			 "rereg_pmr, iova_start=%p iova_start_out=%llx e_mr=%p "
+			 "mr_handle=%llx lkey=%x lkey_out=%x", iova_start,
+			 hipzout.vaddr, e_mr, e_mr->ipz_mr_handle.handle,
+			 e_mr->ib.ib_mr.lkey, hipzout.lkey);
+		ret = -EFAULT;
+	} else {
+		/*
+		 * successful reregistration
+		 * note: start and start_out are identical for eServer HCAs
+		 */
+		e_mr->num_kpages = pginfo->num_kpages;
+		e_mr->num_hwpages = pginfo->num_hwpages;
+		e_mr->hwpage_size = pginfo->hwpage_size;
+		e_mr->start = iova_start;
+		e_mr->size = size;
+		e_mr->acl = acl;
+		*lkey = hipzout.lkey;
+		*rkey = hipzout.rkey;
+	}
+
+ehca_rereg_mr_rereg1_exit1:
+	ehca_free_fw_ctrlblock(kpage);
+ehca_rereg_mr_rereg1_exit0:
+	/*
+	 * *lkey / *rkey have not been written on any error path, so trace
+	 * the keys the MR currently holds instead of reading the caller's
+	 * (possibly uninitialised) output slots.
+	 */
+	if (ret && (ret != -EAGAIN))
+		ehca_err(&shca->ib_device, "ret=%i lkey=%x rkey=%x "
+			 "pginfo=%p num_kpages=%llx num_hwpages=%llx",
+			 ret, e_mr->ib.ib_mr.lkey, e_mr->ib.ib_mr.rkey,
+			 pginfo, pginfo->num_kpages,
+			 pginfo->num_hwpages);
+	return ret;
+} /* end ehca_rereg_mr_rereg1() */
+ +/*----------------------------------------------------------------------*/ + +int ehca_rereg_mr(struct ehca_shca *shca, + struct ehca_mr *e_mr, + u64 *iova_start, + u64 size, + int acl, + struct ehca_pd *e_pd, + struct ehca_mr_pginfo *pginfo, + u32 *lkey, + u32 *rkey) +{ + int ret = 0; + u64 h_ret; + int rereg_1_hcall = 1; /* 1: use hipz_h_reregister_pmr directly */ + int rereg_3_hcall = 0; /* 1: use 3 hipz calls for reregistration */ + + /* first determine
reregistration hCall(s) */ + if ((pginfo->num_hwpages > MAX_RPAGES) || + (e_mr->num_hwpages > MAX_RPAGES) || + (pginfo->num_hwpages > e_mr->num_hwpages)) { + ehca_dbg(&shca->ib_device, "Rereg3 case, " + "pginfo->num_hwpages=%llx e_mr->num_hwpages=%x", + pginfo->num_hwpages, e_mr->num_hwpages); + rereg_1_hcall = 0; + rereg_3_hcall = 1; + } + + if (e_mr->flags & EHCA_MR_FLAG_MAXMR) { /* check for max-MR */ + rereg_1_hcall = 0; + rereg_3_hcall = 1; + e_mr->flags &= ~EHCA_MR_FLAG_MAXMR; + ehca_err(&shca->ib_device, "Rereg MR for max-MR! e_mr=%p", + e_mr); + } + + if (rereg_1_hcall) { + ret = ehca_rereg_mr_rereg1(shca, e_mr, iova_start, size, + acl, e_pd, pginfo, lkey, rkey); + if (ret) { + if (ret == -EAGAIN) + rereg_3_hcall = 1; + else + goto ehca_rereg_mr_exit0; + } + } + + if (rereg_3_hcall) { + struct ehca_mr save_mr; + + /* first deregister old MR */ + h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr); + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "hipz_free_mr failed, " + "h_ret=%lli e_mr=%p hca_hndl=%llx mr_hndl=%llx " + "mr->lkey=%x", + h_ret, e_mr, shca->ipz_hca_handle.handle, + e_mr->ipz_mr_handle.handle, + e_mr->ib.ib_mr.lkey); + ret = ehca2ib_return_code(h_ret); + goto ehca_rereg_mr_exit0; + } + /* clean ehca_mr_t, without changing struct ib_mr and lock */ + save_mr = *e_mr; + ehca_mr_deletenew(e_mr); + + /* set some MR values */ + e_mr->flags = save_mr.flags; + e_mr->hwpage_size = save_mr.hwpage_size; + e_mr->fmr_page_size = save_mr.fmr_page_size; + e_mr->fmr_max_pages = save_mr.fmr_max_pages; + e_mr->fmr_max_maps = save_mr.fmr_max_maps; + e_mr->fmr_map_cnt = save_mr.fmr_map_cnt; + + ret = ehca_reg_mr(shca, e_mr, iova_start, size, acl, + e_pd, pginfo, lkey, rkey, EHCA_REG_MR); + if (ret) { + u32 offset = (u64)(&e_mr->flags) - (u64)e_mr; + memcpy(&e_mr->flags, &(save_mr.flags), + sizeof(struct ehca_mr) - offset); + goto ehca_rereg_mr_exit0; + } + } + +ehca_rereg_mr_exit0: + if (ret) + ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p " 
+ "iova_start=%p size=%llx acl=%x e_pd=%p pginfo=%p " + "num_kpages=%llx lkey=%x rkey=%x rereg_1_hcall=%x " + "rereg_3_hcall=%x", ret, shca, e_mr, iova_start, size, + acl, e_pd, pginfo, pginfo->num_kpages, *lkey, *rkey, + rereg_1_hcall, rereg_3_hcall); + return ret; +} /* end ehca_rereg_mr() */ + +/*----------------------------------------------------------------------*/ + +int ehca_unmap_one_fmr(struct ehca_shca *shca, + struct ehca_mr *e_fmr) +{ + int ret = 0; + u64 h_ret; + struct ehca_pd *e_pd = + container_of(e_fmr->ib.ib_fmr.pd, struct ehca_pd, ib_pd); + struct ehca_mr save_fmr; + u32 tmp_lkey, tmp_rkey; + struct ehca_mr_pginfo pginfo; + struct ehca_mr_hipzout_parms hipzout; + struct ehca_mr save_mr; + + if (e_fmr->fmr_max_pages <= MAX_RPAGES) { + /* + * note: after using rereg hcall with len=0, + * rereg hcall must be used again for registering pages + */ + h_ret = hipz_h_reregister_pmr(shca->ipz_hca_handle, e_fmr, 0, + 0, 0, e_pd->fw_pd, 0, &hipzout); + if (h_ret == H_SUCCESS) { + /* successful reregistration */ + e_fmr->start = NULL; + e_fmr->size = 0; + tmp_lkey = hipzout.lkey; + tmp_rkey = hipzout.rkey; + return 0; + } + /* + * should not happen, because length checked above, + * FMRs are not shared and no MW bound to FMRs + */ + ehca_err(&shca->ib_device, "hipz_reregister_pmr failed " + "(Rereg1), h_ret=%lli e_fmr=%p hca_hndl=%llx " + "mr_hndl=%llx lkey=%x lkey_out=%x", + h_ret, e_fmr, shca->ipz_hca_handle.handle, + e_fmr->ipz_mr_handle.handle, + e_fmr->ib.ib_fmr.lkey, hipzout.lkey); + /* try free and rereg */ + } + + /* first free old FMR */ + h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr); + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "hipz_free_mr failed, " + "h_ret=%lli e_fmr=%p hca_hndl=%llx mr_hndl=%llx " + "lkey=%x", + h_ret, e_fmr, shca->ipz_hca_handle.handle, + e_fmr->ipz_mr_handle.handle, + e_fmr->ib.ib_fmr.lkey); + ret = ehca2ib_return_code(h_ret); + goto ehca_unmap_one_fmr_exit0; + } + /* clean ehca_mr_t, without 
changing lock */ + save_fmr = *e_fmr; + ehca_mr_deletenew(e_fmr); + + /* set some MR values */ + e_fmr->flags = save_fmr.flags; + e_fmr->hwpage_size = save_fmr.hwpage_size; + e_fmr->fmr_page_size = save_fmr.fmr_page_size; + e_fmr->fmr_max_pages = save_fmr.fmr_max_pages; + e_fmr->fmr_max_maps = save_fmr.fmr_max_maps; + e_fmr->fmr_map_cnt = save_fmr.fmr_map_cnt; + e_fmr->acl = save_fmr.acl; + + memset(&pginfo, 0, sizeof(pginfo)); + pginfo.type = EHCA_MR_PGI_FMR; + ret = ehca_reg_mr(shca, e_fmr, NULL, + (e_fmr->fmr_max_pages * e_fmr->fmr_page_size), + e_fmr->acl, e_pd, &pginfo, &tmp_lkey, + &tmp_rkey, EHCA_REG_MR); + if (ret) { + u32 offset = (u64)(&e_fmr->flags) - (u64)e_fmr; + memcpy(&e_fmr->flags, &(save_mr.flags), + sizeof(struct ehca_mr) - offset); + } + +ehca_unmap_one_fmr_exit0: + if (ret) + ehca_err(&shca->ib_device, "ret=%i tmp_lkey=%x tmp_rkey=%x " + "fmr_max_pages=%x", + ret, tmp_lkey, tmp_rkey, e_fmr->fmr_max_pages); + return ret; +} /* end ehca_unmap_one_fmr() */ + +/*----------------------------------------------------------------------*/ + +int ehca_reg_smr(struct ehca_shca *shca, + struct ehca_mr *e_origmr, + struct ehca_mr *e_newmr, + u64 *iova_start, + int acl, + struct ehca_pd *e_pd, + u32 *lkey, /*OUT*/ + u32 *rkey) /*OUT*/ +{ + int ret = 0; + u64 h_ret; + u32 hipz_acl; + struct ehca_mr_hipzout_parms hipzout; + + ehca_mrmw_map_acl(acl, &hipz_acl); + ehca_mrmw_set_pgsize_hipz_acl(e_origmr->hwpage_size, &hipz_acl); + + h_ret = hipz_h_register_smr(shca->ipz_hca_handle, e_newmr, e_origmr, + (u64)iova_start, hipz_acl, e_pd->fw_pd, + &hipzout); + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%lli " + "shca=%p e_origmr=%p e_newmr=%p iova_start=%p acl=%x " + "e_pd=%p hca_hndl=%llx mr_hndl=%llx lkey=%x", + h_ret, shca, e_origmr, e_newmr, iova_start, acl, e_pd, + shca->ipz_hca_handle.handle, + e_origmr->ipz_mr_handle.handle, + e_origmr->ib.ib_mr.lkey); + ret = ehca2ib_return_code(h_ret); + goto ehca_reg_smr_exit0; + } + 
/* successful registration */ + e_newmr->num_kpages = e_origmr->num_kpages; + e_newmr->num_hwpages = e_origmr->num_hwpages; + e_newmr->hwpage_size = e_origmr->hwpage_size; + e_newmr->start = iova_start; + e_newmr->size = e_origmr->size; + e_newmr->acl = acl; + e_newmr->ipz_mr_handle = hipzout.handle; + *lkey = hipzout.lkey; + *rkey = hipzout.rkey; + return 0; + +ehca_reg_smr_exit0: + if (ret) + ehca_err(&shca->ib_device, "ret=%i shca=%p e_origmr=%p " + "e_newmr=%p iova_start=%p acl=%x e_pd=%p", + ret, shca, e_origmr, e_newmr, iova_start, acl, e_pd); + return ret; +} /* end ehca_reg_smr() */ + +/*----------------------------------------------------------------------*/ +static inline void *ehca_calc_sectbase(int top, int dir, int idx) +{ + unsigned long ret = idx; + ret |= dir << EHCA_DIR_INDEX_SHIFT; + ret |= top << EHCA_TOP_INDEX_SHIFT; + return __va(ret << SECTION_SIZE_BITS); +} + +#define ehca_bmap_valid(entry) \ + ((u64)entry != (u64)EHCA_INVAL_ADDR) + +static u64 ehca_reg_mr_section(int top, int dir, int idx, u64 *kpage, + struct ehca_shca *shca, struct ehca_mr *mr, + struct ehca_mr_pginfo *pginfo) +{ + u64 h_ret = 0; + unsigned long page = 0; + u64 rpage = __pa(kpage); + int page_count; + + void *sectbase = ehca_calc_sectbase(top, dir, idx); + if ((unsigned long)sectbase & (pginfo->hwpage_size - 1)) { + ehca_err(&shca->ib_device, "reg_mr_section will probably fail:" + "hwpage_size does not fit to " + "section start address"); + } + page_count = EHCA_SECTSIZE / pginfo->hwpage_size; + + while (page < page_count) { + u64 rnum; + for (rnum = 0; (rnum < MAX_RPAGES) && (page < page_count); + rnum++) { + void *pg = sectbase + ((page++) * pginfo->hwpage_size); + kpage[rnum] = __pa(pg); + } + + h_ret = hipz_h_register_rpage_mr(shca->ipz_hca_handle, mr, + ehca_encode_hwpage_size(pginfo->hwpage_size), + 0, rpage, rnum); + + if ((h_ret != H_SUCCESS) && (h_ret != H_PAGE_REGISTERED)) { + ehca_err(&shca->ib_device, "register_rpage_mr failed"); + return h_ret; + } + } + 
return h_ret; +} + +static u64 ehca_reg_mr_sections(int top, int dir, u64 *kpage, + struct ehca_shca *shca, struct ehca_mr *mr, + struct ehca_mr_pginfo *pginfo) +{ + u64 hret = H_SUCCESS; + int idx; + + for (idx = 0; idx < EHCA_MAP_ENTRIES; idx++) { + if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir]->ent[idx])) + continue; + + hret = ehca_reg_mr_section(top, dir, idx, kpage, shca, mr, + pginfo); + if ((hret != H_SUCCESS) && (hret != H_PAGE_REGISTERED)) + return hret; + } + return hret; +} + +static u64 ehca_reg_mr_dir_sections(int top, u64 *kpage, struct ehca_shca *shca, + struct ehca_mr *mr, + struct ehca_mr_pginfo *pginfo) +{ + u64 hret = H_SUCCESS; + int dir; + + for (dir = 0; dir < EHCA_MAP_ENTRIES; dir++) { + if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir])) + continue; + + hret = ehca_reg_mr_sections(top, dir, kpage, shca, mr, pginfo); + if ((hret != H_SUCCESS) && (hret != H_PAGE_REGISTERED)) + return hret; + } + return hret; +} + +/* register internal max-MR to internal SHCA */ +int ehca_reg_internal_maxmr( + struct ehca_shca *shca, + struct ehca_pd *e_pd, + struct ehca_mr **e_maxmr) /*OUT*/ +{ + int ret; + struct ehca_mr *e_mr; + u64 *iova_start; + u64 size_maxmr; + struct ehca_mr_pginfo pginfo; + struct ib_phys_buf ib_pbuf; + u32 num_kpages; + u32 num_hwpages; + u64 hw_pgsize; + + if (!ehca_bmap) { + ret = -EFAULT; + goto ehca_reg_internal_maxmr_exit0; + } + + e_mr = ehca_mr_new(); + if (!e_mr) { + ehca_err(&shca->ib_device, "out of memory"); + ret = -ENOMEM; + goto ehca_reg_internal_maxmr_exit0; + } + e_mr->flags |= EHCA_MR_FLAG_MAXMR; + + /* register internal max-MR on HCA */ + size_maxmr = ehca_mr_len; + iova_start = (u64 *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START)); + ib_pbuf.addr = 0; + ib_pbuf.size = size_maxmr; + num_kpages = NUM_CHUNKS(((u64)iova_start % PAGE_SIZE) + size_maxmr, + PAGE_SIZE); + hw_pgsize = ehca_get_max_hwpage_size(shca); + num_hwpages = NUM_CHUNKS(((u64)iova_start % hw_pgsize) + size_maxmr, + hw_pgsize); + + 
memset(&pginfo, 0, sizeof(pginfo)); + pginfo.type = EHCA_MR_PGI_PHYS; + pginfo.num_kpages = num_kpages; + pginfo.num_hwpages = num_hwpages; + pginfo.hwpage_size = hw_pgsize; + pginfo.u.phy.num_phys_buf = 1; + pginfo.u.phy.phys_buf_array = &ib_pbuf; + + ret = ehca_reg_mr(shca, e_mr, iova_start, size_maxmr, 0, e_pd, + &pginfo, &e_mr->ib.ib_mr.lkey, + &e_mr->ib.ib_mr.rkey, EHCA_REG_BUSMAP_MR); + if (ret) { + ehca_err(&shca->ib_device, "reg of internal max MR failed, " + "e_mr=%p iova_start=%p size_maxmr=%llx num_kpages=%x " + "num_hwpages=%x", e_mr, iova_start, size_maxmr, + num_kpages, num_hwpages); + goto ehca_reg_internal_maxmr_exit1; + } + + /* successful registration of all pages */ + e_mr->ib.ib_mr.device = e_pd->ib_pd.device; + e_mr->ib.ib_mr.pd = &e_pd->ib_pd; + e_mr->ib.ib_mr.uobject = NULL; + atomic_inc(&(e_pd->ib_pd.usecnt)); + atomic_set(&(e_mr->ib.ib_mr.usecnt), 0); + *e_maxmr = e_mr; + return 0; + +ehca_reg_internal_maxmr_exit1: + ehca_mr_delete(e_mr); +ehca_reg_internal_maxmr_exit0: + if (ret) + ehca_err(&shca->ib_device, "ret=%i shca=%p e_pd=%p e_maxmr=%p", + ret, shca, e_pd, e_maxmr); + return ret; +} /* end ehca_reg_internal_maxmr() */ + +/*----------------------------------------------------------------------*/ + +int ehca_reg_maxmr(struct ehca_shca *shca, + struct ehca_mr *e_newmr, + u64 *iova_start, + int acl, + struct ehca_pd *e_pd, + u32 *lkey, + u32 *rkey) +{ + u64 h_ret; + struct ehca_mr *e_origmr = shca->maxmr; + u32 hipz_acl; + struct ehca_mr_hipzout_parms hipzout; + + ehca_mrmw_map_acl(acl, &hipz_acl); + ehca_mrmw_set_pgsize_hipz_acl(e_origmr->hwpage_size, &hipz_acl); + + h_ret = hipz_h_register_smr(shca->ipz_hca_handle, e_newmr, e_origmr, + (u64)iova_start, hipz_acl, e_pd->fw_pd, + &hipzout); + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%lli " + "e_origmr=%p hca_hndl=%llx mr_hndl=%llx lkey=%x", + h_ret, e_origmr, shca->ipz_hca_handle.handle, + e_origmr->ipz_mr_handle.handle, + 
e_origmr->ib.ib_mr.lkey); + return ehca2ib_return_code(h_ret); + } + /* successful registration */ + e_newmr->num_kpages = e_origmr->num_kpages; + e_newmr->num_hwpages = e_origmr->num_hwpages; + e_newmr->hwpage_size = e_origmr->hwpage_size; + e_newmr->start = iova_start; + e_newmr->size = e_origmr->size; + e_newmr->acl = acl; + e_newmr->ipz_mr_handle = hipzout.handle; + *lkey = hipzout.lkey; + *rkey = hipzout.rkey; + return 0; +} /* end ehca_reg_maxmr() */ + +/*----------------------------------------------------------------------*/ + +int ehca_dereg_internal_maxmr(struct ehca_shca *shca) +{ + int ret; + struct ehca_mr *e_maxmr; + struct ib_pd *ib_pd; + + if (!shca->maxmr) { + ehca_err(&shca->ib_device, "bad call, shca=%p", shca); + ret = -EINVAL; + goto ehca_dereg_internal_maxmr_exit0; + } + + e_maxmr = shca->maxmr; + ib_pd = e_maxmr->ib.ib_mr.pd; + shca->maxmr = NULL; /* remove internal max-MR indication from SHCA */ + + ret = ehca_dereg_mr(&e_maxmr->ib.ib_mr); + if (ret) { + ehca_err(&shca->ib_device, "dereg internal max-MR failed, " + "ret=%i e_maxmr=%p shca=%p lkey=%x", + ret, e_maxmr, shca, e_maxmr->ib.ib_mr.lkey); + shca->maxmr = e_maxmr; + goto ehca_dereg_internal_maxmr_exit0; + } + + atomic_dec(&ib_pd->usecnt); + +ehca_dereg_internal_maxmr_exit0: + if (ret) + ehca_err(&shca->ib_device, "ret=%i shca=%p shca->maxmr=%p", + ret, shca, shca->maxmr); + return ret; +} /* end ehca_dereg_internal_maxmr() */ + +/*----------------------------------------------------------------------*/ + +/* + * check the physical buffer array of the MR verbs for validity and + * calculate the MR size; returns 0 and stores the sum in *size, or -EINVAL + */ +int ehca_mr_chk_buf_and_calc_size(struct ib_phys_buf *phys_buf_array, + int num_phys_buf, + u64 *iova_start, + u64 *size) +{ + struct ib_phys_buf *pbuf = phys_buf_array; + u64 size_count = 0; + u32 i; + + if (num_phys_buf <= 0) { /* negative would turn into a huge bound against u32 i below */ + ehca_gen_err("bad phys buf array len, num_phys_buf=%i", num_phys_buf); + return -EINVAL; + } + /* check first buffer */ + if (((u64)iova_start & ~PAGE_MASK) != (pbuf->addr &
~PAGE_MASK)) { + ehca_gen_err("iova_start/addr mismatch, iova_start=%p " + "pbuf->addr=%llx pbuf->size=%llx", + iova_start, pbuf->addr, pbuf->size); + return -EINVAL; + } + if (((pbuf->addr + pbuf->size) % PAGE_SIZE) && + (num_phys_buf > 1)) { + ehca_gen_err("addr/size mismatch in 1st buf, pbuf->addr=%llx " + "pbuf->size=%llx", pbuf->addr, pbuf->size); + return -EINVAL; + } + + for (i = 0; i < num_phys_buf; i++) { + if ((i > 0) && (pbuf->addr % PAGE_SIZE)) { + ehca_gen_err("bad address, i=%x pbuf->addr=%llx " + "pbuf->size=%llx", + i, pbuf->addr, pbuf->size); + return -EINVAL; + } + if (((i > 0) && /* not 1st */ + (i < (num_phys_buf - 1)) && /* not last */ + (pbuf->size % PAGE_SIZE)) || (pbuf->size == 0)) { + ehca_gen_err("bad size, i=%x pbuf->size=%llx", + i, pbuf->size); + return -EINVAL; + } + size_count += pbuf->size; + pbuf++; + } + + *size = size_count; + return 0; +} /* end ehca_mr_chk_buf_and_calc_size() */ + +/*----------------------------------------------------------------------*/ + +/* check page list of map FMR verb for validity: length in range, each page aligned to fmr_page_size */ +int ehca_fmr_check_page_list(struct ehca_mr *e_fmr, + u64 *page_list, + int list_len) +{ + u32 i; + u64 *page; + + if ((list_len <= 0) || (list_len > e_fmr->fmr_max_pages)) { + ehca_gen_err("bad list_len, list_len=%x " + "e_fmr->fmr_max_pages=%x fmr=%p", + list_len, e_fmr->fmr_max_pages, e_fmr); + return -EINVAL; + } + + /* each page must be aligned */ + page = page_list; + for (i = 0; i < list_len; i++) { + if (*page % e_fmr->fmr_page_size) { + ehca_gen_err("bad page, i=%x *page=%llx page=%p fmr=%p " + "fmr_page_size=%x", i, *page, page, e_fmr, + e_fmr->fmr_page_size); + return -EINVAL; + } + page++; + } + + return 0; +} /* end ehca_fmr_check_page_list() */ + +/*----------------------------------------------------------------------*/ + +/* PAGE_SIZE >= pginfo->hwpage_size */ +static int ehca_set_pagebuf_user1(struct ehca_mr_pginfo *pginfo, + u32 number, + u64 *kpage) +{ + int ret = 0; + u64 pgaddr; + u32 j = 0; + int +
hwpages_per_kpage = PAGE_SIZE / pginfo->hwpage_size; + struct scatterlist **sg = &pginfo->u.usr.next_sg; + + while (*sg != NULL) { + pgaddr = page_to_pfn(sg_page(*sg)) + << PAGE_SHIFT; + *kpage = pgaddr + (pginfo->next_hwpage * + pginfo->hwpage_size); + if (!(*kpage)) { + ehca_gen_err("pgaddr=%llx " + "sg_dma_address=%llx " + "entry=%llx next_hwpage=%llx", + pgaddr, (u64)sg_dma_address(*sg), + pginfo->u.usr.next_nmap, + pginfo->next_hwpage); + return -EFAULT; + } + (pginfo->hwpage_cnt)++; + (pginfo->next_hwpage)++; + kpage++; + if (pginfo->next_hwpage % hwpages_per_kpage == 0) { + (pginfo->kpage_cnt)++; + (pginfo->u.usr.next_nmap)++; + pginfo->next_hwpage = 0; + *sg = sg_next(*sg); + } + j++; + if (j >= number) + break; + } + + return ret; +} + +/* + * check given pages for contiguous layout + * last page addr is returned in prev_pgaddr for further check + */ +static int ehca_check_kpages_per_ate(struct scatterlist **sg, + int num_pages, + u64 *prev_pgaddr) +{ + for (; *sg && num_pages > 0; *sg = sg_next(*sg), num_pages--) { + u64 pgaddr = page_to_pfn(sg_page(*sg)) << PAGE_SHIFT; + if (ehca_debug_level >= 3) + ehca_gen_dbg("chunk_page=%llx value=%016llx", pgaddr, + *(u64 *)__va(pgaddr)); + if (pgaddr - PAGE_SIZE != *prev_pgaddr) { + ehca_gen_err("uncontiguous page found pgaddr=%llx " + "prev_pgaddr=%llx entries_left_in_hwpage=%x", + pgaddr, *prev_pgaddr, num_pages); + return -EINVAL; + } + *prev_pgaddr = pgaddr; + } + return 0; +} + +/* PAGE_SIZE < pginfo->hwpage_size */ +static int ehca_set_pagebuf_user2(struct ehca_mr_pginfo *pginfo, + u32 number, + u64 *kpage) +{ + int ret = 0; + u64 pgaddr, prev_pgaddr; + u32 j = 0; + int kpages_per_hwpage = pginfo->hwpage_size / PAGE_SIZE; + int nr_kpages = kpages_per_hwpage; + struct scatterlist **sg = &pginfo->u.usr.next_sg; + + while (*sg != NULL) { + + if (nr_kpages == kpages_per_hwpage) { + pgaddr = (page_to_pfn(sg_page(*sg)) + << PAGE_SHIFT); + *kpage = pgaddr; + if (!(*kpage)) { + ehca_gen_err("pgaddr=%llx entry=%llx", 
+ pgaddr, pginfo->u.usr.next_nmap); + ret = -EFAULT; + return ret; + } + /* + * The first page in a hwpage must be aligned; + * the first MR page is exempt from this rule. + */ + if (pgaddr & (pginfo->hwpage_size - 1)) { + if (pginfo->hwpage_cnt) { + ehca_gen_err( + "invalid alignment " + "pgaddr=%llx entry=%llx " + "mr_pgsize=%llx", + pgaddr, pginfo->u.usr.next_nmap, + pginfo->hwpage_size); + ret = -EFAULT; + return ret; + } + /* first MR page */ + pginfo->kpage_cnt = + (pgaddr & + (pginfo->hwpage_size - 1)) >> + PAGE_SHIFT; + nr_kpages -= pginfo->kpage_cnt; + *kpage = pgaddr & + ~(pginfo->hwpage_size - 1); + } + if (ehca_debug_level >= 3) { + u64 val = *(u64 *)__va(pgaddr); + ehca_gen_dbg("kpage=%llx page=%llx " + "value=%016llx", + *kpage, pgaddr, val); + } + prev_pgaddr = pgaddr; + *sg = sg_next(*sg); + pginfo->kpage_cnt++; + pginfo->u.usr.next_nmap++; + nr_kpages--; + if (!nr_kpages) + goto next_kpage; + continue; + } + + ret = ehca_check_kpages_per_ate(sg, nr_kpages, + &prev_pgaddr); + if (ret) + return ret; + pginfo->kpage_cnt += nr_kpages; + pginfo->u.usr.next_nmap += nr_kpages; + +next_kpage: + nr_kpages = kpages_per_hwpage; + (pginfo->hwpage_cnt)++; + kpage++; + j++; + if (j >= number) + break; + } + + return ret; +} + +static int ehca_set_pagebuf_phys(struct ehca_mr_pginfo *pginfo, + u32 number, u64 *kpage) +{ + int ret = 0; + struct ib_phys_buf *pbuf; + u64 num_hw, offs_hw; + u32 i = 0; + + /* loop over desired phys_buf_array entries */ + while (i < number) { + pbuf = pginfo->u.phy.phys_buf_array + pginfo->u.phy.next_buf; + num_hw = NUM_CHUNKS((pbuf->addr % pginfo->hwpage_size) + + pbuf->size, pginfo->hwpage_size); + offs_hw = (pbuf->addr & ~(pginfo->hwpage_size - 1)) / + pginfo->hwpage_size; + while (pginfo->next_hwpage < offs_hw + num_hw) { + /* sanity check */ + if ((pginfo->kpage_cnt >= pginfo->num_kpages) || + (pginfo->hwpage_cnt >= pginfo->num_hwpages)) { + ehca_gen_err("kpage_cnt >= num_kpages, " + "kpage_cnt=%llx num_kpages=%llx " + 
"hwpage_cnt=%llx " + "num_hwpages=%llx i=%x", + pginfo->kpage_cnt, + pginfo->num_kpages, + pginfo->hwpage_cnt, + pginfo->num_hwpages, i); + return -EFAULT; + } + *kpage = (pbuf->addr & ~(pginfo->hwpage_size - 1)) + + (pginfo->next_hwpage * pginfo->hwpage_size); + if ( !(*kpage) && pbuf->addr ) { + ehca_gen_err("pbuf->addr=%llx pbuf->size=%llx " + "next_hwpage=%llx", pbuf->addr, + pbuf->size, pginfo->next_hwpage); + return -EFAULT; + } + (pginfo->hwpage_cnt)++; + (pginfo->next_hwpage)++; + if (PAGE_SIZE >= pginfo->hwpage_size) { + if (pginfo->next_hwpage % + (PAGE_SIZE / pginfo->hwpage_size) == 0) + (pginfo->kpage_cnt)++; + } else + pginfo->kpage_cnt += pginfo->hwpage_size / + PAGE_SIZE; + kpage++; + i++; + if (i >= number) break; + } + if (pginfo->next_hwpage >= offs_hw + num_hw) { + (pginfo->u.phy.next_buf)++; + pginfo->next_hwpage = 0; + } + } + return ret; +} + +static int ehca_set_pagebuf_fmr(struct ehca_mr_pginfo *pginfo, + u32 number, u64 *kpage) +{ + int ret = 0; + u64 *fmrlist; + u32 i; + + /* loop over desired page_list entries */ + fmrlist = pginfo->u.fmr.page_list + pginfo->u.fmr.next_listelem; + for (i = 0; i < number; i++) { + *kpage = (*fmrlist & ~(pginfo->hwpage_size - 1)) + + pginfo->next_hwpage * pginfo->hwpage_size; + if ( !(*kpage) ) { + ehca_gen_err("*fmrlist=%llx fmrlist=%p " + "next_listelem=%llx next_hwpage=%llx", + *fmrlist, fmrlist, + pginfo->u.fmr.next_listelem, + pginfo->next_hwpage); + return -EFAULT; + } + (pginfo->hwpage_cnt)++; + if (pginfo->u.fmr.fmr_pgsize >= pginfo->hwpage_size) { + if (pginfo->next_hwpage % + (pginfo->u.fmr.fmr_pgsize / + pginfo->hwpage_size) == 0) { + (pginfo->kpage_cnt)++; + (pginfo->u.fmr.next_listelem)++; + fmrlist++; + pginfo->next_hwpage = 0; + } else + (pginfo->next_hwpage)++; + } else { + unsigned int cnt_per_hwpage = pginfo->hwpage_size / + pginfo->u.fmr.fmr_pgsize; + unsigned int j; + u64 prev = *kpage; + /* check if adrs are contiguous */ + for (j = 1; j < cnt_per_hwpage; j++) { + u64 p = fmrlist[j] & 
~(pginfo->hwpage_size - 1); + if (prev + pginfo->u.fmr.fmr_pgsize != p) { + ehca_gen_err("uncontiguous fmr pages " + "found prev=%llx p=%llx " + "idx=%x", prev, p, i + j); + return -EINVAL; + } + prev = p; + } + pginfo->kpage_cnt += cnt_per_hwpage; + pginfo->u.fmr.next_listelem += cnt_per_hwpage; + fmrlist += cnt_per_hwpage; + } + kpage++; + } + return ret; +} + +/* setup page buffer from page info */ +int ehca_set_pagebuf(struct ehca_mr_pginfo *pginfo, + u32 number, + u64 *kpage) +{ + int ret; + + switch (pginfo->type) { + case EHCA_MR_PGI_PHYS: + ret = ehca_set_pagebuf_phys(pginfo, number, kpage); + break; + case EHCA_MR_PGI_USER: + ret = PAGE_SIZE >= pginfo->hwpage_size ? + ehca_set_pagebuf_user1(pginfo, number, kpage) : + ehca_set_pagebuf_user2(pginfo, number, kpage); + break; + case EHCA_MR_PGI_FMR: + ret = ehca_set_pagebuf_fmr(pginfo, number, kpage); + break; + default: + ehca_gen_err("bad pginfo->type=%x", pginfo->type); + ret = -EFAULT; + break; + } + return ret; +} /* end ehca_set_pagebuf() */ + +/*----------------------------------------------------------------------*/ + +/* + * check MR if it is a max-MR, i.e. uses whole memory + * in case it's a max-MR 1 is returned, else 0 + */ +int ehca_mr_is_maxmr(u64 size, + u64 *iova_start) +{ + /* a MR is treated as max-MR only if it fits following: */ + if ((size == ehca_mr_len) && + (iova_start == (void *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START)))) { + ehca_gen_dbg("this is a max-MR"); + return 1; + } else + return 0; +} /* end ehca_mr_is_maxmr() */ + +/*----------------------------------------------------------------------*/ + +/* map access control for MR/MW. This routine is used for MR and MW. 
*/ +void ehca_mrmw_map_acl(int ib_acl, + u32 *hipz_acl) +{ + *hipz_acl = 0; + if (ib_acl & IB_ACCESS_REMOTE_READ) + *hipz_acl |= HIPZ_ACCESSCTRL_R_READ; + if (ib_acl & IB_ACCESS_REMOTE_WRITE) + *hipz_acl |= HIPZ_ACCESSCTRL_R_WRITE; + if (ib_acl & IB_ACCESS_REMOTE_ATOMIC) + *hipz_acl |= HIPZ_ACCESSCTRL_R_ATOMIC; + if (ib_acl & IB_ACCESS_LOCAL_WRITE) + *hipz_acl |= HIPZ_ACCESSCTRL_L_WRITE; + if (ib_acl & IB_ACCESS_MW_BIND) + *hipz_acl |= HIPZ_ACCESSCTRL_MW_BIND; +} /* end ehca_mrmw_map_acl() */ + +/*----------------------------------------------------------------------*/ + +/* sets page size in hipz access control for MR/MW. */ +void ehca_mrmw_set_pgsize_hipz_acl(u32 pgsize, u32 *hipz_acl) /*INOUT*/ +{ + *hipz_acl |= (ehca_encode_hwpage_size(pgsize) << 24); +} /* end ehca_mrmw_set_pgsize_hipz_acl() */ + +/*----------------------------------------------------------------------*/ + +/* + * reverse map access control for MR/MW. + * This routine is used for MR and MW. + */ +void ehca_mrmw_reverse_map_acl(const u32 *hipz_acl, + int *ib_acl) /*OUT*/ +{ + *ib_acl = 0; + if (*hipz_acl & HIPZ_ACCESSCTRL_R_READ) + *ib_acl |= IB_ACCESS_REMOTE_READ; + if (*hipz_acl & HIPZ_ACCESSCTRL_R_WRITE) + *ib_acl |= IB_ACCESS_REMOTE_WRITE; + if (*hipz_acl & HIPZ_ACCESSCTRL_R_ATOMIC) + *ib_acl |= IB_ACCESS_REMOTE_ATOMIC; + if (*hipz_acl & HIPZ_ACCESSCTRL_L_WRITE) + *ib_acl |= IB_ACCESS_LOCAL_WRITE; + if (*hipz_acl & HIPZ_ACCESSCTRL_MW_BIND) + *ib_acl |= IB_ACCESS_MW_BIND; +} /* end ehca_mrmw_reverse_map_acl() */ + + +/*----------------------------------------------------------------------*/ + +/* + * MR destructor and constructor + * used in Reregister MR verb, sets all fields in ehca_mr_t to 0, + * except struct ib_mr and spinlock + */ +void ehca_mr_deletenew(struct ehca_mr *mr) +{ + mr->flags = 0; + mr->num_kpages = 0; + mr->num_hwpages = 0; + mr->acl = 0; + mr->start = NULL; + mr->fmr_page_size = 0; + mr->fmr_max_pages = 0; + mr->fmr_max_maps = 0; + mr->fmr_map_cnt = 0; + 
memset(&mr->ipz_mr_handle, 0, sizeof(mr->ipz_mr_handle)); + memset(&mr->galpas, 0, sizeof(mr->galpas)); +} /* end ehca_mr_deletenew() */ + +int ehca_init_mrmw_cache(void) +{ + mr_cache = kmem_cache_create("ehca_cache_mr", + sizeof(struct ehca_mr), 0, + SLAB_HWCACHE_ALIGN, + NULL); + if (!mr_cache) + return -ENOMEM; + mw_cache = kmem_cache_create("ehca_cache_mw", + sizeof(struct ehca_mw), 0, + SLAB_HWCACHE_ALIGN, + NULL); + if (!mw_cache) { + kmem_cache_destroy(mr_cache); + mr_cache = NULL; + return -ENOMEM; + } + return 0; +} + +void ehca_cleanup_mrmw_cache(void) +{ + if (mr_cache) + kmem_cache_destroy(mr_cache); + if (mw_cache) + kmem_cache_destroy(mw_cache); +} + +static inline int ehca_init_top_bmap(struct ehca_top_bmap *ehca_top_bmap, + int dir) +{ + if (!ehca_bmap_valid(ehca_top_bmap->dir[dir])) { + ehca_top_bmap->dir[dir] = + kmalloc(sizeof(struct ehca_dir_bmap), GFP_KERNEL); + if (!ehca_top_bmap->dir[dir]) + return -ENOMEM; + /* Set map block to 0xFF according to EHCA_INVAL_ADDR */ + memset(ehca_top_bmap->dir[dir], 0xFF, EHCA_ENT_MAP_SIZE); + } + return 0; +} + +static inline int ehca_init_bmap(struct ehca_bmap *ehca_bmap, int top, int dir) +{ + if (!ehca_bmap_valid(ehca_bmap->top[top])) { + ehca_bmap->top[top] = + kmalloc(sizeof(struct ehca_top_bmap), GFP_KERNEL); + if (!ehca_bmap->top[top]) + return -ENOMEM; + /* Set map block to 0xFF according to EHCA_INVAL_ADDR */ + memset(ehca_bmap->top[top], 0xFF, EHCA_DIR_MAP_SIZE); + } + return ehca_init_top_bmap(ehca_bmap->top[top], dir); +} + +static inline int ehca_calc_index(unsigned long i, unsigned long s) +{ + return (i >> s) & EHCA_INDEX_MASK; +} + +void ehca_destroy_busmap(void) +{ + int top, dir; + + if (!ehca_bmap) + return; + + for (top = 0; top < EHCA_MAP_ENTRIES; top++) { + if (!ehca_bmap_valid(ehca_bmap->top[top])) + continue; + for (dir = 0; dir < EHCA_MAP_ENTRIES; dir++) { + if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir])) + continue; + + kfree(ehca_bmap->top[top]->dir[dir]); + } + + 
kfree(ehca_bmap->top[top]); + } + + kfree(ehca_bmap); + ehca_bmap = NULL; +} + +static int ehca_update_busmap(unsigned long pfn, unsigned long nr_pages) +{ + unsigned long i, start_section, end_section; + int top, dir, idx; + + if (!nr_pages) + return 0; + + if (!ehca_bmap) { + ehca_bmap = kmalloc(sizeof(struct ehca_bmap), GFP_KERNEL); + if (!ehca_bmap) + return -ENOMEM; + /* Set map block to 0xFF according to EHCA_INVAL_ADDR */ + memset(ehca_bmap, 0xFF, EHCA_TOP_MAP_SIZE); + } + + start_section = (pfn * PAGE_SIZE) / EHCA_SECTSIZE; + end_section = ((pfn + nr_pages) * PAGE_SIZE) / EHCA_SECTSIZE; + for (i = start_section; i < end_section; i++) { + int ret; + top = ehca_calc_index(i, EHCA_TOP_INDEX_SHIFT); + dir = ehca_calc_index(i, EHCA_DIR_INDEX_SHIFT); + idx = i & EHCA_INDEX_MASK; + + ret = ehca_init_bmap(ehca_bmap, top, dir); + if (ret) { + ehca_destroy_busmap(); + return ret; + } + ehca_bmap->top[top]->dir[dir]->ent[idx] = ehca_mr_len; + ehca_mr_len += EHCA_SECTSIZE; + } + return 0; +} + +static int ehca_is_hugepage(unsigned long pfn) +{ + int page_order; + + if (pfn & EHCA_HUGEPAGE_PFN_MASK) + return 0; + + page_order = compound_order(pfn_to_page(pfn)); + if (page_order + PAGE_SHIFT != EHCA_HUGEPAGESHIFT) + return 0; + + return 1; +} + +static int ehca_create_busmap_callback(unsigned long initial_pfn, + unsigned long total_nr_pages, void *arg) +{ + int ret; + unsigned long pfn, start_pfn, end_pfn, nr_pages; + + if ((total_nr_pages * PAGE_SIZE) < EHCA_HUGEPAGE_SIZE) + return ehca_update_busmap(initial_pfn, total_nr_pages); + + /* Given chunk is >= 16GB -> check for hugepages */ + start_pfn = initial_pfn; + end_pfn = initial_pfn + total_nr_pages; + pfn = start_pfn; + + while (pfn < end_pfn) { + if (ehca_is_hugepage(pfn)) { + /* Add mem found in front of the hugepage */ + nr_pages = pfn - start_pfn; + ret = ehca_update_busmap(start_pfn, nr_pages); + if (ret) + return ret; + /* Skip the hugepage */ + pfn += (EHCA_HUGEPAGE_SIZE / PAGE_SIZE); + start_pfn = pfn; + } 
else + pfn += (EHCA_SECTSIZE / PAGE_SIZE); + } + + /* Add mem found behind the hugepage(s) */ + nr_pages = pfn - start_pfn; + return ehca_update_busmap(start_pfn, nr_pages); +} + +int ehca_create_busmap(void) +{ + int ret; + + ehca_mr_len = 0; + ret = walk_system_ram_range(0, 1ULL << MAX_PHYSMEM_BITS, NULL, + ehca_create_busmap_callback); + return ret; +} +/* Register every valid top-level busmap entry through ehca_reg_mr_dir_sections(); returns 0 only if the final hret is H_SUCCESS, -ENOMEM if the control block cannot be allocated. */ +static int ehca_reg_bmap_mr_rpages(struct ehca_shca *shca, + struct ehca_mr *e_mr, + struct ehca_mr_pginfo *pginfo) +{ + int top; + u64 hret = H_SUCCESS, *kpage; /* hret is read after the loop even if no top entry was valid */ + + kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL); + if (!kpage) { + ehca_err(&shca->ib_device, "kpage alloc failed"); + return -ENOMEM; + } + for (top = 0; top < EHCA_MAP_ENTRIES; top++) { + if (!ehca_bmap_valid(ehca_bmap->top[top])) + continue; + hret = ehca_reg_mr_dir_sections(top, kpage, shca, e_mr, pginfo); + if ((hret != H_PAGE_REGISTERED) && (hret != H_SUCCESS)) + break; + } + + ehca_free_fw_ctrlblock(kpage); + + if (hret == H_SUCCESS) + return 0; /* Everything is fine */ + else { + ehca_err(&shca->ib_device, "ehca_reg_bmap_mr_rpages failed, " + "h_ret=%lli e_mr=%p top=%x lkey=%x " + "hca_hndl=%llx mr_hndl=%llx", hret, e_mr, top, + e_mr->ib.ib_mr.lkey, + shca->ipz_hca_handle.handle, + e_mr->ipz_mr_handle.handle); + return ehca2ib_return_code(hret); + } +} + +static u64 ehca_map_vaddr(void *caddr) +{ + int top, dir, idx; + unsigned long abs_addr, offset; + u64 entry; + + if (!ehca_bmap) + return EHCA_INVAL_ADDR; + + abs_addr = __pa(caddr); + top = ehca_calc_index(abs_addr, EHCA_TOP_INDEX_SHIFT + EHCA_SECTSHIFT); + if (!ehca_bmap_valid(ehca_bmap->top[top])) + return EHCA_INVAL_ADDR; + + dir = ehca_calc_index(abs_addr, EHCA_DIR_INDEX_SHIFT + EHCA_SECTSHIFT); + if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir])) + return EHCA_INVAL_ADDR; + + idx = ehca_calc_index(abs_addr, EHCA_SECTSHIFT); + + entry = ehca_bmap->top[top]->dir[dir]->ent[idx]; + if (ehca_bmap_valid(entry)) { + offset = (unsigned long)caddr & (EHCA_SECTSIZE - 1); + return entry | offset; + } else +
return EHCA_INVAL_ADDR; +} + +static int ehca_dma_mapping_error(struct ib_device *dev, u64 dma_addr) +{ + return dma_addr == EHCA_INVAL_ADDR; +} + +static u64 ehca_dma_map_single(struct ib_device *dev, void *cpu_addr, + size_t size, enum dma_data_direction direction) +{ + if (cpu_addr) + return ehca_map_vaddr(cpu_addr); + else + return EHCA_INVAL_ADDR; +} + +static void ehca_dma_unmap_single(struct ib_device *dev, u64 addr, size_t size, + enum dma_data_direction direction) +{ + /* This is only a stub; nothing to be done here */ +} + +static u64 ehca_dma_map_page(struct ib_device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction direction) +{ + u64 addr; + + if (offset + size > PAGE_SIZE) + return EHCA_INVAL_ADDR; + + addr = ehca_map_vaddr(page_address(page)); + if (!ehca_dma_mapping_error(dev, addr)) + addr += offset; + + return addr; +} + +static void ehca_dma_unmap_page(struct ib_device *dev, u64 addr, size_t size, + enum dma_data_direction direction) +{ + /* This is only a stub; nothing to be done here */ +} + +static int ehca_dma_map_sg(struct ib_device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction direction) +{ + struct scatterlist *sg; + int i; + + for_each_sg(sgl, sg, nents, i) { + u64 addr; + addr = ehca_map_vaddr(sg_virt(sg)); + if (ehca_dma_mapping_error(dev, addr)) + return 0; + + sg->dma_address = addr; + sg->dma_length = sg->length; + } + return nents; +} + +static void ehca_dma_unmap_sg(struct ib_device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction direction) +{ + /* This is only a stub; nothing to be done here */ +} + +static void ehca_dma_sync_single_for_cpu(struct ib_device *dev, u64 addr, + size_t size, + enum dma_data_direction dir) +{ + dma_sync_single_for_cpu(dev->dma_device, addr, size, dir); +} + +static void ehca_dma_sync_single_for_device(struct ib_device *dev, u64 addr, + size_t size, + enum dma_data_direction dir) +{ + 
dma_sync_single_for_device(dev->dma_device, addr, size, dir); +} + +static void *ehca_dma_alloc_coherent(struct ib_device *dev, size_t size, + u64 *dma_handle, gfp_t flag) +{ + struct page *p; + void *addr = NULL; + u64 dma_addr; + + p = alloc_pages(flag, get_order(size)); + if (p) { + addr = page_address(p); + dma_addr = ehca_map_vaddr(addr); + if (ehca_dma_mapping_error(dev, dma_addr)) { + free_pages((unsigned long)addr, get_order(size)); + return NULL; + } + if (dma_handle) + *dma_handle = dma_addr; + return addr; + } + return NULL; +} + +static void ehca_dma_free_coherent(struct ib_device *dev, size_t size, + void *cpu_addr, u64 dma_handle) +{ + if (cpu_addr && size) + free_pages((unsigned long)cpu_addr, get_order(size)); +} + + +struct ib_dma_mapping_ops ehca_dma_mapping_ops = { + .mapping_error = ehca_dma_mapping_error, + .map_single = ehca_dma_map_single, + .unmap_single = ehca_dma_unmap_single, + .map_page = ehca_dma_map_page, + .unmap_page = ehca_dma_unmap_page, + .map_sg = ehca_dma_map_sg, + .unmap_sg = ehca_dma_unmap_sg, + .sync_single_for_cpu = ehca_dma_sync_single_for_cpu, + .sync_single_for_device = ehca_dma_sync_single_for_device, + .alloc_coherent = ehca_dma_alloc_coherent, + .free_coherent = ehca_dma_free_coherent, +}; diff --git a/drivers/staging/rdma/ehca/ehca_mrmw.h b/drivers/staging/rdma/ehca/ehca_mrmw.h new file mode 100644 index 0000000..50d8b51 --- /dev/null +++ b/drivers/staging/rdma/ehca/ehca_mrmw.h @@ -0,0 +1,132 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * MR/MW declarations and inline functions + * + * Authors: Dietmar Decker + * Christoph Raisch + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. 
+ * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _EHCA_MRMW_H_ +#define _EHCA_MRMW_H_ + +enum ehca_reg_type { + EHCA_REG_MR, + EHCA_REG_BUSMAP_MR +}; + +int ehca_reg_mr(struct ehca_shca *shca, + struct ehca_mr *e_mr, + u64 *iova_start, + u64 size, + int acl, + struct ehca_pd *e_pd, + struct ehca_mr_pginfo *pginfo, + u32 *lkey, + u32 *rkey, + enum ehca_reg_type reg_type); + +int ehca_reg_mr_rpages(struct ehca_shca *shca, + struct ehca_mr *e_mr, + struct ehca_mr_pginfo *pginfo); + +int ehca_rereg_mr(struct ehca_shca *shca, + struct ehca_mr *e_mr, + u64 *iova_start, + u64 size, + int mr_access_flags, + struct ehca_pd *e_pd, + struct ehca_mr_pginfo *pginfo, + u32 *lkey, + u32 *rkey); + +int ehca_unmap_one_fmr(struct ehca_shca *shca, + struct ehca_mr *e_fmr); + +int ehca_reg_smr(struct ehca_shca *shca, + struct ehca_mr *e_origmr, + struct ehca_mr *e_newmr, + u64 *iova_start, + int acl, + struct ehca_pd *e_pd, + u32 *lkey, + u32 *rkey); + +int ehca_reg_internal_maxmr(struct ehca_shca *shca, + struct ehca_pd *e_pd, + struct ehca_mr **maxmr); + +int ehca_reg_maxmr(struct ehca_shca *shca, + struct ehca_mr *e_newmr, + u64 *iova_start, + int acl, + struct ehca_pd *e_pd, + u32 *lkey, + u32 *rkey); + +int ehca_dereg_internal_maxmr(struct ehca_shca *shca); + +int ehca_mr_chk_buf_and_calc_size(struct ib_phys_buf *phys_buf_array, + int num_phys_buf, + u64 *iova_start, + u64 *size); + +int ehca_fmr_check_page_list(struct ehca_mr *e_fmr, + u64 *page_list, + int list_len); + +int ehca_set_pagebuf(struct ehca_mr_pginfo *pginfo, + u32 number, + u64 *kpage); + +int ehca_mr_is_maxmr(u64 size, + u64 *iova_start); + +void ehca_mrmw_map_acl(int ib_acl, + u32 *hipz_acl); + +void ehca_mrmw_set_pgsize_hipz_acl(u32 pgsize, u32 *hipz_acl); + +void ehca_mrmw_reverse_map_acl(const u32 *hipz_acl, + int *ib_acl); + +void ehca_mr_deletenew(struct ehca_mr *mr); + +int ehca_create_busmap(void); + +void ehca_destroy_busmap(void); + +extern struct ib_dma_mapping_ops ehca_dma_mapping_ops; +#endif /*_EHCA_MRMW_H_*/ diff --git 
a/drivers/staging/rdma/ehca/ehca_pd.c b/drivers/staging/rdma/ehca/ehca_pd.c new file mode 100644 index 0000000..351577a --- /dev/null +++ b/drivers/staging/rdma/ehca/ehca_pd.c @@ -0,0 +1,124 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * PD functions + * + * Authors: Christoph Raisch + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include + +#include "ehca_tools.h" +#include "ehca_iverbs.h" + +static struct kmem_cache *pd_cache; /* slab for struct ehca_pd, created in ehca_init_pd_cache() */ + +struct ib_pd *ehca_alloc_pd(struct ib_device *device, + struct ib_ucontext *context, struct ib_udata *udata) +{ + struct ehca_pd *pd; + int i; + + pd = kmem_cache_zalloc(pd_cache, GFP_KERNEL); + if (!pd) { + ehca_err(device, "device=%p context=%p out of memory", + device, context); + return ERR_PTR(-ENOMEM); + } + + for (i = 0; i < 2; i++) { /* both free[]/full[] list pairs start out empty */ + INIT_LIST_HEAD(&pd->free[i]); + INIT_LIST_HEAD(&pd->full[i]); + } + mutex_init(&pd->lock); + + /* + * Kernel PD: context is NULL + * User PD: context is non-NULL + */ + if (!context) { + /* + * After init, kernel PDs always reuse + * the one created in ehca_shca_reopen() + */ + struct ehca_shca *shca = container_of(device, struct ehca_shca, + ib_device); + pd->fw_pd.value = shca->pd->fw_pd.value; + } else + pd->fw_pd.value = (u64)pd; /* user PD: the value is this object's own address */ + + return &pd->ib_pd; +} + +int ehca_dealloc_pd(struct ib_pd *pd) +{ + struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd); + int i, leftovers = 0; + struct ipz_small_queue_page *page, *tmp; + + for (i = 0; i < 2; i++) { + list_splice(&my_pd->full[i], &my_pd->free[i]); /* merge so the single walk below covers both lists */ + list_for_each_entry_safe(page, tmp, &my_pd->free[i], list) { + leftovers = 1; /* a page was still listed at dealloc time; warned about below */ + free_page(page->page); + kmem_cache_free(small_qp_cache, page); + } + } + + if (leftovers) + ehca_warn(pd->device, + "Some small queue pages were not freed"); + + kmem_cache_free(pd_cache, my_pd); + + return 0; /* always reports success, even when leftovers were found */ +} + +int ehca_init_pd_cache(void) +{ + pd_cache = kmem_cache_create("ehca_cache_pd", + sizeof(struct ehca_pd), 0, + SLAB_HWCACHE_ALIGN, + NULL); + if (!pd_cache) + return -ENOMEM; + return 0; +} + +void ehca_cleanup_pd_cache(void) +{ + if (pd_cache) /* nothing to destroy if the cache was never created */ + kmem_cache_destroy(pd_cache); +} diff --git a/drivers/staging/rdma/ehca/ehca_qes.h b/drivers/staging/rdma/ehca/ehca_qes.h new file mode 100644 index 0000000..90c4efa --- /dev/null +++ b/drivers/staging/rdma/ehca/ehca_qes.h @@ -0,0 +1,260 @@ +/* + * IBM eServer eHCA 
Infiniband device driver for Linux on POWER + * + * Hardware request structures + * + * Authors: Waleri Fomin + * Reinhard Ernst + * Christoph Raisch + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + + +#ifndef _EHCA_QES_H_ +#define _EHCA_QES_H_ + +#include "ehca_tools.h" + +/* virtual scatter gather entry to specify remote addresses with length */ +struct ehca_vsgentry { + u64 vaddr; + u32 lkey; + u32 length; +}; + +#define GRH_FLAG_MASK EHCA_BMASK_IBM( 7, 7) +#define GRH_IPVERSION_MASK EHCA_BMASK_IBM( 0, 3) +#define GRH_TCLASS_MASK EHCA_BMASK_IBM( 4, 12) +#define GRH_FLOWLABEL_MASK EHCA_BMASK_IBM(13, 31) +#define GRH_PAYLEN_MASK EHCA_BMASK_IBM(32, 47) +#define GRH_NEXTHEADER_MASK EHCA_BMASK_IBM(48, 55) +#define GRH_HOPLIMIT_MASK EHCA_BMASK_IBM(56, 63) + +/* + * Unreliable Datagram Address Vector Format + * see IBTA Vol1 chapter 8.3 Global Routing Header + */ +struct ehca_ud_av { + u8 sl; + u8 lnh; + u16 dlid; + u8 reserved1; + u8 reserved2; + u8 reserved3; + u8 slid_path_bits; + u8 reserved4; + u8 ipd; + u8 reserved5; + u8 pmtu; + u32 reserved6; + u64 reserved7; + union { + struct { + u64 word_0; /* always set to 6 */ + /*should be 0x1B for IB transport */ + u64 word_1; + u64 word_2; + u64 word_3; + u64 word_4; + } grh; + struct { + u32 wd_0; + u32 wd_1; + /* DWord_1 --> SGID */ + + u32 sgid_wd3; + u32 sgid_wd2; + + u32 sgid_wd1; + u32 sgid_wd0; + /* DWord_3 --> DGID */ + + u32 dgid_wd3; + u32 dgid_wd2; + + u32 dgid_wd1; + u32 dgid_wd0; + } grh_l; + }; +}; + +/* maximum number of sg entries allowed in a WQE */ +#define MAX_WQE_SG_ENTRIES 252 + +#define WQE_OPTYPE_SEND 0x80 +#define WQE_OPTYPE_RDMAREAD 0x40 +#define WQE_OPTYPE_RDMAWRITE 0x20 +#define WQE_OPTYPE_CMPSWAP 0x10 +#define WQE_OPTYPE_FETCHADD 0x08 +#define WQE_OPTYPE_BIND 0x04 + +#define WQE_WRFLAG_REQ_SIGNAL_COM 0x80 +#define WQE_WRFLAG_FENCE 0x40 +#define WQE_WRFLAG_IMM_DATA_PRESENT 0x20 +#define WQE_WRFLAG_SOLIC_EVENT 0x10 + +#define WQEF_CACHE_HINT 0x80 +#define WQEF_CACHE_HINT_RD_WR 0x40 +#define WQEF_TIMED_WQE 0x20 +#define WQEF_PURGE 0x08 +#define WQEF_HIGH_NIBBLE 0xF0 + +#define MW_BIND_ACCESSCTRL_R_WRITE 0x40 +#define MW_BIND_ACCESSCTRL_R_READ 0x20 +#define MW_BIND_ACCESSCTRL_R_ATOMIC 
0x10 + +struct ehca_wqe { + u64 work_request_id; + u8 optype; + u8 wr_flag; + u16 pkeyi; + u8 wqef; + u8 nr_of_data_seg; + u16 wqe_provided_slid; + u32 destination_qp_number; + u32 resync_psn_sqp; + u32 local_ee_context_qkey; + u32 immediate_data; + union { + struct { + u64 remote_virtual_address; + u32 rkey; + u32 reserved; + u64 atomic_1st_op_dma_len; + u64 atomic_2nd_op; + struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES]; + + } nud; + struct { + u64 ehca_ud_av_ptr; + u64 reserved1; + u64 reserved2; + u64 reserved3; + struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES]; + } ud_avp; + struct { + struct ehca_ud_av ud_av; + struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES - + 2]; + } ud_av; + struct { + u64 reserved0; + u64 reserved1; + u64 reserved2; + u64 reserved3; + struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES]; + } all_rcv; + + struct { + u64 reserved; + u32 rkey; + u32 old_rkey; + u64 reserved1; + u64 reserved2; + u64 virtual_address; + u32 reserved3; + u32 length; + u32 reserved4; + u16 reserved5; + u8 reserved6; + u8 lr_ctl; + u32 lkey; + u32 reserved7; + u64 reserved8; + u64 reserved9; + u64 reserved10; + u64 reserved11; + } bind; + struct { + u64 reserved12; + u64 reserved13; + u32 size; + u32 start; + } inline_data; + } u; + +}; + +#define WC_SEND_RECEIVE EHCA_BMASK_IBM(0, 0) +#define WC_IMM_DATA EHCA_BMASK_IBM(1, 1) +#define WC_GRH_PRESENT EHCA_BMASK_IBM(2, 2) +#define WC_SE_BIT EHCA_BMASK_IBM(3, 3) +#define WC_STATUS_ERROR_BIT 0x80000000 +#define WC_STATUS_REMOTE_ERROR_FLAGS 0x0000F800 +#define WC_STATUS_PURGE_BIT 0x10 +#define WC_SEND_RECEIVE_BIT 0x80 + +struct ehca_cqe { + u64 work_request_id; + u8 optype; + u8 w_completion_flags; + u16 reserved1; + u32 nr_bytes_transferred; + u32 immediate_data; + u32 local_qp_number; + u8 freed_resource_count; + u8 service_level; + u16 wqe_count; + u32 qp_token; + u32 qkey_ee_token; + u32 remote_qp_number; + u16 dlid; + u16 rlid; + u16 reserved2; + u16 pkey_index; + u32 cqe_timestamp; + u32 wqe_timestamp; + u8 
wqe_timestamp_valid; + u8 reserved3; + u8 reserved4; + u8 cqe_flags; + u32 status; +}; + +struct ehca_eqe { + u64 entry; +}; + +struct ehca_mrte { + u64 starting_va; + u64 length; /* length of memory region in bytes*/ + u32 pd; + u8 key_instance; + u8 pagesize; + u8 mr_control; + u8 local_remote_access_ctrl; + u8 reserved[0x20 - 0x18]; + u64 at_pointer[4]; +}; +#endif /*_EHCA_QES_H_*/ diff --git a/drivers/staging/rdma/ehca/ehca_qp.c b/drivers/staging/rdma/ehca/ehca_qp.c new file mode 100644 index 0000000..2e89356 --- /dev/null +++ b/drivers/staging/rdma/ehca/ehca_qp.c @@ -0,0 +1,2257 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * QP functions + * + * Authors: Joachim Fenkes + * Stefan Roscher + * Waleri Fomin + * Hoang-Nam Nguyen + * Reinhard Ernst + * Heiko J Schick + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +#include "ehca_classes.h" +#include "ehca_tools.h" +#include "ehca_qes.h" +#include "ehca_iverbs.h" +#include "hcp_if.h" +#include "hipz_fns.h" + +static struct kmem_cache *qp_cache; + +/* + * attributes not supported by query qp + */ +#define QP_ATTR_QUERY_NOT_SUPPORTED (IB_QP_ACCESS_FLAGS | \ + IB_QP_EN_SQD_ASYNC_NOTIFY) + +/* + * ehca (internal) qp state values + */ +enum ehca_qp_state { + EHCA_QPS_RESET = 1, + EHCA_QPS_INIT = 2, + EHCA_QPS_RTR = 3, + EHCA_QPS_RTS = 5, + EHCA_QPS_SQD = 6, + EHCA_QPS_SQE = 8, + EHCA_QPS_ERR = 128 +}; + +/* + * qp state transitions as defined by IB Arch Rel 1.1 page 431 + */ +enum ib_qp_statetrans { + IB_QPST_ANY2RESET, + IB_QPST_ANY2ERR, + IB_QPST_RESET2INIT, + IB_QPST_INIT2RTR, + IB_QPST_INIT2INIT, + IB_QPST_RTR2RTS, + IB_QPST_RTS2SQD, + IB_QPST_RTS2RTS, + IB_QPST_SQD2RTS, + IB_QPST_SQE2RTS, + IB_QPST_SQD2SQD, + IB_QPST_MAX /* nr of transitions, this must be last!!! 
*/ +}; + +/* + * ib2ehca_qp_state maps IB to ehca qp_state + * returns the matching ehca qp state; unknown input is logged and yields -EINVAL + */ +static inline enum ehca_qp_state ib2ehca_qp_state(enum ib_qp_state ib_qp_state) +{ + switch (ib_qp_state) { + case IB_QPS_RESET: + return EHCA_QPS_RESET; + case IB_QPS_INIT: + return EHCA_QPS_INIT; + case IB_QPS_RTR: + return EHCA_QPS_RTR; + case IB_QPS_RTS: + return EHCA_QPS_RTS; + case IB_QPS_SQD: + return EHCA_QPS_SQD; + case IB_QPS_SQE: + return EHCA_QPS_SQE; + case IB_QPS_ERR: + return EHCA_QPS_ERR; + default: + ehca_gen_err("invalid ib_qp_state=%x", ib_qp_state); + return -EINVAL; /* negative errno handed back through the enum return type */ + } +} + +/* + * ehca2ib_qp_state maps ehca to IB qp_state + * returns the matching ib qp state; unknown input is logged and yields -EINVAL + */ +static inline enum ib_qp_state ehca2ib_qp_state(enum ehca_qp_state + ehca_qp_state) +{ + switch (ehca_qp_state) { + case EHCA_QPS_RESET: + return IB_QPS_RESET; + case EHCA_QPS_INIT: + return IB_QPS_INIT; + case EHCA_QPS_RTR: + return IB_QPS_RTR; + case EHCA_QPS_RTS: + return IB_QPS_RTS; + case EHCA_QPS_SQD: + return IB_QPS_SQD; + case EHCA_QPS_SQE: + return IB_QPS_SQE; + case EHCA_QPS_ERR: + return IB_QPS_ERR; + default: + ehca_gen_err("invalid ehca_qp_state=%x", ehca_qp_state); + return -EINVAL; /* negative errno handed back through the enum return type */ + } +} + +/* + * ehca_qp_type used as index for req_attr and opt_attr of + * struct ehca_modqp_statetrans + */ +enum ehca_qp_type { + QPT_RC = 0, + QPT_UC = 1, + QPT_UD = 2, + QPT_SQP = 3, + QPT_MAX +}; + +/* + * ib2ehcaqptype maps IB to ehca qp_type + * returns the ehca qp type; SMI and GSI both map to QPT_SQP, unknown types yield -EINVAL + */ +static inline enum ehca_qp_type ib2ehcaqptype(enum ib_qp_type ibqptype) +{ + switch (ibqptype) { + case IB_QPT_SMI: + case IB_QPT_GSI: + return QPT_SQP; + case IB_QPT_RC: + return QPT_RC; + case IB_QPT_UC: + return QPT_UC; + case IB_QPT_UD: + return QPT_UD; + default: + ehca_gen_err("Invalid ibqptype=%x", ibqptype); + return -EINVAL; /* negative errno handed back through the enum return type */ + } +} + +static inline enum ib_qp_statetrans get_modqp_statetrans(int ib_fromstate, + int ib_tostate) +{ + 
int index = -EINVAL; /* stays -EINVAL for every from/to pair not matched below */ + switch (ib_tostate) { + case IB_QPS_RESET: + index = IB_QPST_ANY2RESET; /* chosen regardless of ib_fromstate */ + break; + case IB_QPS_INIT: + switch (ib_fromstate) { + case IB_QPS_RESET: + index = IB_QPST_RESET2INIT; + break; + case IB_QPS_INIT: + index = IB_QPST_INIT2INIT; + break; + } + break; + case IB_QPS_RTR: + if (ib_fromstate == IB_QPS_INIT) + index = IB_QPST_INIT2RTR; + break; + case IB_QPS_RTS: + switch (ib_fromstate) { + case IB_QPS_RTR: + index = IB_QPST_RTR2RTS; + break; + case IB_QPS_RTS: + index = IB_QPST_RTS2RTS; + break; + case IB_QPS_SQD: + index = IB_QPST_SQD2RTS; + break; + case IB_QPS_SQE: + index = IB_QPST_SQE2RTS; + break; + } + break; + case IB_QPS_SQD: + if (ib_fromstate == IB_QPS_RTS) + index = IB_QPST_RTS2SQD; /* NOTE(review): IB_QPST_SQD2SQD is never produced here - confirm SQD to SQD is meant to be rejected */ + break; + case IB_QPS_SQE: + break; /* no transition into SQE is mapped; index stays -EINVAL */ + case IB_QPS_ERR: + index = IB_QPST_ANY2ERR; /* chosen regardless of ib_fromstate */ + break; + default: + break; + } + return index; +} + +/* + * ibqptype2servicetype returns hcp service type corresponding to given + * ib qp type used by create_qp() + */ +static inline int ibqptype2servicetype(enum ib_qp_type ibqptype) +{ + switch (ibqptype) { + case IB_QPT_SMI: + case IB_QPT_GSI: + return ST_UD; /* SMI and GSI share the UD service type */ + case IB_QPT_RC: + return ST_RC; + case IB_QPT_UC: + return ST_UC; + case IB_QPT_UD: + return ST_UD; + case IB_QPT_RAW_IPV6: + return -EINVAL; /* rejected without logging, unlike the default case */ + case IB_QPT_RAW_ETHERTYPE: + return -EINVAL; /* rejected without logging, unlike the default case */ + default: + ehca_gen_err("Invalid ibqptype=%x", ibqptype); + return -EINVAL; + } +} + +/* + * init userspace queue info from ipz_queue data + */ +static inline void queue2resp(struct ipzu_queue_resp *resp, + struct ipz_queue *queue) +{ + resp->qe_size = queue->qe_size; + resp->act_nr_of_sg = queue->act_nr_of_sg; + resp->queue_length = queue->queue_length; + resp->pagesize = queue->pagesize; + resp->toggle_state = queue->toggle_state; + resp->offset = queue->offset; +} + +/* + * init_qp_queue initializes/constructs r/squeue and registers queue pages. 
+ */
+/*
+ * Details of init_qp_queue():
+ *  - returns 0 immediately when parms->queue_size is 0 (nothing to set up);
+ *  - a "small" queue gets a single page of size (128 << parms->page_size),
+ *    any other queue gets parms->queue_size pages of EHCA_PAGESIZE;
+ *  - every page is registered through hipz_h_register_rpage_qp(); all pages
+ *    but the last must yield H_PAGE_REGISTERED, the last one must yield
+ *    expected_hret;
+ *  - returns -EBUSY if ipz_queue_ctor() reports failure (returns 0); on any
+ *    later failure the queue is destructed again and -EINVAL or the
+ *    translated hcall return code is returned.
+ */
+static inline int init_qp_queue(struct ehca_shca *shca,
+				struct ehca_pd *pd,
+				struct ehca_qp *my_qp,
+				struct ipz_queue *queue,
+				int q_type,
+				u64 expected_hret,
+				struct ehca_alloc_queue_parms *parms,
+				int wqe_size)
+{
+	int ret, cnt, ipz_rc, nr_q_pages;
+	void *vpage;
+	u64 rpage, h_ret;
+	struct ib_device *ib_dev = &shca->ib_device;
+	struct ipz_adapter_handle ipz_hca_handle = shca->ipz_hca_handle;
+
+	if (!parms->queue_size)
+		return 0;
+
+	if (parms->is_small) {
+		nr_q_pages = 1;
+		ipz_rc = ipz_queue_ctor(pd, queue, nr_q_pages,
+					128 << parms->page_size,
+					wqe_size, parms->act_nr_sges, 1);
+	} else {
+		nr_q_pages = parms->queue_size;
+		ipz_rc = ipz_queue_ctor(pd, queue, nr_q_pages,
+					EHCA_PAGESIZE, wqe_size,
+					parms->act_nr_sges, 0);
+	}
+
+	/* ipz_queue_ctor() signals failure with a zero return value */
+	if (!ipz_rc) {
+		ehca_err(ib_dev, "Cannot allocate page for queue. ipz_rc=%i",
+			 ipz_rc);
+		return -EBUSY;
+	}
+
+	/* register queue pages */
+	for (cnt = 0; cnt < nr_q_pages; cnt++) {
+		vpage = ipz_qpageit_get_inc(queue);
+		if (!vpage) {
+			ehca_err(ib_dev, "ipz_qpageit_get_inc() "
+				 "failed p_vpage= %p", vpage);
+			ret = -EINVAL;
+			goto init_qp_queue1;
+		}
+		rpage = __pa(vpage);
+
+		h_ret = hipz_h_register_rpage_qp(ipz_hca_handle,
+						 my_qp->ipz_qp_handle,
+						 NULL, 0, q_type,
+						 rpage, parms->is_small ? 0 : 1,
+						 my_qp->galpas.kernel);
+		if (cnt == (nr_q_pages - 1)) {	/* last page! */
+			if (h_ret != expected_hret) {
+				ehca_err(ib_dev, "hipz_qp_register_rpage() "
+					 "h_ret=%lli", h_ret);
+				ret = ehca2ib_return_code(h_ret);
+				goto init_qp_queue1;
+			}
+			/*
+			 * All pages of the queue being set up have been
+			 * handed out, so its page iterator must be exhausted.
+			 * Probe this queue, not unconditionally the rqueue.
+			 */
+			vpage = ipz_qpageit_get_inc(queue);
+			if (vpage) {
+				ehca_err(ib_dev, "ipz_qpageit_get_inc() "
+					 "should not succeed vpage=%p", vpage);
+				ret = -EINVAL;
+				goto init_qp_queue1;
+			}
+		} else {
+			if (h_ret != H_PAGE_REGISTERED) {
+				ehca_err(ib_dev, "hipz_qp_register_rpage() "
+					 "h_ret=%lli", h_ret);
+				ret = ehca2ib_return_code(h_ret);
+				goto init_qp_queue1;
+			}
+		}
+	}
+
+	ipz_qeit_reset(queue);
+
+	return 0;
+
+init_qp_queue1:
+	ipz_queue_dtor(pd, queue);
+	return ret;
+}
+
+/*
+ * Return the WQE size for act_nr_sge scatter/gather elements:
+ * (128 << act_nr_sge) for low-latency QPs, otherwise the offset of
+ * sg_list[act_nr_sge] inside struct ehca_wqe.
+ */
+static inline int ehca_calc_wqe_size(int act_nr_sge, int is_llqp)
+{
+	if (is_llqp)
+		return 128 << act_nr_sge;
+	else
+		return offsetof(struct ehca_wqe,
+				u.nud.sg_list[act_nr_sge]);
+}
+
+/*
+ * Decide whether a queue is "small" and store the result in queue->page_size
+ * and queue->is_small. The total size is wqe_size * (max_wr + 1):
+ *   <= 512 bytes  -> page_size 2
+ *   <= 1024 bytes -> page_size 3
+ *   otherwise     -> page_size 0 (not a small queue)
+ */
+static void ehca_determine_small_queue(struct ehca_alloc_queue_parms *queue,
+				       int req_nr_sge, int is_llqp)
+{
+	u32 wqe_size, q_size;
+	int act_nr_sge = req_nr_sge;
+
+	if (!is_llqp)
+		/* round up #SGEs so WQE size is a power of 2 */
+		for (act_nr_sge = 4; act_nr_sge <= 252;
+		     act_nr_sge = 4 + 2 * act_nr_sge)
+			if (act_nr_sge >= req_nr_sge)
+				break;
+
+	wqe_size = ehca_calc_wqe_size(act_nr_sge, is_llqp);
+	q_size = wqe_size * (queue->max_wr + 1);
+
+	if (q_size <= 512)
+		queue->page_size = 2;
+	else if (q_size <= 1024)
+		queue->page_size = 3;
+	else
+		queue->page_size = 0;
+
+	queue->is_small = (queue->page_size != 0);
+}
+
+/*
+ * Queue the QP on the error list of its send CQ (on_sq != 0) or of its
+ * receive CQ (on_sq == 0), unless its list node is already linked.
+ * Low-latency QPs are ignored.
+ */
+/* needs to be called with cq->spinlock held */
+void ehca_add_to_err_list(struct ehca_qp *qp, int on_sq)
+{
+	struct list_head *list, *node;
+
+	/* TODO: support low latency QPs */
+	if (qp->ext_type == EQPT_LLQP)
+		return;
+
+	if (on_sq) {
+		list = &qp->send_cq->sqp_err_list;
+		node = &qp->sq_err_node;
+	} else {
+		list = &qp->recv_cq->rqp_err_list;
+		node = &qp->rq_err_node;
+	}
+
+	if (list_empty(node))
+		list_add_tail(node, list);
+}
+
+/*
+ * Unlink node from the error list it is on (if any) and re-initialize it.
+ * Takes cq->spinlock itself.
+ */
+static void del_from_err_list(struct ehca_cq *cq, struct list_head *node)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&cq->spinlock, flags);
+
+	if (!list_empty(node))
+		list_del_init(node);
+
+	spin_unlock_irqrestore(&cq->spinlock, flags);
+}
+
+/*
+ * Put a queue map into its initial state: tail on the last entry, nothing
+ * left to poll, next WQE index 0, every entry marked as reported and without
+ * a requested CQE.
+ */
+static void reset_queue_map(struct ehca_queue_map *qmap)
+{
+	int i;
+
+	qmap->tail = qmap->entries - 1;
+	qmap->left_to_poll = 0;
+	qmap->next_wqe_idx = 0;
+	for (i = 0; i < qmap->entries; i++) {
+		qmap->map[i].reported = 1;
+		qmap->map[i].cqe_req = 0;
+	}
+}
+
+/*
+ * Create an ib_qp struct that is either a QP or an SRQ, depending on
+ * the value of the is_srq parameter. If init_attr and srq_init_attr share
+ * fields, the field out of init_attr is used.
+ *
+ * Returns the new ehca_qp or an ERR_PTR() value; it never returns NULL.
+ * shca->num_qps is incremented on entry and decremented again on every
+ * failure path. On success init_attr->cap is overwritten with the actual
+ * queue capabilities.
+ */
+static struct ehca_qp *internal_create_qp(
+	struct ib_pd *pd,
+	struct ib_qp_init_attr *init_attr,
+	struct ib_srq_init_attr *srq_init_attr,
+	struct ib_udata *udata, int is_srq)
+{
+	struct ehca_qp *my_qp, *my_srq = NULL;
+	struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd);
+	struct ehca_shca *shca = container_of(pd->device, struct ehca_shca,
+					      ib_device);
+	struct ib_ucontext *context = NULL;
+	u64 h_ret;
+	int is_llqp = 0, has_srq = 0, is_user = 0;
+	int qp_type, max_send_sge, max_recv_sge, ret;
+
+	/* h_call's out parameters */
+	struct ehca_alloc_qp_parms parms;
+	u32 swqe_size = 0, rwqe_size = 0, ib_qp_num;
+	unsigned long flags;
+
+	if (!atomic_add_unless(&shca->num_qps, 1, shca->max_num_qps)) {
+		ehca_err(pd->device, "Unable to create QP, max number of %i "
+			 "QPs reached.", shca->max_num_qps);
+		ehca_err(pd->device, "To increase the maximum number of QPs "
+			 "use the number_of_qps module parameter.\n");
+		return ERR_PTR(-ENOSPC);
+	}
+
+	if (init_attr->create_flags) {
+		atomic_dec(&shca->num_qps);
+		return ERR_PTR(-EINVAL);
+	}
+
+	memset(&parms, 0, sizeof(parms));
+	qp_type = init_attr->qp_type;
+
+	if (init_attr->sq_sig_type != IB_SIGNAL_REQ_WR &&
+		init_attr->sq_sig_type != IB_SIGNAL_ALL_WR) {
+		ehca_err(pd->device, "init_attr->sq_sig_type=%x not allowed",
+			 init_attr->sq_sig_type);
+		atomic_dec(&shca->num_qps);
+		return ERR_PTR(-EINVAL);
+	}
+
+	/* save LLQP info */
+	if (qp_type & 0x80) {
+		is_llqp = 1;
+		parms.ext_type = EQPT_LLQP;
+		parms.ll_comp_flags = qp_type & LLQP_COMP_MASK;
+	}
+	qp_type &= 0x1F;
+	init_attr->qp_type &= 0x1F;
+
+	/* handle SRQ base QPs */
+	if (init_attr->srq) {
+		my_srq = container_of(init_attr->srq, struct ehca_qp, ib_srq);
+
+		if (qp_type == IB_QPT_UC) {
+			ehca_err(pd->device, "UC with SRQ not supported");
+			atomic_dec(&shca->num_qps);
+			return ERR_PTR(-EINVAL);
+		}
+
+		has_srq = 1;
+		parms.ext_type = EQPT_SRQBASE;
+		parms.srq_qpn = my_srq->real_qp_num;
+	}
+
+	if (is_llqp && has_srq) {
+		ehca_err(pd->device, "LLQPs can't have an SRQ");
+		atomic_dec(&shca->num_qps);
+		return ERR_PTR(-EINVAL);
+	}
+
+	/* handle SRQs */
+	if (is_srq) {
+		parms.ext_type = EQPT_SRQ;
+		parms.srq_limit = srq_init_attr->attr.srq_limit;
+		if (init_attr->cap.max_recv_sge > 3) {
+			ehca_err(pd->device, "no more than three SGEs "
+				 "supported for SRQ  pd=%p  max_sge=%x",
+				 pd, init_attr->cap.max_recv_sge);
+			atomic_dec(&shca->num_qps);
+			return ERR_PTR(-EINVAL);
+		}
+	}
+
+	/* check QP type */
+	if (qp_type != IB_QPT_UD &&
+	    qp_type != IB_QPT_UC &&
+	    qp_type != IB_QPT_RC &&
+	    qp_type != IB_QPT_SMI &&
+	    qp_type != IB_QPT_GSI) {
+		ehca_err(pd->device, "wrong QP Type=%x", qp_type);
+		atomic_dec(&shca->num_qps);
+		return ERR_PTR(-EINVAL);
+	}
+
+	if (is_llqp) {
+		switch (qp_type) {
+		case IB_QPT_RC:
+			if ((init_attr->cap.max_send_wr > 255) ||
+			    (init_attr->cap.max_recv_wr > 255)) {
+				ehca_err(pd->device,
+					 "Invalid Number of max_sq_wr=%x "
+					 "or max_rq_wr=%x for RC LLQP",
+					 init_attr->cap.max_send_wr,
+					 init_attr->cap.max_recv_wr);
+				atomic_dec(&shca->num_qps);
+				return ERR_PTR(-EINVAL);
+			}
+			break;
+		case IB_QPT_UD:
+			if (!EHCA_BMASK_GET(HCA_CAP_UD_LL_QP, shca->hca_cap)) {
+				ehca_err(pd->device, "UD LLQP not supported "
+					 "by this adapter");
+				atomic_dec(&shca->num_qps);
+				return ERR_PTR(-ENOSYS);
+			}
+			if (!(init_attr->cap.max_send_sge <= 5
+			    && init_attr->cap.max_send_sge >= 1
+			    && init_attr->cap.max_recv_sge <= 5
+			    && init_attr->cap.max_recv_sge >= 1)) {
+				ehca_err(pd->device,
+					 "Invalid Number of max_send_sge=%x "
+					 "or max_recv_sge=%x for UD LLQP",
+					 init_attr->cap.max_send_sge,
+					 init_attr->cap.max_recv_sge);
+				atomic_dec(&shca->num_qps);
+				return ERR_PTR(-EINVAL);
+			} else if (init_attr->cap.max_send_wr > 255) {
+				ehca_err(pd->device,
+					 "Invalid Number of "
+					 "max_send_wr=%x for UD QP_TYPE=%x",
+					 init_attr->cap.max_send_wr, qp_type);
+				atomic_dec(&shca->num_qps);
+				return ERR_PTR(-EINVAL);
+			}
+			break;
+		default:
+			ehca_err(pd->device, "unsupported LL QP Type=%x",
+				 qp_type);
+			atomic_dec(&shca->num_qps);
+			return ERR_PTR(-EINVAL);
+		}
+	} else {
+		int max_sge = (qp_type == IB_QPT_UD || qp_type == IB_QPT_SMI
+			       || qp_type == IB_QPT_GSI) ? 250 : 252;
+
+		if (init_attr->cap.max_send_sge > max_sge
+		    || init_attr->cap.max_recv_sge > max_sge) {
+			ehca_err(pd->device, "Invalid number of SGEs requested "
+				 "send_sge=%x recv_sge=%x max_sge=%x",
+				 init_attr->cap.max_send_sge,
+				 init_attr->cap.max_recv_sge, max_sge);
+			atomic_dec(&shca->num_qps);
+			return ERR_PTR(-EINVAL);
+		}
+	}
+
+	my_qp = kmem_cache_zalloc(qp_cache, GFP_KERNEL);
+	if (!my_qp) {
+		ehca_err(pd->device, "pd=%p not enough memory to alloc qp", pd);
+		atomic_dec(&shca->num_qps);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	if (pd->uobject && udata) {
+		is_user = 1;
+		context = pd->uobject->context;
+	}
+
+	atomic_set(&my_qp->nr_events, 0);
+	init_waitqueue_head(&my_qp->wait_completion);
+	spin_lock_init(&my_qp->spinlock_s);
+	spin_lock_init(&my_qp->spinlock_r);
+	my_qp->qp_type = qp_type;
+	my_qp->ext_type = parms.ext_type;
+	my_qp->state = IB_QPS_RESET;
+
+	if (init_attr->recv_cq)
+		my_qp->recv_cq =
+			container_of(init_attr->recv_cq, struct ehca_cq, ib_cq);
+	if (init_attr->send_cq)
+		my_qp->send_cq =
+			container_of(init_attr->send_cq, struct ehca_cq, ib_cq);
+
+	idr_preload(GFP_KERNEL);
+	write_lock_irqsave(&ehca_qp_idr_lock, flags);
+
+	ret = idr_alloc(&ehca_qp_idr, my_qp, 0, 0x2000000, GFP_NOWAIT);
+	if (ret >= 0)
+		my_qp->token = ret;
+
+	write_unlock_irqrestore(&ehca_qp_idr_lock, flags);
+	idr_preload_end();
+	if (ret < 0) {
+		if (ret == -ENOSPC) {
+			ret = -EINVAL;
+			ehca_err(pd->device, "Invalid number of qp");
+		} else {
+			ret = -ENOMEM;
+			ehca_err(pd->device, "Can't allocate new idr entry.");
+		}
+		goto create_qp_exit0;
+	}
+
+	if (has_srq)
+		parms.srq_token = my_qp->token;
+
+	parms.servicetype = ibqptype2servicetype(qp_type);
+	if (parms.servicetype < 0) {
+		ret = -EINVAL;
+		ehca_err(pd->device, "Invalid qp_type=%x", qp_type);
+		goto create_qp_exit1;
+	}
+
+	/* Always signal by WQE so we can hide circ. WQEs */
+	parms.sigtype = HCALL_SIGT_BY_WQE;
+
+	/* UD_AV CIRCUMVENTION */
+	max_send_sge = init_attr->cap.max_send_sge;
+	max_recv_sge = init_attr->cap.max_recv_sge;
+	if (parms.servicetype == ST_UD && !is_llqp) {
+		max_send_sge += 2;
+		max_recv_sge += 2;
+	}
+
+	parms.token = my_qp->token;
+	parms.eq_handle = shca->eq.ipz_eq_handle;
+	parms.pd = my_pd->fw_pd;
+	if (my_qp->send_cq)
+		parms.send_cq_handle = my_qp->send_cq->ipz_cq_handle;
+	if (my_qp->recv_cq)
+		parms.recv_cq_handle = my_qp->recv_cq->ipz_cq_handle;
+
+	parms.squeue.max_wr = init_attr->cap.max_send_wr;
+	parms.rqueue.max_wr = init_attr->cap.max_recv_wr;
+	parms.squeue.max_sge = max_send_sge;
+	parms.rqueue.max_sge = max_recv_sge;
+
+	/* RC QPs need one more SWQE for unsolicited ack circumvention */
+	if (qp_type == IB_QPT_RC)
+		parms.squeue.max_wr++;
+
+	if (EHCA_BMASK_GET(HCA_CAP_MINI_QP, shca->hca_cap)) {
+		if (HAS_SQ(my_qp))
+			ehca_determine_small_queue(
+				&parms.squeue, max_send_sge, is_llqp);
+		if (HAS_RQ(my_qp))
+			ehca_determine_small_queue(
+				&parms.rqueue, max_recv_sge, is_llqp);
+		parms.qp_storage =
+			(parms.squeue.is_small || parms.rqueue.is_small);
+	}
+
+	h_ret = hipz_h_alloc_resource_qp(shca->ipz_hca_handle, &parms, is_user);
+	if (h_ret != H_SUCCESS) {
+		ehca_err(pd->device, "h_alloc_resource_qp() failed h_ret=%lli",
+			 h_ret);
+		ret = ehca2ib_return_code(h_ret);
+		goto create_qp_exit1;
+	}
+
+	ib_qp_num = my_qp->real_qp_num = parms.real_qp_num;
+	my_qp->ipz_qp_handle = parms.qp_handle;
+	my_qp->galpas = parms.galpas;
+
+	swqe_size = ehca_calc_wqe_size(parms.squeue.act_nr_sges, is_llqp);
+	rwqe_size = ehca_calc_wqe_size(parms.rqueue.act_nr_sges, is_llqp);
+
+	switch (qp_type) {
+	case IB_QPT_RC:
+		if (is_llqp) {
+			parms.squeue.act_nr_sges = 1;
+			parms.rqueue.act_nr_sges = 1;
+		}
+		/* hide the extra WQE */
+		parms.squeue.act_nr_wqes--;
+		break;
+	case IB_QPT_UD:
+	case IB_QPT_GSI:
+	case IB_QPT_SMI:
+		/* UD circumvention */
+		if (is_llqp) {
+			parms.squeue.act_nr_sges = 1;
+			parms.rqueue.act_nr_sges = 1;
+		} else {
+			parms.squeue.act_nr_sges -= 2;
+			parms.rqueue.act_nr_sges -= 2;
+		}
+
+		if (IB_QPT_GSI == qp_type || IB_QPT_SMI == qp_type) {
+			parms.squeue.act_nr_wqes = init_attr->cap.max_send_wr;
+			parms.rqueue.act_nr_wqes = init_attr->cap.max_recv_wr;
+			parms.squeue.act_nr_sges = init_attr->cap.max_send_sge;
+			parms.rqueue.act_nr_sges = init_attr->cap.max_recv_sge;
+			ib_qp_num = (qp_type == IB_QPT_SMI) ? 0 : 1;
+		}
+
+		break;
+
+	default:
+		break;
+	}
+
+	/* initialize r/squeue and register queue pages */
+	if (HAS_SQ(my_qp)) {
+		ret = init_qp_queue(
+			shca, my_pd, my_qp, &my_qp->ipz_squeue, 0,
+			HAS_RQ(my_qp) ? H_PAGE_REGISTERED : H_SUCCESS,
+			&parms.squeue, swqe_size);
+		if (ret) {
+			ehca_err(pd->device, "Couldn't initialize squeue "
+				 "and pages ret=%i", ret);
+			goto create_qp_exit2;
+		}
+
+		if (!is_user) {
+			my_qp->sq_map.entries = my_qp->ipz_squeue.queue_length /
+				my_qp->ipz_squeue.qe_size;
+			my_qp->sq_map.map = vmalloc(my_qp->sq_map.entries *
+						    sizeof(struct ehca_qmap_entry));
+			if (!my_qp->sq_map.map) {
+				/* ret is still 0 here; ERR_PTR(0) would be NULL */
+				ret = -ENOMEM;
+				ehca_err(pd->device, "Couldn't allocate squeue "
+					 "map ret=%i", ret);
+				goto create_qp_exit3;
+			}
+			INIT_LIST_HEAD(&my_qp->sq_err_node);
+			/* to avoid the generation of bogus flush CQEs */
+			reset_queue_map(&my_qp->sq_map);
+		}
+	}
+
+	if (HAS_RQ(my_qp)) {
+		ret = init_qp_queue(
+			shca, my_pd, my_qp, &my_qp->ipz_rqueue, 1,
+			H_SUCCESS, &parms.rqueue, rwqe_size);
+		if (ret) {
+			ehca_err(pd->device, "Couldn't initialize rqueue "
+				 "and pages ret=%i", ret);
+			goto create_qp_exit4;
+		}
+		if (!is_user) {
+			my_qp->rq_map.entries = my_qp->ipz_rqueue.queue_length /
+				my_qp->ipz_rqueue.qe_size;
+			my_qp->rq_map.map = vmalloc(my_qp->rq_map.entries *
+						    sizeof(struct ehca_qmap_entry));
+			if (!my_qp->rq_map.map) {
+				/* ret is still 0 here; ERR_PTR(0) would be NULL */
+				ret = -ENOMEM;
+				ehca_err(pd->device, "Couldn't allocate rqueue "
+					 "map ret=%i", ret);
+				goto create_qp_exit5;
+			}
+			INIT_LIST_HEAD(&my_qp->rq_err_node);
+			/* to avoid the generation of bogus flush CQEs */
+			reset_queue_map(&my_qp->rq_map);
+		}
+	} else if (init_attr->srq && !is_user) {
+		/* this is a base QP, use the queue map of the SRQ */
+		my_qp->rq_map = my_srq->rq_map;
+		INIT_LIST_HEAD(&my_qp->rq_err_node);
+
+		my_qp->ipz_rqueue = my_srq->ipz_rqueue;
+	}
+
+	if (is_srq) {
+		my_qp->ib_srq.pd = &my_pd->ib_pd;
+		my_qp->ib_srq.device = my_pd->ib_pd.device;
+
+		my_qp->ib_srq.srq_context = init_attr->qp_context;
+		my_qp->ib_srq.event_handler = init_attr->event_handler;
+	} else {
+		my_qp->ib_qp.qp_num = ib_qp_num;
+		my_qp->ib_qp.pd = &my_pd->ib_pd;
+		my_qp->ib_qp.device = my_pd->ib_pd.device;
+
+		my_qp->ib_qp.recv_cq = init_attr->recv_cq;
+		my_qp->ib_qp.send_cq = init_attr->send_cq;
+
+		my_qp->ib_qp.qp_type = qp_type;
+		my_qp->ib_qp.srq = init_attr->srq;
+
+		my_qp->ib_qp.qp_context = init_attr->qp_context;
+		my_qp->ib_qp.event_handler = init_attr->event_handler;
+	}
+
+	init_attr->cap.max_inline_data = 0; /* not supported yet */
+	init_attr->cap.max_recv_sge = parms.rqueue.act_nr_sges;
+	init_attr->cap.max_recv_wr = parms.rqueue.act_nr_wqes;
+	init_attr->cap.max_send_sge = parms.squeue.act_nr_sges;
+	init_attr->cap.max_send_wr = parms.squeue.act_nr_wqes;
+	my_qp->init_attr = *init_attr;
+
+	if (qp_type == IB_QPT_SMI || qp_type == IB_QPT_GSI) {
+		shca->sport[init_attr->port_num - 1].ibqp_sqp[qp_type] =
+			&my_qp->ib_qp;
+		if (ehca_nr_ports < 0) {
+			/* alloc array to cache subsequent modify qp parms
+			 * for autodetect mode
+			 */
+			my_qp->mod_qp_parm =
+				kzalloc(EHCA_MOD_QP_PARM_MAX *
+					sizeof(*my_qp->mod_qp_parm),
+					GFP_KERNEL);
+			if (!my_qp->mod_qp_parm) {
+				ehca_err(pd->device,
+					 "Could not alloc mod_qp_parm");
+				/*
+				 * ret is still 0 here, and the rq map is
+				 * already allocated, so unwind from exit6.
+				 */
+				ret = -ENOMEM;
+				goto create_qp_exit6;
+			}
+		}
+	}
+
+	/* NOTE: define_apq0() not supported yet */
+	if (qp_type == IB_QPT_GSI) {
+		h_ret = ehca_define_sqp(shca, my_qp, init_attr);
+		if (h_ret != H_SUCCESS) {
+			/* exit6 skips the kfree() done at exit7 */
+			kfree(my_qp->mod_qp_parm);
+			my_qp->mod_qp_parm = NULL;
+			ret = ehca2ib_return_code(h_ret);
+			goto create_qp_exit6;
+		}
+	}
+
+	if (my_qp->send_cq) {
+		ret = ehca_cq_assign_qp(my_qp->send_cq, my_qp);
+		if (ret) {
+			ehca_err(pd->device,
+				 "Couldn't assign qp to send_cq ret=%i", ret);
+			goto create_qp_exit7;
+		}
+	}
+
+	/* copy queues, galpa data to user space */
+	if (context && udata) {
+		struct ehca_create_qp_resp resp;
+		memset(&resp, 0, sizeof(resp));
+
+		resp.qp_num = my_qp->real_qp_num;
+		resp.token = my_qp->token;
+		resp.qp_type = my_qp->qp_type;
+		resp.ext_type = my_qp->ext_type;
+		resp.qkey = my_qp->qkey;
+		resp.real_qp_num = my_qp->real_qp_num;
+
+		if (HAS_SQ(my_qp))
+			queue2resp(&resp.ipz_squeue, &my_qp->ipz_squeue);
+		if (HAS_RQ(my_qp))
+			queue2resp(&resp.ipz_rqueue, &my_qp->ipz_rqueue);
+		resp.fw_handle_ofs = (u32)
+			(my_qp->galpas.user.fw_handle & (PAGE_SIZE - 1));
+
+		if (ib_copy_to_udata(udata, &resp, sizeof resp)) {
+			ehca_err(pd->device, "Copy to udata failed");
+			ret = -EINVAL;
+			goto create_qp_exit8;
+		}
+	}
+
+	return my_qp;
+
+create_qp_exit8:
+	/* send_cq is NULL when init_attr carried none (e.g. the SRQ path) */
+	if (my_qp->send_cq)
+		ehca_cq_unassign_qp(my_qp->send_cq, my_qp->real_qp_num);
+
+create_qp_exit7:
+	kfree(my_qp->mod_qp_parm);
+
+create_qp_exit6:
+	/*
+	 * Only reached after the sport pointer has been published for
+	 * SMI/GSI QPs; the QP is about to be freed, so retract it.
+	 */
+	if (qp_type == IB_QPT_SMI || qp_type == IB_QPT_GSI)
+		shca->sport[init_attr->port_num - 1].ibqp_sqp[qp_type] =
+			NULL;
+	if (HAS_RQ(my_qp) && !is_user)
+		vfree(my_qp->rq_map.map);
+
+create_qp_exit5:
+	if (HAS_RQ(my_qp))
+		ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue);
+
+create_qp_exit4:
+	if (HAS_SQ(my_qp) && !is_user)
+		vfree(my_qp->sq_map.map);
+
+create_qp_exit3:
+	if (HAS_SQ(my_qp))
+		ipz_queue_dtor(my_pd, &my_qp->ipz_squeue);
+
+create_qp_exit2:
+	hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp);
+
+create_qp_exit1:
+	write_lock_irqsave(&ehca_qp_idr_lock, flags);
+	idr_remove(&ehca_qp_idr, my_qp->token);
+	write_unlock_irqrestore(&ehca_qp_idr_lock, flags);
+
+create_qp_exit0:
+	kmem_cache_free(qp_cache, my_qp);
+	atomic_dec(&shca->num_qps);
+	return ERR_PTR(ret);
+}
+
+/*
+ * Create a regular QP: wrapper around internal_create_qp() with is_srq == 0.
+ * Returns the embedded ib_qp, or the ERR_PTR() value passed through.
+ */
+struct ib_qp *ehca_create_qp(struct ib_pd *pd,
+			     struct ib_qp_init_attr *qp_init_attr,
+			     struct ib_udata *udata)
+{
+	struct ehca_qp *ret;
+
+	ret = internal_create_qp(pd, qp_init_attr, NULL, udata, 0);
+	return IS_ERR(ret) ? (struct ib_qp *)ret : &ret->ib_qp;
+}
+
+static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp,
+			       struct ib_uobject *uobject);
+
+/*
+ * Create an SRQ. Only IB_SRQT_BASIC is accepted (-ENOSYS otherwise). The SRQ
+ * is created through internal_create_qp() with is_srq == 1 and then driven
+ * through three hipz_h_modify_qp() calls: state INIT, qp_enable, state RTR.
+ * If any step after creation fails, the SRQ is destroyed again and an
+ * ERR_PTR() value is returned.
+ */
+struct ib_srq *ehca_create_srq(struct ib_pd *pd,
+			       struct ib_srq_init_attr *srq_init_attr,
+			       struct ib_udata *udata)
+{
+	struct ib_qp_init_attr qp_init_attr;
+	struct ehca_qp *my_qp;
+	struct ib_srq *ret;
+	struct ehca_shca *shca = container_of(pd->device, struct ehca_shca,
+					      ib_device);
+	struct hcp_modify_qp_control_block *mqpcb;
+	u64 hret, update_mask;
+
+	if (srq_init_attr->srq_type != IB_SRQT_BASIC)
+		return ERR_PTR(-ENOSYS);
+
+	/* For common attributes, internal_create_qp() takes its info
+	 * out of qp_init_attr, so copy all common attrs there.
+	 */
+	memset(&qp_init_attr, 0, sizeof(qp_init_attr));
+	qp_init_attr.event_handler = srq_init_attr->event_handler;
+	qp_init_attr.qp_context = srq_init_attr->srq_context;
+	qp_init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
+	qp_init_attr.qp_type = IB_QPT_RC;
+	qp_init_attr.cap.max_recv_wr = srq_init_attr->attr.max_wr;
+	qp_init_attr.cap.max_recv_sge = srq_init_attr->attr.max_sge;
+
+	my_qp = internal_create_qp(pd, &qp_init_attr, srq_init_attr, udata, 1);
+	if (IS_ERR(my_qp))
+		return (struct ib_srq *)my_qp;
+
+	/* copy back return values */
+	srq_init_attr->attr.max_wr = qp_init_attr.cap.max_recv_wr;
+	srq_init_attr->attr.max_sge = 3;
+
+	/* drive SRQ into RTR state */
+	mqpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
+	if (!mqpcb) {
+		ehca_err(pd->device, "Could not get zeroed page for mqpcb "
+			 "ehca_qp=%p qp_num=%x ", my_qp, my_qp->real_qp_num);
+		ret = ERR_PTR(-ENOMEM);
+		goto create_srq1;
+	}
+
+	mqpcb->qp_state = EHCA_QPS_INIT;
+	mqpcb->prim_phys_port = 1;
+	update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_STATE, 1);
+	hret = hipz_h_modify_qp(shca->ipz_hca_handle,
+				my_qp->ipz_qp_handle,
+				&my_qp->pf,
+				update_mask,
+				mqpcb, my_qp->galpas.kernel);
+	if (hret != H_SUCCESS) {
+		ehca_err(pd->device, "Could not modify SRQ to INIT "
+			 "ehca_qp=%p qp_num=%x h_ret=%lli",
+			 my_qp, my_qp->real_qp_num, hret);
+		goto create_srq2;
+	}
+
+	mqpcb->qp_enable = 1;
+	update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_ENABLE, 1);
+	hret = hipz_h_modify_qp(shca->ipz_hca_handle,
+				my_qp->ipz_qp_handle,
+				&my_qp->pf,
+				update_mask,
+				mqpcb, my_qp->galpas.kernel);
+	if (hret != H_SUCCESS) {
+		ehca_err(pd->device, "Could not enable SRQ "
+			 "ehca_qp=%p qp_num=%x h_ret=%lli",
+			 my_qp, my_qp->real_qp_num, hret);
+		goto create_srq2;
+	}
+
+	mqpcb->qp_state  = EHCA_QPS_RTR;
+	update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_STATE, 1);
+	hret = hipz_h_modify_qp(shca->ipz_hca_handle,
+				my_qp->ipz_qp_handle,
+				&my_qp->pf,
+				update_mask,
+				mqpcb, my_qp->galpas.kernel);
+	if (hret != H_SUCCESS) {
+		ehca_err(pd->device, "Could not modify SRQ to RTR "
+			 "ehca_qp=%p qp_num=%x h_ret=%lli",
+			 my_qp, my_qp->real_qp_num, hret);
+		goto create_srq2;
+	}
+
+	ehca_free_fw_ctrlblock(mqpcb);
+
+	return &my_qp->ib_srq;
+
+create_srq2:
+	ret = ERR_PTR(ehca2ib_return_code(hret));
+	ehca_free_fw_ctrlblock(mqpcb);
+
+create_srq1:
+	internal_destroy_qp(pd->device, my_qp, my_qp->ib_srq.uobject);
+
+	return ret;
+}
+
+/*
+ * prepare_sqe_rts called by internal_modify_qp() at trans sqe -> rts
+ * set purge bit of bad wqe and subsequent wqes to avoid reentering sqe
+ * returns total number of bad wqes in bad_wqe_cnt
+ *
+ * Return value is 0 on success, the translated hcall return code if
+ * hipz_h_disable_and_get_wqe() fails, or -EFAULT if the reported WQE
+ * address cannot be mapped to an offset in the send queue.
+ */
+static int prepare_sqe_rts(struct ehca_qp *my_qp, struct ehca_shca *shca,
+			   int *bad_wqe_cnt)
+{
+	u64 h_ret;
+	struct ipz_queue *squeue;
+	void *bad_send_wqe_p, *bad_send_wqe_v;
+	u64 q_ofs;
+	struct ehca_wqe *wqe;
+	int qp_num = my_qp->ib_qp.qp_num;
+
+	/* get send wqe pointer */
+	h_ret = hipz_h_disable_and_get_wqe(shca->ipz_hca_handle,
+					   my_qp->ipz_qp_handle, &my_qp->pf,
+					   &bad_send_wqe_p, NULL, 2);
+	if (h_ret != H_SUCCESS) {
+		ehca_err(&shca->ib_device, "hipz_h_disable_and_get_wqe() failed"
+			 " ehca_qp=%p qp_num=%x h_ret=%lli",
+			 my_qp, qp_num, h_ret);
+		return ehca2ib_return_code(h_ret);
+	}
+	/* clear bit 63; unsigned constant, as in calc_left_cqes() */
+	bad_send_wqe_p = (void *)((u64)bad_send_wqe_p & (~(1UL << 63)));
+	ehca_dbg(&shca->ib_device, "qp_num=%x bad_send_wqe_p=%p",
+		 qp_num, bad_send_wqe_p);
+	/* convert wqe pointer to vadr */
+	bad_send_wqe_v = __va((u64)bad_send_wqe_p);
+	if (ehca_debug_level >= 2)
+		ehca_dmp(bad_send_wqe_v, 32, "qp_num=%x bad_wqe", qp_num);
+	squeue = &my_qp->ipz_squeue;
+	if (ipz_queue_abs_to_offset(squeue, (u64)bad_send_wqe_p, &q_ofs)) {
+		ehca_err(&shca->ib_device, "failed to get wqe offset qp_num=%x"
+			 " bad_send_wqe_p=%p", qp_num, bad_send_wqe_p);
+		return -EFAULT;
+	}
+
+	/* loop sets wqe's purge bit */
+	wqe = (struct ehca_wqe *)ipz_qeit_calc(squeue, q_ofs);
+	*bad_wqe_cnt = 0;
+	/* the walk stops at the WQE whose optype and wqef are both 0xff */
+	while (wqe->optype != 0xff && wqe->wqef != 0xff) {
+		if (ehca_debug_level >= 2)
+			ehca_dmp(wqe, 32, "qp_num=%x wqe", qp_num);
+		wqe->nr_of_data_seg = 0; /* suppress data access */
+		wqe->wqef = WQEF_PURGE; /* WQE to be purged */
+		q_ofs = ipz_queue_advance_offset(squeue, q_ofs);
+		wqe = (struct ehca_wqe *)ipz_qeit_calc(squeue, q_ofs);
+		*bad_wqe_cnt = (*bad_wqe_cnt)+1;
+	}
+	/*
+	 * bad wqe will be reprocessed and ignored when poll_cq() is called,
+	 *  i.e. nr of wqes with flush error status is one less
+	 */
+	ehca_dbg(&shca->ib_device, "qp_num=%x flusherr_wqe_cnt=%x",
+		 qp_num, (*bad_wqe_cnt)-1);
+	wqe->wqef = 0;
+
+	return 0;
+}
+
+/*
+ * Starting behind qmap->tail, count the map entries with cqe_req set up to
+ * (not including) the entry that corresponds to wqe_p, add that count to
+ * qmap->left_to_poll and store the index of wqe_p in qmap->next_wqe_idx.
+ * Returns 0, or -EFAULT if wqe_p cannot be mapped to a queue offset.
+ */
+static int calc_left_cqes(u64 wqe_p, struct ipz_queue *ipz_queue,
+			  struct ehca_queue_map *qmap)
+{
+	void *wqe_v;
+	u64 q_ofs;
+	u32 wqe_idx;
+	unsigned int tail_idx;
+
+	/* convert real to abs address */
+	wqe_p = wqe_p & (~(1UL << 63));
+
+	wqe_v = __va(wqe_p);
+
+	if (ipz_queue_abs_to_offset(ipz_queue, wqe_p, &q_ofs)) {
+		ehca_gen_err("Invalid offset for calculating left cqes "
+			     "wqe_p=%#llx wqe_v=%p\n", wqe_p, wqe_v);
+		return -EFAULT;
+	}
+
+	tail_idx = next_index(qmap->tail, qmap->entries);
+	wqe_idx = q_ofs / ipz_queue->qe_size;
+
+	/* check all processed wqes, whether a cqe is requested or not */
+	while (tail_idx != wqe_idx) {
+		if (qmap->map[tail_idx].cqe_req)
+			qmap->left_to_poll++;
+		tail_idx = next_index(tail_idx, qmap->entries);
+	}
+	/* save index in queue, where we have to start flushing */
+	qmap->next_wqe_idx = wqe_idx;
+	return 0;
+}
+
+/*
+ * Work out how many CQEs are still to be polled on the send and receive
+ * queue of my_qp. For QPs other than SRQ base QPs the current WQE pointers
+ * are fetched with hipz_h_disable_and_get_wqe() and evaluated by
+ * calc_left_cqes(); for base QPs both counters are simply reset. If nothing
+ * is left to poll on either queue, the QP is put on the CQ error lists.
+ * Returns 0 or a negative error code.
+ */
+static int check_for_left_cqes(struct ehca_qp *my_qp, struct ehca_shca *shca)
+{
+	u64 h_ret;
+	void *send_wqe_p, *recv_wqe_p;
+	int ret;
+	unsigned long flags;
+	int qp_num = my_qp->ib_qp.qp_num;
+
+	/* this hcall is not supported on base QPs */
+	if (my_qp->ext_type != EQPT_SRQBASE) {
+		/* get send and receive wqe pointer */
+		h_ret = hipz_h_disable_and_get_wqe(shca->ipz_hca_handle,
+				my_qp->ipz_qp_handle, &my_qp->pf,
+				&send_wqe_p, &recv_wqe_p, 4);
+		if (h_ret != H_SUCCESS) {
+			ehca_err(&shca->ib_device, "disable_and_get_wqe() "
+				 "failed ehca_qp=%p qp_num=%x h_ret=%lli",
+				 my_qp, qp_num, h_ret);
+			return ehca2ib_return_code(h_ret);
+		}
+
+		/*
+		 * acquire lock to ensure that nobody is polling the cq which
+		 * could mean that the qmap->tail pointer is in an
+		 * inconsistent state.
+		 */
+		spin_lock_irqsave(&my_qp->send_cq->spinlock, flags);
+		ret = calc_left_cqes((u64)send_wqe_p, &my_qp->ipz_squeue,
+				     &my_qp->sq_map);
+		spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags);
+		if (ret)
+			return ret;
+
+
+		spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags);
+		ret = calc_left_cqes((u64)recv_wqe_p, &my_qp->ipz_rqueue,
+				     &my_qp->rq_map);
+		spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, flags);
+		if (ret)
+			return ret;
+	} else {
+		spin_lock_irqsave(&my_qp->send_cq->spinlock, flags);
+		my_qp->sq_map.left_to_poll = 0;
+		my_qp->sq_map.next_wqe_idx = next_index(my_qp->sq_map.tail,
+							my_qp->sq_map.entries);
+		spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags);
+
+		spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags);
+		my_qp->rq_map.left_to_poll = 0;
+		my_qp->rq_map.next_wqe_idx = next_index(my_qp->rq_map.tail,
+							my_qp->rq_map.entries);
+		spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, flags);
+	}
+
+	/* this assures flush cqes being generated only for pending wqes */
+	if ((my_qp->sq_map.left_to_poll == 0) &&
+				(my_qp->rq_map.left_to_poll == 0)) {
+		spin_lock_irqsave(&my_qp->send_cq->spinlock, flags);
+		ehca_add_to_err_list(my_qp, 1);
+		spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags);
+
+		if (HAS_RQ(my_qp)) {
+			spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags);
+			ehca_add_to_err_list(my_qp, 0);
+			spin_unlock_irqrestore(&my_qp->recv_cq->spinlock,
+					flags);
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * internal_modify_qp with circumvention to handle aqp0 properly
+ * smi_reset2init indicates if this is an internal reset-to-init-call for
+ * smi. This flag must always be zero if called from ehca_modify_qp()!
+ * This internal func was introduced to avoid recursion of ehca_modify_qp()!
+ */ +static int internal_modify_qp(struct ib_qp *ibqp, + struct ib_qp_attr *attr, + int attr_mask, int smi_reset2init) +{ + enum ib_qp_state qp_cur_state, qp_new_state; + int cnt, qp_attr_idx, ret = 0; + enum ib_qp_statetrans statetrans; + struct hcp_modify_qp_control_block *mqpcb; + struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp); + struct ehca_shca *shca = + container_of(ibqp->pd->device, struct ehca_shca, ib_device); + u64 update_mask; + u64 h_ret; + int bad_wqe_cnt = 0; + int is_user = 0; + int squeue_locked = 0; + unsigned long flags = 0; + + /* do query_qp to obtain current attr values */ + mqpcb = ehca_alloc_fw_ctrlblock(GFP_ATOMIC); + if (!mqpcb) { + ehca_err(ibqp->device, "Could not get zeroed page for mqpcb " + "ehca_qp=%p qp_num=%x ", my_qp, ibqp->qp_num); + return -ENOMEM; + } + + h_ret = hipz_h_query_qp(shca->ipz_hca_handle, + my_qp->ipz_qp_handle, + &my_qp->pf, + mqpcb, my_qp->galpas.kernel); + if (h_ret != H_SUCCESS) { + ehca_err(ibqp->device, "hipz_h_query_qp() failed " + "ehca_qp=%p qp_num=%x h_ret=%lli", + my_qp, ibqp->qp_num, h_ret); + ret = ehca2ib_return_code(h_ret); + goto modify_qp_exit1; + } + if (ibqp->uobject) + is_user = 1; + + qp_cur_state = ehca2ib_qp_state(mqpcb->qp_state); + + if (qp_cur_state == -EINVAL) { /* invalid qp state */ + ret = -EINVAL; + ehca_err(ibqp->device, "Invalid current ehca_qp_state=%x " + "ehca_qp=%p qp_num=%x", + mqpcb->qp_state, my_qp, ibqp->qp_num); + goto modify_qp_exit1; + } + /* + * circumvention to set aqp0 initial state to init + * as expected by IB spec + */ + if (smi_reset2init == 0 && + ibqp->qp_type == IB_QPT_SMI && + qp_cur_state == IB_QPS_RESET && + (attr_mask & IB_QP_STATE) && + attr->qp_state == IB_QPS_INIT) { /* RESET -> INIT */ + struct ib_qp_attr smiqp_attr = { + .qp_state = IB_QPS_INIT, + .port_num = my_qp->init_attr.port_num, + .pkey_index = 0, + .qkey = 0 + }; + int smiqp_attr_mask = IB_QP_STATE | IB_QP_PORT | + IB_QP_PKEY_INDEX | IB_QP_QKEY; + int smirc = 
internal_modify_qp( + ibqp, &smiqp_attr, smiqp_attr_mask, 1); + if (smirc) { + ehca_err(ibqp->device, "SMI RESET -> INIT failed. " + "ehca_modify_qp() rc=%i", smirc); + ret = H_PARAMETER; + goto modify_qp_exit1; + } + qp_cur_state = IB_QPS_INIT; + ehca_dbg(ibqp->device, "SMI RESET -> INIT succeeded"); + } + /* is transmitted current state equal to "real" current state */ + if ((attr_mask & IB_QP_CUR_STATE) && + qp_cur_state != attr->cur_qp_state) { + ret = -EINVAL; + ehca_err(ibqp->device, + "Invalid IB_QP_CUR_STATE attr->curr_qp_state=%x <>" + " actual cur_qp_state=%x. ehca_qp=%p qp_num=%x", + attr->cur_qp_state, qp_cur_state, my_qp, ibqp->qp_num); + goto modify_qp_exit1; + } + + ehca_dbg(ibqp->device, "ehca_qp=%p qp_num=%x current qp_state=%x " + "new qp_state=%x attribute_mask=%x", + my_qp, ibqp->qp_num, qp_cur_state, attr->qp_state, attr_mask); + + qp_new_state = attr_mask & IB_QP_STATE ? attr->qp_state : qp_cur_state; + if (!smi_reset2init && + !ib_modify_qp_is_ok(qp_cur_state, qp_new_state, ibqp->qp_type, + attr_mask, IB_LINK_LAYER_UNSPECIFIED)) { + ret = -EINVAL; + ehca_err(ibqp->device, + "Invalid qp transition new_state=%x cur_state=%x " + "ehca_qp=%p qp_num=%x attr_mask=%x", qp_new_state, + qp_cur_state, my_qp, ibqp->qp_num, attr_mask); + goto modify_qp_exit1; + } + + mqpcb->qp_state = ib2ehca_qp_state(qp_new_state); + if (mqpcb->qp_state) + update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_STATE, 1); + else { + ret = -EINVAL; + ehca_err(ibqp->device, "Invalid new qp state=%x " + "ehca_qp=%p qp_num=%x", + qp_new_state, my_qp, ibqp->qp_num); + goto modify_qp_exit1; + } + + /* retrieve state transition struct to get req and opt attrs */ + statetrans = get_modqp_statetrans(qp_cur_state, qp_new_state); + if (statetrans < 0) { + ret = -EINVAL; + ehca_err(ibqp->device, " qp_cur_state=%x " + "new_qp_state=%x State_xsition=%x ehca_qp=%p " + "qp_num=%x", qp_cur_state, qp_new_state, + statetrans, my_qp, ibqp->qp_num); + goto modify_qp_exit1; + } + + qp_attr_idx = 
ib2ehcaqptype(ibqp->qp_type); + + if (qp_attr_idx < 0) { + ret = qp_attr_idx; + ehca_err(ibqp->device, + "Invalid QP type=%x ehca_qp=%p qp_num=%x", + ibqp->qp_type, my_qp, ibqp->qp_num); + goto modify_qp_exit1; + } + + ehca_dbg(ibqp->device, + "ehca_qp=%p qp_num=%x qp_state_xsit=%x", + my_qp, ibqp->qp_num, statetrans); + + /* eHCA2 rev2 and higher require the SEND_GRH_FLAG to be set + * in non-LL UD QPs. + */ + if ((my_qp->qp_type == IB_QPT_UD) && + (my_qp->ext_type != EQPT_LLQP) && + (statetrans == IB_QPST_INIT2RTR) && + (shca->hw_level >= 0x22)) { + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG, 1); + mqpcb->send_grh_flag = 1; + } + + /* sqe -> rts: set purge bit of bad wqe before actual trans */ + if ((my_qp->qp_type == IB_QPT_UD || + my_qp->qp_type == IB_QPT_GSI || + my_qp->qp_type == IB_QPT_SMI) && + statetrans == IB_QPST_SQE2RTS) { + /* mark next free wqe if kernel */ + if (!ibqp->uobject) { + struct ehca_wqe *wqe; + /* lock send queue */ + spin_lock_irqsave(&my_qp->spinlock_s, flags); + squeue_locked = 1; + /* mark next free wqe */ + wqe = (struct ehca_wqe *) + ipz_qeit_get(&my_qp->ipz_squeue); + wqe->optype = wqe->wqef = 0xff; + ehca_dbg(ibqp->device, "qp_num=%x next_free_wqe=%p", + ibqp->qp_num, wqe); + } + ret = prepare_sqe_rts(my_qp, shca, &bad_wqe_cnt); + if (ret) { + ehca_err(ibqp->device, "prepare_sqe_rts() failed " + "ehca_qp=%p qp_num=%x ret=%i", + my_qp, ibqp->qp_num, ret); + goto modify_qp_exit2; + } + } + + /* + * enable RDMA_Atomic_Control if reset->init und reliable con + * this is necessary since gen2 does not provide that flag, + * but pHyp requires it + */ + if (statetrans == IB_QPST_RESET2INIT && + (ibqp->qp_type == IB_QPT_RC || ibqp->qp_type == IB_QPT_UC)) { + mqpcb->rdma_atomic_ctrl = 3; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RDMA_ATOMIC_CTRL, 1); + } + /* circ. 
pHyp requires #RDMA/Atomic Resp Res for UC INIT -> RTR */ + if (statetrans == IB_QPST_INIT2RTR && + (ibqp->qp_type == IB_QPT_UC) && + !(attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)) { + mqpcb->rdma_nr_atomic_resp_res = 1; /* default to 1 */ + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES, 1); + } + + if (attr_mask & IB_QP_PKEY_INDEX) { + if (attr->pkey_index >= 16) { + ret = -EINVAL; + ehca_err(ibqp->device, "Invalid pkey_index=%x. " + "ehca_qp=%p qp_num=%x max_pkey_index=f", + attr->pkey_index, my_qp, ibqp->qp_num); + goto modify_qp_exit2; + } + mqpcb->prim_p_key_idx = attr->pkey_index; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PRIM_P_KEY_IDX, 1); + } + if (attr_mask & IB_QP_PORT) { + struct ehca_sport *sport; + struct ehca_qp *aqp1; + if (attr->port_num < 1 || attr->port_num > shca->num_ports) { + ret = -EINVAL; + ehca_err(ibqp->device, "Invalid port=%x. " + "ehca_qp=%p qp_num=%x num_ports=%x", + attr->port_num, my_qp, ibqp->qp_num, + shca->num_ports); + goto modify_qp_exit2; + } + sport = &shca->sport[attr->port_num - 1]; + if (!sport->ibqp_sqp[IB_QPT_GSI]) { + /* should not occur */ + ret = -EFAULT; + ehca_err(ibqp->device, "AQP1 was not created for " + "port=%x", attr->port_num); + goto modify_qp_exit2; + } + aqp1 = container_of(sport->ibqp_sqp[IB_QPT_GSI], + struct ehca_qp, ib_qp); + if (ibqp->qp_type != IB_QPT_GSI && + ibqp->qp_type != IB_QPT_SMI && + aqp1->mod_qp_parm) { + /* + * firmware will reject this modify_qp() because + * port is not activated/initialized fully + */ + ret = -EFAULT; + ehca_warn(ibqp->device, "Couldn't modify qp port=%x: " + "either port is being activated (try again) " + "or cabling issue", attr->port_num); + goto modify_qp_exit2; + } + mqpcb->prim_phys_port = attr->port_num; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PRIM_PHYS_PORT, 1); + } + if (attr_mask & IB_QP_QKEY) { + mqpcb->qkey = attr->qkey; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_QKEY, 1); + } + if (attr_mask & IB_QP_AV) { + mqpcb->dlid = 
attr->ah_attr.dlid; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DLID, 1); + mqpcb->source_path_bits = attr->ah_attr.src_path_bits; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SOURCE_PATH_BITS, 1); + mqpcb->service_level = attr->ah_attr.sl; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SERVICE_LEVEL, 1); + + if (ehca_calc_ipd(shca, mqpcb->prim_phys_port, + attr->ah_attr.static_rate, + &mqpcb->max_static_rate)) { + ret = -EINVAL; + goto modify_qp_exit2; + } + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_MAX_STATIC_RATE, 1); + + /* + * Always supply the GRH flag, even if it's zero, to give the + * hypervisor a clear "yes" or "no" instead of a "perhaps" + */ + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG, 1); + + /* + * only if GRH is TRUE we might consider SOURCE_GID_IDX + * and DEST_GID otherwise phype will return H_ATTR_PARM!!! + */ + if (attr->ah_attr.ah_flags == IB_AH_GRH) { + mqpcb->send_grh_flag = 1; + + mqpcb->source_gid_idx = attr->ah_attr.grh.sgid_index; + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_SOURCE_GID_IDX, 1); + + for (cnt = 0; cnt < 16; cnt++) + mqpcb->dest_gid.byte[cnt] = + attr->ah_attr.grh.dgid.raw[cnt]; + + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DEST_GID, 1); + mqpcb->flow_label = attr->ah_attr.grh.flow_label; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_FLOW_LABEL, 1); + mqpcb->hop_limit = attr->ah_attr.grh.hop_limit; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_HOP_LIMIT, 1); + mqpcb->traffic_class = attr->ah_attr.grh.traffic_class; + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_TRAFFIC_CLASS, 1); + } + } + + if (attr_mask & IB_QP_PATH_MTU) { + /* store ld(MTU) */ + my_qp->mtu_shift = attr->path_mtu + 7; + mqpcb->path_mtu = attr->path_mtu; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PATH_MTU, 1); + } + if (attr_mask & IB_QP_TIMEOUT) { + mqpcb->timeout = attr->timeout; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_TIMEOUT, 1); + } + if (attr_mask & IB_QP_RETRY_CNT) { + mqpcb->retry_count = attr->retry_cnt; + update_mask |= 
EHCA_BMASK_SET(MQPCB_MASK_RETRY_COUNT, 1); + } + if (attr_mask & IB_QP_RNR_RETRY) { + mqpcb->rnr_retry_count = attr->rnr_retry; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RNR_RETRY_COUNT, 1); + } + if (attr_mask & IB_QP_RQ_PSN) { + mqpcb->receive_psn = attr->rq_psn; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RECEIVE_PSN, 1); + } + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) { + mqpcb->rdma_nr_atomic_resp_res = attr->max_dest_rd_atomic < 3 ? + attr->max_dest_rd_atomic : 2; + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES, 1); + } + if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) { + mqpcb->rdma_atomic_outst_dest_qp = attr->max_rd_atomic < 3 ? + attr->max_rd_atomic : 2; + update_mask |= + EHCA_BMASK_SET + (MQPCB_MASK_RDMA_ATOMIC_OUTST_DEST_QP, 1); + } + if (attr_mask & IB_QP_ALT_PATH) { + if (attr->alt_port_num < 1 + || attr->alt_port_num > shca->num_ports) { + ret = -EINVAL; + ehca_err(ibqp->device, "Invalid alt_port=%x. " + "ehca_qp=%p qp_num=%x num_ports=%x", + attr->alt_port_num, my_qp, ibqp->qp_num, + shca->num_ports); + goto modify_qp_exit2; + } + mqpcb->alt_phys_port = attr->alt_port_num; + + if (attr->alt_pkey_index >= 16) { + ret = -EINVAL; + ehca_err(ibqp->device, "Invalid alt_pkey_index=%x. 
" + "ehca_qp=%p qp_num=%x max_pkey_index=f", + attr->pkey_index, my_qp, ibqp->qp_num); + goto modify_qp_exit2; + } + mqpcb->alt_p_key_idx = attr->alt_pkey_index; + + mqpcb->timeout_al = attr->alt_timeout; + mqpcb->dlid_al = attr->alt_ah_attr.dlid; + mqpcb->source_path_bits_al = attr->alt_ah_attr.src_path_bits; + mqpcb->service_level_al = attr->alt_ah_attr.sl; + + if (ehca_calc_ipd(shca, mqpcb->alt_phys_port, + attr->alt_ah_attr.static_rate, + &mqpcb->max_static_rate_al)) { + ret = -EINVAL; + goto modify_qp_exit2; + } + + /* OpenIB doesn't support alternate retry counts - copy them */ + mqpcb->retry_count_al = mqpcb->retry_count; + mqpcb->rnr_retry_count_al = mqpcb->rnr_retry_count; + + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_ALT_PHYS_PORT, 1) + | EHCA_BMASK_SET(MQPCB_MASK_ALT_P_KEY_IDX, 1) + | EHCA_BMASK_SET(MQPCB_MASK_TIMEOUT_AL, 1) + | EHCA_BMASK_SET(MQPCB_MASK_DLID_AL, 1) + | EHCA_BMASK_SET(MQPCB_MASK_SOURCE_PATH_BITS_AL, 1) + | EHCA_BMASK_SET(MQPCB_MASK_SERVICE_LEVEL_AL, 1) + | EHCA_BMASK_SET(MQPCB_MASK_MAX_STATIC_RATE_AL, 1) + | EHCA_BMASK_SET(MQPCB_MASK_RETRY_COUNT_AL, 1) + | EHCA_BMASK_SET(MQPCB_MASK_RNR_RETRY_COUNT_AL, 1); + + /* + * Always supply the GRH flag, even if it's zero, to give the + * hypervisor a clear "yes" or "no" instead of a "perhaps" + */ + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG_AL, 1); + + /* + * only if GRH is TRUE we might consider SOURCE_GID_IDX + * and DEST_GID otherwise phype will return H_ATTR_PARM!!! 
+ */ + if (attr->alt_ah_attr.ah_flags == IB_AH_GRH) { + mqpcb->send_grh_flag_al = 1; + + for (cnt = 0; cnt < 16; cnt++) + mqpcb->dest_gid_al.byte[cnt] = + attr->alt_ah_attr.grh.dgid.raw[cnt]; + mqpcb->source_gid_idx_al = + attr->alt_ah_attr.grh.sgid_index; + mqpcb->flow_label_al = attr->alt_ah_attr.grh.flow_label; + mqpcb->hop_limit_al = attr->alt_ah_attr.grh.hop_limit; + mqpcb->traffic_class_al = + attr->alt_ah_attr.grh.traffic_class; + + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_SOURCE_GID_IDX_AL, 1) + | EHCA_BMASK_SET(MQPCB_MASK_DEST_GID_AL, 1) + | EHCA_BMASK_SET(MQPCB_MASK_FLOW_LABEL_AL, 1) + | EHCA_BMASK_SET(MQPCB_MASK_HOP_LIMIT_AL, 1) | + EHCA_BMASK_SET(MQPCB_MASK_TRAFFIC_CLASS_AL, 1); + } + } + + if (attr_mask & IB_QP_MIN_RNR_TIMER) { + mqpcb->min_rnr_nak_timer_field = attr->min_rnr_timer; + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_MIN_RNR_NAK_TIMER_FIELD, 1); + } + + if (attr_mask & IB_QP_SQ_PSN) { + mqpcb->send_psn = attr->sq_psn; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_PSN, 1); + } + + if (attr_mask & IB_QP_DEST_QPN) { + mqpcb->dest_qp_nr = attr->dest_qp_num; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DEST_QP_NR, 1); + } + + if (attr_mask & IB_QP_PATH_MIG_STATE) { + if (attr->path_mig_state != IB_MIG_REARM + && attr->path_mig_state != IB_MIG_MIGRATED) { + ret = -EINVAL; + ehca_err(ibqp->device, "Invalid mig_state=%x", + attr->path_mig_state); + goto modify_qp_exit2; + } + mqpcb->path_migration_state = attr->path_mig_state + 1; + if (attr->path_mig_state == IB_MIG_REARM) + my_qp->mig_armed = 1; + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_PATH_MIGRATION_STATE, 1); + } + + if (attr_mask & IB_QP_CAP) { + mqpcb->max_nr_outst_send_wr = attr->cap.max_send_wr+1; + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_MAX_NR_OUTST_SEND_WR, 1); + mqpcb->max_nr_outst_recv_wr = attr->cap.max_recv_wr+1; + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_MAX_NR_OUTST_RECV_WR, 1); + /* no support for max_send/recv_sge yet */ + } + + if (ehca_debug_level >= 2) + 
ehca_dmp(mqpcb, 4*70, "qp_num=%x", ibqp->qp_num); + + h_ret = hipz_h_modify_qp(shca->ipz_hca_handle, + my_qp->ipz_qp_handle, + &my_qp->pf, + update_mask, + mqpcb, my_qp->galpas.kernel); + + if (h_ret != H_SUCCESS) { + ret = ehca2ib_return_code(h_ret); + ehca_err(ibqp->device, "hipz_h_modify_qp() failed h_ret=%lli " + "ehca_qp=%p qp_num=%x", h_ret, my_qp, ibqp->qp_num); + goto modify_qp_exit2; + } + + if ((my_qp->qp_type == IB_QPT_UD || + my_qp->qp_type == IB_QPT_GSI || + my_qp->qp_type == IB_QPT_SMI) && + statetrans == IB_QPST_SQE2RTS) { + /* doorbell to reprocessing wqes */ + iosync(); /* serialize GAL register access */ + hipz_update_sqa(my_qp, bad_wqe_cnt-1); + ehca_gen_dbg("doorbell for %x wqes", bad_wqe_cnt); + } + + if (statetrans == IB_QPST_RESET2INIT || + statetrans == IB_QPST_INIT2INIT) { + mqpcb->qp_enable = 1; + mqpcb->qp_state = EHCA_QPS_INIT; + update_mask = 0; + update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_ENABLE, 1); + + h_ret = hipz_h_modify_qp(shca->ipz_hca_handle, + my_qp->ipz_qp_handle, + &my_qp->pf, + update_mask, + mqpcb, + my_qp->galpas.kernel); + + if (h_ret != H_SUCCESS) { + ret = ehca2ib_return_code(h_ret); + ehca_err(ibqp->device, "ENABLE in context of " + "RESET_2_INIT failed! 
Maybe you didn't get " + "a LID h_ret=%lli ehca_qp=%p qp_num=%x", + h_ret, my_qp, ibqp->qp_num); + goto modify_qp_exit2; + } + } + if ((qp_new_state == IB_QPS_ERR) && (qp_cur_state != IB_QPS_ERR) + && !is_user) { + ret = check_for_left_cqes(my_qp, shca); + if (ret) + goto modify_qp_exit2; + } + + if (statetrans == IB_QPST_ANY2RESET) { + ipz_qeit_reset(&my_qp->ipz_rqueue); + ipz_qeit_reset(&my_qp->ipz_squeue); + + if (qp_cur_state == IB_QPS_ERR && !is_user) { + del_from_err_list(my_qp->send_cq, &my_qp->sq_err_node); + + if (HAS_RQ(my_qp)) + del_from_err_list(my_qp->recv_cq, + &my_qp->rq_err_node); + } + if (!is_user) + reset_queue_map(&my_qp->sq_map); + + if (HAS_RQ(my_qp) && !is_user) + reset_queue_map(&my_qp->rq_map); + } + + if (attr_mask & IB_QP_QKEY) + my_qp->qkey = attr->qkey; + +modify_qp_exit2: + if (squeue_locked) { /* this means: sqe -> rts */ + spin_unlock_irqrestore(&my_qp->spinlock_s, flags); + my_qp->sqerr_purgeflag = 1; + } + +modify_qp_exit1: + ehca_free_fw_ctrlblock(mqpcb); + + return ret; +} + +int ehca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, + struct ib_udata *udata) +{ + int ret = 0; + + struct ehca_shca *shca = container_of(ibqp->device, struct ehca_shca, + ib_device); + struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp); + + /* The if-block below caches qp_attr to be modified for GSI and SMI + * qps during the initialization by ib_mad. When the respective port + * is activated, ie we got an event PORT_ACTIVE, we'll replay the + * cached modify calls sequence, see ehca_recover_sqs() below. + * Why that is required: + * 1) If one port is connected, older code requires that port one + * to be connected and module option nr_ports=1 to be given by + * user, which is very inconvenient for end user. + * 2) Firmware accepts modify_qp() only if respective port has become + * active. Older code had a wait loop of 30sec create_qp()/ + * define_aqp1(), which is not appropriate in practice. 
This + * code now removes that wait loop, see define_aqp1(), and always + * reports all ports to ib_mad resp. users. Only activated ports + * will then usable for the users. + */ + if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI) { + int port = my_qp->init_attr.port_num; + struct ehca_sport *sport = &shca->sport[port - 1]; + unsigned long flags; + spin_lock_irqsave(&sport->mod_sqp_lock, flags); + /* cache qp_attr only during init */ + if (my_qp->mod_qp_parm) { + struct ehca_mod_qp_parm *p; + if (my_qp->mod_qp_parm_idx >= EHCA_MOD_QP_PARM_MAX) { + ehca_err(&shca->ib_device, + "mod_qp_parm overflow state=%x port=%x" + " type=%x", attr->qp_state, + my_qp->init_attr.port_num, + ibqp->qp_type); + spin_unlock_irqrestore(&sport->mod_sqp_lock, + flags); + return -EINVAL; + } + p = &my_qp->mod_qp_parm[my_qp->mod_qp_parm_idx]; + p->mask = attr_mask; + p->attr = *attr; + my_qp->mod_qp_parm_idx++; + ehca_dbg(&shca->ib_device, + "Saved qp_attr for state=%x port=%x type=%x", + attr->qp_state, my_qp->init_attr.port_num, + ibqp->qp_type); + spin_unlock_irqrestore(&sport->mod_sqp_lock, flags); + goto out; + } + spin_unlock_irqrestore(&sport->mod_sqp_lock, flags); + } + + ret = internal_modify_qp(ibqp, attr, attr_mask, 0); + +out: + if ((ret == 0) && (attr_mask & IB_QP_STATE)) + my_qp->state = attr->qp_state; + + return ret; +} + +void ehca_recover_sqp(struct ib_qp *sqp) +{ + struct ehca_qp *my_sqp = container_of(sqp, struct ehca_qp, ib_qp); + int port = my_sqp->init_attr.port_num; + struct ib_qp_attr attr; + struct ehca_mod_qp_parm *qp_parm; + int i, qp_parm_idx, ret; + unsigned long flags, wr_cnt; + + if (!my_sqp->mod_qp_parm) + return; + ehca_dbg(sqp->device, "SQP port=%x qp_num=%x", port, sqp->qp_num); + + qp_parm = my_sqp->mod_qp_parm; + qp_parm_idx = my_sqp->mod_qp_parm_idx; + for (i = 0; i < qp_parm_idx; i++) { + attr = qp_parm[i].attr; + ret = internal_modify_qp(sqp, &attr, qp_parm[i].mask, 0); + if (ret) { + ehca_err(sqp->device, "Could not modify SQP port=%x 
" + "qp_num=%x ret=%x", port, sqp->qp_num, ret); + goto free_qp_parm; + } + ehca_dbg(sqp->device, "SQP port=%x qp_num=%x in state=%x", + port, sqp->qp_num, attr.qp_state); + } + + /* re-trigger posted recv wrs */ + wr_cnt = my_sqp->ipz_rqueue.current_q_offset / + my_sqp->ipz_rqueue.qe_size; + if (wr_cnt) { + spin_lock_irqsave(&my_sqp->spinlock_r, flags); + hipz_update_rqa(my_sqp, wr_cnt); + spin_unlock_irqrestore(&my_sqp->spinlock_r, flags); + ehca_dbg(sqp->device, "doorbell port=%x qp_num=%x wr_cnt=%lx", + port, sqp->qp_num, wr_cnt); + } + +free_qp_parm: + kfree(qp_parm); + /* this prevents subsequent calls to modify_qp() to cache qp_attr */ + my_sqp->mod_qp_parm = NULL; +} + +int ehca_query_qp(struct ib_qp *qp, + struct ib_qp_attr *qp_attr, + int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr) +{ + struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp); + struct ehca_shca *shca = container_of(qp->device, struct ehca_shca, + ib_device); + struct ipz_adapter_handle adapter_handle = shca->ipz_hca_handle; + struct hcp_modify_qp_control_block *qpcb; + int cnt, ret = 0; + u64 h_ret; + + if (qp_attr_mask & QP_ATTR_QUERY_NOT_SUPPORTED) { + ehca_err(qp->device, "Invalid attribute mask " + "ehca_qp=%p qp_num=%x qp_attr_mask=%x ", + my_qp, qp->qp_num, qp_attr_mask); + return -EINVAL; + } + + qpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL); + if (!qpcb) { + ehca_err(qp->device, "Out of memory for qpcb " + "ehca_qp=%p qp_num=%x", my_qp, qp->qp_num); + return -ENOMEM; + } + + h_ret = hipz_h_query_qp(adapter_handle, + my_qp->ipz_qp_handle, + &my_qp->pf, + qpcb, my_qp->galpas.kernel); + + if (h_ret != H_SUCCESS) { + ret = ehca2ib_return_code(h_ret); + ehca_err(qp->device, "hipz_h_query_qp() failed " + "ehca_qp=%p qp_num=%x h_ret=%lli", + my_qp, qp->qp_num, h_ret); + goto query_qp_exit1; + } + + qp_attr->cur_qp_state = ehca2ib_qp_state(qpcb->qp_state); + qp_attr->qp_state = qp_attr->cur_qp_state; + + if (qp_attr->cur_qp_state == -EINVAL) { + ret = -EINVAL; + 
ehca_err(qp->device, "Got invalid ehca_qp_state=%x " + "ehca_qp=%p qp_num=%x", + qpcb->qp_state, my_qp, qp->qp_num); + goto query_qp_exit1; + } + + if (qp_attr->qp_state == IB_QPS_SQD) + qp_attr->sq_draining = 1; + + qp_attr->qkey = qpcb->qkey; + qp_attr->path_mtu = qpcb->path_mtu; + qp_attr->path_mig_state = qpcb->path_migration_state - 1; + qp_attr->rq_psn = qpcb->receive_psn; + qp_attr->sq_psn = qpcb->send_psn; + qp_attr->min_rnr_timer = qpcb->min_rnr_nak_timer_field; + qp_attr->cap.max_send_wr = qpcb->max_nr_outst_send_wr-1; + qp_attr->cap.max_recv_wr = qpcb->max_nr_outst_recv_wr-1; + /* UD_AV CIRCUMVENTION */ + if (my_qp->qp_type == IB_QPT_UD) { + qp_attr->cap.max_send_sge = + qpcb->actual_nr_sges_in_sq_wqe - 2; + qp_attr->cap.max_recv_sge = + qpcb->actual_nr_sges_in_rq_wqe - 2; + } else { + qp_attr->cap.max_send_sge = + qpcb->actual_nr_sges_in_sq_wqe; + qp_attr->cap.max_recv_sge = + qpcb->actual_nr_sges_in_rq_wqe; + } + + qp_attr->cap.max_inline_data = my_qp->sq_max_inline_data_size; + qp_attr->dest_qp_num = qpcb->dest_qp_nr; + + qp_attr->pkey_index = qpcb->prim_p_key_idx; + qp_attr->port_num = qpcb->prim_phys_port; + qp_attr->timeout = qpcb->timeout; + qp_attr->retry_cnt = qpcb->retry_count; + qp_attr->rnr_retry = qpcb->rnr_retry_count; + + qp_attr->alt_pkey_index = qpcb->alt_p_key_idx; + qp_attr->alt_port_num = qpcb->alt_phys_port; + qp_attr->alt_timeout = qpcb->timeout_al; + + qp_attr->max_dest_rd_atomic = qpcb->rdma_nr_atomic_resp_res; + qp_attr->max_rd_atomic = qpcb->rdma_atomic_outst_dest_qp; + + /* primary av */ + qp_attr->ah_attr.sl = qpcb->service_level; + + if (qpcb->send_grh_flag) { + qp_attr->ah_attr.ah_flags = IB_AH_GRH; + } + + qp_attr->ah_attr.static_rate = qpcb->max_static_rate; + qp_attr->ah_attr.dlid = qpcb->dlid; + qp_attr->ah_attr.src_path_bits = qpcb->source_path_bits; + qp_attr->ah_attr.port_num = qp_attr->port_num; + + /* primary GRH */ + qp_attr->ah_attr.grh.traffic_class = qpcb->traffic_class; + qp_attr->ah_attr.grh.hop_limit = 
qpcb->hop_limit; + qp_attr->ah_attr.grh.sgid_index = qpcb->source_gid_idx; + qp_attr->ah_attr.grh.flow_label = qpcb->flow_label; + + for (cnt = 0; cnt < 16; cnt++) + qp_attr->ah_attr.grh.dgid.raw[cnt] = + qpcb->dest_gid.byte[cnt]; + + /* alternate AV */ + qp_attr->alt_ah_attr.sl = qpcb->service_level_al; + if (qpcb->send_grh_flag_al) { + qp_attr->alt_ah_attr.ah_flags = IB_AH_GRH; + } + + qp_attr->alt_ah_attr.static_rate = qpcb->max_static_rate_al; + qp_attr->alt_ah_attr.dlid = qpcb->dlid_al; + qp_attr->alt_ah_attr.src_path_bits = qpcb->source_path_bits_al; + + /* alternate GRH */ + qp_attr->alt_ah_attr.grh.traffic_class = qpcb->traffic_class_al; + qp_attr->alt_ah_attr.grh.hop_limit = qpcb->hop_limit_al; + qp_attr->alt_ah_attr.grh.sgid_index = qpcb->source_gid_idx_al; + qp_attr->alt_ah_attr.grh.flow_label = qpcb->flow_label_al; + + for (cnt = 0; cnt < 16; cnt++) + qp_attr->alt_ah_attr.grh.dgid.raw[cnt] = + qpcb->dest_gid_al.byte[cnt]; + + /* return init attributes given in ehca_create_qp */ + if (qp_init_attr) + *qp_init_attr = my_qp->init_attr; + + if (ehca_debug_level >= 2) + ehca_dmp(qpcb, 4*70, "qp_num=%x", qp->qp_num); + +query_qp_exit1: + ehca_free_fw_ctrlblock(qpcb); + + return ret; +} + +int ehca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, + enum ib_srq_attr_mask attr_mask, struct ib_udata *udata) +{ + struct ehca_qp *my_qp = + container_of(ibsrq, struct ehca_qp, ib_srq); + struct ehca_shca *shca = + container_of(ibsrq->pd->device, struct ehca_shca, ib_device); + struct hcp_modify_qp_control_block *mqpcb; + u64 update_mask; + u64 h_ret; + int ret = 0; + + mqpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL); + if (!mqpcb) { + ehca_err(ibsrq->device, "Could not get zeroed page for mqpcb " + "ehca_qp=%p qp_num=%x ", my_qp, my_qp->real_qp_num); + return -ENOMEM; + } + + update_mask = 0; + if (attr_mask & IB_SRQ_LIMIT) { + attr_mask &= ~IB_SRQ_LIMIT; + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_CURR_SRQ_LIMIT, 1) + | 
EHCA_BMASK_SET(MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG, 1); + mqpcb->curr_srq_limit = attr->srq_limit; + mqpcb->qp_aff_asyn_ev_log_reg = + EHCA_BMASK_SET(QPX_AAELOG_RESET_SRQ_LIMIT, 1); + } + + /* by now, all bits in attr_mask should have been cleared */ + if (attr_mask) { + ehca_err(ibsrq->device, "invalid attribute mask bits set " + "attr_mask=%x", attr_mask); + ret = -EINVAL; + goto modify_srq_exit0; + } + + if (ehca_debug_level >= 2) + ehca_dmp(mqpcb, 4*70, "qp_num=%x", my_qp->real_qp_num); + + h_ret = hipz_h_modify_qp(shca->ipz_hca_handle, my_qp->ipz_qp_handle, + NULL, update_mask, mqpcb, + my_qp->galpas.kernel); + + if (h_ret != H_SUCCESS) { + ret = ehca2ib_return_code(h_ret); + ehca_err(ibsrq->device, "hipz_h_modify_qp() failed h_ret=%lli " + "ehca_qp=%p qp_num=%x", + h_ret, my_qp, my_qp->real_qp_num); + } + +modify_srq_exit0: + ehca_free_fw_ctrlblock(mqpcb); + + return ret; +} + +int ehca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr) +{ + struct ehca_qp *my_qp = container_of(srq, struct ehca_qp, ib_srq); + struct ehca_shca *shca = container_of(srq->device, struct ehca_shca, + ib_device); + struct ipz_adapter_handle adapter_handle = shca->ipz_hca_handle; + struct hcp_modify_qp_control_block *qpcb; + int ret = 0; + u64 h_ret; + + qpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL); + if (!qpcb) { + ehca_err(srq->device, "Out of memory for qpcb " + "ehca_qp=%p qp_num=%x", my_qp, my_qp->real_qp_num); + return -ENOMEM; + } + + h_ret = hipz_h_query_qp(adapter_handle, my_qp->ipz_qp_handle, + NULL, qpcb, my_qp->galpas.kernel); + + if (h_ret != H_SUCCESS) { + ret = ehca2ib_return_code(h_ret); + ehca_err(srq->device, "hipz_h_query_qp() failed " + "ehca_qp=%p qp_num=%x h_ret=%lli", + my_qp, my_qp->real_qp_num, h_ret); + goto query_srq_exit1; + } + + srq_attr->max_wr = qpcb->max_nr_outst_recv_wr - 1; + srq_attr->max_sge = 3; + srq_attr->srq_limit = qpcb->curr_srq_limit; + + if (ehca_debug_level >= 2) + ehca_dmp(qpcb, 4*70, "qp_num=%x", my_qp->real_qp_num); + 
+query_srq_exit1: + ehca_free_fw_ctrlblock(qpcb); + + return ret; +} + +static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp, + struct ib_uobject *uobject) +{ + struct ehca_shca *shca = container_of(dev, struct ehca_shca, ib_device); + struct ehca_pd *my_pd = container_of(my_qp->ib_qp.pd, struct ehca_pd, + ib_pd); + struct ehca_sport *sport = &shca->sport[my_qp->init_attr.port_num - 1]; + u32 qp_num = my_qp->real_qp_num; + int ret; + u64 h_ret; + u8 port_num; + int is_user = 0; + enum ib_qp_type qp_type; + unsigned long flags; + + if (uobject) { + is_user = 1; + if (my_qp->mm_count_galpa || + my_qp->mm_count_rqueue || my_qp->mm_count_squeue) { + ehca_err(dev, "Resources still referenced in " + "user space qp_num=%x", qp_num); + return -EINVAL; + } + } + + if (my_qp->send_cq) { + ret = ehca_cq_unassign_qp(my_qp->send_cq, qp_num); + if (ret) { + ehca_err(dev, "Couldn't unassign qp from " + "send_cq ret=%i qp_num=%x cq_num=%x", ret, + qp_num, my_qp->send_cq->cq_number); + return ret; + } + } + + write_lock_irqsave(&ehca_qp_idr_lock, flags); + idr_remove(&ehca_qp_idr, my_qp->token); + write_unlock_irqrestore(&ehca_qp_idr_lock, flags); + + /* + * SRQs will never get into an error list and do not have a recv_cq, + * so we need to skip them here. 
+ */ + if (HAS_RQ(my_qp) && !IS_SRQ(my_qp) && !is_user) + del_from_err_list(my_qp->recv_cq, &my_qp->rq_err_node); + + if (HAS_SQ(my_qp) && !is_user) + del_from_err_list(my_qp->send_cq, &my_qp->sq_err_node); + + /* now wait until all pending events have completed */ + wait_event(my_qp->wait_completion, !atomic_read(&my_qp->nr_events)); + + h_ret = hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp); + if (h_ret != H_SUCCESS) { + ehca_err(dev, "hipz_h_destroy_qp() failed h_ret=%lli " + "ehca_qp=%p qp_num=%x", h_ret, my_qp, qp_num); + return ehca2ib_return_code(h_ret); + } + + port_num = my_qp->init_attr.port_num; + qp_type = my_qp->init_attr.qp_type; + + if (qp_type == IB_QPT_SMI || qp_type == IB_QPT_GSI) { + spin_lock_irqsave(&sport->mod_sqp_lock, flags); + kfree(my_qp->mod_qp_parm); + my_qp->mod_qp_parm = NULL; + shca->sport[port_num - 1].ibqp_sqp[qp_type] = NULL; + spin_unlock_irqrestore(&sport->mod_sqp_lock, flags); + } + + /* no support for IB_QPT_SMI yet */ + if (qp_type == IB_QPT_GSI) { + struct ib_event event; + ehca_info(dev, "device %s: port %x is inactive.", + shca->ib_device.name, port_num); + event.device = &shca->ib_device; + event.event = IB_EVENT_PORT_ERR; + event.element.port_num = port_num; + shca->sport[port_num - 1].port_state = IB_PORT_DOWN; + ib_dispatch_event(&event); + } + + if (HAS_RQ(my_qp)) { + ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue); + if (!is_user) + vfree(my_qp->rq_map.map); + } + if (HAS_SQ(my_qp)) { + ipz_queue_dtor(my_pd, &my_qp->ipz_squeue); + if (!is_user) + vfree(my_qp->sq_map.map); + } + kmem_cache_free(qp_cache, my_qp); + atomic_dec(&shca->num_qps); + return 0; +} + +int ehca_destroy_qp(struct ib_qp *qp) +{ + return internal_destroy_qp(qp->device, + container_of(qp, struct ehca_qp, ib_qp), + qp->uobject); +} + +int ehca_destroy_srq(struct ib_srq *srq) +{ + return internal_destroy_qp(srq->device, + container_of(srq, struct ehca_qp, ib_srq), + srq->uobject); +} + +int ehca_init_qp_cache(void) +{ + qp_cache = 
kmem_cache_create("ehca_cache_qp", + sizeof(struct ehca_qp), 0, + SLAB_HWCACHE_ALIGN, + NULL); + if (!qp_cache) + return -ENOMEM; + return 0; +} + +void ehca_cleanup_qp_cache(void) +{ + if (qp_cache) + kmem_cache_destroy(qp_cache); +} diff --git a/drivers/staging/rdma/ehca/ehca_reqs.c b/drivers/staging/rdma/ehca/ehca_reqs.c new file mode 100644 index 0000000..47f9498 --- /dev/null +++ b/drivers/staging/rdma/ehca/ehca_reqs.c @@ -0,0 +1,953 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * post_send/recv, poll_cq, req_notify + * + * Authors: Hoang-Nam Nguyen + * Waleri Fomin + * Joachim Fenkes + * Reinhard Ernst + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + + +#include "ehca_classes.h" +#include "ehca_tools.h" +#include "ehca_qes.h" +#include "ehca_iverbs.h" +#include "hcp_if.h" +#include "hipz_fns.h" + +/* in RC traffic, insert an empty RDMA READ every this many packets */ +#define ACK_CIRC_THRESHOLD 2000000 + +static u64 replace_wr_id(u64 wr_id, u16 idx) +{ + u64 ret; + + ret = wr_id & ~QMAP_IDX_MASK; + ret |= idx & QMAP_IDX_MASK; + + return ret; +} + +static u16 get_app_wr_id(u64 wr_id) +{ + return wr_id & QMAP_IDX_MASK; +} + +static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue, + struct ehca_wqe *wqe_p, + struct ib_recv_wr *recv_wr, + u32 rq_map_idx) +{ + u8 cnt_ds; + if (unlikely((recv_wr->num_sge < 0) || + (recv_wr->num_sge > ipz_rqueue->act_nr_of_sg))) { + ehca_gen_err("Invalid number of WQE SGE. 
" + "num_sqe=%x max_nr_of_sg=%x", + recv_wr->num_sge, ipz_rqueue->act_nr_of_sg); + return -EINVAL; /* invalid SG list length */ + } + + /* clear wqe header until sglist */ + memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list)); + + wqe_p->work_request_id = replace_wr_id(recv_wr->wr_id, rq_map_idx); + wqe_p->nr_of_data_seg = recv_wr->num_sge; + + for (cnt_ds = 0; cnt_ds < recv_wr->num_sge; cnt_ds++) { + wqe_p->u.all_rcv.sg_list[cnt_ds].vaddr = + recv_wr->sg_list[cnt_ds].addr; + wqe_p->u.all_rcv.sg_list[cnt_ds].lkey = + recv_wr->sg_list[cnt_ds].lkey; + wqe_p->u.all_rcv.sg_list[cnt_ds].length = + recv_wr->sg_list[cnt_ds].length; + } + + if (ehca_debug_level >= 3) { + ehca_gen_dbg("RECEIVE WQE written into ipz_rqueue=%p", + ipz_rqueue); + ehca_dmp(wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "recv wqe"); + } + + return 0; +} + +#if defined(DEBUG_GSI_SEND_WR) + +/* need ib_mad struct */ +#include + +static void trace_send_wr_ud(const struct ib_send_wr *send_wr) +{ + int idx; + int j; + while (send_wr) { + struct ib_mad_hdr *mad_hdr = send_wr->wr.ud.mad_hdr; + struct ib_sge *sge = send_wr->sg_list; + ehca_gen_dbg("send_wr#%x wr_id=%lx num_sge=%x " + "send_flags=%x opcode=%x", idx, send_wr->wr_id, + send_wr->num_sge, send_wr->send_flags, + send_wr->opcode); + if (mad_hdr) { + ehca_gen_dbg("send_wr#%x mad_hdr base_version=%x " + "mgmt_class=%x class_version=%x method=%x " + "status=%x class_specific=%x tid=%lx " + "attr_id=%x resv=%x attr_mod=%x", + idx, mad_hdr->base_version, + mad_hdr->mgmt_class, + mad_hdr->class_version, mad_hdr->method, + mad_hdr->status, mad_hdr->class_specific, + mad_hdr->tid, mad_hdr->attr_id, + mad_hdr->resv, + mad_hdr->attr_mod); + } + for (j = 0; j < send_wr->num_sge; j++) { + u8 *data = __va(sge->addr); + ehca_gen_dbg("send_wr#%x sge#%x addr=%p length=%x " + "lkey=%x", + idx, j, data, sge->length, sge->lkey); + /* assume length is n*16 */ + ehca_dmp(data, sge->length, "send_wr#%x sge#%x", + idx, j); + sge++; + } /* eof for j */ + idx++; + 
send_wr = send_wr->next; + } /* eof while send_wr */ +} + +#endif /* DEBUG_GSI_SEND_WR */ + +static inline int ehca_write_swqe(struct ehca_qp *qp, + struct ehca_wqe *wqe_p, + const struct ib_send_wr *send_wr, + u32 sq_map_idx, + int hidden) +{ + u32 idx; + u64 dma_length; + struct ehca_av *my_av; + u32 remote_qkey = send_wr->wr.ud.remote_qkey; + struct ehca_qmap_entry *qmap_entry = &qp->sq_map.map[sq_map_idx]; + + if (unlikely((send_wr->num_sge < 0) || + (send_wr->num_sge > qp->ipz_squeue.act_nr_of_sg))) { + ehca_gen_err("Invalid number of WQE SGE. " + "num_sqe=%x max_nr_of_sg=%x", + send_wr->num_sge, qp->ipz_squeue.act_nr_of_sg); + return -EINVAL; /* invalid SG list length */ + } + + /* clear wqe header until sglist */ + memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list)); + + wqe_p->work_request_id = replace_wr_id(send_wr->wr_id, sq_map_idx); + + qmap_entry->app_wr_id = get_app_wr_id(send_wr->wr_id); + qmap_entry->reported = 0; + qmap_entry->cqe_req = 0; + + switch (send_wr->opcode) { + case IB_WR_SEND: + case IB_WR_SEND_WITH_IMM: + wqe_p->optype = WQE_OPTYPE_SEND; + break; + case IB_WR_RDMA_WRITE: + case IB_WR_RDMA_WRITE_WITH_IMM: + wqe_p->optype = WQE_OPTYPE_RDMAWRITE; + break; + case IB_WR_RDMA_READ: + wqe_p->optype = WQE_OPTYPE_RDMAREAD; + break; + default: + ehca_gen_err("Invalid opcode=%x", send_wr->opcode); + return -EINVAL; /* invalid opcode */ + } + + wqe_p->wqef = (send_wr->opcode) & WQEF_HIGH_NIBBLE; + + wqe_p->wr_flag = 0; + + if ((send_wr->send_flags & IB_SEND_SIGNALED || + qp->init_attr.sq_sig_type == IB_SIGNAL_ALL_WR) + && !hidden) { + wqe_p->wr_flag |= WQE_WRFLAG_REQ_SIGNAL_COM; + qmap_entry->cqe_req = 1; + } + + if (send_wr->opcode == IB_WR_SEND_WITH_IMM || + send_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) { + /* this might not work as long as HW does not support it */ + wqe_p->immediate_data = be32_to_cpu(send_wr->ex.imm_data); + wqe_p->wr_flag |= WQE_WRFLAG_IMM_DATA_PRESENT; + } + + wqe_p->nr_of_data_seg = send_wr->num_sge; + + switch 
(qp->qp_type) { + case IB_QPT_SMI: + case IB_QPT_GSI: + /* no break is intential here */ + case IB_QPT_UD: + /* IB 1.2 spec C10-15 compliance */ + if (send_wr->wr.ud.remote_qkey & 0x80000000) + remote_qkey = qp->qkey; + + wqe_p->destination_qp_number = send_wr->wr.ud.remote_qpn << 8; + wqe_p->local_ee_context_qkey = remote_qkey; + if (unlikely(!send_wr->wr.ud.ah)) { + ehca_gen_err("wr.ud.ah is NULL. qp=%p", qp); + return -EINVAL; + } + if (unlikely(send_wr->wr.ud.remote_qpn == 0)) { + ehca_gen_err("dest QP# is 0. qp=%x", qp->real_qp_num); + return -EINVAL; + } + my_av = container_of(send_wr->wr.ud.ah, struct ehca_av, ib_ah); + wqe_p->u.ud_av.ud_av = my_av->av; + + /* + * omitted check of IB_SEND_INLINE + * since HW does not support it + */ + for (idx = 0; idx < send_wr->num_sge; idx++) { + wqe_p->u.ud_av.sg_list[idx].vaddr = + send_wr->sg_list[idx].addr; + wqe_p->u.ud_av.sg_list[idx].lkey = + send_wr->sg_list[idx].lkey; + wqe_p->u.ud_av.sg_list[idx].length = + send_wr->sg_list[idx].length; + } /* eof for idx */ + if (qp->qp_type == IB_QPT_SMI || + qp->qp_type == IB_QPT_GSI) + wqe_p->u.ud_av.ud_av.pmtu = 1; + if (qp->qp_type == IB_QPT_GSI) { + wqe_p->pkeyi = send_wr->wr.ud.pkey_index; +#ifdef DEBUG_GSI_SEND_WR + trace_send_wr_ud(send_wr); +#endif /* DEBUG_GSI_SEND_WR */ + } + break; + + case IB_QPT_UC: + if (send_wr->send_flags & IB_SEND_FENCE) + wqe_p->wr_flag |= WQE_WRFLAG_FENCE; + /* no break is intentional here */ + case IB_QPT_RC: + /* TODO: atomic not implemented */ + wqe_p->u.nud.remote_virtual_address = + send_wr->wr.rdma.remote_addr; + wqe_p->u.nud.rkey = send_wr->wr.rdma.rkey; + + /* + * omitted checking of IB_SEND_INLINE + * since HW does not support it + */ + dma_length = 0; + for (idx = 0; idx < send_wr->num_sge; idx++) { + wqe_p->u.nud.sg_list[idx].vaddr = + send_wr->sg_list[idx].addr; + wqe_p->u.nud.sg_list[idx].lkey = + send_wr->sg_list[idx].lkey; + wqe_p->u.nud.sg_list[idx].length = + send_wr->sg_list[idx].length; + dma_length += 
send_wr->sg_list[idx].length; + } /* eof idx */ + wqe_p->u.nud.atomic_1st_op_dma_len = dma_length; + + /* unsolicited ack circumvention */ + if (send_wr->opcode == IB_WR_RDMA_READ) { + /* on RDMA read, switch on and reset counters */ + qp->message_count = qp->packet_count = 0; + qp->unsol_ack_circ = 1; + } else + /* else estimate #packets */ + qp->packet_count += (dma_length >> qp->mtu_shift) + 1; + + break; + + default: + ehca_gen_err("Invalid qptype=%x", qp->qp_type); + return -EINVAL; + } + + if (ehca_debug_level >= 3) { + ehca_gen_dbg("SEND WQE written into queue qp=%p ", qp); + ehca_dmp( wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "send wqe"); + } + return 0; +} + +/* map_ib_wc_status converts raw cqe_status to ib_wc_status */ +static inline void map_ib_wc_status(u32 cqe_status, + enum ib_wc_status *wc_status) +{ + if (unlikely(cqe_status & WC_STATUS_ERROR_BIT)) { + switch (cqe_status & 0x3F) { + case 0x01: + case 0x21: + *wc_status = IB_WC_LOC_LEN_ERR; + break; + case 0x02: + case 0x22: + *wc_status = IB_WC_LOC_QP_OP_ERR; + break; + case 0x03: + case 0x23: + *wc_status = IB_WC_LOC_EEC_OP_ERR; + break; + case 0x04: + case 0x24: + *wc_status = IB_WC_LOC_PROT_ERR; + break; + case 0x05: + case 0x25: + *wc_status = IB_WC_WR_FLUSH_ERR; + break; + case 0x06: + *wc_status = IB_WC_MW_BIND_ERR; + break; + case 0x07: /* remote error - look into bits 20:24 */ + switch ((cqe_status + & WC_STATUS_REMOTE_ERROR_FLAGS) >> 11) { + case 0x0: + /* + * PSN Sequence Error! + * couldn't find a matching status! 
+ */ + *wc_status = IB_WC_GENERAL_ERR; + break; + case 0x1: + *wc_status = IB_WC_REM_INV_REQ_ERR; + break; + case 0x2: + *wc_status = IB_WC_REM_ACCESS_ERR; + break; + case 0x3: + *wc_status = IB_WC_REM_OP_ERR; + break; + case 0x4: + *wc_status = IB_WC_REM_INV_RD_REQ_ERR; + break; + } + break; + case 0x08: + *wc_status = IB_WC_RETRY_EXC_ERR; + break; + case 0x09: + *wc_status = IB_WC_RNR_RETRY_EXC_ERR; + break; + case 0x0A: + case 0x2D: + *wc_status = IB_WC_REM_ABORT_ERR; + break; + case 0x0B: + case 0x2E: + *wc_status = IB_WC_INV_EECN_ERR; + break; + case 0x0C: + case 0x2F: + *wc_status = IB_WC_INV_EEC_STATE_ERR; + break; + case 0x0D: + *wc_status = IB_WC_BAD_RESP_ERR; + break; + case 0x10: + /* WQE purged */ + *wc_status = IB_WC_WR_FLUSH_ERR; + break; + default: + *wc_status = IB_WC_FATAL_ERR; + + } + } else + *wc_status = IB_WC_SUCCESS; +} + +static inline int post_one_send(struct ehca_qp *my_qp, + struct ib_send_wr *cur_send_wr, + int hidden) +{ + struct ehca_wqe *wqe_p; + int ret; + u32 sq_map_idx; + u64 start_offset = my_qp->ipz_squeue.current_q_offset; + + /* get pointer next to free WQE */ + wqe_p = ipz_qeit_get_inc(&my_qp->ipz_squeue); + if (unlikely(!wqe_p)) { + /* too many posted work requests: queue overflow */ + ehca_err(my_qp->ib_qp.device, "Too many posted WQEs " + "qp_num=%x", my_qp->ib_qp.qp_num); + return -ENOMEM; + } + + /* + * Get the index of the WQE in the send queue. The same index is used + * for writing into the sq_map. 
+ */ + sq_map_idx = start_offset / my_qp->ipz_squeue.qe_size; + + /* write a SEND WQE into the QUEUE */ + ret = ehca_write_swqe(my_qp, wqe_p, cur_send_wr, sq_map_idx, hidden); + /* + * if something failed, + * reset the free entry pointer to the start value + */ + if (unlikely(ret)) { + my_qp->ipz_squeue.current_q_offset = start_offset; + ehca_err(my_qp->ib_qp.device, "Could not write WQE " + "qp_num=%x", my_qp->ib_qp.qp_num); + return -EINVAL; + } + + return 0; +} + +int ehca_post_send(struct ib_qp *qp, + struct ib_send_wr *send_wr, + struct ib_send_wr **bad_send_wr) +{ + struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp); + int wqe_cnt = 0; + int ret = 0; + unsigned long flags; + + /* Reject WR if QP is in RESET, INIT or RTR state */ + if (unlikely(my_qp->state < IB_QPS_RTS)) { + ehca_err(qp->device, "Invalid QP state qp_state=%d qpn=%x", + my_qp->state, qp->qp_num); + ret = -EINVAL; + goto out; + } + + /* LOCK the QUEUE */ + spin_lock_irqsave(&my_qp->spinlock_s, flags); + + /* Send an empty extra RDMA read if: + * 1) there has been an RDMA read on this connection before + * 2) no RDMA read occurred for ACK_CIRC_THRESHOLD link packets + * 3) we can be sure that any previous extra RDMA read has been + * processed so we don't overflow the SQ + */ + if (unlikely(my_qp->unsol_ack_circ && + my_qp->packet_count > ACK_CIRC_THRESHOLD && + my_qp->message_count > my_qp->init_attr.cap.max_send_wr)) { + /* insert an empty RDMA READ to fix up the remote QP state */ + struct ib_send_wr circ_wr; + memset(&circ_wr, 0, sizeof(circ_wr)); + circ_wr.opcode = IB_WR_RDMA_READ; + post_one_send(my_qp, &circ_wr, 1); /* ignore retcode */ + wqe_cnt++; + ehca_dbg(qp->device, "posted circ wr qp_num=%x", qp->qp_num); + my_qp->message_count = my_qp->packet_count = 0; + } + + /* loop processes list of send reqs */ + while (send_wr) { + ret = post_one_send(my_qp, send_wr, 0); + if (unlikely(ret)) { + goto post_send_exit0; + } + wqe_cnt++; + send_wr = send_wr->next; + } + 
+post_send_exit0: + iosync(); /* serialize GAL register access */ + hipz_update_sqa(my_qp, wqe_cnt); + if (unlikely(ret || ehca_debug_level >= 2)) + ehca_dbg(qp->device, "ehca_qp=%p qp_num=%x wqe_cnt=%d ret=%i", + my_qp, qp->qp_num, wqe_cnt, ret); + my_qp->message_count += wqe_cnt; + spin_unlock_irqrestore(&my_qp->spinlock_s, flags); + +out: + if (ret) + *bad_send_wr = send_wr; + return ret; +} + +static int internal_post_recv(struct ehca_qp *my_qp, + struct ib_device *dev, + struct ib_recv_wr *recv_wr, + struct ib_recv_wr **bad_recv_wr) +{ + struct ehca_wqe *wqe_p; + int wqe_cnt = 0; + int ret = 0; + u32 rq_map_idx; + unsigned long flags; + struct ehca_qmap_entry *qmap_entry; + + if (unlikely(!HAS_RQ(my_qp))) { + ehca_err(dev, "QP has no RQ ehca_qp=%p qp_num=%x ext_type=%d", + my_qp, my_qp->real_qp_num, my_qp->ext_type); + ret = -ENODEV; + goto out; + } + + /* LOCK the QUEUE */ + spin_lock_irqsave(&my_qp->spinlock_r, flags); + + /* loop processes list of recv reqs */ + while (recv_wr) { + u64 start_offset = my_qp->ipz_rqueue.current_q_offset; + /* get pointer next to free WQE */ + wqe_p = ipz_qeit_get_inc(&my_qp->ipz_rqueue); + if (unlikely(!wqe_p)) { + /* too many posted work requests: queue overflow */ + ret = -ENOMEM; + ehca_err(dev, "Too many posted WQEs " + "qp_num=%x", my_qp->real_qp_num); + goto post_recv_exit0; + } + /* + * Get the index of the WQE in the recv queue. The same index + * is used for writing into the rq_map. 
+ */ + rq_map_idx = start_offset / my_qp->ipz_rqueue.qe_size; + + /* write a RECV WQE into the QUEUE */ + ret = ehca_write_rwqe(&my_qp->ipz_rqueue, wqe_p, recv_wr, + rq_map_idx); + /* + * if something failed, + * reset the free entry pointer to the start value + */ + if (unlikely(ret)) { + my_qp->ipz_rqueue.current_q_offset = start_offset; + ret = -EINVAL; + ehca_err(dev, "Could not write WQE " + "qp_num=%x", my_qp->real_qp_num); + goto post_recv_exit0; + } + + qmap_entry = &my_qp->rq_map.map[rq_map_idx]; + qmap_entry->app_wr_id = get_app_wr_id(recv_wr->wr_id); + qmap_entry->reported = 0; + qmap_entry->cqe_req = 1; + + wqe_cnt++; + recv_wr = recv_wr->next; + } /* eof for recv_wr */ + +post_recv_exit0: + iosync(); /* serialize GAL register access */ + hipz_update_rqa(my_qp, wqe_cnt); + if (unlikely(ret || ehca_debug_level >= 2)) + ehca_dbg(dev, "ehca_qp=%p qp_num=%x wqe_cnt=%d ret=%i", + my_qp, my_qp->real_qp_num, wqe_cnt, ret); + spin_unlock_irqrestore(&my_qp->spinlock_r, flags); + +out: + if (ret) + *bad_recv_wr = recv_wr; + + return ret; +} + +int ehca_post_recv(struct ib_qp *qp, + struct ib_recv_wr *recv_wr, + struct ib_recv_wr **bad_recv_wr) +{ + struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp); + + /* Reject WR if QP is in RESET state */ + if (unlikely(my_qp->state == IB_QPS_RESET)) { + ehca_err(qp->device, "Invalid QP state qp_state=%d qpn=%x", + my_qp->state, qp->qp_num); + *bad_recv_wr = recv_wr; + return -EINVAL; + } + + return internal_post_recv(my_qp, qp->device, recv_wr, bad_recv_wr); +} + +int ehca_post_srq_recv(struct ib_srq *srq, + struct ib_recv_wr *recv_wr, + struct ib_recv_wr **bad_recv_wr) +{ + return internal_post_recv(container_of(srq, struct ehca_qp, ib_srq), + srq->device, recv_wr, bad_recv_wr); +} + +/* + * ib_wc_opcode table converts ehca wc opcode to ib + * Since we use zero to indicate invalid opcode, the actual ib opcode must + * be decremented!!! 
+ */ +static const u8 ib_wc_opcode[255] = { + [0x01] = IB_WC_RECV+1, + [0x02] = IB_WC_RECV_RDMA_WITH_IMM+1, + [0x04] = IB_WC_BIND_MW+1, + [0x08] = IB_WC_FETCH_ADD+1, + [0x10] = IB_WC_COMP_SWAP+1, + [0x20] = IB_WC_RDMA_WRITE+1, + [0x40] = IB_WC_RDMA_READ+1, + [0x80] = IB_WC_SEND+1 +}; + +/* internal function to poll one entry of cq */ +static inline int ehca_poll_cq_one(struct ib_cq *cq, struct ib_wc *wc) +{ + int ret = 0, qmap_tail_idx; + struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); + struct ehca_cqe *cqe; + struct ehca_qp *my_qp; + struct ehca_qmap_entry *qmap_entry; + struct ehca_queue_map *qmap; + int cqe_count = 0, is_error; + +repoll: + cqe = (struct ehca_cqe *) + ipz_qeit_get_inc_valid(&my_cq->ipz_queue); + if (!cqe) { + ret = -EAGAIN; + if (ehca_debug_level >= 3) + ehca_dbg(cq->device, "Completion queue is empty " + "my_cq=%p cq_num=%x", my_cq, my_cq->cq_number); + goto poll_cq_one_exit0; + } + + /* prevents loads being reordered across this point */ + rmb(); + + cqe_count++; + if (unlikely(cqe->status & WC_STATUS_PURGE_BIT)) { + struct ehca_qp *qp; + int purgeflag; + unsigned long flags; + + qp = ehca_cq_get_qp(my_cq, cqe->local_qp_number); + if (!qp) { + ehca_err(cq->device, "cq_num=%x qp_num=%x " + "could not find qp -> ignore cqe", + my_cq->cq_number, cqe->local_qp_number); + ehca_dmp(cqe, 64, "cq_num=%x qp_num=%x", + my_cq->cq_number, cqe->local_qp_number); + /* ignore this purged cqe */ + goto repoll; + } + spin_lock_irqsave(&qp->spinlock_s, flags); + purgeflag = qp->sqerr_purgeflag; + spin_unlock_irqrestore(&qp->spinlock_s, flags); + + if (purgeflag) { + ehca_dbg(cq->device, + "Got CQE with purged bit qp_num=%x src_qp=%x", + cqe->local_qp_number, cqe->remote_qp_number); + if (ehca_debug_level >= 2) + ehca_dmp(cqe, 64, "qp_num=%x src_qp=%x", + cqe->local_qp_number, + cqe->remote_qp_number); + /* + * ignore this to avoid double cqes of bad wqe + * that caused sqe and turn off purge flag + */ + qp->sqerr_purgeflag = 0; + goto repoll; 
+ } + } + + is_error = cqe->status & WC_STATUS_ERROR_BIT; + + /* trace error CQEs if debug_level >= 1, trace all CQEs if >= 3 */ + if (unlikely(ehca_debug_level >= 3 || (ehca_debug_level && is_error))) { + ehca_dbg(cq->device, + "Received %sCOMPLETION ehca_cq=%p cq_num=%x -----", + is_error ? "ERROR " : "", my_cq, my_cq->cq_number); + ehca_dmp(cqe, 64, "ehca_cq=%p cq_num=%x", + my_cq, my_cq->cq_number); + ehca_dbg(cq->device, + "ehca_cq=%p cq_num=%x -------------------------", + my_cq, my_cq->cq_number); + } + + read_lock(&ehca_qp_idr_lock); + my_qp = idr_find(&ehca_qp_idr, cqe->qp_token); + read_unlock(&ehca_qp_idr_lock); + if (!my_qp) + goto repoll; + wc->qp = &my_qp->ib_qp; + + qmap_tail_idx = get_app_wr_id(cqe->work_request_id); + if (!(cqe->w_completion_flags & WC_SEND_RECEIVE_BIT)) + /* We got a send completion. */ + qmap = &my_qp->sq_map; + else + /* We got a receive completion. */ + qmap = &my_qp->rq_map; + + /* advance the tail pointer */ + qmap->tail = qmap_tail_idx; + + if (is_error) { + /* + * set left_to_poll to 0 because in error state, we will not + * get any additional CQEs + */ + my_qp->sq_map.next_wqe_idx = next_index(my_qp->sq_map.tail, + my_qp->sq_map.entries); + my_qp->sq_map.left_to_poll = 0; + ehca_add_to_err_list(my_qp, 1); + + my_qp->rq_map.next_wqe_idx = next_index(my_qp->rq_map.tail, + my_qp->rq_map.entries); + my_qp->rq_map.left_to_poll = 0; + if (HAS_RQ(my_qp)) + ehca_add_to_err_list(my_qp, 0); + } + + qmap_entry = &qmap->map[qmap_tail_idx]; + if (qmap_entry->reported) { + ehca_warn(cq->device, "Double cqe on qp_num=%#x", + my_qp->real_qp_num); + /* found a double cqe, discard it and read next one */ + goto repoll; + } + + wc->wr_id = replace_wr_id(cqe->work_request_id, qmap_entry->app_wr_id); + qmap_entry->reported = 1; + + /* if left_to_poll is decremented to 0, add the QP to the error list */ + if (qmap->left_to_poll > 0) { + qmap->left_to_poll--; + if ((my_qp->sq_map.left_to_poll == 0) && + (my_qp->rq_map.left_to_poll == 0)) { + 
ehca_add_to_err_list(my_qp, 1); + if (HAS_RQ(my_qp)) + ehca_add_to_err_list(my_qp, 0); + } + } + + /* eval ib_wc_opcode */ + wc->opcode = ib_wc_opcode[cqe->optype]-1; + if (unlikely(wc->opcode == -1)) { + ehca_err(cq->device, "Invalid cqe->OPType=%x cqe->status=%x " + "ehca_cq=%p cq_num=%x", + cqe->optype, cqe->status, my_cq, my_cq->cq_number); + /* dump cqe for other infos */ + ehca_dmp(cqe, 64, "ehca_cq=%p cq_num=%x", + my_cq, my_cq->cq_number); + /* update also queue adder to throw away this entry!!! */ + goto repoll; + } + + /* eval ib_wc_status */ + if (unlikely(is_error)) { + /* complete with errors */ + map_ib_wc_status(cqe->status, &wc->status); + wc->vendor_err = wc->status; + } else + wc->status = IB_WC_SUCCESS; + + wc->byte_len = cqe->nr_bytes_transferred; + wc->pkey_index = cqe->pkey_index; + wc->slid = cqe->rlid; + wc->dlid_path_bits = cqe->dlid; + wc->src_qp = cqe->remote_qp_number; + /* + * HW has "Immed data present" and "GRH present" in bits 6 and 5. + * SW defines those in bits 1 and 0, so we can just shift and mask. 
+ */ + wc->wc_flags = (cqe->w_completion_flags >> 5) & 3; + wc->ex.imm_data = cpu_to_be32(cqe->immediate_data); + wc->sl = cqe->service_level; + +poll_cq_one_exit0: + if (cqe_count > 0) + hipz_update_feca(my_cq, cqe_count); + + return ret; +} + +static int generate_flush_cqes(struct ehca_qp *my_qp, struct ib_cq *cq, + struct ib_wc *wc, int num_entries, + struct ipz_queue *ipz_queue, int on_sq) +{ + int nr = 0; + struct ehca_wqe *wqe; + u64 offset; + struct ehca_queue_map *qmap; + struct ehca_qmap_entry *qmap_entry; + + if (on_sq) + qmap = &my_qp->sq_map; + else + qmap = &my_qp->rq_map; + + qmap_entry = &qmap->map[qmap->next_wqe_idx]; + + while ((nr < num_entries) && (qmap_entry->reported == 0)) { + /* generate flush CQE */ + + memset(wc, 0, sizeof(*wc)); + + offset = qmap->next_wqe_idx * ipz_queue->qe_size; + wqe = (struct ehca_wqe *)ipz_qeit_calc(ipz_queue, offset); + if (!wqe) { + ehca_err(cq->device, "Invalid wqe offset=%#llx on " + "qp_num=%#x", offset, my_qp->real_qp_num); + return nr; + } + + wc->wr_id = replace_wr_id(wqe->work_request_id, + qmap_entry->app_wr_id); + + if (on_sq) { + switch (wqe->optype) { + case WQE_OPTYPE_SEND: + wc->opcode = IB_WC_SEND; + break; + case WQE_OPTYPE_RDMAWRITE: + wc->opcode = IB_WC_RDMA_WRITE; + break; + case WQE_OPTYPE_RDMAREAD: + wc->opcode = IB_WC_RDMA_READ; + break; + default: + ehca_err(cq->device, "Invalid optype=%x", + wqe->optype); + return nr; + } + } else + wc->opcode = IB_WC_RECV; + + if (wqe->wr_flag & WQE_WRFLAG_IMM_DATA_PRESENT) { + wc->ex.imm_data = wqe->immediate_data; + wc->wc_flags |= IB_WC_WITH_IMM; + } + + wc->status = IB_WC_WR_FLUSH_ERR; + + wc->qp = &my_qp->ib_qp; + + /* mark as reported and advance next_wqe pointer */ + qmap_entry->reported = 1; + qmap->next_wqe_idx = next_index(qmap->next_wqe_idx, + qmap->entries); + qmap_entry = &qmap->map[qmap->next_wqe_idx]; + + wc++; nr++; + } + + return nr; + +} + +int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc) +{ + struct ehca_cq *my_cq = 
container_of(cq, struct ehca_cq, ib_cq); + int nr; + struct ehca_qp *err_qp; + struct ib_wc *current_wc = wc; + int ret = 0; + unsigned long flags; + int entries_left = num_entries; + + if (num_entries < 1) { + ehca_err(cq->device, "Invalid num_entries=%d ehca_cq=%p " + "cq_num=%x", num_entries, my_cq, my_cq->cq_number); + ret = -EINVAL; + goto poll_cq_exit0; + } + + spin_lock_irqsave(&my_cq->spinlock, flags); + + /* generate flush cqes for send queues */ + list_for_each_entry(err_qp, &my_cq->sqp_err_list, sq_err_node) { + nr = generate_flush_cqes(err_qp, cq, current_wc, entries_left, + &err_qp->ipz_squeue, 1); + entries_left -= nr; + current_wc += nr; + + if (entries_left == 0) + break; + } + + /* generate flush cqes for receive queues */ + list_for_each_entry(err_qp, &my_cq->rqp_err_list, rq_err_node) { + nr = generate_flush_cqes(err_qp, cq, current_wc, entries_left, + &err_qp->ipz_rqueue, 0); + entries_left -= nr; + current_wc += nr; + + if (entries_left == 0) + break; + } + + for (nr = 0; nr < entries_left; nr++) { + ret = ehca_poll_cq_one(cq, current_wc); + if (ret) + break; + current_wc++; + } /* eof for nr */ + entries_left -= nr; + + spin_unlock_irqrestore(&my_cq->spinlock, flags); + if (ret == -EAGAIN || !ret) + ret = num_entries - entries_left; + +poll_cq_exit0: + return ret; +} + +int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags notify_flags) +{ + struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); + int ret = 0; + + switch (notify_flags & IB_CQ_SOLICITED_MASK) { + case IB_CQ_SOLICITED: + hipz_set_cqx_n0(my_cq, 1); + break; + case IB_CQ_NEXT_COMP: + hipz_set_cqx_n1(my_cq, 1); + break; + default: + return -EINVAL; + } + + if (notify_flags & IB_CQ_REPORT_MISSED_EVENTS) { + unsigned long spl_flags; + spin_lock_irqsave(&my_cq->spinlock, spl_flags); + ret = ipz_qeit_is_valid(&my_cq->ipz_queue); + spin_unlock_irqrestore(&my_cq->spinlock, spl_flags); + } + + return ret; +} diff --git a/drivers/staging/rdma/ehca/ehca_sqp.c 
b/drivers/staging/rdma/ehca/ehca_sqp.c new file mode 100644 index 0000000..376b031 --- /dev/null +++ b/drivers/staging/rdma/ehca/ehca_sqp.c @@ -0,0 +1,245 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * SQP functions + * + * Authors: Khadija Souissi + * Heiko J Schick + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include + +#include "ehca_classes.h" +#include "ehca_tools.h" +#include "ehca_iverbs.h" +#include "hcp_if.h" + +#define IB_MAD_STATUS_REDIRECT cpu_to_be16(0x0002) +#define IB_MAD_STATUS_UNSUP_VERSION cpu_to_be16(0x0004) +#define IB_MAD_STATUS_UNSUP_METHOD cpu_to_be16(0x0008) + +#define IB_PMA_CLASS_PORT_INFO cpu_to_be16(0x0001) + +/** + * ehca_define_sqp - Defines special queue pair 1 (GSI QP). When special queue + * pair is created successfully, the corresponding port gets active. + * + * Define Special Queue pair 0 (SMI QP) is still not supported. + * + * @qp_init_attr: Queue pair init attributes with port and queue pair type + */ + +u64 ehca_define_sqp(struct ehca_shca *shca, + struct ehca_qp *ehca_qp, + struct ib_qp_init_attr *qp_init_attr) +{ + u32 pma_qp_nr, bma_qp_nr; + u64 ret; + u8 port = qp_init_attr->port_num; + int counter; + + shca->sport[port - 1].port_state = IB_PORT_DOWN; + + switch (qp_init_attr->qp_type) { + case IB_QPT_SMI: + /* function not supported yet */ + break; + case IB_QPT_GSI: + ret = hipz_h_define_aqp1(shca->ipz_hca_handle, + ehca_qp->ipz_qp_handle, + ehca_qp->galpas.kernel, + (u32) qp_init_attr->port_num, + &pma_qp_nr, &bma_qp_nr); + + if (ret != H_SUCCESS) { + ehca_err(&shca->ib_device, + "Can't define AQP1 for port %x. h_ret=%lli", + port, ret); + return ret; + } + shca->sport[port - 1].pma_qp_nr = pma_qp_nr; + ehca_dbg(&shca->ib_device, "port=%x pma_qp_nr=%x", + port, pma_qp_nr); + break; + default: + ehca_err(&shca->ib_device, "invalid qp_type=%x", + qp_init_attr->qp_type); + return H_PARAMETER; + } + + if (ehca_nr_ports < 0) /* autodetect mode */ + return H_SUCCESS; + + for (counter = 0; + shca->sport[port - 1].port_state != IB_PORT_ACTIVE && + counter < ehca_port_act_time; + counter++) { + ehca_dbg(&shca->ib_device, "... 
wait until port %x is active", + port); + msleep_interruptible(1000); + } + + if (counter == ehca_port_act_time) { + ehca_err(&shca->ib_device, "Port %x is not active.", port); + return H_HARDWARE; + } + + return H_SUCCESS; +} + +struct ib_perf { + struct ib_mad_hdr mad_hdr; + u8 reserved[40]; + u8 data[192]; +} __attribute__ ((packed)); + +/* TC/SL/FL packed into 32 bits, as in ClassPortInfo */ +struct tcslfl { + u32 tc:8; + u32 sl:4; + u32 fl:20; +} __attribute__ ((packed)); + +/* IP Version/TC/FL packed into 32 bits, as in GRH */ +struct vertcfl { + u32 ver:4; + u32 tc:8; + u32 fl:20; +} __attribute__ ((packed)); + +static int ehca_process_perf(struct ib_device *ibdev, u8 port_num, + const struct ib_wc *in_wc, const struct ib_grh *in_grh, + const struct ib_mad *in_mad, struct ib_mad *out_mad) +{ + const struct ib_perf *in_perf = (const struct ib_perf *)in_mad; + struct ib_perf *out_perf = (struct ib_perf *)out_mad; + struct ib_class_port_info *poi = + (struct ib_class_port_info *)out_perf->data; + struct tcslfl *tcslfl = + (struct tcslfl *)&poi->redirect_tcslfl; + struct ehca_shca *shca = + container_of(ibdev, struct ehca_shca, ib_device); + struct ehca_sport *sport = &shca->sport[port_num - 1]; + + ehca_dbg(ibdev, "method=%x", in_perf->mad_hdr.method); + + *out_mad = *in_mad; + + if (in_perf->mad_hdr.class_version != 1) { + ehca_warn(ibdev, "Unsupported class_version=%x", + in_perf->mad_hdr.class_version); + out_perf->mad_hdr.status = IB_MAD_STATUS_UNSUP_VERSION; + goto perf_reply; + } + + switch (in_perf->mad_hdr.method) { + case IB_MGMT_METHOD_GET: + case IB_MGMT_METHOD_SET: + /* set class port info for redirection */ + out_perf->mad_hdr.attr_id = IB_PMA_CLASS_PORT_INFO; + out_perf->mad_hdr.status = IB_MAD_STATUS_REDIRECT; + memset(poi, 0, sizeof(*poi)); + poi->base_version = 1; + poi->class_version = 1; + poi->resp_time_value = 18; + + /* copy local routing information from WC where applicable */ + tcslfl->sl = in_wc->sl; + poi->redirect_lid = + 
sport->saved_attr.lid | in_wc->dlid_path_bits; + poi->redirect_qp = sport->pma_qp_nr; + poi->redirect_qkey = IB_QP1_QKEY; + + ehca_query_pkey(ibdev, port_num, in_wc->pkey_index, + &poi->redirect_pkey); + + /* if request was globally routed, copy route info */ + if (in_grh) { + const struct vertcfl *vertcfl = + (const struct vertcfl *)&in_grh->version_tclass_flow; + memcpy(poi->redirect_gid, in_grh->dgid.raw, + sizeof(poi->redirect_gid)); + tcslfl->tc = vertcfl->tc; + tcslfl->fl = vertcfl->fl; + } else + /* else only fill in default GID */ + ehca_query_gid(ibdev, port_num, 0, + (union ib_gid *)&poi->redirect_gid); + + ehca_dbg(ibdev, "ehca_pma_lid=%x ehca_pma_qp=%x", + sport->saved_attr.lid, sport->pma_qp_nr); + break; + + case IB_MGMT_METHOD_GET_RESP: + return IB_MAD_RESULT_FAILURE; + + default: + out_perf->mad_hdr.status = IB_MAD_STATUS_UNSUP_METHOD; + break; + } + +perf_reply: + out_perf->mad_hdr.method = IB_MGMT_METHOD_GET_RESP; + + return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; +} + +int ehca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, + const struct ib_wc *in_wc, const struct ib_grh *in_grh, + const struct ib_mad_hdr *in, size_t in_mad_size, + struct ib_mad_hdr *out, size_t *out_mad_size, + u16 *out_mad_pkey_index) +{ + int ret; + const struct ib_mad *in_mad = (const struct ib_mad *)in; + struct ib_mad *out_mad = (struct ib_mad *)out; + + if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) || + *out_mad_size != sizeof(*out_mad))) + return IB_MAD_RESULT_FAILURE; + + if (!port_num || port_num > ibdev->phys_port_cnt || !in_wc) + return IB_MAD_RESULT_FAILURE; + + /* accept only pma request */ + if (in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_PERF_MGMT) + return IB_MAD_RESULT_SUCCESS; + + ehca_dbg(ibdev, "port_num=%x src_qp=%x", port_num, in_wc->src_qp); + ret = ehca_process_perf(ibdev, port_num, in_wc, in_grh, + in_mad, out_mad); + + return ret; +} diff --git a/drivers/staging/rdma/ehca/ehca_tools.h b/drivers/staging/rdma/ehca/ehca_tools.h 
new file mode 100644 index 0000000..d280b12 --- /dev/null +++ b/drivers/staging/rdma/ehca/ehca_tools.h @@ -0,0 +1,155 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * auxiliary functions + * + * Authors: Christoph Raisch + * Hoang-Nam Nguyen + * Khadija Souissi + * Waleri Fomin + * Heiko J Schick + * + * Copyright (c) 2005 IBM Corporation + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + + +#ifndef EHCA_TOOLS_H +#define EHCA_TOOLS_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +extern int ehca_debug_level; + +#define ehca_dbg(ib_dev, format, arg...) \ + do { \ + if (unlikely(ehca_debug_level)) \ + dev_printk(KERN_DEBUG, (ib_dev)->dma_device, \ + "PU%04x EHCA_DBG:%s " format "\n", \ + raw_smp_processor_id(), __func__, \ + ## arg); \ + } while (0) + +#define ehca_info(ib_dev, format, arg...) \ + dev_info((ib_dev)->dma_device, "PU%04x EHCA_INFO:%s " format "\n", \ + raw_smp_processor_id(), __func__, ## arg) + +#define ehca_warn(ib_dev, format, arg...) \ + dev_warn((ib_dev)->dma_device, "PU%04x EHCA_WARN:%s " format "\n", \ + raw_smp_processor_id(), __func__, ## arg) + +#define ehca_err(ib_dev, format, arg...) \ + dev_err((ib_dev)->dma_device, "PU%04x EHCA_ERR:%s " format "\n", \ + raw_smp_processor_id(), __func__, ## arg) + +/* use this one only if no ib_dev available */ +#define ehca_gen_dbg(format, arg...) \ + do { \ + if (unlikely(ehca_debug_level)) \ + printk(KERN_DEBUG "PU%04x EHCA_DBG:%s " format "\n", \ + raw_smp_processor_id(), __func__, ## arg); \ + } while (0) + +#define ehca_gen_warn(format, arg...) \ + printk(KERN_INFO "PU%04x EHCA_WARN:%s " format "\n", \ + raw_smp_processor_id(), __func__, ## arg) + +#define ehca_gen_err(format, arg...) \ + printk(KERN_ERR "PU%04x EHCA_ERR:%s " format "\n", \ + raw_smp_processor_id(), __func__, ## arg) + +/** + * ehca_dmp - printk a memory block, whose length is n*8 bytes. + * Each line has the following layout: + * adr=X ofs=Y <8 bytes hex> <8 bytes hex> + */ +#define ehca_dmp(adr, len, format, args...) 
\ + do { \ + unsigned int x; \ + unsigned int l = (unsigned int)(len); \ + unsigned char *deb = (unsigned char *)(adr); \ + for (x = 0; x < l; x += 16) { \ + printk(KERN_INFO "EHCA_DMP:%s " format \ + " adr=%p ofs=%04x %016llx %016llx\n", \ + __func__, ##args, deb, x, \ + *((u64 *)&deb[0]), *((u64 *)&deb[8])); \ + deb += 16; \ + } \ + } while (0) + +/* define a bitmask, little endian version */ +#define EHCA_BMASK(pos, length) (((pos) << 16) + (length)) + +/* define a bitmask, the ibm way... */ +#define EHCA_BMASK_IBM(from, to) (((63 - to) << 16) + ((to) - (from) + 1)) + +/* internal function, don't use */ +#define EHCA_BMASK_SHIFTPOS(mask) (((mask) >> 16) & 0xffff) + +/* internal function, don't use */ +#define EHCA_BMASK_MASK(mask) (~0ULL >> ((64 - (mask)) & 0xffff)) + +/** + * EHCA_BMASK_SET - return value shifted and masked by mask + * variable|=EHCA_BMASK_SET(MY_MASK,0x4711) ORs the bits in variable + * variable&=~EHCA_BMASK_SET(MY_MASK,-1) clears the bits from the mask + * in variable + */ +#define EHCA_BMASK_SET(mask, value) \ + ((EHCA_BMASK_MASK(mask) & ((u64)(value))) << EHCA_BMASK_SHIFTPOS(mask)) + +/** + * EHCA_BMASK_GET - extract a parameter from value by mask + */ +#define EHCA_BMASK_GET(mask, value) \ + (EHCA_BMASK_MASK(mask) & (((u64)(value)) >> EHCA_BMASK_SHIFTPOS(mask))) + +/* Converts ehca to ib return code */ +int ehca2ib_return_code(u64 ehca_rc); + +#endif /* EHCA_TOOLS_H */ diff --git a/drivers/staging/rdma/ehca/ehca_uverbs.c b/drivers/staging/rdma/ehca/ehca_uverbs.c new file mode 100644 index 0000000..1a1d5d9 --- /dev/null +++ b/drivers/staging/rdma/ehca/ehca_uverbs.c @@ -0,0 +1,309 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * userspace support verbs + * + * Authors: Christoph Raisch + * Hoang-Nam Nguyen + * Heiko J Schick + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. 
+ * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include + +#include "ehca_classes.h" +#include "ehca_iverbs.h" +#include "ehca_mrmw.h" +#include "ehca_tools.h" +#include "hcp_if.h" + +struct ib_ucontext *ehca_alloc_ucontext(struct ib_device *device, + struct ib_udata *udata) +{ + struct ehca_ucontext *my_context; + + my_context = kzalloc(sizeof *my_context, GFP_KERNEL); + if (!my_context) { + ehca_err(device, "Out of memory device=%p", device); + return ERR_PTR(-ENOMEM); + } + + return &my_context->ib_ucontext; +} + +int ehca_dealloc_ucontext(struct ib_ucontext *context) +{ + kfree(container_of(context, struct ehca_ucontext, ib_ucontext)); + return 0; +} + +static void ehca_mm_open(struct vm_area_struct *vma) +{ + u32 *count = (u32 *)vma->vm_private_data; + if (!count) { + ehca_gen_err("Invalid vma struct vm_start=%lx vm_end=%lx", + vma->vm_start, vma->vm_end); + return; + } + (*count)++; + if (!(*count)) + ehca_gen_err("Use count overflow vm_start=%lx vm_end=%lx", + vma->vm_start, vma->vm_end); + ehca_gen_dbg("vm_start=%lx vm_end=%lx count=%x", + vma->vm_start, vma->vm_end, *count); +} + +static void ehca_mm_close(struct vm_area_struct *vma) +{ + u32 *count = (u32 *)vma->vm_private_data; + if (!count) { + ehca_gen_err("Invalid vma struct vm_start=%lx vm_end=%lx", + vma->vm_start, vma->vm_end); + return; + } + (*count)--; + ehca_gen_dbg("vm_start=%lx vm_end=%lx count=%x", + vma->vm_start, vma->vm_end, *count); +} + +static const struct vm_operations_struct vm_ops = { + .open = ehca_mm_open, + .close = ehca_mm_close, +}; + +static int ehca_mmap_fw(struct vm_area_struct *vma, struct h_galpas *galpas, + u32 *mm_count) +{ + int ret; + u64 vsize, physical; + + vsize = vma->vm_end - vma->vm_start; + if (vsize < EHCA_PAGESIZE) { + ehca_gen_err("invalid vsize=%lx", vma->vm_end - vma->vm_start); + return -EINVAL; + } + + physical = galpas->user.fw_handle; + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + ehca_gen_dbg("vsize=%llx physical=%llx", vsize, physical); + /* VM_IO | VM_DONTEXPAND | 
VM_DONTDUMP are set by remap_pfn_range() */
+	ret = remap_4k_pfn(vma, vma->vm_start, physical >> EHCA_PAGESHIFT,
+			   vma->vm_page_prot);
+	if (unlikely(ret)) {
+		ehca_gen_err("remap_pfn_range() failed ret=%i", ret);
+		return -ENOMEM;
+	}
+
+	vma->vm_private_data = mm_count;
+	(*mm_count)++;
+	vma->vm_ops = &vm_ops;
+
+	return 0;
+}
+
+/*
+ * Map every page of @queue into @vma, stepping PAGE_SIZE at a time and
+ * inserting each backing page with vm_insert_page().  On success the
+ * mapping counter @mm_count is stored in vma->vm_private_data and
+ * incremented.  Returns 0, or the error reported by vm_insert_page().
+ */
+static int ehca_mmap_queue(struct vm_area_struct *vma, struct ipz_queue *queue,
+			   u32 *mm_count)
+{
+	int ret;
+	u64 start, ofs;
+	struct page *page;
+
+	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
+	start = vma->vm_start;
+	for (ofs = 0; ofs < queue->queue_length; ofs += PAGE_SIZE) {
+		u64 virt_addr = (u64)ipz_qeit_calc(queue, ofs);
+		page = virt_to_page(virt_addr);
+		ret = vm_insert_page(vma, start, page);
+		if (unlikely(ret)) {
+			ehca_gen_err("vm_insert_page() failed rc=%i", ret);
+			return ret;
+		}
+		start += PAGE_SIZE;
+	}
+	vma->vm_private_data = mm_count;
+	(*mm_count)++;
+	vma->vm_ops = &vm_ops;
+
+	return 0;
+}
+
+/*
+ * Map one resource of a completion queue, selected by @rsrc_type:
+ *   0 - galpa firmware handle (ehca_mmap_fw)
+ *   1 - the CQ queue itself  (ehca_mmap_queue)
+ * Returns 0, the helper's error code, or -EINVAL for any other type.
+ */
+static int ehca_mmap_cq(struct vm_area_struct *vma, struct ehca_cq *cq,
+			u32 rsrc_type)
+{
+	int ret;
+
+	switch (rsrc_type) {
+	case 0: /* galpa fw handle */
+		ehca_dbg(cq->ib_cq.device, "cq_num=%x fw", cq->cq_number);
+		ret = ehca_mmap_fw(vma, &cq->galpas, &cq->mm_count_galpa);
+		if (unlikely(ret)) {
+			ehca_err(cq->ib_cq.device,
+				 "ehca_mmap_fw() failed rc=%i cq_num=%x",
+				 ret, cq->cq_number);
+			return ret;
+		}
+		break;
+
+	case 1: /* cq queue_addr */
+		ehca_dbg(cq->ib_cq.device, "cq_num=%x queue", cq->cq_number);
+		ret = ehca_mmap_queue(vma, &cq->ipz_queue, &cq->mm_count_queue);
+		if (unlikely(ret)) {
+			ehca_err(cq->ib_cq.device,
+				 "ehca_mmap_queue() failed rc=%i cq_num=%x",
+				 ret, cq->cq_number);
+			return ret;
+		}
+		break;
+
+	default:
+		ehca_err(cq->ib_cq.device, "bad resource type=%x cq_num=%x",
+			 rsrc_type, cq->cq_number);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Map one resource of a queue pair, selected by @rsrc_type:
+ *   0 - galpa firmware handle (ehca_mmap_fw)
+ *   1 - receive queue         (ehca_mmap_queue)
+ *   2 - send queue            (ehca_mmap_queue)
+ * Returns 0, the helper's error code, or -EINVAL for any other type.
+ */
+static int ehca_mmap_qp(struct vm_area_struct *vma, struct ehca_qp *qp,
+			u32 rsrc_type)
+{
+	int ret;
+
+	switch (rsrc_type) {
+	case 0: /* galpa fw handle */
+		ehca_dbg(qp->ib_qp.device, "qp_num=%x fw", qp->ib_qp.qp_num);
+		ret = ehca_mmap_fw(vma, &qp->galpas, &qp->mm_count_galpa);
+		if (unlikely(ret)) {
+			/*
+			 * Name the helper that failed and hand its error
+			 * code back unchanged, as ehca_mmap_cq() does.
+			 */
+			ehca_err(qp->ib_qp.device,
+				 "ehca_mmap_fw() failed rc=%i qp_num=%x",
+				 ret, qp->ib_qp.qp_num);
+			return ret;
+		}
+		break;
+
+	case 1: /* qp rqueue_addr */
+		ehca_dbg(qp->ib_qp.device, "qp_num=%x rq", qp->ib_qp.qp_num);
+		ret = ehca_mmap_queue(vma, &qp->ipz_rqueue,
+				      &qp->mm_count_rqueue);
+		if (unlikely(ret)) {
+			ehca_err(qp->ib_qp.device,
+				 "ehca_mmap_queue(rq) failed rc=%i qp_num=%x",
+				 ret, qp->ib_qp.qp_num);
+			return ret;
+		}
+		break;
+
+	case 2: /* qp squeue_addr */
+		ehca_dbg(qp->ib_qp.device, "qp_num=%x sq", qp->ib_qp.qp_num);
+		ret = ehca_mmap_queue(vma, &qp->ipz_squeue,
+				      &qp->mm_count_squeue);
+		if (unlikely(ret)) {
+			ehca_err(qp->ib_qp.device,
+				 "ehca_mmap_queue(sq) failed rc=%i qp_num=%x",
+				 ret, qp->ib_qp.qp_num);
+			return ret;
+		}
+		break;
+
+	default:
+		ehca_err(qp->ib_qp.device, "bad resource type=%x qp_num=%x",
+			 rsrc_type, qp->ib_qp.qp_num);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * mmap entry point.  The request is encoded in vma->vm_pgoff:
+ *   bits  0..24 - idr handle of the CQ/QP
+ *   bits 25..26 - resource type, passed on to ehca_mmap_cq()/ehca_mmap_qp()
+ *   bit  27     - queue type: 0 = CQ, 1 = QP
+ * The object is looked up in the matching idr and the call is rejected
+ * with -EINVAL unless the object's uobject belongs to @context.
+ * Returns 0 on success or a negative errno.
+ */
+int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
+{
+	u64 fileoffset = vma->vm_pgoff;
+	u32 idr_handle = fileoffset & 0x1FFFFFF;
+	u32 q_type = (fileoffset >> 27) & 0x1; /* CQ, QP,... */
+	u32 rsrc_type = (fileoffset >> 25) & 0x3; /* sq,rq,cmnd_window */
+	int ret; /* signed: carries negative errno values, printed with %i */
+	struct ehca_cq *cq;
+	struct ehca_qp *qp;
+	struct ib_uobject *uobject;
+
+	switch (q_type) {
+	case 0: /* CQ */
+		read_lock(&ehca_cq_idr_lock);
+		cq = idr_find(&ehca_cq_idr, idr_handle);
+		read_unlock(&ehca_cq_idr_lock);
+
+		/* make sure this mmap really belongs to the authorized user */
+		if (!cq)
+			return -EINVAL;
+
+		if (!cq->ib_cq.uobject || cq->ib_cq.uobject->context != context)
+			return -EINVAL;
+
+		ret = ehca_mmap_cq(vma, cq, rsrc_type);
+		if (unlikely(ret)) {
+			ehca_err(cq->ib_cq.device,
+				 "ehca_mmap_cq() failed rc=%i cq_num=%x",
+				 ret, cq->cq_number);
+			return ret;
+		}
+		break;
+
+	case 1: /* QP */
+		read_lock(&ehca_qp_idr_lock);
+		qp = idr_find(&ehca_qp_idr, idr_handle);
+		read_unlock(&ehca_qp_idr_lock);
+
+		/* make sure this mmap really belongs to the authorized user */
+		if (!qp)
+			return -EINVAL;
+
+		uobject = IS_SRQ(qp) ? qp->ib_srq.uobject : qp->ib_qp.uobject;
+		if (!uobject || uobject->context != context)
+			return -EINVAL;
+
+		ret = ehca_mmap_qp(vma, qp, rsrc_type);
+		if (unlikely(ret)) {
+			ehca_err(qp->ib_qp.device,
+				 "ehca_mmap_qp() failed rc=%i qp_num=%x",
+				 ret, qp->ib_qp.qp_num);
+			return ret;
+		}
+		break;
+
+	default:
+		ehca_gen_err("bad queue type %x", q_type);
+		return -EINVAL;
+	}
+
+	return 0;
+}
diff --git a/drivers/staging/rdma/ehca/hcp_if.c b/drivers/staging/rdma/ehca/hcp_if.c
new file mode 100644
index 0000000..89517ff
--- /dev/null
+++ b/drivers/staging/rdma/ehca/hcp_if.c
@@ -0,0 +1,949 @@
+/*
+ * IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ * Firmware Infiniband Interface code for POWER
+ *
+ * Authors: Christoph Raisch
+ *          Hoang-Nam Nguyen
+ *          Joachim Fenkes
+ *          Gerd Bayer
+ *          Waleri Fomin
+ *
+ * Copyright (c) 2005 IBM Corporation
+ *
+ * All rights reserved.
+ *
+ * This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ * BSD.
+ * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#include <asm/hvcall.h>
+#include "ehca_tools.h"
+#include "hcp_if.h"
+#include "hcp_phyp.h"
+#include "hipz_fns.h"
+#include "ipz_pt_fn.h"
+
+/*
+ * Bit fields of the H_ALLOC_RESOURCE (QP) input/output registers, given as
+ * EHCA_BMASK_IBM(first_bit, last_bit) over a 64-bit register.
+ */
+#define H_ALL_RES_QP_ENHANCED_OPS       EHCA_BMASK_IBM(9, 11)
+#define H_ALL_RES_QP_PTE_PIN            EHCA_BMASK_IBM(12, 12)
+#define H_ALL_RES_QP_SERVICE_TYPE       EHCA_BMASK_IBM(13, 15)
+#define H_ALL_RES_QP_STORAGE            EHCA_BMASK_IBM(16, 17)
+#define H_ALL_RES_QP_LL_RQ_CQE_POSTING  EHCA_BMASK_IBM(18, 18)
+#define H_ALL_RES_QP_LL_SQ_CQE_POSTING  EHCA_BMASK_IBM(19, 21)
+#define H_ALL_RES_QP_SIGNALING_TYPE     EHCA_BMASK_IBM(22, 23)
+#define H_ALL_RES_QP_UD_AV_LKEY_CTRL    EHCA_BMASK_IBM(31, 31)
+#define H_ALL_RES_QP_SMALL_SQ_PAGE_SIZE EHCA_BMASK_IBM(32, 35)
+#define H_ALL_RES_QP_SMALL_RQ_PAGE_SIZE EHCA_BMASK_IBM(36, 39)
+#define H_ALL_RES_QP_RESOURCE_TYPE      EHCA_BMASK_IBM(56, 63)
+
+#define H_ALL_RES_QP_MAX_OUTST_SEND_WR  EHCA_BMASK_IBM(0, 15)
+#define H_ALL_RES_QP_MAX_OUTST_RECV_WR  EHCA_BMASK_IBM(16, 31)
+#define H_ALL_RES_QP_MAX_SEND_SGE       EHCA_BMASK_IBM(32, 39)
+#define H_ALL_RES_QP_MAX_RECV_SGE       EHCA_BMASK_IBM(40, 47)
+
+#define H_ALL_RES_QP_UD_AV_LKEY         EHCA_BMASK_IBM(32, 63)
+#define H_ALL_RES_QP_SRQ_QP_TOKEN       EHCA_BMASK_IBM(0, 31)
+/* full 64-bit field: the last valid bit index is 63, not 64 */
+#define H_ALL_RES_QP_SRQ_QP_HANDLE      EHCA_BMASK_IBM(0, 63)
+#define H_ALL_RES_QP_SRQ_LIMIT          EHCA_BMASK_IBM(48, 63)
+#define H_ALL_RES_QP_SRQ_QPN            EHCA_BMASK_IBM(40, 63)
+
+#define H_ALL_RES_QP_ACT_OUTST_SEND_WR  EHCA_BMASK_IBM(16, 31)
+#define H_ALL_RES_QP_ACT_OUTST_RECV_WR  EHCA_BMASK_IBM(48, 63)
+#define H_ALL_RES_QP_ACT_SEND_SGE       EHCA_BMASK_IBM(8, 15)
+#define H_ALL_RES_QP_ACT_RECV_SGE       EHCA_BMASK_IBM(24, 31)
+
+#define H_ALL_RES_QP_SQUEUE_SIZE_PAGES  EHCA_BMASK_IBM(0, 31)
+#define H_ALL_RES_QP_RQUEUE_SIZE_PAGES  EHCA_BMASK_IBM(32, 63)
+
+/* Bit fields of the H_MODIFY_PORT port-attributes register. */
+#define H_MP_INIT_TYPE                  EHCA_BMASK_IBM(44, 47)
+#define H_MP_SHUTDOWN                   EHCA_BMASK_IBM(48, 48)
+#define H_MP_RESET_QKEY_CTR             EHCA_BMASK_IBM(49, 49)
+
+/* printk formats for dumping the hCall argument registers */
+#define HCALL4_REGS_FORMAT "r4=%lx r5=%lx r6=%lx r7=%lx"
+#define HCALL7_REGS_FORMAT HCALL4_REGS_FORMAT " r8=%lx r9=%lx r10=%lx"
+#define HCALL9_REGS_FORMAT HCALL7_REGS_FORMAT " r11=%lx r12=%lx"
+
+/* Taken around each hCall when ehca_lock_hcalls is set. */
+static DEFINE_SPINLOCK(hcall_lock);
+
+/*
+ * Issue an hCall that has no output registers.
+ *
+ * The call is attempted up to five times: whenever the hypervisor answers
+ * with a long-busy code the function sleeps (interruptibly) for the time
+ * derived from that code and retries.  If all attempts end long-busy,
+ * H_BUSY is returned.  Results below H_SUCCESS are logged together with
+ * all input registers.
+ */
+static long ehca_plpar_hcall_norets(unsigned long opcode,
+				    unsigned long arg1,
+				    unsigned long arg2,
+				    unsigned long arg3,
+				    unsigned long arg4,
+				    unsigned long arg5,
+				    unsigned long arg6,
+				    unsigned long arg7)
+{
+	long ret;
+	int i, sleep_msecs;
+	unsigned long flags = 0;
+
+	if (unlikely(ehca_debug_level >= 2))
+		ehca_gen_dbg("opcode=%lx " HCALL7_REGS_FORMAT,
+			     opcode, arg1, arg2, arg3, arg4, arg5, arg6, arg7);
+
+	for (i = 0; i < 5; i++) {
+		/* serialize hCalls to work around firmware issue */
+		if (ehca_lock_hcalls)
+			spin_lock_irqsave(&hcall_lock, flags);
+
+		ret = plpar_hcall_norets(opcode, arg1, arg2, arg3, arg4,
+					 arg5, arg6, arg7);
+
+		if (ehca_lock_hcalls)
+			spin_unlock_irqrestore(&hcall_lock, flags);
+
+		if (H_IS_LONG_BUSY(ret)) {
+			sleep_msecs = get_longbusy_msecs(ret);
+			msleep_interruptible(sleep_msecs);
+			continue;
+		}
+
+		if (ret < H_SUCCESS)
+			ehca_gen_err("opcode=%lx ret=%li " HCALL7_REGS_FORMAT,
+				     opcode, ret, arg1, arg2, arg3,
+				     arg4, arg5, arg6, arg7);
+		else
+			if (unlikely(ehca_debug_level >= 2))
+				ehca_gen_dbg("opcode=%lx ret=%li", opcode, ret);
+
+		return ret;
+	}
+
+	return H_BUSY;
+}
+
+/*
+ * Issue an hCall with nine input registers and nine output registers.
+ *
+ * @outs receives the output registers of the last attempt.  Retry,
+ * locking and logging behave as in ehca_plpar_hcall_norets(); on failure
+ * both the inputs and the outputs are logged.
+ */
+static long ehca_plpar_hcall9(unsigned long opcode,
+			      unsigned long *outs, /* array of 9 outputs */
+			      unsigned long arg1,
+			      unsigned long arg2,
+			      unsigned long arg3,
+			      unsigned long arg4,
+			      unsigned long arg5,
+			      unsigned long arg6,
+			      unsigned long arg7,
+			      unsigned long arg8,
+			      unsigned long arg9)
+{
+	long ret;
+	int i, sleep_msecs;
+	unsigned long flags = 0;
+
+	if (unlikely(ehca_debug_level >= 2))
+		ehca_gen_dbg("INPUT -- opcode=%lx " HCALL9_REGS_FORMAT, opcode,
+			     arg1, arg2, arg3, arg4, arg5,
+			     arg6, arg7, arg8, arg9);
+
+	for (i = 0; i < 5; i++) {
+		/* serialize hCalls to work around firmware issue */
+		if (ehca_lock_hcalls)
+			spin_lock_irqsave(&hcall_lock, flags);
+
+		ret = plpar_hcall9(opcode, outs,
+				   arg1, arg2, arg3, arg4, arg5,
+				   arg6, arg7, arg8, arg9);
+
+		if (ehca_lock_hcalls)
+			spin_unlock_irqrestore(&hcall_lock, flags);
+
+		if (H_IS_LONG_BUSY(ret)) {
+			sleep_msecs = get_longbusy_msecs(ret);
+			msleep_interruptible(sleep_msecs);
+			continue;
+		}
+
+		if (ret < H_SUCCESS) {
+			ehca_gen_err("INPUT -- opcode=%lx " HCALL9_REGS_FORMAT,
+				     opcode, arg1, arg2, arg3, arg4, arg5,
+				     arg6, arg7, arg8, arg9);
+			ehca_gen_err("OUTPUT -- ret=%li " HCALL9_REGS_FORMAT,
+				     ret, outs[0], outs[1], outs[2], outs[3],
+				     outs[4], outs[5], outs[6], outs[7],
+				     outs[8]);
+		} else if (unlikely(ehca_debug_level >= 2))
+			ehca_gen_dbg("OUTPUT -- ret=%li " HCALL9_REGS_FORMAT,
+				     ret, outs[0], outs[1], outs[2], outs[3],
+				     outs[4], outs[5], outs[6], outs[7],
+				     outs[8]);
+		return ret;
+	}
+
+	return H_BUSY;
+}
+
+/*
+ * Allocate an event queue with H_ALLOC_RESOURCE (resource type 3).
+ * @neq_control == 1 requests a notification event queue; any other value
+ * sets the "ISN is associated" control bit instead.  The handle, actual
+ * number of entries, actual pages and *eq_ist are copied from the hCall
+ * outputs whatever the return code is.  @pfeq is not used here.
+ */
+u64 hipz_h_alloc_resource_eq(const struct ipz_adapter_handle adapter_handle,
+			     struct ehca_pfeq *pfeq,
+			     const u32 neq_control,
+			     const u32 number_of_entries,
+			     struct ipz_eq_handle *eq_handle,
+			     u32 *act_nr_of_entries,
+			     u32 *act_pages,
+			     u32 *eq_ist)
+{
+	u64 ret;
+	unsigned long outs[PLPAR_HCALL9_BUFSIZE];
+	u64 allocate_controls;
+
+	/* resource type */
+	allocate_controls = 3ULL;
+
+	/* ISN is associated */
+	if (neq_control != 1)
+		allocate_controls = (1ULL << (63 - 7)) | allocate_controls;
+	else /* notification event queue */
+		allocate_controls = (1ULL << 63) | allocate_controls;
+
+	ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
+				adapter_handle.handle,  /* r4 */
+				allocate_controls,      /* r5 */
+				number_of_entries,      /* r6 */
+				0, 0, 0, 0, 0, 0);
+	eq_handle->handle = outs[0];
+	*act_nr_of_entries = (u32)outs[3];
+	*act_pages = (u32)outs[4];
+	*eq_ist = (u32)outs[5];
+
+	if (ret == H_NOT_ENOUGH_RESOURCES)
+		ehca_gen_err("Not enough resource - ret=%lli ", ret);
+
+	return ret;
+}
+
+/* Issue H_RESET_EVENTS for @eq_handle with @event_mask. */
+u64 hipz_h_reset_event(const struct ipz_adapter_handle adapter_handle,
+		       struct ipz_eq_handle eq_handle,
+		       const u64 event_mask)
+{
+	return ehca_plpar_hcall_norets(H_RESET_EVENTS,
+				       adapter_handle.handle, /* r4 */
+				       eq_handle.handle,      /* r5 */
+				       event_mask,	      /* r6 */
+				       0, 0, 0, 0);
+}
+
+/*
+ * Allocate a completion queue with H_ALLOC_RESOURCE (resource type 2) and
+ * construct its galpas from outs[5]/outs[6].  If hcp_galpas_ctor() fails,
+ * the firmware resource is released with H_FREE_RESOURCE and H_NO_MEM is
+ * returned.
+ */
+u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle,
+			     struct ehca_cq *cq,
+			     struct ehca_alloc_cq_parms *param)
+{
+	int rc;
+	u64 ret;
+	unsigned long outs[PLPAR_HCALL9_BUFSIZE];
+
+	ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
+				adapter_handle.handle,   /* r4  */
+				2,	                 /* r5  */
+				param->eq_handle.handle, /* r6  */
+				cq->token,	         /* r7  */
+				param->nr_cqe,           /* r8  */
+				0, 0, 0, 0);
+	cq->ipz_cq_handle.handle = outs[0];
+	param->act_nr_of_entries = (u32)outs[3];
+	param->act_pages = (u32)outs[4];
+
+	if (ret == H_SUCCESS) {
+		rc = hcp_galpas_ctor(&cq->galpas, 0, outs[5], outs[6]);
+		if (rc) {
+			ehca_gen_err("Could not establish HW access. rc=%d paddr=%#lx",
+				     rc, outs[5]);
+
+			ehca_plpar_hcall_norets(H_FREE_RESOURCE,
+						adapter_handle.handle,     /* r4 */
+						cq->ipz_cq_handle.handle,  /* r5 */
+						0, 0, 0, 0, 0);
+			ret = H_NO_MEM;
+		}
+	}
+
+	if (ret == H_NOT_ENOUGH_RESOURCES)
+		ehca_gen_err("Not enough resources. ret=%lli", ret);
+
+	return ret;
+}
+
+/*
+ * Allocate a queue pair with H_ALLOC_RESOURCE (resource type 1).
+ *
+ * The request registers are assembled from @parms; for an SRQ
+ * (ext_type == EQPT_SRQ) r12 carries the SRQ limit, otherwise the SRQ QPN.
+ * The handle, real QP number and the actual queue sizes are written back
+ * to @parms whatever the return code is.  If hcp_galpas_ctor() fails, the
+ * firmware resource is released with H_FREE_RESOURCE and H_NO_MEM is
+ * returned.
+ */
+u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle,
+			     struct ehca_alloc_qp_parms *parms, int is_user)
+{
+	int rc;
+	u64 ret;
+	u64 allocate_controls, max_r10_reg, r11, r12;
+	unsigned long outs[PLPAR_HCALL9_BUFSIZE];
+
+	allocate_controls =
+		EHCA_BMASK_SET(H_ALL_RES_QP_ENHANCED_OPS, parms->ext_type)
+		| EHCA_BMASK_SET(H_ALL_RES_QP_PTE_PIN, 0)
+		| EHCA_BMASK_SET(H_ALL_RES_QP_SERVICE_TYPE, parms->servicetype)
+		| EHCA_BMASK_SET(H_ALL_RES_QP_SIGNALING_TYPE, parms->sigtype)
+		| EHCA_BMASK_SET(H_ALL_RES_QP_STORAGE, parms->qp_storage)
+		| EHCA_BMASK_SET(H_ALL_RES_QP_SMALL_SQ_PAGE_SIZE,
+				 parms->squeue.page_size)
+		| EHCA_BMASK_SET(H_ALL_RES_QP_SMALL_RQ_PAGE_SIZE,
+				 parms->rqueue.page_size)
+		| EHCA_BMASK_SET(H_ALL_RES_QP_LL_RQ_CQE_POSTING,
+				 !!(parms->ll_comp_flags & LLQP_RECV_COMP))
+		| EHCA_BMASK_SET(H_ALL_RES_QP_LL_SQ_CQE_POSTING,
+				 !!(parms->ll_comp_flags & LLQP_SEND_COMP))
+		| EHCA_BMASK_SET(H_ALL_RES_QP_UD_AV_LKEY_CTRL,
+				 parms->ud_av_l_key_ctl)
+		| EHCA_BMASK_SET(H_ALL_RES_QP_RESOURCE_TYPE, 1);
+
+	max_r10_reg =
+		EHCA_BMASK_SET(H_ALL_RES_QP_MAX_OUTST_SEND_WR,
+			       parms->squeue.max_wr + 1)
+		| EHCA_BMASK_SET(H_ALL_RES_QP_MAX_OUTST_RECV_WR,
+				 parms->rqueue.max_wr + 1)
+		| EHCA_BMASK_SET(H_ALL_RES_QP_MAX_SEND_SGE,
+				 parms->squeue.max_sge)
+		| EHCA_BMASK_SET(H_ALL_RES_QP_MAX_RECV_SGE,
+				 parms->rqueue.max_sge);
+
+	r11 = EHCA_BMASK_SET(H_ALL_RES_QP_SRQ_QP_TOKEN, parms->srq_token);
+
+	if (parms->ext_type == EQPT_SRQ)
+		r12 = EHCA_BMASK_SET(H_ALL_RES_QP_SRQ_LIMIT, parms->srq_limit);
+	else
+		r12 = EHCA_BMASK_SET(H_ALL_RES_QP_SRQ_QPN, parms->srq_qpn);
+
+	ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
+				adapter_handle.handle,	           /* r4  */
+				allocate_controls,	           /* r5  */
+				parms->send_cq_handle.handle,
+				parms->recv_cq_handle.handle,
+				parms->eq_handle.handle,
+				((u64)parms->token << 32) | parms->pd.value,
+				max_r10_reg, r11, r12);
+
+	parms->qp_handle.handle = outs[0];
+	parms->real_qp_num = (u32)outs[1];
+	parms->squeue.act_nr_wqes =
+		(u16)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_OUTST_SEND_WR, outs[2]);
+	parms->rqueue.act_nr_wqes =
+		(u16)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_OUTST_RECV_WR, outs[2]);
+	parms->squeue.act_nr_sges =
+		(u8)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_SEND_SGE, outs[3]);
+	parms->rqueue.act_nr_sges =
+		(u8)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_RECV_SGE, outs[3]);
+	parms->squeue.queue_size =
+		(u32)EHCA_BMASK_GET(H_ALL_RES_QP_SQUEUE_SIZE_PAGES, outs[4]);
+	parms->rqueue.queue_size =
+		(u32)EHCA_BMASK_GET(H_ALL_RES_QP_RQUEUE_SIZE_PAGES, outs[4]);
+
+	if (ret == H_SUCCESS) {
+		/*
+		 * NOTE(review): both address arguments are outs[6] here,
+		 * whereas the CQ path passes outs[5] and outs[6] - confirm
+		 * against the firmware interface description.
+		 */
+		rc = hcp_galpas_ctor(&parms->galpas, is_user, outs[6], outs[6]);
+		if (rc) {
+			ehca_gen_err("Could not establish HW access. rc=%d paddr=%#lx",
+				     rc, outs[6]);
+
+			ehca_plpar_hcall_norets(H_FREE_RESOURCE,
+						adapter_handle.handle,     /* r4 */
+						parms->qp_handle.handle,  /* r5 */
+						0, 0, 0, 0, 0);
+			ret = H_NO_MEM;
+		}
+	}
+
+	if (ret == H_NOT_ENOUGH_RESOURCES)
+		ehca_gen_err("Not enough resources. ret=%lli", ret);
+
+	return ret;
+}
+
+/*
+ * Issue H_QUERY_PORT for @port_id.  The physical address of the response
+ * block must be EHCA_PAGESIZE aligned, otherwise H_PARAMETER is returned
+ * without calling the hypervisor.
+ */
+u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle,
+		      const u8 port_id,
+		      struct hipz_query_port *query_port_response_block)
+{
+	u64 ret;
+	u64 r_cb = __pa(query_port_response_block);
+
+	if (r_cb & (EHCA_PAGESIZE-1)) {
+		ehca_gen_err("response block not page aligned");
+		return H_PARAMETER;
+	}
+
+	ret = ehca_plpar_hcall_norets(H_QUERY_PORT,
+				      adapter_handle.handle, /* r4 */
+				      port_id,	             /* r5 */
+				      r_cb,	             /* r6 */
+				      0, 0, 0, 0);
+
+	if (ehca_debug_level >= 2)
+		ehca_dmp(query_port_response_block, 64, "response_block");
+
+	return ret;
+}
+
+/*
+ * Issue H_MODIFY_PORT.  The attribute register starts from @port_cap and
+ * gets the shutdown, init-type and reset-qkey-counter fields added for
+ * each corresponding IB_PORT_* bit set in @modify_mask.
+ */
+u64 hipz_h_modify_port(const struct ipz_adapter_handle adapter_handle,
+		       const u8 port_id, const u32 port_cap,
+		       const u8 init_type, const int modify_mask)
+{
+	u64 port_attributes = port_cap;
+
+	if (modify_mask & IB_PORT_SHUTDOWN)
+		port_attributes |= EHCA_BMASK_SET(H_MP_SHUTDOWN, 1);
+	if (modify_mask & IB_PORT_INIT_TYPE)
+		port_attributes |= EHCA_BMASK_SET(H_MP_INIT_TYPE, init_type);
+	if (modify_mask & IB_PORT_RESET_QKEY_CNTR)
+		port_attributes |= EHCA_BMASK_SET(H_MP_RESET_QKEY_CTR, 1);
+
+	return ehca_plpar_hcall_norets(H_MODIFY_PORT,
+				       adapter_handle.handle, /* r4 */
+				       port_id,               /* r5 */
+				       port_attributes,       /* r6 */
+				       0, 0, 0, 0);
+}
+
+/*
+ * Issue H_QUERY_HCA.  The physical address of @query_hca_rblock must be
+ * EHCA_PAGESIZE aligned, otherwise H_PARAMETER is returned.
+ */
+u64 hipz_h_query_hca(const struct ipz_adapter_handle adapter_handle,
+		     struct hipz_query_hca *query_hca_rblock)
+{
+	u64 r_cb = __pa(query_hca_rblock);
+
+	if (r_cb & (EHCA_PAGESIZE-1)) {
+		ehca_gen_err("response_block=%p not page aligned",
+			     query_hca_rblock);
+		return H_PARAMETER;
+	}
+
+	return ehca_plpar_hcall_norets(H_QUERY_HCA,
+				       adapter_handle.handle, /* r4 */
+				       r_cb,                  /* r5 */
+				       0, 0, 0, 0, 0);
+}
+
+/*
+ * Common H_REGISTER_RPAGES helper used by the hipz_h_register_rpage_*()
+ * wrappers.  r5 combines @queue_type (low byte) with @pagesize shifted
+ * left by 8.
+ */
+u64 hipz_h_register_rpage(const struct ipz_adapter_handle adapter_handle,
+			  const u8 pagesize,
+			  const u8 queue_type,
+			  const u64 resource_handle,
+			  const u64 logical_address_of_page,
+			  u64 count)
+{
+	return ehca_plpar_hcall_norets(H_REGISTER_RPAGES,
+				       adapter_handle.handle,      /* r4  */
+				       (u64)queue_type | ((u64)pagesize) << 8,
+				       /* r5  */
+				       resource_handle,	           /* r6  */
+				       logical_address_of_page,    /* r7  */
+				       count,		           /* r8  */
+				       0, 0);
+}
+
+/*
+ * Register one page for an event queue.  Only @count == 1 is accepted;
+ * anything else yields H_PARAMETER.  @pfeq is not used here.
+ */
+u64 hipz_h_register_rpage_eq(const struct ipz_adapter_handle adapter_handle,
+			     const struct ipz_eq_handle eq_handle,
+			     struct ehca_pfeq *pfeq,
+			     const u8 pagesize,
+			     const u8 queue_type,
+			     const u64 logical_address_of_page,
+			     const u64 count)
+{
+	if (count != 1) {
+		ehca_gen_err("Page counter=%llx", count);
+		return H_PARAMETER;
+	}
+	return hipz_h_register_rpage(adapter_handle,
+				     pagesize,
+				     queue_type,
+				     eq_handle.handle,
+				     logical_address_of_page, count);
+}
+
+/*
+ * Issue H_QUERY_INT_STATE for interrupt source @ist and log any result
+ * other than H_SUCCESS or H_BUSY.
+ */
+u64 hipz_h_query_int_state(const struct ipz_adapter_handle adapter_handle,
+			   u32 ist)
+{
+	u64 ret;
+	ret = ehca_plpar_hcall_norets(H_QUERY_INT_STATE,
+				      adapter_handle.handle, /* r4 */
+				      ist,                   /* r5 */
+				      0, 0, 0, 0, 0);
+
+	if (ret != H_SUCCESS && ret != H_BUSY)
+		ehca_gen_err("Could not query interrupt state.");
+
+	return ret;
+}
+
+/*
+ * Register one page for a completion queue.  Only @count == 1 is
+ * accepted; anything else yields H_PARAMETER.  @pfcq and @gal are not
+ * used here.
+ */
+u64 hipz_h_register_rpage_cq(const struct ipz_adapter_handle adapter_handle,
+			     const struct ipz_cq_handle cq_handle,
+			     struct ehca_pfcq *pfcq,
+			     const u8 pagesize,
+			     const u8 queue_type,
+			     const u64 logical_address_of_page,
+			     const u64 count,
+			     const struct h_galpa gal)
+{
+	if (count != 1) {
+		ehca_gen_err("Page counter=%llx", count);
+		return H_PARAMETER;
+	}
+
+	return hipz_h_register_rpage(adapter_handle, pagesize, queue_type,
+				     cq_handle.handle, logical_address_of_page,
+				     count);
+}
+
+/*
+ * Register a page for a queue pair.  @count greater than 1 is rejected
+ * with H_PARAMETER (unlike the EQ/CQ variants, 0 is let through).
+ * @pfqp and @galpa are not used here.
+ */
+u64 hipz_h_register_rpage_qp(const struct ipz_adapter_handle adapter_handle,
+			     const struct ipz_qp_handle qp_handle,
+			     struct ehca_pfqp *pfqp,
+			     const u8 pagesize,
+			     const u8 queue_type,
+			     const u64 logical_address_of_page,
+			     const u64 count,
+			     const struct h_galpa galpa)
+{
+	if (count > 1) {
+		ehca_gen_err("Page counter=%llx", count);
+		return H_PARAMETER;
+	}
+
+	return hipz_h_register_rpage(adapter_handle, pagesize, queue_type,
+				     qp_handle.handle, logical_address_of_page,
+				     count);
+}
+
+/*
+ * Issue H_DISABLE_AND_GETC with @dis_and_get_function_code for a QP.
+ * outs[0] and outs[1] are stored through the two output pointers when
+ * those are non-NULL.  @pfqp is not used here.
+ */
+u64 hipz_h_disable_and_get_wqe(const struct ipz_adapter_handle adapter_handle,
+			       const struct ipz_qp_handle qp_handle,
+			       struct ehca_pfqp *pfqp,
+			       void **log_addr_next_sq_wqe2processed,
+			       void **log_addr_next_rq_wqe2processed,
+			       int dis_and_get_function_code)
+{
+	u64 ret;
+	unsigned long outs[PLPAR_HCALL9_BUFSIZE];
+
+	ret = ehca_plpar_hcall9(H_DISABLE_AND_GETC, outs,
+				adapter_handle.handle,     /* r4 */
+				dis_and_get_function_code, /* r5 */
+				qp_handle.handle,	   /* r6 */
+				0, 0, 0, 0, 0, 0);
+	if (log_addr_next_sq_wqe2processed)
+		*log_addr_next_sq_wqe2processed = (void *)outs[0];
+	if (log_addr_next_rq_wqe2processed)
+		*log_addr_next_rq_wqe2processed = (void *)outs[1];
+
+	return ret;
+}
+
+/*
+ * Issue H_MODIFY_QP with @update_mask and the physical address of @mqpcb.
+ * @pfqp and @gal are not used here.
+ */
+u64 hipz_h_modify_qp(const struct ipz_adapter_handle adapter_handle,
+		     const struct ipz_qp_handle qp_handle,
+		     struct ehca_pfqp *pfqp,
+		     const u64 update_mask,
+		     struct hcp_modify_qp_control_block *mqpcb,
+		     struct h_galpa gal)
+{
+	u64 ret;
+	unsigned long outs[PLPAR_HCALL9_BUFSIZE];
+	ret = ehca_plpar_hcall9(H_MODIFY_QP, outs,
+				adapter_handle.handle, /* r4 */
+				qp_handle.handle,      /* r5 */
+				update_mask,	       /* r6 */
+				__pa(mqpcb),	       /* r7 */
+				0, 0, 0, 0, 0);
+
+	if (ret == H_NOT_ENOUGH_RESOURCES)
+		ehca_gen_err("Insufficient resources ret=%lli", ret);
+
+	return ret;
+}
+
+/*
+ * Issue H_QUERY_QP, passing the physical address of @qqpcb.
+ * @pfqp and @gal are not used here.
+ */
+u64 hipz_h_query_qp(const struct ipz_adapter_handle adapter_handle,
+		    const struct ipz_qp_handle qp_handle,
+		    struct ehca_pfqp *pfqp,
+		    struct hcp_modify_qp_control_block *qqpcb,
+		    struct h_galpa gal)
+{
+	return ehca_plpar_hcall_norets(H_QUERY_QP,
+				       adapter_handle.handle, /* r4 */
+				       qp_handle.handle,      /* r5 */
+				       __pa(qqpcb),	      /* r6 */
+				       0, 0, 0, 0);
+}
+
+/*
+ * Tear down a queue pair: destruct its galpas (H_RESOURCE on failure),
+ * issue H_DISABLE_AND_GETC with function code 1, then H_FREE_RESOURCE.
+ * The value returned is the result of H_FREE_RESOURCE; the result of
+ * H_DISABLE_AND_GETC is only logged when it is H_HARDWARE.
+ */
+u64 hipz_h_destroy_qp(const struct ipz_adapter_handle adapter_handle,
+		      struct ehca_qp *qp)
+{
+	u64 ret;
+	unsigned long outs[PLPAR_HCALL9_BUFSIZE];
+
+	ret = hcp_galpas_dtor(&qp->galpas);
+	if (ret) {
+		ehca_gen_err("Could not destruct qp->galpas");
+		return H_RESOURCE;
+	}
+	ret = ehca_plpar_hcall9(H_DISABLE_AND_GETC, outs,
+				adapter_handle.handle,     /* r4 */
+				/* function code */
+				1,	                   /* r5 */
+				qp->ipz_qp_handle.handle,  /* r6 */
+				0, 0, 0, 0, 0, 0);
+	if (ret == H_HARDWARE)
+		ehca_gen_err("HCA not operational. ret=%lli", ret);
+
+	ret = ehca_plpar_hcall_norets(H_FREE_RESOURCE,
+				      adapter_handle.handle,     /* r4 */
+				      qp->ipz_qp_handle.handle,  /* r5 */
+				      0, 0, 0, 0, 0);
+
+	if (ret == H_RESOURCE)
+		ehca_gen_err("Resource still in use. ret=%lli", ret);
+
+	return ret;
+}
+
+/* Issue H_DEFINE_AQP0 for @qp_handle on @port.  @gal is not used here. */
+u64 hipz_h_define_aqp0(const struct ipz_adapter_handle adapter_handle,
+		       const struct ipz_qp_handle qp_handle,
+		       struct h_galpa gal,
+		       u32 port)
+{
+	return ehca_plpar_hcall_norets(H_DEFINE_AQP0,
+				       adapter_handle.handle, /* r4 */
+				       qp_handle.handle,      /* r5 */
+				       port,                  /* r6 */
+				       0, 0, 0, 0);
+}
+
+/*
+ * Issue H_DEFINE_AQP1 for @qp_handle on @port and store outs[0]/outs[1]
+ * in *pma_qp_nr / *bma_qp_nr.  @gal is not used here.
+ */
+u64 hipz_h_define_aqp1(const struct ipz_adapter_handle adapter_handle,
+		       const struct ipz_qp_handle qp_handle,
+		       struct h_galpa gal,
+		       u32 port, u32 * pma_qp_nr,
+		       u32 * bma_qp_nr)
+{
+	u64 ret;
+	unsigned long outs[PLPAR_HCALL9_BUFSIZE];
+
+	ret = ehca_plpar_hcall9(H_DEFINE_AQP1, outs,
+				adapter_handle.handle, /* r4 */
+				qp_handle.handle,      /* r5 */
+				port,	               /* r6 */
+				0, 0, 0, 0, 0, 0);
+	*pma_qp_nr = (u32)outs[0];
+	*bma_qp_nr = (u32)outs[1];
+
+	if (ret == H_ALIAS_EXIST)
+		ehca_gen_err("AQP1 already exists. ret=%lli", ret);
+
+	return ret;
+}
+
+/*
+ * Issue H_ATTACH_MCQP.  Note the register order: @interface_id goes in r7
+ * and @subnet_prefix in r8.  @gal is not used here.
+ */
+u64 hipz_h_attach_mcqp(const struct ipz_adapter_handle adapter_handle,
+		       const struct ipz_qp_handle qp_handle,
+		       struct h_galpa gal,
+		       u16 mcg_dlid,
+		       u64 subnet_prefix, u64 interface_id)
+{
+	u64 ret;
+
+	ret = ehca_plpar_hcall_norets(H_ATTACH_MCQP,
+				      adapter_handle.handle,  /* r4 */
+				      qp_handle.handle,       /* r5 */
+				      mcg_dlid,               /* r6 */
+				      interface_id,           /* r7 */
+				      subnet_prefix,          /* r8 */
+				      0, 0);
+
+	if (ret == H_NOT_ENOUGH_RESOURCES)
+		ehca_gen_err("Not enough resources. ret=%lli", ret);
+
+	return ret;
+}
+
+/*
+ * Issue H_DETACH_MCQP with the same register layout as
+ * hipz_h_attach_mcqp().  @gal is not used here.
+ */
+u64 hipz_h_detach_mcqp(const struct ipz_adapter_handle adapter_handle,
+		       const struct ipz_qp_handle qp_handle,
+		       struct h_galpa gal,
+		       u16 mcg_dlid,
+		       u64 subnet_prefix, u64 interface_id)
+{
+	return ehca_plpar_hcall_norets(H_DETACH_MCQP,
+				       adapter_handle.handle, /* r4 */
+				       qp_handle.handle,      /* r5 */
+				       mcg_dlid,              /* r6 */
+				       interface_id,          /* r7 */
+				       subnet_prefix,         /* r8 */
+				       0, 0);
+}
+
+/*
+ * Tear down a completion queue: destruct its galpas (H_RESOURCE on
+ * failure), then issue H_FREE_RESOURCE with r6 = 1 if @force_flag is
+ * non-zero, else 0.
+ */
+u64 hipz_h_destroy_cq(const struct ipz_adapter_handle adapter_handle,
+		      struct ehca_cq *cq,
+		      u8 force_flag)
+{
+	u64 ret;
+
+	ret = hcp_galpas_dtor(&cq->galpas);
+	if (ret) {
+		ehca_gen_err("Could not destruct cq->galpas");
+		return H_RESOURCE;
+	}
+
+	ret = ehca_plpar_hcall_norets(H_FREE_RESOURCE,
+				      adapter_handle.handle,     /* r4 */
+				      cq->ipz_cq_handle.handle,  /* r5 */
+				      force_flag != 0 ? 1L : 0L, /* r6 */
+				      0, 0, 0, 0);
+
+	if (ret == H_RESOURCE)
+		ehca_gen_err("H_FREE_RESOURCE failed ret=%lli ", ret);
+
+	return ret;
+}
+
+/*
+ * Tear down an event queue: destruct its galpas (H_RESOURCE on failure),
+ * then issue H_FREE_RESOURCE.
+ */
+u64 hipz_h_destroy_eq(const struct ipz_adapter_handle adapter_handle,
+		      struct ehca_eq *eq)
+{
+	u64 ret;
+
+	ret = hcp_galpas_dtor(&eq->galpas);
+	if (ret) {
+		ehca_gen_err("Could not destruct eq->galpas");
+		return H_RESOURCE;
+	}
+
+	ret = ehca_plpar_hcall_norets(H_FREE_RESOURCE,
+				      adapter_handle.handle,     /* r4 */
+				      eq->ipz_eq_handle.handle,  /* r5 */
+				      0, 0, 0, 0, 0);
+
+	if (ret == H_RESOURCE)
+		ehca_gen_err("Resource in use. ret=%lli ", ret);
+
+	return ret;
+}
+
+/*
+ * Allocate a memory region with H_ALLOC_RESOURCE (resource type 5).
+ * @access_ctrl is passed in the upper 32 bits of r8.  Handle, lkey and
+ * rkey are copied from outs[0], outs[2] and outs[3].  @mr is not used
+ * here.
+ */
+u64 hipz_h_alloc_resource_mr(const struct ipz_adapter_handle adapter_handle,
+			     const struct ehca_mr *mr,
+			     const u64 vaddr,
+			     const u64 length,
+			     const u32 access_ctrl,
+			     const struct ipz_pd pd,
+			     struct ehca_mr_hipzout_parms *outparms)
+{
+	u64 ret;
+	unsigned long outs[PLPAR_HCALL9_BUFSIZE];
+
+	ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
+				adapter_handle.handle,            /* r4 */
+				5,                                /* r5 */
+				vaddr,                            /* r6 */
+				length,                           /* r7 */
+				(((u64)access_ctrl) << 32ULL),    /* r8 */
+				pd.value,                         /* r9 */
+				0, 0, 0);
+	outparms->handle.handle = outs[0];
+	outparms->lkey = (u32)outs[2];
+	outparms->rkey = (u32)outs[3];
+
+	return ret;
+}
+
+/*
+ * Register page(s) for a memory region.  When @count > 1,
+ * @logical_address_of_page must be EHCA_PAGESIZE aligned, otherwise
+ * H_PARAMETER is returned; at debug level >= 3 with @count > 1 it is
+ * read as an array of @count page entries and dumped.
+ */
+u64 hipz_h_register_rpage_mr(const struct ipz_adapter_handle adapter_handle,
+			     const struct ehca_mr *mr,
+			     const u8 pagesize,
+			     const u8 queue_type,
+			     const u64 logical_address_of_page,
+			     const u64 count)
+{
+	u64 ret;
+
+	if (unlikely(ehca_debug_level >= 3)) {
+		if (count > 1) {
+			u64 *kpage;
+			int i;
+			kpage = __va(logical_address_of_page);
+			for (i = 0; i < count; i++)
+				ehca_gen_dbg("kpage[%d]=%p",
+					     i, (void *)kpage[i]);
+		} else
+			ehca_gen_dbg("kpage=%p",
+				     (void *)logical_address_of_page);
+	}
+
+	if ((count > 1) && (logical_address_of_page & (EHCA_PAGESIZE-1))) {
+		ehca_gen_err("logical_address_of_page not on a 4k boundary "
+			     "adapter_handle=%llx mr=%p mr_handle=%llx "
+			     "pagesize=%x queue_type=%x "
+			     "logical_address_of_page=%llx count=%llx",
+			     adapter_handle.handle, mr,
+			     mr->ipz_mr_handle.handle, pagesize, queue_type,
+			     logical_address_of_page, count);
+		ret = H_PARAMETER;
+	} else
+		ret = hipz_h_register_rpage(adapter_handle, pagesize,
+					    queue_type,
+					    mr->ipz_mr_handle.handle,
+					    logical_address_of_page, count);
+	return ret;
+}
+
+/*
+ * Issue H_QUERY_MR and decode the outputs: len = outs[0],
+ * vaddr = outs[1], acl = upper half of outs[4], lkey/rkey = upper/lower
+ * half of outs[5].
+ */
+u64 hipz_h_query_mr(const struct ipz_adapter_handle adapter_handle,
+		    const struct ehca_mr *mr,
+		    struct ehca_mr_hipzout_parms *outparms)
+{
+	u64 ret;
+	unsigned long outs[PLPAR_HCALL9_BUFSIZE];
+
+	ret = ehca_plpar_hcall9(H_QUERY_MR, outs,
+				adapter_handle.handle,     /* r4 */
+				mr->ipz_mr_handle.handle,  /* r5 */
+				0, 0, 0, 0, 0, 0, 0);
+	outparms->len = outs[0];
+	outparms->vaddr = outs[1];
+	outparms->acl  = outs[4] >> 32;
+	outparms->lkey = (u32)(outs[5] >> 32);
+	outparms->rkey = (u32)(outs[5] & (0xffffffff));
+
+	return ret;
+}
+
+/* Release a memory region with H_FREE_RESOURCE. */
+u64 hipz_h_free_resource_mr(const struct ipz_adapter_handle adapter_handle,
+			    const struct ehca_mr *mr)
+{
+	return ehca_plpar_hcall_norets(H_FREE_RESOURCE,
+				       adapter_handle.handle,    /* r4 */
+				       mr->ipz_mr_handle.handle, /* r5 */
+				       0, 0, 0, 0, 0);
+}
+
+/*
+ * Issue H_REREGISTER_PMR for @mr.  r8 combines @access_ctrl (upper 32
+ * bits) with pd.value.  vaddr, lkey and rkey are copied from outs[1],
+ * outs[2] and outs[3].
+ */
+u64 hipz_h_reregister_pmr(const struct ipz_adapter_handle adapter_handle,
+			  const struct ehca_mr *mr,
+			  const u64 vaddr_in,
+			  const u64 length,
+			  const u32 access_ctrl,
+			  const struct ipz_pd pd,
+			  const u64 mr_addr_cb,
+			  struct ehca_mr_hipzout_parms *outparms)
+{
+	u64 ret;
+	unsigned long outs[PLPAR_HCALL9_BUFSIZE];
+
+	ret = ehca_plpar_hcall9(H_REREGISTER_PMR, outs,
+				adapter_handle.handle,    /* r4 */
+				mr->ipz_mr_handle.handle, /* r5 */
+				vaddr_in,	          /* r6 */
+				length,                   /* r7 */
+				/* r8 */
+				((((u64)access_ctrl) << 32ULL) | pd.value),
+				mr_addr_cb,               /* r9 */
+				0, 0, 0);
+	outparms->vaddr = outs[1];
+	outparms->lkey = (u32)outs[2];
+	outparms->rkey = (u32)outs[3];
+
+	return ret;
+}
+
+/*
+ * Issue H_REGISTER_SMR using the handle of @orig_mr.  Handle, lkey and
+ * rkey are copied from outs[0], outs[2] and outs[3].  @mr is not used
+ * here.
+ */
+u64 hipz_h_register_smr(const struct ipz_adapter_handle adapter_handle,
+			const struct ehca_mr *mr,
+			const struct ehca_mr *orig_mr,
+			const u64 vaddr_in,
+			const u32 access_ctrl,
+			const struct ipz_pd pd,
+			struct ehca_mr_hipzout_parms *outparms)
+{
+	u64 ret;
+	unsigned long outs[PLPAR_HCALL9_BUFSIZE];
+
+	ret = ehca_plpar_hcall9(H_REGISTER_SMR, outs,
+				adapter_handle.handle,            /* r4 */
+				orig_mr->ipz_mr_handle.handle,    /* r5 */
+				vaddr_in,                         /* r6 */
+				(((u64)access_ctrl) << 32ULL),    /* r7 */
+				pd.value,                         /* r8 */
+				0, 0, 0, 0);
+	outparms->handle.handle = outs[0];
+	outparms->lkey = (u32)outs[2];
+	outparms->rkey = (u32)outs[3];
+
+	return ret;
+}
+
+/*
+ * Allocate a memory window with H_ALLOC_RESOURCE (resource type 6).
+ * Handle and rkey are copied from outs[0] and outs[3].  @mw is not used
+ * here.
+ */
+u64 hipz_h_alloc_resource_mw(const struct ipz_adapter_handle adapter_handle,
+			     const struct ehca_mw *mw,
+			     const struct ipz_pd pd,
+			     struct ehca_mw_hipzout_parms *outparms)
+{
+	u64 ret;
+	unsigned long outs[PLPAR_HCALL9_BUFSIZE];
+
+	ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
+				adapter_handle.handle,      /* r4 */
+				6,                          /* r5 */
+				pd.value,                   /* r6 */
+				0, 0, 0, 0, 0, 0);
+	outparms->handle.handle = outs[0];
+	outparms->rkey = (u32)outs[3];
+
+	return ret;
+}
+
+/* Issue H_QUERY_MW and copy the rkey from outs[3]. */
+u64 hipz_h_query_mw(const struct ipz_adapter_handle adapter_handle,
+		    const struct ehca_mw *mw,
+		    struct ehca_mw_hipzout_parms *outparms)
+{
+	u64 ret;
+	unsigned long outs[PLPAR_HCALL9_BUFSIZE];
+
+	ret = ehca_plpar_hcall9(H_QUERY_MW, outs,
+				adapter_handle.handle,    /* r4 */
+				mw->ipz_mw_handle.handle, /* r5 */
+				0, 0, 0, 0, 0, 0, 0);
+	outparms->rkey = (u32)outs[3];
+
+	return ret;
+}
+
+/* Release a memory window with H_FREE_RESOURCE. */
+u64 hipz_h_free_resource_mw(const struct ipz_adapter_handle adapter_handle,
+			    const struct ehca_mw *mw)
+{
+	return ehca_plpar_hcall_norets(H_FREE_RESOURCE,
+				       adapter_handle.handle,    /* r4 */
+				       mw->ipz_mw_handle.handle, /* r5 */
+				       0, 0, 0, 0, 0);
+}
+
+/*
+ * Issue H_ERROR_DATA for @ressource_handle.  The physical address of
+ * @rblock must be EHCA_PAGESIZE aligned, otherwise H_PARAMETER is
+ * returned.  @byte_count is not used here.
+ */
+u64 hipz_h_error_data(const struct ipz_adapter_handle adapter_handle,
+		      const u64 ressource_handle,
+		      void *rblock,
+		      unsigned long *byte_count)
+{
+	u64 r_cb = __pa(rblock);
+
+	if (r_cb & (EHCA_PAGESIZE-1)) {
+		ehca_gen_err("rblock not page aligned.");
+		return H_PARAMETER;
+	}
+
+	return ehca_plpar_hcall_norets(H_ERROR_DATA,
+				       adapter_handle.handle,
+				       ressource_handle,
+				       r_cb,
+				       0, 0, 0, 0);
+}
+
+/*
+ * Issue H_EOI for @irq.  The xirr value is @irq with 0xff placed in
+ * bits 24..31; iosync() is called first.  This goes straight to
+ * plpar_hcall_norets(), i.e. without the retry/locking wrapper.
+ */
+u64 hipz_h_eoi(int irq)
+{
+	unsigned long xirr;
+
+	iosync();
+	xirr = (0xffULL << 24) | irq;
+
+	return plpar_hcall_norets(H_EOI, xirr);
+}
diff --git a/drivers/staging/rdma/ehca/hcp_if.h b/drivers/staging/rdma/ehca/hcp_if.h
new file mode 100644
index 0000000..a46e514
--- /dev/null
+++ b/drivers/staging/rdma/ehca/hcp_if.h
@@ -0,0 +1,265 @@
+/*
+ * IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ * Firmware Infiniband Interface code for POWER
+ *
+ * Authors: Christoph Raisch
+ *          Hoang-Nam Nguyen
+ *          Gerd Bayer
+ *          Waleri Fomin
+ *
+ * Copyright (c) 2005 IBM Corporation
+ *
+ * All
rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __HCP_IF_H__ +#define __HCP_IF_H__ + +#include "ehca_classes.h" +#include "ehca_tools.h" +#include "hipz_hw.h" + +/* + * hipz_h_alloc_resource_eq allocates EQ resources in HW and FW, initialize + * resources, create the empty EQPT (ring). 
+ */
+u64 hipz_h_alloc_resource_eq(const struct ipz_adapter_handle adapter_handle,
+			     struct ehca_pfeq *pfeq,
+			     const u32 neq_control,
+			     const u32 number_of_entries,
+			     struct ipz_eq_handle *eq_handle,
+			     u32 * act_nr_of_entries,
+			     u32 * act_pages,
+			     u32 * eq_ist);
+
+u64 hipz_h_reset_event(const struct ipz_adapter_handle adapter_handle,
+		       struct ipz_eq_handle eq_handle,
+		       const u64 event_mask);
+/*
+ * hipz_h_alloc_resource_cq allocates CQ resources in HW and FW, initializes
+ * the resources and creates the empty CQPT (ring).
+ */
+u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle,
+			     struct ehca_cq *cq,
+			     struct ehca_alloc_cq_parms *param);
+
+
+/*
+ * hipz_h_alloc_resource_qp allocates QP resources in HW and FW,
+ * initializes the resources and creates the empty QPPTs (2 rings).
+ */
+u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle,
+			     struct ehca_alloc_qp_parms *parms, int is_user);
+
+u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle,
+		      const u8 port_id,
+		      struct hipz_query_port *query_port_response_block);
+
+u64 hipz_h_modify_port(const struct ipz_adapter_handle adapter_handle,
+		       const u8 port_id, const u32 port_cap,
+		       const u8 init_type, const int modify_mask);
+
+u64 hipz_h_query_hca(const struct ipz_adapter_handle adapter_handle,
+		     struct hipz_query_hca *query_hca_rblock);
+
+/*
+ * hipz_h_register_rpage is the common helper behind all
+ * hipz_h_register_rpage_*() variants; it issues the H_REGISTER_RPAGES hCall.
+ */
+u64 hipz_h_register_rpage(const struct ipz_adapter_handle adapter_handle,
+			  const u8 pagesize,
+			  const u8 queue_type,
+			  const u64 resource_handle,
+			  const u64 logical_address_of_page,
+			  u64 count);
+
+/* EQ variant of hipz_h_register_rpage; count must be exactly 1. */
+u64 hipz_h_register_rpage_eq(const struct ipz_adapter_handle adapter_handle,
+			     const struct ipz_eq_handle eq_handle,
+			     struct ehca_pfeq *pfeq,
+			     const u8 pagesize,
+			     const u8 queue_type,
+			     const u64 logical_address_of_page,
+			     const u64 count);
+
+u64 hipz_h_query_int_state(const struct ipz_adapter_handle
+			   hcp_adapter_handle,
+			   u32 ist);
+
+/* CQ variant of hipz_h_register_rpage; count must be exactly 1. */
+u64 hipz_h_register_rpage_cq(const struct ipz_adapter_handle adapter_handle,
+			     const struct ipz_cq_handle cq_handle,
+			     struct ehca_pfcq *pfcq,
+			     const u8 pagesize,
+			     const u8 queue_type,
+			     const u64 logical_address_of_page,
+			     const u64 count,
+			     const struct h_galpa gal);
+
+/* QP variant of hipz_h_register_rpage; count must not exceed 1. */
+u64 hipz_h_register_rpage_qp(const struct ipz_adapter_handle adapter_handle,
+			     const struct ipz_qp_handle qp_handle,
+			     struct ehca_pfqp *pfqp,
+			     const u8 pagesize,
+			     const u8 queue_type,
+			     const u64 logical_address_of_page,
+			     const u64 count,
+			     const struct h_galpa galpa);
+
+/*
+ * Issues H_DISABLE_AND_GETC for a QP; each output pointer may be NULL, in
+ * which case the corresponding value is not stored.
+ */
+u64 hipz_h_disable_and_get_wqe(const struct ipz_adapter_handle adapter_handle,
+			       const struct ipz_qp_handle qp_handle,
+			       struct ehca_pfqp *pfqp,
+			       void **log_addr_next_sq_wqe_tb_processed,
+			       void **log_addr_next_rq_wqe_tb_processed,
+			       int dis_and_get_function_code);
+enum hcall_sigt {
+	HCALL_SIGT_NO_CQE = 0,
+	HCALL_SIGT_BY_WQE = 1,
+	HCALL_SIGT_EVERY = 2
+};
+
+u64 hipz_h_modify_qp(const struct ipz_adapter_handle adapter_handle,
+		     const struct ipz_qp_handle qp_handle,
+		     struct ehca_pfqp *pfqp,
+		     const u64 update_mask,
+		     struct hcp_modify_qp_control_block *mqpcb,
+		     struct h_galpa gal);
+
+u64 hipz_h_query_qp(const struct ipz_adapter_handle adapter_handle,
+		    const struct ipz_qp_handle qp_handle,
+		    struct ehca_pfqp *pfqp,
+		    struct hcp_modify_qp_control_block *qqpcb,
+		    struct h_galpa gal);
+
+/*
+ * Destructs the QP's galpas, then issues H_DISABLE_AND_GETC followed by
+ * H_FREE_RESOURCE; returns the result of H_FREE_RESOURCE.
+ */
+u64 hipz_h_destroy_qp(const struct ipz_adapter_handle adapter_handle,
+		      struct ehca_qp *qp);
+
+u64 hipz_h_define_aqp0(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct h_galpa gal, + u32 port); + +u64 hipz_h_define_aqp1(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct h_galpa gal, + u32 port, u32 * pma_qp_nr, + u32 * bma_qp_nr); + +u64 hipz_h_attach_mcqp(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct h_galpa gal, + u16 mcg_dlid, + u64 subnet_prefix, u64 interface_id); + +u64 hipz_h_detach_mcqp(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct h_galpa gal, + u16 mcg_dlid, + u64 subnet_prefix, u64 interface_id); + +u64 hipz_h_destroy_cq(const struct ipz_adapter_handle adapter_handle, + struct ehca_cq *cq, + u8 force_flag); + +u64 hipz_h_destroy_eq(const struct ipz_adapter_handle adapter_handle, + struct ehca_eq *eq); + +/* + * hipz_h_alloc_resource_mr allocates MR resources in HW and FW, initialize + * resources. 
+ */ +u64 hipz_h_alloc_resource_mr(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mr *mr, + const u64 vaddr, + const u64 length, + const u32 access_ctrl, + const struct ipz_pd pd, + struct ehca_mr_hipzout_parms *outparms); + +/* hipz_h_register_rpage_mr registers MR resource pages in HW and FW */ +u64 hipz_h_register_rpage_mr(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mr *mr, + const u8 pagesize, + const u8 queue_type, + const u64 logical_address_of_page, + const u64 count); + +/* hipz_h_query_mr queries MR in HW and FW */ +u64 hipz_h_query_mr(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mr *mr, + struct ehca_mr_hipzout_parms *outparms); + +/* hipz_h_free_resource_mr frees MR resources in HW and FW */ +u64 hipz_h_free_resource_mr(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mr *mr); + +/* hipz_h_reregister_pmr reregisters MR in HW and FW */ +u64 hipz_h_reregister_pmr(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mr *mr, + const u64 vaddr_in, + const u64 length, + const u32 access_ctrl, + const struct ipz_pd pd, + const u64 mr_addr_cb, + struct ehca_mr_hipzout_parms *outparms); + +/* hipz_h_register_smr register shared MR in HW and FW */ +u64 hipz_h_register_smr(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mr *mr, + const struct ehca_mr *orig_mr, + const u64 vaddr_in, + const u32 access_ctrl, + const struct ipz_pd pd, + struct ehca_mr_hipzout_parms *outparms); + +/* + * hipz_h_alloc_resource_mw allocates MW resources in HW and FW, initialize + * resources. 
+ */ +u64 hipz_h_alloc_resource_mw(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mw *mw, + const struct ipz_pd pd, + struct ehca_mw_hipzout_parms *outparms); + +/* hipz_h_query_mw queries MW in HW and FW */ +u64 hipz_h_query_mw(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mw *mw, + struct ehca_mw_hipzout_parms *outparms); + +/* hipz_h_free_resource_mw frees MW resources in HW and FW */ +u64 hipz_h_free_resource_mw(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mw *mw); + +u64 hipz_h_error_data(const struct ipz_adapter_handle adapter_handle, + const u64 ressource_handle, + void *rblock, + unsigned long *byte_count); +u64 hipz_h_eoi(int irq); + +#endif /* __HCP_IF_H__ */ diff --git a/drivers/staging/rdma/ehca/hcp_phyp.c b/drivers/staging/rdma/ehca/hcp_phyp.c new file mode 100644 index 0000000..077376f --- /dev/null +++ b/drivers/staging/rdma/ehca/hcp_phyp.c @@ -0,0 +1,82 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * load store abstraction for ehca register access with tracing + * + * Authors: Christoph Raisch + * Hoang-Nam Nguyen + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "ehca_classes.h" +#include "hipz_hw.h" +/* + * Map EHCA_PAGESIZE bytes starting at physaddr with ioremap() and return the + * resulting mapping cast to u64. The result is 0 when ioremap() returns NULL. + */ +u64 hcall_map_page(u64 physaddr) +{ + return (u64)ioremap(physaddr, EHCA_PAGESIZE); +} +/* + * Hand mapaddr to iounmap(). Always returns 0. + */ +int hcall_unmap_page(u64 mapaddr) +{ + iounmap((volatile void __iomem *) mapaddr); + return 0; +} +/* + * Fill in *galpas. + * - is_user == 0: paddr_kernel is mapped with hcall_map_page() and stored in + * kernel.fw_handle; -ENOMEM is returned if the mapping comes back as 0. + * - is_user != 0: kernel.fw_handle is set to 0, nothing is mapped. + * In both cases paddr_user is stored unmodified in user.fw_handle. + * Returns 0 on success. + */ +int hcp_galpas_ctor(struct h_galpas *galpas, int is_user, + u64 paddr_kernel, u64 paddr_user) +{ + if (!is_user) { + galpas->kernel.fw_handle = hcall_map_page(paddr_kernel); + if (!galpas->kernel.fw_handle) + return -ENOMEM; + } else + galpas->kernel.fw_handle = 0; + + galpas->user.fw_handle = paddr_user; + + return 0; +} +/* + * Unmap the kernel handle if it is non-zero, then zero both handles. + * A non-zero result from hcall_unmap_page() is returned as is and leaves both + * handles unchanged. Returns 0 otherwise. + */ +int hcp_galpas_dtor(struct h_galpas *galpas) +{ + if (galpas->kernel.fw_handle) { + int ret = hcall_unmap_page(galpas->kernel.fw_handle); + if (ret) + return ret; + } + + galpas->user.fw_handle = galpas->kernel.fw_handle = 0; + + return 0; +} diff --git a/drivers/staging/rdma/ehca/hcp_phyp.h b/drivers/staging/rdma/ehca/hcp_phyp.h new file mode 100644 index 0000000..d1b0299 --- /dev/null +++ b/drivers/staging/rdma/ehca/hcp_phyp.h @@ -0,0 +1,90 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * Firmware calls + * + * Authors: Christoph Raisch + * Hoang-Nam Nguyen + * Waleri Fomin + *
Gerd Bayer + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef __HCP_PHYP_H__ +#define __HCP_PHYP_H__ + + +/* + * eHCA page (mapped into memory) + * resource to access eHCA register pages in CPU address space +*/ +struct h_galpa { + u64 fw_handle; + /* for pSeries this is a 64bit memory address where + I/O memory is mapped into CPU address space (kv) */ +}; + +/* + * resource to access eHCA address space registers, all types + */ +struct h_galpas { + u32 pid; /* PID of userspace galpa checking */ + struct h_galpa user; /* user space accessible resource, + set to 0 if unused */ + struct h_galpa kernel; /* kernel space accessible resource, + set to 0 if unused */ +}; +/* + * Read the 64 bit value located offset bytes after galpa.fw_handle. + * The access goes through a volatile pointer; fw_handle is used directly as + * the address, so it must already be a CPU-accessible mapping. + */ +static inline u64 hipz_galpa_load(struct h_galpa galpa, u32 offset) +{ + u64 addr = galpa.fw_handle + offset; + return *(volatile u64 __force *)addr; +} +/* + * Write value as a 64 bit store to the location offset bytes after + * galpa.fw_handle, through a volatile pointer. + */ +static inline void hipz_galpa_store(struct h_galpa galpa, u32 offset, u64 value) +{ + u64 addr = galpa.fw_handle + offset; + *(volatile u64 __force *)addr = value; +} + +int hcp_galpas_ctor(struct h_galpas *galpas, int is_user, + u64 paddr_kernel, u64 paddr_user); + +int hcp_galpas_dtor(struct h_galpas *galpas); + +u64 hcall_map_page(u64 physaddr); + +int hcall_unmap_page(u64 mapaddr); + +#endif /* __HCP_PHYP_H__ */ diff --git a/drivers/staging/rdma/ehca/hipz_fns.h b/drivers/staging/rdma/ehca/hipz_fns.h new file mode 100644 index 0000000..9dac93d --- /dev/null +++ b/drivers/staging/rdma/ehca/hipz_fns.h @@ -0,0 +1,68 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * HW abstraction register functions + * + * Authors: Christoph Raisch + * Reinhard Ernst + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD.
+ * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef __HIPZ_FNS_H__ +#define __HIPZ_FNS_H__ + +#include "ehca_classes.h" +#include "hipz_hw.h" + +#include "hipz_fns_core.h" + +#define hipz_galpa_store_eq(gal, offset, value) \ + hipz_galpa_store(gal, EQTEMM_OFFSET(offset), value) + +#define hipz_galpa_load_eq(gal, offset) \ + hipz_galpa_load(gal, EQTEMM_OFFSET(offset)) + +#define hipz_galpa_store_qped(gal, offset, value) \ + hipz_galpa_store(gal, QPEDMM_OFFSET(offset), value) + +#define hipz_galpa_load_qped(gal, offset) \ + hipz_galpa_load(gal, QPEDMM_OFFSET(offset)) + +#define hipz_galpa_store_mrmw(gal, offset, value) \ + hipz_galpa_store(gal, MRMWMM_OFFSET(offset), value) + +#define hipz_galpa_load_mrmw(gal, offset) \ + hipz_galpa_load(gal, MRMWMM_OFFSET(offset)) + +#endif diff --git a/drivers/staging/rdma/ehca/hipz_fns_core.h b/drivers/staging/rdma/ehca/hipz_fns_core.h new file mode 100644 index 0000000..868735f --- /dev/null +++ b/drivers/staging/rdma/ehca/hipz_fns_core.h @@ -0,0 +1,100 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * HW abstraction register functions + * + * Authors: Christoph Raisch + * Heiko J Schick + * Hoang-Nam Nguyen + * Reinhard Ernst + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __HIPZ_FNS_CORE_H__ +#define __HIPZ_FNS_CORE_H__ + +#include "hcp_phyp.h" +#include "hipz_hw.h" + +#define hipz_galpa_store_cq(gal, offset, value) \ + hipz_galpa_store(gal, CQTEMM_OFFSET(offset), value) + +#define hipz_galpa_load_cq(gal, offset) \ + hipz_galpa_load(gal, CQTEMM_OFFSET(offset)) + +#define hipz_galpa_store_qp(gal, offset, value) \ + hipz_galpa_store(gal, QPTEMM_OFFSET(offset), value) +#define hipz_galpa_load_qp(gal, offset) \ + hipz_galpa_load(gal, QPTEMM_OFFSET(offset)) +/* + * Store nr_wqes, placed into the QPX_SQADDER field, to the qpx_sqa register + * reached through the kernel galpa of qp. + */ +static inline void hipz_update_sqa(struct ehca_qp *qp, u16 nr_wqes) +{ + /* ringing doorbell :-) */ + hipz_galpa_store_qp(qp->galpas.kernel, qpx_sqa, + EHCA_BMASK_SET(QPX_SQADDER, nr_wqes)); +} +/* + * Store nr_wqes, placed into the QPX_RQADDER field, to the qpx_rqa register + * reached through the kernel galpa of qp. + */ +static inline void hipz_update_rqa(struct ehca_qp *qp, u16 nr_wqes) +{ + /* ringing doorbell :-) */ + hipz_galpa_store_qp(qp->galpas.kernel, qpx_rqa, + EHCA_BMASK_SET(QPX_RQADDER, nr_wqes)); +} +/* + * Store nr_cqes, placed into the CQX_FECADDER field, to the cqx_feca register + * reached through the kernel galpa of cq. + */ +static inline void hipz_update_feca(struct ehca_cq *cq, u32 nr_cqes) +{ + hipz_galpa_store_cq(cq->galpas.kernel, cqx_feca, + EHCA_BMASK_SET(CQX_FECADDER, nr_cqes)); +} +/* + * Store value, placed into the CQX_N0_GENERATE_SOLICITED_COMP_EVENT field, to + * the cqx_n0 register of cq and then read the register back. The value read + * back is not used. + */ +static inline void hipz_set_cqx_n0(struct ehca_cq *cq, u32 value) +{ + u64 cqx_n0_reg; + +
hipz_galpa_store_cq(cq->galpas.kernel, cqx_n0, + EHCA_BMASK_SET(CQX_N0_GENERATE_SOLICITED_COMP_EVENT, + value)); + cqx_n0_reg = hipz_galpa_load_cq(cq->galpas.kernel, cqx_n0); /* NOTE(review): result unused; presumably the read-back is there to push the store out to the adapter - confirm */ +} +/* + * Store value, placed into the CQX_N1_GENERATE_COMP_EVENT field, to the + * cqx_n1 register of cq and then read the register back. The value read back + * is not used. + */ +static inline void hipz_set_cqx_n1(struct ehca_cq *cq, u32 value) +{ + u64 cqx_n1_reg; + + hipz_galpa_store_cq(cq->galpas.kernel, cqx_n1, + EHCA_BMASK_SET(CQX_N1_GENERATE_COMP_EVENT, value)); + cqx_n1_reg = hipz_galpa_load_cq(cq->galpas.kernel, cqx_n1); /* NOTE(review): result unused; presumably a flushing read-back - confirm */ +} + +#endif /* __HIPZ_FNS_CORE_H__ */ diff --git a/drivers/staging/rdma/ehca/hipz_hw.h b/drivers/staging/rdma/ehca/hipz_hw.h new file mode 100644 index 0000000..bf996c7 --- /dev/null +++ b/drivers/staging/rdma/ehca/hipz_hw.h @@ -0,0 +1,414 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * eHCA register definitions + * + * Authors: Waleri Fomin + * Christoph Raisch + * Reinhard Ernst + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __HIPZ_HW_H__ +#define __HIPZ_HW_H__ + +#include "ehca_tools.h" + +#define EHCA_MAX_MTU 4 + +/* QP Table Entry Memory Map */ +struct hipz_qptemm { + u64 qpx_hcr; + u64 qpx_c; + u64 qpx_herr; + u64 qpx_aer; +/* 0x20*/ + u64 qpx_sqa; + u64 qpx_sqc; + u64 qpx_rqa; + u64 qpx_rqc; +/* 0x40*/ + u64 qpx_st; + u64 qpx_pmstate; + u64 qpx_pmfa; + u64 qpx_pkey; +/* 0x60*/ + u64 qpx_pkeya; + u64 qpx_pkeyb; + u64 qpx_pkeyc; + u64 qpx_pkeyd; +/* 0x80*/ + u64 qpx_qkey; + u64 qpx_dqp; + u64 qpx_dlidp; + u64 qpx_portp; +/* 0xa0*/ + u64 qpx_slidp; + u64 qpx_slidpp; + u64 qpx_dlida; + u64 qpx_porta; +/* 0xc0*/ + u64 qpx_slida; + u64 qpx_slidpa; + u64 qpx_slvl; + u64 qpx_ipd; +/* 0xe0*/ + u64 qpx_mtu; + u64 qpx_lato; + u64 qpx_rlimit; + u64 qpx_rnrlimit; +/* 0x100*/ + u64 qpx_t; + u64 qpx_sqhp; + u64 qpx_sqptp; + u64 qpx_nspsn; +/* 0x120*/ + u64 qpx_nspsnhwm; + u64 reserved1; + u64 qpx_sdsi; + u64 qpx_sdsbc; +/* 0x140*/ + u64 qpx_sqwsize; + u64 qpx_sqwts; + u64 qpx_lsn; + u64 qpx_nssn; +/* 0x160 */ + u64 qpx_mor; + u64 qpx_cor; + u64 qpx_sqsize; + u64 qpx_erc; +/* 0x180*/ + u64 qpx_rnrrc; + u64 qpx_ernrwt; + u64 qpx_rnrresp; + u64 qpx_lmsna; +/* 0x1a0 */ + u64 qpx_sqhpc; + u64 qpx_sqcptp; + u64 qpx_sigt; + u64 qpx_wqecnt; +/* 0x1c0*/ + u64 qpx_rqhp; + u64 qpx_rqptp; + u64 qpx_rqsize; + u64 qpx_nrr; +/* 0x1e0*/ + u64 qpx_rdmac; + u64 qpx_nrpsn; + u64 qpx_lapsn; + u64 qpx_lcr; +/* 0x200*/ + u64 qpx_rwc; + u64 qpx_rwva; + u64 qpx_rdsi; + 
u64 qpx_rdsbc; +/* 0x220*/ + u64 qpx_rqwsize; + u64 qpx_crmsn; + u64 qpx_rdd; + u64 qpx_larpsn; +/* 0x240*/ + u64 qpx_pd; + u64 qpx_scqn; + u64 qpx_rcqn; + u64 qpx_aeqn; +/* 0x260*/ + u64 qpx_aaelog; + u64 qpx_ram; + u64 qpx_rdmaqe0; + u64 qpx_rdmaqe1; +/* 0x280*/ + u64 qpx_rdmaqe2; + u64 qpx_rdmaqe3; + u64 qpx_nrpsnhwm; +/* 0x298*/ + u64 reserved[(0x400 - 0x298) / 8]; +/* 0x400 extended data */ + u64 reserved_ext[(0x500 - 0x400) / 8]; +/* 0x500 */ + u64 reserved2[(0x1000 - 0x500) / 8]; +/* 0x1000 */ +}; + +#define QPX_SQADDER EHCA_BMASK_IBM(48, 63) +#define QPX_RQADDER EHCA_BMASK_IBM(48, 63) +#define QPX_AAELOG_RESET_SRQ_LIMIT EHCA_BMASK_IBM(3, 3) + +#define QPTEMM_OFFSET(x) offsetof(struct hipz_qptemm, x) + +/* MRMWPT Entry Memory Map */ +struct hipz_mrmwmm { + /* 0x00 */ + u64 mrx_hcr; + + u64 mrx_c; + u64 mrx_herr; + u64 mrx_aer; + /* 0x20 */ + u64 mrx_pp; + u64 reserved1; + u64 reserved2; + u64 reserved3; + /* 0x40 */ + u64 reserved4[(0x200 - 0x40) / 8]; + /* 0x200 */ + u64 mrx_ctl[64]; + +}; + +#define MRMWMM_OFFSET(x) offsetof(struct hipz_mrmwmm, x) + +struct hipz_qpedmm { + /* 0x00 */ + u64 reserved0[(0x400) / 8]; + /* 0x400 */ + u64 qpedx_phh; + u64 qpedx_ppsgp; + /* 0x410 */ + u64 qpedx_ppsgu; + u64 qpedx_ppdgp; + /* 0x420 */ + u64 qpedx_ppdgu; + u64 qpedx_aph; + /* 0x430 */ + u64 qpedx_apsgp; + u64 qpedx_apsgu; + /* 0x440 */ + u64 qpedx_apdgp; + u64 qpedx_apdgu; + /* 0x450 */ + u64 qpedx_apav; + u64 qpedx_apsav; + /* 0x460 */ + u64 qpedx_hcr; + u64 reserved1[4]; + /* 0x488 */ + u64 qpedx_rrl0; + /* 0x490 */ + u64 qpedx_rrrkey0; + u64 qpedx_rrva0; + /* 0x4a0 */ + u64 reserved2; + u64 qpedx_rrl1; + /* 0x4b0 */ + u64 qpedx_rrrkey1; + u64 qpedx_rrva1; + /* 0x4c0 */ + u64 reserved3; + u64 qpedx_rrl2; + /* 0x4d0 */ + u64 qpedx_rrrkey2; + u64 qpedx_rrva2; + /* 0x4e0 */ + u64 reserved4; + u64 qpedx_rrl3; + /* 0x4f0 */ + u64 qpedx_rrrkey3; + u64 qpedx_rrva3; +}; + +#define QPEDMM_OFFSET(x) offsetof(struct hipz_qpedmm, x) + +/* CQ Table Entry Memory Map */ +struct 
hipz_cqtemm { + u64 cqx_hcr; + u64 cqx_c; + u64 cqx_herr; + u64 cqx_aer; +/* 0x20 */ + u64 cqx_ptp; + u64 cqx_tp; + u64 cqx_fec; + u64 cqx_feca; +/* 0x40 */ + u64 cqx_ep; + u64 cqx_eq; +/* 0x50 */ + u64 reserved1; + u64 cqx_n0; +/* 0x60 */ + u64 cqx_n1; + u64 reserved2[(0x1000 - 0x60) / 8]; +/* + * NOTE(review): marked 0x1000 in the original, but cqx_n1 occupies + * 0x60..0x67, so reserved2 starts at 0x68 and the struct ends at 0x1008. + * Confirm whether the array length was meant to be (0x1000 - 0x68) / 8. + * Only sizeof is affected; the offsets of the named registers are as marked. + */ +}; + +#define CQX_FEC_CQE_CNT EHCA_BMASK_IBM(32, 63) +#define CQX_FECADDER EHCA_BMASK_IBM(32, 63) +#define CQX_N0_GENERATE_SOLICITED_COMP_EVENT EHCA_BMASK_IBM(0, 0) +#define CQX_N1_GENERATE_COMP_EVENT EHCA_BMASK_IBM(0, 0) + +#define CQTEMM_OFFSET(x) offsetof(struct hipz_cqtemm, x) + +/* EQ Table Entry Memory Map */ +struct hipz_eqtemm { + u64 eqx_hcr; + u64 eqx_c; + + u64 eqx_herr; + u64 eqx_aer; +/* 0x20 */ + u64 eqx_ptp; + u64 eqx_tp; + u64 eqx_ssba; + u64 eqx_psba; + +/* 0x40 */ + u64 eqx_cec; + u64 eqx_meql; + u64 eqx_xisbi; + u64 eqx_xisc; +/* 0x60 */ + u64 eqx_it; + +}; + +#define EQTEMM_OFFSET(x) offsetof(struct hipz_eqtemm, x) + +/* access control defines for MR/MW */ +#define HIPZ_ACCESSCTRL_L_WRITE 0x00800000 +#define HIPZ_ACCESSCTRL_R_WRITE 0x00400000 +#define HIPZ_ACCESSCTRL_R_READ 0x00200000 +#define HIPZ_ACCESSCTRL_R_ATOMIC 0x00100000 +#define HIPZ_ACCESSCTRL_MW_BIND 0x00080000 + +/* query hca response block */ +struct hipz_query_hca { + u32 cur_reliable_dg; + u32 cur_qp; + u32 cur_cq; + u32 cur_eq; + u32 cur_mr; + u32 cur_mw; + u32 cur_ee_context; + u32 cur_mcast_grp; + u32 cur_qp_attached_mcast_grp; + u32 reserved1; + u32 cur_ipv6_qp; + u32 cur_eth_qp; + u32 cur_hp_mr; + u32 reserved2[3]; + u32 max_rd_domain; + u32 max_qp; + u32 max_cq; + u32 max_eq; + u32 max_mr; + u32 max_hp_mr; + u32 max_mw; + u32 max_mrwpte; + u32 max_special_mrwpte; + u32 max_rd_ee_context; + u32 max_mcast_grp; + u32 max_total_mcast_qp_attach; + u32 max_mcast_qp_attach; + u32 max_raw_ipv6_qp; + u32 max_raw_ethy_qp; + u32 internal_clock_frequency; + u32 max_pd; + u32 max_ah; + u32 max_cqe; + u32 max_wqes_wq; + u32 max_partitions; + u32 max_rr_ee_context; + u32 max_rr_qp; + u32
max_rr_hca; + u32 max_act_wqs_ee_context; + u32 max_act_wqs_qp; + u32 max_sge; + u32 max_sge_rd; + u32 memory_page_size_supported; + u64 max_mr_size; + u32 local_ca_ack_delay; + u32 num_ports; + u32 vendor_id; + u32 vendor_part_id; + u32 hw_ver; + u64 node_guid; + u64 hca_cap_indicators; + u32 data_counter_register_size; + u32 max_shared_rq; + u32 max_isns_eq; + u32 max_neq; +} __attribute__ ((packed)); + +#define HCA_CAP_AH_PORT_NR_CHECK EHCA_BMASK_IBM( 0, 0) +#define HCA_CAP_ATOMIC EHCA_BMASK_IBM( 1, 1) +#define HCA_CAP_AUTO_PATH_MIG EHCA_BMASK_IBM( 2, 2) +#define HCA_CAP_BAD_P_KEY_CTR EHCA_BMASK_IBM( 3, 3) +#define HCA_CAP_SQD_RTS_PORT_CHANGE EHCA_BMASK_IBM( 4, 4) +#define HCA_CAP_CUR_QP_STATE_MOD EHCA_BMASK_IBM( 5, 5) +#define HCA_CAP_INIT_TYPE EHCA_BMASK_IBM( 6, 6) +#define HCA_CAP_PORT_ACTIVE_EVENT EHCA_BMASK_IBM( 7, 7) +#define HCA_CAP_Q_KEY_VIOL_CTR EHCA_BMASK_IBM( 8, 8) +#define HCA_CAP_WQE_RESIZE EHCA_BMASK_IBM( 9, 9) +#define HCA_CAP_RAW_PACKET_MCAST EHCA_BMASK_IBM(10, 10) +#define HCA_CAP_SHUTDOWN_PORT EHCA_BMASK_IBM(11, 11) +#define HCA_CAP_RC_LL_QP EHCA_BMASK_IBM(12, 12) +#define HCA_CAP_SRQ EHCA_BMASK_IBM(13, 13) +#define HCA_CAP_UD_LL_QP EHCA_BMASK_IBM(16, 16) +#define HCA_CAP_RESIZE_MR EHCA_BMASK_IBM(17, 17) +#define HCA_CAP_MINI_QP EHCA_BMASK_IBM(18, 18) +#define HCA_CAP_H_ALLOC_RES_SYNC EHCA_BMASK_IBM(19, 19) + +/* query port response block */ +struct hipz_query_port { + u32 state; + u32 bad_pkey_cntr; + u32 lmc; + u32 lid; + u32 subnet_timeout; + u32 qkey_viol_cntr; + u32 sm_sl; + u32 sm_lid; + u32 capability_mask; + u32 init_type_reply; + u32 pkey_tbl_len; + u32 gid_tbl_len; + u64 gid_prefix; + u32 port_nr; + u16 pkey_entries[16]; + u8 reserved1[32]; + u32 trent_size; + u32 trbuf_size; + u64 max_msg_sz; + u32 max_mtu; + u32 vl_cap; + u32 phys_pstate; + u32 phys_state; + u32 phys_speed; + u32 phys_width; + u8 reserved2[1884]; + u64 guid_entries[255]; +} __attribute__ ((packed)); + +#endif diff --git a/drivers/staging/rdma/ehca/ipz_pt_fn.c 
b/drivers/staging/rdma/ehca/ipz_pt_fn.c new file mode 100644 index 0000000..7ffc748 --- /dev/null +++ b/drivers/staging/rdma/ehca/ipz_pt_fn.c @@ -0,0 +1,289 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * internal queue handling + * + * Authors: Waleri Fomin + * Reinhard Ernst + * Christoph Raisch + * + * Copyright (c) 2005 IBM Corporation + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include + +#include "ehca_tools.h" +#include "ipz_pt_fn.h" +#include "ehca_classes.h" +/* number of eHCA pages that fit into one kernel page */ +#define PAGES_PER_KPAGE (PAGE_SIZE >> EHCA_PAGESHIFT) + +struct kmem_cache *small_qp_cache; +/* + * Return the entry at the current queue offset (as given by ipz_qeit_get()) + * and advance the offset by one queue page. + * If the advanced offset exceeds queue_length the advance is undone and NULL + * is returned. NULL is also returned, after logging an error, when the entry + * address is not a multiple of queue->pagesize; in that case the offset stays + * advanced. + */ +void *ipz_qpageit_get_inc(struct ipz_queue *queue) +{ + void *ret = ipz_qeit_get(queue); + queue->current_q_offset += queue->pagesize; + if (queue->current_q_offset > queue->queue_length) { + queue->current_q_offset -= queue->pagesize; + ret = NULL; + } + if (((u64)ret) % queue->pagesize) { + ehca_gen_err("ERROR!! not at PAGE-Boundary"); + return NULL; + } + return ret; +} +/* + * Return the entry at the current queue offset and advance the offset by one + * queue entry (qe_size). When the new offset lies beyond the last entry the + * offset wraps to 0 and toggle_state flips between 0 and 1. + */ +void *ipz_qeit_eq_get_inc(struct ipz_queue *queue) +{ + void *ret = ipz_qeit_get(queue); + u64 last_entry_in_q = queue->queue_length - queue->qe_size; + + queue->current_q_offset += queue->qe_size; + if (queue->current_q_offset > last_entry_in_q) { + queue->current_q_offset = 0; + queue->toggle_state = (~queue->toggle_state) & 1; + } + + return ret; +} +/* + * Translate addr into a byte offset within the queue by comparing it with + * __pa() of every queue page. On a match the offset is stored in *q_offset + * and 0 is returned; -EINVAL is returned when addr lies in none of the pages. + */ +int ipz_queue_abs_to_offset(struct ipz_queue *queue, u64 addr, u64 *q_offset) +{ + int i; + for (i = 0; i < queue->queue_length / queue->pagesize; i++) { + u64 page = __pa(queue->queue_pages[i]); + if (addr >= page && addr < page + queue->pagesize) { + *q_offset = addr - page + i * queue->pagesize; + return 0; + } + } + return -EINVAL; +} + +#if PAGE_SHIFT < EHCA_PAGESHIFT +#error Kernel pages must be at least as large than eHCA pages (4K) !
+#endif + +/* + * allocate pages for queue: + * outer loop allocates whole kernel pages (page aligned) and + * inner loop divides a kernel page into smaller hca queue pages. + * Returns 1 on success and 0 when a page allocation fails. On failure the + * kernel pages recorded so far are freed again; the cleanup loop stops at the + * first NULL slot, so it relies on queue_pages[] starting out zeroed. + */ +static int alloc_queue_pages(struct ipz_queue *queue, const u32 nr_of_pages) +{ + int k, f = 0; + u8 *kpage; + + while (f < nr_of_pages) { + kpage = (u8 *)get_zeroed_page(GFP_KERNEL); + if (!kpage) + goto out; + + for (k = 0; k < PAGES_PER_KPAGE && f < nr_of_pages; k++) { + queue->queue_pages[f] = (struct ipz_page *)kpage; + kpage += EHCA_PAGESIZE; + f++; + } + } + return 1; + +out: + for (f = 0; f < nr_of_pages && queue->queue_pages[f]; + f += PAGES_PER_KPAGE) + free_page((unsigned long)(queue->queue_pages)[f]); + return 0; +} +/* + * Give the queue one chunk of queue->pagesize bytes carved out of a kernel + * page that is tracked by a struct ipz_small_queue_page. + * order is ilog2(pagesize) - 9, i.e. 0 for 512 byte chunks. + * The first page on pd->free[order] is used if there is one; otherwise a + * tracking struct and a zeroed page are allocated and put on that list. + * The first clear bit of the page bitmap selects the chunk; once all + * IPZ_SPAGE_PER_KPAGE >> order chunks are in use the page moves to + * pd->full[order]. List and bitmap updates happen under pd->lock. + * On success queue_pages[0], small_page and offset of the queue are set and + * 1 is returned; 0 is returned when an allocation fails. + */ +static int alloc_small_queue_page(struct ipz_queue *queue, struct ehca_pd *pd) +{ + int order = ilog2(queue->pagesize) - 9; + struct ipz_small_queue_page *page; + unsigned long bit; + + mutex_lock(&pd->lock); + + if (!list_empty(&pd->free[order])) + page = list_entry(pd->free[order].next, + struct ipz_small_queue_page, list); + else { + page = kmem_cache_zalloc(small_qp_cache, GFP_KERNEL); + if (!page) + goto out; + + page->page = get_zeroed_page(GFP_KERNEL); + if (!page->page) { + kmem_cache_free(small_qp_cache, page); + goto out; + } + + list_add(&page->list, &pd->free[order]); + } + + bit = find_first_zero_bit(page->bitmap, IPZ_SPAGE_PER_KPAGE >> order); + __set_bit(bit, page->bitmap); + page->fill++; + + if (page->fill == IPZ_SPAGE_PER_KPAGE >> order) + list_move(&page->list, &pd->full[order]); + + mutex_unlock(&pd->lock); + + queue->queue_pages[0] = (void *)(page->page | (bit << (order + 9))); + queue->small_page = page; + queue->offset = bit << (order + 9); + return 1; + +out: + ehca_err(pd->ib_pd.device, "failed to allocate small queue page"); + mutex_unlock(&pd->lock); + return 0; +} +/* + * Give the chunk held by queue back to its ipz_small_queue_page. + * The chunk index is recovered from the in-page offset of queue_pages[0]. + * When the last chunk is released the page leaves its list and both the page + * and the tracking struct are freed after pd->lock has been dropped. A page + * that was full before this release is moved to the tail of pd->free[order]. + */ +static void free_small_queue_page(struct ipz_queue *queue, struct ehca_pd *pd) +{ + int order = ilog2(queue->pagesize) - 9; + struct ipz_small_queue_page *page =
queue->small_page; + unsigned long bit; + int free_page = 0; /* NOTE(review): this flag has the same name as the free_page() call below; presumably this only builds because free_page() is a function-like macro - confirm, and consider renaming the flag */ + + bit = ((unsigned long)queue->queue_pages[0] & ~PAGE_MASK) + >> (order + 9); + + mutex_lock(&pd->lock); + + __clear_bit(bit, page->bitmap); + page->fill--; + + if (page->fill == 0) { + list_del(&page->list); + free_page = 1; + } + + if (page->fill == (IPZ_SPAGE_PER_KPAGE >> order) - 1) + /* the page was full until we freed the chunk */ + list_move_tail(&page->list, &pd->free[order]); + + mutex_unlock(&pd->lock); + + if (free_page) { + free_page(page->page); + kmem_cache_free(small_qp_cache, page); + } +} +/* + * Initialise *queue and allocate its page pointer array and its pages. + * pagesize must not exceed PAGE_SIZE. The pointer array is zeroed and comes + * from kzalloc() with a vzalloc() fallback. With is_small set a single chunk + * is taken via alloc_small_queue_page(); otherwise nr_of_pages eHCA pages are + * allocated via alloc_queue_pages(). + * Returns 1 on success and 0 on failure (not an errno value). + */ +int ipz_queue_ctor(struct ehca_pd *pd, struct ipz_queue *queue, + const u32 nr_of_pages, const u32 pagesize, + const u32 qe_size, const u32 nr_of_sg, + int is_small) +{ + if (pagesize > PAGE_SIZE) { + ehca_gen_err("FATAL ERROR: pagesize=%x " + "is greater than kernel page size", pagesize); + return 0; + } + + /* init queue fields */ + queue->queue_length = nr_of_pages * pagesize; /* NOTE(review): u32 product, not checked for wrap-around */ + queue->pagesize = pagesize; + queue->qe_size = qe_size; + queue->act_nr_of_sg = nr_of_sg; + queue->current_q_offset = 0; + queue->toggle_state = 1; + queue->small_page = NULL; + + /* allocate queue page pointers */ + queue->queue_pages = kzalloc(nr_of_pages * sizeof(void *), + GFP_KERNEL | __GFP_NOWARN); + if (!queue->queue_pages) { + queue->queue_pages = vzalloc(nr_of_pages * sizeof(void *)); + if (!queue->queue_pages) { + ehca_gen_err("Couldn't allocate queue page list"); + return 0; + } + } + + /* allocate actual queue pages */ + if (is_small) { + if (!alloc_small_queue_page(queue, pd)) + goto ipz_queue_ctor_exit0; + } else + if (!alloc_queue_pages(queue, nr_of_pages)) + goto ipz_queue_ctor_exit0; + + return 1; + +ipz_queue_ctor_exit0: + ehca_gen_err("Couldn't alloc pages queue=%p " + "nr_of_pages=%x", queue, nr_of_pages); + kvfree(queue->queue_pages); /* NOTE(review): queue_pages keeps the stale pointer afterwards and ipz_queue_dtor() only tests it against NULL - confirm no caller runs the dtor after a failed ctor */ + + return 0; +} +/* + * Release what ipz_queue_ctor() set up: the small-queue chunk or one + * free_page() per kernel page, followed by the page pointer array. + * Returns 0 without freeing anything when queue or queue->queue_pages is + * NULL, and 1 otherwise. + */ +int ipz_queue_dtor(struct ehca_pd *pd, struct ipz_queue *queue) +{ + int i, nr_pages; + + if (!queue || !queue->queue_pages) { + ehca_gen_dbg("queue
or queue_pages is NULL"); + return 0; + } + + if (queue->small_page) + free_small_queue_page(queue, pd); + else { + nr_pages = queue->queue_length / queue->pagesize; + for (i = 0; i < nr_pages; i += PAGES_PER_KPAGE) + free_page((unsigned long)queue->queue_pages[i]); + } + + kvfree(queue->queue_pages); + + return 1; +} + +int ehca_init_small_qp_cache(void) +{ + small_qp_cache = kmem_cache_create("ehca_cache_small_qp", + sizeof(struct ipz_small_queue_page), + 0, SLAB_HWCACHE_ALIGN, NULL); + if (!small_qp_cache) + return -ENOMEM; + + return 0; +} + +void ehca_cleanup_small_qp_cache(void) +{ + kmem_cache_destroy(small_qp_cache); +} diff --git a/drivers/staging/rdma/ehca/ipz_pt_fn.h b/drivers/staging/rdma/ehca/ipz_pt_fn.h new file mode 100644 index 0000000..a801274 --- /dev/null +++ b/drivers/staging/rdma/ehca/ipz_pt_fn.h @@ -0,0 +1,289 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * internal queue handling + * + * Authors: Waleri Fomin + * Reinhard Ernst + * Christoph Raisch + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __IPZ_PT_FN_H__ +#define __IPZ_PT_FN_H__ + +#define EHCA_PAGESHIFT 12 +#define EHCA_PAGESIZE 4096UL +#define EHCA_PAGEMASK (~(EHCA_PAGESIZE-1)) +#define EHCA_PT_ENTRIES 512UL + +#include "ehca_tools.h" +#include "ehca_qes.h" + +struct ehca_pd; +struct ipz_small_queue_page; + +extern struct kmem_cache *small_qp_cache; + +/* struct generic ehca page */ +struct ipz_page { + u8 entries[EHCA_PAGESIZE]; +}; + +#define IPZ_SPAGE_PER_KPAGE (PAGE_SIZE / 512) + +struct ipz_small_queue_page { + unsigned long page; + unsigned long bitmap[IPZ_SPAGE_PER_KPAGE / BITS_PER_LONG]; + int fill; + void *mapped_addr; + u32 mmap_count; + struct list_head list; +}; + +/* struct generic queue in linux kernel virtual memory (kv) */ +struct ipz_queue { + u64 current_q_offset; /* current queue entry */ + + struct ipz_page **queue_pages; /* array of pages belonging to queue */ + u32 qe_size; /* queue entry size */ + u32 act_nr_of_sg; + u32 queue_length; /* queue length allocated in bytes */ + u32 pagesize; + u32 toggle_state; /* toggle flag - per page */ + u32 offset; /* save offset within page for small_qp */ + struct ipz_small_queue_page *small_page; +}; + +/* + * return current Queue Entry for a certain q_offset + * returns address (kv) of Queue Entry + */ +static inline void *ipz_qeit_calc(struct ipz_queue *queue, u64 q_offset) +{ + struct ipz_page *current_page; + if (q_offset >= queue->queue_length) + return NULL; + current_page = 
(queue->queue_pages)[q_offset >> EHCA_PAGESHIFT]; + return &current_page->entries[q_offset & (EHCA_PAGESIZE - 1)]; +} + +/* + * return current Queue Entry + * returns address (kv) of Queue Entry + */ +static inline void *ipz_qeit_get(struct ipz_queue *queue) +{ + return ipz_qeit_calc(queue, queue->current_q_offset); +} + +/* + * return current Queue Page , increment Queue Page iterator from + * page to page in struct ipz_queue, last increment will return 0! and + * NOT wrap + * returns address (kv) of Queue Page + * warning don't use in parallel with ipz_QE_get_inc() + */ +void *ipz_qpageit_get_inc(struct ipz_queue *queue); + +/* + * return current Queue Entry, increment Queue Entry iterator by one + * step in struct ipz_queue, will wrap in ringbuffer + * returns address (kv) of Queue Entry BEFORE increment + * warning don't use in parallel with ipz_qpageit_get_inc() + */ +static inline void *ipz_qeit_get_inc(struct ipz_queue *queue) +{ + void *ret = ipz_qeit_get(queue); + queue->current_q_offset += queue->qe_size; + if (queue->current_q_offset >= queue->queue_length) { + queue->current_q_offset = 0; + /* toggle the valid flag */ + queue->toggle_state = (~queue->toggle_state) & 1; + } + + return ret; +} + +/* + * return a bool indicating whether current Queue Entry is valid + */ +static inline int ipz_qeit_is_valid(struct ipz_queue *queue) +{ + struct ehca_cqe *cqe = ipz_qeit_get(queue); + return ((cqe->cqe_flags >> 7) == (queue->toggle_state & 1)); +} + +/* + * return current Queue Entry, increment Queue Entry iterator by one + * step in struct ipz_queue, will wrap in ringbuffer + * returns address (kv) of Queue Entry BEFORE increment + * returns 0 and does not increment, if wrong valid state + * warning don't use in parallel with ipz_qpageit_get_inc() + */ +static inline void *ipz_qeit_get_inc_valid(struct ipz_queue *queue) +{ + return ipz_qeit_is_valid(queue) ?
ipz_qeit_get_inc(queue) : NULL; +} + +/* + * returns and resets Queue Entry iterator + * returns address (kv) of first Queue Entry + */ +static inline void *ipz_qeit_reset(struct ipz_queue *queue) +{ + queue->current_q_offset = 0; + return ipz_qeit_get(queue); +} + +/* + * return the q_offset corresponding to an absolute address + */ +int ipz_queue_abs_to_offset(struct ipz_queue *queue, u64 addr, u64 *q_offset); + +/* + * return the next queue offset. don't modify the queue. + */ +static inline u64 ipz_queue_advance_offset(struct ipz_queue *queue, u64 offset) +{ + offset += queue->qe_size; + if (offset >= queue->queue_length) offset = 0; + return offset; +} + +/* struct generic page table */ +struct ipz_pt { + u64 entries[EHCA_PT_ENTRIES]; +}; + +/* struct page table for a queue, only to be used in pf */ +struct ipz_qpt { + /* queue page tables (kv), use u64 because we know the element length */ + u64 *qpts; + u32 n_qpts; + u32 n_ptes; /* number of page table entries */ + u64 *current_pte_addr; +}; + +/* + * constructor for a ipz_queue_t, placement new for ipz_queue_t, + * new for all dependent datastructors + * all QP Tables are the same + * flow: + * allocate+pin queue + * see ipz_qpt_ctor() + * returns true if ok, false if out of memory + */ +int ipz_queue_ctor(struct ehca_pd *pd, struct ipz_queue *queue, + const u32 nr_of_pages, const u32 pagesize, + const u32 qe_size, const u32 nr_of_sg, + int is_small); + +/* + * destructor for a ipz_queue_t + * -# free queue + * see ipz_queue_ctor() + * returns true if ok, false if queue was NULL-ptr of free failed + */ +int ipz_queue_dtor(struct ehca_pd *pd, struct ipz_queue *queue); + +/* + * constructor for a ipz_qpt_t, + * placement new for struct ipz_queue, new for all dependent datastructors + * all QP Tables are the same, + * flow: + * -# allocate+pin queue + * -# initialise ptcb + * -# allocate+pin PTs + * -# link PTs to a ring, according to HCA Arch, set bit62 id needed + * -# the ring must have room for exactly 
nr_of_PTEs + * see ipz_qpt_ctor() + */ +void ipz_qpt_ctor(struct ipz_qpt *qpt, + const u32 nr_of_qes, + const u32 pagesize, + const u32 qe_size, + const u8 lowbyte, const u8 toggle, + u32 * act_nr_of_QEs, u32 * act_nr_of_pages); + +/* + * return current Queue Entry, increment Queue Entry iterator by one + * step in struct ipz_queue, will wrap in ringbuffer + * returns address (kv) of Queue Entry BEFORE increment + * warning don't use in parallel with ipz_qpageit_get_inc() + * warning unpredictable results may occur if steps>act_nr_of_queue_entries + * fix EQ page problems + */ +void *ipz_qeit_eq_get_inc(struct ipz_queue *queue); + +/* + * return current Event Queue Entry, increment Queue Entry iterator + * by one step in struct ipz_queue if valid, will wrap in ringbuffer + * returns address (kv) of Queue Entry BEFORE increment + * returns 0 and does not increment, if wrong valid state + * warning don't use in parallel with ipz_queue_QPageit_get_inc() + * warning unpredictable results may occur if steps>act_nr_of_queue_entries + */ +static inline void *ipz_eqit_eq_get_inc_valid(struct ipz_queue *queue) +{ + void *ret = ipz_qeit_get(queue); + u32 qe = *(u8 *)ret; + if ((qe >> 7) != (queue->toggle_state & 1)) + return NULL; + ipz_qeit_eq_get_inc(queue); /* this is a good one */ + return ret; +} + +static inline void *ipz_eqit_eq_peek_valid(struct ipz_queue *queue) +{ + void *ret = ipz_qeit_get(queue); + u32 qe = *(u8 *)ret; + if ((qe >> 7) != (queue->toggle_state & 1)) + return NULL; + return ret; +} + +/* returns address (GX) of first queue entry */ +static inline u64 ipz_qpt_get_firstpage(struct ipz_qpt *qpt) +{ + return be64_to_cpu(qpt->qpts[0]); +} + +/* returns address (kv) of first page of queue page table */ +static inline void *ipz_qpt_get_qpt(struct ipz_qpt *qpt) +{ + return qpt->qpts; +} + +#endif /* __IPZ_PT_FN_H__ */ -- cgit v0.10.2 From eda2116f4ab6d79cfcffc202b5d2bbb0797ba013 Mon Sep 17 00:00:00 2001 From: Steve French Date: Fri, 11 Sep 2015 19:24:19 
-0500 Subject: [CIFS] mount option sec=none not displayed properly in /proc/mounts When the user specifies "sec=none" in a cifs mount, we set sec_type as unspecified (and set a flag and the username will be null) rather than setting sectype as "none" so cifs_show_security was not properly displaying it in cifs /proc/mounts entries. Signed-off-by: Steve French Reviewed-by: Jeff Layton diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 6a1119e..e739950 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -325,8 +325,11 @@ cifs_show_address(struct seq_file *s, struct TCP_Server_Info *server) static void cifs_show_security(struct seq_file *s, struct cifs_ses *ses) { - if (ses->sectype == Unspecified) + if (ses->sectype == Unspecified) { + if (ses->user_name == NULL) + seq_puts(s, ",sec=none"); return; + } seq_puts(s, ",sec="); -- cgit v0.10.2 From 728d29400488d54974d3317fe8a232b45fdb42ee Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Mon, 31 Aug 2015 16:13:47 -0700 Subject: hwmon: (nct6775) Swap STEP_UP_TIME and STEP_DOWN_TIME registers for most chips The STEP_UP_TIME and STEP_DOWN_TIME registers are swapped for all chips but NCT6775. Reported-by: Grazvydas Ignotas Reviewed-by: Jean Delvare Cc: stable@vger.kernel.org # v3.10+ Signed-off-by: Guenter Roeck diff --git a/drivers/hwmon/nct6775.c b/drivers/hwmon/nct6775.c index bd1c99d..2aaedbe 100644 --- a/drivers/hwmon/nct6775.c +++ b/drivers/hwmon/nct6775.c @@ -354,6 +354,10 @@ static const u16 NCT6775_REG_TEMP_CRIT[ARRAY_SIZE(nct6775_temp_label) - 1] /* NCT6776 specific data */ +/* STEP_UP_TIME and STEP_DOWN_TIME regs are swapped for all chips but NCT6775 */ +#define NCT6776_REG_FAN_STEP_UP_TIME NCT6775_REG_FAN_STEP_DOWN_TIME +#define NCT6776_REG_FAN_STEP_DOWN_TIME NCT6775_REG_FAN_STEP_UP_TIME + static const s8 NCT6776_ALARM_BITS[] = { 0, 1, 2, 3, 8, 21, 20, 16, /* in0.. 
in7 */ 17, -1, -1, -1, -1, -1, -1, /* in8..in14 */ @@ -3528,8 +3532,8 @@ static int nct6775_probe(struct platform_device *pdev) data->REG_FAN_PULSES = NCT6776_REG_FAN_PULSES; data->FAN_PULSE_SHIFT = NCT6775_FAN_PULSE_SHIFT; data->REG_FAN_TIME[0] = NCT6775_REG_FAN_STOP_TIME; - data->REG_FAN_TIME[1] = NCT6775_REG_FAN_STEP_UP_TIME; - data->REG_FAN_TIME[2] = NCT6775_REG_FAN_STEP_DOWN_TIME; + data->REG_FAN_TIME[1] = NCT6776_REG_FAN_STEP_UP_TIME; + data->REG_FAN_TIME[2] = NCT6776_REG_FAN_STEP_DOWN_TIME; data->REG_TOLERANCE_H = NCT6776_REG_TOLERANCE_H; data->REG_PWM[0] = NCT6775_REG_PWM; data->REG_PWM[1] = NCT6775_REG_FAN_START_OUTPUT; @@ -3600,8 +3604,8 @@ static int nct6775_probe(struct platform_device *pdev) data->REG_FAN_PULSES = NCT6779_REG_FAN_PULSES; data->FAN_PULSE_SHIFT = NCT6775_FAN_PULSE_SHIFT; data->REG_FAN_TIME[0] = NCT6775_REG_FAN_STOP_TIME; - data->REG_FAN_TIME[1] = NCT6775_REG_FAN_STEP_UP_TIME; - data->REG_FAN_TIME[2] = NCT6775_REG_FAN_STEP_DOWN_TIME; + data->REG_FAN_TIME[1] = NCT6776_REG_FAN_STEP_UP_TIME; + data->REG_FAN_TIME[2] = NCT6776_REG_FAN_STEP_DOWN_TIME; data->REG_TOLERANCE_H = NCT6776_REG_TOLERANCE_H; data->REG_PWM[0] = NCT6775_REG_PWM; data->REG_PWM[1] = NCT6775_REG_FAN_START_OUTPUT; @@ -3677,8 +3681,8 @@ static int nct6775_probe(struct platform_device *pdev) data->REG_FAN_PULSES = NCT6779_REG_FAN_PULSES; data->FAN_PULSE_SHIFT = NCT6775_FAN_PULSE_SHIFT; data->REG_FAN_TIME[0] = NCT6775_REG_FAN_STOP_TIME; - data->REG_FAN_TIME[1] = NCT6775_REG_FAN_STEP_UP_TIME; - data->REG_FAN_TIME[2] = NCT6775_REG_FAN_STEP_DOWN_TIME; + data->REG_FAN_TIME[1] = NCT6776_REG_FAN_STEP_UP_TIME; + data->REG_FAN_TIME[2] = NCT6776_REG_FAN_STEP_DOWN_TIME; data->REG_TOLERANCE_H = NCT6776_REG_TOLERANCE_H; data->REG_PWM[0] = NCT6775_REG_PWM; data->REG_PWM[1] = NCT6775_REG_FAN_START_OUTPUT; -- cgit v0.10.2 From cd1faefa66425c3fa338777773c5c017edea3439 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 30 Aug 2015 19:45:19 -0700 Subject: hwmon: (nct6775) Add support for 
NCT6793D NCT6793D is register compatible with NCT6792D. Also move nct6775_sio_names[] closer to enum kinds to simplify adding new chips. Tested-by: Grazvydas Ignotas Reviewed-by: Jean Delvare Signed-off-by: Guenter Roeck diff --git a/Documentation/hwmon/nct6775 b/Documentation/hwmon/nct6775 index f0dd3d2..76add4c 100644 --- a/Documentation/hwmon/nct6775 +++ b/Documentation/hwmon/nct6775 @@ -32,6 +32,10 @@ Supported chips: Prefix: 'nct6792' Addresses scanned: ISA address retrieved from Super I/O registers Datasheet: Available from Nuvoton upon request + * Nuvoton NCT6793D + Prefix: 'nct6793' + Addresses scanned: ISA address retrieved from Super I/O registers + Datasheet: Available from Nuvoton upon request Authors: Guenter Roeck diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig index 500b262..e13c902 100644 --- a/drivers/hwmon/Kconfig +++ b/drivers/hwmon/Kconfig @@ -1140,8 +1140,8 @@ config SENSORS_NCT6775 help If you say yes here you get support for the hardware monitoring functionality of the Nuvoton NCT6106D, NCT6775F, NCT6776F, NCT6779D, - NCT6791D, NCT6792D and compatible Super-I/O chips. This driver - replaces the w83627ehf driver for NCT6775F and NCT6776F. + NCT6791D, NCT6792D, NCT6793D, and compatible Super-I/O chips. This + driver replaces the w83627ehf driver for NCT6775F and NCT6776F. This driver can also be built as a module. If so, the module will be called nct6775. diff --git a/drivers/hwmon/nct6775.c b/drivers/hwmon/nct6775.c index 2aaedbe..8b4fa55 100644 --- a/drivers/hwmon/nct6775.c +++ b/drivers/hwmon/nct6775.c @@ -39,6 +39,7 @@ * nct6779d 15 5 5 2+6 0xc560 0xc1 0x5ca3 * nct6791d 15 6 6 2+6 0xc800 0xc1 0x5ca3 * nct6792d 15 6 6 2+6 0xc910 0xc1 0x5ca3 + * nct6793d 15 6 6 2+6 0xd120 0xc1 0x5ca3 * * #temp lists the number of monitored temperature sources (first value) plus * the number of directly connectable temperature sensors (second value). 
@@ -63,7 +64,7 @@ #define USE_ALTERNATE -enum kinds { nct6106, nct6775, nct6776, nct6779, nct6791, nct6792 }; +enum kinds { nct6106, nct6775, nct6776, nct6779, nct6791, nct6792, nct6793 }; /* used to set data->name = nct6775_device_names[data->sio_kind] */ static const char * const nct6775_device_names[] = { @@ -73,6 +74,17 @@ static const char * const nct6775_device_names[] = { "nct6779", "nct6791", "nct6792", + "nct6793", +}; + +static const char * const nct6775_sio_names[] __initconst = { + "NCT6106D", + "NCT6775F", + "NCT6776D/F", + "NCT6779D", + "NCT6791D", + "NCT6792D", + "NCT6793D", }; static unsigned short force_id; @@ -104,6 +116,7 @@ MODULE_PARM_DESC(fan_debounce, "Enable debouncing for fan RPM signal"); #define SIO_NCT6779_ID 0xc560 #define SIO_NCT6791_ID 0xc800 #define SIO_NCT6792_ID 0xc910 +#define SIO_NCT6793_ID 0xd120 #define SIO_ID_MASK 0xFFF0 enum pwm_enable { off, manual, thermal_cruise, speed_cruise, sf3, sf4 }; @@ -537,7 +550,7 @@ static const s8 NCT6791_ALARM_BITS[] = { 4, 5, 13, -1, -1, -1, /* temp1..temp6 */ 12, 9 }; /* intrusion0, intrusion1 */ -/* NCT6792 specific data */ +/* NCT6792/NCT6793 specific data */ static const u16 NCT6792_REG_TEMP_MON[] = { 0x73, 0x75, 0x77, 0x79, 0x7b, 0x7d }; @@ -1060,6 +1073,7 @@ static bool is_word_sized(struct nct6775_data *data, u16 reg) case nct6779: case nct6791: case nct6792: + case nct6793: return reg == 0x150 || reg == 0x153 || reg == 0x155 || ((reg & 0xfff0) == 0x4b0 && (reg & 0x000f) < 0x0b) || reg == 0x402 || @@ -1411,6 +1425,7 @@ static void nct6775_update_pwm_limits(struct device *dev) case nct6779: case nct6791: case nct6792: + case nct6793: reg = nct6775_read_value(data, data->REG_CRITICAL_PWM_ENABLE[i]); if (reg & data->CRITICAL_PWM_ENABLE_MASK) @@ -2826,6 +2841,7 @@ store_auto_pwm(struct device *dev, struct device_attribute *attr, case nct6779: case nct6791: case nct6792: + case nct6793: nct6775_write_value(data, data->REG_CRITICAL_PWM[nr], val); reg = nct6775_read_value(data, @@ -3260,7 
+3276,7 @@ nct6775_check_fan_inputs(struct nct6775_data *data) pwm4pin = false; pwm5pin = false; pwm6pin = false; - } else { /* NCT6779D, NCT6791D, or NCT6792D */ + } else { /* NCT6779D, NCT6791D, NCT6792D, or NCT6793D */ regval = superio_inb(sioreg, 0x1c); fan3pin = !(regval & (1 << 5)); @@ -3273,7 +3289,8 @@ nct6775_check_fan_inputs(struct nct6775_data *data) fan4min = fan4pin; - if (data->kind == nct6791 || data->kind == nct6792) { + if (data->kind == nct6791 || data->kind == nct6792 || + data->kind == nct6793) { regval = superio_inb(sioreg, 0x2d); fan6pin = (regval & (1 << 1)); pwm6pin = (regval & (1 << 0)); @@ -3647,6 +3664,7 @@ static int nct6775_probe(struct platform_device *pdev) break; case nct6791: case nct6792: + case nct6793: data->in_num = 15; data->pwm_num = 6; data->auto_pwm_num = 4; @@ -3922,6 +3940,7 @@ static int nct6775_probe(struct platform_device *pdev) case nct6779: case nct6791: case nct6792: + case nct6793: break; } @@ -3954,6 +3973,7 @@ static int nct6775_probe(struct platform_device *pdev) break; case nct6791: case nct6792: + case nct6793: tmp |= 0x7e; break; } @@ -4051,7 +4071,8 @@ static int __maybe_unused nct6775_resume(struct device *dev) if (reg != data->sio_reg_enable) superio_outb(sioreg, SIO_REG_ENABLE, data->sio_reg_enable); - if (data->kind == nct6791 || data->kind == nct6792) + if (data->kind == nct6791 || data->kind == nct6792 || + data->kind == nct6793) nct6791_enable_io_mapping(sioreg); superio_exit(sioreg); @@ -4110,15 +4131,6 @@ static struct platform_driver nct6775_driver = { .probe = nct6775_probe, }; -static const char * const nct6775_sio_names[] __initconst = { - "NCT6106D", - "NCT6775F", - "NCT6776D/F", - "NCT6779D", - "NCT6791D", - "NCT6792D", -}; - /* nct6775_find() looks for a '627 in the Super-I/O config space */ static int __init nct6775_find(int sioaddr, struct nct6775_sio_data *sio_data) { @@ -4154,6 +4166,9 @@ static int __init nct6775_find(int sioaddr, struct nct6775_sio_data *sio_data) case SIO_NCT6792_ID: 
sio_data->kind = nct6792; break; + case SIO_NCT6793_ID: + sio_data->kind = nct6793; + break; default: if (val != 0xffff) pr_debug("unsupported chip ID: 0x%04x\n", val); @@ -4179,7 +4194,8 @@ static int __init nct6775_find(int sioaddr, struct nct6775_sio_data *sio_data) superio_outb(sioaddr, SIO_REG_ENABLE, val | 0x01); } - if (sio_data->kind == nct6791 || sio_data->kind == nct6792) + if (sio_data->kind == nct6791 || sio_data->kind == nct6792 || + sio_data->kind == nct6793) nct6791_enable_io_mapping(sioaddr); superio_exit(sioaddr); @@ -4289,7 +4305,7 @@ static void __exit sensors_nct6775_exit(void) } MODULE_AUTHOR("Guenter Roeck "); -MODULE_DESCRIPTION("NCT6106D/NCT6775F/NCT6776F/NCT6779D/NCT6791D/NCT6792D driver"); +MODULE_DESCRIPTION("Driver for NCT6775F and compatible chips"); MODULE_LICENSE("GPL"); module_init(sensors_nct6775_init); -- cgit v0.10.2 From 7c5b190e115a2f7a51a85f261e7d7dca4b4bbe64 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 10 Sep 2015 21:55:27 +0200 Subject: x86/cpu: Print family/model/stepping in hex 924e101a7ab6 ("x86/debug: Dump family, model, stepping of the boot CPU") had its good intentions to dump the exact F/M/S as an aid during debugging sessions but its output can be ambiguous. Fix that: -smpboot: CPU0: Intel Core Processor (Broadwell) (fam: 06, model: 47, stepping: 02) +smpboot: CPU0: Intel Core Processor (Broadwell) (family: 0x6, model: 0x47, stepping: 0x2) Also, spell out "family". 
Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1441914927-32037-1-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 07ce52c..de22ea7 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1110,10 +1110,10 @@ void print_cpu_info(struct cpuinfo_x86 *c) else printk(KERN_CONT "%d86", c->x86); - printk(KERN_CONT " (fam: %02x, model: %02x", c->x86, c->x86_model); + printk(KERN_CONT " (family: 0x%x, model: 0x%x", c->x86, c->x86_model); if (c->x86_mask || c->cpuid_level >= 0) - printk(KERN_CONT ", stepping: %02x)\n", c->x86_mask); + printk(KERN_CONT ", stepping: 0x%x)\n", c->x86_mask); else printk(KERN_CONT ")\n"); -- cgit v0.10.2 From ebfb4988f0378e2ac3b4a0aa1ea20d724293f392 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 10 Sep 2015 11:58:27 +0200 Subject: perf/x86/intel: Fix constraint access Sasha reported that we can get here with .idx==-1, and cpuc->event_constraints unallocated. 
Suggested-by: Stephane Eranian Reported-by: Sasha Levin Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Fixes: b371b5943178 ("perf/x86: Fix event/group validation") Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 3f124d5..f41e4dc 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -2316,9 +2316,12 @@ static struct event_constraint * intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx, struct perf_event *event) { - struct event_constraint *c1 = cpuc->event_constraint[idx]; + struct event_constraint *c1 = NULL; struct event_constraint *c2; + if (idx >= 0) /* fake does < 0 */ + c1 = cpuc->event_constraint[idx]; + /* * first time only * - static constraint: no change across incremental scheduling calls -- cgit v0.10.2 From 2619d7e9c92d524cb155ec89fd72875321512e5b Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 9 Sep 2015 16:07:30 -0700 Subject: time: Fix timekeeping_freqadjust()'s incorrect use of abs() instead of abs64() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The internal clocksteering done for fine-grained error correction uses a logarithmic approximation, so any time adjtimex() adjusts the clock steering, timekeeping_freqadjust() quickly approximates the correct clock frequency over a series of ticks. Unfortunately, the logic in timekeeping_freqadjust(), introduced in commit: dc491596f639 ("timekeeping: Rework frequency adjustments to work better w/ nohz") used the abs() function with a s64 error value to calculate the size of the approximated adjustment to be made. Per include/linux/kernel.h: "abs() should not be used for 64-bit types (s64, u64, long long) - use abs64()". 
Thus on 32-bit platforms, this resulted in the clocksteering to take a quite dampened random walk trying to converge on the proper frequency, which caused the adjustments to be made much slower than intended (most easily observed when large adjustments are made). This patch fixes the issue by using abs64() instead. Reported-by: Nuno Gonçalves Tested-by: Nuno Goncalves Signed-off-by: John Stultz Cc: # v3.17+ Cc: Linus Torvalds Cc: Miroslav Lichvar Cc: Peter Zijlstra Cc: Prarit Bhargava Cc: Richard Cochran Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1441840051-20244-1-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index f6ee2e6..3739ac6 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1614,7 +1614,7 @@ static __always_inline void timekeeping_freqadjust(struct timekeeper *tk, negative = (tick_error < 0); /* Sort out the magnitude of the correction */ - tick_error = abs(tick_error); + tick_error = abs64(tick_error); for (adj = 0; tick_error > interval; adj++) tick_error >>= 1; -- cgit v0.10.2 From caa470475d9b59eeff093ae650800d34612c4379 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 11 Sep 2015 12:36:12 -0300 Subject: perf header: Fixup reading of HEADER_NRCPUS feature The original patch introducing this header wrote the number of CPUs available and online in one order and then swapped those values when reading, fix it.
Before: # perf record usleep 1 # perf report --header-only | grep 'nrcpus \(online\|avail\)' # nrcpus online : 4 # nrcpus avail : 4 # echo 0 > /sys/devices/system/cpu/cpu2/online # perf record usleep 1 # perf report --header-only | grep 'nrcpus \(online\|avail\)' # nrcpus online : 4 # nrcpus avail : 3 # echo 0 > /sys/devices/system/cpu/cpu1/online # perf record usleep 1 # perf report --header-only | grep 'nrcpus \(online\|avail\)' # nrcpus online : 4 # nrcpus avail : 2 After the fix, bringing back the CPUs online: # perf report --header-only | grep 'nrcpus \(online\|avail\)' # nrcpus online : 2 # nrcpus avail : 4 # echo 1 > /sys/devices/system/cpu/cpu2/online # perf record usleep 1 # perf report --header-only | grep 'nrcpus \(online\|avail\)' # nrcpus online : 3 # nrcpus avail : 4 # echo 1 > /sys/devices/system/cpu/cpu1/online # perf record usleep 1 # perf report --header-only | grep 'nrcpus \(online\|avail\)' # nrcpus online : 4 # nrcpus avail : 4 Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Kan Liang Cc: Stephane Eranian Cc: Wang Nan Fixes: fbe96f29ce4b ("perf tools: Make perf.data more self-descriptive (v8)") Link: http://lkml.kernel.org/r/20150911153323.GP23511@kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 4181454..fce6634 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1438,7 +1438,7 @@ static int process_nrcpus(struct perf_file_section *section __maybe_unused, if (ph->needs_swap) nr = bswap_32(nr); - ph->env.nr_cpus_online = nr; + ph->env.nr_cpus_avail = nr; ret = readn(fd, &nr, sizeof(nr)); if (ret != sizeof(nr)) @@ -1447,7 +1447,7 @@ static int process_nrcpus(struct perf_file_section *section __maybe_unused, if (ph->needs_swap) nr = bswap_32(nr); - ph->env.nr_cpus_avail = nr; + ph->env.nr_cpus_online = nr; return 0; } -- cgit v0.10.2 From 9e26b0b114adb321a9bf41520bac304ef49e77d1 Mon 
Sep 17 00:00:00 2001 From: Peng Fan Date: Sun, 23 Aug 2015 21:11:52 +0800 Subject: gpio: mxc: need to check return value of irq_alloc_generic_chip Need to check return value of irq_alloc_generic_chip, because it may return NULL. 1. Change mxc_gpio_init_gc return type from void to int. 2. Add a new label out_irqdomain_remove to remove the irq domain when mxc_gpio_init_gc fails. Signed-off-by: Peng Fan Cc: Alexandre Courbot [Manually rebased] Signed-off-by: Linus Walleij diff --git a/drivers/gpio/gpio-mxc.c b/drivers/gpio/gpio-mxc.c index b752b56..8813aba 100644 --- a/drivers/gpio/gpio-mxc.c +++ b/drivers/gpio/gpio-mxc.c @@ -339,13 +339,15 @@ static int gpio_set_wake_irq(struct irq_data *d, u32 enable) return 0; } -static void mxc_gpio_init_gc(struct mxc_gpio_port *port, int irq_base) +static int mxc_gpio_init_gc(struct mxc_gpio_port *port, int irq_base) { struct irq_chip_generic *gc; struct irq_chip_type *ct; gc = irq_alloc_generic_chip("gpio-mxc", 1, irq_base, port->base, handle_level_irq); + if (!gc) + return -ENOMEM; gc->private = port; ct = gc->chip_types; @@ -360,6 +362,8 @@ static void mxc_gpio_init_gc(struct mxc_gpio_port *port, int irq_base) irq_setup_generic_chip(gc, IRQ_MSK(32), IRQ_GC_INIT_NESTED_LOCK, IRQ_NOREQUEST, 0); + + return 0; } static void mxc_gpio_get_hw(struct platform_device *pdev) @@ -477,12 +481,16 @@ static int mxc_gpio_probe(struct platform_device *pdev) } /* gpio-mxc can be a generic irq chip */ - mxc_gpio_init_gc(port, irq_base); + err = mxc_gpio_init_gc(port, irq_base); + if (err < 0) + goto out_irqdomain_remove; list_add_tail(&port->node, &mxc_gpio_ports); return 0; +out_irqdomain_remove: + irq_domain_remove(port->domain); out_irqdesc_free: irq_free_descs(irq_base, 32); out_gpiochip_remove: -- cgit v0.10.2 From 1bbc557d976b4e5ae9a41d619bd79f09ccac9afc Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Sun, 23 Aug 2015 21:11:53 +0800 Subject: gpio: mxs: need to check return value of irq_alloc_generic_chip Need to check return value of
irq_alloc_generic_chip, because it may return NULL. 1. Change mxs_gpio_init_gc return type from void to int. 2. Add a new label out_irqdomain_remove to remove the irq domain when mxs_gpio_init_gc fails. Signed-off-by: Peng Fan Cc: Alexandre Courbot Signed-off-by: Linus Walleij diff --git a/drivers/gpio/gpio-mxs.c b/drivers/gpio/gpio-mxs.c index b7f383e..1387385 100644 --- a/drivers/gpio/gpio-mxs.c +++ b/drivers/gpio/gpio-mxs.c @@ -196,13 +196,16 @@ static int mxs_gpio_set_wake_irq(struct irq_data *d, unsigned int enable) return 0; } -static void __init mxs_gpio_init_gc(struct mxs_gpio_port *port, int irq_base) +static int __init mxs_gpio_init_gc(struct mxs_gpio_port *port, int irq_base) { struct irq_chip_generic *gc; struct irq_chip_type *ct; gc = irq_alloc_generic_chip("gpio-mxs", 1, irq_base, port->base, handle_level_irq); + if (!gc) + return -ENOMEM; + gc->private = port; ct = gc->chip_types; @@ -216,6 +219,8 @@ static void __init mxs_gpio_init_gc(struct mxs_gpio_port *port, int irq_base) irq_setup_generic_chip(gc, IRQ_MSK(32), IRQ_GC_INIT_NESTED_LOCK, IRQ_NOREQUEST, 0); + + return 0; } static int mxs_gpio_to_irq(struct gpio_chip *gc, unsigned offset) @@ -317,7 +322,9 @@ static int mxs_gpio_probe(struct platform_device *pdev) } /* gpio-mxs can be a generic irq chip */ - mxs_gpio_init_gc(port, irq_base); + err = mxs_gpio_init_gc(port, irq_base); + if (err < 0) + goto out_irqdomain_remove; /* setup one handler for each entry */ irq_set_chained_handler_and_data(port->irq, mxs_gpio_irq_handler, @@ -343,6 +350,8 @@ static int mxs_gpio_probe(struct platform_device *pdev) out_bgpio_remove: bgpio_remove(&port->bgc); +out_irqdomain_remove: + irq_domain_remove(port->domain); out_irqdesc_free: irq_free_descs(irq_base, 32); return err; -- cgit v0.10.2 From aad38b75fb632200eb64282469458d21ce2cc39a Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 25 Aug 2015 09:12:43 +0000 Subject: gpio: rcar: GPIO_RCAR doesn't relate to ARM 8cd1470("gpio: rcar: Add r8a7795 (R-Car
H3) support") added GPIO support for r8a7795. r8a7795 based on CONFIG_ARM64. OTOH, GPIO_RCAR driver can be compiled fine on non-ARM. This patch removed ARM dependency for it. Signed-off-by: Kuninori Morimoto Acked-by: Geert Uytterhoeven Acked-by: Simon Horman Signed-off-by: Linus Walleij diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index b4fc9e4..8949b3f 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -356,7 +356,7 @@ config GPIO_PXA config GPIO_RCAR tristate "Renesas R-Car GPIO" - depends on ARM && (ARCH_SHMOBILE || COMPILE_TEST) + depends on ARCH_SHMOBILE || COMPILE_TEST select GPIOLIB_IRQCHIP help Say yes here to support GPIO on Renesas R-Car SoCs. -- cgit v0.10.2 From e20538b82f1ffcc06e68feb117f24f211cff7a4d Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Fri, 28 Aug 2015 09:44:18 -0700 Subject: gpio: Propagate errors from chip->get() It's possible to have gpio chips hanging off unreliable remote buses where the get() operation will fail to acquire a readout of the current gpio state. Propagate these errors to the consumer so that they can act on, retry or ignore these failing reads, instead of treating them as the line being held high. Signed-off-by: Bjorn Andersson Reviewed-by: Alexandre Courbot Signed-off-by: Linus Walleij diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 980c1f8..5db3445 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -1174,15 +1174,16 @@ EXPORT_SYMBOL_GPL(gpiod_is_active_low); * that the GPIO was actually requested. */ -static bool _gpiod_get_raw_value(const struct gpio_desc *desc) +static int _gpiod_get_raw_value(const struct gpio_desc *desc) { struct gpio_chip *chip; - bool value; int offset; + int value; chip = desc->chip; offset = gpio_chip_hwgpio(desc); - value = chip->get ? chip->get(chip, offset) : false; + value = chip->get ? chip->get(chip, offset) : -EIO; + value = value < 0 ? 
value : !!value; trace_gpio_value(desc_to_gpio(desc), 1, value); return value; } @@ -1192,7 +1193,7 @@ static bool _gpiod_get_raw_value(const struct gpio_desc *desc) * @desc: gpio whose value will be returned * * Return the GPIO's raw value, i.e. the value of the physical line disregarding - * its ACTIVE_LOW status. + * its ACTIVE_LOW status, or negative errno on failure. * * This function should be called from contexts where we cannot sleep, and will * complain if the GPIO chip functions potentially sleep. @@ -1212,7 +1213,7 @@ EXPORT_SYMBOL_GPL(gpiod_get_raw_value); * @desc: gpio whose value will be returned * * Return the GPIO's logical value, i.e. taking the ACTIVE_LOW status into - * account. + * account, or negative errno on failure. * * This function should be called from contexts where we cannot sleep, and will * complain if the GPIO chip functions potentially sleep. @@ -1226,6 +1227,9 @@ int gpiod_get_value(const struct gpio_desc *desc) WARN_ON(desc->chip->can_sleep); value = _gpiod_get_raw_value(desc); + if (value < 0) + return value; + if (test_bit(FLAG_ACTIVE_LOW, &desc->flags)) value = !value; @@ -1548,7 +1552,7 @@ EXPORT_SYMBOL_GPL(gpiochip_unlock_as_irq); * @desc: gpio whose value will be returned * * Return the GPIO's raw value, i.e. the value of the physical line disregarding - * its ACTIVE_LOW status. + * its ACTIVE_LOW status, or negative errno on failure. * * This function is to be called from contexts that can sleep. */ @@ -1566,7 +1570,7 @@ EXPORT_SYMBOL_GPL(gpiod_get_raw_value_cansleep); * @desc: gpio whose value will be returned * * Return the GPIO's logical value, i.e. taking the ACTIVE_LOW status into - * account. + * account, or negative errno on failure. * * This function is to be called from contexts that can sleep. 
*/ @@ -1579,6 +1583,9 @@ int gpiod_get_value_cansleep(const struct gpio_desc *desc) return 0; value = _gpiod_get_raw_value(desc); + if (value < 0) + return value; + if (test_bit(FLAG_ACTIVE_LOW, &desc->flags)) value = !value; -- cgit v0.10.2 From 69de52ba321dda8dd7f632d1e480983494325ba0 Mon Sep 17 00:00:00 2001 From: Dirk Behme Date: Wed, 2 Sep 2015 20:07:09 +0200 Subject: Documentation: gpio: board: add flags parameter to gpiod_get*() functions With commit 39b2bbe3d715 ("gpio: add flags argument to gpiod_get*() functions") the gpiod_get*() functions got a 'flags' parameter. Reflect this in the documentation, too. Signed-off-by: Dirk Behme Signed-off-by: Linus Walleij diff --git a/Documentation/gpio/board.txt b/Documentation/gpio/board.txt index b80606d..9edd5af 100644 --- a/Documentation/gpio/board.txt +++ b/Documentation/gpio/board.txt @@ -39,11 +39,11 @@ This property will make GPIOs 15, 16 and 17 available to the driver under the struct gpio_desc *red, *green, *blue, *power; - red = gpiod_get_index(dev, "led", 0); - green = gpiod_get_index(dev, "led", 1); - blue = gpiod_get_index(dev, "led", 2); + red = gpiod_get_index(dev, "led", 0, GPIOD_OUT_HIGH); + green = gpiod_get_index(dev, "led", 1, GPIOD_OUT_HIGH); + blue = gpiod_get_index(dev, "led", 2, GPIOD_OUT_HIGH); - power = gpiod_get(dev, "power"); + power = gpiod_get(dev, "power", GPIOD_OUT_HIGH); The led GPIOs will be active-high, while the power GPIO will be active-low (i.e. gpiod_is_active_low(power) will be true). 
@@ -142,13 +142,14 @@ The driver controlling "foo.0" will then be able to obtain its GPIOs as follows: struct gpio_desc *red, *green, *blue, *power; - red = gpiod_get_index(dev, "led", 0); - green = gpiod_get_index(dev, "led", 1); - blue = gpiod_get_index(dev, "led", 2); + red = gpiod_get_index(dev, "led", 0, GPIOD_OUT_HIGH); + green = gpiod_get_index(dev, "led", 1, GPIOD_OUT_HIGH); + blue = gpiod_get_index(dev, "led", 2, GPIOD_OUT_HIGH); - power = gpiod_get(dev, "power"); - gpiod_direction_output(power, 1); + power = gpiod_get(dev, "power", GPIOD_OUT_HIGH); -Since the "power" GPIO is mapped as active-low, its actual signal will be 0 -after this code. Contrary to the legacy integer GPIO interface, the active-low -property is handled during mapping and is thus transparent to GPIO consumers. +Since the "led" GPIOs are mapped as active-high, this example will switch their +signals to 1, i.e. enabling the LEDs. And for the "power" GPIO, which is mapped +as active-low, its actual signal will be 0 after this code. Contrary to the legacy +integer GPIO interface, the active-low property is handled during mapping and is +thus transparent to GPIO consumers. -- cgit v0.10.2 From 87e77e46c61a9333227ab41cdefb1875758a4f13 Mon Sep 17 00:00:00 2001 From: Dirk Behme Date: Wed, 2 Sep 2015 20:07:10 +0200 Subject: Documentation: gpio: board: describe the con_id parameter The con_id parameter has to match the GPIO description and is automatically extended by the GPIO suffix if not NULL. I had to look into the code to understand this and properly find the GPIO I've been looking for, so document this. 
Signed-off-by: Dirk Behme Acked-by: Alexandre Courbot Signed-off-by: Linus Walleij diff --git a/Documentation/gpio/board.txt b/Documentation/gpio/board.txt index 9edd5af..5fa069a 100644 --- a/Documentation/gpio/board.txt +++ b/Documentation/gpio/board.txt @@ -48,6 +48,15 @@ This property will make GPIOs 15, 16 and 17 available to the driver under the The led GPIOs will be active-high, while the power GPIO will be active-low (i.e. gpiod_is_active_low(power) will be true). +The second parameter of the gpiod_get() functions, the con_id string, has to be +the <function>-prefix of the GPIO suffixes ("gpios" or "gpio", automatically +looked up by the gpiod functions internally) used in the device tree. With above +"led-gpios" example, use the prefix without the "-" as con_id parameter: "led". + +Internally, the GPIO subsystem prefixes the GPIO suffix ("gpios" or "gpio") +with the string passed in con_id to get the resulting string +(snprintf(... "%s-%s", con_id, gpio_suffixes[]). + ACPI ---- ACPI also supports function names for GPIOs in a similar fashion to DT. diff --git a/Documentation/gpio/consumer.txt b/Documentation/gpio/consumer.txt index a206639..e000502 100644 --- a/Documentation/gpio/consumer.txt +++ b/Documentation/gpio/consumer.txt @@ -39,6 +39,9 @@ device that displays digits), an additional index argument can be specified: const char *con_id, unsigned int idx, enum gpiod_flags flags) +For a more detailed description of the con_id parameter in the DeviceTree case +see Documentation/gpio/board.txt + The flags parameter is used to optionally specify a direction and initial value for the GPIO. Values can be: -- cgit v0.10.2 From e799f35c32ea940222b568abf38d6abcab7fa8c1 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Sun, 30 Aug 2015 23:58:44 +0200 Subject: gpio: sx150x: Remove unnecessary MODULE_ALIAS() The driver has an I2C device id table that is used to create the module aliases and also "sx150x" isn't a supported I2C id, so it's never used.
Signed-off-by: Javier Martinez Canillas Signed-off-by: Linus Walleij diff --git a/drivers/gpio/gpio-sx150x.c b/drivers/gpio/gpio-sx150x.c index 458d9d7..9c6b967 100644 --- a/drivers/gpio/gpio-sx150x.c +++ b/drivers/gpio/gpio-sx150x.c @@ -706,4 +706,3 @@ module_exit(sx150x_exit); MODULE_AUTHOR("Gregory Bean "); MODULE_DESCRIPTION("Driver for Semtech SX150X I2C GPIO Expanders"); MODULE_LICENSE("GPL v2"); -MODULE_ALIAS("i2c:sx150x"); -- cgit v0.10.2 From 5e606abef57a89b3ca25f5d97a953c6cdad7cbac Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Fri, 28 Aug 2015 11:44:49 -0700 Subject: gpio: omap: Fix gpiochip_add() handling for deferred probe Currently gpio-omap breaks if gpiochip_add() returns -EPROBE_DEFER: [ 0.570000] gpiochip_add: GPIOs 0..31 (gpio) failed to register [ 0.570000] omap_gpio 48310000.gpio: Could not register gpio chip -517 ... [ 3.670000] omap_gpio 48310000.gpio: Unbalanced pm_runtime_enable! Let's fix the issue by adding the missing pm_runtime_put() on error. Cc: Grygorii Strashko Cc: Javier Martinez Canillas Cc: Kevin Hilman Cc: Santosh Shilimkar Acked-by: Santosh Shilimkar Signed-off-by: Tony Lindgren Signed-off-by: Linus Walleij diff --git a/drivers/gpio/gpio-omap.c b/drivers/gpio/gpio-omap.c index 2ae0d47..892a9d1 100644 --- a/drivers/gpio/gpio-omap.c +++ b/drivers/gpio/gpio-omap.c @@ -1253,8 +1253,11 @@ static int omap_gpio_probe(struct platform_device *pdev) omap_gpio_mod_init(bank); ret = omap_gpio_chip_init(bank, irqc); - if (ret) + if (ret) { + pm_runtime_put_sync(bank->dev); + pm_runtime_disable(bank->dev); return ret; + } omap_gpio_show_rev(bank); -- cgit v0.10.2 From ae80d64ee8c88b77c58254bcdc5c0981faab672d Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Tue, 1 Sep 2015 10:46:15 +0200 Subject: Documentation: gpio: Explain that <function>-gpio is also supported The GPIO documentation mentions that GPIOs are mapped by defining a <function>-gpios property in the consumer device's node but a -gpio suffix is also supported after commit:
dd34c37aa3e8 ("gpio: of: Allow -gpio suffix for property names") Update the documentation to match the implementation. Signed-off-by: Javier Martinez Canillas Signed-off-by: Linus Walleij diff --git a/Documentation/gpio/board.txt b/Documentation/gpio/board.txt index 5fa069a..f59c43b 100644 --- a/Documentation/gpio/board.txt +++ b/Documentation/gpio/board.txt @@ -21,8 +21,8 @@ exact way to do it depends on the GPIO controller providing the GPIOs, see the device tree bindings for your controller. GPIOs mappings are defined in the consumer device's node, in a property named -<function>-gpios, where <function> is the function the driver will request -through gpiod_get(). For example: +either <function>-gpios or <function>-gpio, where <function> is the function +the driver will request through gpiod_get(). For example: foo_device { compatible = "acme,foo"; @@ -31,7 +31,7 @@ through gpiod_get(). For example: <&gpio 16 GPIO_ACTIVE_HIGH>, /* green */ <&gpio 17 GPIO_ACTIVE_HIGH>; /* blue */ - power-gpios = <&gpio 1 GPIO_ACTIVE_LOW>; + power-gpio = <&gpio 1 GPIO_ACTIVE_LOW>; }; This property will make GPIOs 15, 16 and 17 available to the driver under the -- cgit v0.10.2 From 46d4f7c25e1bb59b1663878b843a7ec06eaf5806 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Thu, 3 Sep 2015 10:31:27 -0700 Subject: gpio: omap: Fix GPIO numbering for deferred probe If gpio-omap probe fails with -EPROBE_DEFER, the GPIO numbering keeps increasing. Only increase the gpio count if gpiochip_add() was successful as otherwise the numbers will increase for each probe attempt.
Cc: Javier Martinez Canillas Cc: Kevin Hilman Cc: Santosh Shilimkar Reviewed-by: Grygorii Strashko Signed-off-by: Tony Lindgren Signed-off-by: Linus Walleij diff --git a/drivers/gpio/gpio-omap.c b/drivers/gpio/gpio-omap.c index 892a9d1..072af52 100644 --- a/drivers/gpio/gpio-omap.c +++ b/drivers/gpio/gpio-omap.c @@ -1098,7 +1098,6 @@ static int omap_gpio_chip_init(struct gpio_bank *bank, struct irq_chip *irqc) } else { bank->chip.label = "gpio"; bank->chip.base = gpio; - gpio += bank->width; } bank->chip.ngpio = bank->width; @@ -1108,6 +1107,9 @@ static int omap_gpio_chip_init(struct gpio_bank *bank, struct irq_chip *irqc) return ret; } + if (!bank->is_mpuio) + gpio += bank->width; + #ifdef CONFIG_ARCH_OMAP1 /* * REVISIT: Once we have OMAP1 supporting SPARSE_IRQ, we can drop -- cgit v0.10.2 From d259ec26a6c541a5437e9ed0a1e1891342af3cff Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Mon, 24 Aug 2015 23:12:26 +0200 Subject: pinctrl: qcom: ssbi: convert null test to IS_ERR test Since commit 323de9efdf3e ("pinctrl: make pinctrl_register() return proper error code"), pinctrl_register returns an error code rather than NULL on failure. Update some drivers that were introduced more recently. The semantic patch that makes this change is as follows: (http://coccinelle.lip6.fr/) // @@ expression e,e1,e2; @@ e = pinctrl_register(...) ... when != e = e1 if ( - e == NULL + IS_ERR(e) ) { ... 
return - e2 + PTR_ERR(e) ; } // Signed-off-by: Julia Lawall Signed-off-by: Linus Walleij diff --git a/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c b/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c index c978b31..e1a3721 100644 --- a/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c +++ b/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c @@ -723,9 +723,9 @@ static int pm8xxx_gpio_probe(struct platform_device *pdev) #endif pctrl->pctrl = pinctrl_register(&pctrl->desc, &pdev->dev, pctrl); - if (!pctrl->pctrl) { + if (IS_ERR(pctrl->pctrl)) { dev_err(&pdev->dev, "couldn't register pm8xxx gpio driver\n"); - return -ENODEV; + return PTR_ERR(pctrl->pctrl); } pctrl->chip = pm8xxx_gpio_template; diff --git a/drivers/pinctrl/qcom/pinctrl-ssbi-mpp.c b/drivers/pinctrl/qcom/pinctrl-ssbi-mpp.c index 2d1b69f..6652b8d 100644 --- a/drivers/pinctrl/qcom/pinctrl-ssbi-mpp.c +++ b/drivers/pinctrl/qcom/pinctrl-ssbi-mpp.c @@ -814,9 +814,9 @@ static int pm8xxx_mpp_probe(struct platform_device *pdev) #endif pctrl->pctrl = pinctrl_register(&pctrl->desc, &pdev->dev, pctrl); - if (!pctrl->pctrl) { + if (IS_ERR(pctrl->pctrl)) { dev_err(&pdev->dev, "couldn't register pm8xxx mpp driver\n"); - return -ENODEV; + return PTR_ERR(pctrl->pctrl); } pctrl->chip = pm8xxx_mpp_template; -- cgit v0.10.2 From 5a99233e9bcacc7ea23e173a75bbb7301abd3e6f Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Mon, 24 Aug 2015 23:12:27 +0200 Subject: pinctrl: digicolor: convert null test to IS_ERR test Since commit 323de9efdf3e ("pinctrl: make pinctrl_register() return proper error code"), pinctrl_register returns an error code rather than NULL on failure. Update a driver that was introduced more recently. The semantic patch that makes this change is as follows: (http://coccinelle.lip6.fr/) // @@ expression e,e1,e2; @@ e = pinctrl_register(...) ... when != e = e1 if ( - e == NULL + IS_ERR(e) ) { ... 
return - e2 + PTR_ERR(e) ; } // Signed-off-by: Julia Lawall Acked-by: Baruch Siach Signed-off-by: Linus Walleij diff --git a/drivers/pinctrl/pinctrl-digicolor.c b/drivers/pinctrl/pinctrl-digicolor.c index 461fffc..11f8b83 100644 --- a/drivers/pinctrl/pinctrl-digicolor.c +++ b/drivers/pinctrl/pinctrl-digicolor.c @@ -337,9 +337,9 @@ static int dc_pinctrl_probe(struct platform_device *pdev) pmap->dev = &pdev->dev; pmap->pctl = pinctrl_register(pctl_desc, &pdev->dev, pmap); - if (!pmap->pctl) { + if (IS_ERR(pmap->pctl)) { dev_err(&pdev->dev, "pinctrl driver registration failed\n"); - return -EINVAL; + return PTR_ERR(pmap->pctl); } ret = dc_gpiochip_add(pmap, pdev->dev.of_node); -- cgit v0.10.2 From 163dc9f39a26b41fc49319fce4145b35f9705789 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 1 Aug 2015 13:22:38 +0900 Subject: pinctrl: join lines that can be a single line within 80 columns There is no reason to break a line shorter than 80 columns. Signed-off-by: Masahiro Yamada Signed-off-by: Linus Walleij diff --git a/drivers/pinctrl/pinmux.c b/drivers/pinctrl/pinmux.c index 67e08cb..29984b3 100644 --- a/drivers/pinctrl/pinmux.c +++ b/drivers/pinctrl/pinmux.c @@ -313,8 +313,7 @@ static int pinmux_func_name_to_selector(struct pinctrl_dev *pctldev, /* See if this pctldev has this function */ while (selector < nfuncs) { - const char *fname = ops->get_function_name(pctldev, - selector); + const char *fname = ops->get_function_name(pctldev, selector); if (!strcmp(function, fname)) return selector; -- cgit v0.10.2 From 942cde724075f840ded89390b10dce1a47a4d712 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Thu, 3 Sep 2015 10:34:30 -0700 Subject: pinctrl: core: Warn about NULL gpio_chip in pinctrl_ready_for_gpio_range() If the gpio driver is confused about the numbers for gpio-ranges, pinctrl_ready_for_gpio_range() may get called with invalid GPIO causing a NULL pointer exception. Let's instead provide a warning that allows fixing the problem and return with error. 
Signed-off-by: Tony Lindgren Signed-off-by: Linus Walleij diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c index 69723e0..9638a00 100644 --- a/drivers/pinctrl/core.c +++ b/drivers/pinctrl/core.c @@ -349,6 +349,9 @@ static bool pinctrl_ready_for_gpio_range(unsigned gpio) struct pinctrl_gpio_range *range = NULL; struct gpio_chip *chip = gpio_to_chip(gpio); + if (WARN(!chip, "no gpio_chip for gpio%i?", gpio)) + return false; + mutex_lock(&pinctrldev_list_mutex); /* Loop over the pin controllers */ -- cgit v0.10.2 From fa84b52cb681b27e6b5e003457562e25a239b9c4 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 8 Sep 2015 16:46:40 +0200 Subject: pinctrl: samsung: s3c24xx: fix syntax error ?SYNTAX ERROR irq_desc_get_irq_chip() does not exist. It should be irq_desc_get_chip(). Tested by compiling s3c2410_defconfig. Cc: Thomas Gleixner Reported-by: Paul Gortmaker Signed-off-by: Linus Walleij diff --git a/drivers/pinctrl/samsung/pinctrl-s3c24xx.c b/drivers/pinctrl/samsung/pinctrl-s3c24xx.c index 019844d..d168b39 100644 --- a/drivers/pinctrl/samsung/pinctrl-s3c24xx.c +++ b/drivers/pinctrl/samsung/pinctrl-s3c24xx.c @@ -361,7 +361,7 @@ static inline void s3c24xx_demux_eint(struct irq_desc *desc, u32 offset, u32 range) { struct s3c24xx_eint_data *data = irq_desc_get_handler_data(desc); - struct irq_chip *chip = irq_desc_get_irq_chip(desc); + struct irq_chip *chip = irq_desc_get_chip(desc); struct samsung_pinctrl_drv_data *d = data->drvdata; unsigned int pend, mask; -- cgit v0.10.2 From 1e6428124fe22906be0de1622c8fed8e50e5de05 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 5 Sep 2015 08:58:10 +0200 Subject: x86/vm86: Fix the misleading CONFIG_VM86 Kconfig help text The CONFIG_VM86 Kconfig help text is actively misleading, so fix it: - Don't mark it 'obsolete' in the text as we'll support the ABI as long as CPUs support it. - Qualify the part about software emulation and mention that for some apps you want a real vm86 mode. 
- Don't scare users away from the option, instead explain what it does. Reported-by: Stas Sergeev Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Austin S Hemmelgarn Cc: Borislav Petkov Cc: Brian Gerst Cc: Josh Boyer Cc: Kees Cook Cc: Linus Torvalds Cc: Matthew Garrett Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 48f7433..d288153 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1004,7 +1004,7 @@ config X86_THERMAL_VECTOR depends on X86_MCE_INTEL config X86_LEGACY_VM86 - bool "Legacy VM86 support (obsolete)" + bool "Legacy VM86 support" default n depends on X86_32 ---help--- @@ -1016,19 +1016,20 @@ config X86_LEGACY_VM86 available to accelerate real mode DOS programs. However, any recent version of DOSEMU, X, or vbetool should be fully functional even without kernel VM86 support, as they will all - fall back to (pretty well performing) software emulation. + fall back to software emulation. Nevertheless, if you are using + a 16-bit DOS program where 16-bit performance matters, vm86 + mode might be faster than emulation and you might want to + enable this option. - Anything that works on a 64-bit kernel is unlikely to need - this option, as 64-bit kernels don't, and can't, support V8086 - mode. This option is also unrelated to 16-bit protected mode - and is not needed to run most 16-bit programs under Wine. + Note that any app that works on a 64-bit kernel is unlikely to + need this option, as 64-bit kernels don't, and can't, support + V8086 mode. This option is also unrelated to 16-bit protected + mode and is not needed to run most 16-bit programs under Wine. - Enabling this option adds considerable attack surface to the - kernel and slows down system calls and exception handling. + Enabling this option increases the complexity of the kernel + and slows down exception handling a tiny bit. 
- Unless you use very old userspace or need the last drop of - performance in your real mode DOS games and can't use KVM, - say N here. + If unsure, say N here. config VM86 bool -- cgit v0.10.2 From eef7635a22f6b144206b5ca2f1398f637acffc4d Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 11 Sep 2015 09:34:26 +0530 Subject: clockevents: Remove unused set_mode() callback All users are migrated to the per-state callbacks, get rid of the unused interface and the core support code. Signed-off-by: Viresh Kumar Signed-off-by: Thomas Gleixner Cc: linaro-kernel@lists.linaro.org Cc: John Stultz Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/fd60de14cf6d125489c031207567bb255ad946f6.1441943991.git.viresh.kumar@linaro.org Signed-off-by: Ingo Molnar diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index 31ce435..bdcf358 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -18,15 +18,6 @@ struct clock_event_device; struct module; -/* Clock event mode commands for legacy ->set_mode(): OBSOLETE */ -enum clock_event_mode { - CLOCK_EVT_MODE_UNUSED, - CLOCK_EVT_MODE_SHUTDOWN, - CLOCK_EVT_MODE_PERIODIC, - CLOCK_EVT_MODE_ONESHOT, - CLOCK_EVT_MODE_RESUME, -}; - /* * Possible states of a clock event device. * @@ -86,16 +77,14 @@ enum clock_event_state { * @min_delta_ns: minimum delta value in ns * @mult: nanosecond to cycles multiplier * @shift: nanoseconds to cycles divisor (power of two) - * @mode: operating mode, relevant only to ->set_mode(), OBSOLETE * @state_use_accessors:current state of the device, assigned by the core code * @features: features * @retries: number of forced programming retries - * @set_mode: legacy set mode function, only for modes <= CLOCK_EVT_MODE_RESUME. 
- * @set_state_periodic: switch state to periodic, if !set_mode - * @set_state_oneshot: switch state to oneshot, if !set_mode - * @set_state_oneshot_stopped: switch state to oneshot_stopped, if !set_mode - * @set_state_shutdown: switch state to shutdown, if !set_mode - * @tick_resume: resume clkevt device, if !set_mode + * @set_state_periodic: switch state to periodic + * @set_state_oneshot: switch state to oneshot + * @set_state_oneshot_stopped: switch state to oneshot_stopped + * @set_state_shutdown: switch state to shutdown + * @tick_resume: resume clkevt device * @broadcast: function to broadcast events * @min_delta_ticks: minimum delta value in ticks stored for reconfiguration * @max_delta_ticks: maximum delta value in ticks stored for reconfiguration @@ -116,18 +105,10 @@ struct clock_event_device { u64 min_delta_ns; u32 mult; u32 shift; - enum clock_event_mode mode; enum clock_event_state state_use_accessors; unsigned int features; unsigned long retries; - /* - * State transition callback(s): Only one of the two groups should be - * defined: - * - set_mode(), only for modes <= CLOCK_EVT_MODE_RESUME. - * - set_state_{shutdown|periodic|oneshot|oneshot_stopped}(), tick_resume(). 
- */ - void (*set_mode)(enum clock_event_mode mode, struct clock_event_device *); int (*set_state_periodic)(struct clock_event_device *); int (*set_state_oneshot)(struct clock_event_device *); int (*set_state_oneshot_stopped)(struct clock_event_device *); diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 50eb107..a9b76a4 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -97,20 +97,6 @@ EXPORT_SYMBOL_GPL(clockevent_delta2ns); static int __clockevents_switch_state(struct clock_event_device *dev, enum clock_event_state state) { - /* Transition with legacy set_mode() callback */ - if (dev->set_mode) { - /* Legacy callback doesn't support new modes */ - if (state > CLOCK_EVT_STATE_ONESHOT) - return -ENOSYS; - /* - * 'clock_event_state' and 'clock_event_mode' have 1-to-1 - * mapping until *_ONESHOT, and so a simple cast will work. - */ - dev->set_mode((enum clock_event_mode)state, dev); - dev->mode = (enum clock_event_mode)state; - return 0; - } - if (dev->features & CLOCK_EVT_FEAT_DUMMY) return 0; @@ -204,12 +190,8 @@ int clockevents_tick_resume(struct clock_event_device *dev) { int ret = 0; - if (dev->set_mode) { - dev->set_mode(CLOCK_EVT_MODE_RESUME, dev); - dev->mode = CLOCK_EVT_MODE_RESUME; - } else if (dev->tick_resume) { + if (dev->tick_resume) ret = dev->tick_resume(dev); - } return ret; } @@ -460,26 +442,6 @@ int clockevents_unbind_device(struct clock_event_device *ced, int cpu) } EXPORT_SYMBOL_GPL(clockevents_unbind_device); -/* Sanity check of state transition callbacks */ -static int clockevents_sanity_check(struct clock_event_device *dev) -{ - /* Legacy set_mode() callback */ - if (dev->set_mode) { - /* We shouldn't be supporting new modes now */ - WARN_ON(dev->set_state_periodic || dev->set_state_oneshot || - dev->set_state_shutdown || dev->tick_resume || - dev->set_state_oneshot_stopped); - - BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED); - return 0; - } - - if (dev->features & CLOCK_EVT_FEAT_DUMMY) - return 0; - 
- return 0; -} - /** * clockevents_register_device - register a clock event device * @dev: device to register @@ -488,8 +450,6 @@ void clockevents_register_device(struct clock_event_device *dev) { unsigned long flags; - BUG_ON(clockevents_sanity_check(dev)); - /* Initialize state to DETACHED */ clockevent_set_state(dev, CLOCK_EVT_STATE_DETACHED); diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index d11c55b..4fcd99e 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -398,7 +398,6 @@ void tick_shutdown(unsigned int cpu) * the set mode function! */ clockevent_set_state(dev, CLOCK_EVT_STATE_DETACHED); - dev->mode = CLOCK_EVT_MODE_UNUSED; clockevents_exchange_device(dev, NULL); dev->event_handler = clockevents_handle_noop; td->evtdev = NULL; diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index 129c960..f75e35b 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c @@ -225,7 +225,7 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu) (unsigned long long) dev->min_delta_ns); SEQ_printf(m, " mult: %u\n", dev->mult); SEQ_printf(m, " shift: %u\n", dev->shift); - SEQ_printf(m, " mode: %d\n", dev->mode); + SEQ_printf(m, " mode: %d\n", clockevent_get_state(dev)); SEQ_printf(m, " next_event: %Ld nsecs\n", (unsigned long long) ktime_to_ns(dev->next_event)); @@ -233,40 +233,34 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu) print_name_offset(m, dev->set_next_event); SEQ_printf(m, "\n"); - if (dev->set_mode) { - SEQ_printf(m, " set_mode: "); - print_name_offset(m, dev->set_mode); + if (dev->set_state_shutdown) { + SEQ_printf(m, " shutdown: "); + print_name_offset(m, dev->set_state_shutdown); SEQ_printf(m, "\n"); - } else { - if (dev->set_state_shutdown) { - SEQ_printf(m, " shutdown: "); - print_name_offset(m, dev->set_state_shutdown); - SEQ_printf(m, "\n"); - } + } - if (dev->set_state_periodic) { - SEQ_printf(m, " periodic: "); - print_name_offset(m, 
dev->set_state_periodic); - SEQ_printf(m, "\n"); - } + if (dev->set_state_periodic) { + SEQ_printf(m, " periodic: "); + print_name_offset(m, dev->set_state_periodic); + SEQ_printf(m, "\n"); + } - if (dev->set_state_oneshot) { - SEQ_printf(m, " oneshot: "); - print_name_offset(m, dev->set_state_oneshot); - SEQ_printf(m, "\n"); - } + if (dev->set_state_oneshot) { + SEQ_printf(m, " oneshot: "); + print_name_offset(m, dev->set_state_oneshot); + SEQ_printf(m, "\n"); + } - if (dev->set_state_oneshot_stopped) { - SEQ_printf(m, " oneshot stopped: "); - print_name_offset(m, dev->set_state_oneshot_stopped); - SEQ_printf(m, "\n"); - } + if (dev->set_state_oneshot_stopped) { + SEQ_printf(m, " oneshot stopped: "); + print_name_offset(m, dev->set_state_oneshot_stopped); + SEQ_printf(m, "\n"); + } - if (dev->tick_resume) { - SEQ_printf(m, " resume: "); - print_name_offset(m, dev->tick_resume); - SEQ_printf(m, "\n"); - } + if (dev->tick_resume) { + SEQ_printf(m, " resume: "); + print_name_offset(m, dev->tick_resume); + SEQ_printf(m, "\n"); } SEQ_printf(m, " event_handler: "); -- cgit v0.10.2 From f454b478861325f067fd58ba7ee9f1b5c4a9d6a0 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 2 Sep 2015 09:45:58 -0600 Subject: x86/ldt: Fix small LDT allocation for Xen While the following commit: 37868fe113 ("x86/ldt: Make modify_ldt synchronous") added a nice comment explaining that Xen needs page-aligned whole page chunks for guest descriptor tables, it then nevertheless used kzalloc() on the small size path. As I'm unaware of guarantees for kmalloc(PAGE_SIZE, ) to return page-aligned memory blocks, I believe this needs to be switched back to __get_free_page() (or better get_zeroed_page()). Signed-off-by: Jan Beulich Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: David Vrabel Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Konrad Rzeszutek Wilk Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/55E735D6020000780009F1E6@prv-mh.provo.novell.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index 2bcc052..6acc9dd 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -58,7 +58,7 @@ static struct ldt_struct *alloc_ldt_struct(int size) if (alloc_size > PAGE_SIZE) new_ldt->entries = vzalloc(alloc_size); else - new_ldt->entries = kzalloc(PAGE_SIZE, GFP_KERNEL); + new_ldt->entries = (void *)get_zeroed_page(GFP_KERNEL); if (!new_ldt->entries) { kfree(new_ldt); @@ -95,7 +95,7 @@ static void free_ldt_struct(struct ldt_struct *ldt) if (ldt->size * LDT_ENTRY_SIZE > PAGE_SIZE) vfree(ldt->entries); else - kfree(ldt->entries); + free_page((unsigned long)ldt->entries); kfree(ldt); } -- cgit v0.10.2 From ba9cc453c400049f632d4eb2f2835e2f96654ddc Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Fri, 11 Sep 2015 08:49:47 +0100 Subject: arm64: dma-mapping: check whether cma area is initialized or not If CMA is turned on and CMA size is set to zero, kernel should behave as if CMA was not enabled at compile time. Every dma allocation should check existence of cma area before requesting memory. Arm has done this by commit e464ef16c4f0 ("arm: dma-mapping: add checking cma area initialized"), also do this for arm64. 
Acked-by: Catalin Marinas Signed-off-by: Jisheng Zhang Signed-off-by: Will Deacon diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 0bcc4bc..99224dc 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -100,7 +100,7 @@ static void *__dma_alloc_coherent(struct device *dev, size_t size, if (IS_ENABLED(CONFIG_ZONE_DMA) && dev->coherent_dma_mask <= DMA_BIT_MASK(32)) flags |= GFP_DMA; - if (IS_ENABLED(CONFIG_DMA_CMA) && (flags & __GFP_WAIT)) { + if (dev_get_cma_area(dev) && (flags & __GFP_WAIT)) { struct page *page; void *addr; -- cgit v0.10.2 From b847415ce96efef819534b230d84695b1bc6d36b Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 11 Sep 2015 18:22:00 +0100 Subject: arm64: Fix the pte_hw_dirty() check when AF/DBM is enabled Commit 2f4b829c625e ("arm64: Add support for hardware updates of the access and dirty pte bits") introduced support for handling hardware updates of the access flag and dirty status. The PTE is automatically dirtied in hardware (if supported) by clearing the PTE_RDONLY bit when the PTE_DBM/PTE_WRITE bit is set. The pte_hw_dirty() macro was added to detect a hardware dirtied pte. The pte_dirty() macro checks for both software PTE_DIRTY and pte_hw_dirty(). Functions like pte_modify() clear the PTE_RDONLY bit since it is meant to be set in set_pte_at() when written to memory. In such cases, pte_hw_dirty() would return true even though such pte is clean. This patch changes pte_hw_dirty() to test the PTE_DBM/PTE_WRITE bit together with PTE_RDONLY. 
Fixes: 2f4b829c625e ("arm64: Add support for hardware updates of the access and dirty pte bits") Reported-by: Julien Grall Tested-by: Julien Grall Tested-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Will Deacon diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 6900b2d9..69207f0 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -146,7 +146,7 @@ extern struct page *empty_zero_page; #define pte_exec(pte) (!(pte_val(pte) & PTE_UXN)) #ifdef CONFIG_ARM64_HW_AFDBM -#define pte_hw_dirty(pte) (!(pte_val(pte) & PTE_RDONLY)) +#define pte_hw_dirty(pte) (pte_write(pte) && !(pte_val(pte) & PTE_RDONLY)) #else #define pte_hw_dirty(pte) (0) #endif @@ -238,7 +238,7 @@ extern void __sync_icache_dcache(pte_t pteval, unsigned long addr); * When hardware DBM is not present, the sofware PTE_DIRTY bit is updated via * the page fault mechanism. Checking the dirty status of a pte becomes: * - * PTE_DIRTY || !PTE_RDONLY + * PTE_DIRTY || (PTE_WRITE && !PTE_RDONLY) */ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) -- cgit v0.10.2 From 62d96c71d248834af2891293dc23cc344ae2ec36 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 11 Sep 2015 18:22:01 +0100 Subject: arm64: Fix pte_modify() to preserve the hardware dirty information The pte_modify() function with hardware AF/DBM enabled must transfer the hardware dirty information to the software PTE_DIRTY bit. However, it was setting this bit in newprot and the mask does not cover such bit. This patch sets PTE_DIRTY on the original pte which will be preserved in the returned value. 
Fixes: 2f4b829c625e ("arm64: Add support for hardware updates of the access and dirty pte bits") Cc: Julien Grall Tested-by: Julien Grall Tested-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Will Deacon diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 69207f0..31df98a 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -503,7 +503,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) PTE_PROT_NONE | PTE_WRITE | PTE_TYPE_MASK; /* preserve the hardware dirty information */ if (pte_hw_dirty(pte)) - newprot |= PTE_DIRTY; + pte = pte_mkdirty(pte); pte_val(pte) = (pte_val(pte) & ~mask) | (pgprot_val(newprot) & mask); return pte; } -- cgit v0.10.2 From bf950040a53da35522e38066d9eb6ab7a1c9d136 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 11 Sep 2015 18:22:02 +0100 Subject: arm64: pgtable: use a single bit for PTE_WRITE regardless of DBM Depending on CONFIG_ARM64_HW_AFDBM, we use either bit 57 or 51 of the pte to represent PTE_WRITE. Given that bit 51 is reserved prior to ARMv8.1, we can just use that bit regardless of the config option. That also matches what happens if a kernel configured with ARM64_HW_AFDBM=y is run on a CPU without the DBM functionality. Cc: Julien Grall Tested-by: Julien Grall Signed-off-by: Will Deacon diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 31df98a..b0329be 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -26,13 +26,9 @@ * Software defined PTE bits definition. 
*/ #define PTE_VALID (_AT(pteval_t, 1) << 0) +#define PTE_WRITE (PTE_DBM) /* same as DBM (51) */ #define PTE_DIRTY (_AT(pteval_t, 1) << 55) #define PTE_SPECIAL (_AT(pteval_t, 1) << 56) -#ifdef CONFIG_ARM64_HW_AFDBM -#define PTE_WRITE (PTE_DBM) /* same as DBM */ -#else -#define PTE_WRITE (_AT(pteval_t, 1) << 57) -#endif #define PTE_PROT_NONE (_AT(pteval_t, 1) << 58) /* only when !PTE_VALID */ /* -- cgit v0.10.2 From cda34fc774d114afe98515a21c2063a803f922bc Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 14 Sep 2015 12:42:23 +0200 Subject: x86/paravirt: Remove the unused pv_time_ops::get_tsc_khz method It's not used anywhere. Signed-off-by: Juergen Gross Acked-by: Rusty Russell Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: akataria@vmware.com Cc: chrisw@sous-sol.org Cc: jeremy@goop.org Cc: virtualization@lists.linux-foundation.org Link: http://lkml.kernel.org/r/1442227343-403-1-git-send-email-jgross@suse.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index ce029e4..31247b5 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -97,7 +97,6 @@ struct pv_lazy_ops { struct pv_time_ops { unsigned long long (*sched_clock)(void); unsigned long long (*steal_clock)(int cpu); - unsigned long (*get_tsc_khz)(void); }; struct pv_cpu_ops { -- cgit v0.10.2 From 4857c91f0d195f05908fff296ba1ec5fca87066c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 14 Sep 2015 12:00:55 +0200 Subject: x86/ioapic: Force affinity setting in setup_ioapic_dest() The recent ioapic cleanups changed the affinity setting in setup_ioapic_dest() from a direct write to the hardware to the delayed affinity setup via irq_set_affinity(). 
That results in a warning from chained_irq_exit(): WARNING: CPU: 0 PID: 5 at kernel/irq/migration.c:32 irq_move_masked_irq [] irq_move_masked_irq+0xb8/0xc0 [] ioapic_ack_level+0x111/0x130 [] intel_gpio_irq_handler+0x148/0x1c0 The reason is that irq_set_affinity() does not write directly to the hardware. It marks the affinity setting as pending and executes it from the next interrupt. The chained handler infrastructure does not take the irq descriptor lock for performance reasons because such a chained interrupt is not visible to any interfaces. So the delayed affinity setting triggers the warning in irq_move_masked_irq(). Restore the old behaviour by calling the set_affinity function of the ioapic chip in setup_ioapic_dest(). This is safe as none of the interrupts can be on the fly at this point. Fixes: aa5cb97f14a2 'x86/irq: Remove x86_io_apic_ops.set_affinity and related interfaces' Reported-and-tested-by: Mika Westerberg Signed-off-by: Thomas Gleixner Cc: Jiang Liu Cc: jarkko.nikula@linux.intel.com diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 38a76f8..5c60bb1 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2522,6 +2522,7 @@ void __init setup_ioapic_dest(void) int pin, ioapic, irq, irq_entry; const struct cpumask *mask; struct irq_data *idata; + struct irq_chip *chip; if (skip_ioapic_setup == 1) return; @@ -2545,9 +2546,9 @@ void __init setup_ioapic_dest(void) else mask = apic->target_cpus(); - irq_set_affinity(irq, mask); + chip = irq_data_get_irq_chip(idata); + chip->irq_set_affinity(idata, mask, false); } - } #endif -- cgit v0.10.2 From 5d7c631d926b59aa16f3c56eaeb83f1036c81dc7 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Thu, 30 Jul 2015 16:24:43 -0700 Subject: x86/apic: Serialize LVTT and TSC_DEADLINE writes The APIC LVTT register is MMIO mapped but the TSC_DEADLINE register is an MSR. 
The write to the TSC_DEADLINE MSR is not serializing, so it's not guaranteed that the write to LVTT has reached the APIC before the TSC_DEADLINE MSR is written. In such a case the write to the MSR is ignored and as a consequence the local timer interrupt never fires. The SDM describes this issue for xAPIC and x2APIC modes. The serialization methods recommended by the SDM differ. xAPIC: "1. Memory-mapped write to LVT Timer Register, setting bits 18:17 to 10b. 2. WRMSR to the IA32_TSC_DEADLINE MSR a value much larger than current time-stamp counter. 3. If RDMSR of the IA32_TSC_DEADLINE MSR returns zero, go to step 2. 4. WRMSR to the IA32_TSC_DEADLINE MSR the desired deadline." x2APIC: "To allow for efficient access to the APIC registers in x2APIC mode, the serializing semantics of WRMSR are relaxed when writing to the APIC registers. Thus, system software should not use 'WRMSR to APIC registers in x2APIC mode' as a serializing instruction. Read and write accesses to the APIC registers will occur in program order. A WRMSR to an APIC register may complete before all preceding stores are globally visible; software can prevent this by inserting a serializing instruction, an SFENCE, or an MFENCE before the WRMSR." The xAPIC method is to just wait for the memory mapped write to hit the LVTT by checking whether the MSR write has reached the hardware. There is no reason why a proper MFENCE after the memory mapped write would not do the same. Andi Kleen confirmed that MFENCE is sufficient for the xAPIC case as well. Issue MFENCE before writing to the TSC_DEADLINE MSR. This can be done unconditionally as all CPUs which have TSC_DEADLINE also have MFENCE support. [ tglx: Massaged the changelog ] Signed-off-by: Shaohua Li Reviewed-by: Ingo Molnar Cc: Cc: Cc: Cc: Andi Kleen Cc: H.
Peter Anvin Cc: stable@vger.kernel.org #v3.7+ Link: http://lkml.kernel.org/r/20150909041352.GA2059853@devbig257.prn2.facebook.com Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 3ca3e46..24e94ce 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -336,6 +336,13 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) apic_write(APIC_LVTT, lvtt_value); if (lvtt_value & APIC_LVT_TIMER_TSCDEADLINE) { + /* + * See Intel SDM: TSC-Deadline Mode chapter. In xAPIC mode, + * writing to the APIC LVTT and TSC_DEADLINE MSR isn't serialized. + * According to Intel, MFENCE can do the serialization here. + */ + asm volatile("mfence" : : : "memory"); + printk_once(KERN_DEBUG "TSC deadline timer enabled\n"); return; } -- cgit v0.10.2 From bd315aab8a3ab1bc7074774b89a5d8ec7c1ff7ab Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Mon, 14 Sep 2015 10:23:55 +0000 Subject: perf top: Fix segfault pressing -> with no hist entries 'perf top' segfaults with following operation: # perf top -e page-faults -p 11400 # 11400 never generates page-fault Then on the resulting empty interface, press right key: # ./perf top -e page-faults -p 11400 perf: Segmentation fault -------- backtrace -------- ./perf[0x535428] /lib64/libc.so.6(+0x3545f)[0x7f0dd360745f] ./perf[0x531d46] ./perf(perf_evlist__tui_browse_hists+0x96)[0x5340d6] ./perf[0x44ba2f] /lib64/libpthread.so.0(+0x81d0)[0x7f0dd49dc1d0] /lib64/libc.so.6(clone+0x6c)[0x7f0dd36b90dc] The bug resides in perf_evsel__hists_browse() that, in the above circumstance browser->selection can be NULL, but code after skip_annotation doesn't consider it. This patch fix it by checking browser->selection before fetching browser->selection->map. 
Signed-off-by: Wang Nan Tested-by: Arnaldo Carvalho de Melo Cc: Namhyung Kim Cc: Zefan Li Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1442226235-117265-1-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index cf86f2d..c04c60d 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -1968,7 +1968,8 @@ skip_annotation: &options[nr_options], dso); nr_options += add_map_opt(browser, &actions[nr_options], &options[nr_options], - browser->selection->map); + browser->selection ? + browser->selection->map : NULL); /* perf script support */ if (browser->he_selection) { @@ -1976,6 +1977,15 @@ skip_annotation: &actions[nr_options], &options[nr_options], thread, NULL); + /* + * Note that browser->selection != NULL + * when browser->he_selection is not NULL, + * so we don't need to check browser->selection + * before fetching browser->selection->sym like what + * we do before fetching browser->selection->map. + * + * See hist_browser__show_entry. + */ nr_options += add_script_opt(browser, &actions[nr_options], &options[nr_options], -- cgit v0.10.2 From 1975dbc276c6ab62230cf4f9df5ddc9ff0e0e473 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Mon, 14 Sep 2015 17:11:05 -0600 Subject: locking/static_keys: Fix up the static keys documentation Fix a few small mistakes in the static key documentation and delete an unneeded sentence. 
Suggested-by: Jason Baron Signed-off-by: Jonathan Corbet Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20150914171105.511e1e21@lwn.net Signed-off-by: Ingo Molnar diff --git a/Documentation/static-keys.txt b/Documentation/static-keys.txt index ec91158..477927b 100644 --- a/Documentation/static-keys.txt +++ b/Documentation/static-keys.txt @@ -15,8 +15,8 @@ The updated API replacements are: DEFINE_STATIC_KEY_TRUE(key); DEFINE_STATIC_KEY_FALSE(key); -static_key_likely() -static_key_unlikely() +static_branch_likely() +static_branch_unlikely() 0) Abstract diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 0684bd3..f109423 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -21,8 +21,8 @@ * * DEFINE_STATIC_KEY_TRUE(key); * DEFINE_STATIC_KEY_FALSE(key); - * static_key_likely() - * static_key_unlikely() + * static_branch_likely() + * static_branch_unlikely() * * Jump labels provide an interface to generate dynamic branches using * self-modifying code. Assuming toolchain and architecture support, if we @@ -45,12 +45,10 @@ * statement, setting the key to true requires us to patch in a jump * to the out-of-line of true branch. * - * In addtion to static_branch_{enable,disable}, we can also reference count + * In addition to static_branch_{enable,disable}, we can also reference count * the key or branch direction via static_branch_{inc,dec}. Thus, * static_branch_inc() can be thought of as a 'make more true' and - * static_branch_dec() as a 'make more false'. The inc()/dec() - * interface is meant to be used exclusively from the inc()/dec() for a given - * key. + * static_branch_dec() as a 'make more false'. * * Since this relies on modifying code, the branch modifying functions * must be considered absolute slow paths (machine wide synchronization etc.). 
-- cgit v0.10.2 From a69b09e2342a9c144b0291b9aeb849ab7d5843bf Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 8 Sep 2015 10:58:49 +0300 Subject: perf evlist: Simplify propagate_maps() logic If evsel->cpus is to be reassigned then the current value must be "put", which works even if it is NULL. Simplify the current logic by moving the "put" next to the assignment. Signed-off-by: Adrian Hunter Acked-by: Jiri Olsa Cc: Kan Liang Link: http://lkml.kernel.org/r/1441699142-18905-2-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index d51a520..95e07ea 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1113,11 +1113,10 @@ static int perf_evlist__propagate_maps(struct perf_evlist *evlist, * We already have cpus for evsel (via PMU sysfs) so * keep it, if there's no target cpu list defined. */ - if (evsel->cpus && has_user_cpus) + if (!evsel->cpus || has_user_cpus) { cpu_map__put(evsel->cpus); - - if (!evsel->cpus || has_user_cpus) evsel->cpus = cpu_map__get(evlist->cpus); + } evsel->threads = thread_map__get(evlist->threads); -- cgit v0.10.2 From 725e06b2e2754fbff61521fa76fee51cee5bcb5f Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 8 Sep 2015 10:58:50 +0300 Subject: perf evlist: Simplify set_maps() logic Don't need to check for NULL when "putting" evlist->maps and evlist->threads because the "put" functions already do that. 
Signed-off-by: Adrian Hunter Acked-by: Jiri Olsa Cc: Kan Liang Link: http://lkml.kernel.org/r/1441699142-18905-3-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 95e07ea..9cb9296 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1156,14 +1156,10 @@ int perf_evlist__set_maps(struct perf_evlist *evlist, struct cpu_map *cpus, struct thread_map *threads) { - if (evlist->cpus) - cpu_map__put(evlist->cpus); - + cpu_map__put(evlist->cpus); evlist->cpus = cpus; - if (evlist->threads) - thread_map__put(evlist->threads); - + thread_map__put(evlist->threads); evlist->threads = threads; return perf_evlist__propagate_maps(evlist, false); -- cgit v0.10.2 From d5bc056e73841d4bc941474a342ef9b6a207ac84 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 8 Sep 2015 10:58:51 +0300 Subject: perf evlist: Remove redundant validation from propagate_maps() The validation checks that the values that were just assigned, got assigned i.e. the error can't ever happen. Subsequent patches will call this code in places where errors are not being returned. Changing those code paths to return this non-existent error is counter-productive, so just remove it. That in turn results in perf_evlist__set_maps not needing to return an error, but callers aren't checking it either, so remove that too. 
Signed-off-by: Adrian Hunter Acked-by: Jiri Olsa Cc: Kan Liang Link: http://lkml.kernel.org/r/1441699142-18905-4-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 9cb9296..785bfd3 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1103,8 +1103,8 @@ int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages, return perf_evlist__mmap_ex(evlist, pages, overwrite, 0, false); } -static int perf_evlist__propagate_maps(struct perf_evlist *evlist, - bool has_user_cpus) +static void perf_evlist__propagate_maps(struct perf_evlist *evlist, + bool has_user_cpus) { struct perf_evsel *evsel; @@ -1119,13 +1119,7 @@ static int perf_evlist__propagate_maps(struct perf_evlist *evlist, } evsel->threads = thread_map__get(evlist->threads); - - if ((evlist->cpus && !evsel->cpus) || - (evlist->threads && !evsel->threads)) - return -ENOMEM; } - - return 0; } int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target) @@ -1144,7 +1138,9 @@ int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target) if (evlist->cpus == NULL) goto out_delete_threads; - return perf_evlist__propagate_maps(evlist, !!target->cpu_list); + perf_evlist__propagate_maps(evlist, !!target->cpu_list); + + return 0; out_delete_threads: thread_map__put(evlist->threads); @@ -1152,9 +1148,8 @@ out_delete_threads: return -1; } -int perf_evlist__set_maps(struct perf_evlist *evlist, - struct cpu_map *cpus, - struct thread_map *threads) +void perf_evlist__set_maps(struct perf_evlist *evlist, struct cpu_map *cpus, + struct thread_map *threads) { cpu_map__put(evlist->cpus); evlist->cpus = cpus; @@ -1162,7 +1157,7 @@ int perf_evlist__set_maps(struct perf_evlist *evlist, thread_map__put(evlist->threads); evlist->threads = threads; - return perf_evlist__propagate_maps(evlist, false); + perf_evlist__propagate_maps(evlist, false); } int 
perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **err_evsel) diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index b39a619..da2fa91 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -155,9 +155,8 @@ int perf_evlist__enable_event_idx(struct perf_evlist *evlist, void perf_evlist__set_selected(struct perf_evlist *evlist, struct perf_evsel *evsel); -int perf_evlist__set_maps(struct perf_evlist *evlist, - struct cpu_map *cpus, - struct thread_map *threads); +void perf_evlist__set_maps(struct perf_evlist *evlist, struct cpu_map *cpus, + struct thread_map *threads); int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target); int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **err_evsel); -- cgit v0.10.2 From ec9a77a7e3346a05b1287597982d0dd09dd1c3bd Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 8 Sep 2015 10:58:52 +0300 Subject: perf evlist: Add has_user_cpus member Subsequent patches will need to call perf_evlist__propagate_maps without reference to a "target". Add evlist->has_user_cpus to record whether the user has specified which cpus to target (and therefore whether that list of cpus should override the default settings for a selected event i.e. 
the cpu maps should be propagated) Signed-off-by: Adrian Hunter Acked-by: Jiri Olsa Cc: Kan Liang Link: http://lkml.kernel.org/r/1441699142-18905-5-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 785bfd3..3a4445f 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1103,8 +1103,7 @@ int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages, return perf_evlist__mmap_ex(evlist, pages, overwrite, 0, false); } -static void perf_evlist__propagate_maps(struct perf_evlist *evlist, - bool has_user_cpus) +static void perf_evlist__propagate_maps(struct perf_evlist *evlist) { struct perf_evsel *evsel; @@ -1113,7 +1112,7 @@ static void perf_evlist__propagate_maps(struct perf_evlist *evlist, * We already have cpus for evsel (via PMU sysfs) so * keep it, if there's no target cpu list defined. */ - if (!evsel->cpus || has_user_cpus) { + if (!evsel->cpus || evlist->has_user_cpus) { cpu_map__put(evsel->cpus); evsel->cpus = cpu_map__get(evlist->cpus); } @@ -1138,7 +1137,9 @@ int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target) if (evlist->cpus == NULL) goto out_delete_threads; - perf_evlist__propagate_maps(evlist, !!target->cpu_list); + evlist->has_user_cpus = !!target->cpu_list; + + perf_evlist__propagate_maps(evlist); return 0; @@ -1157,7 +1158,7 @@ void perf_evlist__set_maps(struct perf_evlist *evlist, struct cpu_map *cpus, thread_map__put(evlist->threads); evlist->threads = threads; - perf_evlist__propagate_maps(evlist, false); + perf_evlist__propagate_maps(evlist); } int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **err_evsel) diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index da2fa91..cfc4df6 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -42,6 +42,7 @@ struct perf_evlist { int nr_mmaps; bool overwrite; bool enabled; + bool has_user_cpus; size_t 
mmap_len; int id_pos; int is_pos; -- cgit v0.10.2 From f114d6eff76d20b521d8716e969e71b1f56f82b5 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 8 Sep 2015 10:58:53 +0300 Subject: perf evlist: Fix splice_list_tail() not setting evlist Commit d49e46950772 ("perf evsel: Add a backpointer to the evlist a evsel is in") updated perf_evlist__add() but not perf_evlist__splice_list_tail(). This illustrates that it is better if perf_evlist__splice_list_tail() calls perf_evlist__add() instead of duplicating the logic, so do that. This will also simplify a subsequent fix for propagating maps. Signed-off-by: Adrian Hunter Acked-by: Jiri Olsa Cc: Kan Liang Link: http://lkml.kernel.org/r/1441699142-18905-6-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 3a4445f..961560b 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -136,15 +136,14 @@ void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry) } void perf_evlist__splice_list_tail(struct perf_evlist *evlist, - struct list_head *list, - int nr_entries) + struct list_head *list) { - bool set_id_pos = !evlist->nr_entries; + struct perf_evsel *evsel, *temp; - list_splice_tail(list, &evlist->entries); - evlist->nr_entries += nr_entries; - if (set_id_pos) - perf_evlist__set_id_pos(evlist); + __evlist__for_each_safe(list, temp, evsel) { + list_del_init(&evsel->node); + perf_evlist__add(evlist, evsel); + } } void __perf_evlist__set_leader(struct list_head *list) @@ -210,7 +209,7 @@ static int perf_evlist__add_attrs(struct perf_evlist *evlist, list_add_tail(&evsel->node, &head); } - perf_evlist__splice_list_tail(evlist, &head, nr_attrs); + perf_evlist__splice_list_tail(evlist, &head); return 0; diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index cfc4df6..115d8b5 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -179,8 +179,7 @@ bool 
perf_evlist__valid_sample_id_all(struct perf_evlist *evlist); bool perf_evlist__valid_read_format(struct perf_evlist *evlist); void perf_evlist__splice_list_tail(struct perf_evlist *evlist, - struct list_head *list, - int nr_entries); + struct list_head *list); static inline struct perf_evsel *perf_evlist__first(struct perf_evlist *evlist) { diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index d826e6f..7e8ae21 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1140,10 +1140,9 @@ int parse_events(struct perf_evlist *evlist, const char *str, ret = parse_events__scanner(str, &data, PE_START_EVENTS); perf_pmu__parse_cleanup(); if (!ret) { - int entries = data.idx - evlist->nr_entries; struct perf_evsel *last; - perf_evlist__splice_list_tail(evlist, &data.list, entries); + perf_evlist__splice_list_tail(evlist, &data.list); evlist->nr_groups += data.nr_groups; last = perf_evlist__last(evlist); last->cmdline_group_boundary = true; -- cgit v0.10.2 From b278c364b35ae940b05f6a9edf8061fc886cd09e Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 8 Sep 2015 10:58:54 +0300 Subject: perf evlist: Fix missing thread_map__put in propagate_maps() perf_evlist__propagate_maps() incorrectly assumes evsel->threads is NULL before reassigning it, but it won't be NULL when perf_evlist__set_maps() is used to set different (or NULL) maps. Thus thread_map__put must be used, which works even if evsel->threads is NULL. 
Signed-off-by: Adrian Hunter Acked-by: Jiri Olsa Cc: Kan Liang Link: http://lkml.kernel.org/r/1441699142-18905-7-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 961560b..79056c6 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1116,6 +1116,7 @@ static void perf_evlist__propagate_maps(struct perf_evlist *evlist) evsel->cpus = cpu_map__get(evlist->cpus); } + thread_map__put(evsel->threads); evsel->threads = thread_map__get(evlist->threads); } } -- cgit v0.10.2 From fce4d296b405b03fba033a55017348bf55b10db6 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 8 Sep 2015 10:58:55 +0300 Subject: perf evsel: Add own_cpus member perf_evlist__propagate_maps() cannot easily tell if an evsel has its own cpu map. To make that simpler, keep a copy of the PMU cpu map and adjust the propagation logic accordingly. Signed-off-by: Adrian Hunter Acked-by: Jiri Olsa Cc: Kan Liang Link: http://lkml.kernel.org/r/1441699142-18905-8-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 79056c6..5bd3b49 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1111,9 +1111,12 @@ static void perf_evlist__propagate_maps(struct perf_evlist *evlist) * We already have cpus for evsel (via PMU sysfs) so * keep it, if there's no target cpu list defined. 
*/ - if (!evsel->cpus || evlist->has_user_cpus) { + if (!evsel->own_cpus || evlist->has_user_cpus) { cpu_map__put(evsel->cpus); evsel->cpus = cpu_map__get(evlist->cpus); + } else if (evsel->cpus != evsel->own_cpus) { + cpu_map__put(evsel->cpus); + evsel->cpus = cpu_map__get(evsel->own_cpus); } thread_map__put(evsel->threads); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index c53f791..5410483 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1033,6 +1033,7 @@ void perf_evsel__exit(struct perf_evsel *evsel) perf_evsel__free_config_terms(evsel); close_cgroup(evsel->cgrp); cpu_map__put(evsel->cpus); + cpu_map__put(evsel->own_cpus); thread_map__put(evsel->threads); zfree(&evsel->group_name); zfree(&evsel->name); diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 298e6bb..ef8925f 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -98,6 +98,7 @@ struct perf_evsel { struct cgroup_sel *cgrp; void *handler; struct cpu_map *cpus; + struct cpu_map *own_cpus; struct thread_map *threads; unsigned int sample_size; int id_pos; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 7e8ae21..21ed6ee 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -287,8 +287,8 @@ __add_event(struct list_head *list, int *idx, if (!evsel) return NULL; - if (cpus) - evsel->cpus = cpu_map__get(cpus); + evsel->cpus = cpu_map__get(cpus); + evsel->own_cpus = cpu_map__get(cpus); if (name) evsel->name = strdup(name); -- cgit v0.10.2 From 934e0f2053ce299893ca48a411bf7fdc8ac6254f Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 8 Sep 2015 10:58:56 +0300 Subject: perf evlist: Make set_maps() more resilient Make perf_evlist__set_maps() more resilient by allowing for the possibility that one or another of the maps isn't being changed and therefore should not be "put". 
Signed-off-by: Adrian Hunter Acked-by: Jiri Olsa Cc: Kan Liang Link: http://lkml.kernel.org/r/1441699142-18905-9-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 5bd3b49..78ff52e 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1155,11 +1155,22 @@ out_delete_threads: void perf_evlist__set_maps(struct perf_evlist *evlist, struct cpu_map *cpus, struct thread_map *threads) { - cpu_map__put(evlist->cpus); - evlist->cpus = cpus; + /* + * Allow for the possibility that one or another of the maps isn't being + * changed i.e. don't put it. Note we are assuming the maps that are + * being applied are brand new and evlist is taking ownership of the + * original reference count of 1. If that is not the case it is up to + * the caller to increase the reference count. + */ + if (cpus != evlist->cpus) { + cpu_map__put(evlist->cpus); + evlist->cpus = cpus; + } - thread_map__put(evlist->threads); - evlist->threads = threads; + if (threads != evlist->threads) { + thread_map__put(evlist->threads); + evlist->threads = threads; + } perf_evlist__propagate_maps(evlist); } -- cgit v0.10.2 From 74bfd2b25de354feb4484c553dce4fe8d9c3b60b Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 8 Sep 2015 10:58:57 +0300 Subject: perf evlist: Make create_maps() use set_maps() Since there is a function to set maps, perf_evlist__create_maps() should use it. 
Signed-off-by: Adrian Hunter Acked-by: Jiri Olsa Cc: Kan Liang Link: http://lkml.kernel.org/r/1441699142-18905-10-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 78ff52e..c17f355 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1126,29 +1126,30 @@ static void perf_evlist__propagate_maps(struct perf_evlist *evlist) int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target) { - evlist->threads = thread_map__new_str(target->pid, target->tid, - target->uid); + struct cpu_map *cpus; + struct thread_map *threads; - if (evlist->threads == NULL) + threads = thread_map__new_str(target->pid, target->tid, target->uid); + + if (!threads) return -1; if (target__uses_dummy_map(target)) - evlist->cpus = cpu_map__dummy_new(); + cpus = cpu_map__dummy_new(); else - evlist->cpus = cpu_map__new(target->cpu_list); + cpus = cpu_map__new(target->cpu_list); - if (evlist->cpus == NULL) + if (!cpus) goto out_delete_threads; evlist->has_user_cpus = !!target->cpu_list; - perf_evlist__propagate_maps(evlist); + perf_evlist__set_maps(evlist, cpus, threads); return 0; out_delete_threads: - thread_map__put(evlist->threads); - evlist->threads = NULL; + thread_map__put(threads); return -1; } -- cgit v0.10.2 From adc0c3e87b0e1baeccabe09b6dba738f17d0e91d Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 8 Sep 2015 10:58:58 +0300 Subject: perf evlist: Factor out a function to propagate maps for a single evsel Subsequent fixes will need a function that just propagates maps for a single evsel so factor it out. 
Signed-off-by: Adrian Hunter Acked-by: Jiri Olsa Cc: Kan Liang Link: http://lkml.kernel.org/r/1441699142-18905-11-git-send-email-adrian.hunter@intel.com [ Moved them to before perf_evlist__add() to avoid having to move it in the next patch ] Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index c17f355..4e840bf 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -124,6 +124,33 @@ void perf_evlist__delete(struct perf_evlist *evlist) free(evlist); } +static void __perf_evlist__propagate_maps(struct perf_evlist *evlist, + struct perf_evsel *evsel) +{ + /* + * We already have cpus for evsel (via PMU sysfs) so + * keep it, if there's no target cpu list defined. + */ + if (!evsel->own_cpus || evlist->has_user_cpus) { + cpu_map__put(evsel->cpus); + evsel->cpus = cpu_map__get(evlist->cpus); + } else if (evsel->cpus != evsel->own_cpus) { + cpu_map__put(evsel->cpus); + evsel->cpus = cpu_map__get(evsel->own_cpus); + } + + thread_map__put(evsel->threads); + evsel->threads = thread_map__get(evlist->threads); +} + +static void perf_evlist__propagate_maps(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel; + + evlist__for_each(evlist, evsel) + __perf_evlist__propagate_maps(evlist, evsel); +} + void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry) { entry->evlist = evlist; @@ -1102,28 +1129,6 @@ int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages, return perf_evlist__mmap_ex(evlist, pages, overwrite, 0, false); } -static void perf_evlist__propagate_maps(struct perf_evlist *evlist) -{ - struct perf_evsel *evsel; - - evlist__for_each(evlist, evsel) { - /* - * We already have cpus for evsel (via PMU sysfs) so - * keep it, if there's no target cpu list defined. 
- */ - if (!evsel->own_cpus || evlist->has_user_cpus) { - cpu_map__put(evsel->cpus); - evsel->cpus = cpu_map__get(evlist->cpus); - } else if (evsel->cpus != evsel->own_cpus) { - cpu_map__put(evsel->cpus); - evsel->cpus = cpu_map__get(evsel->own_cpus); - } - - thread_map__put(evsel->threads); - evsel->threads = thread_map__get(evlist->threads); - } -} - int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target) { struct cpu_map *cpus; -- cgit v0.10.2 From 44c42d71c659527c81bf169808959c9339116d85 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 8 Sep 2015 10:58:59 +0300 Subject: perf evlist: Fix add() not propagating maps If evsels are added after maps are created, then they won't have any maps propagated to them. Fix that. Signed-off-by: Adrian Hunter Acked-by: Jiri Olsa Cc: Kan Liang Link: http://lkml.kernel.org/r/1441699142-18905-12-git-send-email-adrian.hunter@intel.com [ Moved the moving of propagate_maps() to the patch before, so that this one does _just_ the one line fix calling in add()] Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 4e840bf..99267ab 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -160,6 +160,8 @@ void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry) if (!evlist->nr_entries++) perf_evlist__set_id_pos(evlist); + + __perf_evlist__propagate_maps(evlist, entry); } void perf_evlist__splice_list_tail(struct perf_evlist *evlist, -- cgit v0.10.2 From 8c0498b6891d7ca5c379c6283de7fc7fe8eebe5c Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 8 Sep 2015 10:59:00 +0300 Subject: perf evlist: Fix create_syswide_maps() not propagating maps Fix it by making it call perf_evlist__set_maps() instead of setting the maps itself.
Signed-off-by: Adrian Hunter Acked-by: Jiri Olsa Cc: Kan Liang Link: http://lkml.kernel.org/r/1441699142-18905-13-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 99267ab..c8fc8a2 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1400,6 +1400,8 @@ void perf_evlist__close(struct perf_evlist *evlist) static int perf_evlist__create_syswide_maps(struct perf_evlist *evlist) { + struct cpu_map *cpus; + struct thread_map *threads; int err = -ENOMEM; /* @@ -1411,20 +1413,19 @@ static int perf_evlist__create_syswide_maps(struct perf_evlist *evlist) * error, and we may not want to do that fallback to a * default cpu identity map :-\ */ - evlist->cpus = cpu_map__new(NULL); - if (evlist->cpus == NULL) + cpus = cpu_map__new(NULL); + if (!cpus) goto out; - evlist->threads = thread_map__new_dummy(); - if (evlist->threads == NULL) - goto out_free_cpus; + threads = thread_map__new_dummy(); + if (!threads) + goto out_put; - err = 0; + perf_evlist__set_maps(evlist, cpus, threads); out: return err; -out_free_cpus: - cpu_map__put(evlist->cpus); - evlist->cpus = NULL; +out_put: + cpu_map__put(cpus); goto out; } -- cgit v0.10.2 From 2998272275fc31fc3d478ef9c95e7eaef67dafa3 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 8 Sep 2015 10:59:01 +0300 Subject: perf tests: Fix task exit test setting maps The test titled "Test number of exit event of a simple workload" was setting cpu/thread maps directly. Make it use the proper function perf_evlist__set_maps() especially now that it also propagates the maps. 
Signed-off-by: Adrian Hunter Acked-by: Jiri Olsa Cc: Kan Liang Link: http://lkml.kernel.org/r/1441699142-18905-14-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c index 3a8fedef..add1638 100644 --- a/tools/perf/tests/task-exit.c +++ b/tools/perf/tests/task-exit.c @@ -43,6 +43,8 @@ int test__task_exit(void) }; const char *argv[] = { "true", NULL }; char sbuf[STRERR_BUFSIZE]; + struct cpu_map *cpus; + struct thread_map *threads; signal(SIGCHLD, sig_handler); @@ -58,14 +60,19 @@ int test__task_exit(void) * perf_evlist__prepare_workload we'll fill in the only thread * we're monitoring, the one forked there. */ - evlist->cpus = cpu_map__dummy_new(); - evlist->threads = thread_map__new_by_tid(-1); - if (!evlist->cpus || !evlist->threads) { + cpus = cpu_map__dummy_new(); + threads = thread_map__new_by_tid(-1); + if (!cpus || !threads) { err = -ENOMEM; pr_debug("Not enough memory to create thread/cpu maps\n"); - goto out_delete_evlist; + goto out_free_maps; } + perf_evlist__set_maps(evlist, cpus, threads); + + cpus = NULL; + threads = NULL; + err = perf_evlist__prepare_workload(evlist, &target, argv, false, workload_exec_failed_signal); if (err < 0) { @@ -114,6 +121,9 @@ retry: err = -1; } +out_free_maps: + cpu_map__put(cpus); + thread_map__put(threads); out_delete_evlist: perf_evlist__delete(evlist); return err; -- cgit v0.10.2 From c5e6bd2ed3e81df443e4ae11e95ed71ff77bf9e5 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 8 Sep 2015 10:59:02 +0300 Subject: perf tests: Fix software clock events test setting maps The test titled "Test software clock events have valid period values" was setting cpu/thread maps directly. Make it use the proper function perf_evlist__set_maps() especially now that it also propagates the maps. 
Signed-off-by: Adrian Hunter Acked-by: Jiri Olsa Cc: Kan Liang Link: http://lkml.kernel.org/r/1441699142-18905-15-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/tests/sw-clock.c b/tools/perf/tests/sw-clock.c index 1aa21c9..5b83f56 100644 --- a/tools/perf/tests/sw-clock.c +++ b/tools/perf/tests/sw-clock.c @@ -34,6 +34,8 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id) .disabled = 1, .freq = 1, }; + struct cpu_map *cpus; + struct thread_map *threads; attr.sample_freq = 500; @@ -50,14 +52,19 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id) } perf_evlist__add(evlist, evsel); - evlist->cpus = cpu_map__dummy_new(); - evlist->threads = thread_map__new_by_tid(getpid()); - if (!evlist->cpus || !evlist->threads) { + cpus = cpu_map__dummy_new(); + threads = thread_map__new_by_tid(getpid()); + if (!cpus || !threads) { err = -ENOMEM; pr_debug("Not enough memory to create thread/cpu maps\n"); - goto out_delete_evlist; + goto out_free_maps; } + perf_evlist__set_maps(evlist, cpus, threads); + + cpus = NULL; + threads = NULL; + if (perf_evlist__open(evlist)) { const char *knob = "/proc/sys/kernel/perf_event_max_sample_rate"; @@ -107,6 +114,9 @@ next_event: err = -1; } +out_free_maps: + cpu_map__put(cpus); + thread_map__put(threads); out_delete_evlist: perf_evlist__delete(evlist); return err; -- cgit v0.10.2 From 2314ee4d444e28d4670ff80c84df68c25887decb Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Fri, 21 Aug 2015 04:40:22 +0100 Subject: arm64: enable generic idle loop Enable generic idle loop for ARM64, so can support for hlt/nohlt command line options to override default idle loop behavior. 
Acked-by: Catalin Marinas Signed-off-by: Leo Yan Signed-off-by: Will Deacon diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 7d95663..8b6e378 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -32,6 +32,7 @@ config ARM64 select GENERIC_CLOCKEVENTS_BROADCAST select GENERIC_CPU_AUTOPROBE select GENERIC_EARLY_IOREMAP + select GENERIC_IDLE_POLL_SETUP select GENERIC_IRQ_PROBE select GENERIC_IRQ_SHOW select GENERIC_IRQ_SHOW_LEVEL -- cgit v0.10.2 From d10bcd473301888f957ec4b6b12aa3621be78d59 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 2 Sep 2015 18:49:28 +0100 Subject: arm64: head.S: initialise mdcr_el2 in el2_setup When entering the kernel at EL2, we fail to initialise the MDCR_EL2 register which controls debug access and PMU capabilities at EL1. This patch ensures that the register is initialised so that all traps are disabled and all the PMU counters are available to the host. When a guest is scheduled, KVM takes care to configure trapping appropriately. Cc: Acked-by: Marc Zyngier Signed-off-by: Will Deacon diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index a055be6..90d09ed 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -523,6 +523,11 @@ CPU_LE( movk x0, #0x30d0, lsl #16 ) // Clear EE and E0E on LE systems msr hstr_el2, xzr // Disable CP15 traps to EL2 #endif + /* EL2 debug */ + mrs x0, pmcr_el0 // Disable debug access traps + ubfx x0, x0, #11, #5 // to EL2 and allow access to + msr mdcr_el2, x0 // all PMU counters from EL1 + /* Stage-2 translation */ msr vttbr_el2, xzr -- cgit v0.10.2 From 09a77a885233e2a20dac2635a79c83ccf50a26a1 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 15 Sep 2015 16:03:36 +0100 Subject: modsign: Fix GPL/OpenSSL licence incompatibility The GPL does not permit us to link against the OpenSSL library. Use LGPL for sign-file and extract-file instead. 
[ The whole "openssl isn't compatible with gpl" is really just fear-mongering, but there's no reason not to make modsign LGPL, so nobody cares. - Linus ] Reported-by: Julian Andres Klode Signed-off-by: David Woodhouse Signed-off-by: David Howells Reviewed-by: Julian Andres Klode Signed-off-by: Linus Torvalds diff --git a/scripts/extract-cert.c b/scripts/extract-cert.c index 10d23ca..6ce5945 100644 --- a/scripts/extract-cert.c +++ b/scripts/extract-cert.c @@ -1,15 +1,15 @@ /* Extract X.509 certificate in DER form from PKCS#11 or PEM. * - * Copyright © 2014 Red Hat, Inc. All Rights Reserved. - * Copyright © 2015 Intel Corporation. + * Copyright © 2014-2015 Red Hat, Inc. All Rights Reserved. + * Copyright © 2015 Intel Corporation. * * Authors: David Howells * David Woodhouse * * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public Licence - * as published by the Free Software Foundation; either version - * 2 of the Licence, or (at your option) any later version. + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the licence, or (at your option) any later version. */ #define _GNU_SOURCE #include diff --git a/scripts/sign-file.c b/scripts/sign-file.c index 058bba3..c3899ca 100755 --- a/scripts/sign-file.c +++ b/scripts/sign-file.c @@ -1,12 +1,15 @@ /* Sign a module file using the given key. * - * Copyright (C) 2014 Red Hat, Inc. All Rights Reserved. - * Written by David Howells (dhowells@redhat.com) + * Copyright © 2014-2015 Red Hat, Inc. All Rights Reserved. + * Copyright © 2015 Intel Corporation. + * + * Authors: David Howells + * David Woodhouse * * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public Licence - * as published by the Free Software Foundation; either version - * 2 of the Licence, or (at your option) any later version. 
+ * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the licence, or (at your option) any later version. */ #define _GNU_SOURCE #include -- cgit v0.10.2 From 865ca084fdc68cd9b658da4b098008278da8fed3 Mon Sep 17 00:00:00 2001 From: "Luck, Tony" Date: Tue, 15 Sep 2015 13:50:18 -0700 Subject: ia64: Enable userfaultfd and membarrier system calls Signed-off-by: Tony Luck Signed-off-by: Linus Torvalds diff --git a/arch/ia64/include/asm/unistd.h b/arch/ia64/include/asm/unistd.h index 95c39b9..99c96a5 100644 --- a/arch/ia64/include/asm/unistd.h +++ b/arch/ia64/include/asm/unistd.h @@ -11,7 +11,7 @@ -#define NR_syscalls 319 /* length of syscall table */ +#define NR_syscalls 321 /* length of syscall table */ /* * The following defines stop scripts/checksyscalls.sh from complaining about diff --git a/arch/ia64/include/uapi/asm/unistd.h b/arch/ia64/include/uapi/asm/unistd.h index 4610795..98e94e1 100644 --- a/arch/ia64/include/uapi/asm/unistd.h +++ b/arch/ia64/include/uapi/asm/unistd.h @@ -332,5 +332,7 @@ #define __NR_memfd_create 1340 #define __NR_bpf 1341 #define __NR_execveat 1342 +#define __NR_userfaultfd 1343 +#define __NR_membarrier 1344 #endif /* _UAPI_ASM_IA64_UNISTD_H */ diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index ae0de7b..37cc7a6 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -1768,5 +1768,7 @@ sys_call_table: data8 sys_memfd_create // 1340 data8 sys_bpf data8 sys_execveat + data8 sys_userfaultfd + data8 sys_membarrier .org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls -- cgit v0.10.2 From 03da3ff1cfcd7774c8780d2547ba0d995f7dc03d Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 16 Sep 2015 14:10:03 +0100 Subject: x86/platform: Fix Geode LX timekeeping in the generic x86 build In 2007, commit 07190a08eef36 ("Mark TSC on GeodeLX reliable") bypassed verification of the TSC on Geode 
LX. However, this code (now in the check_system_tsc_reliable() function in arch/x86/kernel/tsc.c) was only present if CONFIG_MGEODE_LX was set. OpenWRT has recently started building its generic Geode target for Geode GX, not LX, to include support for additional platforms. This broke the timekeeping on LX-based devices, because the TSC wasn't marked as reliable: https://dev.openwrt.org/ticket/20531 By adding a runtime check on is_geode_lx(), we can also include the fix if CONFIG_MGEODEGX1 or CONFIG_X86_GENERIC are set, thus fixing the problem. Signed-off-by: David Woodhouse Cc: Andres Salomon Cc: Linus Torvalds Cc: Marcelo Tosatti Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/1442409003.131189.87.camel@infradead.org Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 79055cf..51e62d6 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -21,6 +21,7 @@ #include #include #include +#include unsigned int __read_mostly cpu_khz; /* TSC clocks / usec, not used here */ EXPORT_SYMBOL(cpu_khz); @@ -1015,15 +1016,17 @@ EXPORT_SYMBOL_GPL(mark_tsc_unstable); static void __init check_system_tsc_reliable(void) { -#ifdef CONFIG_MGEODE_LX - /* RTSC counts during suspend */ +#if defined(CONFIG_MGEODEGX1) || defined(CONFIG_MGEODE_LX) || defined(CONFIG_X86_GENERIC) + if (is_geode_lx()) { + /* RTSC counts during suspend */ #define RTSC_SUSP 0x100 - unsigned long res_low, res_high; + unsigned long res_low, res_high; - rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high); - /* Geode_LX - the OLPC CPU has a very reliable TSC */ - if (res_low & RTSC_SUSP) - tsc_clocksource_reliable = 1; + rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high); + /* Geode_LX - the OLPC CPU has a very reliable TSC */ + if (res_low & RTSC_SUSP) + tsc_clocksource_reliable = 1; + } #endif if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) tsc_clocksource_reliable = 1; -- cgit v0.10.2 From 
e56d82a116176f7af9d642b560abbbd3a2b68013 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 11 Sep 2015 15:31:24 +0100 Subject: arm64: cpu hotplug: ensure we mask out CPU_TASKS_FROZEN in notifiers We have a couple of CPU hotplug notifiers for resetting the CPU debug state to a sane value when a CPU comes online. This patch ensures that we mask out CPU_TASKS_FROZEN so that we don't miss any online events occurring due to suspend/resume. Acked-by: Lorenzo Pieralisi Signed-off-by: Will Deacon diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 9b3b62a..cebf786 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -134,7 +134,7 @@ static int os_lock_notify(struct notifier_block *self, unsigned long action, void *data) { int cpu = (unsigned long)data; - if (action == CPU_ONLINE) + if ((action & ~CPU_TASKS_FROZEN) == CPU_ONLINE) smp_call_function_single(cpu, clear_os_lock, NULL, 1); return NOTIFY_OK; } diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index c97040e..bba85c8 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -872,7 +872,7 @@ static int hw_breakpoint_reset_notify(struct notifier_block *self, void *hcpu) { int cpu = (long)hcpu; - if (action == CPU_ONLINE) + if ((action & ~CPU_TASKS_FROZEN) == CPU_ONLINE) smp_call_function_single(cpu, hw_breakpoint_reset, NULL, 1); return NOTIFY_OK; } -- cgit v0.10.2 From bdec97a855ef1e239f130f7a11584721c9a1bf04 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 15 Sep 2015 12:07:06 +0100 Subject: arm64: compat: fix vfp save/restore across signal handlers in big-endian When saving/restoring the VFP registers from a compat (AArch32) signal frame, we rely on the compat registers forming a prefix of the native register file and therefore make use of copy_{to,from}_user to transfer between the native fpsimd_state and the compat_vfp_sigframe.
Unfortunately, this doesn't work so well in a big-endian environment. Our fpsimd save/restore code operates directly on 128-bit quantities (Q registers) whereas the compat_vfp_sigframe represents the registers as an array of 64-bit (D) registers. The architecture packs the compat D registers into the Q registers, with the least significant bytes holding the lower register. Consequently, we need to swap the 64-bit halves when converting between these two representations on a big-endian machine. This patch replaces the __copy_{to,from}_user invocations in our compat VFP signal handling code with explicit __put_user loops that operate on 64-bit values and swap them accordingly. Cc: Reviewed-by: Catalin Marinas Signed-off-by: Will Deacon diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index 948f0ad..71ef6dc 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -212,14 +212,32 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) /* * VFP save/restore code. + * + * We have to be careful with endianness, since the fpsimd context-switch + * code operates on 128-bit (Q) register values whereas the compat ABI + * uses an array of 64-bit (D) registers. Consequently, we need to swap + * the two halves of each Q register when running on a big-endian CPU. */ +union __fpsimd_vreg { + __uint128_t raw; + struct { +#ifdef __AARCH64EB__ + u64 hi; + u64 lo; +#else + u64 lo; + u64 hi; +#endif + }; +}; + static int compat_preserve_vfp_context(struct compat_vfp_sigframe __user *frame) { struct fpsimd_state *fpsimd = &current->thread.fpsimd_state; compat_ulong_t magic = VFP_MAGIC; compat_ulong_t size = VFP_STORAGE_SIZE; compat_ulong_t fpscr, fpexc; - int err = 0; + int i, err = 0; /* * Save the hardware registers to the fpsimd_state structure. @@ -235,10 +253,15 @@ static int compat_preserve_vfp_context(struct compat_vfp_sigframe __user *frame) /* * Now copy the FP registers.
Since the registers are packed, * we can copy the prefix we want (V0-V15) as it is. - * FIXME: Won't work if big endian. */ - err |= __copy_to_user(&frame->ufp.fpregs, fpsimd->vregs, - sizeof(frame->ufp.fpregs)); + for (i = 0; i < ARRAY_SIZE(frame->ufp.fpregs); i += 2) { + union __fpsimd_vreg vreg = { + .raw = fpsimd->vregs[i >> 1], + }; + + __put_user_error(vreg.lo, &frame->ufp.fpregs[i], err); + __put_user_error(vreg.hi, &frame->ufp.fpregs[i + 1], err); + } /* Create an AArch32 fpscr from the fpsr and the fpcr. */ fpscr = (fpsimd->fpsr & VFP_FPSCR_STAT_MASK) | @@ -263,7 +286,7 @@ static int compat_restore_vfp_context(struct compat_vfp_sigframe __user *frame) compat_ulong_t magic = VFP_MAGIC; compat_ulong_t size = VFP_STORAGE_SIZE; compat_ulong_t fpscr; - int err = 0; + int i, err = 0; __get_user_error(magic, &frame->magic, err); __get_user_error(size, &frame->size, err); @@ -273,12 +296,14 @@ static int compat_restore_vfp_context(struct compat_vfp_sigframe __user *frame) if (magic != VFP_MAGIC || size != VFP_STORAGE_SIZE) return -EINVAL; - /* - * Copy the FP registers into the start of the fpsimd_state. - * FIXME: Won't work if big endian. - */ - err |= __copy_from_user(fpsimd.vregs, frame->ufp.fpregs, - sizeof(frame->ufp.fpregs)); + /* Copy the FP registers into the start of the fpsimd_state. 
*/ + for (i = 0; i < ARRAY_SIZE(frame->ufp.fpregs); i += 2) { + union __fpsimd_vreg vreg; + + __get_user_error(vreg.lo, &frame->ufp.fpregs[i], err); + __get_user_error(vreg.hi, &frame->ufp.fpregs[i + 1], err); + fpsimd.vregs[i >> 1] = vreg.raw; + } /* Extract the fpsr and the fpcr from the fpscr */ __get_user_error(fpscr, &frame->ufp.fpscr, err); -- cgit v0.10.2 From df057cc7b4fa59e9b55f07ffdb6c62bf02e99a00 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 17 Mar 2015 12:15:02 +0000 Subject: arm64: errata: add module build workaround for erratum #843419 Cortex-A53 processors <= r0p4 are affected by erratum #843419 which can lead to a memory access using an incorrect address in certain sequences headed by an ADRP instruction. There is a linker fix to generate veneers for ADRP instructions, but this doesn't work for kernel modules which are built as unlinked ELF objects. This patch adds a new config option for the erratum which, when enabled, builds kernel modules with the mcmodel=large flag. This uses absolute addressing for all kernel symbols, thereby removing the use of ADRP as a PC-relative form of addressing. The ADRP relocs are removed from the module loader so that we fail to load any potentially affected modules. Cc: Acked-by: Catalin Marinas Signed-off-by: Will Deacon diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 8b6e378..07d1811 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -332,6 +332,22 @@ config ARM64_ERRATUM_845719 If unsure, say Y. +config ARM64_ERRATUM_843419 + bool "Cortex-A53: 843419: A load or store might access an incorrect address" + depends on MODULES + default y + help + This option builds kernel modules using the large memory model in + order to avoid the use of the ADRP instruction, which can cause + a subsequent memory access to use an incorrect address on Cortex-A53 + parts up to r0p4. 
+ + Note that the kernel itself must be linked with a version of ld + which fixes potentially affected ADRP instructions through the + use of veneers. + + If unsure, say Y. + endmenu diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 15ff5b4..f9914d7 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -41,6 +41,10 @@ endif CHECKFLAGS += -D__aarch64__ +ifeq ($(CONFIG_ARM64_ERRATUM_843419), y) +CFLAGS_MODULE += -mcmodel=large +endif + # Default value head-y := arch/arm64/kernel/head.o diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index 67bf410..876eb8d 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -332,12 +332,14 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 0, 21, AARCH64_INSN_IMM_ADR); break; +#ifndef CONFIG_ARM64_ERRATUM_843419 case R_AARCH64_ADR_PREL_PG_HI21_NC: overflow_check = false; case R_AARCH64_ADR_PREL_PG_HI21: ovf = reloc_insn_imm(RELOC_OP_PAGE, loc, val, 12, 21, AARCH64_INSN_IMM_ADR); break; +#endif case R_AARCH64_ADD_ABS_LO12_NC: case R_AARCH64_LDST8_ABS_LO12_NC: overflow_check = false; -- cgit v0.10.2 From 7f61f545657281a3a1b0faf68993165ebdecc51b Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 14 Sep 2015 16:01:05 +0300 Subject: libceph: don't access invalid memory in keepalive2 path This struct ceph_timespec ceph_ts; ... con_out_kvec_add(con, sizeof(ceph_ts), &ceph_ts); wraps ceph_ts into a kvec and adds it to con->out_kvec array, yet ceph_ts becomes invalid on return from prepare_write_keepalive(). As a result, we send out bogus keepalive2 stamps. Fix this by encoding into a ceph_timespec member, similar to how acks are read and written. 
Signed-off-by: Ilya Dryomov Reviewed-by: Yan, Zheng diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 7e1252e..b2371d9 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -238,6 +238,8 @@ struct ceph_connection { bool out_kvec_is_msg; /* kvec refers to out_msg */ int out_more; /* there is more data after the kvecs */ __le64 out_temp_ack; /* for writing an ack */ + struct ceph_timespec out_temp_keepalive2; /* for writing keepalive2 + stamp */ /* message in temps */ struct ceph_msg_header in_hdr; @@ -248,7 +250,7 @@ struct ceph_connection { int in_base_pos; /* bytes read */ __le64 in_temp_ack; /* for reading an ack */ - struct timespec last_keepalive_ack; + struct timespec last_keepalive_ack; /* keepalive2 ack stamp */ struct delayed_work work; /* send|recv work */ unsigned long delay; /* current delay interval */ diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 525f454..b9b0e3b 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -1353,11 +1353,12 @@ static void prepare_write_keepalive(struct ceph_connection *con) dout("prepare_write_keepalive %p\n", con); con_out_kvec_reset(con); if (con->peer_features & CEPH_FEATURE_MSGR_KEEPALIVE2) { - struct timespec ts = CURRENT_TIME; - struct ceph_timespec ceph_ts; - ceph_encode_timespec(&ceph_ts, &ts); + struct timespec now = CURRENT_TIME; + con_out_kvec_add(con, sizeof(tag_keepalive2), &tag_keepalive2); - con_out_kvec_add(con, sizeof(ceph_ts), &ceph_ts); + ceph_encode_timespec(&con->out_temp_keepalive2, &now); + con_out_kvec_add(con, sizeof(con->out_temp_keepalive2), + &con->out_temp_keepalive2); } else { con_out_kvec_add(con, sizeof(tag_keepalive), &tag_keepalive); } -- cgit v0.10.2 From 335c25858218e76ef47f92ecb9d22e919d36140d Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 14 Sep 2015 12:44:22 +0300 Subject: libceph: advertise support for keepalive2 We are the client, but advertise keepalive2 anyway - for consistency, if 
nothing else. In the future the server might want to know whether its clients support keepalive2. Signed-off-by: Ilya Dryomov Reviewed-by: Yan, Zheng diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h index 4763ad6..f89b31d 100644 --- a/include/linux/ceph/ceph_features.h +++ b/include/linux/ceph/ceph_features.h @@ -107,6 +107,7 @@ static inline u64 ceph_sanitize_features(u64 features) CEPH_FEATURE_OSDMAP_ENC | \ CEPH_FEATURE_CRUSH_TUNABLES3 | \ CEPH_FEATURE_OSD_PRIMARY_AFFINITY | \ + CEPH_FEATURE_MSGR_KEEPALIVE2 | \ CEPH_FEATURE_CRUSH_V4) #define CEPH_FEATURES_REQUIRED_DEFAULT \ -- cgit v0.10.2 From 8a1513b49321e503fd6c8b6793e3b1f9a8a3285b Mon Sep 17 00:00:00 2001 From: Kyle Evans Date: Fri, 11 Sep 2015 10:40:17 -0500 Subject: hp-wmi: limit hotkey enable Do not write initialize magic on systems that do not have feature query 0xb. Fixes Bug #82451. Redefine FEATURE_QUERY to align with 0xb and FEATURE2 with 0xd for code clarity. Add a new test function, hp_wmi_bios_2008_later() & simplify hp_wmi_bios_2009_later(), which fixes a bug in cases where an improper value is returned. Probably also fixes Bug #69131. Add missing __init tag. Signed-off-by: Kyle Evans Cc: stable@vger.kernel.org Signed-off-by: Darren Hart diff --git a/drivers/platform/x86/hp-wmi.c b/drivers/platform/x86/hp-wmi.c index 0669731..fb4dd7b 100644 --- a/drivers/platform/x86/hp-wmi.c +++ b/drivers/platform/x86/hp-wmi.c @@ -54,8 +54,9 @@ MODULE_ALIAS("wmi:5FB7F034-2C63-45e9-BE91-3D44E2C707E4"); #define HPWMI_HARDWARE_QUERY 0x4 #define HPWMI_WIRELESS_QUERY 0x5 #define HPWMI_BIOS_QUERY 0x9 +#define HPWMI_FEATURE_QUERY 0xb #define HPWMI_HOTKEY_QUERY 0xc -#define HPWMI_FEATURE_QUERY 0xd +#define HPWMI_FEATURE2_QUERY 0xd #define HPWMI_WIRELESS2_QUERY 0x1b #define HPWMI_POSTCODEERROR_QUERY 0x2a @@ -295,25 +296,33 @@ static int hp_wmi_tablet_state(void) return (state & 0x4) ?
1 : 0; } -static int __init hp_wmi_bios_2009_later(void) +static int __init hp_wmi_bios_2008_later(void) { int state = 0; int ret = hp_wmi_perform_query(HPWMI_FEATURE_QUERY, 0, &state, sizeof(state), sizeof(state)); - if (ret) - return ret; + if (!ret) + return 1; - return (state & 0x10) ? 1 : 0; + return (ret == HPWMI_RET_UNKNOWN_CMDTYPE) ? 0 : -ENXIO; } -static int hp_wmi_enable_hotkeys(void) +static int __init hp_wmi_bios_2009_later(void) { - int ret; - int query = 0x6e; + int state = 0; + int ret = hp_wmi_perform_query(HPWMI_FEATURE2_QUERY, 0, &state, + sizeof(state), sizeof(state)); + if (!ret) + return 1; - ret = hp_wmi_perform_query(HPWMI_BIOS_QUERY, 1, &query, sizeof(query), - 0); + return (ret == HPWMI_RET_UNKNOWN_CMDTYPE) ? 0 : -ENXIO; +} +static int __init hp_wmi_enable_hotkeys(void) +{ + int value = 0x6e; + int ret = hp_wmi_perform_query(HPWMI_BIOS_QUERY, 1, &value, + sizeof(value), 0); if (ret) return -EINVAL; return 0; @@ -663,7 +672,7 @@ static int __init hp_wmi_input_setup(void) hp_wmi_tablet_state()); input_sync(hp_wmi_input_dev); - if (hp_wmi_bios_2009_later() == 4) + if (!hp_wmi_bios_2009_later() && hp_wmi_bios_2008_later()) hp_wmi_enable_hotkeys(); status = wmi_install_notify_handler(HPWMI_EVENT_GUID, hp_wmi_notify, NULL); -- cgit v0.10.2 From 3aaf14da807a4e9931a37f21e4251abb8a67021b Mon Sep 17 00:00:00 2001 From: Luis Henriques Date: Thu, 17 Sep 2015 16:01:40 -0700 Subject: zram: fix possible use after free in zcomp_create() zcomp_create() verifies the success of zcomp_strm_{multi,single}_create() through comp->stream, which can potentially be pointing to memory that was freed if these functions returned an error. While at it, replace a 'ERR_PTR(-ENOMEM)' by a more generic 'ERR_PTR(error)' as in the future zcomp_strm_{multi,single}_create() could return other error codes. Function documentation updated accordingly.
Fixes: beca3ec71fe5 ("zram: add multi stream functionality") Signed-off-by: Luis Henriques Acked-by: Sergey Senozhatsky Acked-by: Minchan Kim Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c index 965d1af..5cb13ca 100644 --- a/drivers/block/zram/zcomp.c +++ b/drivers/block/zram/zcomp.c @@ -330,12 +330,14 @@ void zcomp_destroy(struct zcomp *comp) * allocate new zcomp and initialize it. return compressing * backend pointer or ERR_PTR if things went bad. ERR_PTR(-EINVAL) * if requested algorithm is not supported, ERR_PTR(-ENOMEM) in - * case of allocation error. + * case of allocation error, or any other error potentially + * returned by functions zcomp_strm_{multi,single}_create. */ struct zcomp *zcomp_create(const char *compress, int max_strm) { struct zcomp *comp; struct zcomp_backend *backend; + int error; backend = find_backend(compress); if (!backend) @@ -347,12 +349,12 @@ struct zcomp *zcomp_create(const char *compress, int max_strm) comp->backend = backend; if (max_strm > 1) - zcomp_strm_multi_create(comp, max_strm); + error = zcomp_strm_multi_create(comp, max_strm); else - zcomp_strm_single_create(comp); - if (!comp->stream) { + error = zcomp_strm_single_create(comp); + if (error) { kfree(comp); - return ERR_PTR(-ENOMEM); + return ERR_PTR(error); } return comp; } -- cgit v0.10.2 From 8d77a6d18ae9ccfd5eee1cc551ee4ac27fd41464 Mon Sep 17 00:00:00 2001 From: Xishi Qiu Date: Thu, 17 Sep 2015 16:01:43 -0700 Subject: kasan: fix last shadow judgement in memory_is_poisoned_16() The shadow which correspond 16 bytes memory may span 2 or 3 bytes. If the memory is aligned on 8, then the shadow takes only 2 bytes. So we check "shadow_first_bytes" is enough, and need not to call "memory_is_poisoned_1(addr + 15);". But the code "if (likely(!last_byte))" is wrong judgement. e.g. 
addr=0, so last_byte = 15 & KASAN_SHADOW_MASK = 7, then the code will continue to call "memory_is_poisoned_1(addr + 15);" Signed-off-by: Xishi Qiu Acked-by: Andrey Ryabinin Cc: Andrey Konovalov Cc: Rusty Russell Cc: Michal Marek Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c index 7b28e9c..8da2114 100644 --- a/mm/kasan/kasan.c +++ b/mm/kasan/kasan.c @@ -135,12 +135,11 @@ static __always_inline bool memory_is_poisoned_16(unsigned long addr) if (unlikely(*shadow_addr)) { u16 shadow_first_bytes = *(u16 *)shadow_addr; - s8 last_byte = (addr + 15) & KASAN_SHADOW_MASK; if (unlikely(shadow_first_bytes)) return true; - if (likely(!last_byte)) + if (likely(IS_ALIGNED(addr, 8))) return false; return memory_is_poisoned_1(addr + 15); -- cgit v0.10.2 From 969560d2afca76823cf97ec4f5c0fb7833e18553 Mon Sep 17 00:00:00 2001 From: Sudip Mukherjee Date: Thu, 17 Sep 2015 16:01:46 -0700 Subject: alpha: io: define ioremap_uc ioremap_uc was not defined and as a result while building with allmodconfig were getting build error of: implicit declaration of function 'ioremap_uc'. Signed-off-by: Sudip Mukherjee Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Matt Turner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/alpha/include/asm/io.h b/arch/alpha/include/asm/io.h index f05bdb4..ff40491 100644 --- a/arch/alpha/include/asm/io.h +++ b/arch/alpha/include/asm/io.h @@ -297,7 +297,9 @@ static inline void __iomem * ioremap_nocache(unsigned long offset, unsigned long size) { return ioremap(offset, size); -} +} + +#define ioremap_uc ioremap_nocache static inline void iounmap(volatile void __iomem *addr) { -- cgit v0.10.2 From 14b97deddf8ddecce9f35165b667c55c73e14638 Mon Sep 17 00:00:00 2001 From: Sudip Mukherjee Date: Thu, 17 Sep 2015 16:01:49 -0700 Subject: alpha: lib: export __delay __delay was not exported as a result while building with allmodconfig we were getting build error of undefined symbol. 
__delay is being used by: drivers/net/phy/mdio-octeon.c Signed-off-by: Sudip Mukherjee Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Matt Turner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/alpha/lib/udelay.c b/arch/alpha/lib/udelay.c index 69d52aa..f2d81ff 100644 --- a/arch/alpha/lib/udelay.c +++ b/arch/alpha/lib/udelay.c @@ -30,6 +30,7 @@ __delay(int loops) " bgt %0,1b" : "=&r" (tmp), "=r" (loops) : "1"(loops)); } +EXPORT_SYMBOL(__delay); #ifdef CONFIG_SMP #define LPJ cpu_data[smp_processor_id()].loops_per_jiffy -- cgit v0.10.2 From 62bef58a55dfa8ada2a22b2496c6340468ecd98a Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Thu, 17 Sep 2015 16:01:51 -0700 Subject: lib/string_helpers.c: fix infinite loop in string_get_size() Some string_get_size() calls (e.g.: string_get_size(1, 512, STRING_UNITS_10, ..., ...) string_get_size(15, 64, STRING_UNITS_10, ..., ...) ) result in an infinite loop. The problem is that if size is equal to divisor[units]/blk_size and is smaller than divisor[units] we'll end up with size == 0 when we start doing sf_cap calculations: For string_get_size(1, 512, STRING_UNITS_10, ..., ...) case: ... remainder = do_div(size, divisor[units]); -> size is 0, remainder is 1 remainder *= blk_size; -> remainder is 512 ... size *= blk_size; -> size is still 0 size += remainder / divisor[units]; -> size is still 0 The caller causing the issue is sd_read_capacity(). The problem was noticed on Hyper-V, where such a weird size was reported by the host when scanning collides with device removal. This is probably a separate issue worth fixing; this patch is intended to prevent the library routine from looping infinitely. Signed-off-by: Vitaly Kuznetsov Acked-by: James Bottomley Cc: Andy Shevchenko Cc: Rasmus Villemoes Cc: "K. Y.
Srinivasan" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/lib/string_helpers.c b/lib/string_helpers.c index 54036ce..5939f63 100644 --- a/lib/string_helpers.c +++ b/lib/string_helpers.c @@ -59,7 +59,11 @@ void string_get_size(u64 size, u64 blk_size, const enum string_size_units units, } exp = divisor[units] / (u32)blk_size; - if (size >= exp) { + /* + * size must be strictly greater than exp here to ensure that remainder + * is greater than divisor[units] coming out of the if below. + */ + if (size > exp) { remainder = do_div(size, divisor[units]); remainder *= blk_size; i++; -- cgit v0.10.2 From c03e946fdd653c4a23e242aca83da7e9838f5b00 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 17 Sep 2015 16:01:54 -0700 Subject: userfaultfd: add missing mmput() in error path This fixes a memleak if anon_inode_getfile() fails in userfaultfd(). Signed-off-by: Eric Biggers Signed-off-by: Andrea Arcangeli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 634e676..f9aeb40 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -1287,8 +1287,10 @@ static struct file *userfaultfd_file_create(int flags) file = anon_inode_getfile("[userfaultfd]", &userfaultfd_fops, ctx, O_RDWR | (flags & UFFD_SHARED_FCNTL_FLAGS)); - if (IS_ERR(file)) + if (IS_ERR(file)) { + mmput(ctx->mm); kmem_cache_free(userfaultfd_ctx_cachep, ctx); + } out: return file; } -- cgit v0.10.2 From 0526109a24eb07984f9e79852767300c8b8144de Mon Sep 17 00:00:00 2001 From: Cyril Hrubis Date: Thu, 17 Sep 2015 16:01:57 -0700 Subject: MAINTAINERS: update LTP mailing list [akpm@linux-foundation.org: Wanlong Gao has moved] Signed-off-by: Cyril Hrubis Cc: Jan Stancek Cc: Stanislav Kholmanskikh Cc: Alexey Kodanev Cc: Wanlong Gao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/MAINTAINERS b/MAINTAINERS index 7ba7ab7..274f854 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6452,11 +6452,11 @@ F: 
drivers/hwmon/ltc4261.c LTP (Linux Test Project) M: Mike Frysinger M: Cyril Hrubis -M: Wanlong Gao +M: Wanlong Gao M: Jan Stancek M: Stanislav Kholmanskikh M: Alexey Kodanev -L: ltp-list@lists.sourceforge.net (subscribers-only) +L: ltp@lists.linux.it (subscribers-only) W: http://linux-test-project.github.io/ T: git git://github.com/linux-test-project/ltp.git S: Maintained -- cgit v0.10.2 From 28c553d0aa0acf02e18f9e008661491a4b996595 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 17 Sep 2015 16:02:00 -0700 Subject: revert "mm: make sure all file VMAs have ->vm_ops set" Revert commit 6dc296e7df4c "mm: make sure all file VMAs have ->vm_ops set". Will Deacon reports that it "causes some mmap regressions in LTP, which appears to use a MAP_PRIVATE mmap of /dev/zero as a way to get anonymous pages in some of its tests (specifically mmap10 [1])". William Shuman reports Oracle crashes. So revert the patch while we work out what to do. Reported-by: William Shuman Reported-by: Will Deacon Cc: Kirill A. Shutemov Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/mmap.c b/mm/mmap.c index 971dd2c..c739d6d 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -612,8 +612,6 @@ static unsigned long count_vma_pages_range(struct mm_struct *mm, void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma, struct rb_node **rb_link, struct rb_node *rb_parent) { - WARN_ONCE(vma->vm_file && !vma->vm_ops, "missing vma->vm_ops"); - /* Update tracking information for the gap following the new vma. */ if (vma->vm_next) vma_gap_update(vma->vm_next); @@ -1638,12 +1636,6 @@ unsigned long mmap_region(struct file *file, unsigned long addr, */ WARN_ON_ONCE(addr != vma->vm_start); - /* All file mapping must have ->vm_ops set */ - if (!vma->vm_ops) { - static const struct vm_operations_struct dummy_ops = {}; - vma->vm_ops = &dummy_ops; - } - addr = vma->vm_start; vm_flags = vma->vm_flags; } else if (vm_flags & VM_SHARED) { -- cgit v0.10.2