From d5dcafee5f183e9aedddb147a89cb46ab038f26b Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Fri, 10 Jun 2016 10:56:44 +0200
Subject: s390/mm: no local TLB flush for clearing-by-ASCE IDTE

The local-clearing control of the IDTE instruction does not have any effect
for the clearing-by-ASCE operation. Only the invalidation-and-clearing
operation respects the local-clearing bit.

Remove __tlb_flush_idte_local and simplify the batched TLB flushing code.

Reviewed-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h
index 1a691ef..490014c 100644
--- a/arch/s390/include/asm/tlbflush.h
+++ b/arch/s390/include/asm/tlbflush.h
@@ -26,17 +26,6 @@ static inline void __tlb_flush_idte(unsigned long asce)
 		: : "a" (2048), "a" (asce) : "cc");
 }
 
-/*
- * Flush TLB entries for a specific ASCE on the local CPU
- */
-static inline void __tlb_flush_idte_local(unsigned long asce)
-{
-	/* Local TLB flush for the mm */
-	asm volatile(
-		"	.insn	rrf,0xb98e0000,0,%0,%1,1"
-		: : "a" (2048), "a" (asce) : "cc");
-}
-
 #ifdef CONFIG_SMP
 void smp_ptlb_all(void);
 
@@ -65,9 +54,7 @@ static inline void __tlb_flush_full(struct mm_struct *mm)
 		/* Global TLB flush */
 		__tlb_flush_global();
 		/* Reset TLB flush mask */
-		if (MACHINE_HAS_TLB_LC)
-			cpumask_copy(mm_cpumask(mm),
-				     &mm->context.cpu_attach_mask);
+		cpumask_copy(mm_cpumask(mm), &mm->context.cpu_attach_mask);
 	}
 	atomic_dec(&mm->context.flush_count);
 	preempt_enable();
@@ -81,19 +68,12 @@ static inline void __tlb_flush_asce(struct mm_struct *mm, unsigned long asce)
 {
 	preempt_disable();
 	atomic_inc(&mm->context.flush_count);
-	if (MACHINE_HAS_TLB_LC &&
-	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) {
-		__tlb_flush_idte_local(asce);
-	} else {
-		if (MACHINE_HAS_IDTE)
-			__tlb_flush_idte(asce);
-		else
-			__tlb_flush_global();
-		/* Reset TLB flush mask */
-		if (MACHINE_HAS_TLB_LC)
-			cpumask_copy(mm_cpumask(mm),
-				     &mm->context.cpu_attach_mask);
-	}
+	if (MACHINE_HAS_IDTE)
+		__tlb_flush_idte(asce);
+	else
+		__tlb_flush_global();
+	/* Reset TLB flush mask */
+	cpumask_copy(mm_cpumask(mm), &mm->context.cpu_attach_mask);
 	atomic_dec(&mm->context.flush_count);
 	preempt_enable();
 }
@@ -114,18 +94,12 @@ static inline void __tlb_flush_kernel(void)
  */
 static inline void __tlb_flush_asce(struct mm_struct *mm, unsigned long asce)
 {
-	if (MACHINE_HAS_TLB_LC)
-		__tlb_flush_idte_local(asce);
-	else
-		__tlb_flush_local();
+	__tlb_flush_local();
 }
 
 static inline void __tlb_flush_kernel(void)
 {
-	if (MACHINE_HAS_TLB_LC)
-		__tlb_flush_idte_local(init_mm.context.asce);
-	else
-		__tlb_flush_local();
+	__tlb_flush_local();
 }
 #endif
 
-- 
cgit v0.10.2


From 44b6cc8130e80e673ba8b3baf8e41891fe484786 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Mon, 13 Jun 2016 10:36:00 +0200
Subject: s390/mm,kvm: flush gmap address space with IDTE

The __tlb_flush_mm() helper uses a global flush if the mm struct
has a gmap structure attached to it. Replace the global flush with
two individual flushes by means of the IDTE instruction if only a
single gmap is attached to the mm.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h
index 6d39329..bea785d 100644
--- a/arch/s390/include/asm/mmu.h
+++ b/arch/s390/include/asm/mmu.h
@@ -12,6 +12,7 @@ typedef struct {
 	struct list_head pgtable_list;
 	spinlock_t gmap_lock;
 	struct list_head gmap_list;
+	unsigned long gmap_asce;
 	unsigned long asce;
 	unsigned long asce_limit;
 	unsigned long vdso_base;
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index c6a088c..515fea5 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -21,6 +21,7 @@ static inline int init_new_context(struct task_struct *tsk,
 	INIT_LIST_HEAD(&mm->context.gmap_list);
 	cpumask_clear(&mm->context.cpu_attach_mask);
 	atomic_set(&mm->context.flush_count, 0);
+	mm->context.gmap_asce = 0;
 	mm->context.flush_mm = 0;
 #ifdef CONFIG_PGSTE
 	mm->context.alloc_pgste = page_table_allocate_pgste;
diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h
index 490014c..3984610 100644
--- a/arch/s390/include/asm/tlbflush.h
+++ b/arch/s390/include/asm/tlbflush.h
@@ -60,18 +60,25 @@ static inline void __tlb_flush_full(struct mm_struct *mm)
 	preempt_enable();
 }
 
-/*
- * Flush TLB entries for a specific ASCE on all CPUs. Should never be used
- * when more than one asce (e.g. gmap) ran on this mm.
- */
-static inline void __tlb_flush_asce(struct mm_struct *mm, unsigned long asce)
+static inline void __tlb_flush_mm(struct mm_struct *mm)
 {
+	unsigned long gmap_asce;
+
+	/*
+	 * If the machine has IDTE we prefer to do a per mm flush
+	 * on all cpus instead of doing a local flush if the mm
+	 * only ran on the local cpu.
+	 */
 	preempt_disable();
 	atomic_inc(&mm->context.flush_count);
-	if (MACHINE_HAS_IDTE)
-		__tlb_flush_idte(asce);
-	else
-		__tlb_flush_global();
+	gmap_asce = READ_ONCE(mm->context.gmap_asce);
+	if (MACHINE_HAS_IDTE && gmap_asce != -1UL) {
+		if (gmap_asce)
+			__tlb_flush_idte(gmap_asce);
+		__tlb_flush_idte(mm->context.asce);
+	} else {
+		__tlb_flush_full(mm);
+	}
 	/* Reset TLB flush mask */
 	cpumask_copy(mm_cpumask(mm), &mm->context.cpu_attach_mask);
 	atomic_dec(&mm->context.flush_count);
@@ -92,7 +99,7 @@ static inline void __tlb_flush_kernel(void)
 /*
  * Flush TLB entries for a specific ASCE on all CPUs.
  */
-static inline void __tlb_flush_asce(struct mm_struct *mm, unsigned long asce)
+static inline void __tlb_flush_mm(struct mm_struct *mm)
 {
 	__tlb_flush_local();
 }
@@ -103,19 +110,6 @@ static inline void __tlb_flush_kernel(void)
 }
 #endif
 
-static inline void __tlb_flush_mm(struct mm_struct * mm)
-{
-	/*
-	 * If the machine has IDTE we prefer to do a per mm flush
-	 * on all cpus instead of doing a local flush if the mm
-	 * only ran on the local cpu.
-	 */
-	if (MACHINE_HAS_IDTE && list_empty(&mm->context.gmap_list))
-		__tlb_flush_asce(mm, mm->context.asce);
-	else
-		__tlb_flush_full(mm);
-}
-
 static inline void __tlb_flush_mm_lazy(struct mm_struct * mm)
 {
 	if (mm->context.flush_mm) {
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index 2ce6bb3..3ba6227 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -94,6 +94,7 @@ out:
 struct gmap *gmap_create(struct mm_struct *mm, unsigned long limit)
 {
 	struct gmap *gmap;
+	unsigned long gmap_asce;
 
 	gmap = gmap_alloc(limit);
 	if (!gmap)
@@ -101,6 +102,11 @@ struct gmap *gmap_create(struct mm_struct *mm, unsigned long limit)
 	gmap->mm = mm;
 	spin_lock(&mm->context.gmap_lock);
 	list_add_rcu(&gmap->list, &mm->context.gmap_list);
+	if (list_is_singular(&mm->context.gmap_list))
+		gmap_asce = gmap->asce;
+	else
+		gmap_asce = -1UL;
+	WRITE_ONCE(mm->context.gmap_asce, gmap_asce);
 	spin_unlock(&mm->context.gmap_lock);
 	return gmap;
 }
@@ -230,6 +236,7 @@ EXPORT_SYMBOL_GPL(gmap_put);
 void gmap_remove(struct gmap *gmap)
 {
 	struct gmap *sg, *next;
+	unsigned long gmap_asce;
 
 	/* Remove all shadow gmaps linked to this gmap */
 	if (!list_empty(&gmap->children)) {
@@ -243,6 +250,14 @@ void gmap_remove(struct gmap *gmap)
 	/* Remove gmap from the pre-mm list */
 	spin_lock(&gmap->mm->context.gmap_lock);
 	list_del_rcu(&gmap->list);
+	if (list_empty(&gmap->mm->context.gmap_list))
+		gmap_asce = 0;
+	else if (list_is_singular(&gmap->mm->context.gmap_list))
+		gmap_asce = list_first_entry(&gmap->mm->context.gmap_list,
+					     struct gmap, list)->asce;
+	else
+		gmap_asce = -1UL;
+	WRITE_ONCE(gmap->mm->context.gmap_asce, gmap_asce);
 	spin_unlock(&gmap->mm->context.gmap_lock);
 	synchronize_rcu();
 	/* Put reference */
-- 
cgit v0.10.2


From 34eeaf376dbe53849acc3d4edc4efc2ad97ab23e Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Tue, 14 Jun 2016 12:38:40 +0200
Subject: s390/mm: merge local / non-local IPTE helper

Merge the __ptep_ipte and __ptep_ipte_local functions into a single
__ptep_ipte function with an additional parameter. The __ptep_ipte_range
function is kept separate, as its while loop makes it hard to merge.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 72c7f60..7ef2306 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -874,35 +874,31 @@ static inline pte_t pte_mkhuge(pte_t pte)
 }
 #endif
 
-static inline void __ptep_ipte(unsigned long address, pte_t *ptep)
-{
-	unsigned long pto = (unsigned long) ptep;
-
-	/* Invalidation + global TLB flush for the pte */
-	asm volatile(
-		"	ipte	%2,%3"
-		: "=m" (*ptep) : "m" (*ptep), "a" (pto), "a" (address));
-}
+#define IPTE_GLOBAL	0
+#define	IPTE_LOCAL	1
 
-static inline void __ptep_ipte_local(unsigned long address, pte_t *ptep)
+static inline void __ptep_ipte(unsigned long address, pte_t *ptep, int local)
 {
 	unsigned long pto = (unsigned long) ptep;
 
-	/* Invalidation + local TLB flush for the pte */
+	/* Invalidation + TLB flush for the pte */
 	asm volatile(
-		"	.insn rrf,0xb2210000,%2,%3,0,1"
-		: "=m" (*ptep) : "m" (*ptep), "a" (pto), "a" (address));
+		"       .insn rrf,0xb2210000,%[r1],%[r2],0,%[m4]"
+		: "+m" (*ptep) : [r1] "a" (pto), [r2] "a" (address),
+		  [m4] "i" (local));
 }
 
-static inline void __ptep_ipte_range(unsigned long address, int nr, pte_t *ptep)
+static inline void __ptep_ipte_range(unsigned long address, int nr,
+				     pte_t *ptep, int local)
 {
 	unsigned long pto = (unsigned long) ptep;
 
-	/* Invalidate a range of ptes + global TLB flush of the ptes */
+	/* Invalidate a range of ptes + TLB flush of the ptes */
 	do {
 		asm volatile(
-			"	.insn rrf,0xb2210000,%2,%0,%1,0"
-			: "+a" (address), "+a" (nr) : "a" (pto) : "memory");
+			"       .insn rrf,0xb2210000,%[r1],%[r2],%[r3],%[m4]"
+			: [r2] "+a" (address), [r3] "+a" (nr)
+			: [r1] "a" (pto), [m4] "i" (local) : "memory");
 	} while (nr != 255);
 }
 
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
index af7cf28c..44f1503 100644
--- a/arch/s390/mm/pageattr.c
+++ b/arch/s390/mm/pageattr.c
@@ -309,11 +309,11 @@ static void ipte_range(pte_t *pte, unsigned long address, int nr)
 	int i;
 
 	if (test_facility(13)) {
-		__ptep_ipte_range(address, nr - 1, pte);
+		__ptep_ipte_range(address, nr - 1, pte, IPTE_GLOBAL);
 		return;
 	}
 	for (i = 0; i < nr; i++) {
-		__ptep_ipte(address, pte);
+		__ptep_ipte(address, pte, IPTE_GLOBAL);
 		address += PAGE_SIZE;
 		pte++;
 	}
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 5f09201..1dc6cad 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -35,9 +35,9 @@ static inline pte_t ptep_flush_direct(struct mm_struct *mm,
 	atomic_inc(&mm->context.flush_count);
 	if (MACHINE_HAS_TLB_LC &&
 	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
-		__ptep_ipte_local(addr, ptep);
+		__ptep_ipte(addr, ptep, IPTE_LOCAL);
 	else
-		__ptep_ipte(addr, ptep);
+		__ptep_ipte(addr, ptep, IPTE_GLOBAL);
 	atomic_dec(&mm->context.flush_count);
 	return old;
 }
@@ -56,7 +56,7 @@ static inline pte_t ptep_flush_lazy(struct mm_struct *mm,
 		pte_val(*ptep) |= _PAGE_INVALID;
 		mm->context.flush_mm = 1;
 	} else
-		__ptep_ipte(addr, ptep);
+		__ptep_ipte(addr, ptep, IPTE_GLOBAL);
 	atomic_dec(&mm->context.flush_count);
 	return old;
 }
@@ -620,7 +620,7 @@ bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr)
 	pte = *ptep;
 	if (dirty && (pte_val(pte) & _PAGE_PRESENT)) {
 		pgste = pgste_pte_notify(mm, addr, ptep, pgste);
-		__ptep_ipte(addr, ptep);
+		__ptep_ipte(addr, ptep, IPTE_GLOBAL);
 		if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE))
 			pte_val(pte) |= _PAGE_PROTECT;
 		else
-- 
cgit v0.10.2


From 47e4d851c57840b3b5e440cf2c64c37e99b36a09 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Tue, 14 Jun 2016 12:41:35 +0200
Subject: s390/mm: merge local / non-local IDTE helper

Merge the __p[m|u]dp_idte and __p[m|u]dp_idte_local functions into a
single __p[m|u]dp_idte function with an additional parameter.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 7ef2306..0362cd5 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -1235,53 +1235,33 @@ static inline void __pmdp_csp(pmd_t *pmdp)
 	    pmd_val(*pmdp) | _SEGMENT_ENTRY_INVALID);
 }
 
-static inline void __pmdp_idte(unsigned long address, pmd_t *pmdp)
-{
-	unsigned long sto;
-
-	sto = (unsigned long) pmdp - pmd_index(address) * sizeof(pmd_t);
-	asm volatile(
-		"	.insn	rrf,0xb98e0000,%2,%3,0,0"
-		: "=m" (*pmdp)
-		: "m" (*pmdp), "a" (sto), "a" ((address & HPAGE_MASK))
-		: "cc" );
-}
-
-static inline void __pudp_idte(unsigned long address, pud_t *pudp)
-{
-	unsigned long r3o;
-
-	r3o = (unsigned long) pudp - pud_index(address) * sizeof(pud_t);
-	r3o |= _ASCE_TYPE_REGION3;
-	asm volatile(
-		"	.insn	rrf,0xb98e0000,%2,%3,0,0"
-		: "=m" (*pudp)
-		: "m" (*pudp), "a" (r3o), "a" ((address & PUD_MASK))
-		: "cc");
-}
+#define IDTE_GLOBAL	0
+#define IDTE_LOCAL	1
 
-static inline void __pmdp_idte_local(unsigned long address, pmd_t *pmdp)
+static inline void __pmdp_idte(unsigned long address, pmd_t *pmdp, int local)
 {
 	unsigned long sto;
 
 	sto = (unsigned long) pmdp - pmd_index(address) * sizeof(pmd_t);
 	asm volatile(
-		"	.insn	rrf,0xb98e0000,%2,%3,0,1"
-		: "=m" (*pmdp)
-		: "m" (*pmdp), "a" (sto), "a" ((address & HPAGE_MASK))
+		"	.insn	rrf,0xb98e0000,%[r1],%[r2],0,%[m4]"
+		: "+m" (*pmdp)
+		: [r1] "a" (sto), [r2] "a" ((address & HPAGE_MASK)),
+		  [m4] "i" (local)
 		: "cc" );
 }
 
-static inline void __pudp_idte_local(unsigned long address, pud_t *pudp)
+static inline void __pudp_idte(unsigned long address, pud_t *pudp, int local)
 {
 	unsigned long r3o;
 
 	r3o = (unsigned long) pudp - pud_index(address) * sizeof(pud_t);
 	r3o |= _ASCE_TYPE_REGION3;
 	asm volatile(
-		"	.insn	rrf,0xb98e0000,%2,%3,0,1"
-		: "=m" (*pudp)
-		: "m" (*pudp), "a" (r3o), "a" ((address & PUD_MASK))
+		"	.insn	rrf,0xb98e0000,%[r1],%[r2],0,%[m4]"
+		: "+m" (*pudp)
+		: [r1] "a" (r3o), [r2] "a" ((address & PUD_MASK)),
+		  [m4] "i" (local)
 		: "cc");
 }
 
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 1dc6cad..7a1897c 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -301,9 +301,9 @@ static inline pmd_t pmdp_flush_direct(struct mm_struct *mm,
 	atomic_inc(&mm->context.flush_count);
 	if (MACHINE_HAS_TLB_LC &&
 	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
-		__pmdp_idte_local(addr, pmdp);
+		__pmdp_idte(addr, pmdp, IDTE_LOCAL);
 	else
-		__pmdp_idte(addr, pmdp);
+		__pmdp_idte(addr, pmdp, IDTE_GLOBAL);
 	atomic_dec(&mm->context.flush_count);
 	return old;
 }
@@ -322,7 +322,7 @@ static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm,
 		pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID;
 		mm->context.flush_mm = 1;
 	} else if (MACHINE_HAS_IDTE)
-		__pmdp_idte(addr, pmdp);
+		__pmdp_idte(addr, pmdp, IDTE_GLOBAL);
 	else
 		__pmdp_csp(pmdp);
 	atomic_dec(&mm->context.flush_count);
@@ -374,9 +374,9 @@ static inline pud_t pudp_flush_direct(struct mm_struct *mm,
 	atomic_inc(&mm->context.flush_count);
 	if (MACHINE_HAS_TLB_LC &&
 	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
-		__pudp_idte_local(addr, pudp);
+		__pudp_idte(addr, pudp, IDTE_LOCAL);
 	else
-		__pudp_idte(addr, pudp);
+		__pudp_idte(addr, pudp, IDTE_GLOBAL);
 	atomic_dec(&mm->context.flush_count);
 	return old;
 }
-- 
cgit v0.10.2


From 6c29588578edc9ae2c9fae27ff96f443cf39c0f3 Mon Sep 17 00:00:00 2001
From: Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
Date: Mon, 8 Aug 2016 04:27:15 +0200
Subject: s390: cio: remove redundant cio_cancel declaration

cio_cancel was declared twice. Remove one of them.

Signed-off-by: Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/drivers/s390/cio/cio.h b/drivers/s390/cio/cio.h
index 93de0b4..f0e57ae 100644
--- a/drivers/s390/cio/cio.h
+++ b/drivers/s390/cio/cio.h
@@ -127,7 +127,6 @@ extern int cio_resume (struct subchannel *);
 extern int cio_halt (struct subchannel *);
 extern int cio_start (struct subchannel *, struct ccw1 *, __u8);
 extern int cio_start_key (struct subchannel *, struct ccw1 *, __u8, __u8);
-extern int cio_cancel (struct subchannel *);
 extern int cio_set_options (struct subchannel *, int);
 extern int cio_update_schib(struct subchannel *sch);
 extern int cio_commit_config(struct subchannel *sch);
-- 
cgit v0.10.2


From bd3a172557d6685f5fbd4b09713f74b84872e2b9 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Mon, 18 Jul 2016 14:05:21 +0200
Subject: s390/pci: add zpci_report_error interface

The 'report_error' interface for PCI devices found on s390 can be
used by a user space program to inject an adapter error notification.
Add a new kernel interface zpci_report_error to allow a PCI device
driver to inject these error notifications without a detour over
user space.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index 0da91c4..8769cbf 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -11,6 +11,7 @@
 #include <asm-generic/pci.h>
 #include <asm/pci_clp.h>
 #include <asm/pci_debug.h>
+#include <asm/sclp.h>
 
 #define PCIBIOS_MIN_IO		0x1000
 #define PCIBIOS_MIN_MEM		0x10000000
@@ -216,6 +217,9 @@ void zpci_debug_init_device(struct zpci_dev *, const char *);
 void zpci_debug_exit_device(struct zpci_dev *);
 void zpci_debug_info(struct zpci_dev *, struct seq_file *);
 
+/* Error reporting */
+int zpci_report_error(struct pci_dev *, struct zpci_report_error_header *);
+
 #ifdef CONFIG_NUMA
 
 /* Returns the node based on PCI bus */
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 871af75..15ffc19 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -854,6 +854,15 @@ void zpci_stop_device(struct zpci_dev *zdev)
 }
 EXPORT_SYMBOL_GPL(zpci_stop_device);
 
+int zpci_report_error(struct pci_dev *pdev,
+		      struct zpci_report_error_header *report)
+{
+	struct zpci_dev *zdev = to_zpci(pdev);
+
+	return sclp_pci_report(report, zdev->fh, zdev->fid);
+}
+EXPORT_SYMBOL(zpci_report_error);
+
 static inline int barsize(u8 size)
 {
 	return (size) ? (1 << size) >> 10 : 0;
-- 
cgit v0.10.2


From 2ccb5bf0e2f190c825c69087aa40c89db89ca1ad Mon Sep 17 00:00:00 2001
From: Markus Elfring <elfring@users.sourceforge.net>
Date: Sat, 20 Aug 2016 19:25:34 +0200
Subject: s390/tape: Use memdup_user() rather than duplicating its
 implementation

Reuse existing functionality from memdup_user() instead of keeping
duplicate source code.

This issue was detected by using the Coccinelle software.

Signed-off-by: Markus Elfring <elfring@users.sourceforge.net>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/drivers/s390/char/tape_3590.c b/drivers/s390/char/tape_3590.c
index d3d1936..e352047 100644
--- a/drivers/s390/char/tape_3590.c
+++ b/drivers/s390/char/tape_3590.c
@@ -312,15 +312,10 @@ static int tape_3592_ioctl_kekl_set(struct tape_device *device,
 		return -ENOSYS;
 	if (!crypt_enabled(device))
 		return -EUNATCH;
-	ext_kekls = kmalloc(sizeof(*ext_kekls), GFP_KERNEL);
-	if (!ext_kekls)
-		return -ENOMEM;
-	if (copy_from_user(ext_kekls, (char __user *)arg, sizeof(*ext_kekls))) {
-		rc = -EFAULT;
-		goto out;
-	}
+	ext_kekls = memdup_user((char __user *)arg, sizeof(*ext_kekls));
+	if (IS_ERR(ext_kekls))
+		return PTR_ERR(ext_kekls);
 	rc = tape_3592_kekl_set(device, ext_kekls);
-out:
 	kfree(ext_kekls);
 	return rc;
 }
-- 
cgit v0.10.2


From f6c1d359be6bb0aa0715b4b75d9ecf63bdb07c4a Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Tue, 16 Aug 2016 10:31:10 +0200
Subject: KVM: s390: generate facility mask from readable list

Automatically generate the KVM facility mask out of a readable list.
Manually changing the masks is very error prone, especially if the
special IBM bit numbering has to be considered.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Reviewed-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>

diff --git a/arch/s390/include/asm/facilities_src.h b/arch/s390/include/asm/facilities_src.h
index 4917728..3b758f6 100644
--- a/arch/s390/include/asm/facilities_src.h
+++ b/arch/s390/include/asm/facilities_src.h
@@ -55,4 +55,28 @@ static struct facility_def facility_defs[] = {
 			-1 /* END */
 		}
 	},
+	{
+		.name = "FACILITIES_KVM",
+		.bits = (int[]){
+			0,  /* N3 instructions */
+			1,  /* z/Arch mode installed */
+			2,  /* z/Arch mode active */
+			3,  /* DAT-enhancement */
+			4,  /* idte segment table */
+			5,  /* idte region table */
+			6,  /* ASN-and-LX reuse */
+			7,  /* stfle */
+			8,  /* enhanced-DAT 1 */
+			9,  /* sense-running-status */
+			10, /* conditional sske */
+			13, /* ipte-range */
+			14, /* nonquiescing key-setting */
+			73, /* transactional execution */
+			75, /* access-exception-fetch/store indication */
+			76, /* msa extension 3 */
+			77, /* msa extension 4 */
+			78, /* enhanced-DAT 2 */
+			-1  /* END */
+		}
+	},
 };
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 3f3ae48..4f484e0 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -132,10 +132,7 @@ module_param(nested, int, S_IRUGO);
 MODULE_PARM_DESC(nested, "Nested virtualization support");
 
 /* upper facilities limit for kvm */
-unsigned long kvm_s390_fac_list_mask[16] = {
-	0xffe6000000000000UL,
-	0x005e000000000000UL,
-};
+unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };
 
 unsigned long kvm_s390_fac_list_mask_size(void)
 {
-- 
cgit v0.10.2


From 41ad022039522485456802f09d8eccebc24aac3e Mon Sep 17 00:00:00 2001
From: David Hildenbrand <dahi@linux.vnet.ibm.com>
Date: Thu, 14 Jul 2016 13:09:57 +0200
Subject: s390/time: simplify stp time syncs

The way we call do_adjtimex() today is broken. It has 0 effect, as
ADJ_OFFSET_SINGLESHOT (0x0001) in the kernel maps to !ADJ_ADJTIME
(in contrast to user space where it maps to ADJ_OFFSET_SINGLESHOT |
ADJ_ADJTIME - 0x8001). !ADJ_ADJTIME will silently ignore all adjustments
without STA_PLL being active. We could switch to ADJ_ADJTIME or turn
STA_PLL on, but still we would run into some problems:

- Even when switching to nanoseconds, we lose accuracy.
- Successive calls to do_adjtimex() will simply overwrite any leftovers
  from the previous call (if not fully handled)
- Anything that NTP does using the sysctl heavily interferes with our
  use.
- !ADJ_ADJTIME will silently round stuff > or < than 0.5 seconds

Reusing do_adjtimex() here just feels wrong. The whole STP synchronization
works right now *somehow* only, as do_adjtimex() does nothing and our
TOD clock jumps in time, although it shouldn't. This is especially bad
as the clock could jump backwards in time. We will have to find another
way to fix this up.

As leap seconds are also not properly handled yet, let's just get rid of
all this complex logic altogether and use the correct clock_delta for
fixing up the clock comparator and keeping the sched_clock monotonic.

This change should have 0 effect on the current STP mechanism. Once we
know how to best handle sync events and leap second updates, we'll start
with a fresh implementation.

Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index 4e99498..c95e98d 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -50,10 +50,6 @@
 #include <asm/cio.h>
 #include "entry.h"
 
-/* change this if you have some constant time drift */
-#define USECS_PER_JIFFY     ((unsigned long) 1000000/HZ)
-#define CLK_TICKS_PER_JIFFY ((unsigned long) USECS_PER_JIFFY << 12)
-
 u64 sched_clock_base_cc = -1;	/* Force to data section. */
 EXPORT_SYMBOL_GPL(sched_clock_base_cc);
 
@@ -318,43 +314,6 @@ void __init time_init(void)
 	vtime_init();
 }
 
-/*
- * The time is "clock". old is what we think the time is.
- * Adjust the value by a multiple of jiffies and add the delta to ntp.
- * "delay" is an approximation how long the synchronization took. If
- * the time correction is positive, then "delay" is subtracted from
- * the time difference and only the remaining part is passed to ntp.
- */
-static unsigned long long adjust_time(unsigned long long old,
-				      unsigned long long clock,
-				      unsigned long long delay)
-{
-	unsigned long long delta, ticks;
-	struct timex adjust;
-
-	if (clock > old) {
-		/* It is later than we thought. */
-		delta = ticks = clock - old;
-		delta = ticks = (delta < delay) ? 0 : delta - delay;
-		delta -= do_div(ticks, CLK_TICKS_PER_JIFFY);
-		adjust.offset = ticks * (1000000 / HZ);
-	} else {
-		/* It is earlier than we thought. */
-		delta = ticks = old - clock;
-		delta -= do_div(ticks, CLK_TICKS_PER_JIFFY);
-		delta = -delta;
-		adjust.offset = -ticks * (1000000 / HZ);
-	}
-	sched_clock_base_cc += delta;
-	if (adjust.offset != 0) {
-		pr_notice("The ETR interface has adjusted the clock "
-			  "by %li microseconds\n", adjust.offset);
-		adjust.modes = ADJ_OFFSET_SINGLESHOT;
-		do_adjtimex(&adjust);
-	}
-	return delta;
-}
-
 static DEFINE_PER_CPU(atomic_t, clock_sync_word);
 static DEFINE_MUTEX(clock_sync_mutex);
 static unsigned long clock_sync_flags;
@@ -582,7 +541,7 @@ void stp_queue_work(void)
 static int stp_sync_clock(void *data)
 {
 	static int first;
-	unsigned long long old_clock, delta, new_clock, clock_delta;
+	unsigned long long clock_delta;
 	struct clock_sync_data *stp_sync;
 	struct ptff_qto qto;
 	int rc;
@@ -605,18 +564,17 @@ static int stp_sync_clock(void *data)
 	if (stp_info.todoff[0] || stp_info.todoff[1] ||
 	    stp_info.todoff[2] || stp_info.todoff[3] ||
 	    stp_info.tmd != 2) {
-		old_clock = get_tod_clock();
 		rc = chsc_sstpc(stp_page, STP_OP_SYNC, 0, &clock_delta);
 		if (rc == 0) {
-			new_clock = old_clock + clock_delta;
-			delta = adjust_time(old_clock, new_clock, 0);
+			/* fixup the monotonic sched clock */
+			sched_clock_base_cc += clock_delta;
 			if (ptff_query(PTFF_QTO) &&
 			    ptff(&qto, sizeof(qto), PTFF_QTO) == 0)
 				/* Update LPAR offset */
 				lpar_offset = qto.tod_epoch_difference;
 			atomic_notifier_call_chain(&s390_epoch_delta_notifier,
 						   0, &clock_delta);
-			fixup_clock_comparator(delta);
+			fixup_clock_comparator(clock_delta);
 			rc = chsc_sstpi(stp_page, &stp_info,
 					sizeof(struct stp_sstpi));
 			if (rc == 0 && stp_info.tmd != 2)
-- 
cgit v0.10.2


From ca64f63901a054af755994648bf6835047c71cbb Mon Sep 17 00:00:00 2001
From: David Hildenbrand <dahi@linux.vnet.ibm.com>
Date: Thu, 14 Jul 2016 13:38:06 +0200
Subject: s390/time: cleanup etr leftovers

There are still some etr leftovers and wrong comments, let's clean that up.

Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index c95e98d..5e7237d 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -318,10 +318,8 @@ static DEFINE_PER_CPU(atomic_t, clock_sync_word);
 static DEFINE_MUTEX(clock_sync_mutex);
 static unsigned long clock_sync_flags;
 
-#define CLOCK_SYNC_HAS_ETR	0
-#define CLOCK_SYNC_HAS_STP	1
-#define CLOCK_SYNC_ETR		2
-#define CLOCK_SYNC_STP		3
+#define CLOCK_SYNC_HAS_STP	0
+#define CLOCK_SYNC_STP		1
 
 /*
  * The get_clock function for the physical clock. It will get the current
@@ -343,34 +341,32 @@ int get_phys_clock(unsigned long long *clock)
 	if (sw0 == sw1 && (sw0 & 0x80000000U))
 		/* Success: time is in sync. */
 		return 0;
-	if (!test_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags) &&
-	    !test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags))
+	if (!test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags))
 		return -EOPNOTSUPP;
-	if (!test_bit(CLOCK_SYNC_ETR, &clock_sync_flags) &&
-	    !test_bit(CLOCK_SYNC_STP, &clock_sync_flags))
+	if (!test_bit(CLOCK_SYNC_STP, &clock_sync_flags))
 		return -EACCES;
 	return -EAGAIN;
 }
 EXPORT_SYMBOL(get_phys_clock);
 
 /*
- * Make get_sync_clock return -EAGAIN.
+ * Make get_phys_clock() return -EAGAIN.
  */
 static void disable_sync_clock(void *dummy)
 {
 	atomic_t *sw_ptr = this_cpu_ptr(&clock_sync_word);
 	/*
-	 * Clear the in-sync bit 2^31. All get_sync_clock calls will
+	 * Clear the in-sync bit 2^31. All get_phys_clock calls will
 	 * fail until the sync bit is turned back on. In addition
 	 * increase the "sequence" counter to avoid the race of an
-	 * etr event and the complete recovery against get_sync_clock.
+	 * stp event and the complete recovery against get_phys_clock.
 	 */
 	atomic_andnot(0x80000000, sw_ptr);
 	atomic_inc(sw_ptr);
 }
 
 /*
- * Make get_sync_clock return 0 again.
+ * Make get_phys_clock() return 0 again.
  * Needs to be called from a context disabled for preemption.
  */
 static void enable_sync_clock(void)
@@ -393,7 +389,7 @@ static inline int check_sync_clock(void)
 	return rc;
 }
 
-/* Single threaded workqueue used for etr and stp sync events */
+/* Single threaded workqueue used for stp sync events */
 static struct workqueue_struct *time_sync_wq;
 
 static void __init time_init_wq(void)
@@ -407,20 +403,12 @@ struct clock_sync_data {
 	atomic_t cpus;
 	int in_sync;
 	unsigned long long fixup_cc;
-	int etr_port;
-	struct etr_aib *etr_aib;
 };
 
 static void clock_sync_cpu(struct clock_sync_data *sync)
 {
 	atomic_dec(&sync->cpus);
 	enable_sync_clock();
-	/*
-	 * This looks like a busy wait loop but it isn't. etr_sync_cpus
-	 * is called on all other cpus while the TOD clocks is stopped.
-	 * __udelay will stop the cpu on an enabled wait psw until the
-	 * TOD is running again.
-	 */
 	while (sync->in_sync == 0) {
 		__udelay(1);
 		/*
-- 
cgit v0.10.2


From 0c00b1e00bba9c9046bee4e487ed19360da9ded0 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <dahi@linux.vnet.ibm.com>
Date: Thu, 14 Jul 2016 14:46:56 +0200
Subject: s390/time: fixup the clock comparator on all cpus

Because fixup_cc was left unset, until now only the clock comparator of
the cpu actually doing the sync was fixed up.

Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index 5e7237d..5b1a199 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -562,6 +562,7 @@ static int stp_sync_clock(void *data)
 				lpar_offset = qto.tod_epoch_difference;
 			atomic_notifier_call_chain(&s390_epoch_delta_notifier,
 						   0, &clock_delta);
+			stp_sync->fixup_cc = clock_delta;
 			fixup_clock_comparator(clock_delta);
 			rc = chsc_sstpi(stp_page, &stp_info,
 					sizeof(struct stp_sstpi));
-- 
cgit v0.10.2


From 67f03de5f0ad6b4b0481bb43e4a819d1a441900b Mon Sep 17 00:00:00 2001
From: David Hildenbrand <dahi@linux.vnet.ibm.com>
Date: Mon, 18 Jul 2016 17:10:17 +0200
Subject: s390/time: avoid races when updating tb_update_count

The increment might not be atomic and we're not holding the
timekeeper_lock. Therefore we might lose an update to count, resulting in
VDSO being trapped in a loop. As other archs also simply update the
values and count doesn't seem to have an impact on reloading of these
values in VDSO code, let's just remove the update of tb_update_count.

Suggested-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index 5b1a199..0bfcc49 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -278,13 +278,8 @@ extern struct timezone sys_tz;
 
 void update_vsyscall_tz(void)
 {
-	/* Make userspace gettimeofday spin until we're done. */
-	++vdso_data->tb_update_count;
-	smp_wmb();
 	vdso_data->tz_minuteswest = sys_tz.tz_minuteswest;
 	vdso_data->tz_dsttime = sys_tz.tz_dsttime;
-	smp_wmb();
-	++vdso_data->tb_update_count;
 }
 
 /*
-- 
cgit v0.10.2


From 0eab11c7e0d30de14a15ccd8269eef238321a8e1 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Thu, 25 Aug 2016 10:40:19 +0200
Subject: s390/vx: allow to include vx-insn.h with .include

To make vx-insn.h more versatile, avoid C preprocessor macros and allow
plain numbers to be used for vector and general purpose register
operands. With that, a C file can emit an .include into the assembler
text and then use the vx-insn macros in inline assemblies.

For example:

asm (".include \"asm/vx-insn.h\"");

static inline void xor_vec(int x, int y, int z)
{
	asm volatile("VX %0,%1,%2"
		     : : "i" (x), "i" (y), "i" (z));
}

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/include/asm/vx-insn.h b/arch/s390/include/asm/vx-insn.h
index 4a31356..b61846d 100644
--- a/arch/s390/include/asm/vx-insn.h
+++ b/arch/s390/include/asm/vx-insn.h
@@ -16,15 +16,13 @@
 
 /* Macros to generate vector instruction byte code */
 
-#define REG_NUM_INVALID	       255
-
 /* GR_NUM - Retrieve general-purpose register number
  *
  * @opd:	Operand to store register number
  * @r64:	String designation register in the format "%rN"
  */
 .macro	GR_NUM	opd gr
-	\opd = REG_NUM_INVALID
+	\opd = 255
 	.ifc \gr,%r0
 		\opd = 0
 	.endif
@@ -73,14 +71,11 @@
 	.ifc \gr,%r15
 		\opd = 15
 	.endif
-	.if \opd == REG_NUM_INVALID
-		.error "Invalid general-purpose register designation: \gr"
+	.if \opd == 255
+		\opd = \gr
 	.endif
 .endm
 
-/* VX_R() - Macro to encode the VX_NUM into the instruction */
-#define VX_R(v)		(v & 0x0F)
-
 /* VX_NUM - Retrieve vector register number
  *
  * @opd:	Operand to store register number
@@ -88,11 +83,10 @@
  *
  * The vector register number is used for as input number to the
  * instruction and, as well as, to compute the RXB field of the
- * instruction.  To encode the particular vector register number,
- * use the VX_R(v) macro to extract the instruction opcode.
+ * instruction.
  */
 .macro	VX_NUM	opd vxr
-	\opd = REG_NUM_INVALID
+	\opd = 255
 	.ifc \vxr,%v0
 		\opd = 0
 	.endif
@@ -189,8 +183,8 @@
 	.ifc \vxr,%v31
 		\opd = 31
 	.endif
-	.if \opd == REG_NUM_INVALID
-		.error "Invalid vector register designation: \vxr"
+	.if \opd == 255
+		\opd = \vxr
 	.endif
 .endm
 
@@ -251,7 +245,7 @@
 /* VECTOR GENERATE BYTE MASK */
 .macro	VGBM	vr imm2
 	VX_NUM	v1, \vr
-	.word	(0xE700 | (VX_R(v1) << 4))
+	.word	(0xE700 | ((v1&15) << 4))
 	.word	\imm2
 	MRXBOPC	0, 0x44, v1
 .endm
@@ -267,7 +261,7 @@
 	VX_NUM	v1, \v
 	GR_NUM	b2, "%r0"
 	GR_NUM	r3, \gr
-	.word	0xE700 | (VX_R(v1) << 4) | r3
+	.word	0xE700 | ((v1&15) << 4) | r3
 	.word	(b2 << 12) | (\disp)
 	MRXBOPC	\m, 0x22, v1
 .endm
@@ -289,7 +283,7 @@
 	VX_NUM	v1, \v
 	GR_NUM	x2, \index
 	GR_NUM	b2, \base
-	.word	0xE700 | (VX_R(v1) << 4) | x2
+	.word	0xE700 | ((v1&15) << 4) | x2
 	.word	(b2 << 12) | (\disp)
 	MRXBOPC 0, 0x06, v1
 .endm
@@ -299,7 +293,7 @@
 	VX_NUM	v1, \vr1
 	GR_NUM	x2, \index
 	GR_NUM	b2, \base
-	.word	0xE700 | (VX_R(v1) << 4) | x2
+	.word	0xE700 | ((v1&15) << 4) | x2
 	.word	(b2 << 12) | (\disp)
 	MRXBOPC	\m3, \opc, v1
 .endm
@@ -319,7 +313,7 @@
 /* VECTOR LOAD ELEMENT IMMEDIATE */
 .macro	VLEIx	vr1, imm2, m3, opc
 	VX_NUM	v1, \vr1
-	.word	0xE700 | (VX_R(v1) << 4)
+	.word	0xE700 | ((v1&15) << 4)
 	.word	\imm2
 	MRXBOPC	\m3, \opc, v1
 .endm
@@ -341,7 +335,7 @@
 	GR_NUM	r1, \gr
 	GR_NUM	b2, \base
 	VX_NUM	v3, \vr
-	.word	0xE700 | (r1 << 4) | VX_R(v3)
+	.word	0xE700 | (r1 << 4) | (v3&15)
 	.word	(b2 << 12) | (\disp)
 	MRXBOPC	\m, 0x21, v3
 .endm
@@ -363,7 +357,7 @@
 	VX_NUM	v1, \vfrom
 	VX_NUM	v3, \vto
 	GR_NUM	b2, \base	    /* Base register */
-	.word	0xE700 | (VX_R(v1) << 4) | VX_R(v3)
+	.word	0xE700 | ((v1&15) << 4) | (v3&15)
 	.word	(b2 << 12) | (\disp)
 	MRXBOPC	0, 0x36, v1, v3
 .endm
@@ -373,7 +367,7 @@
 	VX_NUM	v1, \vfrom
 	VX_NUM	v3, \vto
 	GR_NUM	b2, \base	    /* Base register */
-	.word	0xE700 | (VX_R(v1) << 4) | VX_R(v3)
+	.word	0xE700 | ((v1&15) << 4) | (v3&15)
 	.word	(b2 << 12) | (\disp)
 	MRXBOPC	0, 0x3E, v1, v3
 .endm
@@ -384,16 +378,16 @@
 	VX_NUM	v2, \vr2
 	VX_NUM	v3, \vr3
 	VX_NUM	v4, \vr4
-	.word	0xE700 | (VX_R(v1) << 4) | VX_R(v2)
-	.word	(VX_R(v3) << 12)
-	MRXBOPC	VX_R(v4), 0x8C, v1, v2, v3, v4
+	.word	0xE700 | ((v1&15) << 4) | (v2&15)
+	.word	((v3&15) << 12)
+	MRXBOPC	(v4&15), 0x8C, v1, v2, v3, v4
 .endm
 
 /* VECTOR UNPACK LOGICAL LOW */
 .macro	VUPLL	vr1, vr2, m3
 	VX_NUM	v1, \vr1
 	VX_NUM	v2, \vr2
-	.word	0xE700 | (VX_R(v1) << 4) | VX_R(v2)
+	.word	0xE700 | ((v1&15) << 4) | (v2&15)
 	.word	0x0000
 	MRXBOPC	\m3, 0xD4, v1, v2
 .endm
@@ -415,8 +409,8 @@
 	VX_NUM	v1, \vr1
 	VX_NUM	v2, \vr2
 	VX_NUM	v3, \vr3
-	.word	0xE700 | (VX_R(v1) << 4) | VX_R(v2)
-	.word	(VX_R(v3) << 12)
+	.word	0xE700 | ((v1&15) << 4) | (v2&15)
+	.word	((v3&15) << 12)
 	MRXBOPC	0, 0x6D, v1, v2, v3
 .endm
 
@@ -425,8 +419,8 @@
 	VX_NUM	v1, \vr1
 	VX_NUM	v2, \vr2
 	VX_NUM	v3, \vr3
-	.word	0xE700 | (VX_R(v1) << 4) | VX_R(v2)
-	.word	(VX_R(v3) << 12)
+	.word	0xE700 | ((v1&15) << 4) | (v2&15)
+	.word	((v3&15) << 12)
 	MRXBOPC	\m4, 0xB4, v1, v2, v3
 .endm
 .macro	VGFMB	vr1, vr2, vr3
@@ -448,9 +442,9 @@
 	VX_NUM	v2, \vr2
 	VX_NUM	v3, \vr3
 	VX_NUM	v4, \vr4
-	.word	0xE700 | (VX_R(v1) << 4) | VX_R(v2)
-	.word	(VX_R(v3) << 12) | (\m5 << 8)
-	MRXBOPC	VX_R(v4), 0xBC, v1, v2, v3, v4
+	.word	0xE700 | ((v1&15) << 4) | (v2&15)
+	.word	((v3&15) << 12) | (\m5 << 8)
+	MRXBOPC	(v4&15), 0xBC, v1, v2, v3, v4
 .endm
 .macro	VGFMAB	vr1, vr2, vr3, vr4
 	VGFMA	\vr1, \vr2, \vr3, \vr4, 0
@@ -470,8 +464,8 @@
 	VX_NUM	v1, \vr1
 	VX_NUM	v2, \vr2
 	VX_NUM	v3, \vr3
-	.word	0xE700 | (VX_R(v1) << 4) | VX_R(v2)
-	.word	(VX_R(v3) << 12)
+	.word	0xE700 | ((v1&15) << 4) | (v2&15)
+	.word	((v3&15) << 12)
 	MRXBOPC	0, 0x7D, v1, v2, v3
 .endm
 
-- 
cgit v0.10.2


From 7f79695cc1b6aa6d80a861780d9f8ce75d3dddcb Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Mon, 22 Aug 2016 12:06:21 +0200
Subject: s390/fpu: improve kernel_fpu_[begin|end]

In case of nested use of the FPU or vector registers in the kernel
the current code uses the mask of the FPU/vector registers of the
previous contexts to decide which registers to save and restore.
E.g. if the previous context used KERNEL_VXR_V0V7 and the next
context wants to use KERNEL_VXR_V24V31 the first 8 vector registers
are stored to the FPU state structure. But this is not necessary
as the next context does not use these registers.

Rework the FPU/vector register save and restore code. The new code
does a few things differently:
1) A lowcore field is used instead of a per-cpu variable.
2) The kernel_fpu_end function now has two parameters just like
   kernel_fpu_begin. The register flags are required by both
   functions to save / restore the minimal register set.
3) The inline functions kernel_fpu_begin/kernel_fpu_end now do the
   update of the register masks. If the user space FPU registers
   have already been stored neither save_fpu_regs nor the
   __kernel_fpu_begin/__kernel_fpu_end functions have to be called
   for the first context. In this case kernel_fpu_begin adds 7
   instructions and kernel_fpu_end adds 4 instructions.
4) The inline assemblies in __kernel_fpu_begin / __kernel_fpu_end
   to save / restore the vector registers are simplified a bit.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/crypto/crc32-vx.c b/arch/s390/crypto/crc32-vx.c
index 2bad9d8..992e630 100644
--- a/arch/s390/crypto/crc32-vx.c
+++ b/arch/s390/crypto/crc32-vx.c
@@ -67,7 +67,7 @@ u32 crc32c_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size);
 									    \
 		kernel_fpu_begin(&vxstate, KERNEL_VXR_LOW);		    \
 		crc = ___crc32_vx(crc, data, aligned);			    \
-		kernel_fpu_end(&vxstate);				    \
+		kernel_fpu_end(&vxstate, KERNEL_VXR_LOW);		    \
 									    \
 		if (remaining)						    \
 			crc = ___crc32_sw(crc, data + aligned, remaining);  \
diff --git a/arch/s390/include/asm/fpu/api.h b/arch/s390/include/asm/fpu/api.h
index 6aba6fc..02124d6 100644
--- a/arch/s390/include/asm/fpu/api.h
+++ b/arch/s390/include/asm/fpu/api.h
@@ -64,18 +64,18 @@ static inline int test_fp_ctl(u32 fpc)
 	return rc;
 }
 
-#define KERNEL_VXR_V0V7		1
-#define KERNEL_VXR_V8V15	2
-#define KERNEL_VXR_V16V23	4
-#define KERNEL_VXR_V24V31	8
-#define KERNEL_FPR		16
-#define KERNEL_FPC		256
+#define KERNEL_FPC		1
+#define KERNEL_VXR_V0V7		2
+#define KERNEL_VXR_V8V15	4
+#define KERNEL_VXR_V16V23	8
+#define KERNEL_VXR_V24V31	16
 
 #define KERNEL_VXR_LOW		(KERNEL_VXR_V0V7|KERNEL_VXR_V8V15)
 #define KERNEL_VXR_MID		(KERNEL_VXR_V8V15|KERNEL_VXR_V16V23)
 #define KERNEL_VXR_HIGH		(KERNEL_VXR_V16V23|KERNEL_VXR_V24V31)
 
-#define KERNEL_FPU_MASK		(KERNEL_VXR_LOW|KERNEL_VXR_HIGH|KERNEL_FPR)
+#define KERNEL_VXR		(KERNEL_VXR_LOW|KERNEL_VXR_HIGH)
+#define KERNEL_FPR		(KERNEL_FPC|KERNEL_VXR_V0V7)
 
 struct kernel_fpu;
 
@@ -87,18 +87,28 @@ struct kernel_fpu;
  * Prefer using the kernel_fpu_begin()/kernel_fpu_end() pair of functions.
  */
 void __kernel_fpu_begin(struct kernel_fpu *state, u32 flags);
-void __kernel_fpu_end(struct kernel_fpu *state);
+void __kernel_fpu_end(struct kernel_fpu *state, u32 flags);
 
 
 static inline void kernel_fpu_begin(struct kernel_fpu *state, u32 flags)
 {
 	preempt_disable();
-	__kernel_fpu_begin(state, flags);
+	state->mask = S390_lowcore.fpu_flags;
+	if (!test_cpu_flag(CIF_FPU))
+		/* Save user space FPU state and register contents */
+		save_fpu_regs();
+	else if (state->mask & flags)
+		/* Save FPU/vector register in-use by the kernel */
+		__kernel_fpu_begin(state, flags);
+	S390_lowcore.fpu_flags |= flags;
 }
 
-static inline void kernel_fpu_end(struct kernel_fpu *state)
+static inline void kernel_fpu_end(struct kernel_fpu *state, u32 flags)
 {
-	__kernel_fpu_end(state);
+	S390_lowcore.fpu_flags = state->mask;
+	if (state->mask & flags)
+		/* Restore FPU/vector register in-use by the kernel */
+		__kernel_fpu_end(state, flags);
 	preempt_enable();
 }
 
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index d79ba7c..7b93b78 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -129,7 +129,8 @@ struct lowcore {
 	__u8	pad_0x0390[0x0398-0x0390];	/* 0x0390 */
 	__u64	gmap;				/* 0x0398 */
 	__u32	spinlock_lockval;		/* 0x03a0 */
-	__u8	pad_0x03a0[0x0400-0x03a4];	/* 0x03a4 */
+	__u32	fpu_flags;			/* 0x03a4 */
+	__u8	pad_0x03a8[0x0400-0x03a8];	/* 0x03a8 */
 
 	/* Per cpu primary space access list */
 	__u32	paste[16];			/* 0x0400 */
diff --git a/arch/s390/kernel/fpu.c b/arch/s390/kernel/fpu.c
index 81d1d18..1235b94 100644
--- a/arch/s390/kernel/fpu.c
+++ b/arch/s390/kernel/fpu.c
@@ -10,240 +10,167 @@
 #include <asm/fpu/types.h>
 #include <asm/fpu/api.h>
 
-/*
- * Per-CPU variable to maintain FPU register ranges that are in use
- * by the kernel.
- */
-static DEFINE_PER_CPU(u32, kernel_fpu_state);
-
-#define KERNEL_FPU_STATE_MASK	(KERNEL_FPU_MASK|KERNEL_FPC)
-
+asm(".include \"asm/vx-insn.h\"\n");
 
 void __kernel_fpu_begin(struct kernel_fpu *state, u32 flags)
 {
-	if (!__this_cpu_read(kernel_fpu_state)) {
-		/*
-		 * Save user space FPU state and register contents.  Multiple
-		 * calls because of interruptions do not matter and return
-		 * immediately.  This also sets CIF_FPU to lazy restore FP/VX
-		 * register contents when returning to user space.
-		 */
-		save_fpu_regs();
-	}
-
-	/* Update flags to use the vector facility for KERNEL_FPR */
-	if (MACHINE_HAS_VX && (state->mask & KERNEL_FPR)) {
-		flags |= KERNEL_VXR_LOW | KERNEL_FPC;
-		flags &= ~KERNEL_FPR;
-	}
-
-	/* Save and update current kernel VX state */
-	state->mask = __this_cpu_read(kernel_fpu_state);
-	__this_cpu_or(kernel_fpu_state, flags & KERNEL_FPU_STATE_MASK);
-
 	/*
-	 * If this is the first call to __kernel_fpu_begin(), no additional
-	 * work is required.
+	 * Limit the save to the FPU/vector registers already
+	 * in use by the previous context
 	 */
-	if (!(state->mask & KERNEL_FPU_STATE_MASK))
-		return;
+	flags &= state->mask;
 
-	/*
-	 * If KERNEL_FPR is still set, the vector facility is not available
-	 * and, thus, save floating-point control and registers only.
-	 */
-	if (state->mask & KERNEL_FPR) {
-		asm volatile("stfpc %0" : "=Q" (state->fpc));
-		asm volatile("std 0,%0" : "=Q" (state->fprs[0]));
-		asm volatile("std 1,%0" : "=Q" (state->fprs[1]));
-		asm volatile("std 2,%0" : "=Q" (state->fprs[2]));
-		asm volatile("std 3,%0" : "=Q" (state->fprs[3]));
-		asm volatile("std 4,%0" : "=Q" (state->fprs[4]));
-		asm volatile("std 5,%0" : "=Q" (state->fprs[5]));
-		asm volatile("std 6,%0" : "=Q" (state->fprs[6]));
-		asm volatile("std 7,%0" : "=Q" (state->fprs[7]));
-		asm volatile("std 8,%0" : "=Q" (state->fprs[8]));
-		asm volatile("std 9,%0" : "=Q" (state->fprs[9]));
-		asm volatile("std 10,%0" : "=Q" (state->fprs[10]));
-		asm volatile("std 11,%0" : "=Q" (state->fprs[11]));
-		asm volatile("std 12,%0" : "=Q" (state->fprs[12]));
-		asm volatile("std 13,%0" : "=Q" (state->fprs[13]));
-		asm volatile("std 14,%0" : "=Q" (state->fprs[14]));
-		asm volatile("std 15,%0" : "=Q" (state->fprs[15]));
+	if (flags & KERNEL_FPC)
+		/* Save floating point control */
+		asm volatile("stfpc %0" : "=m" (state->fpc));
+
+	if (!MACHINE_HAS_VX) {
+		if (flags & KERNEL_VXR_V0V7) {
+			/* Save floating-point registers */
+			asm volatile("std 0,%0" : "=Q" (state->fprs[0]));
+			asm volatile("std 1,%0" : "=Q" (state->fprs[1]));
+			asm volatile("std 2,%0" : "=Q" (state->fprs[2]));
+			asm volatile("std 3,%0" : "=Q" (state->fprs[3]));
+			asm volatile("std 4,%0" : "=Q" (state->fprs[4]));
+			asm volatile("std 5,%0" : "=Q" (state->fprs[5]));
+			asm volatile("std 6,%0" : "=Q" (state->fprs[6]));
+			asm volatile("std 7,%0" : "=Q" (state->fprs[7]));
+			asm volatile("std 8,%0" : "=Q" (state->fprs[8]));
+			asm volatile("std 9,%0" : "=Q" (state->fprs[9]));
+			asm volatile("std 10,%0" : "=Q" (state->fprs[10]));
+			asm volatile("std 11,%0" : "=Q" (state->fprs[11]));
+			asm volatile("std 12,%0" : "=Q" (state->fprs[12]));
+			asm volatile("std 13,%0" : "=Q" (state->fprs[13]));
+			asm volatile("std 14,%0" : "=Q" (state->fprs[14]));
+			asm volatile("std 15,%0" : "=Q" (state->fprs[15]));
+		}
 		return;
 	}
 
-	/*
-	 * If this is a nested call to __kernel_fpu_begin(), check the saved
-	 * state mask to save and later restore the vector registers that
-	 * are already in use.	Let's start with checking floating-point
-	 * controls.
-	 */
-	if (state->mask & KERNEL_FPC)
-		asm volatile("stfpc %0" : "=m" (state->fpc));
-
 	/* Test and save vector registers */
 	asm volatile (
 		/*
 		 * Test if any vector register must be saved and, if so,
 		 * test if all register can be saved.
 		 */
-		"	tmll	%[m],15\n"	/* KERNEL_VXR_MASK */
-		"	jz	20f\n"		/* no work -> done */
 		"	la	1,%[vxrs]\n"	/* load save area */
-		"	jo	18f\n"		/* -> save V0..V31 */
-
+		"	tmll	%[m],30\n"	/* KERNEL_VXR */
+		"	jz	7f\n"		/* no work -> done */
+		"	jo	5f\n"		/* -> save V0..V31 */
 		/*
-		 * Test if V8..V23 can be saved at once... this speeds up
-		 * for KERNEL_fpu_MID only. Otherwise continue to split the
-		 * range of vector registers into two halves and test them
-		 * separately.
+		 * Test for special case KERNEL_VXR_MID only. In this
+		 * case a vstm V8..V23 is the best instruction
 		 */
-		"	tmll	%[m],6\n"	/* KERNEL_VXR_MID */
-		"	jo	17f\n"		/* -> save V8..V23 */
-
+		"	chi	%[m],12\n"	/* KERNEL_VXR_MID */
+		"	jne	0f\n"		/* -> save V8..V23 */
+		"	VSTM	8,23,128,1\n"	/* vstm %v8,%v23,128(%r1) */
+		"	j	7f\n"
 		/* Test and save the first half of 16 vector registers */
-		"1:	tmll	%[m],3\n"	/* KERNEL_VXR_LOW */
-		"	jz	10f\n"		/* -> KERNEL_VXR_HIGH */
+		"0:	tmll	%[m],6\n"	/* KERNEL_VXR_LOW */
+		"	jz	3f\n"		/* -> KERNEL_VXR_HIGH */
 		"	jo	2f\n"		/* 11 -> save V0..V15 */
-		"	brc	4,3f\n"		/* 01 -> save V0..V7  */
-		"	brc	2,4f\n"		/* 10 -> save V8..V15 */
-
+		"	brc	2,1f\n"		/* 10 -> save V8..V15 */
+		"	VSTM	0,7,0,1\n"	/* vstm %v0,%v7,0(%r1) */
+		"	j	3f\n"
+		"1:	VSTM	8,15,128,1\n"	/* vstm %v8,%v15,128(%r1) */
+		"	j	3f\n"
+		"2:	VSTM	0,15,0,1\n"	/* vstm %v0,%v15,0(%r1) */
 		/* Test and save the second half of 16 vector registers */
-		"10:	tmll	%[m],12\n"	/* KERNEL_VXR_HIGH */
-		"	jo	19f\n"		/* 11 -> save V16..V31 */
-		"	brc	4,11f\n"	/* 01 -> save V16..V23	*/
-		"	brc	2,12f\n"	/* 10 -> save V24..V31 */
-		"	j	20f\n"		/* 00 -> done */
-
-		/*
-		 * Below are the vstm combinations to save multiple vector
-		 * registers at once.
-		 */
-		"2:	.word	0xe70f,0x1000,0x003e\n"	/* vstm 0,15,0(1) */
-		"	j	10b\n"			/* -> VXR_HIGH */
-		"3:	.word	0xe707,0x1000,0x003e\n" /* vstm 0,7,0(1) */
-		"	j	10b\n"			/* -> VXR_HIGH */
-		"4:	.word	0xe78f,0x1080,0x003e\n" /* vstm 8,15,128(1) */
-		"	j	10b\n"			/* -> VXR_HIGH */
-		"\n"
-		"11:	.word	0xe707,0x1100,0x0c3e\n"	/* vstm 16,23,256(1) */
-		"	j	20f\n"			/* -> done */
-		"12:	.word	0xe78f,0x1180,0x0c3e\n" /* vstm 24,31,384(1) */
-		"	j	20f\n"			/* -> done */
-		"\n"
-		"17:	.word	0xe787,0x1080,0x043e\n"	/* vstm 8,23,128(1) */
-		"	nill	%[m],249\n"		/* m &= ~VXR_MID    */
-		"	j	1b\n"			/* -> VXR_LOW */
-		"\n"
-		"18:	.word	0xe70f,0x1000,0x003e\n"	/* vstm 0,15,0(1) */
-		"19:	.word	0xe70f,0x1100,0x0c3e\n"	/* vstm 16,31,256(1) */
-		"20:"
+		"3:	tmll	%[m],24\n"	/* KERNEL_VXR_HIGH */
+		"	jz	7f\n"
+		"	jo	6f\n"		/* 11 -> save V16..V31 */
+		"	brc	2,4f\n"		/* 10 -> save V24..V31 */
+		"	VSTM	16,23,256,1\n"	/* vstm %v16,%v23,256(%r1) */
+		"	j	7f\n"
+		"4:	VSTM	24,31,384,1\n"	/* vstm %v24,%v31,384(%r1) */
+		"	j	7f\n"
+		"5:	VSTM	0,15,0,1\n"	/* vstm %v0,%v15,0(%r1) */
+		"6:	VSTM	16,31,256,1\n"	/* vstm %v16,%v31,256(%r1) */
+		"7:"
 		: [vxrs] "=Q" (*(struct vx_array *) &state->vxrs)
-		: [m] "d" (state->mask)
+		: [m] "d" (flags)
 		: "1", "cc");
 }
 EXPORT_SYMBOL(__kernel_fpu_begin);
 
-void __kernel_fpu_end(struct kernel_fpu *state)
+void __kernel_fpu_end(struct kernel_fpu *state, u32 flags)
 {
-	/* Just update the per-CPU state if there is nothing to restore */
-	if (!(state->mask & KERNEL_FPU_STATE_MASK))
-		goto update_fpu_state;
-
 	/*
-	 * If KERNEL_FPR is specified, the vector facility is not available
-	 * and, thus, restore floating-point control and registers only.
+	 * Limit the restore to the FPU/vector registers of the
+	 * previous context that have been overwritten by the
+	 * current context
 	 */
-	if (state->mask & KERNEL_FPR) {
-		asm volatile("lfpc %0" : : "Q" (state->fpc));
-		asm volatile("ld 0,%0" : : "Q" (state->fprs[0]));
-		asm volatile("ld 1,%0" : : "Q" (state->fprs[1]));
-		asm volatile("ld 2,%0" : : "Q" (state->fprs[2]));
-		asm volatile("ld 3,%0" : : "Q" (state->fprs[3]));
-		asm volatile("ld 4,%0" : : "Q" (state->fprs[4]));
-		asm volatile("ld 5,%0" : : "Q" (state->fprs[5]));
-		asm volatile("ld 6,%0" : : "Q" (state->fprs[6]));
-		asm volatile("ld 7,%0" : : "Q" (state->fprs[7]));
-		asm volatile("ld 8,%0" : : "Q" (state->fprs[8]));
-		asm volatile("ld 9,%0" : : "Q" (state->fprs[9]));
-		asm volatile("ld 10,%0" : : "Q" (state->fprs[10]));
-		asm volatile("ld 11,%0" : : "Q" (state->fprs[11]));
-		asm volatile("ld 12,%0" : : "Q" (state->fprs[12]));
-		asm volatile("ld 13,%0" : : "Q" (state->fprs[13]));
-		asm volatile("ld 14,%0" : : "Q" (state->fprs[14]));
-		asm volatile("ld 15,%0" : : "Q" (state->fprs[15]));
-		goto update_fpu_state;
-	}
+	flags &= state->mask;
 
-	/* Test and restore floating-point controls */
-	if (state->mask & KERNEL_FPC)
+	if (flags & KERNEL_FPC)
+		/* Restore floating-point controls */
 		asm volatile("lfpc %0" : : "Q" (state->fpc));
 
+	if (!MACHINE_HAS_VX) {
+		if (flags & KERNEL_VXR_V0V7) {
+			/* Restore floating-point registers */
+			asm volatile("ld 0,%0" : : "Q" (state->fprs[0]));
+			asm volatile("ld 1,%0" : : "Q" (state->fprs[1]));
+			asm volatile("ld 2,%0" : : "Q" (state->fprs[2]));
+			asm volatile("ld 3,%0" : : "Q" (state->fprs[3]));
+			asm volatile("ld 4,%0" : : "Q" (state->fprs[4]));
+			asm volatile("ld 5,%0" : : "Q" (state->fprs[5]));
+			asm volatile("ld 6,%0" : : "Q" (state->fprs[6]));
+			asm volatile("ld 7,%0" : : "Q" (state->fprs[7]));
+			asm volatile("ld 8,%0" : : "Q" (state->fprs[8]));
+			asm volatile("ld 9,%0" : : "Q" (state->fprs[9]));
+			asm volatile("ld 10,%0" : : "Q" (state->fprs[10]));
+			asm volatile("ld 11,%0" : : "Q" (state->fprs[11]));
+			asm volatile("ld 12,%0" : : "Q" (state->fprs[12]));
+			asm volatile("ld 13,%0" : : "Q" (state->fprs[13]));
+			asm volatile("ld 14,%0" : : "Q" (state->fprs[14]));
+			asm volatile("ld 15,%0" : : "Q" (state->fprs[15]));
+		}
+		return;
+	}
+
 	/* Test and restore (load) vector registers */
 	asm volatile (
 		/*
-		 * Test if any vector registers must be loaded and, if so,
+		 * Test if any vector register must be loaded and, if so,
 		 * test if all registers can be loaded at once.
 		 */
-		"	tmll	%[m],15\n"	/* KERNEL_VXR_MASK */
-		"	jz	20f\n"		/* no work -> done */
-		"	la	1,%[vxrs]\n"	/* load load area */
-		"	jo	18f\n"		/* -> load V0..V31 */
-
-		/*
-		 * Test if V8..V23 can be restored at once... this speeds up
-		 * for KERNEL_VXR_MID only. Otherwise continue to split the
-		 * range of vector registers into two halves and test them
-		 * separately.
-		 */
-		"	tmll	%[m],6\n"	/* KERNEL_VXR_MID */
-		"	jo	17f\n"		/* -> load V8..V23 */
-
-		/* Test and load the first half of 16 vector registers */
-		"1:	tmll	%[m],3\n"	/* KERNEL_VXR_LOW */
-		"	jz	10f\n"		/* -> KERNEL_VXR_HIGH */
-		"	jo	2f\n"		/* 11 -> load V0..V15 */
-		"	brc	4,3f\n"		/* 01 -> load V0..V7  */
-		"	brc	2,4f\n"		/* 10 -> load V8..V15 */
-
-		/* Test and load the second half of 16 vector registers */
-		"10:	tmll	%[m],12\n"	/* KERNEL_VXR_HIGH */
-		"	jo	19f\n"		/* 11 -> load V16..V31 */
-		"	brc	4,11f\n"	/* 01 -> load V16..V23	*/
-		"	brc	2,12f\n"	/* 10 -> load V24..V31 */
-		"	j	20f\n"		/* 00 -> done */
-
+		"	la	1,%[vxrs]\n"	/* load restore area */
+		"	tmll	%[m],30\n"	/* KERNEL_VXR */
+		"	jz	7f\n"		/* no work -> done */
+		"	jo	5f\n"		/* -> restore V0..V31 */
 		/*
-		 * Below are the vstm combinations to load multiple vector
-		 * registers at once.
+		 * Test for special case KERNEL_VXR_MID only. In this
+		 * case a vlm V8..V23 is the best instruction
 		 */
-		"2:	.word	0xe70f,0x1000,0x0036\n"	/* vlm 0,15,0(1) */
-		"	j	10b\n"			/* -> VXR_HIGH */
-		"3:	.word	0xe707,0x1000,0x0036\n" /* vlm 0,7,0(1) */
-		"	j	10b\n"			/* -> VXR_HIGH */
-		"4:	.word	0xe78f,0x1080,0x0036\n" /* vlm 8,15,128(1) */
-		"	j	10b\n"			/* -> VXR_HIGH */
-		"\n"
-		"11:	.word	0xe707,0x1100,0x0c36\n"	/* vlm 16,23,256(1) */
-		"	j	20f\n"			/* -> done */
-		"12:	.word	0xe78f,0x1180,0x0c36\n" /* vlm 24,31,384(1) */
-		"	j	20f\n"			/* -> done */
-		"\n"
-		"17:	.word	0xe787,0x1080,0x0436\n"	/* vlm 8,23,128(1) */
-		"	nill	%[m],249\n"		/* m &= ~VXR_MID    */
-		"	j	1b\n"			/* -> VXR_LOW */
-		"\n"
-		"18:	.word	0xe70f,0x1000,0x0036\n"	/* vlm 0,15,0(1) */
-		"19:	.word	0xe70f,0x1100,0x0c36\n"	/* vlm 16,31,256(1) */
-		"20:"
-		:
-		: [vxrs] "Q" (*(struct vx_array *) &state->vxrs),
-		  [m] "d" (state->mask)
+		"	chi	%[m],12\n"	/* KERNEL_VXR_MID */
+		"	jne	0f\n"		/* -> restore V8..V23 */
+		"	VLM	8,23,128,1\n"	/* vlm %v8,%v23,128(%r1) */
+		"	j	7f\n"
+		/* Test and restore the first half of 16 vector registers */
+		"0:	tmll	%[m],6\n"	/* KERNEL_VXR_LOW */
+		"	jz	3f\n"		/* -> KERNEL_VXR_HIGH */
+		"	jo	2f\n"		/* 11 -> restore V0..V15 */
+		"	brc	2,1f\n"		/* 10 -> restore V8..V15 */
+		"	VLM	0,7,0,1\n"	/* vlm %v0,%v7,0(%r1) */
+		"	j	3f\n"
+		"1:	VLM	8,15,128,1\n"	/* vlm %v8,%v15,128(%r1) */
+		"	j	3f\n"
+		"2:	VLM	0,15,0,1\n"	/* vlm %v0,%v15,0(%r1) */
+		/* Test and restore the second half of 16 vector registers */
+		"3:	tmll	%[m],24\n"	/* KERNEL_VXR_HIGH */
+		"	jz	7f\n"
+		"	jo	6f\n"		/* 11 -> restore V16..V31 */
+		"	brc	2,4f\n"		/* 10 -> restore V24..V31 */
+		"	VLM	16,23,256,1\n"	/* vlm %v16,%v23,256(%r1) */
+		"	j	7f\n"
+		"4:	VLM	24,31,384,1\n"	/* vlm %v24,%v31,384(%r1) */
+		"	j	7f\n"
+		"5:	VLM	0,15,0,1\n"	/* vlm %v0,%v15,0(%r1) */
+		"6:	VLM	16,31,256,1\n"	/* vlm %v16,%v31,256(%r1) */
+		"7:"
+		: [vxrs] "=Q" (*(struct vx_array *) &state->vxrs)
+		: [m] "d" (flags)
 		: "1", "cc");
-
-update_fpu_state:
-	/* Update current kernel VX state */
-	__this_cpu_write(kernel_fpu_state, state->mask);
 }
 EXPORT_SYMBOL(__kernel_fpu_end);
diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c
index 050b8d0..bfda6aa 100644
--- a/arch/s390/kernel/sysinfo.c
+++ b/arch/s390/kernel/sysinfo.c
@@ -454,7 +454,7 @@ void s390_adjust_jiffies(void)
 			: "Q" (info->capability), "d" (10000000), "d" (0)
 			: "cc"
 			);
-		kernel_fpu_end(&fpu);
+		kernel_fpu_end(&fpu, KERNEL_FPR);
 	} else
 		/*
 		 * Really old machine without stsi block for basic
-- 
cgit v0.10.2


From 8f149ea6e91534c3e0b4cfcd843323bd94273087 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Mon, 22 Aug 2016 14:40:06 +0200
Subject: s390/nmi: improve revalidation of fpu / vector registers

The machine check handler will do one of two things if the floating-point
control, a floating point register or a vector register can not be
revalidated:
1) if the PSW indicates user mode the process is terminated
2) if the PSW indicates kernel mode the system is stopped

To unconditionally stop the system for 2) is incorrect.

There are three possible outcomes if the floating-point control, a
floating point register or a vector register can not be revalidated:
1) The kernel is inside a kernel_fpu_begin/kernel_fpu_end block and
   needs the register. The system is stopped.
2) No active kernel_fpu_begin/kernel_fpu_end block and the CIF_FPU bit
   is not set. The user space process needs the register and is killed.
3) No active kernel_fpu_begin/kernel_fpu_end block and the CIF_FPU bit
   is set. Neither the kernel nor the user space process needs the
   lost register. Just revalidate it and continue.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
index 29376f0..9a32f74 100644
--- a/arch/s390/kernel/nmi.c
+++ b/arch/s390/kernel/nmi.c
@@ -98,7 +98,7 @@ EXPORT_SYMBOL_GPL(s390_handle_mcck);
  * returns 0 if all registers could be validated
  * returns 1 otherwise
  */
-static int notrace s390_validate_registers(union mci mci)
+static int notrace s390_validate_registers(union mci mci, int umode)
 {
 	int kill_task;
 	u64 zero;
@@ -110,26 +110,41 @@ static int notrace s390_validate_registers(union mci mci)
 	if (!mci.gr) {
 		/*
 		 * General purpose registers couldn't be restored and have
-		 * unknown contents. Process needs to be terminated.
+		 * unknown contents. Stop system or terminate process.
 		 */
+		if (!umode)
+			s390_handle_damage();
 		kill_task = 1;
 	}
 	if (!mci.fp) {
 		/*
-		 * Floating point registers can't be restored and
-		 * therefore the process needs to be terminated.
+		 * Floating point registers can't be restored. If the
+		 * kernel currently uses floating point registers the
+		 * system is stopped. If the process has its floating
+		 * point registers loaded it is terminated.
+		 * Otherwise just revalidate the registers.
 		 */
-		kill_task = 1;
+		if (S390_lowcore.fpu_flags & KERNEL_VXR_V0V7)
+			s390_handle_damage();
+		if (!test_cpu_flag(CIF_FPU))
+			kill_task = 1;
 	}
 	fpt_save_area = &S390_lowcore.floating_pt_save_area;
 	fpt_creg_save_area = &S390_lowcore.fpt_creg_save_area;
 	if (!mci.fc) {
 		/*
 		 * Floating point control register can't be restored.
-		 * Task will be terminated.
+		 * If the kernel currently uses the floating point
+		 * registers and needs the FPC register the system is
+		 * stopped. If the process has its floating point
+		 * registers loaded it is terminated. Otherwise the
+		 * FPC is just revalidated.
 		 */
+		if (S390_lowcore.fpu_flags & KERNEL_FPC)
+			s390_handle_damage();
 		asm volatile("lfpc 0(%0)" : : "a" (&zero), "m" (zero));
-		kill_task = 1;
+		if (!test_cpu_flag(CIF_FPU))
+			kill_task = 1;
 	} else
 		asm volatile("lfpc 0(%0)" : : "a" (fpt_creg_save_area));
 
@@ -159,10 +174,16 @@ static int notrace s390_validate_registers(union mci mci)
 
 		if (!mci.vr) {
 			/*
-			 * Vector registers can't be restored and therefore
-			 * the process needs to be terminated.
+			 * Vector registers can't be restored. If the kernel
+			 * currently uses vector registers the system is
+			 * stopped. If the process has its vector registers
+			 * loaded it is terminated. Otherwise just revalidate
+			 * the registers.
 			 */
-			kill_task = 1;
+			if (S390_lowcore.fpu_flags & KERNEL_VXR)
+				s390_handle_damage();
+			if (!test_cpu_flag(CIF_FPU))
+				kill_task = 1;
 		}
 		cr0.val = S390_lowcore.cregs_save_area[0];
 		cr0.afp = cr0.vx = 1;
@@ -250,13 +271,11 @@ void notrace s390_do_machine_check(struct pt_regs *regs)
 	struct mcck_struct *mcck;
 	unsigned long long tmp;
 	union mci mci;
-	int umode;
 
 	nmi_enter();
 	inc_irq_stat(NMI_NMI);
 	mci.val = S390_lowcore.mcck_interruption_code;
 	mcck = this_cpu_ptr(&cpu_mcck);
-	umode = user_mode(regs);
 
 	if (mci.sd) {
 		/* System damage -> stopping machine */
@@ -297,22 +316,14 @@ void notrace s390_do_machine_check(struct pt_regs *regs)
 			s390_handle_damage();
 		}
 	}
-	if (s390_validate_registers(mci)) {
-		if (umode) {
-			/*
-			 * Couldn't restore all register contents while in
-			 * user mode -> mark task for termination.
-			 */
-			mcck->kill_task = 1;
-			mcck->mcck_code = mci.val;
-			set_cpu_flag(CIF_MCCK_PENDING);
-		} else {
-			/*
-			 * Couldn't restore all register contents while in
-			 * kernel mode -> stopping machine.
-			 */
-			s390_handle_damage();
-		}
+	if (s390_validate_registers(mci, user_mode(regs))) {
+		/*
+		 * Couldn't restore all register contents for the
+		 * user space process -> mark task for termination.
+		 */
+		mcck->kill_task = 1;
+		mcck->mcck_code = mci.val;
+		set_cpu_flag(CIF_MCCK_PENDING);
 	}
 	if (mci.cd) {
 		/* Timing facility damage */
-- 
cgit v0.10.2


From 474fd6e80fe529e9adeeb7ea9d4e5d6c4da0b7fe Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Tue, 23 Aug 2016 13:30:24 +0200
Subject: RAID/s390: add SIMD implementation for raid6 gen/xor

Using vector registers is slightly faster:

raid6: vx128x8  gen() 19705 MB/s
raid6: vx128x8  xor() 11886 MB/s
raid6: using algorithm vx128x8 gen() 19705 MB/s
raid6: .... xor() 11886 MB/s, rmw enabled

vs the software algorithms:

raid6: int64x1  gen()  3018 MB/s
raid6: int64x1  xor()  1429 MB/s
raid6: int64x2  gen()  4661 MB/s
raid6: int64x2  xor()  3143 MB/s
raid6: int64x4  gen()  5392 MB/s
raid6: int64x4  xor()  3509 MB/s
raid6: int64x8  gen()  4441 MB/s
raid6: int64x8  xor()  3207 MB/s
raid6: using algorithm int64x4 gen() 5392 MB/s
raid6: .... xor() 3509 MB/s, rmw enabled

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/include/asm/vx-insn.h b/arch/s390/include/asm/vx-insn.h
index b61846d..49c24a2 100644
--- a/arch/s390/include/asm/vx-insn.h
+++ b/arch/s390/include/asm/vx-insn.h
@@ -278,6 +278,15 @@
 	VLVG	\v, \gr, \index, 3
 .endm
 
+/* VECTOR LOAD REGISTER */
+.macro	VLR	v1, v2
+	VX_NUM	v1, \v1
+	VX_NUM	v2, \v2
+	.word	0xE700 | ((v1&15) << 4) | (v2&15)
+	.word	0
+	MRXBOPC	0, 0x56, v1, v2
+.endm
+
 /* VECTOR LOAD */
 .macro	VL	v, disp, index="%r0", base
 	VX_NUM	v1, \v
@@ -404,6 +413,16 @@
 
 /* Vector integer instructions */
 
+/* VECTOR AND */
+.macro	VN	vr1, vr2, vr3
+	VX_NUM	v1, \vr1
+	VX_NUM	v2, \vr2
+	VX_NUM	v3, \vr3
+	.word	0xE700 | ((v1&15) << 4) | (v2&15)
+	.word	((v3&15) << 12)
+	MRXBOPC	0, 0x68, v1, v2, v3
+.endm
+
 /* VECTOR EXCLUSIVE OR */
 .macro	VX	vr1, vr2, vr3
 	VX_NUM	v1, \vr1
@@ -469,6 +488,73 @@
 	MRXBOPC	0, 0x7D, v1, v2, v3
 .endm
 
+/* VECTOR REPLICATE IMMEDIATE: replicate imm2 into vr1; m3 = element size */
+.macro	VREPI	vr1, imm2, m3
+	VX_NUM	v1, \vr1
+	.word	0xE700 | ((v1&15) << 4)
+	.word	\imm2
+	MRXBOPC	\m3, 0x45, v1
+.endm
+.macro	VREPIB	vr1, imm2
+	VREPI	\vr1, \imm2, 0
+.endm
+.macro	VREPIH	vr1, imm2
+	VREPI	\vr1, \imm2, 1
+.endm
+.macro	VREPIF	vr1, imm2
+	VREPI	\vr1, \imm2, 2
+.endm
+.macro	VREPIG	vr1, imm2
+	VREPI	\vr1, \imm2, 3	/* m3 = 3: doubleword elements */
+.endm
+
+/* VECTOR ADD: vr1 = vr2 + vr3 per element; m4 selects the element size */
+.macro	VA	vr1, vr2, vr3, m4
+	VX_NUM	v1, \vr1
+	VX_NUM	v2, \vr2
+	VX_NUM	v3, \vr3
+	.word	0xE700 | ((v1&15) << 4) | (v2&15)
+	.word	((v3&15) << 12)
+	MRXBOPC	\m4, 0xF3, v1, v2, v3
+.endm
+.macro	VAB	vr1, vr2, vr3
+	VA	\vr1, \vr2, \vr3, 0
+.endm
+.macro	VAH	vr1, vr2, vr3
+	VA	\vr1, \vr2, \vr3, 1
+.endm
+.macro	VAF	vr1, vr2, vr3
+	VA	\vr1, \vr2, \vr3, 2
+.endm
+.macro	VAG	vr1, vr2, vr3
+	VA	\vr1, \vr2, \vr3, 3
+.endm
+.macro	VAQ	vr1, vr2, vr3
+	VA	\vr1, \vr2, \vr3, 4
+.endm
+
+/* VECTOR ELEMENT SHIFT RIGHT ARITHMETIC: per-element shift counts in vr3 */
+.macro	VESRAV	vr1, vr2, vr3, m4
+	VX_NUM	v1, \vr1
+	VX_NUM	v2, \vr2
+	VX_NUM	v3, \vr3
+	.word	0xE700 | ((v1&15) << 4) | (v2&15)
+	.word	((v3&15) << 12)
+	MRXBOPC \m4, 0x7A, v1, v2, v3
+.endm
+
+.macro	VESRAVB	vr1, vr2, vr3
+	VESRAV	\vr1, \vr2, \vr3, 0
+.endm
+.macro	VESRAVH	vr1, vr2, vr3
+	VESRAV	\vr1, \vr2, \vr3, 1
+.endm
+.macro	VESRAVF	vr1, vr2, vr3
+	VESRAV	\vr1, \vr2, \vr3, 2
+.endm
+.macro	VESRAVG	vr1, vr2, vr3
+	VESRAV	\vr1, \vr2, \vr3, 3
+.endm
 
 #endif	/* __ASSEMBLY__ */
 #endif	/* __ASM_S390_VX_INSN_H */
diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h
index a0118d5..c032a6a 100644
--- a/include/linux/raid/pq.h
+++ b/include/linux/raid/pq.h
@@ -103,6 +103,7 @@ extern const struct raid6_calls raid6_avx2x1;
 extern const struct raid6_calls raid6_avx2x2;
 extern const struct raid6_calls raid6_avx2x4;
 extern const struct raid6_calls raid6_tilegx8;
+extern const struct raid6_calls raid6_s390vx8;
 
 struct raid6_recov_calls {
 	void (*data2)(int, size_t, int, int, void **);
diff --git a/lib/raid6/.gitignore b/lib/raid6/.gitignore
index 0a7e494..f01b1cb 100644
--- a/lib/raid6/.gitignore
+++ b/lib/raid6/.gitignore
@@ -3,3 +3,4 @@ altivec*.c
 int*.c
 tables.c
 neon?.c
+s390vx?.c
diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile
index 3b10a48..667b960 100644
--- a/lib/raid6/Makefile
+++ b/lib/raid6/Makefile
@@ -7,6 +7,7 @@ raid6_pq-$(CONFIG_X86) += recov_ssse3.o recov_avx2.o mmx.o sse1.o sse2.o avx2.o
 raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o
 raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o
 raid6_pq-$(CONFIG_TILEGX) += tilegx8.o
+raid6_pq-$(CONFIG_S390) += s390vx8.o
 
 hostprogs-y	+= mktables
 
@@ -116,6 +117,11 @@ $(obj)/tilegx8.c:   UNROLL := 8
 $(obj)/tilegx8.c:   $(src)/tilegx.uc $(src)/unroll.awk FORCE
 	$(call if_changed,unroll)
 
+targets += s390vx8.c
+$(obj)/s390vx8.c:   UNROLL := 8
+$(obj)/s390vx8.c:   $(src)/s390vx.uc $(src)/unroll.awk FORCE
+	$(call if_changed,unroll)
+
 quiet_cmd_mktable = TABLE   $@
       cmd_mktable = $(obj)/mktables > $@ || ( rm -f $@ && exit 1 )
 
diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c
index 975c6e0..e1923b6 100644
--- a/lib/raid6/algos.c
+++ b/lib/raid6/algos.c
@@ -69,6 +69,9 @@ const struct raid6_calls * const raid6_algos[] = {
 #if defined(CONFIG_TILEGX)
 	&raid6_tilegx8,
 #endif
+#if defined(CONFIG_S390)
+	&raid6_s390vx8,
+#endif
 	&raid6_intx1,
 	&raid6_intx2,
 	&raid6_intx4,
diff --git a/lib/raid6/s390vx.uc b/lib/raid6/s390vx.uc
new file mode 100644
index 0000000..7b45191
--- /dev/null
+++ b/lib/raid6/s390vx.uc
@@ -0,0 +1,168 @@
+/*
+ * s390vx$#.c
+ *
+ * $#-way unrolled RAID6 gen/xor functions for s390
+ * based on the vector facility
+ *
+ * Copyright IBM Corp. 2016
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ *
+ * This file is postprocessed using unroll.awk.
+ */
+
+#include <linux/raid/pq.h>
+#include <asm/fpu/api.h>
+
+asm(".include \"asm/vx-insn.h\"\n");
+
+#define NSIZE 16
+
+static inline void LOAD_CONST(void)
+{
+	asm volatile("VREPIB %v24,7");	/* %v24 = 7 per byte: MASK() shift count */
+	asm volatile("VREPIB %v25,0x1d");	/* %v25 = 0x1d per byte: AND() operand */
+}
+
+/*
+ * The SHLBYTE() operation shifts each of the 16 bytes in
+ * vector register y left by 1 bit (by adding y to itself byte-wise)
+ * and stores the result in vector register x.
+ */
+static inline void SHLBYTE(int x, int y)
+{
+	asm volatile ("VAB %0,%1,%1" : : "i" (x), "i" (y));
+}
+
+/*
+ * For each of the 16 bytes in vector register y the MASK() operation
+ * returns 0xFF if the high bit of the byte is 1, or 0x00 if it is 0,
+ * using an arithmetic right shift by the per-byte count in %v24 (set
+ * to 7 by LOAD_CONST()). The result is stored in vector register x.
+ */
+static inline void MASK(int x, int y)
+{
+	asm volatile ("VESRAVB	%0,%1,24" : : "i" (x), "i" (y));
+}
+
+static inline void AND(int x, int y, int z)	/* vector register x = y & z */
+{
+	asm volatile ("VN %0,%1,%2" : : "i" (x), "i" (y), "i" (z));
+}
+
+static inline void XOR(int x, int y, int z)	/* vector register x = y ^ z */
+{
+	asm volatile ("VX %0,%1,%2" : : "i" (x), "i" (y), "i" (z));
+}
+
+static inline void LOAD_DATA(int x, int n, u8 *ptr)
+{	/* Load n vectors (16*n bytes) from ptr into registers x..x+n-1 */
+	typedef struct { u8 _[16*n]; } addrtype;	/* "m" operand spans 16*n bytes */
+	register addrtype *__ptr asm("1") = (addrtype *) ptr;
+
+	asm volatile ("VLM %2,%3,0,%r1"	/* NOTE(review): STORE_DATA() spells the base "1" */
+		      : : "m" (*__ptr), "a" (__ptr), "i" (x), "i" (x + n - 1));
+}
+
+static inline void STORE_DATA(int x, int n, u8 *ptr)
+{	/* Store registers x..x+n-1 (16*n bytes) to ptr */
+	typedef struct { u8 _[16*n]; } addrtype;	/* "=m" operand spans 16*n bytes */
+	register addrtype *__ptr asm("1") = (addrtype *) ptr;
+
+	asm volatile ("VSTM %2,%3,0,1"
+		      : "=m" (*__ptr) : "a" (__ptr), "i" (x), "i" (x + n - 1));
+}
+
+static inline void COPY_VEC(int x, int y)	/* vector register x = y */
+{
+	asm volatile ("VLR %0,%1" : : "i" (x), "i" (y));
+}
+
+static void raid6_s390vx$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
+{	/* Compute P (XOR parity) and Q (RS syndrome) from all data disks */
+	struct kernel_fpu vxstate;
+	u8 **dptr, *p, *q;
+	int d, z, z0;
+
+	kernel_fpu_begin(&vxstate, KERNEL_VXR);	/* paired with kernel_fpu_end() below */
+	LOAD_CONST();	/* constants in %v24/%v25 */
+
+	dptr = (u8 **) ptrs;
+	z0 = disks - 3;		/* Highest data disk */
+	p = dptr[z0 + 1];	/* XOR parity */
+	q = dptr[z0 + 2];	/* RS syndrome */
+
+	for (d = 0; d < bytes; d += $#*NSIZE) {	/* d: byte offset into each buffer */
+		LOAD_DATA(0,$#,&dptr[z0][d]);	/* P (regs 0..) starts as disk z0 data */
+		COPY_VEC(8+$$,0+$$);
+		for (z = z0 - 1; z >= 0; z--) {	/* Q = shifted Q ^ (MASK & 0x1d) */
+			MASK(16+$$,8+$$);
+			AND(16+$$,16+$$,25);
+			SHLBYTE(8+$$,8+$$);
+			XOR(8+$$,8+$$,16+$$);
+			LOAD_DATA(16,$#,&dptr[z][d]);	/* regs 16..: data of disk z */
+			XOR(0+$$,0+$$,16+$$);
+			XOR(8+$$,8+$$,16+$$);
+		}
+		STORE_DATA(0,$#,&p[d]);
+		STORE_DATA(8,$#,&q[d]);
+	}
+	kernel_fpu_end(&vxstate, KERNEL_VXR);
+}
+
+static void raid6_s390vx$#_xor_syndrome(int disks, int start, int stop,
+					size_t bytes, void **ptrs)
+{	/* XOR the P/Q contribution of data disks start..stop into P and Q */
+	struct kernel_fpu vxstate;
+	u8 **dptr, *p, *q;
+	int d, z, z0;
+
+	dptr = (u8 **) ptrs;
+	z0 = stop;		/* P/Q right side optimization: skip disks above stop */
+	p = dptr[disks - 2];	/* XOR parity */
+	q = dptr[disks - 1];	/* RS syndrome */
+
+	kernel_fpu_begin(&vxstate, KERNEL_VXR);	/* paired with kernel_fpu_end() below */
+	LOAD_CONST();	/* constants in %v24/%v25 */
+
+	for (d = 0; d < bytes; d += $#*NSIZE) {	/* d: byte offset into each buffer */
+		/* P/Q data pages: regs 0.. accumulate P, regs 8.. accumulate Q */
+		LOAD_DATA(0,$#,&dptr[z0][d]);
+		COPY_VEC(8+$$,0+$$);
+		for (z = z0 - 1; z >= start; z--) {
+			MASK(16+$$,8+$$);
+			AND(16+$$,16+$$,25);
+			SHLBYTE(8+$$,8+$$);
+			XOR(8+$$,8+$$,16+$$);
+			LOAD_DATA(16,$#,&dptr[z][d]);	/* regs 16..: data of disk z */
+			XOR(0+$$,0+$$,16+$$);
+			XOR(8+$$,8+$$,16+$$);
+		}
+		/* P/Q left side optimization: disks below start only advance Q */
+		for (z = start - 1; z >= 0; z--) {
+			MASK(16+$$,8+$$);
+			AND(16+$$,16+$$,25);
+			SHLBYTE(8+$$,8+$$);
+			XOR(8+$$,8+$$,16+$$);
+		}
+		LOAD_DATA(16,$#,&p[d]);	/* read-modify-write of P */
+		XOR(16+$$,16+$$,0+$$);
+		STORE_DATA(16,$#,&p[d]);
+		LOAD_DATA(16,$#,&q[d]);	/* read-modify-write of Q */
+		XOR(16+$$,16+$$,8+$$);
+		STORE_DATA(16,$#,&q[d]);
+	}
+	kernel_fpu_end(&vxstate, KERNEL_VXR);
+}
+
+static int raid6_s390vx$#_valid(void)
+{
+	return MACHINE_HAS_VX;	/* presumably set when the vector facility exists - confirm */
+}
+
+const struct raid6_calls raid6_s390vx$# = {
+	raid6_s390vx$#_gen_syndrome,
+	raid6_s390vx$#_xor_syndrome,
+	raid6_s390vx$#_valid,
+	"vx128x$#",	/* algorithm name */
+	1		/* NOTE(review): presumably the "prefer" flag - confirm in pq.h */
+};
-- 
cgit v0.10.2


From edc63a3785b48455e05793e848f0174e21f38d09 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Mon, 15 Aug 2016 09:19:16 +0200
Subject: s390/crypto: cleanup cpacf function codes

Use a separate define for the decryption modifier bit instead of
duplicating the function codes for encryption / decryption.
In addition use an unsigned type for the function code.

Reviewed-by: Harald Freudenberger <freude@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c
index 2ea18b0..9da5469 100644
--- a/arch/s390/crypto/aes_s390.c
+++ b/arch/s390/crypto/aes_s390.c
@@ -41,9 +41,8 @@ static char keylen_flag;
 
 struct s390_aes_ctx {
 	u8 key[AES_MAX_KEY_SIZE];
-	long enc;
-	long dec;
 	int key_len;
+	unsigned long fc;
 	union {
 		struct crypto_skcipher *blk;
 		struct crypto_cipher *cip;
@@ -61,9 +60,8 @@ struct pcc_param {
 struct s390_xts_ctx {
 	u8 key[32];
 	u8 pcc_key[32];
-	long enc;
-	long dec;
 	int key_len;
+	unsigned long fc;
 	struct crypto_skcipher *fallback;
 };
 
@@ -146,16 +144,16 @@ static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 
 	switch (sctx->key_len) {
 	case 16:
-		cpacf_km(CPACF_KM_AES_128_ENC, &sctx->key, out, in,
-			 AES_BLOCK_SIZE);
+		cpacf_km(CPACF_KM_AES_128,
+			 &sctx->key, out, in, AES_BLOCK_SIZE);
 		break;
 	case 24:
-		cpacf_km(CPACF_KM_AES_192_ENC, &sctx->key, out, in,
-			 AES_BLOCK_SIZE);
+		cpacf_km(CPACF_KM_AES_192,
+			 &sctx->key, out, in, AES_BLOCK_SIZE);
 		break;
 	case 32:
-		cpacf_km(CPACF_KM_AES_256_ENC, &sctx->key, out, in,
-			 AES_BLOCK_SIZE);
+		cpacf_km(CPACF_KM_AES_256,
+			 &sctx->key, out, in, AES_BLOCK_SIZE);
 		break;
 	}
 }
@@ -171,16 +169,16 @@ static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 
 	switch (sctx->key_len) {
 	case 16:
-		cpacf_km(CPACF_KM_AES_128_DEC, &sctx->key, out, in,
-			 AES_BLOCK_SIZE);
+		cpacf_km(CPACF_KM_AES_128 | CPACF_DECRYPT,
+			 &sctx->key, out, in, AES_BLOCK_SIZE);
 		break;
 	case 24:
-		cpacf_km(CPACF_KM_AES_192_DEC, &sctx->key, out, in,
-			 AES_BLOCK_SIZE);
+		cpacf_km(CPACF_KM_AES_192 | CPACF_DECRYPT,
+			 &sctx->key, out, in, AES_BLOCK_SIZE);
 		break;
 	case 32:
-		cpacf_km(CPACF_KM_AES_256_DEC, &sctx->key, out, in,
-			 AES_BLOCK_SIZE);
+		cpacf_km(CPACF_KM_AES_256 | CPACF_DECRYPT,
+			 &sctx->key, out, in, AES_BLOCK_SIZE);
 		break;
 	}
 }
@@ -301,16 +299,13 @@ static int ecb_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
 
 	switch (key_len) {
 	case 16:
-		sctx->enc = CPACF_KM_AES_128_ENC;
-		sctx->dec = CPACF_KM_AES_128_DEC;
+		sctx->fc = CPACF_KM_AES_128;
 		break;
 	case 24:
-		sctx->enc = CPACF_KM_AES_192_ENC;
-		sctx->dec = CPACF_KM_AES_192_DEC;
+		sctx->fc = CPACF_KM_AES_192;
 		break;
 	case 32:
-		sctx->enc = CPACF_KM_AES_256_ENC;
-		sctx->dec = CPACF_KM_AES_256_DEC;
+		sctx->fc = CPACF_KM_AES_256;
 		break;
 	}
 
@@ -351,7 +346,7 @@ static int ecb_aes_encrypt(struct blkcipher_desc *desc,
 		return fallback_blk_enc(desc, dst, src, nbytes);
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ecb_aes_crypt(desc, sctx->enc, sctx->key, &walk);
+	return ecb_aes_crypt(desc, sctx->fc, sctx->key, &walk);
 }
 
 static int ecb_aes_decrypt(struct blkcipher_desc *desc,
@@ -365,7 +360,7 @@ static int ecb_aes_decrypt(struct blkcipher_desc *desc,
 		return fallback_blk_dec(desc, dst, src, nbytes);
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ecb_aes_crypt(desc, sctx->dec, sctx->key, &walk);
+	return ecb_aes_crypt(desc, sctx->fc | CPACF_DECRYPT, sctx->key, &walk);
 }
 
 static int fallback_init_blk(struct crypto_tfm *tfm)
@@ -430,16 +425,13 @@ static int cbc_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
 
 	switch (key_len) {
 	case 16:
-		sctx->enc = CPACF_KMC_AES_128_ENC;
-		sctx->dec = CPACF_KMC_AES_128_DEC;
+		sctx->fc = CPACF_KMC_AES_128;
 		break;
 	case 24:
-		sctx->enc = CPACF_KMC_AES_192_ENC;
-		sctx->dec = CPACF_KMC_AES_192_DEC;
+		sctx->fc = CPACF_KMC_AES_192;
 		break;
 	case 32:
-		sctx->enc = CPACF_KMC_AES_256_ENC;
-		sctx->dec = CPACF_KMC_AES_256_DEC;
+		sctx->fc = CPACF_KMC_AES_256;
 		break;
 	}
 
@@ -492,7 +484,7 @@ static int cbc_aes_encrypt(struct blkcipher_desc *desc,
 		return fallback_blk_enc(desc, dst, src, nbytes);
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return cbc_aes_crypt(desc, sctx->enc, &walk);
+	return cbc_aes_crypt(desc, sctx->fc, &walk);
 }
 
 static int cbc_aes_decrypt(struct blkcipher_desc *desc,
@@ -506,7 +498,7 @@ static int cbc_aes_decrypt(struct blkcipher_desc *desc,
 		return fallback_blk_dec(desc, dst, src, nbytes);
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return cbc_aes_crypt(desc, sctx->dec, &walk);
+	return cbc_aes_crypt(desc, sctx->fc | CPACF_DECRYPT, &walk);
 }
 
 static struct crypto_alg cbc_aes_alg = {
@@ -603,19 +595,16 @@ static int xts_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
 
 	switch (key_len) {
 	case 32:
-		xts_ctx->enc = CPACF_KM_XTS_128_ENC;
-		xts_ctx->dec = CPACF_KM_XTS_128_DEC;
+		xts_ctx->fc = CPACF_KM_XTS_128;
 		memcpy(xts_ctx->key + 16, in_key, 16);
 		memcpy(xts_ctx->pcc_key + 16, in_key + 16, 16);
 		break;
 	case 48:
-		xts_ctx->enc = 0;
-		xts_ctx->dec = 0;
+		xts_ctx->fc = 0;
 		xts_fallback_setkey(tfm, in_key, key_len);
 		break;
 	case 64:
-		xts_ctx->enc = CPACF_KM_XTS_256_ENC;
-		xts_ctx->dec = CPACF_KM_XTS_256_DEC;
+		xts_ctx->fc = CPACF_KM_XTS_256;
 		memcpy(xts_ctx->key, in_key, 32);
 		memcpy(xts_ctx->pcc_key, in_key + 32, 32);
 		break;
@@ -685,7 +674,7 @@ static int xts_aes_encrypt(struct blkcipher_desc *desc,
 		return xts_fallback_encrypt(desc, dst, src, nbytes);
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return xts_aes_crypt(desc, xts_ctx->enc, xts_ctx, &walk);
+	return xts_aes_crypt(desc, xts_ctx->fc, xts_ctx, &walk);
 }
 
 static int xts_aes_decrypt(struct blkcipher_desc *desc,
@@ -699,7 +688,7 @@ static int xts_aes_decrypt(struct blkcipher_desc *desc,
 		return xts_fallback_decrypt(desc, dst, src, nbytes);
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return xts_aes_crypt(desc, xts_ctx->dec, xts_ctx, &walk);
+	return xts_aes_crypt(desc, xts_ctx->fc | CPACF_DECRYPT, xts_ctx, &walk);
 }
 
 static int xts_fallback_init(struct crypto_tfm *tfm)
@@ -759,16 +748,13 @@ static int ctr_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
 
 	switch (key_len) {
 	case 16:
-		sctx->enc = CPACF_KMCTR_AES_128_ENC;
-		sctx->dec = CPACF_KMCTR_AES_128_DEC;
+		sctx->fc = CPACF_KMCTR_AES_128;
 		break;
 	case 24:
-		sctx->enc = CPACF_KMCTR_AES_192_ENC;
-		sctx->dec = CPACF_KMCTR_AES_192_DEC;
+		sctx->fc = CPACF_KMCTR_AES_192;
 		break;
 	case 32:
-		sctx->enc = CPACF_KMCTR_AES_256_ENC;
-		sctx->dec = CPACF_KMCTR_AES_256_DEC;
+		sctx->fc = CPACF_KMCTR_AES_256;
 		break;
 	}
 
@@ -865,7 +851,7 @@ static int ctr_aes_encrypt(struct blkcipher_desc *desc,
 	struct blkcipher_walk walk;
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ctr_aes_crypt(desc, sctx->enc, sctx, &walk);
+	return ctr_aes_crypt(desc, sctx->fc, sctx, &walk);
 }
 
 static int ctr_aes_decrypt(struct blkcipher_desc *desc,
@@ -876,7 +862,7 @@ static int ctr_aes_decrypt(struct blkcipher_desc *desc,
 	struct blkcipher_walk walk;
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ctr_aes_crypt(desc, sctx->dec, sctx, &walk);
+	return ctr_aes_crypt(desc, sctx->fc | CPACF_DECRYPT, sctx, &walk);
 }
 
 static struct crypto_alg ctr_aes_alg = {
@@ -906,11 +892,11 @@ static int __init aes_s390_init(void)
 {
 	int ret;
 
-	if (cpacf_query(CPACF_KM, CPACF_KM_AES_128_ENC))
+	if (cpacf_query(CPACF_KM, CPACF_KM_AES_128))
 		keylen_flag |= AES_KEYLEN_128;
-	if (cpacf_query(CPACF_KM, CPACF_KM_AES_192_ENC))
+	if (cpacf_query(CPACF_KM, CPACF_KM_AES_192))
 		keylen_flag |= AES_KEYLEN_192;
-	if (cpacf_query(CPACF_KM, CPACF_KM_AES_256_ENC))
+	if (cpacf_query(CPACF_KM, CPACF_KM_AES_256))
 		keylen_flag |= AES_KEYLEN_256;
 
 	if (!keylen_flag)
@@ -933,17 +919,17 @@ static int __init aes_s390_init(void)
 	if (ret)
 		goto cbc_aes_err;
 
-	if (cpacf_query(CPACF_KM, CPACF_KM_XTS_128_ENC) &&
-	    cpacf_query(CPACF_KM, CPACF_KM_XTS_256_ENC)) {
+	if (cpacf_query(CPACF_KM, CPACF_KM_XTS_128) &&
+	    cpacf_query(CPACF_KM, CPACF_KM_XTS_256)) {
 		ret = crypto_register_alg(&xts_aes_alg);
 		if (ret)
 			goto xts_aes_err;
 		xts_aes_alg_reg = 1;
 	}
 
-	if (cpacf_query(CPACF_KMCTR, CPACF_KMCTR_AES_128_ENC) &&
-	    cpacf_query(CPACF_KMCTR, CPACF_KMCTR_AES_192_ENC) &&
-	    cpacf_query(CPACF_KMCTR, CPACF_KMCTR_AES_256_ENC)) {
+	if (cpacf_query(CPACF_KMCTR, CPACF_KMCTR_AES_128) &&
+	    cpacf_query(CPACF_KMCTR, CPACF_KMCTR_AES_192) &&
+	    cpacf_query(CPACF_KMCTR, CPACF_KMCTR_AES_256)) {
 		ctrblk = (u8 *) __get_free_page(GFP_KERNEL);
 		if (!ctrblk) {
 			ret = -ENOMEM;
diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c
index 697e71a..fadd474 100644
--- a/arch/s390/crypto/des_s390.c
+++ b/arch/s390/crypto/des_s390.c
@@ -53,14 +53,15 @@ static void des_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 {
 	struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm);
 
-	cpacf_km(CPACF_KM_DEA_ENC, ctx->key, out, in, DES_BLOCK_SIZE);
+	cpacf_km(CPACF_KM_DEA, ctx->key, out, in, DES_BLOCK_SIZE);
 }
 
 static void des_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 {
 	struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm);
 
-	cpacf_km(CPACF_KM_DEA_DEC, ctx->key, out, in, DES_BLOCK_SIZE);
+	cpacf_km(CPACF_KM_DEA | CPACF_DECRYPT,
+		 ctx->key, out, in, DES_BLOCK_SIZE);
 }
 
 static struct crypto_alg des_alg = {
@@ -148,7 +149,7 @@ static int ecb_des_encrypt(struct blkcipher_desc *desc,
 	struct blkcipher_walk walk;
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ecb_desall_crypt(desc, CPACF_KM_DEA_ENC, ctx->key, &walk);
+	return ecb_desall_crypt(desc, CPACF_KM_DEA, ctx->key, &walk);
 }
 
 static int ecb_des_decrypt(struct blkcipher_desc *desc,
@@ -159,7 +160,8 @@ static int ecb_des_decrypt(struct blkcipher_desc *desc,
 	struct blkcipher_walk walk;
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ecb_desall_crypt(desc, CPACF_KM_DEA_DEC, ctx->key, &walk);
+	return ecb_desall_crypt(desc, CPACF_KM_DEA | CPACF_DECRYPT,
+				ctx->key, &walk);
 }
 
 static struct crypto_alg ecb_des_alg = {
@@ -189,7 +191,7 @@ static int cbc_des_encrypt(struct blkcipher_desc *desc,
 	struct blkcipher_walk walk;
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return cbc_desall_crypt(desc, CPACF_KMC_DEA_ENC, &walk);
+	return cbc_desall_crypt(desc, CPACF_KMC_DEA, &walk);
 }
 
 static int cbc_des_decrypt(struct blkcipher_desc *desc,
@@ -199,7 +201,7 @@ static int cbc_des_decrypt(struct blkcipher_desc *desc,
 	struct blkcipher_walk walk;
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return cbc_desall_crypt(desc, CPACF_KMC_DEA_DEC, &walk);
+	return cbc_desall_crypt(desc, CPACF_KMC_DEA | CPACF_DECRYPT, &walk);
 }
 
 static struct crypto_alg cbc_des_alg = {
@@ -257,14 +259,15 @@ static void des3_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 {
 	struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm);
 
-	cpacf_km(CPACF_KM_TDEA_192_ENC, ctx->key, dst, src, DES_BLOCK_SIZE);
+	cpacf_km(CPACF_KM_TDEA_192, ctx->key, dst, src, DES_BLOCK_SIZE);
 }
 
 static void des3_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 {
 	struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm);
 
-	cpacf_km(CPACF_KM_TDEA_192_DEC, ctx->key, dst, src, DES_BLOCK_SIZE);
+	cpacf_km(CPACF_KM_TDEA_192 | CPACF_DECRYPT,
+		 ctx->key, dst, src, DES_BLOCK_SIZE);
 }
 
 static struct crypto_alg des3_alg = {
@@ -294,7 +297,7 @@ static int ecb_des3_encrypt(struct blkcipher_desc *desc,
 	struct blkcipher_walk walk;
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ecb_desall_crypt(desc, CPACF_KM_TDEA_192_ENC, ctx->key, &walk);
+	return ecb_desall_crypt(desc, CPACF_KM_TDEA_192, ctx->key, &walk);
 }
 
 static int ecb_des3_decrypt(struct blkcipher_desc *desc,
@@ -305,7 +308,8 @@ static int ecb_des3_decrypt(struct blkcipher_desc *desc,
 	struct blkcipher_walk walk;
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ecb_desall_crypt(desc, CPACF_KM_TDEA_192_DEC, ctx->key, &walk);
+	return ecb_desall_crypt(desc, CPACF_KM_TDEA_192 | CPACF_DECRYPT,
+				ctx->key, &walk);
 }
 
 static struct crypto_alg ecb_des3_alg = {
@@ -335,7 +339,7 @@ static int cbc_des3_encrypt(struct blkcipher_desc *desc,
 	struct blkcipher_walk walk;
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return cbc_desall_crypt(desc, CPACF_KMC_TDEA_192_ENC, &walk);
+	return cbc_desall_crypt(desc, CPACF_KMC_TDEA_192, &walk);
 }
 
 static int cbc_des3_decrypt(struct blkcipher_desc *desc,
@@ -345,7 +349,8 @@ static int cbc_des3_decrypt(struct blkcipher_desc *desc,
 	struct blkcipher_walk walk;
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return cbc_desall_crypt(desc, CPACF_KMC_TDEA_192_DEC, &walk);
+	return cbc_desall_crypt(desc, CPACF_KMC_TDEA_192 | CPACF_DECRYPT,
+				&walk);
 }
 
 static struct crypto_alg cbc_des3_alg = {
@@ -456,7 +461,7 @@ static int ctr_des_encrypt(struct blkcipher_desc *desc,
 	struct blkcipher_walk walk;
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ctr_desall_crypt(desc, CPACF_KMCTR_DEA_ENC, ctx, &walk);
+	return ctr_desall_crypt(desc, CPACF_KMCTR_DEA, ctx, &walk);
 }
 
 static int ctr_des_decrypt(struct blkcipher_desc *desc,
@@ -467,7 +472,8 @@ static int ctr_des_decrypt(struct blkcipher_desc *desc,
 	struct blkcipher_walk walk;
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ctr_desall_crypt(desc, CPACF_KMCTR_DEA_DEC, ctx, &walk);
+	return ctr_desall_crypt(desc, CPACF_KMCTR_DEA | CPACF_DECRYPT,
+				ctx, &walk);
 }
 
 static struct crypto_alg ctr_des_alg = {
@@ -499,7 +505,7 @@ static int ctr_des3_encrypt(struct blkcipher_desc *desc,
 	struct blkcipher_walk walk;
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ctr_desall_crypt(desc, CPACF_KMCTR_TDEA_192_ENC, ctx, &walk);
+	return ctr_desall_crypt(desc, CPACF_KMCTR_TDEA_192, ctx, &walk);
 }
 
 static int ctr_des3_decrypt(struct blkcipher_desc *desc,
@@ -510,7 +516,8 @@ static int ctr_des3_decrypt(struct blkcipher_desc *desc,
 	struct blkcipher_walk walk;
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ctr_desall_crypt(desc, CPACF_KMCTR_TDEA_192_DEC, ctx, &walk);
+	return ctr_desall_crypt(desc, CPACF_KMCTR_TDEA_192 | CPACF_DECRYPT,
+				ctx, &walk);
 }
 
 static struct crypto_alg ctr_des3_alg = {
@@ -538,8 +545,8 @@ static int __init des_s390_init(void)
 {
 	int ret;
 
-	if (!cpacf_query(CPACF_KM, CPACF_KM_DEA_ENC) ||
-	    !cpacf_query(CPACF_KM, CPACF_KM_TDEA_192_ENC))
+	if (!cpacf_query(CPACF_KM, CPACF_KM_DEA) ||
+	    !cpacf_query(CPACF_KM, CPACF_KM_TDEA_192))
 		return -EOPNOTSUPP;
 
 	ret = crypto_register_alg(&des_alg);
@@ -561,8 +568,8 @@ static int __init des_s390_init(void)
 	if (ret)
 		goto cbc_des3_err;
 
-	if (cpacf_query(CPACF_KMCTR, CPACF_KMCTR_DEA_ENC) &&
-	    cpacf_query(CPACF_KMCTR, CPACF_KMCTR_TDEA_192_ENC)) {
+	if (cpacf_query(CPACF_KMCTR, CPACF_KMCTR_DEA) &&
+	    cpacf_query(CPACF_KMCTR, CPACF_KMCTR_TDEA_192)) {
 		ret = crypto_register_alg(&ctr_des_alg);
 		if (ret)
 			goto ctr_des_err;
diff --git a/arch/s390/include/asm/cpacf.h b/arch/s390/include/asm/cpacf.h
index d28621d..ca88419 100644
--- a/arch/s390/include/asm/cpacf.h
+++ b/arch/s390/include/asm/cpacf.h
@@ -28,67 +28,51 @@
 #define CPACF_PPNO		0xb93c		/* MSA5 */
 
 /*
- * Function codes for the KM (CIPHER MESSAGE)
- * instruction (0x80 is the decipher modifier bit)
+ * Decryption modifier bit
+ */
+#define CPACF_DECRYPT		0x80
+
+/*
+ * Function codes for the KM (CIPHER MESSAGE) instruction
  */
 #define CPACF_KM_QUERY		0x00
-#define CPACF_KM_DEA_ENC	0x01
-#define CPACF_KM_DEA_DEC	0x81
-#define CPACF_KM_TDEA_128_ENC	0x02
-#define CPACF_KM_TDEA_128_DEC	0x82
-#define CPACF_KM_TDEA_192_ENC	0x03
-#define CPACF_KM_TDEA_192_DEC	0x83
-#define CPACF_KM_AES_128_ENC	0x12
-#define CPACF_KM_AES_128_DEC	0x92
-#define CPACF_KM_AES_192_ENC	0x13
-#define CPACF_KM_AES_192_DEC	0x93
-#define CPACF_KM_AES_256_ENC	0x14
-#define CPACF_KM_AES_256_DEC	0x94
-#define CPACF_KM_XTS_128_ENC	0x32
-#define CPACF_KM_XTS_128_DEC	0xb2
-#define CPACF_KM_XTS_256_ENC	0x34
-#define CPACF_KM_XTS_256_DEC	0xb4
+#define CPACF_KM_DEA		0x01
+#define CPACF_KM_TDEA_128	0x02
+#define CPACF_KM_TDEA_192	0x03
+#define CPACF_KM_AES_128	0x12
+#define CPACF_KM_AES_192	0x13
+#define CPACF_KM_AES_256	0x14
+#define CPACF_KM_XTS_128	0x32
+#define CPACF_KM_XTS_256	0x34
 
 /*
  * Function codes for the KMC (CIPHER MESSAGE WITH CHAINING)
- * instruction (0x80 is the decipher modifier bit)
+ * instruction
  */
 #define CPACF_KMC_QUERY		0x00
-#define CPACF_KMC_DEA_ENC	0x01
-#define CPACF_KMC_DEA_DEC	0x81
-#define CPACF_KMC_TDEA_128_ENC	0x02
-#define CPACF_KMC_TDEA_128_DEC	0x82
-#define CPACF_KMC_TDEA_192_ENC	0x03
-#define CPACF_KMC_TDEA_192_DEC	0x83
-#define CPACF_KMC_AES_128_ENC	0x12
-#define CPACF_KMC_AES_128_DEC	0x92
-#define CPACF_KMC_AES_192_ENC	0x13
-#define CPACF_KMC_AES_192_DEC	0x93
-#define CPACF_KMC_AES_256_ENC	0x14
-#define CPACF_KMC_AES_256_DEC	0x94
+#define CPACF_KMC_DEA		0x01
+#define CPACF_KMC_TDEA_128	0x02
+#define CPACF_KMC_TDEA_192	0x03
+#define CPACF_KMC_AES_128	0x12
+#define CPACF_KMC_AES_192	0x13
+#define CPACF_KMC_AES_256	0x14
 #define CPACF_KMC_PRNG		0x43
 
 /*
  * Function codes for the KMCTR (CIPHER MESSAGE WITH COUNTER)
- * instruction (0x80 is the decipher modifier bit)
+ * instruction
  */
-#define CPACF_KMCTR_QUERY	 0x00
-#define CPACF_KMCTR_DEA_ENC	 0x01
-#define CPACF_KMCTR_DEA_DEC	 0x81
-#define CPACF_KMCTR_TDEA_128_ENC 0x02
-#define CPACF_KMCTR_TDEA_128_DEC 0x82
-#define CPACF_KMCTR_TDEA_192_ENC 0x03
-#define CPACF_KMCTR_TDEA_192_DEC 0x83
-#define CPACF_KMCTR_AES_128_ENC	 0x12
-#define CPACF_KMCTR_AES_128_DEC	 0x92
-#define CPACF_KMCTR_AES_192_ENC	 0x13
-#define CPACF_KMCTR_AES_192_DEC	 0x93
-#define CPACF_KMCTR_AES_256_ENC	 0x14
-#define CPACF_KMCTR_AES_256_DEC	 0x94
+#define CPACF_KMCTR_QUERY	0x00
+#define CPACF_KMCTR_DEA		0x01
+#define CPACF_KMCTR_TDEA_128	0x02
+#define CPACF_KMCTR_TDEA_192	0x03
+#define CPACF_KMCTR_AES_128	0x12
+#define CPACF_KMCTR_AES_192	0x13
+#define CPACF_KMCTR_AES_256	0x14
 
 /*
  * Function codes for the KIMD (COMPUTE INTERMEDIATE MESSAGE DIGEST)
- * instruction (0x80 is the decipher modifier bit)
+ * instruction
  */
 #define CPACF_KIMD_QUERY	0x00
 #define CPACF_KIMD_SHA_1	0x01
@@ -98,7 +82,7 @@
 
 /*
  * Function codes for the KLMD (COMPUTE LAST MESSAGE DIGEST)
- * instruction (0x80 is the decipher modifier bit)
+ * instruction
  */
 #define CPACF_KLMD_QUERY	0x00
 #define CPACF_KLMD_SHA_1	0x01
@@ -107,7 +91,7 @@
 
 /*
  * function codes for the KMAC (COMPUTE MESSAGE AUTHENTICATION CODE)
- * instruction (0x80 is the decipher modifier bit)
+ * instruction
  */
 #define CPACF_KMAC_QUERY	0x00
 #define CPACF_KMAC_DEA		0x01
@@ -116,7 +100,7 @@
 
 /*
  * Function codes for the PPNO (PERFORM PSEUDORANDOM NUMBER OPERATION)
- * instruction (0x80 is the decipher modifier bit)
+ * instruction
  */
 #define CPACF_PPNO_QUERY		0x00
 #define CPACF_PPNO_SHA512_DRNG_GEN	0x03
@@ -194,7 +178,7 @@ static inline int cpacf_query(unsigned int opcode, unsigned int func)
  * Returns 0 for the query func, number of processed bytes for
  * encryption/decryption funcs
  */
-static inline int cpacf_km(long func, void *param,
+static inline int cpacf_km(unsigned long func, void *param,
 			   u8 *dest, const u8 *src, long src_len)
 {
 	register unsigned long r0 asm("0") = (unsigned long) func;
@@ -224,7 +208,7 @@ static inline int cpacf_km(long func, void *param,
  * Returns 0 for the query func, number of processed bytes for
  * encryption/decryption funcs
  */
-static inline int cpacf_kmc(long func, void *param,
+static inline int cpacf_kmc(unsigned long func, void *param,
 			    u8 *dest, const u8 *src, long src_len)
 {
 	register unsigned long r0 asm("0") = (unsigned long) func;
@@ -253,7 +237,7 @@ static inline int cpacf_kmc(long func, void *param,
  *
  * Returns 0 for the query func, number of processed bytes for digest funcs
  */
-static inline int cpacf_kimd(long func, void *param,
+static inline int cpacf_kimd(unsigned long func, void *param,
 			     const u8 *src, long src_len)
 {
 	register unsigned long r0 asm("0") = (unsigned long) func;
@@ -280,7 +264,7 @@ static inline int cpacf_kimd(long func, void *param,
  *
  * Returns 0 for the query func, number of processed bytes for digest funcs
  */
-static inline int cpacf_klmd(long func, void *param,
+static inline int cpacf_klmd(unsigned long func, void *param,
 			     const u8 *src, long src_len)
 {
 	register unsigned long r0 asm("0") = (unsigned long) func;
@@ -308,7 +292,7 @@ static inline int cpacf_klmd(long func, void *param,
  *
  * Returns 0 for the query func, number of processed bytes for digest funcs
  */
-static inline int cpacf_kmac(long func, void *param,
+static inline int cpacf_kmac(unsigned long func, void *param,
 			     const u8 *src, long src_len)
 {
 	register unsigned long r0 asm("0") = (unsigned long) func;
@@ -338,7 +322,7 @@ static inline int cpacf_kmac(long func, void *param,
  * Returns 0 for the query func, number of processed bytes for
  * encryption/decryption funcs
  */
-static inline int cpacf_kmctr(long func, void *param, u8 *dest,
+static inline int cpacf_kmctr(unsigned long func, void *param, u8 *dest,
 			      const u8 *src, long src_len, u8 *counter)
 {
 	register unsigned long r0 asm("0") = (unsigned long) func;
@@ -372,7 +356,7 @@ static inline int cpacf_kmctr(long func, void *param, u8 *dest,
  * Returns 0 for the query func, number of random bytes stored in
  * dest buffer for generate function
  */
-static inline int cpacf_ppno(long func, void *param,
+static inline int cpacf_ppno(unsigned long func, void *param,
 			     u8 *dest, long dest_len,
 			     const u8 *seed, long seed_len)
 {
@@ -402,7 +386,7 @@ static inline int cpacf_ppno(long func, void *param,
  *
  * Returns 0.
  */
-static inline int cpacf_pcc(long func, void *param)
+static inline int cpacf_pcc(unsigned long func, void *param)
 {
 	register unsigned long r0 asm("0") = (unsigned long) func;
 	register unsigned long r1 asm("1") = (unsigned long) param;
-- 
cgit v0.10.2


From 0177db01adf26cf9c5dfe1feaf17087de4b9e40e Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Mon, 15 Aug 2016 10:41:52 +0200
Subject: s390/crypto: simplify return code handling

The CPACF instructions can complete with three different condition codes:
CC=0 for successful completion, CC=1 if the protected key verification
failed, and CC=3 for partial completion.

The inline functions will restart the CPACF instruction for partial
completion; this removes the CC=3 case. The CC=1 case is only relevant
for the protected key functions of the KM, KMC, KMAC and KMCTR
instructions. As the protected key functions are not used by the
current code, there is no need for any kind of return code handling.

Reviewed-by: Harald Freudenberger <freude@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c
index 9da5469..4eb8de4 100644
--- a/arch/s390/crypto/aes_s390.c
+++ b/arch/s390/crypto/aes_s390.c
@@ -324,9 +324,7 @@ static int ecb_aes_crypt(struct blkcipher_desc *desc, long func, void *param,
 		u8 *out = walk->dst.virt.addr;
 		u8 *in = walk->src.virt.addr;
 
-		ret = cpacf_km(func, param, out, in, n);
-		if (ret < 0 || ret != n)
-			return -EIO;
+		cpacf_km(func, param, out, in, n);
 
 		nbytes &= AES_BLOCK_SIZE - 1;
 		ret = blkcipher_walk_done(desc, walk, nbytes);
@@ -460,9 +458,7 @@ static int cbc_aes_crypt(struct blkcipher_desc *desc, long func,
 		u8 *out = walk->dst.virt.addr;
 		u8 *in = walk->src.virt.addr;
 
-		ret = cpacf_kmc(func, &param, out, in, n);
-		if (ret < 0 || ret != n)
-			return -EIO;
+		cpacf_kmc(func, &param, out, in, n);
 
 		nbytes &= AES_BLOCK_SIZE - 1;
 		ret = blkcipher_walk_done(desc, walk, nbytes);
@@ -640,9 +636,7 @@ static int xts_aes_crypt(struct blkcipher_desc *desc, long func,
 	memcpy(pcc_param.tweak, walk->iv, sizeof(pcc_param.tweak));
 	memcpy(pcc_param.key, xts_ctx->pcc_key, 32);
 	/* remove decipher modifier bit from 'func' and call PCC */
-	ret = cpacf_pcc(func & 0x7f, &pcc_param.key[offset]);
-	if (ret < 0)
-		return -EIO;
+	cpacf_pcc(func & 0x7f, &pcc_param.key[offset]);
 
 	memcpy(xts_param.key, xts_ctx->key, 32);
 	memcpy(xts_param.init, pcc_param.xts, 16);
@@ -652,9 +646,7 @@ static int xts_aes_crypt(struct blkcipher_desc *desc, long func,
 		out = walk->dst.virt.addr;
 		in = walk->src.virt.addr;
 
-		ret = cpacf_km(func, &xts_param.key[offset], out, in, n);
-		if (ret < 0 || ret != n)
-			return -EIO;
+		cpacf_km(func, &xts_param.key[offset], out, in, n);
 
 		nbytes &= AES_BLOCK_SIZE - 1;
 		ret = blkcipher_walk_done(desc, walk, nbytes);
@@ -798,12 +790,7 @@ static int ctr_aes_crypt(struct blkcipher_desc *desc, long func,
 				n = __ctrblk_init(ctrptr, nbytes);
 			else
 				n = AES_BLOCK_SIZE;
-			ret = cpacf_kmctr(func, sctx->key, out, in, n, ctrptr);
-			if (ret < 0 || ret != n) {
-				if (ctrptr == ctrblk)
-					spin_unlock(&ctrblk_lock);
-				return -EIO;
-			}
+			cpacf_kmctr(func, sctx->key, out, in, n, ctrptr);
 			if (n > AES_BLOCK_SIZE)
 				memcpy(ctrptr, ctrptr + n - AES_BLOCK_SIZE,
 				       AES_BLOCK_SIZE);
@@ -830,10 +817,7 @@ static int ctr_aes_crypt(struct blkcipher_desc *desc, long func,
 	if (nbytes) {
 		out = walk->dst.virt.addr;
 		in = walk->src.virt.addr;
-		ret = cpacf_kmctr(func, sctx->key, buf, in,
-				  AES_BLOCK_SIZE, ctrbuf);
-		if (ret < 0 || ret != AES_BLOCK_SIZE)
-			return -EIO;
+		cpacf_kmctr(func, sctx->key, buf, in, AES_BLOCK_SIZE, ctrbuf);
 		memcpy(out, buf, nbytes);
 		crypto_inc(ctrbuf, AES_BLOCK_SIZE);
 		ret = blkcipher_walk_done(desc, walk, 0);
diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c
index fadd474..9998785 100644
--- a/arch/s390/crypto/des_s390.c
+++ b/arch/s390/crypto/des_s390.c
@@ -95,9 +95,7 @@ static int ecb_desall_crypt(struct blkcipher_desc *desc, long func,
 		u8 *out = walk->dst.virt.addr;
 		u8 *in = walk->src.virt.addr;
 
-		ret = cpacf_km(func, key, out, in, n);
-		if (ret < 0 || ret != n)
-			return -EIO;
+		cpacf_km(func, key, out, in, n);
 
 		nbytes &= DES_BLOCK_SIZE - 1;
 		ret = blkcipher_walk_done(desc, walk, nbytes);
@@ -128,9 +126,7 @@ static int cbc_desall_crypt(struct blkcipher_desc *desc, long func,
 		u8 *out = walk->dst.virt.addr;
 		u8 *in = walk->src.virt.addr;
 
-		ret = cpacf_kmc(func, &param, out, in, n);
-		if (ret < 0 || ret != n)
-			return -EIO;
+		cpacf_kmc(func, &param, out, in, n);
 
 		nbytes &= DES_BLOCK_SIZE - 1;
 		ret = blkcipher_walk_done(desc, walk, nbytes);
@@ -411,12 +407,7 @@ static int ctr_desall_crypt(struct blkcipher_desc *desc, long func,
 				n = __ctrblk_init(ctrptr, nbytes);
 			else
 				n = DES_BLOCK_SIZE;
-			ret = cpacf_kmctr(func, ctx->key, out, in, n, ctrptr);
-			if (ret < 0 || ret != n) {
-				if (ctrptr == ctrblk)
-					spin_unlock(&ctrblk_lock);
-				return -EIO;
-			}
+			cpacf_kmctr(func, ctx->key, out, in, n, ctrptr);
 			if (n > DES_BLOCK_SIZE)
 				memcpy(ctrptr, ctrptr + n - DES_BLOCK_SIZE,
 				       DES_BLOCK_SIZE);
@@ -441,10 +432,7 @@ static int ctr_desall_crypt(struct blkcipher_desc *desc, long func,
 	if (nbytes) {
 		out = walk->dst.virt.addr;
 		in = walk->src.virt.addr;
-		ret = cpacf_kmctr(func, ctx->key, buf, in,
-				  DES_BLOCK_SIZE, ctrbuf);
-		if (ret < 0 || ret != DES_BLOCK_SIZE)
-			return -EIO;
+		cpacf_kmctr(func, ctx->key, buf, in, DES_BLOCK_SIZE, ctrbuf);
 		memcpy(out, buf, nbytes);
 		crypto_inc(ctrbuf, DES_BLOCK_SIZE);
 		ret = blkcipher_walk_done(desc, walk, 0);
diff --git a/arch/s390/crypto/ghash_s390.c b/arch/s390/crypto/ghash_s390.c
index ab68de7..8e87f51 100644
--- a/arch/s390/crypto/ghash_s390.c
+++ b/arch/s390/crypto/ghash_s390.c
@@ -58,7 +58,6 @@ static int ghash_update(struct shash_desc *desc,
 	struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
 	unsigned int n;
 	u8 *buf = dctx->buffer;
-	int ret;
 
 	if (dctx->bytes) {
 		u8 *pos = buf + (GHASH_BLOCK_SIZE - dctx->bytes);
@@ -71,18 +70,14 @@ static int ghash_update(struct shash_desc *desc,
 		src += n;
 
 		if (!dctx->bytes) {
-			ret = cpacf_kimd(CPACF_KIMD_GHASH, dctx, buf,
-					 GHASH_BLOCK_SIZE);
-			if (ret != GHASH_BLOCK_SIZE)
-				return -EIO;
+			cpacf_kimd(CPACF_KIMD_GHASH, dctx, buf,
+				   GHASH_BLOCK_SIZE);
 		}
 	}
 
 	n = srclen & ~(GHASH_BLOCK_SIZE - 1);
 	if (n) {
-		ret = cpacf_kimd(CPACF_KIMD_GHASH, dctx, src, n);
-		if (ret != n)
-			return -EIO;
+		cpacf_kimd(CPACF_KIMD_GHASH, dctx, src, n);
 		src += n;
 		srclen -= n;
 	}
@@ -98,17 +93,12 @@ static int ghash_update(struct shash_desc *desc,
 static int ghash_flush(struct ghash_desc_ctx *dctx)
 {
 	u8 *buf = dctx->buffer;
-	int ret;
 
 	if (dctx->bytes) {
 		u8 *pos = buf + (GHASH_BLOCK_SIZE - dctx->bytes);
 
 		memset(pos, 0, dctx->bytes);
-
-		ret = cpacf_kimd(CPACF_KIMD_GHASH, dctx, buf, GHASH_BLOCK_SIZE);
-		if (ret != GHASH_BLOCK_SIZE)
-			return -EIO;
-
+		cpacf_kimd(CPACF_KIMD_GHASH, dctx, buf, GHASH_BLOCK_SIZE);
 		dctx->bytes = 0;
 	}
 
diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c
index 41527b1..bbf2af7 100644
--- a/arch/s390/crypto/prng.c
+++ b/arch/s390/crypto/prng.c
@@ -135,12 +135,7 @@ static int generate_entropy(u8 *ebuf, size_t nbytes)
 		else
 			h = ebuf;
 		/* generate sha256 from this page */
-		if (cpacf_kimd(CPACF_KIMD_SHA_256, h,
-			       pg, PAGE_SIZE) != PAGE_SIZE) {
-			prng_errorflag = PRNG_GEN_ENTROPY_FAILED;
-			ret = -EIO;
-			goto out;
-		}
+		cpacf_kimd(CPACF_KIMD_SHA_256, h, pg, PAGE_SIZE);
 		if (n < sizeof(hash))
 			memcpy(ebuf, hash, n);
 		ret += n;
@@ -148,7 +143,6 @@ static int generate_entropy(u8 *ebuf, size_t nbytes)
 		nbytes -= n;
 	}
 
-out:
 	free_page((unsigned long)pg);
 	return ret;
 }
@@ -160,13 +154,11 @@ static void prng_tdes_add_entropy(void)
 {
 	__u64 entropy[4];
 	unsigned int i;
-	int ret;
 
 	for (i = 0; i < 16; i++) {
-		ret = cpacf_kmc(CPACF_KMC_PRNG, prng_data->prngws.parm_block,
-				(char *)entropy, (char *)entropy,
-				sizeof(entropy));
-		BUG_ON(ret < 0 || ret != sizeof(entropy));
+		cpacf_kmc(CPACF_KMC_PRNG, prng_data->prngws.parm_block,
+			  (char *) entropy, (char *) entropy,
+			  sizeof(entropy));
 		memcpy(prng_data->prngws.parm_block, entropy, sizeof(entropy));
 	}
 }
@@ -303,21 +295,14 @@ static int __init prng_sha512_selftest(void)
 		0x21, 0xe4, 0xb0, 0x86, 0x44, 0xf6, 0x72, 0x7c,
 		0x36, 0x8c, 0x5a, 0x9f, 0x7a, 0x4b, 0x3e, 0xe2 };
 
-	int ret = 0;
 	u8 buf[sizeof(random)];
 	struct ppno_ws_s ws;
 
 	memset(&ws, 0, sizeof(ws));
 
 	/* initial seed */
-	ret = cpacf_ppno(CPACF_PPNO_SHA512_DRNG_SEED, &ws, NULL, 0,
-			 seed, sizeof(seed));
-	if (ret < 0) {
-		pr_err("The prng self test seed operation for the "
-		       "SHA-512 mode failed with rc=%d\n", ret);
-		prng_errorflag = PRNG_SELFTEST_FAILED;
-		return -EIO;
-	}
+	cpacf_ppno(CPACF_PPNO_SHA512_DRNG_SEED,
+		   &ws, NULL, 0, seed, sizeof(seed));
 
 	/* check working states V and C */
 	if (memcmp(ws.V, V0, sizeof(V0)) != 0
@@ -329,22 +314,10 @@ static int __init prng_sha512_selftest(void)
 	}
 
 	/* generate random bytes */
-	ret = cpacf_ppno(CPACF_PPNO_SHA512_DRNG_GEN,
-			 &ws, buf, sizeof(buf), NULL, 0);
-	if (ret < 0) {
-		pr_err("The prng self test generate operation for "
-		       "the SHA-512 mode failed with rc=%d\n", ret);
-		prng_errorflag = PRNG_SELFTEST_FAILED;
-		return -EIO;
-	}
-	ret = cpacf_ppno(CPACF_PPNO_SHA512_DRNG_GEN,
-			 &ws, buf, sizeof(buf), NULL, 0);
-	if (ret < 0) {
-		pr_err("The prng self test generate operation for "
-		       "the SHA-512 mode failed with rc=%d\n", ret);
-		prng_errorflag = PRNG_SELFTEST_FAILED;
-		return -EIO;
-	}
+	cpacf_ppno(CPACF_PPNO_SHA512_DRNG_GEN,
+		   &ws, buf, sizeof(buf), NULL, 0);
+	cpacf_ppno(CPACF_PPNO_SHA512_DRNG_GEN,
+		   &ws, buf, sizeof(buf), NULL, 0);
 
 	/* check against expected data */
 	if (memcmp(buf, random, sizeof(random)) != 0) {
@@ -392,26 +365,16 @@ static int __init prng_sha512_instantiate(void)
 	get_tod_clock_ext(seed + 48);
 
 	/* initial seed of the ppno drng */
-	ret = cpacf_ppno(CPACF_PPNO_SHA512_DRNG_SEED,
-			 &prng_data->ppnows, NULL, 0, seed, sizeof(seed));
-	if (ret < 0) {
-		prng_errorflag = PRNG_SEED_FAILED;
-		ret = -EIO;
-		goto outfree;
-	}
+	cpacf_ppno(CPACF_PPNO_SHA512_DRNG_SEED,
+		   &prng_data->ppnows, NULL, 0, seed, sizeof(seed));
 
 	/* if fips mode is enabled, generate a first block of random
 	   bytes for the FIPS 140-2 Conditional Self Test */
 	if (fips_enabled) {
 		prng_data->prev = prng_data->buf + prng_chunk_size;
-		ret = cpacf_ppno(CPACF_PPNO_SHA512_DRNG_GEN,
-				 &prng_data->ppnows,
-				 prng_data->prev, prng_chunk_size, NULL, 0);
-		if (ret < 0 || ret != prng_chunk_size) {
-			prng_errorflag = PRNG_GEN_FAILED;
-			ret = -EIO;
-			goto outfree;
-		}
+		cpacf_ppno(CPACF_PPNO_SHA512_DRNG_GEN,
+			   &prng_data->ppnows,
+			   prng_data->prev, prng_chunk_size, NULL, 0);
 	}
 
 	return 0;
@@ -440,12 +403,8 @@ static int prng_sha512_reseed(void)
 		return ret;
 
 	/* do a reseed of the ppno drng with this bytestring */
-	ret = cpacf_ppno(CPACF_PPNO_SHA512_DRNG_SEED,
-			 &prng_data->ppnows, NULL, 0, seed, sizeof(seed));
-	if (ret) {
-		prng_errorflag = PRNG_RESEED_FAILED;
-		return -EIO;
-	}
+	cpacf_ppno(CPACF_PPNO_SHA512_DRNG_SEED,
+		   &prng_data->ppnows, NULL, 0, seed, sizeof(seed));
 
 	return 0;
 }
@@ -463,12 +422,8 @@ static int prng_sha512_generate(u8 *buf, size_t nbytes)
 	}
 
 	/* PPNO generate */
-	ret = cpacf_ppno(CPACF_PPNO_SHA512_DRNG_GEN,
-			 &prng_data->ppnows, buf, nbytes, NULL, 0);
-	if (ret < 0 || ret != nbytes) {
-		prng_errorflag = PRNG_GEN_FAILED;
-		return -EIO;
-	}
+	cpacf_ppno(CPACF_PPNO_SHA512_DRNG_GEN,
+		   &prng_data->ppnows, buf, nbytes, NULL, 0);
 
 	/* FIPS 140-2 Conditional Self Test */
 	if (fips_enabled) {
@@ -494,7 +449,7 @@ static int prng_open(struct inode *inode, struct file *file)
 static ssize_t prng_tdes_read(struct file *file, char __user *ubuf,
 			      size_t nbytes, loff_t *ppos)
 {
-	int chunk, n, tmp, ret = 0;
+	int chunk, n, ret = 0;
 
 	/* lock prng_data struct */
 	if (mutex_lock_interruptible(&prng_data->mutex))
@@ -545,13 +500,9 @@ static ssize_t prng_tdes_read(struct file *file, char __user *ubuf,
 		 *
 		 * Note: you can still get strict X9.17 conformity by setting
 		 * prng_chunk_size to 8 bytes.
-		*/
-		tmp = cpacf_kmc(CPACF_KMC_PRNG, prng_data->prngws.parm_block,
-				prng_data->buf, prng_data->buf, n);
-		if (tmp < 0 || tmp != n) {
-			ret = -EIO;
-			break;
-		}
+		 */
+		cpacf_kmc(CPACF_KMC_PRNG, prng_data->prngws.parm_block,
+			  prng_data->buf, prng_data->buf, n);
 
 		prng_data->prngws.byte_counter += n;
 		prng_data->prngws.reseed_counter += n;
diff --git a/arch/s390/crypto/sha_common.c b/arch/s390/crypto/sha_common.c
index 8e90816..c740f77 100644
--- a/arch/s390/crypto/sha_common.c
+++ b/arch/s390/crypto/sha_common.c
@@ -22,8 +22,7 @@ int s390_sha_update(struct shash_desc *desc, const u8 *data, unsigned int len)
 {
 	struct s390_sha_ctx *ctx = shash_desc_ctx(desc);
 	unsigned int bsize = crypto_shash_blocksize(desc->tfm);
-	unsigned int index;
-	int ret;
+	unsigned int index, n;
 
 	/* how much is already in the buffer? */
 	index = ctx->count & (bsize - 1);
@@ -35,9 +34,7 @@ int s390_sha_update(struct shash_desc *desc, const u8 *data, unsigned int len)
 	/* process one stored block */
 	if (index) {
 		memcpy(ctx->buf + index, data, bsize - index);
-		ret = cpacf_kimd(ctx->func, ctx->state, ctx->buf, bsize);
-		if (ret != bsize)
-			return -EIO;
+		cpacf_kimd(ctx->func, ctx->state, ctx->buf, bsize);
 		data += bsize - index;
 		len -= bsize - index;
 		index = 0;
@@ -45,12 +42,10 @@ int s390_sha_update(struct shash_desc *desc, const u8 *data, unsigned int len)
 
 	/* process as many blocks as possible */
 	if (len >= bsize) {
-		ret = cpacf_kimd(ctx->func, ctx->state, data,
-				 len & ~(bsize - 1));
-		if (ret != (len & ~(bsize - 1)))
-			return -EIO;
-		data += ret;
-		len -= ret;
+		n = len & ~(bsize - 1);
+		cpacf_kimd(ctx->func, ctx->state, data, n);
+		data += n;
+		len -= n;
 	}
 store:
 	if (len)
@@ -66,7 +61,6 @@ int s390_sha_final(struct shash_desc *desc, u8 *out)
 	unsigned int bsize = crypto_shash_blocksize(desc->tfm);
 	u64 bits;
 	unsigned int index, end, plen;
-	int ret;
 
 	/* SHA-512 uses 128 bit padding length */
 	plen = (bsize > SHA256_BLOCK_SIZE) ? 16 : 8;
@@ -88,10 +82,7 @@ int s390_sha_final(struct shash_desc *desc, u8 *out)
 	 */
 	bits = ctx->count * 8;
 	memcpy(ctx->buf + end - 8, &bits, sizeof(bits));
-
-	ret = cpacf_kimd(ctx->func, ctx->state, ctx->buf, end);
-	if (ret != end)
-		return -EIO;
+	cpacf_kimd(ctx->func, ctx->state, ctx->buf, end);
 
 	/* copy digest to out */
 	memcpy(out, ctx->state, crypto_shash_digestsize(desc->tfm));
diff --git a/arch/s390/include/asm/cpacf.h b/arch/s390/include/asm/cpacf.h
index ca88419..c226c9b 100644
--- a/arch/s390/include/asm/cpacf.h
+++ b/arch/s390/include/asm/cpacf.h
@@ -234,11 +234,9 @@ static inline int cpacf_kmc(unsigned long func, void *param,
  * @param: address of parameter block; see POP for details on each func
  * @src: address of source memory area
  * @src_len: length of src operand in bytes
- *
- * Returns 0 for the query func, number of processed bytes for digest funcs
  */
-static inline int cpacf_kimd(unsigned long func, void *param,
-			     const u8 *src, long src_len)
+static inline void cpacf_kimd(unsigned long func, void *param,
+			      const u8 *src, long src_len)
 {
 	register unsigned long r0 asm("0") = (unsigned long) func;
 	register unsigned long r1 asm("1") = (unsigned long) param;
@@ -251,8 +249,6 @@ static inline int cpacf_kimd(unsigned long func, void *param,
 		: [src] "+a" (r2), [len] "+d" (r3)
 		: [fc] "d" (r0), [pba] "a" (r1), [opc] "i" (CPACF_KIMD)
 		: "cc", "memory");
-
-	return src_len - r3;
 }
 
 /**
@@ -261,11 +257,9 @@ static inline int cpacf_kimd(unsigned long func, void *param,
  * @param: address of parameter block; see POP for details on each func
  * @src: address of source memory area
  * @src_len: length of src operand in bytes
- *
- * Returns 0 for the query func, number of processed bytes for digest funcs
  */
-static inline int cpacf_klmd(unsigned long func, void *param,
-			     const u8 *src, long src_len)
+static inline void cpacf_klmd(unsigned long func, void *param,
+			      const u8 *src, long src_len)
 {
 	register unsigned long r0 asm("0") = (unsigned long) func;
 	register unsigned long r1 asm("1") = (unsigned long) param;
@@ -278,8 +272,6 @@ static inline int cpacf_klmd(unsigned long func, void *param,
 		: [src] "+a" (r2), [len] "+d" (r3)
 		: [fc] "d" (r0), [pba] "a" (r1), [opc] "i" (CPACF_KLMD)
 		: "cc", "memory");
-
-	return src_len - r3;
 }
 
 /**
@@ -352,13 +344,10 @@ static inline int cpacf_kmctr(unsigned long func, void *param, u8 *dest,
  * @dest_len: size of destination memory area in bytes
  * @seed: address of seed data
  * @seed_len: size of seed data in bytes
- *
- * Returns 0 for the query func, number of random bytes stored in
- * dest buffer for generate function
  */
-static inline int cpacf_ppno(unsigned long func, void *param,
-			     u8 *dest, long dest_len,
-			     const u8 *seed, long seed_len)
+static inline void cpacf_ppno(unsigned long func, void *param,
+			      u8 *dest, long dest_len,
+			      const u8 *seed, long seed_len)
 {
 	register unsigned long r0 asm("0") = (unsigned long) func;
 	register unsigned long r1 asm("1") = (unsigned long) param;
@@ -374,8 +363,6 @@ static inline int cpacf_ppno(unsigned long func, void *param,
 		: [fc] "d" (r0), [pba] "a" (r1),
 		  [seed] "a" (r4), [slen] "d" (r5), [opc] "i" (CPACF_PPNO)
 		: "cc", "memory");
-
-	return dest_len - r3;
 }
 
 /**
@@ -383,10 +370,8 @@ static inline int cpacf_ppno(unsigned long func, void *param,
  *		 instruction
  * @func: the function code passed to PCC; see CPACF_KM_xxx defines
  * @param: address of parameter block; see POP for details on each func
- *
- * Returns 0.
  */
-static inline int cpacf_pcc(unsigned long func, void *param)
+static inline void cpacf_pcc(unsigned long func, void *param)
 {
 	register unsigned long r0 asm("0") = (unsigned long) func;
 	register unsigned long r1 asm("1") = (unsigned long) param;
@@ -397,8 +382,6 @@ static inline int cpacf_pcc(unsigned long func, void *param)
 		:
 		: [fc] "d" (r0), [pba] "a" (r1), [opc] "i" (CPACF_PCC)
 		: "cc", "memory");
-
-	return 0;
 }
 
 #endif	/* _ASM_S390_CPACF_H */
-- 
cgit v0.10.2


From d863d5945f2be0abfcd9d36b1a7c605f3eaef517 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Thu, 18 Aug 2016 12:34:34 +0200
Subject: s390/crypto: simplify init / exit functions

The aes and the des module register multiple crypto algorithms
dependent on the availability of specific CPACF instructions.
To simplify the deregistration with crypto_unregister_alg add
an array with pointers to the successfully registered algorithms
and use it for the error handling in the init function and in
the module exit function.

Reviewed-by: Harald Freudenberger <freude@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c
index 4eb8de4..be87575 100644
--- a/arch/s390/crypto/aes_s390.c
+++ b/arch/s390/crypto/aes_s390.c
@@ -731,8 +731,6 @@ static struct crypto_alg xts_aes_alg = {
 	}
 };
 
-static int xts_aes_alg_reg;
-
 static int ctr_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
 			   unsigned int key_len)
 {
@@ -870,7 +868,26 @@ static struct crypto_alg ctr_aes_alg = {
 	}
 };
 
-static int ctr_aes_alg_reg;
+static struct crypto_alg *aes_s390_algs_ptr[5];
+static int aes_s390_algs_num;
+
+static int aes_s390_register_alg(struct crypto_alg *alg)
+{
+	int ret;
+
+	ret = crypto_register_alg(alg);
+	if (!ret)
+		aes_s390_algs_ptr[aes_s390_algs_num++] = alg;
+	return ret;
+}
+
+static void aes_s390_fini(void)
+{
+	while (aes_s390_algs_num--)
+		crypto_unregister_alg(aes_s390_algs_ptr[aes_s390_algs_num]);
+	if (ctrblk)
+		free_page((unsigned long) ctrblk);
+}
 
 static int __init aes_s390_init(void)
 {
@@ -891,24 +908,23 @@ static int __init aes_s390_init(void)
 		pr_info("AES hardware acceleration is only available for"
 			" 128-bit keys\n");
 
-	ret = crypto_register_alg(&aes_alg);
+	ret = aes_s390_register_alg(&aes_alg);
 	if (ret)
-		goto aes_err;
+		goto out_err;
 
-	ret = crypto_register_alg(&ecb_aes_alg);
+	ret = aes_s390_register_alg(&ecb_aes_alg);
 	if (ret)
-		goto ecb_aes_err;
+		goto out_err;
 
-	ret = crypto_register_alg(&cbc_aes_alg);
+	ret = aes_s390_register_alg(&cbc_aes_alg);
 	if (ret)
-		goto cbc_aes_err;
+		goto out_err;
 
 	if (cpacf_query(CPACF_KM, CPACF_KM_XTS_128) &&
 	    cpacf_query(CPACF_KM, CPACF_KM_XTS_256)) {
-		ret = crypto_register_alg(&xts_aes_alg);
+		ret = aes_s390_register_alg(&xts_aes_alg);
 		if (ret)
-			goto xts_aes_err;
-		xts_aes_alg_reg = 1;
+			goto out_err;
 	}
 
 	if (cpacf_query(CPACF_KMCTR, CPACF_KMCTR_AES_128) &&
@@ -917,42 +933,17 @@ static int __init aes_s390_init(void)
 		ctrblk = (u8 *) __get_free_page(GFP_KERNEL);
 		if (!ctrblk) {
 			ret = -ENOMEM;
-			goto ctr_aes_err;
+			goto out_err;
 		}
-		ret = crypto_register_alg(&ctr_aes_alg);
-		if (ret) {
-			free_page((unsigned long) ctrblk);
-			goto ctr_aes_err;
-		}
-		ctr_aes_alg_reg = 1;
+		ret = aes_s390_register_alg(&ctr_aes_alg);
+		if (ret)
+			goto out_err;
 	}
 
-out:
+	return 0;
+out_err:
+	aes_s390_fini();
 	return ret;
-
-ctr_aes_err:
-	crypto_unregister_alg(&xts_aes_alg);
-xts_aes_err:
-	crypto_unregister_alg(&cbc_aes_alg);
-cbc_aes_err:
-	crypto_unregister_alg(&ecb_aes_alg);
-ecb_aes_err:
-	crypto_unregister_alg(&aes_alg);
-aes_err:
-	goto out;
-}
-
-static void __exit aes_s390_fini(void)
-{
-	if (ctr_aes_alg_reg) {
-		crypto_unregister_alg(&ctr_aes_alg);
-		free_page((unsigned long) ctrblk);
-	}
-	if (xts_aes_alg_reg)
-		crypto_unregister_alg(&xts_aes_alg);
-	crypto_unregister_alg(&cbc_aes_alg);
-	crypto_unregister_alg(&ecb_aes_alg);
-	crypto_unregister_alg(&aes_alg);
 }
 
 module_cpu_feature_match(MSA, aes_s390_init);
diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c
index 9998785..b77a546 100644
--- a/arch/s390/crypto/des_s390.c
+++ b/arch/s390/crypto/des_s390.c
@@ -529,6 +529,27 @@ static struct crypto_alg ctr_des3_alg = {
 	}
 };
 
+static struct crypto_alg *des_s390_algs_ptr[8];
+static int des_s390_algs_num;
+
+static int des_s390_register_alg(struct crypto_alg *alg)
+{
+	int ret;
+
+	ret = crypto_register_alg(alg);
+	if (!ret)
+		des_s390_algs_ptr[des_s390_algs_num++] = alg;
+	return ret;
+}
+
+static void des_s390_exit(void)
+{
+	while (des_s390_algs_num--)
+		crypto_unregister_alg(des_s390_algs_ptr[des_s390_algs_num]);
+	if (ctrblk)
+		free_page((unsigned long) ctrblk);
+}
+
 static int __init des_s390_init(void)
 {
 	int ret;
@@ -537,75 +558,44 @@ static int __init des_s390_init(void)
 	    !cpacf_query(CPACF_KM, CPACF_KM_TDEA_192))
 		return -EOPNOTSUPP;
 
-	ret = crypto_register_alg(&des_alg);
+	ret = des_s390_register_alg(&des_alg);
 	if (ret)
-		goto des_err;
-	ret = crypto_register_alg(&ecb_des_alg);
+		goto out_err;
+	ret = des_s390_register_alg(&ecb_des_alg);
 	if (ret)
-		goto ecb_des_err;
-	ret = crypto_register_alg(&cbc_des_alg);
+		goto out_err;
+	ret = des_s390_register_alg(&cbc_des_alg);
 	if (ret)
-		goto cbc_des_err;
-	ret = crypto_register_alg(&des3_alg);
+		goto out_err;
+	ret = des_s390_register_alg(&des3_alg);
 	if (ret)
-		goto des3_err;
-	ret = crypto_register_alg(&ecb_des3_alg);
+		goto out_err;
+	ret = des_s390_register_alg(&ecb_des3_alg);
 	if (ret)
-		goto ecb_des3_err;
-	ret = crypto_register_alg(&cbc_des3_alg);
+		goto out_err;
+	ret = des_s390_register_alg(&cbc_des3_alg);
 	if (ret)
-		goto cbc_des3_err;
+		goto out_err;
 
 	if (cpacf_query(CPACF_KMCTR, CPACF_KMCTR_DEA) &&
 	    cpacf_query(CPACF_KMCTR, CPACF_KMCTR_TDEA_192)) {
-		ret = crypto_register_alg(&ctr_des_alg);
-		if (ret)
-			goto ctr_des_err;
-		ret = crypto_register_alg(&ctr_des3_alg);
-		if (ret)
-			goto ctr_des3_err;
 		ctrblk = (u8 *) __get_free_page(GFP_KERNEL);
 		if (!ctrblk) {
 			ret = -ENOMEM;
-			goto ctr_mem_err;
+			goto out_err;
 		}
+		ret = des_s390_register_alg(&ctr_des_alg);
+		if (ret)
+			goto out_err;
+		ret = des_s390_register_alg(&ctr_des3_alg);
+		if (ret)
+			goto out_err;
 	}
-out:
-	return ret;
-
-ctr_mem_err:
-	crypto_unregister_alg(&ctr_des3_alg);
-ctr_des3_err:
-	crypto_unregister_alg(&ctr_des_alg);
-ctr_des_err:
-	crypto_unregister_alg(&cbc_des3_alg);
-cbc_des3_err:
-	crypto_unregister_alg(&ecb_des3_alg);
-ecb_des3_err:
-	crypto_unregister_alg(&des3_alg);
-des3_err:
-	crypto_unregister_alg(&cbc_des_alg);
-cbc_des_err:
-	crypto_unregister_alg(&ecb_des_alg);
-ecb_des_err:
-	crypto_unregister_alg(&des_alg);
-des_err:
-	goto out;
-}
 
-static void __exit des_s390_exit(void)
-{
-	if (ctrblk) {
-		crypto_unregister_alg(&ctr_des_alg);
-		crypto_unregister_alg(&ctr_des3_alg);
-		free_page((unsigned long) ctrblk);
-	}
-	crypto_unregister_alg(&cbc_des3_alg);
-	crypto_unregister_alg(&ecb_des3_alg);
-	crypto_unregister_alg(&des3_alg);
-	crypto_unregister_alg(&cbc_des_alg);
-	crypto_unregister_alg(&ecb_des_alg);
-	crypto_unregister_alg(&des_alg);
+	return 0;
+out_err:
+	des_s390_exit();
+	return ret;
 }
 
 module_cpu_feature_match(MSA, des_s390_init);
-- 
cgit v0.10.2


From 69c0e360f990c2dc737681f40a361195066cef02 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Thu, 18 Aug 2016 12:59:46 +0200
Subject: s390/crypto: cpacf function detection

The CPACF code makes some assumptions about the availability of hardware
support. E.g. if the machine supports KM(AES-256) without chaining it is
assumed that KMC(AES-256) with chaining is available as well. For the
existing CPUs this is true but the architecturally correct way is to
check each CPACF function on its own. This is what the query function
of each instruction is all about.

Reviewed-by: Harald Freudenberger <freude@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c
index be87575..f4ad96e 100644
--- a/arch/s390/crypto/aes_s390.c
+++ b/arch/s390/crypto/aes_s390.c
@@ -31,13 +31,10 @@
 #include <crypto/xts.h>
 #include <asm/cpacf.h>
 
-#define AES_KEYLEN_128		1
-#define AES_KEYLEN_192		2
-#define AES_KEYLEN_256		4
-
 static u8 *ctrblk;
 static DEFINE_SPINLOCK(ctrblk_lock);
-static char keylen_flag;
+
+static cpacf_mask_t km_functions, kmc_functions, kmctr_functions;
 
 struct s390_aes_ctx {
 	u8 key[AES_MAX_KEY_SIZE];
@@ -65,33 +62,6 @@ struct s390_xts_ctx {
 	struct crypto_skcipher *fallback;
 };
 
-/*
- * Check if the key_len is supported by the HW.
- * Returns 0 if it is, a positive number if it is not and software fallback is
- * required or a negative number in case the key size is not valid
- */
-static int need_fallback(unsigned int key_len)
-{
-	switch (key_len) {
-	case 16:
-		if (!(keylen_flag & AES_KEYLEN_128))
-			return 1;
-		break;
-	case 24:
-		if (!(keylen_flag & AES_KEYLEN_192))
-			return 1;
-		break;
-	case 32:
-		if (!(keylen_flag & AES_KEYLEN_256))
-			return 1;
-		break;
-	default:
-		return -1;
-		break;
-	}
-	return 0;
-}
-
 static int setkey_fallback_cip(struct crypto_tfm *tfm, const u8 *in_key,
 		unsigned int key_len)
 {
@@ -115,72 +85,44 @@ static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
 		       unsigned int key_len)
 {
 	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
-	u32 *flags = &tfm->crt_flags;
-	int ret;
+	unsigned long fc;
 
-	ret = need_fallback(key_len);
-	if (ret < 0) {
-		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
-		return -EINVAL;
-	}
+	/* Pick the correct function code based on the key length */
+	fc = (key_len == 16) ? CPACF_KM_AES_128 :
+	     (key_len == 24) ? CPACF_KM_AES_192 :
+	     (key_len == 32) ? CPACF_KM_AES_256 : 0;
 
-	sctx->key_len = key_len;
-	if (!ret) {
-		memcpy(sctx->key, in_key, key_len);
-		return 0;
-	}
+	/* Check if the function code is available */
+	sctx->fc = (fc && cpacf_test_func(&km_functions, fc)) ? fc : 0;
+	if (!sctx->fc)
+		return setkey_fallback_cip(tfm, in_key, key_len);
 
-	return setkey_fallback_cip(tfm, in_key, key_len);
+	sctx->key_len = key_len;
+	memcpy(sctx->key, in_key, key_len);
+	return 0;
 }
 
 static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 {
 	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
 
-	if (unlikely(need_fallback(sctx->key_len))) {
+	if (unlikely(!sctx->fc)) {
 		crypto_cipher_encrypt_one(sctx->fallback.cip, out, in);
 		return;
 	}
-
-	switch (sctx->key_len) {
-	case 16:
-		cpacf_km(CPACF_KM_AES_128,
-			 &sctx->key, out, in, AES_BLOCK_SIZE);
-		break;
-	case 24:
-		cpacf_km(CPACF_KM_AES_192,
-			 &sctx->key, out, in, AES_BLOCK_SIZE);
-		break;
-	case 32:
-		cpacf_km(CPACF_KM_AES_256,
-			 &sctx->key, out, in, AES_BLOCK_SIZE);
-		break;
-	}
+	cpacf_km(sctx->fc, &sctx->key, out, in, AES_BLOCK_SIZE);
 }
 
 static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 {
 	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
 
-	if (unlikely(need_fallback(sctx->key_len))) {
+	if (unlikely(!sctx->fc)) {
 		crypto_cipher_decrypt_one(sctx->fallback.cip, out, in);
 		return;
 	}
-
-	switch (sctx->key_len) {
-	case 16:
-		cpacf_km(CPACF_KM_AES_128 | CPACF_DECRYPT,
-			 &sctx->key, out, in, AES_BLOCK_SIZE);
-		break;
-	case 24:
-		cpacf_km(CPACF_KM_AES_192 | CPACF_DECRYPT,
-			 &sctx->key, out, in, AES_BLOCK_SIZE);
-		break;
-	case 32:
-		cpacf_km(CPACF_KM_AES_256 | CPACF_DECRYPT,
-			 &sctx->key, out, in, AES_BLOCK_SIZE);
-		break;
-	}
+	cpacf_km(sctx->fc | CPACF_DECRYPT,
+		 &sctx->key, out, in, AES_BLOCK_SIZE);
 }
 
 static int fallback_init_cip(struct crypto_tfm *tfm)
@@ -289,27 +231,21 @@ static int ecb_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
 			   unsigned int key_len)
 {
 	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
-	int ret;
+	unsigned long fc;
 
-	ret = need_fallback(key_len);
-	if (ret > 0) {
-		sctx->key_len = key_len;
-		return setkey_fallback_blk(tfm, in_key, key_len);
-	}
+	/* Pick the correct function code based on the key length */
+	fc = (key_len == 16) ? CPACF_KM_AES_128 :
+	     (key_len == 24) ? CPACF_KM_AES_192 :
+	     (key_len == 32) ? CPACF_KM_AES_256 : 0;
 
-	switch (key_len) {
-	case 16:
-		sctx->fc = CPACF_KM_AES_128;
-		break;
-	case 24:
-		sctx->fc = CPACF_KM_AES_192;
-		break;
-	case 32:
-		sctx->fc = CPACF_KM_AES_256;
-		break;
-	}
+	/* Check if the function code is available */
+	sctx->fc = (fc && cpacf_test_func(&km_functions, fc)) ? fc : 0;
+	if (!sctx->fc)
+		return setkey_fallback_blk(tfm, in_key, key_len);
 
-	return aes_set_key(tfm, in_key, key_len);
+	sctx->key_len = key_len;
+	memcpy(sctx->key, in_key, key_len);
+	return 0;
 }
 
 static int ecb_aes_crypt(struct blkcipher_desc *desc, long func, void *param,
@@ -340,7 +276,7 @@ static int ecb_aes_encrypt(struct blkcipher_desc *desc,
 	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 
-	if (unlikely(need_fallback(sctx->key_len)))
+	if (unlikely(!sctx->fc))
 		return fallback_blk_enc(desc, dst, src, nbytes);
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
@@ -354,7 +290,7 @@ static int ecb_aes_decrypt(struct blkcipher_desc *desc,
 	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 
-	if (unlikely(need_fallback(sctx->key_len)))
+	if (unlikely(!sctx->fc))
 		return fallback_blk_dec(desc, dst, src, nbytes);
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
@@ -413,27 +349,21 @@ static int cbc_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
 			   unsigned int key_len)
 {
 	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
-	int ret;
+	unsigned long fc;
 
-	ret = need_fallback(key_len);
-	if (ret > 0) {
-		sctx->key_len = key_len;
-		return setkey_fallback_blk(tfm, in_key, key_len);
-	}
+	/* Pick the correct function code based on the key length */
+	fc = (key_len == 16) ? CPACF_KMC_AES_128 :
+	     (key_len == 24) ? CPACF_KMC_AES_192 :
+	     (key_len == 32) ? CPACF_KMC_AES_256 : 0;
 
-	switch (key_len) {
-	case 16:
-		sctx->fc = CPACF_KMC_AES_128;
-		break;
-	case 24:
-		sctx->fc = CPACF_KMC_AES_192;
-		break;
-	case 32:
-		sctx->fc = CPACF_KMC_AES_256;
-		break;
-	}
+	/* Check if the function code is available */
+	sctx->fc = (fc && cpacf_test_func(&kmc_functions, fc)) ? fc : 0;
+	if (!sctx->fc)
+		return setkey_fallback_blk(tfm, in_key, key_len);
 
-	return aes_set_key(tfm, in_key, key_len);
+	sctx->key_len = key_len;
+	memcpy(sctx->key, in_key, key_len);
+	return 0;
 }
 
 static int cbc_aes_crypt(struct blkcipher_desc *desc, long func,
@@ -476,7 +406,7 @@ static int cbc_aes_encrypt(struct blkcipher_desc *desc,
 	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 
-	if (unlikely(need_fallback(sctx->key_len)))
+	if (unlikely(!sctx->fc))
 		return fallback_blk_enc(desc, dst, src, nbytes);
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
@@ -490,7 +420,7 @@ static int cbc_aes_decrypt(struct blkcipher_desc *desc,
 	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 
-	if (unlikely(need_fallback(sctx->key_len)))
+	if (unlikely(!sctx->fc))
 		return fallback_blk_dec(desc, dst, src, nbytes);
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
@@ -582,33 +512,27 @@ static int xts_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
 			   unsigned int key_len)
 {
 	struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm);
-	u32 *flags = &tfm->crt_flags;
+	unsigned long fc;
 	int err;
 
 	err = xts_check_key(tfm, in_key, key_len);
 	if (err)
 		return err;
 
-	switch (key_len) {
-	case 32:
-		xts_ctx->fc = CPACF_KM_XTS_128;
-		memcpy(xts_ctx->key + 16, in_key, 16);
-		memcpy(xts_ctx->pcc_key + 16, in_key + 16, 16);
-		break;
-	case 48:
-		xts_ctx->fc = 0;
-		xts_fallback_setkey(tfm, in_key, key_len);
-		break;
-	case 64:
-		xts_ctx->fc = CPACF_KM_XTS_256;
-		memcpy(xts_ctx->key, in_key, 32);
-		memcpy(xts_ctx->pcc_key, in_key + 32, 32);
-		break;
-	default:
-		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
-		return -EINVAL;
-	}
+	/* Pick the correct function code based on the key length */
+	fc = (key_len == 32) ? CPACF_KM_XTS_128 :
+	     (key_len == 64) ? CPACF_KM_XTS_256 : 0;
+
+	/* Check if the function code is available */
+	xts_ctx->fc = (fc && cpacf_test_func(&km_functions, fc)) ? fc : 0;
+	if (!xts_ctx->fc)
+		return xts_fallback_setkey(tfm, in_key, key_len);
+
+	/* Split the XTS key into the two subkeys */
+	key_len = key_len / 2;
 	xts_ctx->key_len = key_len;
+	memcpy(xts_ctx->key, in_key, key_len);
+	memcpy(xts_ctx->pcc_key, in_key + key_len, key_len);
 	return 0;
 }
 
@@ -616,7 +540,7 @@ static int xts_aes_crypt(struct blkcipher_desc *desc, long func,
 			 struct s390_xts_ctx *xts_ctx,
 			 struct blkcipher_walk *walk)
 {
-	unsigned int offset = (xts_ctx->key_len >> 1) & 0x10;
+	unsigned int offset = xts_ctx->key_len & 0x10;
 	int ret = blkcipher_walk_virt(desc, walk);
 	unsigned int nbytes = walk->nbytes;
 	unsigned int n;
@@ -634,11 +558,11 @@ static int xts_aes_crypt(struct blkcipher_desc *desc, long func,
 	memset(pcc_param.bit, 0, sizeof(pcc_param.bit));
 	memset(pcc_param.xts, 0, sizeof(pcc_param.xts));
 	memcpy(pcc_param.tweak, walk->iv, sizeof(pcc_param.tweak));
-	memcpy(pcc_param.key, xts_ctx->pcc_key, 32);
+	memcpy(pcc_param.key + offset, xts_ctx->pcc_key, xts_ctx->key_len);
 	/* remove decipher modifier bit from 'func' and call PCC */
 	cpacf_pcc(func & 0x7f, &pcc_param.key[offset]);
 
-	memcpy(xts_param.key, xts_ctx->key, 32);
+	memcpy(xts_param.key + offset, xts_ctx->key, xts_ctx->key_len);
 	memcpy(xts_param.init, pcc_param.xts, 16);
 	do {
 		/* only use complete blocks */
@@ -662,7 +586,7 @@ static int xts_aes_encrypt(struct blkcipher_desc *desc,
 	struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 
-	if (unlikely(xts_ctx->key_len == 48))
+	if (unlikely(!xts_ctx->fc))
 		return xts_fallback_encrypt(desc, dst, src, nbytes);
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
@@ -676,7 +600,7 @@ static int xts_aes_decrypt(struct blkcipher_desc *desc,
 	struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 
-	if (unlikely(xts_ctx->key_len == 48))
+	if (unlikely(!xts_ctx->fc))
 		return xts_fallback_decrypt(desc, dst, src, nbytes);
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
@@ -735,20 +659,21 @@ static int ctr_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
 			   unsigned int key_len)
 {
 	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+	unsigned long fc;
 
-	switch (key_len) {
-	case 16:
-		sctx->fc = CPACF_KMCTR_AES_128;
-		break;
-	case 24:
-		sctx->fc = CPACF_KMCTR_AES_192;
-		break;
-	case 32:
-		sctx->fc = CPACF_KMCTR_AES_256;
-		break;
-	}
+	/* Pick the correct function code based on the key length */
+	fc = (key_len == 16) ? CPACF_KMCTR_AES_128 :
+	     (key_len == 24) ? CPACF_KMCTR_AES_192 :
+	     (key_len == 32) ? CPACF_KMCTR_AES_256 : 0;
+
+	/* Check if the function code is available */
+	sctx->fc = (fc && cpacf_test_func(&kmctr_functions, fc)) ? fc : 0;
+	if (!sctx->fc)
+		return setkey_fallback_blk(tfm, in_key, key_len);
 
-	return aes_set_key(tfm, in_key, key_len);
+	sctx->key_len = key_len;
+	memcpy(sctx->key, in_key, key_len);
+	return 0;
 }
 
 static unsigned int __ctrblk_init(u8 *ctrptr, unsigned int nbytes)
@@ -832,6 +757,9 @@ static int ctr_aes_encrypt(struct blkcipher_desc *desc,
 	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 
+	if (unlikely(!sctx->fc))
+		return fallback_blk_enc(desc, dst, src, nbytes);
+
 	blkcipher_walk_init(&walk, dst, src, nbytes);
 	return ctr_aes_crypt(desc, sctx->fc, sctx, &walk);
 }
@@ -843,6 +771,9 @@ static int ctr_aes_decrypt(struct blkcipher_desc *desc,
 	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 
+	if (unlikely(!sctx->fc))
+		return fallback_blk_dec(desc, dst, src, nbytes);
+
 	blkcipher_walk_init(&walk, dst, src, nbytes);
 	return ctr_aes_crypt(desc, sctx->fc | CPACF_DECRYPT, sctx, &walk);
 }
@@ -851,11 +782,14 @@ static struct crypto_alg ctr_aes_alg = {
 	.cra_name		=	"ctr(aes)",
 	.cra_driver_name	=	"ctr-aes-s390",
 	.cra_priority		=	400,	/* combo: aes + ctr */
-	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER |
+					CRYPTO_ALG_NEED_FALLBACK,
 	.cra_blocksize		=	1,
 	.cra_ctxsize		=	sizeof(struct s390_aes_ctx),
 	.cra_type		=	&crypto_blkcipher_type,
 	.cra_module		=	THIS_MODULE,
+	.cra_init		=	fallback_init_blk,
+	.cra_exit		=	fallback_exit_blk,
 	.cra_u			=	{
 		.blkcipher = {
 			.min_keysize		=	AES_MIN_KEY_SIZE,
@@ -893,43 +827,40 @@ static int __init aes_s390_init(void)
 {
 	int ret;
 
-	if (cpacf_query(CPACF_KM, CPACF_KM_AES_128))
-		keylen_flag |= AES_KEYLEN_128;
-	if (cpacf_query(CPACF_KM, CPACF_KM_AES_192))
-		keylen_flag |= AES_KEYLEN_192;
-	if (cpacf_query(CPACF_KM, CPACF_KM_AES_256))
-		keylen_flag |= AES_KEYLEN_256;
-
-	if (!keylen_flag)
-		return -EOPNOTSUPP;
-
-	/* z9 109 and z9 BC/EC only support 128 bit key length */
-	if (keylen_flag == AES_KEYLEN_128)
-		pr_info("AES hardware acceleration is only available for"
-			" 128-bit keys\n");
-
-	ret = aes_s390_register_alg(&aes_alg);
-	if (ret)
-		goto out_err;
+	/* Query available functions for KM, KMC and KMCTR */
+	cpacf_query(CPACF_KM, &km_functions);
+	cpacf_query(CPACF_KMC, &kmc_functions);
+	cpacf_query(CPACF_KMCTR, &kmctr_functions);
 
-	ret = aes_s390_register_alg(&ecb_aes_alg);
-	if (ret)
-		goto out_err;
+	if (cpacf_test_func(&km_functions, CPACF_KM_AES_128) ||
+	    cpacf_test_func(&km_functions, CPACF_KM_AES_192) ||
+	    cpacf_test_func(&km_functions, CPACF_KM_AES_256)) {
+		ret = aes_s390_register_alg(&aes_alg);
+		if (ret)
+			goto out_err;
+		ret = aes_s390_register_alg(&ecb_aes_alg);
+		if (ret)
+			goto out_err;
+	}
 
-	ret = aes_s390_register_alg(&cbc_aes_alg);
-	if (ret)
-		goto out_err;
+	if (cpacf_test_func(&kmc_functions, CPACF_KMC_AES_128) ||
+	    cpacf_test_func(&kmc_functions, CPACF_KMC_AES_192) ||
+	    cpacf_test_func(&kmc_functions, CPACF_KMC_AES_256)) {
+		ret = aes_s390_register_alg(&cbc_aes_alg);
+		if (ret)
+			goto out_err;
+	}
 
-	if (cpacf_query(CPACF_KM, CPACF_KM_XTS_128) &&
-	    cpacf_query(CPACF_KM, CPACF_KM_XTS_256)) {
+	if (cpacf_test_func(&km_functions, CPACF_KM_XTS_128) ||
+	    cpacf_test_func(&km_functions, CPACF_KM_XTS_256)) {
 		ret = aes_s390_register_alg(&xts_aes_alg);
 		if (ret)
 			goto out_err;
 	}
 
-	if (cpacf_query(CPACF_KMCTR, CPACF_KMCTR_AES_128) &&
-	    cpacf_query(CPACF_KMCTR, CPACF_KMCTR_AES_192) &&
-	    cpacf_query(CPACF_KMCTR, CPACF_KMCTR_AES_256)) {
+	if (cpacf_test_func(&kmctr_functions, CPACF_KMCTR_AES_128) ||
+	    cpacf_test_func(&kmctr_functions, CPACF_KMCTR_AES_192) ||
+	    cpacf_test_func(&kmctr_functions, CPACF_KMCTR_AES_256)) {
 		ctrblk = (u8 *) __get_free_page(GFP_KERNEL);
 		if (!ctrblk) {
 			ret = -ENOMEM;
diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c
index b77a546..965587e 100644
--- a/arch/s390/crypto/des_s390.c
+++ b/arch/s390/crypto/des_s390.c
@@ -27,6 +27,8 @@
 static u8 *ctrblk;
 static DEFINE_SPINLOCK(ctrblk_lock);
 
+static cpacf_mask_t km_functions, kmc_functions, kmctr_functions;
+
 struct s390_des_ctx {
 	u8 iv[DES_BLOCK_SIZE];
 	u8 key[DES3_KEY_SIZE];
@@ -36,12 +38,12 @@ static int des_setkey(struct crypto_tfm *tfm, const u8 *key,
 		      unsigned int key_len)
 {
 	struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm);
-	u32 *flags = &tfm->crt_flags;
 	u32 tmp[DES_EXPKEY_WORDS];
 
 	/* check for weak keys */
-	if (!des_ekey(tmp, key) && (*flags & CRYPTO_TFM_REQ_WEAK_KEY)) {
-		*flags |= CRYPTO_TFM_RES_WEAK_KEY;
+	if (!des_ekey(tmp, key) &&
+	    (tfm->crt_flags & CRYPTO_TFM_REQ_WEAK_KEY)) {
+		tfm->crt_flags |= CRYPTO_TFM_RES_WEAK_KEY;
 		return -EINVAL;
 	}
 
@@ -238,13 +240,12 @@ static int des3_setkey(struct crypto_tfm *tfm, const u8 *key,
 		       unsigned int key_len)
 {
 	struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm);
-	u32 *flags = &tfm->crt_flags;
 
 	if (!(crypto_memneq(key, &key[DES_KEY_SIZE], DES_KEY_SIZE) &&
 	    crypto_memneq(&key[DES_KEY_SIZE], &key[DES_KEY_SIZE * 2],
 			  DES_KEY_SIZE)) &&
-	    (*flags & CRYPTO_TFM_REQ_WEAK_KEY)) {
-		*flags |= CRYPTO_TFM_RES_WEAK_KEY;
+	    (tfm->crt_flags & CRYPTO_TFM_REQ_WEAK_KEY)) {
+		tfm->crt_flags |= CRYPTO_TFM_RES_WEAK_KEY;
 		return -EINVAL;
 	}
 	memcpy(ctx->key, key, key_len);
@@ -554,39 +555,53 @@ static int __init des_s390_init(void)
 {
 	int ret;
 
-	if (!cpacf_query(CPACF_KM, CPACF_KM_DEA) ||
-	    !cpacf_query(CPACF_KM, CPACF_KM_TDEA_192))
-		return -EOPNOTSUPP;
-
-	ret = des_s390_register_alg(&des_alg);
-	if (ret)
-		goto out_err;
-	ret = des_s390_register_alg(&ecb_des_alg);
-	if (ret)
-		goto out_err;
-	ret = des_s390_register_alg(&cbc_des_alg);
-	if (ret)
-		goto out_err;
-	ret = des_s390_register_alg(&des3_alg);
-	if (ret)
-		goto out_err;
-	ret = des_s390_register_alg(&ecb_des3_alg);
-	if (ret)
-		goto out_err;
-	ret = des_s390_register_alg(&cbc_des3_alg);
-	if (ret)
-		goto out_err;
-
-	if (cpacf_query(CPACF_KMCTR, CPACF_KMCTR_DEA) &&
-	    cpacf_query(CPACF_KMCTR, CPACF_KMCTR_TDEA_192)) {
+	/* Query available functions for KM, KMC and KMCTR */
+	cpacf_query(CPACF_KM, &km_functions);
+	cpacf_query(CPACF_KMC, &kmc_functions);
+	cpacf_query(CPACF_KMCTR, &kmctr_functions);
+
+	if (cpacf_test_func(&km_functions, CPACF_KM_DEA)) {
+		ret = des_s390_register_alg(&des_alg);
+		if (ret)
+			goto out_err;
+		ret = des_s390_register_alg(&ecb_des_alg);
+		if (ret)
+			goto out_err;
+	}
+	if (cpacf_test_func(&kmc_functions, CPACF_KMC_DEA)) {
+		ret = des_s390_register_alg(&cbc_des_alg);
+		if (ret)
+			goto out_err;
+	}
+	if (cpacf_test_func(&km_functions, CPACF_KM_TDEA_192)) {
+		ret = des_s390_register_alg(&des3_alg);
+		if (ret)
+			goto out_err;
+		ret = des_s390_register_alg(&ecb_des3_alg);
+		if (ret)
+			goto out_err;
+	}
+	if (cpacf_test_func(&kmc_functions, CPACF_KMC_TDEA_192)) {
+		ret = des_s390_register_alg(&cbc_des3_alg);
+		if (ret)
+			goto out_err;
+	}
+
+	if (cpacf_test_func(&kmctr_functions, CPACF_KMCTR_DEA) ||
+	    cpacf_test_func(&kmctr_functions, CPACF_KMCTR_TDEA_192)) {
 		ctrblk = (u8 *) __get_free_page(GFP_KERNEL);
 		if (!ctrblk) {
 			ret = -ENOMEM;
 			goto out_err;
 		}
+	}
+
+	if (cpacf_test_func(&kmctr_functions, CPACF_KMCTR_DEA)) {
 		ret = des_s390_register_alg(&ctr_des_alg);
 		if (ret)
 			goto out_err;
+	}
+	if (cpacf_test_func(&kmctr_functions, CPACF_KMCTR_TDEA_192)) {
 		ret = des_s390_register_alg(&ctr_des3_alg);
 		if (ret)
 			goto out_err;
diff --git a/arch/s390/crypto/ghash_s390.c b/arch/s390/crypto/ghash_s390.c
index 8e87f51..564616d 100644
--- a/arch/s390/crypto/ghash_s390.c
+++ b/arch/s390/crypto/ghash_s390.c
@@ -136,7 +136,7 @@ static struct shash_alg ghash_alg = {
 
 static int __init ghash_mod_init(void)
 {
-	if (!cpacf_query(CPACF_KIMD, CPACF_KIMD_GHASH))
+	if (!cpacf_query_func(CPACF_KIMD, CPACF_KIMD_GHASH))
 		return -EOPNOTSUPP;
 
 	return crypto_register_shash(&ghash_alg);
diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c
index bbf2af7..79e3a1f 100644
--- a/arch/s390/crypto/prng.c
+++ b/arch/s390/crypto/prng.c
@@ -757,13 +757,13 @@ static int __init prng_init(void)
 	int ret;
 
 	/* check if the CPU has a PRNG */
-	if (!cpacf_query(CPACF_KMC, CPACF_KMC_PRNG))
+	if (!cpacf_query_func(CPACF_KMC, CPACF_KMC_PRNG))
 		return -EOPNOTSUPP;
 
 	/* choose prng mode */
 	if (prng_mode != PRNG_MODE_TDES) {
 		/* check for MSA5 support for PPNO operations */
-		if (!cpacf_query(CPACF_PPNO, CPACF_PPNO_SHA512_DRNG_GEN)) {
+		if (!cpacf_query_func(CPACF_PPNO, CPACF_PPNO_SHA512_DRNG_GEN)) {
 			if (prng_mode == PRNG_MODE_SHA512) {
 				pr_err("The prng module cannot "
 				       "start in SHA-512 mode\n");
diff --git a/arch/s390/crypto/sha1_s390.c b/arch/s390/crypto/sha1_s390.c
index 5fbf91b..c7de53d 100644
--- a/arch/s390/crypto/sha1_s390.c
+++ b/arch/s390/crypto/sha1_s390.c
@@ -91,7 +91,7 @@ static struct shash_alg alg = {
 
 static int __init sha1_s390_init(void)
 {
-	if (!cpacf_query(CPACF_KIMD, CPACF_KIMD_SHA_1))
+	if (!cpacf_query_func(CPACF_KIMD, CPACF_KIMD_SHA_1))
 		return -EOPNOTSUPP;
 	return crypto_register_shash(&alg);
 }
diff --git a/arch/s390/crypto/sha256_s390.c b/arch/s390/crypto/sha256_s390.c
index 10aac0b..53c2779 100644
--- a/arch/s390/crypto/sha256_s390.c
+++ b/arch/s390/crypto/sha256_s390.c
@@ -123,7 +123,7 @@ static int __init sha256_s390_init(void)
 {
 	int ret;
 
-	if (!cpacf_query(CPACF_KIMD, CPACF_KIMD_SHA_256))
+	if (!cpacf_query_func(CPACF_KIMD, CPACF_KIMD_SHA_256))
 		return -EOPNOTSUPP;
 	ret = crypto_register_shash(&sha256_alg);
 	if (ret < 0)
diff --git a/arch/s390/crypto/sha512_s390.c b/arch/s390/crypto/sha512_s390.c
index ea85757..2f4caa1 100644
--- a/arch/s390/crypto/sha512_s390.c
+++ b/arch/s390/crypto/sha512_s390.c
@@ -133,7 +133,7 @@ static int __init init(void)
 {
 	int ret;
 
-	if (!cpacf_query(CPACF_KIMD, CPACF_KIMD_SHA_512))
+	if (!cpacf_query_func(CPACF_KIMD, CPACF_KIMD_SHA_512))
 		return -EOPNOTSUPP;
 	if ((ret = crypto_register_shash(&sha512_alg)) < 0)
 		goto out;
diff --git a/arch/s390/include/asm/cpacf.h b/arch/s390/include/asm/cpacf.h
index c226c9b..2c680db 100644
--- a/arch/s390/include/asm/cpacf.h
+++ b/arch/s390/include/asm/cpacf.h
@@ -106,6 +106,8 @@
 #define CPACF_PPNO_SHA512_DRNG_GEN	0x03
 #define CPACF_PPNO_SHA512_DRNG_SEED	0x83
 
+typedef struct { unsigned char bytes[16]; } cpacf_mask_t;
+
 /**
  * cpacf_query() - check if a specific CPACF function is available
  * @opcode: the opcode of the crypto instruction
@@ -116,55 +118,66 @@
  *
  * Returns 1 if @func is available for @opcode, 0 otherwise
  */
-static inline void __cpacf_query(unsigned int opcode, unsigned char *status)
+static inline void __cpacf_query(unsigned int opcode, cpacf_mask_t *mask)
 {
-	typedef struct { unsigned char _[16]; } status_type;
 	register unsigned long r0 asm("0") = 0;	/* query function */
-	register unsigned long r1 asm("1") = (unsigned long) status;
+	register unsigned long r1 asm("1") = (unsigned long) mask;
 
 	asm volatile(
 		"	spm 0\n" /* pckmo doesn't change the cc */
 		/* Parameter registers are ignored, but may not be 0 */
 		"0:	.insn	rrf,%[opc] << 16,2,2,2,0\n"
 		"	brc	1,0b\n"	/* handle partial completion */
-		: "=m" (*(status_type *) status)
+		: "=m" (*mask)
 		: [fc] "d" (r0), [pba] "a" (r1), [opc] "i" (opcode)
 		: "cc");
 }
 
-static inline int cpacf_query(unsigned int opcode, unsigned int func)
+static inline int __cpacf_check_opcode(unsigned int opcode)
 {
-	unsigned char status[16];
-
 	switch (opcode) {
 	case CPACF_KMAC:
 	case CPACF_KM:
 	case CPACF_KMC:
 	case CPACF_KIMD:
 	case CPACF_KLMD:
-		if (!test_facility(17))	/* check for MSA */
-			return 0;
-		break;
+		return test_facility(17);	/* check for MSA */
 	case CPACF_PCKMO:
-		if (!test_facility(76))	/* check for MSA3 */
-			return 0;
-		break;
+		return test_facility(76);	/* check for MSA3 */
 	case CPACF_KMF:
 	case CPACF_KMO:
 	case CPACF_PCC:
 	case CPACF_KMCTR:
-		if (!test_facility(77))	/* check for MSA4 */
-			return 0;
-		break;
+		return test_facility(77);	/* check for MSA4 */
 	case CPACF_PPNO:
-		if (!test_facility(57))	/* check for MSA5 */
-			return 0;
-		break;
+		return test_facility(57);	/* check for MSA5 */
 	default:
 		BUG();
 	}
-	__cpacf_query(opcode, status);
-	return (status[func >> 3] & (0x80 >> (func & 7))) != 0;
+}
+
+static inline int cpacf_query(unsigned int opcode, cpacf_mask_t *mask)
+{
+	if (__cpacf_check_opcode(opcode)) {
+		__cpacf_query(opcode, mask);
+		return 1;
+	}
+	memset(mask, 0, sizeof(*mask));
+	return 0;
+}
+
+static inline int cpacf_test_func(cpacf_mask_t *mask, unsigned int func)
+{
+	return (mask->bytes[func >> 3] & (0x80 >> (func & 7))) != 0;
+}
+
+static inline int cpacf_query_func(unsigned int opcode, unsigned int func)
+{
+	cpacf_mask_t mask;
+
+	if (cpacf_query(opcode, &mask))
+		return cpacf_test_func(&mask, func);
+	return 0;
 }
 
 /**
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index fd2f120..d6e7e52 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -245,22 +245,33 @@ static void kvm_s390_cpu_feat_init(void)
 		     PTFF_QAF);
 
 	if (test_facility(17)) { /* MSA */
-		__cpacf_query(CPACF_KMAC, kvm_s390_available_subfunc.kmac);
-		__cpacf_query(CPACF_KMC, kvm_s390_available_subfunc.kmc);
-		__cpacf_query(CPACF_KM, kvm_s390_available_subfunc.km);
-		__cpacf_query(CPACF_KIMD, kvm_s390_available_subfunc.kimd);
-		__cpacf_query(CPACF_KLMD, kvm_s390_available_subfunc.klmd);
+		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
+			      kvm_s390_available_subfunc.kmac);
+		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
+			      kvm_s390_available_subfunc.kmc);
+		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
+			      kvm_s390_available_subfunc.km);
+		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
+			      kvm_s390_available_subfunc.kimd);
+		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
+			      kvm_s390_available_subfunc.klmd);
 	}
 	if (test_facility(76)) /* MSA3 */
-		__cpacf_query(CPACF_PCKMO, kvm_s390_available_subfunc.pckmo);
+		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
+			      kvm_s390_available_subfunc.pckmo);
 	if (test_facility(77)) { /* MSA4 */
-		__cpacf_query(CPACF_KMCTR, kvm_s390_available_subfunc.kmctr);
-		__cpacf_query(CPACF_KMF, kvm_s390_available_subfunc.kmf);
-		__cpacf_query(CPACF_KMO, kvm_s390_available_subfunc.kmo);
-		__cpacf_query(CPACF_PCC, kvm_s390_available_subfunc.pcc);
+		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
+			      kvm_s390_available_subfunc.kmctr);
+		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
+			      kvm_s390_available_subfunc.kmf);
+		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
+			      kvm_s390_available_subfunc.kmo);
+		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
+			      kvm_s390_available_subfunc.pcc);
 	}
 	if (test_facility(57)) /* MSA5 */
-		__cpacf_query(CPACF_PPNO, kvm_s390_available_subfunc.ppno);
+		__cpacf_query(CPACF_PPNO, (cpacf_mask_t *)
+			      kvm_s390_available_subfunc.ppno);
 
 	if (MACHINE_HAS_ESOP)
 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
-- 
cgit v0.10.2


From 7bac4f5b8e3a607f7ba1d3a652f5922a657fa9e8 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Mon, 15 Aug 2016 15:17:52 +0200
Subject: s390/crypto: simplify CPACF encryption / decryption functions

The double while loops of the CTR mode encryption / decryption functions
are overly complex for little gain. Simplify the functions to a single
while loop at the cost of an additional memcpy of a few bytes for every
4K page worth of data.
Adapt the other crypto functions to make them all look alike.

Reviewed-by: Harald Freudenberger <freude@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c
index f4ad96e..303d28e 100644
--- a/arch/s390/crypto/aes_s390.c
+++ b/arch/s390/crypto/aes_s390.c
@@ -46,14 +46,6 @@ struct s390_aes_ctx {
 	} fallback;
 };
 
-struct pcc_param {
-	u8 key[32];
-	u8 tweak[16];
-	u8 block[16];
-	u8 bit[16];
-	u8 xts[16];
-};
-
 struct s390_xts_ctx {
 	u8 key[32];
 	u8 pcc_key[32];
@@ -248,22 +240,20 @@ static int ecb_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
 	return 0;
 }
 
-static int ecb_aes_crypt(struct blkcipher_desc *desc, long func, void *param,
+static int ecb_aes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
 			 struct blkcipher_walk *walk)
 {
-	int ret = blkcipher_walk_virt(desc, walk);
-	unsigned int nbytes;
+	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+	unsigned int nbytes, n;
+	int ret;
 
-	while ((nbytes = walk->nbytes)) {
+	ret = blkcipher_walk_virt(desc, walk);
+	while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) {
 		/* only use complete blocks */
-		unsigned int n = nbytes & ~(AES_BLOCK_SIZE - 1);
-		u8 *out = walk->dst.virt.addr;
-		u8 *in = walk->src.virt.addr;
-
-		cpacf_km(func, param, out, in, n);
-
-		nbytes &= AES_BLOCK_SIZE - 1;
-		ret = blkcipher_walk_done(desc, walk, nbytes);
+		n = nbytes & ~(AES_BLOCK_SIZE - 1);
+		cpacf_km(sctx->fc | modifier, sctx->key,
+			 walk->dst.virt.addr, walk->src.virt.addr, n);
+		ret = blkcipher_walk_done(desc, walk, nbytes - n);
 	}
 
 	return ret;
@@ -280,7 +270,7 @@ static int ecb_aes_encrypt(struct blkcipher_desc *desc,
 		return fallback_blk_enc(desc, dst, src, nbytes);
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ecb_aes_crypt(desc, sctx->fc, sctx->key, &walk);
+	return ecb_aes_crypt(desc, 0, &walk);
 }
 
 static int ecb_aes_decrypt(struct blkcipher_desc *desc,
@@ -294,7 +284,7 @@ static int ecb_aes_decrypt(struct blkcipher_desc *desc,
 		return fallback_blk_dec(desc, dst, src, nbytes);
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ecb_aes_crypt(desc, sctx->fc | CPACF_DECRYPT, sctx->key, &walk);
+	return ecb_aes_crypt(desc, CPACF_DECRYPT, &walk);
 }
 
 static int fallback_init_blk(struct crypto_tfm *tfm)
@@ -366,36 +356,28 @@ static int cbc_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
 	return 0;
 }
 
-static int cbc_aes_crypt(struct blkcipher_desc *desc, long func,
+static int cbc_aes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
 			 struct blkcipher_walk *walk)
 {
 	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
-	int ret = blkcipher_walk_virt(desc, walk);
-	unsigned int nbytes = walk->nbytes;
+	unsigned int nbytes, n;
+	int ret;
 	struct {
 		u8 iv[AES_BLOCK_SIZE];
 		u8 key[AES_MAX_KEY_SIZE];
 	} param;
 
-	if (!nbytes)
-		goto out;
-
+	ret = blkcipher_walk_virt(desc, walk);
 	memcpy(param.iv, walk->iv, AES_BLOCK_SIZE);
 	memcpy(param.key, sctx->key, sctx->key_len);
-	do {
+	while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) {
 		/* only use complete blocks */
-		unsigned int n = nbytes & ~(AES_BLOCK_SIZE - 1);
-		u8 *out = walk->dst.virt.addr;
-		u8 *in = walk->src.virt.addr;
-
-		cpacf_kmc(func, &param, out, in, n);
-
-		nbytes &= AES_BLOCK_SIZE - 1;
-		ret = blkcipher_walk_done(desc, walk, nbytes);
-	} while ((nbytes = walk->nbytes));
+		n = nbytes & ~(AES_BLOCK_SIZE - 1);
+		cpacf_kmc(sctx->fc | modifier, &param,
+			  walk->dst.virt.addr, walk->src.virt.addr, n);
+		ret = blkcipher_walk_done(desc, walk, nbytes - n);
+	}
 	memcpy(walk->iv, param.iv, AES_BLOCK_SIZE);
-
-out:
 	return ret;
 }
 
@@ -410,7 +392,7 @@ static int cbc_aes_encrypt(struct blkcipher_desc *desc,
 		return fallback_blk_enc(desc, dst, src, nbytes);
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return cbc_aes_crypt(desc, sctx->fc, &walk);
+	return cbc_aes_crypt(desc, 0, &walk);
 }
 
 static int cbc_aes_decrypt(struct blkcipher_desc *desc,
@@ -424,7 +406,7 @@ static int cbc_aes_decrypt(struct blkcipher_desc *desc,
 		return fallback_blk_dec(desc, dst, src, nbytes);
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return cbc_aes_crypt(desc, sctx->fc | CPACF_DECRYPT, &walk);
+	return cbc_aes_crypt(desc, CPACF_DECRYPT, &walk);
 }
 
 static struct crypto_alg cbc_aes_alg = {
@@ -536,46 +518,43 @@ static int xts_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
 	return 0;
 }
 
-static int xts_aes_crypt(struct blkcipher_desc *desc, long func,
-			 struct s390_xts_ctx *xts_ctx,
+static int xts_aes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
 			 struct blkcipher_walk *walk)
 {
-	unsigned int offset = xts_ctx->key_len & 0x10;
-	int ret = blkcipher_walk_virt(desc, walk);
-	unsigned int nbytes = walk->nbytes;
-	unsigned int n;
-	u8 *in, *out;
-	struct pcc_param pcc_param;
+	struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(desc->tfm);
+	unsigned int offset, nbytes, n;
+	int ret;
+	struct {
+		u8 key[32];
+		u8 tweak[16];
+		u8 block[16];
+		u8 bit[16];
+		u8 xts[16];
+	} pcc_param;
 	struct {
 		u8 key[32];
 		u8 init[16];
 	} xts_param;
 
-	if (!nbytes)
-		goto out;
-
+	ret = blkcipher_walk_virt(desc, walk);
+	offset = xts_ctx->key_len & 0x10;
 	memset(pcc_param.block, 0, sizeof(pcc_param.block));
 	memset(pcc_param.bit, 0, sizeof(pcc_param.bit));
 	memset(pcc_param.xts, 0, sizeof(pcc_param.xts));
 	memcpy(pcc_param.tweak, walk->iv, sizeof(pcc_param.tweak));
 	memcpy(pcc_param.key + offset, xts_ctx->pcc_key, xts_ctx->key_len);
-	/* remove decipher modifier bit from 'func' and call PCC */
-	cpacf_pcc(func & 0x7f, &pcc_param.key[offset]);
+	cpacf_pcc(xts_ctx->fc, pcc_param.key + offset);
 
 	memcpy(xts_param.key + offset, xts_ctx->key, xts_ctx->key_len);
 	memcpy(xts_param.init, pcc_param.xts, 16);
-	do {
+
+	while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) {
 		/* only use complete blocks */
 		n = nbytes & ~(AES_BLOCK_SIZE - 1);
-		out = walk->dst.virt.addr;
-		in = walk->src.virt.addr;
-
-		cpacf_km(func, &xts_param.key[offset], out, in, n);
-
-		nbytes &= AES_BLOCK_SIZE - 1;
-		ret = blkcipher_walk_done(desc, walk, nbytes);
-	} while ((nbytes = walk->nbytes));
-out:
+		cpacf_km(xts_ctx->fc | modifier, xts_param.key + offset,
+			 walk->dst.virt.addr, walk->src.virt.addr, n);
+		ret = blkcipher_walk_done(desc, walk, nbytes - n);
+	}
 	return ret;
 }
 
@@ -590,7 +569,7 @@ static int xts_aes_encrypt(struct blkcipher_desc *desc,
 		return xts_fallback_encrypt(desc, dst, src, nbytes);
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return xts_aes_crypt(desc, xts_ctx->fc, xts_ctx, &walk);
+	return xts_aes_crypt(desc, 0, &walk);
 }
 
 static int xts_aes_decrypt(struct blkcipher_desc *desc,
@@ -604,7 +583,7 @@ static int xts_aes_decrypt(struct blkcipher_desc *desc,
 		return xts_fallback_decrypt(desc, dst, src, nbytes);
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return xts_aes_crypt(desc, xts_ctx->fc | CPACF_DECRYPT, xts_ctx, &walk);
+	return xts_aes_crypt(desc, CPACF_DECRYPT, &walk);
 }
 
 static int xts_fallback_init(struct crypto_tfm *tfm)
@@ -676,75 +655,58 @@ static int ctr_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
 	return 0;
 }
 
-static unsigned int __ctrblk_init(u8 *ctrptr, unsigned int nbytes)
+static unsigned int __ctrblk_init(u8 *ctrptr, u8 *iv, unsigned int nbytes)
 {
 	unsigned int i, n;
 
 	/* only use complete blocks, max. PAGE_SIZE */
+	memcpy(ctrptr, iv, AES_BLOCK_SIZE);
 	n = (nbytes > PAGE_SIZE) ? PAGE_SIZE : nbytes & ~(AES_BLOCK_SIZE - 1);
-	for (i = AES_BLOCK_SIZE; i < n; i += AES_BLOCK_SIZE) {
-		memcpy(ctrptr + i, ctrptr + i - AES_BLOCK_SIZE,
-		       AES_BLOCK_SIZE);
-		crypto_inc(ctrptr + i, AES_BLOCK_SIZE);
+	for (i = (n / AES_BLOCK_SIZE) - 1; i > 0; i--) {
+		memcpy(ctrptr + AES_BLOCK_SIZE, ctrptr, AES_BLOCK_SIZE);
+		crypto_inc(ctrptr + AES_BLOCK_SIZE, AES_BLOCK_SIZE);
+		ctrptr += AES_BLOCK_SIZE;
 	}
 	return n;
 }
 
-static int ctr_aes_crypt(struct blkcipher_desc *desc, long func,
-			 struct s390_aes_ctx *sctx, struct blkcipher_walk *walk)
+static int ctr_aes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
+			 struct blkcipher_walk *walk)
 {
-	int ret = blkcipher_walk_virt_block(desc, walk, AES_BLOCK_SIZE);
+	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+	u8 buf[AES_BLOCK_SIZE], *ctrptr;
 	unsigned int n, nbytes;
-	u8 buf[AES_BLOCK_SIZE], ctrbuf[AES_BLOCK_SIZE];
-	u8 *out, *in, *ctrptr = ctrbuf;
-
-	if (!walk->nbytes)
-		return ret;
+	int ret, locked;
 
-	if (spin_trylock(&ctrblk_lock))
-		ctrptr = ctrblk;
+	locked = spin_trylock(&ctrblk_lock);
 
-	memcpy(ctrptr, walk->iv, AES_BLOCK_SIZE);
+	ret = blkcipher_walk_virt_block(desc, walk, AES_BLOCK_SIZE);
 	while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) {
-		out = walk->dst.virt.addr;
-		in = walk->src.virt.addr;
-		while (nbytes >= AES_BLOCK_SIZE) {
-			if (ctrptr == ctrblk)
-				n = __ctrblk_init(ctrptr, nbytes);
-			else
-				n = AES_BLOCK_SIZE;
-			cpacf_kmctr(func, sctx->key, out, in, n, ctrptr);
-			if (n > AES_BLOCK_SIZE)
-				memcpy(ctrptr, ctrptr + n - AES_BLOCK_SIZE,
-				       AES_BLOCK_SIZE);
-			crypto_inc(ctrptr, AES_BLOCK_SIZE);
-			out += n;
-			in += n;
-			nbytes -= n;
-		}
-		ret = blkcipher_walk_done(desc, walk, nbytes);
+		n = AES_BLOCK_SIZE;
+		if (nbytes >= 2*AES_BLOCK_SIZE && locked)
+			n = __ctrblk_init(ctrblk, walk->iv, nbytes);
+		ctrptr = (n > AES_BLOCK_SIZE) ? ctrblk : walk->iv;
+		cpacf_kmctr(sctx->fc | modifier, sctx->key,
+			    walk->dst.virt.addr, walk->src.virt.addr,
+			    n, ctrptr);
+		if (ctrptr == ctrblk)
+			memcpy(walk->iv, ctrptr + n - AES_BLOCK_SIZE,
+			       AES_BLOCK_SIZE);
+		crypto_inc(walk->iv, AES_BLOCK_SIZE);
+		ret = blkcipher_walk_done(desc, walk, nbytes - n);
 	}
-	if (ctrptr == ctrblk) {
-		if (nbytes)
-			memcpy(ctrbuf, ctrptr, AES_BLOCK_SIZE);
-		else
-			memcpy(walk->iv, ctrptr, AES_BLOCK_SIZE);
+	if (locked)
 		spin_unlock(&ctrblk_lock);
-	} else {
-		if (!nbytes)
-			memcpy(walk->iv, ctrptr, AES_BLOCK_SIZE);
-	}
 	/*
 	 * final block may be < AES_BLOCK_SIZE, copy only nbytes
 	 */
 	if (nbytes) {
-		out = walk->dst.virt.addr;
-		in = walk->src.virt.addr;
-		cpacf_kmctr(func, sctx->key, buf, in, AES_BLOCK_SIZE, ctrbuf);
-		memcpy(out, buf, nbytes);
-		crypto_inc(ctrbuf, AES_BLOCK_SIZE);
+		cpacf_kmctr(sctx->fc | modifier, sctx->key,
+			    buf, walk->src.virt.addr,
+			    AES_BLOCK_SIZE, walk->iv);
+		memcpy(walk->dst.virt.addr, buf, nbytes);
+		crypto_inc(walk->iv, AES_BLOCK_SIZE);
 		ret = blkcipher_walk_done(desc, walk, 0);
-		memcpy(walk->iv, ctrbuf, AES_BLOCK_SIZE);
 	}
 
 	return ret;
@@ -761,7 +723,7 @@ static int ctr_aes_encrypt(struct blkcipher_desc *desc,
 		return fallback_blk_enc(desc, dst, src, nbytes);
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ctr_aes_crypt(desc, sctx->fc, sctx, &walk);
+	return ctr_aes_crypt(desc, 0, &walk);
 }
 
 static int ctr_aes_decrypt(struct blkcipher_desc *desc,
@@ -775,7 +737,7 @@ static int ctr_aes_decrypt(struct blkcipher_desc *desc,
 		return fallback_blk_dec(desc, dst, src, nbytes);
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ctr_aes_crypt(desc, sctx->fc | CPACF_DECRYPT, sctx, &walk);
+	return ctr_aes_crypt(desc, CPACF_DECRYPT, &walk);
 }
 
 static struct crypto_alg ctr_aes_alg = {
diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c
index 965587e..8b83144 100644
--- a/arch/s390/crypto/des_s390.c
+++ b/arch/s390/crypto/des_s390.c
@@ -85,57 +85,46 @@ static struct crypto_alg des_alg = {
 	}
 };
 
-static int ecb_desall_crypt(struct blkcipher_desc *desc, long func,
-			    u8 *key, struct blkcipher_walk *walk)
+static int ecb_desall_crypt(struct blkcipher_desc *desc, unsigned long fc,
+			    struct blkcipher_walk *walk)
 {
-	int ret = blkcipher_walk_virt(desc, walk);
-	unsigned int nbytes;
+	struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	unsigned int nbytes, n;
+	int ret;
 
-	while ((nbytes = walk->nbytes)) {
+	ret = blkcipher_walk_virt(desc, walk);
+	while ((nbytes = walk->nbytes) >= DES_BLOCK_SIZE) {
 		/* only use complete blocks */
-		unsigned int n = nbytes & ~(DES_BLOCK_SIZE - 1);
-		u8 *out = walk->dst.virt.addr;
-		u8 *in = walk->src.virt.addr;
-
-		cpacf_km(func, key, out, in, n);
-
-		nbytes &= DES_BLOCK_SIZE - 1;
-		ret = blkcipher_walk_done(desc, walk, nbytes);
+		n = nbytes & ~(DES_BLOCK_SIZE - 1);
+		cpacf_km(fc, ctx->key, walk->dst.virt.addr,
+			 walk->src.virt.addr, n);
+		ret = blkcipher_walk_done(desc, walk, nbytes - n);
 	}
-
 	return ret;
 }
 
-static int cbc_desall_crypt(struct blkcipher_desc *desc, long func,
+static int cbc_desall_crypt(struct blkcipher_desc *desc, unsigned long fc,
 			    struct blkcipher_walk *walk)
 {
 	struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	int ret = blkcipher_walk_virt(desc, walk);
-	unsigned int nbytes = walk->nbytes;
+	unsigned int nbytes, n;
+	int ret;
 	struct {
 		u8 iv[DES_BLOCK_SIZE];
 		u8 key[DES3_KEY_SIZE];
 	} param;
 
-	if (!nbytes)
-		goto out;
-
+	ret = blkcipher_walk_virt(desc, walk);
 	memcpy(param.iv, walk->iv, DES_BLOCK_SIZE);
 	memcpy(param.key, ctx->key, DES3_KEY_SIZE);
-	do {
+	while ((nbytes = walk->nbytes) >= DES_BLOCK_SIZE) {
 		/* only use complete blocks */
-		unsigned int n = nbytes & ~(DES_BLOCK_SIZE - 1);
-		u8 *out = walk->dst.virt.addr;
-		u8 *in = walk->src.virt.addr;
-
-		cpacf_kmc(func, &param, out, in, n);
-
-		nbytes &= DES_BLOCK_SIZE - 1;
-		ret = blkcipher_walk_done(desc, walk, nbytes);
-	} while ((nbytes = walk->nbytes));
+		n = nbytes & ~(DES_BLOCK_SIZE - 1);
+		cpacf_kmc(fc, &param, walk->dst.virt.addr,
+			  walk->src.virt.addr, n);
+		ret = blkcipher_walk_done(desc, walk, nbytes - n);
+	}
 	memcpy(walk->iv, param.iv, DES_BLOCK_SIZE);
-
-out:
 	return ret;
 }
 
@@ -143,23 +132,20 @@ static int ecb_des_encrypt(struct blkcipher_desc *desc,
 			   struct scatterlist *dst, struct scatterlist *src,
 			   unsigned int nbytes)
 {
-	struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ecb_desall_crypt(desc, CPACF_KM_DEA, ctx->key, &walk);
+	return ecb_desall_crypt(desc, CPACF_KM_DEA, &walk);
 }
 
 static int ecb_des_decrypt(struct blkcipher_desc *desc,
 			   struct scatterlist *dst, struct scatterlist *src,
 			   unsigned int nbytes)
 {
-	struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ecb_desall_crypt(desc, CPACF_KM_DEA | CPACF_DECRYPT,
-				ctx->key, &walk);
+	return ecb_desall_crypt(desc, CPACF_KM_DEA | CPACF_DECRYPT, &walk);
 }
 
 static struct crypto_alg ecb_des_alg = {
@@ -290,23 +276,21 @@ static int ecb_des3_encrypt(struct blkcipher_desc *desc,
 			    struct scatterlist *dst, struct scatterlist *src,
 			    unsigned int nbytes)
 {
-	struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ecb_desall_crypt(desc, CPACF_KM_TDEA_192, ctx->key, &walk);
+	return ecb_desall_crypt(desc, CPACF_KM_TDEA_192, &walk);
 }
 
 static int ecb_des3_decrypt(struct blkcipher_desc *desc,
 			    struct scatterlist *dst, struct scatterlist *src,
 			    unsigned int nbytes)
 {
-	struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
 	return ecb_desall_crypt(desc, CPACF_KM_TDEA_192 | CPACF_DECRYPT,
-				ctx->key, &walk);
+				&walk);
 }
 
 static struct crypto_alg ecb_des3_alg = {
@@ -371,73 +355,54 @@ static struct crypto_alg cbc_des3_alg = {
 	}
 };
 
-static unsigned int __ctrblk_init(u8 *ctrptr, unsigned int nbytes)
+static unsigned int __ctrblk_init(u8 *ctrptr, u8 *iv, unsigned int nbytes)
 {
 	unsigned int i, n;
 
 	/* align to block size, max. PAGE_SIZE */
 	n = (nbytes > PAGE_SIZE) ? PAGE_SIZE : nbytes & ~(DES_BLOCK_SIZE - 1);
-	for (i = DES_BLOCK_SIZE; i < n; i += DES_BLOCK_SIZE) {
-		memcpy(ctrptr + i, ctrptr + i - DES_BLOCK_SIZE, DES_BLOCK_SIZE);
-		crypto_inc(ctrptr + i, DES_BLOCK_SIZE);
+	memcpy(ctrptr, iv, DES_BLOCK_SIZE);
+	for (i = (n / DES_BLOCK_SIZE) - 1; i > 0; i--) {
+		memcpy(ctrptr + DES_BLOCK_SIZE, ctrptr, DES_BLOCK_SIZE);
+		crypto_inc(ctrptr + DES_BLOCK_SIZE, DES_BLOCK_SIZE);
+		ctrptr += DES_BLOCK_SIZE;
 	}
 	return n;
 }
 
-static int ctr_desall_crypt(struct blkcipher_desc *desc, long func,
-			    struct s390_des_ctx *ctx,
+static int ctr_desall_crypt(struct blkcipher_desc *desc, unsigned long fc,
 			    struct blkcipher_walk *walk)
 {
-	int ret = blkcipher_walk_virt_block(desc, walk, DES_BLOCK_SIZE);
+	struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	u8 buf[DES_BLOCK_SIZE], *ctrptr;
 	unsigned int n, nbytes;
-	u8 buf[DES_BLOCK_SIZE], ctrbuf[DES_BLOCK_SIZE];
-	u8 *out, *in, *ctrptr = ctrbuf;
-
-	if (!walk->nbytes)
-		return ret;
+	int ret, locked;
 
-	if (spin_trylock(&ctrblk_lock))
-		ctrptr = ctrblk;
+	locked = spin_trylock(&ctrblk_lock);
 
-	memcpy(ctrptr, walk->iv, DES_BLOCK_SIZE);
+	ret = blkcipher_walk_virt_block(desc, walk, DES_BLOCK_SIZE);
 	while ((nbytes = walk->nbytes) >= DES_BLOCK_SIZE) {
-		out = walk->dst.virt.addr;
-		in = walk->src.virt.addr;
-		while (nbytes >= DES_BLOCK_SIZE) {
-			if (ctrptr == ctrblk)
-				n = __ctrblk_init(ctrptr, nbytes);
-			else
-				n = DES_BLOCK_SIZE;
-			cpacf_kmctr(func, ctx->key, out, in, n, ctrptr);
-			if (n > DES_BLOCK_SIZE)
-				memcpy(ctrptr, ctrptr + n - DES_BLOCK_SIZE,
-				       DES_BLOCK_SIZE);
-			crypto_inc(ctrptr, DES_BLOCK_SIZE);
-			out += n;
-			in += n;
-			nbytes -= n;
-		}
-		ret = blkcipher_walk_done(desc, walk, nbytes);
+		n = DES_BLOCK_SIZE;
+		if (nbytes >= 2*DES_BLOCK_SIZE && locked)
+			n = __ctrblk_init(ctrblk, walk->iv, nbytes);
+		ctrptr = (n > DES_BLOCK_SIZE) ? ctrblk : walk->iv;
+		cpacf_kmctr(fc, ctx->key, walk->dst.virt.addr,
+			    walk->src.virt.addr, n, ctrptr);
+		if (ctrptr == ctrblk)
+			memcpy(walk->iv, ctrptr + n - DES_BLOCK_SIZE,
+				DES_BLOCK_SIZE);
+		crypto_inc(walk->iv, DES_BLOCK_SIZE);
+		ret = blkcipher_walk_done(desc, walk, nbytes - n);
 	}
-	if (ctrptr == ctrblk) {
-		if (nbytes)
-			memcpy(ctrbuf, ctrptr, DES_BLOCK_SIZE);
-		else
-			memcpy(walk->iv, ctrptr, DES_BLOCK_SIZE);
+	if (locked)
 		spin_unlock(&ctrblk_lock);
-	} else {
-		if (!nbytes)
-			memcpy(walk->iv, ctrptr, DES_BLOCK_SIZE);
-	}
 	/* final block may be < DES_BLOCK_SIZE, copy only nbytes */
 	if (nbytes) {
-		out = walk->dst.virt.addr;
-		in = walk->src.virt.addr;
-		cpacf_kmctr(func, ctx->key, buf, in, DES_BLOCK_SIZE, ctrbuf);
-		memcpy(out, buf, nbytes);
-		crypto_inc(ctrbuf, DES_BLOCK_SIZE);
+		cpacf_kmctr(fc, ctx->key, buf, walk->src.virt.addr,
+			    DES_BLOCK_SIZE, walk->iv);
+		memcpy(walk->dst.virt.addr, buf, nbytes);
+		crypto_inc(walk->iv, DES_BLOCK_SIZE);
 		ret = blkcipher_walk_done(desc, walk, 0);
-		memcpy(walk->iv, ctrbuf, DES_BLOCK_SIZE);
 	}
 	return ret;
 }
@@ -446,23 +411,20 @@ static int ctr_des_encrypt(struct blkcipher_desc *desc,
 			   struct scatterlist *dst, struct scatterlist *src,
 			   unsigned int nbytes)
 {
-	struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ctr_desall_crypt(desc, CPACF_KMCTR_DEA, ctx, &walk);
+	return ctr_desall_crypt(desc, CPACF_KMCTR_DEA, &walk);
 }
 
 static int ctr_des_decrypt(struct blkcipher_desc *desc,
 			   struct scatterlist *dst, struct scatterlist *src,
 			   unsigned int nbytes)
 {
-	struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ctr_desall_crypt(desc, CPACF_KMCTR_DEA | CPACF_DECRYPT,
-				ctx, &walk);
+	return ctr_desall_crypt(desc, CPACF_KMCTR_DEA | CPACF_DECRYPT, &walk);
 }
 
 static struct crypto_alg ctr_des_alg = {
@@ -490,23 +452,21 @@ static int ctr_des3_encrypt(struct blkcipher_desc *desc,
 			    struct scatterlist *dst, struct scatterlist *src,
 			    unsigned int nbytes)
 {
-	struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ctr_desall_crypt(desc, CPACF_KMCTR_TDEA_192, ctx, &walk);
+	return ctr_desall_crypt(desc, CPACF_KMCTR_TDEA_192, &walk);
 }
 
 static int ctr_des3_decrypt(struct blkcipher_desc *desc,
 			    struct scatterlist *dst, struct scatterlist *src,
 			    unsigned int nbytes)
 {
-	struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
 	return ctr_desall_crypt(desc, CPACF_KMCTR_TDEA_192 | CPACF_DECRYPT,
-				ctx, &walk);
+				&walk);
 }
 
 static struct crypto_alg ctr_des3_alg = {
-- 
cgit v0.10.2


From f5b55fa1f81d518925d68b50d2316850c525d1ad Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Wed, 31 Aug 2016 09:27:35 +0200
Subject: RAID/s390: provide raid6 recovery optimization

The XC instruction can be used to improve the speed of the raid6
recovery. The loops now operate on blocks of 256 bytes.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h
index c032a6a..395a4c6 100644
--- a/include/linux/raid/pq.h
+++ b/include/linux/raid/pq.h
@@ -116,6 +116,7 @@ struct raid6_recov_calls {
 extern const struct raid6_recov_calls raid6_recov_intx1;
 extern const struct raid6_recov_calls raid6_recov_ssse3;
 extern const struct raid6_recov_calls raid6_recov_avx2;
+extern const struct raid6_recov_calls raid6_recov_s390xc;
 
 extern const struct raid6_calls raid6_neonx1;
 extern const struct raid6_calls raid6_neonx2;
diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile
index 667b960..29f503e 100644
--- a/lib/raid6/Makefile
+++ b/lib/raid6/Makefile
@@ -7,7 +7,7 @@ raid6_pq-$(CONFIG_X86) += recov_ssse3.o recov_avx2.o mmx.o sse1.o sse2.o avx2.o
 raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o
 raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o
 raid6_pq-$(CONFIG_TILEGX) += tilegx8.o
-raid6_pq-$(CONFIG_S390) += s390vx8.o
+raid6_pq-$(CONFIG_S390) += s390vx8.o recov_s390xc.o
 
 hostprogs-y	+= mktables
 
diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c
index e1923b6..592ff49 100644
--- a/lib/raid6/algos.c
+++ b/lib/raid6/algos.c
@@ -98,6 +98,9 @@ const struct raid6_recov_calls *const raid6_recov_algos[] = {
 #ifdef CONFIG_AS_SSSE3
 	&raid6_recov_ssse3,
 #endif
+#ifdef CONFIG_S390
+	&raid6_recov_s390xc,
+#endif
 	&raid6_recov_intx1,
 	NULL
 };
diff --git a/lib/raid6/recov_s390xc.c b/lib/raid6/recov_s390xc.c
new file mode 100644
index 0000000..b042dac
--- /dev/null
+++ b/lib/raid6/recov_s390xc.c
@@ -0,0 +1,116 @@
+/*
+ * RAID-6 data recovery in dual failure mode based on the XC instruction.
+ *
+ * Copyright IBM Corp. 2016
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#include <linux/export.h>
+#include <linux/raid/pq.h>
+
+static inline void xor_block(u8 *p1, u8 *p2)
+{
+	typedef struct { u8 _[256]; } addrtype;
+
+	asm volatile(
+		"	xc	0(256,%[p1]),0(%[p2])\n"
+		: "+m" (*(addrtype *) p1) : "m" (*(addrtype *) p2),
+		  [p1] "a" (p1), [p2] "a" (p2) : "cc");
+}
+
+/* Recover two failed data blocks. */
+static void raid6_2data_recov_s390xc(int disks, size_t bytes, int faila,
+		int failb, void **ptrs)
+{
+	u8 *p, *q, *dp, *dq;
+	const u8 *pbmul;	/* P multiplier table for B data */
+	const u8 *qmul;		/* Q multiplier table (for both) */
+	int i;
+
+	p = (u8 *)ptrs[disks-2];
+	q = (u8 *)ptrs[disks-1];
+
+	/* Compute syndrome with zero for the missing data pages
+	   Use the dead data pages as temporary storage for
+	   delta p and delta q */
+	dp = (u8 *)ptrs[faila];
+	ptrs[faila] = (void *)raid6_empty_zero_page;
+	ptrs[disks-2] = dp;
+	dq = (u8 *)ptrs[failb];
+	ptrs[failb] = (void *)raid6_empty_zero_page;
+	ptrs[disks-1] = dq;
+
+	raid6_call.gen_syndrome(disks, bytes, ptrs);
+
+	/* Restore pointer table */
+	ptrs[faila]   = dp;
+	ptrs[failb]   = dq;
+	ptrs[disks-2] = p;
+	ptrs[disks-1] = q;
+
+	/* Now, pick the proper data tables */
+	pbmul = raid6_gfmul[raid6_gfexi[failb-faila]];
+	qmul  = raid6_gfmul[raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]]];
+
+	/* Now do it... */
+	while (bytes) {
+		xor_block(dp, p);
+		xor_block(dq, q);
+		for (i = 0; i < 256; i++)
+			dq[i] = pbmul[dp[i]] ^ qmul[dq[i]];
+		xor_block(dp, dq);
+		p += 256;
+		q += 256;
+		dp += 256;
+		dq += 256;
+		bytes -= 256;
+	}
+}
+
+/* Recover failure of one data block plus the P block */
+static void raid6_datap_recov_s390xc(int disks, size_t bytes, int faila,
+		void **ptrs)
+{
+	u8 *p, *q, *dq;
+	const u8 *qmul;		/* Q multiplier table */
+	int i;
+
+	p = (u8 *)ptrs[disks-2];
+	q = (u8 *)ptrs[disks-1];
+
+	/* Compute syndrome with zero for the missing data page
+	   Use the dead data page as temporary storage for delta q */
+	dq = (u8 *)ptrs[faila];
+	ptrs[faila] = (void *)raid6_empty_zero_page;
+	ptrs[disks-1] = dq;
+
+	raid6_call.gen_syndrome(disks, bytes, ptrs);
+
+	/* Restore pointer table */
+	ptrs[faila]   = dq;
+	ptrs[disks-1] = q;
+
+	/* Now, pick the proper data tables */
+	qmul  = raid6_gfmul[raid6_gfinv[raid6_gfexp[faila]]];
+
+	/* Now do it... */
+	while (bytes) {
+		xor_block(dq, q);
+		for (i = 0; i < 256; i++)
+			dq[i] = qmul[dq[i]];
+		xor_block(p, dq);
+		p += 256;
+		q += 256;
+		dq += 256;
+		bytes -= 256;
+	}
+}
+
+
+const struct raid6_recov_calls raid6_recov_s390xc = {
+	.data2 = raid6_2data_recov_s390xc,
+	.datap = raid6_datap_recov_s390xc,
+	.valid = NULL,
+	.name = "s390xc",
+	.priority = 1,
+};
-- 
cgit v0.10.2


From e68f1d4ca99e08652066f40d7778b6007f0149d9 Mon Sep 17 00:00:00 2001
From: Bhaktipriya Shridhar <bhaktipriya96@gmail.com>
Date: Wed, 31 Aug 2016 01:57:20 +0530
Subject: s390: Remove deprecated create_singlethread_workqueue

The workqueue "appldata_wq" has been replaced with an ordered dedicated
workqueue.

WQ_MEM_RECLAIM has not been set since the workqueue is not being used on
a memory reclaim path.

The adapter->work_queue queues multiple work items viz
&adapter->scan_work, &port->rport_work, &adapter->ns_up_work,
&adapter->stat_work, adapter->work_queue, &adapter->events.work,
&port->gid_pn_work, &port->test_link_work. Hence, an ordered
dedicated workqueue has been used.

WQ_MEM_RECLAIM has been set to ensure forward progress under memory
pressure.

Signed-off-by: Bhaktipriya Shridhar <bhaktipriya96@gmail.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c
index 15c9424..f587c48 100644
--- a/arch/s390/appldata/appldata_base.c
+++ b/arch/s390/appldata/appldata_base.c
@@ -542,7 +542,7 @@ static int __init appldata_init(void)
 		rc = PTR_ERR(appldata_pdev);
 		goto out_driver;
 	}
-	appldata_wq = create_singlethread_workqueue("appldata");
+	appldata_wq = alloc_ordered_workqueue("appldata", 0);
 	if (!appldata_wq) {
 		rc = -ENOMEM;
 		goto out_device;
diff --git a/drivers/s390/scsi/zfcp_aux.c b/drivers/s390/scsi/zfcp_aux.c
index c00ac46..bcc8f3d 100644
--- a/drivers/s390/scsi/zfcp_aux.c
+++ b/drivers/s390/scsi/zfcp_aux.c
@@ -310,7 +310,7 @@ static int zfcp_setup_adapter_work_queue(struct zfcp_adapter *adapter)
 
 	snprintf(name, sizeof(name), "zfcp_q_%s",
 		 dev_name(&adapter->ccw_device->dev));
-	adapter->work_queue = create_singlethread_workqueue(name);
+	adapter->work_queue = alloc_ordered_workqueue(name, WQ_MEM_RECLAIM);
 
 	if (adapter->work_queue)
 		return 0;
-- 
cgit v0.10.2


From 6512391a30f6b158488e941214541e84473b6bf9 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Tue, 6 Sep 2016 09:01:31 +0200
Subject: s390/crypto: avoid returning garbage value

Static analysis with cppcheck detected that ret is not initialized
and hence garbage is potentially being returned in the case where
prng_data->ppnows.reseed_counter <= prng_reseed_limit.

Thanks to Martin Schwidefsky for spotting a mistake in my original
fix.

Fixes: 0177db01adf26cf9 ("s390/crypto: simplify return code handling")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c
index 79e3a1f..9cc050f 100644
--- a/arch/s390/crypto/prng.c
+++ b/arch/s390/crypto/prng.c
@@ -434,7 +434,7 @@ static int prng_sha512_generate(u8 *buf, size_t nbytes)
 		memcpy(prng_data->prev, buf, nbytes);
 	}
 
-	return ret;
+	return nbytes;
 }
 
 
-- 
cgit v0.10.2


From c783b91ebdbab67e848889c29dd0611c2b2c9fea Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Tue, 6 Sep 2016 10:46:36 +0200
Subject: s390: add assembler include path for vx-insn.h

With git commit 0eab11c7e0d30de14a15ccd8269eef238321a8e1
"s390/vx: allow to include vx-insn.h with .include"
and an older gcc we get errors like this:

{standard input}:6: Error: can't open asm/vx-insn.h for reading:
No such file or directory
arch/s390/kernel/fpu.c:57: Error: Unrecognized opcode: `vstm'

To solve this issue simply add the path to arch/s390/include to
all assembler runs.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index 224b427..54e0052 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -46,6 +46,8 @@ cflags-$(CONFIG_MARCH_Z196_TUNE)	+= -mtune=z196
 cflags-$(CONFIG_MARCH_ZEC12_TUNE)	+= -mtune=zEC12
 cflags-$(CONFIG_MARCH_Z13_TUNE)	+= -mtune=z13
 
+cflags-y += -Wa,-I$(srctree)/arch/$(ARCH)/include
+
 #KBUILD_IMAGE is necessary for make rpm
 KBUILD_IMAGE	:=arch/s390/boot/image
 
-- 
cgit v0.10.2


From 9078a54996a0989e25a04bbb9276bc340a52a673 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Mon, 12 Sep 2016 13:13:38 +0200
Subject: s390: claim efficient unaligned access

Most unaligned accesses are reasonably efficient (no kernel emulation)
on s390, so let's announce it.

This also
- removes the ubsan false positives for unaligned accesses on s390 with
  default config
- uses simpler arithmetic in several functions in several other areas
  of the kernel like ethernet frame classification

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index e751fe2..f4989fb 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -137,6 +137,7 @@ config S390
 	select HAVE_DMA_API_DEBUG
 	select HAVE_DYNAMIC_FTRACE
 	select HAVE_DYNAMIC_FTRACE_WITH_REGS
+	select HAVE_EFFICIENT_UNALIGNED_ACCESS
 	select HAVE_EXIT_THREAD
 	select HAVE_FTRACE_MCOUNT_RECORD
 	select HAVE_FUNCTION_GRAPH_TRACER
-- 
cgit v0.10.2


From f296190e41ee1e1e6912f0ddae09b28e9cfae48d Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Tue, 13 Sep 2016 03:10:39 +0900
Subject: s390/crashdump: use list_first_entry_or_null

The combo of list_empty() check and return list_first_entry()
can be replaced with list_first_entry_or_null().

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
index 29df848..f9293bf 100644
--- a/arch/s390/kernel/crash_dump.c
+++ b/arch/s390/kernel/crash_dump.c
@@ -71,9 +71,7 @@ struct save_area * __init save_area_alloc(bool is_boot_cpu)
  */
 struct save_area * __init save_area_boot_cpu(void)
 {
-	if (list_empty(&dump_save_areas))
-		return NULL;
-	return list_first_entry(&dump_save_areas, struct save_area, list);
+	return list_first_entry_or_null(&dump_save_areas, struct save_area, list);
 }
 
 /*
-- 
cgit v0.10.2


From 725c4d22bbc4fcac5779963e0ff9cdf232afbb90 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Mon, 12 Sep 2016 14:37:19 +0200
Subject: ubsan: allow to disable the null sanitizer

Some architectures use a hardware defined structure at address zero.
Checking for a null pointer will result in many ubsan reports.
Allow users to disable the null sanitizer.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Acked-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/lib/Kconfig.ubsan b/lib/Kconfig.ubsan
index 39494af..bc6e651 100644
--- a/lib/Kconfig.ubsan
+++ b/lib/Kconfig.ubsan
@@ -1,6 +1,9 @@
 config ARCH_HAS_UBSAN_SANITIZE_ALL
 	bool
 
+config ARCH_WANTS_UBSAN_NO_NULL
+	def_bool n
+
 config UBSAN
 	bool "Undefined behaviour sanity checker"
 	help
@@ -34,3 +37,11 @@ config UBSAN_ALIGNMENT
 	  This option enables detection of unaligned memory accesses.
 	  Enabling this option on architectures that support unaligned
 	  accesses may produce a lot of false positives.
+
+config UBSAN_NULL
+	bool "Enable checking of null pointers"
+	depends on UBSAN
+	default y if !ARCH_WANTS_UBSAN_NO_NULL
+	help
+	  This option enables detection of memory accesses via a
+	  null pointer.
diff --git a/scripts/Makefile.ubsan b/scripts/Makefile.ubsan
index 8ab6867..dd779c4 100644
--- a/scripts/Makefile.ubsan
+++ b/scripts/Makefile.ubsan
@@ -3,7 +3,6 @@ ifdef CONFIG_UBSAN
       CFLAGS_UBSAN += $(call cc-option, -fsanitize=integer-divide-by-zero)
       CFLAGS_UBSAN += $(call cc-option, -fsanitize=unreachable)
       CFLAGS_UBSAN += $(call cc-option, -fsanitize=vla-bound)
-      CFLAGS_UBSAN += $(call cc-option, -fsanitize=null)
       CFLAGS_UBSAN += $(call cc-option, -fsanitize=signed-integer-overflow)
       CFLAGS_UBSAN += $(call cc-option, -fsanitize=bounds)
       CFLAGS_UBSAN += $(call cc-option, -fsanitize=object-size)
@@ -14,4 +13,8 @@ ifdef CONFIG_UBSAN
 ifdef CONFIG_UBSAN_ALIGNMENT
       CFLAGS_UBSAN += $(call cc-option, -fsanitize=alignment)
 endif
+
+ifdef CONFIG_UBSAN_NULL
+      CFLAGS_UBSAN += $(call cc-option, -fsanitize=null)
+endif
 endif
-- 
cgit v0.10.2


From c42d8c7dbe596d849b43b7581bcc39b51f148c48 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Mon, 12 Sep 2016 14:37:20 +0200
Subject: s390: enable UBSAN

This enables UBSAN for s390. We have to disable the null sanitizer
as s390 code does access memory via a null pointer (the prefix page).

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Acked-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index f4989fb..608f4ea 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -74,6 +74,7 @@ config S390
 	select ARCH_HAS_GCOV_PROFILE_ALL
 	select ARCH_HAS_KCOV
 	select ARCH_HAS_SG_CHAIN
+	select ARCH_HAS_UBSAN_SANITIZE_ALL
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	select ARCH_INLINE_READ_LOCK
 	select ARCH_INLINE_READ_LOCK_BH
@@ -110,6 +111,7 @@ config S390
 	select ARCH_USE_CMPXCHG_LOCKREF
 	select ARCH_WANTS_DYNAMIC_TASK_STRUCT
 	select ARCH_WANTS_PROT_NUMA_PROT_NONE
+	select ARCH_WANTS_UBSAN_NO_NULL
 	select ARCH_WANT_IPC_PARSE_VERSION
 	select BUILDTIME_EXTABLE_SORT
 	select CLONE_BACKWARDS2
diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile
index 33ba697c..0daa070 100644
--- a/arch/s390/boot/compressed/Makefile
+++ b/arch/s390/boot/compressed/Makefile
@@ -17,6 +17,7 @@ KBUILD_CFLAGS += $(call cc-option,-mpacked-stack)
 KBUILD_CFLAGS += $(call cc-option,-ffreestanding)
 
 GCOV_PROFILE := n
+UBSAN_SANITIZE := n
 
 OBJECTS := $(addprefix $(objtree)/arch/s390/kernel/, head.o sclp.o ebcdic.o als.o)
 OBJECTS += $(obj)/head.o $(obj)/misc.o $(obj)/piggy.o
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 3234817..72ccc41 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -48,6 +48,9 @@ AFLAGS_head.o		+= -march=z900
 endif
 GCOV_PROFILE_sclp.o := n
 GCOV_PROFILE_als.o := n
+UBSAN_SANITIZE_als.o := n
+UBSAN_SANITIZE_early.o := n
+UBSAN_SANITIZE_sclp.o := n
 
 obj-y	:= traps.o time.o process.o base.o early.o setup.o idle.o vtime.o
 obj-y	+= processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o
diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile
index 6814545..6cc9478 100644
--- a/arch/s390/kernel/vdso32/Makefile
+++ b/arch/s390/kernel/vdso32/Makefile
@@ -24,8 +24,9 @@ obj-y += vdso32_wrapper.o
 extra-y += vdso32.lds
 CPPFLAGS_vdso32.lds += -P -C -U$(ARCH)
 
-# Disable gcov profiling for VDSO code
+# Disable gcov profiling and ubsan for VDSO code
 GCOV_PROFILE := n
+UBSAN_SANITIZE := n
 
 # Force dependency (incbin is bad)
 $(obj)/vdso32_wrapper.o : $(obj)/vdso32.so
diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile
index 0b0fd22..2d54c18 100644
--- a/arch/s390/kernel/vdso64/Makefile
+++ b/arch/s390/kernel/vdso64/Makefile
@@ -24,8 +24,9 @@ obj-y += vdso64_wrapper.o
 extra-y += vdso64.lds
 CPPFLAGS_vdso64.lds += -P -C -U$(ARCH)
 
-# Disable gcov profiling for VDSO code
+# Disable gcov profiling and ubsan for VDSO code
 GCOV_PROFILE := n
+UBSAN_SANITIZE := n
 
 # Force dependency (incbin is bad)
 $(obj)/vdso64_wrapper.o : $(obj)/vdso64.so
-- 
cgit v0.10.2


From eed5c4b117d1f77553d517072584c4ac779af0ba Mon Sep 17 00:00:00 2001
From: Stefan Haberland <sth@linux.vnet.ibm.com>
Date: Wed, 31 Aug 2016 13:31:10 +0200
Subject: s390/dasd: add missing KOBJ_CHANGE event for unformatted devices

The DASD device driver throws change events for the DASD block device
after the online processing is done so that udev rules can take
actions after it.
The change event was missing for unformatted devices.

Signed-off-by: Stefan Haberland <sth@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index fb1b56a..5245d7e 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -336,6 +336,7 @@ static int dasd_state_basic_to_ready(struct dasd_device *device)
 {
 	int rc;
 	struct dasd_block *block;
+	struct gendisk *disk;
 
 	rc = 0;
 	block = device->block;
@@ -346,6 +347,9 @@ static int dasd_state_basic_to_ready(struct dasd_device *device)
 		if (rc) {
 			if (rc != -EAGAIN) {
 				device->state = DASD_STATE_UNFMT;
+				disk = device->block->gdp;
+				kobject_uevent(&disk_to_dev(disk)->kobj,
+					       KOBJ_CHANGE);
 				goto out;
 			}
 			return rc;
-- 
cgit v0.10.2


From f622b517563b0d3be6c41e932124e0b717149ad8 Mon Sep 17 00:00:00 2001
From: Sebastian Ott <sebott@linux.vnet.ibm.com>
Date: Fri, 19 Aug 2016 19:57:49 +0200
Subject: s390/vmur: fix irb pointer dereference in int handler

"irb" in vmur's int handler can be an error pointer. Don't dereference
this pointer in that case.

Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/drivers/s390/char/vmur.c b/drivers/s390/char/vmur.c
index 6c30e93a..ff18f37 100644
--- a/drivers/s390/char/vmur.c
+++ b/drivers/s390/char/vmur.c
@@ -306,10 +306,11 @@ static void ur_int_handler(struct ccw_device *cdev, unsigned long intparm,
 {
 	struct urdev *urd;
 
-	TRACE("ur_int_handler: intparm=0x%lx cstat=%02x dstat=%02x res=%u\n",
-	      intparm, irb->scsw.cmd.cstat, irb->scsw.cmd.dstat,
-	      irb->scsw.cmd.count);
-
+	if (!IS_ERR(irb)) {
+		TRACE("ur_int_handler: intparm=0x%lx cstat=%02x dstat=%02x res=%u\n",
+		      intparm, irb->scsw.cmd.cstat, irb->scsw.cmd.dstat,
+		      irb->scsw.cmd.count);
+	}
 	if (!intparm) {
 		TRACE("ur_int_handler: unsolicited interrupt\n");
 		return;
-- 
cgit v0.10.2


From ecc6410abf898c580077e028dd8eb123bfbda502 Mon Sep 17 00:00:00 2001
From: Sebastian Ott <sebott@linux.vnet.ibm.com>
Date: Fri, 16 Sep 2016 17:01:46 +0200
Subject: s390: export header for CLP ioctl

Export clp.h for usage by userspace.

Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/include/uapi/asm/Kbuild b/arch/s390/include/uapi/asm/Kbuild
index 08fe6da..cc44b09 100644
--- a/arch/s390/include/uapi/asm/Kbuild
+++ b/arch/s390/include/uapi/asm/Kbuild
@@ -6,6 +6,7 @@ header-y += bitsperlong.h
 header-y += byteorder.h
 header-y += chpid.h
 header-y += chsc.h
+header-y += clp.h
 header-y += cmb.h
 header-y += dasd.h
 header-y += debug.h
-- 
cgit v0.10.2


From dcc096c540d794456c1edbe6d55b9d611c86e8db Mon Sep 17 00:00:00 2001
From: Paul Gortmaker <paul.gortmaker@windriver.com>
Date: Mon, 19 Sep 2016 17:54:56 -0400
Subject: s390: migrate exception table users off module.h and onto extable.h

These files were only including module.h for exception table
related functions.  We've now separated that content out into its
own file "extable.h" so now move over to that and avoid all the
extra header content in module.h that we don't really need to compile
these files.

The additions of uaccess.h are to deal with implicit includes like:

arch/s390/kernel/traps.c: In function 'do_report_trap':
arch/s390/kernel/traps.c:56:4: error: implicit declaration of function 'extable_fixup' [-Werror=implicit-function-declaration]
arch/s390/kernel/traps.c: In function 'illegal_op':
arch/s390/kernel/traps.c:173:3: error: implicit declaration of function 'get_user' [-Werror=implicit-function-declaration]

Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: linux-s390@vger.kernel.org
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 717b03a..2374c5b 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -13,7 +13,7 @@
 #include <linux/string.h>
 #include <linux/ctype.h>
 #include <linux/lockdep.h>
-#include <linux/module.h>
+#include <linux/extable.h>
 #include <linux/pfn.h>
 #include <linux/uaccess.h>
 #include <linux/kernel.h>
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index dd6306c..fdb4042 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -26,12 +26,14 @@
 #include <linux/stop_machine.h>
 #include <linux/kdebug.h>
 #include <linux/uaccess.h>
+#include <linux/extable.h>
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/hardirq.h>
 #include <linux/ftrace.h>
 #include <asm/cacheflush.h>
 #include <asm/sections.h>
+#include <asm/uaccess.h>
 #include <asm/dis.h>
 
 DEFINE_PER_CPU(struct kprobe *, current_kprobe);
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index dd97a3e..d0539f7 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -14,11 +14,12 @@
  */
 #include <linux/kprobes.h>
 #include <linux/kdebug.h>
-#include <linux/module.h>
+#include <linux/extable.h>
 #include <linux/ptrace.h>
 #include <linux/sched.h>
 #include <linux/mm.h>
 #include <linux/slab.h>
+#include <asm/uaccess.h>
 #include <asm/fpu/api.h>
 #include "entry.h"
 
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index a58bca6..cca7388 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -24,7 +24,7 @@
 #include <linux/kdebug.h>
 #include <linux/init.h>
 #include <linux/console.h>
-#include <linux/module.h>
+#include <linux/extable.h>
 #include <linux/hardirq.h>
 #include <linux/kprobes.h>
 #include <linux/uaccess.h>
-- 
cgit v0.10.2


From bb2b7ffbc4e25b0c4839317b6c31cb768efe8b1f Mon Sep 17 00:00:00 2001
From: Sebastian Ott <sebott@linux.vnet.ibm.com>
Date: Tue, 23 Aug 2016 15:59:15 +0200
Subject: iommu/s390: simplify registration of I/O address translation
 parameters

When a new function is attached to an iommu domain we need to register
I/O address translation parameters. Since commit
69eea95c ("s390/pci_dma: fix DMA table corruption with > 4 TB main memory")
start_dma and end_dma correctly describe the range of usable I/O addresses.

Simplify the code by using these values directly.

Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Reviewed-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index a04d491..3b44b1d 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -101,8 +101,7 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
 		zpci_dma_exit_device(zdev);
 
 	zdev->dma_table = s390_domain->dma_table;
-	rc = zpci_register_ioat(zdev, 0, zdev->start_dma + PAGE_OFFSET,
-				zdev->start_dma + zdev->iommu_size - 1,
+	rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
 				(u64) zdev->dma_table);
 	if (rc)
 		goto out_restore;
-- 
cgit v0.10.2


From 3b13f1fea1be44f29be4150246624502a0227ebd Mon Sep 17 00:00:00 2001
From: Sebastian Ott <sebott@linux.vnet.ibm.com>
Date: Wed, 17 Aug 2016 13:39:46 +0200
Subject: s390/pci_dma: remove dma address range check

We calculate dma addresses using an iommu bitmap. Since commit
69eea95c ("s390/pci_dma: fix DMA table corruption with > 4 TB main memory")
we've made sure that addresses created using that bitmap are below
the maximum reported by firmware. Thus the additional check for
that address to be within range can be removed.

Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Reviewed-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
index 7297fce..6581239 100644
--- a/arch/s390/pci/pci_dma.c
+++ b/arch/s390/pci/pci_dma.c
@@ -305,12 +305,7 @@ static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page,
 
 	/* Use rounded up size */
 	size = nr_pages * PAGE_SIZE;
-
 	dma_addr = zdev->start_dma + iommu_page_index * PAGE_SIZE;
-	if (dma_addr + size > zdev->end_dma) {
-		ret = -ERANGE;
-		goto out_free;
-	}
 
 	if (direction == DMA_NONE || direction == DMA_TO_DEVICE)
 		flags |= ZPCI_TABLE_PROTECTED;
-- 
cgit v0.10.2


From 8cb63b78791eef67ea95831c6ef5e6039c572b14 Mon Sep 17 00:00:00 2001
From: Sebastian Ott <sebott@linux.vnet.ibm.com>
Date: Wed, 17 Aug 2016 13:51:11 +0200
Subject: s390/pci_dma: simplify dma address calculation

Simplify the code we use to calculate dma addresses by putting
everything related in a dma_alloc_address function. Also provide
a dma_free_address counterpart.

Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Reviewed-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
index 6581239..12b58b6 100644
--- a/arch/s390/pci/pci_dma.c
+++ b/arch/s390/pci/pci_dma.c
@@ -230,34 +230,36 @@ static unsigned long __dma_alloc_iommu(struct device *dev,
 				boundary_size, 0);
 }
 
-static unsigned long dma_alloc_iommu(struct device *dev, int size)
+static dma_addr_t dma_alloc_address(struct device *dev, int size)
 {
 	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
 	unsigned long offset, flags;
-	int wrap = 0;
 
 	spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags);
 	offset = __dma_alloc_iommu(dev, zdev->next_bit, size);
 	if (offset == -1) {
 		/* wrap-around */
 		offset = __dma_alloc_iommu(dev, 0, size);
-		wrap = 1;
-	}
-
-	if (offset != -1) {
-		zdev->next_bit = offset + size;
-		if (!zdev->tlb_refresh && !s390_iommu_strict && wrap)
+		if (offset == -1) {
+			spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
+			return DMA_ERROR_CODE;
+		}
+		if (!zdev->tlb_refresh && !s390_iommu_strict)
 			/* global flush after wrap-around with lazy unmap */
 			zpci_refresh_global(zdev);
 	}
+	zdev->next_bit = offset + size;
 	spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
-	return offset;
+
+	return zdev->start_dma + offset * PAGE_SIZE;
 }
 
-static void dma_free_iommu(struct device *dev, unsigned long offset, int size)
+static void dma_free_address(struct device *dev, dma_addr_t dma_addr, int size)
 {
 	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
-	unsigned long flags;
+	unsigned long flags, offset;
+
+	offset = (dma_addr - zdev->start_dma) >> PAGE_SHIFT;
 
 	spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags);
 	if (!zdev->iommu_bitmap)
@@ -289,23 +291,22 @@ static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page,
 				     unsigned long attrs)
 {
 	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
-	unsigned long nr_pages, iommu_page_index;
 	unsigned long pa = page_to_phys(page) + offset;
 	int flags = ZPCI_PTE_VALID;
+	unsigned long nr_pages;
 	dma_addr_t dma_addr;
 	int ret;
 
 	/* This rounds up number of pages based on size and offset */
 	nr_pages = iommu_num_pages(pa, size, PAGE_SIZE);
-	iommu_page_index = dma_alloc_iommu(dev, nr_pages);
-	if (iommu_page_index == -1) {
+	dma_addr = dma_alloc_address(dev, nr_pages);
+	if (dma_addr == DMA_ERROR_CODE) {
 		ret = -ENOSPC;
 		goto out_err;
 	}
 
 	/* Use rounded up size */
 	size = nr_pages * PAGE_SIZE;
-	dma_addr = zdev->start_dma + iommu_page_index * PAGE_SIZE;
 
 	if (direction == DMA_NONE || direction == DMA_TO_DEVICE)
 		flags |= ZPCI_TABLE_PROTECTED;
@@ -318,7 +319,7 @@ static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page,
 	return dma_addr + (offset & ~PAGE_MASK);
 
 out_free:
-	dma_free_iommu(dev, iommu_page_index, nr_pages);
+	dma_free_address(dev, dma_addr, nr_pages);
 out_err:
 	zpci_err("map error:\n");
 	zpci_err_dma(ret, pa);
@@ -330,7 +331,6 @@ static void s390_dma_unmap_pages(struct device *dev, dma_addr_t dma_addr,
 				 unsigned long attrs)
 {
 	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
-	unsigned long iommu_page_index;
 	int npages, ret;
 
 	npages = iommu_num_pages(dma_addr, size, PAGE_SIZE);
@@ -344,8 +344,7 @@ static void s390_dma_unmap_pages(struct device *dev, dma_addr_t dma_addr,
 	}
 
 	atomic64_add(npages, &zdev->unmapped_pages);
-	iommu_page_index = (dma_addr - zdev->start_dma) >> PAGE_SHIFT;
-	dma_free_iommu(dev, iommu_page_index, npages);
+	dma_free_address(dev, dma_addr, npages);
 }
 
 static void *s390_dma_alloc(struct device *dev, size_t size,
-- 
cgit v0.10.2


From ee877b81c6b92c190e7186c1ffd054804b426c02 Mon Sep 17 00:00:00 2001
From: Sebastian Ott <sebott@linux.vnet.ibm.com>
Date: Fri, 19 Aug 2016 09:12:09 +0200
Subject: s390/pci_dma: improve map_sg

Our map_sg implementation mapped sg entries independently of each other.
For ease of use and possible performance improvements this patch changes
the implementation to try to map as many (likely physically non-contiguous)
sglist entries as possible into a contiguous DMA segment.

Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Reviewed-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
index 12b58b6..54cb54c 100644
--- a/arch/s390/pci/pci_dma.c
+++ b/arch/s390/pci/pci_dma.c
@@ -388,37 +388,94 @@ static void s390_dma_free(struct device *dev, size_t size,
 	free_pages((unsigned long) pa, get_order(size));
 }
 
-static int s390_dma_map_sg(struct device *dev, struct scatterlist *sg,
-			   int nr_elements, enum dma_data_direction dir,
-			   unsigned long attrs)
+/* Map a segment into a contiguous dma address area */
+static int __s390_dma_map_sg(struct device *dev, struct scatterlist *sg,
+			     size_t size, dma_addr_t *handle,
+			     enum dma_data_direction dir)
 {
-	int mapped_elements = 0;
+	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
+	dma_addr_t dma_addr_base, dma_addr;
+	int flags = ZPCI_PTE_VALID;
 	struct scatterlist *s;
-	int i;
+	unsigned long pa;
+	int ret;
 
-	for_each_sg(sg, s, nr_elements, i) {
-		struct page *page = sg_page(s);
-		s->dma_address = s390_dma_map_pages(dev, page, s->offset,
-						    s->length, dir, 0);
-		if (!dma_mapping_error(dev, s->dma_address)) {
-			s->dma_length = s->length;
-			mapped_elements++;
-		} else
+	size = PAGE_ALIGN(size);
+	dma_addr_base = dma_alloc_address(dev, size >> PAGE_SHIFT);
+	if (dma_addr_base == DMA_ERROR_CODE)
+		return -ENOMEM;
+
+	dma_addr = dma_addr_base;
+	if (dir == DMA_NONE || dir == DMA_TO_DEVICE)
+		flags |= ZPCI_TABLE_PROTECTED;
+
+	for (s = sg; dma_addr < dma_addr_base + size; s = sg_next(s)) {
+		pa = page_to_phys(sg_page(s)) + s->offset;
+		ret = dma_update_trans(zdev, pa, dma_addr, s->length, flags);
+		if (ret)
 			goto unmap;
+
+		dma_addr += s->length;
 	}
-out:
-	return mapped_elements;
+	*handle = dma_addr_base;
+	atomic64_add(size >> PAGE_SHIFT, &zdev->mapped_pages);
+
+	return ret;
 
 unmap:
-	for_each_sg(sg, s, mapped_elements, i) {
-		if (s->dma_address)
-			s390_dma_unmap_pages(dev, s->dma_address, s->dma_length,
-					     dir, 0);
-		s->dma_address = 0;
+	dma_update_trans(zdev, 0, dma_addr_base, dma_addr - dma_addr_base,
+			 ZPCI_PTE_INVALID);
+	dma_free_address(dev, dma_addr_base, size >> PAGE_SHIFT);
+	zpci_err("map error:\n");
+	zpci_err_dma(ret, pa);
+	return ret;
+}
+
+static int s390_dma_map_sg(struct device *dev, struct scatterlist *sg,
+			   int nr_elements, enum dma_data_direction dir,
+			   unsigned long attrs)
+{
+	struct scatterlist *s = sg, *start = sg, *dma = sg;
+	unsigned int max = dma_get_max_seg_size(dev);
+	unsigned int size = s->offset + s->length;
+	unsigned int offset = s->offset;
+	int count = 0, i;
+
+	for (i = 1; i < nr_elements; i++) {
+		s = sg_next(s);
+
+		s->dma_address = DMA_ERROR_CODE;
 		s->dma_length = 0;
+
+		if (s->offset || (size & ~PAGE_MASK) ||
+		    size + s->length > max) {
+			if (__s390_dma_map_sg(dev, start, size,
+					      &dma->dma_address, dir))
+				goto unmap;
+
+			dma->dma_address += offset;
+			dma->dma_length = size - offset;
+
+			size = offset = s->offset;
+			start = s;
+			dma = sg_next(dma);
+			count++;
+		}
+		size += s->length;
 	}
-	mapped_elements = 0;
-	goto out;
+	if (__s390_dma_map_sg(dev, start, size, &dma->dma_address, dir))
+		goto unmap;
+
+	dma->dma_address += offset;
+	dma->dma_length = size - offset;
+
+	return count + 1;
+unmap:
+	for_each_sg(sg, s, count, i)
+		s390_dma_unmap_pages(dev, sg_dma_address(s), sg_dma_len(s),
+				     dir, attrs);
+
+	return 0;
 }
 
 static void s390_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
@@ -429,8 +486,9 @@ static void s390_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
 	int i;
 
 	for_each_sg(sg, s, nr_elements, i) {
-		s390_dma_unmap_pages(dev, s->dma_address, s->dma_length, dir,
-				     0);
+		if (s->dma_length)
+			s390_dma_unmap_pages(dev, s->dma_address, s->dma_length,
+					     dir, attrs);
 		s->dma_address = 0;
 		s->dma_length = 0;
 	}
-- 
cgit v0.10.2


From 1f166e9e5c7cd5d1fe2a5da7c97c1688d4c93fbb Mon Sep 17 00:00:00 2001
From: Sebastian Ott <sebott@linux.vnet.ibm.com>
Date: Mon, 5 Sep 2016 17:49:17 +0200
Subject: s390/pci_dma: split dma_update_trans

Split dma_update_trans into __dma_update_trans which handles updating
the dma translation tables and __dma_purge_tlb which takes care of
purging associated entries in the dma translation lookaside buffer.

The map_sg API makes use of this split approach by calling
__dma_update_trans once per physically contiguous address range but
__dma_purge_tlb only once per dma contiguous address range.

This results in fewer invocations of the expensive RPCIT instruction
when using map_sg.

Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Reviewed-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
index 54cb54c..9e5f2ec 100644
--- a/arch/s390/pci/pci_dma.c
+++ b/arch/s390/pci/pci_dma.c
@@ -129,12 +129,11 @@ void dma_update_cpu_trans(unsigned long *entry, void *page_addr, int flags)
 		entry_clr_protected(entry);
 }
 
-static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa,
-			    dma_addr_t dma_addr, size_t size, int flags)
+static int __dma_update_trans(struct zpci_dev *zdev, unsigned long pa,
+			      dma_addr_t dma_addr, size_t size, int flags)
 {
 	unsigned int nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
 	u8 *page_addr = (u8 *) (pa & PAGE_MASK);
-	dma_addr_t start_dma_addr = dma_addr;
 	unsigned long irq_flags;
 	unsigned long *entry;
 	int i, rc = 0;
@@ -145,7 +144,7 @@ static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa,
 	spin_lock_irqsave(&zdev->dma_table_lock, irq_flags);
 	if (!zdev->dma_table) {
 		rc = -EINVAL;
-		goto no_refresh;
+		goto out_unlock;
 	}
 
 	for (i = 0; i < nr_pages; i++) {
@@ -159,20 +158,6 @@ static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa,
 		dma_addr += PAGE_SIZE;
 	}
 
-	/*
-	 * With zdev->tlb_refresh == 0, rpcit is not required to establish new
-	 * translations when previously invalid translation-table entries are
-	 * validated. With lazy unmap, it also is skipped for previously valid
-	 * entries, but a global rpcit is then required before any address can
-	 * be re-used, i.e. after each iommu bitmap wrap-around.
-	 */
-	if (!zdev->tlb_refresh &&
-			(!s390_iommu_strict ||
-			((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)))
-		goto no_refresh;
-
-	rc = zpci_refresh_trans((u64) zdev->fh << 32, start_dma_addr,
-				nr_pages * PAGE_SIZE);
 undo_cpu_trans:
 	if (rc && ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)) {
 		flags = ZPCI_PTE_INVALID;
@@ -185,12 +170,46 @@ undo_cpu_trans:
 			dma_update_cpu_trans(entry, page_addr, flags);
 		}
 	}
-
-no_refresh:
+out_unlock:
 	spin_unlock_irqrestore(&zdev->dma_table_lock, irq_flags);
 	return rc;
 }
 
+static int __dma_purge_tlb(struct zpci_dev *zdev, dma_addr_t dma_addr,
+			   size_t size, int flags)
+{
+	/*
+	 * With zdev->tlb_refresh == 0, rpcit is not required to establish new
+	 * translations when previously invalid translation-table entries are
+	 * validated. With lazy unmap, it also is skipped for previously valid
+	 * entries, but a global rpcit is then required before any address can
+	 * be re-used, i.e. after each iommu bitmap wrap-around.
+	 */
+	if (!zdev->tlb_refresh &&
+			(!s390_iommu_strict ||
+			((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)))
+		return 0;
+
+	return zpci_refresh_trans((u64) zdev->fh << 32, dma_addr,
+				  PAGE_ALIGN(size));
+}
+
+static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa,
+			    dma_addr_t dma_addr, size_t size, int flags)
+{
+	int rc;
+
+	rc = __dma_update_trans(zdev, pa, dma_addr, size, flags);
+	if (rc)
+		return rc;
+
+	rc = __dma_purge_tlb(zdev, dma_addr, size, flags);
+	if (rc && ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID))
+		__dma_update_trans(zdev, pa, dma_addr, size, ZPCI_PTE_INVALID);
+
+	return rc;
+}
+
 void dma_free_seg_table(unsigned long entry)
 {
 	unsigned long *sto = get_rt_sto(entry);
@@ -411,12 +430,16 @@ static int __s390_dma_map_sg(struct device *dev, struct scatterlist *sg,
 
 	for (s = sg; dma_addr < dma_addr_base + size; s = sg_next(s)) {
 		pa = page_to_phys(sg_page(s)) + s->offset;
-		ret = dma_update_trans(zdev, pa, dma_addr, s->length, flags);
+		ret = __dma_update_trans(zdev, pa, dma_addr, s->length, flags);
 		if (ret)
 			goto unmap;
 
 		dma_addr += s->length;
 	}
+	ret = __dma_purge_tlb(zdev, dma_addr_base, size, flags);
+	if (ret)
+		goto unmap;
+
 	*handle = dma_addr_base;
 	atomic64_add(size >> PAGE_SHIFT, &zdev->mapped_pages);
 
-- 
cgit v0.10.2


From 13954fd6913acff8f8b8c21612074b57051ba457 Mon Sep 17 00:00:00 2001
From: Sebastian Ott <sebott@linux.vnet.ibm.com>
Date: Thu, 8 Sep 2016 13:25:01 +0200
Subject: s390/pci_dma: improve lazy flush for unmap

Lazy unmap (defer tlb flush after unmap until dma address reuse) can
greatly reduce the number of RPCIT instructions in the best case. In
reality we are often far away from the best case scenario because our
implementation suffers from the following problem:

To create dma addresses we maintain an iommu bitmap and a pointer into
that bitmap to mark the start of the next search. That pointer moves from
the start to the end of that bitmap and we issue a global tlb flush
once that pointer wraps around. To prevent address reuse before we issue
the tlb flush we even have to move the next pointer during unmaps - when
clearing a bit > next. This could lead to a situation where we only use
the rear part of that bitmap and issue more tlb flushes than expected.

To fix this we no longer clear bits during unmap but maintain a 2nd
bitmap which we use to mark addresses that can't be reused until we issue
the global tlb flush after wrap around.

Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Reviewed-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index 8769cbf..6611f79 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -118,6 +118,7 @@ struct zpci_dev {
 
 	spinlock_t	iommu_bitmap_lock;
 	unsigned long	*iommu_bitmap;
+	unsigned long	*lazy_bitmap;
 	unsigned long	iommu_size;
 	unsigned long	iommu_pages;
 	unsigned int	next_bit;
diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
index 9e5f2ec..7350c8b 100644
--- a/arch/s390/pci/pci_dma.c
+++ b/arch/s390/pci/pci_dma.c
@@ -257,20 +257,28 @@ static dma_addr_t dma_alloc_address(struct device *dev, int size)
 	spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags);
 	offset = __dma_alloc_iommu(dev, zdev->next_bit, size);
 	if (offset == -1) {
+		if (!zdev->tlb_refresh && !s390_iommu_strict) {
+			/* global flush before DMA addresses are reused */
+			if (zpci_refresh_global(zdev))
+				goto out_error;
+
+			bitmap_andnot(zdev->iommu_bitmap, zdev->iommu_bitmap,
+				      zdev->lazy_bitmap, zdev->iommu_pages);
+			bitmap_zero(zdev->lazy_bitmap, zdev->iommu_pages);
+		}
 		/* wrap-around */
 		offset = __dma_alloc_iommu(dev, 0, size);
-		if (offset == -1) {
-			spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
-			return DMA_ERROR_CODE;
-		}
-		if (!zdev->tlb_refresh && !s390_iommu_strict)
-			/* global flush after wrap-around with lazy unmap */
-			zpci_refresh_global(zdev);
+		if (offset == -1)
+			goto out_error;
 	}
 	zdev->next_bit = offset + size;
 	spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
 
 	return zdev->start_dma + offset * PAGE_SIZE;
+
+out_error:
+	spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
+	return DMA_ERROR_CODE;
 }
 
 static void dma_free_address(struct device *dev, dma_addr_t dma_addr, int size)
@@ -283,13 +291,12 @@ static void dma_free_address(struct device *dev, dma_addr_t dma_addr, int size)
 	spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags);
 	if (!zdev->iommu_bitmap)
 		goto out;
-	bitmap_clear(zdev->iommu_bitmap, offset, size);
-	/*
-	 * Lazy flush for unmap: need to move next_bit to avoid address re-use
-	 * until wrap-around.
-	 */
-	if (!s390_iommu_strict && offset >= zdev->next_bit)
-		zdev->next_bit = offset + size;
+
+	if (zdev->tlb_refresh || s390_iommu_strict)
+		bitmap_clear(zdev->iommu_bitmap, offset, size);
+	else
+		bitmap_set(zdev->lazy_bitmap, offset, size);
+
 out:
 	spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
 }
@@ -557,7 +564,14 @@ int zpci_dma_init_device(struct zpci_dev *zdev)
 		rc = -ENOMEM;
 		goto free_dma_table;
 	}
+	if (!zdev->tlb_refresh && !s390_iommu_strict) {
+		zdev->lazy_bitmap = vzalloc(zdev->iommu_pages / 8);
+		if (!zdev->lazy_bitmap) {
+			rc = -ENOMEM;
+			goto free_bitmap;
+		}
 
+	}
 	rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
 				(u64) zdev->dma_table);
 	if (rc)
@@ -567,6 +581,8 @@ int zpci_dma_init_device(struct zpci_dev *zdev)
 free_bitmap:
 	vfree(zdev->iommu_bitmap);
 	zdev->iommu_bitmap = NULL;
+	vfree(zdev->lazy_bitmap);
+	zdev->lazy_bitmap = NULL;
 free_dma_table:
 	dma_free_cpu_table(zdev->dma_table);
 	zdev->dma_table = NULL;
@@ -588,6 +604,9 @@ void zpci_dma_exit_device(struct zpci_dev *zdev)
 	zdev->dma_table = NULL;
 	vfree(zdev->iommu_bitmap);
 	zdev->iommu_bitmap = NULL;
+	vfree(zdev->lazy_bitmap);
+	zdev->lazy_bitmap = NULL;
+
 	zdev->next_bit = 0;
 }
 
-- 
cgit v0.10.2


From a9f6273ff9c80dd2c226f7a2d5c16272e5092d3e Mon Sep 17 00:00:00 2001
From: Stefan Haberland <sth@linux.vnet.ibm.com>
Date: Tue, 20 Sep 2016 10:29:22 +0200
Subject: s390/dasd: fix hanging offline processing

Internal I/O is processed by the _sleep_on function which might wait for a
device to get operational. During offline processing this will never happen
and therefore the refcount of the device will not drop to zero and the
offline processing blocks as well.

Fix by letting requests fail in the _sleep_on function during offline
processing. No further handling of the requests is necessary since this is
internal I/O and the device is thrown away afterwards.

Reviewed-by: Peter Oberparleiter <oberpar@linux.vnet.ibm.com>
Signed-off-by: Stefan Haberland <sth@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index 5245d7e..706ae0a 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -2278,6 +2278,15 @@ static int _dasd_sleep_on(struct dasd_ccw_req *maincqr, int interruptible)
 			continue;
 		}
 		/*
+		 * Don't try to start requests if device is in
+		 * offline processing, it might wait forever
+		 */
+		if (test_bit(DASD_FLAG_OFFLINE, &device->flags)) {
+			cqr->status = DASD_CQR_FAILED;
+			cqr->intrc = -ENODEV;
+			continue;
+		}
+		/*
 		 * Don't try to start requests if device is stopped
 		 * except path verification requests
 		 */
-- 
cgit v0.10.2


From c020d722b110a44c613ef71e657e6dd4116e09d9 Mon Sep 17 00:00:00 2001
From: Stefan Haberland <sth@linux.vnet.ibm.com>
Date: Tue, 20 Sep 2016 10:42:38 +0200
Subject: s390/dasd: fix panic during offline processing

A DASD device consists of the device itself and a discipline with a
corresponding private structure. These fields are set up during online
processing right after the device is created and before it is processed by
the state machine and made available for I/O.
During offline processing the discipline pointer and the private data get
freed within the state machine and without protection of the existing
reference count. This might lead to a kernel panic because a function might
have taken a device reference and accesses the discipline pointer and/or
private data of the device while this is already freed.

Fix by freeing the discipline pointer and the private data after ensuring
that there is no reference to the device left.

Reviewed-by: Peter Oberparleiter <oberpar@linux.vnet.ibm.com>
Signed-off-by: Stefan Haberland <sth@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index 706ae0a..1de0890 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -212,16 +212,6 @@ static int dasd_state_known_to_new(struct dasd_device *device)
 {
 	/* Disable extended error reporting for this device. */
 	dasd_eer_disable(device);
-	/* Forget the discipline information. */
-	if (device->discipline) {
-		if (device->discipline->uncheck_device)
-			device->discipline->uncheck_device(device);
-		module_put(device->discipline->owner);
-	}
-	device->discipline = NULL;
-	if (device->base_discipline)
-		module_put(device->base_discipline->owner);
-	device->base_discipline = NULL;
 	device->state = DASD_STATE_NEW;
 
 	if (device->block)
@@ -3377,6 +3367,22 @@ int dasd_generic_probe(struct ccw_device *cdev,
 }
 EXPORT_SYMBOL_GPL(dasd_generic_probe);
 
+void dasd_generic_free_discipline(struct dasd_device *device)
+{
+	/* Forget the discipline information. */
+	if (device->discipline) {
+		if (device->discipline->uncheck_device)
+			device->discipline->uncheck_device(device);
+		module_put(device->discipline->owner);
+		device->discipline = NULL;
+	}
+	if (device->base_discipline) {
+		module_put(device->base_discipline->owner);
+		device->base_discipline = NULL;
+	}
+}
+EXPORT_SYMBOL_GPL(dasd_generic_free_discipline);
+
 /*
  * This will one day be called from a global not_oper handler.
  * It is also used by driver_unregister during module unload.
diff --git a/drivers/s390/block/dasd_devmap.c b/drivers/s390/block/dasd_devmap.c
index 3cdbce4..15a1a70 100644
--- a/drivers/s390/block/dasd_devmap.c
+++ b/drivers/s390/block/dasd_devmap.c
@@ -617,6 +617,7 @@ dasd_delete_device(struct dasd_device *device)
 	/* Wait for reference counter to drop to zero. */
 	wait_event(dasd_delete_wq, atomic_read(&device->ref_count) == 0);
 
+	dasd_generic_free_discipline(device);
 	/* Disconnect dasd_device structure from ccw_device structure. */
 	cdev = device->cdev;
 	device->cdev = NULL;
diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h
index ac7027e..87ff6ce 100644
--- a/drivers/s390/block/dasd_int.h
+++ b/drivers/s390/block/dasd_int.h
@@ -725,6 +725,7 @@ void dasd_block_clear_timer(struct dasd_block *);
 int  dasd_cancel_req(struct dasd_ccw_req *);
 int dasd_flush_device_queue(struct dasd_device *);
 int dasd_generic_probe (struct ccw_device *, struct dasd_discipline *);
+void dasd_generic_free_discipline(struct dasd_device *);
 void dasd_generic_remove (struct ccw_device *cdev);
 int dasd_generic_set_online(struct ccw_device *, struct dasd_discipline *);
 int dasd_generic_set_offline (struct ccw_device *cdev);
-- 
cgit v0.10.2


From f50af850f407df7b548802461bc248c7683c6dd3 Mon Sep 17 00:00:00 2001
From: Stefan Haberland <sth@linux.vnet.ibm.com>
Date: Thu, 22 Sep 2016 10:49:40 +0200
Subject: s390/dasd: make query host access interruptible

If the DASD device gets blocked for any reason, e.g. because it is reserved
somewhere, the host_access_count sysfs entry or the host_access_list
debugfs entry may sleep forever. Make it interruptible so that userspace
can use ^C to abort the operation.

Signed-off-by: Stefan Haberland <sth@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c
index 98bbec4..831935a 100644
--- a/drivers/s390/block/dasd_eckd.c
+++ b/drivers/s390/block/dasd_eckd.c
@@ -5201,7 +5201,7 @@ static int dasd_eckd_query_host_access(struct dasd_device *device,
 
 	cqr->buildclk = get_tod_clock();
 	cqr->status = DASD_CQR_FILLED;
-	rc = dasd_sleep_on(cqr);
+	rc = dasd_sleep_on_interruptible(cqr);
 	if (rc == 0) {
 		*data = *host_access;
 	} else {
-- 
cgit v0.10.2


From fdcebf6f18ee87c6da69327c9972d57b8ce58166 Mon Sep 17 00:00:00 2001
From: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Date: Mon, 26 Sep 2016 19:13:04 +0200
Subject: s390/config: Enable config options for Docker

The following config options are required/recommended for running Docker:

 Networking:

 - CONFIG_NF_NAT_MASQUERADE_IPV4=m
 - CONFIG_NF_NAT_MASQUERADE_IPV6=m
 - CONFIG_IPVLAN=m
 - CONFIG_CGROUP_NET_PRIO=y

 Storage drivers:

 - CONFIG_DM_THIN_PROVISIONING=m
 - CONFIG_OVERLAY_FS=m

 Scheduling:

 - CONFIG_FAIR_GROUP_SCHED=y
 - CONFIG_CFS_BANDWIDTH=y

Signed-off-by: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/configs/default_defconfig b/arch/s390/configs/default_defconfig
index 26e0c7f..ced7e7a 100644
--- a/arch/s390/configs/default_defconfig
+++ b/arch/s390/configs/default_defconfig
@@ -260,7 +260,6 @@ CONFIG_NF_CONNTRACK_IPV4=m
 CONFIG_NF_TABLES_IPV4=m
 CONFIG_NFT_CHAIN_ROUTE_IPV4=m
 CONFIG_NF_TABLES_ARP=m
-CONFIG_NF_NAT_IPV4=m
 CONFIG_NFT_CHAIN_NAT_IPV4=m
 CONFIG_IP_NF_IPTABLES=m
 CONFIG_IP_NF_MATCH_AH=m
@@ -269,6 +268,8 @@ CONFIG_IP_NF_MATCH_RPFILTER=m
 CONFIG_IP_NF_MATCH_TTL=m
 CONFIG_IP_NF_FILTER=m
 CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IP_NF_NAT=m
+CONFIG_IP_NF_TARGET_MASQUERADE=m
 CONFIG_IP_NF_MANGLE=m
 CONFIG_IP_NF_TARGET_CLUSTERIP=m
 CONFIG_IP_NF_TARGET_ECN=m
@@ -281,7 +282,6 @@ CONFIG_IP_NF_ARP_MANGLE=m
 CONFIG_NF_CONNTRACK_IPV6=m
 CONFIG_NF_TABLES_IPV6=m
 CONFIG_NFT_CHAIN_ROUTE_IPV6=m
-CONFIG_NF_NAT_IPV6=m
 CONFIG_NFT_CHAIN_NAT_IPV6=m
 CONFIG_IP6_NF_IPTABLES=m
 CONFIG_IP6_NF_MATCH_AH=m
@@ -299,6 +299,8 @@ CONFIG_IP6_NF_TARGET_REJECT=m
 CONFIG_IP6_NF_MANGLE=m
 CONFIG_IP6_NF_RAW=m
 CONFIG_IP6_NF_SECURITY=m
+CONFIG_IP6_NF_NAT=m
+CONFIG_IP6_NF_TARGET_MASQUERADE=m
 CONFIG_NF_TABLES_BRIDGE=m
 CONFIG_NET_SCTPPROBE=m
 CONFIG_RDS=m
@@ -359,6 +361,7 @@ CONFIG_NET_ACT_SIMP=m
 CONFIG_NET_ACT_SKBEDIT=m
 CONFIG_NET_ACT_CSUM=m
 CONFIG_DNS_RESOLVER=y
+CONFIG_CGROUP_NET_PRIO=y
 CONFIG_BPF_JIT=y
 CONFIG_NET_PKTGEN=m
 CONFIG_NET_TCPPROBE=m
@@ -409,6 +412,7 @@ CONFIG_MD_FAULTY=m
 CONFIG_BLK_DEV_DM=m
 CONFIG_DM_CRYPT=m
 CONFIG_DM_SNAPSHOT=m
+CONFIG_DM_THIN_PROVISIONING=m
 CONFIG_DM_MIRROR=m
 CONFIG_DM_LOG_USERSPACE=m
 CONFIG_DM_RAID=m
@@ -428,6 +432,7 @@ CONFIG_EQUALIZER=m
 CONFIG_IFB=m
 CONFIG_MACVLAN=m
 CONFIG_MACVTAP=m
+CONFIG_IPVLAN=m
 CONFIG_VXLAN=m
 CONFIG_TUN=m
 CONFIG_VETH=m
@@ -453,7 +458,6 @@ CONFIG_PPP_SYNC_TTY=m
 # CONFIG_INPUT_KEYBOARD is not set
 # CONFIG_INPUT_MOUSE is not set
 # CONFIG_SERIO is not set
-CONFIG_DEVPTS_MULTIPLE_INSTANCES=y
 CONFIG_LEGACY_PTY_COUNT=0
 CONFIG_HW_RANDOM_VIRTIO=m
 CONFIG_RAW_DRIVER=m
@@ -495,6 +499,7 @@ CONFIG_QFMT_V2=m
 CONFIG_AUTOFS4_FS=m
 CONFIG_FUSE_FS=y
 CONFIG_CUSE=m
+CONFIG_OVERLAY_FS=m
 CONFIG_FSCACHE=m
 CONFIG_CACHEFILES=m
 CONFIG_ISO9660_FS=y
diff --git a/arch/s390/configs/gcov_defconfig b/arch/s390/configs/gcov_defconfig
index 24879da..d725006 100644
--- a/arch/s390/configs/gcov_defconfig
+++ b/arch/s390/configs/gcov_defconfig
@@ -15,6 +15,8 @@ CONFIG_NUMA_BALANCING=y
 CONFIG_MEMCG=y
 CONFIG_MEMCG_SWAP=y
 CONFIG_BLK_CGROUP=y
+CONFIG_CFS_BANDWIDTH=y
+CONFIG_RT_GROUP_SCHED=y
 CONFIG_CGROUP_PIDS=y
 CONFIG_CGROUP_FREEZER=y
 CONFIG_CGROUP_HUGETLB=y
@@ -255,7 +257,6 @@ CONFIG_NF_CONNTRACK_IPV4=m
 CONFIG_NF_TABLES_IPV4=m
 CONFIG_NFT_CHAIN_ROUTE_IPV4=m
 CONFIG_NF_TABLES_ARP=m
-CONFIG_NF_NAT_IPV4=m
 CONFIG_NFT_CHAIN_NAT_IPV4=m
 CONFIG_IP_NF_IPTABLES=m
 CONFIG_IP_NF_MATCH_AH=m
@@ -264,6 +265,8 @@ CONFIG_IP_NF_MATCH_RPFILTER=m
 CONFIG_IP_NF_MATCH_TTL=m
 CONFIG_IP_NF_FILTER=m
 CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IP_NF_NAT=m
+CONFIG_IP_NF_TARGET_MASQUERADE=m
 CONFIG_IP_NF_MANGLE=m
 CONFIG_IP_NF_TARGET_CLUSTERIP=m
 CONFIG_IP_NF_TARGET_ECN=m
@@ -276,7 +279,6 @@ CONFIG_IP_NF_ARP_MANGLE=m
 CONFIG_NF_CONNTRACK_IPV6=m
 CONFIG_NF_TABLES_IPV6=m
 CONFIG_NFT_CHAIN_ROUTE_IPV6=m
-CONFIG_NF_NAT_IPV6=m
 CONFIG_NFT_CHAIN_NAT_IPV6=m
 CONFIG_IP6_NF_IPTABLES=m
 CONFIG_IP6_NF_MATCH_AH=m
@@ -294,6 +296,8 @@ CONFIG_IP6_NF_TARGET_REJECT=m
 CONFIG_IP6_NF_MANGLE=m
 CONFIG_IP6_NF_RAW=m
 CONFIG_IP6_NF_SECURITY=m
+CONFIG_IP6_NF_NAT=m
+CONFIG_IP6_NF_TARGET_MASQUERADE=m
 CONFIG_NF_TABLES_BRIDGE=m
 CONFIG_NET_SCTPPROBE=m
 CONFIG_RDS=m
@@ -353,6 +357,7 @@ CONFIG_NET_ACT_SIMP=m
 CONFIG_NET_ACT_SKBEDIT=m
 CONFIG_NET_ACT_CSUM=m
 CONFIG_DNS_RESOLVER=y
+CONFIG_CGROUP_NET_PRIO=y
 CONFIG_BPF_JIT=y
 CONFIG_NET_PKTGEN=m
 CONFIG_NET_TCPPROBE=m
@@ -403,6 +408,7 @@ CONFIG_MD_FAULTY=m
 CONFIG_BLK_DEV_DM=m
 CONFIG_DM_CRYPT=m
 CONFIG_DM_SNAPSHOT=m
+CONFIG_DM_THIN_PROVISIONING=m
 CONFIG_DM_MIRROR=m
 CONFIG_DM_LOG_USERSPACE=m
 CONFIG_DM_RAID=m
@@ -422,6 +428,7 @@ CONFIG_EQUALIZER=m
 CONFIG_IFB=m
 CONFIG_MACVLAN=m
 CONFIG_MACVTAP=m
+CONFIG_IPVLAN=m
 CONFIG_VXLAN=m
 CONFIG_TUN=m
 CONFIG_VETH=m
@@ -447,7 +454,6 @@ CONFIG_PPP_SYNC_TTY=m
 # CONFIG_INPUT_KEYBOARD is not set
 # CONFIG_INPUT_MOUSE is not set
 # CONFIG_SERIO is not set
-CONFIG_DEVPTS_MULTIPLE_INSTANCES=y
 CONFIG_LEGACY_PTY_COUNT=0
 CONFIG_HW_RANDOM_VIRTIO=m
 CONFIG_RAW_DRIVER=m
@@ -487,6 +493,7 @@ CONFIG_QFMT_V2=m
 CONFIG_AUTOFS4_FS=m
 CONFIG_FUSE_FS=y
 CONFIG_CUSE=m
+CONFIG_OVERLAY_FS=m
 CONFIG_FSCACHE=m
 CONFIG_CACHEFILES=m
 CONFIG_ISO9660_FS=y
diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig
index a5c1e5f..bf99c38 100644
--- a/arch/s390/configs/performance_defconfig
+++ b/arch/s390/configs/performance_defconfig
@@ -16,6 +16,8 @@ CONFIG_NUMA_BALANCING=y
 CONFIG_MEMCG=y
 CONFIG_MEMCG_SWAP=y
 CONFIG_BLK_CGROUP=y
+CONFIG_CFS_BANDWIDTH=y
+CONFIG_RT_GROUP_SCHED=y
 CONFIG_CGROUP_PIDS=y
 CONFIG_CGROUP_FREEZER=y
 CONFIG_CGROUP_HUGETLB=y
@@ -255,7 +257,6 @@ CONFIG_NF_CONNTRACK_IPV4=m
 CONFIG_NF_TABLES_IPV4=m
 CONFIG_NFT_CHAIN_ROUTE_IPV4=m
 CONFIG_NF_TABLES_ARP=m
-CONFIG_NF_NAT_IPV4=m
 CONFIG_NFT_CHAIN_NAT_IPV4=m
 CONFIG_IP_NF_IPTABLES=m
 CONFIG_IP_NF_MATCH_AH=m
@@ -264,6 +265,8 @@ CONFIG_IP_NF_MATCH_RPFILTER=m
 CONFIG_IP_NF_MATCH_TTL=m
 CONFIG_IP_NF_FILTER=m
 CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IP_NF_NAT=m
+CONFIG_IP_NF_TARGET_MASQUERADE=m
 CONFIG_IP_NF_MANGLE=m
 CONFIG_IP_NF_TARGET_CLUSTERIP=m
 CONFIG_IP_NF_TARGET_ECN=m
@@ -276,7 +279,6 @@ CONFIG_IP_NF_ARP_MANGLE=m
 CONFIG_NF_CONNTRACK_IPV6=m
 CONFIG_NF_TABLES_IPV6=m
 CONFIG_NFT_CHAIN_ROUTE_IPV6=m
-CONFIG_NF_NAT_IPV6=m
 CONFIG_NFT_CHAIN_NAT_IPV6=m
 CONFIG_IP6_NF_IPTABLES=m
 CONFIG_IP6_NF_MATCH_AH=m
@@ -294,6 +296,8 @@ CONFIG_IP6_NF_TARGET_REJECT=m
 CONFIG_IP6_NF_MANGLE=m
 CONFIG_IP6_NF_RAW=m
 CONFIG_IP6_NF_SECURITY=m
+CONFIG_IP6_NF_NAT=m
+CONFIG_IP6_NF_TARGET_MASQUERADE=m
 CONFIG_NF_TABLES_BRIDGE=m
 CONFIG_NET_SCTPPROBE=m
 CONFIG_RDS=m
@@ -353,6 +357,7 @@ CONFIG_NET_ACT_SIMP=m
 CONFIG_NET_ACT_SKBEDIT=m
 CONFIG_NET_ACT_CSUM=m
 CONFIG_DNS_RESOLVER=y
+CONFIG_CGROUP_NET_PRIO=y
 CONFIG_BPF_JIT=y
 CONFIG_NET_PKTGEN=m
 CONFIG_NET_TCPPROBE=m
@@ -403,6 +408,7 @@ CONFIG_MD_FAULTY=m
 CONFIG_BLK_DEV_DM=m
 CONFIG_DM_CRYPT=m
 CONFIG_DM_SNAPSHOT=m
+CONFIG_DM_THIN_PROVISIONING=m
 CONFIG_DM_MIRROR=m
 CONFIG_DM_LOG_USERSPACE=m
 CONFIG_DM_RAID=m
@@ -422,6 +428,7 @@ CONFIG_EQUALIZER=m
 CONFIG_IFB=m
 CONFIG_MACVLAN=m
 CONFIG_MACVTAP=m
+CONFIG_IPVLAN=m
 CONFIG_VXLAN=m
 CONFIG_TUN=m
 CONFIG_VETH=m
@@ -447,7 +454,6 @@ CONFIG_PPP_SYNC_TTY=m
 # CONFIG_INPUT_KEYBOARD is not set
 # CONFIG_INPUT_MOUSE is not set
 # CONFIG_SERIO is not set
-CONFIG_DEVPTS_MULTIPLE_INSTANCES=y
 CONFIG_LEGACY_PTY_COUNT=0
 CONFIG_HW_RANDOM_VIRTIO=m
 CONFIG_RAW_DRIVER=m
@@ -488,6 +494,7 @@ CONFIG_QFMT_V2=m
 CONFIG_AUTOFS4_FS=m
 CONFIG_FUSE_FS=y
 CONFIG_CUSE=m
+CONFIG_OVERLAY_FS=m
 CONFIG_FSCACHE=m
 CONFIG_CACHEFILES=m
 CONFIG_ISO9660_FS=y
-- 
cgit v0.10.2


From 871f8bf0c477ef20ac1457bb241416d7d8749732 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Tue, 27 Sep 2016 12:13:18 -0700
Subject: s390/dasd: add missing \n to end of dev_err messages

Trivial fix: the dev_err messages are missing a trailing \n, so add it.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/drivers/s390/block/dasd_erp.c b/drivers/s390/block/dasd_erp.c
index e1e8848..d138d01 100644
--- a/drivers/s390/block/dasd_erp.c
+++ b/drivers/s390/block/dasd_erp.c
@@ -169,12 +169,12 @@ dasd_log_sense(struct dasd_ccw_req *cqr, struct irb *irb)
 	device = cqr->startdev;
 	if (cqr->intrc == -ETIMEDOUT) {
 		dev_err(&device->cdev->dev,
-			"A timeout error occurred for cqr %p", cqr);
+			"A timeout error occurred for cqr %p\n", cqr);
 		return;
 	}
 	if (cqr->intrc == -ENOLINK) {
 		dev_err(&device->cdev->dev,
-			"A transport error occurred for cqr %p", cqr);
+			"A transport error occurred for cqr %p\n", cqr);
 		return;
 	}
 	/* dump sense data */
-- 
cgit v0.10.2


From d53c51f26145657aa7c55fa396f93677e613548d Mon Sep 17 00:00:00 2001
From: Sebastian Ott <sebott@linux.vnet.ibm.com>
Date: Wed, 28 Sep 2016 13:36:19 +0200
Subject: s390/cio: fix accidental interrupt enabling during resume

Since commit 9f3d6d7, chsc_get_channel_measurement_chars is called with
interrupts disabled during resume from hibernate. Because this function
used spin_unlock_irq, interrupts were enabled accidentally. Fix
this by using the irqsave variant.

Since we can't guarantee the IRQ-enablement state for all (future/
external) callers, change the locking in related functions to prevent
similar bugs in the future.

Fixes: 9f3d6d7 ("s390/cio: update measurement characteristics")
Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Reviewed-by: Peter Oberparleiter <oberpar@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c
index 940e725..1167469 100644
--- a/drivers/s390/cio/chsc.c
+++ b/drivers/s390/cio/chsc.c
@@ -95,12 +95,13 @@ struct chsc_ssd_area {
 int chsc_get_ssd_info(struct subchannel_id schid, struct chsc_ssd_info *ssd)
 {
 	struct chsc_ssd_area *ssd_area;
+	unsigned long flags;
 	int ccode;
 	int ret;
 	int i;
 	int mask;
 
-	spin_lock_irq(&chsc_page_lock);
+	spin_lock_irqsave(&chsc_page_lock, flags);
 	memset(chsc_page, 0, PAGE_SIZE);
 	ssd_area = chsc_page;
 	ssd_area->request.length = 0x0010;
@@ -144,7 +145,7 @@ int chsc_get_ssd_info(struct subchannel_id schid, struct chsc_ssd_info *ssd)
 			ssd->fla[i] = ssd_area->fla[i];
 	}
 out:
-	spin_unlock_irq(&chsc_page_lock);
+	spin_unlock_irqrestore(&chsc_page_lock, flags);
 	return ret;
 }
 
@@ -832,9 +833,10 @@ int __chsc_do_secm(struct channel_subsystem *css, int enable)
 		u32 fmt : 4;
 		u32 : 16;
 	} __attribute__ ((packed)) *secm_area;
+	unsigned long flags;
 	int ret, ccode;
 
-	spin_lock_irq(&chsc_page_lock);
+	spin_lock_irqsave(&chsc_page_lock, flags);
 	memset(chsc_page, 0, PAGE_SIZE);
 	secm_area = chsc_page;
 	secm_area->request.length = 0x0050;
@@ -864,7 +866,7 @@ int __chsc_do_secm(struct channel_subsystem *css, int enable)
 		CIO_CRW_EVENT(2, "chsc: secm failed (rc=%04x)\n",
 			      secm_area->response.code);
 out:
-	spin_unlock_irq(&chsc_page_lock);
+	spin_unlock_irqrestore(&chsc_page_lock, flags);
 	return ret;
 }
 
@@ -992,6 +994,7 @@ chsc_initialize_cmg_chars(struct channel_path *chp, u8 cmcv,
 
 int chsc_get_channel_measurement_chars(struct channel_path *chp)
 {
+	unsigned long flags;
 	int ccode, ret;
 
 	struct {
@@ -1021,7 +1024,7 @@ int chsc_get_channel_measurement_chars(struct channel_path *chp)
 	if (!css_chsc_characteristics.scmc || !css_chsc_characteristics.secm)
 		return -EINVAL;
 
-	spin_lock_irq(&chsc_page_lock);
+	spin_lock_irqsave(&chsc_page_lock, flags);
 	memset(chsc_page, 0, PAGE_SIZE);
 	scmc_area = chsc_page;
 	scmc_area->request.length = 0x0010;
@@ -1053,7 +1056,7 @@ int chsc_get_channel_measurement_chars(struct channel_path *chp)
 	chsc_initialize_cmg_chars(chp, scmc_area->cmcv,
 				  (struct cmg_chars *) &scmc_area->data);
 out:
-	spin_unlock_irq(&chsc_page_lock);
+	spin_unlock_irqrestore(&chsc_page_lock, flags);
 	return ret;
 }
 
@@ -1134,6 +1137,7 @@ struct css_chsc_char css_chsc_characteristics;
 int __init
 chsc_determine_css_characteristics(void)
 {
+	unsigned long flags;
 	int result;
 	struct {
 		struct chsc_header request;
@@ -1146,7 +1150,7 @@ chsc_determine_css_characteristics(void)
 		u32 chsc_char[508];
 	} __attribute__ ((packed)) *scsc_area;
 
-	spin_lock_irq(&chsc_page_lock);
+	spin_lock_irqsave(&chsc_page_lock, flags);
 	memset(chsc_page, 0, PAGE_SIZE);
 	scsc_area = chsc_page;
 	scsc_area->request.length = 0x0010;
@@ -1168,7 +1172,7 @@ chsc_determine_css_characteristics(void)
 		CIO_CRW_EVENT(2, "chsc: scsc failed (rc=%04x)\n",
 			      scsc_area->response.code);
 exit:
-	spin_unlock_irq(&chsc_page_lock);
+	spin_unlock_irqrestore(&chsc_page_lock, flags);
 	return result;
 }
 
-- 
cgit v0.10.2


From 38b7f07a0503271cf8e08399ecda7063c371a181 Mon Sep 17 00:00:00 2001
From: Stefan Haberland <sth@linux.vnet.ibm.com>
Date: Thu, 29 Sep 2016 12:13:53 +0200
Subject: MAINTAINERS: update DASD maintainer

Signed-off-by: Stefan Haberland <sth@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/MAINTAINERS b/MAINTAINERS
index 0bbe4b1..30601db 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10076,8 +10076,8 @@ S:	Supported
 F:	drivers/s390/cio/
 
 S390 DASD DRIVER
-M:	Stefan Weinhuber <wein@de.ibm.com>
-M:	Stefan Haberland <stefan.haberland@de.ibm.com>
+M:	Stefan Haberland <sth@linux.vnet.ibm.com>
+M:	Jan Hoeppner <hoeppner@linux.vnet.ibm.com>
 L:	linux-s390@vger.kernel.org
 W:	http://www.ibm.com/developerworks/linux/linux390/
 S:	Supported
-- 
cgit v0.10.2


From c14f2aac7aa147861793eed9f41f91dd530f0be1 Mon Sep 17 00:00:00 2001
From: Sascha Silbe <silbe@linux.vnet.ibm.com>
Date: Thu, 11 Aug 2016 21:34:54 +0200
Subject: s390/con3270: fix use of uninitialised data

con3270 contains an optimisation that reduces the amount of data to be
transmitted to the 3270 terminal by putting a Repeat to Address (RA)
order into the data stream. The RA order itself takes up space, so
con3270 only uses it if there's enough space left in the line
buffer. Otherwise it just pads out the line manually.

For lines too long to include the RA order, one byte was left
uninitialised. This was caused by an off-by-one bug in the loop that
pads out the line. Since the buffer is allocated from a common pool,
the single byte left uninitialised contained some previous buffer
content. Usually this was just a space or some character (which can
result in clutter but is otherwise harmless). Sometimes, however, it
was a Repeat to Address order, messing up the entire screen layout and
causing the display to send the entire buffer content on every
keystroke.

Fixes: f51320a5 ("[PATCH] s390: new 3270 driver.") (tglx/history.git)
Reported-by: Liu Jing <liujbjl@linux.vnet.ibm.com>
Tested-by: Jing Liu <liujbjl@linux.vnet.ibm.com>
Tested-by: Yang Chen <bjcyang@linux.vnet.ibm.com>
Signed-off-by: Sascha Silbe <silbe@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/drivers/s390/char/con3270.c b/drivers/s390/char/con3270.c
index 6b1577c..0c161db 100644
--- a/drivers/s390/char/con3270.c
+++ b/drivers/s390/char/con3270.c
@@ -464,7 +464,7 @@ con3270_cline_end(struct con3270 *cp)
 		s->string[s->len - 4] = TO_RA;
 		s->string[s->len - 1] = 0;
 	} else {
-		while (--size > cp->cline->len)
+		while (--size >= cp->cline->len)
 			s->string[size] = cp->view.ascebc[' '];
 	}
 	/* Replace cline with allocated line s and reset cline. */
-- 
cgit v0.10.2


From 6cd997db911f28f2510b771691270c52b63ed2e6 Mon Sep 17 00:00:00 2001
From: Sascha Silbe <silbe@linux.vnet.ibm.com>
Date: Tue, 20 Sep 2016 19:09:07 +0200
Subject: s390/con3270: fix insufficient space padding

con3270 contains an optimisation that reduces the amount of data to be
transmitted to the 3270 terminal by putting a Repeat to Address (RA)
order into the data stream. The RA order itself takes up space, so
con3270 only uses it if there's enough space left in the line
buffer. Otherwise it just pads out the line manually.

For lines that were _just_ short enough that the RA order still fit in
the line buffer, the line was instead padded with an insufficient
amount of spaces. This was caused by examining the size of the
allocated line buffer rather than the length of the string to be
displayed.

For con3270_cline_end(), we just compare against the line length. For
con3270_update_string(), however, the line length isn't available
anymore, so we check whether the Repeat to Address order is present.

Fixes: f51320a5 ("[PATCH] s390: new 3270 driver.") (tglx/history.git)
Tested-by: Jing Liu <liujbjl@linux.vnet.ibm.com>
Tested-by: Yang Chen <bjcyang@linux.vnet.ibm.com>
Signed-off-by: Sascha Silbe <silbe@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/drivers/s390/char/con3270.c b/drivers/s390/char/con3270.c
index 0c161db..285b400 100644
--- a/drivers/s390/char/con3270.c
+++ b/drivers/s390/char/con3270.c
@@ -124,7 +124,12 @@ con3270_create_status(struct con3270 *cp)
 static void
 con3270_update_string(struct con3270 *cp, struct string *s, int nr)
 {
-	if (s->len >= cp->view.cols - 5)
+	if (s->len < 4) {
+		/* This indicates a bug, but printing a warning would
+		 * cause a deadlock. */
+		return;
+	}
+	if (s->string[s->len - 4] != TO_RA)
 		return;
 	raw3270_buffer_address(cp->view.dev, s->string + s->len - 3,
 			       cp->view.cols * (nr + 1));
@@ -460,7 +465,7 @@ con3270_cline_end(struct con3270 *cp)
 		cp->cline->len + 4 : cp->view.cols;
 	s = con3270_alloc_string(cp, size);
 	memcpy(s->string, cp->cline->string, cp->cline->len);
-	if (s->len < cp->view.cols - 5) {
+	if (cp->cline->len < cp->view.cols - 5) {
 		s->string[s->len - 4] = TO_RA;
 		s->string[s->len - 1] = 0;
 	} else {
-- 
cgit v0.10.2