From f1af9d3af308145478749194346f11efad1134b2 Mon Sep 17 00:00:00 2001
From: Philipp Hachtmann <phacht@linux.vnet.ibm.com>
Date: Wed, 29 Jan 2014 18:16:01 +0100
Subject: mm/memblock: Do some refactoring, enhance API

Refactor the memblock code and extend the memblock API to make it
more flexible. With the extended API it is simple to define and
work with additional memory lists.

The static functions memblock_add_region and __memblock_remove are
renamed to memblock_add_range and meblock_remove_range and added to
the memblock API.

The __next_free_mem_range and __next_free_mem_range_rev functions
are replaced with calls to the more generic list walkers
__next_mem_range and __next_mem_range_rev.

To walk an arbitrary memory list two new macros for_each_mem_range
and for_each_mem_range_rev are added. These new macros are used
to define for_each_free_mem_range and for_each_free_mem_range_reverse.

Signed-off-by: Philipp Hachtmann <phacht@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 8a20a51..f669016 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -71,6 +71,63 @@ int memblock_reserve(phys_addr_t base, phys_addr_t size);
 void memblock_trim_memory(phys_addr_t align);
 int memblock_mark_hotplug(phys_addr_t base, phys_addr_t size);
 int memblock_clear_hotplug(phys_addr_t base, phys_addr_t size);
+
+/* Low level functions */
+int memblock_add_range(struct memblock_type *type,
+		       phys_addr_t base, phys_addr_t size,
+		       int nid, unsigned long flags);
+
+int memblock_remove_range(struct memblock_type *type,
+			  phys_addr_t base,
+			  phys_addr_t size);
+
+void __next_mem_range(u64 *idx, int nid, struct memblock_type *type_a,
+		      struct memblock_type *type_b, phys_addr_t *out_start,
+		      phys_addr_t *out_end, int *out_nid);
+
+void __next_mem_range_rev(u64 *idx, int nid, struct memblock_type *type_a,
+			  struct memblock_type *type_b, phys_addr_t *out_start,
+			  phys_addr_t *out_end, int *out_nid);
+
+/**
+ * for_each_mem_range - iterate through memblock areas from type_a and not
+ * included in type_b. Or just type_a if type_b is NULL.
+ * @i: u64 used as loop variable
+ * @type_a: ptr to memblock_type to iterate
+ * @type_b: ptr to memblock_type which excludes from the iteration
+ * @nid: node selector, %NUMA_NO_NODE for all nodes
+ * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL
+ * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
+ * @p_nid: ptr to int for nid of the range, can be %NULL
+ */
+#define for_each_mem_range(i, type_a, type_b, nid,			\
+			   p_start, p_end, p_nid)			\
+	for (i = 0, __next_mem_range(&i, nid, type_a, type_b,		\
+				     p_start, p_end, p_nid);		\
+	     i != (u64)ULLONG_MAX;					\
+	     __next_mem_range(&i, nid, type_a, type_b,			\
+			      p_start, p_end, p_nid))
+
+/**
+ * for_each_mem_range_rev - reverse iterate through memblock areas from
+ * type_a and not included in type_b. Or just type_a if type_b is NULL.
+ * @i: u64 used as loop variable
+ * @type_a: ptr to memblock_type to iterate
+ * @type_b: ptr to memblock_type which excludes from the iteration
+ * @nid: node selector, %NUMA_NO_NODE for all nodes
+ * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL
+ * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
+ * @p_nid: ptr to int for nid of the range, can be %NULL
+ */
+#define for_each_mem_range_rev(i, type_a, type_b, nid,			\
+			       p_start, p_end, p_nid)			\
+	for (i = (u64)ULLONG_MAX,					\
+		     __next_mem_range_rev(&i, nid, type_a, type_b,	\
+					 p_start, p_end, p_nid);	\
+	     i != (u64)ULLONG_MAX;					\
+	     __next_mem_range_rev(&i, nid, type_a, type_b,		\
+				  p_start, p_end, p_nid))
+
 #ifdef CONFIG_MOVABLE_NODE
 static inline bool memblock_is_hotpluggable(struct memblock_region *m)
 {
@@ -113,9 +170,6 @@ void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn,
 	     i >= 0; __next_mem_pfn_range(&i, nid, p_start, p_end, p_nid))
 #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
 
-void __next_free_mem_range(u64 *idx, int nid, phys_addr_t *out_start,
-			   phys_addr_t *out_end, int *out_nid);
-
 /**
  * for_each_free_mem_range - iterate through free memblock areas
  * @i: u64 used as loop variable
@@ -128,13 +182,8 @@ void __next_free_mem_range(u64 *idx, int nid, phys_addr_t *out_start,
  * soon as memblock is initialized.
  */
 #define for_each_free_mem_range(i, nid, p_start, p_end, p_nid)		\
-	for (i = 0,							\
-	     __next_free_mem_range(&i, nid, p_start, p_end, p_nid);	\
-	     i != (u64)ULLONG_MAX;					\
-	     __next_free_mem_range(&i, nid, p_start, p_end, p_nid))
-
-void __next_free_mem_range_rev(u64 *idx, int nid, phys_addr_t *out_start,
-			       phys_addr_t *out_end, int *out_nid);
+	for_each_mem_range(i, &memblock.memory, &memblock.reserved,	\
+			   nid, p_start, p_end, p_nid)
 
 /**
  * for_each_free_mem_range_reverse - rev-iterate through free memblock areas
@@ -148,10 +197,8 @@ void __next_free_mem_range_rev(u64 *idx, int nid, phys_addr_t *out_start,
  * order.  Available as soon as memblock is initialized.
  */
 #define for_each_free_mem_range_reverse(i, nid, p_start, p_end, p_nid)	\
-	for (i = (u64)ULLONG_MAX,					\
-	     __next_free_mem_range_rev(&i, nid, p_start, p_end, p_nid);	\
-	     i != (u64)ULLONG_MAX;					\
-	     __next_free_mem_range_rev(&i, nid, p_start, p_end, p_nid))
+	for_each_mem_range_rev(i, &memblock.memory, &memblock.reserved,	\
+			       nid, p_start, p_end, p_nid)
 
 static inline void memblock_set_region_flags(struct memblock_region *r,
 					     unsigned long flags)
diff --git a/mm/memblock.c b/mm/memblock.c
index e9d6ca9..9edd092 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -472,7 +472,7 @@ static void __init_memblock memblock_insert_region(struct memblock_type *type,
 }
 
 /**
- * memblock_add_region - add new memblock region
+ * memblock_add_range - add new memblock region
  * @type: memblock type to add new region into
  * @base: base address of the new region
  * @size: size of the new region
@@ -487,7 +487,7 @@ static void __init_memblock memblock_insert_region(struct memblock_type *type,
  * RETURNS:
  * 0 on success, -errno on failure.
  */
-static int __init_memblock memblock_add_region(struct memblock_type *type,
+int __init_memblock memblock_add_range(struct memblock_type *type,
 				phys_addr_t base, phys_addr_t size,
 				int nid, unsigned long flags)
 {
@@ -569,12 +569,12 @@ repeat:
 int __init_memblock memblock_add_node(phys_addr_t base, phys_addr_t size,
 				       int nid)
 {
-	return memblock_add_region(&memblock.memory, base, size, nid, 0);
+	return memblock_add_range(&memblock.memory, base, size, nid, 0);
 }
 
 int __init_memblock memblock_add(phys_addr_t base, phys_addr_t size)
 {
-	return memblock_add_region(&memblock.memory, base, size,
+	return memblock_add_range(&memblock.memory, base, size,
 				   MAX_NUMNODES, 0);
 }
 
@@ -654,8 +654,8 @@ static int __init_memblock memblock_isolate_range(struct memblock_type *type,
 	return 0;
 }
 
-static int __init_memblock __memblock_remove(struct memblock_type *type,
-					     phys_addr_t base, phys_addr_t size)
+int __init_memblock memblock_remove_range(struct memblock_type *type,
+					  phys_addr_t base, phys_addr_t size)
 {
 	int start_rgn, end_rgn;
 	int i, ret;
@@ -671,9 +671,10 @@ static int __init_memblock __memblock_remove(struct memblock_type *type,
 
 int __init_memblock memblock_remove(phys_addr_t base, phys_addr_t size)
 {
-	return __memblock_remove(&memblock.memory, base, size);
+	return memblock_remove_range(&memblock.memory, base, size);
 }
 
+
 int __init_memblock memblock_free(phys_addr_t base, phys_addr_t size)
 {
 	memblock_dbg("   memblock_free: [%#016llx-%#016llx] %pF\n",
@@ -681,7 +682,7 @@ int __init_memblock memblock_free(phys_addr_t base, phys_addr_t size)
 		     (unsigned long long)base + size - 1,
 		     (void *)_RET_IP_);
 
-	return __memblock_remove(&memblock.reserved, base, size);
+	return memblock_remove_range(&memblock.reserved, base, size);
 }
 
 static int __init_memblock memblock_reserve_region(phys_addr_t base,
@@ -696,7 +697,7 @@ static int __init_memblock memblock_reserve_region(phys_addr_t base,
 		     (unsigned long long)base + size - 1,
 		     flags, (void *)_RET_IP_);
 
-	return memblock_add_region(_rgn, base, size, nid, flags);
+	return memblock_add_range(_rgn, base, size, nid, flags);
 }
 
 int __init_memblock memblock_reserve(phys_addr_t base, phys_addr_t size)
@@ -758,17 +759,19 @@ int __init_memblock memblock_clear_hotplug(phys_addr_t base, phys_addr_t size)
 }
 
 /**
- * __next_free_mem_range - next function for for_each_free_mem_range()
+ * __next__mem_range - next function for for_each_free_mem_range() etc.
  * @idx: pointer to u64 loop variable
  * @nid: node selector, %NUMA_NO_NODE for all nodes
+ * @type_a: pointer to memblock_type from where the range is taken
+ * @type_b: pointer to memblock_type which excludes memory from being taken
  * @out_start: ptr to phys_addr_t for start address of the range, can be %NULL
  * @out_end: ptr to phys_addr_t for end address of the range, can be %NULL
  * @out_nid: ptr to int for nid of the range, can be %NULL
  *
- * Find the first free area from *@idx which matches @nid, fill the out
+ * Find the first area from *@idx which matches @nid, fill the out
  * parameters, and update *@idx for the next iteration.  The lower 32bit of
- * *@idx contains index into memory region and the upper 32bit indexes the
- * areas before each reserved region.  For example, if reserved regions
+ * *@idx contains index into type_a and the upper 32bit indexes the
+ * areas before each region in type_b.	For example, if type_b regions
  * look like the following,
  *
  *	0:[0-16), 1:[32-48), 2:[128-130)
@@ -780,53 +783,77 @@ int __init_memblock memblock_clear_hotplug(phys_addr_t base, phys_addr_t size)
  * As both region arrays are sorted, the function advances the two indices
  * in lockstep and returns each intersection.
  */
-void __init_memblock __next_free_mem_range(u64 *idx, int nid,
-					   phys_addr_t *out_start,
-					   phys_addr_t *out_end, int *out_nid)
+void __init_memblock __next_mem_range(u64 *idx, int nid,
+				      struct memblock_type *type_a,
+				      struct memblock_type *type_b,
+				      phys_addr_t *out_start,
+				      phys_addr_t *out_end, int *out_nid)
 {
-	struct memblock_type *mem = &memblock.memory;
-	struct memblock_type *rsv = &memblock.reserved;
-	int mi = *idx & 0xffffffff;
-	int ri = *idx >> 32;
+	int idx_a = *idx & 0xffffffff;
+	int idx_b = *idx >> 32;
 
-	if (WARN_ONCE(nid == MAX_NUMNODES, "Usage of MAX_NUMNODES is deprecated. Use NUMA_NO_NODE instead\n"))
+	if (WARN_ONCE(nid == MAX_NUMNODES,
+	"Usage of MAX_NUMNODES is deprecated. Use NUMA_NO_NODE instead\n"))
 		nid = NUMA_NO_NODE;
 
-	for ( ; mi < mem->cnt; mi++) {
-		struct memblock_region *m = &mem->regions[mi];
+	for (; idx_a < type_a->cnt; idx_a++) {
+		struct memblock_region *m = &type_a->regions[idx_a];
+
 		phys_addr_t m_start = m->base;
 		phys_addr_t m_end = m->base + m->size;
+		int	    m_nid = memblock_get_region_node(m);
 
 		/* only memory regions are associated with nodes, check it */
-		if (nid != NUMA_NO_NODE && nid != memblock_get_region_node(m))
+		if (nid != NUMA_NO_NODE && nid != m_nid)
 			continue;
 
-		/* scan areas before each reservation for intersection */
-		for ( ; ri < rsv->cnt + 1; ri++) {
-			struct memblock_region *r = &rsv->regions[ri];
-			phys_addr_t r_start = ri ? r[-1].base + r[-1].size : 0;
-			phys_addr_t r_end = ri < rsv->cnt ? r->base : ULLONG_MAX;
+		if (!type_b) {
+			if (out_start)
+				*out_start = m_start;
+			if (out_end)
+				*out_end = m_end;
+			if (out_nid)
+				*out_nid = m_nid;
+			idx_a++;
+			*idx = (u32)idx_a | (u64)idx_b << 32;
+			return;
+		}
+
+		/* scan areas before each reservation */
+		for (; idx_b < type_b->cnt + 1; idx_b++) {
+			struct memblock_region *r;
+			phys_addr_t r_start;
+			phys_addr_t r_end;
+
+			r = &type_b->regions[idx_b];
+			r_start = idx_b ? r[-1].base + r[-1].size : 0;
+			r_end = idx_b < type_b->cnt ?
+				r->base : ULLONG_MAX;
 
-			/* if ri advanced past mi, break out to advance mi */
+			/*
+			 * if idx_b advanced past idx_a,
+			 * break out to advance idx_a
+			 */
 			if (r_start >= m_end)
 				break;
 			/* if the two regions intersect, we're done */
 			if (m_start < r_end) {
 				if (out_start)
-					*out_start = max(m_start, r_start);
+					*out_start =
+						max(m_start, r_start);
 				if (out_end)
 					*out_end = min(m_end, r_end);
 				if (out_nid)
-					*out_nid = memblock_get_region_node(m);
+					*out_nid = m_nid;
 				/*
-				 * The region which ends first is advanced
-				 * for the next iteration.
+				 * The region which ends first is
+				 * advanced for the next iteration.
 				 */
 				if (m_end <= r_end)
-					mi++;
+					idx_a++;
 				else
-					ri++;
-				*idx = (u32)mi | (u64)ri << 32;
+					idx_b++;
+				*idx = (u32)idx_a | (u64)idx_b << 32;
 				return;
 			}
 		}
@@ -837,57 +864,80 @@ void __init_memblock __next_free_mem_range(u64 *idx, int nid,
 }
 
 /**
- * __next_free_mem_range_rev - next function for for_each_free_mem_range_reverse()
+ * __next_mem_range_rev - generic next function for for_each_*_range_rev()
+ *
+ * Finds the next range from type_a which is not marked as unsuitable
+ * in type_b.
+ *
  * @idx: pointer to u64 loop variable
  * @nid: nid: node selector, %NUMA_NO_NODE for all nodes
+ * @type_a: pointer to memblock_type from where the range is taken
+ * @type_b: pointer to memblock_type which excludes memory from being taken
  * @out_start: ptr to phys_addr_t for start address of the range, can be %NULL
  * @out_end: ptr to phys_addr_t for end address of the range, can be %NULL
  * @out_nid: ptr to int for nid of the range, can be %NULL
  *
- * Reverse of __next_free_mem_range().
- *
- * Linux kernel cannot migrate pages used by itself. Memory hotplug users won't
- * be able to hot-remove hotpluggable memory used by the kernel. So this
- * function skip hotpluggable regions if needed when allocating memory for the
- * kernel.
+ * Reverse of __next_mem_range().
  */
-void __init_memblock __next_free_mem_range_rev(u64 *idx, int nid,
-					   phys_addr_t *out_start,
-					   phys_addr_t *out_end, int *out_nid)
+void __init_memblock __next_mem_range_rev(u64 *idx, int nid,
+					  struct memblock_type *type_a,
+					  struct memblock_type *type_b,
+					  phys_addr_t *out_start,
+					  phys_addr_t *out_end, int *out_nid)
 {
-	struct memblock_type *mem = &memblock.memory;
-	struct memblock_type *rsv = &memblock.reserved;
-	int mi = *idx & 0xffffffff;
-	int ri = *idx >> 32;
+	int idx_a = *idx & 0xffffffff;
+	int idx_b = *idx >> 32;
 
 	if (WARN_ONCE(nid == MAX_NUMNODES, "Usage of MAX_NUMNODES is deprecated. Use NUMA_NO_NODE instead\n"))
 		nid = NUMA_NO_NODE;
 
 	if (*idx == (u64)ULLONG_MAX) {
-		mi = mem->cnt - 1;
-		ri = rsv->cnt;
+		idx_a = type_a->cnt - 1;
+		idx_b = type_b->cnt;
 	}
 
-	for ( ; mi >= 0; mi--) {
-		struct memblock_region *m = &mem->regions[mi];
+	for (; idx_a >= 0; idx_a--) {
+		struct memblock_region *m = &type_a->regions[idx_a];
+
 		phys_addr_t m_start = m->base;
 		phys_addr_t m_end = m->base + m->size;
+		int m_nid = memblock_get_region_node(m);
 
 		/* only memory regions are associated with nodes, check it */
-		if (nid != NUMA_NO_NODE && nid != memblock_get_region_node(m))
+		if (nid != NUMA_NO_NODE && nid != m_nid)
 			continue;
 
 		/* skip hotpluggable memory regions if needed */
 		if (movable_node_is_enabled() && memblock_is_hotpluggable(m))
 			continue;
 
-		/* scan areas before each reservation for intersection */
-		for ( ; ri >= 0; ri--) {
-			struct memblock_region *r = &rsv->regions[ri];
-			phys_addr_t r_start = ri ? r[-1].base + r[-1].size : 0;
-			phys_addr_t r_end = ri < rsv->cnt ? r->base : ULLONG_MAX;
+		if (!type_b) {
+			if (out_start)
+				*out_start = m_start;
+			if (out_end)
+				*out_end = m_end;
+			if (out_nid)
+				*out_nid = m_nid;
+			idx_a++;
+			*idx = (u32)idx_a | (u64)idx_b << 32;
+			return;
+		}
+
+		/* scan areas before each reservation */
+		for (; idx_b >= 0; idx_b--) {
+			struct memblock_region *r;
+			phys_addr_t r_start;
+			phys_addr_t r_end;
+
+			r = &type_b->regions[idx_b];
+			r_start = idx_b ? r[-1].base + r[-1].size : 0;
+			r_end = idx_b < type_b->cnt ?
+				r->base : ULLONG_MAX;
+			/*
+			 * if idx_b advanced past idx_a,
+			 * break out to advance idx_a
+			 */
 
-			/* if ri advanced past mi, break out to advance mi */
 			if (r_end <= m_start)
 				break;
 			/* if the two regions intersect, we're done */
@@ -897,18 +947,17 @@ void __init_memblock __next_free_mem_range_rev(u64 *idx, int nid,
 				if (out_end)
 					*out_end = min(m_end, r_end);
 				if (out_nid)
-					*out_nid = memblock_get_region_node(m);
-
+					*out_nid = m_nid;
 				if (m_start >= r_start)
-					mi--;
+					idx_a--;
 				else
-					ri--;
-				*idx = (u32)mi | (u64)ri << 32;
+					idx_b--;
+				*idx = (u32)idx_a | (u64)idx_b << 32;
 				return;
 			}
 		}
 	}
-
+	/* signal end of iteration */
 	*idx = ULLONG_MAX;
 }
 
@@ -1201,7 +1250,7 @@ void __init __memblock_free_early(phys_addr_t base, phys_addr_t size)
 		     __func__, (u64)base, (u64)base + size - 1,
 		     (void *)_RET_IP_);
 	kmemleak_free_part(__va(base), size);
-	__memblock_remove(&memblock.reserved, base, size);
+	memblock_remove_range(&memblock.reserved, base, size);
 }
 
 /*
@@ -1287,8 +1336,10 @@ void __init memblock_enforce_memory_limit(phys_addr_t limit)
 	}
 
 	/* truncate both memory and reserved regions */
-	__memblock_remove(&memblock.memory, max_addr, (phys_addr_t)ULLONG_MAX);
-	__memblock_remove(&memblock.reserved, max_addr, (phys_addr_t)ULLONG_MAX);
+	memblock_remove_range(&memblock.memory, max_addr,
+			      (phys_addr_t)ULLONG_MAX);
+	memblock_remove_range(&memblock.reserved, max_addr,
+			      (phys_addr_t)ULLONG_MAX);
 }
 
 static int __init_memblock memblock_search(struct memblock_type *type, phys_addr_t addr)
-- 
cgit v0.10.2


From 70210ed950b538ee7eb811dccc402db9df1c9be4 Mon Sep 17 00:00:00 2001
From: Philipp Hachtmann <phacht@linux.vnet.ibm.com>
Date: Wed, 29 Jan 2014 18:16:01 +0100
Subject: mm/memblock: add physical memory list

Add the physmem list to the memblock structure. This list only exists
if HAVE_MEMBLOCK_PHYS_MAP is selected and contains the unmodified
list of physically available memory. It differs from the memblock
memory list as it always contains all memory ranges even if the
memory has been restricted, e.g. by use of the mem= kernel parameter.

Signed-off-by: Philipp Hachtmann <phacht@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index f669016..73dc382 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -18,6 +18,7 @@
 #include <linux/mm.h>
 
 #define INIT_MEMBLOCK_REGIONS	128
+#define INIT_PHYSMEM_REGIONS	4
 
 /* Definition of memblock flags. */
 #define MEMBLOCK_HOTPLUG	0x1	/* hotpluggable region */
@@ -43,6 +44,9 @@ struct memblock {
 	phys_addr_t current_limit;
 	struct memblock_type memory;
 	struct memblock_type reserved;
+#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
+	struct memblock_type physmem;
+#endif
 };
 
 extern struct memblock memblock;
diff --git a/mm/Kconfig b/mm/Kconfig
index 1b5a95f..28cec51 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -134,6 +134,9 @@ config HAVE_MEMBLOCK
 config HAVE_MEMBLOCK_NODE_MAP
 	boolean
 
+config HAVE_MEMBLOCK_PHYS_MAP
+	boolean
+
 config ARCH_DISCARD_MEMBLOCK
 	boolean
 
diff --git a/mm/memblock.c b/mm/memblock.c
index 9edd092..a810ba9 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -27,6 +27,9 @@
 
 static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock;
 static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock;
+#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
+static struct memblock_region memblock_physmem_init_regions[INIT_PHYSMEM_REGIONS] __initdata_memblock;
+#endif
 
 struct memblock memblock __initdata_memblock = {
 	.memory.regions		= memblock_memory_init_regions,
@@ -37,6 +40,12 @@ struct memblock memblock __initdata_memblock = {
 	.reserved.cnt		= 1,	/* empty dummy entry */
 	.reserved.max		= INIT_MEMBLOCK_REGIONS,
 
+#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
+	.physmem.regions	= memblock_physmem_init_regions,
+	.physmem.cnt		= 1,	/* empty dummy entry */
+	.physmem.max		= INIT_PHYSMEM_REGIONS,
+#endif
+
 	.bottom_up		= false,
 	.current_limit		= MEMBLOCK_ALLOC_ANYWHERE,
 };
@@ -1553,6 +1562,9 @@ static int __init memblock_init_debugfs(void)
 		return -ENXIO;
 	debugfs_create_file("memory", S_IRUGO, root, &memblock.memory, &memblock_debug_fops);
 	debugfs_create_file("reserved", S_IRUGO, root, &memblock.reserved, &memblock_debug_fops);
+#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
+	debugfs_create_file("physmem", S_IRUGO, root, &memblock.physmem, &memblock_debug_fops);
+#endif
 
 	return 0;
 }
-- 
cgit v0.10.2


From 50be634507284eea38df78154d22615d21200b42 Mon Sep 17 00:00:00 2001
From: Philipp Hachtmann <phacht@linux.vnet.ibm.com>
Date: Wed, 29 Jan 2014 18:16:01 +0100
Subject: s390/mm: Convert bootmem to memblock

The original bootmem allocator is getting replaced by memblock. To
cover the needs of the s390 kdump implementation the physical memory
list is used.
With this patch the bootmem allocator and its bitmaps are completely
removed from s390.

Signed-off-by: Philipp Hachtmann <phacht@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index d68fe34..d239e6a 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -60,7 +60,6 @@ config PCI_QUIRKS
 
 config S390
 	def_bool y
-	select ARCH_DISCARD_MEMBLOCK
 	select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
 	select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
@@ -130,6 +129,7 @@ config S390
 	select HAVE_KVM if 64BIT
 	select HAVE_MEMBLOCK
 	select HAVE_MEMBLOCK_NODE_MAP
+	select HAVE_MEMBLOCK_PHYS_MAP
 	select HAVE_MOD_ARCH_SPECIFIC
 	select HAVE_OPROFILE
 	select HAVE_PERF_EVENTS
@@ -139,6 +139,7 @@ config S390
 	select HAVE_VIRT_CPU_ACCOUNTING
 	select KTIME_SCALAR if 32BIT
 	select MODULES_USE_ELF_RELA
+	select NO_BOOTMEM
 	select OLD_SIGACTION
 	select OLD_SIGSUSPEND3
 	select SYSCTL_EXCEPTION_TRACE
diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h
index b31b22d..089a498 100644
--- a/arch/s390/include/asm/setup.h
+++ b/arch/s390/include/asm/setup.h
@@ -9,7 +9,6 @@
 
 
 #define PARMAREA		0x10400
-#define MEMORY_CHUNKS		256
 
 #ifndef __ASSEMBLY__
 
@@ -31,22 +30,11 @@
 #endif /* CONFIG_64BIT */
 #define COMMAND_LINE      ((char *)            (0x10480))
 
-#define CHUNK_READ_WRITE 0
-#define CHUNK_READ_ONLY  1
-
-struct mem_chunk {
-	unsigned long addr;
-	unsigned long size;
-	int type;
-};
-
-extern struct mem_chunk memory_chunk[];
 extern int memory_end_set;
 extern unsigned long memory_end;
+extern unsigned long max_physmem_end;
 
-void detect_memory_layout(struct mem_chunk chunk[], unsigned long maxsize);
-void create_mem_hole(struct mem_chunk mem_chunk[], unsigned long addr,
-		     unsigned long size);
+extern void detect_memory_memblock(void);
 
 /*
  * Machine features detected in head.S
diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
index d7658c4..a3b9150 100644
--- a/arch/s390/kernel/crash_dump.c
+++ b/arch/s390/kernel/crash_dump.c
@@ -13,6 +13,7 @@
 #include <linux/slab.h>
 #include <linux/bootmem.h>
 #include <linux/elf.h>
+#include <linux/memblock.h>
 #include <asm/os_info.h>
 #include <asm/elf.h>
 #include <asm/ipl.h>
@@ -22,6 +23,24 @@
 #define PTR_SUB(x, y) (((char *) (x)) - ((unsigned long) (y)))
 #define PTR_DIFF(x, y) ((unsigned long)(((char *) (x)) - ((unsigned long) (y))))
 
+static struct memblock_region oldmem_region;
+
+static struct memblock_type oldmem_type = {
+	.cnt = 1,
+	.max = 1,
+	.total_size = 0,
+	.regions = &oldmem_region,
+};
+
+#define for_each_dump_mem_range(i, nid, p_start, p_end, p_nid)		\
+	for (i = 0, __next_mem_range(&i, nid, &memblock.physmem,	\
+				     &oldmem_type, p_start,		\
+				     p_end, p_nid);			\
+	     i != (u64)ULLONG_MAX;					\
+	     __next_mem_range(&i, nid, &memblock.physmem,		\
+			      &oldmem_type,				\
+			      p_start, p_end, p_nid))
+
 struct dump_save_areas dump_save_areas;
 
 /*
@@ -264,19 +283,6 @@ static void *kzalloc_panic(int len)
 }
 
 /*
- * Get memory layout and create hole for oldmem
- */
-static struct mem_chunk *get_memory_layout(void)
-{
-	struct mem_chunk *chunk_array;
-
-	chunk_array = kzalloc_panic(MEMORY_CHUNKS * sizeof(struct mem_chunk));
-	detect_memory_layout(chunk_array, 0);
-	create_mem_hole(chunk_array, OLDMEM_BASE, OLDMEM_SIZE);
-	return chunk_array;
-}
-
-/*
  * Initialize ELF note
  */
 static void *nt_init(void *buf, Elf64_Word type, void *desc, int d_len,
@@ -490,52 +496,33 @@ static int get_cpu_cnt(void)
  */
 static int get_mem_chunk_cnt(void)
 {
-	struct mem_chunk *chunk_array, *mem_chunk;
-	int i, cnt = 0;
+	int cnt = 0;
+	u64 idx;
 
-	chunk_array = get_memory_layout();
-	for (i = 0; i < MEMORY_CHUNKS; i++) {
-		mem_chunk = &chunk_array[i];
-		if (chunk_array[i].type != CHUNK_READ_WRITE &&
-		    chunk_array[i].type != CHUNK_READ_ONLY)
-			continue;
-		if (mem_chunk->size == 0)
-			continue;
+	for_each_dump_mem_range(idx, NUMA_NO_NODE, NULL, NULL, NULL)
 		cnt++;
-	}
-	kfree(chunk_array);
 	return cnt;
 }
 
 /*
  * Initialize ELF loads (new kernel)
  */
-static int loads_init(Elf64_Phdr *phdr, u64 loads_offset)
+static void loads_init(Elf64_Phdr *phdr, u64 loads_offset)
 {
-	struct mem_chunk *chunk_array, *mem_chunk;
-	int i;
+	phys_addr_t start, end;
+	u64 idx;
 
-	chunk_array = get_memory_layout();
-	for (i = 0; i < MEMORY_CHUNKS; i++) {
-		mem_chunk = &chunk_array[i];
-		if (mem_chunk->size == 0)
-			continue;
-		if (chunk_array[i].type != CHUNK_READ_WRITE &&
-		    chunk_array[i].type != CHUNK_READ_ONLY)
-			continue;
-		else
-			phdr->p_filesz = mem_chunk->size;
+	for_each_dump_mem_range(idx, NUMA_NO_NODE, &start, &end, NULL) {
+		phdr->p_filesz = end - start;
 		phdr->p_type = PT_LOAD;
-		phdr->p_offset = mem_chunk->addr;
-		phdr->p_vaddr = mem_chunk->addr;
-		phdr->p_paddr = mem_chunk->addr;
-		phdr->p_memsz = mem_chunk->size;
+		phdr->p_offset = start;
+		phdr->p_vaddr = start;
+		phdr->p_paddr = start;
+		phdr->p_memsz = end - start;
 		phdr->p_flags = PF_R | PF_W | PF_X;
 		phdr->p_align = PAGE_SIZE;
 		phdr++;
 	}
-	kfree(chunk_array);
-	return i;
 }
 
 /*
@@ -584,6 +571,14 @@ int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size)
 	/* If we cannot get HSA size for zfcpdump return error */
 	if (ipl_info.type == IPL_TYPE_FCP_DUMP && !sclp_get_hsa_size())
 		return -ENODEV;
+
+	/* For kdump, exclude previous crashkernel memory */
+	if (OLDMEM_BASE) {
+		oldmem_region.base = OLDMEM_BASE;
+		oldmem_region.size = OLDMEM_SIZE;
+		oldmem_type.total_size = OLDMEM_SIZE;
+	}
+
 	mem_chunk_cnt = get_mem_chunk_cnt();
 
 	alloc_size = 0x1000 + get_cpu_cnt() * 0x300 +
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index a734f35..0dff972 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -258,13 +258,19 @@ static __init void setup_topology(void)
 static void early_pgm_check_handler(void)
 {
 	const struct exception_table_entry *fixup;
+	unsigned long cr0, cr0_new;
 	unsigned long addr;
 
 	addr = S390_lowcore.program_old_psw.addr;
 	fixup = search_exception_tables(addr & PSW_ADDR_INSN);
 	if (!fixup)
 		disabled_wait(0);
+	/* Disable low address protection before storing into lowcore. */
+	__ctl_store(cr0, 0, 0);
+	cr0_new = cr0 & ~(1UL << 28);
+	__ctl_load(cr0_new, 0, 0);
 	S390_lowcore.program_old_psw.addr = extable_fixup(fixup)|PSW_ADDR_AMODE;
+	__ctl_load(cr0, 0, 0);
 }
 
 static noinline __init void setup_lowcore_early(void)
diff --git a/arch/s390/kernel/head31.S b/arch/s390/kernel/head31.S
index 9a99856..6dbe809 100644
--- a/arch/s390/kernel/head31.S
+++ b/arch/s390/kernel/head31.S
@@ -59,7 +59,6 @@ ENTRY(startup_continue)
 	.long	0			# cr13: home space segment table
 	.long	0xc0000000		# cr14: machine check handling off
 	.long	0			# cr15: linkage stack operations
-.Lmchunk:.long	memory_chunk
 .Lbss_bgn:  .long __bss_start
 .Lbss_end:  .long _end
 .Lparmaddr: .long PARMAREA
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 88d1ca8..1f5536c 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -78,10 +78,9 @@ EXPORT_SYMBOL(console_irq);
 unsigned long elf_hwcap = 0;
 char elf_platform[ELF_PLATFORM_SIZE];
 
-struct mem_chunk __initdata memory_chunk[MEMORY_CHUNKS];
-
 int __initdata memory_end_set;
 unsigned long __initdata memory_end;
+unsigned long __initdata max_physmem_end;
 
 unsigned long VMALLOC_START;
 EXPORT_SYMBOL(VMALLOC_START);
@@ -273,6 +272,7 @@ EXPORT_SYMBOL_GPL(pm_power_off);
 static int __init early_parse_mem(char *p)
 {
 	memory_end = memparse(p, &p);
+	memory_end &= PAGE_MASK;
 	memory_end_set = 1;
 	return 0;
 }
@@ -401,7 +401,8 @@ static struct resource __initdata *standard_resources[] = {
 static void __init setup_resources(void)
 {
 	struct resource *res, *std_res, *sub_res;
-	int i, j;
+	struct memblock_region *reg;
+	int j;
 
 	code_resource.start = (unsigned long) &_text;
 	code_resource.end = (unsigned long) &_etext - 1;
@@ -410,24 +411,13 @@ static void __init setup_resources(void)
 	bss_resource.start = (unsigned long) &__bss_start;
 	bss_resource.end = (unsigned long) &__bss_stop - 1;
 
-	for (i = 0; i < MEMORY_CHUNKS; i++) {
-		if (!memory_chunk[i].size)
-			continue;
+	for_each_memblock(memory, reg) {
 		res = alloc_bootmem_low(sizeof(*res));
 		res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
-		switch (memory_chunk[i].type) {
-		case CHUNK_READ_WRITE:
-			res->name = "System RAM";
-			break;
-		case CHUNK_READ_ONLY:
-			res->name = "System ROM";
-			res->flags |= IORESOURCE_READONLY;
-			break;
-		default:
-			res->name = "reserved";
-		}
-		res->start = memory_chunk[i].addr;
-		res->end = res->start + memory_chunk[i].size - 1;
+
+		res->name = "System RAM";
+		res->start = reg->base;
+		res->end = reg->base + reg->size - 1;
 		request_resource(&iomem_resource, res);
 
 		for (j = 0; j < ARRAY_SIZE(standard_resources); j++) {
@@ -451,48 +441,11 @@ static void __init setup_resources(void)
 static void __init setup_memory_end(void)
 {
 	unsigned long vmax, vmalloc_size, tmp;
-	unsigned long real_memory_size = 0;
-	int i;
-
-
-#ifdef CONFIG_ZFCPDUMP
-	if (ipl_info.type == IPL_TYPE_FCP_DUMP &&
-	    !OLDMEM_BASE && sclp_get_hsa_size()) {
-		memory_end = sclp_get_hsa_size();
-		memory_end_set = 1;
-	}
-#endif
-	memory_end &= PAGE_MASK;
-
-	/*
-	 * Make sure all chunks are MAX_ORDER aligned so we don't need the
-	 * extra checks that HOLES_IN_ZONE would require.
-	 */
-	for (i = 0; i < MEMORY_CHUNKS; i++) {
-		unsigned long start, end;
-		struct mem_chunk *chunk;
-		unsigned long align;
-
-		chunk = &memory_chunk[i];
-		if (!chunk->size)
-			continue;
-		align = 1UL << (MAX_ORDER + PAGE_SHIFT - 1);
-		start = (chunk->addr + align - 1) & ~(align - 1);
-		end = (chunk->addr + chunk->size) & ~(align - 1);
-		if (start >= end)
-			memset(chunk, 0, sizeof(*chunk));
-		else {
-			chunk->addr = start;
-			chunk->size = end - start;
-		}
-		real_memory_size = max(real_memory_size,
-				       chunk->addr + chunk->size);
-	}
 
 	/* Choose kernel address space layout: 2, 3, or 4 levels. */
 #ifdef CONFIG_64BIT
 	vmalloc_size = VMALLOC_END ?: (128UL << 30) - MODULES_LEN;
-	tmp = (memory_end ?: real_memory_size) / PAGE_SIZE;
+	tmp = (memory_end ?: max_physmem_end) / PAGE_SIZE;
 	tmp = tmp * (sizeof(struct page) + PAGE_SIZE) + vmalloc_size;
 	if (tmp <= (1UL << 42))
 		vmax = 1UL << 42;	/* 3-level kernel page table */
@@ -520,21 +473,11 @@ static void __init setup_memory_end(void)
 	vmemmap = (struct page *) tmp;
 
 	/* Take care that memory_end is set and <= vmemmap */
-	memory_end = min(memory_end ?: real_memory_size, tmp);
-
-	/* Fixup memory chunk array to fit into 0..memory_end */
-	for (i = 0; i < MEMORY_CHUNKS; i++) {
-		struct mem_chunk *chunk = &memory_chunk[i];
+	memory_end = min(memory_end ?: max_physmem_end, tmp);
+	max_pfn = max_low_pfn = PFN_DOWN(memory_end);
+	memblock_remove(memory_end, ULONG_MAX);
 
-		if (!chunk->size)
-			continue;
-		if (chunk->addr >= memory_end) {
-			memset(chunk, 0, sizeof(*chunk));
-			continue;
-		}
-		if (chunk->addr + chunk->size > memory_end)
-			chunk->size = memory_end - chunk->addr;
-	}
+	pr_notice("Max memory size: %luMB\n", memory_end >> 20);
 }
 
 static void __init setup_vmcoreinfo(void)
@@ -545,89 +488,6 @@ static void __init setup_vmcoreinfo(void)
 #ifdef CONFIG_CRASH_DUMP
 
 /*
- * Find suitable location for crashkernel memory
- */
-static unsigned long __init find_crash_base(unsigned long crash_size,
-					    char **msg)
-{
-	unsigned long crash_base;
-	struct mem_chunk *chunk;
-	int i;
-
-	if (memory_chunk[0].size < crash_size) {
-		*msg = "first memory chunk must be at least crashkernel size";
-		return 0;
-	}
-	if (OLDMEM_BASE && crash_size == OLDMEM_SIZE)
-		return OLDMEM_BASE;
-
-	for (i = MEMORY_CHUNKS - 1; i >= 0; i--) {
-		chunk = &memory_chunk[i];
-		if (chunk->size == 0)
-			continue;
-		if (chunk->type != CHUNK_READ_WRITE)
-			continue;
-		if (chunk->size < crash_size)
-			continue;
-		crash_base = (chunk->addr + chunk->size) - crash_size;
-		if (crash_base < crash_size)
-			continue;
-		if (crash_base < sclp_get_hsa_size())
-			continue;
-		if (crash_base < (unsigned long) INITRD_START + INITRD_SIZE)
-			continue;
-		return crash_base;
-	}
-	*msg = "no suitable area found";
-	return 0;
-}
-
-/*
- * Check if crash_base and crash_size is valid
- */
-static int __init verify_crash_base(unsigned long crash_base,
-				    unsigned long crash_size,
-				    char **msg)
-{
-	struct mem_chunk *chunk;
-	int i;
-
-	/*
-	 * Because we do the swap to zero, we must have at least 'crash_size'
-	 * bytes free space before crash_base
-	 */
-	if (crash_size > crash_base) {
-		*msg = "crashkernel offset must be greater than size";
-		return -EINVAL;
-	}
-
-	/* First memory chunk must be at least crash_size */
-	if (memory_chunk[0].size < crash_size) {
-		*msg = "first memory chunk must be at least crashkernel size";
-		return -EINVAL;
-	}
-	/* Check if we fit into the respective memory chunk */
-	for (i = 0; i < MEMORY_CHUNKS; i++) {
-		chunk = &memory_chunk[i];
-		if (chunk->size == 0)
-			continue;
-		if (crash_base < chunk->addr)
-			continue;
-		if (crash_base >= chunk->addr + chunk->size)
-			continue;
-		/* we have found the memory chunk */
-		if (crash_base + crash_size > chunk->addr + chunk->size) {
-			*msg = "selected memory chunk is too small for "
-				"crashkernel memory";
-			return -EINVAL;
-		}
-		return 0;
-	}
-	*msg = "invalid memory range specified";
-	return -EINVAL;
-}
-
-/*
  * When kdump is enabled, we have to ensure that no memory from
  * the area [0 - crashkernel memory size] and
  * [crashk_res.start - crashk_res.end] is set offline.
@@ -653,23 +513,44 @@ static struct notifier_block kdump_mem_nb = {
 #endif
 
 /*
+ * Make sure that the area behind memory_end is protected
+ */
+static void reserve_memory_end(void)
+{
+#ifdef CONFIG_ZFCPDUMP
+	if (ipl_info.type == IPL_TYPE_FCP_DUMP &&
+	    !OLDMEM_BASE && sclp_get_hsa_size()) {
+		memory_end = sclp_get_hsa_size();
+		memory_end &= PAGE_MASK;
+		memory_end_set = 1;
+	}
+#endif
+	if (!memory_end_set)
+		return;
+	memblock_reserve(memory_end, ULONG_MAX);
+}
+
+/*
  * Make sure that oldmem, where the dump is stored, is protected
  */
 static void reserve_oldmem(void)
 {
 #ifdef CONFIG_CRASH_DUMP
-	unsigned long real_size = 0;
-	int i;
-
-	if (!OLDMEM_BASE)
-		return;
-	for (i = 0; i < MEMORY_CHUNKS; i++) {
-		struct mem_chunk *chunk = &memory_chunk[i];
+	if (OLDMEM_BASE)
+		/* Forget all memory above the running kdump system */
+		memblock_reserve(OLDMEM_SIZE, (phys_addr_t)ULONG_MAX);
+#endif
+}
 
-		real_size = max(real_size, chunk->addr + chunk->size);
-	}
-	create_mem_hole(memory_chunk, OLDMEM_BASE, OLDMEM_SIZE);
-	create_mem_hole(memory_chunk, OLDMEM_SIZE, real_size - OLDMEM_SIZE);
+/*
+ * Make sure that oldmem, where the dump is stored, is protected
+ */
+static void remove_oldmem(void)
+{
+#ifdef CONFIG_CRASH_DUMP
+	if (OLDMEM_BASE)
+		/* Forget all memory above the running kdump system */
+		memblock_remove(OLDMEM_SIZE, (phys_addr_t)ULONG_MAX);
 #endif
 }
 
@@ -680,167 +561,132 @@ static void __init reserve_crashkernel(void)
 {
 #ifdef CONFIG_CRASH_DUMP
 	unsigned long long crash_base, crash_size;
-	char *msg = NULL;
+	phys_addr_t low, high;
 	int rc;
 
 	rc = parse_crashkernel(boot_command_line, memory_end, &crash_size,
 			       &crash_base);
-	if (rc || crash_size == 0)
-		return;
+
 	crash_base = ALIGN(crash_base, KEXEC_CRASH_MEM_ALIGN);
 	crash_size = ALIGN(crash_size, KEXEC_CRASH_MEM_ALIGN);
-	if (register_memory_notifier(&kdump_mem_nb))
+	if (rc || crash_size == 0)
 		return;
-	if (!crash_base)
-		crash_base = find_crash_base(crash_size, &msg);
-	if (!crash_base) {
-		pr_info("crashkernel reservation failed: %s\n", msg);
-		unregister_memory_notifier(&kdump_mem_nb);
+
+	if (memblock.memory.regions[0].size < crash_size) {
+		pr_info("crashkernel reservation failed: %s\n",
+			"first memory chunk must be at least crashkernel size");
 		return;
 	}
-	if (verify_crash_base(crash_base, crash_size, &msg)) {
-		pr_info("crashkernel reservation failed: %s\n", msg);
-		unregister_memory_notifier(&kdump_mem_nb);
+
+	low = crash_base ?: OLDMEM_BASE;
+	high = low + crash_size;
+	if (low >= OLDMEM_BASE && high <= OLDMEM_BASE + OLDMEM_SIZE) {
+		/* The crashkernel fits into OLDMEM, reuse OLDMEM */
+		crash_base = low;
+	} else {
+		/* Find suitable area in free memory */
+		low = max_t(unsigned long, crash_size, sclp_get_hsa_size());
+		high = crash_base ? crash_base + crash_size : ULONG_MAX;
+
+		if (crash_base && crash_base < low) {
+			pr_info("crashkernel reservation failed: %s\n",
+				"crash_base too low");
+			return;
+		}
+		low = crash_base ?: low;
+		crash_base = memblock_find_in_range(low, high, crash_size,
+						    KEXEC_CRASH_MEM_ALIGN);
+	}
+
+	if (!crash_base) {
+		pr_info("crashkernel reservation failed: %s\n",
+			"no suitable area found");
 		return;
 	}
+
+	if (register_memory_notifier(&kdump_mem_nb))
+		return;
+
 	if (!OLDMEM_BASE && MACHINE_IS_VM)
 		diag10_range(PFN_DOWN(crash_base), PFN_DOWN(crash_size));
 	crashk_res.start = crash_base;
 	crashk_res.end = crash_base + crash_size - 1;
 	insert_resource(&iomem_resource, &crashk_res);
-	create_mem_hole(memory_chunk, crash_base, crash_size);
+	memblock_remove(crash_base, crash_size);
 	pr_info("Reserving %lluMB of memory at %lluMB "
 		"for crashkernel (System RAM: %luMB)\n",
-		crash_size >> 20, crash_base >> 20, memory_end >> 20);
+		crash_size >> 20, crash_base >> 20,
+		(unsigned long)memblock.memory.total_size >> 20);
 	os_info_crashkernel_add(crash_base, crash_size);
 #endif
 }
 
-static void __init setup_memory(void)
+/*
+ * Reserve the initrd from being used by memblock
+ */
+static void __init reserve_initrd(void)
 {
-        unsigned long bootmap_size;
-	unsigned long start_pfn, end_pfn;
-	int i;
+#ifdef CONFIG_BLK_DEV_INITRD
+	initrd_start = INITRD_START;
+	initrd_end = initrd_start + INITRD_SIZE;
+	memblock_reserve(INITRD_START, INITRD_SIZE);
+#endif
+}
 
-	/*
-	 * partially used pages are not usable - thus
-	 * we are rounding upwards:
-	 */
+/*
+ * Check for initrd being in usable memory
+ */
+static void __init check_initrd(void)
+{
+#ifdef CONFIG_BLK_DEV_INITRD
+	if (INITRD_START && INITRD_SIZE &&
+	    !memblock_is_region_memory(INITRD_START, INITRD_SIZE)) {
+		pr_err("initrd does not fit memory.\n");
+		memblock_free(INITRD_START, INITRD_SIZE);
+		initrd_start = initrd_end = 0;
+	}
+#endif
+}
+
+/*
+ * Reserve all kernel text
+ */
+static void __init reserve_kernel(void)
+{
+	unsigned long start_pfn;
 	start_pfn = PFN_UP(__pa(&_end));
-	end_pfn = max_pfn = PFN_DOWN(memory_end);
 
-#ifdef CONFIG_BLK_DEV_INITRD
 	/*
-	 * Move the initrd in case the bitmap of the bootmem allocater
-	 * would overwrite it.
+	 * Reserve memory used for lowcore/command line/kernel image.
 	 */
+	memblock_reserve(0, (unsigned long)_ehead);
+	memblock_reserve((unsigned long)_stext, PFN_PHYS(start_pfn)
+			 - (unsigned long)_stext);
+}
 
-	if (INITRD_START && INITRD_SIZE) {
-		unsigned long bmap_size;
-		unsigned long start;
-
-		bmap_size = bootmem_bootmap_pages(end_pfn - start_pfn + 1);
-		bmap_size = PFN_PHYS(bmap_size);
-
-		if (PFN_PHYS(start_pfn) + bmap_size > INITRD_START) {
-			start = PFN_PHYS(start_pfn) + bmap_size + PAGE_SIZE;
-
+static void __init reserve_elfcorehdr(void)
+{
 #ifdef CONFIG_CRASH_DUMP
-			if (OLDMEM_BASE) {
-				/* Move initrd behind kdump oldmem */
-				if (start + INITRD_SIZE > OLDMEM_BASE &&
-				    start < OLDMEM_BASE + OLDMEM_SIZE)
-					start = OLDMEM_BASE + OLDMEM_SIZE;
-			}
-#endif
-			if (start + INITRD_SIZE > memory_end) {
-				pr_err("initrd extends beyond end of "
-				       "memory (0x%08lx > 0x%08lx) "
-				       "disabling initrd\n",
-				       start + INITRD_SIZE, memory_end);
-				INITRD_START = INITRD_SIZE = 0;
-			} else {
-				pr_info("Moving initrd (0x%08lx -> "
-					"0x%08lx, size: %ld)\n",
-					INITRD_START, start, INITRD_SIZE);
-				memmove((void *) start, (void *) INITRD_START,
-					INITRD_SIZE);
-				INITRD_START = start;
-			}
-		}
-	}
+	if (is_kdump_kernel())
+		memblock_reserve(elfcorehdr_addr - OLDMEM_BASE,
+				 PAGE_ALIGN(elfcorehdr_size));
 #endif
+}
 
-	/*
-	 * Initialize the boot-time allocator
-	 */
-	bootmap_size = init_bootmem(start_pfn, end_pfn);
+static void __init setup_memory(void)
+{
+	struct memblock_region *reg;
 
 	/*
-	 * Register RAM areas with the bootmem allocator.
+	 * Init storage key for present memory
 	 */
-
-	for (i = 0; i < MEMORY_CHUNKS; i++) {
-		unsigned long start_chunk, end_chunk, pfn;
-
-		if (!memory_chunk[i].size)
-			continue;
-		start_chunk = PFN_DOWN(memory_chunk[i].addr);
-		end_chunk = start_chunk + PFN_DOWN(memory_chunk[i].size);
-		end_chunk = min(end_chunk, end_pfn);
-		if (start_chunk >= end_chunk)
-			continue;
-		memblock_add_node(PFN_PHYS(start_chunk),
-				  PFN_PHYS(end_chunk - start_chunk), 0);
-		pfn = max(start_chunk, start_pfn);
-		storage_key_init_range(PFN_PHYS(pfn), PFN_PHYS(end_chunk));
+	for_each_memblock(memory, reg) {
+		storage_key_init_range(reg->base, reg->base + reg->size);
 	}
-
 	psw_set_key(PAGE_DEFAULT_KEY);
 
-	free_bootmem_with_active_regions(0, max_pfn);
-
-	/*
-	 * Reserve memory used for lowcore/command line/kernel image.
-	 */
-	reserve_bootmem(0, (unsigned long)_ehead, BOOTMEM_DEFAULT);
-	reserve_bootmem((unsigned long)_stext,
-			PFN_PHYS(start_pfn) - (unsigned long)_stext,
-			BOOTMEM_DEFAULT);
-	/*
-	 * Reserve the bootmem bitmap itself as well. We do this in two
-	 * steps (first step was init_bootmem()) because this catches
-	 * the (very unlikely) case of us accidentally initializing the
-	 * bootmem allocator with an invalid RAM area.
-	 */
-	reserve_bootmem(start_pfn << PAGE_SHIFT, bootmap_size,
-			BOOTMEM_DEFAULT);
-
-#ifdef CONFIG_CRASH_DUMP
-	if (crashk_res.start)
-		reserve_bootmem(crashk_res.start,
-				crashk_res.end - crashk_res.start + 1,
-				BOOTMEM_DEFAULT);
-	if (is_kdump_kernel())
-		reserve_bootmem(elfcorehdr_addr - OLDMEM_BASE,
-				PAGE_ALIGN(elfcorehdr_size), BOOTMEM_DEFAULT);
-#endif
-#ifdef CONFIG_BLK_DEV_INITRD
-	if (INITRD_START && INITRD_SIZE) {
-		if (INITRD_START + INITRD_SIZE <= memory_end) {
-			reserve_bootmem(INITRD_START, INITRD_SIZE,
-					BOOTMEM_DEFAULT);
-			initrd_start = INITRD_START;
-			initrd_end = initrd_start + INITRD_SIZE;
-		} else {
-			pr_err("initrd extends beyond end of "
-			       "memory (0x%08lx > 0x%08lx) "
-			       "disabling initrd\n",
-			       initrd_start + INITRD_SIZE, memory_end);
-			initrd_start = initrd_end = 0;
-		}
-	}
-#endif
+	/* Only cosmetics */
+	memblock_enforce_memory_limit(memblock_end_of_DRAM());
 }
 
 /*
@@ -989,23 +835,46 @@ void __init setup_arch(char **cmdline_p)
 
         ROOT_DEV = Root_RAM0;
 
+	/* Is init_mm really needed? */
 	init_mm.start_code = PAGE_OFFSET;
 	init_mm.end_code = (unsigned long) &_etext;
 	init_mm.end_data = (unsigned long) &_edata;
 	init_mm.brk = (unsigned long) &_end;
 
 	parse_early_param();
-	detect_memory_layout(memory_chunk, memory_end);
 	os_info_init();
 	setup_ipl();
+
+	/* Do some memory reservations *before* memory is added to memblock */
+	reserve_memory_end();
 	reserve_oldmem();
+	reserve_kernel();
+	reserve_initrd();
+	reserve_elfcorehdr();
+	memblock_allow_resize();
+
+	/* Get information about *all* installed memory */
+	detect_memory_memblock();
+
+	remove_oldmem();
+
+	/*
+	 * Make sure all chunks are MAX_ORDER aligned so we don't need the
+	 * extra checks that HOLES_IN_ZONE would require.
+	 *
+	 * Is this still required?
+	 */
+	memblock_trim_memory(1UL << (MAX_ORDER - 1 + PAGE_SHIFT));
+
 	setup_memory_end();
-	reserve_crashkernel();
 	setup_memory();
+
+	check_initrd();
+	reserve_crashkernel();
+
 	setup_resources();
 	setup_vmcoreinfo();
 	setup_lowcore();
-
 	smp_fill_possible_mask();
         cpu_init();
 	s390_init_cpu_topology();
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index 6298fed..fa3b8cd 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -333,7 +333,9 @@ static void __init alloc_masks(struct sysinfo_15_1_x *info,
 		nr_masks *= info->mag[TOPOLOGY_NR_MAG - offset - 1 - i];
 	nr_masks = max(nr_masks, 1);
 	for (i = 0; i < nr_masks; i++) {
-		mask->next = alloc_bootmem(sizeof(struct mask_info));
+		mask->next = alloc_bootmem_align(
+			roundup_pow_of_two(sizeof(struct mask_info)),
+			roundup_pow_of_two(sizeof(struct mask_info)));
 		mask = mask->next;
 	}
 }
diff --git a/arch/s390/mm/mem_detect.c b/arch/s390/mm/mem_detect.c
index cca3882..5535cfe 100644
--- a/arch/s390/mm/mem_detect.c
+++ b/arch/s390/mm/mem_detect.c
@@ -6,130 +6,60 @@
 
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/memblock.h>
+#include <linux/init.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
 #include <asm/ipl.h>
 #include <asm/sclp.h>
 #include <asm/setup.h>
 
 #define ADDR2G (1ULL << 31)
 
-static void find_memory_chunks(struct mem_chunk chunk[], unsigned long maxsize)
+#define CHUNK_READ_WRITE 0
+#define CHUNK_READ_ONLY  1
+
+static inline void memblock_physmem_add(phys_addr_t start, phys_addr_t size)
+{
+	memblock_add_range(&memblock.memory, start, size, 0, 0);
+	memblock_add_range(&memblock.physmem, start, size, 0, 0);
+}
+
+void __init detect_memory_memblock(void)
 {
 	unsigned long long memsize, rnmax, rzm;
-	unsigned long addr = 0, size;
-	int i = 0, type;
+	unsigned long addr, size;
+	int type;
 
 	rzm = sclp_get_rzm();
 	rnmax = sclp_get_rnmax();
 	memsize = rzm * rnmax;
 	if (!rzm)
 		rzm = 1ULL << 17;
-	if (sizeof(long) == 4) {
+	if (IS_ENABLED(CONFIG_32BIT)) {
 		rzm = min(ADDR2G, rzm);
-		memsize = memsize ? min(ADDR2G, memsize) : ADDR2G;
+		memsize = min(ADDR2G, memsize);
 	}
-	if (maxsize)
-		memsize = memsize ? min((unsigned long)memsize, maxsize) : maxsize;
+	max_physmem_end = memsize;
+	addr = 0;
+	/* keep memblock lists close to the kernel */
+	memblock_set_bottom_up(true);
 	do {
 		size = 0;
 		type = tprot(addr);
 		do {
 			size += rzm;
-			if (memsize && addr + size >= memsize)
+			if (max_physmem_end && addr + size >= max_physmem_end)
 				break;
 		} while (type == tprot(addr + size));
 		if (type == CHUNK_READ_WRITE || type == CHUNK_READ_ONLY) {
-			if (memsize && (addr + size > memsize))
-				size = memsize - addr;
-			chunk[i].addr = addr;
-			chunk[i].size = size;
-			chunk[i].type = type;
-			i++;
+			if (max_physmem_end && (addr + size > max_physmem_end))
+				size = max_physmem_end - addr;
+			memblock_physmem_add(addr, size);
 		}
 		addr += size;
-	} while (addr < memsize && i < MEMORY_CHUNKS);
-}
-
-/**
- * detect_memory_layout - fill mem_chunk array with memory layout data
- * @chunk: mem_chunk array to be filled
- * @maxsize: maximum address where memory detection should stop
- *
- * Fills the passed in memory chunk array with the memory layout of the
- * machine. The array must have a size of at least MEMORY_CHUNKS and will
- * be fully initialized afterwards.
- * If the maxsize paramater has a value > 0 memory detection will stop at
- * that address. It is guaranteed that all chunks have an ending address
- * that is smaller than maxsize.
- * If maxsize is 0 all memory will be detected.
- */
-void detect_memory_layout(struct mem_chunk chunk[], unsigned long maxsize)
-{
-	unsigned long flags, flags_dat, cr0;
-
-	memset(chunk, 0, MEMORY_CHUNKS * sizeof(struct mem_chunk));
-	/*
-	 * Disable IRQs, DAT and low address protection so tprot does the
-	 * right thing and we don't get scheduled away with low address
-	 * protection disabled.
-	 */
-	local_irq_save(flags);
-	flags_dat = __arch_local_irq_stnsm(0xfb);
-	/*
-	 * In case DAT was enabled, make sure chunk doesn't reside in vmalloc
-	 * space. We have disabled DAT and any access to vmalloc area will
-	 * cause an exception.
-	 * If DAT was disabled we are called from early ipl code.
-	 */
-	if (test_bit(5, &flags_dat)) {
-		if (WARN_ON_ONCE(is_vmalloc_or_module_addr(chunk)))
-			goto out;
-	}
-	__ctl_store(cr0, 0, 0);
-	__ctl_clear_bit(0, 28);
-	find_memory_chunks(chunk, maxsize);
-	__ctl_load(cr0, 0, 0);
-out:
-	__arch_local_irq_ssm(flags_dat);
-	local_irq_restore(flags);
-}
-EXPORT_SYMBOL(detect_memory_layout);
-
-/*
- * Create memory hole with given address and size.
- */
-void create_mem_hole(struct mem_chunk mem_chunk[], unsigned long addr,
-		     unsigned long size)
-{
-	int i;
-
-	for (i = 0; i < MEMORY_CHUNKS; i++) {
-		struct mem_chunk *chunk = &mem_chunk[i];
-
-		if (chunk->size == 0)
-			continue;
-		if (addr > chunk->addr + chunk->size)
-			continue;
-		if (addr + size <= chunk->addr)
-			continue;
-		/* Split */
-		if ((addr > chunk->addr) &&
-		    (addr + size < chunk->addr + chunk->size)) {
-			struct mem_chunk *new = chunk + 1;
-
-			memmove(new, chunk, (MEMORY_CHUNKS-i-1) * sizeof(*new));
-			new->addr = addr + size;
-			new->size = chunk->addr + chunk->size - new->addr;
-			chunk->size = addr - chunk->addr;
-			continue;
-		} else if ((addr <= chunk->addr) &&
-			   (addr + size >= chunk->addr + chunk->size)) {
-			memmove(chunk, chunk + 1, (MEMORY_CHUNKS-i-1) * sizeof(*chunk));
-			memset(&mem_chunk[MEMORY_CHUNKS-1], 0, sizeof(*chunk));
-		} else if (addr + size < chunk->addr + chunk->size) {
-			chunk->size =  chunk->addr + chunk->size - addr - size;
-			chunk->addr = addr + size;
-		} else if (addr > chunk->addr) {
-			chunk->size = addr - chunk->addr;
-		}
-	}
+	} while (addr < max_physmem_end);
+	memblock_set_bottom_up(false);
+	if (!max_physmem_end)
+		max_physmem_end = memblock_end_of_DRAM();
 }
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index 72b04de..fe9012a 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -10,6 +10,7 @@
 #include <linux/list.h>
 #include <linux/hugetlb.h>
 #include <linux/slab.h>
+#include <linux/memblock.h>
 #include <asm/pgalloc.h>
 #include <asm/pgtable.h>
 #include <asm/setup.h>
@@ -66,7 +67,8 @@ static pte_t __ref *vmem_pte_alloc(unsigned long address)
 	if (slab_is_available())
 		pte = (pte_t *) page_table_alloc(&init_mm, address);
 	else
-		pte = alloc_bootmem(PTRS_PER_PTE * sizeof(pte_t));
+		pte = alloc_bootmem_align(PTRS_PER_PTE * sizeof(pte_t),
+					  PTRS_PER_PTE * sizeof(pte_t));
 	if (!pte)
 		return NULL;
 	clear_table((unsigned long *) pte, _PAGE_INVALID,
@@ -371,16 +373,14 @@ out:
 void __init vmem_map_init(void)
 {
 	unsigned long ro_start, ro_end;
-	unsigned long start, end;
-	int i;
+	struct memblock_region *reg;
+	phys_addr_t start, end;
 
 	ro_start = PFN_ALIGN((unsigned long)&_stext);
 	ro_end = (unsigned long)&_eshared & PAGE_MASK;
-	for (i = 0; i < MEMORY_CHUNKS; i++) {
-		if (!memory_chunk[i].size)
-			continue;
-		start = memory_chunk[i].addr;
-		end = memory_chunk[i].addr + memory_chunk[i].size;
+	for_each_memblock(memory, reg) {
+		start = reg->base;
+		end = reg->base + reg->size - 1;
 		if (start >= ro_end || end <= ro_start)
 			vmem_add_mem(start, end - start, 0);
 		else if (start >= ro_start && end <= ro_end)
@@ -400,23 +400,21 @@ void __init vmem_map_init(void)
 }
 
 /*
- * Convert memory chunk array to a memory segment list so there is a single
- * list that contains both r/w memory and shared memory segments.
+ * Convert memblock.memory  to a memory segment list so there is a single
+ * list that contains all memory segments.
  */
 static int __init vmem_convert_memory_chunk(void)
 {
+	struct memblock_region *reg;
 	struct memory_segment *seg;
-	int i;
 
 	mutex_lock(&vmem_mutex);
-	for (i = 0; i < MEMORY_CHUNKS; i++) {
-		if (!memory_chunk[i].size)
-			continue;
+	for_each_memblock(memory, reg) {
 		seg = kzalloc(sizeof(*seg), GFP_KERNEL);
 		if (!seg)
 			panic("Out of memory...\n");
-		seg->start = memory_chunk[i].addr;
-		seg->size = memory_chunk[i].size;
+		seg->start = reg->base;
+		seg->size = reg->size;
 		insert_memory_segment(seg);
 	}
 	mutex_unlock(&vmem_mutex);
diff --git a/drivers/s390/char/zcore.c b/drivers/s390/char/zcore.c
index 3d8e4d6..1884653 100644
--- a/drivers/s390/char/zcore.c
+++ b/drivers/s390/char/zcore.c
@@ -17,6 +17,8 @@
 #include <linux/miscdevice.h>
 #include <linux/debugfs.h>
 #include <linux/module.h>
+#include <linux/memblock.h>
+
 #include <asm/asm-offsets.h>
 #include <asm/ipl.h>
 #include <asm/sclp.h>
@@ -411,33 +413,24 @@ static ssize_t zcore_memmap_read(struct file *filp, char __user *buf,
 				 size_t count, loff_t *ppos)
 {
 	return simple_read_from_buffer(buf, count, ppos, filp->private_data,
-				       MEMORY_CHUNKS * CHUNK_INFO_SIZE);
+				       memblock.memory.cnt * CHUNK_INFO_SIZE);
 }
 
 static int zcore_memmap_open(struct inode *inode, struct file *filp)
 {
-	int i;
+	struct memblock_region *reg;
 	char *buf;
-	struct mem_chunk *chunk_array;
+	int i = 0;
 
-	chunk_array = kzalloc(MEMORY_CHUNKS * sizeof(struct mem_chunk),
-			      GFP_KERNEL);
-	if (!chunk_array)
-		return -ENOMEM;
-	detect_memory_layout(chunk_array, 0);
-	buf = kzalloc(MEMORY_CHUNKS * CHUNK_INFO_SIZE, GFP_KERNEL);
+	buf = kzalloc(memblock.memory.cnt * CHUNK_INFO_SIZE, GFP_KERNEL);
 	if (!buf) {
-		kfree(chunk_array);
 		return -ENOMEM;
 	}
-	for (i = 0; i < MEMORY_CHUNKS; i++) {
-		sprintf(buf + (i * CHUNK_INFO_SIZE), "%016llx %016llx ",
-			(unsigned long long) chunk_array[i].addr,
-			(unsigned long long) chunk_array[i].size);
-		if (chunk_array[i].size == 0)
-			break;
+	for_each_memblock(memory, reg) {
+		sprintf(buf + (i++ * CHUNK_INFO_SIZE), "%016llx %016llx ",
+			(unsigned long long) reg->base,
+			(unsigned long long) reg->size);
 	}
-	kfree(chunk_array);
 	filp->private_data = buf;
 	return nonseekable_open(inode, filp);
 }
@@ -593,21 +586,12 @@ static int __init check_sdias(void)
 
 static int __init get_mem_info(unsigned long *mem, unsigned long *end)
 {
-	int i;
-	struct mem_chunk *chunk_array;
+	struct memblock_region *reg;
 
-	chunk_array = kzalloc(MEMORY_CHUNKS * sizeof(struct mem_chunk),
-			      GFP_KERNEL);
-	if (!chunk_array)
-		return -ENOMEM;
-	detect_memory_layout(chunk_array, 0);
-	for (i = 0; i < MEMORY_CHUNKS; i++) {
-		if (chunk_array[i].size == 0)
-			break;
-		*mem += chunk_array[i].size;
-		*end = max(*end, chunk_array[i].addr + chunk_array[i].size);
+	for_each_memblock(memory, reg) {
+		*mem += reg->size;
+		*end = max_t(unsigned long, *end, reg->base + reg->size);
 	}
-	kfree(chunk_array);
 	return 0;
 }
 
-- 
cgit v0.10.2


From 5b3f683e694a835f5dfdab06102be1a50604c3b7 Mon Sep 17 00:00:00 2001
From: Philipp Hachtmann <phacht@linux.vnet.ibm.com>
Date: Mon, 7 Apr 2014 18:25:23 +0200
Subject: s390/spinlock: cleanup spinlock code

Improve the spinlock code in several aspects:
 - Have _raw_compare_and_swap return true if the operation has been
   successful instead of returning the old value.
 - Remove the "volatile" from arch_spinlock_t and arch_rwlock_t
 - Rename 'owner_cpu' to 'lock'
 - Add helper functions arch_spin_trylock_once / arch_spin_tryrelease_once

[ Martin Schwidefsky: patch breakdown and code beautification ]

Signed-off-by: Philipp Hachtmann <phacht@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h
index 83e5d216..b60212a 100644
--- a/arch/s390/include/asm/spinlock.h
+++ b/arch/s390/include/asm/spinlock.h
@@ -14,15 +14,16 @@
 extern int spin_retry;
 
 static inline int
-_raw_compare_and_swap(volatile unsigned int *lock,
-		      unsigned int old, unsigned int new)
+_raw_compare_and_swap(unsigned int *lock, unsigned int old, unsigned int new)
 {
+	unsigned int old_expected = old;
+
 	asm volatile(
 		"	cs	%0,%3,%1"
 		: "=d" (old), "=Q" (*lock)
 		: "0" (old), "d" (new), "Q" (*lock)
 		: "cc", "memory" );
-	return old;
+	return old == old_expected;
 }
 
 /*
@@ -34,57 +35,66 @@ _raw_compare_and_swap(volatile unsigned int *lock,
  * (the type definitions are in asm/spinlock_types.h)
  */
 
-#define arch_spin_is_locked(x) ((x)->owner_cpu != 0)
-#define arch_spin_unlock_wait(lock) \
-	do { while (arch_spin_is_locked(lock)) \
-		 arch_spin_relax(lock); } while (0)
-
-extern void arch_spin_lock_wait(arch_spinlock_t *);
-extern void arch_spin_lock_wait_flags(arch_spinlock_t *, unsigned long flags);
-extern int arch_spin_trylock_retry(arch_spinlock_t *);
-extern void arch_spin_relax(arch_spinlock_t *lock);
+void arch_spin_lock_wait(arch_spinlock_t *);
+int arch_spin_trylock_retry(arch_spinlock_t *);
+void arch_spin_relax(arch_spinlock_t *);
+void arch_spin_lock_wait_flags(arch_spinlock_t *, unsigned long flags);
 
 static inline int arch_spin_value_unlocked(arch_spinlock_t lock)
 {
-	return lock.owner_cpu == 0;
+	return lock.lock == 0;
 }
 
-static inline void arch_spin_lock(arch_spinlock_t *lp)
+static inline int arch_spin_is_locked(arch_spinlock_t *lp)
+{
+	return ACCESS_ONCE(lp->lock) != 0;
+}
+
+static inline int arch_spin_trylock_once(arch_spinlock_t *lp)
 {
-	int old;
+	unsigned int new = ~smp_processor_id();
 
-	old = _raw_compare_and_swap(&lp->owner_cpu, 0, ~smp_processor_id());
-	if (likely(old == 0))
-		return;
-	arch_spin_lock_wait(lp);
+	return _raw_compare_and_swap(&lp->lock, 0, new);
 }
 
-static inline void arch_spin_lock_flags(arch_spinlock_t *lp,
-					 unsigned long flags)
+static inline int arch_spin_tryrelease_once(arch_spinlock_t *lp)
 {
-	int old;
+	unsigned int old = ~smp_processor_id();
 
-	old = _raw_compare_and_swap(&lp->owner_cpu, 0, ~smp_processor_id());
-	if (likely(old == 0))
-		return;
-	arch_spin_lock_wait_flags(lp, flags);
+	return _raw_compare_and_swap(&lp->lock, old, 0);
 }
 
-static inline int arch_spin_trylock(arch_spinlock_t *lp)
+static inline void arch_spin_lock(arch_spinlock_t *lp)
 {
-	int old;
+	if (unlikely(!arch_spin_trylock_once(lp)))
+		arch_spin_lock_wait(lp);
+}
 
-	old = _raw_compare_and_swap(&lp->owner_cpu, 0, ~smp_processor_id());
-	if (likely(old == 0))
-		return 1;
-	return arch_spin_trylock_retry(lp);
+static inline void arch_spin_lock_flags(arch_spinlock_t *lp,
+					unsigned long flags)
+{
+	if (unlikely(!arch_spin_trylock_once(lp)))
+		arch_spin_lock_wait_flags(lp, flags);
+}
+
+static inline int arch_spin_trylock(arch_spinlock_t *lp)
+{
+	if (unlikely(!arch_spin_trylock_once(lp)))
+		return arch_spin_trylock_retry(lp);
+	return 1;
 }
 
 static inline void arch_spin_unlock(arch_spinlock_t *lp)
 {
-	_raw_compare_and_swap(&lp->owner_cpu, lp->owner_cpu, 0);
+	arch_spin_tryrelease_once(lp);
 }
-		
+
+static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
+{
+	while (arch_spin_is_locked(lock))
+		arch_spin_relax(lock);
+}
+
 /*
  * Read-write spinlocks, allowing multiple readers
  * but only one writer.
@@ -119,7 +129,7 @@ static inline void arch_read_lock(arch_rwlock_t *rw)
 {
 	unsigned int old;
 	old = rw->lock & 0x7fffffffU;
-	if (_raw_compare_and_swap(&rw->lock, old, old + 1) != old)
+	if (!_raw_compare_and_swap(&rw->lock, old, old + 1))
 		_raw_read_lock_wait(rw);
 }
 
@@ -127,30 +137,28 @@ static inline void arch_read_lock_flags(arch_rwlock_t *rw, unsigned long flags)
 {
 	unsigned int old;
 	old = rw->lock & 0x7fffffffU;
-	if (_raw_compare_and_swap(&rw->lock, old, old + 1) != old)
+	if (!_raw_compare_and_swap(&rw->lock, old, old + 1))
 		_raw_read_lock_wait_flags(rw, flags);
 }
 
 static inline void arch_read_unlock(arch_rwlock_t *rw)
 {
-	unsigned int old, cmp;
+	unsigned int old;
 
-	old = rw->lock;
 	do {
-		cmp = old;
-		old = _raw_compare_and_swap(&rw->lock, old, old - 1);
-	} while (cmp != old);
+		old = ACCESS_ONCE(rw->lock);
+	} while (!_raw_compare_and_swap(&rw->lock, old, old - 1));
 }
 
 static inline void arch_write_lock(arch_rwlock_t *rw)
 {
-	if (unlikely(_raw_compare_and_swap(&rw->lock, 0, 0x80000000) != 0))
+	if (unlikely(!_raw_compare_and_swap(&rw->lock, 0, 0x80000000)))
 		_raw_write_lock_wait(rw);
 }
 
 static inline void arch_write_lock_flags(arch_rwlock_t *rw, unsigned long flags)
 {
-	if (unlikely(_raw_compare_and_swap(&rw->lock, 0, 0x80000000) != 0))
+	if (unlikely(!_raw_compare_and_swap(&rw->lock, 0, 0x80000000)))
 		_raw_write_lock_wait_flags(rw, flags);
 }
 
@@ -163,14 +171,14 @@ static inline int arch_read_trylock(arch_rwlock_t *rw)
 {
 	unsigned int old;
 	old = rw->lock & 0x7fffffffU;
-	if (likely(_raw_compare_and_swap(&rw->lock, old, old + 1) == old))
+	if (likely(_raw_compare_and_swap(&rw->lock, old, old + 1)))
 		return 1;
 	return _raw_read_trylock_retry(rw);
 }
 
 static inline int arch_write_trylock(arch_rwlock_t *rw)
 {
-	if (likely(_raw_compare_and_swap(&rw->lock, 0, 0x80000000) == 0))
+	if (likely(_raw_compare_and_swap(&rw->lock, 0, 0x80000000)))
 		return 1;
 	return _raw_write_trylock_retry(rw);
 }
diff --git a/arch/s390/include/asm/spinlock_types.h b/arch/s390/include/asm/spinlock_types.h
index 9c76656..b2cd6ff 100644
--- a/arch/s390/include/asm/spinlock_types.h
+++ b/arch/s390/include/asm/spinlock_types.h
@@ -6,13 +6,13 @@
 #endif
 
 typedef struct {
-	volatile unsigned int owner_cpu;
+	unsigned int lock;
 } __attribute__ ((aligned (4))) arch_spinlock_t;
 
-#define __ARCH_SPIN_LOCK_UNLOCKED	{ 0 }
+#define __ARCH_SPIN_LOCK_UNLOCKED { .lock = 0, }
 
 typedef struct {
-	volatile unsigned int lock;
+	unsigned int lock;
 } arch_rwlock_t;
 
 #define __ARCH_RW_LOCK_UNLOCKED		{ 0 }
diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c
index f709983..4a3b33b 100644
--- a/arch/s390/lib/spinlock.c
+++ b/arch/s390/lib/spinlock.c
@@ -31,22 +31,21 @@ void arch_spin_lock_wait(arch_spinlock_t *lp)
 	unsigned int owner;
 
 	while (1) {
-		owner = lp->owner_cpu;
+		owner = lp->lock;
 		if (!owner || smp_vcpu_scheduled(~owner)) {
 			for (count = spin_retry; count > 0; count--) {
 				if (arch_spin_is_locked(lp))
 					continue;
-				if (_raw_compare_and_swap(&lp->owner_cpu, 0,
-							  cpu) == 0)
+				if (_raw_compare_and_swap(&lp->lock, 0, cpu))
 					return;
 			}
 			if (MACHINE_IS_LPAR)
 				continue;
 		}
-		owner = lp->owner_cpu;
+		owner = lp->lock;
 		if (owner)
 			smp_yield_cpu(~owner);
-		if (_raw_compare_and_swap(&lp->owner_cpu, 0, cpu) == 0)
+		if (_raw_compare_and_swap(&lp->lock, 0, cpu))
 			return;
 	}
 }
@@ -60,57 +59,55 @@ void arch_spin_lock_wait_flags(arch_spinlock_t *lp, unsigned long flags)
 
 	local_irq_restore(flags);
 	while (1) {
-		owner = lp->owner_cpu;
+		owner = lp->lock;
 		if (!owner || smp_vcpu_scheduled(~owner)) {
 			for (count = spin_retry; count > 0; count--) {
 				if (arch_spin_is_locked(lp))
 					continue;
 				local_irq_disable();
-				if (_raw_compare_and_swap(&lp->owner_cpu, 0,
-							  cpu) == 0)
+				if (_raw_compare_and_swap(&lp->lock, 0, cpu))
 					return;
 				local_irq_restore(flags);
 			}
 			if (MACHINE_IS_LPAR)
 				continue;
 		}
-		owner = lp->owner_cpu;
+		owner = lp->lock;
 		if (owner)
 			smp_yield_cpu(~owner);
 		local_irq_disable();
-		if (_raw_compare_and_swap(&lp->owner_cpu, 0, cpu) == 0)
+		if (_raw_compare_and_swap(&lp->lock, 0, cpu))
 			return;
 		local_irq_restore(flags);
 	}
 }
 EXPORT_SYMBOL(arch_spin_lock_wait_flags);
 
+void arch_spin_relax(arch_spinlock_t *lp)
+{
+	unsigned int cpu = lp->lock;
+	if (cpu != 0) {
+		if (MACHINE_IS_VM || MACHINE_IS_KVM ||
+		    !smp_vcpu_scheduled(~cpu))
+			smp_yield_cpu(~cpu);
+	}
+}
+EXPORT_SYMBOL(arch_spin_relax);
+
 int arch_spin_trylock_retry(arch_spinlock_t *lp)
 {
-	unsigned int cpu = ~smp_processor_id();
 	int count;
 
 	for (count = spin_retry; count > 0; count--) {
 		if (arch_spin_is_locked(lp))
 			continue;
-		if (_raw_compare_and_swap(&lp->owner_cpu, 0, cpu) == 0)
+		if (arch_spin_trylock_once(lp))
 			return 1;
 	}
 	return 0;
 }
 EXPORT_SYMBOL(arch_spin_trylock_retry);
 
-void arch_spin_relax(arch_spinlock_t *lock)
-{
-	unsigned int cpu = lock->owner_cpu;
-	if (cpu != 0) {
-		if (MACHINE_IS_VM || MACHINE_IS_KVM ||
-		    !smp_vcpu_scheduled(~cpu))
-			smp_yield_cpu(~cpu);
-	}
-}
-EXPORT_SYMBOL(arch_spin_relax);
-
 void _raw_read_lock_wait(arch_rwlock_t *rw)
 {
 	unsigned int old;
@@ -124,7 +121,7 @@ void _raw_read_lock_wait(arch_rwlock_t *rw)
 		if (!arch_read_can_lock(rw))
 			continue;
 		old = rw->lock & 0x7fffffffU;
-		if (_raw_compare_and_swap(&rw->lock, old, old + 1) == old)
+		if (_raw_compare_and_swap(&rw->lock, old, old + 1))
 			return;
 	}
 }
@@ -145,7 +142,7 @@ void _raw_read_lock_wait_flags(arch_rwlock_t *rw, unsigned long flags)
 			continue;
 		old = rw->lock & 0x7fffffffU;
 		local_irq_disable();
-		if (_raw_compare_and_swap(&rw->lock, old, old + 1) == old)
+		if (_raw_compare_and_swap(&rw->lock, old, old + 1))
 			return;
 	}
 }
@@ -160,7 +157,7 @@ int _raw_read_trylock_retry(arch_rwlock_t *rw)
 		if (!arch_read_can_lock(rw))
 			continue;
 		old = rw->lock & 0x7fffffffU;
-		if (_raw_compare_and_swap(&rw->lock, old, old + 1) == old)
+		if (_raw_compare_and_swap(&rw->lock, old, old + 1))
 			return 1;
 	}
 	return 0;
@@ -178,7 +175,7 @@ void _raw_write_lock_wait(arch_rwlock_t *rw)
 		}
 		if (!arch_write_can_lock(rw))
 			continue;
-		if (_raw_compare_and_swap(&rw->lock, 0, 0x80000000) == 0)
+		if (_raw_compare_and_swap(&rw->lock, 0, 0x80000000))
 			return;
 	}
 }
@@ -197,7 +194,7 @@ void _raw_write_lock_wait_flags(arch_rwlock_t *rw, unsigned long flags)
 		if (!arch_write_can_lock(rw))
 			continue;
 		local_irq_disable();
-		if (_raw_compare_and_swap(&rw->lock, 0, 0x80000000) == 0)
+		if (_raw_compare_and_swap(&rw->lock, 0, 0x80000000))
 			return;
 	}
 }
@@ -210,7 +207,7 @@ int _raw_write_trylock_retry(arch_rwlock_t *rw)
 	while (count-- > 0) {
 		if (!arch_write_can_lock(rw))
 			continue;
-		if (_raw_compare_and_swap(&rw->lock, 0, 0x80000000) == 0)
+		if (_raw_compare_and_swap(&rw->lock, 0, 0x80000000))
 			return 1;
 	}
 	return 0;
-- 
cgit v0.10.2


From 6c8cd5bbda7e6be166cf2e2dd4be5890193e17ac Mon Sep 17 00:00:00 2001
From: Philipp Hachtmann <phacht@linux.vnet.ibm.com>
Date: Mon, 7 Apr 2014 18:25:23 +0200
Subject: s390/spinlock: optimize spinlock code sequence

Use lowcore constant to improve the code generated for spinlocks.

[ Martin Schwidefsky: patch breakdown and code beautification ]

Signed-off-by: Philipp Hachtmann <phacht@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index bbf8141..3b476eb 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -139,7 +139,7 @@ struct _lowcore {
 	__u32	percpu_offset;			/* 0x02f0 */
 	__u32	machine_flags;			/* 0x02f4 */
 	__u32	ftrace_func;			/* 0x02f8 */
-	__u8	pad_0x02fc[0x0300-0x02fc];	/* 0x02fc */
+	__u32	spinlock_lockval;		/* 0x02fc */
 
 	/* Interrupt response block */
 	__u8	irb[64];			/* 0x0300 */
@@ -285,7 +285,8 @@ struct _lowcore {
 	__u64	machine_flags;			/* 0x0388 */
 	__u64	ftrace_func;			/* 0x0390 */
 	__u64	gmap;				/* 0x0398 */
-	__u8	pad_0x03a0[0x0400-0x03a0];	/* 0x03a0 */
+	__u32	spinlock_lockval;		/* 0x03a0 */
+	__u8	pad_0x03a0[0x0400-0x03a4];	/* 0x03a4 */
 
 	/* Interrupt response block. */
 	__u8	irb[64];			/* 0x0400 */
diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h
index b60212a..5a0b288 100644
--- a/arch/s390/include/asm/spinlock.h
+++ b/arch/s390/include/asm/spinlock.h
@@ -11,6 +11,8 @@
 
 #include <linux/smp.h>
 
+#define SPINLOCK_LOCKVAL (S390_lowcore.spinlock_lockval)
+
 extern int spin_retry;
 
 static inline int
@@ -40,6 +42,11 @@ int arch_spin_trylock_retry(arch_spinlock_t *);
 void arch_spin_relax(arch_spinlock_t *);
 void arch_spin_lock_wait_flags(arch_spinlock_t *, unsigned long flags);
 
+static inline u32 arch_spin_lockval(int cpu)
+{
+	return ~cpu;
+}
+
 static inline int arch_spin_value_unlocked(arch_spinlock_t lock)
 {
 	return lock.lock == 0;
@@ -52,16 +59,12 @@ static inline int arch_spin_is_locked(arch_spinlock_t *lp)
 
 static inline int arch_spin_trylock_once(arch_spinlock_t *lp)
 {
-	unsigned int new = ~smp_processor_id();
-
-	return _raw_compare_and_swap(&lp->lock, 0, new);
+	return _raw_compare_and_swap(&lp->lock, 0, SPINLOCK_LOCKVAL);
 }
 
 static inline int arch_spin_tryrelease_once(arch_spinlock_t *lp)
 {
-	unsigned int old = ~smp_processor_id();
-
-	return _raw_compare_and_swap(&lp->lock, old, 0);
+	return _raw_compare_and_swap(&lp->lock, SPINLOCK_LOCKVAL, 0);
 }
 
 static inline void arch_spin_lock(arch_spinlock_t *lp)
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 1f5536c..7c5b05f 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -373,6 +373,10 @@ static void __init setup_lowcore(void)
 	mem_assign_absolute(S390_lowcore.restart_source, lc->restart_source);
 	mem_assign_absolute(S390_lowcore.restart_psw, lc->restart_psw);
 
+#ifdef CONFIG_SMP
+	lc->spinlock_lockval = arch_spin_lockval(0);
+#endif
+
 	set_prefix((u32)(unsigned long) lc);
 	lowcore_ptr[0] = lc;
 }
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 86e65ec..1f0b474 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -170,6 +170,7 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
 	lc->panic_stack = pcpu->panic_stack + PAGE_SIZE
 		- STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
 	lc->cpu_nr = cpu;
+	lc->spinlock_lockval = arch_spin_lockval(cpu);
 #ifndef CONFIG_64BIT
 	if (MACHINE_HAS_IEEE) {
 		lc->extended_save_area_addr = get_zeroed_page(GFP_KERNEL);
@@ -226,6 +227,7 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu)
 	cpumask_set_cpu(cpu, mm_cpumask(&init_mm));
 	atomic_inc(&init_mm.context.attach_count);
 	lc->cpu_nr = cpu;
+	lc->spinlock_lockval = arch_spin_lockval(cpu);
 	lc->percpu_offset = __per_cpu_offset[cpu];
 	lc->kernel_asce = S390_lowcore.kernel_asce;
 	lc->machine_flags = S390_lowcore.machine_flags;
@@ -809,6 +811,7 @@ void __init smp_cpus_done(unsigned int max_cpus)
 void __init smp_setup_processor_id(void)
 {
 	S390_lowcore.cpu_nr = 0;
+	S390_lowcore.spinlock_lockval = arch_spin_lockval(0);
 }
 
 /*
diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c
index 4a3b33b..3ca9de4 100644
--- a/arch/s390/lib/spinlock.c
+++ b/arch/s390/lib/spinlock.c
@@ -27,7 +27,7 @@ __setup("spin_retry=", spin_retry_setup);
 void arch_spin_lock_wait(arch_spinlock_t *lp)
 {
 	int count = spin_retry;
-	unsigned int cpu = ~smp_processor_id();
+	unsigned int cpu = SPINLOCK_LOCKVAL;
 	unsigned int owner;
 
 	while (1) {
@@ -54,7 +54,7 @@ EXPORT_SYMBOL(arch_spin_lock_wait);
 void arch_spin_lock_wait_flags(arch_spinlock_t *lp, unsigned long flags)
 {
 	int count = spin_retry;
-	unsigned int cpu = ~smp_processor_id();
+	unsigned int cpu = SPINLOCK_LOCKVAL;
 	unsigned int owner;
 
 	local_irq_restore(flags);
-- 
cgit v0.10.2


From 71c40f7f8fe1d06ed1e1eafdcf56345eb8c17bf2 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Fri, 11 Apr 2014 11:45:40 +0200
Subject: s390/cmma: remove "cmma disable" code in case of dump again

This is not necessary anymore, since the offending code is gone with
the conversion to the memblock code.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>-
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/mm/page-states.c b/arch/s390/mm/page-states.c
index 27c50f4..a90d45e 100644
--- a/arch/s390/mm/page-states.c
+++ b/arch/s390/mm/page-states.c
@@ -12,8 +12,6 @@
 #include <linux/mm.h>
 #include <linux/gfp.h>
 #include <linux/init.h>
-#include <asm/setup.h>
-#include <asm/ipl.h>
 
 #define ESSA_SET_STABLE		1
 #define ESSA_SET_UNUSED		2
@@ -43,14 +41,6 @@ void __init cmma_init(void)
 
 	if (!cmma_flag)
 		return;
-	/*
-	 * Disable CMM for dump, otherwise  the tprot based memory
-	 * detection can fail because of unstable pages.
-	 */
-	if (OLDMEM_BASE || ipl_info.type == IPL_TYPE_FCP_DUMP) {
-		cmma_flag = 0;
-		return;
-	}
 	asm volatile(
 		"       .insn rrf,0xb9ab0000,%1,%1,0,0\n"
 		"0:     la      %0,0\n"
-- 
cgit v0.10.2


From bf28a5970de3a349ec05cea45ca5272404b6609f Mon Sep 17 00:00:00 2001
From: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Date: Mon, 14 Apr 2014 10:38:05 +0200
Subject: s390/dump: Remove CONFIG_ZFCPDUMP

Currently there are two s390 kernel dump config options "CONFIG_ZFCPDUMP"
and "CONFIG_CRASH_DUMP". In order to keep things simple and because the
"CONFIG_ZFCPDUMP" option already has a dependency to "CONFIG_CRASH_DUMP"
remove the CONFIG_ZFCPDUMP option.

Signed-off-by: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Reviewed-by: Eric Farman <farman@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/Documentation/s390/zfcpdump.txt b/Documentation/s390/zfcpdump.txt
index cf45d27..1cae248 100644
--- a/Documentation/s390/zfcpdump.txt
+++ b/Documentation/s390/zfcpdump.txt
@@ -21,7 +21,7 @@ standalone dump format. It can be used in the same way as e.g. /dev/mem. The
 dump format defines a 4K header followed by plain uncompressed memory. The
 register sets are stored in the prefix pages of the respective cpus. To build a
 dump enabled kernel with the zcore driver, the kernel config option
-CONFIG_ZFCPDUMP has to be set. When reading from "zcore/mem", the part of
+CONFIG_CRASH_DUMP has to be set. When reading from "zcore/mem", the part of
 memory, which has been saved by hardware is read by the driver via the SCLP
 hardware interface. The second part is just copied from the non overwritten real
 memory.
@@ -32,7 +32,7 @@ SCSI disk.
 
 To build a zfcpdump kernel use the following settings in your kernel
 configuration:
- * CONFIG_ZFCPDUMP=y
+ * CONFIG_CRASH_DUMP=y
  * Enable ZFCP driver
  * Enable SCSI driver
  * Enable ext2 and ext3 filesystems
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index d239e6a..bb63499 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -593,21 +593,14 @@ config CRASH_DUMP
 	bool "kernel crash dumps"
 	depends on 64BIT && SMP
 	select KEXEC
-	select ZFCPDUMP
 	help
 	  Generate crash dump after being started by kexec.
 	  Crash dump kernels are loaded in the main kernel with kexec-tools
 	  into a specially reserved region and then later executed after
 	  a crash by kdump/kexec.
-	  For more details see Documentation/kdump/kdump.txt
-
-config ZFCPDUMP
-	def_bool n
-	prompt "zfcpdump support"
-	depends on 64BIT && SMP
-	help
-	  Select this option if you want to build an zfcpdump enabled kernel.
 	  Refer to <file:Documentation/s390/zfcpdump.txt> for more details on this.
+	  This option also enables s390 zfcpdump.
+	  See also <file:Documentation/s390/zfcpdump.txt>
 
 endmenu
 
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 7c5b05f..1e2264b 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -211,7 +211,7 @@ static void __init conmode_default(void)
 	}
 }
 
-#ifdef CONFIG_ZFCPDUMP
+#ifdef CONFIG_CRASH_DUMP
 static void __init setup_zfcpdump(void)
 {
 	if (ipl_info.type != IPL_TYPE_FCP_DUMP)
@@ -223,7 +223,7 @@ static void __init setup_zfcpdump(void)
 }
 #else
 static inline void setup_zfcpdump(void) {}
-#endif /* CONFIG_ZFCPDUMP */
+#endif /* CONFIG_CRASH_DUMP */
 
  /*
  * Reboot, halt and power_off stubs. They just call _machine_restart,
@@ -521,7 +521,7 @@ static struct notifier_block kdump_mem_nb = {
  */
 static void reserve_memory_end(void)
 {
-#ifdef CONFIG_ZFCPDUMP
+#ifdef CONFIG_CRASH_DUMP
 	if (ipl_info.type == IPL_TYPE_FCP_DUMP &&
 	    !OLDMEM_BASE && sclp_get_hsa_size()) {
 		memory_end = sclp_get_hsa_size();
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 1f0b474..4b8b7f6 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -521,7 +521,7 @@ void smp_ctl_clear_bit(int cr, int bit)
 }
 EXPORT_SYMBOL(smp_ctl_clear_bit);
 
-#if defined(CONFIG_ZFCPDUMP) || defined(CONFIG_CRASH_DUMP)
+#ifdef CONFIG_CRASH_DUMP
 
 static void __init smp_get_save_area(int cpu, u16 address)
 {
@@ -536,14 +536,12 @@ static void __init smp_get_save_area(int cpu, u16 address)
 	save_area = dump_save_area_create(cpu);
 	if (!save_area)
 		panic("could not allocate memory for save area\n");
-#ifdef CONFIG_CRASH_DUMP
 	if (address == boot_cpu_address) {
 		/* Copy the registers of the boot cpu. */
 		copy_oldmem_page(1, (void *) save_area, sizeof(*save_area),
 				 SAVE_AREA_BASE - PAGE_SIZE, 0);
 		return;
 	}
-#endif
 	/* Get the registers of a non-boot cpu. */
 	__pcpu_sigp_relax(address, SIGP_STOP_AND_STORE_STATUS, 0, NULL);
 	memcpy_real(save_area, lc + SAVE_AREA_BASE, sizeof(*save_area));
@@ -560,11 +558,11 @@ int smp_store_status(int cpu)
 	return 0;
 }
 
-#else /* CONFIG_ZFCPDUMP || CONFIG_CRASH_DUMP */
+#else /* CONFIG_CRASH_DUMP */
 
 static inline void smp_get_save_area(int cpu, u16 address) { }
 
-#endif /* CONFIG_ZFCPDUMP || CONFIG_CRASH_DUMP */
+#endif /* CONFIG_CRASH_DUMP */
 
 void smp_cpu_set_polarization(int cpu, int val)
 {
diff --git a/drivers/s390/char/Makefile b/drivers/s390/char/Makefile
index b69ab17..629fcc2 100644
--- a/drivers/s390/char/Makefile
+++ b/drivers/s390/char/Makefile
@@ -33,4 +33,4 @@ obj-$(CONFIG_MONWRITER) += monwriter.o
 obj-$(CONFIG_S390_VMUR) += vmur.o
 
 zcore_mod-objs := sclp_sdias.o zcore.o
-obj-$(CONFIG_ZFCPDUMP) += zcore_mod.o
+obj-$(CONFIG_CRASH_DUMP) += zcore_mod.o
-- 
cgit v0.10.2


From 7d4c738ce522692cc328869a6633704e535d7124 Mon Sep 17 00:00:00 2001
From: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Date: Mon, 14 Apr 2014 11:56:07 +0200
Subject: s390/zfcpdump: Update documentation in zfcpdump.txt

Do the following changes:

 - Document new /proc/vmcore interface
 - Document partition dump external initramfs with s390-tools-1.24.0
 - Remove initramfs config file because initramfs is now built automatically
   in s390-tools
 - Replace description of kernel config options with "make zfcpdump_defconfig"
 - Some editorial changes

Signed-off-by: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/Documentation/s390/zfcpdump.txt b/Documentation/s390/zfcpdump.txt
index 1cae248..dc929be 100644
--- a/Documentation/s390/zfcpdump.txt
+++ b/Documentation/s390/zfcpdump.txt
@@ -1,15 +1,15 @@
-s390 SCSI dump tool (zfcpdump)
+The s390 SCSI dump tool (zfcpdump)
 
 System z machines (z900 or higher) provide hardware support for creating system
 dumps on SCSI disks. The dump process is initiated by booting a dump tool, which
 has to create a dump of the current (probably crashed) Linux image. In order to
 not overwrite memory of the crashed Linux with data of the dump tool, the
-hardware saves some memory plus the register sets of the boot cpu before the
+hardware saves some memory plus the register sets of the boot CPU before the
 dump tool is loaded. There exists an SCLP hardware interface to obtain the saved
 memory afterwards. Currently 32 MB are saved.
 
 This zfcpdump implementation consists of a Linux dump kernel together with
-a userspace dump tool, which are loaded together into the saved memory region
+a user space dump tool, which are loaded together into the saved memory region
 below 32 MB. zfcpdump is installed on a SCSI disk using zipl (as contained in
 the s390-tools package) to make the device bootable. The operator of a Linux
 system can then trigger a SCSI dump by booting the SCSI disk, where zfcpdump
@@ -19,68 +19,33 @@ The kernel part of zfcpdump is implemented as a debugfs file under "zcore/mem",
 which exports memory and registers of the crashed Linux in an s390
 standalone dump format. It can be used in the same way as e.g. /dev/mem. The
 dump format defines a 4K header followed by plain uncompressed memory. The
-register sets are stored in the prefix pages of the respective cpus. To build a
+register sets are stored in the prefix pages of the respective CPUs. To build a
 dump enabled kernel with the zcore driver, the kernel config option
 CONFIG_CRASH_DUMP has to be set. When reading from "zcore/mem", the part of
 memory, which has been saved by hardware is read by the driver via the SCLP
 hardware interface. The second part is just copied from the non overwritten real
 memory.
 
-The userspace application of zfcpdump can reside e.g. in an intitramfs or an
-initrd. It reads from zcore/mem and writes the system dump to a file on a
-SCSI disk.
+Since kernel version 3.12 also the /proc/vmcore file can also be used to access
+the dump.
 
-To build a zfcpdump kernel use the following settings in your kernel
-configuration:
- * CONFIG_CRASH_DUMP=y
- * Enable ZFCP driver
- * Enable SCSI driver
- * Enable ext2 and ext3 filesystems
- * Disable as many features as possible to keep the kernel small.
-   E.g. network support is not needed at all.
+To get a valid zfcpdump kernel configuration use "make zfcpdump_defconfig".
 
-To use the zfcpdump userspace application in an initramfs you have to do the
-following:
+The s390 zipl tool looks for the zfcpdump kernel and optional initrd/initramfs
+under the following locations:
 
- * Copy the zfcpdump executable somewhere into your Linux tree.
-   E.g. to "arch/s390/boot/zfcpdump. If you do not want to include
-   shared libraries, compile the tool with the "-static" gcc option.
- * If you want to include e2fsck, add it to your source tree, too. The zfcpdump
-   application attempts to start /sbin/e2fsck from the ramdisk.
- * Use an initramfs config file like the following:
+* kernel:  <zfcpdump directory>/zfcpdump.image
+* ramdisk: <zfcpdump directory>/zfcpdump.rd
 
-   dir /dev 755 0 0
-   nod /dev/console 644 0 0 c 5 1
-   nod /dev/null 644 0 0 c 1 3
-   nod /dev/sda1 644 0 0 b 8 1
-   nod /dev/sda2 644 0 0 b 8 2
-   nod /dev/sda3 644 0 0 b 8 3
-   nod /dev/sda4 644 0 0 b 8 4
-   nod /dev/sda5 644 0 0 b 8 5
-   nod /dev/sda6 644 0 0 b 8 6
-   nod /dev/sda7 644 0 0 b 8 7
-   nod /dev/sda8 644 0 0 b 8 8
-   nod /dev/sda9 644 0 0 b 8 9
-   nod /dev/sda10 644 0 0 b 8 10
-   nod /dev/sda11 644 0 0 b 8 11
-   nod /dev/sda12 644 0 0 b 8 12
-   nod /dev/sda13 644 0 0 b 8 13
-   nod /dev/sda14 644 0 0 b 8 14
-   nod /dev/sda15 644 0 0 b 8 15
-   file /init arch/s390/boot/zfcpdump 755 0 0
-   file /sbin/e2fsck arch/s390/boot/e2fsck 755 0 0
-   dir /proc 755 0 0
-   dir /sys 755 0 0
-   dir /mnt 755 0 0
-   dir /sbin 755 0 0
+The zfcpdump directory is defined in the s390-tools package.
 
- * Issue "make image" to build the zfcpdump image with initramfs.
+The user space application of zfcpdump can reside in an intitramfs or an
+initrd. It can also be included in a built-in kernel initramfs. The application
+reads from /proc/vmcore or zcore/mem and writes the system dump to a SCSI disk.
 
-In a Linux distribution the zfcpdump enabled kernel image must be copied to
-/usr/share/zfcpdump/zfcpdump.image, where the s390 zipl tool is looking for the
-dump kernel when preparing a SCSI dump disk.
-
-If you use a ramdisk copy it to "/usr/share/zfcpdump/zfcpdump.rd".
+The s390-tools package version 1.24.0 and above builds an external zfcpdump
+initramfs with a user space application that writes the dump to a SCSI
+partition.
 
 For more information on how to use zfcpdump refer to the s390 'Using the Dump
 Tools book', which is available from
-- 
cgit v0.10.2


From 0d234a28960413bfa91aa7030774ae7758fca44a Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Mon, 14 Apr 2014 10:41:29 -0700
Subject: s390: fix new ccwgroup.h kernel-doc warning

Fix new s390 kernel-doc warning:

Warning(arch/s390/include/asm/ccwgroup.h:27): No description found for parameter 'ungroup_work'

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc:	Sebastian Ott <sebott@linux.vnet.ibm.com>
Cc:	Peter Oberparleiter <oberpar@linux.vnet.ibm.com>
Cc:	linux-s390@vger.kernel.org
Cc:	Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc:	Heiko Carstens <heiko.carstens@de.ibm.com>
Cc:	linux390@de.ibm.com
Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/include/asm/ccwgroup.h b/arch/s390/include/asm/ccwgroup.h
index ebc2913..057ce0c 100644
--- a/arch/s390/include/asm/ccwgroup.h
+++ b/arch/s390/include/asm/ccwgroup.h
@@ -10,6 +10,8 @@ struct ccw_driver;
  * @count: number of attached slave devices
  * @dev: embedded device structure
  * @cdev: variable number of slave devices, allocated as needed
+ * @ungroup_work: work to be done when a ccwgroup notifier has action
+ *	type %BUS_NOTIFY_UNBIND_DRIVER
  */
 struct ccwgroup_device {
 	enum {
-- 
cgit v0.10.2


From f4192bf2dc5ae3b24ffb004e771397e737ef01e0 Mon Sep 17 00:00:00 2001
From: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Date: Tue, 15 Apr 2014 11:25:28 +0200
Subject: s390/smp: Avoid busy loop after halt and "begin" on z/VM

Currently the smp_stop_cpu() function for SMP kernels enters a busy
loop when "begin" is entered on the z/VM console after Linux is halted.
To avoid this behavior, use the non-SMP variant of smp_stop_cpu()
which stops the CPU again after "begin" is entered. As a side
effect we now have consistent behavior for SMP and non-SMP Linux.

Signed-off-by: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h
index 21703f8..4f13079 100644
--- a/arch/s390/include/asm/smp.h
+++ b/arch/s390/include/asm/smp.h
@@ -30,7 +30,6 @@ extern int smp_store_status(int cpu);
 extern int smp_vcpu_scheduled(int cpu);
 extern void smp_yield_cpu(int cpu);
 extern void smp_yield(void);
-extern void smp_stop_cpu(void);
 extern void smp_cpu_set_polarization(int cpu, int val);
 extern int smp_cpu_get_polarization(int cpu);
 extern void smp_fill_possible_mask(void);
@@ -54,6 +53,8 @@ static inline void smp_yield_cpu(int cpu) { }
 static inline void smp_yield(void) { }
 static inline void smp_fill_possible_mask(void) { }
 
+#endif /* CONFIG_SMP */
+
 static inline void smp_stop_cpu(void)
 {
 	u16 pcpu = stap();
@@ -64,8 +65,6 @@ static inline void smp_stop_cpu(void)
 	}
 }
 
-#endif /* CONFIG_SMP */
-
 #ifdef CONFIG_HOTPLUG_CPU
 extern int smp_rescan_cpus(void);
 extern void __noreturn cpu_die(void);
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 4b8b7f6..243c7e5 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -405,15 +405,6 @@ void smp_send_stop(void)
 }
 
 /*
- * Stop the current cpu.
- */
-void smp_stop_cpu(void)
-{
-	pcpu_sigp_retry(pcpu_devices + smp_processor_id(), SIGP_STOP, 0);
-	for (;;) ;
-}
-
-/*
  * This is the main routine where commands issued by other
  * cpus are handled.
  */
-- 
cgit v0.10.2


From beef560b4cdfafb2211a856e1d722540f5151933 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Mon, 14 Apr 2014 15:11:26 +0200
Subject: s390/uaccess: simplify control register updates

Always switch to the kernel ASCE in switch_mm. Load the secondary
space ASCE in finish_arch_post_lock_switch after checking that
any pending page table operations have completed. The primary
ASCE is loaded in entry[64].S. With this the update_primary_asce
call can be removed from the switch_to macro and from the start
of switch_mm function. Remove the load_primary argument from
update_user_asce/clear_user_asce, rename update_user_asce to
set_user_asce and rename update_primary_asce to load_kernel_asce.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/include/asm/futex.h b/arch/s390/include/asm/futex.h
index 69cf5b5..a4811aa 100644
--- a/arch/s390/include/asm/futex.h
+++ b/arch/s390/include/asm/futex.h
@@ -29,7 +29,7 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
 	int cmparg = (encoded_op << 20) >> 20;
 	int oldval = 0, newval, ret;
 
-	update_primary_asce(current);
+	load_kernel_asce();
 	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
 		oparg = 1 << oparg;
 
@@ -79,7 +79,7 @@ static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
 {
 	int ret;
 
-	update_primary_asce(current);
+	load_kernel_asce();
 	asm volatile(
 		"   sacf 256\n"
 		"0: cs   %1,%4,0(%5)\n"
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index 71be346..93ec0c8 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -30,33 +30,31 @@ static inline int init_new_context(struct task_struct *tsk,
 
 #define destroy_context(mm)             do { } while (0)
 
-static inline void update_user_asce(struct mm_struct *mm, int load_primary)
+static inline void set_user_asce(struct mm_struct *mm)
 {
 	pgd_t *pgd = mm->pgd;
 
 	S390_lowcore.user_asce = mm->context.asce_bits | __pa(pgd);
-	if (load_primary)
-		__ctl_load(S390_lowcore.user_asce, 1, 1);
 	set_fs(current->thread.mm_segment);
+	set_thread_flag(TIF_ASCE);
 }
 
-static inline void clear_user_asce(struct mm_struct *mm, int load_primary)
+static inline void clear_user_asce(void)
 {
 	S390_lowcore.user_asce = S390_lowcore.kernel_asce;
 
-	if (load_primary)
-		__ctl_load(S390_lowcore.user_asce, 1, 1);
+	__ctl_load(S390_lowcore.user_asce, 1, 1);
 	__ctl_load(S390_lowcore.user_asce, 7, 7);
 }
 
-static inline void update_primary_asce(struct task_struct *tsk)
+static inline void load_kernel_asce(void)
 {
 	unsigned long asce;
 
 	__ctl_store(asce, 1, 1);
 	if (asce != S390_lowcore.kernel_asce)
 		__ctl_load(S390_lowcore.kernel_asce, 1, 1);
-	set_tsk_thread_flag(tsk, TIF_ASCE);
+	set_thread_flag(TIF_ASCE);
 }
 
 static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
@@ -64,25 +62,17 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 {
 	int cpu = smp_processor_id();
 
-	update_primary_asce(tsk);
 	if (prev == next)
 		return;
 	if (MACHINE_HAS_TLB_LC)
 		cpumask_set_cpu(cpu, &next->context.cpu_attach_mask);
-	if (atomic_inc_return(&next->context.attach_count) >> 16) {
-		/* Delay update_user_asce until all TLB flushes are done. */
-		set_tsk_thread_flag(tsk, TIF_TLB_WAIT);
-		/* Clear old ASCE by loading the kernel ASCE. */
-		clear_user_asce(next, 0);
-	} else {
-		cpumask_set_cpu(cpu, mm_cpumask(next));
-		update_user_asce(next, 0);
-		if (next->context.flush_mm)
-			/* Flush pending TLBs */
-			__tlb_flush_mm(next);
-	}
+	/* Clear old ASCE by loading the kernel ASCE. */
+	__ctl_load(S390_lowcore.kernel_asce, 1, 1);
+	__ctl_load(S390_lowcore.kernel_asce, 7, 7);
+	/* Delay loading of the new ASCE to control registers CR1 & CR7 */
+	set_thread_flag(TIF_ASCE);
+	atomic_inc(&next->context.attach_count);
 	atomic_dec(&prev->context.attach_count);
-	WARN_ON(atomic_read(&prev->context.attach_count) < 0);
 	if (MACHINE_HAS_TLB_LC)
 		cpumask_clear_cpu(cpu, &prev->context.cpu_attach_mask);
 }
@@ -93,15 +83,14 @@ static inline void finish_arch_post_lock_switch(void)
 	struct task_struct *tsk = current;
 	struct mm_struct *mm = tsk->mm;
 
-	if (!test_tsk_thread_flag(tsk, TIF_TLB_WAIT))
+	if (!mm)
 		return;
 	preempt_disable();
-	clear_tsk_thread_flag(tsk, TIF_TLB_WAIT);
 	while (atomic_read(&mm->context.attach_count) >> 16)
 		cpu_relax();
 
 	cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
-	update_user_asce(mm, 0);
+	set_user_asce(mm);
 	if (mm->context.flush_mm)
 		__tlb_flush_mm(mm);
 	preempt_enable();
@@ -113,7 +102,9 @@ static inline void finish_arch_post_lock_switch(void)
 static inline void activate_mm(struct mm_struct *prev,
                                struct mm_struct *next)
 {
-        switch_mm(prev, next, current);
+	switch_mm(prev, next, current);
+	cpumask_set_cpu(smp_processor_id(), mm_cpumask(next));
+	set_user_asce(next);
 }
 
 static inline void arch_dup_mmap(struct mm_struct *oldmm,
diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h
index e759181..29c81f8 100644
--- a/arch/s390/include/asm/switch_to.h
+++ b/arch/s390/include/asm/switch_to.h
@@ -132,7 +132,6 @@ static inline void restore_access_regs(unsigned int *acrs)
 		update_cr_regs(next);					\
 	}								\
 	prev = __switch_to(prev,next);					\
-	update_primary_asce(current);					\
 } while (0)
 
 #define finish_arch_switch(prev) do {					     \
diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h
index 50630e6..f3e5cad 100644
--- a/arch/s390/include/asm/thread_info.h
+++ b/arch/s390/include/asm/thread_info.h
@@ -81,8 +81,7 @@ static inline struct thread_info *current_thread_info(void)
 #define TIF_NOTIFY_RESUME	1	/* callback before returning to user */
 #define TIF_SIGPENDING		2	/* signal pending */
 #define TIF_NEED_RESCHED	3	/* rescheduling necessary */
-#define TIF_TLB_WAIT		4	/* wait for TLB flush completion */
-#define TIF_ASCE		5	/* primary asce needs fixup / uaccess */
+#define TIF_ASCE		5	/* user asce needs fixup / uaccess */
 #define TIF_PER_TRAP		6	/* deliver sigtrap on return to user */
 #define TIF_MCCK_PENDING	7	/* machine check handling is pending */
 #define TIF_SYSCALL_TRACE	8	/* syscall trace active */
@@ -99,7 +98,6 @@ static inline struct thread_info *current_thread_info(void)
 #define _TIF_NOTIFY_RESUME	(1<<TIF_NOTIFY_RESUME)
 #define _TIF_SIGPENDING		(1<<TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED	(1<<TIF_NEED_RESCHED)
-#define _TIF_TLB_WAIT		(1<<TIF_TLB_WAIT)
 #define _TIF_ASCE		(1<<TIF_ASCE)
 #define _TIF_PER_TRAP		(1<<TIF_PER_TRAP)
 #define _TIF_MCCK_PENDING	(1<<TIF_MCCK_PENDING)
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 1662038..7006bfd 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -43,7 +43,7 @@ _TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
 		 _TIF_MCCK_PENDING | _TIF_ASCE)
 _TIF_TRACE    = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \
 		 _TIF_SYSCALL_TRACEPOINT)
-_TIF_TRANSFER = (_TIF_MCCK_PENDING | _TIF_TLB_WAIT)
+_TIF_TRANSFER = (_TIF_MCCK_PENDING | _TIF_ASCE)
 
 STACK_SHIFT = PAGE_SHIFT + THREAD_ORDER
 STACK_SIZE  = 1 << STACK_SHIFT
diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S
index 5963e43..d15e7bf 100644
--- a/arch/s390/kernel/entry64.S
+++ b/arch/s390/kernel/entry64.S
@@ -48,7 +48,7 @@ _TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
 		 _TIF_MCCK_PENDING | _TIF_ASCE)
 _TIF_TRACE    = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \
 		 _TIF_SYSCALL_TRACEPOINT)
-_TIF_TRANSFER = (_TIF_MCCK_PENDING | _TIF_TLB_WAIT)
+_TIF_TRANSFER = (_TIF_MCCK_PENDING | _TIF_ASCE)
 
 #define BASED(name) name-system_call(%r13)
 
diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c
index 7416efe..53dd5d7 100644
--- a/arch/s390/lib/uaccess.c
+++ b/arch/s390/lib/uaccess.c
@@ -76,7 +76,7 @@ static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr,
 {
 	unsigned long tmp1, tmp2;
 
-	update_primary_asce(current);
+	load_kernel_asce();
 	tmp1 = -256UL;
 	asm volatile(
 		"   sacf  0\n"
@@ -159,7 +159,7 @@ static inline unsigned long copy_to_user_mvcs(void __user *ptr, const void *x,
 {
 	unsigned long tmp1, tmp2;
 
-	update_primary_asce(current);
+	load_kernel_asce();
 	tmp1 = -256UL;
 	asm volatile(
 		"   sacf  0\n"
@@ -225,7 +225,7 @@ static inline unsigned long copy_in_user_mvc(void __user *to, const void __user
 {
 	unsigned long tmp1;
 
-	update_primary_asce(current);
+	load_kernel_asce();
 	asm volatile(
 		"   sacf  256\n"
 		"  "AHI"  %0,-1\n"
@@ -292,7 +292,7 @@ static inline unsigned long clear_user_xc(void __user *to, unsigned long size)
 {
 	unsigned long tmp1, tmp2;
 
-	update_primary_asce(current);
+	load_kernel_asce();
 	asm volatile(
 		"   sacf  256\n"
 		"  "AHI"  %0,-1\n"
@@ -358,7 +358,7 @@ unsigned long __strnlen_user(const char __user *src, unsigned long size)
 {
 	if (unlikely(!size))
 		return 0;
-	update_primary_asce(current);
+	load_kernel_asce();
 	return strnlen_user_srst(src, size);
 }
 EXPORT_SYMBOL(__strnlen_user);
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index d7cfd57..7881d4e 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -53,8 +53,10 @@ static void __crst_table_upgrade(void *arg)
 {
 	struct mm_struct *mm = arg;
 
-	if (current->active_mm == mm)
-		update_user_asce(mm, 1);
+	if (current->active_mm == mm) {
+		clear_user_asce();
+		set_user_asce(mm);
+	}
 	__tlb_flush_local();
 }
 
@@ -108,7 +110,7 @@ void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
 	pgd_t *pgd;
 
 	if (current->active_mm == mm) {
-		clear_user_asce(mm, 1);
+		clear_user_asce();
 		__tlb_flush_mm(mm);
 	}
 	while (mm->context.asce_limit > limit) {
@@ -134,7 +136,7 @@ void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
 		crst_table_free(mm, (unsigned long *) pgd);
 	}
 	if (current->active_mm == mm)
-		update_user_asce(mm, 1);
+		set_user_asce(mm);
 }
 #endif
 
-- 
cgit v0.10.2


From d3a73acbc26a4a81a01a35fd162973e53d0386f5 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Tue, 15 Apr 2014 12:55:07 +0200
Subject: s390: split TIF bits into CIF, PIF and TIF bits

The oi and ni instructions used in entry[64].S to set and clear bits
in the thread-flags are not guaranteed to be atomic in regard to other
CPUs. Split the TIF bits into CPU, pt_regs and thread-info specific
bits. Updates on the TIF bits are done with atomic instructions,
updates on CPU and pt_regs bits are done with non-atomic instructions.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index 3b476eb..26e03ce 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -93,7 +93,9 @@ struct _lowcore {
 	__u32	save_area_sync[8];		/* 0x0200 */
 	__u32	save_area_async[8];		/* 0x0220 */
 	__u32	save_area_restart[1];		/* 0x0240 */
-	__u8	pad_0x0244[0x0248-0x0244];	/* 0x0244 */
+
+	/* CPU flags. */
+	__u32	cpu_flags;			/* 0x0244 */
 
 	/* Return psws. */
 	psw_t	return_psw;			/* 0x0248 */
@@ -237,7 +239,9 @@ struct _lowcore {
 	__u64	save_area_sync[8];		/* 0x0200 */
 	__u64	save_area_async[8];		/* 0x0240 */
 	__u64	save_area_restart[1];		/* 0x0280 */
-	__u8	pad_0x0288[0x0290-0x0288];	/* 0x0288 */
+
+	/* CPU flags. */
+	__u64	cpu_flags;			/* 0x0288 */
 
 	/* Return psws. */
 	psw_t	return_psw;			/* 0x0290 */
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index 93ec0c8..056d7ef 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -36,7 +36,7 @@ static inline void set_user_asce(struct mm_struct *mm)
 
 	S390_lowcore.user_asce = mm->context.asce_bits | __pa(pgd);
 	set_fs(current->thread.mm_segment);
-	set_thread_flag(TIF_ASCE);
+	set_cpu_flag(CIF_ASCE);
 }
 
 static inline void clear_user_asce(void)
@@ -54,7 +54,7 @@ static inline void load_kernel_asce(void)
 	__ctl_store(asce, 1, 1);
 	if (asce != S390_lowcore.kernel_asce)
 		__ctl_load(S390_lowcore.kernel_asce, 1, 1);
-	set_thread_flag(TIF_ASCE);
+	set_cpu_flag(CIF_ASCE);
 }
 
 static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
@@ -70,7 +70,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 	__ctl_load(S390_lowcore.kernel_asce, 1, 1);
 	__ctl_load(S390_lowcore.kernel_asce, 7, 7);
 	/* Delay loading of the new ASCE to control registers CR1 & CR7 */
-	set_thread_flag(TIF_ASCE);
+	set_cpu_flag(CIF_ASCE);
 	atomic_inc(&next->context.attach_count);
 	atomic_dec(&prev->context.attach_count);
 	if (MACHINE_HAS_TLB_LC)
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index dc5fc4f..6f02d45 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -11,6 +11,13 @@
 #ifndef __ASM_S390_PROCESSOR_H
 #define __ASM_S390_PROCESSOR_H
 
+#define CIF_MCCK_PENDING	0	/* machine check handling is pending */
+#define CIF_ASCE		1	/* user asce needs fixup / uaccess */
+
+#define _CIF_MCCK_PENDING	(1<<CIF_MCCK_PENDING)
+#define _CIF_ASCE		(1<<CIF_ASCE)
+
+
 #ifndef __ASSEMBLY__
 
 #include <linux/linkage.h>
@@ -21,6 +28,21 @@
 #include <asm/setup.h>
 #include <asm/runtime_instr.h>
 
+static inline void set_cpu_flag(int flag)
+{
+	S390_lowcore.cpu_flags |= (1U << flag);
+}
+
+static inline void clear_cpu_flag(int flag)
+{
+	S390_lowcore.cpu_flags &= ~(1U << flag);
+}
+
+static inline int test_cpu_flag(int flag)
+{
+	return !!(S390_lowcore.cpu_flags & (1U << flag));
+}
+
 /*
  * Default implementation of macro that returns current
  * instruction pointer ("program counter").
diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h
index f4783c0..1b5300c 100644
--- a/arch/s390/include/asm/ptrace.h
+++ b/arch/s390/include/asm/ptrace.h
@@ -8,6 +8,12 @@
 
 #include <uapi/asm/ptrace.h>
 
+#define PIF_SYSCALL		0	/* inside a system call */
+#define PIF_PER_TRAP		1	/* deliver sigtrap on return to user */
+
+#define _PIF_SYSCALL		(1<<PIF_SYSCALL)
+#define _PIF_PER_TRAP		(1<<PIF_PER_TRAP)
+
 #ifndef __ASSEMBLY__
 
 #define PSW_KERNEL_BITS	(PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_ASC_HOME | \
@@ -29,6 +35,7 @@ struct pt_regs
 	unsigned int int_code;
 	unsigned int int_parm;
 	unsigned long int_parm_long;
+	unsigned long flags;
 };
 
 /*
@@ -79,6 +86,21 @@ struct per_struct_kernel {
 #define PER_CONTROL_SUSPENSION		0x00400000UL
 #define PER_CONTROL_ALTERATION		0x00200000UL
 
+static inline void set_pt_regs_flag(struct pt_regs *regs, int flag)
+{
+	regs->flags |= (1U << flag);
+}
+
+static inline void clear_pt_regs_flag(struct pt_regs *regs, int flag)
+{
+	regs->flags &= ~(1U << flag);
+}
+
+static inline int test_pt_regs_flag(struct pt_regs *regs, int flag)
+{
+	return !!(regs->flags & (1U << flag));
+}
+
 /*
  * These are defined as per linux/ptrace.h, which see.
  */
diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h
index 7776870..abad78d 100644
--- a/arch/s390/include/asm/syscall.h
+++ b/arch/s390/include/asm/syscall.h
@@ -28,7 +28,7 @@ extern const unsigned int sys_call_table_emu[];
 static inline long syscall_get_nr(struct task_struct *task,
 				  struct pt_regs *regs)
 {
-	return test_tsk_thread_flag(task, TIF_SYSCALL) ?
+	return test_pt_regs_flag(regs, PIF_SYSCALL) ?
 		(regs->int_code & 0xffff) : -1;
 }
 
diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h
index f3e5cad..b833e9c 100644
--- a/arch/s390/include/asm/thread_info.h
+++ b/arch/s390/include/asm/thread_info.h
@@ -77,30 +77,22 @@ static inline struct thread_info *current_thread_info(void)
 /*
  * thread information flags bit numbers
  */
-#define TIF_SYSCALL		0	/* inside a system call */
-#define TIF_NOTIFY_RESUME	1	/* callback before returning to user */
-#define TIF_SIGPENDING		2	/* signal pending */
-#define TIF_NEED_RESCHED	3	/* rescheduling necessary */
-#define TIF_ASCE		5	/* user asce needs fixup / uaccess */
-#define TIF_PER_TRAP		6	/* deliver sigtrap on return to user */
-#define TIF_MCCK_PENDING	7	/* machine check handling is pending */
-#define TIF_SYSCALL_TRACE	8	/* syscall trace active */
-#define TIF_SYSCALL_AUDIT	9	/* syscall auditing active */
-#define TIF_SECCOMP		10	/* secure computing */
-#define TIF_SYSCALL_TRACEPOINT	11	/* syscall tracepoint instrumentation */
-#define TIF_31BIT		17	/* 32bit process */
-#define TIF_MEMDIE		18	/* is terminating due to OOM killer */
-#define TIF_RESTORE_SIGMASK	19	/* restore signal mask in do_signal() */
-#define TIF_SINGLE_STEP		20	/* This task is single stepped */
-#define TIF_BLOCK_STEP		21	/* This task is block stepped */
+#define TIF_NOTIFY_RESUME	0	/* callback before returning to user */
+#define TIF_SIGPENDING		1	/* signal pending */
+#define TIF_NEED_RESCHED	2	/* rescheduling necessary */
+#define TIF_SYSCALL_TRACE	3	/* syscall trace active */
+#define TIF_SYSCALL_AUDIT	4	/* syscall auditing active */
+#define TIF_SECCOMP		5	/* secure computing */
+#define TIF_SYSCALL_TRACEPOINT	6	/* syscall tracepoint instrumentation */
+#define TIF_31BIT		16	/* 32bit process */
+#define TIF_MEMDIE		17	/* is terminating due to OOM killer */
+#define TIF_RESTORE_SIGMASK	18	/* restore signal mask in do_signal() */
+#define TIF_SINGLE_STEP		19	/* This task is single stepped */
+#define TIF_BLOCK_STEP		20	/* This task is block stepped */
 
-#define _TIF_SYSCALL		(1<<TIF_SYSCALL)
 #define _TIF_NOTIFY_RESUME	(1<<TIF_NOTIFY_RESUME)
 #define _TIF_SIGPENDING		(1<<TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED	(1<<TIF_NEED_RESCHED)
-#define _TIF_ASCE		(1<<TIF_ASCE)
-#define _TIF_PER_TRAP		(1<<TIF_PER_TRAP)
-#define _TIF_MCCK_PENDING	(1<<TIF_MCCK_PENDING)
 #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
 #define _TIF_SYSCALL_AUDIT	(1<<TIF_SYSCALL_AUDIT)
 #define _TIF_SECCOMP		(1<<TIF_SECCOMP)
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index cc10cdd..76c4e2f 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -50,6 +50,7 @@ int main(void)
 	DEFINE(__PT_INT_CODE, offsetof(struct pt_regs, int_code));
 	DEFINE(__PT_INT_PARM, offsetof(struct pt_regs, int_parm));
 	DEFINE(__PT_INT_PARM_LONG, offsetof(struct pt_regs, int_parm_long));
+	DEFINE(__PT_FLAGS, offsetof(struct pt_regs, flags));
 	DEFINE(__PT_SIZE, sizeof(struct pt_regs));
 	BLANK();
 	DEFINE(__SF_BACKCHAIN, offsetof(struct stack_frame, back_chain));
@@ -115,6 +116,7 @@ int main(void)
 	DEFINE(__LC_SAVE_AREA_SYNC, offsetof(struct _lowcore, save_area_sync));
 	DEFINE(__LC_SAVE_AREA_ASYNC, offsetof(struct _lowcore, save_area_async));
 	DEFINE(__LC_SAVE_AREA_RESTART, offsetof(struct _lowcore, save_area_restart));
+	DEFINE(__LC_CPU_FLAGS, offsetof(struct _lowcore, cpu_flags));
 	DEFINE(__LC_RETURN_PSW, offsetof(struct _lowcore, return_psw));
 	DEFINE(__LC_RETURN_MCCK_PSW, offsetof(struct _lowcore, return_mcck_psw));
 	DEFINE(__LC_SYNC_ENTER_TIMER, offsetof(struct _lowcore, sync_enter_timer));
diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c
index 7df5ed9..f204d69 100644
--- a/arch/s390/kernel/compat_signal.c
+++ b/arch/s390/kernel/compat_signal.c
@@ -213,7 +213,7 @@ static int restore_sigregs32(struct pt_regs *regs,_sigregs32 __user *sregs)
 	       sizeof(current->thread.fp_regs));
 
 	restore_fp_regs(current->thread.fp_regs.fprs);
-	clear_thread_flag(TIF_SYSCALL);	/* No longer in a system call */
+	clear_pt_regs_flag(regs, PIF_SYSCALL); /* No longer in a system call */
 	return 0;
 }
 
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 7006bfd..18e5af8 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -10,6 +10,7 @@
 
 #include <linux/init.h>
 #include <linux/linkage.h>
+#include <asm/processor.h>
 #include <asm/cache.h>
 #include <asm/errno.h>
 #include <asm/ptrace.h>
@@ -37,18 +38,16 @@ __PT_R13     =	__PT_GPRS + 524
 __PT_R14     =	__PT_GPRS + 56
 __PT_R15     =	__PT_GPRS + 60
 
-_TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
-		 _TIF_MCCK_PENDING | _TIF_PER_TRAP | _TIF_ASCE)
-_TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
-		 _TIF_MCCK_PENDING | _TIF_ASCE)
-_TIF_TRACE    = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \
-		 _TIF_SYSCALL_TRACEPOINT)
-_TIF_TRANSFER = (_TIF_MCCK_PENDING | _TIF_ASCE)
-
 STACK_SHIFT = PAGE_SHIFT + THREAD_ORDER
 STACK_SIZE  = 1 << STACK_SHIFT
 STACK_INIT  = STACK_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE
 
+_TIF_WORK	= (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED)
+_TIF_TRACE	= (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \
+		   _TIF_SYSCALL_TRACEPOINT)
+_CIF_WORK	= (_CIF_MCCK_PENDING | _CIF_ASCE)
+_PIF_WORK	= (_PIF_PER_TRAP)
+
 #define BASED(name) name-system_call(%r13)
 
 	.macro	TRACE_IRQS_ON
@@ -160,13 +159,7 @@ ENTRY(__switch_to)
 	lctl	%c4,%c4,__TASK_pid(%r3)		# load pid to control reg. 4
 	mvc	__LC_CURRENT_PID(4,%r0),__TASK_pid(%r3)	# store pid of next
 	l	%r15,__THREAD_ksp(%r3)		# load kernel stack of next
-	lhi	%r6,_TIF_TRANSFER		# transfer TIF bits
-	n	%r6,__TI_flags(%r4)		# isolate TIF bits
-	jz	0f
-	o	%r6,__TI_flags(%r5)		# set TIF bits of next
-	st	%r6,__TI_flags(%r5)
-	ni	__TI_flags+3(%r4),255-_TIF_TRANSFER # clear TIF bits of prev
-0:	lm	%r6,%r15,__SF_GPRS(%r15)	# load gprs of next task
+	lm	%r6,%r15,__SF_GPRS(%r15)	# load gprs of next task
 	br	%r14
 
 __critical_start:
@@ -181,6 +174,7 @@ sysc_stm:
 	stm	%r8,%r15,__LC_SAVE_AREA_SYNC
 	l	%r12,__LC_THREAD_INFO
 	l	%r13,__LC_SVC_NEW_PSW+4
+	lhi	%r14,_PIF_SYSCALL
 sysc_per:
 	l	%r15,__LC_KERNEL_STACK
 	la	%r11,STACK_FRAME_OVERHEAD(%r15)	# pointer to pt_regs
@@ -190,8 +184,8 @@ sysc_vtime:
 	mvc	__PT_R8(32,%r11),__LC_SAVE_AREA_SYNC
 	mvc	__PT_PSW(8,%r11),__LC_SVC_OLD_PSW
 	mvc	__PT_INT_CODE(4,%r11),__LC_SVC_ILC
+	st	%r14,__PT_FLAGS(%r11)
 sysc_do_svc:
-	oi	__TI_flags+3(%r12),_TIF_SYSCALL
 	l	%r10,__TI_sysc_table(%r12)	# 31 bit system call table
 	lh	%r8,__PT_INT_CODE+2(%r11)
 	sla	%r8,2				# shift and test for svc0
@@ -207,7 +201,7 @@ sysc_nr_ok:
 	st	%r2,__PT_ORIG_GPR2(%r11)
 	st	%r7,STACK_FRAME_OVERHEAD(%r15)
 	l	%r9,0(%r8,%r10)			# get system call addr.
-	tm	__TI_flags+2(%r12),_TIF_TRACE >> 8
+	tm	__TI_flags+3(%r12),_TIF_TRACE
 	jnz	sysc_tracesys
 	basr	%r14,%r9			# call sys_xxxx
 	st	%r2,__PT_R2(%r11)		# store return value
@@ -217,9 +211,12 @@ sysc_return:
 sysc_tif:
 	tm	__PT_PSW+1(%r11),0x01		# returning to user ?
 	jno	sysc_restore
-	tm	__TI_flags+3(%r12),_TIF_WORK_SVC
-	jnz	sysc_work			# check for work
-	ni	__TI_flags+3(%r12),255-_TIF_SYSCALL
+	tm	__PT_FLAGS+3(%r11),_PIF_WORK
+	jnz	sysc_work
+	tm	__TI_flags+3(%r12),_TIF_WORK
+	jnz	sysc_work			# check for thread work
+	tm	__LC_CPU_FLAGS+3,_CIF_WORK
+	jnz	sysc_work
 sysc_restore:
 	mvc	__LC_RETURN_PSW(8),__PT_PSW(%r11)
 	stpt	__LC_EXIT_TIMER
@@ -231,17 +228,17 @@ sysc_done:
 # One of the work bits is on. Find out which one.
 #
 sysc_work:
-	tm	__TI_flags+3(%r12),_TIF_MCCK_PENDING
+	tm	__LC_CPU_FLAGS+3,_CIF_MCCK_PENDING
 	jo	sysc_mcck_pending
 	tm	__TI_flags+3(%r12),_TIF_NEED_RESCHED
 	jo	sysc_reschedule
-	tm	__TI_flags+3(%r12),_TIF_PER_TRAP
+	tm	__PT_FLAGS+3(%r11),_PIF_PER_TRAP
 	jo	sysc_singlestep
 	tm	__TI_flags+3(%r12),_TIF_SIGPENDING
 	jo	sysc_sigpending
 	tm	__TI_flags+3(%r12),_TIF_NOTIFY_RESUME
 	jo	sysc_notify_resume
-	tm	__TI_flags+3(%r12),_TIF_ASCE
+	tm	__LC_CPU_FLAGS+3,_CIF_ASCE
 	jo	sysc_uaccess
 	j	sysc_return		# beware of critical section cleanup
 
@@ -254,7 +251,7 @@ sysc_reschedule:
 	br	%r1			# call schedule
 
 #
-# _TIF_MCCK_PENDING is set, call handler
+# _CIF_MCCK_PENDING is set, call handler
 #
 sysc_mcck_pending:
 	l	%r1,BASED(.Lhandle_mcck)
@@ -262,10 +259,10 @@ sysc_mcck_pending:
 	br	%r1			# TIF bit will be cleared by handler
 
 #
-# _TIF_ASCE is set, load user space asce
+# _CIF_ASCE is set, load user space asce
 #
 sysc_uaccess:
-	ni	__TI_flags+3(%r12),255-_TIF_ASCE
+	ni	__LC_CPU_FLAGS+3,255-_CIF_ASCE
 	lctl	%c1,%c1,__LC_USER_ASCE	# load primary asce
 	j	sysc_return
 
@@ -276,7 +273,7 @@ sysc_sigpending:
 	lr	%r2,%r11		# pass pointer to pt_regs
 	l	%r1,BASED(.Ldo_signal)
 	basr	%r14,%r1		# call do_signal
-	tm	__TI_flags+3(%r12),_TIF_SYSCALL
+	tm	__PT_FLAGS+3(%r11),_PIF_SYSCALL
 	jno	sysc_return
 	lm	%r2,%r7,__PT_R2(%r11)	# load svc arguments
 	l	%r10,__TI_sysc_table(%r12)	# 31 bit system call table
@@ -297,10 +294,10 @@ sysc_notify_resume:
 	br	%r1			# call do_notify_resume
 
 #
-# _TIF_PER_TRAP is set, call do_per_trap
+# _PIF_PER_TRAP is set, call do_per_trap
 #
 sysc_singlestep:
-	ni	__TI_flags+3(%r12),255-_TIF_PER_TRAP
+	ni	__PT_FLAGS+3(%r11),255-_PIF_PER_TRAP
 	lr	%r2,%r11		# pass pointer to pt_regs
 	l	%r1,BASED(.Ldo_per_trap)
 	la	%r14,BASED(sysc_return)
@@ -330,7 +327,7 @@ sysc_tracego:
 	basr	%r14,%r9		# call sys_xxx
 	st	%r2,__PT_R2(%r11)	# store return value
 sysc_tracenogo:
-	tm	__TI_flags+2(%r12),_TIF_TRACE >> 8
+	tm	__TI_flags+3(%r12),_TIF_TRACE
 	jz	sysc_return
 	l	%r1,BASED(.Ltrace_exit)
 	lr	%r2,%r11		# pass pointer to pt_regs
@@ -384,12 +381,13 @@ ENTRY(pgm_check_handler)
 	stm	%r8,%r9,__PT_PSW(%r11)
 	mvc	__PT_INT_CODE(4,%r11),__LC_PGM_ILC
 	mvc	__PT_INT_PARM_LONG(4,%r11),__LC_TRANS_EXC_CODE
+	xc	__PT_FLAGS(4,%r11),__PT_FLAGS(%r11)
 	tm	__LC_PGM_ILC+3,0x80	# check for per exception
 	jz	0f
 	l	%r1,__TI_task(%r12)
 	tmh	%r8,0x0001		# kernel per event ?
 	jz	pgm_kprobe
-	oi	__TI_flags+3(%r12),_TIF_PER_TRAP
+	oi	__PT_FLAGS+3(%r11),_PIF_PER_TRAP
 	mvc	__THREAD_per_address(4,%r1),__LC_PER_ADDRESS
 	mvc	__THREAD_per_cause(2,%r1),__LC_PER_CAUSE
 	mvc	__THREAD_per_paid(1,%r1),__LC_PER_PAID
@@ -420,9 +418,9 @@ pgm_kprobe:
 # single stepped system call
 #
 pgm_svcper:
-	oi	__TI_flags+3(%r12),_TIF_PER_TRAP
 	mvc	__LC_RETURN_PSW(4),__LC_SVC_NEW_PSW
 	mvc	__LC_RETURN_PSW+4(4),BASED(.Lsysc_per)
+	lhi	%r14,_PIF_SYSCALL | _PIF_PER_TRAP
 	lpsw	__LC_RETURN_PSW		# branch to sysc_per and enable irqs
 
 /*
@@ -445,6 +443,7 @@ io_skip:
 	mvc	__PT_R8(32,%r11),__LC_SAVE_AREA_ASYNC
 	stm	%r8,%r9,__PT_PSW(%r11)
 	mvc	__PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID
+	xc	__PT_FLAGS(4,%r11),__PT_FLAGS(%r11)
 	TRACE_IRQS_OFF
 	xc	__SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15)
 io_loop:
@@ -466,8 +465,10 @@ io_return:
 	LOCKDEP_SYS_EXIT
 	TRACE_IRQS_ON
 io_tif:
-	tm	__TI_flags+3(%r12),_TIF_WORK_INT
+	tm	__TI_flags+3(%r12),_TIF_WORK
 	jnz	io_work			# there is work to do (signals etc.)
+	tm	__LC_CPU_FLAGS+3,_CIF_WORK
+	jnz	io_work
 io_restore:
 	mvc	__LC_RETURN_PSW(8),__PT_PSW(%r11)
 	stpt	__LC_EXIT_TIMER
@@ -477,7 +478,7 @@ io_done:
 
 #
 # There is work todo, find out in which context we have been interrupted:
-# 1) if we return to user space we can do all _TIF_WORK_INT work
+# 1) if we return to user space we can do all _TIF_WORK work
 # 2) if we return to kernel code and preemptive scheduling is enabled check
 #    the preemption counter and if it is zero call preempt_schedule_irq
 # Before any work can be done, a switch to the kernel stack is required.
@@ -520,11 +521,9 @@ io_work_user:
 
 #
 # One of the work bits is on. Find out which one.
-# Checked are: _TIF_SIGPENDING, _TIF_NOTIFY_RESUME, _TIF_NEED_RESCHED
-#		and _TIF_MCCK_PENDING
 #
 io_work_tif:
-	tm	__TI_flags+3(%r12),_TIF_MCCK_PENDING
+	tm	__LC_CPU_FLAGS+3(%r12),_CIF_MCCK_PENDING
 	jo	io_mcck_pending
 	tm	__TI_flags+3(%r12),_TIF_NEED_RESCHED
 	jo	io_reschedule
@@ -532,12 +531,12 @@ io_work_tif:
 	jo	io_sigpending
 	tm	__TI_flags+3(%r12),_TIF_NOTIFY_RESUME
 	jo	io_notify_resume
-	tm	__TI_flags+3(%r12),_TIF_ASCE
+	tm	__LC_CPU_FLAGS+3,_CIF_ASCE
 	jo	io_uaccess
 	j	io_return		# beware of critical section cleanup
 
 #
-# _TIF_MCCK_PENDING is set, call handler
+# _CIF_MCCK_PENDING is set, call handler
 #
 io_mcck_pending:
 	# TRACE_IRQS_ON already done at io_return
@@ -547,10 +546,10 @@ io_mcck_pending:
 	j	io_return
 
 #
-# _TIF_ASCE is set, load user space asce
+# _CIF_ASCE is set, load user space asce
 #
 io_uaccess:
-	ni	__TI_flags+3(%r12),255-_TIF_ASCE
+	ni	__LC_CPU_FLAGS+3,255-_CIF_ASCE
 	lctl	%c1,%c1,__LC_USER_ASCE	# load primary asce
 	j	io_return
 
@@ -613,6 +612,7 @@ ext_skip:
 	stm	%r8,%r9,__PT_PSW(%r11)
 	mvc	__PT_INT_CODE(4,%r11),__LC_EXT_CPU_ADDR
 	mvc	__PT_INT_PARM(4,%r11),__LC_EXT_PARAMS
+	xc	__PT_FLAGS(4,%r11),__PT_FLAGS(%r11)
 	TRACE_IRQS_OFF
 	l	%r1,BASED(.Ldo_IRQ)
 	lr	%r2,%r11		# pass pointer to pt_regs
@@ -677,6 +677,7 @@ mcck_skip:
 	stm	%r0,%r7,__PT_R0(%r11)
 	mvc	__PT_R8(32,%r11),__LC_GPREGS_SAVE_AREA+32
 	stm	%r8,%r9,__PT_PSW(%r11)
+	xc	__PT_FLAGS(4,%r11),__PT_FLAGS(%r11)
 	xc	__SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15)
 	l	%r1,BASED(.Ldo_machine_check)
 	lr	%r2,%r11		# pass pointer to pt_regs
@@ -689,7 +690,7 @@ mcck_skip:
 	la	%r11,STACK_FRAME_OVERHEAD(%r15)
 	lr	%r15,%r1
 	ssm	__LC_PGM_NEW_PSW	# turn dat on, keep irqs off
-	tm	__TI_flags+3(%r12),_TIF_MCCK_PENDING
+	tm	__LC_CPU_FLAGS+3,_CIF_MCCK_PENDING
 	jno	mcck_return
 	TRACE_IRQS_OFF
 	l	%r1,BASED(.Lhandle_mcck)
@@ -842,6 +843,8 @@ cleanup_system_call:
 	stm	%r0,%r7,__PT_R0(%r9)
 	mvc	__PT_PSW(8,%r9),__LC_SVC_OLD_PSW
 	mvc	__PT_INT_CODE(4,%r9),__LC_SVC_ILC
+	xc	__PT_FLAGS(4,%r9),__PT_FLAGS(%r9)
+	mvi	__PT_FLAGS+3(%r9),_PIF_SYSCALL
 	# setup saved register 15
 	st	%r15,28(%r11)		# r15 stack pointer
 	# set new psw address and exit
diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S
index d15e7bf..c41f3f9 100644
--- a/arch/s390/kernel/entry64.S
+++ b/arch/s390/kernel/entry64.S
@@ -42,13 +42,11 @@ STACK_SHIFT = PAGE_SHIFT + THREAD_ORDER
 STACK_SIZE  = 1 << STACK_SHIFT
 STACK_INIT = STACK_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE
 
-_TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
-		 _TIF_MCCK_PENDING | _TIF_PER_TRAP | _TIF_ASCE)
-_TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
-		 _TIF_MCCK_PENDING | _TIF_ASCE)
-_TIF_TRACE    = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \
-		 _TIF_SYSCALL_TRACEPOINT)
-_TIF_TRANSFER = (_TIF_MCCK_PENDING | _TIF_ASCE)
+_TIF_WORK	= (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED)
+_TIF_TRACE	= (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \
+		   _TIF_SYSCALL_TRACEPOINT)
+_CIF_WORK	= (_CIF_MCCK_PENDING | _CIF_ASCE)
+_PIF_WORK	= (_PIF_PER_TRAP)
 
 #define BASED(name) name-system_call(%r13)
 
@@ -190,13 +188,7 @@ ENTRY(__switch_to)
 	lctl	%c4,%c4,__TASK_pid(%r3)		# load pid to control reg. 4
 	mvc	__LC_CURRENT_PID+4(4,%r0),__TASK_pid(%r3) # store pid of next
 	lg	%r15,__THREAD_ksp(%r3)		# load kernel stack of next
-	llill	%r6,_TIF_TRANSFER		# transfer TIF bits
-	ng	%r6,__TI_flags(%r4)		# isolate TIF bits
-	jz	0f
-	og	%r6,__TI_flags(%r5)		# set TIF bits of next
-	stg	%r6,__TI_flags(%r5)
-	ni	__TI_flags+7(%r4),255-_TIF_TRANSFER # clear TIF bits of prev
-0:	lmg	%r6,%r15,__SF_GPRS(%r15)	# load gprs of next task
+	lmg	%r6,%r15,__SF_GPRS(%r15)	# load gprs of next task
 	br	%r14
 
 __critical_start:
@@ -211,6 +203,7 @@ sysc_stmg:
 	stmg	%r8,%r15,__LC_SAVE_AREA_SYNC
 	lg	%r10,__LC_LAST_BREAK
 	lg	%r12,__LC_THREAD_INFO
+	lghi	%r14,_PIF_SYSCALL
 sysc_per:
 	lg	%r15,__LC_KERNEL_STACK
 	la	%r11,STACK_FRAME_OVERHEAD(%r15)	# pointer to pt_regs
@@ -221,8 +214,8 @@ sysc_vtime:
 	mvc	__PT_R8(64,%r11),__LC_SAVE_AREA_SYNC
 	mvc	__PT_PSW(16,%r11),__LC_SVC_OLD_PSW
 	mvc	__PT_INT_CODE(4,%r11),__LC_SVC_ILC
+	stg	%r14,__PT_FLAGS(%r11)
 sysc_do_svc:
-	oi	__TI_flags+7(%r12),_TIF_SYSCALL
 	lg	%r10,__TI_sysc_table(%r12)	# address of system call table
 	llgh	%r8,__PT_INT_CODE+2(%r11)
 	slag	%r8,%r8,2			# shift and test for svc 0
@@ -238,7 +231,7 @@ sysc_nr_ok:
 	stg	%r2,__PT_ORIG_GPR2(%r11)
 	stg	%r7,STACK_FRAME_OVERHEAD(%r15)
 	lgf	%r9,0(%r8,%r10)			# get system call add.
-	tm	__TI_flags+6(%r12),_TIF_TRACE >> 8
+	tm	__TI_flags+7(%r12),_TIF_TRACE
 	jnz	sysc_tracesys
 	basr	%r14,%r9			# call sys_xxxx
 	stg	%r2,__PT_R2(%r11)		# store return value
@@ -248,9 +241,12 @@ sysc_return:
 sysc_tif:
 	tm	__PT_PSW+1(%r11),0x01		# returning to user ?
 	jno	sysc_restore
-	tm	__TI_flags+7(%r12),_TIF_WORK_SVC
+	tm	__PT_FLAGS+7(%r11),_PIF_WORK
+	jnz	sysc_work
+	tm	__TI_flags+7(%r12),_TIF_WORK
 	jnz	sysc_work			# check for work
-	ni	__TI_flags+7(%r12),255-_TIF_SYSCALL
+	tm	__LC_CPU_FLAGS+7,_CIF_WORK
+	jnz	sysc_work
 sysc_restore:
 	lg	%r14,__LC_VDSO_PER_CPU
 	lmg	%r0,%r10,__PT_R0(%r11)
@@ -265,17 +261,17 @@ sysc_done:
 # One of the work bits is on. Find out which one.
 #
 sysc_work:
-	tm	__TI_flags+7(%r12),_TIF_MCCK_PENDING
+	tm	__LC_CPU_FLAGS+7,_CIF_MCCK_PENDING
 	jo	sysc_mcck_pending
 	tm	__TI_flags+7(%r12),_TIF_NEED_RESCHED
 	jo	sysc_reschedule
-	tm	__TI_flags+7(%r12),_TIF_PER_TRAP
+	tm	__PT_FLAGS+7(%r11),_PIF_PER_TRAP
 	jo	sysc_singlestep
 	tm	__TI_flags+7(%r12),_TIF_SIGPENDING
 	jo	sysc_sigpending
 	tm	__TI_flags+7(%r12),_TIF_NOTIFY_RESUME
 	jo	sysc_notify_resume
-	tm	__TI_flags+7(%r12),_TIF_ASCE
+	tm	__LC_CPU_FLAGS+7,_CIF_ASCE
 	jo	sysc_uaccess
 	j	sysc_return		# beware of critical section cleanup
 
@@ -287,17 +283,17 @@ sysc_reschedule:
 	jg	schedule
 
 #
-# _TIF_MCCK_PENDING is set, call handler
+# _CIF_MCCK_PENDING is set, call handler
 #
 sysc_mcck_pending:
 	larl	%r14,sysc_return
 	jg	s390_handle_mcck	# TIF bit will be cleared by handler
 
 #
-# _TIF_ASCE is set, load user space asce
+# _CIF_ASCE is set, load user space asce
 #
 sysc_uaccess:
-	ni	__TI_flags+7(%r12),255-_TIF_ASCE
+	ni	__LC_CPU_FLAGS+7,255-_CIF_ASCE
 	lctlg	%c1,%c1,__LC_USER_ASCE		# load primary asce
 	j	sysc_return
 
@@ -307,7 +303,7 @@ sysc_uaccess:
 sysc_sigpending:
 	lgr	%r2,%r11		# pass pointer to pt_regs
 	brasl	%r14,do_signal
-	tm	__TI_flags+7(%r12),_TIF_SYSCALL
+	tm	__PT_FLAGS+7(%r11),_PIF_SYSCALL
 	jno	sysc_return
 	lmg	%r2,%r7,__PT_R2(%r11)	# load svc arguments
 	lg	%r10,__TI_sysc_table(%r12)	# address of system call table
@@ -327,10 +323,10 @@ sysc_notify_resume:
 	jg	do_notify_resume
 
 #
-# _TIF_PER_TRAP is set, call do_per_trap
+# _PIF_PER_TRAP is set, call do_per_trap
 #
 sysc_singlestep:
-	ni	__TI_flags+7(%r12),255-_TIF_PER_TRAP
+	ni	__PT_FLAGS+7(%r11),255-_PIF_PER_TRAP
 	lgr	%r2,%r11		# pass pointer to pt_regs
 	larl	%r14,sysc_return
 	jg	do_per_trap
@@ -357,7 +353,7 @@ sysc_tracego:
 	basr	%r14,%r9		# call sys_xxx
 	stg	%r2,__PT_R2(%r11)	# store return value
 sysc_tracenogo:
-	tm	__TI_flags+6(%r12),_TIF_TRACE >> 8
+	tm	__TI_flags+7(%r12),_TIF_TRACE
 	jz	sysc_return
 	lgr	%r2,%r11		# pass pointer to pt_regs
 	larl	%r14,sysc_return
@@ -416,12 +412,13 @@ ENTRY(pgm_check_handler)
 	stmg	%r8,%r9,__PT_PSW(%r11)
 	mvc	__PT_INT_CODE(4,%r11),__LC_PGM_ILC
 	mvc	__PT_INT_PARM_LONG(8,%r11),__LC_TRANS_EXC_CODE
+	xc	__PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
 	stg	%r10,__PT_ARGS(%r11)
 	tm	__LC_PGM_ILC+3,0x80	# check for per exception
 	jz	0f
 	tmhh	%r8,0x0001		# kernel per event ?
 	jz	pgm_kprobe
-	oi	__TI_flags+7(%r12),_TIF_PER_TRAP
+	oi	__PT_FLAGS+7(%r11),_PIF_PER_TRAP
 	mvc	__THREAD_per_address(8,%r14),__LC_PER_ADDRESS
 	mvc	__THREAD_per_cause(2,%r14),__LC_PER_CAUSE
 	mvc	__THREAD_per_paid(1,%r14),__LC_PER_PAID
@@ -451,10 +448,10 @@ pgm_kprobe:
 # single stepped system call
 #
 pgm_svcper:
-	oi	__TI_flags+7(%r12),_TIF_PER_TRAP
 	mvc	__LC_RETURN_PSW(8),__LC_SVC_NEW_PSW
 	larl	%r14,sysc_per
 	stg	%r14,__LC_RETURN_PSW+8
+	lghi	%r14,_PIF_SYSCALL | _PIF_PER_TRAP
 	lpswe	__LC_RETURN_PSW		# branch to sysc_per and enable irqs
 
 /*
@@ -479,6 +476,7 @@ io_skip:
 	mvc	__PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC
 	stmg	%r8,%r9,__PT_PSW(%r11)
 	mvc	__PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID
+	xc	__PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
 	TRACE_IRQS_OFF
 	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
 io_loop:
@@ -499,8 +497,10 @@ io_return:
 	LOCKDEP_SYS_EXIT
 	TRACE_IRQS_ON
 io_tif:
-	tm	__TI_flags+7(%r12),_TIF_WORK_INT
+	tm	__TI_flags+7(%r12),_TIF_WORK
 	jnz	io_work 		# there is work to do (signals etc.)
+	tm	__LC_CPU_FLAGS+7,_CIF_WORK
+	jnz	io_work
 io_restore:
 	lg	%r14,__LC_VDSO_PER_CPU
 	lmg	%r0,%r10,__PT_R0(%r11)
@@ -513,7 +513,7 @@ io_done:
 
 #
 # There is work todo, find out in which context we have been interrupted:
-# 1) if we return to user space we can do all _TIF_WORK_INT work
+# 1) if we return to user space we can do all _TIF_WORK work
 # 2) if we return to kernel code and kvm is enabled check if we need to
 #    modify the psw to leave SIE
 # 3) if we return to kernel code and preemptive scheduling is enabled check
@@ -557,11 +557,9 @@ io_work_user:
 
 #
 # One of the work bits is on. Find out which one.
-# Checked are: _TIF_SIGPENDING, _TIF_NOTIFY_RESUME, _TIF_NEED_RESCHED
-#	       and _TIF_MCCK_PENDING
 #
 io_work_tif:
-	tm	__TI_flags+7(%r12),_TIF_MCCK_PENDING
+	tm	__LC_CPU_FLAGS+7,_CIF_MCCK_PENDING
 	jo	io_mcck_pending
 	tm	__TI_flags+7(%r12),_TIF_NEED_RESCHED
 	jo	io_reschedule
@@ -569,12 +567,12 @@ io_work_tif:
 	jo	io_sigpending
 	tm	__TI_flags+7(%r12),_TIF_NOTIFY_RESUME
 	jo	io_notify_resume
-	tm	__TI_flags+7(%r12),_TIF_ASCE
+	tm	__LC_CPU_FLAGS+7,_CIF_ASCE
 	jo	io_uaccess
 	j	io_return		# beware of critical section cleanup
 
 #
-# _TIF_MCCK_PENDING is set, call handler
+# _CIF_MCCK_PENDING is set, call handler
 #
 io_mcck_pending:
 	# TRACE_IRQS_ON already done at io_return
@@ -583,10 +581,10 @@ io_mcck_pending:
 	j	io_return
 
 #
-# _TIF_ASCE is set, load user space asce
+# _CIF_ASCE is set, load user space asce
 #
 io_uaccess:
-	ni	__TI_flags+7(%r12),255-_TIF_ASCE
+	ni	__LC_CPU_FLAGS+7,255-_CIF_ASCE
 	lctlg	%c1,%c1,__LC_USER_ASCE		# load primary asce
 	j	io_return
 
@@ -650,6 +648,7 @@ ext_skip:
 	mvc	__PT_INT_CODE(4,%r11),__LC_EXT_CPU_ADDR
 	mvc	__PT_INT_PARM(4,%r11),__LC_EXT_PARAMS
 	mvc	__PT_INT_PARM_LONG(8,%r11),0(%r1)
+	xc	__PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
 	TRACE_IRQS_OFF
 	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
 	lgr	%r2,%r11		# pass pointer to pt_regs
@@ -716,6 +715,7 @@ mcck_skip:
 	stmg	%r0,%r7,__PT_R0(%r11)
 	mvc	__PT_R8(64,%r11),0(%r14)
 	stmg	%r8,%r9,__PT_PSW(%r11)
+	xc	__PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
 	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
 	lgr	%r2,%r11		# pass pointer to pt_regs
 	brasl	%r14,s390_do_machine_check
@@ -727,7 +727,7 @@ mcck_skip:
 	la	%r11,STACK_FRAME_OVERHEAD(%r1)
 	lgr	%r15,%r1
 	ssm	__LC_PGM_NEW_PSW	# turn dat on, keep irqs off
-	tm	__TI_flags+7(%r12),_TIF_MCCK_PENDING
+	tm	__LC_CPU_FLAGS+7,_CIF_MCCK_PENDING
 	jno	mcck_return
 	TRACE_IRQS_OFF
 	brasl	%r14,s390_handle_mcck
@@ -884,6 +884,8 @@ cleanup_system_call:
 	stmg	%r0,%r7,__PT_R0(%r9)
 	mvc	__PT_PSW(16,%r9),__LC_SVC_OLD_PSW
 	mvc	__PT_INT_CODE(4,%r9),__LC_SVC_ILC
+	xc	__PT_FLAGS(8,%r9),__PT_FLAGS(%r9)
+	mvi	__PT_FLAGS+7(%r9),_PIF_SYSCALL
 	# setup saved register r15
 	stg	%r15,56(%r11)		# r15 stack pointer
 	# set new psw address and exit
diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
index c4c0338..210e128 100644
--- a/arch/s390/kernel/nmi.c
+++ b/arch/s390/kernel/nmi.c
@@ -55,7 +55,7 @@ void s390_handle_mcck(void)
 	local_mcck_disable();
 	mcck = __get_cpu_var(cpu_mcck);
 	memset(&__get_cpu_var(cpu_mcck), 0, sizeof(struct mcck_struct));
-	clear_thread_flag(TIF_MCCK_PENDING);
+	clear_cpu_flag(CIF_MCCK_PENDING);
 	local_mcck_enable();
 	local_irq_restore(flags);
 
@@ -313,7 +313,7 @@ void notrace s390_do_machine_check(struct pt_regs *regs)
 			 */
 			mcck->kill_task = 1;
 			mcck->mcck_code = *(unsigned long long *) mci;
-			set_thread_flag(TIF_MCCK_PENDING);
+			set_cpu_flag(CIF_MCCK_PENDING);
 		} else {
 			/*
 			 * Couldn't restore all register contents while in
@@ -352,12 +352,12 @@ void notrace s390_do_machine_check(struct pt_regs *regs)
 	if (mci->cp) {
 		/* Channel report word pending */
 		mcck->channel_report = 1;
-		set_thread_flag(TIF_MCCK_PENDING);
+		set_cpu_flag(CIF_MCCK_PENDING);
 	}
 	if (mci->w) {
 		/* Warning pending */
 		mcck->warning = 1;
-		set_thread_flag(TIF_MCCK_PENDING);
+		set_cpu_flag(CIF_MCCK_PENDING);
 	}
 	nmi_exit();
 }
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index dd14532..93b9ca4 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -64,7 +64,7 @@ unsigned long thread_saved_pc(struct task_struct *tsk)
 void arch_cpu_idle(void)
 {
 	local_mcck_disable();
-	if (test_thread_flag(TIF_MCCK_PENDING)) {
+	if (test_cpu_flag(CIF_MCCK_PENDING)) {
 		local_mcck_enable();
 		local_irq_enable();
 		return;
@@ -76,7 +76,7 @@ void arch_cpu_idle(void)
 
 void arch_cpu_idle_exit(void)
 {
-	if (test_thread_flag(TIF_MCCK_PENDING))
+	if (test_cpu_flag(CIF_MCCK_PENDING))
 		s390_handle_mcck();
 }
 
@@ -123,7 +123,6 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp,
 	memset(&p->thread.per_user, 0, sizeof(p->thread.per_user));
 	memset(&p->thread.per_event, 0, sizeof(p->thread.per_event));
 	clear_tsk_thread_flag(p, TIF_SINGLE_STEP);
-	clear_tsk_thread_flag(p, TIF_PER_TRAP);
 	/* Initialize per thread user and system timer values */
 	ti = task_thread_info(p);
 	ti->user_timer = 0;
@@ -152,6 +151,7 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp,
 	}
 	frame->childregs = *current_pt_regs();
 	frame->childregs.gprs[2] = 0;	/* child returns 0 on fork. */
+	frame->childregs.flags = 0;
 	if (new_stackp)
 		frame->childregs.gprs[15] = new_stackp;
 
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index 1c82619..2d716734 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -136,7 +136,7 @@ void ptrace_disable(struct task_struct *task)
 	memset(&task->thread.per_user, 0, sizeof(task->thread.per_user));
 	memset(&task->thread.per_event, 0, sizeof(task->thread.per_event));
 	clear_tsk_thread_flag(task, TIF_SINGLE_STEP);
-	clear_tsk_thread_flag(task, TIF_PER_TRAP);
+	clear_pt_regs_flag(task_pt_regs(task), PIF_PER_TRAP);
 	task->thread.per_flags = 0;
 }
 
@@ -813,7 +813,7 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs)
 		 * debugger stored an invalid system call number. Skip
 		 * the system call and the system call restart handling.
 		 */
-		clear_thread_flag(TIF_SYSCALL);
+		clear_pt_regs_flag(regs, PIF_SYSCALL);
 		ret = -1;
 	}
 
diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
index d8fd508..42b49f9 100644
--- a/arch/s390/kernel/signal.c
+++ b/arch/s390/kernel/signal.c
@@ -113,7 +113,7 @@ static int restore_sigregs(struct pt_regs *regs, _sigregs __user *sregs)
 	       sizeof(current->thread.fp_regs));
 
 	restore_fp_regs(current->thread.fp_regs.fprs);
-	clear_thread_flag(TIF_SYSCALL);	/* No longer in a system call */
+	clear_pt_regs_flag(regs, PIF_SYSCALL); /* No longer in a system call */
 	return 0;
 }
 
@@ -356,7 +356,7 @@ void do_signal(struct pt_regs *regs)
 	 * call information.
 	 */
 	current_thread_info()->system_call =
-		test_thread_flag(TIF_SYSCALL) ? regs->int_code : 0;
+		test_pt_regs_flag(regs, PIF_SYSCALL) ? regs->int_code : 0;
 	signr = get_signal_to_deliver(&info, &ka, regs, NULL);
 
 	if (signr > 0) {
@@ -384,7 +384,7 @@ void do_signal(struct pt_regs *regs)
 			}
 		}
 		/* No longer in a system call */
-		clear_thread_flag(TIF_SYSCALL);
+		clear_pt_regs_flag(regs, PIF_SYSCALL);
 
 		if (is_compat_task())
 			handle_signal32(signr, &ka, &info, oldset, regs);
@@ -394,7 +394,7 @@ void do_signal(struct pt_regs *regs)
 	}
 
 	/* No handlers present - check for system call restart */
-	clear_thread_flag(TIF_SYSCALL);
+	clear_pt_regs_flag(regs, PIF_SYSCALL);
 	if (current_thread_info()->system_call) {
 		regs->int_code = current_thread_info()->system_call;
 		switch (regs->gprs[2]) {
@@ -407,9 +407,9 @@ void do_signal(struct pt_regs *regs)
 		case -ERESTARTNOINTR:
 			/* Restart system call with magic TIF bit. */
 			regs->gprs[2] = regs->orig_gpr2;
-			set_thread_flag(TIF_SYSCALL);
+			set_pt_regs_flag(regs, PIF_SYSCALL);
 			if (test_thread_flag(TIF_SINGLE_STEP))
-				set_thread_flag(TIF_PER_TRAP);
+				clear_pt_regs_flag(regs, PIF_PER_TRAP);
 			break;
 		}
 	}
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index b3ecb8f..0206597 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -906,7 +906,7 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu)
 	if (need_resched())
 		schedule();
 
-	if (test_thread_flag(TIF_MCCK_PENDING))
+	if (test_cpu_flag(CIF_MCCK_PENDING))
 		s390_handle_mcck();
 
 	if (!kvm_is_ucontrol(vcpu->kvm))
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 2f51a99..3f3b354 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -415,7 +415,7 @@ static inline int do_exception(struct pt_regs *regs, int access)
 	 * The instruction that caused the program check has
 	 * been nullified. Don't signal single step via SIGTRAP.
 	 */
-	clear_tsk_thread_flag(tsk, TIF_PER_TRAP);
+	clear_pt_regs_flag(regs, PIF_PER_TRAP);
 
 	if (notify_page_fault(regs))
 		return 0;
-- 
cgit v0.10.2


From ca25f564c484b4f32eec1e667926dcf87ec03234 Mon Sep 17 00:00:00 2001
From: Sebastian Ott <sebott@linux.vnet.ibm.com>
Date: Wed, 16 Apr 2014 16:09:23 +0200
Subject: s390/pci: improve state check when processing hotplug events

Processing pci hotplug events can fail when a pci function is in an
unexpected state. This can happen when we already processed the
change associated with the hotplug event (especially when receiving
hotplug events during early boot).
Just ignore the event in this case.

Reviewed-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c
index 01e251b..6d7f5a3 100644
--- a/arch/s390/pci/pci_event.c
+++ b/arch/s390/pci/pci_event.c
@@ -76,7 +76,7 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
 
 	switch (ccdf->pec) {
 	case 0x0301: /* Standby -> Configured */
-		if (!zdev || zdev->state == ZPCI_FN_STATE_CONFIGURED)
+		if (!zdev || zdev->state != ZPCI_FN_STATE_STANDBY)
 			break;
 		zdev->state = ZPCI_FN_STATE_CONFIGURED;
 		zdev->fh = ccdf->fh;
@@ -86,7 +86,8 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
 		pci_rescan_bus(zdev->bus);
 		break;
 	case 0x0302: /* Reserved -> Standby */
-		clp_add_pci_device(ccdf->fid, ccdf->fh, 0);
+		if (!zdev)
+			clp_add_pci_device(ccdf->fid, ccdf->fh, 0);
 		break;
 	case 0x0303: /* Deconfiguration requested */
 		if (pdev)
-- 
cgit v0.10.2


From b346953ddac0d57d4852ddc2b5cd9e16c84ddd59 Mon Sep 17 00:00:00 2001
From: Sebastian Ott <sebott@linux.vnet.ibm.com>
Date: Wed, 16 Apr 2014 16:10:18 +0200
Subject: s390/pci: use macro for attribute creation

Introduce the zpci_attr macro to create read only sysfs attributes
to avoid duplicate code.

Reviewed-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c
index ab4a913..ebe2c16 100644
--- a/arch/s390/pci/pci_sysfs.c
+++ b/arch/s390/pci/pci_sysfs.c
@@ -12,43 +12,22 @@
 #include <linux/stat.h>
 #include <linux/pci.h>
 
-static ssize_t show_fid(struct device *dev, struct device_attribute *attr,
-			char *buf)
-{
-	struct zpci_dev *zdev = get_zdev(to_pci_dev(dev));
-
-	return sprintf(buf, "0x%08x\n", zdev->fid);
-}
-static DEVICE_ATTR(function_id, S_IRUGO, show_fid, NULL);
-
-static ssize_t show_fh(struct device *dev, struct device_attribute *attr,
-		       char *buf)
-{
-	struct zpci_dev *zdev = get_zdev(to_pci_dev(dev));
-
-	return sprintf(buf, "0x%08x\n", zdev->fh);
-}
-static DEVICE_ATTR(function_handle, S_IRUGO, show_fh, NULL);
-
-static ssize_t show_pchid(struct device *dev, struct device_attribute *attr,
-			  char *buf)
-{
-	struct zpci_dev *zdev = get_zdev(to_pci_dev(dev));
-
-	return sprintf(buf, "0x%04x\n", zdev->pchid);
-}
-static DEVICE_ATTR(pchid, S_IRUGO, show_pchid, NULL);
-
-static ssize_t show_pfgid(struct device *dev, struct device_attribute *attr,
-			  char *buf)
-{
-	struct zpci_dev *zdev = get_zdev(to_pci_dev(dev));
-
-	return sprintf(buf, "0x%02x\n", zdev->pfgid);
-}
-static DEVICE_ATTR(pfgid, S_IRUGO, show_pfgid, NULL);
-
-static ssize_t store_recover(struct device *dev, struct device_attribute *attr,
+#define zpci_attr(name, fmt, member)					\
+static ssize_t name##_show(struct device *dev,				\
+			   struct device_attribute *attr, char *buf)	\
+{									\
+	struct zpci_dev *zdev = get_zdev(to_pci_dev(dev));		\
+									\
+	return sprintf(buf, fmt, zdev->member);				\
+}									\
+static DEVICE_ATTR_RO(name)
+
+zpci_attr(function_id, "0x%08x\n", fid);
+zpci_attr(function_handle, "0x%08x\n", fh);
+zpci_attr(pchid, "0x%04x\n", pchid);
+zpci_attr(pfgid, "0x%02x\n", pfgid);
+
+static ssize_t recover_store(struct device *dev, struct device_attribute *attr,
 			     const char *buf, size_t count)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
@@ -70,7 +49,7 @@ static ssize_t store_recover(struct device *dev, struct device_attribute *attr,
 	pci_rescan_bus(zdev->bus);
 	return count;
 }
-static DEVICE_ATTR(recover, S_IWUSR, NULL, store_recover);
+static DEVICE_ATTR_WO(recover);
 
 static struct device_attribute *zpci_dev_attrs[] = {
 	&dev_attr_function_id,
-- 
cgit v0.10.2


From 93065d045a99391c60fa9ab8aca6b503f51b2e95 Mon Sep 17 00:00:00 2001
From: Sebastian Ott <sebott@linux.vnet.ibm.com>
Date: Wed, 16 Apr 2014 16:11:00 +0200
Subject: s390/pci: use pdev->dev.groups for attribute creation

Let the driver core handle attribute creation by putting all s390
specific pci attributes in an attribute group which is referenced
by pdev->dev.groups in pcibios_add_device.

Link: https://lkml.kernel.org/r/alpine.LFD.2.11.1404141101500.1529@denkbrett
Reviewed-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index 2583466..79b5f07 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -120,6 +120,8 @@ static inline bool zdev_enabled(struct zpci_dev *zdev)
 	return (zdev->fh & (1UL << 31)) ? true : false;
 }
 
+extern const struct attribute_group *zpci_attr_groups[];
+
 /* -----------------------------------------------------------------------------
   Prototypes
 ----------------------------------------------------------------------------- */
@@ -166,10 +168,6 @@ static inline void zpci_exit_slot(struct zpci_dev *zdev) {}
 struct zpci_dev *get_zdev(struct pci_dev *);
 struct zpci_dev *get_zdev_by_fid(u32);
 
-/* sysfs */
-int zpci_sysfs_add_device(struct device *);
-void zpci_sysfs_remove_device(struct device *);
-
 /* DMA */
 int zpci_dma_init(void);
 void zpci_dma_exit(void);
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 1df1d29..bdf0257 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -530,11 +530,6 @@ static void zpci_unmap_resources(struct zpci_dev *zdev)
 	}
 }
 
-int pcibios_add_platform_entries(struct pci_dev *pdev)
-{
-	return zpci_sysfs_add_device(&pdev->dev);
-}
-
 static int __init zpci_irq_init(void)
 {
 	int rc;
@@ -671,6 +666,7 @@ int pcibios_add_device(struct pci_dev *pdev)
 	int i;
 
 	zdev->pdev = pdev;
+	pdev->dev.groups = zpci_attr_groups;
 	zpci_map_resources(zdev);
 
 	for (i = 0; i < PCI_BAR_COUNT; i++) {
diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c
index ebe2c16..f23da1c 100644
--- a/arch/s390/pci/pci_sysfs.c
+++ b/arch/s390/pci/pci_sysfs.c
@@ -51,36 +51,18 @@ static ssize_t recover_store(struct device *dev, struct device_attribute *attr,
 }
 static DEVICE_ATTR_WO(recover);
 
-static struct device_attribute *zpci_dev_attrs[] = {
-	&dev_attr_function_id,
-	&dev_attr_function_handle,
-	&dev_attr_pchid,
-	&dev_attr_pfgid,
-	&dev_attr_recover,
+static struct attribute *zpci_dev_attrs[] = {
+	&dev_attr_function_id.attr,
+	&dev_attr_function_handle.attr,
+	&dev_attr_pchid.attr,
+	&dev_attr_pfgid.attr,
+	&dev_attr_recover.attr,
+	NULL,
+};
+static struct attribute_group zpci_attr_group = {
+	.attrs = zpci_dev_attrs,
+};
+const struct attribute_group *zpci_attr_groups[] = {
+	&zpci_attr_group,
 	NULL,
 };
-
-int zpci_sysfs_add_device(struct device *dev)
-{
-	int i, rc = 0;
-
-	for (i = 0; zpci_dev_attrs[i]; i++) {
-		rc = device_create_file(dev, zpci_dev_attrs[i]);
-		if (rc)
-			goto error;
-	}
-	return 0;
-
-error:
-	while (--i >= 0)
-		device_remove_file(dev, zpci_dev_attrs[i]);
-	return rc;
-}
-
-void zpci_sysfs_remove_device(struct device *dev)
-{
-	int i;
-
-	for (i = 0; zpci_dev_attrs[i]; i++)
-		device_remove_file(dev, zpci_dev_attrs[i]);
-}
-- 
cgit v0.10.2


From ac4995b9d5705f10a69ea74d440e3943db41f2ca Mon Sep 17 00:00:00 2001
From: Sebastian Ott <sebott@linux.vnet.ibm.com>
Date: Wed, 16 Apr 2014 16:12:05 +0200
Subject: s390/pci: add some new arch specific pci attributes

Add a bunch of s390 specific pci attributes to help
identifying pci functions.

Reviewed-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index 79b5f07..c030900 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -78,10 +78,16 @@ struct zpci_dev {
 	enum zpci_state state;
 	u32		fid;		/* function ID, used by sclp */
 	u32		fh;		/* function handle, used by insn's */
+	u16		vfn;		/* virtual function number */
 	u16		pchid;		/* physical channel ID */
 	u8		pfgid;		/* function group ID */
+	u8		pft;		/* pci function type */
 	u16		domain;
 
+	u8 pfip[CLP_PFIP_NR_SEGMENTS];	/* pci function internal path */
+	u32 uid;			/* user defined id */
+	u8 util_str[CLP_UTIL_STR_LEN];	/* utility string */
+
 	/* IRQ stuff */
 	u64		msi_addr;	/* MSI address */
 	struct airq_iv *aibv;		/* adapter interrupt bit vector */
diff --git a/arch/s390/include/asm/pci_clp.h b/arch/s390/include/asm/pci_clp.h
index d31d739..dd78f92 100644
--- a/arch/s390/include/asm/pci_clp.h
+++ b/arch/s390/include/asm/pci_clp.h
@@ -44,6 +44,7 @@ struct clp_fh_list_entry {
 #define CLP_SET_DISABLE_PCI_FN	1	/* Yes, 1 disables it */
 
 #define CLP_UTIL_STR_LEN	64
+#define CLP_PFIP_NR_SEGMENTS	4
 
 /* List PCI functions request */
 struct clp_req_list_pci {
@@ -85,7 +86,7 @@ struct clp_rsp_query_pci {
 	struct clp_rsp_hdr hdr;
 	u32 fmt			:  4;	/* cmd request block format */
 	u32			: 28;
-	u64 reserved1;
+	u64			: 64;
 	u16 vfn;			/* virtual fn number */
 	u16			:  7;
 	u16 util_str_avail	:  1;	/* utility string available? */
@@ -94,10 +95,13 @@ struct clp_rsp_query_pci {
 	u8 bar_size[PCI_BAR_COUNT];
 	u16 pchid;
 	u32 bar[PCI_BAR_COUNT];
-	u64 reserved2;
+	u8 pfip[CLP_PFIP_NR_SEGMENTS];	/* pci function internal path */
+	u32			: 24;
+	u8 pft;				/* pci function type */
 	u64 sdma;			/* start dma as */
 	u64 edma;			/* end dma as */
-	u64 reserved3[6];
+	u32 reserved[11];
+	u32 uid;			/* user defined id */
 	u8 util_str[CLP_UTIL_STR_LEN];	/* utility string */
 } __packed;
 
diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c
index c747394..96545d7 100644
--- a/arch/s390/pci/pci_clp.c
+++ b/arch/s390/pci/pci_clp.c
@@ -114,6 +114,16 @@ static int clp_store_query_pci_fn(struct zpci_dev *zdev,
 	zdev->end_dma = response->edma;
 	zdev->pchid = response->pchid;
 	zdev->pfgid = response->pfgid;
+	zdev->pft = response->pft;
+	zdev->vfn = response->vfn;
+	zdev->uid = response->uid;
+
+	memcpy(zdev->pfip, response->pfip, sizeof(zdev->pfip));
+	if (response->util_str_avail) {
+		memcpy(zdev->util_str, response->util_str,
+		       sizeof(zdev->util_str));
+	}
+
 	return 0;
 }
 
diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c
index f23da1c..9190214 100644
--- a/arch/s390/pci/pci_sysfs.c
+++ b/arch/s390/pci/pci_sysfs.c
@@ -26,6 +26,13 @@ zpci_attr(function_id, "0x%08x\n", fid);
 zpci_attr(function_handle, "0x%08x\n", fh);
 zpci_attr(pchid, "0x%04x\n", pchid);
 zpci_attr(pfgid, "0x%02x\n", pfgid);
+zpci_attr(vfn, "0x%04x\n", vfn);
+zpci_attr(pft, "0x%02x\n", pft);
+zpci_attr(uid, "0x%x\n", uid);
+zpci_attr(segment0, "0x%02x\n", pfip[0]);
+zpci_attr(segment1, "0x%02x\n", pfip[1]);
+zpci_attr(segment2, "0x%02x\n", pfip[2]);
+zpci_attr(segment3, "0x%02x\n", pfip[3]);
 
 static ssize_t recover_store(struct device *dev, struct device_attribute *attr,
 			     const char *buf, size_t count)
@@ -51,18 +58,53 @@ static ssize_t recover_store(struct device *dev, struct device_attribute *attr,
 }
 static DEVICE_ATTR_WO(recover);
 
+static ssize_t util_string_read(struct file *filp, struct kobject *kobj,
+				struct bin_attribute *attr, char *buf,
+				loff_t off, size_t count)
+{
+	struct device *dev = kobj_to_dev(kobj);
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct zpci_dev *zdev = get_zdev(pdev);
+
+	return memory_read_from_buffer(buf, count, &off, zdev->util_str,
+				       sizeof(zdev->util_str));
+}
+static BIN_ATTR_RO(util_string, CLP_UTIL_STR_LEN);
+static struct bin_attribute *zpci_bin_attrs[] = {
+	&bin_attr_util_string,
+	NULL,
+};
+
 static struct attribute *zpci_dev_attrs[] = {
 	&dev_attr_function_id.attr,
 	&dev_attr_function_handle.attr,
 	&dev_attr_pchid.attr,
 	&dev_attr_pfgid.attr,
+	&dev_attr_pft.attr,
+	&dev_attr_vfn.attr,
+	&dev_attr_uid.attr,
 	&dev_attr_recover.attr,
 	NULL,
 };
 static struct attribute_group zpci_attr_group = {
 	.attrs = zpci_dev_attrs,
+	.bin_attrs = zpci_bin_attrs,
+};
+
+static struct attribute *pfip_attrs[] = {
+	&dev_attr_segment0.attr,
+	&dev_attr_segment1.attr,
+	&dev_attr_segment2.attr,
+	&dev_attr_segment3.attr,
+	NULL,
+};
+static struct attribute_group pfip_attr_group = {
+	.name = "pfip",
+	.attrs = pfip_attrs,
 };
+
 const struct attribute_group *zpci_attr_groups[] = {
 	&zpci_attr_group,
+	&pfip_attr_group,
 	NULL,
 };
-- 
cgit v0.10.2


From c9ca78415ac1cbb0e9846111d5f01376266cf6f3 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Thu, 17 Apr 2014 14:16:03 +0200
Subject: s390/uaccess: provide inline variants of get_user/put_user

This shortens the code by ~17k (performace_defconfig, march=z196).
The number of exception table entries however increases from 164
entries to 2500 entries (+~18k).
However the executed code is shorter and also faster since we save
the branches to the out-of-line copy_to/from_user implementations.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h
index 1be64a1..cd4c68e 100644
--- a/arch/s390/include/asm/uaccess.h
+++ b/arch/s390/include/asm/uaccess.h
@@ -132,6 +132,34 @@ unsigned long __must_check __copy_to_user(void __user *to, const void *from,
 #define __copy_to_user_inatomic __copy_to_user
 #define __copy_from_user_inatomic __copy_from_user
 
+#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES
+
+#define __put_get_user_asm(to, from, size, spec)		\
+({								\
+	register unsigned long __reg0 asm("0") = spec;		\
+	int __rc;						\
+								\
+	asm volatile(						\
+		"0:	mvcos	%1,%3,%2\n"			\
+		"1:	xr	%0,%0\n"			\
+		"2:\n"						\
+		".pushsection .fixup, \"ax\"\n"			\
+		"3:	lhi	%0,%5\n"			\
+		"	jg	2b\n"				\
+		".popsection\n"					\
+		EX_TABLE(0b,3b) EX_TABLE(1b,3b)			\
+		: "=d" (__rc), "=Q" (*(to))			\
+		: "d" (size), "Q" (*(from)),			\
+		  "d" (__reg0), "K" (-EFAULT)			\
+		: "cc");					\
+	__rc;							\
+})
+
+#define __put_user_fn(x, ptr, size) __put_get_user_asm(ptr, x, size, 0x810000UL)
+#define __get_user_fn(x, ptr, size) __put_get_user_asm(x, ptr, size, 0x81UL)
+
+#else /* CONFIG_HAVE_MARCH_Z10_FEATURES */
+
 static inline int __put_user_fn(void *x, void __user *ptr, unsigned long size)
 {
 	size = __copy_to_user(ptr, x, size);
@@ -144,6 +172,8 @@ static inline int __get_user_fn(void *x, const void __user *ptr, unsigned long s
 	return size ? -EFAULT : 0;
 }
 
+#endif /* CONFIG_HAVE_MARCH_Z10_FEATURES */
+
 /*
  * These are the main single-value transfer routines.  They automatically
  * use the right size if we just have the right pointer type.
-- 
cgit v0.10.2


From f1a858206804a5a694f30196e50756b86eb7d68c Mon Sep 17 00:00:00 2001
From: Jeff Mahoney <jeffm@suse.com>
Date: Sun, 27 Apr 2014 17:35:43 -0400
Subject: s390/appldata: add slab.h for kzalloc/kfree

This fixes:
arch/s390/appldata/appldata_mem.c:135:2: error: implicit declaration of function 'kzalloc' [-Werror=implicit-function-declaration]
arch/s390/appldata/appldata_mem.c:141:3: error: implicit declaration of function 'kfree' [-Werror=implicit-function-declaration]

Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/appldata/appldata_mem.c b/arch/s390/appldata/appldata_mem.c
index 42be537..edcf2a7 100644
--- a/arch/s390/appldata/appldata_mem.c
+++ b/arch/s390/appldata/appldata_mem.c
@@ -13,6 +13,7 @@
 #include <linux/kernel_stat.h>
 #include <linux/pagemap.h>
 #include <linux/swap.h>
+#include <linux/slab.h>
 #include <asm/io.h>
 
 #include "appldata.h"
-- 
cgit v0.10.2


From 2e4006b34d06681ed95d55510d4450f29a13c417 Mon Sep 17 00:00:00 2001
From: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Date: Tue, 6 May 2014 19:41:36 +0200
Subject: s390/spinlock: fix system hang with spin_retry <= 0

On LPAR, when spin_retry is set to <= 0, arch_spin_lock_wait() and
arch_spin_lock_wait_flags() may end up in a while(1) loop w/o doing
any compare and swap operation. To fix this, use do/while instead of
for loop.

Signed-off-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c
index 3ca9de4..3f0e682 100644
--- a/arch/s390/lib/spinlock.c
+++ b/arch/s390/lib/spinlock.c
@@ -26,19 +26,20 @@ __setup("spin_retry=", spin_retry_setup);
 
 void arch_spin_lock_wait(arch_spinlock_t *lp)
 {
-	int count = spin_retry;
 	unsigned int cpu = SPINLOCK_LOCKVAL;
 	unsigned int owner;
+	int count;
 
 	while (1) {
 		owner = lp->lock;
 		if (!owner || smp_vcpu_scheduled(~owner)) {
-			for (count = spin_retry; count > 0; count--) {
+			count = spin_retry;
+			do {
 				if (arch_spin_is_locked(lp))
 					continue;
 				if (_raw_compare_and_swap(&lp->lock, 0, cpu))
 					return;
-			}
+			} while (count-- > 0);
 			if (MACHINE_IS_LPAR)
 				continue;
 		}
@@ -53,22 +54,23 @@ EXPORT_SYMBOL(arch_spin_lock_wait);
 
 void arch_spin_lock_wait_flags(arch_spinlock_t *lp, unsigned long flags)
 {
-	int count = spin_retry;
 	unsigned int cpu = SPINLOCK_LOCKVAL;
 	unsigned int owner;
+	int count;
 
 	local_irq_restore(flags);
 	while (1) {
 		owner = lp->lock;
 		if (!owner || smp_vcpu_scheduled(~owner)) {
-			for (count = spin_retry; count > 0; count--) {
+			count = spin_retry;
+			do {
 				if (arch_spin_is_locked(lp))
 					continue;
 				local_irq_disable();
 				if (_raw_compare_and_swap(&lp->lock, 0, cpu))
 					return;
 				local_irq_restore(flags);
-			}
+			} while (count-- > 0);
 			if (MACHINE_IS_LPAR)
 				continue;
 		}
-- 
cgit v0.10.2


From 2bf29df7460f4038f84ac5dea3cbe582d6d4af82 Mon Sep 17 00:00:00 2001
From: Sebastian Ott <sebott@linux.vnet.ibm.com>
Date: Wed, 7 May 2014 13:27:21 +0200
Subject: s390/cio: fix multiple structure definitions

Fix multiple definitions of struct channel_path_desc by moving it
to asm/chpid.h . Also change ccw_device_get_chp_desc to use proper
types.

Reviewed-by: Peter Oberparleiter <oberpar@linux.vnet.ibm.com>
Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/include/asm/ccwdev.h b/arch/s390/include/asm/ccwdev.h
index a9c2c06..b80e456 100644
--- a/arch/s390/include/asm/ccwdev.h
+++ b/arch/s390/include/asm/ccwdev.h
@@ -229,5 +229,5 @@ int ccw_device_siosl(struct ccw_device *);
 
 extern void ccw_device_get_schid(struct ccw_device *, struct subchannel_id *);
 
-extern void *ccw_device_get_chp_desc(struct ccw_device *, int);
+struct channel_path_desc *ccw_device_get_chp_desc(struct ccw_device *, int);
 #endif /* _S390_CCWDEV_H_ */
diff --git a/arch/s390/include/asm/chpid.h b/arch/s390/include/asm/chpid.h
index 38c405e..7298eec 100644
--- a/arch/s390/include/asm/chpid.h
+++ b/arch/s390/include/asm/chpid.h
@@ -8,6 +8,17 @@
 #include <uapi/asm/chpid.h>
 #include <asm/cio.h>
 
+struct channel_path_desc {
+	u8 flags;
+	u8 lsn;
+	u8 desc;
+	u8 chpid;
+	u8 swla;
+	u8 zeroes;
+	u8 chla;
+	u8 chpp;
+} __packed;
+
 static inline void chp_id_init(struct chp_id *chpid)
 {
 	memset(chpid, 0, sizeof(struct chp_id));
diff --git a/drivers/s390/cio/chp.c b/drivers/s390/cio/chp.c
index 6c440d4..d497aa0 100644
--- a/drivers/s390/cio/chp.c
+++ b/drivers/s390/cio/chp.c
@@ -509,7 +509,7 @@ out:
  * On success return a newly allocated copy of the channel-path description
  * data associated with the given channel-path ID. Return %NULL on error.
  */
-void *chp_get_chp_desc(struct chp_id chpid)
+struct channel_path_desc *chp_get_chp_desc(struct chp_id chpid)
 {
 	struct channel_path *chp;
 	struct channel_path_desc *desc;
diff --git a/drivers/s390/cio/chp.h b/drivers/s390/cio/chp.h
index 9284b78..4efd5b8 100644
--- a/drivers/s390/cio/chp.h
+++ b/drivers/s390/cio/chp.h
@@ -60,7 +60,7 @@ static inline struct channel_path *chpid_to_chp(struct chp_id chpid)
 int chp_get_status(struct chp_id chpid);
 u8 chp_get_sch_opm(struct subchannel *sch);
 int chp_is_registered(struct chp_id chpid);
-void *chp_get_chp_desc(struct chp_id chpid);
+struct channel_path_desc *chp_get_chp_desc(struct chp_id chpid);
 void chp_remove_cmg_attr(struct channel_path *chp);
 int chp_add_cmg_attr(struct channel_path *chp);
 int chp_update_desc(struct channel_path *chp);
diff --git a/drivers/s390/cio/chsc.h b/drivers/s390/cio/chsc.h
index 7e53a9c..76c9b50 100644
--- a/drivers/s390/cio/chsc.h
+++ b/drivers/s390/cio/chsc.h
@@ -21,17 +21,6 @@ struct cmg_entry {
 	u32 values[NR_MEASUREMENT_ENTRIES];
 } __attribute__ ((packed));
 
-struct channel_path_desc {
-	u8 flags;
-	u8 lsn;
-	u8 desc;
-	u8 chpid;
-	u8 swla;
-	u8 zeroes;
-	u8 chla;
-	u8 chpp;
-} __attribute__ ((packed));
-
 struct channel_path_desc_fmt1 {
 	u8 flags;
 	u8 lsn;
diff --git a/drivers/s390/cio/device_ops.c b/drivers/s390/cio/device_ops.c
index 4845d64..f3c4179 100644
--- a/drivers/s390/cio/device_ops.c
+++ b/drivers/s390/cio/device_ops.c
@@ -563,14 +563,23 @@ out_unlock:
 	return rc;
 }
 
-void *ccw_device_get_chp_desc(struct ccw_device *cdev, int chp_no)
+/**
+ * chp_get_chp_desc - return newly allocated channel-path descriptor
+ * @cdev: device to obtain the descriptor for
+ * @chp_idx: index of the channel path
+ *
+ * On success return a newly allocated copy of the channel-path description
+ * data associated with the given channel path. Return %NULL on error.
+ */
+struct channel_path_desc *ccw_device_get_chp_desc(struct ccw_device *cdev,
+						  int chp_idx)
 {
 	struct subchannel *sch;
 	struct chp_id chpid;
 
 	sch = to_subchannel(cdev->dev.parent);
 	chp_id_init(&chpid);
-	chpid.id = sch->schib.pmcw.chpid[chp_no];
+	chpid.id = sch->schib.pmcw.chpid[chp_idx];
 	return chp_get_chp_desc(chpid);
 }
 
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index 22470a3..e89f38c 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -22,6 +22,7 @@
 #include <net/iucv/af_iucv.h>
 
 #include <asm/ebcdic.h>
+#include <asm/chpid.h>
 #include <asm/io.h>
 #include <asm/sysinfo.h>
 #include <asm/compat.h>
@@ -1344,16 +1345,7 @@ static void qeth_set_multiple_write_queues(struct qeth_card *card)
 static void qeth_update_from_chp_desc(struct qeth_card *card)
 {
 	struct ccw_device *ccwdev;
-	struct channelPath_dsc {
-		u8 flags;
-		u8 lsn;
-		u8 desc;
-		u8 chpid;
-		u8 swla;
-		u8 zeroes;
-		u8 chla;
-		u8 chpp;
-	} *chp_dsc;
+	struct channel_path_desc *chp_dsc;
 
 	QETH_DBF_TEXT(SETUP, 2, "chp_desc");
 
-- 
cgit v0.10.2


From bae8f567344a7cb6a23ca6e13096ba785c69eb42 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Thu, 15 May 2014 11:00:44 +0200
Subject: s390/spinlock,rwlock: always to a load-and-test first

In case a lock is contended it is better to do a load-and-test first
before trying to get the lock with compare-and-swap. This helps to avoid
unnecessary cache invalidations of the cacheline for the lock if the
CPU has to wait for the lock. For an uncontended lock doing the
compare-and-swap directly is a bit better, if the CPU does not have the
cacheline in its cache yet the compare-and-swap will get it read-write
immediately while a load-and-test would get it read-only first.

Always to the load-and-test first to avoid the cacheline invalidations
for the contended case outweight the potential read-only to read-write
cacheline upgrade for the uncontended case.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h
index 5a0b288..96879f7 100644
--- a/arch/s390/include/asm/spinlock.h
+++ b/arch/s390/include/asm/spinlock.h
@@ -59,7 +59,9 @@ static inline int arch_spin_is_locked(arch_spinlock_t *lp)
 
 static inline int arch_spin_trylock_once(arch_spinlock_t *lp)
 {
-	return _raw_compare_and_swap(&lp->lock, 0, SPINLOCK_LOCKVAL);
+	barrier();
+	return likely(arch_spin_value_unlocked(*lp) &&
+		      _raw_compare_and_swap(&lp->lock, 0, SPINLOCK_LOCKVAL));
 }
 
 static inline int arch_spin_tryrelease_once(arch_spinlock_t *lp)
@@ -69,20 +71,20 @@ static inline int arch_spin_tryrelease_once(arch_spinlock_t *lp)
 
 static inline void arch_spin_lock(arch_spinlock_t *lp)
 {
-	if (unlikely(!arch_spin_trylock_once(lp)))
+	if (!arch_spin_trylock_once(lp))
 		arch_spin_lock_wait(lp);
 }
 
 static inline void arch_spin_lock_flags(arch_spinlock_t *lp,
 					unsigned long flags)
 {
-	if (unlikely(!arch_spin_trylock_once(lp)))
+	if (!arch_spin_trylock_once(lp))
 		arch_spin_lock_wait_flags(lp, flags);
 }
 
 static inline int arch_spin_trylock(arch_spinlock_t *lp)
 {
-	if (unlikely(!arch_spin_trylock_once(lp)))
+	if (!arch_spin_trylock_once(lp))
 		return arch_spin_trylock_retry(lp);
 	return 1;
 }
@@ -128,19 +130,29 @@ extern void _raw_write_lock_wait(arch_rwlock_t *lp);
 extern void _raw_write_lock_wait_flags(arch_rwlock_t *lp, unsigned long flags);
 extern int _raw_write_trylock_retry(arch_rwlock_t *lp);
 
+static inline int arch_read_trylock_once(arch_rwlock_t *rw)
+{
+	unsigned int old = ACCESS_ONCE(rw->lock);
+	return likely((int) old >= 0 &&
+		      _raw_compare_and_swap(&rw->lock, old, old + 1));
+}
+
+static inline int arch_write_trylock_once(arch_rwlock_t *rw)
+{
+	unsigned int old = ACCESS_ONCE(rw->lock);
+	return likely(old == 0 &&
+		      _raw_compare_and_swap(&rw->lock, 0, 0x80000000));
+}
+
 static inline void arch_read_lock(arch_rwlock_t *rw)
 {
-	unsigned int old;
-	old = rw->lock & 0x7fffffffU;
-	if (!_raw_compare_and_swap(&rw->lock, old, old + 1))
+	if (!arch_read_trylock_once(rw))
 		_raw_read_lock_wait(rw);
 }
 
 static inline void arch_read_lock_flags(arch_rwlock_t *rw, unsigned long flags)
 {
-	unsigned int old;
-	old = rw->lock & 0x7fffffffU;
-	if (!_raw_compare_and_swap(&rw->lock, old, old + 1))
+	if (!arch_read_trylock_once(rw))
 		_raw_read_lock_wait_flags(rw, flags);
 }
 
@@ -155,13 +167,13 @@ static inline void arch_read_unlock(arch_rwlock_t *rw)
 
 static inline void arch_write_lock(arch_rwlock_t *rw)
 {
-	if (unlikely(!_raw_compare_and_swap(&rw->lock, 0, 0x80000000)))
+	if (!arch_write_trylock_once(rw))
 		_raw_write_lock_wait(rw);
 }
 
 static inline void arch_write_lock_flags(arch_rwlock_t *rw, unsigned long flags)
 {
-	if (unlikely(!_raw_compare_and_swap(&rw->lock, 0, 0x80000000)))
+	if (!arch_write_trylock_once(rw))
 		_raw_write_lock_wait_flags(rw, flags);
 }
 
@@ -172,18 +184,16 @@ static inline void arch_write_unlock(arch_rwlock_t *rw)
 
 static inline int arch_read_trylock(arch_rwlock_t *rw)
 {
-	unsigned int old;
-	old = rw->lock & 0x7fffffffU;
-	if (likely(_raw_compare_and_swap(&rw->lock, old, old + 1)))
-		return 1;
-	return _raw_read_trylock_retry(rw);
+	if (!arch_read_trylock_once(rw))
+		return _raw_read_trylock_retry(rw);
+	return 1;
 }
 
 static inline int arch_write_trylock(arch_rwlock_t *rw)
 {
-	if (likely(_raw_compare_and_swap(&rw->lock, 0, 0x80000000)))
-		return 1;
-	return _raw_write_trylock_retry(rw);
+	if (!arch_write_trylock_once(rw))
+		return _raw_write_trylock_retry(rw);
+	return 1;
 }
 
 #define arch_read_relax(lock)	cpu_relax()
diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c
index 3f0e682..284d879a 100644
--- a/arch/s390/lib/spinlock.c
+++ b/arch/s390/lib/spinlock.c
@@ -100,12 +100,9 @@ int arch_spin_trylock_retry(arch_spinlock_t *lp)
 {
 	int count;
 
-	for (count = spin_retry; count > 0; count--) {
-		if (arch_spin_is_locked(lp))
-			continue;
+	for (count = spin_retry; count > 0; count--)
 		if (arch_spin_trylock_once(lp))
 			return 1;
-	}
 	return 0;
 }
 EXPORT_SYMBOL(arch_spin_trylock_retry);
@@ -120,9 +117,9 @@ void _raw_read_lock_wait(arch_rwlock_t *rw)
 			smp_yield();
 			count = spin_retry;
 		}
-		if (!arch_read_can_lock(rw))
+		old = ACCESS_ONCE(rw->lock);
+		if ((int) old < 0)
 			continue;
-		old = rw->lock & 0x7fffffffU;
 		if (_raw_compare_and_swap(&rw->lock, old, old + 1))
 			return;
 	}
@@ -140,9 +137,9 @@ void _raw_read_lock_wait_flags(arch_rwlock_t *rw, unsigned long flags)
 			smp_yield();
 			count = spin_retry;
 		}
-		if (!arch_read_can_lock(rw))
+		old = ACCESS_ONCE(rw->lock);
+		if ((int) old < 0)
 			continue;
-		old = rw->lock & 0x7fffffffU;
 		local_irq_disable();
 		if (_raw_compare_and_swap(&rw->lock, old, old + 1))
 			return;
@@ -156,9 +153,9 @@ int _raw_read_trylock_retry(arch_rwlock_t *rw)
 	int count = spin_retry;
 
 	while (count-- > 0) {
-		if (!arch_read_can_lock(rw))
+		old = ACCESS_ONCE(rw->lock);
+		if ((int) old < 0)
 			continue;
-		old = rw->lock & 0x7fffffffU;
 		if (_raw_compare_and_swap(&rw->lock, old, old + 1))
 			return 1;
 	}
@@ -168,6 +165,7 @@ EXPORT_SYMBOL(_raw_read_trylock_retry);
 
 void _raw_write_lock_wait(arch_rwlock_t *rw)
 {
+	unsigned int old;
 	int count = spin_retry;
 
 	while (1) {
@@ -175,7 +173,8 @@ void _raw_write_lock_wait(arch_rwlock_t *rw)
 			smp_yield();
 			count = spin_retry;
 		}
-		if (!arch_write_can_lock(rw))
+		old = ACCESS_ONCE(rw->lock);
+		if (old)
 			continue;
 		if (_raw_compare_and_swap(&rw->lock, 0, 0x80000000))
 			return;
@@ -185,6 +184,7 @@ EXPORT_SYMBOL(_raw_write_lock_wait);
 
 void _raw_write_lock_wait_flags(arch_rwlock_t *rw, unsigned long flags)
 {
+	unsigned int old;
 	int count = spin_retry;
 
 	local_irq_restore(flags);
@@ -193,7 +193,8 @@ void _raw_write_lock_wait_flags(arch_rwlock_t *rw, unsigned long flags)
 			smp_yield();
 			count = spin_retry;
 		}
-		if (!arch_write_can_lock(rw))
+		old = ACCESS_ONCE(rw->lock);
+		if (old)
 			continue;
 		local_irq_disable();
 		if (_raw_compare_and_swap(&rw->lock, 0, 0x80000000))
@@ -204,10 +205,12 @@ EXPORT_SYMBOL(_raw_write_lock_wait_flags);
 
 int _raw_write_trylock_retry(arch_rwlock_t *rw)
 {
+	unsigned int old;
 	int count = spin_retry;
 
 	while (count-- > 0) {
-		if (!arch_write_can_lock(rw))
+		old = ACCESS_ONCE(rw->lock);
+		if (old)
 			continue;
 		if (_raw_compare_and_swap(&rw->lock, 0, 0x80000000))
 			return 1;
-- 
cgit v0.10.2


From 939c5ae4029e1679bb93f7d09afb8c831db985bd Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Fri, 16 May 2014 09:35:02 +0200
Subject: s390/rwlock: add missing local_irq_restore calls

The out of line _raw_read_lock_wait_flags/_raw_write_lock_wait_flags
functions for the arch_read_lock_flags/arch_write_lock_flags  calls
fail to re-enable the interrupts after another unsuccessful try to
get the lock with compare-and-swap. The following wait would be
done with interrupts disabled which is suboptimal.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c
index 284d879a..1dd282c 100644
--- a/arch/s390/lib/spinlock.c
+++ b/arch/s390/lib/spinlock.c
@@ -143,6 +143,7 @@ void _raw_read_lock_wait_flags(arch_rwlock_t *rw, unsigned long flags)
 		local_irq_disable();
 		if (_raw_compare_and_swap(&rw->lock, old, old + 1))
 			return;
+		local_irq_restore(flags);
 	}
 }
 EXPORT_SYMBOL(_raw_read_lock_wait_flags);
@@ -199,6 +200,7 @@ void _raw_write_lock_wait_flags(arch_rwlock_t *rw, unsigned long flags)
 		local_irq_disable();
 		if (_raw_compare_and_swap(&rw->lock, 0, 0x80000000))
 			return;
+		local_irq_restore(flags);
 	}
 }
 EXPORT_SYMBOL(_raw_write_lock_wait_flags);
-- 
cgit v0.10.2


From 470ada6b1a1d80a173586c036f84e2c3a486ebf9 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Fri, 16 May 2014 15:11:12 +0200
Subject: s390/spinlock: refactor arch_spin_lock_wait[_flags]

Reorder the spinlock wait code to make it more readable.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c
index 1dd282c..5b0e445 100644
--- a/arch/s390/lib/spinlock.c
+++ b/arch/s390/lib/spinlock.c
@@ -31,23 +31,31 @@ void arch_spin_lock_wait(arch_spinlock_t *lp)
 	int count;
 
 	while (1) {
-		owner = lp->lock;
-		if (!owner || smp_vcpu_scheduled(~owner)) {
-			count = spin_retry;
-			do {
-				if (arch_spin_is_locked(lp))
-					continue;
-				if (_raw_compare_and_swap(&lp->lock, 0, cpu))
-					return;
-			} while (count-- > 0);
-			if (MACHINE_IS_LPAR)
-				continue;
+		owner = ACCESS_ONCE(lp->lock);
+		/* Try to get the lock if it is free. */
+		if (!owner) {
+			if (_raw_compare_and_swap(&lp->lock, 0, cpu))
+				return;
+			continue;
 		}
-		owner = lp->lock;
-		if (owner)
+		/* Check if the lock owner is running. */
+		if (!smp_vcpu_scheduled(~owner)) {
+			smp_yield_cpu(~owner);
+			continue;
+		}
+		/* Loop for a while on the lock value. */
+		count = spin_retry;
+		do {
+			owner = ACCESS_ONCE(lp->lock);
+		} while (owner && count-- > 0);
+		if (!owner)
+			continue;
+		/*
+		 * For multiple layers of hypervisors, e.g. z/VM + LPAR
+		 * yield the CPU if the lock is still unavailable.
+		 */
+		if (!MACHINE_IS_LPAR)
 			smp_yield_cpu(~owner);
-		if (_raw_compare_and_swap(&lp->lock, 0, cpu))
-			return;
 	}
 }
 EXPORT_SYMBOL(arch_spin_lock_wait);
@@ -60,27 +68,32 @@ void arch_spin_lock_wait_flags(arch_spinlock_t *lp, unsigned long flags)
 
 	local_irq_restore(flags);
 	while (1) {
-		owner = lp->lock;
-		if (!owner || smp_vcpu_scheduled(~owner)) {
-			count = spin_retry;
-			do {
-				if (arch_spin_is_locked(lp))
-					continue;
-				local_irq_disable();
-				if (_raw_compare_and_swap(&lp->lock, 0, cpu))
-					return;
-				local_irq_restore(flags);
-			} while (count-- > 0);
-			if (MACHINE_IS_LPAR)
-				continue;
+		owner = ACCESS_ONCE(lp->lock);
+		/* Try to get the lock if it is free. */
+		if (!owner) {
+			local_irq_disable();
+			if (_raw_compare_and_swap(&lp->lock, 0, cpu))
+				return;
+			local_irq_restore(flags);
 		}
-		owner = lp->lock;
-		if (owner)
+		/* Check if the lock owner is running. */
+		if (!smp_vcpu_scheduled(~owner)) {
+			smp_yield_cpu(~owner);
+			continue;
+		}
+		/* Loop for a while on the lock value. */
+		count = spin_retry;
+		do {
+			owner = ACCESS_ONCE(lp->lock);
+		} while (owner && count-- > 0);
+		if (!owner)
+			continue;
+		/*
+		 * For multiple layers of hypervisors, e.g. z/VM + LPAR
+		 * yield the CPU if the lock is still unavailable.
+		 */
+		if (!MACHINE_IS_LPAR)
 			smp_yield_cpu(~owner);
-		local_irq_disable();
-		if (_raw_compare_and_swap(&lp->lock, 0, cpu))
-			return;
-		local_irq_restore(flags);
 	}
 }
 EXPORT_SYMBOL(arch_spin_lock_wait_flags);
-- 
cgit v0.10.2


From 94414ca08afabbb5d19511ef57ec397a07fc6d35 Mon Sep 17 00:00:00 2001
From: Julia Lawall <Julia.Lawall@lip6.fr>
Date: Mon, 19 May 2014 06:31:15 +0200
Subject: s390/oprofile: make return of 0 explicit

Delete unnecessary local variable whose value is always 0 and that hides
the fact that the result is always 0.

A simplified version of the semantic patch that fixes this problem is as
follows: (http://coccinelle.lip6.fr/)

// <smpl>
@r exists@
local idexpression ret;
expression e;
position p;
@@

-ret = 0;
... when != ret = e
return
- ret
+ 0
  ;
// </smpl>

[heiko.carstens: turn prepare_cpu_buffers into a void returning function]
Signed-off-by: Julia Lawall <Julia.Lawall@lip6.fr>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/oprofile/hwsampler.c b/arch/s390/oprofile/hwsampler.c
index 276f2e2..e53c6f2 100644
--- a/arch/s390/oprofile/hwsampler.c
+++ b/arch/s390/oprofile/hwsampler.c
@@ -209,13 +209,11 @@ static void init_all_cpu_buffers(void)
 	}
 }
 
-static int prepare_cpu_buffers(void)
+static void prepare_cpu_buffers(void)
 {
-	int cpu;
-	int rc;
 	struct hws_cpu_buffer *cb;
+	int cpu;
 
-	rc = 0;
 	for_each_online_cpu(cpu) {
 		cb = &per_cpu(sampler_cpu_buffer, cpu);
 		atomic_set(&cb->ext_params, 0);
@@ -230,8 +228,6 @@ static int prepare_cpu_buffers(void)
 		cb->oom = 0;
 		cb->stop_mode = 0;
 	}
-
-	return rc;
 }
 
 /*
@@ -1107,9 +1103,7 @@ int hwsampler_start_all(unsigned long rate)
 	if (rc)
 		goto start_all_exit;
 
-	rc = prepare_cpu_buffers();
-	if (rc)
-		goto start_all_exit;
+	prepare_cpu_buffers();
 
 	for_each_online_cpu(cpu) {
 		rc = start_sampling(cpu);
@@ -1156,7 +1150,7 @@ int hwsampler_stop_all(void)
 	rc = 0;
 	if (hws_state == HWS_INIT) {
 		mutex_unlock(&hws_sem);
-		return rc;
+		return 0;
 	}
 	hws_state = HWS_STOPPING;
 	mutex_unlock(&hws_sem);
-- 
cgit v0.10.2


From b6f4296279ab3ada554d993d12844272fd86b36a Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Tue, 20 May 2014 17:21:35 +0200
Subject: s390/time: cast tv_nsec to u64 prior to shift in update_vsyscall

Analog to git commit 28b92e09e25bdc0ae864b22eacf195a74f861389
first cast tk->wall_to_monotonic.tv_nsec to u64 before doing
the shift with tk->shift to avoid loosing relevant bits on a
32-bit kernel.

Cc: stable@vger.kernel.org # 3.13+
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index 386d37a..0931b11 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -226,7 +226,7 @@ void update_vsyscall(struct timekeeper *tk)
 	vdso_data->wtom_clock_sec =
 		tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
 	vdso_data->wtom_clock_nsec = tk->xtime_nsec +
-		+ (tk->wall_to_monotonic.tv_nsec << tk->shift);
+		+ ((u64) tk->wall_to_monotonic.tv_nsec << tk->shift);
 	nsecps = (u64) NSEC_PER_SEC << tk->shift;
 	while (vdso_data->wtom_clock_nsec >= nsecps) {
 		vdso_data->wtom_clock_nsec -= nsecps;
-- 
cgit v0.10.2


From 6adbc9236bc01225c056757d6e0eb14cd0409dff Mon Sep 17 00:00:00 2001
From: Sebastian Ott <sebott@linux.vnet.ibm.com>
Date: Thu, 22 May 2014 13:35:50 +0200
Subject: s390/cio: remove weird assignment during argument evaluation

Get rid of a useless assignment during argument evaluation.

No functional change.

Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/drivers/s390/cio/ccwreq.c b/drivers/s390/cio/ccwreq.c
index 5156264..fc5fb51 100644
--- a/drivers/s390/cio/ccwreq.c
+++ b/drivers/s390/cio/ccwreq.c
@@ -46,7 +46,7 @@ static u16 ccwreq_next_path(struct ccw_device *cdev)
 		goto out;
 	}
 	req->retries	= req->maxretries;
-	req->mask	= lpm_adjust(req->mask >>= 1, req->lpm);
+	req->mask	= lpm_adjust(req->mask >> 1, req->lpm);
 out:
 	return req->mask;
 }
-- 
cgit v0.10.2


From cedbecd60a8effc66dc8bed4e5489ff9365c9b19 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Fri, 23 May 2014 13:18:27 +0200
Subject: s390/boot: fix boot of compressed kernel built with gcc 4.9

Add -fno-delete-null-pointer-checks to CFLAGS for the code in
arch/s390/boot. Without the option a compressed kernel built with
gcc 4.9 won't boot.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile
index 866ecbe..f90d1fc 100644
--- a/arch/s390/boot/compressed/Makefile
+++ b/arch/s390/boot/compressed/Makefile
@@ -12,7 +12,7 @@ targets += misc.o piggy.o sizes.h head$(BITS).o
 
 KBUILD_CFLAGS := -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2
 KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
-KBUILD_CFLAGS += $(cflags-y)
+KBUILD_CFLAGS += $(cflags-y) -fno-delete-null-pointer-checks
 KBUILD_CFLAGS += $(call cc-option,-mpacked-stack)
 KBUILD_CFLAGS += $(call cc-option,-ffreestanding)
 
-- 
cgit v0.10.2


From 0c91f98e28d2d123037ff59366ab7e1bca3e1276 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Tue, 27 May 2014 09:40:26 +0200
Subject: s390: require mvcos facility for z10 and newer machines

With inlined uaccess functions we always need the mvcos facility.
Checking at each inline place if mvcos is available would make the
inlining of get_user/put_user pointless.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S
index 429afcc..37579ad 100644
--- a/arch/s390/kernel/head.S
+++ b/arch/s390/kernel/head.S
@@ -437,11 +437,11 @@ ENTRY(startup_kdump)
 
 #if defined(CONFIG_64BIT)
 #if defined(CONFIG_MARCH_ZEC12)
-	.long 3, 0xc100efe3, 0xf46ce800, 0x00400000
+	.long 3, 0xc100efeb, 0xf46ce800, 0x00400000
 #elif defined(CONFIG_MARCH_Z196)
-	.long 2, 0xc100efe3, 0xf46c0000
+	.long 2, 0xc100efeb, 0xf46c0000
 #elif defined(CONFIG_MARCH_Z10)
-	.long 2, 0xc100efe3, 0xf0680000
+	.long 2, 0xc100efeb, 0xf0680000
 #elif defined(CONFIG_MARCH_Z9_109)
 	.long 1, 0xc100efc3
 #elif defined(CONFIG_MARCH_Z990)
-- 
cgit v0.10.2


From 6eb58d9bc113b9d3f97b710d9e5c1bc4f5044768 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Tue, 27 May 2014 09:52:01 +0200
Subject: s390/facilities: remove extract-cpu-time facility check

Remove the check for the extract-cpu-time facility within early startup
code. Both kernel and user space work if the facility is not installed.

The vdso code has a run time check if the ectg is available. Besides that
there is no known user.

Reported-by: Christian Borntraeger <borntraeger@de.ibm.com>
Acked-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S
index 37579ad..7ba7d67 100644
--- a/arch/s390/kernel/head.S
+++ b/arch/s390/kernel/head.S
@@ -437,13 +437,13 @@ ENTRY(startup_kdump)
 
 #if defined(CONFIG_64BIT)
 #if defined(CONFIG_MARCH_ZEC12)
-	.long 3, 0xc100efeb, 0xf46ce800, 0x00400000
+	.long 3, 0xc100efea, 0xf46ce800, 0x00400000
 #elif defined(CONFIG_MARCH_Z196)
-	.long 2, 0xc100efeb, 0xf46c0000
+	.long 2, 0xc100efea, 0xf46c0000
 #elif defined(CONFIG_MARCH_Z10)
-	.long 2, 0xc100efeb, 0xf0680000
+	.long 2, 0xc100efea, 0xf0680000
 #elif defined(CONFIG_MARCH_Z9_109)
-	.long 1, 0xc100efc3
+	.long 1, 0xc100efc2
 #elif defined(CONFIG_MARCH_Z990)
 	.long 1, 0xc0002000
 #elif defined(CONFIG_MARCH_Z900)
-- 
cgit v0.10.2


From 993072ee67aa179c48c85eb19869804e68887d86 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Mon, 26 May 2014 21:55:08 +0200
Subject: s390/lowcore: reserve 96 bytes for IRB in lowcore

The IRB might be 96 bytes if the extended-I/O-measurement facility is
used. This feature is currently not used by Linux, but struct irb
already has the emw defined. So let's make the irb in lowcore match the
size of the internal data structure to be future proof.
We also have to add a pad, to correctly align the paste.

The bigger irb field also circumvents a bug in some QEMU versions that
always write the emw field on test subchannel and therefore destroy the
paste definitions of this CPU. Running under these QEMU version broke
some timing functions in the VDSO and all users of these functions,
e.g. some JREs.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Sebastian Ott <sebott@linux.vnet.ibm.com>
Cc: Cornelia Huck <cornelia.huck@de.ibm.com>
Cc: stable@vger.kernel.org

diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index 26e03ce..a406f24 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -144,9 +144,9 @@ struct _lowcore {
 	__u32	spinlock_lockval;		/* 0x02fc */
 
 	/* Interrupt response block */
-	__u8	irb[64];			/* 0x0300 */
+	__u8	irb[96];			/* 0x0300 */
 
-	__u8	pad_0x0340[0x0e00-0x0340];	/* 0x0340 */
+	__u8	pad_0x0360[0x0e00-0x0360];	/* 0x0360 */
 
 	/*
 	 * 0xe00 contains the address of the IPL Parameter Information
@@ -293,12 +293,13 @@ struct _lowcore {
 	__u8	pad_0x03a0[0x0400-0x03a4];	/* 0x03a4 */
 
 	/* Interrupt response block. */
-	__u8	irb[64];			/* 0x0400 */
+	__u8	irb[96];			/* 0x0400 */
+	__u8	pad_0x0460[0x0480-0x0460];	/* 0x0460 */
 
 	/* Per cpu primary space access list */
-	__u32	paste[16];			/* 0x0440 */
+	__u32	paste[16];			/* 0x0480 */
 
-	__u8	pad_0x0480[0x0e00-0x0480];	/* 0x0480 */
+	__u8	pad_0x04c0[0x0e00-0x04c0];	/* 0x04c0 */
 
 	/*
 	 * 0xe00 contains the address of the IPL Parameter Information
-- 
cgit v0.10.2


From 63aef00b55d37e9fad837a8b38a2c261f0d32041 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Tue, 27 May 2014 14:40:39 +0200
Subject: s390/lowcore: replace lowcore irb array with a per-cpu variable

Remove the 96-byte irb array from the lowcore and create a per-cpu
variable instead. That way we will pick up any change in the definition
of the struct irb automatically.

Acked-By: Sebastian Ott <sebott@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index a406f24..2070cad 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -143,10 +143,7 @@ struct _lowcore {
 	__u32	ftrace_func;			/* 0x02f8 */
 	__u32	spinlock_lockval;		/* 0x02fc */
 
-	/* Interrupt response block */
-	__u8	irb[96];			/* 0x0300 */
-
-	__u8	pad_0x0360[0x0e00-0x0360];	/* 0x0360 */
+	__u8	pad_0x0300[0x0e00-0x0300];	/* 0x0300 */
 
 	/*
 	 * 0xe00 contains the address of the IPL Parameter Information
@@ -292,14 +289,10 @@ struct _lowcore {
 	__u32	spinlock_lockval;		/* 0x03a0 */
 	__u8	pad_0x03a0[0x0400-0x03a4];	/* 0x03a4 */
 
-	/* Interrupt response block. */
-	__u8	irb[96];			/* 0x0400 */
-	__u8	pad_0x0460[0x0480-0x0460];	/* 0x0460 */
-
 	/* Per cpu primary space access list */
-	__u32	paste[16];			/* 0x0480 */
+	__u32	paste[16];			/* 0x0400 */
 
-	__u8	pad_0x04c0[0x0e00-0x04c0];	/* 0x04c0 */
+	__u8	pad_0x04c0[0x0e00-0x0440];	/* 0x0440 */
 
 	/*
 	 * 0xe00 contains the address of the IPL Parameter Information
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index 76c4e2f..0c070c4 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -144,7 +144,6 @@ int main(void)
 	DEFINE(__LC_MCCK_CLOCK, offsetof(struct _lowcore, mcck_clock));
 	DEFINE(__LC_MACHINE_FLAGS, offsetof(struct _lowcore, machine_flags));
 	DEFINE(__LC_FTRACE_FUNC, offsetof(struct _lowcore, ftrace_func));
-	DEFINE(__LC_IRB, offsetof(struct _lowcore, irb));
 	DEFINE(__LC_DUMP_REIPL, offsetof(struct _lowcore, ipib));
 	BLANK();
 	DEFINE(__LC_CPU_TIMER_SAVE_AREA, offsetof(struct _lowcore, cpu_timer_save_area));
diff --git a/drivers/s390/cio/ccwreq.c b/drivers/s390/cio/ccwreq.c
index fc5fb51..07676c2 100644
--- a/drivers/s390/cio/ccwreq.c
+++ b/drivers/s390/cio/ccwreq.c
@@ -252,7 +252,7 @@ static void ccwreq_log_status(struct ccw_device *cdev, enum io_status status)
  */
 void ccw_request_handler(struct ccw_device *cdev)
 {
-	struct irb *irb = (struct irb *)&S390_lowcore.irb;
+	struct irb *irb = &__get_cpu_var(cio_irb);
 	struct ccw_request *req = &cdev->private->req;
 	enum io_status status;
 	int rc = -EOPNOTSUPP;
diff --git a/drivers/s390/cio/chsc_sch.c b/drivers/s390/cio/chsc_sch.c
index 1d3661a..3d22d2a 100644
--- a/drivers/s390/cio/chsc_sch.c
+++ b/drivers/s390/cio/chsc_sch.c
@@ -58,7 +58,7 @@ static void chsc_subchannel_irq(struct subchannel *sch)
 {
 	struct chsc_private *private = dev_get_drvdata(&sch->dev);
 	struct chsc_request *request = private->request;
-	struct irb *irb = (struct irb *)&S390_lowcore.irb;
+	struct irb *irb = &__get_cpu_var(cio_irb);
 
 	CHSC_LOG(4, "irb");
 	CHSC_LOG_HEX(4, irb, sizeof(*irb));
diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c
index 9e058c4..77f9c92d 100644
--- a/drivers/s390/cio/cio.c
+++ b/drivers/s390/cio/cio.c
@@ -46,6 +46,9 @@ debug_info_t *cio_debug_msg_id;
 debug_info_t *cio_debug_trace_id;
 debug_info_t *cio_debug_crw_id;
 
+DEFINE_PER_CPU_ALIGNED(struct irb, cio_irb);
+EXPORT_PER_CPU_SYMBOL(cio_irb);
+
 /*
  * Function: cio_debug_init
  * Initializes three debug logs for common I/O:
@@ -560,7 +563,7 @@ static irqreturn_t do_cio_interrupt(int irq, void *dummy)
 
 	__this_cpu_write(s390_idle.nohz_delay, 1);
 	tpi_info = (struct tpi_info *) &get_irq_regs()->int_code;
-	irb = (struct irb *) &S390_lowcore.irb;
+	irb = &__get_cpu_var(cio_irb);
 	sch = (struct subchannel *)(unsigned long) tpi_info->intparm;
 	if (!sch) {
 		/* Clear pending interrupt condition. */
@@ -609,7 +612,7 @@ void cio_tsch(struct subchannel *sch)
 	struct irb *irb;
 	int irq_context;
 
-	irb = (struct irb *)&S390_lowcore.irb;
+	irb = &__get_cpu_var(cio_irb);
 	/* Store interrupt response block to lowcore. */
 	if (tsch(sch->schid, irb) != 0)
 		/* Not status pending or not operational. */
@@ -746,7 +749,7 @@ __clear_io_subchannel_easy(struct subchannel_id schid)
 		struct tpi_info ti;
 
 		if (tpi(&ti)) {
-			tsch(ti.schid, (struct irb *)&S390_lowcore.irb);
+			tsch(ti.schid, &__get_cpu_var(cio_irb));
 			if (schid_equal(&ti.schid, &schid))
 				return 0;
 		}
diff --git a/drivers/s390/cio/cio.h b/drivers/s390/cio/cio.h
index d42f674..a01376a 100644
--- a/drivers/s390/cio/cio.h
+++ b/drivers/s390/cio/cio.h
@@ -102,6 +102,8 @@ struct subchannel {
 	struct schib_config config;
 } __attribute__ ((aligned(8)));
 
+DECLARE_PER_CPU(struct irb, cio_irb);
+
 #define to_subchannel(n) container_of(n, struct subchannel, dev)
 
 extern int cio_validate_subchannel (struct subchannel *, struct subchannel_id);
diff --git a/drivers/s390/cio/device_fsm.c b/drivers/s390/cio/device_fsm.c
index c7638c5..0bc902b 100644
--- a/drivers/s390/cio/device_fsm.c
+++ b/drivers/s390/cio/device_fsm.c
@@ -739,7 +739,7 @@ ccw_device_irq(struct ccw_device *cdev, enum dev_event dev_event)
 	struct irb *irb;
 	int is_cmd;
 
-	irb = (struct irb *)&S390_lowcore.irb;
+	irb = &__get_cpu_var(cio_irb);
 	is_cmd = !scsw_is_tm(&irb->scsw);
 	/* Check for unsolicited interrupt. */
 	if (!scsw_is_solicited(&irb->scsw)) {
@@ -805,7 +805,7 @@ ccw_device_w4sense(struct ccw_device *cdev, enum dev_event dev_event)
 {
 	struct irb *irb;
 
-	irb = (struct irb *)&S390_lowcore.irb;
+	irb = &__get_cpu_var(cio_irb);
 	/* Check for unsolicited interrupt. */
 	if (scsw_stctl(&irb->scsw) ==
 	    (SCSW_STCTL_STATUS_PEND | SCSW_STCTL_ALERT_STATUS)) {
diff --git a/drivers/s390/cio/eadm_sch.c b/drivers/s390/cio/eadm_sch.c
index 3a2ee4a..c4f7bf3 100644
--- a/drivers/s390/cio/eadm_sch.c
+++ b/drivers/s390/cio/eadm_sch.c
@@ -134,7 +134,7 @@ static void eadm_subchannel_irq(struct subchannel *sch)
 {
 	struct eadm_private *private = get_eadm_private(sch);
 	struct eadm_scsw *scsw = &sch->schib.scsw.eadm;
-	struct irb *irb = (struct irb *)&S390_lowcore.irb;
+	struct irb *irb = &__get_cpu_var(cio_irb);
 	int error = 0;
 
 	EADM_LOG(6, "irq");
-- 
cgit v0.10.2