Diffstat (limited to 'arch')
-rw-r--r--  arch/s390/Kconfig.debug            |  12
-rw-r--r--  arch/s390/include/asm/chpid.h      |   2
-rw-r--r--  arch/s390/include/asm/css_chars.h  |   3
-rw-r--r--  arch/s390/include/asm/page.h       |  14
-rw-r--r--  arch/s390/include/asm/pgtable.h    |  30
-rw-r--r--  arch/s390/include/asm/setup.h      |  14
-rw-r--r--  arch/s390/include/uapi/asm/chsc.h  |  10
-rw-r--r--  arch/s390/kernel/early.c           |  17
-rw-r--r--  arch/s390/kernel/entry64.S         |   2
-rw-r--r--  arch/s390/kernel/head.S            | 101
-rw-r--r--  arch/s390/kernel/head31.S          |   3
-rw-r--r--  arch/s390/kernel/head64.S          |   3
-rw-r--r--  arch/s390/kernel/module.c          |  11
-rw-r--r--  arch/s390/kernel/setup.c           |  51
-rw-r--r--  arch/s390/mm/Makefile              |   1
-rw-r--r--  arch/s390/mm/dump_pagetables.c     | 226
-rw-r--r--  arch/s390/mm/pageattr.c            |  40
-rw-r--r--  arch/s390/mm/vmem.c                |  45
18 files changed, 474 insertions, 111 deletions
diff --git a/arch/s390/Kconfig.debug b/arch/s390/Kconfig.debug
index d76cef3..fc32a2d 100644
--- a/arch/s390/Kconfig.debug
+++ b/arch/s390/Kconfig.debug
@@ -31,6 +31,18 @@ config DEBUG_STRICT_USER_COPY_CHECKS
If unsure, or if you run an older (pre 4.4) gcc, say N.
+config S390_PTDUMP
+ bool "Export kernel pagetable layout to userspace via debugfs"
+ depends on DEBUG_KERNEL
+ select DEBUG_FS
+ ---help---
+ Say Y here if you want to show the kernel pagetable layout in a
+ debugfs file. This information is only useful for kernel developers
+ who are working in architecture specific areas of the kernel.
+ It is probably not a good idea to enable this feature in a production
+ kernel.
+ If in doubt, say "N"
+
config DEBUG_SET_MODULE_RONX
def_bool y
depends on MODULES
diff --git a/arch/s390/include/asm/chpid.h b/arch/s390/include/asm/chpid.h
index 64c76dd..38c405e 100644
--- a/arch/s390/include/asm/chpid.h
+++ b/arch/s390/include/asm/chpid.h
@@ -1,5 +1,5 @@
/*
- * Copyright IBM Corp. 2007
+ * Copyright IBM Corp. 2007, 2012
* Author(s): Peter Oberparleiter <peter.oberparleiter@de.ibm.com>
*/
#ifndef _ASM_S390_CHPID_H
diff --git a/arch/s390/include/asm/css_chars.h b/arch/s390/include/asm/css_chars.h
index a06ebc2..7e1c917 100644
--- a/arch/s390/include/asm/css_chars.h
+++ b/arch/s390/include/asm/css_chars.h
@@ -3,8 +3,6 @@
#include <linux/types.h>
-#ifdef __KERNEL__
-
struct css_general_char {
u64 : 12;
u32 dynio : 1; /* bit 12 */
@@ -35,5 +33,4 @@ struct css_general_char {
extern struct css_general_char css_general_characteristics;
-#endif /* __KERNEL__ */
#endif
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index 27ab3c7..6d53670 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -30,12 +30,20 @@
#include <asm/setup.h>
#ifndef __ASSEMBLY__
+static inline unsigned long pfmf(unsigned long function, unsigned long address)
+{
+ asm volatile(
+ " .insn rre,0xb9af0000,%[function],%[address]"
+ : [address] "+a" (address)
+ : [function] "d" (function)
+ : "memory");
+ return address;
+}
+
static inline void clear_page(void *page)
{
if (MACHINE_HAS_PFMF) {
- asm volatile(
- " .insn rre,0xb9af0000,%0,%1"
- : : "d" (0x10000), "a" (page) : "memory", "cc");
+ pfmf(0x10000, (unsigned long)page);
} else {
register unsigned long reg1 asm ("1") = 0;
register void *reg2 asm ("2") = page;
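The hunk above factors the raw .insn rre,0xb9af0000 encoding into a pfmf() helper so that clear_page() and the new storage-key code in setup.c can share it. Below is a minimal userspace sketch of how the function codes used in this series appear to decompose; the field names and bit positions are our reading of the codes 0x10000/0x21000/0x22000 in this patch, not text quoted from the Principles of Operation.

#include <stdio.h>

/* Assumed layout: CF (clear frame) = bit 16, SK (set key) = bit 17,
 * frame-size code in bits 12-13 (0 = 4K, 1 = 1M, 2 = 2G), key in bits 0-7. */
static void decode_pfmf(unsigned long fc)
{
        static const char * const size[] = { "4K", "1M", "2G" };

        printf("fc=0x%05lx: %s frame%s%s, key=0x%02lx\n", fc,
               size[(fc >> 12) & 3],
               (fc >> 16) & 1 ? ", clear" : "",
               (fc >> 17) & 1 ? ", set key" : "",
               fc & 0xff);
}

int main(void)
{
        decode_pfmf(0x10000);        /* clear_page(): clear one 4K frame */
        decode_pfmf(0x21000 | 0x2);  /* init_storage_keys(): key a 1M frame */
        decode_pfmf(0x22000 | 0x2);  /* init_storage_keys(): key a 2G frame */
        return 0;
}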
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 979fe3d..dd647c9 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -119,13 +119,12 @@ static inline int is_zero_pfn(unsigned long pfn)
#ifndef __ASSEMBLY__
/*
- * The vmalloc area will always be on the topmost area of the kernel
- * mapping. We reserve 96MB (31bit) / 128GB (64bit) for vmalloc,
- * which should be enough for any sane case.
- * By putting vmalloc at the top, we maximise the gap between physical
- * memory and vmalloc to catch misplaced memory accesses. As a side
- * effect, this also makes sure that 64 bit module code cannot be used
- * as system call address.
+ * The vmalloc and module area will always be on the topmost area of the kernel
+ * mapping. We reserve 96MB (31bit) / 128GB (64bit) for vmalloc and modules.
+ * On 64 bit kernels we have a 2GB area at the top of the vmalloc area where
+ * modules will reside. That makes sure that inter module branches always
+ * happen without trampolines and in addition the placement within a 2GB frame
+ * is branch prediction unit friendly.
*/
extern unsigned long VMALLOC_START;
extern unsigned long VMALLOC_END;
@@ -133,6 +132,14 @@ extern struct page *vmemmap;
#define VMEM_MAX_PHYS ((unsigned long) vmemmap)
+#ifdef CONFIG_64BIT
+extern unsigned long MODULES_VADDR;
+extern unsigned long MODULES_END;
+#define MODULES_VADDR MODULES_VADDR
+#define MODULES_END MODULES_END
+#define MODULES_LEN (1UL << 31)
+#endif
+
/*
* A 31 bit pagetable entry of S390 has following format:
* | PFRA | | OS |
@@ -507,6 +514,15 @@ static inline int pmd_none(pmd_t pmd)
return (pmd_val(pmd) & _SEGMENT_ENTRY_INV) != 0UL;
}
+static inline int pmd_large(pmd_t pmd)
+{
+#ifdef CONFIG_64BIT
+ return !!(pmd_val(pmd) & _SEGMENT_ENTRY_LARGE);
+#else
+ return 0;
+#endif
+}
+
static inline int pmd_bad(pmd_t pmd)
{
unsigned long mask = ~_SEGMENT_ENTRY_ORIGIN & ~_SEGMENT_ENTRY_INV;
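The self-referential defines in the pgtable.h hunk look odd but are deliberate: MODULES_VADDR and MODULES_END are runtime variables on s390, while generic kernel code tests for them with #ifdef. A standalone sketch of the idiom (the demo value is arbitrary):

#include <stdio.h>

unsigned long MODULES_VADDR;            /* set at boot, as in setup.c */
#define MODULES_VADDR MODULES_VADDR     /* makes #ifdef MODULES_VADDR true */

int main(void)
{
#ifdef MODULES_VADDR
        MODULES_VADDR = (1UL << 42) - (1UL << 31);      /* demo value only */
        printf("module area starts at 0x%lx\n", MODULES_VADDR);
#endif
        return 0;
}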
diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h
index 55ad134..f69f76b 100644
--- a/arch/s390/include/asm/setup.h
+++ b/arch/s390/include/asm/setup.h
@@ -71,8 +71,8 @@ extern unsigned int s390_user_mode;
#define MACHINE_FLAG_DIAG9C (1UL << 7)
#define MACHINE_FLAG_MVCOS (1UL << 8)
#define MACHINE_FLAG_KVM (1UL << 9)
-#define MACHINE_FLAG_HPAGE (1UL << 10)
-#define MACHINE_FLAG_PFMF (1UL << 11)
+#define MACHINE_FLAG_EDAT1 (1UL << 10)
+#define MACHINE_FLAG_EDAT2 (1UL << 11)
#define MACHINE_FLAG_LPAR (1UL << 12)
#define MACHINE_FLAG_SPP (1UL << 13)
#define MACHINE_FLAG_TOPOLOGY (1UL << 14)
@@ -84,6 +84,8 @@ extern unsigned int s390_user_mode;
#define MACHINE_IS_LPAR (S390_lowcore.machine_flags & MACHINE_FLAG_LPAR)
#define MACHINE_HAS_DIAG9C (S390_lowcore.machine_flags & MACHINE_FLAG_DIAG9C)
+#define MACHINE_HAS_PFMF MACHINE_HAS_EDAT1
+#define MACHINE_HAS_HPAGE MACHINE_HAS_EDAT1
#ifndef CONFIG_64BIT
#define MACHINE_HAS_IEEE (S390_lowcore.machine_flags & MACHINE_FLAG_IEEE)
@@ -92,8 +94,8 @@ extern unsigned int s390_user_mode;
#define MACHINE_HAS_DIAG44 (1)
#define MACHINE_HAS_MVPG (S390_lowcore.machine_flags & MACHINE_FLAG_MVPG)
#define MACHINE_HAS_MVCOS (0)
-#define MACHINE_HAS_HPAGE (0)
-#define MACHINE_HAS_PFMF (0)
+#define MACHINE_HAS_EDAT1 (0)
+#define MACHINE_HAS_EDAT2 (0)
#define MACHINE_HAS_SPP (0)
#define MACHINE_HAS_TOPOLOGY (0)
#define MACHINE_HAS_TE (0)
@@ -105,8 +107,8 @@ extern unsigned int s390_user_mode;
#define MACHINE_HAS_DIAG44 (S390_lowcore.machine_flags & MACHINE_FLAG_DIAG44)
#define MACHINE_HAS_MVPG (1)
#define MACHINE_HAS_MVCOS (S390_lowcore.machine_flags & MACHINE_FLAG_MVCOS)
-#define MACHINE_HAS_HPAGE (S390_lowcore.machine_flags & MACHINE_FLAG_HPAGE)
-#define MACHINE_HAS_PFMF (S390_lowcore.machine_flags & MACHINE_FLAG_PFMF)
+#define MACHINE_HAS_EDAT1 (S390_lowcore.machine_flags & MACHINE_FLAG_EDAT1)
+#define MACHINE_HAS_EDAT2 (S390_lowcore.machine_flags & MACHINE_FLAG_EDAT2)
#define MACHINE_HAS_SPP (S390_lowcore.machine_flags & MACHINE_FLAG_SPP)
#define MACHINE_HAS_TOPOLOGY (S390_lowcore.machine_flags & MACHINE_FLAG_TOPOLOGY)
#define MACHINE_HAS_TE (S390_lowcore.machine_flags & MACHINE_FLAG_TE)
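With this rename, huge pages and PFMF are no longer tracked as independent flags; both are implied by the one EDAT1 facility, so MACHINE_HAS_HPAGE and MACHINE_HAS_PFMF become aliases. A compile-and-run sketch of the flag scheme, with a plain variable standing in for S390_lowcore.machine_flags:

#include <stdio.h>

#define MACHINE_FLAG_EDAT1 (1UL << 10)
#define MACHINE_FLAG_EDAT2 (1UL << 11)

static unsigned long machine_flags;     /* stand-in for S390_lowcore field */

#define MACHINE_HAS_EDAT1 (machine_flags & MACHINE_FLAG_EDAT1)
#define MACHINE_HAS_EDAT2 (machine_flags & MACHINE_FLAG_EDAT2)
#define MACHINE_HAS_PFMF  MACHINE_HAS_EDAT1
#define MACHINE_HAS_HPAGE MACHINE_HAS_EDAT1

int main(void)
{
        machine_flags |= MACHINE_FLAG_EDAT1;    /* as if test_facility(8) hit */
        printf("pfmf:%d hpage:%d edat2:%d\n",
               !!MACHINE_HAS_PFMF, !!MACHINE_HAS_HPAGE, !!MACHINE_HAS_EDAT2);
        return 0;
}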
diff --git a/arch/s390/include/uapi/asm/chsc.h b/arch/s390/include/uapi/asm/chsc.h
index aea451f..1c6a7f8 100644
--- a/arch/s390/include/uapi/asm/chsc.h
+++ b/arch/s390/include/uapi/asm/chsc.h
@@ -1,7 +1,7 @@
/*
* ioctl interface for /dev/chsc
*
- * Copyright IBM Corp. 2008
+ * Copyright IBM Corp. 2008, 2012
* Author(s): Cornelia Huck <cornelia.huck@de.ibm.com>
*/
@@ -9,9 +9,12 @@
#define _ASM_CHSC_H
#include <linux/types.h>
+#include <linux/ioctl.h>
#include <asm/chpid.h>
#include <asm/schid.h>
+#define CHSC_SIZE 0x1000
+
struct chsc_async_header {
__u16 length;
__u16 code;
@@ -23,15 +26,14 @@ struct chsc_async_header {
struct chsc_async_area {
struct chsc_async_header header;
- __u8 data[PAGE_SIZE - 16 /* size of chsc_async_header */];
+ __u8 data[CHSC_SIZE - sizeof(struct chsc_async_header)];
} __attribute__ ((packed));
-
struct chsc_response_struct {
__u16 length;
__u16 code;
__u32 parms;
- __u8 data[PAGE_SIZE - 8];
+ __u8 data[CHSC_SIZE - 2 * sizeof(__u16) - sizeof(__u32)];
} __attribute__ ((packed));
struct chsc_chp_cd {
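Replacing PAGE_SIZE with a fixed CHSC_SIZE keeps this uapi header usable from userspace, where PAGE_SIZE is not defined, while the sizeof() arithmetic keeps both structures at exactly one 4K block. A quick userspace check of that arithmetic; the 16-byte header size is taken from the old "PAGE_SIZE - 16" comment:

#include <stdio.h>

#define CHSC_SIZE 0x1000

int main(void)
{
        unsigned int async_hdr = 16;    /* sizeof(struct chsc_async_header) */
        unsigned int resp_hdr = 2 * sizeof(unsigned short) + sizeof(unsigned int);

        printf("chsc_async_area: %u header + %u data = %u bytes\n",
               async_hdr, CHSC_SIZE - async_hdr, CHSC_SIZE);
        printf("chsc_response_struct: %u header + %u data = %u bytes\n",
               resp_hdr, CHSC_SIZE - resp_hdr, CHSC_SIZE);
        return 0;
}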
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 00d1144..1f0eee9 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -283,14 +283,6 @@ static noinline __init void setup_facility_list(void)
ARRAY_SIZE(S390_lowcore.stfle_fac_list));
}
-static noinline __init void setup_hpage(void)
-{
- if (!test_facility(2) || !test_facility(8))
- return;
- S390_lowcore.machine_flags |= MACHINE_FLAG_HPAGE;
- __ctl_set_bit(0, 23);
-}
-
static __init void detect_mvpg(void)
{
#ifndef CONFIG_64BIT
@@ -378,10 +370,14 @@ static __init void detect_diag44(void)
static __init void detect_machine_facilities(void)
{
#ifdef CONFIG_64BIT
+ if (test_facility(8)) {
+ S390_lowcore.machine_flags |= MACHINE_FLAG_EDAT1;
+ __ctl_set_bit(0, 23);
+ }
+ if (test_facility(78))
+ S390_lowcore.machine_flags |= MACHINE_FLAG_EDAT2;
if (test_facility(3))
S390_lowcore.machine_flags |= MACHINE_FLAG_IDTE;
- if (test_facility(8))
- S390_lowcore.machine_flags |= MACHINE_FLAG_PFMF;
if (test_facility(27))
S390_lowcore.machine_flags |= MACHINE_FLAG_MVCOS;
if (test_facility(40))
@@ -484,7 +480,6 @@ void __init startup_init(void)
detect_diag9c();
detect_diag44();
detect_machine_facilities();
- setup_hpage();
setup_topology();
sclp_facilities_detect();
detect_memory_layout(memory_chunk);
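detect_machine_facilities() now derives EDAT1 (facility bit 8, which also enables PFMF and huge pages via control register 0 bit 23) and EDAT2 (facility bit 78) directly from the facility list. A userspace sketch of the bit addressing behind test_facility(), assuming the usual MSB-first numbering of the STFLE bit string:

#include <stdio.h>

static unsigned char fac_list[32];      /* stand-in for stfle_fac_list */

static int test_facility(unsigned int nr)
{
        return (fac_list[nr >> 3] & (0x80 >> (nr & 7))) != 0;
}

int main(void)
{
        fac_list[8 >> 3] |= 0x80 >> (8 & 7);    /* pretend EDAT1 is installed */
        fac_list[78 >> 3] |= 0x80 >> (78 & 7);  /* pretend EDAT2 is installed */
        printf("IDTE:%d EDAT1:%d EDAT2:%d\n",
               test_facility(3), test_facility(8), test_facility(78));
        return 0;
}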
diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S
index 7549985..8f211ad 100644
--- a/arch/s390/kernel/entry64.S
+++ b/arch/s390/kernel/entry64.S
@@ -295,7 +295,7 @@ sysc_sigpending:
jno sysc_return
lmg %r2,%r7,__PT_R2(%r11) # load svc arguments
lghi %r8,0 # svc 0 returns -ENOSYS
- lh %r1,__PT_INT_CODE+2(%r11) # load new svc number
+ llgh %r1,__PT_INT_CODE+2(%r11) # load new svc number
cghi %r1,NR_syscalls
jnl sysc_nr_ok # invalid svc number -> do svc 0
slag %r8,%r1,2
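The lh -> llgh change is a correctness fix: lh sign-extends the halfword, so a crafted svc number >= 0x8000 turns negative, slips under the signed cghi bound check, and indexes the syscall table with a negative offset. llgh zero-extends, so every out-of-range value is rejected. A userspace model of the two loads:

#include <stdio.h>

#define NR_SYSCALLS 400

static long load_lh(unsigned short hw)   { return (short)hw; }  /* lh: sign-extend */
static long load_llgh(unsigned short hw) { return hw; }         /* llgh: zero-extend */

static void check(long nr)
{
        if (nr >= NR_SYSCALLS)
                printf("nr=%ld -> rejected (svc 0)\n", nr);
        else
                printf("nr=%ld -> table offset %ld (!)\n", nr, nr * 4);
}

int main(void)
{
        unsigned short evil = 0xff00;
        check(load_lh(evil));   /* negative: passes the check, bad offset */
        check(load_llgh(evil)); /* 65280: correctly rejected */
        return 0;
}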
diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S
index 805b668..984726c 100644
--- a/arch/s390/kernel/head.S
+++ b/arch/s390/kernel/head.S
@@ -52,7 +52,7 @@ __HEAD
.long 0x02000370,0x60000050 # the channel program the PSW
.long 0x020003c0,0x60000050 # at location 0 is loaded.
.long 0x02000410,0x60000050 # Initial processing starts
- .long 0x02000460,0x60000050 # at 0xf0 = iplstart.
+ .long 0x02000460,0x60000050 # at 0x200 = iplstart.
.long 0x020004b0,0x60000050
.long 0x02000500,0x60000050
.long 0x02000550,0x60000050
@@ -62,11 +62,54 @@ __HEAD
.long 0x02000690,0x60000050
.long 0x020006e0,0x20000050
- .org 0xf0
+ .org 0x200
+#
+# subroutine to set architecture mode
+#
+.Lsetmode:
+#ifdef CONFIG_64BIT
+ mvi __LC_AR_MODE_ID,1 # set esame flag
+ slr %r0,%r0 # set cpuid to zero
+ lhi %r1,2 # mode 2 = esame (dump)
+ sigp %r1,%r0,0x12 # switch to esame mode
+ bras %r13,0f
+ .fill 16,4,0x0
+0: lmh %r0,%r15,0(%r13) # clear high-order half of gprs
+ sam31 # switch to 31 bit addressing mode
+#else
+ mvi __LC_AR_MODE_ID,0 # set ESA flag (mode 0)
+#endif
+ br %r14
+
+#
+# subroutine to wait for end I/O
+#
+.Lirqwait:
+#ifdef CONFIG_64BIT
+ mvc 0x1f0(16),.Lnewpsw # set up IO interrupt psw
+ lpsw .Lwaitpsw
+.Lioint:
+ br %r14
+ .align 8
+.Lnewpsw:
+ .quad 0x0000000080000000,.Lioint
+#else
+ mvc 0x78(8),.Lnewpsw # set up IO interrupt psw
+ lpsw .Lwaitpsw
+.Lioint:
+ br %r14
+ .align 8
+.Lnewpsw:
+ .long 0x00080000,0x80000000+.Lioint
+#endif
+.Lwaitpsw:
+ .long 0x020a0000,0x80000000+.Lioint
+
#
# subroutine for loading cards from the reader
#
.Lloader:
+ la %r4,0(%r14) # save return address in r4
la %r3,.Lorb # r3 = address of orb
la %r5,.Lirb # r5 = address of irb
la %r6,.Lccws # r6 = address of ccws
@@ -83,9 +126,7 @@ __HEAD
ssch 0(%r3) # load chunk of 1600 bytes
bnz .Llderr
.Lwait4irq:
- mvc 0x78(8),.Lnewpsw # set up IO interrupt psw
- lpsw .Lwaitpsw
-.Lioint:
+ bas %r14,.Lirqwait
c %r1,0xb8 # compare subchannel number
bne .Lwait4irq
tsch 0(%r5)
@@ -104,7 +145,7 @@ __HEAD
sr %r0,%r3 # #ccws*80-residual=#bytes read
ar %r2,%r0
- br %r14 # r2 contains the total size
+ br %r4 # r2 contains the total size
.Lcont:
ahi %r2,0x640 # add 0x640 to total size
@@ -128,10 +169,6 @@ __HEAD
.Lloadp:.long 0,0
.align 8
.Lcrash:.long 0x000a0000,0x00000000
-.Lnewpsw:
- .long 0x00080000,0x80000000+.Lioint
-.Lwaitpsw:
- .long 0x020a0000,0x80000000+.Lioint
.align 8
.Lccws: .rept 19
@@ -140,6 +177,7 @@ __HEAD
.long 0x02200050,0x00000000
iplstart:
+ bas %r14,.Lsetmode # Immediately switch to 64 bit mode
lh %r1,0xb8 # test if subchannel number
bct %r1,.Lnoload # is valid
l %r1,0xb8 # load ipl subchannel number
@@ -209,8 +247,8 @@ iplstart:
#
# reset files in VM reader
#
- stidp __LC_SAVE_AREA_SYNC # store cpuid
- tm __LC_SAVE_AREA_SYNC,0xff # running VM ?
+ stidp .Lcpuid # store cpuid
+ tm .Lcpuid,0xff # running VM ?
bno .Lnoreset
la %r2,.Lreset
lhi %r3,26
@@ -222,23 +260,14 @@ iplstart:
tm 31(%r5),0xff # bits is set in the schib
bz .Lnoreset
.Lwaitforirq:
- mvc 0x78(8),.Lrdrnewpsw # set up IO interrupt psw
-.Lwaitrdrirq:
- lpsw .Lrdrwaitpsw
-.Lrdrint:
+ bas %r14,.Lirqwait # wait for IO interrupt
c %r1,0xb8 # compare subchannel number
- bne .Lwaitrdrirq
+ bne .Lwaitforirq
la %r5,.Lirb
tsch 0(%r5)
.Lnoreset:
b .Lnoload
- .align 8
-.Lrdrnewpsw:
- .long 0x00080000,0x80000000+.Lrdrint
-.Lrdrwaitpsw:
- .long 0x020a0000,0x80000000+.Lrdrint
-
#
# everything loaded, go for it
#
@@ -254,6 +283,8 @@ iplstart:
.byte 0xc8,0xd6,0xd3,0xc4 # "change rdr all keep nohold"
.L_eof: .long 0xc5d6c600 /* C'EOF' */
.L_hdr: .long 0xc8c4d900 /* C'HDR' */
+ .align 8
+.Lcpuid:.fill 8,1,0
#
# SALIPL loader support. Based on a patch by Rob van der Heij.
@@ -263,6 +294,7 @@ iplstart:
.org 0x800
ENTRY(start)
stm %r0,%r15,0x07b0 # store registers
+ bas %r14,.Lsetmode # Immediately switch to 64 bit mode
basr %r12,%r0
.base:
l %r11,.parm
@@ -343,6 +375,18 @@ ENTRY(startup)
ENTRY(startup_kdump)
j .Lep_startup_kdump
.Lep_startup_normal:
+#ifdef CONFIG_64BIT
+ mvi __LC_AR_MODE_ID,1 # set esame flag
+ slr %r0,%r0 # set cpuid to zero
+ lhi %r1,2 # mode 2 = esame (dump)
+ sigp %r1,%r0,0x12 # switch to esame mode
+ bras %r13,0f
+ .fill 16,4,0x0
+0: lmh %r0,%r15,0(%r13) # clear high-order half of gprs
+ sam31 # switch to 31 bit addressing mode
+#else
+ mvi __LC_AR_MODE_ID,0 # set ESA flag (mode 0)
+#endif
basr %r13,0 # get base
.LPG0:
xc 0x200(256),0x200 # partially clear lowcore
@@ -410,22 +454,17 @@ ENTRY(startup_kdump)
#endif
#ifdef CONFIG_64BIT
- mvi __LC_AR_MODE_ID,1 # set esame flag
- slr %r0,%r0 # set cpuid to zero
- lhi %r1,2 # mode 2 = esame (dump)
- sigp %r1,%r0,0x12 # switch to esame mode
+ /* Continue with 64bit startup code in head64.S */
sam64 # switch to 64 bit mode
- larl %r13,4f
- lmh %r0,%r15,0(%r13) # clear high-order half
jg startup_continue
-4: .fill 16,4,0x0
#else
- mvi __LC_AR_MODE_ID,0 # set ESA flag (mode 0)
+ /* Continue with 31bit startup code in head31.S */
l %r13,4f-.LPG0(%r13)
b 0(%r13)
.align 8
4: .long startup_continue
#endif
+
.align 8
5: .long 0x7fffffff,0xffffffff
diff --git a/arch/s390/kernel/head31.S b/arch/s390/kernel/head31.S
index a1372ae..9a99856 100644
--- a/arch/s390/kernel/head31.S
+++ b/arch/s390/kernel/head31.S
@@ -78,10 +78,7 @@ ENTRY(startup_continue)
ENTRY(_ehead)
-#ifdef CONFIG_SHARED_KERNEL
.org 0x100000 - 0x11000 # head.o ends at 0x11000
-#endif
-
#
# startup-code, running in absolute addressing mode
#
diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S
index c108af2..b9e25ae 100644
--- a/arch/s390/kernel/head64.S
+++ b/arch/s390/kernel/head64.S
@@ -76,10 +76,7 @@ ENTRY(startup_continue)
ENTRY(_ehead)
-#ifdef CONFIG_SHARED_KERNEL
.org 0x100000 - 0x11000 # head.o ends at 0x11000
-#endif
-
#
# startup-code, running in absolute addressing mode
#
diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c
index 46412b1..4610dea 100644
--- a/arch/s390/kernel/module.c
+++ b/arch/s390/kernel/module.c
@@ -44,6 +44,17 @@
#define PLT_ENTRY_SIZE 20
#endif /* CONFIG_64BIT */
+#ifdef CONFIG_64BIT
+void *module_alloc(unsigned long size)
+{
+ if (PAGE_ALIGN(size) > MODULES_LEN)
+ return NULL;
+ return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
+ GFP_KERNEL, PAGE_KERNEL, -1,
+ __builtin_return_address(0));
+}
+#endif
+
/* Free memory returned from module_alloc */
void module_free(struct module *mod, void *module_region)
{
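module_alloc() now places module text inside the dedicated 2GB window instead of the general vmalloc area. The 2GB size is what makes the pgtable.h comment hold: relative branch and load-address instructions (brasl, larl) encode a signed 32-bit halfword offset, so any two addresses within one 2GB window are always in reach without trampolines. A small sketch of the arithmetic, assuming the 3-level layout value for MODULES_END and a 64-bit host:

#include <stdio.h>

#define MODULES_LEN (1UL << 31)

int main(void)
{
        unsigned long modules_end = 1UL << 42;  /* vmax, 3-level case */
        unsigned long modules_vaddr = modules_end - MODULES_LEN;
        unsigned long reach = 1UL << 32;        /* brasl: +-2^31 halfwords */

        printf("module area 0x%lx-0x%lx, size %lu MB\n",
               modules_vaddr, modules_end, MODULES_LEN >> 20);
        printf("worst-case distance %lu < reach %lu: no trampolines\n",
               MODULES_LEN - 1, reach);
        return 0;
}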
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index afa9fdb..b1f2be9 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -105,6 +105,11 @@ EXPORT_SYMBOL(VMALLOC_END);
struct page *vmemmap;
EXPORT_SYMBOL(vmemmap);
+#ifdef CONFIG_64BIT
+unsigned long MODULES_VADDR;
+unsigned long MODULES_END;
+#endif
+
/* An array with a pointer to the lowcore of every CPU. */
struct _lowcore *lowcore_ptr[NR_CPUS];
EXPORT_SYMBOL(lowcore_ptr);
@@ -544,19 +549,23 @@ static void __init setup_memory_end(void)
/* Choose kernel address space layout: 2, 3, or 4 levels. */
#ifdef CONFIG_64BIT
- vmalloc_size = VMALLOC_END ?: 128UL << 30;
+ vmalloc_size = VMALLOC_END ?: (128UL << 30) - MODULES_LEN;
tmp = (memory_end ?: real_memory_size) / PAGE_SIZE;
tmp = tmp * (sizeof(struct page) + PAGE_SIZE) + vmalloc_size;
if (tmp <= (1UL << 42))
vmax = 1UL << 42; /* 3-level kernel page table */
else
vmax = 1UL << 53; /* 4-level kernel page table */
+ /* module area is at the end of the kernel address space. */
+ MODULES_END = vmax;
+ MODULES_VADDR = MODULES_END - MODULES_LEN;
+ VMALLOC_END = MODULES_VADDR;
#else
vmalloc_size = VMALLOC_END ?: 96UL << 20;
vmax = 1UL << 31; /* 2-level kernel page table */
-#endif
/* vmalloc area is at the end of the kernel address space. */
VMALLOC_END = vmax;
+#endif
VMALLOC_START = vmax - vmalloc_size;
/* Split remaining virtual space between 1:1 mapping & vmemmap array */
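The reworked layout code first sizes vmalloc (defaulting to 128GB minus the module area), picks a 3- or 4-level page table accordingly, then stacks the regions downward from vmax: modules at the very top, vmalloc directly below. A userspace sketch of the resulting boundaries for the 3-level default:

#include <stdio.h>

#define MODULES_LEN (1UL << 31)

int main(void)
{
        unsigned long vmax = 1UL << 42;         /* 3-level kernel page table */
        unsigned long vmalloc_size = (128UL << 30) - MODULES_LEN;
        unsigned long modules_end = vmax;
        unsigned long modules_vaddr = modules_end - MODULES_LEN;
        unsigned long vmalloc_end = modules_vaddr;
        unsigned long vmalloc_start = vmax - vmalloc_size;

        printf("modules: 0x%lx-0x%lx\n", modules_vaddr, modules_end);
        printf("vmalloc: 0x%lx-0x%lx\n", vmalloc_start, vmalloc_end);
        return 0;
}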
@@ -768,6 +777,40 @@ static void __init reserve_crashkernel(void)
#endif
}
+static void __init init_storage_keys(unsigned long start, unsigned long end)
+{
+ unsigned long boundary, function, size;
+
+ while (start < end) {
+ if (MACHINE_HAS_EDAT2) {
+ /* set storage keys for a 2GB frame */
+ function = 0x22000 | PAGE_DEFAULT_KEY;
+ size = 1UL << 31;
+ boundary = (start + size) & ~(size - 1);
+ if (boundary <= end) {
+ do {
+ start = pfmf(function, start);
+ } while (start < boundary);
+ continue;
+ }
+ }
+ if (MACHINE_HAS_EDAT1) {
+ /* set storage keys for a 1MB frame */
+ function = 0x21000 | PAGE_DEFAULT_KEY;
+ size = 1UL << 20;
+ boundary = (start + size) & ~(size - 1);
+ if (boundary <= end) {
+ do {
+ start = pfmf(function, start);
+ } while (start < boundary);
+ continue;
+ }
+ }
+ page_set_storage_key(start, PAGE_DEFAULT_KEY, 0);
+ start += PAGE_SIZE;
+ }
+}
+
static void __init setup_memory(void)
{
unsigned long bootmap_size;
@@ -846,9 +889,7 @@ static void __init setup_memory(void)
memblock_add_node(PFN_PHYS(start_chunk),
PFN_PHYS(end_chunk - start_chunk), 0);
pfn = max(start_chunk, start_pfn);
- for (; pfn < end_chunk; pfn++)
- page_set_storage_key(PFN_PHYS(pfn),
- PAGE_DEFAULT_KEY, 0);
+ init_storage_keys(PFN_PHYS(pfn), PFN_PHYS(end_chunk));
}
psw_set_key(PAGE_DEFAULT_KEY);
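init_storage_keys() speeds up boot by keying whole 1MB or 2GB frames with single PFMF operations where the hardware supports it, and only falls back to per-page page_set_storage_key() until the next aligned frame boundary. The core of the logic is the round-up-to-next-boundary computation; a sketch with example values:

#include <stdio.h>

/* Round addr up to the next naturally aligned frame of the given size. */
static unsigned long next_boundary(unsigned long addr, unsigned long size)
{
        return (addr + size) & ~(size - 1);
}

int main(void)
{
        unsigned long start = 0x00280000;       /* 2.5MB, not 1MB aligned */
        unsigned long end = 0x40000000;         /* 1GB of memory */
        unsigned long size = 1UL << 20;         /* EDAT1: 1MB frames */
        unsigned long boundary = next_boundary(start, size);

        if (boundary <= end)                    /* a whole frame still fits */
                printf("key 0x%lx-0x%lx, then continue in 1MB frames\n",
                       start, boundary);
        return 0;
}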
diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile
index 0f5536b..1bea6d1 100644
--- a/arch/s390/mm/Makefile
+++ b/arch/s390/mm/Makefile
@@ -7,3 +7,4 @@ obj-y := init.o fault.o extmem.o mmap.o vmem.o pgtable.o maccess.o \
obj-$(CONFIG_CMM) += cmm.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
obj-$(CONFIG_DEBUG_SET_MODULE_RONX) += pageattr.o
+obj-$(CONFIG_S390_PTDUMP) += dump_pagetables.o
diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c
new file mode 100644
index 0000000..cbc6668
--- /dev/null
+++ b/arch/s390/mm/dump_pagetables.c
@@ -0,0 +1,226 @@
+#include <linux/seq_file.h>
+#include <linux/debugfs.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <asm/sections.h>
+#include <asm/pgtable.h>
+
+static unsigned long max_addr;
+
+struct addr_marker {
+ unsigned long start_address;
+ const char *name;
+};
+
+enum address_markers_idx {
+ IDENTITY_NR = 0,
+ KERNEL_START_NR,
+ KERNEL_END_NR,
+ VMEMMAP_NR,
+ VMALLOC_NR,
+#ifdef CONFIG_64BIT
+ MODULES_NR,
+#endif
+};
+
+static struct addr_marker address_markers[] = {
+ [IDENTITY_NR] = {0, "Identity Mapping"},
+ [KERNEL_START_NR] = {(unsigned long)&_stext, "Kernel Image Start"},
+ [KERNEL_END_NR] = {(unsigned long)&_end, "Kernel Image End"},
+ [VMEMMAP_NR] = {0, "vmemmap Area"},
+ [VMALLOC_NR] = {0, "vmalloc Area"},
+#ifdef CONFIG_64BIT
+ [MODULES_NR] = {0, "Modules Area"},
+#endif
+ { -1, NULL }
+};
+
+struct pg_state {
+ int level;
+ unsigned int current_prot;
+ unsigned long start_address;
+ unsigned long current_address;
+ const struct addr_marker *marker;
+};
+
+static void print_prot(struct seq_file *m, unsigned int pr, int level)
+{
+ static const char * const level_name[] =
+ { "ASCE", "PGD", "PUD", "PMD", "PTE" };
+
+ seq_printf(m, "%s ", level_name[level]);
+ if (pr & _PAGE_INVALID)
+ seq_printf(m, "I\n");
+ else
+ seq_printf(m, "%s\n", pr & _PAGE_RO ? "RO" : "RW");
+}
+
+static void note_page(struct seq_file *m, struct pg_state *st,
+ unsigned int new_prot, int level)
+{
+ static const char units[] = "KMGTPE";
+ int width = sizeof(unsigned long) * 2;
+ const char *unit = units;
+ unsigned int prot, cur;
+ unsigned long delta;
+
+ /*
+ * If we have a "break" in the series, we need to flush the state
+ * that we have now. "break" is either changing perms, levels or
+ * address space marker.
+ */
+ prot = new_prot;
+ cur = st->current_prot;
+
+ if (!st->level) {
+ /* First entry */
+ st->current_prot = new_prot;
+ st->level = level;
+ st->marker = address_markers;
+ seq_printf(m, "---[ %s ]---\n", st->marker->name);
+ } else if (prot != cur || level != st->level ||
+ st->current_address >= st->marker[1].start_address) {
+ /* Print the actual finished series */
+ seq_printf(m, "0x%0*lx-0x%0*lx",
+ width, st->start_address,
+ width, st->current_address);
+ delta = (st->current_address - st->start_address) >> 10;
+ while (!(delta & 0x3ff) && unit[1]) {
+ delta >>= 10;
+ unit++;
+ }
+ seq_printf(m, "%9lu%c ", delta, *unit);
+ print_prot(m, st->current_prot, st->level);
+ if (st->current_address >= st->marker[1].start_address) {
+ st->marker++;
+ seq_printf(m, "---[ %s ]---\n", st->marker->name);
+ }
+ st->start_address = st->current_address;
+ st->current_prot = new_prot;
+ st->level = level;
+ }
+}
+
+/*
+ * The actual page table walker functions. In order to keep the implementation
+ * of print_prot() short, we only check and pass _PAGE_INVALID and _PAGE_RO
+ * flags to note_page() if a region, segment or page table entry is invalid or
+ * read-only.
+ * After all it's just a hint that the current level being walked contains an
+ * invalid or read-only entry.
+ */
+static void walk_pte_level(struct seq_file *m, struct pg_state *st,
+ pmd_t *pmd, unsigned long addr)
+{
+ unsigned int prot;
+ pte_t *pte;
+ int i;
+
+ for (i = 0; i < PTRS_PER_PTE && addr < max_addr; i++) {
+ st->current_address = addr;
+ pte = pte_offset_kernel(pmd, addr);
+ prot = pte_val(*pte) & (_PAGE_RO | _PAGE_INVALID);
+ note_page(m, st, prot, 4);
+ addr += PAGE_SIZE;
+ }
+}
+
+static void walk_pmd_level(struct seq_file *m, struct pg_state *st,
+ pud_t *pud, unsigned long addr)
+{
+ unsigned int prot;
+ pmd_t *pmd;
+ int i;
+
+ for (i = 0; i < PTRS_PER_PMD && addr < max_addr; i++) {
+ st->current_address = addr;
+ pmd = pmd_offset(pud, addr);
+ if (!pmd_none(*pmd)) {
+ if (pmd_large(*pmd)) {
+ prot = pmd_val(*pmd) & _SEGMENT_ENTRY_RO;
+ note_page(m, st, prot, 3);
+ } else
+ walk_pte_level(m, st, pmd, addr);
+ } else
+ note_page(m, st, _PAGE_INVALID, 3);
+ addr += PMD_SIZE;
+ }
+}
+
+static void walk_pud_level(struct seq_file *m, struct pg_state *st,
+ pgd_t *pgd, unsigned long addr)
+{
+ pud_t *pud;
+ int i;
+
+ for (i = 0; i < PTRS_PER_PUD && addr < max_addr; i++) {
+ st->current_address = addr;
+ pud = pud_offset(pgd, addr);
+ if (!pud_none(*pud))
+ walk_pmd_level(m, st, pud, addr);
+ else
+ note_page(m, st, _PAGE_INVALID, 2);
+ addr += PUD_SIZE;
+ }
+}
+
+static void walk_pgd_level(struct seq_file *m)
+{
+ unsigned long addr = 0;
+ struct pg_state st;
+ pgd_t *pgd;
+ int i;
+
+ memset(&st, 0, sizeof(st));
+ for (i = 0; i < PTRS_PER_PGD && addr < max_addr; i++) {
+ st.current_address = addr;
+ pgd = pgd_offset_k(addr);
+ if (!pgd_none(*pgd))
+ walk_pud_level(m, &st, pgd, addr);
+ else
+ note_page(m, &st, _PAGE_INVALID, 1);
+ addr += PGDIR_SIZE;
+ }
+ /* Flush out the last page */
+ st.current_address = max_addr;
+ note_page(m, &st, 0, 0);
+}
+
+static int ptdump_show(struct seq_file *m, void *v)
+{
+ walk_pgd_level(m);
+ return 0;
+}
+
+static int ptdump_open(struct inode *inode, struct file *filp)
+{
+ return single_open(filp, ptdump_show, NULL);
+}
+
+static const struct file_operations ptdump_fops = {
+ .open = ptdump_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static int pt_dump_init(void)
+{
+ /*
+ * Figure out the maximum virtual address being accessible with the
+ * kernel ASCE. We need this to keep the page table walker functions
+ * from accessing non-existent entries.
+ */
+#ifdef CONFIG_32BIT
+ max_addr = 1UL << 31;
+#else
+ max_addr = (S390_lowcore.kernel_asce & _REGION_ENTRY_TYPE_MASK) >> 2;
+ max_addr = 1UL << (max_addr * 11 + 31);
+ address_markers[MODULES_NR].start_address = MODULES_VADDR;
+#endif
+ address_markers[VMEMMAP_NR].start_address = (unsigned long) vmemmap;
+ address_markers[VMALLOC_NR].start_address = VMALLOC_START;
+ debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, &ptdump_fops);
+ return 0;
+}
+device_initcall(pt_dump_init);
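The pretty-printer in note_page() reduces a byte delta to the largest unit that divides it evenly by walking the "KMGTPE" string. A standalone version of just that loop:

#include <stdio.h>

static void print_delta(unsigned long bytes)
{
        static const char units[] = "KMGTPE";
        const char *unit = units;
        unsigned long delta = bytes >> 10;      /* start in KB */

        while (!(delta & 0x3ff) && unit[1]) {   /* divisible by 1024, next unit */
                delta >>= 10;
                unit++;
        }
        printf("%9lu%c\n", delta, *unit);
}

int main(void)
{
        print_delta(1UL << 12);   /* 4K */
        print_delta(1UL << 20);   /* 1M */
        print_delta(1UL << 31);   /* 2G */
        print_delta(3UL << 20);   /* 3M */
        return 0;
}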
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
index b36537a..00be01c 100644
--- a/arch/s390/mm/pageattr.c
+++ b/arch/s390/mm/pageattr.c
@@ -8,25 +8,38 @@
#include <asm/cacheflush.h>
#include <asm/pgtable.h>
+static pte_t *walk_page_table(unsigned long addr)
+{
+ pgd_t *pgdp;
+ pud_t *pudp;
+ pmd_t *pmdp;
+ pte_t *ptep;
+
+ pgdp = pgd_offset_k(addr);
+ if (pgd_none(*pgdp))
+ return NULL;
+ pudp = pud_offset(pgdp, addr);
+ if (pud_none(*pudp))
+ return NULL;
+ pmdp = pmd_offset(pudp, addr);
+ if (pmd_none(*pmdp) || pmd_large(*pmdp))
+ return NULL;
+ ptep = pte_offset_kernel(pmdp, addr);
+ if (pte_none(*ptep))
+ return NULL;
+ return ptep;
+}
+
static void change_page_attr(unsigned long addr, int numpages,
pte_t (*set) (pte_t))
{
pte_t *ptep, pte;
- pmd_t *pmdp;
- pud_t *pudp;
- pgd_t *pgdp;
int i;
for (i = 0; i < numpages; i++) {
- pgdp = pgd_offset(&init_mm, addr);
- pudp = pud_offset(pgdp, addr);
- pmdp = pmd_offset(pudp, addr);
- if (pmd_huge(*pmdp)) {
- WARN_ON_ONCE(1);
- continue;
- }
- ptep = pte_offset_kernel(pmdp, addr);
-
+ ptep = walk_page_table(addr);
+ if (WARN_ON_ONCE(!ptep))
+ break;
pte = *ptep;
pte = set(pte);
__ptep_ipte(addr, ptep);
@@ -40,21 +53,18 @@ int set_memory_ro(unsigned long addr, int numpages)
change_page_attr(addr, numpages, pte_wrprotect);
return 0;
}
-EXPORT_SYMBOL_GPL(set_memory_ro);
int set_memory_rw(unsigned long addr, int numpages)
{
change_page_attr(addr, numpages, pte_mkwrite);
return 0;
}
-EXPORT_SYMBOL_GPL(set_memory_rw);
/* not possible */
int set_memory_nx(unsigned long addr, int numpages)
{
return 0;
}
-EXPORT_SYMBOL_GPL(set_memory_nx);
int set_memory_x(unsigned long addr, int numpages)
{
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index c22abf9..387c7c6 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -79,7 +79,8 @@ static pte_t __ref *vmem_pte_alloc(unsigned long address)
*/
static int vmem_add_mem(unsigned long start, unsigned long size, int ro)
{
- unsigned long address;
+ unsigned long end = start + size;
+ unsigned long address = start;
pgd_t *pg_dir;
pud_t *pu_dir;
pmd_t *pm_dir;
@@ -87,7 +88,7 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro)
pte_t pte;
int ret = -ENOMEM;
- for (address = start; address < start + size; address += PAGE_SIZE) {
+ while (address < end) {
pg_dir = pgd_offset_k(address);
if (pgd_none(*pg_dir)) {
pu_dir = vmem_pud_alloc();
@@ -108,12 +109,11 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro)
pm_dir = pmd_offset(pu_dir, address);
#if defined(CONFIG_64BIT) && !defined(CONFIG_DEBUG_PAGEALLOC)
- if (MACHINE_HAS_HPAGE && !(address & ~HPAGE_MASK) &&
- (address + HPAGE_SIZE <= start + size) &&
- (address >= HPAGE_SIZE)) {
+ if (MACHINE_HAS_EDAT1 && pmd_none(*pm_dir) && address &&
+ !(address & ~PMD_MASK) && (address + PMD_SIZE <= end)) {
pte_val(pte) |= _SEGMENT_ENTRY_LARGE;
pmd_val(*pm_dir) = pte_val(pte);
- address += HPAGE_SIZE - PAGE_SIZE;
+ address += PMD_SIZE;
continue;
}
#endif
@@ -126,10 +126,11 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro)
pt_dir = pte_offset_kernel(pm_dir, address);
*pt_dir = pte;
+ address += PAGE_SIZE;
}
ret = 0;
out:
- flush_tlb_kernel_range(start, start + size);
+ flush_tlb_kernel_range(start, end);
return ret;
}
@@ -139,7 +140,8 @@ out:
*/
static void vmem_remove_range(unsigned long start, unsigned long size)
{
- unsigned long address;
+ unsigned long end = start + size;
+ unsigned long address = start;
pgd_t *pg_dir;
pud_t *pu_dir;
pmd_t *pm_dir;
@@ -147,25 +149,32 @@ static void vmem_remove_range(unsigned long start, unsigned long size)
pte_t pte;
pte_val(pte) = _PAGE_TYPE_EMPTY;
- for (address = start; address < start + size; address += PAGE_SIZE) {
+ while (address < end) {
pg_dir = pgd_offset_k(address);
+ if (pgd_none(*pg_dir)) {
+ address += PGDIR_SIZE;
+ continue;
+ }
pu_dir = pud_offset(pg_dir, address);
- if (pud_none(*pu_dir))
+ if (pud_none(*pu_dir)) {
+ address += PUD_SIZE;
continue;
+ }
pm_dir = pmd_offset(pu_dir, address);
- if (pmd_none(*pm_dir))
+ if (pmd_none(*pm_dir)) {
+ address += PMD_SIZE;
continue;
-
- if (pmd_huge(*pm_dir)) {
+ }
+ if (pmd_large(*pm_dir)) {
pmd_clear(pm_dir);
- address += HPAGE_SIZE - PAGE_SIZE;
+ address += PMD_SIZE;
continue;
}
-
pt_dir = pte_offset_kernel(pm_dir, address);
*pt_dir = pte;
+ address += PAGE_SIZE;
}
- flush_tlb_kernel_range(start, start + size);
+ flush_tlb_kernel_range(start, end);
}
/*
@@ -330,8 +339,8 @@ void __init vmem_map_init(void)
unsigned long start, end;
int i;
- ro_start = ((unsigned long)&_stext) & PAGE_MASK;
- ro_end = PFN_ALIGN((unsigned long)&_eshared);
+ ro_start = PFN_ALIGN((unsigned long)&_stext);
+ ro_end = (unsigned long)&_eshared & PAGE_MASK;
for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) {
if (memory_chunk[i].type == CHUNK_CRASHK ||
memory_chunk[i].type == CHUNK_OLDMEM)
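vmem_add_mem() now maps a 1MB segment whenever EDAT1 is available, the segment entry is still empty, the address is segment-aligned and non-zero (page 0 stays 4K-mapped), and a full segment fits below end. A userspace sketch of just that predicate, using s390's 1MB segment size:

#include <stdio.h>

#define PMD_SIZE (1UL << 20)
#define PMD_MASK (~(PMD_SIZE - 1))

static int can_map_large(unsigned long address, unsigned long end)
{
        return address && !(address & ~PMD_MASK) && address + PMD_SIZE <= end;
}

int main(void)
{
        unsigned long end = 8UL << 20;  /* 8MB of memory */

        printf("0x000000: %d\n", can_map_large(0x000000, end)); /* 0: skip page 0 */
        printf("0x100000: %d\n", can_map_large(0x100000, end)); /* 1: aligned, fits */
        printf("0x180000: %d\n", can_map_large(0x180000, end)); /* 0: unaligned */
        printf("0x700000: %d\n", can_map_large(0x700000, end)); /* 1: last full MB */
        return 0;
}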