summary | refs | log | tree | commit | diff
path: root/arch/s390/mm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/s390/mm')
-rw-r--r-- arch/s390/mm/fault.c 40
-rw-r--r-- arch/s390/mm/init.c 3
-rw-r--r-- arch/s390/mm/mmap.c 65
-rw-r--r-- arch/s390/mm/pgtable.c 176
-rw-r--r-- arch/s390/mm/vmem.c 28
5 files changed, 286 insertions, 26 deletions
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 2456b52..ed13d42 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -32,6 +32,7 @@
#include <asm/system.h>
#include <asm/pgtable.h>
#include <asm/s390_ext.h>
+#include <asm/mmu_context.h>
#ifndef CONFIG_64BIT
#define __FAIL_ADDR_MASK 0x7ffff000
@@ -444,6 +445,45 @@ void __kprobes do_dat_exception(struct pt_regs *regs, unsigned long error_code)
do_exception(regs, error_code & 0xff, 0);
}
+#ifdef CONFIG_64BIT
+/*
+ * Exception handler that is only built for CONFIG_64BIT.
+ *
+ * Takes the failing address from the lowcore translation-exception code.
+ * If find_vma() returns a vma for that address, update_mm() is called and
+ * the handler returns; otherwise a user-mode fault is turned into SIGSEGV
+ * (SEGV_MAPERR) and anything else is handed to do_no_context().
+ * NOTE(review): presumably this runs when an access lies beyond the range
+ * covered by the currently loaded ASCE, so that update_mm() lets the
+ * retried access succeed - confirm against the code that installs this
+ * handler.
+ */
+void __kprobes do_asce_exception(struct pt_regs *regs, unsigned long error_code)
+{
+ struct mm_struct *mm;
+ struct vm_area_struct *vma;
+ unsigned long address;
+ int space;
+
+ mm = current->mm;
+ address = S390_lowcore.trans_exc_code & __FAIL_ADDR_MASK;
+ space = check_space(current);
+
+ /* No mm, atomic context or space 0: go straight to do_no_context(). */
+ if (unlikely(space == 0 || in_atomic() || !mm))
+ goto no_context;
+
+ local_irq_enable();
+
+ /*
+ * The vma is only tested for existence below and never dereferenced,
+ * so mmap_sem is dropped right after the lookup.
+ * NOTE(review): find_vma() may return a vma that starts above address;
+ * verify that a plain non-NULL check is what is intended here.
+ */
+ down_read(&mm->mmap_sem);
+ vma = find_vma(mm, address);
+ up_read(&mm->mmap_sem);
+
+ if (vma) {
+ update_mm(mm, current);
+ return;
+ }
+
+ /* User mode accesses just cause a SIGSEGV */
+ if (regs->psw.mask & PSW_MASK_PSTATE) {
+ current->thread.prot_addr = address;
+ current->thread.trap_no = error_code;
+ do_sigsegv(regs, error_code, SEGV_MAPERR, address);
+ return;
+ }
+
+no_context:
+ do_no_context(regs, error_code, address);
+}
+#endif
+
#ifdef CONFIG_PFAULT
/*
* 'pfault' pseudo page faults routines.
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 983ec6e..8053245 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -112,6 +112,7 @@ void __init paging_init(void)
init_mm.pgd = swapper_pg_dir;
S390_lowcore.kernel_asce = __pa(init_mm.pgd) & PAGE_MASK;
#ifdef CONFIG_64BIT
+ /* A three level page table (4TB) is enough for the kernel space. */
S390_lowcore.kernel_asce |= _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;
pgd_type = _REGION3_ENTRY_EMPTY;
#else
@@ -184,7 +185,7 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
pmd = pmd_offset(pud, address);
pte = pte_offset_kernel(pmd, address);
if (!enable) {
- ptep_invalidate(address, pte);
+ ptep_invalidate(&init_mm, address, pte);
continue;
}
*pte = mk_pte_phys(address, __pgprot(_PAGE_TYPE_RW));
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
index 356257c..5932a82 100644
--- a/arch/s390/mm/mmap.c
+++ b/arch/s390/mm/mmap.c
@@ -27,6 +27,7 @@
#include <linux/personality.h>
#include <linux/mm.h>
#include <linux/module.h>
+#include <asm/pgalloc.h>
/*
* Top of mmap area (just below the process stack).
@@ -62,6 +63,8 @@ static inline int mmap_is_legacy(void)
current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY;
}
+#ifndef CONFIG_64BIT
+
/*
* This function, called very early during the creation of a new
* process VM image, sets up which VM layout function to use:
@@ -84,3 +87,65 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
}
EXPORT_SYMBOL_GPL(arch_pick_mmap_layout);
+#else
+
+/*
+ * Wrapper around arch_get_unmapped_area() used on 64-bit.
+ *
+ * A result that is not page aligned is returned unchanged (presumably an
+ * error code from the generic helper). If the chosen range ends above
+ * mm->context.asce_limit, crst_table_upgrade() is called for addr + len
+ * before the address is returned; if the upgrade fails, its return code
+ * is returned cast to unsigned long.
+ */
+static unsigned long
+s390_get_unmapped_area(struct file *filp, unsigned long addr,
+ unsigned long len, unsigned long pgoff, unsigned long flags)
+{
+ struct mm_struct *mm = current->mm;
+ int rc;
+
+ addr = arch_get_unmapped_area(filp, addr, len, pgoff, flags);
+ /* Not page aligned: pass the value through untouched. */
+ if (addr & ~PAGE_MASK)
+ return addr;
+ /* The range does not fit below the current limit: grow the table. */
+ if (unlikely(mm->context.asce_limit < addr + len)) {
+ rc = crst_table_upgrade(mm, addr + len);
+ if (rc)
+ return (unsigned long) rc;
+ }
+ return addr;
+}
+
+/*
+ * Wrapper around arch_get_unmapped_area_topdown() used on 64-bit.
+ *
+ * Behaves like the bottom-up wrapper: a result that is not page aligned
+ * is returned unchanged (presumably an error code), and if the chosen
+ * range ends above mm->context.asce_limit, crst_table_upgrade() is called
+ * for addr + len first. A failing upgrade returns its code cast to
+ * unsigned long.
+ */
+static unsigned long
+s390_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
+ const unsigned long len, const unsigned long pgoff,
+ const unsigned long flags)
+{
+ struct mm_struct *mm = current->mm;
+ unsigned long addr = addr0;
+ int rc;
+
+ addr = arch_get_unmapped_area_topdown(filp, addr, len, pgoff, flags);
+ /* Not page aligned: pass the value through untouched. */
+ if (addr & ~PAGE_MASK)
+ return addr;
+ /* The range does not fit below the current limit: grow the table. */
+ if (unlikely(mm->context.asce_limit < addr + len)) {
+ rc = crst_table_upgrade(mm, addr + len);
+ if (rc)
+ return (unsigned long) rc;
+ }
+ return addr;
+}
+/*
+ * This function, called very early during the creation of a new
+ * process VM image, sets up which VM layout function to use.
+ *
+ * This is the CONFIG_64BIT variant: both layouts install the
+ * s390_get_unmapped_area*() wrappers instead of the generic
+ * arch_get_unmapped_area*() functions, while the unmap_area callbacks
+ * remain the generic ones.
+ */
+void arch_pick_mmap_layout(struct mm_struct *mm)
+{
+ /*
+ * Fall back to the standard layout if the personality
+ * bit is set, or if the expected stack growth is unlimited:
+ */
+ if (mmap_is_legacy()) {
+ /* Legacy layout: search starts at TASK_UNMAPPED_BASE. */
+ mm->mmap_base = TASK_UNMAPPED_BASE;
+ mm->get_unmapped_area = s390_get_unmapped_area;
+ mm->unmap_area = arch_unmap_area;
+ } else {
+ /* Otherwise use the top-down variant based at mmap_base(). */
+ mm->mmap_base = mmap_base();
+ mm->get_unmapped_area = s390_get_unmapped_area_topdown;
+ mm->unmap_area = arch_unmap_area_topdown;
+ }
+}
+EXPORT_SYMBOL_GPL(arch_pick_mmap_layout);
+
+#endif
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 019f518..fd07201 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -23,11 +23,18 @@
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
+#include <asm/mmu_context.h>
#ifndef CONFIG_64BIT
#define ALLOC_ORDER 1
+#define TABLES_PER_PAGE 4
+#define FRAG_MASK 15UL
+#define SECOND_HALVES 10UL
#else
#define ALLOC_ORDER 2
+#define TABLES_PER_PAGE 2
+#define FRAG_MASK 3UL
+#define SECOND_HALVES 2UL
#endif
unsigned long *crst_table_alloc(struct mm_struct *mm, int noexec)
@@ -45,52 +52,179 @@ unsigned long *crst_table_alloc(struct mm_struct *mm, int noexec)
}
page->index = page_to_phys(shadow);
}
+ spin_lock(&mm->page_table_lock);
+ list_add(&page->lru, &mm->context.crst_list);
+ spin_unlock(&mm->page_table_lock);
return (unsigned long *) page_to_phys(page);
}
-void crst_table_free(unsigned long *table)
+/*
+ * Free a region/segment table and, if one exists, its shadow table.
+ * Before the pages are released, the table's page is unlinked from the
+ * list it sits on via page->lru, under mm->page_table_lock.
+ */
+void crst_table_free(struct mm_struct *mm, unsigned long *table)
{
unsigned long *shadow = get_shadow_table(table);
+ struct page *page = virt_to_page(table);
+ spin_lock(&mm->page_table_lock);
+ list_del(&page->lru);
+ spin_unlock(&mm->page_table_lock);
if (shadow)
free_pages((unsigned long) shadow, ALLOC_ORDER);
free_pages((unsigned long) table, ALLOC_ORDER);
}
+#ifdef CONFIG_64BIT
+/*
+ * Grow the address space of mm until mm->context.asce_limit >= limit.
+ *
+ * Each pass allocates one new top-level table and, under
+ * mm->page_table_lock, puts it on top of the current pgd: a limit of at
+ * most 2^31 is raised to 2^42 (region-third type), anything larger to
+ * 2^53 (region-second type). The pass is repeated while the limit is
+ * still too small, and update_mm() is called at the end.
+ *
+ * limit must not exceed 2^53 (BUG otherwise).
+ * Returns 0 on success or -ENOMEM if a table allocation fails.
+ */
+int crst_table_upgrade(struct mm_struct *mm, unsigned long limit)
+{
+ unsigned long *table, *pgd;
+ unsigned long entry;
+
+ BUG_ON(limit > (1UL << 53));
+repeat:
+ /* Allocate before taking the lock; the limit is re-checked below. */
+ table = crst_table_alloc(mm, mm->context.noexec);
+ if (!table)
+ return -ENOMEM;
+ spin_lock(&mm->page_table_lock);
+ if (mm->context.asce_limit < limit) {
+ pgd = (unsigned long *) mm->pgd;
+ if (mm->context.asce_limit <= (1UL << 31)) {
+ entry = _REGION3_ENTRY_EMPTY;
+ mm->context.asce_limit = 1UL << 42;
+ mm->context.asce_bits = _ASCE_TABLE_LENGTH |
+ _ASCE_USER_BITS |
+ _ASCE_TYPE_REGION3;
+ } else {
+ entry = _REGION2_ENTRY_EMPTY;
+ mm->context.asce_limit = 1UL << 53;
+ mm->context.asce_bits = _ASCE_TABLE_LENGTH |
+ _ASCE_USER_BITS |
+ _ASCE_TYPE_REGION2;
+ }
+ /* Fill the new table with empty entries and hook the old pgd in. */
+ crst_table_init(table, entry);
+ pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd);
+ mm->pgd = (pgd_t *) table;
+ /* The table is in use now; NULL keeps it from being freed below. */
+ table = NULL;
+ }
+ spin_unlock(&mm->page_table_lock);
+ /* The limit was already large enough under the lock: drop the spare. */
+ if (table)
+ crst_table_free(mm, table);
+ /* One pass adds a single level; go again if that was not enough. */
+ if (mm->context.asce_limit < limit)
+ goto repeat;
+ update_mm(mm, current);
+ return 0;
+}
+
+/*
+ * Shrink the address space of mm until mm->context.asce_limit <= limit.
+ *
+ * Does nothing if the limit is already small enough. Otherwise the TLB
+ * is flushed for mm and top-level tables are removed one at a time: the
+ * type of the first entry of the current pgd selects the new limit and
+ * ASCE bits, the table that entry points to becomes the new pgd, and the
+ * old top-level table is freed. update_mm() is called at the end.
+ */
+void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
+{
+ pgd_t *pgd;
+
+ if (mm->context.asce_limit <= limit)
+ return;
+ __tlb_flush_mm(mm);
+ while (mm->context.asce_limit > limit) {
+ pgd = mm->pgd;
+ switch (pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) {
+ case _REGION_ENTRY_TYPE_R2:
+ mm->context.asce_limit = 1UL << 42;
+ mm->context.asce_bits = _ASCE_TABLE_LENGTH |
+ _ASCE_USER_BITS |
+ _ASCE_TYPE_REGION3;
+ break;
+ case _REGION_ENTRY_TYPE_R3:
+ mm->context.asce_limit = 1UL << 31;
+ mm->context.asce_bits = _ASCE_TABLE_LENGTH |
+ _ASCE_USER_BITS |
+ _ASCE_TYPE_SEGMENT;
+ break;
+ default:
+ BUG();
+ }
+ /* Step down to the table referenced by the first entry. */
+ mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN);
+ crst_table_free(mm, (unsigned long *) pgd);
+ }
+ update_mm(mm, current);
+}
+#endif
+
/*
* page table entry allocation/free routines.
*/
-unsigned long *page_table_alloc(int noexec)
+/*
+ * Hand out one page table for mm, carved out of a shared 4K page.
+ *
+ * The low bits of page->flags selected by FRAG_MASK record which
+ * 256-entry fragments of a page are in use. One bit is claimed per
+ * table, or two adjacent bits when mm->context.noexec is set. Pages are
+ * kept on mm->context.pgtable_list; a page whose fragments are all in
+ * use is moved to the tail of that list.
+ *
+ * Returns the address of the fragment, or NULL if a new page was needed
+ * and could not be allocated.
+ */
+unsigned long *page_table_alloc(struct mm_struct *mm)
{
- struct page *page = alloc_page(GFP_KERNEL);
+ struct page *page;
unsigned long *table;
+ unsigned long bits;
- if (!page)
- return NULL;
- page->index = 0;
- if (noexec) {
- struct page *shadow = alloc_page(GFP_KERNEL);
- if (!shadow) {
- __free_page(page);
+ bits = mm->context.noexec ? 3UL : 1UL;
+ spin_lock(&mm->page_table_lock);
+ page = NULL;
+ /* Only the first page on the list is considered for reuse. */
+ if (!list_empty(&mm->context.pgtable_list)) {
+ page = list_first_entry(&mm->context.pgtable_list,
+ struct page, lru);
+ if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1))
+ page = NULL;
+ }
+ if (!page) {
+ /* Drop the lock for the page allocation and retake it afterwards. */
+ spin_unlock(&mm->page_table_lock);
+ page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
+ if (!page)
return NULL;
- }
- table = (unsigned long *) page_to_phys(shadow);
+ pgtable_page_ctor(page);
+ page->flags &= ~FRAG_MASK;
+ table = (unsigned long *) page_to_phys(page);
clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
- page->index = (addr_t) table;
+ spin_lock(&mm->page_table_lock);
+ list_add(&page->lru, &mm->context.pgtable_list);
}
- pgtable_page_ctor(page);
table = (unsigned long *) page_to_phys(page);
- clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
+ /* Walk to the first free fragment: 256 entries per flag bit. */
+ while (page->flags & bits) {
+ table += 256;
+ bits <<= 1;
+ }
+ page->flags |= bits;
+ /* All fragments are in use now: park the page at the list tail. */
+ if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1))
+ list_move_tail(&page->lru, &mm->context.pgtable_list);
+ spin_unlock(&mm->page_table_lock);
return table;
}
-void page_table_free(unsigned long *table)
+/*
+ * Release one page table fragment of mm.
+ *
+ * The fragment's flag bit(s) in page->flags (one bit, or two when
+ * mm->context.noexec is set) are derived from the offset of table
+ * within its page and toggled under mm->page_table_lock. While other
+ * fragments remain in use, the page is moved to the head of
+ * mm->context.pgtable_list; once none remain, the page is removed from
+ * the list and freed.
+ */
+void page_table_free(struct mm_struct *mm, unsigned long *table)
{
- unsigned long *shadow = get_shadow_pte(table);
+ struct page *page;
+ unsigned long bits;
- pgtable_page_dtor(virt_to_page(table));
- if (shadow)
- free_page((unsigned long) shadow);
- free_page((unsigned long) table);
+ bits = mm->context.noexec ? 3UL : 1UL;
+ /* Shift by the index of the 256-entry fragment within the page. */
+ bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long);
+ page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+ spin_lock(&mm->page_table_lock);
+ page->flags ^= bits;
+ if (page->flags & FRAG_MASK) {
+ /* Page now has some free pgtable fragments. */
+ list_move(&page->lru, &mm->context.pgtable_list);
+ /* NULL marks the page as still in use; it is not freed below. */
+ page = NULL;
+ } else
+ /* All fragments of the 4K page have been freed. */
+ list_del(&page->lru);
+ spin_unlock(&mm->page_table_lock);
+ if (page) {
+ pgtable_page_dtor(page);
+ __free_page(page);
+ }
+}
+/*
+ * Switch mm out of noexec mode.
+ *
+ * Under mm->page_table_lock, every page on mm->context.crst_list that
+ * has a non-zero page->index gets the pages at that address freed and
+ * its index cleared, and the SECOND_HALVES flag bits are cleared on
+ * every page on mm->context.pgtable_list. Afterwards
+ * mm->context.noexec is set to 0 and update_mm() is called for tsk.
+ */
+void disable_noexec(struct mm_struct *mm, struct task_struct *tsk)
+{
+ struct page *page;
+
+ spin_lock(&mm->page_table_lock);
+ /* Free shadow region and segment tables. */
+ list_for_each_entry(page, &mm->context.crst_list, lru)
+ if (page->index) {
+ free_pages((unsigned long) page->index, ALLOC_ORDER);
+ page->index = 0;
+ }
+ /* "Free" second halves of page tables. */
+ list_for_each_entry(page, &mm->context.pgtable_list, lru)
+ page->flags &= ~SECOND_HALVES;
+ spin_unlock(&mm->page_table_lock);
+ mm->context.noexec = 0;
+ update_mm(mm, tsk);
}
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index 7c1287c..35d90a4 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -69,7 +69,19 @@ static void __ref *vmem_alloc_pages(unsigned int order)
return alloc_bootmem_pages((1 << order) * PAGE_SIZE);
}
-#define vmem_pud_alloc() ({ BUG(); ((pud_t *) NULL); })
+/*
+ * Allocate a pud table filled with _REGION3_ENTRY_EMPTY entries.
+ *
+ * With CONFIG_64BIT the table comes from vmem_alloc_pages(2); without it
+ * nothing is allocated and NULL is returned. NULL is also returned when
+ * the allocation fails.
+ */
+static inline pud_t *vmem_pud_alloc(void)
+{
+ pud_t *pud = NULL;
+
+#ifdef CONFIG_64BIT
+ pud = vmem_alloc_pages(2);
+ if (!pud)
+ return NULL;
+ pud_val(*pud) = _REGION3_ENTRY_EMPTY;
+ /*
+ * Replicate the first entry over the rest of the table.
+ * NOTE(review): source and destination overlap, so this depends on
+ * memcpy() copying strictly forward - confirm for this architecture.
+ */
+ memcpy(pud + 1, pud, (PTRS_PER_PUD - 1)*sizeof(pud_t));
+#endif
+ return pud;
+}
static inline pmd_t *vmem_pmd_alloc(void)
{
@@ -84,13 +96,18 @@ static inline pmd_t *vmem_pmd_alloc(void)
return pmd;
}
-static inline pte_t *vmem_pte_alloc(void)
+/*
+ * Allocate and clear one page table for the kernel mapping.
+ *
+ * Uses page_table_alloc() on init_mm once slab_is_available() reports
+ * true, and alloc_bootmem() before that. Only PTRS_PER_PTE entries are
+ * requested and cleared rather than a whole page.
+ * Returns NULL if the allocation fails.
+ */
+static pte_t __init_refok *vmem_pte_alloc(void)
{
- pte_t *pte = vmem_alloc_pages(0);
+ pte_t *pte;
+ if (slab_is_available())
+ pte = (pte_t *) page_table_alloc(&init_mm);
+ else
+ pte = alloc_bootmem(PTRS_PER_PTE * sizeof(pte_t));
if (!pte)
return NULL;
- clear_table((unsigned long *) pte, _PAGE_TYPE_EMPTY, PAGE_SIZE);
+ clear_table((unsigned long *) pte, _PAGE_TYPE_EMPTY,
+ PTRS_PER_PTE * sizeof(pte_t));
return pte;
}
@@ -360,6 +377,9 @@ void __init vmem_map_init(void)
{
int i;
+ INIT_LIST_HEAD(&init_mm.context.crst_list);
+ INIT_LIST_HEAD(&init_mm.context.pgtable_list);
+ init_mm.context.noexec = 0;
NODE_DATA(0)->node_mem_map = VMEM_MAP;
for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++)
vmem_add_mem(memory_chunk[i].addr, memory_chunk[i].size);