summaryrefslogtreecommitdiff
path: root/arch/powerpc/mm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r--arch/powerpc/mm/Makefile1
-rw-r--r--arch/powerpc/mm/fault.c8
-rw-r--r--arch/powerpc/mm/fsl_booke_mmu.c6
-rw-r--r--arch/powerpc/mm/hash_low_64.S16
-rw-r--r--arch/powerpc/mm/hash_utils_64.c107
-rw-r--r--arch/powerpc/mm/hugetlbpage.c119
-rw-r--r--arch/powerpc/mm/lmb.c13
-rw-r--r--arch/powerpc/mm/mem.c21
-rw-r--r--arch/powerpc/mm/slb.c3
-rw-r--r--arch/powerpc/mm/slb_low.S5
-rw-r--r--arch/powerpc/mm/subpage-prot.c213
11 files changed, 445 insertions, 67 deletions
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index 20629ae..41649a5 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -22,3 +22,4 @@ obj-$(CONFIG_FSL_BOOKE) += fsl_booke_mmu.o
obj-$(CONFIG_NEED_MULTIPLE_NODES) += numa.o
obj-$(CONFIG_PPC_MM_SLICES) += slice.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
+obj-$(CONFIG_PPC_SUBPAGE_PROT) += subpage-prot.o
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 8135da0..7b25107 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -167,10 +167,8 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
if (notify_page_fault(regs))
return 0;
- if (trap == 0x300) {
- if (debugger_fault_handler(regs))
- return 0;
- }
+ if (unlikely(debugger_fault_handler(regs)))
+ return 0;
/* On a kernel SLB miss we can only check for a valid exception entry */
if (!user_mode(regs) && (address >= TASK_SIZE))
@@ -189,7 +187,7 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
return SIGSEGV;
/* in_atomic() in user mode is really bad,
as is current->mm == NULL. */
- printk(KERN_EMERG "Page fault in user mode with"
+ printk(KERN_EMERG "Page fault in user mode with "
"in_atomic() = %d mm = %p\n", in_atomic(), mm);
printk(KERN_EMERG "NIP = %lx MSR = %lx\n",
regs->nip, regs->msr);
diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c
index 17139da..c93a966 100644
--- a/arch/powerpc/mm/fsl_booke_mmu.c
+++ b/arch/powerpc/mm/fsl_booke_mmu.c
@@ -165,15 +165,15 @@ void invalidate_tlbcam_entry(int index)
void __init cam_mapin_ram(unsigned long cam0, unsigned long cam1,
unsigned long cam2)
{
- settlbcam(0, KERNELBASE, PPC_MEMSTART, cam0, _PAGE_KERNEL, 0);
+ settlbcam(0, PAGE_OFFSET, PPC_MEMSTART, cam0, _PAGE_KERNEL, 0);
tlbcam_index++;
if (cam1) {
tlbcam_index++;
- settlbcam(1, KERNELBASE+cam0, PPC_MEMSTART+cam0, cam1, _PAGE_KERNEL, 0);
+ settlbcam(1, PAGE_OFFSET+cam0, PPC_MEMSTART+cam0, cam1, _PAGE_KERNEL, 0);
}
if (cam2) {
tlbcam_index++;
- settlbcam(2, KERNELBASE+cam0+cam1, PPC_MEMSTART+cam0+cam1, cam2, _PAGE_KERNEL, 0);
+ settlbcam(2, PAGE_OFFSET+cam0+cam1, PPC_MEMSTART+cam0+cam1, cam2, _PAGE_KERNEL, 0);
}
}
diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S
index e935edd..21d2484 100644
--- a/arch/powerpc/mm/hash_low_64.S
+++ b/arch/powerpc/mm/hash_low_64.S
@@ -331,7 +331,8 @@ htab_pte_insert_failure:
*****************************************************************************/
/* _hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
- * pte_t *ptep, unsigned long trap, int local, int ssize)
+ * pte_t *ptep, unsigned long trap, int local, int ssize,
+ * int subpg_prot)
*/
/*
@@ -429,12 +430,19 @@ END_FTR_SECTION_IFSET(CPU_FTR_1T_SEGMENT)
xor r28,r28,r0 /* hash */
/* Convert linux PTE bits into HW equivalents */
-4: andi. r3,r30,0x1fe /* Get basic set of flags */
- xori r3,r3,HPTE_R_N /* _PAGE_EXEC -> NOEXEC */
+4:
+#ifdef CONFIG_PPC_SUBPAGE_PROT
+ andc r10,r30,r10
+ andi. r3,r10,0x1fe /* Get basic set of flags */
+ rlwinm r0,r10,32-9+1,30,30 /* _PAGE_RW -> _PAGE_USER (r0) */
+#else
+ andi. r3,r30,0x1fe /* Get basic set of flags */
rlwinm r0,r30,32-9+1,30,30 /* _PAGE_RW -> _PAGE_USER (r0) */
+#endif
+ xori r3,r3,HPTE_R_N /* _PAGE_EXEC -> NOEXEC */
rlwinm r4,r30,32-7+1,30,30 /* _PAGE_DIRTY -> _PAGE_USER (r4) */
and r0,r0,r4 /* _PAGE_RW & _PAGE_DIRTY ->r0 bit 30*/
- andc r0,r30,r0 /* r0 = pte & ~r0 */
+ andc r0,r3,r0 /* r0 = pte & ~r0 */
rlwimi r3,r0,32-1,31,31 /* Insert result into PP lsb */
ori r3,r3,HPTE_R_C /* Always add "C" bit for perf. */
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index f09730b..32f4161 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -96,6 +96,7 @@ int mmu_vmalloc_psize = MMU_PAGE_4K;
int mmu_io_psize = MMU_PAGE_4K;
int mmu_kernel_ssize = MMU_SEGSIZE_256M;
int mmu_highuser_ssize = MMU_SEGSIZE_256M;
+u16 mmu_slb_size = 64;
#ifdef CONFIG_HUGETLB_PAGE
int mmu_huge_psize = MMU_PAGE_16M;
unsigned int HPAGE_SHIFT;
@@ -368,18 +369,11 @@ static void __init htab_init_page_sizes(void)
* on what is available
*/
if (mmu_psize_defs[MMU_PAGE_16M].shift)
- mmu_huge_psize = MMU_PAGE_16M;
+ set_huge_psize(MMU_PAGE_16M);
/* With 4k/4level pagetables, we can't (for now) cope with a
* huge page size < PMD_SIZE */
else if (mmu_psize_defs[MMU_PAGE_1M].shift)
- mmu_huge_psize = MMU_PAGE_1M;
-
- /* Calculate HPAGE_SHIFT and sanity check it */
- if (mmu_psize_defs[mmu_huge_psize].shift > MIN_HUGEPTE_SHIFT &&
- mmu_psize_defs[mmu_huge_psize].shift < SID_SHIFT)
- HPAGE_SHIFT = mmu_psize_defs[mmu_huge_psize].shift;
- else
- HPAGE_SHIFT = 0; /* No huge pages dude ! */
+ set_huge_psize(MMU_PAGE_1M);
#endif /* CONFIG_HUGETLB_PAGE */
}
@@ -477,7 +471,7 @@ void __init htab_initialize(void)
unsigned long table;
unsigned long pteg_count;
unsigned long mode_rw;
- unsigned long base = 0, size = 0;
+ unsigned long base = 0, size = 0, limit;
int i;
extern unsigned long tce_alloc_start, tce_alloc_end;
@@ -511,9 +505,15 @@ void __init htab_initialize(void)
_SDR1 = 0;
} else {
/* Find storage for the HPT. Must be contiguous in
- * the absolute address space.
+ * the absolute address space. On cell we want it to be
+ * in the first 1 Gig.
*/
- table = lmb_alloc(htab_size_bytes, htab_size_bytes);
+ if (machine_is(cell))
+ limit = 0x40000000;
+ else
+ limit = 0;
+
+ table = lmb_alloc_base(htab_size_bytes, htab_size_bytes, limit);
DBG("Hash table allocated at %lx, size: %lx\n", table,
htab_size_bytes);
@@ -643,7 +643,7 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap)
* For now this makes the whole process use 4k pages.
*/
#ifdef CONFIG_PPC_64K_PAGES
-static void demote_segment_4k(struct mm_struct *mm, unsigned long addr)
+void demote_segment_4k(struct mm_struct *mm, unsigned long addr)
{
if (mm->context.user_psize == MMU_PAGE_4K)
return;
@@ -651,13 +651,62 @@ static void demote_segment_4k(struct mm_struct *mm, unsigned long addr)
#ifdef CONFIG_SPU_BASE
spu_flush_all_slbs(mm);
#endif
+ if (get_paca()->context.user_psize != MMU_PAGE_4K) {
+ get_paca()->context = mm->context;
+ slb_flush_and_rebolt();
+ }
}
#endif /* CONFIG_PPC_64K_PAGES */
+#ifdef CONFIG_PPC_SUBPAGE_PROT
+/*
+ * This looks up a 2-bit protection code for a 4k subpage of a 64k page.
+ * Userspace sets the subpage permissions using the subpage_prot system call.
+ *
+ * Result is 0: full permissions, _PAGE_RW: read-only,
+ * _PAGE_USER or _PAGE_USER|_PAGE_RW: no access.
+ */
+static int subpage_protection(pgd_t *pgdir, unsigned long ea)
+{
+ struct subpage_prot_table *spt = pgd_subpage_prot(pgdir);
+ u32 spp = 0;
+ u32 **sbpm, *sbpp;
+
+ if (ea >= spt->maxaddr)
+ return 0;
+ if (ea < 0x100000000) {
+ /* addresses below 4GB use spt->low_prot */
+ sbpm = spt->low_prot;
+ } else {
+ sbpm = spt->protptrs[ea >> SBP_L3_SHIFT];
+ if (!sbpm)
+ return 0;
+ }
+ sbpp = sbpm[(ea >> SBP_L2_SHIFT) & (SBP_L2_COUNT - 1)];
+ if (!sbpp)
+ return 0;
+ spp = sbpp[(ea >> PAGE_SHIFT) & (SBP_L1_COUNT - 1)];
+
+ /* extract 2-bit bitfield for this 4k subpage */
+ spp >>= 30 - 2 * ((ea >> 12) & 0xf);
+
+ /* turn 0,1,2,3 into combination of _PAGE_USER and _PAGE_RW */
+ spp = ((spp & 2) ? _PAGE_USER : 0) | ((spp & 1) ? _PAGE_RW : 0);
+ return spp;
+}
+
+#else /* CONFIG_PPC_SUBPAGE_PROT */
+static inline int subpage_protection(pgd_t *pgdir, unsigned long ea)
+{
+ return 0;
+}
+#endif
+
/* Result code is:
* 0 - handled
* 1 - normal page fault
* -1 - critical hash insertion error
+ * -2 - access not permitted by subpage protection mechanism
*/
int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
{
@@ -808,7 +857,14 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
rc = __hash_page_64K(ea, access, vsid, ptep, trap, local, ssize);
else
#endif /* CONFIG_PPC_HAS_HASH_64K */
- rc = __hash_page_4K(ea, access, vsid, ptep, trap, local, ssize);
+ {
+ int spp = subpage_protection(pgdir, ea);
+ if (access & spp)
+ rc = -2;
+ else
+ rc = __hash_page_4K(ea, access, vsid, ptep, trap,
+ local, ssize, spp);
+ }
#ifndef CONFIG_PPC_64K_PAGES
DBG_LOW(" o-pte: %016lx\n", pte_val(*ptep));
@@ -880,7 +936,8 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
__hash_page_64K(ea, access, vsid, ptep, trap, local, ssize);
else
#endif /* CONFIG_PPC_HAS_HASH_64K */
- __hash_page_4K(ea, access, vsid, ptep, trap, local, ssize);
+ __hash_page_4K(ea, access, vsid, ptep, trap, local, ssize,
+ subpage_protection(pgdir, ea));
local_irq_restore(flags);
}
@@ -925,19 +982,17 @@ void flush_hash_range(unsigned long number, int local)
* low_hash_fault is called when we the low level hash code failed
* to instert a PTE due to an hypervisor error
*/
-void low_hash_fault(struct pt_regs *regs, unsigned long address)
+void low_hash_fault(struct pt_regs *regs, unsigned long address, int rc)
{
if (user_mode(regs)) {
- siginfo_t info;
-
- info.si_signo = SIGBUS;
- info.si_errno = 0;
- info.si_code = BUS_ADRERR;
- info.si_addr = (void __user *)address;
- force_sig_info(SIGBUS, &info, current);
- return;
- }
- bad_page_fault(regs, address, SIGBUS);
+#ifdef CONFIG_PPC_SUBPAGE_PROT
+ if (rc == -2)
+ _exception(SIGSEGV, regs, SEGV_ACCERR, address);
+ else
+#endif
+ _exception(SIGBUS, regs, BUS_ADRERR, address);
+ } else
+ bad_page_fault(regs, address, SIGBUS);
}
#ifdef CONFIG_DEBUG_PAGEALLOC
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 71efb38..a02266d 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -24,18 +24,17 @@
#include <asm/cputable.h>
#include <asm/spu.h>
+#define HPAGE_SHIFT_64K 16
+#define HPAGE_SHIFT_16M 24
+
#define NUM_LOW_AREAS (0x100000000UL >> SID_SHIFT)
#define NUM_HIGH_AREAS (PGTABLE_RANGE >> HTLB_AREA_SHIFT)
-#ifdef CONFIG_PPC_64K_PAGES
-#define HUGEPTE_INDEX_SIZE (PMD_SHIFT-HPAGE_SHIFT)
-#else
-#define HUGEPTE_INDEX_SIZE (PUD_SHIFT-HPAGE_SHIFT)
-#endif
-#define PTRS_PER_HUGEPTE (1 << HUGEPTE_INDEX_SIZE)
-#define HUGEPTE_TABLE_SIZE (sizeof(pte_t) << HUGEPTE_INDEX_SIZE)
+unsigned int hugepte_shift;
+#define PTRS_PER_HUGEPTE (1 << hugepte_shift)
+#define HUGEPTE_TABLE_SIZE (sizeof(pte_t) << hugepte_shift)
-#define HUGEPD_SHIFT (HPAGE_SHIFT + HUGEPTE_INDEX_SIZE)
+#define HUGEPD_SHIFT (HPAGE_SHIFT + hugepte_shift)
#define HUGEPD_SIZE (1UL << HUGEPD_SHIFT)
#define HUGEPD_MASK (~(HUGEPD_SIZE-1))
@@ -82,11 +81,35 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
return 0;
}
+/* Base page size affects how we walk hugetlb page tables */
+#ifdef CONFIG_PPC_64K_PAGES
+#define hpmd_offset(pud, addr) pmd_offset(pud, addr)
+#define hpmd_alloc(mm, pud, addr) pmd_alloc(mm, pud, addr)
+#else
+static inline
+pmd_t *hpmd_offset(pud_t *pud, unsigned long addr)
+{
+ if (HPAGE_SHIFT == HPAGE_SHIFT_64K)
+ return pmd_offset(pud, addr);
+ else
+ return (pmd_t *) pud;
+}
+static inline
+pmd_t *hpmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long addr)
+{
+ if (HPAGE_SHIFT == HPAGE_SHIFT_64K)
+ return pmd_alloc(mm, pud, addr);
+ else
+ return (pmd_t *) pud;
+}
+#endif
+
/* Modelled after find_linux_pte() */
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
{
pgd_t *pg;
pud_t *pu;
+ pmd_t *pm;
BUG_ON(get_slice_psize(mm, addr) != mmu_huge_psize);
@@ -96,14 +119,9 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
if (!pgd_none(*pg)) {
pu = pud_offset(pg, addr);
if (!pud_none(*pu)) {
-#ifdef CONFIG_PPC_64K_PAGES
- pmd_t *pm;
- pm = pmd_offset(pu, addr);
+ pm = hpmd_offset(pu, addr);
if (!pmd_none(*pm))
return hugepte_offset((hugepd_t *)pm, addr);
-#else
- return hugepte_offset((hugepd_t *)pu, addr);
-#endif
}
}
@@ -114,6 +132,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
{
pgd_t *pg;
pud_t *pu;
+ pmd_t *pm;
hugepd_t *hpdp = NULL;
BUG_ON(get_slice_psize(mm, addr) != mmu_huge_psize);
@@ -124,14 +143,9 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
pu = pud_alloc(mm, pg, addr);
if (pu) {
-#ifdef CONFIG_PPC_64K_PAGES
- pmd_t *pm;
- pm = pmd_alloc(mm, pu, addr);
+ pm = hpmd_alloc(mm, pu, addr);
if (pm)
hpdp = (hugepd_t *)pm;
-#else
- hpdp = (hugepd_t *)pu;
-#endif
}
if (! hpdp)
@@ -158,7 +172,6 @@ static void free_hugepte_range(struct mmu_gather *tlb, hugepd_t *hpdp)
PGF_CACHENUM_MASK));
}
-#ifdef CONFIG_PPC_64K_PAGES
static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
unsigned long addr, unsigned long end,
unsigned long floor, unsigned long ceiling)
@@ -191,7 +204,6 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
pud_clear(pud);
pmd_free_tlb(tlb, pmd);
}
-#endif
static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
unsigned long addr, unsigned long end,
@@ -210,9 +222,15 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
continue;
hugetlb_free_pmd_range(tlb, pud, addr, next, floor, ceiling);
#else
- if (pud_none(*pud))
- continue;
- free_hugepte_range(tlb, (hugepd_t *)pud);
+ if (HPAGE_SHIFT == HPAGE_SHIFT_64K) {
+ if (pud_none_or_clear_bad(pud))
+ continue;
+ hugetlb_free_pmd_range(tlb, pud, addr, next, floor, ceiling);
+ } else {
+ if (pud_none(*pud))
+ continue;
+ free_hugepte_range(tlb, (hugepd_t *)pud);
+ }
#endif
} while (pud++, addr = next, addr != end);
@@ -526,6 +544,57 @@ repeat:
return err;
}
+void set_huge_psize(int psize)
+{
+ /* Check that it is a page size supported by the hardware and
+ * that it fits within pagetable limits. */
+ if (mmu_psize_defs[psize].shift && mmu_psize_defs[psize].shift < SID_SHIFT &&
+ (mmu_psize_defs[psize].shift > MIN_HUGEPTE_SHIFT ||
+ mmu_psize_defs[psize].shift == HPAGE_SHIFT_64K)) {
+ HPAGE_SHIFT = mmu_psize_defs[psize].shift;
+ mmu_huge_psize = psize;
+#ifdef CONFIG_PPC_64K_PAGES
+ hugepte_shift = (PMD_SHIFT-HPAGE_SHIFT);
+#else
+ if (HPAGE_SHIFT == HPAGE_SHIFT_64K)
+ hugepte_shift = (PMD_SHIFT-HPAGE_SHIFT);
+ else
+ hugepte_shift = (PUD_SHIFT-HPAGE_SHIFT);
+#endif
+
+ } else
+ HPAGE_SHIFT = 0;
+}
+
+static int __init hugepage_setup_sz(char *str)
+{
+ unsigned long long size;
+ int mmu_psize = -1;
+ int shift;
+
+ size = memparse(str, &str);
+
+ shift = __ffs(size);
+ switch (shift) {
+#ifndef CONFIG_PPC_64K_PAGES
+ case HPAGE_SHIFT_64K:
+ mmu_psize = MMU_PAGE_64K;
+ break;
+#endif
+ case HPAGE_SHIFT_16M:
+ mmu_psize = MMU_PAGE_16M;
+ break;
+ }
+
+ if (mmu_psize >=0 && mmu_psize_defs[mmu_psize].shift)
+ set_huge_psize(mmu_psize);
+ else
+ printk(KERN_WARNING "Invalid huge page size specified(%llu)\n", size);
+
+ return 1;
+}
+__setup("hugepagesz=", hugepage_setup_sz);
+
static void zero_ctor(struct kmem_cache *cache, void *addr)
{
memset(addr, 0, kmem_cache_size(cache));
diff --git a/arch/powerpc/mm/lmb.c b/arch/powerpc/mm/lmb.c
index 8f4d2dc..4ce23bc 100644
--- a/arch/powerpc/mm/lmb.c
+++ b/arch/powerpc/mm/lmb.c
@@ -342,3 +342,16 @@ void __init lmb_enforce_memory_limit(unsigned long memory_limit)
}
}
}
+
+int __init lmb_is_reserved(unsigned long addr)
+{
+ int i;
+
+ for (i = 0; i < lmb.reserved.cnt; i++) {
+ unsigned long upper = lmb.reserved.region[i].base +
+ lmb.reserved.region[i].size - 1;
+ if ((addr >= lmb.reserved.region[i].base) && (addr <= upper))
+ return 1;
+ }
+ return 0;
+}
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 5402fb6b..e812244 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -213,15 +213,30 @@ void __init do_init_bootmem(void)
*/
#ifdef CONFIG_HIGHMEM
free_bootmem_with_active_regions(0, total_lowmem >> PAGE_SHIFT);
+
+ /* reserve the sections we're already using */
+ for (i = 0; i < lmb.reserved.cnt; i++) {
+ unsigned long addr = lmb.reserved.region[i].base +
+ lmb_size_bytes(&lmb.reserved, i) - 1;
+ if (addr < total_lowmem)
+ reserve_bootmem(lmb.reserved.region[i].base,
+ lmb_size_bytes(&lmb.reserved, i));
+ else if (lmb.reserved.region[i].base < total_lowmem) {
+ unsigned long adjusted_size = total_lowmem -
+ lmb.reserved.region[i].base;
+ reserve_bootmem(lmb.reserved.region[i].base,
+ adjusted_size);
+ }
+ }
#else
free_bootmem_with_active_regions(0, max_pfn);
-#endif
/* reserve the sections we're already using */
for (i = 0; i < lmb.reserved.cnt; i++)
reserve_bootmem(lmb.reserved.region[i].base,
lmb_size_bytes(&lmb.reserved, i));
+#endif
/* XXX need to clip this if using highmem? */
sparse_memory_present_with_active_regions(0);
@@ -334,11 +349,13 @@ void __init mem_init(void)
highmem_mapnr = total_lowmem >> PAGE_SHIFT;
for (pfn = highmem_mapnr; pfn < max_mapnr; ++pfn) {
struct page *page = pfn_to_page(pfn);
-
+ if (lmb_is_reserved(pfn << PAGE_SHIFT))
+ continue;
ClearPageReserved(page);
init_page_count(page);
__free_page(page);
totalhigh_pages++;
+ reservedpages--;
}
totalram_pages += totalhigh_pages;
printk(KERN_DEBUG "High memory: %luk\n",
diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index 50d7372..47b06ba 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -256,6 +256,7 @@ void slb_initialize(void)
static int slb_encoding_inited;
extern unsigned int *slb_miss_kernel_load_linear;
extern unsigned int *slb_miss_kernel_load_io;
+ extern unsigned int *slb_compare_rr_to_size;
/* Prepare our SLB miss handler based on our page size */
linear_llp = mmu_psize_defs[mmu_linear_psize].sllp;
@@ -269,6 +270,8 @@ void slb_initialize(void)
SLB_VSID_KERNEL | linear_llp);
patch_slb_encoding(slb_miss_kernel_load_io,
SLB_VSID_KERNEL | io_llp);
+ patch_slb_encoding(slb_compare_rr_to_size,
+ mmu_slb_size);
DBG("SLB: linear LLP = %04x\n", linear_llp);
DBG("SLB: io LLP = %04x\n", io_llp);
diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
index 1328a81..657f6b3 100644
--- a/arch/powerpc/mm/slb_low.S
+++ b/arch/powerpc/mm/slb_low.S
@@ -227,8 +227,9 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES)
7: ld r10,PACASTABRR(r13)
addi r10,r10,1
- /* use a cpu feature mask if we ever change our slb size */
- cmpldi r10,SLB_NUM_ENTRIES
+ /* This gets soft patched on boot. */
+_GLOBAL(slb_compare_rr_to_size)
+ cmpldi r10,0
blt+ 4f
li r10,SLB_NUM_BOLTED
diff --git a/arch/powerpc/mm/subpage-prot.c b/arch/powerpc/mm/subpage-prot.c
new file mode 100644
index 0000000..4cafc0c
--- /dev/null
+++ b/arch/powerpc/mm/subpage-prot.c
@@ -0,0 +1,213 @@
+/*
+ * Copyright 2007-2008 Paul Mackerras, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/gfp.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+
+#include <asm/pgtable.h>
+#include <asm/uaccess.h>
+#include <asm/tlbflush.h>
+
+/*
+ * Free all pages allocated for subpage protection maps and pointers.
+ * Also makes sure that the subpage_prot_table structure is
+ * reinitialized for the next user.
+ */
+void subpage_prot_free(pgd_t *pgd)
+{
+ struct subpage_prot_table *spt = pgd_subpage_prot(pgd);
+ unsigned long i, j, addr;
+ u32 **p;
+
+ for (i = 0; i < 4; ++i) {
+ if (spt->low_prot[i]) {
+ free_page((unsigned long)spt->low_prot[i]);
+ spt->low_prot[i] = NULL;
+ }
+ }
+ addr = 0;
+ for (i = 0; i < 2; ++i) {
+ p = spt->protptrs[i];
+ if (!p)
+ continue;
+ spt->protptrs[i] = NULL;
+ for (j = 0; j < SBP_L2_COUNT && addr < spt->maxaddr;
+ ++j, addr += PAGE_SIZE)
+ if (p[j])
+ free_page((unsigned long)p[j]);
+ free_page((unsigned long)p);
+ }
+ spt->maxaddr = 0;
+}
+
+static void hpte_flush_range(struct mm_struct *mm, unsigned long addr,
+ int npages)
+{
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+ spinlock_t *ptl;
+
+ pgd = pgd_offset(mm, addr);
+ if (pgd_none(*pgd))
+ return;
+ pud = pud_offset(pgd, addr);
+ if (pud_none(*pud))
+ return;
+ pmd = pmd_offset(pud, addr);
+ if (pmd_none(*pmd))
+ return;
+ pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
+ arch_enter_lazy_mmu_mode();
+ for (; npages > 0; --npages) {
+ pte_update(mm, addr, pte, 0, 0);
+ addr += PAGE_SIZE;
+ ++pte;
+ }
+ arch_leave_lazy_mmu_mode();
+ pte_unmap_unlock(pte - 1, ptl);
+}
+
+/*
+ * Clear the subpage protection map for an address range, allowing
+ * all accesses that are allowed by the pte permissions.
+ */
+static void subpage_prot_clear(unsigned long addr, unsigned long len)
+{
+ struct mm_struct *mm = current->mm;
+ struct subpage_prot_table *spt = pgd_subpage_prot(mm->pgd);
+ u32 **spm, *spp;
+ int i, nw;
+ unsigned long next, limit;
+
+ down_write(&mm->mmap_sem);
+ limit = addr + len;
+ if (limit > spt->maxaddr)
+ limit = spt->maxaddr;
+ for (; addr < limit; addr = next) {
+ next = pmd_addr_end(addr, limit);
+ if (addr < 0x100000000) {
+ spm = spt->low_prot;
+ } else {
+ spm = spt->protptrs[addr >> SBP_L3_SHIFT];
+ if (!spm)
+ continue;
+ }
+ spp = spm[(addr >> SBP_L2_SHIFT) & (SBP_L2_COUNT - 1)];
+ if (!spp)
+ continue;
+ spp += (addr >> PAGE_SHIFT) & (SBP_L1_COUNT - 1);
+
+ i = (addr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
+ nw = PTRS_PER_PTE - i;
+ if (addr + (nw << PAGE_SHIFT) > next)
+ nw = (next - addr) >> PAGE_SHIFT;
+
+ memset(spp, 0, nw * sizeof(u32));
+
+ /* now flush any existing HPTEs for the range */
+ hpte_flush_range(mm, addr, nw);
+ }
+ up_write(&mm->mmap_sem);
+}
+
+/*
+ * Copy in a subpage protection map for an address range.
+ * The map has 2 bits per 4k subpage, so 32 bits per 64k page.
+ * Each 2-bit field is 0 to allow any access, 1 to prevent writes,
+ * 2 or 3 to prevent all accesses.
+ * Note that the normal page protections also apply; the subpage
+ * protection mechanism is an additional constraint, so putting 0
+ * in a 2-bit field won't allow writes to a page that is otherwise
+ * write-protected.
+ */
+long sys_subpage_prot(unsigned long addr, unsigned long len, u32 __user *map)
+{
+ struct mm_struct *mm = current->mm;
+ struct subpage_prot_table *spt = pgd_subpage_prot(mm->pgd);
+ u32 **spm, *spp;
+ int i, nw;
+ unsigned long next, limit;
+ int err;
+
+ /* Check parameters */
+ if ((addr & ~PAGE_MASK) || (len & ~PAGE_MASK) ||
+ addr >= TASK_SIZE || len >= TASK_SIZE || addr + len > TASK_SIZE)
+ return -EINVAL;
+
+ if (is_hugepage_only_range(mm, addr, len))
+ return -EINVAL;
+
+ if (!map) {
+ /* Clear out the protection map for the address range */
+ subpage_prot_clear(addr, len);
+ return 0;
+ }
+
+ if (!access_ok(VERIFY_READ, map, (len >> PAGE_SHIFT) * sizeof(u32)))
+ return -EFAULT;
+
+ down_write(&mm->mmap_sem);
+ for (limit = addr + len; addr < limit; addr = next) {
+ next = pmd_addr_end(addr, limit);
+ err = -ENOMEM;
+ if (addr < 0x100000000) {
+ spm = spt->low_prot;
+ } else {
+ spm = spt->protptrs[addr >> SBP_L3_SHIFT];
+ if (!spm) {
+ spm = (u32 **)get_zeroed_page(GFP_KERNEL);
+ if (!spm)
+ goto out;
+ spt->protptrs[addr >> SBP_L3_SHIFT] = spm;
+ }
+ }
+ spm += (addr >> SBP_L2_SHIFT) & (SBP_L2_COUNT - 1);
+ spp = *spm;
+ if (!spp) {
+ spp = (u32 *)get_zeroed_page(GFP_KERNEL);
+ if (!spp)
+ goto out;
+ *spm = spp;
+ }
+ spp += (addr >> PAGE_SHIFT) & (SBP_L1_COUNT - 1);
+
+ local_irq_disable();
+ demote_segment_4k(mm, addr);
+ local_irq_enable();
+
+ i = (addr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
+ nw = PTRS_PER_PTE - i;
+ if (addr + (nw << PAGE_SHIFT) > next)
+ nw = (next - addr) >> PAGE_SHIFT;
+
+ up_write(&mm->mmap_sem);
+ err = -EFAULT;
+ if (__copy_from_user(spp, map, nw * sizeof(u32)))
+ goto out2;
+ map += nw;
+ down_write(&mm->mmap_sem);
+
+ /* now flush any existing HPTEs for the range */
+ hpte_flush_range(mm, addr, nw);
+ }
+ if (limit > spt->maxaddr)
+ spt->maxaddr = limit;
+ err = 0;
+ out:
+ up_write(&mm->mmap_sem);
+ out2:
+ return err;
+}