Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r--  arch/powerpc/mm/fault.c               |  45
-rw-r--r--  arch/powerpc/mm/gup.c                 |  18
-rw-r--r--  arch/powerpc/mm/hash_low_64.S         |  22
-rw-r--r--  arch/powerpc/mm/hash_native_64.c      | 198
-rw-r--r--  arch/powerpc/mm/hash_utils_64.c       | 256
-rw-r--r--  arch/powerpc/mm/hugetlbpage-hash64.c  |  33
-rw-r--r--  arch/powerpc/mm/hugetlbpage.c         | 198
-rw-r--r--  arch/powerpc/mm/icswx.c               |   2
-rw-r--r--  arch/powerpc/mm/init_64.c             |  20
-rw-r--r--  arch/powerpc/mm/mem.c                 |  61
-rw-r--r--  arch/powerpc/mm/mmu_context_hash64.c  |  48
-rw-r--r--  arch/powerpc/mm/numa.c                | 288
-rw-r--r--  arch/powerpc/mm/pgtable_64.c          | 120
-rw-r--r--  arch/powerpc/mm/slb_low.S             |  50
-rw-r--r--  arch/powerpc/mm/slice.c               | 223
-rw-r--r--  arch/powerpc/mm/tlb_hash64.c          |   2
-rw-r--r--  arch/powerpc/mm/tlb_nohash.c          |   2
17 files changed, 1086 insertions(+), 500 deletions(-)
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 3a8489a..8726779 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -32,6 +32,7 @@
#include <linux/perf_event.h>
#include <linux/magic.h>
#include <linux/ratelimit.h>
+#include <linux/context_tracking.h>
#include <asm/firmware.h>
#include <asm/page.h>
@@ -196,6 +197,7 @@ static int mm_fault_error(struct pt_regs *regs, unsigned long addr, int fault)
int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
unsigned long error_code)
{
+ enum ctx_state prev_state = exception_enter();
struct vm_area_struct * vma;
struct mm_struct *mm = current->mm;
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
@@ -204,6 +206,7 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
int trap = TRAP(regs);
int is_exec = trap == 0x400;
int fault;
+ int rc = 0;
#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
/*
@@ -230,28 +233,30 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
* look at it
*/
if (error_code & ICSWX_DSI_UCT) {
- int rc = acop_handle_fault(regs, address, error_code);
+ rc = acop_handle_fault(regs, address, error_code);
if (rc)
- return rc;
+ goto bail;
}
#endif /* CONFIG_PPC_ICSWX */
if (notify_page_fault(regs))
- return 0;
+ goto bail;
if (unlikely(debugger_fault_handler(regs)))
- return 0;
+ goto bail;
/* On a kernel SLB miss we can only check for a valid exception entry */
- if (!user_mode(regs) && (address >= TASK_SIZE))
- return SIGSEGV;
+ if (!user_mode(regs) && (address >= TASK_SIZE)) {
+ rc = SIGSEGV;
+ goto bail;
+ }
#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE) || \
defined(CONFIG_PPC_BOOK3S_64))
if (error_code & DSISR_DABRMATCH) {
- /* DABR match */
- do_dabr(regs, address, error_code);
- return 0;
+ /* breakpoint match */
+ do_break(regs, address, error_code);
+ goto bail;
}
#endif
@@ -260,8 +265,10 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
local_irq_enable();
if (in_atomic() || mm == NULL) {
- if (!user_mode(regs))
- return SIGSEGV;
+ if (!user_mode(regs)) {
+ rc = SIGSEGV;
+ goto bail;
+ }
/* in_atomic() in user mode is really bad,
as is current->mm == NULL. */
printk(KERN_EMERG "Page fault in user mode with "
@@ -417,9 +424,11 @@ good_area:
*/
fault = handle_mm_fault(mm, vma, address, flags);
if (unlikely(fault & (VM_FAULT_RETRY|VM_FAULT_ERROR))) {
- int rc = mm_fault_error(regs, address, fault);
+ rc = mm_fault_error(regs, address, fault);
if (rc >= MM_FAULT_RETURN)
- return rc;
+ goto bail;
+ else
+ rc = 0;
}
/*
@@ -454,7 +463,7 @@ good_area:
}
up_read(&mm->mmap_sem);
- return 0;
+ goto bail;
bad_area:
up_read(&mm->mmap_sem);
@@ -463,7 +472,7 @@ bad_area_nosemaphore:
/* User mode accesses cause a SIGSEGV */
if (user_mode(regs)) {
_exception(SIGSEGV, regs, code, address);
- return 0;
+ goto bail;
}
if (is_exec && (error_code & DSISR_PROTFAULT))
@@ -471,7 +480,11 @@ bad_area_nosemaphore:
" page (%lx) - exploit attempt? (uid: %d)\n",
address, from_kuid(&init_user_ns, current_uid()));
- return SIGSEGV;
+ rc = SIGSEGV;
+
+bail:
+ exception_exit(prev_state);
+ return rc;
}
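The fault.c change above is purely structural: do_page_fault() is bracketed by exception_enter()/exception_exit() for context tracking, so every early return is rewritten as a goto to a single bail label that restores the previous state on every path. A minimal userspace sketch of the same single-exit pattern (the hook names here are illustrative, not the kernel's):

#include <stdio.h>

static int enter_hook(void)  { puts("enter"); return 1; }
static void exit_hook(int s) { printf("exit (prev state %d)\n", s); }

static int handle(int error_code)
{
	int prev_state = enter_hook();
	int rc = 0;

	if (error_code < 0) {	/* early-failure path */
		rc = -1;
		goto bail;
	}
	/* ... main fault handling would go here ... */
bail:
	exit_hook(prev_state);	/* runs on every exit path */
	return rc;
}

int main(void) { return handle(-1) == -1 ? 0 : 1; }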
diff --git a/arch/powerpc/mm/gup.c b/arch/powerpc/mm/gup.c
index d7efdbf..4b921af 100644
--- a/arch/powerpc/mm/gup.c
+++ b/arch/powerpc/mm/gup.c
@@ -68,7 +68,11 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
next = pmd_addr_end(addr, end);
if (pmd_none(pmd))
return 0;
- if (is_hugepd(pmdp)) {
+ if (pmd_huge(pmd)) {
+ if (!gup_hugepte((pte_t *)pmdp, PMD_SIZE, addr, next,
+ write, pages, nr))
+ return 0;
+ } else if (is_hugepd(pmdp)) {
if (!gup_hugepd((hugepd_t *)pmdp, PMD_SHIFT,
addr, next, write, pages, nr))
return 0;
@@ -92,7 +96,11 @@ static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
next = pud_addr_end(addr, end);
if (pud_none(pud))
return 0;
- if (is_hugepd(pudp)) {
+ if (pud_huge(pud)) {
+ if (!gup_hugepte((pte_t *)pudp, PUD_SIZE, addr, next,
+ write, pages, nr))
+ return 0;
+ } else if (is_hugepd(pudp)) {
if (!gup_hugepd((hugepd_t *)pudp, PUD_SHIFT,
addr, next, write, pages, nr))
return 0;
@@ -153,7 +161,11 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
next = pgd_addr_end(addr, end);
if (pgd_none(pgd))
goto slow;
- if (is_hugepd(pgdp)) {
+ if (pgd_huge(pgd)) {
+ if (!gup_hugepte((pte_t *)pgdp, PGDIR_SIZE, addr, next,
+ write, pages, &nr))
+ goto slow;
+ } else if (is_hugepd(pgdp)) {
if (!gup_hugepd((hugepd_t *)pgdp, PGDIR_SHIFT,
addr, next, write, pages, &nr))
goto slow;
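The gup.c hunks change the dispatch order at every page-table level: a leaf huge entry (pmd_huge()/pud_huge()/pgd_huge()) is now handed to gup_hugepte() before the hugepd-directory test. A schematic sketch of that per-level decision, with stub enum values standing in for the kernel's predicates:

/* Illustrative only: the enum replaces the real pmd/pud/pgd tests. */
enum entry_kind { E_NONE, E_HUGE_LEAF, E_HUGEPD, E_TABLE };

static int gup_level(enum entry_kind k)
{
	switch (k) {
	case E_NONE:      return 0;  /* empty: fall back to the slow path */
	case E_HUGE_LEAF: return 1;  /* gup_hugepte() on the entry itself */
	case E_HUGEPD:    return 2;  /* gup_hugepd() walks the hugepd table */
	default:          return 3;  /* descend to the next level as usual */
	}
}

int main(void) { return gup_level(E_HUGE_LEAF) == 1 ? 0 : 1; }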
diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S
index 7443481..0e980ac 100644
--- a/arch/powerpc/mm/hash_low_64.S
+++ b/arch/powerpc/mm/hash_low_64.S
@@ -196,7 +196,8 @@ htab_insert_pte:
mr r4,r29 /* Retrieve vpn */
li r7,0 /* !bolted, !secondary */
li r8,MMU_PAGE_4K /* page size */
- ld r9,STK_PARAM(R9)(r1) /* segment size */
+ li r9,MMU_PAGE_4K /* actual page size */
+ ld r10,STK_PARAM(R9)(r1) /* segment size */
_GLOBAL(htab_call_hpte_insert1)
bl . /* Patched by htab_finish_init() */
cmpdi 0,r3,0
@@ -219,7 +220,8 @@ _GLOBAL(htab_call_hpte_insert1)
mr r4,r29 /* Retrieve vpn */
li r7,HPTE_V_SECONDARY /* !bolted, secondary */
li r8,MMU_PAGE_4K /* page size */
- ld r9,STK_PARAM(R9)(r1) /* segment size */
+ li r9,MMU_PAGE_4K /* actual page size */
+ ld r10,STK_PARAM(R9)(r1) /* segment size */
_GLOBAL(htab_call_hpte_insert2)
bl . /* Patched by htab_finish_init() */
cmpdi 0,r3,0
@@ -490,7 +492,7 @@ END_FTR_SECTION(CPU_FTR_NOEXECUTE|CPU_FTR_COHERENT_ICACHE, CPU_FTR_NOEXECUTE)
beq htab_inval_old_hpte
ld r6,STK_PARAM(R6)(r1)
- ori r26,r6,0x8000 /* Load the hidx mask */
+ ori r26,r6,PTE_PAGE_HIDX_OFFSET /* Load the hidx mask. */
ld r26,0(r26)
addi r5,r25,36 /* Check actual HPTE_SUB bit, this */
rldcr. r0,r31,r5,0 /* must match pgtable.h definition */
@@ -515,7 +517,8 @@ htab_special_pfn:
mr r4,r29 /* Retrieve vpn */
li r7,0 /* !bolted, !secondary */
li r8,MMU_PAGE_4K /* page size */
- ld r9,STK_PARAM(R9)(r1) /* segment size */
+ li r9,MMU_PAGE_4K /* actual page size */
+ ld r10,STK_PARAM(R9)(r1) /* segment size */
_GLOBAL(htab_call_hpte_insert1)
bl . /* patched by htab_finish_init() */
cmpdi 0,r3,0
@@ -542,7 +545,8 @@ _GLOBAL(htab_call_hpte_insert1)
mr r4,r29 /* Retrieve vpn */
li r7,HPTE_V_SECONDARY /* !bolted, secondary */
li r8,MMU_PAGE_4K /* page size */
- ld r9,STK_PARAM(R9)(r1) /* segment size */
+ li r9,MMU_PAGE_4K /* actual page size */
+ ld r10,STK_PARAM(R9)(r1) /* segment size */
_GLOBAL(htab_call_hpte_insert2)
bl . /* patched by htab_finish_init() */
cmpdi 0,r3,0
@@ -607,7 +611,7 @@ htab_pte_insert_ok:
sld r4,r4,r5
andc r26,r26,r4
or r26,r26,r3
- ori r5,r6,0x8000
+ ori r5,r6,PTE_PAGE_HIDX_OFFSET
std r26,0(r5)
lwsync
std r30,0(r6)
@@ -840,7 +844,8 @@ ht64_insert_pte:
mr r4,r29 /* Retrieve vpn */
li r7,0 /* !bolted, !secondary */
li r8,MMU_PAGE_64K
- ld r9,STK_PARAM(R9)(r1) /* segment size */
+ li r9,MMU_PAGE_64K /* actual page size */
+ ld r10,STK_PARAM(R9)(r1) /* segment size */
_GLOBAL(ht64_call_hpte_insert1)
bl . /* patched by htab_finish_init() */
cmpdi 0,r3,0
@@ -863,7 +868,8 @@ _GLOBAL(ht64_call_hpte_insert1)
mr r4,r29 /* Retrieve vpn */
li r7,HPTE_V_SECONDARY /* !bolted, secondary */
li r8,MMU_PAGE_64K
- ld r9,STK_PARAM(R9)(r1) /* segment size */
+ li r9,MMU_PAGE_64K /* actual page size */
+ ld r10,STK_PARAM(R9)(r1) /* segment size */
_GLOBAL(ht64_call_hpte_insert2)
bl . /* patched by htab_finish_init() */
cmpdi 0,r3,0
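The assembler changes all follow one pattern: the hpte_insert back ends gain an "actual page size" argument, so r9 now carries the actual psize and the segment size moves to r10. Under the 64-bit ELF ABI (integer arguments in r3-r10) this lines up with the widened C prototype that appears later in this diff for the native backend:

/* r3 = hpte_group, r4 = vpn, r5 = pa, r6 = rflags,
 * r7 = vflags, r8 = psize (base), r9 = apsize (actual), r10 = ssize */
static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn,
			       unsigned long pa, unsigned long rflags,
			       unsigned long vflags, int psize, int apsize,
			       int ssize);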
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index ffc1e00..4c122c3 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -39,7 +39,7 @@
DEFINE_RAW_SPINLOCK(native_tlbie_lock);
-static inline void __tlbie(unsigned long vpn, int psize, int ssize)
+static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize)
{
unsigned long va;
unsigned int penc;
@@ -61,17 +61,31 @@ static inline void __tlbie(unsigned long vpn, int psize, int ssize)
switch (psize) {
case MMU_PAGE_4K:
+ /* clear out bits after (52) [0....52.....63] */
+ va &= ~((1ul << (64 - 52)) - 1);
va |= ssize << 8;
+ va |= mmu_psize_defs[apsize].sllp << 6;
asm volatile(ASM_FTR_IFCLR("tlbie %0,0", PPC_TLBIE(%1,%0), %2)
: : "r" (va), "r"(0), "i" (CPU_FTR_ARCH_206)
: "memory");
break;
default:
/* We need 14 to 14 + i bits of va */
- penc = mmu_psize_defs[psize].penc;
- va &= ~((1ul << mmu_psize_defs[psize].shift) - 1);
+ penc = mmu_psize_defs[psize].penc[apsize];
+ va &= ~((1ul << mmu_psize_defs[apsize].shift) - 1);
va |= penc << 12;
va |= ssize << 8;
+ /* Add AVAL part */
+ if (psize != apsize) {
+ /*
+ * MPSS, 64K base page size and 16MB large page size.
+ * We don't need all the bits, but the rest of the bits
+ * must be ignored by the processor.
+ * vpn covers up to 65 bits of va (0...65) and we need
+ * bits 58..64 of the va.
+ */
+ va |= (vpn & 0xfe);
+ }
va |= 1; /* L */
asm volatile(ASM_FTR_IFCLR("tlbie %0,1", PPC_TLBIE(%1,%0), %2)
: : "r" (va), "r"(0), "i" (CPU_FTR_ARCH_206)
@@ -80,7 +94,7 @@ static inline void __tlbie(unsigned long vpn, int psize, int ssize)
}
}
-static inline void __tlbiel(unsigned long vpn, int psize, int ssize)
+static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize)
{
unsigned long va;
unsigned int penc;
@@ -96,16 +110,30 @@ static inline void __tlbiel(unsigned long vpn, int psize, int ssize)
switch (psize) {
case MMU_PAGE_4K:
+ /* clear out bits after (52) [0....52.....63] */
+ va &= ~((1ul << (64 - 52)) - 1);
va |= ssize << 8;
+ va |= mmu_psize_defs[apsize].sllp << 6;
asm volatile(".long 0x7c000224 | (%0 << 11) | (0 << 21)"
: : "r"(va) : "memory");
break;
default:
/* We need 14 to 14 + i bits of va */
- penc = mmu_psize_defs[psize].penc;
- va &= ~((1ul << mmu_psize_defs[psize].shift) - 1);
+ penc = mmu_psize_defs[psize].penc[apsize];
+ va &= ~((1ul << mmu_psize_defs[apsize].shift) - 1);
va |= penc << 12;
va |= ssize << 8;
+ /* Add AVAL part */
+ if (psize != apsize) {
+ /*
+ * MPSS, 64K base page size and 16MB large page size.
+ * We don't need all the bits, but the rest of the bits
+ * must be ignored by the processor.
+ * vpn covers up to 65 bits of va (0...65) and we need
+ * bits 58..64 of the va.
+ */
+ va |= (vpn & 0xfe);
+ }
va |= 1; /* L */
asm volatile(".long 0x7c000224 | (%0 << 11) | (1 << 21)"
: : "r"(va) : "memory");
@@ -114,7 +142,8 @@ static inline void __tlbiel(unsigned long vpn, int psize, int ssize)
}
-static inline void tlbie(unsigned long vpn, int psize, int ssize, int local)
+static inline void tlbie(unsigned long vpn, int psize, int apsize,
+ int ssize, int local)
{
unsigned int use_local = local && mmu_has_feature(MMU_FTR_TLBIEL);
int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
@@ -125,10 +154,10 @@ static inline void tlbie(unsigned long vpn, int psize, int ssize, int local)
raw_spin_lock(&native_tlbie_lock);
asm volatile("ptesync": : :"memory");
if (use_local) {
- __tlbiel(vpn, psize, ssize);
+ __tlbiel(vpn, psize, apsize, ssize);
asm volatile("ptesync": : :"memory");
} else {
- __tlbie(vpn, psize, ssize);
+ __tlbie(vpn, psize, apsize, ssize);
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
if (lock_tlbie && !use_local)
@@ -156,7 +185,7 @@ static inline void native_unlock_hpte(struct hash_pte *hptep)
static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn,
unsigned long pa, unsigned long rflags,
- unsigned long vflags, int psize, int ssize)
+ unsigned long vflags, int psize, int apsize, int ssize)
{
struct hash_pte *hptep = htab_address + hpte_group;
unsigned long hpte_v, hpte_r;
@@ -183,8 +212,8 @@ static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn,
if (i == HPTES_PER_GROUP)
return -1;
- hpte_v = hpte_encode_v(vpn, psize, ssize) | vflags | HPTE_V_VALID;
- hpte_r = hpte_encode_r(pa, psize) | rflags;
+ hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID;
+ hpte_r = hpte_encode_r(pa, psize, apsize) | rflags;
if (!(vflags & HPTE_V_BOLTED)) {
DBG_LOW(" i=%x hpte_v=%016lx, hpte_r=%016lx\n",
@@ -244,6 +273,51 @@ static long native_hpte_remove(unsigned long hpte_group)
return i;
}
+static inline int __hpte_actual_psize(unsigned int lp, int psize)
+{
+ int i, shift;
+ unsigned int mask;
+
+ /* start from 1 ignoring MMU_PAGE_4K */
+ for (i = 1; i < MMU_PAGE_COUNT; i++) {
+
+ /* invalid penc */
+ if (mmu_psize_defs[psize].penc[i] == -1)
+ continue;
+ /*
+ * encoding bits per actual page size
+ * PTE LP actual page size
+ * rrrr rrrz >=8KB
+ * rrrr rrzz >=16KB
+ * rrrr rzzz >=32KB
+ * rrrr zzzz >=64KB
+ * .......
+ */
+ shift = mmu_psize_defs[i].shift - LP_SHIFT;
+ if (shift > LP_BITS)
+ shift = LP_BITS;
+ mask = (1 << shift) - 1;
+ if ((lp & mask) == mmu_psize_defs[psize].penc[i])
+ return i;
+ }
+ return -1;
+}
+
+static inline int hpte_actual_psize(struct hash_pte *hptep, int psize)
+{
+ /* Look at the 8 bit LP value */
+ unsigned int lp = (hptep->r >> LP_SHIFT) & ((1 << LP_BITS) - 1);
+
+ if (!(hptep->v & HPTE_V_VALID))
+ return -1;
+
+ /* First check if it is large page */
+ if (!(hptep->v & HPTE_V_LARGE))
+ return MMU_PAGE_4K;
+
+ return __hpte_actual_psize(lp, psize);
+}
+
static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
unsigned long vpn, int psize, int ssize,
int local)
@@ -251,8 +325,9 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
struct hash_pte *hptep = htab_address + slot;
unsigned long hpte_v, want_v;
int ret = 0;
+ int actual_psize;
- want_v = hpte_encode_v(vpn, psize, ssize);
+ want_v = hpte_encode_avpn(vpn, psize, ssize);
DBG_LOW(" update(vpn=%016lx, avpnv=%016lx, group=%lx, newpp=%lx)",
vpn, want_v & HPTE_V_AVPN, slot, newpp);
@@ -260,9 +335,20 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
native_lock_hpte(hptep);
hpte_v = hptep->v;
-
- /* Even if we miss, we need to invalidate the TLB */
- if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) {
+ actual_psize = hpte_actual_psize(hptep, psize);
+ /*
+ * We need to invalidate the TLB always because hpte_remove doesn't do
+ * a tlb invalidate. If a hash bucket gets full, we "evict" a more or less
+ * random entry from it. When we do that we don't invalidate the TLB
+ * (hpte_remove) because we assume the old translation is still
+ * technically "valid".
+ */
+ if (actual_psize < 0) {
+ actual_psize = psize;
+ ret = -1;
+ goto err_out;
+ }
+ if (!HPTE_V_COMPARE(hpte_v, want_v)) {
DBG_LOW(" -> miss\n");
ret = -1;
} else {
@@ -271,10 +357,11 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) |
(newpp & (HPTE_R_PP | HPTE_R_N | HPTE_R_C));
}
+err_out:
native_unlock_hpte(hptep);
/* Ensure it is out of the tlb too. */
- tlbie(vpn, psize, ssize, local);
+ tlbie(vpn, psize, actual_psize, ssize, local);
return ret;
}
@@ -288,7 +375,7 @@ static long native_hpte_find(unsigned long vpn, int psize, int ssize)
unsigned long want_v, hpte_v;
hash = hpt_hash(vpn, mmu_psize_defs[psize].shift, ssize);
- want_v = hpte_encode_v(vpn, psize, ssize);
+ want_v = hpte_encode_avpn(vpn, psize, ssize);
/* Bolted mappings are only ever in the primary group */
slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
@@ -315,6 +402,7 @@ static long native_hpte_find(unsigned long vpn, int psize, int ssize)
static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
int psize, int ssize)
{
+ int actual_psize;
unsigned long vpn;
unsigned long vsid;
long slot;
@@ -327,13 +415,16 @@ static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
if (slot == -1)
panic("could not find page to bolt\n");
hptep = htab_address + slot;
+ actual_psize = hpte_actual_psize(hptep, psize);
+ if (actual_psize < 0)
+ actual_psize = psize;
/* Update the HPTE */
hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) |
(newpp & (HPTE_R_PP | HPTE_R_N));
/* Ensure it is out of the tlb too. */
- tlbie(vpn, psize, ssize, 0);
+ tlbie(vpn, psize, actual_psize, ssize, 0);
}
static void native_hpte_invalidate(unsigned long slot, unsigned long vpn,
@@ -343,64 +434,66 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long vpn,
unsigned long hpte_v;
unsigned long want_v;
unsigned long flags;
+ int actual_psize;
local_irq_save(flags);
DBG_LOW(" invalidate(vpn=%016lx, hash: %lx)\n", vpn, slot);
- want_v = hpte_encode_v(vpn, psize, ssize);
+ want_v = hpte_encode_avpn(vpn, psize, ssize);
native_lock_hpte(hptep);
hpte_v = hptep->v;
- /* Even if we miss, we need to invalidate the TLB */
- if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
+ actual_psize = hpte_actual_psize(hptep, psize);
+ /*
+ * We need to invalidate the TLB always because hpte_remove doesn't do
+ * a tlb invalidate. If a hash bucket gets full, we "evict" a more or less
+ * random entry from it. When we do that we don't invalidate the TLB
+ * (hpte_remove) because we assume the old translation is still
+ * technically "valid".
+ */
+ if (actual_psize < 0) {
+ actual_psize = psize;
+ native_unlock_hpte(hptep);
+ goto err_out;
+ }
+ if (!HPTE_V_COMPARE(hpte_v, want_v))
native_unlock_hpte(hptep);
else
/* Invalidate the hpte. NOTE: this also unlocks it */
hptep->v = 0;
+err_out:
/* Invalidate the TLB */
- tlbie(vpn, psize, ssize, local);
-
+ tlbie(vpn, psize, actual_psize, ssize, local);
local_irq_restore(flags);
}
-#define LP_SHIFT 12
-#define LP_BITS 8
-#define LP_MASK(i) ((0xFF >> (i)) << LP_SHIFT)
-
static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
- int *psize, int *ssize, unsigned long *vpn)
+ int *psize, int *apsize, int *ssize, unsigned long *vpn)
{
unsigned long avpn, pteg, vpi;
- unsigned long hpte_r = hpte->r;
unsigned long hpte_v = hpte->v;
unsigned long vsid, seg_off;
- int i, size, shift, penc;
+ int size, a_size, shift;
+ /* Look at the 8 bit LP value */
+ unsigned int lp = (hpte->r >> LP_SHIFT) & ((1 << LP_BITS) - 1);
- if (!(hpte_v & HPTE_V_LARGE))
- size = MMU_PAGE_4K;
- else {
- for (i = 0; i < LP_BITS; i++) {
- if ((hpte_r & LP_MASK(i+1)) == LP_MASK(i+1))
- break;
- }
- penc = LP_MASK(i+1) >> LP_SHIFT;
+ if (!(hpte_v & HPTE_V_LARGE)) {
+ size = MMU_PAGE_4K;
+ a_size = MMU_PAGE_4K;
+ } else {
for (size = 0; size < MMU_PAGE_COUNT; size++) {
- /* 4K pages are not represented by LP */
- if (size == MMU_PAGE_4K)
- continue;
-
/* valid entries have a shift value */
if (!mmu_psize_defs[size].shift)
continue;
- if (penc == mmu_psize_defs[size].penc)
+ a_size = __hpte_actual_psize(lp, size);
+ if (a_size != -1)
break;
}
}
-
/* This works for all page sizes, and for 256M and 1T segments */
*ssize = hpte_v >> HPTE_V_SSIZE_SHIFT;
shift = mmu_psize_defs[size].shift;
@@ -433,7 +526,8 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
default:
*vpn = size = 0;
}
- *psize = size;
+ *psize = size;
+ *apsize = a_size;
}
/*
@@ -451,7 +545,7 @@ static void native_hpte_clear(void)
struct hash_pte *hptep = htab_address;
unsigned long hpte_v;
unsigned long pteg_count;
- int psize, ssize;
+ int psize, apsize, ssize;
pteg_count = htab_hash_mask + 1;
@@ -477,9 +571,9 @@ static void native_hpte_clear(void)
* already hold the native_tlbie_lock.
*/
if (hpte_v & HPTE_V_VALID) {
- hpte_decode(hptep, slot, &psize, &ssize, &vpn);
+ hpte_decode(hptep, slot, &psize, &apsize, &ssize, &vpn);
hptep->v = 0;
- __tlbie(vpn, psize, ssize);
+ __tlbie(vpn, psize, apsize, ssize);
}
}
@@ -520,7 +614,7 @@ static void native_flush_hash_range(unsigned long number, int local)
slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
slot += hidx & _PTEIDX_GROUP_IX;
hptep = htab_address + slot;
- want_v = hpte_encode_v(vpn, psize, ssize);
+ want_v = hpte_encode_avpn(vpn, psize, ssize);
native_lock_hpte(hptep);
hpte_v = hptep->v;
if (!HPTE_V_COMPARE(hpte_v, want_v) ||
@@ -540,7 +634,7 @@ static void native_flush_hash_range(unsigned long number, int local)
pte_iterate_hashed_subpages(pte, psize,
vpn, index, shift) {
- __tlbiel(vpn, psize, ssize);
+ __tlbiel(vpn, psize, psize, ssize);
} pte_iterate_hashed_end();
}
asm volatile("ptesync":::"memory");
@@ -557,7 +651,7 @@ static void native_flush_hash_range(unsigned long number, int local)
pte_iterate_hashed_subpages(pte, psize,
vpn, index, shift) {
- __tlbie(vpn, psize, ssize);
+ __tlbie(vpn, psize, psize, ssize);
} pte_iterate_hashed_end();
}
asm volatile("eieio; tlbsync; ptesync":::"memory");
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 3a292be..e303a6d 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -33,6 +33,7 @@
#include <linux/init.h>
#include <linux/signal.h>
#include <linux/memblock.h>
+#include <linux/context_tracking.h>
#include <asm/processor.h>
#include <asm/pgtable.h>
@@ -55,6 +56,7 @@
#include <asm/code-patching.h>
#include <asm/fadump.h>
#include <asm/firmware.h>
+#include <asm/tm.h>
#ifdef DEBUG
#define DBG(fmt...) udbg_printf(fmt)
@@ -125,7 +127,7 @@ static struct mmu_psize_def mmu_psize_defaults_old[] = {
[MMU_PAGE_4K] = {
.shift = 12,
.sllp = 0,
- .penc = 0,
+ .penc = {[MMU_PAGE_4K] = 0, [1 ... MMU_PAGE_COUNT - 1] = -1},
.avpnm = 0,
.tlbiel = 0,
},
@@ -139,14 +141,15 @@ static struct mmu_psize_def mmu_psize_defaults_gp[] = {
[MMU_PAGE_4K] = {
.shift = 12,
.sllp = 0,
- .penc = 0,
+ .penc = {[MMU_PAGE_4K] = 0, [1 ... MMU_PAGE_COUNT - 1] = -1},
.avpnm = 0,
.tlbiel = 1,
},
[MMU_PAGE_16M] = {
.shift = 24,
.sllp = SLB_VSID_L,
- .penc = 0,
+ .penc = {[0 ... MMU_PAGE_16M - 1] = -1, [MMU_PAGE_16M] = 0,
+ [MMU_PAGE_16M + 1 ... MMU_PAGE_COUNT - 1] = -1 },
.avpnm = 0x1UL,
.tlbiel = 0,
},
@@ -194,6 +197,11 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
unsigned long vpn = hpt_vpn(vaddr, vsid, ssize);
unsigned long tprot = prot;
+ /*
+ * If we hit a bad address return error.
+ */
+ if (!vsid)
+ return -1;
/* Make kernel text executable */
if (overlaps_kernel_text(vaddr, vaddr + step))
tprot &= ~HPTE_R_N;
@@ -203,7 +211,7 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
BUG_ON(!ppc_md.hpte_insert);
ret = ppc_md.hpte_insert(hpteg, vpn, paddr, tprot,
- HPTE_V_BOLTED, psize, ssize);
+ HPTE_V_BOLTED, psize, psize, ssize);
if (ret < 0)
break;
@@ -270,6 +278,30 @@ static void __init htab_init_seg_sizes(void)
of_scan_flat_dt(htab_dt_scan_seg_sizes, NULL);
}
+static int __init get_idx_from_shift(unsigned int shift)
+{
+ int idx = -1;
+
+ switch (shift) {
+ case 0xc:
+ idx = MMU_PAGE_4K;
+ break;
+ case 0x10:
+ idx = MMU_PAGE_64K;
+ break;
+ case 0x14:
+ idx = MMU_PAGE_1M;
+ break;
+ case 0x18:
+ idx = MMU_PAGE_16M;
+ break;
+ case 0x22:
+ idx = MMU_PAGE_16G;
+ break;
+ }
+ return idx;
+}
+
static int __init htab_dt_scan_page_sizes(unsigned long node,
const char *uname, int depth,
void *data)
@@ -285,64 +317,65 @@ static int __init htab_dt_scan_page_sizes(unsigned long node,
prop = (u32 *)of_get_flat_dt_prop(node,
"ibm,segment-page-sizes", &size);
if (prop != NULL) {
- DBG("Page sizes from device-tree:\n");
+ pr_info("Page sizes from device-tree:\n");
size /= 4;
cur_cpu_spec->mmu_features &= ~(MMU_FTR_16M_PAGE);
while(size > 0) {
- unsigned int shift = prop[0];
+ unsigned int base_shift = prop[0];
unsigned int slbenc = prop[1];
unsigned int lpnum = prop[2];
- unsigned int lpenc = 0;
struct mmu_psize_def *def;
- int idx = -1;
+ int idx, base_idx;
size -= 3; prop += 3;
- while(size > 0 && lpnum) {
- if (prop[0] == shift)
- lpenc = prop[1];
- prop += 2; size -= 2;
- lpnum--;
+ base_idx = get_idx_from_shift(base_shift);
+ if (base_idx < 0) {
+ /*
+ * skip the pte encoding also
+ */
+ prop += lpnum * 2; size -= lpnum * 2;
+ continue;
}
- switch(shift) {
- case 0xc:
- idx = MMU_PAGE_4K;
- break;
- case 0x10:
- idx = MMU_PAGE_64K;
- break;
- case 0x14:
- idx = MMU_PAGE_1M;
- break;
- case 0x18:
- idx = MMU_PAGE_16M;
+ def = &mmu_psize_defs[base_idx];
+ if (base_idx == MMU_PAGE_16M)
cur_cpu_spec->mmu_features |= MMU_FTR_16M_PAGE;
- break;
- case 0x22:
- idx = MMU_PAGE_16G;
- break;
- }
- if (idx < 0)
- continue;
- def = &mmu_psize_defs[idx];
- def->shift = shift;
- if (shift <= 23)
+
+ def->shift = base_shift;
+ if (base_shift <= 23)
def->avpnm = 0;
else
- def->avpnm = (1 << (shift - 23)) - 1;
+ def->avpnm = (1 << (base_shift - 23)) - 1;
def->sllp = slbenc;
- def->penc = lpenc;
- /* We don't know for sure what's up with tlbiel, so
+ /*
+ * We don't know for sure what's up with tlbiel, so
* for now we only set it for 4K and 64K pages
*/
- if (idx == MMU_PAGE_4K || idx == MMU_PAGE_64K)
+ if (base_idx == MMU_PAGE_4K || base_idx == MMU_PAGE_64K)
def->tlbiel = 1;
else
def->tlbiel = 0;
- DBG(" %d: shift=%02x, sllp=%04lx, avpnm=%08lx, "
- "tlbiel=%d, penc=%d\n",
- idx, shift, def->sllp, def->avpnm, def->tlbiel,
- def->penc);
+ while (size > 0 && lpnum) {
+ unsigned int shift = prop[0];
+ int penc = prop[1];
+
+ prop += 2; size -= 2;
+ lpnum--;
+
+ idx = get_idx_from_shift(shift);
+ if (idx < 0)
+ continue;
+
+ if (penc == -1)
+ pr_err("Invalid penc for base_shift=%d "
+ "shift=%d\n", base_shift, shift);
+
+ def->penc[idx] = penc;
+ pr_info("base_shift=%d: shift=%d, sllp=0x%04lx,"
+ " avpnm=0x%08lx, tlbiel=%d, penc=%d\n",
+ base_shift, shift, def->sllp,
+ def->avpnm, def->tlbiel, def->penc[idx]);
+ }
}
return 1;
}
@@ -391,10 +424,21 @@ static int __init htab_dt_scan_hugepage_blocks(unsigned long node,
}
#endif /* CONFIG_HUGETLB_PAGE */
+static void mmu_psize_set_default_penc(void)
+{
+ int bpsize, apsize;
+ for (bpsize = 0; bpsize < MMU_PAGE_COUNT; bpsize++)
+ for (apsize = 0; apsize < MMU_PAGE_COUNT; apsize++)
+ mmu_psize_defs[bpsize].penc[apsize] = -1;
+}
+
static void __init htab_init_page_sizes(void)
{
int rc;
+ /* set the invalid penc to -1 */
+ mmu_psize_set_default_penc();
+
/* Default to 4K pages only */
memcpy(mmu_psize_defs, mmu_psize_defaults_old,
sizeof(mmu_psize_defaults_old));
@@ -758,6 +802,8 @@ void __init early_init_mmu(void)
/* Initialize stab / SLB management */
if (mmu_has_feature(MMU_FTR_SLB))
slb_initialize();
+ else
+ stab_initialize(get_paca()->stab_real);
}
#ifdef CONFIG_SMP
@@ -891,14 +937,14 @@ static inline int subpage_protection(struct mm_struct *mm, unsigned long ea)
void hash_failure_debug(unsigned long ea, unsigned long access,
unsigned long vsid, unsigned long trap,
- int ssize, int psize, unsigned long pte)
+ int ssize, int psize, int lpsize, unsigned long pte)
{
if (!printk_ratelimit())
return;
pr_info("mm: Hashing failure ! EA=0x%lx access=0x%lx current=%s\n",
ea, access, current->comm);
- pr_info(" trap=0x%lx vsid=0x%lx ssize=%d psize=%d pte=0x%lx\n",
- trap, vsid, ssize, psize, pte);
+ pr_info(" trap=0x%lx vsid=0x%lx ssize=%d base psize=%d psize %d pte=0x%lx\n",
+ trap, vsid, ssize, psize, lpsize, pte);
}
/* Result code is:
@@ -909,6 +955,7 @@ void hash_failure_debug(unsigned long ea, unsigned long access,
*/
int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
{
+ enum ctx_state prev_state = exception_enter();
pgd_t *pgdir;
unsigned long vsid;
struct mm_struct *mm;
@@ -921,11 +968,6 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
DBG_LOW("hash_page(ea=%016lx, access=%lx, trap=%lx\n",
ea, access, trap);
- if ((ea & ~REGION_MASK) >= PGTABLE_RANGE) {
- DBG_LOW(" out of pgtable range !\n");
- return 1;
- }
-
/* Get region & vsid */
switch (REGION_ID(ea)) {
case USER_REGION_ID:
@@ -933,7 +975,8 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
mm = current->mm;
if (! mm) {
DBG_LOW(" user region with no mm !\n");
- return 1;
+ rc = 1;
+ goto bail;
}
psize = get_slice_psize(mm, ea);
ssize = user_segment_size(ea);
@@ -952,14 +995,23 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
/* Not a valid range
* Send the problem up to do_page_fault
*/
- return 1;
+ rc = 1;
+ goto bail;
}
DBG_LOW(" mm=%p, mm->pgdir=%p, vsid=%016lx\n", mm, mm->pgd, vsid);
+ /* Bad address. */
+ if (!vsid) {
+ DBG_LOW("Bad address!\n");
+ rc = 1;
+ goto bail;
+ }
/* Get pgdir */
pgdir = mm->pgd;
- if (pgdir == NULL)
- return 1;
+ if (pgdir == NULL) {
+ rc = 1;
+ goto bail;
+ }
/* Check CPU locality */
tmp = cpumask_of(smp_processor_id());
@@ -982,7 +1034,8 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
ptep = find_linux_pte_or_hugepte(pgdir, ea, &hugeshift);
if (ptep == NULL || !pte_present(*ptep)) {
DBG_LOW(" no PTE !\n");
- return 1;
+ rc = 1;
+ goto bail;
}
/* Add _PAGE_PRESENT to the required access perm */
@@ -993,13 +1046,16 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
*/
if (access & ~pte_val(*ptep)) {
DBG_LOW(" no access !\n");
- return 1;
+ rc = 1;
+ goto bail;
}
#ifdef CONFIG_HUGETLB_PAGE
- if (hugeshift)
- return __hash_page_huge(ea, access, vsid, ptep, trap, local,
+ if (hugeshift) {
+ rc = __hash_page_huge(ea, access, vsid, ptep, trap, local,
ssize, hugeshift, psize);
+ goto bail;
+ }
#endif /* CONFIG_HUGETLB_PAGE */
#ifndef CONFIG_PPC_64K_PAGES
@@ -1071,7 +1127,7 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
*/
if (rc == -1)
hash_failure_debug(ea, access, vsid, trap, ssize, psize,
- pte_val(*ptep));
+ psize, pte_val(*ptep));
#ifndef CONFIG_PPC_64K_PAGES
DBG_LOW(" o-pte: %016lx\n", pte_val(*ptep));
#else
@@ -1079,6 +1135,9 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
pte_val(*(ptep + PTRS_PER_PTE)));
#endif
DBG_LOW(" -> rc=%d\n", rc);
+
+bail:
+ exception_exit(prev_state);
return rc;
}
EXPORT_SYMBOL_GPL(hash_page);
@@ -1125,6 +1184,8 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
/* Get VSID */
ssize = user_segment_size(ea);
vsid = get_vsid(mm->context.id, ea, ssize);
+ if (!vsid)
+ return;
/* Hash doesn't like irqs */
local_irq_save(flags);
@@ -1147,7 +1208,9 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
*/
if (rc == -1)
hash_failure_debug(ea, access, vsid, trap, ssize,
- mm->context.user_psize, pte_val(*ptep));
+ mm->context.user_psize,
+ mm->context.user_psize,
+ pte_val(*ptep));
local_irq_restore(flags);
}
@@ -1171,6 +1234,22 @@ void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize,
DBG_LOW(" sub %ld: hash=%lx, hidx=%lx\n", index, slot, hidx);
ppc_md.hpte_invalidate(slot, vpn, psize, ssize, local);
} pte_iterate_hashed_end();
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ /* Transactions are not aborted by tlbiel, only tlbie.
+ * Without this, syncing a page back to a block device with PIO could pick up
+ * transactional data (bad!) so we force an abort here. Before the
+ * sync the page will be made read-only, which will flush_hash_page.
+ * BIG ISSUE here: if the kernel uses a page from userspace without
+ * unmapping it first, it may see the speculated version.
+ */
+ if (local && cpu_has_feature(CPU_FTR_TM) &&
+ current->thread.regs &&
+ MSR_TM_ACTIVE(current->thread.regs->msr)) {
+ tm_enable();
+ tm_abort(TM_CAUSE_TLBI);
+ }
+#endif
}
void flush_hash_range(unsigned long number, int local)
@@ -1194,6 +1273,8 @@ void flush_hash_range(unsigned long number, int local)
*/
void low_hash_fault(struct pt_regs *regs, unsigned long address, int rc)
{
+ enum ctx_state prev_state = exception_enter();
+
if (user_mode(regs)) {
#ifdef CONFIG_PPC_SUBPAGE_PROT
if (rc == -2)
@@ -1203,23 +1284,64 @@ void low_hash_fault(struct pt_regs *regs, unsigned long address, int rc)
_exception(SIGBUS, regs, BUS_ADRERR, address);
} else
bad_page_fault(regs, address, SIGBUS);
+
+ exception_exit(prev_state);
+}
+
+long hpte_insert_repeating(unsigned long hash, unsigned long vpn,
+ unsigned long pa, unsigned long rflags,
+ unsigned long vflags, int psize, int ssize)
+{
+ unsigned long hpte_group;
+ long slot;
+
+repeat:
+ hpte_group = ((hash & htab_hash_mask) *
+ HPTES_PER_GROUP) & ~0x7UL;
+
+ /* Insert into the hash table, primary slot */
+ slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, vflags,
+ psize, psize, ssize);
+
+ /* Primary is full, try the secondary */
+ if (unlikely(slot == -1)) {
+ hpte_group = ((~hash & htab_hash_mask) *
+ HPTES_PER_GROUP) & ~0x7UL;
+ slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags,
+ vflags | HPTE_V_SECONDARY,
+ psize, psize, ssize);
+ if (slot == -1) {
+ if (mftb() & 0x1)
+ hpte_group = ((hash & htab_hash_mask) *
+ HPTES_PER_GROUP)&~0x7UL;
+
+ ppc_md.hpte_remove(hpte_group);
+ goto repeat;
+ }
+ }
+
+ return slot;
}
#ifdef CONFIG_DEBUG_PAGEALLOC
static void kernel_map_linear_page(unsigned long vaddr, unsigned long lmi)
{
- unsigned long hash, hpteg;
+ unsigned long hash;
unsigned long vsid = get_kernel_vsid(vaddr, mmu_kernel_ssize);
unsigned long vpn = hpt_vpn(vaddr, vsid, mmu_kernel_ssize);
unsigned long mode = htab_convert_pte_flags(PAGE_KERNEL);
- int ret;
+ long ret;
hash = hpt_hash(vpn, PAGE_SHIFT, mmu_kernel_ssize);
- hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
- ret = ppc_md.hpte_insert(hpteg, vpn, __pa(vaddr),
- mode, HPTE_V_BOLTED,
- mmu_linear_psize, mmu_kernel_ssize);
+ /* Don't create HPTE entries for bad address */
+ if (!vsid)
+ return;
+
+ ret = hpte_insert_repeating(hash, vpn, __pa(vaddr), mode,
+ HPTE_V_BOLTED,
+ mmu_linear_psize, mmu_kernel_ssize);
+
BUG_ON (ret < 0);
spin_lock(&linear_map_hash_lock);
BUG_ON(linear_map_hash_slots[lmi] & 0x80);
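hash_utils_64.c is where the penc scalar becomes a two-dimensional table: one row per base page size, one column per actual page size, with mmu_psize_set_default_penc() presetting every pair to -1 ("no encoding") before the ibm,segment-page-sizes scan fills in the real values. A sketch of that table shape (the indices and encoding values below are illustrative, not taken from a device tree):

#include <string.h>

#define MMU_PAGE_COUNT 16
#define MMU_PAGE_4K    0
#define MMU_PAGE_64K   2
#define MMU_PAGE_16M   7

static int penc[MMU_PAGE_COUNT][MMU_PAGE_COUNT];

static void set_default_penc(void)
{
	/* -1 is all-ones in every byte, so memset works per int */
	memset(penc, -1, sizeof(penc));
}

int main(void)
{
	set_default_penc();
	penc[MMU_PAGE_64K][MMU_PAGE_64K] = 0x1;	 /* example encodings */
	penc[MMU_PAGE_64K][MMU_PAGE_16M] = 0x38; /* MPSS: 16M in a 64K segment */
	return penc[MMU_PAGE_4K][MMU_PAGE_16M] != -1; /* 0: no such encoding */
}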
diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c
index cecad34..0f1d94a 100644
--- a/arch/powerpc/mm/hugetlbpage-hash64.c
+++ b/arch/powerpc/mm/hugetlbpage-hash64.c
@@ -14,6 +14,10 @@
#include <asm/cacheflush.h>
#include <asm/machdep.h>
+extern long hpte_insert_repeating(unsigned long hash, unsigned long vpn,
+ unsigned long pa, unsigned long rflags,
+ unsigned long vflags, int psize, int ssize);
+
int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
pte_t *ptep, unsigned long trap, int local, int ssize,
unsigned int shift, unsigned int mmu_psize)
@@ -83,14 +87,9 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
if (likely(!(old_pte & _PAGE_HASHPTE))) {
unsigned long hash = hpt_hash(vpn, shift, ssize);
- unsigned long hpte_group;
pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT;
-repeat:
- hpte_group = ((hash & htab_hash_mask) *
- HPTES_PER_GROUP) & ~0x7UL;
-
/* clear HPTE slot information in new PTE */
#ifdef CONFIG_PPC_64K_PAGES
new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HPTE_SUB0;
@@ -101,26 +100,8 @@ repeat:
rflags |= (new_pte & (_PAGE_WRITETHRU | _PAGE_NO_CACHE |
_PAGE_COHERENT | _PAGE_GUARDED));
- /* Insert into the hash table, primary slot */
- slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, 0,
- mmu_psize, ssize);
-
- /* Primary is full, try the secondary */
- if (unlikely(slot == -1)) {
- hpte_group = ((~hash & htab_hash_mask) *
- HPTES_PER_GROUP) & ~0x7UL;
- slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags,
- HPTE_V_SECONDARY,
- mmu_psize, ssize);
- if (slot == -1) {
- if (mftb() & 0x1)
- hpte_group = ((hash & htab_hash_mask) *
- HPTES_PER_GROUP)&~0x7UL;
-
- ppc_md.hpte_remove(hpte_group);
- goto repeat;
- }
- }
+ slot = hpte_insert_repeating(hash, vpn, pa, rflags, 0,
+ mmu_psize, ssize);
/*
* Hypervisor failure. Restore old pte and return -1
@@ -129,7 +110,7 @@ repeat:
if (unlikely(slot == -2)) {
*ptep = __pte(old_pte);
hash_failure_debug(ea, access, vsid, trap, ssize,
- mmu_psize, old_pte);
+ mmu_psize, mmu_psize, old_pte);
return -1;
}
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 1a6de0a..77fdd2c 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -48,30 +48,71 @@ static u64 gpage_freearray[MAX_NUMBER_GPAGES];
static unsigned nr_gpages;
#endif
-static inline int shift_to_mmu_psize(unsigned int shift)
+#define hugepd_none(hpd) ((hpd).pd == 0)
+
+#ifdef CONFIG_PPC_BOOK3S_64
+/*
+ * At this point we do the placement change only for BOOK3S 64. This would
+ * possibly work on other subarchs.
+ */
+
+/*
+ * We have PGD_INDEX_SIZE = 12 and PTE_INDEX_SIZE = 8, so that we can have
+ * a 16GB hugepage pte in the PGD and a 16MB hugepage pte at the PMD.
+ */
+int pmd_huge(pmd_t pmd)
{
- int psize;
+ /*
+ * leaf pte for huge page, bottom two bits != 00
+ */
+ return ((pmd_val(pmd) & 0x3) != 0x0);
+}
- for (psize = 0; psize < MMU_PAGE_COUNT; ++psize)
- if (mmu_psize_defs[psize].shift == shift)
- return psize;
- return -1;
+int pud_huge(pud_t pud)
+{
+ /*
+ * leaf pte for huge page, bottom two bits != 00
+ */
+ return ((pud_val(pud) & 0x3) != 0x0);
}
-static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize)
+int pgd_huge(pgd_t pgd)
{
- if (mmu_psize_defs[mmu_psize].shift)
- return mmu_psize_defs[mmu_psize].shift;
- BUG();
+ /*
+ * leaf pte for huge page, bottom two bits != 00
+ */
+ return ((pgd_val(pgd) & 0x3) != 0x0);
+}
+#else
+int pmd_huge(pmd_t pmd)
+{
+ return 0;
}
-#define hugepd_none(hpd) ((hpd).pd == 0)
+int pud_huge(pud_t pud)
+{
+ return 0;
+}
+int pgd_huge(pgd_t pgd)
+{
+ return 0;
+}
+#endif
+
+/*
+ * We have 4 cases for pgds and pmds:
+ * (1) invalid (all zeroes)
+ * (2) pointer to next table, as normal; bottom 6 bits == 0
+ * (3) leaf pte for huge page, bottom two bits != 00
+ * (4) hugepd pointer, bottom two bits == 00, next 4 bits indicate size of table
+ */
pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift)
{
pgd_t *pg;
pud_t *pu;
pmd_t *pm;
+ pte_t *ret_pte;
hugepd_t *hpdp = NULL;
unsigned pdshift = PGDIR_SHIFT;
@@ -79,30 +120,43 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift
*shift = 0;
pg = pgdir + pgd_index(ea);
- if (is_hugepd(pg)) {
+
+ if (pgd_huge(*pg)) {
+ ret_pte = (pte_t *) pg;
+ goto out;
+ } else if (is_hugepd(pg))
hpdp = (hugepd_t *)pg;
- } else if (!pgd_none(*pg)) {
+ else if (!pgd_none(*pg)) {
pdshift = PUD_SHIFT;
pu = pud_offset(pg, ea);
- if (is_hugepd(pu))
+
+ if (pud_huge(*pu)) {
+ ret_pte = (pte_t *) pu;
+ goto out;
+ } else if (is_hugepd(pu))
hpdp = (hugepd_t *)pu;
else if (!pud_none(*pu)) {
pdshift = PMD_SHIFT;
pm = pmd_offset(pu, ea);
- if (is_hugepd(pm))
+
+ if (pmd_huge(*pm)) {
+ ret_pte = (pte_t *) pm;
+ goto out;
+ } else if (is_hugepd(pm))
hpdp = (hugepd_t *)pm;
- else if (!pmd_none(*pm)) {
+ else if (!pmd_none(*pm))
return pte_offset_kernel(pm, ea);
- }
}
}
-
if (!hpdp)
return NULL;
+ ret_pte = hugepte_offset(hpdp, ea, pdshift);
+ pdshift = hugepd_shift(*hpdp);
+out:
if (shift)
- *shift = hugepd_shift(*hpdp);
- return hugepte_offset(hpdp, ea, pdshift);
+ *shift = pdshift;
+ return ret_pte;
}
EXPORT_SYMBOL_GPL(find_linux_pte_or_hugepte);
@@ -145,6 +199,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
if (unlikely(!hugepd_none(*hpdp)))
break;
else
+ /* We use the old format for PPC_FSL_BOOK3E */
hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift;
}
/* If we bailed from the for loop early, an error occurred, clean up */
@@ -156,9 +211,15 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
#else
if (!hugepd_none(*hpdp))
kmem_cache_free(cachep, new);
- else
+ else {
+#ifdef CONFIG_PPC_BOOK3S_64
+ hpdp->pd = (unsigned long)new |
+ (shift_to_mmu_psize(pshift) << 2);
+#else
hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift;
#endif
+ }
+#endif
spin_unlock(&mm->page_table_lock);
return 0;
}
@@ -175,6 +236,61 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
#define HUGEPD_PUD_SHIFT PMD_SHIFT
#endif
+#ifdef CONFIG_PPC_BOOK3S_64
+/*
+ * At this point we do the placement change only for BOOK3S 64. This would
+ * possibly work on other subarchs.
+ */
+pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz)
+{
+ pgd_t *pg;
+ pud_t *pu;
+ pmd_t *pm;
+ hugepd_t *hpdp = NULL;
+ unsigned pshift = __ffs(sz);
+ unsigned pdshift = PGDIR_SHIFT;
+
+ addr &= ~(sz-1);
+ pg = pgd_offset(mm, addr);
+
+ if (pshift == PGDIR_SHIFT)
+ /* 16GB huge page */
+ return (pte_t *) pg;
+ else if (pshift > PUD_SHIFT)
+ /*
+ * We need to use hugepd table
+ */
+ hpdp = (hugepd_t *)pg;
+ else {
+ pdshift = PUD_SHIFT;
+ pu = pud_alloc(mm, pg, addr);
+ if (pshift == PUD_SHIFT)
+ return (pte_t *)pu;
+ else if (pshift > PMD_SHIFT)
+ hpdp = (hugepd_t *)pu;
+ else {
+ pdshift = PMD_SHIFT;
+ pm = pmd_alloc(mm, pu, addr);
+ if (pshift == PMD_SHIFT)
+ /* 16MB hugepage */
+ return (pte_t *)pm;
+ else
+ hpdp = (hugepd_t *)pm;
+ }
+ }
+ if (!hpdp)
+ return NULL;
+
+ BUG_ON(!hugepd_none(*hpdp) && !hugepd_ok(*hpdp));
+
+ if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, pdshift, pshift))
+ return NULL;
+
+ return hugepte_offset(hpdp, addr, pdshift);
+}
+
+#else
+
pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz)
{
pgd_t *pg;
@@ -212,6 +328,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz
return hugepte_offset(hpdp, addr, pdshift);
}
+#endif
#ifdef CONFIG_PPC_FSL_BOOK3E
/* Build list of addresses of gigantic pages. This function is used in early
@@ -475,8 +592,14 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
do {
pmd = pmd_offset(pud, addr);
next = pmd_addr_end(addr, end);
- if (pmd_none(*pmd))
+ if (!is_hugepd(pmd)) {
+ /*
+ * if it is not a hugepd pointer, we should already find
+ * it cleared.
+ */
+ WARN_ON(!pmd_none_or_clear_bad(pmd));
continue;
+ }
#ifdef CONFIG_PPC_FSL_BOOK3E
/*
* Increment next by the size of the huge mapping since
@@ -628,16 +751,6 @@ follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
return page;
}
-int pmd_huge(pmd_t pmd)
-{
- return 0;
-}
-
-int pud_huge(pud_t pud)
-{
- return 0;
-}
-
struct page *
follow_huge_pmd(struct mm_struct *mm, unsigned long address,
pmd_t *pmd, int write)
@@ -646,8 +759,8 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address,
return NULL;
}
-static noinline int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
- unsigned long end, int write, struct page **pages, int *nr)
+int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
+ unsigned long end, int write, struct page **pages, int *nr)
{
unsigned long mask;
unsigned long pte_end;
@@ -742,7 +855,7 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
struct hstate *hstate = hstate_file(file);
int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate));
- return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1, 0);
+ return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1);
}
#endif
@@ -883,11 +996,16 @@ static int __init hugetlbpage_init(void)
pdshift = PUD_SHIFT;
else
pdshift = PGDIR_SHIFT;
-
- pgtable_cache_add(pdshift - shift, NULL);
- if (!PGT_CACHE(pdshift - shift))
- panic("hugetlbpage_init(): could not create "
- "pgtable cache for %d bit pagesize\n", shift);
+ /*
+ * if pdshift and shift have the same value, we don't
+ * use the pgtable cache for hugepd.
+ */
+ if (pdshift != shift) {
+ pgtable_cache_add(pdshift - shift, NULL);
+ if (!PGT_CACHE(pdshift - shift))
+ panic("hugetlbpage_init(): could not create "
+ "pgtable cache for %d bit pagesize\n", shift);
+ }
}
/* Set default large page size. Currently, we pick 16M or 1M
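hugetlbpage.c now keys everything off the four entry kinds listed in the comment near the top of the file: invalid, next-table pointer, leaf huge pte (bottom two bits != 00), and hugepd pointer. A simplified classifier under that BOOK3S 64 convention (the hugepd-versus-table distinction is elided here, since telling those apart needs more than the low bits):

enum entry_kind { E_INVALID, E_TABLE_OR_HUGEPD, E_HUGE_LEAF };

static enum entry_kind classify(unsigned long val)
{
	if (val == 0)
		return E_INVALID;		/* case (1): all zeroes */
	if ((val & 0x3) != 0)
		return E_HUGE_LEAF;		/* case (3): leaf huge pte */
	return E_TABLE_OR_HUGEPD;		/* cases (2) and (4) */
}

int main(void) { return classify(0x3) == E_HUGE_LEAF ? 0 : 1; }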
diff --git a/arch/powerpc/mm/icswx.c b/arch/powerpc/mm/icswx.c
index 8cdbd86..915412e 100644
--- a/arch/powerpc/mm/icswx.c
+++ b/arch/powerpc/mm/icswx.c
@@ -67,7 +67,7 @@
void switch_cop(struct mm_struct *next)
{
-#ifdef CONFIG_ICSWX_PID
+#ifdef CONFIG_PPC_ICSWX_PID
mtspr(SPRN_PID, next->context.cop_pid);
#endif
mtspr(SPRN_ACOP, next->context.acop);
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 95a4529..a90b9c4 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -129,8 +129,7 @@ void pgtable_cache_add(unsigned shift, void (*ctor)(void *))
align = max_t(unsigned long, align, minalign);
name = kasprintf(GFP_KERNEL, "pgtable-2^%d", shift);
new = kmem_cache_create(name, table_size, align, 0, ctor);
- PGT_CACHE(shift) = new;
-
+ pgtable_cache[shift - 1] = new;
pr_debug("Allocated pgtable cache for order %d\n", shift);
}
@@ -216,7 +215,8 @@ static void __meminit vmemmap_create_mapping(unsigned long start,
unsigned long phys)
{
int mapped = htab_bolt_mapping(start, start + page_size, phys,
- PAGE_KERNEL, mmu_vmemmap_psize,
+ pgprot_val(PAGE_KERNEL),
+ mmu_vmemmap_psize,
mmu_kernel_ssize);
BUG_ON(mapped < 0);
}
@@ -263,19 +263,14 @@ static __meminit void vmemmap_list_populate(unsigned long phys,
vmemmap_list = vmem_back;
}
-int __meminit vmemmap_populate(struct page *start_page,
- unsigned long nr_pages, int node)
+int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
{
- unsigned long start = (unsigned long)start_page;
- unsigned long end = (unsigned long)(start_page + nr_pages);
unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift;
/* Align to the page size of the linear mapping. */
start = _ALIGN_DOWN(start, page_size);
- pr_debug("vmemmap_populate page %p, %ld pages, node %d\n",
- start_page, nr_pages, node);
- pr_debug(" -> map %lx..%lx\n", start, end);
+ pr_debug("vmemmap_populate %lx..%lx, node %d\n", start, end, node);
for (; start < end; start += page_size) {
void *p;
@@ -297,5 +292,10 @@ int __meminit vmemmap_populate(struct page *start_page,
return 0;
}
+
+void vmemmap_free(unsigned long start, unsigned long end)
+{
+}
+
#endif /* CONFIG_SPARSEMEM_VMEMMAP */
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 1a8f694..af90b90 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -70,10 +70,9 @@ unsigned long long memory_limit;
#ifdef CONFIG_HIGHMEM
pte_t *kmap_pte;
+EXPORT_SYMBOL(kmap_pte);
pgprot_t kmap_prot;
-
EXPORT_SYMBOL(kmap_prot);
-EXPORT_SYMBOL(kmap_pte);
static inline pte_t *virt_to_kpte(unsigned long vaddr)
{
@@ -137,6 +136,18 @@ int arch_add_memory(int nid, u64 start, u64 size)
return __add_pages(nid, zone, start_pfn, nr_pages);
}
+
+#ifdef CONFIG_MEMORY_HOTREMOVE
+int arch_remove_memory(u64 start, u64 size)
+{
+ unsigned long start_pfn = start >> PAGE_SHIFT;
+ unsigned long nr_pages = size >> PAGE_SHIFT;
+ struct zone *zone;
+
+ zone = page_zone(pfn_to_page(start_pfn));
+ return __remove_pages(zone, start_pfn, nr_pages);
+}
+#endif
#endif /* CONFIG_MEMORY_HOTPLUG */
/*
@@ -199,13 +210,10 @@ void __init do_init_bootmem(void)
min_low_pfn = MEMORY_START >> PAGE_SHIFT;
boot_mapsize = init_bootmem_node(NODE_DATA(0), start >> PAGE_SHIFT, min_low_pfn, max_low_pfn);
- /* Add active regions with valid PFNs */
- for_each_memblock(memory, reg) {
- unsigned long start_pfn, end_pfn;
- start_pfn = memblock_region_memory_base_pfn(reg);
- end_pfn = memblock_region_memory_end_pfn(reg);
- memblock_set_node(0, (phys_addr_t)ULLONG_MAX, 0);
- }
+ /* Place all memblock_regions in the same node and merge contiguous
+ * memblock_regions
+ */
+ memblock_set_node(0, (phys_addr_t)ULLONG_MAX, 0);
/* Add all physical memory to the bootmem map, mark each area
* present.
@@ -349,13 +357,9 @@ void __init mem_init(void)
struct page *page = pfn_to_page(pfn);
if (memblock_is_reserved(paddr))
continue;
- ClearPageReserved(page);
- init_page_count(page);
- __free_page(page);
- totalhigh_pages++;
+ free_highmem_page(page);
reservedpages--;
}
- totalram_pages += totalhigh_pages;
printk(KERN_DEBUG "High memory: %luk\n",
totalhigh_pages << (PAGE_SHIFT-10));
}
@@ -402,39 +406,14 @@ void __init mem_init(void)
void free_initmem(void)
{
- unsigned long addr;
-
ppc_md.progress = ppc_printk_progress;
-
- addr = (unsigned long)__init_begin;
- for (; addr < (unsigned long)__init_end; addr += PAGE_SIZE) {
- memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE);
- ClearPageReserved(virt_to_page(addr));
- init_page_count(virt_to_page(addr));
- free_page(addr);
- totalram_pages++;
- }
- pr_info("Freeing unused kernel memory: %luk freed\n",
- ((unsigned long)__init_end -
- (unsigned long)__init_begin) >> 10);
+ free_initmem_default(POISON_FREE_INITMEM);
}
#ifdef CONFIG_BLK_DEV_INITRD
void __init free_initrd_mem(unsigned long start, unsigned long end)
{
- if (start >= end)
- return;
-
- start = _ALIGN_DOWN(start, PAGE_SIZE);
- end = _ALIGN_UP(end, PAGE_SIZE);
- pr_info("Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
-
- for (; start < end; start += PAGE_SIZE) {
- ClearPageReserved(virt_to_page(start));
- init_page_count(virt_to_page(start));
- free_page(start);
- totalram_pages++;
- }
+ free_reserved_area(start, end, 0, "initrd");
}
#endif
diff --git a/arch/powerpc/mm/mmu_context_hash64.c b/arch/powerpc/mm/mmu_context_hash64.c
index 40bc5b0..178876ae 100644
--- a/arch/powerpc/mm/mmu_context_hash64.c
+++ b/arch/powerpc/mm/mmu_context_hash64.c
@@ -23,21 +23,13 @@
#include <linux/slab.h>
#include <asm/mmu_context.h>
+#include <asm/pgalloc.h>
#include "icswx.h"
static DEFINE_SPINLOCK(mmu_context_lock);
static DEFINE_IDA(mmu_context_ida);
-/*
- * 256MB segment
- * The proto-VSID space has 2^(CONTEX_BITS + USER_ESID_BITS) - 1 segments
- * available for user mappings. Each segment contains 2^28 bytes. Each
- * context maps 2^46 bytes (64TB) so we can support 2^19-1 contexts
- * (19 == 37 + 28 - 46).
- */
-#define MAX_CONTEXT ((1UL << CONTEXT_BITS) - 1)
-
int __init_new_context(void)
{
int index;
@@ -56,7 +48,7 @@ again:
else if (err)
return err;
- if (index > MAX_CONTEXT) {
+ if (index > MAX_USER_CONTEXT) {
spin_lock(&mmu_context_lock);
ida_remove(&mmu_context_ida, index);
spin_unlock(&mmu_context_lock);
@@ -94,6 +86,9 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
spin_lock_init(mm->context.cop_lockp);
#endif /* CONFIG_PPC_ICSWX */
+#ifdef CONFIG_PPC_64K_PAGES
+ mm->context.pte_frag = NULL;
+#endif
return 0;
}
@@ -105,13 +100,46 @@ void __destroy_context(int context_id)
}
EXPORT_SYMBOL_GPL(__destroy_context);
+#ifdef CONFIG_PPC_64K_PAGES
+static void destroy_pagetable_page(struct mm_struct *mm)
+{
+ int count;
+ void *pte_frag;
+ struct page *page;
+
+ pte_frag = mm->context.pte_frag;
+ if (!pte_frag)
+ return;
+
+ page = virt_to_page(pte_frag);
+ /* drop all the pending references */
+ count = ((unsigned long)pte_frag & ~PAGE_MASK) >> PTE_FRAG_SIZE_SHIFT;
+ /* We allow PTE_FRAG_NR fragments from a PTE page */
+ count = atomic_sub_return(PTE_FRAG_NR - count, &page->_count);
+ if (!count) {
+ pgtable_page_dtor(page);
+ free_hot_cold_page(page, 0);
+ }
+}
+
+#else
+static inline void destroy_pagetable_page(struct mm_struct *mm)
+{
+ return;
+}
+#endif
+
+
void destroy_context(struct mm_struct *mm)
{
+
#ifdef CONFIG_PPC_ICSWX
drop_cop(mm->context.acop, mm);
kfree(mm->context.cop_lockp);
mm->context.cop_lockp = NULL;
#endif /* CONFIG_PPC_ICSWX */
+
+ destroy_pagetable_page(mm);
__destroy_context(mm->context.id);
subpage_prot_free(mm);
mm->context.id = MMU_NO_CONTEXT;
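The new destroy_pagetable_page() relies on the 64K-page PTE-fragment scheme: the low bits of mm->context.pte_frag encode how many fragments of the current PTE page were handed out, and teardown drops references for the PTE_FRAG_NR - count fragments that never were. A worked sketch of that arithmetic (the constants assume a 64K page split into sixteen 4K fragments, which is an assumption, not something stated in this diff):

#include <stdio.h>

#define PAGE_MASK	(~0xffffUL)	/* 64K page */
#define PTE_FRAG_SIZE_SHIFT 12		/* 4K fragments */
#define PTE_FRAG_NR	16

int main(void)
{
	/* cursor sits 5 fragments into the fourth 64K page */
	unsigned long pte_frag = 0x10000UL * 3 + 0x5000;
	int used = (pte_frag & ~PAGE_MASK) >> PTE_FRAG_SIZE_SHIFT;

	/* teardown drops the references never handed out: 16 - 5 = 11 */
	printf("dropping %d pending refs\n", PTE_FRAG_NR - used);
	return 0;
}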
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index bba87ca..88c0425 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -22,6 +22,11 @@
#include <linux/pfn.h>
#include <linux/cpuset.h>
#include <linux/node.h>
+#include <linux/stop_machine.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/uaccess.h>
+#include <linux/slab.h>
#include <asm/sparsemem.h>
#include <asm/prom.h>
#include <asm/smp.h>
@@ -29,6 +34,7 @@
#include <asm/paca.h>
#include <asm/hvcall.h>
#include <asm/setup.h>
+#include <asm/vdso.h>
static int numa_enabled = 1;
@@ -62,14 +68,11 @@ static int distance_lookup_table[MAX_NUMNODES][MAX_DISTANCE_REF_POINTS];
*/
static void __init setup_node_to_cpumask_map(void)
{
- unsigned int node, num = 0;
+ unsigned int node;
/* setup nr_node_ids if not done yet */
- if (nr_node_ids == MAX_NUMNODES) {
- for_each_node_mask(node, node_possible_map)
- num = node;
- nr_node_ids = num + 1;
- }
+ if (nr_node_ids == MAX_NUMNODES)
+ setup_nr_node_ids();
/* allocate the map */
for (node = 0; node < nr_node_ids; node++)
@@ -79,7 +82,7 @@ static void __init setup_node_to_cpumask_map(void)
dbg("Node to cpumask map for %d nodes\n", nr_node_ids);
}
-static int __cpuinit fake_numa_create_new_node(unsigned long end_pfn,
+static int __init fake_numa_create_new_node(unsigned long end_pfn,
unsigned int *nid)
{
unsigned long long mem;
@@ -201,7 +204,7 @@ int __node_distance(int a, int b)
int distance = LOCAL_DISTANCE;
if (!form1_affinity)
- return distance;
+ return ((a == b) ? LOCAL_DISTANCE : REMOTE_DISTANCE);
for (i = 0; i < distance_ref_points_depth; i++) {
if (distance_lookup_table[a][i] == distance_lookup_table[b][i])
@@ -291,9 +294,7 @@ EXPORT_SYMBOL_GPL(of_node_to_nid);
static int __init find_min_common_depth(void)
{
int depth;
- struct device_node *chosen;
struct device_node *root;
- const char *vec5;
if (firmware_has_feature(FW_FEATURE_OPAL))
root = of_find_node_by_path("/ibm,opal");
@@ -325,24 +326,10 @@ static int __init find_min_common_depth(void)
distance_ref_points_depth /= sizeof(int);
-#define VEC5_AFFINITY_BYTE 5
-#define VEC5_AFFINITY 0x80
-
- if (firmware_has_feature(FW_FEATURE_OPAL))
+ if (firmware_has_feature(FW_FEATURE_OPAL) ||
+ firmware_has_feature(FW_FEATURE_TYPE1_AFFINITY)) {
+ dbg("Using form 1 affinity\n");
form1_affinity = 1;
- else {
- chosen = of_find_node_by_path("/chosen");
- if (chosen) {
- vec5 = of_get_property(chosen,
- "ibm,architecture-vec-5", NULL);
- if (vec5 && (vec5[VEC5_AFFINITY_BYTE] &
- VEC5_AFFINITY)) {
- dbg("Using form 1 affinity\n");
- form1_affinity = 1;
- }
-
- of_node_put(chosen);
- }
}
if (form1_affinity) {
@@ -1270,10 +1257,18 @@ u64 memory_hotplug_max(void)
/* Virtual Processor Home Node (VPHN) support */
#ifdef CONFIG_PPC_SPLPAR
+struct topology_update_data {
+ struct topology_update_data *next;
+ unsigned int cpu;
+ int old_nid;
+ int new_nid;
+};
+
static u8 vphn_cpu_change_counts[NR_CPUS][MAX_DISTANCE_REF_POINTS];
static cpumask_t cpu_associativity_changes_mask;
static int vphn_enabled;
-static void set_topology_timer(void);
+static int prrn_enabled;
+static void reset_topology_timer(void);
/*
* Store the current values of the associativity change counters in the
@@ -1309,11 +1304,9 @@ static void setup_cpu_associativity_change_counters(void)
*/
static int update_cpu_associativity_changes_mask(void)
{
- int cpu, nr_cpus = 0;
+ int cpu;
cpumask_t *changes = &cpu_associativity_changes_mask;
- cpumask_clear(changes);
-
for_each_possible_cpu(cpu) {
int i, changed = 0;
u8 *counts = vphn_cpu_change_counts[cpu];
@@ -1327,11 +1320,10 @@ static int update_cpu_associativity_changes_mask(void)
}
if (changed) {
cpumask_set_cpu(cpu, changes);
- nr_cpus++;
}
}
- return nr_cpus;
+ return cpumask_weight(changes);
}
/*
@@ -1423,40 +1415,84 @@ static long vphn_get_associativity(unsigned long cpu,
}
/*
+ * Update the CPU maps and sysfs entries for a single CPU when its NUMA
+ * characteristics change. This function doesn't perform any locking and is
+ * only safe to call from stop_machine().
+ */
+static int update_cpu_topology(void *data)
+{
+ struct topology_update_data *update;
+ unsigned long cpu;
+
+ if (!data)
+ return -EINVAL;
+
+ cpu = get_cpu();
+
+ for (update = data; update; update = update->next) {
+ if (cpu != update->cpu)
+ continue;
+
+ unregister_cpu_under_node(update->cpu, update->old_nid);
+ unmap_cpu_from_node(update->cpu);
+ map_cpu_to_node(update->cpu, update->new_nid);
+ vdso_getcpu_init();
+ register_cpu_under_node(update->cpu, update->new_nid);
+ }
+
+ return 0;
+}
+
+/*
* Update the node maps and sysfs entries for each cpu whose home node
* has changed. Returns 1 when the topology has changed, and 0 otherwise.
*/
int arch_update_cpu_topology(void)
{
- int cpu, nid, old_nid, changed = 0;
+ unsigned int cpu, changed = 0;
+ struct topology_update_data *updates, *ud;
unsigned int associativity[VPHN_ASSOC_BUFSIZE] = {0};
+ cpumask_t updated_cpus;
struct device *dev;
+ int weight, i = 0;
+
+ weight = cpumask_weight(&cpu_associativity_changes_mask);
+ if (!weight)
+ return 0;
+
+ updates = kzalloc(weight * (sizeof(*updates)), GFP_KERNEL);
+ if (!updates)
+ return 0;
- for_each_cpu(cpu,&cpu_associativity_changes_mask) {
+ cpumask_clear(&updated_cpus);
+
+ for_each_cpu(cpu, &cpu_associativity_changes_mask) {
+ ud = &updates[i++];
+ ud->cpu = cpu;
vphn_get_associativity(cpu, associativity);
- nid = associativity_to_nid(associativity);
+ ud->new_nid = associativity_to_nid(associativity);
- if (nid < 0 || !node_online(nid))
- nid = first_online_node;
+ if (ud->new_nid < 0 || !node_online(ud->new_nid))
+ ud->new_nid = first_online_node;
- old_nid = numa_cpu_lookup_table[cpu];
+ ud->old_nid = numa_cpu_lookup_table[cpu];
+ cpumask_set_cpu(cpu, &updated_cpus);
- /* Disable hotplug while we update the cpu
- * masks and sysfs.
- */
- get_online_cpus();
- unregister_cpu_under_node(cpu, old_nid);
- unmap_cpu_from_node(cpu);
- map_cpu_to_node(cpu, nid);
- register_cpu_under_node(cpu, nid);
- put_online_cpus();
-
- dev = get_cpu_device(cpu);
+ if (i < weight)
+ ud->next = &updates[i];
+ }
+
+ stop_machine(update_cpu_topology, &updates[0], &updated_cpus);
+
+ for (ud = &updates[0]; ud; ud = ud->next) {
+ dev = get_cpu_device(ud->cpu);
if (dev)
kobject_uevent(&dev->kobj, KOBJ_CHANGE);
+ cpumask_clear_cpu(ud->cpu, &cpu_associativity_changes_mask);
changed = 1;
}
+ kfree(updates);
return changed;
}
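
The per-CPU hotplug lock/unlock dance is gone: all pending moves are captured up front in one kzalloc'd array whose entries are chained through ->next, and stop_machine() runs update_cpu_topology() on the CPUs in updated_cpus, each of which walks the chain and applies only its own entry. A minimal userspace sketch of that walk (field names mirror the patch; the running CPU id is passed in explicitly rather than read via get_cpu()):

    #include <stdio.h>

    struct topology_update_data {
    	struct topology_update_data *next;
    	unsigned int cpu;
    	int old_nid;
    	int new_nid;
    };

    /* What each CPU does inside the stop_machine() callback: scan the
     * shared chain, act only on its own entry. */
    static void apply_updates(const struct topology_update_data *head,
    			  unsigned int this_cpu)
    {
    	const struct topology_update_data *ud;

    	for (ud = head; ud; ud = ud->next) {
    		if (ud->cpu != this_cpu)
    			continue;
    		printf("cpu %u: node %d -> %d\n",
    		       ud->cpu, ud->old_nid, ud->new_nid);
    	}
    }

    int main(void)
    {
    	struct topology_update_data ud[2] = {
    		{ .next = &ud[1], .cpu = 2, .old_nid = 0, .new_nid = 1 },
    		{ .next = NULL,   .cpu = 5, .old_nid = 1, .new_nid = 0 },
    	};

    	apply_updates(&ud[0], 2);	/* only cpu 2's entry fires */
    	return 0;
    }
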
@@ -1473,49 +1509,165 @@ void topology_schedule_update(void)
static void topology_timer_fn(unsigned long ignored)
{
- if (!vphn_enabled)
- return;
- if (update_cpu_associativity_changes_mask() > 0)
+ if (prrn_enabled && cpumask_weight(&cpu_associativity_changes_mask))
topology_schedule_update();
- set_topology_timer();
+ else if (vphn_enabled) {
+ if (update_cpu_associativity_changes_mask() > 0)
+ topology_schedule_update();
+ reset_topology_timer();
+ }
}
static struct timer_list topology_timer =
TIMER_INITIALIZER(topology_timer_fn, 0, 0);
-static void set_topology_timer(void)
+static void reset_topology_timer(void)
{
topology_timer.data = 0;
topology_timer.expires = jiffies + 60 * HZ;
- add_timer(&topology_timer);
+ mod_timer(&topology_timer, topology_timer.expires);
+}
+
+#ifdef CONFIG_SMP
+
+static void stage_topology_update(int core_id)
+{
+ cpumask_or(&cpu_associativity_changes_mask,
+ &cpu_associativity_changes_mask, cpu_sibling_mask(core_id));
+ reset_topology_timer();
}
+static int dt_update_callback(struct notifier_block *nb,
+ unsigned long action, void *data)
+{
+ struct of_prop_reconfig *update;
+ int rc = NOTIFY_DONE;
+
+ switch (action) {
+ case OF_RECONFIG_UPDATE_PROPERTY:
+ update = (struct of_prop_reconfig *)data;
+ if (!of_prop_cmp(update->dn->type, "cpu") &&
+ !of_prop_cmp(update->prop->name, "ibm,associativity")) {
+ u32 core_id;
+ of_property_read_u32(update->dn, "reg", &core_id);
+ stage_topology_update(core_id);
+ rc = NOTIFY_OK;
+ }
+ break;
+ }
+
+ return rc;
+}
+
+static struct notifier_block dt_update_nb = {
+ .notifier_call = dt_update_callback,
+};
+
+#endif
+
/*
- * Start polling for VPHN associativity changes.
+ * Start polling for associativity changes.
*/
int start_topology_update(void)
{
int rc = 0;
- /* Disabled until races with load balancing are fixed */
- if (0 && firmware_has_feature(FW_FEATURE_VPHN) &&
- get_lppaca()->shared_proc) {
- vphn_enabled = 1;
- setup_cpu_associativity_change_counters();
- init_timer_deferrable(&topology_timer);
- set_topology_timer();
- rc = 1;
+ if (firmware_has_feature(FW_FEATURE_PRRN)) {
+ if (!prrn_enabled) {
+ prrn_enabled = 1;
+ vphn_enabled = 0;
+#ifdef CONFIG_SMP
+ rc = of_reconfig_notifier_register(&dt_update_nb);
+#endif
+ }
+ } else if (firmware_has_feature(FW_FEATURE_VPHN) &&
+ get_lppaca()->shared_proc) {
+ if (!vphn_enabled) {
+ prrn_enabled = 0;
+ vphn_enabled = 1;
+ setup_cpu_associativity_change_counters();
+ init_timer_deferrable(&topology_timer);
+ reset_topology_timer();
+ }
}
return rc;
}
-__initcall(start_topology_update);
/*
* Disable polling for VPHN associativity changes.
*/
int stop_topology_update(void)
{
- vphn_enabled = 0;
- return del_timer_sync(&topology_timer);
+ int rc = 0;
+
+ if (prrn_enabled) {
+ prrn_enabled = 0;
+#ifdef CONFIG_SMP
+ rc = of_reconfig_notifier_unregister(&dt_update_nb);
+#endif
+ } else if (vphn_enabled) {
+ vphn_enabled = 0;
+ rc = del_timer_sync(&topology_timer);
+ }
+
+ return rc;
+}
+
+int prrn_is_enabled(void)
+{
+ return prrn_enabled;
+}
+
+static int topology_read(struct seq_file *file, void *v)
+{
+ if (vphn_enabled || prrn_enabled)
+ seq_puts(file, "on\n");
+ else
+ seq_puts(file, "off\n");
+
+ return 0;
+}
+
+static int topology_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, topology_read, NULL);
+}
+
+static ssize_t topology_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *off)
+{
+ char kbuf[4]; /* "on" or "off" plus null. */
+ int read_len;
+
+ read_len = count < 3 ? count : 3;
+ if (copy_from_user(kbuf, buf, read_len))
+ return -EINVAL;
+
+ kbuf[read_len] = '\0';
+
+ if (!strncmp(kbuf, "on", 2))
+ start_topology_update();
+ else if (!strncmp(kbuf, "off", 3))
+ stop_topology_update();
+ else
+ return -EINVAL;
+
+ return count;
+}
+
+static const struct file_operations topology_ops = {
+ .read = seq_read,
+ .write = topology_write,
+ .open = topology_open,
+ .release = single_release,
+};
+
+static int topology_update_init(void)
+{
+ start_topology_update();
+ proc_create("powerpc/topology_updates", 644, NULL, &topology_ops);
+
+ return 0;
}
+device_initcall(topology_update_init);
#endif /* CONFIG_PPC_SPLPAR */
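
The new /proc/powerpc/topology_updates file reports "on"/"off" through seq_file and accepts the same strings to toggle updates at runtime (e.g. echo off > /proc/powerpc/topology_updates). The bounded parse in topology_write() is easiest to see in isolation; a userspace restatement, assuming the same 3-byte cap and prefix matching:

    #include <stdio.h>
    #include <string.h>

    /* Copy at most three bytes, NUL-terminate, match "on"/"off". */
    static int parse_toggle(const char *buf, size_t count)
    {
    	char kbuf[4];
    	size_t len = count < 3 ? count : 3;

    	memcpy(kbuf, buf, len);	/* copy_from_user() in the patch */
    	kbuf[len] = '\0';

    	if (!strncmp(kbuf, "on", 2))
    		return 1;	/* start_topology_update() */
    	if (!strncmp(kbuf, "off", 3))
    		return 0;	/* stop_topology_update() */
    	return -1;		/* -EINVAL */
    }

    int main(void)
    {
    	printf("%d %d %d\n", parse_toggle("on\n", 3),
    	       parse_toggle("off", 3), parse_toggle("x", 1));
    	return 0;
    }
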
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index e212a27..a854096 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -61,7 +61,7 @@
#endif
#ifdef CONFIG_PPC_STD_MMU_64
-#if TASK_SIZE_USER64 > (1UL << (USER_ESID_BITS + SID_SHIFT))
+#if TASK_SIZE_USER64 > (1UL << (ESID_BITS + SID_SHIFT))
#error TASK_SIZE_USER64 exceeds user VSID range
#endif
#endif
@@ -337,3 +337,121 @@ EXPORT_SYMBOL(__ioremap_at);
EXPORT_SYMBOL(iounmap);
EXPORT_SYMBOL(__iounmap);
EXPORT_SYMBOL(__iounmap_at);
+
+#ifdef CONFIG_PPC_64K_PAGES
+static pte_t *get_from_cache(struct mm_struct *mm)
+{
+ void *pte_frag, *ret;
+
+ spin_lock(&mm->page_table_lock);
+ ret = mm->context.pte_frag;
+ if (ret) {
+ pte_frag = ret + PTE_FRAG_SIZE;
+ /*
+ * If we have taken up all the fragments, mark the PTE page NULL
+ */
+ if (((unsigned long)pte_frag & ~PAGE_MASK) == 0)
+ pte_frag = NULL;
+ mm->context.pte_frag = pte_frag;
+ }
+ spin_unlock(&mm->page_table_lock);
+ return (pte_t *)ret;
+}
+
+static pte_t *__alloc_for_cache(struct mm_struct *mm, int kernel)
+{
+ void *ret = NULL;
+ struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK |
+ __GFP_REPEAT | __GFP_ZERO);
+ if (!page)
+ return NULL;
+
+ ret = page_address(page);
+ spin_lock(&mm->page_table_lock);
+ /*
+ * If we find pgtable_page set, we return
+ * the allocated page with single fragment
+ * count.
+ */
+ if (likely(!mm->context.pte_frag)) {
+ atomic_set(&page->_count, PTE_FRAG_NR);
+ mm->context.pte_frag = ret + PTE_FRAG_SIZE;
+ }
+ spin_unlock(&mm->page_table_lock);
+
+ if (!kernel)
+ pgtable_page_ctor(page);
+
+ return (pte_t *)ret;
+}
+
+pte_t *page_table_alloc(struct mm_struct *mm, unsigned long vmaddr, int kernel)
+{
+ pte_t *pte;
+
+ pte = get_from_cache(mm);
+ if (pte)
+ return pte;
+
+ return __alloc_for_cache(mm, kernel);
+}
+
+void page_table_free(struct mm_struct *mm, unsigned long *table, int kernel)
+{
+ struct page *page = virt_to_page(table);
+ if (put_page_testzero(page)) {
+ if (!kernel)
+ pgtable_page_dtor(page);
+ free_hot_cold_page(page, 0);
+ }
+}
+
+#ifdef CONFIG_SMP
+static void page_table_free_rcu(void *table)
+{
+ struct page *page = virt_to_page(table);
+ if (put_page_testzero(page)) {
+ pgtable_page_dtor(page);
+ free_hot_cold_page(page, 0);
+ }
+}
+
+void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
+{
+ unsigned long pgf = (unsigned long)table;
+
+ BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
+ pgf |= shift;
+ tlb_remove_table(tlb, (void *)pgf);
+}
+
+void __tlb_remove_table(void *_table)
+{
+ void *table = (void *)((unsigned long)_table & ~MAX_PGTABLE_INDEX_SIZE);
+ unsigned shift = (unsigned long)_table & MAX_PGTABLE_INDEX_SIZE;
+
+ if (!shift)
+ /* PTE page needs special handling */
+ page_table_free_rcu(table);
+ else {
+ BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
+ kmem_cache_free(PGT_CACHE(shift), table);
+ }
+}
+#else
+void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
+{
+ if (!shift) {
+ /* PTE page needs special handling */
+ struct page *page = virt_to_page(table);
+ if (put_page_testzero(page)) {
+ pgtable_page_dtor(page);
+ free_hot_cold_page(page, 0);
+ }
+ } else {
+ BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
+ kmem_cache_free(PGT_CACHE(shift), table);
+ }
+}
+#endif
+#endif /* CONFIG_PPC_64K_PAGES */
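
With 64K pages a full page of PTEs is far larger than a 4K mapping needs, so the new allocator carves each page into PTE_FRAG_NR fragments and hands them out from mm->context.pte_frag until the cursor wraps to a page boundary; the page's _count is seeded with PTE_FRAG_NR so the page is only freed once every fragment has been released. A userspace sketch of the cursor logic, with 64K/4K sizes assumed (the real PTE_FRAG_SIZE/PTE_FRAG_NR come from the mmu headers):

    #include <stdio.h>
    #include <stdlib.h>

    #define PAGE_SZ		(64 * 1024)	/* assumed 64K kernel page */
    #define PTE_FRAG_SIZE	4096		/* assumed fragment size */
    #define PTE_FRAG_NR	(PAGE_SZ / PTE_FRAG_SIZE)

    struct mm_ctx {
    	void *pte_frag;			/* next free fragment, or NULL */
    };

    /* Mirror of get_from_cache(): hand out the cursor, advance it one
     * fragment, and clear it when it wraps to a page boundary. */
    static void *frag_get(struct mm_ctx *ctx)
    {
    	char *ret = ctx->pte_frag;

    	if (ret) {
    		char *next = ret + PTE_FRAG_SIZE;

    		if (((unsigned long)next & (PAGE_SZ - 1)) == 0)
    			next = NULL;
    		ctx->pte_frag = next;
    	}
    	return ret;
    }

    int main(void)
    {
    	struct mm_ctx ctx;
    	void *page;
    	int i;

    	if (posix_memalign(&page, PAGE_SZ, PAGE_SZ))
    		return 1;
    	/* As in __alloc_for_cache(): the caller keeps fragment 0, the
    	 * cache cursor starts at fragment 1. */
    	ctx.pte_frag = (char *)page + PTE_FRAG_SIZE;

    	for (i = 1; i < PTE_FRAG_NR; i++)
    		printf("fragment %2d: %p\n", i, frag_get(&ctx));
    	printf("exhausted: %p\n", frag_get(&ctx));	/* (nil) */

    	free(page);
    	return 0;
    }
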
diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
index 1a16ca2..17aa6df 100644
--- a/arch/powerpc/mm/slb_low.S
+++ b/arch/powerpc/mm/slb_low.S
@@ -31,10 +31,15 @@
* No other registers are examined or changed.
*/
_GLOBAL(slb_allocate_realmode)
- /* r3 = faulting address */
+ /*
+ * check for bad kernel/user address
+ * (ea & ~REGION_MASK) >= PGTABLE_RANGE
+ */
+ rldicr. r9,r3,4,(63 - 46 - 4)
+ bne- 8f
srdi r9,r3,60 /* get region */
- srdi r10,r3,28 /* get esid */
+ srdi r10,r3,SID_SHIFT /* get esid */
cmpldi cr7,r9,0xc /* cmp PAGE_OFFSET for later use */
/* r3 = address, r10 = esid, cr7 = <> PAGE_OFFSET */
@@ -56,12 +61,14 @@ _GLOBAL(slb_allocate_realmode)
*/
_GLOBAL(slb_miss_kernel_load_linear)
li r11,0
- li r9,0x1
/*
- * for 1T we shift 12 bits more. slb_finish_load_1T will do
- * the necessary adjustment
+ * context = (MAX_USER_CONTEXT) + ((ea >> 60) - 0xc) + 1
+ * r9 = region id.
*/
- rldimi r10,r9,(CONTEXT_BITS + USER_ESID_BITS),0
+ addis r9,r9,(MAX_USER_CONTEXT - 0xc + 1)@ha
+ addi r9,r9,(MAX_USER_CONTEXT - 0xc + 1)@l
+
BEGIN_FTR_SECTION
b slb_finish_load
END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT)
@@ -91,24 +98,19 @@ _GLOBAL(slb_miss_kernel_load_vmemmap)
_GLOBAL(slb_miss_kernel_load_io)
li r11,0
6:
- li r9,0x1
/*
- * for 1T we shift 12 bits more. slb_finish_load_1T will do
- * the necessary adjustment
+ * context = (MAX_USER_CONTEXT) + ((ea >> 60) - 0xc) + 1
+ * r9 = region id.
*/
- rldimi r10,r9,(CONTEXT_BITS + USER_ESID_BITS),0
+ addis r9,r9,(MAX_USER_CONTEXT - 0xc + 1)@ha
+ addi r9,r9,(MAX_USER_CONTEXT - 0xc + 1)@l
+
BEGIN_FTR_SECTION
b slb_finish_load
END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT)
b slb_finish_load_1T
-0: /* user address: proto-VSID = context << 15 | ESID. First check
- * if the address is within the boundaries of the user region
- */
- srdi. r9,r10,USER_ESID_BITS
- bne- 8f /* invalid ea bits set */
-
-
+0:
/* when using slices, we extract the psize off the slice bitmaps
* and then we need to get the sllp encoding off the mmu_psize_defs
* array.
@@ -164,15 +166,13 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT)
ld r9,PACACONTEXTID(r13)
BEGIN_FTR_SECTION
cmpldi r10,0x1000
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
- rldimi r10,r9,USER_ESID_BITS,0
-BEGIN_FTR_SECTION
bge slb_finish_load_1T
END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
b slb_finish_load
8: /* invalid EA */
li r10,0 /* BAD_VSID */
+ li r9,0 /* BAD_VSID */
li r11,SLB_VSID_USER /* flags don't much matter */
b slb_finish_load
@@ -221,8 +221,6 @@ _GLOBAL(slb_allocate_user)
/* get context to calculate proto-VSID */
ld r9,PACACONTEXTID(r13)
- rldimi r10,r9,USER_ESID_BITS,0
-
/* fall through slb_finish_load */
#endif /* __DISABLED__ */
@@ -231,9 +229,10 @@ _GLOBAL(slb_allocate_user)
/*
* Finish loading of an SLB entry and return
*
- * r3 = EA, r10 = proto-VSID, r11 = flags, clobbers r9, cr7 = <> PAGE_OFFSET
+ * r3 = EA, r9 = context, r10 = ESID, r11 = flags, clobbers r9, cr7 = <> PAGE_OFFSET
*/
slb_finish_load:
+ rldimi r10,r9,ESID_BITS,0
ASM_VSID_SCRAMBLE(r10,r9,256M)
/*
* bits above VSID_BITS_256M need to be ignored from r10
@@ -298,10 +297,11 @@ _GLOBAL(slb_compare_rr_to_size)
/*
* Finish loading of a 1T SLB entry (for the kernel linear mapping) and return.
*
- * r3 = EA, r10 = proto-VSID, r11 = flags, clobbers r9
+ * r3 = EA, r9 = context, r10 = ESID(256MB), r11 = flags, clobbers r9
*/
slb_finish_load_1T:
- srdi r10,r10,40-28 /* get 1T ESID */
+ srdi r10,r10,(SID_SHIFT_1T - SID_SHIFT) /* get 1T ESID */
+ rldimi r10,r9,ESID_BITS_1T,0
ASM_VSID_SCRAMBLE(r10,r9,1T)
/*
* bits above VSID_BITS_1T need to be ignored from r10
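
The net effect across slb_finish_load/slb_finish_load_1T: the proto-VSID fed to ASM_VSID_SCRAMBLE is now built as (context << ESID_BITS) | ESID, and kernel regions derive a pseudo-context of MAX_USER_CONTEXT + region_id - 0xc + 1 instead of the old hardwired 1. A C restatement of the 256M case, with constant values assumed from the corresponding mmu-hash64.h changes:

    #include <stdio.h>

    #define SID_SHIFT	 28
    #define ESID_BITS	 18			/* assumed, per the new layout */
    #define MAX_USER_CONTEXT ((1ul << 19) - 4)	/* assumed CONTEXT_BITS = 19 */

    /* proto-VSID for a 256M segment: context above, ESID below. */
    static unsigned long proto_vsid_256M(unsigned long context, unsigned long ea)
    {
    	unsigned long esid = (ea >> SID_SHIFT) & ((1ul << ESID_BITS) - 1);

    	return (context << ESID_BITS) | esid;
    }

    /* Kernel regions (0xc-0xf) borrow contexts just above the user range,
     * as computed in the slb_miss_kernel_load_* paths. */
    static unsigned long kernel_context(unsigned long ea)
    {
    	return MAX_USER_CONTEXT + (ea >> 60) - 0xc + 1;
    }

    int main(void)
    {
    	unsigned long ea = 0xc000000012345678ul;

    	printf("context %lx -> proto-VSID %lx\n",
    	       kernel_context(ea), proto_vsid_256M(kernel_context(ea), ea));
    	return 0;
    }
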
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index cf9dada..3e99c14 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -237,134 +237,112 @@ static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psiz
#endif
}
+/*
+ * Compute which slice addr is part of;
+ * set *boundary_addr to the start or end boundary of that slice
+ * (depending on the 'end' parameter);
+ * return a boolean indicating whether the slice is marked as available
+ * in the 'available' slice_mask.
+ */
+static bool slice_scan_available(unsigned long addr,
+ struct slice_mask available,
+ int end,
+ unsigned long *boundary_addr)
+{
+ unsigned long slice;
+ if (addr < SLICE_LOW_TOP) {
+ slice = GET_LOW_SLICE_INDEX(addr);
+ *boundary_addr = (slice + end) << SLICE_LOW_SHIFT;
+ return !!(available.low_slices & (1u << slice));
+ } else {
+ slice = GET_HIGH_SLICE_INDEX(addr);
+ *boundary_addr = (slice + end) ?
+ ((slice + end) << SLICE_HIGH_SHIFT) : SLICE_LOW_TOP;
+ return !!(available.high_slices & (1u << slice));
+ }
+}
+
static unsigned long slice_find_area_bottomup(struct mm_struct *mm,
unsigned long len,
struct slice_mask available,
- int psize, int use_cache)
+ int psize)
{
- struct vm_area_struct *vma;
- unsigned long start_addr, addr;
- struct slice_mask mask;
int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
-
- if (use_cache) {
- if (len <= mm->cached_hole_size) {
- start_addr = addr = TASK_UNMAPPED_BASE;
- mm->cached_hole_size = 0;
- } else
- start_addr = addr = mm->free_area_cache;
- } else
- start_addr = addr = TASK_UNMAPPED_BASE;
-
-full_search:
- for (;;) {
- addr = _ALIGN_UP(addr, 1ul << pshift);
- if ((TASK_SIZE - len) < addr)
- break;
- vma = find_vma(mm, addr);
- BUG_ON(vma && (addr >= vma->vm_end));
-
- mask = slice_range_to_mask(addr, len);
- if (!slice_check_fit(mask, available)) {
- if (addr < SLICE_LOW_TOP)
- addr = _ALIGN_UP(addr + 1, 1ul << SLICE_LOW_SHIFT);
- else
- addr = _ALIGN_UP(addr + 1, 1ul << SLICE_HIGH_SHIFT);
+ unsigned long addr, found, next_end;
+ struct vm_unmapped_area_info info;
+
+ info.flags = 0;
+ info.length = len;
+ info.align_mask = PAGE_MASK & ((1ul << pshift) - 1);
+ info.align_offset = 0;
+
+ addr = TASK_UNMAPPED_BASE;
+ while (addr < TASK_SIZE) {
+ info.low_limit = addr;
+ if (!slice_scan_available(addr, available, 1, &addr))
continue;
+
+ next_slice:
+ /*
+ * At this point [info.low_limit; addr) covers
+ * available slices only and ends at a slice boundary.
+ * Check if we need to reduce the range, or if we can
+ * extend it to cover the next available slice.
+ */
+ if (addr >= TASK_SIZE)
+ addr = TASK_SIZE;
+ else if (slice_scan_available(addr, available, 1, &next_end)) {
+ addr = next_end;
+ goto next_slice;
}
- if (!vma || addr + len <= vma->vm_start) {
- /*
- * Remember the place where we stopped the search:
- */
- if (use_cache)
- mm->free_area_cache = addr + len;
- return addr;
- }
- if (use_cache && (addr + mm->cached_hole_size) < vma->vm_start)
- mm->cached_hole_size = vma->vm_start - addr;
- addr = vma->vm_end;
- }
+ info.high_limit = addr;
- /* Make sure we didn't miss any holes */
- if (use_cache && start_addr != TASK_UNMAPPED_BASE) {
- start_addr = addr = TASK_UNMAPPED_BASE;
- mm->cached_hole_size = 0;
- goto full_search;
+ found = vm_unmapped_area(&info);
+ if (!(found & ~PAGE_MASK))
+ return found;
}
+
return -ENOMEM;
}
static unsigned long slice_find_area_topdown(struct mm_struct *mm,
unsigned long len,
struct slice_mask available,
- int psize, int use_cache)
+ int psize)
{
- struct vm_area_struct *vma;
- unsigned long addr;
- struct slice_mask mask;
int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
+ unsigned long addr, found, prev;
+ struct vm_unmapped_area_info info;
- /* check if free_area_cache is useful for us */
- if (use_cache) {
- if (len <= mm->cached_hole_size) {
- mm->cached_hole_size = 0;
- mm->free_area_cache = mm->mmap_base;
- }
-
- /* either no address requested or can't fit in requested
- * address hole
- */
- addr = mm->free_area_cache;
-
- /* make sure it can fit in the remaining address space */
- if (addr > len) {
- addr = _ALIGN_DOWN(addr - len, 1ul << pshift);
- mask = slice_range_to_mask(addr, len);
- if (slice_check_fit(mask, available) &&
- slice_area_is_free(mm, addr, len))
- /* remember the address as a hint for
- * next time
- */
- return (mm->free_area_cache = addr);
- }
- }
+ info.flags = VM_UNMAPPED_AREA_TOPDOWN;
+ info.length = len;
+ info.align_mask = PAGE_MASK & ((1ul << pshift) - 1);
+ info.align_offset = 0;
addr = mm->mmap_base;
- while (addr > len) {
- /* Go down by chunk size */
- addr = _ALIGN_DOWN(addr - len, 1ul << pshift);
-
- /* Check for hit with different page size */
- mask = slice_range_to_mask(addr, len);
- if (!slice_check_fit(mask, available)) {
- if (addr < SLICE_LOW_TOP)
- addr = _ALIGN_DOWN(addr, 1ul << SLICE_LOW_SHIFT);
- else if (addr < (1ul << SLICE_HIGH_SHIFT))
- addr = SLICE_LOW_TOP;
- else
- addr = _ALIGN_DOWN(addr, 1ul << SLICE_HIGH_SHIFT);
+ while (addr > PAGE_SIZE) {
+ info.high_limit = addr;
+ if (!slice_scan_available(addr - 1, available, 0, &addr))
continue;
- }
+ prev_slice:
/*
- * Lookup failure means no vma is above this address,
- * else if new region fits below vma->vm_start,
- * return with success:
+ * At this point [addr; info.high_limit) covers
+ * available slices only and starts at a slice boundary.
+ * Check if we need to reduce the range, or if we can
+ * extend it to cover the previous available slice.
*/
- vma = find_vma(mm, addr);
- if (!vma || (addr + len) <= vma->vm_start) {
- /* remember the address as a hint for next time */
- if (use_cache)
- mm->free_area_cache = addr;
- return addr;
+ if (addr < PAGE_SIZE)
+ addr = PAGE_SIZE;
+ else if (slice_scan_available(addr - 1, available, 0, &prev)) {
+ addr = prev;
+ goto prev_slice;
}
+ info.low_limit = addr;
- /* remember the largest hole we saw so far */
- if (use_cache && (addr + mm->cached_hole_size) < vma->vm_start)
- mm->cached_hole_size = vma->vm_start - addr;
-
- /* try just below the current vma->vm_start */
- addr = vma->vm_start;
+ found = vm_unmapped_area(&info);
+ if (!(found & ~PAGE_MASK))
+ return found;
}
/*
@@ -373,28 +351,18 @@ static unsigned long slice_find_area_topdown(struct mm_struct *mm,
* can happen with large stack limits and large mmap()
* allocations.
*/
- addr = slice_find_area_bottomup(mm, len, available, psize, 0);
-
- /*
- * Restore the topdown base:
- */
- if (use_cache) {
- mm->free_area_cache = mm->mmap_base;
- mm->cached_hole_size = ~0UL;
- }
-
- return addr;
+ return slice_find_area_bottomup(mm, len, available, psize);
}
static unsigned long slice_find_area(struct mm_struct *mm, unsigned long len,
struct slice_mask mask, int psize,
- int topdown, int use_cache)
+ int topdown)
{
if (topdown)
- return slice_find_area_topdown(mm, len, mask, psize, use_cache);
+ return slice_find_area_topdown(mm, len, mask, psize);
else
- return slice_find_area_bottomup(mm, len, mask, psize, use_cache);
+ return slice_find_area_bottomup(mm, len, mask, psize);
}
#define or_mask(dst, src) do { \
@@ -415,7 +383,7 @@ static unsigned long slice_find_area(struct mm_struct *mm, unsigned long len,
unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
unsigned long flags, unsigned int psize,
- int topdown, int use_cache)
+ int topdown)
{
struct slice_mask mask = {0, 0};
struct slice_mask good_mask;
@@ -430,8 +398,8 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
BUG_ON(mm->task_size == 0);
slice_dbg("slice_get_unmapped_area(mm=%p, psize=%d...\n", mm, psize);
- slice_dbg(" addr=%lx, len=%lx, flags=%lx, topdown=%d, use_cache=%d\n",
- addr, len, flags, topdown, use_cache);
+ slice_dbg(" addr=%lx, len=%lx, flags=%lx, topdown=%d\n",
+ addr, len, flags, topdown);
if (len > mm->task_size)
return -ENOMEM;
@@ -503,8 +471,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
/* Now let's see if we can find something in the existing
* slices for that size
*/
- newaddr = slice_find_area(mm, len, good_mask, psize, topdown,
- use_cache);
+ newaddr = slice_find_area(mm, len, good_mask, psize, topdown);
if (newaddr != -ENOMEM) {
/* Found within the good mask, we don't have to setup,
* we thus return directly
@@ -536,8 +503,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
* anywhere in the good area.
*/
if (addr) {
- addr = slice_find_area(mm, len, good_mask, psize, topdown,
- use_cache);
+ addr = slice_find_area(mm, len, good_mask, psize, topdown);
if (addr != -ENOMEM) {
slice_dbg(" found area at 0x%lx\n", addr);
return addr;
@@ -547,15 +513,14 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
/* Now let's see if we can find something in the existing slices
* for that size plus free slices
*/
- addr = slice_find_area(mm, len, potential_mask, psize, topdown,
- use_cache);
+ addr = slice_find_area(mm, len, potential_mask, psize, topdown);
#ifdef CONFIG_PPC_64K_PAGES
if (addr == -ENOMEM && psize == MMU_PAGE_64K) {
/* retry the search with 4k-page slices included */
or_mask(potential_mask, compat_mask);
addr = slice_find_area(mm, len, potential_mask, psize,
- topdown, use_cache);
+ topdown);
}
#endif
@@ -586,8 +551,7 @@ unsigned long arch_get_unmapped_area(struct file *filp,
unsigned long flags)
{
return slice_get_unmapped_area(addr, len, flags,
- current->mm->context.user_psize,
- 0, 1);
+ current->mm->context.user_psize, 0);
}
unsigned long arch_get_unmapped_area_topdown(struct file *filp,
@@ -597,8 +561,7 @@ unsigned long arch_get_unmapped_area_topdown(struct file *filp,
const unsigned long flags)
{
return slice_get_unmapped_area(addr0, len, flags,
- current->mm->context.user_psize,
- 1, 1);
+ current->mm->context.user_psize, 1);
}
unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr)
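
Both search directions now delegate the actual gap search to vm_unmapped_area() and only compute, per iteration, the largest contiguous run of available slices to bound it with. The helper's contract is easiest to see in isolation; a userspace sketch with the patch's slice geometry (256M low slices below 4G, 1T slices above; mask widths abbreviated here):

    #include <stdio.h>
    #include <stdbool.h>

    #define SLICE_LOW_SHIFT	28		/* 256M low slices */
    #define SLICE_HIGH_SHIFT	40		/* 1T high slices */
    #define SLICE_LOW_TOP	(1ul << 32)	/* low slices end at 4G */

    struct slice_mask {
    	unsigned int low_slices;	/* one bit per 256M slice */
    	unsigned int high_slices;	/* one bit per 1T slice (abbreviated) */
    };

    /* Same contract as the patch's slice_scan_available(): say whether the
     * slice holding addr is available, and report that slice's end (end=1)
     * or start (end=0) boundary through *boundary_addr. */
    static bool scan_available(unsigned long addr, struct slice_mask available,
    			   int end, unsigned long *boundary_addr)
    {
    	unsigned long slice;

    	if (addr < SLICE_LOW_TOP) {
    		slice = addr >> SLICE_LOW_SHIFT;
    		*boundary_addr = (slice + end) << SLICE_LOW_SHIFT;
    		return available.low_slices & (1u << slice);
    	}
    	slice = addr >> SLICE_HIGH_SHIFT;
    	*boundary_addr = (slice + end) ? ((slice + end) << SLICE_HIGH_SHIFT)
    				       : SLICE_LOW_TOP;
    	return available.high_slices & (1u << slice);
    }

    int main(void)
    {
    	struct slice_mask m = { .low_slices = 0x00f0, .high_slices = 0 };
    	unsigned long boundary;
    	bool ok;

    	/* 0x30000000 is in low slice 3 (unavailable in 0x00f0): the
    	 * bottom-up loop skips straight to its end boundary, 1G. */
    	ok = scan_available(0x30000000ul, m, 1, &boundary);
    	printf("slice 3: %d, next boundary %#lx\n", ok, boundary);

    	/* 0x40000000 is in low slice 4 (available): the run is extended
    	 * slice by slice until scan_available() returns false. */
    	ok = scan_available(0x40000000ul, m, 1, &boundary);
    	printf("slice 4: %d, run extends to %#lx\n", ok, boundary);
    	return 0;
    }
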
diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c
index 0d82ef5..023ec8a 100644
--- a/arch/powerpc/mm/tlb_hash64.c
+++ b/arch/powerpc/mm/tlb_hash64.c
@@ -82,11 +82,11 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
if (!is_kernel_addr(addr)) {
ssize = user_segment_size(addr);
vsid = get_vsid(mm->context.id, addr, ssize);
- WARN_ON(vsid == 0);
} else {
vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
ssize = mmu_kernel_ssize;
}
+ WARN_ON(vsid == 0);
vpn = hpt_vpn(addr, vsid, ssize);
rpte = __real_pte(__pte(pte), ptep);
diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c
index 44097cc..033a980 100644
--- a/arch/powerpc/mm/tlb_nohash.c
+++ b/arch/powerpc/mm/tlb_nohash.c
@@ -427,8 +427,8 @@ static void setup_page_sizes(void)
int i, psize;
#ifdef CONFIG_PPC_FSL_BOOK3E
- int fsl_mmu = mmu_has_feature(MMU_FTR_TYPE_FSL_E);
unsigned int mmucfg = mfspr(SPRN_MMUCFG);
+ int fsl_mmu = mmu_has_feature(MMU_FTR_TYPE_FSL_E);
if (fsl_mmu && (mmucfg & MMUCFG_MAVN) == MMUCFG_MAVN_V1) {
unsigned int tlb1cfg = mfspr(SPRN_TLB1CFG);