summaryrefslogtreecommitdiff
path: root/arch/powerpc
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2015-02-13 02:54:28 (GMT)
committerLinus Torvalds <torvalds@linux-foundation.org>2015-02-13 02:54:28 (GMT)
commit818099574b04c5301eacbbcd441022b353a65466 (patch)
tree77b3645b375105cb0389df2b4ea5ffa90329f7f8 /arch/powerpc
parent802ea9d8645d33d24b7b4cd4537c14f3e698bde0 (diff)
parent6016daed58ee482a2f7684e93342e89139cf4419 (diff)
downloadlinux-818099574b04c5301eacbbcd441022b353a65466.tar.xz
Merge branch 'akpm' (patches from Andrew)
Merge third set of updates from Andrew Morton: - the rest of MM [ This includes getting rid of the numa hinting bits, in favor of just generic protnone logic. Yay. - Linus ] - core kernel - procfs - some of lib/ (lots of lib/ material this time) * emailed patches from Andrew Morton <akpm@linux-foundation.org>: (104 commits) lib/lcm.c: replace include lib/percpu_ida.c: remove redundant includes lib/strncpy_from_user.c: replace module.h include lib/stmp_device.c: replace module.h include lib/sort.c: move include inside #if 0 lib/show_mem.c: remove redundant include lib/radix-tree.c: change to simpler include lib/plist.c: remove redundant include lib/nlattr.c: remove redundant include lib/kobject_uevent.c: remove redundant include lib/llist.c: remove redundant include lib/md5.c: simplify include lib/list_sort.c: rearrange includes lib/genalloc.c: remove redundant include lib/idr.c: remove redundant include lib/halfmd4.c: simplify includes lib/dynamic_queue_limits.c: simplify includes lib/sort.c: use simpler includes lib/interval_tree.c: simplify includes hexdump: make it return number of bytes placed in buffer ...
Diffstat (limited to 'arch/powerpc')
-rw-r--r--arch/powerpc/include/asm/pgtable.h54
-rw-r--r--arch/powerpc/include/asm/pte-common.h5
-rw-r--r--arch/powerpc/include/asm/pte-hash64.h6
-rw-r--r--arch/powerpc/include/asm/thread_info.h4
-rw-r--r--arch/powerpc/kernel/signal_32.c4
-rw-r--r--arch/powerpc/kernel/signal_64.c2
-rw-r--r--arch/powerpc/kernel/time.c32
-rw-r--r--arch/powerpc/kvm/book3s_hv_rm_mmu.c2
-rw-r--r--arch/powerpc/mm/copro_fault.c8
-rw-r--r--arch/powerpc/mm/fault.c25
-rw-r--r--arch/powerpc/mm/pgtable.c11
-rw-r--r--arch/powerpc/mm/pgtable_64.c3
12 files changed, 70 insertions, 86 deletions
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index 7e77f2c..79fee2e 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -40,63 +40,27 @@ static inline int pte_none(pte_t pte) { return (pte_val(pte) & ~_PTE_NONE_MASK)
static inline pgprot_t pte_pgprot(pte_t pte) { return __pgprot(pte_val(pte) & PAGE_PROT_BITS); }
#ifdef CONFIG_NUMA_BALANCING
-static inline int pte_present(pte_t pte)
-{
- return pte_val(pte) & _PAGE_NUMA_MASK;
-}
-
-#define pte_present_nonuma pte_present_nonuma
-static inline int pte_present_nonuma(pte_t pte)
-{
- return pte_val(pte) & (_PAGE_PRESENT);
-}
-
-#define ptep_set_numa ptep_set_numa
-static inline void ptep_set_numa(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep)
-{
- if ((pte_val(*ptep) & _PAGE_PRESENT) == 0)
- VM_BUG_ON(1);
-
- pte_update(mm, addr, ptep, _PAGE_PRESENT, _PAGE_NUMA, 0);
- return;
-}
-
-#define pmdp_set_numa pmdp_set_numa
-static inline void pmdp_set_numa(struct mm_struct *mm, unsigned long addr,
- pmd_t *pmdp)
-{
- if ((pmd_val(*pmdp) & _PAGE_PRESENT) == 0)
- VM_BUG_ON(1);
-
- pmd_hugepage_update(mm, addr, pmdp, _PAGE_PRESENT, _PAGE_NUMA);
- return;
-}
-
/*
- * Generic NUMA pte helpers expect pteval_t and pmdval_t types to exist
- * which was inherited from x86. For the purposes of powerpc pte_basic_t and
- * pmd_t are equivalent
+ * These work without NUMA balancing but the kernel does not care. See the
+ * comment in include/asm-generic/pgtable.h . On powerpc, this will only
+ * work for user pages and always return true for kernel pages.
*/
-#define pteval_t pte_basic_t
-#define pmdval_t pmd_t
-static inline pteval_t ptenuma_flags(pte_t pte)
+static inline int pte_protnone(pte_t pte)
{
- return pte_val(pte) & _PAGE_NUMA_MASK;
+ return (pte_val(pte) &
+ (_PAGE_PRESENT | _PAGE_USER)) == _PAGE_PRESENT;
}
-static inline pmdval_t pmdnuma_flags(pmd_t pmd)
+static inline int pmd_protnone(pmd_t pmd)
{
- return pmd_val(pmd) & _PAGE_NUMA_MASK;
+ return pte_protnone(pmd_pte(pmd));
}
-
-# else
+#endif /* CONFIG_NUMA_BALANCING */
static inline int pte_present(pte_t pte)
{
return pte_val(pte) & _PAGE_PRESENT;
}
-#endif /* CONFIG_NUMA_BALANCING */
/* Conversion functions: convert a page and protection to a page entry,
* and a page entry and page directory to the page they refer to.
diff --git a/arch/powerpc/include/asm/pte-common.h b/arch/powerpc/include/asm/pte-common.h
index 2aef9b7..c5a755e 100644
--- a/arch/powerpc/include/asm/pte-common.h
+++ b/arch/powerpc/include/asm/pte-common.h
@@ -104,11 +104,6 @@ extern unsigned long bad_call_to_PMD_PAGE_SIZE(void);
_PAGE_USER | _PAGE_ACCESSED | _PAGE_RO | \
_PAGE_RW | _PAGE_HWWRITE | _PAGE_DIRTY | _PAGE_EXEC)
-#ifdef CONFIG_NUMA_BALANCING
-/* Mask of bits that distinguish present and numa ptes */
-#define _PAGE_NUMA_MASK (_PAGE_NUMA|_PAGE_PRESENT)
-#endif
-
/*
* We define 2 sets of base prot bits, one for basic pages (ie,
* cacheable kernel and user pages) and one for non cacheable
diff --git a/arch/powerpc/include/asm/pte-hash64.h b/arch/powerpc/include/asm/pte-hash64.h
index 2505d8e..55aea0c 100644
--- a/arch/powerpc/include/asm/pte-hash64.h
+++ b/arch/powerpc/include/asm/pte-hash64.h
@@ -27,12 +27,6 @@
#define _PAGE_RW 0x0200 /* software: user write access allowed */
#define _PAGE_BUSY 0x0800 /* software: PTE & hash are busy */
-/*
- * Used for tracking numa faults
- */
-#define _PAGE_NUMA 0x00000010 /* Gather numa placement stats */
-
-
/* No separate kernel read-only */
#define _PAGE_KERNEL_RW (_PAGE_RW | _PAGE_DIRTY) /* user access blocked by key */
#define _PAGE_KERNEL_RO _PAGE_KERNEL_RW
diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h
index e8abc83..7248979 100644
--- a/arch/powerpc/include/asm/thread_info.h
+++ b/arch/powerpc/include/asm/thread_info.h
@@ -43,7 +43,6 @@ struct thread_info {
int cpu; /* cpu we're on */
int preempt_count; /* 0 => preemptable,
<0 => BUG */
- struct restart_block restart_block;
unsigned long local_flags; /* private flags for thread */
/* low level flags - has atomic operations done on it */
@@ -59,9 +58,6 @@ struct thread_info {
.exec_domain = &default_exec_domain, \
.cpu = 0, \
.preempt_count = INIT_PREEMPT_COUNT, \
- .restart_block = { \
- .fn = do_no_restart_syscall, \
- }, \
.flags = 0, \
}
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index b171001..d3a831a 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -1231,7 +1231,7 @@ long sys_rt_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8,
int tm_restore = 0;
#endif
/* Always make any pending restarted system calls return -EINTR */
- current_thread_info()->restart_block.fn = do_no_restart_syscall;
+ current->restart_block.fn = do_no_restart_syscall;
rt_sf = (struct rt_sigframe __user *)
(regs->gpr[1] + __SIGNAL_FRAMESIZE + 16);
@@ -1504,7 +1504,7 @@ long sys_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8,
#endif
/* Always make any pending restarted system calls return -EINTR */
- current_thread_info()->restart_block.fn = do_no_restart_syscall;
+ current->restart_block.fn = do_no_restart_syscall;
sf = (struct sigframe __user *)(regs->gpr[1] + __SIGNAL_FRAMESIZE);
sc = &sf->sctx;
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 2cb0c94..c7c24d2 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -666,7 +666,7 @@ int sys_rt_sigreturn(unsigned long r3, unsigned long r4, unsigned long r5,
#endif
/* Always make any pending restarted system calls return -EINTR */
- current_thread_info()->restart_block.fn = do_no_restart_syscall;
+ current->restart_block.fn = do_no_restart_syscall;
if (!access_ok(VERIFY_READ, uc, sizeof(*uc)))
goto badframe;
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index fa7c4f1..7316dd1 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -621,6 +621,38 @@ unsigned long long sched_clock(void)
return mulhdu(get_tb() - boot_tb, tb_to_ns_scale) << tb_to_ns_shift;
}
+
+#ifdef CONFIG_PPC_PSERIES
+
+/*
+ * Running clock - attempts to give a view of time passing for a virtualised
+ * kernels.
+ * Uses the VTB register if available otherwise a next best guess.
+ */
+unsigned long long running_clock(void)
+{
+ /*
+ * Don't read the VTB as a host since KVM does not switch in host
+ * timebase into the VTB when it takes a guest off the CPU, reading the
+ * VTB would result in reading 'last switched out' guest VTB.
+ *
+ * Host kernels are often compiled with CONFIG_PPC_PSERIES checked, it
+ * would be unsafe to rely only on the #ifdef above.
+ */
+ if (firmware_has_feature(FW_FEATURE_LPAR) &&
+ cpu_has_feature(CPU_FTR_ARCH_207S))
+ return mulhdu(get_vtb() - boot_tb, tb_to_ns_scale) << tb_to_ns_shift;
+
+ /*
+ * This is a next best approximation without a VTB.
+ * On a host which is running bare metal there should never be any stolen
+ * time and on a host which doesn't do any virtualisation TB *should* equal
+ * VTB so it makes no difference anyway.
+ */
+ return local_clock() - cputime_to_nsecs(kcpustat_this_cpu->cpustat[CPUTIME_STEAL]);
+}
+#endif
+
static int __init get_freq(char *name, int cells, unsigned long *val)
{
struct device_node *cpu;
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 510bdfb..625407e 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -212,7 +212,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
/* Look up the Linux PTE for the backing page */
pte_size = psize;
pte = lookup_linux_pte_and_update(pgdir, hva, writing, &pte_size);
- if (pte_present(pte) && !pte_numa(pte)) {
+ if (pte_present(pte) && !pte_protnone(pte)) {
if (writing && !pte_write(pte))
/* make the actual HPTE be read-only */
ptel = hpte_make_readonly(ptel);
diff --git a/arch/powerpc/mm/copro_fault.c b/arch/powerpc/mm/copro_fault.c
index 1b5305d..f031a47 100644
--- a/arch/powerpc/mm/copro_fault.c
+++ b/arch/powerpc/mm/copro_fault.c
@@ -64,10 +64,14 @@ int copro_handle_mm_fault(struct mm_struct *mm, unsigned long ea,
if (!(vma->vm_flags & VM_WRITE))
goto out_unlock;
} else {
- if (dsisr & DSISR_PROTFAULT)
- goto out_unlock;
if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
goto out_unlock;
+ /*
+ * protfault should only happen due to us
+ * mapping a region readonly temporarily. PROT_NONE
+ * is also covered by the VMA check above.
+ */
+ WARN_ON_ONCE(dsisr & DSISR_PROTFAULT);
}
ret = 0;
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 6154b0a..b396868 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -389,19 +389,6 @@ good_area:
#endif /* CONFIG_8xx */
if (is_exec) {
-#ifdef CONFIG_PPC_STD_MMU
- /* Protection fault on exec go straight to failure on
- * Hash based MMUs as they either don't support per-page
- * execute permission, or if they do, it's handled already
- * at the hash level. This test would probably have to
- * be removed if we change the way this works to make hash
- * processors use the same I/D cache coherency mechanism
- * as embedded.
- */
- if (error_code & DSISR_PROTFAULT)
- goto bad_area;
-#endif /* CONFIG_PPC_STD_MMU */
-
/*
* Allow execution from readable areas if the MMU does not
* provide separate controls over reading and executing.
@@ -416,6 +403,14 @@ good_area:
(cpu_has_feature(CPU_FTR_NOEXECUTE) ||
!(vma->vm_flags & (VM_READ | VM_WRITE))))
goto bad_area;
+#ifdef CONFIG_PPC_STD_MMU
+ /*
+ * protfault should only happen due to us
+ * mapping a region readonly temporarily. PROT_NONE
+ * is also covered by the VMA check above.
+ */
+ WARN_ON_ONCE(error_code & DSISR_PROTFAULT);
+#endif /* CONFIG_PPC_STD_MMU */
/* a write */
} else if (is_write) {
if (!(vma->vm_flags & VM_WRITE))
@@ -423,11 +418,9 @@ good_area:
flags |= FAULT_FLAG_WRITE;
/* a read */
} else {
- /* protection fault */
- if (error_code & 0x08000000)
- goto bad_area;
if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
goto bad_area;
+ WARN_ON_ONCE(error_code & DSISR_PROTFAULT);
}
/*
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index c90e602..83dfcb5 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -172,9 +172,14 @@ static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma,
void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
pte_t pte)
{
-#ifdef CONFIG_DEBUG_VM
- WARN_ON(pte_val(*ptep) & _PAGE_PRESENT);
-#endif
+ /*
+ * When handling numa faults, we already have the pte marked
+ * _PAGE_PRESENT, but we can be sure that it is not in hpte.
+ * Hence we can use set_pte_at for them.
+ */
+ VM_WARN_ON((pte_val(*ptep) & (_PAGE_PRESENT | _PAGE_USER)) ==
+ (_PAGE_PRESENT | _PAGE_USER));
+
/* Note: mm->context.id might not yet have been assigned as
* this context might not have been activated yet when this
* is called.
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 4fe5f64..91bb883 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -718,7 +718,8 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp, pmd_t pmd)
{
#ifdef CONFIG_DEBUG_VM
- WARN_ON(pmd_val(*pmdp) & _PAGE_PRESENT);
+ WARN_ON((pmd_val(*pmdp) & (_PAGE_PRESENT | _PAGE_USER)) ==
+ (_PAGE_PRESENT | _PAGE_USER));
assert_spin_locked(&mm->page_table_lock);
WARN_ON(!pmd_trans_huge(pmd));
#endif