summaryrefslogtreecommitdiff
path: root/arch/x86/mm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/mm')
-rw-r--r--arch/x86/mm/Makefile6
-rw-r--r--arch/x86/mm/fault.c70
-rw-r--r--arch/x86/mm/highmem_32.c4
-rw-r--r--arch/x86/mm/init.c18
-rw-r--r--arch/x86/mm/init_32.c6
-rw-r--r--arch/x86/mm/init_64.c13
-rw-r--r--arch/x86/mm/iomap_32.c27
-rw-r--r--arch/x86/mm/ioremap.c90
-rw-r--r--arch/x86/mm/kmemcheck/kmemcheck.c14
-rw-r--r--arch/x86/mm/mmap.c17
-rw-r--r--arch/x86/mm/pageattr.c99
-rw-r--r--arch/x86/mm/pat.c358
-rw-r--r--arch/x86/mm/pgtable.c7
-rw-r--r--arch/x86/mm/physaddr.c70
-rw-r--r--arch/x86/mm/physaddr.h10
-rw-r--r--arch/x86/mm/srat_32.c4
-rw-r--r--arch/x86/mm/srat_64.c6
-rw-r--r--arch/x86/mm/tlb.c21
18 files changed, 534 insertions, 306 deletions
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index eefdeee..9b5a9f5 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -1,5 +1,9 @@
obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
- pat.o pgtable.o gup.o
+ pat.o pgtable.o physaddr.o gup.o
+
+# Make sure __phys_addr has no stackprotector
+nostackp := $(call cc-option, -fno-stack-protector)
+CFLAGS_physaddr.o := $(nostackp)
obj-$(CONFIG_SMP) += tlb.o
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 78a5fff..82728f2 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -10,7 +10,7 @@
#include <linux/bootmem.h> /* max_low_pfn */
#include <linux/kprobes.h> /* __kprobes, ... */
#include <linux/mmiotrace.h> /* kmmio_handler, ... */
-#include <linux/perf_counter.h> /* perf_swcounter_event */
+#include <linux/perf_event.h> /* perf_sw_event */
#include <asm/traps.h> /* dotraplinkage, ... */
#include <asm/pgalloc.h> /* pgd_*(), ... */
@@ -285,26 +285,25 @@ check_v8086_mode(struct pt_regs *regs, unsigned long address,
tsk->thread.screen_bitmap |= 1 << bit;
}
-static void dump_pagetable(unsigned long address)
+static bool low_pfn(unsigned long pfn)
{
- __typeof__(pte_val(__pte(0))) page;
+ return pfn < max_low_pfn;
+}
- page = read_cr3();
- page = ((__typeof__(page) *) __va(page))[address >> PGDIR_SHIFT];
+static void dump_pagetable(unsigned long address)
+{
+ pgd_t *base = __va(read_cr3());
+ pgd_t *pgd = &base[pgd_index(address)];
+ pmd_t *pmd;
+ pte_t *pte;
#ifdef CONFIG_X86_PAE
- printk("*pdpt = %016Lx ", page);
- if ((page >> PAGE_SHIFT) < max_low_pfn
- && page & _PAGE_PRESENT) {
- page &= PAGE_MASK;
- page = ((__typeof__(page) *) __va(page))[(address >> PMD_SHIFT)
- & (PTRS_PER_PMD - 1)];
- printk(KERN_CONT "*pde = %016Lx ", page);
- page &= ~_PAGE_NX;
- }
-#else
- printk("*pde = %08lx ", page);
+ printk("*pdpt = %016Lx ", pgd_val(*pgd));
+ if (!low_pfn(pgd_val(*pgd) >> PAGE_SHIFT) || !pgd_present(*pgd))
+ goto out;
#endif
+ pmd = pmd_offset(pud_offset(pgd, address), address);
+ printk(KERN_CONT "*pde = %0*Lx ", sizeof(*pmd) * 2, (u64)pmd_val(*pmd));
/*
* We must not directly access the pte in the highpte
@@ -312,16 +311,12 @@ static void dump_pagetable(unsigned long address)
* And let's rather not kmap-atomic the pte, just in case
* it's allocated already:
*/
- if ((page >> PAGE_SHIFT) < max_low_pfn
- && (page & _PAGE_PRESENT)
- && !(page & _PAGE_PSE)) {
-
- page &= PAGE_MASK;
- page = ((__typeof__(page) *) __va(page))[(address >> PAGE_SHIFT)
- & (PTRS_PER_PTE - 1)];
- printk("*pte = %0*Lx ", sizeof(page)*2, (u64)page);
- }
+ if (!low_pfn(pmd_pfn(*pmd)) || !pmd_present(*pmd) || pmd_large(*pmd))
+ goto out;
+ pte = pte_offset_kernel(pmd, address);
+ printk("*pte = %0*Lx ", sizeof(*pte) * 2, (u64)pte_val(*pte));
+out:
printk("\n");
}
@@ -426,10 +421,11 @@ static noinline int vmalloc_fault(unsigned long address)
}
static const char errata93_warning[] =
-KERN_ERR "******* Your BIOS seems to not contain a fix for K8 errata #93\n"
-KERN_ERR "******* Working around it, but it may cause SEGVs or burn power.\n"
-KERN_ERR "******* Please consider a BIOS update.\n"
-KERN_ERR "******* Disabling USB legacy in the BIOS may also help.\n";
+KERN_ERR
+"******* Your BIOS seems to not contain a fix for K8 errata #93\n"
+"******* Working around it, but it may cause SEGVs or burn power.\n"
+"******* Please consider a BIOS update.\n"
+"******* Disabling USB legacy in the BIOS may also help.\n";
/*
* No vm86 mode in 64-bit mode:
@@ -449,16 +445,12 @@ static int bad_address(void *p)
static void dump_pagetable(unsigned long address)
{
- pgd_t *pgd;
+ pgd_t *base = __va(read_cr3() & PHYSICAL_PAGE_MASK);
+ pgd_t *pgd = base + pgd_index(address);
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
- pgd = (pgd_t *)read_cr3();
-
- pgd = __va((unsigned long)pgd & PHYSICAL_PAGE_MASK);
-
- pgd += pgd_index(address);
if (bad_address(pgd))
goto bad;
@@ -696,7 +688,7 @@ show_signal_msg(struct pt_regs *regs, unsigned long error_code,
if (!printk_ratelimit())
return;
- printk(KERN_CONT "%s%s[%d]: segfault at %lx ip %p sp %p error %lx",
+ printk("%s%s[%d]: segfault at %lx ip %p sp %p error %lx",
task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
tsk->comm, task_pid_nr(tsk), address,
(void *)regs->ip, (void *)regs->sp, error_code);
@@ -1025,7 +1017,7 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code)
if (unlikely(error_code & PF_RSVD))
pgtable_bad(regs, error_code, address);
- perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
/*
* If we're in an interrupt, have no user context or are running
@@ -1122,11 +1114,11 @@ good_area:
if (fault & VM_FAULT_MAJOR) {
tsk->maj_flt++;
- perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
regs, address);
} else {
tsk->min_flt++;
- perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
regs, address);
}
diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c
index 58f621e..63a6ba6 100644
--- a/arch/x86/mm/highmem_32.c
+++ b/arch/x86/mm/highmem_32.c
@@ -24,7 +24,7 @@ void kunmap(struct page *page)
* no global lock is needed and because the kmap code must perform a global TLB
* invalidation when the kmap pool wraps.
*
- * However when holding an atomic kmap is is not legal to sleep, so atomic
+ * However when holding an atomic kmap it is not legal to sleep, so atomic
* kmaps are appropriate for short, tight code paths only.
*/
void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot)
@@ -103,6 +103,8 @@ EXPORT_SYMBOL(kmap);
EXPORT_SYMBOL(kunmap);
EXPORT_SYMBOL(kmap_atomic);
EXPORT_SYMBOL(kunmap_atomic);
+EXPORT_SYMBOL(kmap_atomic_prot);
+EXPORT_SYMBOL(kmap_atomic_to_page);
void __init set_highmem_pages_init(void)
{
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index f53b57e..0607119 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -12,6 +12,7 @@
#include <asm/system.h>
#include <asm/tlbflush.h>
#include <asm/tlb.h>
+#include <asm/proto.h>
DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
@@ -177,20 +178,6 @@ static int __meminit save_mr(struct map_range *mr, int nr_range,
return nr_range;
}
-#ifdef CONFIG_X86_64
-static void __init init_gbpages(void)
-{
- if (direct_gbpages && cpu_has_gbpages)
- printk(KERN_INFO "Using GB pages for direct mapping\n");
- else
- direct_gbpages = 0;
-}
-#else
-static inline void init_gbpages(void)
-{
-}
-#endif
-
/*
* Setup the direct mapping of the physical memory at PAGE_OFFSET.
* This runs before bootmem is initialized and gets pages directly from
@@ -210,9 +197,6 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
printk(KERN_INFO "init_memory_mapping: %016lx-%016lx\n", start, end);
- if (!after_bootmem)
- init_gbpages();
-
#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KMEMCHECK)
/*
* For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages.
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 3cd7711..b49b4f6 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -84,7 +84,7 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd)
#ifdef CONFIG_X86_PAE
if (!(pgd_val(*pgd) & _PAGE_PRESENT)) {
if (after_bootmem)
- pmd_table = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE);
+ pmd_table = (pmd_t *)alloc_bootmem_pages(PAGE_SIZE);
else
pmd_table = (pmd_t *)alloc_low_page();
paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT);
@@ -116,7 +116,7 @@ static pte_t * __init one_page_table_init(pmd_t *pmd)
#endif
if (!page_table)
page_table =
- (pte_t *)alloc_bootmem_low_pages(PAGE_SIZE);
+ (pte_t *)alloc_bootmem_pages(PAGE_SIZE);
} else
page_table = (pte_t *)alloc_low_page();
@@ -892,7 +892,7 @@ void __init mem_init(void)
printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, "
"%dk reserved, %dk data, %dk init, %ldk highmem)\n",
- (unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
+ nr_free_pages() << (PAGE_SHIFT-10),
num_physpages << (PAGE_SHIFT-10),
codesize >> 10,
reservedpages << (PAGE_SHIFT-10),
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index c4378f4..810bd31 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -598,6 +598,15 @@ void __init paging_init(void)
sparse_memory_present_with_active_regions(MAX_NUMNODES);
sparse_init();
+
+ /*
+ * clear the default setting with node 0
+ * note: don't use nodes_clear here, that is really clearing when
+ * numa support is not compiled in, and later node_set_state
+ * will not set it back.
+ */
+ node_clear_state(0, N_NORMAL_MEMORY);
+
free_area_init_nodes(max_zone_pfns);
}
@@ -678,7 +687,7 @@ void __init mem_init(void)
printk(KERN_INFO "Memory: %luk/%luk available (%ldk kernel code, "
"%ldk absent, %ldk reserved, %ldk data, %ldk init)\n",
- (unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
+ nr_free_pages() << (PAGE_SHIFT-10),
max_pfn << (PAGE_SHIFT-10),
codesize >> 10,
absent_pages << (PAGE_SHIFT-10),
@@ -787,7 +796,7 @@ int __init reserve_bootmem_generic(unsigned long phys, unsigned long len,
return ret;
#else
- reserve_bootmem(phys, len, BOOTMEM_DEFAULT);
+ reserve_bootmem(phys, len, flags);
#endif
if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) {
diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c
index fe6f84c..84e236c 100644
--- a/arch/x86/mm/iomap_32.c
+++ b/arch/x86/mm/iomap_32.c
@@ -21,7 +21,7 @@
#include <linux/module.h>
#include <linux/highmem.h>
-int is_io_mapping_possible(resource_size_t base, unsigned long size)
+static int is_io_mapping_possible(resource_size_t base, unsigned long size)
{
#if !defined(CONFIG_X86_PAE) && defined(CONFIG_PHYS_ADDR_T_64BIT)
/* There is no way to map greater than 1 << 32 address without PAE */
@@ -30,7 +30,30 @@ int is_io_mapping_possible(resource_size_t base, unsigned long size)
#endif
return 1;
}
-EXPORT_SYMBOL_GPL(is_io_mapping_possible);
+
+int iomap_create_wc(resource_size_t base, unsigned long size, pgprot_t *prot)
+{
+ unsigned long flag = _PAGE_CACHE_WC;
+ int ret;
+
+ if (!is_io_mapping_possible(base, size))
+ return -EINVAL;
+
+ ret = io_reserve_memtype(base, base + size, &flag);
+ if (ret)
+ return ret;
+
+ *prot = __pgprot(__PAGE_KERNEL | flag);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(iomap_create_wc);
+
+void
+iomap_free(resource_size_t base, unsigned long size)
+{
+ io_free_memtype(base, base + size);
+}
+EXPORT_SYMBOL_GPL(iomap_free);
void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot)
{
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 8a45093..334e63c 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -22,77 +22,7 @@
#include <asm/pgalloc.h>
#include <asm/pat.h>
-static inline int phys_addr_valid(resource_size_t addr)
-{
-#ifdef CONFIG_PHYS_ADDR_T_64BIT
- return !(addr >> boot_cpu_data.x86_phys_bits);
-#else
- return 1;
-#endif
-}
-
-#ifdef CONFIG_X86_64
-
-unsigned long __phys_addr(unsigned long x)
-{
- if (x >= __START_KERNEL_map) {
- x -= __START_KERNEL_map;
- VIRTUAL_BUG_ON(x >= KERNEL_IMAGE_SIZE);
- x += phys_base;
- } else {
- VIRTUAL_BUG_ON(x < PAGE_OFFSET);
- x -= PAGE_OFFSET;
- VIRTUAL_BUG_ON(!phys_addr_valid(x));
- }
- return x;
-}
-EXPORT_SYMBOL(__phys_addr);
-
-bool __virt_addr_valid(unsigned long x)
-{
- if (x >= __START_KERNEL_map) {
- x -= __START_KERNEL_map;
- if (x >= KERNEL_IMAGE_SIZE)
- return false;
- x += phys_base;
- } else {
- if (x < PAGE_OFFSET)
- return false;
- x -= PAGE_OFFSET;
- if (!phys_addr_valid(x))
- return false;
- }
-
- return pfn_valid(x >> PAGE_SHIFT);
-}
-EXPORT_SYMBOL(__virt_addr_valid);
-
-#else
-
-#ifdef CONFIG_DEBUG_VIRTUAL
-unsigned long __phys_addr(unsigned long x)
-{
- /* VMALLOC_* aren't constants */
- VIRTUAL_BUG_ON(x < PAGE_OFFSET);
- VIRTUAL_BUG_ON(__vmalloc_start_set && is_vmalloc_addr((void *) x));
- return x - PAGE_OFFSET;
-}
-EXPORT_SYMBOL(__phys_addr);
-#endif
-
-bool __virt_addr_valid(unsigned long x)
-{
- if (x < PAGE_OFFSET)
- return false;
- if (__vmalloc_start_set && is_vmalloc_addr((void *) x))
- return false;
- if (x >= FIXADDR_START)
- return false;
- return pfn_valid((x - PAGE_OFFSET) >> PAGE_SHIFT);
-}
-EXPORT_SYMBOL(__virt_addr_valid);
-
-#endif
+#include "physaddr.h"
int page_is_ram(unsigned long pagenr)
{
@@ -228,24 +158,14 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
retval = reserve_memtype(phys_addr, (u64)phys_addr + size,
prot_val, &new_prot_val);
if (retval) {
- pr_debug("Warning: reserve_memtype returned %d\n", retval);
+ printk(KERN_ERR "ioremap reserve_memtype failed %d\n", retval);
return NULL;
}
if (prot_val != new_prot_val) {
- /*
- * Do not fallback to certain memory types with certain
- * requested type:
- * - request is uc-, return cannot be write-back
- * - request is uc-, return cannot be write-combine
- * - request is write-combine, return cannot be write-back
- */
- if ((prot_val == _PAGE_CACHE_UC_MINUS &&
- (new_prot_val == _PAGE_CACHE_WB ||
- new_prot_val == _PAGE_CACHE_WC)) ||
- (prot_val == _PAGE_CACHE_WC &&
- new_prot_val == _PAGE_CACHE_WB)) {
- pr_debug(
+ if (!is_new_memtype_allowed(phys_addr, size,
+ prot_val, new_prot_val)) {
+ printk(KERN_ERR
"ioremap error for 0x%llx-0x%llx, requested 0x%lx, got 0x%lx\n",
(unsigned long long)phys_addr,
(unsigned long long)(phys_addr + size),
diff --git a/arch/x86/mm/kmemcheck/kmemcheck.c b/arch/x86/mm/kmemcheck/kmemcheck.c
index 5b99004..8cc1833 100644
--- a/arch/x86/mm/kmemcheck/kmemcheck.c
+++ b/arch/x86/mm/kmemcheck/kmemcheck.c
@@ -328,6 +328,20 @@ static void kmemcheck_read_strict(struct pt_regs *regs,
kmemcheck_shadow_set(shadow, size);
}
+bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size)
+{
+ enum kmemcheck_shadow status;
+ void *shadow;
+
+ shadow = kmemcheck_shadow_lookup(addr);
+ if (!shadow)
+ return true;
+
+ status = kmemcheck_shadow_test(shadow, size);
+
+ return status == KMEMCHECK_SHADOW_INITIALIZED;
+}
+
/* Access may cross page boundary */
static void kmemcheck_read(struct pt_regs *regs,
unsigned long addr, unsigned int size)
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index 1658296..c8191de 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -29,13 +29,26 @@
#include <linux/random.h>
#include <linux/limits.h>
#include <linux/sched.h>
+#include <asm/elf.h>
+
+static unsigned int stack_maxrandom_size(void)
+{
+ unsigned int max = 0;
+ if ((current->flags & PF_RANDOMIZE) &&
+ !(current->personality & ADDR_NO_RANDOMIZE)) {
+ max = ((-1U) & STACK_RND_MASK) << PAGE_SHIFT;
+ }
+
+ return max;
+}
+
/*
* Top of mmap area (just below the process stack).
*
- * Leave an at least ~128 MB hole.
+ * Leave an at least ~128 MB hole with possible stack randomization.
*/
-#define MIN_GAP (128*1024*1024)
+#define MIN_GAP (128*1024*1024UL + stack_maxrandom_size())
#define MAX_GAP (TASK_SIZE/6*5)
/*
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 3cfe9ce..24952fd 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -11,6 +11,8 @@
#include <linux/interrupt.h>
#include <linux/seq_file.h>
#include <linux/debugfs.h>
+#include <linux/pfn.h>
+#include <linux/percpu.h>
#include <asm/e820.h>
#include <asm/processor.h>
@@ -590,9 +592,12 @@ static int __change_page_attr(struct cpa_data *cpa, int primary)
unsigned int level;
pte_t *kpte, old_pte;
- if (cpa->flags & CPA_PAGES_ARRAY)
- address = (unsigned long)page_address(cpa->pages[cpa->curpage]);
- else if (cpa->flags & CPA_ARRAY)
+ if (cpa->flags & CPA_PAGES_ARRAY) {
+ struct page *page = cpa->pages[cpa->curpage];
+ if (unlikely(PageHighMem(page)))
+ return 0;
+ address = (unsigned long)page_address(page);
+ } else if (cpa->flags & CPA_ARRAY)
address = cpa->vaddr[cpa->curpage];
else
address = *cpa->vaddr;
@@ -681,8 +686,9 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias);
static int cpa_process_alias(struct cpa_data *cpa)
{
struct cpa_data alias_cpa;
- int ret = 0;
- unsigned long temp_cpa_vaddr, vaddr;
+ unsigned long laddr = (unsigned long)__va(cpa->pfn << PAGE_SHIFT);
+ unsigned long vaddr;
+ int ret;
if (cpa->pfn >= max_pfn_mapped)
return 0;
@@ -695,9 +701,12 @@ static int cpa_process_alias(struct cpa_data *cpa)
* No need to redo, when the primary call touched the direct
* mapping already:
*/
- if (cpa->flags & CPA_PAGES_ARRAY)
- vaddr = (unsigned long)page_address(cpa->pages[cpa->curpage]);
- else if (cpa->flags & CPA_ARRAY)
+ if (cpa->flags & CPA_PAGES_ARRAY) {
+ struct page *page = cpa->pages[cpa->curpage];
+ if (unlikely(PageHighMem(page)))
+ return 0;
+ vaddr = (unsigned long)page_address(page);
+ } else if (cpa->flags & CPA_ARRAY)
vaddr = cpa->vaddr[cpa->curpage];
else
vaddr = *cpa->vaddr;
@@ -706,42 +715,37 @@ static int cpa_process_alias(struct cpa_data *cpa)
PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT)))) {
alias_cpa = *cpa;
- temp_cpa_vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT);
- alias_cpa.vaddr = &temp_cpa_vaddr;
+ alias_cpa.vaddr = &laddr;
alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
-
ret = __change_page_attr_set_clr(&alias_cpa, 0);
+ if (ret)
+ return ret;
}
#ifdef CONFIG_X86_64
- if (ret)
- return ret;
/*
- * No need to redo, when the primary call touched the high
- * mapping already:
- */
- if (within(vaddr, (unsigned long) _text, _brk_end))
- return 0;
-
- /*
- * If the physical address is inside the kernel map, we need
+ * If the primary call didn't touch the high mapping already
+ * and the physical address is inside the kernel map, we need
* to touch the high mapped kernel as well:
*/
- if (!within(cpa->pfn, highmap_start_pfn(), highmap_end_pfn()))
- return 0;
-
- alias_cpa = *cpa;
- temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) + __START_KERNEL_map - phys_base;
- alias_cpa.vaddr = &temp_cpa_vaddr;
- alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
+ if (!within(vaddr, (unsigned long)_text, _brk_end) &&
+ within(cpa->pfn, highmap_start_pfn(), highmap_end_pfn())) {
+ unsigned long temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) +
+ __START_KERNEL_map - phys_base;
+ alias_cpa = *cpa;
+ alias_cpa.vaddr = &temp_cpa_vaddr;
+ alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
- /*
- * The high mapping range is imprecise, so ignore the return value.
- */
- __change_page_attr_set_clr(&alias_cpa, 0);
+ /*
+ * The high mapping range is imprecise, so ignore the
+ * return value.
+ */
+ __change_page_attr_set_clr(&alias_cpa, 0);
+ }
#endif
- return ret;
+
+ return 0;
}
static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
@@ -801,6 +805,7 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
{
struct cpa_data cpa;
int ret, cache, checkalias;
+ unsigned long baddr = 0;
/*
* Check, if we are requested to change a not supported
@@ -832,6 +837,11 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
*/
WARN_ON_ONCE(1);
}
+ /*
+ * Save address for cache flush. *addr is modified in the call
+ * to __change_page_attr_set_clr() below.
+ */
+ baddr = *addr;
}
/* Must avoid aliasing mappings in the highmem code */
@@ -879,7 +889,7 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
cpa_flush_array(addr, numpages, cache,
cpa.flags, pages);
} else
- cpa_flush_range(*addr, numpages, cache);
+ cpa_flush_range(baddr, numpages, cache);
} else
cpa_flush_all(cache);
@@ -982,12 +992,15 @@ EXPORT_SYMBOL(set_memory_array_uc);
int _set_memory_wc(unsigned long addr, int numpages)
{
int ret;
+ unsigned long addr_copy = addr;
+
ret = change_page_attr_set(&addr, numpages,
__pgprot(_PAGE_CACHE_UC_MINUS), 0);
-
if (!ret) {
- ret = change_page_attr_set(&addr, numpages,
- __pgprot(_PAGE_CACHE_WC), 0);
+ ret = change_page_attr_set_clr(&addr_copy, numpages,
+ __pgprot(_PAGE_CACHE_WC),
+ __pgprot(_PAGE_CACHE_MASK),
+ 0, 0, NULL);
}
return ret;
}
@@ -1104,7 +1117,9 @@ int set_pages_array_uc(struct page **pages, int addrinarray)
int free_idx;
for (i = 0; i < addrinarray; i++) {
- start = (unsigned long)page_address(pages[i]);
+ if (PageHighMem(pages[i]))
+ continue;
+ start = page_to_pfn(pages[i]) << PAGE_SHIFT;
end = start + PAGE_SIZE;
if (reserve_memtype(start, end, _PAGE_CACHE_UC_MINUS, NULL))
goto err_out;
@@ -1117,7 +1132,9 @@ int set_pages_array_uc(struct page **pages, int addrinarray)
err_out:
free_idx = i;
for (i = 0; i < free_idx; i++) {
- start = (unsigned long)page_address(pages[i]);
+ if (PageHighMem(pages[i]))
+ continue;
+ start = page_to_pfn(pages[i]) << PAGE_SHIFT;
end = start + PAGE_SIZE;
free_memtype(start, end);
}
@@ -1146,7 +1163,9 @@ int set_pages_array_wb(struct page **pages, int addrinarray)
return retval;
for (i = 0; i < addrinarray; i++) {
- start = (unsigned long)page_address(pages[i]);
+ if (PageHighMem(pages[i]))
+ continue;
+ start = page_to_pfn(pages[i]) << PAGE_SHIFT;
end = start + PAGE_SIZE;
free_memtype(start, end);
}
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index e6718bb..7257cf3 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -15,6 +15,7 @@
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/fs.h>
+#include <linux/rbtree.h>
#include <asm/cacheflush.h>
#include <asm/processor.h>
@@ -148,11 +149,10 @@ static char *cattr_name(unsigned long flags)
* areas). All the aliases have the same cache attributes of course.
* Zero attributes are represented as holes.
*
- * Currently the data structure is a list because the number of mappings
- * are expected to be relatively small. If this should be a problem
- * it could be changed to a rbtree or similar.
+ * The data structure is a list that is also organized as an rbtree
+ * sorted on the start address of memtype range.
*
- * memtype_lock protects the whole list.
+ * memtype_lock protects both the linear list and rbtree.
*/
struct memtype {
@@ -160,11 +160,53 @@ struct memtype {
u64 end;
unsigned long type;
struct list_head nd;
+ struct rb_node rb;
};
+static struct rb_root memtype_rbroot = RB_ROOT;
static LIST_HEAD(memtype_list);
static DEFINE_SPINLOCK(memtype_lock); /* protects memtype list */
+static struct memtype *memtype_rb_search(struct rb_root *root, u64 start)
+{
+ struct rb_node *node = root->rb_node;
+ struct memtype *last_lower = NULL;
+
+ while (node) {
+ struct memtype *data = container_of(node, struct memtype, rb);
+
+ if (data->start < start) {
+ last_lower = data;
+ node = node->rb_right;
+ } else if (data->start > start) {
+ node = node->rb_left;
+ } else
+ return data;
+ }
+
+ /* Will return NULL if there is no entry with its start <= start */
+ return last_lower;
+}
+
+static void memtype_rb_insert(struct rb_root *root, struct memtype *data)
+{
+ struct rb_node **new = &(root->rb_node);
+ struct rb_node *parent = NULL;
+
+ while (*new) {
+ struct memtype *this = container_of(*new, struct memtype, rb);
+
+ parent = *new;
+ if (data->start <= this->start)
+ new = &((*new)->rb_left);
+ else if (data->start > this->start)
+ new = &((*new)->rb_right);
+ }
+
+ rb_link_node(&data->rb, parent, new);
+ rb_insert_color(&data->rb, root);
+}
+
/*
* Does intersection of PAT memory type and MTRR memory type and returns
* the resulting memory type as PAT understands it.
@@ -218,9 +260,6 @@ chk_conflict(struct memtype *new, struct memtype *entry, unsigned long *type)
return -EBUSY;
}
-static struct memtype *cached_entry;
-static u64 cached_start;
-
static int pat_pagerange_is_ram(unsigned long start, unsigned long end)
{
int ram_page = 0, not_rampage = 0;
@@ -249,63 +288,61 @@ static int pat_pagerange_is_ram(unsigned long start, unsigned long end)
}
/*
- * For RAM pages, mark the pages as non WB memory type using
- * PageNonWB (PG_arch_1). We allow only one set_memory_uc() or
- * set_memory_wc() on a RAM page at a time before marking it as WB again.
- * This is ok, because only one driver will be owning the page and
- * doing set_memory_*() calls.
+ * For RAM pages, we use page flags to mark the pages with appropriate type.
+ * Here we do two pass:
+ * - Find the memtype of all the pages in the range, look for any conflicts
+ * - In case of no conflicts, set the new memtype for pages in the range
*
- * For now, we use PageNonWB to track that the RAM page is being mapped
- * as non WB. In future, we will have to use one more flag
- * (or some other mechanism in page_struct) to distinguish between
- * UC and WC mapping.
+ * Caller must hold memtype_lock for atomicity.
*/
static int reserve_ram_pages_type(u64 start, u64 end, unsigned long req_type,
unsigned long *new_type)
{
struct page *page;
- u64 pfn, end_pfn;
+ u64 pfn;
+
+ if (req_type == _PAGE_CACHE_UC) {
+ /* We do not support strong UC */
+ WARN_ON_ONCE(1);
+ req_type = _PAGE_CACHE_UC_MINUS;
+ }
for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) {
- page = pfn_to_page(pfn);
- if (page_mapped(page) || PageNonWB(page))
- goto out;
+ unsigned long type;
- SetPageNonWB(page);
+ page = pfn_to_page(pfn);
+ type = get_page_memtype(page);
+ if (type != -1) {
+ printk(KERN_INFO "reserve_ram_pages_type failed "
+ "0x%Lx-0x%Lx, track 0x%lx, req 0x%lx\n",
+ start, end, type, req_type);
+ if (new_type)
+ *new_type = type;
+
+ return -EBUSY;
+ }
}
- return 0;
-out:
- end_pfn = pfn;
- for (pfn = (start >> PAGE_SHIFT); pfn < end_pfn; ++pfn) {
+ if (new_type)
+ *new_type = req_type;
+
+ for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) {
page = pfn_to_page(pfn);
- ClearPageNonWB(page);
+ set_page_memtype(page, req_type);
}
-
- return -EINVAL;
+ return 0;
}
static int free_ram_pages_type(u64 start, u64 end)
{
struct page *page;
- u64 pfn, end_pfn;
+ u64 pfn;
for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) {
page = pfn_to_page(pfn);
- if (page_mapped(page) || !PageNonWB(page))
- goto out;
-
- ClearPageNonWB(page);
+ set_page_memtype(page, -1);
}
return 0;
-
-out:
- end_pfn = pfn;
- for (pfn = (start >> PAGE_SHIFT); pfn < end_pfn; ++pfn) {
- page = pfn_to_page(pfn);
- SetPageNonWB(page);
- }
- return -EINVAL;
}
/*
@@ -339,6 +376,8 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
if (new_type) {
if (req_type == -1)
*new_type = _PAGE_CACHE_WB;
+ else if (req_type == _PAGE_CACHE_WC)
+ *new_type = _PAGE_CACHE_UC_MINUS;
else
*new_type = req_type & _PAGE_CACHE_MASK;
}
@@ -364,11 +403,16 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
*new_type = actual_type;
is_range_ram = pat_pagerange_is_ram(start, end);
- if (is_range_ram == 1)
- return reserve_ram_pages_type(start, end, req_type,
- new_type);
- else if (is_range_ram < 0)
+ if (is_range_ram == 1) {
+
+ spin_lock(&memtype_lock);
+ err = reserve_ram_pages_type(start, end, req_type, new_type);
+ spin_unlock(&memtype_lock);
+
+ return err;
+ } else if (is_range_ram < 0) {
return -EINVAL;
+ }
new = kmalloc(sizeof(struct memtype), GFP_KERNEL);
if (!new)
@@ -380,17 +424,11 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
spin_lock(&memtype_lock);
- if (cached_entry && start >= cached_start)
- entry = cached_entry;
- else
- entry = list_entry(&memtype_list, struct memtype, nd);
-
/* Search for existing mapping that overlaps the current range */
where = NULL;
- list_for_each_entry_continue(entry, &memtype_list, nd) {
+ list_for_each_entry(entry, &memtype_list, nd) {
if (end <= entry->start) {
where = entry->nd.prev;
- cached_entry = list_entry(where, struct memtype, nd);
break;
} else if (start <= entry->start) { /* end > entry->start */
err = chk_conflict(new, entry, new_type);
@@ -398,8 +436,6 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
dprintk("Overlap at 0x%Lx-0x%Lx\n",
entry->start, entry->end);
where = entry->nd.prev;
- cached_entry = list_entry(where,
- struct memtype, nd);
}
break;
} else if (start < entry->end) { /* start > entry->start */
@@ -407,8 +443,6 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
if (!err) {
dprintk("Overlap at 0x%Lx-0x%Lx\n",
entry->start, entry->end);
- cached_entry = list_entry(entry->nd.prev,
- struct memtype, nd);
/*
* Move to right position in the linked
@@ -436,13 +470,13 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
return err;
}
- cached_start = start;
-
if (where)
list_add(&new->nd, where);
else
list_add_tail(&new->nd, &memtype_list);
+ memtype_rb_insert(&memtype_rbroot, new);
+
spin_unlock(&memtype_lock);
dprintk("reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n",
@@ -454,7 +488,7 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
int free_memtype(u64 start, u64 end)
{
- struct memtype *entry;
+ struct memtype *entry, *saved_entry;
int err = -EINVAL;
int is_range_ram;
@@ -466,23 +500,58 @@ int free_memtype(u64 start, u64 end)
return 0;
is_range_ram = pat_pagerange_is_ram(start, end);
- if (is_range_ram == 1)
- return free_ram_pages_type(start, end);
- else if (is_range_ram < 0)
+ if (is_range_ram == 1) {
+
+ spin_lock(&memtype_lock);
+ err = free_ram_pages_type(start, end);
+ spin_unlock(&memtype_lock);
+
+ return err;
+ } else if (is_range_ram < 0) {
return -EINVAL;
+ }
spin_lock(&memtype_lock);
- list_for_each_entry(entry, &memtype_list, nd) {
+
+ entry = memtype_rb_search(&memtype_rbroot, start);
+ if (unlikely(entry == NULL))
+ goto unlock_ret;
+
+ /*
+ * Saved entry points to an entry with start same or less than what
+ * we searched for. Now go through the list in both directions to look
+ * for the entry that matches with both start and end, with list stored
+ * in sorted start address
+ */
+ saved_entry = entry;
+ list_for_each_entry_from(entry, &memtype_list, nd) {
if (entry->start == start && entry->end == end) {
- if (cached_entry == entry || cached_start == start)
- cached_entry = NULL;
+ rb_erase(&entry->rb, &memtype_rbroot);
+ list_del(&entry->nd);
+ kfree(entry);
+ err = 0;
+ break;
+ } else if (entry->start > start) {
+ break;
+ }
+ }
+
+ if (!err)
+ goto unlock_ret;
+ entry = saved_entry;
+ list_for_each_entry_reverse(entry, &memtype_list, nd) {
+ if (entry->start == start && entry->end == end) {
+ rb_erase(&entry->rb, &memtype_rbroot);
list_del(&entry->nd);
kfree(entry);
err = 0;
break;
+ } else if (entry->start < start) {
+ break;
}
}
+unlock_ret:
spin_unlock(&memtype_lock);
if (err) {
@@ -496,6 +565,101 @@ int free_memtype(u64 start, u64 end)
}
+/**
+ * lookup_memtype - Looksup the memory type for a physical address
+ * @paddr: physical address of which memory type needs to be looked up
+ *
+ * Only to be called when PAT is enabled
+ *
+ * Returns _PAGE_CACHE_WB, _PAGE_CACHE_WC, _PAGE_CACHE_UC_MINUS or
+ * _PAGE_CACHE_UC
+ */
+static unsigned long lookup_memtype(u64 paddr)
+{
+ int rettype = _PAGE_CACHE_WB;
+ struct memtype *entry;
+
+ if (is_ISA_range(paddr, paddr + PAGE_SIZE - 1))
+ return rettype;
+
+ if (pat_pagerange_is_ram(paddr, paddr + PAGE_SIZE)) {
+ struct page *page;
+ spin_lock(&memtype_lock);
+ page = pfn_to_page(paddr >> PAGE_SHIFT);
+ rettype = get_page_memtype(page);
+ spin_unlock(&memtype_lock);
+ /*
+ * -1 from get_page_memtype() implies RAM page is in its
+ * default state and not reserved, and hence of type WB
+ */
+ if (rettype == -1)
+ rettype = _PAGE_CACHE_WB;
+
+ return rettype;
+ }
+
+ spin_lock(&memtype_lock);
+
+ entry = memtype_rb_search(&memtype_rbroot, paddr);
+ if (entry != NULL)
+ rettype = entry->type;
+ else
+ rettype = _PAGE_CACHE_UC_MINUS;
+
+ spin_unlock(&memtype_lock);
+ return rettype;
+}
+
+/**
+ * io_reserve_memtype - Request a memory type mapping for a region of memory
+ * @start: start (physical address) of the region
+ * @end: end (physical address) of the region
+ * @type: A pointer to memtype, with requested type. On success, requested
+ * or any other compatible type that was available for the region is returned
+ *
+ * On success, returns 0
+ * On failure, returns non-zero
+ */
+int io_reserve_memtype(resource_size_t start, resource_size_t end,
+ unsigned long *type)
+{
+ resource_size_t size = end - start;
+ unsigned long req_type = *type;
+ unsigned long new_type;
+ int ret;
+
+ WARN_ON_ONCE(iomem_map_sanity_check(start, size));
+
+ ret = reserve_memtype(start, end, req_type, &new_type);
+ if (ret)
+ goto out_err;
+
+ if (!is_new_memtype_allowed(start, size, req_type, new_type))
+ goto out_free;
+
+ if (kernel_map_sync_memtype(start, size, new_type) < 0)
+ goto out_free;
+
+ *type = new_type;
+ return 0;
+
+out_free:
+ free_memtype(start, end);
+ ret = -EBUSY;
+out_err:
+ return ret;
+}
+
+/**
+ * io_free_memtype - Release a memory type mapping for a region of memory
+ * @start: start (physical address) of the region
+ * @end: end (physical address) of the region
+ */
+void io_free_memtype(resource_size_t start, resource_size_t end)
+{
+ free_memtype(start, end);
+}
+
pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
unsigned long size, pgprot_t vma_prot)
{
@@ -577,7 +741,7 @@ int kernel_map_sync_memtype(u64 base, unsigned long size, unsigned long flags)
{
unsigned long id_sz;
- if (!pat_enabled || base >= __pa(high_memory))
+ if (base >= __pa(high_memory))
return 0;
id_sz = (__pa(high_memory) < base + size) ?
@@ -612,18 +776,37 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
is_ram = pat_pagerange_is_ram(paddr, paddr + size);
/*
- * reserve_pfn_range() doesn't support RAM pages. Maintain the current
- * behavior with RAM pages by returning success.
+ * reserve_pfn_range() for RAM pages. We do not refcount to keep
+ * track of number of mappings of RAM pages. We can assert that
+ * the type requested matches the type of first page in the range.
*/
- if (is_ram != 0)
+ if (is_ram) {
+ if (!pat_enabled)
+ return 0;
+
+ flags = lookup_memtype(paddr);
+ if (want_flags != flags) {
+ printk(KERN_WARNING
+ "%s:%d map pfn RAM range req %s for %Lx-%Lx, got %s\n",
+ current->comm, current->pid,
+ cattr_name(want_flags),
+ (unsigned long long)paddr,
+ (unsigned long long)(paddr + size),
+ cattr_name(flags));
+ *vma_prot = __pgprot((pgprot_val(*vma_prot) &
+ (~_PAGE_CACHE_MASK)) |
+ flags);
+ }
return 0;
+ }
ret = reserve_memtype(paddr, paddr + size, want_flags, &flags);
if (ret)
return ret;
if (flags != want_flags) {
- if (strict_prot || !is_new_memtype_allowed(want_flags, flags)) {
+ if (strict_prot ||
+ !is_new_memtype_allowed(paddr, size, want_flags, flags)) {
free_memtype(paddr, paddr + size);
printk(KERN_ERR "%s:%d map pfn expected mapping type %s"
" for %Lx-%Lx, got %s\n",
@@ -677,14 +860,6 @@ int track_pfn_vma_copy(struct vm_area_struct *vma)
unsigned long vma_size = vma->vm_end - vma->vm_start;
pgprot_t pgprot;
- if (!pat_enabled)
- return 0;
-
- /*
- * For now, only handle remap_pfn_range() vmas where
- * is_linear_pfn_mapping() == TRUE. Handling of
- * vm_insert_pfn() is TBD.
- */
if (is_linear_pfn_mapping(vma)) {
/*
* reserve the whole chunk covered by vma. We need the
@@ -712,23 +887,24 @@ int track_pfn_vma_copy(struct vm_area_struct *vma)
int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot,
unsigned long pfn, unsigned long size)
{
+ unsigned long flags;
resource_size_t paddr;
unsigned long vma_size = vma->vm_end - vma->vm_start;
- if (!pat_enabled)
- return 0;
-
- /*
- * For now, only handle remap_pfn_range() vmas where
- * is_linear_pfn_mapping() == TRUE. Handling of
- * vm_insert_pfn() is TBD.
- */
if (is_linear_pfn_mapping(vma)) {
/* reserve the whole chunk starting from vm_pgoff */
paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT;
return reserve_pfn_range(paddr, vma_size, prot, 0);
}
+ if (!pat_enabled)
+ return 0;
+
+ /* for vm_insert_pfn and friends, we set prot based on lookup */
+ flags = lookup_memtype(pfn << PAGE_SHIFT);
+ *prot = __pgprot((pgprot_val(vma->vm_page_prot) & (~_PAGE_CACHE_MASK)) |
+ flags);
+
return 0;
}
@@ -743,14 +919,6 @@ void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn,
resource_size_t paddr;
unsigned long vma_size = vma->vm_end - vma->vm_start;
- if (!pat_enabled)
- return;
-
- /*
- * For now, only handle remap_pfn_range() vmas where
- * is_linear_pfn_mapping() == TRUE. Handling of
- * vm_insert_pfn() is TBD.
- */
if (is_linear_pfn_mapping(vma)) {
/* free the whole chunk starting from vm_pgoff */
paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT;
@@ -826,7 +994,7 @@ static int memtype_seq_show(struct seq_file *seq, void *v)
return 0;
}
-static struct seq_operations memtype_seq_ops = {
+static const struct seq_operations memtype_seq_ops = {
.start = memtype_seq_start,
.next = memtype_seq_next,
.stop = memtype_seq_stop,
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 8e43bdd..ed34f5e 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -25,7 +25,7 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
return pte;
}
-void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
+void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
{
pgtable_page_dtor(pte);
paravirt_release_pte(page_to_pfn(pte));
@@ -33,14 +33,14 @@ void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
}
#if PAGETABLE_LEVELS > 2
-void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
+void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
{
paravirt_release_pmd(__pa(pmd) >> PAGE_SHIFT);
tlb_remove_page(tlb, virt_to_page(pmd));
}
#if PAGETABLE_LEVELS > 3
-void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud)
+void ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud)
{
paravirt_release_pud(__pa(pud) >> PAGE_SHIFT);
tlb_remove_page(tlb, virt_to_page(pud));
@@ -329,7 +329,6 @@ void __init reserve_top_address(unsigned long reserve)
printk(KERN_INFO "Reserving virtual address space above 0x%08x\n",
(int)-reserve);
__FIXADDR_TOP = -reserve - PAGE_SIZE;
- __VMALLOC_RESERVE += reserve;
#endif
}
diff --git a/arch/x86/mm/physaddr.c b/arch/x86/mm/physaddr.c
new file mode 100644
index 0000000..d2e2735
--- /dev/null
+++ b/arch/x86/mm/physaddr.c
@@ -0,0 +1,70 @@
+#include <linux/mmdebug.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+
+#include <asm/page.h>
+
+#include "physaddr.h"
+
+#ifdef CONFIG_X86_64
+
+unsigned long __phys_addr(unsigned long x)
+{
+ if (x >= __START_KERNEL_map) {
+ x -= __START_KERNEL_map;
+ VIRTUAL_BUG_ON(x >= KERNEL_IMAGE_SIZE);
+ x += phys_base;
+ } else {
+ VIRTUAL_BUG_ON(x < PAGE_OFFSET);
+ x -= PAGE_OFFSET;
+ VIRTUAL_BUG_ON(!phys_addr_valid(x));
+ }
+ return x;
+}
+EXPORT_SYMBOL(__phys_addr);
+
+bool __virt_addr_valid(unsigned long x)
+{
+ if (x >= __START_KERNEL_map) {
+ x -= __START_KERNEL_map;
+ if (x >= KERNEL_IMAGE_SIZE)
+ return false;
+ x += phys_base;
+ } else {
+ if (x < PAGE_OFFSET)
+ return false;
+ x -= PAGE_OFFSET;
+ if (!phys_addr_valid(x))
+ return false;
+ }
+
+ return pfn_valid(x >> PAGE_SHIFT);
+}
+EXPORT_SYMBOL(__virt_addr_valid);
+
+#else
+
+#ifdef CONFIG_DEBUG_VIRTUAL
+unsigned long __phys_addr(unsigned long x)
+{
+ /* VMALLOC_* aren't constants */
+ VIRTUAL_BUG_ON(x < PAGE_OFFSET);
+ VIRTUAL_BUG_ON(__vmalloc_start_set && is_vmalloc_addr((void *) x));
+ return x - PAGE_OFFSET;
+}
+EXPORT_SYMBOL(__phys_addr);
+#endif
+
+bool __virt_addr_valid(unsigned long x)
+{
+ if (x < PAGE_OFFSET)
+ return false;
+ if (__vmalloc_start_set && is_vmalloc_addr((void *) x))
+ return false;
+ if (x >= FIXADDR_START)
+ return false;
+ return pfn_valid((x - PAGE_OFFSET) >> PAGE_SHIFT);
+}
+EXPORT_SYMBOL(__virt_addr_valid);
+
+#endif /* CONFIG_X86_64 */
diff --git a/arch/x86/mm/physaddr.h b/arch/x86/mm/physaddr.h
new file mode 100644
index 0000000..a3cd5a0
--- /dev/null
+++ b/arch/x86/mm/physaddr.h
@@ -0,0 +1,10 @@
+#include <asm/processor.h>
+
+static inline int phys_addr_valid(resource_size_t addr)
+{
+#ifdef CONFIG_PHYS_ADDR_T_64BIT
+ return !(addr >> boot_cpu_data.x86_phys_bits);
+#else
+ return 1;
+#endif
+}
diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c
index 29a0e37..6f8aa33 100644
--- a/arch/x86/mm/srat_32.c
+++ b/arch/x86/mm/srat_32.c
@@ -215,7 +215,7 @@ int __init get_memcfg_from_srat(void)
goto out_fail;
if (num_memory_chunks == 0) {
- printk(KERN_WARNING
+ printk(KERN_DEBUG
"could not find any ACPI SRAT memory areas.\n");
goto out_fail;
}
@@ -277,7 +277,7 @@ int __init get_memcfg_from_srat(void)
}
return 1;
out_fail:
- printk(KERN_ERR "failed to get NUMA memory information from SRAT"
+ printk(KERN_DEBUG "failed to get NUMA memory information from SRAT"
" table\n");
return 0;
}
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index 2dfcbf9..dbb5381 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -79,8 +79,10 @@ static __init void bad_srat(void)
acpi_numa = -1;
for (i = 0; i < MAX_LOCAL_APIC; i++)
apicid_to_node[i] = NUMA_NO_NODE;
- for (i = 0; i < MAX_NUMNODES; i++)
- nodes_add[i].start = nodes[i].end = 0;
+ for (i = 0; i < MAX_NUMNODES; i++) {
+ nodes[i].start = nodes[i].end = 0;
+ nodes_add[i].start = nodes_add[i].end = 0;
+ }
remove_all_active_ranges();
}
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 821e970..c814e14 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -183,18 +183,17 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask,
f->flush_mm = mm;
f->flush_va = va;
- cpumask_andnot(to_cpumask(f->flush_cpumask),
- cpumask, cpumask_of(smp_processor_id()));
-
- /*
- * We have to send the IPI only to
- * CPUs affected.
- */
- apic->send_IPI_mask(to_cpumask(f->flush_cpumask),
- INVALIDATE_TLB_VECTOR_START + sender);
+ if (cpumask_andnot(to_cpumask(f->flush_cpumask), cpumask, cpumask_of(smp_processor_id()))) {
+ /*
+ * We have to send the IPI only to
+ * CPUs affected.
+ */
+ apic->send_IPI_mask(to_cpumask(f->flush_cpumask),
+ INVALIDATE_TLB_VECTOR_START + sender);
- while (!cpumask_empty(to_cpumask(f->flush_cpumask)))
- cpu_relax();
+ while (!cpumask_empty(to_cpumask(f->flush_cpumask)))
+ cpu_relax();
+ }
f->flush_mm = NULL;
f->flush_va = 0;