summaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/include/asm/pgtable-3level.h30
-rw-r--r--arch/x86/kernel/pci-dma.c3
-rw-r--r--arch/x86/kernel/smpboot.c7
-rw-r--r--arch/x86/lib/usercopy.c2
-rw-r--r--arch/x86/mm/ioremap.c4
-rw-r--r--arch/x86/mm/pageattr.c2
-rw-r--r--arch/x86/platform/mrst/early_printk_mrst.c13
-rw-r--r--arch/x86/um/sys_call_table_32.c4
-rw-r--r--arch/x86/xen/enlighten.c8
-rw-r--r--arch/x86/xen/p2m.c36
-rw-r--r--arch/x86/xen/setup.c3
11 files changed, 81 insertions, 31 deletions
diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h
index 43876f1..cb00ccc 100644
--- a/arch/x86/include/asm/pgtable-3level.h
+++ b/arch/x86/include/asm/pgtable-3level.h
@@ -47,16 +47,26 @@ static inline void native_set_pte(pte_t *ptep, pte_t pte)
* they can run pmd_offset_map_lock or pmd_trans_huge or other pmd
* operations.
*
- * Without THP if the mmap_sem is hold for reading, the
- * pmd can only transition from null to not null while pmd_read_atomic runs.
- * So there's no need of literally reading it atomically.
+ * Without THP if the mmap_sem is hold for reading, the pmd can only
+ * transition from null to not null while pmd_read_atomic runs. So
+ * we can always return atomic pmd values with this function.
*
* With THP if the mmap_sem is hold for reading, the pmd can become
- * THP or null or point to a pte (and in turn become "stable") at any
- * time under pmd_read_atomic, so it's mandatory to read it atomically
- * with cmpxchg8b.
+ * trans_huge or none or point to a pte (and in turn become "stable")
+ * at any time under pmd_read_atomic. We could read it really
+ * atomically here with a atomic64_read for the THP enabled case (and
+ * it would be a whole lot simpler), but to avoid using cmpxchg8b we
+ * only return an atomic pmdval if the low part of the pmdval is later
+ * found stable (i.e. pointing to a pte). And we're returning a none
+ * pmdval if the low part of the pmd is none. In some cases the high
+ * and low part of the pmdval returned may not be consistent if THP is
+ * enabled (the low part may point to previously mapped hugepage,
+ * while the high part may point to a more recently mapped hugepage),
+ * but pmd_none_or_trans_huge_or_clear_bad() only needs the low part
+ * of the pmd to be read atomically to decide if the pmd is unstable
+ * or not, with the only exception of when the low part of the pmd is
+ * zero in which case we return a none pmd.
*/
-#ifndef CONFIG_TRANSPARENT_HUGEPAGE
static inline pmd_t pmd_read_atomic(pmd_t *pmdp)
{
pmdval_t ret;
@@ -74,12 +84,6 @@ static inline pmd_t pmd_read_atomic(pmd_t *pmdp)
return (pmd_t) { ret };
}
-#else /* CONFIG_TRANSPARENT_HUGEPAGE */
-static inline pmd_t pmd_read_atomic(pmd_t *pmdp)
-{
- return (pmd_t) { atomic64_read((atomic64_t *)pmdp) };
-}
-#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte)
{
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 62c9457..c0f420f 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -100,7 +100,7 @@ void *dma_generic_alloc_coherent(struct device *dev, size_t size,
struct dma_attrs *attrs)
{
unsigned long dma_mask;
- struct page *page = NULL;
+ struct page *page;
unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
dma_addr_t addr;
@@ -108,6 +108,7 @@ void *dma_generic_alloc_coherent(struct device *dev, size_t size,
flag |= __GFP_ZERO;
again:
+ page = NULL;
if (!(flag & GFP_ATOMIC))
page = dma_alloc_from_contiguous(dev, count, get_order(size));
if (!page)
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 3fab55b..7bd8a08 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -349,9 +349,12 @@ static bool __cpuinit match_llc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
static bool __cpuinit match_mc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
{
- if (c->phys_proc_id == o->phys_proc_id)
- return topology_sane(c, o, "mc");
+ if (c->phys_proc_id == o->phys_proc_id) {
+ if (cpu_has(c, X86_FEATURE_AMD_DCM))
+ return true;
+ return topology_sane(c, o, "mc");
+ }
return false;
}
diff --git a/arch/x86/lib/usercopy.c b/arch/x86/lib/usercopy.c
index 677b1ed..4f74d94 100644
--- a/arch/x86/lib/usercopy.c
+++ b/arch/x86/lib/usercopy.c
@@ -22,7 +22,7 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
void *map;
int ret;
- if (__range_not_ok(from, n, TASK_SIZE) == 0)
+ if (__range_not_ok(from, n, TASK_SIZE))
return len;
do {
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index be1ef57..78fe3f1 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -180,7 +180,7 @@ err_free_memtype:
/**
* ioremap_nocache - map bus memory into CPU space
- * @offset: bus address of the memory
+ * @phys_addr: bus address of the memory
* @size: size of the resource to map
*
* ioremap_nocache performs a platform specific sequence of operations to
@@ -217,7 +217,7 @@ EXPORT_SYMBOL(ioremap_nocache);
/**
* ioremap_wc - map memory into CPU space write combined
- * @offset: bus address of the memory
+ * @phys_addr: bus address of the memory
* @size: size of the resource to map
*
* This version of ioremap ensures that the memory is marked write combining.
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index e1ebde3..a718e0d 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -122,7 +122,7 @@ within(unsigned long addr, unsigned long start, unsigned long end)
/**
* clflush_cache_range - flush a cache range with clflush
- * @addr: virtual start address
+ * @vaddr: virtual start address
* @size: number of bytes to flush
*
* clflush is an unordered instruction which needs fencing with mfence
diff --git a/arch/x86/platform/mrst/early_printk_mrst.c b/arch/x86/platform/mrst/early_printk_mrst.c
index 3c6e328..028454f 100644
--- a/arch/x86/platform/mrst/early_printk_mrst.c
+++ b/arch/x86/platform/mrst/early_printk_mrst.c
@@ -110,19 +110,16 @@ static struct kmsg_dumper dw_dumper;
static int dumper_registered;
static void dw_kmsg_dump(struct kmsg_dumper *dumper,
- enum kmsg_dump_reason reason,
- const char *s1, unsigned long l1,
- const char *s2, unsigned long l2)
+ enum kmsg_dump_reason reason)
{
- int i;
+ static char line[1024];
+ size_t len;
/* When run to this, we'd better re-init the HW */
mrst_early_console_init();
- for (i = 0; i < l1; i++)
- early_mrst_console.write(&early_mrst_console, s1 + i, 1);
- for (i = 0; i < l2; i++)
- early_mrst_console.write(&early_mrst_console, s2 + i, 1);
+ while (kmsg_dump_get_line(dumper, true, line, sizeof(line), &len))
+ early_mrst_console.write(&early_mrst_console, line, len);
}
/* Set the ratio rate to 115200, 8n1, IRQ disabled */
diff --git a/arch/x86/um/sys_call_table_32.c b/arch/x86/um/sys_call_table_32.c
index 416bd40..68d1dc9 100644
--- a/arch/x86/um/sys_call_table_32.c
+++ b/arch/x86/um/sys_call_table_32.c
@@ -39,9 +39,9 @@
#undef __SYSCALL_I386
#define __SYSCALL_I386(nr, sym, compat) [ nr ] = sym,
-typedef void (*sys_call_ptr_t)(void);
+typedef asmlinkage void (*sys_call_ptr_t)(void);
-extern void sys_ni_syscall(void);
+extern asmlinkage void sys_ni_syscall(void);
const sys_call_ptr_t sys_call_table[] __cacheline_aligned = {
/*
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index e74df95..ff962d4 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -209,6 +209,9 @@ static void __init xen_banner(void)
xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : "");
}
+#define CPUID_THERM_POWER_LEAF 6
+#define APERFMPERF_PRESENT 0
+
static __read_mostly unsigned int cpuid_leaf1_edx_mask = ~0;
static __read_mostly unsigned int cpuid_leaf1_ecx_mask = ~0;
@@ -242,6 +245,11 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
*dx = cpuid_leaf5_edx_val;
return;
+ case CPUID_THERM_POWER_LEAF:
+ /* Disabling APERFMPERF for kernel usage */
+ maskecx = ~(1 << APERFMPERF_PRESENT);
+ break;
+
case 0xb:
/* Suppress extended topology stuff */
maskebx = 0;
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index ffd08c4..64effdc6 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -706,6 +706,7 @@ int m2p_add_override(unsigned long mfn, struct page *page,
unsigned long uninitialized_var(address);
unsigned level;
pte_t *ptep = NULL;
+ int ret = 0;
pfn = page_to_pfn(page);
if (!PageHighMem(page)) {
@@ -741,6 +742,24 @@ int m2p_add_override(unsigned long mfn, struct page *page,
list_add(&page->lru, &m2p_overrides[mfn_hash(mfn)]);
spin_unlock_irqrestore(&m2p_override_lock, flags);
+ /* p2m(m2p(mfn)) == mfn: the mfn is already present somewhere in
+ * this domain. Set the FOREIGN_FRAME_BIT in the p2m for the other
+ * pfn so that the following mfn_to_pfn(mfn) calls will return the
+ * pfn from the m2p_override (the backend pfn) instead.
+ * We need to do this because the pages shared by the frontend
+ * (xen-blkfront) can be already locked (lock_page, called by
+ * do_read_cache_page); when the userspace backend tries to use them
+ * with direct_IO, mfn_to_pfn returns the pfn of the frontend, so
+ * do_blockdev_direct_IO is going to try to lock the same pages
+ * again resulting in a deadlock.
+ * As a side effect get_user_pages_fast might not be safe on the
+ * frontend pages while they are being shared with the backend,
+ * because mfn_to_pfn (that ends up being called by GUPF) will
+ * return the backend pfn rather than the frontend pfn. */
+ ret = __get_user(pfn, &machine_to_phys_mapping[mfn]);
+ if (ret == 0 && get_phys_to_machine(pfn) == mfn)
+ set_phys_to_machine(pfn, FOREIGN_FRAME(mfn));
+
return 0;
}
EXPORT_SYMBOL_GPL(m2p_add_override);
@@ -752,6 +771,7 @@ int m2p_remove_override(struct page *page, bool clear_pte)
unsigned long uninitialized_var(address);
unsigned level;
pte_t *ptep = NULL;
+ int ret = 0;
pfn = page_to_pfn(page);
mfn = get_phys_to_machine(pfn);
@@ -821,6 +841,22 @@ int m2p_remove_override(struct page *page, bool clear_pte)
} else
set_phys_to_machine(pfn, page->index);
+ /* p2m(m2p(mfn)) == FOREIGN_FRAME(mfn): the mfn is already present
+ * somewhere in this domain, even before being added to the
+ * m2p_override (see comment above in m2p_add_override).
+ * If there are no other entries in the m2p_override corresponding
+ * to this mfn, then remove the FOREIGN_FRAME_BIT from the p2m for
+ * the original pfn (the one shared by the frontend): the backend
+ * cannot do any IO on this page anymore because it has been
+ * unshared. Removing the FOREIGN_FRAME_BIT from the p2m entry of
+ * the original pfn causes mfn_to_pfn(mfn) to return the frontend
+ * pfn again. */
+ mfn &= ~FOREIGN_FRAME_BIT;
+ ret = __get_user(pfn, &machine_to_phys_mapping[mfn]);
+ if (ret == 0 && get_phys_to_machine(pfn) == FOREIGN_FRAME(mfn) &&
+ m2p_find_override(mfn) == NULL)
+ set_phys_to_machine(pfn, mfn);
+
return 0;
}
EXPORT_SYMBOL_GPL(m2p_remove_override);
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 3ebba07..a4790bf 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -371,7 +371,8 @@ char * __init xen_memory_setup(void)
populated = xen_populate_chunk(map, memmap.nr_entries,
max_pfn, &last_pfn, xen_released_pages);
- extra_pages += (xen_released_pages - populated);
+ xen_released_pages -= populated;
+ extra_pages += xen_released_pages;
if (last_pfn > max_pfn) {
max_pfn = min(MAX_DOMAIN_PAGES, last_pfn);