From 5e40704ed2c69425bcb076fb1890417ef137e6c8 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Thu, 17 Apr 2014 13:57:37 +0100 Subject: arm: xen: implement multicall hypercall support. As part of this make the usual change to xen_ulong_t in place of unsigned long. This change has no impact on x86. The Linux definition of struct multicall_entry.result differs from the Xen definition, I think for good reasons, and used a long rather than an unsigned long. Therefore introduce a xen_long_t, which is a long on x86 architectures and a signed 64-bit integer on ARM. Use uint32_t nr_calls on x86 for consistency with the ARM definition. Build tested on amd64 and i386 builds. Runtime tested on ARM. Signed-off-by: Ian Campbell Cc: Stefano Stabellini Cc: Konrad Rzeszutek Wilk Cc: Boris Ostrovsky Signed-off-by: David Vrabel diff --git a/arch/arm/include/asm/xen/hypercall.h b/arch/arm/include/asm/xen/hypercall.h index 7704e28..7658150 100644 --- a/arch/arm/include/asm/xen/hypercall.h +++ b/arch/arm/include/asm/xen/hypercall.h @@ -48,6 +48,7 @@ int HYPERVISOR_memory_op(unsigned int cmd, void *arg); int HYPERVISOR_physdev_op(int cmd, void *arg); int HYPERVISOR_vcpu_op(int cmd, int vcpuid, void *extra_args); int HYPERVISOR_tmem_op(void *arg); +int HYPERVISOR_multicall(struct multicall_entry *calls, uint32_t nr); static inline void MULTI_update_va_mapping(struct multicall_entry *mcl, unsigned long va, @@ -63,9 +64,4 @@ MULTI_mmu_update(struct multicall_entry *mcl, struct mmu_update *req, BUG(); } -static inline int -HYPERVISOR_multicall(void *call_list, int nr_calls) -{ - BUG(); -} #endif /* _ASM_ARM_XEN_HYPERCALL_H */ diff --git a/arch/arm/include/asm/xen/interface.h b/arch/arm/include/asm/xen/interface.h index 1151188..5006600 100644 --- a/arch/arm/include/asm/xen/interface.h +++ b/arch/arm/include/asm/xen/interface.h @@ -40,6 +40,8 @@ typedef uint64_t xen_pfn_t; #define PRI_xen_pfn "llx" typedef uint64_t xen_ulong_t; #define PRI_xen_ulong "llx" +typedef int64_t xen_long_t; 
+#define PRI_xen_long "llx" /* Guest handles for primitive C types. */ __DEFINE_GUEST_HANDLE(uchar, unsigned char); __DEFINE_GUEST_HANDLE(uint, unsigned int); diff --git a/arch/arm/xen/hypercall.S b/arch/arm/xen/hypercall.S index d1cf7b7..44e3a5f 100644 --- a/arch/arm/xen/hypercall.S +++ b/arch/arm/xen/hypercall.S @@ -89,6 +89,7 @@ HYPERCALL2(memory_op); HYPERCALL2(physdev_op); HYPERCALL3(vcpu_op); HYPERCALL1(tmem_op); +HYPERCALL2(multicall); ENTRY(privcmd_call) stmdb sp!, {r4} diff --git a/arch/arm64/xen/hypercall.S b/arch/arm64/xen/hypercall.S index 531342e..8bbe940 100644 --- a/arch/arm64/xen/hypercall.S +++ b/arch/arm64/xen/hypercall.S @@ -80,6 +80,7 @@ HYPERCALL2(memory_op); HYPERCALL2(physdev_op); HYPERCALL3(vcpu_op); HYPERCALL1(tmem_op); +HYPERCALL2(multicall); ENTRY(privcmd_call) mov x16, x0 diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index e709884..ca08a27 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h @@ -343,7 +343,7 @@ HYPERVISOR_memory_op(unsigned int cmd, void *arg) } static inline int -HYPERVISOR_multicall(void *call_list, int nr_calls) +HYPERVISOR_multicall(void *call_list, uint32_t nr_calls) { return _hypercall2(int, multicall, call_list, nr_calls); } diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h index fd9cb76..3400dba 100644 --- a/arch/x86/include/asm/xen/interface.h +++ b/arch/x86/include/asm/xen/interface.h @@ -54,6 +54,9 @@ typedef unsigned long xen_pfn_t; #define PRI_xen_pfn "lx" typedef unsigned long xen_ulong_t; #define PRI_xen_ulong "lx" +typedef long xen_long_t; +#define PRI_xen_long "lx" + /* Guest handles for primitive C types. 
*/ __DEFINE_GUEST_HANDLE(uchar, unsigned char); __DEFINE_GUEST_HANDLE(uint, unsigned int); diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h index 0cd5ca3..de08213 100644 --- a/include/xen/interface/xen.h +++ b/include/xen/interface/xen.h @@ -275,9 +275,9 @@ DEFINE_GUEST_HANDLE_STRUCT(mmu_update); * NB. The fields are natural register size for this architecture. */ struct multicall_entry { - unsigned long op; - long result; - unsigned long args[6]; + xen_ulong_t op; + xen_long_t result; + xen_ulong_t args[6]; }; DEFINE_GUEST_HANDLE_STRUCT(multicall_entry); -- cgit v0.10.2 From adc01864eb543097bfa23d3191a0733145662bac Mon Sep 17 00:00:00 2001 From: Julien Grall Date: Fri, 18 Apr 2014 16:54:34 +0100 Subject: arm/xen: Remove definition of virt_to_pfn in asm/xen/page.h virt_to_pfn has been defined in asm/memory.h by the commit e26a9e0 "ARM: Better virt_to_page() handling" This results in a compilation warning when CONFIG_XEN is enabled. arch/arm/include/asm/xen/page.h:80:0: warning: "virt_to_pfn" redefined [enabled by default] #define virt_to_pfn(v) (PFN_DOWN(__pa(v))) ^ In file included from arch/arm/include/asm/page.h:163:0, from arch/arm/include/asm/xen/page.h:4, from include/xen/page.h:4, from arch/arm/xen/grant-table.c:33: The definition in memory.h is nearly the same (it directly expands PFN_DOWN), so we can safely drop virt_to_pfn from the Xen include. 
Signed-off-by: Julien Grall Signed-off-by: David Vrabel diff --git a/arch/arm/include/asm/xen/page.h b/arch/arm/include/asm/xen/page.h index cf4f3e8..ded062f 100644 --- a/arch/arm/include/asm/xen/page.h +++ b/arch/arm/include/asm/xen/page.h @@ -77,7 +77,6 @@ static inline xpaddr_t machine_to_phys(xmaddr_t machine) } /* VIRT <-> MACHINE conversion */ #define virt_to_machine(v) (phys_to_machine(XPADDR(__pa(v)))) -#define virt_to_pfn(v) (PFN_DOWN(__pa(v))) #define virt_to_mfn(v) (pfn_to_mfn(virt_to_pfn(v))) #define mfn_to_virt(m) (__va(mfn_to_pfn(m) << PAGE_SHIFT)) -- cgit v0.10.2 From 01f2b0f351aa2390c71516027ab7e533e7b6b1c3 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Thu, 8 May 2014 15:48:13 +0000 Subject: arm64: introduce virt_to_pfn virt_to_pfn has been defined in arch/arm/include/asm/memory.h by commit e26a9e0 "ARM: Better virt_to_page() handling" and Xen has come to rely on it. Introduce virt_to_pfn on arm64 too. Signed-off-by: Stefano Stabellini Acked-by: Catalin Marinas diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index e94f945..993bce5 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -138,6 +138,7 @@ static inline void *phys_to_virt(phys_addr_t x) #define __pa(x) __virt_to_phys((unsigned long)(x)) #define __va(x) ((void *)__phys_to_virt((phys_addr_t)(x))) #define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) +#define virt_to_pfn(x) __phys_to_pfn(__virt_to_phys(x)) /* * virt_to_page(k) convert a _valid_ virtual address to struct page * -- cgit v0.10.2 From 9f1d341415b9d84fcf0cb04f409bd61fac5e2f14 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Fri, 9 May 2014 16:10:49 +0000 Subject: arm: xen: export HYPERVISOR_multicall to modules. "arm: xen: implement multicall hypercall support." forgot to do this. 
Signed-off-by: Ian Campbell Signed-off-by: Stefano Stabellini diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index b96723e..488ecdb 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -350,4 +350,5 @@ EXPORT_SYMBOL_GPL(HYPERVISOR_memory_op); EXPORT_SYMBOL_GPL(HYPERVISOR_physdev_op); EXPORT_SYMBOL_GPL(HYPERVISOR_vcpu_op); EXPORT_SYMBOL_GPL(HYPERVISOR_tmem_op); +EXPORT_SYMBOL_GPL(HYPERVISOR_multicall); EXPORT_SYMBOL_GPL(privcmd_call); -- cgit v0.10.2 From aa8532c32216ae07c3813b9aeb774517878a7573 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Thu, 8 May 2014 11:09:23 +0100 Subject: xen: refactor suspend pre/post hooks New architectures currently have to provide implementations of 5 different functions: xen_arch_pre_suspend(), xen_arch_post_suspend(), xen_arch_hvm_post_suspend(), xen_mm_pin_all(), and xen_mm_unpin_all(). Refactor the suspend code to only require xen_arch_pre_suspend() and xen_arch_post_suspend(). Signed-off-by: David Vrabel Reviewed-by: Boris Ostrovsky diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c index 45329c8..c4df9db 100644 --- a/arch/x86/xen/suspend.c +++ b/arch/x86/xen/suspend.c @@ -12,8 +12,10 @@ #include "xen-ops.h" #include "mmu.h" -void xen_arch_pre_suspend(void) +static void xen_pv_pre_suspend(void) { + xen_mm_pin_all(); + xen_start_info->store_mfn = mfn_to_pfn(xen_start_info->store_mfn); xen_start_info->console.domU.mfn = mfn_to_pfn(xen_start_info->console.domU.mfn); @@ -26,7 +28,7 @@ void xen_arch_pre_suspend(void) BUG(); } -void xen_arch_hvm_post_suspend(int suspend_cancelled) +static void xen_hvm_post_suspend(int suspend_cancelled) { #ifdef CONFIG_XEN_PVHVM int cpu; @@ -41,7 +43,7 @@ void xen_arch_hvm_post_suspend(int suspend_cancelled) #endif } -void xen_arch_post_suspend(int suspend_cancelled) +static void xen_pv_post_suspend(int suspend_cancelled) { xen_build_mfn_list_list(); @@ -60,6 +62,21 @@ void xen_arch_post_suspend(int suspend_cancelled) xen_vcpu_restore(); } + 
xen_mm_unpin_all(); +} + +void xen_arch_pre_suspend(void) +{ + if (xen_pv_domain()) + xen_pv_pre_suspend(); +} + +void xen_arch_post_suspend(int cancelled) +{ + if (xen_pv_domain()) + xen_pv_post_suspend(cancelled); + else + xen_hvm_post_suspend(cancelled); } static void xen_vcpu_notify_restore(void *data) diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 1cb6f4c..c834d4b 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -31,6 +31,8 @@ void xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn); void xen_reserve_top(void); extern unsigned long xen_max_p2m_pfn; +void xen_mm_pin_all(void); +void xen_mm_unpin_all(void); void xen_set_pat(u64); char * __init xen_memory_setup(void); diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c index 32f9236..c3667b2 100644 --- a/drivers/xen/manage.c +++ b/drivers/xen/manage.c @@ -41,9 +41,6 @@ static enum shutdown_state shutting_down = SHUTDOWN_INVALID; struct suspend_info { int cancelled; - unsigned long arg; /* extra hypercall argument */ - void (*pre)(void); - void (*post)(int cancelled); }; static RAW_NOTIFIER_HEAD(xen_resume_notifier); @@ -61,26 +58,6 @@ void xen_resume_notifier_unregister(struct notifier_block *nb) EXPORT_SYMBOL_GPL(xen_resume_notifier_unregister); #ifdef CONFIG_HIBERNATE_CALLBACKS -static void xen_hvm_post_suspend(int cancelled) -{ - xen_arch_hvm_post_suspend(cancelled); - gnttab_resume(); -} - -static void xen_pre_suspend(void) -{ - xen_mm_pin_all(); - gnttab_suspend(); - xen_arch_pre_suspend(); -} - -static void xen_post_suspend(int cancelled) -{ - xen_arch_post_suspend(cancelled); - gnttab_resume(); - xen_mm_unpin_all(); -} - static int xen_suspend(void *data) { struct suspend_info *si = data; @@ -94,18 +71,20 @@ static int xen_suspend(void *data) return err; } - if (si->pre) - si->pre(); + gnttab_suspend(); + xen_arch_pre_suspend(); /* * This hypercall returns 1 if suspend was cancelled * or the domain was merely checkpointed, and 0 if it * is resuming in a 
new domain. */ - si->cancelled = HYPERVISOR_suspend(si->arg); + si->cancelled = HYPERVISOR_suspend(xen_pv_domain() + ? virt_to_mfn(xen_start_info) + : 0); - if (si->post) - si->post(si->cancelled); + xen_arch_post_suspend(si->cancelled); + gnttab_resume(); if (!si->cancelled) { xen_irq_resume(); @@ -154,16 +133,6 @@ static void do_suspend(void) si.cancelled = 1; - if (xen_hvm_domain()) { - si.arg = 0UL; - si.pre = NULL; - si.post = &xen_hvm_post_suspend; - } else { - si.arg = virt_to_mfn(xen_start_info); - si.pre = &xen_pre_suspend; - si.post = &xen_post_suspend; - } - err = stop_machine(xen_suspend, &si, cpumask_of(0)); raw_notifier_call_chain(&xen_resume_notifier, 0, NULL); diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h index 2cf4717..0b3149e 100644 --- a/include/xen/xen-ops.h +++ b/include/xen/xen-ops.h @@ -9,10 +9,6 @@ DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu); void xen_arch_pre_suspend(void); void xen_arch_post_suspend(int suspend_cancelled); -void xen_arch_hvm_post_suspend(int suspend_cancelled); - -void xen_mm_pin_all(void); -void xen_mm_unpin_all(void); void xen_timer_resume(void); void xen_arch_resume(void); -- cgit v0.10.2 From 79390289cfeb6d0f4295ca32a54630c93154428e Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Thu, 8 May 2014 16:54:02 +0100 Subject: arm,arm64/xen: introduce HYPERVISOR_suspend() Introduce HYPERVISOR_suspend() and a few additional empty stubs for Xen arch specific functions called by drivers/xen/manage.c. 
Signed-off-by: Stefano Stabellini Signed-off-by: David Vrabel diff --git a/arch/arm/include/asm/xen/hypercall.h b/arch/arm/include/asm/xen/hypercall.h index 7658150..712b50e 100644 --- a/arch/arm/include/asm/xen/hypercall.h +++ b/arch/arm/include/asm/xen/hypercall.h @@ -34,6 +34,7 @@ #define _ASM_ARM_XEN_HYPERCALL_H #include +#include long privcmd_call(unsigned call, unsigned long a1, unsigned long a2, unsigned long a3, @@ -50,6 +51,15 @@ int HYPERVISOR_vcpu_op(int cmd, int vcpuid, void *extra_args); int HYPERVISOR_tmem_op(void *arg); int HYPERVISOR_multicall(struct multicall_entry *calls, uint32_t nr); +static inline int +HYPERVISOR_suspend(unsigned long start_info_mfn) +{ + struct sched_shutdown r = { .reason = SHUTDOWN_suspend }; + + /* start_info_mfn is unused on ARM */ + return HYPERVISOR_sched_op(SCHEDOP_shutdown, &r); +} + static inline void MULTI_update_va_mapping(struct multicall_entry *mcl, unsigned long va, unsigned int new_val, unsigned long flags) diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index 488ecdb..1e63243 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -339,6 +339,14 @@ static int __init xen_pm_init(void) } late_initcall(xen_pm_init); + +/* empty stubs */ +void xen_arch_pre_suspend(void) { } +void xen_arch_post_suspend(int suspend_cancelled) { } +void xen_timer_resume(void) { } +void xen_arch_resume(void) { } + + /* In the hypervisor.S file. */ EXPORT_SYMBOL_GPL(HYPERVISOR_event_channel_op); EXPORT_SYMBOL_GPL(HYPERVISOR_grant_table_op); -- cgit v0.10.2 From bc5eb20161ad742a38b7b4deda393e577ade669b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Tue, 13 May 2014 18:56:25 +0200 Subject: xen/x86: set panic notifier priority to minimum MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Execution is not going to continue after telling Xen about the crash. 
Let other panic notifiers run by postponing the final hypercall as much as possible. Signed-off-by: Andrew Jones Signed-off-by: Radim Krčmář Signed-off-by: David Vrabel diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 201d09a..662bdcb 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1339,6 +1339,7 @@ xen_panic_event(struct notifier_block *this, unsigned long event, void *ptr) static struct notifier_block xen_panic_block = { .notifier_call= xen_panic_event, + .priority = INT_MIN }; int xen_panic_handler_init(void) -- cgit v0.10.2 From fcca2e3119f3771dcfd0b03b3718b3c51b5f21c3 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Tue, 7 Jan 2014 11:53:35 +0000 Subject: x86/xen: rename early_p2m_alloc() and early_p2m_alloc_middle() early_p2m_alloc_middle() allocates a new leaf page and early_p2m_alloc() allocates a new middle page. This is confusing. Swap the names so they match what the functions actually do. Signed-off-by: David Vrabel Reviewed-by: Konrad Rzeszutek Wilk Tested-by: Konrad Rzeszutek Wilk diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 85e5d78..4fc71cc 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -596,7 +596,7 @@ static bool alloc_p2m(unsigned long pfn) return true; } -static bool __init early_alloc_p2m_middle(unsigned long pfn, bool check_boundary) +static bool __init early_alloc_p2m(unsigned long pfn, bool check_boundary) { unsigned topidx, mididx, idx; unsigned long *p2m; @@ -638,7 +638,7 @@ static bool __init early_alloc_p2m_middle(unsigned long pfn, bool check_boundary return true; } -static bool __init early_alloc_p2m(unsigned long pfn) +static bool __init early_alloc_p2m_middle(unsigned long pfn) { unsigned topidx = p2m_top_index(pfn); unsigned long *mid_mfn_p; @@ -663,7 +663,7 @@ static bool __init early_alloc_p2m(unsigned long pfn) p2m_top_mfn_p[topidx] = mid_mfn_p; p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p); /* Note: we don't set mid_mfn_p[midix] here, - * look in 
early_alloc_p2m_middle */ + * look in early_alloc_p2m() */ } return true; } @@ -739,7 +739,7 @@ found: /* This shouldn't happen */ if (WARN_ON(p2m_top[topidx] == p2m_mid_missing)) - early_alloc_p2m(set_pfn); + early_alloc_p2m_middle(set_pfn); if (WARN_ON(p2m_top[topidx][mididx] != p2m_missing)) return false; @@ -754,13 +754,13 @@ found: bool __init early_set_phys_to_machine(unsigned long pfn, unsigned long mfn) { if (unlikely(!__set_phys_to_machine(pfn, mfn))) { - if (!early_alloc_p2m(pfn)) + if (!early_alloc_p2m_middle(pfn)) return false; if (early_can_reuse_p2m_middle(pfn, mfn)) return __set_phys_to_machine(pfn, mfn); - if (!early_alloc_p2m_middle(pfn, false /* boundary crossover OK!*/)) + if (!early_alloc_p2m(pfn, false /* boundary crossover OK!*/)) return false; if (!__set_phys_to_machine(pfn, mfn)) -- cgit v0.10.2 From a9b5bff66b2a63f7d0f42434f5da9024b442159c Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Mon, 6 Jan 2014 11:55:13 +0000 Subject: x86/xen: fix set_phys_range_identity() if pfn_e > MAX_P2M_PFN Allow set_phys_range_identity() to work with a range that overlaps MAX_P2M_PFN by clamping pfn_e to MAX_P2M_PFN. 
Signed-off-by: David Vrabel Reviewed-by: Konrad Rzeszutek Wilk Tested-by: Konrad Rzeszutek Wilk diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 4fc71cc..82c8c93 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -774,7 +774,7 @@ unsigned long __init set_phys_range_identity(unsigned long pfn_s, { unsigned long pfn; - if (unlikely(pfn_s >= MAX_P2M_PFN || pfn_e >= MAX_P2M_PFN)) + if (unlikely(pfn_s >= MAX_P2M_PFN)) return 0; if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) @@ -783,6 +783,9 @@ unsigned long __init set_phys_range_identity(unsigned long pfn_s, if (pfn_s > pfn_e) return 0; + if (pfn_e > MAX_P2M_PFN) + pfn_e = MAX_P2M_PFN; + for (pfn = (pfn_s & ~(P2M_MID_PER_PAGE * P2M_PER_PAGE - 1)); pfn < ALIGN(pfn_e, (P2M_MID_PER_PAGE * P2M_PER_PAGE)); pfn += P2M_MID_PER_PAGE * P2M_PER_PAGE) -- cgit v0.10.2 From 3cb83e46d032505016ab2565f067e24c8cba9a9d Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Tue, 7 Jan 2014 11:44:32 +0000 Subject: x86/xen: compactly store large identity ranges in the p2m Large (multi-GB) identity ranges currently require a unique middle page (filled with p2m_identity entries) per 1 GB region. Similar to the common p2m_mid_missing middle page for large missing regions, introduce a p2m_mid_identity page (filled with p2m_identity entries) which can be used instead. set_phys_range_identity() thus only needs to allocate new middle pages at the beginning and end of the range. Signed-off-by: David Vrabel Reviewed-by: Konrad Rzeszutek Wilk Tested-by: Konrad Rzeszutek Wilk diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 82c8c93..5700144 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -36,7 +36,7 @@ * pfn_to_mfn(0xc0000)=0xc0000 * * The benefit of this is, that we can assume for non-RAM regions (think - * PCI BARs, or ACPI spaces), we can create mappings easily b/c we + * PCI BARs, or ACPI spaces), we can create mappings easily because we * get the PFN value to match the MFN. 
* * For this to work efficiently we have one new page p2m_identity and @@ -60,7 +60,7 @@ * There is also a digram of the P2M at the end that can help. * Imagine your E820 looking as so: * - * 1GB 2GB + * 1GB 2GB 4GB * /-------------------+---------\/----\ /----------\ /---+-----\ * | System RAM | Sys RAM ||ACPI| | reserved | | Sys RAM | * \-------------------+---------/\----/ \----------/ \---+-----/ @@ -77,9 +77,8 @@ * of the PFN and the end PFN (263424 and 512256 respectively). The first step * is to reserve_brk a top leaf page if the p2m[1] is missing. The top leaf page * covers 512^2 of page estate (1GB) and in case the start or end PFN is not - * aligned on 512^2*PAGE_SIZE (1GB) we loop on aligned 1GB PFNs from start pfn - * to end pfn. We reserve_brk top leaf pages if they are missing (means they - * point to p2m_mid_missing). + * aligned on 512^2*PAGE_SIZE (1GB) we reserve_brk new middle and leaf pages as + * required to split any existing p2m_mid_missing middle pages. * * With the E820 example above, 263424 is not 1GB aligned so we allocate a * reserve_brk page which will cover the PFNs estate from 0x40000 to 0x80000. @@ -88,7 +87,7 @@ * Next stage is to determine if we need to do a more granular boundary check * on the 4MB (or 2MB depending on architecture) off the start and end pfn's. * We check if the start pfn and end pfn violate that boundary check, and if - * so reserve_brk a middle (p2m[x][y]) leaf page. This way we have a much finer + * so reserve_brk a (p2m[x][y]) leaf page. This way we have a much finer * granularity of setting which PFNs are missing and which ones are identity. * In our example 263424 and 512256 both fail the check so we reserve_brk two * pages. Populate them with INVALID_P2M_ENTRY (so they both have "missing" @@ -102,9 +101,10 @@ * * The next step is to walk from the start pfn to the end pfn setting * the IDENTITY_FRAME_BIT on each PFN. This is done in set_phys_range_identity. 
- * If we find that the middle leaf is pointing to p2m_missing we can swap it - * over to p2m_identity - this way covering 4MB (or 2MB) PFN space. At this - * point we do not need to worry about boundary aligment (so no need to + * If we find that the middle entry is pointing to p2m_missing we can swap it + * over to p2m_identity - this way covering 4MB (or 2MB) PFN space (and + * similarly swapping p2m_mid_missing for p2m_mid_identity for larger regions). + * At this point we do not need to worry about boundary aligment (so no need to * reserve_brk a middle page, figure out which PFNs are "missing" and which * ones are identity), as that has been done earlier. If we find that the * middle leaf is not occupied by p2m_identity or p2m_missing, we dereference @@ -118,6 +118,9 @@ * considered missing). In our case, p2m[1][2][0->255] and p2m[1][488][257->511] * contain the INVALID_P2M_ENTRY value and are considered "missing." * + * Finally, the region beyond the end of of the E820 (4 GB in this example) + * is set to be identity (in case there are MMIO regions placed here). + * * This is what the p2m ends up looking (for the E820 above) with this * fabulous drawing: * @@ -129,21 +132,27 @@ * |-----| \ | [p2m_identity]+\\ | .... | * | 2 |--\ \-------------------->| ... | \\ \----------------/ * |-----| \ \---------------/ \\ - * | 3 |\ \ \\ p2m_identity - * |-----| \ \-------------------->/---------------\ /-----------------\ - * | .. +->+ | [p2m_identity]+-->| ~0, ~0, ~0, ... | - * \-----/ / | [p2m_identity]+-->| ..., ~0 | - * / /---------------\ | .... | \-----------------/ - * / | IDENTITY[@0] | /-+-[x], ~0, ~0.. | - * / | IDENTITY[@256]|<----/ \---------------/ - * / | ~0, ~0, .... 
| - * | \---------------/ - * | - * p2m_mid_missing p2m_missing - * /-----------------\ /------------\ - * | [p2m_missing] +---->| ~0, ~0, ~0 | - * | [p2m_missing] +---->| ..., ~0 | - * \-----------------/ \------------/ + * | 3 |-\ \ \\ p2m_identity [1] + * |-----| \ \-------------------->/---------------\ /-----------------\ + * | .. |\ | | [p2m_identity]+-->| ~0, ~0, ~0, ... | + * \-----/ | | | [p2m_identity]+-->| ..., ~0 | + * | | | .... | \-----------------/ + * | | +-[x], ~0, ~0.. +\ + * | | \---------------/ \ + * | | \-> /---------------\ + * | V p2m_mid_missing p2m_missing | IDENTITY[@0] | + * | /-----------------\ /------------\ | IDENTITY[@256]| + * | | [p2m_missing] +---->| ~0, ~0, ...| | ~0, ~0, .... | + * | | [p2m_missing] +---->| ..., ~0 | \---------------/ + * | | ... | \------------/ + * | \-----------------/ + * | + * | p2m_mid_identity + * | /-----------------\ + * \-->| [p2m_identity] +---->[1] + * | [p2m_identity] +---->[1] + * | ... | + * \-----------------/ * * where ~0 is INVALID_P2M_ENTRY. IDENTITY is (PFN | IDENTITY_BIT) */ @@ -187,13 +196,15 @@ static RESERVE_BRK_ARRAY(unsigned long, p2m_top_mfn, P2M_TOP_PER_PAGE); static RESERVE_BRK_ARRAY(unsigned long *, p2m_top_mfn_p, P2M_TOP_PER_PAGE); static RESERVE_BRK_ARRAY(unsigned long, p2m_identity, P2M_PER_PAGE); +static RESERVE_BRK_ARRAY(unsigned long *, p2m_mid_identity, P2M_MID_PER_PAGE); +static RESERVE_BRK_ARRAY(unsigned long, p2m_mid_identity_mfn, P2M_MID_PER_PAGE); RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE))); RESERVE_BRK(p2m_mid_mfn, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE))); /* We might hit two boundary violations at the start and end, at max each * boundary violation will require three middle nodes. */ -RESERVE_BRK(p2m_mid_identity, PAGE_SIZE * 2 * 3); +RESERVE_BRK(p2m_mid_extra, PAGE_SIZE * 2 * 3); /* When we populate back during bootup, the amount of pages can vary. 
The * max we have is seen is 395979, but that does not mean it can't be more. @@ -242,20 +253,20 @@ static void p2m_top_mfn_p_init(unsigned long **top) top[i] = p2m_mid_missing_mfn; } -static void p2m_mid_init(unsigned long **mid) +static void p2m_mid_init(unsigned long **mid, unsigned long *leaf) { unsigned i; for (i = 0; i < P2M_MID_PER_PAGE; i++) - mid[i] = p2m_missing; + mid[i] = leaf; } -static void p2m_mid_mfn_init(unsigned long *mid) +static void p2m_mid_mfn_init(unsigned long *mid, unsigned long *leaf) { unsigned i; for (i = 0; i < P2M_MID_PER_PAGE; i++) - mid[i] = virt_to_mfn(p2m_missing); + mid[i] = virt_to_mfn(leaf); } static void p2m_init(unsigned long *p2m) @@ -286,7 +297,9 @@ void __ref xen_build_mfn_list_list(void) /* Pre-initialize p2m_top_mfn to be completely missing */ if (p2m_top_mfn == NULL) { p2m_mid_missing_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE); - p2m_mid_mfn_init(p2m_mid_missing_mfn); + p2m_mid_mfn_init(p2m_mid_missing_mfn, p2m_missing); + p2m_mid_identity_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE); + p2m_mid_mfn_init(p2m_mid_identity_mfn, p2m_identity); p2m_top_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE); p2m_top_mfn_p_init(p2m_top_mfn_p); @@ -295,7 +308,8 @@ void __ref xen_build_mfn_list_list(void) p2m_top_mfn_init(p2m_top_mfn); } else { /* Reinitialise, mfn's all change after migration */ - p2m_mid_mfn_init(p2m_mid_missing_mfn); + p2m_mid_mfn_init(p2m_mid_missing_mfn, p2m_missing); + p2m_mid_mfn_init(p2m_mid_identity_mfn, p2m_identity); } for (pfn = 0; pfn < xen_max_p2m_pfn; pfn += P2M_PER_PAGE) { @@ -327,7 +341,7 @@ void __ref xen_build_mfn_list_list(void) * it too late. 
*/ mid_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE); - p2m_mid_mfn_init(mid_mfn_p); + p2m_mid_mfn_init(mid_mfn_p, p2m_missing); p2m_top_mfn_p[topidx] = mid_mfn_p; } @@ -365,16 +379,17 @@ void __init xen_build_dynamic_phys_to_machine(void) p2m_missing = extend_brk(PAGE_SIZE, PAGE_SIZE); p2m_init(p2m_missing); + p2m_identity = extend_brk(PAGE_SIZE, PAGE_SIZE); + p2m_init(p2m_identity); p2m_mid_missing = extend_brk(PAGE_SIZE, PAGE_SIZE); - p2m_mid_init(p2m_mid_missing); + p2m_mid_init(p2m_mid_missing, p2m_missing); + p2m_mid_identity = extend_brk(PAGE_SIZE, PAGE_SIZE); + p2m_mid_init(p2m_mid_identity, p2m_identity); p2m_top = extend_brk(PAGE_SIZE, PAGE_SIZE); p2m_top_init(p2m_top); - p2m_identity = extend_brk(PAGE_SIZE, PAGE_SIZE); - p2m_init(p2m_identity); - /* * The domain builder gives us a pre-constructed p2m array in * mfn_list for all the pages initially given to us, so we just @@ -386,7 +401,7 @@ void __init xen_build_dynamic_phys_to_machine(void) if (p2m_top[topidx] == p2m_mid_missing) { unsigned long **mid = extend_brk(PAGE_SIZE, PAGE_SIZE); - p2m_mid_init(mid); + p2m_mid_init(mid, p2m_missing); p2m_top[topidx] = mid; } @@ -545,7 +560,7 @@ static bool alloc_p2m(unsigned long pfn) if (!mid) return false; - p2m_mid_init(mid); + p2m_mid_init(mid, p2m_missing); if (cmpxchg(top_p, p2m_mid_missing, mid) != p2m_mid_missing) free_p2m_page(mid); @@ -565,7 +580,7 @@ static bool alloc_p2m(unsigned long pfn) if (!mid_mfn) return false; - p2m_mid_mfn_init(mid_mfn); + p2m_mid_mfn_init(mid_mfn, p2m_missing); missing_mfn = virt_to_mfn(p2m_mid_missing_mfn); mid_mfn_mfn = virt_to_mfn(mid_mfn); @@ -649,7 +664,7 @@ static bool __init early_alloc_p2m_middle(unsigned long pfn) if (mid == p2m_mid_missing) { mid = extend_brk(PAGE_SIZE, PAGE_SIZE); - p2m_mid_init(mid); + p2m_mid_init(mid, p2m_missing); p2m_top[topidx] = mid; @@ -658,7 +673,7 @@ static bool __init early_alloc_p2m_middle(unsigned long pfn) /* And the save/restore P2M tables.. 
*/ if (mid_mfn_p == p2m_mid_missing_mfn) { mid_mfn_p = extend_brk(PAGE_SIZE, PAGE_SIZE); - p2m_mid_mfn_init(mid_mfn_p); + p2m_mid_mfn_init(mid_mfn_p, p2m_missing); p2m_top_mfn_p[topidx] = mid_mfn_p; p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p); @@ -769,6 +784,24 @@ bool __init early_set_phys_to_machine(unsigned long pfn, unsigned long mfn) return true; } + +static void __init early_split_p2m(unsigned long pfn) +{ + unsigned long mididx, idx; + + mididx = p2m_mid_index(pfn); + idx = p2m_index(pfn); + + /* + * Allocate new middle and leaf pages if this pfn lies in the + * middle of one. + */ + if (mididx || idx) + early_alloc_p2m_middle(pfn); + if (idx) + early_alloc_p2m(pfn, false); +} + unsigned long __init set_phys_range_identity(unsigned long pfn_s, unsigned long pfn_e) { @@ -786,19 +819,27 @@ unsigned long __init set_phys_range_identity(unsigned long pfn_s, if (pfn_e > MAX_P2M_PFN) pfn_e = MAX_P2M_PFN; - for (pfn = (pfn_s & ~(P2M_MID_PER_PAGE * P2M_PER_PAGE - 1)); - pfn < ALIGN(pfn_e, (P2M_MID_PER_PAGE * P2M_PER_PAGE)); - pfn += P2M_MID_PER_PAGE * P2M_PER_PAGE) - { - WARN_ON(!early_alloc_p2m(pfn)); - } + early_split_p2m(pfn_s); + early_split_p2m(pfn_e); - early_alloc_p2m_middle(pfn_s, true); - early_alloc_p2m_middle(pfn_e, true); + for (pfn = pfn_s; pfn < pfn_e;) { + unsigned topidx = p2m_top_index(pfn); + unsigned mididx = p2m_mid_index(pfn); - for (pfn = pfn_s; pfn < pfn_e; pfn++) if (!__set_phys_to_machine(pfn, IDENTITY_FRAME(pfn))) break; + pfn++; + + /* + * If the PFN was set to a middle or leaf identity + * page the remainder must also be identity, so skip + * ahead to the next middle or leaf entry. + */ + if (p2m_top[topidx] == p2m_mid_identity) + pfn = ALIGN(pfn, P2M_MID_PER_PAGE * P2M_PER_PAGE); + else if (p2m_top[topidx][mididx] == p2m_identity) + pfn = ALIGN(pfn, P2M_PER_PAGE); + } if (!WARN((pfn - pfn_s) != (pfn_e - pfn_s), "Identity mapping failed. 
We are %ld short of 1-1 mappings!\n", @@ -828,8 +869,22 @@ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn) /* For sparse holes were the p2m leaf has real PFN along with * PCI holes, stick in the PFN as the MFN value. + * + * set_phys_range_identity() will have allocated new middle + * and leaf pages as required so an existing p2m_mid_missing + * or p2m_missing mean that whole range will be identity so + * these can be switched to p2m_mid_identity or p2m_identity. */ if (mfn != INVALID_P2M_ENTRY && (mfn & IDENTITY_FRAME_BIT)) { + if (p2m_top[topidx] == p2m_mid_identity) + return true; + + if (p2m_top[topidx] == p2m_mid_missing) { + WARN_ON(cmpxchg(&p2m_top[topidx], p2m_mid_missing, + p2m_mid_identity) != p2m_mid_missing); + return true; + } + if (p2m_top[topidx][mididx] == p2m_identity) return true; -- cgit v0.10.2 From 2dcc9a3de1d7f77bb4dbc108358a75776328e887 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Tue, 7 Jan 2014 11:36:53 +0000 Subject: x86/xen: only warn once if bad MFNs are found during setup In xen_add_extra_mem(), if the WARN() checks for bad MFNs trigger it is likely that they will trigger a lot, spamming the log. Use WARN_ONCE() instead. 
Signed-off-by: David Vrabel Reviewed-by: Konrad Rzeszutek Wilk Tested-by: Konrad Rzeszutek Wilk diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 0982233..2afe55e 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -89,10 +89,10 @@ static void __init xen_add_extra_mem(u64 start, u64 size) for (pfn = PFN_DOWN(start); pfn < xen_max_p2m_pfn; pfn++) { unsigned long mfn = pfn_to_mfn(pfn); - if (WARN(mfn == pfn, "Trying to over-write 1-1 mapping (pfn: %lx)\n", pfn)) + if (WARN_ONCE(mfn == pfn, "Trying to over-write 1-1 mapping (pfn: %lx)\n", pfn)) continue; - WARN(mfn != INVALID_P2M_ENTRY, "Trying to remove %lx which has %lx mfn!\n", - pfn, mfn); + WARN_ONCE(mfn != INVALID_P2M_ENTRY, "Trying to remove %lx which has %lx mfn!\n", + pfn, mfn); __set_phys_to_machine(pfn, INVALID_P2M_ENTRY); } -- cgit v0.10.2 From 25b884a83d487fd62c3de7ac1ab5549979188482 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Fri, 3 Jan 2014 15:46:10 +0000 Subject: x86/xen: set regions above the end of RAM as 1:1 PCI devices may have BARs located above the end of RAM so mark such frames as identity frames in the p2m (instead of the default of missing). PFNs outside the p2m (above MAX_P2M_PFN) are also considered to be identity frames for the same reason. 
Signed-off-by: David Vrabel Reviewed-by: Konrad Rzeszutek Wilk Tested-by: Konrad Rzeszutek Wilk diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 5700144..9bb3d82 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -507,7 +507,7 @@ unsigned long get_phys_to_machine(unsigned long pfn) unsigned topidx, mididx, idx; if (unlikely(pfn >= MAX_P2M_PFN)) - return INVALID_P2M_ENTRY; + return IDENTITY_FRAME(pfn); topidx = p2m_top_index(pfn); mididx = p2m_mid_index(pfn); diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 2afe55e..210426a 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -469,6 +469,15 @@ char * __init xen_memory_setup(void) } /* + * Set the rest as identity mapped, in case PCI BARs are + * located here. + * + * PFNs above MAX_P2M_PFN are considered identity mapped as + * well. + */ + set_phys_range_identity(map[i-1].addr / PAGE_SIZE, ~0ul); + + /* * In domU, the ISA region is normal, usable memory, but we * reserve ISA memory anyway because too many things poke * about in there. -- cgit v0.10.2 From f59c5145dc6a079b14b349c388d44362eb813cdf Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Wed, 8 Jan 2014 14:00:01 +0000 Subject: x86/xen: do not use _PAGE_IOMAP in xen_remap_domain_mfn_range() _PAGE_IOMAP is used in xen_remap_domain_mfn_range() to prevent the pfn_pte() call in remap_area_mfn_pte_fn() from using the p2m to translate the MFN. If mfn_pte() is used instead, the p2m look up is avoided and the use of _PAGE_IOMAP is no longer needed. 
Signed-off-by: David Vrabel Reviewed-by: Konrad Rzeszutek Wilk Tested-by: Konrad Rzeszutek Wilk diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 86e02ea..d916024 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -2522,7 +2522,7 @@ static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token, unsigned long addr, void *data) { struct remap_data *rmd = data; - pte_t pte = pte_mkspecial(pfn_pte(rmd->mfn++, rmd->prot)); + pte_t pte = pte_mkspecial(mfn_pte(rmd->mfn++, rmd->prot)); rmd->mmu_update->ptr = virt_to_machine(ptep).maddr; rmd->mmu_update->val = pte_val_ma(pte); @@ -2547,8 +2547,6 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma, if (xen_feature(XENFEAT_auto_translated_physmap)) return -EINVAL; - prot = __pgprot(pgprot_val(prot) | _PAGE_IOMAP); - BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO))); rmd.mfn = mfn; -- cgit v0.10.2 From 8899035eec4105a10149817fdaf684f127670f8c Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Tue, 3 Dec 2013 21:34:03 -0500 Subject: xen-pciback: Cleanup up pcistub_put_pci_dev We are using 'psdev->dev', 'found_psdev->dev', and 'dev' at the same time - and they all point to the same structure. To keep it straight let's just use one - 'dev'. Signed-off-by: Konrad Rzeszutek Wilk Reviewed-by: Jan Beulich Reviewed-by: David Vrabel diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c index 62fcd48..5300a21 100644 --- a/drivers/xen/xen-pciback/pci_stub.c +++ b/drivers/xen/xen-pciback/pci_stub.c @@ -272,16 +272,16 @@ void pcistub_put_pci_dev(struct pci_dev *dev) * and want to inhibit the user from fiddling with 'reset' */ pci_reset_function(dev); - pci_restore_state(psdev->dev); + pci_restore_state(dev); /* This disables the device. */ - xen_pcibk_reset_device(found_psdev->dev); + xen_pcibk_reset_device(dev); /* And cleanup up our emulated fields.
*/ - xen_pcibk_config_free_dyn_fields(found_psdev->dev); - xen_pcibk_config_reset_dev(found_psdev->dev); + xen_pcibk_config_free_dyn_fields(dev); + xen_pcibk_config_reset_dev(dev); - xen_unregister_device_domain_owner(found_psdev->dev); + xen_unregister_device_domain_owner(dev); spin_lock_irqsave(&found_psdev->lock, flags); found_psdev->pdev = NULL; -- cgit v0.10.2 From fcb8ce968fb5d476cbb3f1547ade0c05fc2f6c59 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Tue, 3 Dec 2013 21:37:24 -0500 Subject: xen-pciback: First reset, then free. We were doing the operations of freeing and reset in the wrong order. Granted nothing broke because the reset functions just set bar->which = 0. But nonetheless this was incorrect. Signed-off-by: Konrad Rzeszutek Wilk Reviewed-by: Jan Beulich Reviewed-by: David Vrabel diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c index 5300a21..36dd4f3 100644 --- a/drivers/xen/xen-pciback/pci_stub.c +++ b/drivers/xen/xen-pciback/pci_stub.c @@ -278,8 +278,8 @@ void pcistub_put_pci_dev(struct pci_dev *dev) xen_pcibk_reset_device(dev); /* And cleanup up our emulated fields. */ - xen_pcibk_config_free_dyn_fields(dev); xen_pcibk_config_reset_dev(dev); + xen_pcibk_config_free_dyn_fields(dev); xen_unregister_device_domain_owner(dev); -- cgit v0.10.2 From 8be9df6d4b6b53112abb078cb58827767471d797 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Tue, 3 Dec 2013 21:47:37 -0500 Subject: xen-pciback: Document when we FLR an PCI device. When the toolstack wants us to drop or add a PCI device it changes the XenBus state to Configuring - and as a result of that we find out which devices we should still be exporting out and which ones not. For the ones we don't need anymore we need to do a PCI reset so that it is ready for the next guest.
Signed-off-by: Konrad Rzeszutek Wilk Reviewed-by: David Vrabel diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c index 36dd4f3..b84426a 100644 --- a/drivers/xen/xen-pciback/pci_stub.c +++ b/drivers/xen/xen-pciback/pci_stub.c @@ -551,6 +551,8 @@ static void pcistub_remove(struct pci_dev *dev) pr_warn("****** shutdown driver domain before binding device\n"); pr_warn("****** to other drivers or domains\n"); + /* N.B. This ends up calling pcistub_put_pci_dev which ends up + * doing the FLR. */ xen_pcibk_release_pci_dev(found_psdev->pdev, found_psdev->dev); } diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c index a9ed867..4a7e6e0 100644 --- a/drivers/xen/xen-pciback/xenbus.c +++ b/drivers/xen/xen-pciback/xenbus.c @@ -93,6 +93,8 @@ static void free_pdev(struct xen_pcibk_device *pdev) xen_pcibk_disconnect(pdev); + /* N.B. This calls pcistub_put_pci_dev which does the FLR on all + * of the PCIe devices. */ xen_pcibk_release_devices(pdev); dev_set_drvdata(&pdev->xdev->dev, NULL); @@ -286,6 +288,8 @@ static int xen_pcibk_remove_device(struct xen_pcibk_device *pdev, dev_dbg(&dev->dev, "unregistering for %d\n", pdev->xdev->otherend_id); xen_unregister_device_domain_owner(dev); + /* N.B. This ends up calling pcistub_put_pci_dev which ends up + * doing the FLR. */ xen_pcibk_release_pci_dev(pdev, dev); out: -- cgit v0.10.2 From 24d8bf1b0a12a92171c6f08a665c6ff3f0788c91 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Mon, 21 Apr 2014 15:43:08 -0400 Subject: xen/pciback: Document when the 'unbind' and 'bind' functions are called. And also mention that you cannot do any pci_reset_function, pci_reset_slot, or such calls. This is because they take the same lock as SysFS does - and we would end up with a dead-lock if we call those functions. 
Signed-off-by: Konrad Rzeszutek Wilk Reviewed-by: David Vrabel diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c index b84426a..1539bec 100644 --- a/drivers/xen/xen-pciback/pci_stub.c +++ b/drivers/xen/xen-pciback/pci_stub.c @@ -493,6 +493,8 @@ static int pcistub_seize(struct pci_dev *dev) return err; } +/* Called when 'bind'. This means we must _NOT_ call pci_reset_function or + * other functions that take the sysfs lock. */ static int pcistub_probe(struct pci_dev *dev, const struct pci_device_id *id) { int err = 0; @@ -520,6 +522,8 @@ out: return err; } +/* Called when 'unbind'. This means we must _NOT_ call pci_reset_function or + * other functions that take the sysfs lock. */ static void pcistub_remove(struct pci_dev *dev) { struct pcistub_device *psdev, *found_psdev = NULL; -- cgit v0.10.2 From 0a9fd0152929db372ff61b0d6c280fdd34ae8bdb Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Tue, 22 Apr 2014 10:48:17 -0400 Subject: xen/pciback: Document the entry points for 'pcistub_put_pci_dev' which are quite a few. It should be evident that dealing with that many options is a bit complex. Signed-off-by: Konrad Rzeszutek Wilk Reviewed-by: David Vrabel diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c index 1539bec..d57a173 100644 --- a/drivers/xen/xen-pciback/pci_stub.c +++ b/drivers/xen/xen-pciback/pci_stub.c @@ -242,6 +242,15 @@ struct pci_dev *pcistub_get_pci_dev(struct xen_pcibk_device *pdev, return found_dev; } +/* + * Called when: + * - XenBus state has been reconfigure (pci unplug). See xen_pcibk_remove_device + * - XenBus state has been disconnected (guest shutdown). See xen_pcibk_xenbus_remove + * - 'echo BDF > unbind' on pciback module with no guest attached. See pcistub_remove + * - 'echo BDF > unbind' with a guest still using it. See pcistub_remove + * + * As such we have to be careful. 
+ */ void pcistub_put_pci_dev(struct pci_dev *dev) { struct pcistub_device *psdev, *found_psdev = NULL; -- cgit v0.10.2 From 1a4b50f674d0d2f66569f08d063996971b7d48f2 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 19 Mar 2014 16:03:23 -0400 Subject: xen-acpi-processor: Don't display errors when we get -ENOSYS which is a perfectly legal error. This can be triggered if the user has booted Xen with the no-cpuidle parameter. Reported-by-and-Tested-by: Don Slutz Signed-off-by: Konrad Rzeszutek Wilk diff --git a/drivers/xen/xen-acpi-processor.c b/drivers/xen/xen-acpi-processor.c index 82358d1..59fc190 100644 --- a/drivers/xen/xen-acpi-processor.c +++ b/drivers/xen/xen-acpi-processor.c @@ -127,7 +127,7 @@ static int push_cxx_to_hypervisor(struct acpi_processor *_pr) pr_debug(" C%d: %s %d uS\n", cx->type, cx->desc, (u32)cx->latency); } - } else if (ret != -EINVAL) + } else if ((ret != -EINVAL) && (ret != -ENOSYS)) /* EINVAL means the ACPI ID is incorrect - meaning the ACPI * table is referencing a non-existing CPU - which can happen * with broken ACPI tables. */ @@ -259,7 +259,7 @@ static int push_pxx_to_hypervisor(struct acpi_processor *_pr) (u32) perf->states[i].power, (u32) perf->states[i].transition_latency); } - } else if (ret != -EINVAL) + } else if ((ret != -EINVAL) && (ret != -ENOSYS)) /* EINVAL means the ACPI ID is incorrect - meaning the ACPI * table is referencing a non-existing CPU - which can happen * with broken ACPI tables. */ -- cgit v0.10.2 From 77945ca73e9a66cae25882fcab33ae0c6692763f Mon Sep 17 00:00:00 2001 From: Mukesh Rathor Date: Fri, 23 May 2014 19:33:44 -0700 Subject: x86/xen: map foreign pfns for autotranslated guests When running as a dom0 in PVH mode, foreign pfns that are accessed must be added to our p2m which is managed by xen. This is done via XENMEM_add_to_physmap_range hypercall. This is needed for toolstack building guests and mapping guest memory, xentrace mapping xen pages, etc. 
Signed-off-by: Mukesh Rathor Signed-off-by: David Vrabel diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index d916024..6f6e15d 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -2510,6 +2510,95 @@ void __init xen_hvm_init_mmu_ops(void) } #endif +#ifdef CONFIG_XEN_PVH +/* + * Map foreign gfn (fgfn), to local pfn (lpfn). This for the user + * space creating new guest on pvh dom0 and needing to map domU pages. + */ +static int xlate_add_to_p2m(unsigned long lpfn, unsigned long fgfn, + unsigned int domid) +{ + int rc, err = 0; + xen_pfn_t gpfn = lpfn; + xen_ulong_t idx = fgfn; + + struct xen_add_to_physmap_range xatp = { + .domid = DOMID_SELF, + .foreign_domid = domid, + .size = 1, + .space = XENMAPSPACE_gmfn_foreign, + }; + set_xen_guest_handle(xatp.idxs, &idx); + set_xen_guest_handle(xatp.gpfns, &gpfn); + set_xen_guest_handle(xatp.errs, &err); + + rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap_range, &xatp); + if (rc < 0) + return rc; + return err; +} + +static int xlate_remove_from_p2m(unsigned long spfn, int count) +{ + struct xen_remove_from_physmap xrp; + int i, rc; + + for (i = 0; i < count; i++) { + xrp.domid = DOMID_SELF; + xrp.gpfn = spfn+i; + rc = HYPERVISOR_memory_op(XENMEM_remove_from_physmap, &xrp); + if (rc) + break; + } + return rc; +} + +struct xlate_remap_data { + unsigned long fgfn; /* foreign domain's gfn */ + pgprot_t prot; + domid_t domid; + int index; + struct page **pages; +}; + +static int xlate_map_pte_fn(pte_t *ptep, pgtable_t token, unsigned long addr, + void *data) +{ + int rc; + struct xlate_remap_data *remap = data; + unsigned long pfn = page_to_pfn(remap->pages[remap->index++]); + pte_t pteval = pte_mkspecial(pfn_pte(pfn, remap->prot)); + + rc = xlate_add_to_p2m(pfn, remap->fgfn, remap->domid); + if (rc) + return rc; + native_set_pte(ptep, pteval); + + return 0; +} + +static int xlate_remap_gfn_range(struct vm_area_struct *vma, + unsigned long addr, unsigned long mfn, + int nr, pgprot_t prot, unsigned domid, + struct 
page **pages) +{ + int err; + struct xlate_remap_data pvhdata; + + BUG_ON(!pages); + + pvhdata.fgfn = mfn; + pvhdata.prot = prot; + pvhdata.domid = domid; + pvhdata.index = 0; + pvhdata.pages = pages; + err = apply_to_page_range(vma->vm_mm, addr, nr << PAGE_SHIFT, + xlate_map_pte_fn, &pvhdata); + flush_tlb_all(); + return err; +} +#endif + #define REMAP_BATCH_SIZE 16 struct remap_data { @@ -2544,11 +2633,18 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma, unsigned long range; int err = 0; - if (xen_feature(XENFEAT_auto_translated_physmap)) - return -EINVAL; - BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO))); + if (xen_feature(XENFEAT_auto_translated_physmap)) { +#ifdef CONFIG_XEN_PVH + /* We need to update the local page tables and the xen HAP */ + return xlate_remap_gfn_range(vma, addr, mfn, nr, prot, + domid, pages); +#else + return -EINVAL; +#endif + } + rmd.mfn = mfn; rmd.prot = prot; @@ -2586,6 +2682,25 @@ int xen_unmap_domain_mfn_range(struct vm_area_struct *vma, if (!pages || !xen_feature(XENFEAT_auto_translated_physmap)) return 0; +#ifdef CONFIG_XEN_PVH + while (numpgs--) { + /* + * The mmu has already cleaned up the process mmu + * resources at this point (lookup_address will return + * NULL). + */ + unsigned long pfn = page_to_pfn(pages[numpgs]); + + xlate_remove_from_p2m(pfn, 1); + } + /* + * We don't need to flush tlbs because as part of + * xlate_remove_from_p2m, the hypervisor will do tlb flushes + * after removing the p2m entries from the EPT/NPT + */ + return 0; +#else return -EINVAL; +#endif } EXPORT_SYMBOL_GPL(xen_unmap_domain_mfn_range); -- cgit v0.10.2