From a4e081b0dd16780d960b1ef86985de40a6729fb5 Mon Sep 17 00:00:00 2001 From: Shannon Zhao Date: Thu, 7 Apr 2016 20:03:18 +0800 Subject: Xen: ACPI: Hide UART used by Xen ACPI 6.0 introduces a new table STAO to list the devices which are used by Xen and can't be used by Dom0. On Xen virtual platforms, the physical UART is used by Xen. So here it hides UART from Dom0. CC: "Rafael J. Wysocki" (supporter:ACPI) CC: Len Brown (supporter:ACPI) CC: linux-acpi@vger.kernel.org (open list:ACPI) Signed-off-by: Shannon Zhao Acked-by: Rafael J. Wysocki diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 5f28cf7..cfc73fe 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -46,6 +46,13 @@ DEFINE_MUTEX(acpi_device_lock); LIST_HEAD(acpi_wakeup_device_list); static DEFINE_MUTEX(acpi_hp_context_lock); +/* + * The UART device described by the SPCR table is the only object which needs + * special-casing. Everything else is covered by ACPI namespace paths in STAO + * table. + */ +static u64 spcr_uart_addr; + struct acpi_dep_data { struct list_head node; acpi_handle master; @@ -1453,6 +1460,41 @@ static int acpi_add_single_object(struct acpi_device **child, return 0; } +static acpi_status acpi_get_resource_memory(struct acpi_resource *ares, + void *context) +{ + struct resource *res = context; + + if (acpi_dev_resource_memory(ares, res)) + return AE_CTRL_TERMINATE; + + return AE_OK; +} + +static bool acpi_device_should_be_hidden(acpi_handle handle) +{ + acpi_status status; + struct resource res; + + /* Check if it should ignore the UART device */ + if (!(spcr_uart_addr && acpi_has_method(handle, METHOD_NAME__CRS))) + return false; + + /* + * The UART device described in SPCR table is assumed to have only one + * memory resource present. So we only look for the first one here. + */ + status = acpi_walk_resources(handle, METHOD_NAME__CRS, + acpi_get_resource_memory, &res); + if (ACPI_FAILURE(status) || res.start != spcr_uart_addr) + return false; + + acpi_handle_info(handle, "The UART device @%pa in SPCR table will be hidden\n", + &res.start); + + return true; +} + static int acpi_bus_type_and_status(acpi_handle handle, int *type, unsigned long long *sta) { @@ -1466,6 +1508,9 @@ static int acpi_bus_type_and_status(acpi_handle handle, int *type, switch (acpi_type) { case ACPI_TYPE_ANY: /* for ACPI_ROOT_OBJECT */ case ACPI_TYPE_DEVICE: + if (acpi_device_should_be_hidden(handle)) + return -ENODEV; + *type = ACPI_BUS_TYPE_DEVICE; status = acpi_bus_get_status_handle(handle, sta); if (ACPI_FAILURE(status)) @@ -1916,9 +1961,24 @@ static int acpi_bus_scan_fixed(void) return result < 0 ? result : 0; } +static void __init acpi_get_spcr_uart_addr(void) +{ + acpi_status status; + struct acpi_table_spcr *spcr_ptr; + + status = acpi_get_table(ACPI_SIG_SPCR, 0, + (struct acpi_table_header **)&spcr_ptr); + if (ACPI_SUCCESS(status)) + spcr_uart_addr = spcr_ptr->serial_port.address; + else + printk(KERN_WARNING PREFIX "STAO table present, but SPCR is missing\n"); +} + int __init acpi_scan_init(void) { int result; + acpi_status status; + struct acpi_table_stao *stao_ptr; acpi_pci_root_init(); acpi_pci_link_init(); @@ -1934,6 +1994,20 @@ int __init acpi_scan_init(void) acpi_scan_add_handler(&generic_device_handler); + /* + * If there is STAO table, check whether it needs to ignore the UART + * device in SPCR table. + */ + status = acpi_get_table(ACPI_SIG_STAO, 0, + (struct acpi_table_header **)&stao_ptr); + if (ACPI_SUCCESS(status)) { + if (stao_ptr->header.length > sizeof(struct acpi_table_stao)) + printk(KERN_INFO PREFIX "STAO Name List not yet supported."); + + if (stao_ptr->ignore_uart) + acpi_get_spcr_uart_addr(); + } + mutex_lock(&acpi_scan_lock); /* * Enumerate devices in the ACPI namespace. -- cgit v0.10.2 From 243848fc018cb98c2a70c39fe1f93eb266c79835 Mon Sep 17 00:00:00 2001 From: Shannon Zhao Date: Thu, 7 Apr 2016 20:03:19 +0800 Subject: xen/grant-table: Move xlated_setup_gnttab_pages to common place Move xlated_setup_gnttab_pages to common place, so it can be reused by ARM to setup grant table. Rename it to xen_xlate_map_ballooned_pages. Signed-off-by: Shannon Zhao Reviewed-by: Stefano Stabellini Reviewed-by: Julien Grall Tested-by: Julien Grall diff --git a/arch/x86/xen/grant-table.c b/arch/x86/xen/grant-table.c index e079500..de4144c 100644 --- a/arch/x86/xen/grant-table.c +++ b/arch/x86/xen/grant-table.c @@ -111,63 +111,18 @@ int arch_gnttab_init(unsigned long nr_shared) } #ifdef CONFIG_XEN_PVH -#include #include -#include -static int __init xlated_setup_gnttab_pages(void) -{ - struct page **pages; - xen_pfn_t *pfns; - void *vaddr; - int rc; - unsigned int i; - unsigned long nr_grant_frames = gnttab_max_grant_frames(); - - BUG_ON(nr_grant_frames == 0); - pages = kcalloc(nr_grant_frames, sizeof(pages[0]), GFP_KERNEL); - if (!pages) - return -ENOMEM; - - pfns = kcalloc(nr_grant_frames, sizeof(pfns[0]), GFP_KERNEL); - if (!pfns) { - kfree(pages); - return -ENOMEM; - } - rc = alloc_xenballooned_pages(nr_grant_frames, pages); - if (rc) { - pr_warn("%s Couldn't balloon alloc %ld pfns rc:%d\n", __func__, - nr_grant_frames, rc); - kfree(pages); - kfree(pfns); - return rc; - } - for (i = 0; i < nr_grant_frames; i++) - pfns[i] = page_to_pfn(pages[i]); - - vaddr = vmap(pages, nr_grant_frames, 0, PAGE_KERNEL); - if (!vaddr) { - pr_warn("%s Couldn't map %ld pfns rc:%d\n", __func__, - nr_grant_frames, rc); - free_xenballooned_pages(nr_grant_frames, pages); - kfree(pages); - kfree(pfns); - return -ENOMEM; - } - kfree(pages); - - xen_auto_xlat_grant_frames.pfn = pfns; - xen_auto_xlat_grant_frames.count = nr_grant_frames; - xen_auto_xlat_grant_frames.vaddr = vaddr; - - return 0; -} - +#include static int __init xen_pvh_gnttab_setup(void) { if (!xen_pvh_domain()) return -ENODEV; - return xlated_setup_gnttab_pages(); + xen_auto_xlat_grant_frames.count = gnttab_max_grant_frames(); + + return xen_xlate_map_ballooned_pages(&xen_auto_xlat_grant_frames.pfn, + &xen_auto_xlat_grant_frames.vaddr, + xen_auto_xlat_grant_frames.count); } /* Call it _before_ __gnttab_init as we need to initialize the * xen_auto_xlat_grant_frames first. */ diff --git a/drivers/xen/xlate_mmu.c b/drivers/xen/xlate_mmu.c index 5063c5e..9692656 100644 --- a/drivers/xen/xlate_mmu.c +++ b/drivers/xen/xlate_mmu.c @@ -29,6 +29,8 @@ */ #include #include +#include +#include #include #include @@ -37,6 +39,7 @@ #include #include #include +#include typedef void (*xen_gfn_fn_t)(unsigned long gfn, void *data); @@ -185,3 +188,61 @@ int xen_xlate_unmap_gfn_range(struct vm_area_struct *vma, return 0; } EXPORT_SYMBOL_GPL(xen_xlate_unmap_gfn_range); + +/** + * xen_xlate_map_ballooned_pages - map a new set of ballooned pages + * @gfns: returns the array of corresponding GFNs + * @virt: returns the virtual address of the mapped region + * @nr_grant_frames: number of GFNs + * @return 0 on success, error otherwise + * + * This allocates a set of ballooned pages and maps them into the + * kernel's address space. + */ +int __init xen_xlate_map_ballooned_pages(xen_pfn_t **gfns, void **virt, + unsigned long nr_grant_frames) +{ + struct page **pages; + xen_pfn_t *pfns; + void *vaddr; + int rc; + unsigned int i; + + BUG_ON(nr_grant_frames == 0); + pages = kcalloc(nr_grant_frames, sizeof(pages[0]), GFP_KERNEL); + if (!pages) + return -ENOMEM; + + pfns = kcalloc(nr_grant_frames, sizeof(pfns[0]), GFP_KERNEL); + if (!pfns) { + kfree(pages); + return -ENOMEM; + } + rc = alloc_xenballooned_pages(nr_grant_frames, pages); + if (rc) { + pr_warn("%s Couldn't balloon alloc %ld pfns rc:%d\n", __func__, + nr_grant_frames, rc); + kfree(pages); + kfree(pfns); + return rc; + } + for (i = 0; i < nr_grant_frames; i++) + pfns[i] = page_to_pfn(pages[i]); + + vaddr = vmap(pages, nr_grant_frames, 0, PAGE_KERNEL); + if (!vaddr) { + pr_warn("%s Couldn't map %ld pfns rc:%d\n", __func__, + nr_grant_frames, rc); + free_xenballooned_pages(nr_grant_frames, pages); + kfree(pages); + kfree(pfns); + return -ENOMEM; + } + kfree(pages); + + *gfns = pfns; + *virt = vaddr; + + return 0; +} +EXPORT_SYMBOL_GPL(xen_xlate_map_ballooned_pages); diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h index 86abe07..072be1c 100644 --- a/include/xen/xen-ops.h +++ b/include/xen/xen-ops.h @@ -85,6 +85,8 @@ int xen_xlate_remap_gfn_array(struct vm_area_struct *vma, struct page **pages); int xen_xlate_unmap_gfn_range(struct vm_area_struct *vma, int nr, struct page **pages); +int xen_xlate_map_ballooned_pages(xen_pfn_t **pfns, void **vaddr, + unsigned long nr_grant_frames); bool xen_running_on_version_or_later(unsigned int major, unsigned int minor); -- cgit v0.10.2 From 975fac3c4f38e0b47514abdb689548a8e9971081 Mon Sep 17 00:00:00 2001 From: Shannon Zhao Date: Thu, 7 Apr 2016 20:03:20 +0800 Subject: Xen: xlate: Use page_to_xen_pfn instead of page_to_pfn Make xen_xlate_map_ballooned_pages work with 64K pages. In that case Kernel pages are 64K in size but Xen pages remain 4K in size. Xen pfns refer to 4K pages. Signed-off-by: Shannon Zhao Reviewed-by: Stefano Stabellini Reviewed-by: Julien Grall Tested-by: Julien Grall diff --git a/drivers/xen/xlate_mmu.c b/drivers/xen/xlate_mmu.c index 9692656..23f1387 100644 --- a/drivers/xen/xlate_mmu.c +++ b/drivers/xen/xlate_mmu.c @@ -189,6 +189,18 @@ int xen_xlate_unmap_gfn_range(struct vm_area_struct *vma, } EXPORT_SYMBOL_GPL(xen_xlate_unmap_gfn_range); +struct map_balloon_pages { + xen_pfn_t *pfns; + unsigned int idx; +}; + +static void setup_balloon_gfn(unsigned long gfn, void *data) +{ + struct map_balloon_pages *info = data; + + info->pfns[info->idx++] = gfn; +} + /** * xen_xlate_map_ballooned_pages - map a new set of ballooned pages * @gfns: returns the array of corresponding GFNs @@ -205,11 +217,13 @@ int __init xen_xlate_map_ballooned_pages(xen_pfn_t **gfns, void **virt, struct page **pages; xen_pfn_t *pfns; void *vaddr; + struct map_balloon_pages data; int rc; - unsigned int i; + unsigned long nr_pages; BUG_ON(nr_grant_frames == 0); - pages = kcalloc(nr_grant_frames, sizeof(pages[0]), GFP_KERNEL); + nr_pages = DIV_ROUND_UP(nr_grant_frames, XEN_PFN_PER_PAGE); + pages = kcalloc(nr_pages, sizeof(pages[0]), GFP_KERNEL); if (!pages) return -ENOMEM; @@ -218,22 +232,24 @@ int __init xen_xlate_map_ballooned_pages(xen_pfn_t **gfns, void **virt, kfree(pages); return -ENOMEM; } - rc = alloc_xenballooned_pages(nr_grant_frames, pages); + rc = alloc_xenballooned_pages(nr_pages, pages); if (rc) { - pr_warn("%s Couldn't balloon alloc %ld pfns rc:%d\n", __func__, - nr_grant_frames, rc); + pr_warn("%s Couldn't balloon alloc %ld pages rc:%d\n", __func__, + nr_pages, rc); kfree(pages); kfree(pfns); return rc; } - for (i = 0; i < nr_grant_frames; i++) - pfns[i] = page_to_pfn(pages[i]); - vaddr = vmap(pages, nr_grant_frames, 0, PAGE_KERNEL); + data.pfns = pfns; + data.idx = 0; + xen_for_each_gfn(pages, nr_grant_frames, setup_balloon_gfn, &data); + + vaddr = vmap(pages, nr_pages, 0, PAGE_KERNEL); if (!vaddr) { - pr_warn("%s Couldn't map %ld pfns rc:%d\n", __func__, - nr_grant_frames, rc); - free_xenballooned_pages(nr_grant_frames, pages); + pr_warn("%s Couldn't map %ld pages rc:%d\n", __func__, + nr_pages, rc); + free_xenballooned_pages(nr_pages, pages); kfree(pages); kfree(pfns); return -ENOMEM; -- cgit v0.10.2 From 3cf4095d7446efde28b48c26050b9db6f0bcb004 Mon Sep 17 00:00:00 2001 From: Shannon Zhao Date: Thu, 7 Apr 2016 20:03:21 +0800 Subject: arm/xen: Use xen_xlate_map_ballooned_pages to setup grant table Use xen_xlate_map_ballooned_pages to setup grant table. Then it doesn't rely on DT or ACPI to pass the start address and size of grant table. Signed-off-by: Shannon Zhao Acked-by: Stefano Stabellini Reviewed-by: Julien Grall Tested-by: Julien Grall diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index 75cd734..d94f726 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -282,18 +282,10 @@ static int __init xen_guest_init(void) { struct xen_add_to_physmap xatp; struct shared_info *shared_info_page = NULL; - struct resource res; - phys_addr_t grant_frames; if (!xen_domain()) return 0; - if (of_address_to_resource(xen_node, GRANT_TABLE_PHYSADDR, &res)) { - pr_err("Xen grant table base address not found\n"); - return -ENODEV; - } - grant_frames = res.start; - xen_events_irq = irq_of_parse_and_map(xen_node, 0); if (!xen_events_irq) { pr_err("Xen event channel interrupt not found\n"); @@ -328,7 +320,10 @@ static int __init xen_guest_init(void) if (xen_vcpu_info == NULL) return -ENOMEM; - if (gnttab_setup_auto_xlat_frames(grant_frames)) { + xen_auto_xlat_grant_frames.count = gnttab_max_grant_frames(); + if (xen_xlate_map_ballooned_pages(&xen_auto_xlat_grant_frames.pfn, + &xen_auto_xlat_grant_frames.vaddr, + xen_auto_xlat_grant_frames.count)) { free_percpu(xen_vcpu_info); return -ENOMEM; } -- cgit v0.10.2 From 712a5b77cb5a1d3028a8c202f4f839802a03f19f Mon Sep 17 00:00:00 2001 From: Shannon Zhao Date: Thu, 7 Apr 2016 20:03:22 +0800 Subject: xen: memory : Add new XENMAPSPACE type XENMAPSPACE_dev_mmio Add a new type of Xen map space for Dom0 to map device's MMIO region. Signed-off-by: Shannon Zhao Reviewed-by: Julien Grall Tested-by: Julien Grall diff --git a/include/xen/interface/memory.h b/include/xen/interface/memory.h index 2ecfe4f..9aa8988 100644 --- a/include/xen/interface/memory.h +++ b/include/xen/interface/memory.h @@ -160,6 +160,7 @@ DEFINE_GUEST_HANDLE_STRUCT(xen_machphys_mapping_t); #define XENMAPSPACE_gmfn_foreign 4 /* GMFN from another dom, * XENMEM_add_to_physmap_range only. */ +#define XENMAPSPACE_dev_mmio 5 /* device mmio region */ /* * Sets the GPFN at which a particular page appears in the specified guest's -- cgit v0.10.2 From 4ba04bec3755b765bb10b21943afbee60c33288d Mon Sep 17 00:00:00 2001 From: Shannon Zhao Date: Thu, 7 Apr 2016 20:03:23 +0800 Subject: Xen: ARM: Add support for mapping platform device mmio Add a bus_notifier for platform bus device in order to map the device mmio regions when DOM0 booting with ACPI. Signed-off-by: Shannon Zhao Acked-by: Stefano Stabellini Tested-by: Julien Grall diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index 030e91b..8feab810 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile @@ -10,6 +10,7 @@ CFLAGS_features.o := $(nostackp) CFLAGS_efi.o += -fshort-wchar LDFLAGS += $(call ld-option, --no-wchar-size-warning) +dom0-$(CONFIG_ARM64) += arm-device.o dom0-$(CONFIG_PCI) += pci.o dom0-$(CONFIG_USB_SUPPORT) += dbgp.o dom0-$(CONFIG_XEN_ACPI) += acpi.o $(xen-pad-y) diff --git a/drivers/xen/arm-device.c b/drivers/xen/arm-device.c new file mode 100644 index 0000000..b918e8e --- /dev/null +++ b/drivers/xen/arm-device.c @@ -0,0 +1,153 @@ +/* + * Copyright (c) 2015, Linaro Limited, Shannon Zhao + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include +#include +#include + +static int xen_unmap_device_mmio(const struct resource *resources, + unsigned int count) +{ + unsigned int i, j, nr; + int rc = 0; + const struct resource *r; + struct xen_remove_from_physmap xrp; + + for (i = 0; i < count; i++) { + r = &resources[i]; + nr = DIV_ROUND_UP(resource_size(r), XEN_PAGE_SIZE); + if ((resource_type(r) != IORESOURCE_MEM) || (nr == 0)) + continue; + + for (j = 0; j < nr; j++) { + xrp.domid = DOMID_SELF; + xrp.gpfn = XEN_PFN_DOWN(r->start) + j; + rc = HYPERVISOR_memory_op(XENMEM_remove_from_physmap, + &xrp); + if (rc) + return rc; + } + } + + return rc; +} + +static int xen_map_device_mmio(const struct resource *resources, + unsigned int count) +{ + unsigned int i, j, nr; + int rc = 0; + const struct resource *r; + xen_pfn_t *gpfns; + xen_ulong_t *idxs; + int *errs; + struct xen_add_to_physmap_range xatp; + + for (i = 0; i < count; i++) { + r = &resources[i]; + nr = DIV_ROUND_UP(resource_size(r), XEN_PAGE_SIZE); + if ((resource_type(r) != IORESOURCE_MEM) || (nr == 0)) + continue; + + gpfns = kzalloc(sizeof(xen_pfn_t) * nr, GFP_KERNEL); + idxs = kzalloc(sizeof(xen_ulong_t) * nr, GFP_KERNEL); + errs = kzalloc(sizeof(int) * nr, GFP_KERNEL); + if (!gpfns || !idxs || !errs) { + kfree(gpfns); + kfree(idxs); + kfree(errs); + rc = -ENOMEM; + goto unmap; + } + + for (j = 0; j < nr; j++) { + /* + * The regions are always mapped 1:1 to DOM0 and this is + * fine because the memory map for DOM0 is the same as + * the host (except for the RAM). + */ + gpfns[j] = XEN_PFN_DOWN(r->start) + j; + idxs[j] = XEN_PFN_DOWN(r->start) + j; + } + + xatp.domid = DOMID_SELF; + xatp.size = nr; + xatp.space = XENMAPSPACE_dev_mmio; + + set_xen_guest_handle(xatp.gpfns, gpfns); + set_xen_guest_handle(xatp.idxs, idxs); + set_xen_guest_handle(xatp.errs, errs); + + rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap_range, &xatp); + kfree(gpfns); + kfree(idxs); + kfree(errs); + if (rc) + goto unmap; + } + + return rc; + +unmap: + xen_unmap_device_mmio(resources, i); + return rc; +} + +static int xen_platform_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct platform_device *pdev = to_platform_device(data); + int r = 0; + + if (pdev->num_resources == 0 || pdev->resource == NULL) + return NOTIFY_OK; + + switch (action) { + case BUS_NOTIFY_ADD_DEVICE: + r = xen_map_device_mmio(pdev->resource, pdev->num_resources); + break; + case BUS_NOTIFY_DEL_DEVICE: + r = xen_unmap_device_mmio(pdev->resource, pdev->num_resources); + break; + default: + return NOTIFY_DONE; + } + if (r) + dev_err(&pdev->dev, "Platform: Failed to %s device %s MMIO!\n", + action == BUS_NOTIFY_ADD_DEVICE ? "map" : + (action == BUS_NOTIFY_DEL_DEVICE ? "unmap" : "?"), + pdev->name); + + return NOTIFY_OK; +} + +static struct notifier_block platform_device_nb = { + .notifier_call = xen_platform_notifier, +}; + +static int __init register_xen_platform_notifier(void) +{ + if (!xen_initial_domain() || acpi_disabled) + return 0; + + return bus_register_notifier(&platform_bus_type, &platform_device_nb); +} + +arch_initcall(register_xen_platform_notifier); -- cgit v0.10.2 From 5789afeb0efb4b4eb914ee10c12b597044cf1d22 Mon Sep 17 00:00:00 2001 From: Shannon Zhao Date: Thu, 7 Apr 2016 20:03:24 +0800 Subject: Xen: ARM: Add support for mapping AMBA device mmio Add a bus_notifier for AMBA bus device in order to map the device mmio regions when DOM0 booting with ACPI. Signed-off-by: Shannon Zhao Reviewed-by: Stefano Stabellini Reviewed-by: Julien Grall Tested-by: Julien Grall diff --git a/drivers/xen/arm-device.c b/drivers/xen/arm-device.c index b918e8e..778acf8 100644 --- a/drivers/xen/arm-device.c +++ b/drivers/xen/arm-device.c @@ -151,3 +151,46 @@ static int __init register_xen_platform_notifier(void) } arch_initcall(register_xen_platform_notifier); + +#ifdef CONFIG_ARM_AMBA +#include + +static int xen_amba_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct amba_device *adev = to_amba_device(data); + int r = 0; + + switch (action) { + case BUS_NOTIFY_ADD_DEVICE: + r = xen_map_device_mmio(&adev->res, 1); + break; + case BUS_NOTIFY_DEL_DEVICE: + r = xen_unmap_device_mmio(&adev->res, 1); + break; + default: + return NOTIFY_DONE; + } + if (r) + dev_err(&adev->dev, "AMBA: Failed to %s device %s MMIO!\n", + action == BUS_NOTIFY_ADD_DEVICE ? "map" : + (action == BUS_NOTIFY_DEL_DEVICE ? "unmap" : "?"), + adev->dev.init_name); + + return NOTIFY_OK; +} + +static struct notifier_block amba_device_nb = { + .notifier_call = xen_amba_notifier, +}; + +static int __init register_xen_amba_notifier(void) +{ + if (!xen_initial_domain() || acpi_disabled) + return 0; + + return bus_register_notifier(&amba_bustype, &amba_device_nb); +} + +arch_initcall(register_xen_amba_notifier); +#endif -- cgit v0.10.2 From b6f0bcc23fa9cb32752cbf263d4014a21f132f92 Mon Sep 17 00:00:00 2001 From: Shannon Zhao Date: Thu, 7 Apr 2016 20:03:25 +0800 Subject: Xen: public/hvm: sync changes of HVM_PARAM_CALLBACK_VIA ABI from Xen Sync the changes of HVM_PARAM_CALLBACK_VIA ABI introduced by Xen commit (public/hvm: export the HVM_PARAM_CALLBACK_VIA ABI in the API). Signed-off-by: Shannon Zhao Acked-by: Stefano Stabellini Tested-by: Julien Grall diff --git a/include/xen/interface/hvm/params.h b/include/xen/interface/hvm/params.h index a6c7991..70ad208 100644 --- a/include/xen/interface/hvm/params.h +++ b/include/xen/interface/hvm/params.h @@ -27,16 +27,31 @@ * Parameter space for HVMOP_{set,get}_param. */ +#define HVM_PARAM_CALLBACK_IRQ 0 /* * How should CPU0 event-channel notifications be delivered? - * val[63:56] == 0: val[55:0] is a delivery GSI (Global System Interrupt). - * val[63:56] == 1: val[55:0] is a delivery PCI INTx line, as follows: - * Domain = val[47:32], Bus = val[31:16], - * DevFn = val[15: 8], IntX = val[ 1: 0] - * val[63:56] == 2: val[7:0] is a vector number. + * * If val == 0 then CPU0 event-channel notifications are not delivered. + * If val != 0, val[63:56] encodes the type, as follows: + */ + +#define HVM_PARAM_CALLBACK_TYPE_GSI 0 +/* + * val[55:0] is a delivery GSI. GSI 0 cannot be used, as it aliases val == 0, + * and disables all notifications. + */ + +#define HVM_PARAM_CALLBACK_TYPE_PCI_INTX 1 +/* + * val[55:0] is a delivery PCI INTx line: + * Domain = val[47:32], Bus = val[31:16] DevFn = val[15:8], IntX = val[1:0] + */ + +#define HVM_PARAM_CALLBACK_TYPE_VECTOR 2 +/* + * val[7:0] is a vector number. Check for XENFEAT_hvm_callback_vector to know + * if this delivery method is available. */ -#define HVM_PARAM_CALLBACK_IRQ 0 #define HVM_PARAM_STORE_PFN 1 #define HVM_PARAM_STORE_EVTCHN 2 -- cgit v0.10.2 From 383ff518a79fe3dcece579b9d30be77b219d10f8 Mon Sep 17 00:00:00 2001 From: Shannon Zhao Date: Thu, 7 Apr 2016 20:03:26 +0800 Subject: xen/hvm/params: Add a new delivery type for event-channel in HVM_PARAM_CALLBACK_IRQ This new delivery type which is for ARM shares the same value with HVM_PARAM_CALLBACK_TYPE_VECTOR which is for x86. val[15:8] is flag: val[7:0] is a PPI. To the flag, bit 8 stands the interrupt mode is edge(1) or level(0) and bit 9 stands the interrupt polarity is active low(1) or high(0). Signed-off-by: Shannon Zhao Acked-by: Stefano Stabellini Reviewed-by: Julien Grall Tested-by: Julien Grall diff --git a/include/xen/interface/hvm/params.h b/include/xen/interface/hvm/params.h index 70ad208..4d61fc5 100644 --- a/include/xen/interface/hvm/params.h +++ b/include/xen/interface/hvm/params.h @@ -47,11 +47,24 @@ * Domain = val[47:32], Bus = val[31:16] DevFn = val[15:8], IntX = val[1:0] */ +#if defined(__i386__) || defined(__x86_64__) #define HVM_PARAM_CALLBACK_TYPE_VECTOR 2 /* * val[7:0] is a vector number. Check for XENFEAT_hvm_callback_vector to know * if this delivery method is available. */ +#elif defined(__arm__) || defined(__aarch64__) +#define HVM_PARAM_CALLBACK_TYPE_PPI 2 +/* + * val[55:16] needs to be zero. + * val[15:8] is interrupt flag of the PPI used by event-channel: + * bit 8: the PPI is edge(1) or level(0) triggered + * bit 9: the PPI is active low(1) or high(0) + * val[7:0] is a PPI number used by event-channel. + * This is only used by ARM/ARM64 and masking/eoi the interrupt associated to + * the notification is handled by the interrupt controller. + */ +#endif #define HVM_PARAM_STORE_PFN 1 #define HVM_PARAM_STORE_EVTCHN 2 -- cgit v0.10.2 From d22cbe651f6573f66c649d5ba91fb87552bbb297 Mon Sep 17 00:00:00 2001 From: Shannon Zhao Date: Thu, 7 Apr 2016 20:03:27 +0800 Subject: arm/xen: Get event-channel irq through HVM_PARAM when booting with ACPI The kernel will get the event-channel IRQ through HVM_PARAM_CALLBACK_IRQ. Signed-off-by: Shannon Zhao Reviewed-by: Stefano Stabellini Reviewed-by: Julien Grall Tested-by: Julien Grall diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index d94f726..06bd61a 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -30,6 +30,7 @@ #include #include #include +#include #include @@ -278,6 +279,35 @@ void __init xen_early_init(void) add_preferred_console("hvc", 0, NULL); } +static void __init xen_acpi_guest_init(void) +{ +#ifdef CONFIG_ACPI + struct xen_hvm_param a; + int interrupt, trigger, polarity; + + a.domid = DOMID_SELF; + a.index = HVM_PARAM_CALLBACK_IRQ; + + if (HYPERVISOR_hvm_op(HVMOP_get_param, &a) + || (a.value >> 56) != HVM_PARAM_CALLBACK_TYPE_PPI) { + xen_events_irq = 0; + return; + } + + interrupt = a.value & 0xff; + trigger = ((a.value >> 8) & 0x1) ? ACPI_EDGE_SENSITIVE + : ACPI_LEVEL_SENSITIVE; + polarity = ((a.value >> 8) & 0x2) ? ACPI_ACTIVE_LOW + : ACPI_ACTIVE_HIGH; + xen_events_irq = acpi_register_gsi(NULL, interrupt, trigger, polarity); +#endif +} + +static void __init xen_dt_guest_init(void) +{ + xen_events_irq = irq_of_parse_and_map(xen_node, 0); +} + static int __init xen_guest_init(void) { struct xen_add_to_physmap xatp; @@ -286,7 +316,11 @@ static int __init xen_guest_init(void) if (!xen_domain()) return 0; - xen_events_irq = irq_of_parse_and_map(xen_node, 0); + if (!acpi_disabled) + xen_acpi_guest_init(); + else + xen_dt_guest_init(); + if (!xen_events_irq) { pr_err("Xen event channel interrupt not found\n"); return -ENODEV; -- cgit v0.10.2 From 9b08aaa3199a4dffca73c7cdec813b483b5b2d3b Mon Sep 17 00:00:00 2001 From: Shannon Zhao Date: Thu, 7 Apr 2016 20:03:28 +0800 Subject: ARM: XEN: Move xen_early_init() before efi_init() Move xen_early_init() before efi_init(), then when calling efi_init() could initialize Xen specific UEFI. Check if it runs on Xen hypervisor through the flat dts. Cc: Russell King Signed-off-by: Shannon Zhao Reviewed-by: Stefano Stabellini Reviewed-by: Julien Grall Tested-by: Julien Grall Acked-by: Catalin Marinas diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 7b53500..261dae6 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -1064,6 +1064,7 @@ void __init setup_arch(char **cmdline_p) early_paging_init(mdesc); #endif setup_dma_zone(mdesc); + xen_early_init(); efi_init(); sanity_check_meminfo(); arm_memblock_init(mdesc); @@ -1080,7 +1081,6 @@ void __init setup_arch(char **cmdline_p) arm_dt_init_cpu_maps(); psci_dt_init(); - xen_early_init(); #ifdef CONFIG_SMP if (is_smp()) { if (!mdesc->smp_init || !mdesc->smp_init()) { diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index 06bd61a..13e3e9f 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -53,8 +54,6 @@ struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata; static __read_mostly unsigned int xen_events_irq; -static __initdata struct device_node *xen_node; - int xen_remap_domain_gfn_array(struct vm_area_struct *vma, unsigned long addr, xen_pfn_t *gfn, int nr, @@ -238,6 +237,33 @@ static irqreturn_t xen_arm_callback(int irq, void *arg) return IRQ_HANDLED; } +static __initdata struct { + const char *compat; + const char *prefix; + const char *version; + bool found; +} hyper_node = {"xen,xen", "xen,xen-", NULL, false}; + +static int __init fdt_find_hyper_node(unsigned long node, const char *uname, + int depth, void *data) +{ + const void *s = NULL; + int len; + + if (depth != 1 || strcmp(uname, "hypervisor") != 0) + return 0; + + if (of_flat_dt_is_compatible(node, hyper_node.compat)) + hyper_node.found = true; + + s = of_get_flat_dt_prop(node, "compatible", &len); + if (strlen(hyper_node.prefix) + 3 < len && + !strncmp(hyper_node.prefix, s, strlen(hyper_node.prefix))) + hyper_node.version = s + strlen(hyper_node.prefix); + + return 0; +} + /* * see Documentation/devicetree/bindings/arm/xen.txt for the * documentation of the Xen Device Tree format. @@ -245,26 +271,18 @@ static irqreturn_t xen_arm_callback(int irq, void *arg) #define GRANT_TABLE_PHYSADDR 0 void __init xen_early_init(void) { - int len; - const char *s = NULL; - const char *version = NULL; - const char *xen_prefix = "xen,xen-"; - - xen_node = of_find_compatible_node(NULL, NULL, "xen,xen"); - if (!xen_node) { + of_scan_flat_dt(fdt_find_hyper_node, NULL); + if (!hyper_node.found) { pr_debug("No Xen support\n"); return; } - s = of_get_property(xen_node, "compatible", &len); - if (strlen(xen_prefix) + 3 < len && - !strncmp(xen_prefix, s, strlen(xen_prefix))) - version = s + strlen(xen_prefix); - if (version == NULL) { + + if (hyper_node.version == NULL) { pr_debug("Xen version not found\n"); return; } - pr_info("Xen %s support found\n", version); + pr_info("Xen %s support found\n", hyper_node.version); xen_domain_type = XEN_HVM_DOMAIN; @@ -305,6 +323,14 @@ static void __init xen_acpi_guest_init(void) static void __init xen_dt_guest_init(void) { + struct device_node *xen_node; + + xen_node = of_find_compatible_node(NULL, NULL, "xen,xen"); + if (!xen_node) { + pr_err("Xen support was detected before, but it has disappeared\n"); + return; + } + xen_events_irq = irq_of_parse_and_map(xen_node, 0); } diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 3279def..feab2ee 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -257,6 +257,7 @@ void __init setup_arch(char **cmdline_p) */ cpu_uninstall_idmap(); + xen_early_init(); efi_init(); arm64_memblock_init(); @@ -281,8 +282,6 @@ void __init setup_arch(char **cmdline_p) else psci_acpi_init(); - xen_early_init(); - cpu_read_bootcpu_ops(); smp_init_cpus(); smp_build_mpidr_hash(); -- cgit v0.10.2 From 9d2f145fce926ac35f1f602ed87bc61547cc6da5 Mon Sep 17 00:00:00 2001 From: Shannon Zhao Date: Thu, 7 Apr 2016 20:03:30 +0800 Subject: ARM: Xen: Document UEFI support on Xen ARM virtual platforms Add a "uefi" node under /hypervisor node in FDT, then Linux kernel could scan this to get the UEFI information. CC: Rob Herring Signed-off-by: Shannon Zhao Acked-by: Rob Herring Reviewed-by: Stefano Stabellini Tested-by: Julien Grall diff --git a/Documentation/devicetree/bindings/arm/xen.txt b/Documentation/devicetree/bindings/arm/xen.txt index 0f7b9c2..c9b9321 100644 --- a/Documentation/devicetree/bindings/arm/xen.txt +++ b/Documentation/devicetree/bindings/arm/xen.txt @@ -11,10 +11,32 @@ the following properties: memory where the grant table should be mapped to, using an HYPERVISOR_memory_op hypercall. The memory region is large enough to map the whole grant table (it is larger or equal to gnttab_max_grant_frames()). + This property is unnecessary when booting Dom0 using ACPI. - interrupts: the interrupt used by Xen to inject event notifications. A GIC node is also required. + This property is unnecessary when booting Dom0 using ACPI. +To support UEFI on Xen ARM virtual platforms, Xen populates the FDT "uefi" node +under /hypervisor with following parameters: + +________________________________________________________________________________ +Name | Size | Description +================================================================================ +xen,uefi-system-table | 64-bit | Guest physical address of the UEFI System + | | Table. +-------------------------------------------------------------------------------- +xen,uefi-mmap-start | 64-bit | Guest physical address of the UEFI memory + | | map. +-------------------------------------------------------------------------------- +xen,uefi-mmap-size | 32-bit | Size in bytes of the UEFI memory map + | | pointed to in previous entry. +-------------------------------------------------------------------------------- +xen,uefi-mmap-desc-size | 32-bit | Size in bytes of each entry in the UEFI + | | memory map. +-------------------------------------------------------------------------------- +xen,uefi-mmap-desc-ver | 32-bit | Version of the mmap descriptor format. +-------------------------------------------------------------------------------- Example (assuming #address-cells = <2> and #size-cells = <2>): @@ -22,4 +44,17 @@ hypervisor { compatible = "xen,xen-4.3", "xen,xen"; reg = <0 0xb0000000 0 0x20000>; interrupts = <1 15 0xf08>; + uefi { + xen,uefi-system-table = <0xXXXXXXXX>; + xen,uefi-mmap-start = <0xXXXXXXXX>; + xen,uefi-mmap-size = <0xXXXXXXXX>; + xen,uefi-mmap-desc-size = <0xXXXXXXXX>; + xen,uefi-mmap-desc-ver = <0xXXXXXXXX>; + }; }; + +The format and meaning of the "xen,uefi-*" parameters are similar to those in +Documentation/arm/uefi.txt, which are provided by the regular UEFI stub. However +they differ because they are provided by the Xen hypervisor, together with a set +of UEFI runtime services implemented via hypercalls, see +http://xenbits.xen.org/docs/unstable/hypercall/x86_64/include,public,platform.h.html. -- cgit v0.10.2 From a62ed500307bfaf4c1a818b69f7c1e7df1039a16 Mon Sep 17 00:00:00 2001 From: Shannon Zhao Date: Thu, 7 Apr 2016 20:03:31 +0800 Subject: XEN: EFI: Move x86 specific codes to architecture directory Move x86 specific codes to architecture directory and export those EFI runtime service functions. This will be useful for initializing runtime service on ARM later. Signed-off-by: Shannon Zhao Reviewed-by: Stefano Stabellini Tested-by: Julien Grall Signed-off-by: Stefano Stabellini diff --git a/arch/x86/xen/efi.c b/arch/x86/xen/efi.c index be14cc3..3be0121 100644 --- a/arch/x86/xen/efi.c +++ b/arch/x86/xen/efi.c @@ -20,10 +20,121 @@ #include #include +#include #include +#include #include #include +#include + +static efi_char16_t vendor[100] __initdata; + +static efi_system_table_t efi_systab_xen __initdata = { + .hdr = { + .signature = EFI_SYSTEM_TABLE_SIGNATURE, + .revision = 0, /* Initialized later. */ + .headersize = 0, /* Ignored by Linux Kernel. */ + .crc32 = 0, /* Ignored by Linux Kernel. */ + .reserved = 0 + }, + .fw_vendor = EFI_INVALID_TABLE_ADDR, /* Initialized later. */ + .fw_revision = 0, /* Initialized later. */ + .con_in_handle = EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */ + .con_in = EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */ + .con_out_handle = EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */ + .con_out = EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */ + .stderr_handle = EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */ + .stderr = EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */ + .runtime = (efi_runtime_services_t *)EFI_INVALID_TABLE_ADDR, + /* Not used under Xen. */ + .boottime = (efi_boot_services_t *)EFI_INVALID_TABLE_ADDR, + /* Not used under Xen. */ + .nr_tables = 0, /* Initialized later. */ + .tables = EFI_INVALID_TABLE_ADDR /* Initialized later. */ +}; + +static const struct efi efi_xen __initconst = { + .systab = NULL, /* Initialized later. */ + .runtime_version = 0, /* Initialized later. */ + .mps = EFI_INVALID_TABLE_ADDR, + .acpi = EFI_INVALID_TABLE_ADDR, + .acpi20 = EFI_INVALID_TABLE_ADDR, + .smbios = EFI_INVALID_TABLE_ADDR, + .smbios3 = EFI_INVALID_TABLE_ADDR, + .sal_systab = EFI_INVALID_TABLE_ADDR, + .boot_info = EFI_INVALID_TABLE_ADDR, + .hcdp = EFI_INVALID_TABLE_ADDR, + .uga = EFI_INVALID_TABLE_ADDR, + .uv_systab = EFI_INVALID_TABLE_ADDR, + .fw_vendor = EFI_INVALID_TABLE_ADDR, + .runtime = EFI_INVALID_TABLE_ADDR, + .config_table = EFI_INVALID_TABLE_ADDR, + .get_time = xen_efi_get_time, + .set_time = xen_efi_set_time, + .get_wakeup_time = xen_efi_get_wakeup_time, + .set_wakeup_time = xen_efi_set_wakeup_time, + .get_variable = xen_efi_get_variable, + .get_next_variable = xen_efi_get_next_variable, + .set_variable = xen_efi_set_variable, + .query_variable_info = xen_efi_query_variable_info, + .update_capsule = xen_efi_update_capsule, + .query_capsule_caps = xen_efi_query_capsule_caps, + .get_next_high_mono_count = xen_efi_get_next_high_mono_count, + .reset_system = NULL, /* Functionality provided by Xen. */ + .set_virtual_address_map = NULL, /* Not used under Xen. */ + .flags = 0 /* Initialized later. */ +}; + +static efi_system_table_t __init *xen_efi_probe(void) +{ + struct xen_platform_op op = { + .cmd = XENPF_firmware_info, + .u.firmware_info = { + .type = XEN_FW_EFI_INFO, + .index = XEN_FW_EFI_CONFIG_TABLE + } + }; + union xenpf_efi_info *info = &op.u.firmware_info.u.efi_info; + + if (!xen_initial_domain() || HYPERVISOR_platform_op(&op) < 0) + return NULL; + + /* Here we know that Xen runs on EFI platform. */ + + efi = efi_xen; + + efi_systab_xen.tables = info->cfg.addr; + efi_systab_xen.nr_tables = info->cfg.nent; + + op.cmd = XENPF_firmware_info; + op.u.firmware_info.type = XEN_FW_EFI_INFO; + op.u.firmware_info.index = XEN_FW_EFI_VENDOR; + info->vendor.bufsz = sizeof(vendor); + set_xen_guest_handle(info->vendor.name, vendor); + + if (HYPERVISOR_platform_op(&op) == 0) { + efi_systab_xen.fw_vendor = __pa_symbol(vendor); + efi_systab_xen.fw_revision = info->vendor.revision; + } else + efi_systab_xen.fw_vendor = __pa_symbol(L"UNKNOWN"); + + op.cmd = XENPF_firmware_info; + op.u.firmware_info.type = XEN_FW_EFI_INFO; + op.u.firmware_info.index = XEN_FW_EFI_VERSION; + + if (HYPERVISOR_platform_op(&op) == 0) + efi_systab_xen.hdr.revision = info->version; + + op.cmd = XENPF_firmware_info; + op.u.firmware_info.type = XEN_FW_EFI_INFO; + op.u.firmware_info.index = XEN_FW_EFI_RT_VERSION; + + if (HYPERVISOR_platform_op(&op) == 0) + efi.runtime_version = info->version; + + return &efi_systab_xen; +} void __init xen_efi_init(void) { diff --git a/drivers/xen/efi.c b/drivers/xen/efi.c index e9d2135..22f71ff 100644 --- a/drivers/xen/efi.c +++ b/drivers/xen/efi.c @@ -38,7 +38,7 @@ #define efi_data(op) (op.u.efi_runtime_call) -static efi_status_t xen_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc) +efi_status_t xen_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc) { struct xen_platform_op op = INIT_EFI_OP(get_time); @@ -59,8 +59,9 @@ static efi_status_t xen_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc) return efi_data(op).status; } +EXPORT_SYMBOL_GPL(xen_efi_get_time); -static efi_status_t xen_efi_set_time(efi_time_t *tm) +efi_status_t xen_efi_set_time(efi_time_t *tm) { struct xen_platform_op op = INIT_EFI_OP(set_time); @@ -72,10 +73,10 @@ static efi_status_t xen_efi_set_time(efi_time_t *tm) return efi_data(op).status; } +EXPORT_SYMBOL_GPL(xen_efi_set_time); -static efi_status_t xen_efi_get_wakeup_time(efi_bool_t *enabled, - efi_bool_t *pending, - efi_time_t *tm) +efi_status_t xen_efi_get_wakeup_time(efi_bool_t *enabled, efi_bool_t *pending, + efi_time_t *tm) { struct xen_platform_op op = INIT_EFI_OP(get_wakeup_time); @@ -95,8 +96,9 @@ static efi_status_t xen_efi_get_wakeup_time(efi_bool_t *enabled, return efi_data(op).status; } +EXPORT_SYMBOL_GPL(xen_efi_get_wakeup_time); -static efi_status_t xen_efi_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm) +efi_status_t xen_efi_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm) { struct xen_platform_op op = INIT_EFI_OP(set_wakeup_time); @@ -113,12 +115,11 @@ static efi_status_t xen_efi_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm) return efi_data(op).status; } +EXPORT_SYMBOL_GPL(xen_efi_set_wakeup_time); -static efi_status_t xen_efi_get_variable(efi_char16_t *name, - efi_guid_t *vendor, - u32 *attr, - unsigned long *data_size, - void *data) +efi_status_t xen_efi_get_variable(efi_char16_t *name, efi_guid_t *vendor, + u32 *attr, unsigned long *data_size, + void *data) { struct xen_platform_op op = INIT_EFI_OP(get_variable); @@ -138,10 +139,11 @@ static efi_status_t xen_efi_get_variable(efi_char16_t *name, return efi_data(op).status; } +EXPORT_SYMBOL_GPL(xen_efi_get_variable); -static efi_status_t xen_efi_get_next_variable(unsigned long *name_size, - efi_char16_t *name, - efi_guid_t *vendor) +efi_status_t xen_efi_get_next_variable(unsigned long *name_size, + efi_char16_t *name, + efi_guid_t *vendor) { struct xen_platform_op op = INIT_EFI_OP(get_next_variable_name); @@ -161,12 +163,11 @@ static efi_status_t xen_efi_get_next_variable(unsigned long *name_size, return efi_data(op).status; } +EXPORT_SYMBOL_GPL(xen_efi_get_next_variable); -static efi_status_t xen_efi_set_variable(efi_char16_t *name, - efi_guid_t *vendor, - u32 attr, - unsigned long data_size, - void *data) +efi_status_t xen_efi_set_variable(efi_char16_t *name, efi_guid_t *vendor, + u32 attr, unsigned long data_size, + void *data) { struct xen_platform_op op = INIT_EFI_OP(set_variable); @@ -183,11 +184,11 @@ static efi_status_t xen_efi_set_variable(efi_char16_t *name, return efi_data(op).status; } +EXPORT_SYMBOL_GPL(xen_efi_set_variable); -static efi_status_t xen_efi_query_variable_info(u32 attr, - u64 *storage_space, - u64 *remaining_space, - u64 *max_variable_size) +efi_status_t xen_efi_query_variable_info(u32 attr, u64 *storage_space, + u64 *remaining_space, + u64 *max_variable_size) { struct xen_platform_op op = INIT_EFI_OP(query_variable_info); @@ -205,8 +206,9 @@ static efi_status_t xen_efi_query_variable_info(u32 attr, return efi_data(op).status; } +EXPORT_SYMBOL_GPL(xen_efi_query_variable_info); -static efi_status_t xen_efi_get_next_high_mono_count(u32 *count) +efi_status_t xen_efi_get_next_high_mono_count(u32 *count) { struct xen_platform_op op = INIT_EFI_OP(get_next_high_monotonic_count); @@ -217,10 +219,10 @@ static efi_status_t xen_efi_get_next_high_mono_count(u32 *count) return efi_data(op).status; } +EXPORT_SYMBOL_GPL(xen_efi_get_next_high_mono_count); -static efi_status_t xen_efi_update_capsule(efi_capsule_header_t **capsules, - unsigned long count, - unsigned long sg_list) +efi_status_t xen_efi_update_capsule(efi_capsule_header_t **capsules, + unsigned long count, unsigned long sg_list) { struct xen_platform_op op = INIT_EFI_OP(update_capsule); @@ -237,11 +239,11 @@ static efi_status_t xen_efi_update_capsule(efi_capsule_header_t **capsules, return efi_data(op).status; } +EXPORT_SYMBOL_GPL(xen_efi_update_capsule); -static efi_status_t xen_efi_query_capsule_caps(efi_capsule_header_t **capsules, - unsigned long count, - u64 *max_size, - int *reset_type) +efi_status_t xen_efi_query_capsule_caps(efi_capsule_header_t **capsules, + unsigned long count, u64 *max_size, + int *reset_type) { struct xen_platform_op op = INIT_EFI_OP(query_capsule_capabilities); @@ -260,111 +262,4 @@ static efi_status_t xen_efi_query_capsule_caps(efi_capsule_header_t **capsules, return efi_data(op).status; } - -static efi_char16_t vendor[100] __initdata; - -static efi_system_table_t efi_systab_xen __initdata = { - .hdr = { - .signature = EFI_SYSTEM_TABLE_SIGNATURE, - .revision = 0, /* Initialized later. */ - .headersize = 0, /* Ignored by Linux Kernel. */ - .crc32 = 0, /* Ignored by Linux Kernel. */ - .reserved = 0 - }, - .fw_vendor = EFI_INVALID_TABLE_ADDR, /* Initialized later. */ - .fw_revision = 0, /* Initialized later. */ - .con_in_handle = EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */ - .con_in = EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */ - .con_out_handle = EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */ - .con_out = EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */ - .stderr_handle = EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */ - .stderr = EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */ - .runtime = (efi_runtime_services_t *)EFI_INVALID_TABLE_ADDR, - /* Not used under Xen. */ - .boottime = (efi_boot_services_t *)EFI_INVALID_TABLE_ADDR, - /* Not used under Xen. */ - .nr_tables = 0, /* Initialized later. */ - .tables = EFI_INVALID_TABLE_ADDR /* Initialized later. */ -}; - -static const struct efi efi_xen __initconst = { - .systab = NULL, /* Initialized later. */ - .runtime_version = 0, /* Initialized later. */ - .mps = EFI_INVALID_TABLE_ADDR, - .acpi = EFI_INVALID_TABLE_ADDR, - .acpi20 = EFI_INVALID_TABLE_ADDR, - .smbios = EFI_INVALID_TABLE_ADDR, - .smbios3 = EFI_INVALID_TABLE_ADDR, - .sal_systab = EFI_INVALID_TABLE_ADDR, - .boot_info = EFI_INVALID_TABLE_ADDR, - .hcdp = EFI_INVALID_TABLE_ADDR, - .uga = EFI_INVALID_TABLE_ADDR, - .uv_systab = EFI_INVALID_TABLE_ADDR, - .fw_vendor = EFI_INVALID_TABLE_ADDR, - .runtime = EFI_INVALID_TABLE_ADDR, - .config_table = EFI_INVALID_TABLE_ADDR, - .get_time = xen_efi_get_time, - .set_time = xen_efi_set_time, - .get_wakeup_time = xen_efi_get_wakeup_time, - .set_wakeup_time = xen_efi_set_wakeup_time, - .get_variable = xen_efi_get_variable, - .get_next_variable = xen_efi_get_next_variable, - .set_variable = xen_efi_set_variable, - .query_variable_info = xen_efi_query_variable_info, - .update_capsule = xen_efi_update_capsule, - .query_capsule_caps = xen_efi_query_capsule_caps, - .get_next_high_mono_count = xen_efi_get_next_high_mono_count, - .reset_system = NULL, /* Functionality provided by Xen. */ - .set_virtual_address_map = NULL, /* Not used under Xen. */ - .flags = 0 /* Initialized later. */ -}; - -efi_system_table_t __init *xen_efi_probe(void) -{ - struct xen_platform_op op = { - .cmd = XENPF_firmware_info, - .u.firmware_info = { - .type = XEN_FW_EFI_INFO, - .index = XEN_FW_EFI_CONFIG_TABLE - } - }; - union xenpf_efi_info *info = &op.u.firmware_info.u.efi_info; - - if (!xen_initial_domain() || HYPERVISOR_platform_op(&op) < 0) - return NULL; - - /* Here we know that Xen runs on EFI platform. */ - - efi = efi_xen; - - efi_systab_xen.tables = info->cfg.addr; - efi_systab_xen.nr_tables = info->cfg.nent; - - op.cmd = XENPF_firmware_info; - op.u.firmware_info.type = XEN_FW_EFI_INFO; - op.u.firmware_info.index = XEN_FW_EFI_VENDOR; - info->vendor.bufsz = sizeof(vendor); - set_xen_guest_handle(info->vendor.name, vendor); - - if (HYPERVISOR_platform_op(&op) == 0) { - efi_systab_xen.fw_vendor = __pa_symbol(vendor); - efi_systab_xen.fw_revision = info->vendor.revision; - } else - efi_systab_xen.fw_vendor = __pa_symbol(L"UNKNOWN"); - - op.cmd = XENPF_firmware_info; - op.u.firmware_info.type = XEN_FW_EFI_INFO; - op.u.firmware_info.index = XEN_FW_EFI_VERSION; - - if (HYPERVISOR_platform_op(&op) == 0) - efi_systab_xen.hdr.revision = info->version; - - op.cmd = XENPF_firmware_info; - op.u.firmware_info.type = XEN_FW_EFI_INFO; - op.u.firmware_info.index = XEN_FW_EFI_RT_VERSION; - - if (HYPERVISOR_platform_op(&op) == 0) - efi.runtime_version = info->version; - - return &efi_systab_xen; -} +EXPORT_SYMBOL_GPL(xen_efi_query_capsule_caps); diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h index 072be1c..3491582 100644 --- a/include/xen/xen-ops.h +++ b/include/xen/xen-ops.h @@ -90,14 +90,28 @@ int xen_xlate_map_ballooned_pages(xen_pfn_t **pfns, void **vaddr, bool xen_running_on_version_or_later(unsigned int major, unsigned int minor); -#ifdef CONFIG_XEN_EFI -extern efi_system_table_t *xen_efi_probe(void); -#else -static inline efi_system_table_t __init *xen_efi_probe(void) -{ - return NULL; -} -#endif +efi_status_t xen_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc); +efi_status_t xen_efi_set_time(efi_time_t *tm); +efi_status_t xen_efi_get_wakeup_time(efi_bool_t *enabled, efi_bool_t *pending, + efi_time_t *tm); +efi_status_t xen_efi_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm); +efi_status_t xen_efi_get_variable(efi_char16_t *name, efi_guid_t *vendor, + u32 *attr, unsigned long *data_size, + void *data); +efi_status_t xen_efi_get_next_variable(unsigned long *name_size, + efi_char16_t *name, efi_guid_t *vendor); +efi_status_t xen_efi_set_variable(efi_char16_t *name, efi_guid_t *vendor, + u32 attr, unsigned long data_size, + void *data); +efi_status_t xen_efi_query_variable_info(u32 attr, u64 *storage_space, + u64 *remaining_space, + u64 *max_variable_size); +efi_status_t xen_efi_get_next_high_mono_count(u32 *count); +efi_status_t xen_efi_update_capsule(efi_capsule_header_t **capsules, + unsigned long count, unsigned long sg_list); +efi_status_t xen_efi_query_capsule_caps(efi_capsule_header_t **capsules, + unsigned long count, u64 *max_size, + int *reset_type); #ifdef CONFIG_PREEMPT -- cgit v0.10.2 From be1aaf4e4026118e4191117a48f8a8078d1c0ed4 Mon Sep 17 00:00:00 2001 From: Shannon Zhao Date: Thu, 7 Apr 2016 20:03:32 +0800 Subject: ARM64: XEN: Add a function to initialize Xen specific UEFI runtime services When running on Xen hypervisor, runtime services are supported through hypercall. Add a Xen specific function to initialize runtime services. Signed-off-by: Shannon Zhao Reviewed-by: Stefano Stabellini Tested-by: Julien Grall Acked-by: Catalin Marinas diff --git a/arch/arm/include/asm/xen/xen-ops.h b/arch/arm/include/asm/xen/xen-ops.h new file mode 100644 index 0000000..ec154e7 --- /dev/null +++ b/arch/arm/include/asm/xen/xen-ops.h @@ -0,0 +1,6 @@ +#ifndef _ASM_XEN_OPS_H +#define _ASM_XEN_OPS_H + +void xen_efi_runtime_setup(void); + +#endif /* _ASM_XEN_OPS_H */ diff --git a/arch/arm/xen/Makefile b/arch/arm/xen/Makefile index 1296952..2279521 100644 --- a/arch/arm/xen/Makefile +++ b/arch/arm/xen/Makefile @@ -1 +1,2 @@ obj-y := enlighten.o hypercall.o grant-table.o p2m.o mm.o +obj-$(CONFIG_XEN_EFI) += efi.o diff --git a/arch/arm/xen/efi.c b/arch/arm/xen/efi.c new file mode 100644 index 0000000..16db419 --- /dev/null +++ b/arch/arm/xen/efi.c @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2015, Linaro Limited, Shannon Zhao + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program. If not, see . + */ + +#include +#include +#include + +/* Set XEN EFI runtime services function pointers. Other fields of struct efi, + * e.g. efi.systab, will be set like normal EFI. + */ +void __init xen_efi_runtime_setup(void) +{ + efi.get_time = xen_efi_get_time; + efi.set_time = xen_efi_set_time; + efi.get_wakeup_time = xen_efi_get_wakeup_time; + efi.set_wakeup_time = xen_efi_set_wakeup_time; + efi.get_variable = xen_efi_get_variable; + efi.get_next_variable = xen_efi_get_next_variable; + efi.set_variable = xen_efi_set_variable; + efi.query_variable_info = xen_efi_query_variable_info; + efi.update_capsule = xen_efi_update_capsule; + efi.query_capsule_caps = xen_efi_query_capsule_caps; + efi.get_next_high_mono_count = xen_efi_get_next_high_mono_count; + efi.reset_system = NULL; /* Functionality provided by Xen. */ +} +EXPORT_SYMBOL_GPL(xen_efi_runtime_setup); diff --git a/arch/arm64/include/asm/xen/xen-ops.h b/arch/arm64/include/asm/xen/xen-ops.h new file mode 100644 index 0000000..ec154e7 --- /dev/null +++ b/arch/arm64/include/asm/xen/xen-ops.h @@ -0,0 +1,6 @@ +#ifndef _ASM_XEN_OPS_H +#define _ASM_XEN_OPS_H + +void xen_efi_runtime_setup(void); + +#endif /* _ASM_XEN_OPS_H */ diff --git a/arch/arm64/xen/Makefile b/arch/arm64/xen/Makefile index 74a8d87..8ff8aa9 100644 --- a/arch/arm64/xen/Makefile +++ b/arch/arm64/xen/Makefile @@ -1,2 +1,3 @@ xen-arm-y += $(addprefix ../../arm/xen/, enlighten.o grant-table.o p2m.o mm.o) obj-y := xen-arm.o hypercall.o +obj-$(CONFIG_XEN_EFI) += $(addprefix ../../arm/xen/, efi.o) diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index 979a831..f15bb3b7 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -275,7 +275,7 @@ config XEN_HAVE_PVMMU config XEN_EFI def_bool y - depends on X86_64 && EFI + depends on (ARM || ARM64 || X86_64) && EFI config XEN_AUTO_XLATE def_bool y -- cgit v0.10.2 From 9c6098685a1d5df72da61ff7838ebb1524796869 Mon Sep 17 00:00:00 2001 From: Shannon Zhao Date: Thu, 7 Apr 2016 20:03:33 +0800 Subject: FDT: Add a helper to get the subnode by given name Sometimes it needs to check if there is a subnode of given node in FDT by given name. Introduce this helper to get the subnode if it exists. CC: Rob Herring Signed-off-by: Shannon Zhao Acked-by: Stefano Stabellini Acked-by: Rob Herring Tested-by: Julien Grall diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index 33daffc..0e02947 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -744,6 +744,19 @@ int __init of_scan_flat_dt(int (*it)(unsigned long node, } /** + * of_get_flat_dt_subnode_by_name - get the subnode by given name + * + * @node: the parent node + * @uname: the name of subnode + * @return offset of the subnode, or -FDT_ERR_NOTFOUND if there is none + */ + +int of_get_flat_dt_subnode_by_name(unsigned long node, const char *uname) +{ + return fdt_subnode_offset(initial_boot_params, node, uname); +} + +/** * of_get_flat_dt_root - find the root node in the flat blob */ unsigned long __init of_get_flat_dt_root(void) diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h index 901ec01..26c3302 100644 --- a/include/linux/of_fdt.h +++ b/include/linux/of_fdt.h @@ -53,6 +53,8 @@ extern char __dtb_end[]; extern int of_scan_flat_dt(int (*it)(unsigned long node, const char *uname, int depth, void *data), void *data); +extern int of_get_flat_dt_subnode_by_name(unsigned long node, + const char *uname); extern const void *of_get_flat_dt_prop(unsigned long node, const char *name, int *size); extern int of_flat_dt_is_compatible(unsigned long node, const char *name); -- cgit v0.10.2 From 0cac5c3018b32707b3bab40e4beb83f91c4204f1 Mon Sep 17 00:00:00 2001 From: Shannon Zhao Date: Thu, 12 May 2016 20:19:54 +0800 Subject: Xen: EFI: Parse DT parameters for Xen specific UEFI The EFI DT parameters for bare metal are located under /chosen node, while for Xen Dom0 they are located under /hyperviosr/uefi node. These parameters under /chosen and /hyperviosr/uefi are not expected to appear at the same time. Parse these EFI parameters and initialize EFI like the way for bare metal except the runtime services because the runtime services for Xen Dom0 are available through hypercalls and they are always enabled. So it sets the EFI_RUNTIME_SERVICES flag if it finds /hyperviosr/uefi node and bails out in arm_enable_runtime_services() when EFI_RUNTIME_SERVICES flag is set already. Signed-off-by: Shannon Zhao Cc: Stefano Stabellini Cc: Ard Biesheuvel Cc: Leif Lindholm Signed-off-by: Matt Fleming diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index 13e3e9f..d4f36eb 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -15,7 +15,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -261,6 +263,19 @@ static int __init fdt_find_hyper_node(unsigned long node, const char *uname, !strncmp(hyper_node.prefix, s, strlen(hyper_node.prefix))) hyper_node.version = s + strlen(hyper_node.prefix); + /* + * Check if Xen supports EFI by checking whether there is the + * "/hypervisor/uefi" node in DT. If so, runtime services are available + * through proxy functions (e.g. in case of Xen dom0 EFI implementation + * they call special hypercall which executes relevant EFI functions) + * and that is why they are always enabled. + */ + if (IS_ENABLED(CONFIG_XEN_EFI)) { + if ((of_get_flat_dt_subnode_by_name(node, "uefi") > 0) && + !efi_runtime_disabled()) + set_bit(EFI_RUNTIME_SERVICES, &efi.flags); + } + return 0; } @@ -352,6 +367,13 @@ static int __init xen_guest_init(void) return -ENODEV; } + /* + * The fdt parsing codes have set EFI_RUNTIME_SERVICES if Xen EFI + * parameters are found. Force enable runtime services. + */ + if (efi_enabled(EFI_RUNTIME_SERVICES)) + xen_efi_runtime_setup(); + shared_info_page = (struct shared_info *)get_zeroed_page(GFP_KERNEL); if (!shared_info_page) { diff --git a/drivers/firmware/efi/arm-runtime.c b/drivers/firmware/efi/arm-runtime.c index 17ccf0a..c394b81 100644 --- a/drivers/firmware/efi/arm-runtime.c +++ b/drivers/firmware/efi/arm-runtime.c @@ -107,6 +107,11 @@ static int __init arm_enable_runtime_services(void) return 0; } + if (efi_enabled(EFI_RUNTIME_SERVICES)) { + pr_info("EFI runtime services access via paravirt.\n"); + return 0; + } + pr_info("Remapping and enabling EFI services.\n"); mapsize = efi.memmap.map_end - efi.memmap.map; diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 05509f3..1c6f9dd 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -472,12 +472,14 @@ device_initcall(efi_load_efivars); FIELD_SIZEOF(struct efi_fdt_params, field) \ } -static __initdata struct { +struct params { const char name[32]; const char propname[32]; int offset; int size; -} dt_params[] = { +}; + +static __initdata struct params fdt_params[] = { UEFI_PARAM("System Table", "linux,uefi-system-table", system_table), UEFI_PARAM("MemMap Address", "linux,uefi-mmap-start", mmap), UEFI_PARAM("MemMap Size", "linux,uefi-mmap-size", mmap_size), @@ -485,44 +487,91 @@ static __initdata struct { UEFI_PARAM("MemMap Desc. Version", "linux,uefi-mmap-desc-ver", desc_ver) }; +static __initdata struct params xen_fdt_params[] = { + UEFI_PARAM("System Table", "xen,uefi-system-table", system_table), + UEFI_PARAM("MemMap Address", "xen,uefi-mmap-start", mmap), + UEFI_PARAM("MemMap Size", "xen,uefi-mmap-size", mmap_size), + UEFI_PARAM("MemMap Desc. Size", "xen,uefi-mmap-desc-size", desc_size), + UEFI_PARAM("MemMap Desc. Version", "xen,uefi-mmap-desc-ver", desc_ver) +}; + +#define EFI_FDT_PARAMS_SIZE ARRAY_SIZE(fdt_params) + +static __initdata struct { + const char *uname; + const char *subnode; + struct params *params; +} dt_params[] = { + { "hypervisor", "uefi", xen_fdt_params }, + { "chosen", NULL, fdt_params }, +}; + struct param_info { int found; void *params; + const char *missing; }; -static int __init fdt_find_uefi_params(unsigned long node, const char *uname, - int depth, void *data) +static int __init __find_uefi_params(unsigned long node, + struct param_info *info, + struct params *params) { - struct param_info *info = data; const void *prop; void *dest; u64 val; int i, len; - if (depth != 1 || strcmp(uname, "chosen") != 0) - return 0; - - for (i = 0; i < ARRAY_SIZE(dt_params); i++) { - prop = of_get_flat_dt_prop(node, dt_params[i].propname, &len); - if (!prop) + for (i = 0; i < EFI_FDT_PARAMS_SIZE; i++) { + prop = of_get_flat_dt_prop(node, params[i].propname, &len); + if (!prop) { + info->missing = params[i].name; return 0; - dest = info->params + dt_params[i].offset; + } + + dest = info->params + params[i].offset; info->found++; val = of_read_number(prop, len / sizeof(u32)); - if (dt_params[i].size == sizeof(u32)) + if (params[i].size == sizeof(u32)) *(u32 *)dest = val; else *(u64 *)dest = val; if (efi_enabled(EFI_DBG)) - pr_info(" %s: 0x%0*llx\n", dt_params[i].name, - dt_params[i].size * 2, val); + pr_info(" %s: 0x%0*llx\n", params[i].name, + params[i].size * 2, val); } + return 1; } +static int __init fdt_find_uefi_params(unsigned long node, const char *uname, + int depth, void *data) +{ + struct param_info *info = data; + int i; + + for (i = 0; i < ARRAY_SIZE(dt_params); i++) { + const char *subnode = dt_params[i].subnode; + + if (depth != 1 || strcmp(uname, dt_params[i].uname) != 0) { + info->missing = dt_params[i].params[0].name; + continue; + } + + if (subnode) { + node = of_get_flat_dt_subnode_by_name(node, subnode); + if (node < 0) + return 0; + } + + return __find_uefi_params(node, info, dt_params[i].params); + } + + return 0; +} + int __init efi_get_fdt_params(struct efi_fdt_params *params) { struct param_info info; @@ -538,7 +587,7 @@ int __init efi_get_fdt_params(struct efi_fdt_params *params) pr_info("UEFI not found.\n"); else if (!ret) pr_err("Can't find '%s' in device tree!\n", - dt_params[info.found].name); + info.missing); return ret; } -- cgit v0.10.2 From c7ebf9d9c6b4e9402b978da0b0785db4129c1f79 Mon Sep 17 00:00:00 2001 From: Muhammad Falak R Wani Date: Tue, 24 May 2016 05:34:32 +0530 Subject: xen: use vma_pages(). Replace explicit computation of vma page count by a call to vma_pages(). Signed-off-by: Muhammad Falak R Wani Reviewed-by: Boris Ostrovsky Signed-off-by: David Vrabel diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c index 4547a91..7a47c4c 100644 --- a/drivers/xen/gntalloc.c +++ b/drivers/xen/gntalloc.c @@ -504,7 +504,7 @@ static int gntalloc_mmap(struct file *filp, struct vm_area_struct *vma) struct gntalloc_file_private_data *priv = filp->private_data; struct gntalloc_vma_private_data *vm_priv; struct gntalloc_gref *gref; - int count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + int count = vma_pages(vma); int rv, i; if (!(vma->vm_flags & VM_SHARED)) { diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index 6793957..bb95212 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -982,7 +982,7 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) { struct gntdev_priv *priv = flip->private_data; int index = vma->vm_pgoff; - int count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + int count = vma_pages(vma); struct grant_map *map; int i, err = -EINVAL; diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c index df2e6f7..702040f 100644 --- a/drivers/xen/privcmd.c +++ b/drivers/xen/privcmd.c @@ -582,7 +582,7 @@ static long privcmd_ioctl(struct file *file, static void privcmd_close(struct vm_area_struct *vma) { struct page **pages = vma->vm_private_data; - int numpgs = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + int numpgs = vma_pages(vma); int numgfns = (vma->vm_end - vma->vm_start) >> XEN_PAGE_SHIFT; int rc; -- cgit v0.10.2 From ecb23dc6f2eff0ce64dd60351a81f376f13b12cc Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 20 May 2016 09:26:48 +0200 Subject: xen: add steal_clock support on x86 The pv_time_ops structure contains a function pointer for the "steal_clock" functionality used only by KVM and Xen on ARM. Xen on x86 uses its own mechanism to account for the "stolen" time a thread wasn't able to run due to hypervisor scheduling. Add support in Xen arch independent time handling for this feature by moving it out of the arm arch into drivers/xen and remove the x86 Xen hack. Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Reviewed-by: Stefano Stabellini Signed-off-by: David Vrabel diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index d4f36eb..47acb36 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include @@ -86,19 +85,6 @@ int xen_unmap_domain_gfn_range(struct vm_area_struct *vma, } EXPORT_SYMBOL_GPL(xen_unmap_domain_gfn_range); -static unsigned long long xen_stolen_accounting(int cpu) -{ - struct vcpu_runstate_info state; - - BUG_ON(cpu != smp_processor_id()); - - xen_get_runstate_snapshot(&state); - - WARN_ON(state.state != RUNSTATE_running); - - return state.time[RUNSTATE_runnable] + state.time[RUNSTATE_offline]; -} - static void xen_read_wallclock(struct timespec64 *ts) { u32 version; @@ -432,8 +418,8 @@ static int __init xen_guest_init(void) register_cpu_notifier(&xen_cpu_notifier); - pv_time_ops.steal_clock = xen_stolen_accounting; - static_key_slow_inc(¶virt_steal_enabled); + xen_time_setup_guest(); + if (xen_initial_domain()) pvclock_gtod_register_notifier(&xen_pvclock_gtod_notifier); diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 6deba5b..c31006f 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -11,8 +11,6 @@ #include #include #include -#include -#include #include #include #include @@ -31,44 +29,6 @@ /* Xen may fire a timer up to this many ns early */ #define TIMER_SLOP 100000 -#define NS_PER_TICK (1000000000LL / HZ) - -/* snapshots of runstate info */ -static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate_snapshot); - -/* unused ns of stolen time */ -static DEFINE_PER_CPU(u64, xen_residual_stolen); - -static void do_stolen_accounting(void) -{ - struct vcpu_runstate_info state; - struct vcpu_runstate_info *snap; - s64 runnable, offline, stolen; - cputime_t ticks; - - xen_get_runstate_snapshot(&state); - - WARN_ON(state.state != RUNSTATE_running); - - snap = this_cpu_ptr(&xen_runstate_snapshot); - - /* work out how much time the VCPU has not been runn*ing* */ - runnable = state.time[RUNSTATE_runnable] - snap->time[RUNSTATE_runnable]; - offline = state.time[RUNSTATE_offline] - snap->time[RUNSTATE_offline]; - - *snap = state; - - /* Add the appropriate number of ticks of stolen time, - including any left-overs from last time. */ - stolen = runnable + offline + __this_cpu_read(xen_residual_stolen); - - if (stolen < 0) - stolen = 0; - - ticks = iter_div_u64_rem(stolen, NS_PER_TICK, &stolen); - __this_cpu_write(xen_residual_stolen, stolen); - account_steal_ticks(ticks); -} /* Get the TSC speed from Xen */ static unsigned long xen_tsc_khz(void) @@ -335,8 +295,6 @@ static irqreturn_t xen_timer_interrupt(int irq, void *dev_id) ret = IRQ_HANDLED; } - do_stolen_accounting(); - return ret; } @@ -431,6 +389,8 @@ static void __init xen_time_init(void) xen_setup_timer(cpu); xen_setup_cpu_clockevents(); + xen_time_setup_guest(); + if (xen_initial_domain()) pvclock_gtod_register_notifier(&xen_pvclock_gtod_notifier); } diff --git a/drivers/xen/time.c b/drivers/xen/time.c index 7107842..2257b66 100644 --- a/drivers/xen/time.c +++ b/drivers/xen/time.c @@ -6,6 +6,7 @@ #include #include +#include #include #include @@ -75,6 +76,15 @@ bool xen_vcpu_stolen(int vcpu) return per_cpu(xen_runstate, vcpu).state == RUNSTATE_runnable; } +static u64 xen_steal_clock(int cpu) +{ + struct vcpu_runstate_info state; + + BUG_ON(cpu != smp_processor_id()); + xen_get_runstate_snapshot(&state); + return state.time[RUNSTATE_runnable] + state.time[RUNSTATE_offline]; +} + void xen_setup_runstate_info(int cpu) { struct vcpu_register_runstate_memory_area area; @@ -86,3 +96,13 @@ void xen_setup_runstate_info(int cpu) BUG(); } +void __init xen_time_setup_guest(void) +{ + pv_time_ops.steal_clock = xen_steal_clock; + + static_key_slow_inc(¶virt_steal_enabled); + /* + * We can't set paravirt_steal_rq_enabled as this would require the + * capability to read another cpu's runstate info. + */ +} diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 25a822f..44fda64 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -92,7 +92,6 @@ static inline void account_process_tick(struct task_struct *tsk, int user) extern void account_process_tick(struct task_struct *, int user); #endif -extern void account_steal_ticks(unsigned long ticks); extern void account_idle_ticks(unsigned long ticks); #endif /* _LINUX_KERNEL_STAT_H */ diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h index 3491582..355275b 100644 --- a/include/xen/xen-ops.h +++ b/include/xen/xen-ops.h @@ -21,6 +21,7 @@ void xen_resume_notifier_unregister(struct notifier_block *nb); bool xen_vcpu_stolen(int vcpu); void xen_setup_runstate_info(int cpu); +void xen_time_setup_guest(void); void xen_get_runstate_snapshot(struct vcpu_runstate_info *res); int xen_setup_shutdown_event(void); diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 75f98c5..8c4c6dc 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -490,16 +490,6 @@ void account_process_tick(struct task_struct *p, int user_tick) } /* - * Account multiple ticks of steal time. - * @p: the process from which the cpu time has been stolen - * @ticks: number of stolen ticks - */ -void account_steal_ticks(unsigned long ticks) -{ - account_steal_time(jiffies_to_cputime(ticks)); -} - -/* * Account multiple ticks of idle time. * @ticks: number of stolen ticks */ -- cgit v0.10.2 From 6ab9507ed96a6c0b24174d3430064a90b3dddd0a Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Tue, 21 Jun 2016 10:17:33 -0400 Subject: xen/PMU: Log VPMU initialization error at lower level This will match how PMU errors are reported at check_hw_exists()'s msr_fail label, which is reached when VPMU initialzation fails. Signed-off-by: Boris Ostrovsky Reviewed-by: Konrad Rzeszutek Wilk Acked-by: Juergen Gross Signed-off-by: David Vrabel diff --git a/arch/x86/xen/pmu.c b/arch/x86/xen/pmu.c index 9466354..32bdc2c 100644 --- a/arch/x86/xen/pmu.c +++ b/arch/x86/xen/pmu.c @@ -547,7 +547,7 @@ void xen_pmu_init(int cpu) return; fail: - pr_warn_once("Could not initialize VPMU for cpu %d, error %d\n", + pr_info_once("Could not initialize VPMU for cpu %d, error %d\n", cpu, err); free_pages((unsigned long)xenpmu_data, 0); } -- cgit v0.10.2 From 429eafe60943bdfa33b15540ab2db5642a1f8c3c Mon Sep 17 00:00:00 2001 From: Bhaktipriya Shridhar Date: Wed, 1 Jun 2016 19:45:08 +0530 Subject: xen: xen-pciback: Remove create_workqueue System workqueues have been able to handle high level of concurrency for a long time now and there's no reason to use dedicated workqueues just to gain concurrency. Replace dedicated xen_pcibk_wq with the use of system_wq. Unlike a dedicated per-cpu workqueue created with create_workqueue(), system_wq allows multiple work items to overlap executions even on the same CPU; however, a per-cpu workqueue doesn't have any CPU locality or global ordering guarantees unless the target CPU is explicitly specified and thus the increase of local concurrency shouldn't make any difference. Since the work items could be pending, flush_work() has been used in xen_pcibk_disconnect(). xen_pcibk_xenbus_remove() calls free_pdev() which in turn calls xen_pcibk_disconnect() for every pdev to ensure that there is no pending task while disconnecting the driver. Signed-off-by: Bhaktipriya Shridhar Acked-by: Tejun Heo Signed-off-by: David Vrabel diff --git a/drivers/xen/xen-pciback/pciback.h b/drivers/xen/xen-pciback/pciback.h index 4d529f3..7af369b6 100644 --- a/drivers/xen/xen-pciback/pciback.h +++ b/drivers/xen/xen-pciback/pciback.h @@ -55,7 +55,6 @@ struct xen_pcibk_dev_data { /* Used by XenBus and xen_pcibk_ops.c */ extern wait_queue_head_t xen_pcibk_aer_wait_queue; -extern struct workqueue_struct *xen_pcibk_wq; /* Used by pcistub.c and conf_space_quirks.c */ extern struct list_head xen_pcibk_quirks; diff --git a/drivers/xen/xen-pciback/pciback_ops.c b/drivers/xen/xen-pciback/pciback_ops.c index 2f19dd7..f8c7775 100644 --- a/drivers/xen/xen-pciback/pciback_ops.c +++ b/drivers/xen/xen-pciback/pciback_ops.c @@ -310,7 +310,7 @@ void xen_pcibk_test_and_schedule_op(struct xen_pcibk_device *pdev) * already processing a request */ if (test_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags) && !test_and_set_bit(_PDEVF_op_active, &pdev->flags)) { - queue_work(xen_pcibk_wq, &pdev->op_work); + schedule_work(&pdev->op_work); } /*_XEN_PCIB_active should have been cleared by pcifront. And also make sure xen_pcibk is waiting for ack by checking _PCIB_op_pending*/ diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c index c252eb3..5ce878c 100644 --- a/drivers/xen/xen-pciback/xenbus.c +++ b/drivers/xen/xen-pciback/xenbus.c @@ -17,7 +17,6 @@ #include "pciback.h" #define INVALID_EVTCHN_IRQ (-1) -struct workqueue_struct *xen_pcibk_wq; static bool __read_mostly passthrough; module_param(passthrough, bool, S_IRUGO); @@ -76,8 +75,7 @@ static void xen_pcibk_disconnect(struct xen_pcibk_device *pdev) /* If the driver domain started an op, make sure we complete it * before releasing the shared memory */ - /* Note, the workqueue does not use spinlocks at all.*/ - flush_workqueue(xen_pcibk_wq); + flush_work(&pdev->op_work); if (pdev->sh_info != NULL) { xenbus_unmap_ring_vfree(pdev->xdev, pdev->sh_info); @@ -733,11 +731,6 @@ const struct xen_pcibk_backend *__read_mostly xen_pcibk_backend; int __init xen_pcibk_xenbus_register(void) { - xen_pcibk_wq = create_workqueue("xen_pciback_workqueue"); - if (!xen_pcibk_wq) { - pr_err("%s: create xen_pciback_workqueue failed\n", __func__); - return -EFAULT; - } xen_pcibk_backend = &xen_pcibk_vpci_backend; if (passthrough) xen_pcibk_backend = &xen_pcibk_passthrough_backend; @@ -747,6 +740,5 @@ int __init xen_pcibk_xenbus_register(void) void __exit xen_pcibk_xenbus_unregister(void) { - destroy_workqueue(xen_pcibk_wq); xenbus_unregister_driver(&xen_pcibk_driver); } -- cgit v0.10.2 From 5ee405d9d234ee5641741c07a654e4c6ba3e2a9d Mon Sep 17 00:00:00 2001 From: Bhaktipriya Shridhar Date: Tue, 31 May 2016 22:26:30 +0530 Subject: xen: xenbus: Remove create_workqueue System workqueues have been able to handle high level of concurrency for a long time now and there's no reason to use dedicated workqueues just to gain concurrency. Replace dedicated xenbus_frontend_wq with the use of system_wq. Unlike a dedicated per-cpu workqueue created with create_workqueue(), system_wq allows multiple work items to overlap executions even on the same CPU; however, a per-cpu workqueue doesn't have any CPU locality or global ordering guarantees unless the target CPU is explicitly specified and the increase of local concurrency shouldn't make any difference. In this case, there is only a single work item, increase of concurrency level by switching to system_wq should not make any difference. Signed-off-by: Bhaktipriya Shridhar Acked-by: Tejun Heo Signed-off-by: David Vrabel diff --git a/drivers/xen/xenbus/xenbus_probe_frontend.c b/drivers/xen/xenbus/xenbus_probe_frontend.c index bcb53bd..611a231 100644 --- a/drivers/xen/xenbus/xenbus_probe_frontend.c +++ b/drivers/xen/xenbus/xenbus_probe_frontend.c @@ -31,7 +31,6 @@ #include "xenbus_probe.h" -static struct workqueue_struct *xenbus_frontend_wq; /* device// => - */ static int frontend_bus_id(char bus_id[XEN_BUS_ID_SIZE], const char *nodename) @@ -109,13 +108,7 @@ static int xenbus_frontend_dev_resume(struct device *dev) if (xen_store_domain_type == XS_LOCAL) { struct xenbus_device *xdev = to_xenbus_device(dev); - if (!xenbus_frontend_wq) { - pr_err("%s: no workqueue to process delayed resume\n", - xdev->nodename); - return -EFAULT; - } - - queue_work(xenbus_frontend_wq, &xdev->work); + schedule_work(&xdev->work); return 0; } @@ -485,12 +478,6 @@ static int __init xenbus_probe_frontend_init(void) register_xenstore_notifier(&xenstore_notifier); - if (xen_store_domain_type == XS_LOCAL) { - xenbus_frontend_wq = create_workqueue("xenbus_frontend"); - if (!xenbus_frontend_wq) - pr_warn("create xenbus frontend workqueue failed, S3 resume is likely to fail\n"); - } - return 0; } subsys_initcall(xenbus_probe_frontend_init); -- cgit v0.10.2 From 585423c8c4d2f39a2c299bc6dd16433e6141fba5 Mon Sep 17 00:00:00 2001 From: Amitoj Kaur Chawla Date: Wed, 29 Jun 2016 20:30:38 +0530 Subject: x86/xen: Use DIV_ROUND_UP The kernel.h macro DIV_ROUND_UP performs the computation (((n) + (d) - 1) /(d)) but is perhaps more readable. The Coccinelle script used to make this change is as follows: @haskernel@ @@ #include @depends on haskernel@ expression n,d; @@ ( - (n + d - 1) / d + DIV_ROUND_UP(n,d) | - (n + (d - 1)) / d + DIV_ROUND_UP(n,d) ) Signed-off-by: Amitoj Kaur Chawla Signed-off-by: David Vrabel diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 760789a..ee7d71e 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -590,7 +590,7 @@ static void xen_load_gdt(const struct desc_ptr *dtr) { unsigned long va = dtr->address; unsigned int size = dtr->size + 1; - unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE; + unsigned pages = DIV_ROUND_UP(size, PAGE_SIZE); unsigned long frames[pages]; int f; @@ -639,7 +639,7 @@ static void __init xen_load_gdt_boot(const struct desc_ptr *dtr) { unsigned long va = dtr->address; unsigned int size = dtr->size + 1; - unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE; + unsigned pages = DIV_ROUND_UP(size, PAGE_SIZE); unsigned long frames[pages]; int f; -- cgit v0.10.2 From 6c6e4caa2006ab82587a3648967314ec92569a98 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 6 Jul 2016 00:56:27 -0600 Subject: xen-pciback: drop unused function parameter of read_dev_bar() Signed-off-by: Jan Beulich Signed-off-by: David Vrabel diff --git a/drivers/xen/xen-pciback/conf_space_header.c b/drivers/xen/xen-pciback/conf_space_header.c index 9ead1c2..258d99d 100644 --- a/drivers/xen/xen-pciback/conf_space_header.c +++ b/drivers/xen/xen-pciback/conf_space_header.c @@ -210,8 +210,7 @@ static int bar_read(struct pci_dev *dev, int offset, u32 * value, void *data) } static inline void read_dev_bar(struct pci_dev *dev, - struct pci_bar_info *bar_info, int offset, - u32 len_mask) + struct pci_bar_info *bar_info, int offset) { int pos; struct resource *res = dev->resource; @@ -248,7 +247,7 @@ static void *bar_init(struct pci_dev *dev, int offset) if (!bar) return ERR_PTR(-ENOMEM); - read_dev_bar(dev, bar, offset, ~0); + read_dev_bar(dev, bar, offset); return bar; } @@ -260,7 +259,7 @@ static void *rom_init(struct pci_dev *dev, int offset) if (!bar) return ERR_PTR(-ENOMEM); - read_dev_bar(dev, bar, offset, ~PCI_ROM_ADDRESS_ENABLE); + read_dev_bar(dev, bar, offset); return bar; } -- cgit v0.10.2 From 664093bb6b797c8ba0a525ee0a36ad8cbf89413e Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 6 Jul 2016 00:57:07 -0600 Subject: xen-pciback: drop rom_init() It is now identical to bar_init(). Signed-off-by: Jan Beulich Signed-off-by: David Vrabel diff --git a/drivers/xen/xen-pciback/conf_space_header.c b/drivers/xen/xen-pciback/conf_space_header.c index 258d99d..5165c98 100644 --- a/drivers/xen/xen-pciback/conf_space_header.c +++ b/drivers/xen/xen-pciback/conf_space_header.c @@ -252,18 +252,6 @@ static void *bar_init(struct pci_dev *dev, int offset) return bar; } -static void *rom_init(struct pci_dev *dev, int offset) -{ - struct pci_bar_info *bar = kzalloc(sizeof(*bar), GFP_KERNEL); - - if (!bar) - return ERR_PTR(-ENOMEM); - - read_dev_bar(dev, bar, offset); - - return bar; -} - static void bar_reset(struct pci_dev *dev, int offset, void *data) { struct pci_bar_info *bar = data; @@ -382,7 +370,7 @@ static const struct config_field header_common[] = { { \ .offset = reg_offset, \ .size = 4, \ - .init = rom_init, \ + .init = bar_init, \ .reset = bar_reset, \ .release = bar_release, \ .u.dw.read = bar_read, \ -- cgit v0.10.2 From 6ad2655d87d2d35c1de4500402fae10fe7b30b4a Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 6 Jul 2016 00:57:43 -0600 Subject: xen-pciback: fold read_dev_bar() into its now single caller Signed-off-by: Jan Beulich Signed-off-by: David Vrabel diff --git a/drivers/xen/xen-pciback/conf_space_header.c b/drivers/xen/xen-pciback/conf_space_header.c index 5165c98..56647b8 100644 --- a/drivers/xen/xen-pciback/conf_space_header.c +++ b/drivers/xen/xen-pciback/conf_space_header.c @@ -209,11 +209,14 @@ static int bar_read(struct pci_dev *dev, int offset, u32 * value, void *data) return 0; } -static inline void read_dev_bar(struct pci_dev *dev, - struct pci_bar_info *bar_info, int offset) +static void *bar_init(struct pci_dev *dev, int offset) { int pos; struct resource *res = dev->resource; + struct pci_bar_info *bar = kzalloc(sizeof(*bar), GFP_KERNEL); + + if (!bar) + return ERR_PTR(-ENOMEM); if (offset == PCI_ROM_ADDRESS || offset == PCI_ROM_ADDRESS1) pos = PCI_ROM_RESOURCE; @@ -223,31 +226,21 @@ static inline void read_dev_bar(struct pci_dev *dev, PCI_BASE_ADDRESS_MEM_TYPE_MASK)) == (PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64))) { - bar_info->val = res[pos - 1].start >> 32; - bar_info->len_val = -resource_size(&res[pos - 1]) >> 32; - return; + bar->val = res[pos - 1].start >> 32; + bar->len_val = -resource_size(&res[pos - 1]) >> 32; + return bar; } } if (!res[pos].flags || (res[pos].flags & (IORESOURCE_DISABLED | IORESOURCE_UNSET | IORESOURCE_BUSY))) - return; - - bar_info->val = res[pos].start | - (res[pos].flags & PCI_REGION_FLAG_MASK); - bar_info->len_val = -resource_size(&res[pos]) | - (res[pos].flags & PCI_REGION_FLAG_MASK); -} - -static void *bar_init(struct pci_dev *dev, int offset) -{ - struct pci_bar_info *bar = kzalloc(sizeof(*bar), GFP_KERNEL); - - if (!bar) - return ERR_PTR(-ENOMEM); + return bar; - read_dev_bar(dev, bar, offset); + bar->val = res[pos].start | + (res[pos].flags & PCI_REGION_FLAG_MASK); + bar->len_val = -resource_size(&res[pos]) | + (res[pos].flags & PCI_REGION_FLAG_MASK); return bar; } -- cgit v0.10.2 From c8670c22e04e4e42e752cc5b53922106b3eedbda Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 6 Jul 2016 00:58:19 -0600 Subject: xen-pciback: simplify determination of 64-bit memory resource Other than for raw BAR values, flags are properly separated in the internal representation. Signed-off-by: Jan Beulich Signed-off-by: David Vrabel diff --git a/drivers/xen/xen-pciback/conf_space_header.c b/drivers/xen/xen-pciback/conf_space_header.c index 56647b8..bfa610d 100644 --- a/drivers/xen/xen-pciback/conf_space_header.c +++ b/drivers/xen/xen-pciback/conf_space_header.c @@ -222,10 +222,7 @@ static void *bar_init(struct pci_dev *dev, int offset) pos = PCI_ROM_RESOURCE; else { pos = (offset - PCI_BASE_ADDRESS_0) / 4; - if (pos && ((res[pos - 1].flags & (PCI_BASE_ADDRESS_SPACE | - PCI_BASE_ADDRESS_MEM_TYPE_MASK)) == - (PCI_BASE_ADDRESS_SPACE_MEMORY | - PCI_BASE_ADDRESS_MEM_TYPE_64))) { + if (pos && (res[pos - 1].flags & IORESOURCE_MEM_64)) { bar->val = res[pos - 1].start >> 32; bar->len_val = -resource_size(&res[pos - 1]) >> 32; return bar; -- cgit v0.10.2 From 585203609c894db11dea724b743c04d0c9927f39 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 6 Jul 2016 00:58:58 -0600 Subject: xen-pciback: use const and unsigned in bar_init() Signed-off-by: Jan Beulich Signed-off-by: David Vrabel diff --git a/drivers/xen/xen-pciback/conf_space_header.c b/drivers/xen/xen-pciback/conf_space_header.c index bfa610d..5fbfd9c 100644 --- a/drivers/xen/xen-pciback/conf_space_header.c +++ b/drivers/xen/xen-pciback/conf_space_header.c @@ -211,8 +211,8 @@ static int bar_read(struct pci_dev *dev, int offset, u32 * value, void *data) static void *bar_init(struct pci_dev *dev, int offset) { - int pos; - struct resource *res = dev->resource; + unsigned int pos; + const struct resource *res = dev->resource; struct pci_bar_info *bar = kzalloc(sizeof(*bar), GFP_KERNEL); if (!bar) -- cgit v0.10.2 From ee87d6d0d36d98c550f99274a81841033226e3bf Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 6 Jul 2016 00:59:35 -0600 Subject: xen-pciback: short-circuit read path used for merging write values There's no point calling xen_pcibk_config_read() here - all it'll do is return whatever conf_space_read() returns for the field which was found here (and which would be found there again). Also there's no point clearing tmp_val before the call. Signed-off-by: Jan Beulich Signed-off-by: David Vrabel diff --git a/drivers/xen/xen-pciback/conf_space.c b/drivers/xen/xen-pciback/conf_space.c index 6a25533..6855bf5 100644 --- a/drivers/xen/xen-pciback/conf_space.c +++ b/drivers/xen/xen-pciback/conf_space.c @@ -230,10 +230,8 @@ int xen_pcibk_config_write(struct pci_dev *dev, int offset, int size, u32 value) field_end = OFFSET(cfg_entry) + field->size; if (req_end > field_start && field_end > req_start) { - tmp_val = 0; - - err = xen_pcibk_config_read(dev, field_start, - field->size, &tmp_val); + err = conf_space_read(dev, cfg_entry, field_start, + &tmp_val); if (err) break; -- cgit v0.10.2 From 1ad6344acfbf19288573b4a5fa0b07cbb5af27d7 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 6 Jul 2016 01:00:14 -0600 Subject: xen-pciback: drop superfluous variables req_start is simply an alias of the "offset" function parameter, and req_end is being used just once in each function. (And both variables were loop invariant anyway, so should at least have got initialized outside the loop.) Signed-off-by: Jan Beulich Signed-off-by: David Vrabel diff --git a/drivers/xen/xen-pciback/conf_space.c b/drivers/xen/xen-pciback/conf_space.c index 6855bf5..9e9286d 100644 --- a/drivers/xen/xen-pciback/conf_space.c +++ b/drivers/xen/xen-pciback/conf_space.c @@ -148,7 +148,7 @@ int xen_pcibk_config_read(struct pci_dev *dev, int offset, int size, struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev); const struct config_field_entry *cfg_entry; const struct config_field *field; - int req_start, req_end, field_start, field_end; + int field_start, field_end; /* if read fails for any reason, return 0 * (as if device didn't respond) */ u32 value = 0, tmp_val; @@ -178,12 +178,10 @@ int xen_pcibk_config_read(struct pci_dev *dev, int offset, int size, list_for_each_entry(cfg_entry, &dev_data->config_fields, list) { field = cfg_entry->field; - req_start = offset; - req_end = offset + size; field_start = OFFSET(cfg_entry); field_end = OFFSET(cfg_entry) + field->size; - if (req_end > field_start && field_end > req_start) { + if (offset + size > field_start && field_end > offset) { err = conf_space_read(dev, cfg_entry, field_start, &tmp_val); if (err) @@ -191,7 +189,7 @@ int xen_pcibk_config_read(struct pci_dev *dev, int offset, int size, value = merge_value(value, tmp_val, get_mask(field->size), - field_start - req_start); + field_start - offset); } } @@ -211,7 +209,7 @@ int xen_pcibk_config_write(struct pci_dev *dev, int offset, int size, u32 value) const struct config_field_entry *cfg_entry; const struct config_field *field; u32 tmp_val; - int req_start, req_end, field_start, field_end; + int field_start, field_end; if (unlikely(verbose_request)) printk(KERN_DEBUG @@ -224,19 +222,17 @@ int xen_pcibk_config_write(struct pci_dev *dev, int offset, int size, u32 value) list_for_each_entry(cfg_entry, &dev_data->config_fields, list) { field = cfg_entry->field; - req_start = offset; - req_end = offset + size; field_start = OFFSET(cfg_entry); field_end = OFFSET(cfg_entry) + field->size; - if (req_end > field_start && field_end > req_start) { + if (offset + size > field_start && field_end > offset) { err = conf_space_read(dev, cfg_entry, field_start, &tmp_val); if (err) break; tmp_val = merge_value(tmp_val, value, get_mask(size), - req_start - field_start); + offset - field_start); err = conf_space_write(dev, cfg_entry, field_start, tmp_val); -- cgit v0.10.2 From 7ba8dba95cb227eb6c270b1aa77f942e45f5e47c Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Wed, 6 Jul 2016 07:00:28 +0200 Subject: xen: update xen headers Update some Xen headers to be able to use new functionality. Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Reviewed-by: Stefano Stabellini Signed-off-by: David Vrabel diff --git a/include/xen/interface/vcpu.h b/include/xen/interface/vcpu.h index b05288c..98188c8 100644 --- a/include/xen/interface/vcpu.h +++ b/include/xen/interface/vcpu.h @@ -75,15 +75,21 @@ */ #define VCPUOP_get_runstate_info 4 struct vcpu_runstate_info { - /* VCPU's current state (RUNSTATE_*). */ - int state; - /* When was current state entered (system time, ns)? */ - uint64_t state_entry_time; - /* - * Time spent in each RUNSTATE_* (ns). The sum of these times is - * guaranteed not to drift from system time. - */ - uint64_t time[4]; + /* VCPU's current state (RUNSTATE_*). */ + int state; + /* When was current state entered (system time, ns)? */ + uint64_t state_entry_time; + /* + * Update indicator set in state_entry_time: + * When activated via VMASST_TYPE_runstate_update_flag, set during + * updates in guest memory mapped copy of vcpu_runstate_info. + */ +#define XEN_RUNSTATE_UPDATE (1ULL << 63) + /* + * Time spent in each RUNSTATE_* (ns). The sum of these times is + * guaranteed not to drift from system time. + */ + uint64_t time[4]; }; DEFINE_GUEST_HANDLE_STRUCT(vcpu_runstate_info); diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h index d133112..1b0d189 100644 --- a/include/xen/interface/xen.h +++ b/include/xen/interface/xen.h @@ -413,7 +413,22 @@ DEFINE_GUEST_HANDLE_STRUCT(mmuext_op); /* x86/PAE guests: support PDPTs above 4GB. */ #define VMASST_TYPE_pae_extended_cr3 3 -#define MAX_VMASST_TYPE 3 +/* + * x86 guests: Sane behaviour for virtual iopl + * - virtual iopl updated from do_iret() hypercalls. + * - virtual iopl reported in bounce frames. + * - guest kernels assumed to be level 0 for the purpose of iopl checks. + */ +#define VMASST_TYPE_architectural_iopl 4 + +/* + * All guests: activate update indicator in vcpu_runstate_info + * Enable setting the XEN_RUNSTATE_UPDATE flag in guest memory mapped + * vcpu_runstate_info during updates of the runstate information. + */ +#define VMASST_TYPE_runstate_update_flag 5 + +#define MAX_VMASST_TYPE 5 #ifndef __ASSEMBLY__ -- cgit v0.10.2 From 4b5ae0150f29f494427a5d5561f1cd43e6cb2396 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Wed, 6 Jul 2016 07:00:29 +0200 Subject: arm/xen: add support for vm_assist hypercall Add support for the Xen HYPERVISOR_vm_assist hypercall. Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Reviewed-by: Julien Grall Reviewed-by: Stefano Stabellini Signed-off-by: David Vrabel diff --git a/arch/arm/include/asm/xen/hypercall.h b/arch/arm/include/asm/xen/hypercall.h index b6b962d..9d874db 100644 --- a/arch/arm/include/asm/xen/hypercall.h +++ b/arch/arm/include/asm/xen/hypercall.h @@ -52,6 +52,7 @@ int HYPERVISOR_memory_op(unsigned int cmd, void *arg); int HYPERVISOR_physdev_op(int cmd, void *arg); int HYPERVISOR_vcpu_op(int cmd, int vcpuid, void *extra_args); int HYPERVISOR_tmem_op(void *arg); +int HYPERVISOR_vm_assist(unsigned int cmd, unsigned int type); int HYPERVISOR_platform_op_raw(void *arg); static inline int HYPERVISOR_platform_op(struct xen_platform_op *op) { diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index 47acb36..2f4c3aa 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -466,4 +466,5 @@ EXPORT_SYMBOL_GPL(HYPERVISOR_vcpu_op); EXPORT_SYMBOL_GPL(HYPERVISOR_tmem_op); EXPORT_SYMBOL_GPL(HYPERVISOR_platform_op); EXPORT_SYMBOL_GPL(HYPERVISOR_multicall); +EXPORT_SYMBOL_GPL(HYPERVISOR_vm_assist); EXPORT_SYMBOL_GPL(privcmd_call); diff --git a/arch/arm/xen/hypercall.S b/arch/arm/xen/hypercall.S index 9a36f4f..a648dfc 100644 --- a/arch/arm/xen/hypercall.S +++ b/arch/arm/xen/hypercall.S @@ -91,6 +91,7 @@ HYPERCALL3(vcpu_op); HYPERCALL1(tmem_op); HYPERCALL1(platform_op_raw); HYPERCALL2(multicall); +HYPERCALL2(vm_assist); ENTRY(privcmd_call) stmdb sp!, {r4} diff --git a/arch/arm64/xen/hypercall.S b/arch/arm64/xen/hypercall.S index 70df80e..329c802 100644 --- a/arch/arm64/xen/hypercall.S +++ b/arch/arm64/xen/hypercall.S @@ -82,6 +82,7 @@ HYPERCALL3(vcpu_op); HYPERCALL1(tmem_op); HYPERCALL1(platform_op_raw); HYPERCALL2(multicall); +HYPERCALL2(vm_assist); ENTRY(privcmd_call) mov x16, x0 -- cgit v0.10.2 From 6ba286ad845799b135e5af73d1fbc838fa79f709 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Wed, 6 Jul 2016 07:00:30 +0200 Subject: xen: support runqueue steal time on xen Up to now reading the stolen time of a remote cpu was not possible in a performant way under Xen. This made support of runqueue steal time via paravirt_steal_rq_enabled impossible. With the addition of an appropriate hypervisor interface this is now possible, so add the support. Signed-off-by: Juergen Gross Reviewed-by: Stefano Stabellini Signed-off-by: David Vrabel diff --git a/drivers/xen/time.c b/drivers/xen/time.c index 2257b66..a7fe35b 100644 --- a/drivers/xen/time.c +++ b/drivers/xen/time.c @@ -47,27 +47,31 @@ static u64 get64(const u64 *p) return ret; } -/* - * Runstate accounting - */ -void xen_get_runstate_snapshot(struct vcpu_runstate_info *res) +static void xen_get_runstate_snapshot_cpu(struct vcpu_runstate_info *res, + unsigned int cpu) { u64 state_time; struct vcpu_runstate_info *state; BUG_ON(preemptible()); - state = this_cpu_ptr(&xen_runstate); + state = per_cpu_ptr(&xen_runstate, cpu); - /* - * The runstate info is always updated by the hypervisor on - * the current CPU, so there's no need to use anything - * stronger than a compiler barrier when fetching it. - */ do { state_time = get64(&state->state_entry_time); + rmb(); /* Hypervisor might update data. */ *res = READ_ONCE(*state); - } while (get64(&state->state_entry_time) != state_time); + rmb(); /* Hypervisor might update data. */ + } while (get64(&state->state_entry_time) != state_time || + (state_time & XEN_RUNSTATE_UPDATE)); +} + +/* + * Runstate accounting + */ +void xen_get_runstate_snapshot(struct vcpu_runstate_info *res) +{ + xen_get_runstate_snapshot_cpu(res, smp_processor_id()); } /* return true when a vcpu could run but has no real cpu to run on */ @@ -80,8 +84,7 @@ static u64 xen_steal_clock(int cpu) { struct vcpu_runstate_info state; - BUG_ON(cpu != smp_processor_id()); - xen_get_runstate_snapshot(&state); + xen_get_runstate_snapshot_cpu(&state, cpu); return state.time[RUNSTATE_runnable] + state.time[RUNSTATE_offline]; } @@ -98,11 +101,14 @@ void xen_setup_runstate_info(int cpu) void __init xen_time_setup_guest(void) { + bool xen_runstate_remote; + + xen_runstate_remote = !HYPERVISOR_vm_assist(VMASST_CMD_enable, + VMASST_TYPE_runstate_update_flag); + pv_time_ops.steal_clock = xen_steal_clock; static_key_slow_inc(¶virt_steal_enabled); - /* - * We can't set paravirt_steal_rq_enabled as this would require the - * capability to read another cpu's runstate info. - */ + if (xen_runstate_remote) + static_key_slow_inc(¶virt_steal_rq_enabled); } -- cgit v0.10.2 From 6694389af9be4d1eb8d3313788a902f0590fb8c2 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 7 Jul 2016 02:05:21 -0600 Subject: xen-blkback: prefer xenbus_scanf() over xenbus_gather() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ... for single items being collected: It is more typesafe (as the compiler can check format string and to-be-written-to variable match) and requires one less parameter to be passed. Signed-off-by: Jan Beulich Signed-off-by: Konrad Rzeszutek Wilk Acked-by: Roger Pau Monné Acked-by: Jens Axboe diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 3355f1c..bf09ffe 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -1022,9 +1022,9 @@ static int connect_ring(struct backend_info *be) pr_debug("%s %s\n", __func__, dev->otherend); be->blkif->blk_protocol = BLKIF_PROTOCOL_DEFAULT; - err = xenbus_gather(XBT_NIL, dev->otherend, "protocol", - "%63s", protocol, NULL); - if (err) + err = xenbus_scanf(XBT_NIL, dev->otherend, "protocol", + "%63s", protocol); + if (err <= 0) strcpy(protocol, "unspecified, assuming default"); else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE)) be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE; @@ -1036,10 +1036,9 @@ static int connect_ring(struct backend_info *be) xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol); return -ENOSYS; } - err = xenbus_gather(XBT_NIL, dev->otherend, - "feature-persistent", "%u", - &pers_grants, NULL); - if (err) + err = xenbus_scanf(XBT_NIL, dev->otherend, + "feature-persistent", "%u", &pers_grants); + if (err <= 0) pers_grants = 0; be->blkif->vbd.feature_gnt_persistent = pers_grants; -- cgit v0.10.2 From ff595325ed556fb4b83af5b9ffd5c427c18405d7 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 7 Jul 2016 02:05:46 -0600 Subject: xen-blkfront: prefer xenbus_scanf() over xenbus_gather() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ... for single items being collected: It is more typesafe (as the compiler can check format string and to-be-written-to variable match) and requires one less parameter to be passed. Acked-by: Roger Pau Monné Acked-by: Jens Axboe Signed-off-by: Jan Beulich Signed-off-by: Konrad Rzeszutek Wilk diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 2e6d1e9..ca0536e 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -2208,10 +2208,9 @@ static void blkfront_setup_discard(struct blkfront_info *info) info->discard_granularity = discard_granularity; info->discard_alignment = discard_alignment; } - err = xenbus_gather(XBT_NIL, info->xbdev->otherend, - "discard-secure", "%d", &discard_secure, - NULL); - if (!err) + err = xenbus_scanf(XBT_NIL, info->xbdev->otherend, + "discard-secure", "%u", &discard_secure); + if (err > 0) info->feature_secdiscard = !!discard_secure; } @@ -2310,9 +2309,8 @@ static void blkfront_gather_backend_features(struct blkfront_info *info) info->feature_flush = 0; - err = xenbus_gather(XBT_NIL, info->xbdev->otherend, - "feature-barrier", "%d", &barrier, - NULL); + err = xenbus_scanf(XBT_NIL, info->xbdev->otherend, + "feature-barrier", "%d", &barrier); /* * If there's no "feature-barrier" defined, then it means @@ -2321,38 +2319,35 @@ static void blkfront_gather_backend_features(struct blkfront_info *info) * * If there are barriers, then we use flush. */ - if (!err && barrier) + if (err > 0 && barrier) info->feature_flush = REQ_FLUSH | REQ_FUA; /* * And if there is "feature-flush-cache" use that above * barriers. */ - err = xenbus_gather(XBT_NIL, info->xbdev->otherend, - "feature-flush-cache", "%d", &flush, - NULL); + err = xenbus_scanf(XBT_NIL, info->xbdev->otherend, + "feature-flush-cache", "%d", &flush); - if (!err && flush) + if (err > 0 && flush) info->feature_flush = REQ_FLUSH; - err = xenbus_gather(XBT_NIL, info->xbdev->otherend, - "feature-discard", "%d", &discard, - NULL); + err = xenbus_scanf(XBT_NIL, info->xbdev->otherend, + "feature-discard", "%d", &discard); - if (!err && discard) + if (err > 0 && discard) blkfront_setup_discard(info); - err = xenbus_gather(XBT_NIL, info->xbdev->otherend, - "feature-persistent", "%u", &persistent, - NULL); - if (err) + err = xenbus_scanf(XBT_NIL, info->xbdev->otherend, + "feature-persistent", "%d", &persistent); + if (err <= 0) info->feature_persistent = 0; else info->feature_persistent = persistent; - err = xenbus_gather(XBT_NIL, info->xbdev->otherend, - "feature-max-indirect-segments", "%u", &indirect_segments, - NULL); - if (err) + err = xenbus_scanf(XBT_NIL, info->xbdev->otherend, + "feature-max-indirect-segments", "%u", + &indirect_segments); + if (err <= 0) info->max_indirect_segments = 0; else info->max_indirect_segments = min(indirect_segments, -- cgit v0.10.2 From 530439484d2d9f2a7f1038b1afd3d3543ecc63f6 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 7 Jul 2016 01:38:58 -0600 Subject: xen-blkback: constify instance of "struct attribute_group" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The functions these get passed to have been taking pointers to const since at least 2.6.16. Acked-by: Jens Axboe Acked-by: Roger Pau Monné Signed-off-by: Jan Beulich Signed-off-by: Konrad Rzeszutek Wilk diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index bf09ffe..b44eaf4 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -379,7 +379,7 @@ static struct attribute *xen_vbdstat_attrs[] = { NULL }; -static struct attribute_group xen_vbdstat_group = { +static const struct attribute_group xen_vbdstat_group = { .name = "statistics", .attrs = xen_vbdstat_attrs, }; -- cgit v0.10.2 From aea305e11f7a7af12aa2beb7c7e053a338659c49 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 7 Jul 2016 01:38:13 -0600 Subject: xen-blkback: really don't leak mode property MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 9d092603cc ("xen-blkback: do not leak mode property") left one path unfixed; correct this. Acked-by: Jens Axboe Acked-by: Roger Pau Monné Signed-off-by: Jan Beulich Signed-off-by: Konrad Rzeszutek Wilk diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index b44eaf4..4a24121 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -715,8 +715,11 @@ static void backend_changed(struct xenbus_watch *watch, /* Front end dir is a number, which is used as the handle. */ err = kstrtoul(strrchr(dev->otherend, '/') + 1, 0, &handle); - if (err) + if (err) { + kfree(be->mode); + be->mode = NULL; return; + } be->major = major; be->minor = minor; -- cgit v0.10.2 From fbc872c38c8fed31948c85683b5326ee5ab9fccc Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Mon, 11 Jul 2016 15:45:51 +0100 Subject: xen/evtchn: add IOCTL_EVTCHN_RESTRICT IOCTL_EVTCHN_RESTRICT limits the file descriptor to being able to bind to interdomain event channels from a specific domain. Event channels that are already bound continue to work for sending and receiving notifications. This is useful as part of deprivileging a user space PV backend or device model (QEMU). e.g., Once the device model as bound to the ioreq server event channels it can restrict the file handle so an exploited DM cannot use it to create or bind to arbitrary event channels. Signed-off-by: David Vrabel Reviewed-by: Boris Ostrovsky diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c index f4edd6d..7efd1cb 100644 --- a/drivers/xen/evtchn.c +++ b/drivers/xen/evtchn.c @@ -73,8 +73,12 @@ struct per_user_data { wait_queue_head_t evtchn_wait; struct fasync_struct *evtchn_async_queue; const char *name; + + domid_t restrict_domid; }; +#define UNRESTRICTED_DOMID ((domid_t)-1) + struct user_evtchn { struct rb_node node; struct per_user_data *user; @@ -443,6 +447,10 @@ static long evtchn_ioctl(struct file *file, struct ioctl_evtchn_bind_virq bind; struct evtchn_bind_virq bind_virq; + rc = -EACCES; + if (u->restrict_domid != UNRESTRICTED_DOMID) + break; + rc = -EFAULT; if (copy_from_user(&bind, uarg, sizeof(bind))) break; @@ -468,6 +476,11 @@ static long evtchn_ioctl(struct file *file, if (copy_from_user(&bind, uarg, sizeof(bind))) break; + rc = -EACCES; + if (u->restrict_domid != UNRESTRICTED_DOMID && + u->restrict_domid != bind.remote_domain) + break; + bind_interdomain.remote_dom = bind.remote_domain; bind_interdomain.remote_port = bind.remote_port; rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain, @@ -485,6 +498,10 @@ static long evtchn_ioctl(struct file *file, struct ioctl_evtchn_bind_unbound_port bind; struct evtchn_alloc_unbound alloc_unbound; + rc = -EACCES; + if (u->restrict_domid != UNRESTRICTED_DOMID) + break; + rc = -EFAULT; if (copy_from_user(&bind, uarg, sizeof(bind))) break; @@ -553,6 +570,27 @@ static long evtchn_ioctl(struct file *file, break; } + case IOCTL_EVTCHN_RESTRICT_DOMID: { + struct ioctl_evtchn_restrict_domid ierd; + + rc = -EACCES; + if (u->restrict_domid != UNRESTRICTED_DOMID) + break; + + rc = -EFAULT; + if (copy_from_user(&ierd, uarg, sizeof(ierd))) + break; + + rc = -EINVAL; + if (ierd.domid == 0 || ierd.domid >= DOMID_FIRST_RESERVED) + break; + + u->restrict_domid = ierd.domid; + rc = 0; + + break; + } + default: rc = -ENOSYS; break; @@ -601,6 +639,8 @@ static int evtchn_open(struct inode *inode, struct file *filp) mutex_init(&u->ring_cons_mutex); spin_lock_init(&u->ring_prod_lock); + u->restrict_domid = UNRESTRICTED_DOMID; + filp->private_data = u; return nonseekable_open(inode, filp); diff --git a/include/uapi/xen/evtchn.h b/include/uapi/xen/evtchn.h index 14e833ee4..cb4aa4b 100644 --- a/include/uapi/xen/evtchn.h +++ b/include/uapi/xen/evtchn.h @@ -85,4 +85,19 @@ struct ioctl_evtchn_notify { #define IOCTL_EVTCHN_RESET \ _IOC(_IOC_NONE, 'E', 5, 0) +/* + * Restrict this file descriptor so that it can only be used to bind + * new interdomain events from one domain. + * + * Once a file descriptor has been restricted it cannot be + * de-restricted, and must be closed and re-opened. Event channels + * which were bound before restricting remain bound afterwards, and + * can be notified as usual. + */ +#define IOCTL_EVTCHN_RESTRICT_DOMID \ + _IOC(_IOC_NONE, 'E', 6, sizeof(struct ioctl_evtchn_restrict_domid)) +struct ioctl_evtchn_restrict_domid { + domid_t domid; +}; + #endif /* __LINUX_PUBLIC_EVTCHN_H__ */ -- cgit v0.10.2 From de2f5537b397249e91cafcbed4de64a24818542e Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Thu, 30 Jun 2016 17:56:35 +0200 Subject: x86/xen: update cpuid.h from Xen-4.7 Update cpuid.h header from xen hypervisor tree to get XEN_HVM_CPUID_VCPU_ID_PRESENT definition. Signed-off-by: Vitaly Kuznetsov Signed-off-by: David Vrabel diff --git a/arch/x86/include/asm/xen/cpuid.h b/arch/x86/include/asm/xen/cpuid.h index 0d809e9..3bdd10d 100644 --- a/arch/x86/include/asm/xen/cpuid.h +++ b/arch/x86/include/asm/xen/cpuid.h @@ -76,15 +76,18 @@ /* * Leaf 5 (0x40000x04) * HVM-specific features + * EAX: Features + * EBX: vcpu id (iff EAX has XEN_HVM_CPUID_VCPU_ID_PRESENT flag) */ -/* EAX Features */ /* Virtualized APIC registers */ #define XEN_HVM_CPUID_APIC_ACCESS_VIRT (1u << 0) /* Virtualized x2APIC accesses */ #define XEN_HVM_CPUID_X2APIC_VIRT (1u << 1) /* Memory mapped from other domains has valid IOMMU entries */ #define XEN_HVM_CPUID_IOMMU_MAPPINGS (1u << 2) +/* vcpu id is present in EBX */ +#define XEN_HVM_CPUID_VCPU_ID_PRESENT (1u << 3) #define XEN_CPUID_MAX_NUM_LEAVES 4 -- cgit v0.10.2 From 3e9e57fad3d8530aa30787f861c710f598ddc4e7 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Thu, 30 Jun 2016 17:56:36 +0200 Subject: x86/acpi: store ACPI ids from MADT for future usage Currently we don't save ACPI ids (unlike LAPIC ids which go to x86_cpu_to_apicid) from MADT and we may need this information later. Particularly, ACPI ids is the only existent way for a PVHVM Xen guest to figure out Xen's idea of its vCPUs ids before these CPUs boot and in some cases these ids diverge from Linux's cpu ids. Signed-off-by: Vitaly Kuznetsov Signed-off-by: David Vrabel diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h index 678637a..a7fb9dd 100644 --- a/arch/x86/include/asm/cpu.h +++ b/arch/x86/include/asm/cpu.h @@ -16,6 +16,7 @@ extern void prefill_possible_map(void); static inline void prefill_possible_map(void) {} #define cpu_physical_id(cpu) boot_cpu_physical_apicid +#define cpu_acpi_id(cpu) 0 #define safe_smp_processor_id() 0 #define stack_smp_processor_id() 0 diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 66b0573..c47b42b 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -33,6 +33,7 @@ static inline struct cpumask *cpu_llc_shared_mask(int cpu) } DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid); +DECLARE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_acpiid); DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid); #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32) DECLARE_EARLY_PER_CPU_READ_MOSTLY(int, x86_cpu_to_logical_apicid); @@ -147,6 +148,7 @@ void x86_idle_thread_init(unsigned int cpu, struct task_struct *idle); void smp_store_boot_cpu_info(void); void smp_store_cpu_info(int id); #define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu) +#define cpu_acpi_id(cpu) per_cpu(x86_cpu_to_acpiid, cpu) #else /* !CONFIG_SMP */ #define wbinvd_on_cpu(cpu) wbinvd() diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 9414f84..6738e5c 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -161,13 +161,15 @@ static int __init acpi_parse_madt(struct acpi_table_header *table) /** * acpi_register_lapic - register a local apic and generates a logic cpu number * @id: local apic id to register + * @acpiid: ACPI id to register * @enabled: this cpu is enabled or not * * Returns the logic cpu number which maps to the local apic */ -static int acpi_register_lapic(int id, u8 enabled) +static int acpi_register_lapic(int id, u32 acpiid, u8 enabled) { unsigned int ver = 0; + int cpu; if (id >= MAX_LOCAL_APIC) { printk(KERN_INFO PREFIX "skipped apicid that is too big\n"); @@ -182,7 +184,11 @@ static int acpi_register_lapic(int id, u8 enabled) if (boot_cpu_physical_apicid != -1U) ver = apic_version[boot_cpu_physical_apicid]; - return generic_processor_info(id, ver); + cpu = generic_processor_info(id, ver); + if (cpu >= 0) + early_per_cpu(x86_cpu_to_acpiid, cpu) = acpiid; + + return cpu; } static int __init @@ -212,7 +218,7 @@ acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end) if (!apic->apic_id_valid(apic_id) && enabled) printk(KERN_WARNING PREFIX "x2apic entry ignored\n"); else - acpi_register_lapic(apic_id, enabled); + acpi_register_lapic(apic_id, processor->uid, enabled); #else printk(KERN_WARNING PREFIX "x2apic entry ignored\n"); #endif @@ -240,6 +246,7 @@ acpi_parse_lapic(struct acpi_subtable_header * header, const unsigned long end) * when we use CPU hotplug. */ acpi_register_lapic(processor->id, /* APIC ID */ + processor->processor_id, /* ACPI ID */ processor->lapic_flags & ACPI_MADT_ENABLED); return 0; @@ -258,6 +265,7 @@ acpi_parse_sapic(struct acpi_subtable_header *header, const unsigned long end) acpi_table_print_madt_entry(header); acpi_register_lapic((processor->id << 8) | processor->eid,/* APIC ID */ + processor->processor_id, /* ACPI ID */ processor->lapic_flags & ACPI_MADT_ENABLED); return 0; @@ -714,7 +722,7 @@ int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, int *pcpu) { int cpu; - cpu = acpi_register_lapic(physid, ACPI_MADT_ENABLED); + cpu = acpi_register_lapic(physid, U32_MAX, ACPI_MADT_ENABLED); if (cpu < 0) { pr_info(PREFIX "Unable to map lapic to logical cpu number\n"); return cpu; diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 60078a6..db2326f 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -92,8 +92,10 @@ static int apic_extnmi = APIC_EXTNMI_BSP; */ DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid, BAD_APICID); DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid, BAD_APICID); +DEFINE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_acpiid, U32_MAX); EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid); EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid); +EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_acpiid); #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index e4fcb87..7a40e06 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -236,6 +236,8 @@ void __init setup_per_cpu_areas(void) early_per_cpu_map(x86_cpu_to_apicid, cpu); per_cpu(x86_bios_cpu_apicid, cpu) = early_per_cpu_map(x86_bios_cpu_apicid, cpu); + per_cpu(x86_cpu_to_acpiid, cpu) = + early_per_cpu_map(x86_cpu_to_acpiid, cpu); #endif #ifdef CONFIG_X86_32 per_cpu(x86_cpu_to_logical_apicid, cpu) = @@ -271,6 +273,7 @@ void __init setup_per_cpu_areas(void) #ifdef CONFIG_X86_LOCAL_APIC early_per_cpu_ptr(x86_cpu_to_apicid) = NULL; early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL; + early_per_cpu_ptr(x86_cpu_to_acpiid) = NULL; #endif #ifdef CONFIG_X86_32 early_per_cpu_ptr(x86_cpu_to_logical_apicid) = NULL; -- cgit v0.10.2 From 88e957d6e47f1232ad15b21e54a44f1147ea8c1b Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Thu, 30 Jun 2016 17:56:37 +0200 Subject: xen: introduce xen_vcpu_id mapping It may happen that Xen's and Linux's ideas of vCPU id diverge. In particular, when we crash on a secondary vCPU we may want to do kdump and unlike plain kexec where we do migrate_to_reboot_cpu() we try booting on the vCPU which crashed. This doesn't work very well for PVHVM guests as we have a number of hypercalls where we pass vCPU id as a parameter. These hypercalls either fail or do something unexpected. To solve the issue introduce percpu xen_vcpu_id mapping. ARM and PV guests get direct mapping for now. Boot CPU for PVHVM guest gets its id from CPUID. With secondary CPUs it is a bit more trickier. Currently, we initialize IPI vectors before these CPUs boot so we can't use CPUID. Use ACPI ids from MADT instead. Signed-off-by: Vitaly Kuznetsov Signed-off-by: David Vrabel diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index 2f4c3aa..72cf06b 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -49,6 +49,10 @@ struct shared_info *HYPERVISOR_shared_info = (void *)&xen_dummy_shared_info; DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu); static struct vcpu_info __percpu *xen_vcpu_info; +/* Linux <-> Xen vCPU id mapping */ +DEFINE_PER_CPU(int, xen_vcpu_id) = -1; +EXPORT_PER_CPU_SYMBOL(xen_vcpu_id); + /* These are unused until we support booting "pre-ballooned" */ unsigned long xen_released_pages; struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata; @@ -167,6 +171,9 @@ static void xen_percpu_init(void) pr_info("Xen: initializing cpu%d\n", cpu); vcpup = per_cpu_ptr(xen_vcpu_info, cpu); + /* Direct vCPU id mapping for ARM guests. */ + per_cpu(xen_vcpu_id, cpu) = cpu; + info.mfn = virt_to_gfn(vcpup); info.offset = xen_offset_in_page(vcpup); @@ -388,6 +395,9 @@ static int __init xen_guest_init(void) if (xen_vcpu_info == NULL) return -ENOMEM; + /* Direct vCPU id mapping for ARM guests. */ + per_cpu(xen_vcpu_id, 0) = 0; + xen_auto_xlat_grant_frames.count = gnttab_max_grant_frames(); if (xen_xlate_map_ballooned_pages(&xen_auto_xlat_grant_frames.pfn, &xen_auto_xlat_grant_frames.vaddr, diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index ee7d71e..d093949 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -59,6 +59,7 @@ #include #include #include +#include #include #include #include @@ -118,6 +119,10 @@ DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu); */ DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info); +/* Linux <-> Xen vCPU id mapping */ +DEFINE_PER_CPU(int, xen_vcpu_id) = -1; +EXPORT_PER_CPU_SYMBOL(xen_vcpu_id); + enum xen_domain_type xen_domain_type = XEN_NATIVE; EXPORT_SYMBOL_GPL(xen_domain_type); @@ -1137,8 +1142,11 @@ void xen_setup_vcpu_info_placement(void) { int cpu; - for_each_possible_cpu(cpu) + for_each_possible_cpu(cpu) { + /* Set up direct vCPU id mapping for PV guests. */ + per_cpu(xen_vcpu_id, cpu) = cpu; xen_vcpu_setup(cpu); + } /* xen_vcpu_setup managed to place the vcpu_info within the * percpu area for all cpus, so make use of it. Note that for @@ -1729,6 +1737,9 @@ asmlinkage __visible void __init xen_start_kernel(void) #endif xen_raw_console_write("about to get started...\n"); + /* Let's presume PV guests always boot on vCPU with id 0. */ + per_cpu(xen_vcpu_id, 0) = 0; + xen_setup_runstate_info(0); xen_efi_init(); @@ -1797,6 +1808,12 @@ static void __init init_hvm_pv_info(void) xen_setup_features(); + cpuid(base + 4, &eax, &ebx, &ecx, &edx); + if (eax & XEN_HVM_CPUID_VCPU_ID_PRESENT) + this_cpu_write(xen_vcpu_id, ebx); + else + this_cpu_write(xen_vcpu_id, smp_processor_id()); + pv_info.name = "Xen HVM"; xen_domain_type = XEN_HVM_DOMAIN; @@ -1808,6 +1825,10 @@ static int xen_hvm_cpu_notify(struct notifier_block *self, unsigned long action, int cpu = (long)hcpu; switch (action) { case CPU_UP_PREPARE: + if (cpu_acpi_id(cpu) != U32_MAX) + per_cpu(xen_vcpu_id, cpu) = cpu_acpi_id(cpu); + else + per_cpu(xen_vcpu_id, cpu) = cpu; xen_vcpu_setup(cpu); if (xen_have_vector_callback) { if (xen_feature(XENFEAT_hvm_safe_pvclock)) diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h index 355275b..c9c532d 100644 --- a/include/xen/xen-ops.h +++ b/include/xen/xen-ops.h @@ -9,6 +9,12 @@ DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu); +DECLARE_PER_CPU(int, xen_vcpu_id); +static inline int xen_vcpu_nr(int cpu) +{ + return per_cpu(xen_vcpu_id, cpu); +} + void xen_arch_pre_suspend(void); void xen_arch_post_suspend(int suspend_cancelled); -- cgit v0.10.2 From ad5475f9faf5186b7f59de2c6481ee3e211f1ed7 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Thu, 30 Jun 2016 17:56:38 +0200 Subject: x86/xen: use xen_vcpu_id mapping for HYPERVISOR_vcpu_op HYPERVISOR_vcpu_op() passes Linux's idea of vCPU id as a parameter while Xen's idea is expected. In some cases these ideas diverge so we need to do remapping. Convert all callers of HYPERVISOR_vcpu_op() to use xen_vcpu_nr(). Leave xen_fill_possible_map() and xen_filter_cpu_maps() intact as they're only being called by PV guests before perpu areas are initialized. While the issue could be solved by switching to early_percpu for xen_vcpu_id I think it's not worth it: PV guests will probably never get to the point where their idea of vCPU id diverges from Xen's. Signed-off-by: Vitaly Kuznetsov Signed-off-by: David Vrabel diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index 72cf06b..0bea3d2 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -177,7 +177,8 @@ static void xen_percpu_init(void) info.mfn = virt_to_gfn(vcpup); info.offset = xen_offset_in_page(vcpup); - err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, cpu, &info); + err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, xen_vcpu_nr(cpu), + &info); BUG_ON(err); per_cpu(xen_vcpu, cpu) = vcpup; diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index d093949..46f3399 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -228,7 +228,8 @@ static void xen_vcpu_setup(int cpu) hypervisor has no unregister variant and this hypercall does not allow to over-write info.mfn and info.offset. */ - err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, cpu, &info); + err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, xen_vcpu_nr(cpu), + &info); if (err) { printk(KERN_DEBUG "register_vcpu_info failed: err=%d\n", err); @@ -252,10 +253,11 @@ void xen_vcpu_restore(void) for_each_possible_cpu(cpu) { bool other_cpu = (cpu != smp_processor_id()); - bool is_up = HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL); + bool is_up = HYPERVISOR_vcpu_op(VCPUOP_is_up, xen_vcpu_nr(cpu), + NULL); if (other_cpu && is_up && - HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL)) + HYPERVISOR_vcpu_op(VCPUOP_down, xen_vcpu_nr(cpu), NULL)) BUG(); xen_setup_runstate_info(cpu); @@ -264,7 +266,7 @@ void xen_vcpu_restore(void) xen_vcpu_setup(cpu); if (other_cpu && is_up && - HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL)) + HYPERVISOR_vcpu_op(VCPUOP_up, xen_vcpu_nr(cpu), NULL)) BUG(); } } diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c index a1207cb..33e9295 100644 --- a/arch/x86/xen/irq.c +++ b/arch/x86/xen/irq.c @@ -109,7 +109,8 @@ static void xen_safe_halt(void) static void xen_halt(void) { if (irqs_disabled()) - HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL); + HYPERVISOR_vcpu_op(VCPUOP_down, + xen_vcpu_nr(smp_processor_id()), NULL); else xen_safe_halt(); } diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 719cf29..a3118f2 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -454,7 +454,7 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) #endif ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs); ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_gfn(swapper_pg_dir)); - if (HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, ctxt)) + if (HYPERVISOR_vcpu_op(VCPUOP_initialise, xen_vcpu_nr(cpu), ctxt)) BUG(); kfree(ctxt); @@ -492,7 +492,7 @@ static int xen_cpu_up(unsigned int cpu, struct task_struct *idle) if (rc) return rc; - rc = HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL); + rc = HYPERVISOR_vcpu_op(VCPUOP_up, xen_vcpu_nr(cpu), NULL); BUG_ON(rc); while (cpu_report_state(cpu) != CPU_ONLINE) @@ -520,7 +520,8 @@ static int xen_cpu_disable(void) static void xen_cpu_die(unsigned int cpu) { - while (xen_pv_domain() && HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL)) { + while (xen_pv_domain() && HYPERVISOR_vcpu_op(VCPUOP_is_up, + xen_vcpu_nr(cpu), NULL)) { __set_current_state(TASK_UNINTERRUPTIBLE); schedule_timeout(HZ/10); } @@ -536,7 +537,7 @@ static void xen_cpu_die(unsigned int cpu) static void xen_play_dead(void) /* used only with HOTPLUG_CPU */ { play_dead_common(); - HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL); + HYPERVISOR_vcpu_op(VCPUOP_down, xen_vcpu_nr(smp_processor_id()), NULL); cpu_bringup(); /* * commit 4b0c0f294 (tick: Cleanup NOHZ per cpu data on cpu down) @@ -576,7 +577,7 @@ static void stop_self(void *v) set_cpu_online(cpu, false); - HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL); + HYPERVISOR_vcpu_op(VCPUOP_down, xen_vcpu_nr(cpu), NULL); BUG(); } diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index c31006f..fd7cd6f 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -223,8 +223,10 @@ static int xen_vcpuop_shutdown(struct clock_event_device *evt) { int cpu = smp_processor_id(); - if (HYPERVISOR_vcpu_op(VCPUOP_stop_singleshot_timer, cpu, NULL) || - HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL)) + if (HYPERVISOR_vcpu_op(VCPUOP_stop_singleshot_timer, xen_vcpu_nr(cpu), + NULL) || + HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, xen_vcpu_nr(cpu), + NULL)) BUG(); return 0; @@ -234,7 +236,8 @@ static int xen_vcpuop_set_oneshot(struct clock_event_device *evt) { int cpu = smp_processor_id(); - if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL)) + if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, xen_vcpu_nr(cpu), + NULL)) BUG(); return 0; @@ -253,7 +256,8 @@ static int xen_vcpuop_set_next_event(unsigned long delta, /* Get an event anyway, even if the timeout is already expired */ single.flags = 0; - ret = HYPERVISOR_vcpu_op(VCPUOP_set_singleshot_timer, cpu, &single); + ret = HYPERVISOR_vcpu_op(VCPUOP_set_singleshot_timer, xen_vcpu_nr(cpu), + &single); BUG_ON(ret != 0); return ret; @@ -352,7 +356,8 @@ void xen_timer_resume(void) return; for_each_online_cpu(cpu) { - if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL)) + if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, + xen_vcpu_nr(cpu), NULL)) BUG(); } } @@ -372,7 +377,8 @@ static void __init xen_time_init(void) clocksource_register_hz(&xen_clocksource, NSEC_PER_SEC); - if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL) == 0) { + if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, xen_vcpu_nr(cpu), + NULL) == 0) { /* Successfully turned off 100Hz tick, so we have the vcpuop-based timer interface */ printk(KERN_DEBUG "Xen: using vcpuop timer interface\n"); diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index 71d49a9..8fb7cbf 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -1211,7 +1211,8 @@ void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector) #ifdef CONFIG_X86 if (unlikely(vector == XEN_NMI_VECTOR)) { - int rc = HYPERVISOR_vcpu_op(VCPUOP_send_nmi, cpu, NULL); + int rc = HYPERVISOR_vcpu_op(VCPUOP_send_nmi, xen_vcpu_nr(cpu), + NULL); if (rc < 0) printk(KERN_WARNING "Sending nmi to CPU%d failed (rc:%d)\n", cpu, rc); return; diff --git a/drivers/xen/time.c b/drivers/xen/time.c index a7fe35b..48c3f69 100644 --- a/drivers/xen/time.c +++ b/drivers/xen/time.c @@ -95,7 +95,7 @@ void xen_setup_runstate_info(int cpu) area.addr.v = &per_cpu(xen_runstate, cpu); if (HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area, - cpu, &area)) + xen_vcpu_nr(cpu), &area)) BUG(); } -- cgit v0.10.2 From e15a8621935cac527b4e0ed4078d24c3e5ef73a6 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Thu, 30 Jun 2016 17:56:39 +0200 Subject: x86/xen: use xen_vcpu_id mapping when pointing vcpu_info to shared_info shared_info page has space for 32 vcpu info slots for first 32 vCPUs but these are the first 32 vCPUs from Xen's perspective and we should map them accordingly with the newly introduced xen_vcpu_id mapping. Signed-off-by: Vitaly Kuznetsov Signed-off-by: David Vrabel diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 46f3399..5ca92e6 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -207,8 +207,9 @@ static void xen_vcpu_setup(int cpu) if (per_cpu(xen_vcpu, cpu) == &per_cpu(xen_vcpu_info, cpu)) return; } - if (cpu < MAX_VIRT_CPUS) - per_cpu(xen_vcpu,cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; + if (xen_vcpu_nr(cpu) < MAX_VIRT_CPUS) + per_cpu(xen_vcpu, cpu) = + &HYPERVISOR_shared_info->vcpu_info[xen_vcpu_nr(cpu)]; if (!have_vcpu_info_placement) { if (cpu >= MAX_VIRT_CPUS) @@ -1783,9 +1784,10 @@ void __ref xen_hvm_init_shared_info(void) * in that case multiple vcpus might be online. */ for_each_online_cpu(cpu) { /* Leave it to be NULL. */ - if (cpu >= MAX_VIRT_CPUS) + if (xen_vcpu_nr(cpu) >= MAX_VIRT_CPUS) continue; - per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; + per_cpu(xen_vcpu, cpu) = + &HYPERVISOR_shared_info->vcpu_info[xen_vcpu_nr(cpu)]; } } -- cgit v0.10.2 From 8058c0b897e7d1ba5c900cb17eb82aa0d88fca53 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Thu, 30 Jun 2016 17:56:40 +0200 Subject: xen/events: use xen_vcpu_id mapping in events_base EVTCHNOP_bind_ipi and EVTCHNOP_bind_virq pass vCPU id as a parameter and Xen's idea of vCPU id should be used. Use the newly introduced xen_vcpu_id mapping to convert it from Linux's id. Signed-off-by: Vitaly Kuznetsov Signed-off-by: David Vrabel diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index 8fb7cbf..d5dbdb9 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -895,7 +895,7 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) irq_set_chip_and_handler_name(irq, &xen_percpu_chip, handle_percpu_irq, "ipi"); - bind_ipi.vcpu = cpu; + bind_ipi.vcpu = xen_vcpu_nr(cpu); if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi, &bind_ipi) != 0) BUG(); @@ -991,7 +991,7 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu, bool percpu) handle_edge_irq, "virq"); bind_virq.virq = virq; - bind_virq.vcpu = cpu; + bind_virq.vcpu = xen_vcpu_nr(cpu); ret = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, &bind_virq); if (ret == 0) @@ -1319,7 +1319,7 @@ static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu) /* Send future instances of this interrupt to other vcpu. */ bind_vcpu.port = evtchn; - bind_vcpu.vcpu = tcpu; + bind_vcpu.vcpu = xen_vcpu_nr(tcpu); /* * Mask the event while changing the VCPU binding to prevent @@ -1459,7 +1459,7 @@ static void restore_cpu_virqs(unsigned int cpu) /* Get a new binding from Xen. */ bind_virq.virq = virq; - bind_virq.vcpu = cpu; + bind_virq.vcpu = xen_vcpu_nr(cpu); if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, &bind_virq) != 0) BUG(); @@ -1483,7 +1483,7 @@ static void restore_cpu_ipis(unsigned int cpu) BUG_ON(ipi_from_irq(irq) != ipi); /* Get a new binding from Xen. */ - bind_ipi.vcpu = cpu; + bind_ipi.vcpu = xen_vcpu_nr(cpu); if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi, &bind_ipi) != 0) BUG(); -- cgit v0.10.2 From be78da1cf43db4c1a9e13af8b6754199a89d5d75 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Thu, 30 Jun 2016 17:56:41 +0200 Subject: xen/events: fifo: use xen_vcpu_id mapping EVTCHNOP_init_control has vCPU id as a parameter and Xen's idea of vCPU id should be used. Use the newly introduced xen_vcpu_id mapping to convert it from Linux's id. Signed-off-by: Vitaly Kuznetsov Signed-off-by: David Vrabel diff --git a/drivers/xen/events/events_fifo.c b/drivers/xen/events/events_fifo.c index 9289a17..266c2c7 100644 --- a/drivers/xen/events/events_fifo.c +++ b/drivers/xen/events/events_fifo.c @@ -113,7 +113,7 @@ static int init_control_block(int cpu, init_control.control_gfn = virt_to_gfn(control_block); init_control.offset = 0; - init_control.vcpu = cpu; + init_control.vcpu = xen_vcpu_nr(cpu); return HYPERVISOR_event_channel_op(EVTCHNOP_init_control, &init_control); } -- cgit v0.10.2 From cbbb4682394c45986a34d8c77a02e7a066e30235 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Thu, 30 Jun 2016 17:56:42 +0200 Subject: xen/evtchn: use xen_vcpu_id mapping Use the newly introduced xen_vcpu_id mapping to get Xen's idea of vCPU id for CPU0. Signed-off-by: Vitaly Kuznetsov Signed-off-by: David Vrabel diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c index 7efd1cb..e8c7f09 100644 --- a/drivers/xen/evtchn.c +++ b/drivers/xen/evtchn.c @@ -55,6 +55,7 @@ #include #include #include +#include #include struct per_user_data { @@ -456,7 +457,7 @@ static long evtchn_ioctl(struct file *file, break; bind_virq.virq = bind.virq; - bind_virq.vcpu = 0; + bind_virq.vcpu = xen_vcpu_nr(0); rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, &bind_virq); if (rc != 0) -- cgit v0.10.2 From ee42d665d3f5db975caf87baf101a57235ddb566 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Thu, 30 Jun 2016 17:56:43 +0200 Subject: xen/pvhvm: run xen_vcpu_setup() for the boot CPU Historically we didn't call VCPUOP_register_vcpu_info for CPU0 for PVHVM guests (while we had it for PV and ARM guests). This is usually fine as we can use vcpu info in the shared_info page but when we try booting on a vCPU with Xen's vCPU id > 31 (e.g. when we try to kdump after crashing on this CPU) we're not able to boot. Switch to always doing VCPUOP_register_vcpu_info for the boot CPU. Signed-off-by: Vitaly Kuznetsov Signed-off-by: David Vrabel diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 5ca92e6..85ef4c0 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -184,7 +184,7 @@ static void clamp_max_cpus(void) #endif } -static void xen_vcpu_setup(int cpu) +void xen_vcpu_setup(int cpu) { struct vcpu_register_vcpu_info info; int err; diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index a3118f2..0b4d04c 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -322,6 +322,13 @@ static void __init xen_smp_prepare_boot_cpu(void) xen_filter_cpu_maps(); xen_setup_vcpu_info_placement(); } + + /* + * Setup vcpu_info for boot CPU. + */ + if (xen_hvm_domain()) + xen_vcpu_setup(0); + /* * The alternative logic (which patches the unlock/lock) runs before * the smp bootup up code is activated. Hence we need to set this up diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 4140b07..3cbce3b 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -76,6 +76,7 @@ irqreturn_t xen_debug_interrupt(int irq, void *dev_id); bool xen_vcpu_stolen(int vcpu); +void xen_vcpu_setup(int cpu); void xen_setup_vcpu_info_placement(void); #ifdef CONFIG_SMP -- cgit v0.10.2 From d34c30cc1fa80f509500ff192ea6bc7d30671061 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Tue, 26 Jul 2016 14:15:11 +0200 Subject: xen: add static initialization of steal_clock op to xen_time_ops pv_time_ops might be overwritten with xen_time_ops after the steal_clock operation has been initialized already. To prevent calling a now uninitialized function pointer add the steal_clock static initialization to xen_time_ops. Signed-off-by: Juergen Gross Signed-off-by: David Vrabel diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index fd7cd6f..67356d2 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -364,6 +364,7 @@ void xen_timer_resume(void) static const struct pv_time_ops xen_time_ops __initconst = { .sched_clock = xen_clocksource_read, + .steal_clock = xen_steal_clock, }; static void __init xen_time_init(void) diff --git a/drivers/xen/time.c b/drivers/xen/time.c index 48c3f69..ac5f23f 100644 --- a/drivers/xen/time.c +++ b/drivers/xen/time.c @@ -80,7 +80,7 @@ bool xen_vcpu_stolen(int vcpu) return per_cpu(xen_runstate, vcpu).state == RUNSTATE_runnable; } -static u64 xen_steal_clock(int cpu) +u64 xen_steal_clock(int cpu) { struct vcpu_runstate_info state; diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h index c9c532d..9a37c541 100644 --- a/include/xen/xen-ops.h +++ b/include/xen/xen-ops.h @@ -29,6 +29,7 @@ bool xen_vcpu_stolen(int vcpu); void xen_setup_runstate_info(int cpu); void xen_time_setup_guest(void); void xen_get_runstate_snapshot(struct vcpu_runstate_info *res); +u64 xen_steal_clock(int cpu); int xen_setup_shutdown_event(void); -- cgit v0.10.2