From 093d7b4639951ea3021a6f70d376c3ff31f4740c Mon Sep 17 00:00:00 2001
From: Miroslav Rezanina <mrezanin@redhat.com>
Date: Wed, 16 Sep 2009 03:56:17 -0400
Subject: xen: release unused free memory

Scan an e820 table and release any memory which lies between e820 entries,
as it won't be used and would just be wasted.  At present this is just to
release any memory beyond the end of the e820 map, but it will also deal
with holes being punched in the map.

Derived from patch by Miroslav Rezanina <mrezanin@redhat.com>

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 65d8d79..399bed2 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -731,7 +731,6 @@ static void set_xen_basic_apic_ops(void)
 
 #endif
 
-
 static void xen_clts(void)
 {
 	struct multicall_space mcs;
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index ad0047f..e094263 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -20,6 +20,7 @@
 #include <xen/page.h>
 #include <xen/interface/callback.h>
 #include <xen/interface/physdev.h>
+#include <xen/interface/memory.h>
 #include <xen/features.h>
 
 #include "xen-ops.h"
@@ -32,6 +33,56 @@ extern void xen_sysenter_target(void);
 extern void xen_syscall_target(void);
 extern void xen_syscall32_target(void);
 
+static unsigned long __init xen_release_chunk(phys_addr_t start_addr, phys_addr_t end_addr)
+{
+	struct xen_memory_reservation reservation = {
+		.address_bits = 0,
+		.extent_order = 0,
+		.domid        = DOMID_SELF
+	};
+	unsigned long *mfn_list = (unsigned long *)xen_start_info->mfn_list;
+	unsigned long start, end;
+	unsigned long len;
+	unsigned long pfn;
+	int ret;
+
+	start = PFN_UP(start_addr);
+	end = PFN_UP(end_addr);
+
+	if (end <= start)
+		return 0;
+
+	len = end - start;
+
+	set_xen_guest_handle(reservation.extent_start, &mfn_list[start]);
+	reservation.nr_extents = len;
+
+	ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
+	WARN(ret != (end - start), "Failed to release memory %lx-%lx err=%d\n",
+	     start, end, ret);
+
+	for(pfn = start; pfn < end; pfn++)
+		set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
+
+	return len;
+}
+
+static unsigned long __init xen_return_unused_memory(const struct e820map *e820)
+{
+	unsigned long last_end = 0;
+	unsigned long released = 0;
+	int i;
+
+	for (i = 0; i < e820->nr_map; i++) {
+		released += xen_release_chunk(last_end, e820->map[i].addr);
+		last_end = e820->map[i].addr + e820->map[i].size;
+	}
+
+	released += xen_release_chunk(last_end, PFN_PHYS(xen_start_info->nr_pages));
+
+	printk(KERN_INFO "released %ld pages of unused memory\n", released);
+	return released;
+}
 
 /**
  * machine_specific_memory_setup - Hook for machine specific memory setup.
@@ -67,6 +118,8 @@ char * __init xen_memory_setup(void)
 
 	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
 
+	xen_return_unused_memory(&e820);
+
 	return "Xen";
 }
 
-- 
cgit v0.10.2


From f89e048e76da7ac0b4c89e75606ca7a3422886b1 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Wed, 16 Sep 2009 12:38:33 -0700
Subject: xen: make sure pages are really part of domain before freeing

Scan the set of pages we're freeing and make sure they're actually
owned by the domain before freeing.  Pages we don't own generally won't
turn up on a domU (since Xen gives us contiguous memory), but they could
if there are some hardware mappings passed through.

We only bother going up to the highest page Xen actually claimed to
give us, since there's definitely nothing of ours above that.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>

diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index e094263..9deb6ba 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -33,52 +33,69 @@ extern void xen_sysenter_target(void);
 extern void xen_syscall_target(void);
 extern void xen_syscall32_target(void);
 
-static unsigned long __init xen_release_chunk(phys_addr_t start_addr, phys_addr_t end_addr)
+static unsigned long __init xen_release_chunk(phys_addr_t start_addr,
+					      phys_addr_t end_addr)
 {
 	struct xen_memory_reservation reservation = {
 		.address_bits = 0,
 		.extent_order = 0,
 		.domid        = DOMID_SELF
 	};
-	unsigned long *mfn_list = (unsigned long *)xen_start_info->mfn_list;
 	unsigned long start, end;
-	unsigned long len;
+	unsigned long len = 0;
 	unsigned long pfn;
 	int ret;
 
 	start = PFN_UP(start_addr);
-	end = PFN_UP(end_addr);
+	end = PFN_DOWN(end_addr);
 
 	if (end <= start)
 		return 0;
 
-	len = end - start;
-
-	set_xen_guest_handle(reservation.extent_start, &mfn_list[start]);
-	reservation.nr_extents = len;
-
-	ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
-	WARN(ret != (end - start), "Failed to release memory %lx-%lx err=%d\n",
-	     start, end, ret);
-
-	for(pfn = start; pfn < end; pfn++)
-		set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
+	printk(KERN_INFO "xen_release_chunk: looking at area pfn %lx-%lx: ",
+	       start, end);
+	for(pfn = start; pfn < end; pfn++) {
+		unsigned long mfn = pfn_to_mfn(pfn);
+
+		/* Make sure pfn exists to start with */
+		if (mfn == INVALID_P2M_ENTRY || mfn_to_pfn(mfn) != pfn)
+			continue;
+
+		set_xen_guest_handle(reservation.extent_start, &mfn);
+		reservation.nr_extents = 1;
+
+		ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
+					   &reservation);
+		WARN(ret != 1, "Failed to release memory %lx-%lx err=%d\n",
+		     start, end, ret);
+		if (ret == 1) {
+			set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
+			len++;
+		}
+	}
+	printk(KERN_CONT "%ld pages freed\n", len);
 
 	return len;
 }
 
-static unsigned long __init xen_return_unused_memory(const struct e820map *e820)
+static unsigned long __init xen_return_unused_memory(unsigned long max_pfn,
+						     const struct e820map *e820)
 {
-	unsigned long last_end = 0;
+	phys_addr_t max_addr = PFN_PHYS(max_pfn);
+	phys_addr_t last_end = 0;
 	unsigned long released = 0;
 	int i;
 
-	for (i = 0; i < e820->nr_map; i++) {
-		released += xen_release_chunk(last_end, e820->map[i].addr);
+	for (i = 0; i < e820->nr_map && last_end < max_addr; i++) {
+		phys_addr_t end = e820->map[i].addr;
+		end = min(max_addr, end);
+
+		released += xen_release_chunk(last_end, end);
 		last_end = e820->map[i].addr + e820->map[i].size;
 	}
 
-	released += xen_release_chunk(last_end, PFN_PHYS(xen_start_info->nr_pages));
+	if (last_end < max_addr)
+		released += xen_release_chunk(last_end, max_addr);
 
 	printk(KERN_INFO "released %ld pages of unused memory\n", released);
 	return released;
@@ -118,7 +135,7 @@ char * __init xen_memory_setup(void)
 
 	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
 
-	xen_return_unused_memory(&e820);
+	xen_return_unused_memory(xen_start_info->nr_pages, &e820);
 
 	return "Xen";
 }
-- 
cgit v0.10.2


From 31de189f7d02da163f77d46a86d9e655a2d83124 Mon Sep 17 00:00:00 2001
From: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Date: Fri, 16 Jul 2010 17:30:19 +0100
Subject: pvops: do not notify callers from register_xenstore_notifier

Currently register_xenstore_notifier notifies the caller during the
registration itself if xenstore is believed to be ready. This behaviour
causes problems for PV-on-HVM guests, where callers should be
notified by xenbus_probe only after the platform PCI driver is loaded.
We already make sure xenbus_probe is called at the right time, calling
it either from device_initcall (PV case) or from the platform pci
driver initialization (HVM case) so we don't need this additional
notification.

Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>

diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c
index 3479332..abc1242 100644
--- a/drivers/xen/xenbus/xenbus_probe.c
+++ b/drivers/xen/xenbus/xenbus_probe.c
@@ -752,10 +752,7 @@ int register_xenstore_notifier(struct notifier_block *nb)
 {
 	int ret = 0;
 
-	if (xenstored_ready > 0)
-		ret = nb->notifier_call(nb, 0, NULL);
-	else
-		blocking_notifier_chain_register(&xenstore_chain, nb);
+	blocking_notifier_chain_register(&xenstore_chain, nb);
 
 	return ret;
 }
-- 
cgit v0.10.2


From 8a22b9996b001c88f2bfb54c6de6a05fc39e177a Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Mon, 12 Jul 2010 11:49:59 -0700
Subject: xen: drop xen_sched_clock in favour of using plain wallclock time

xen_sched_clock only counts unstolen time.  In principle this should
be useful to the Linux scheduler so that it knows how much time a process
actually consumed.  But in practice this doesn't work very well as the
scheduler expects the sched_clock time to be synchronized between
cpus.  It also uses sched_clock to measure the time a task spends
sleeping, in which case "unstolen time" isn't meaningful.

So just use plain xen_clocksource_read to return wallclock nanoseconds
for sched_clock.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 399bed2..fef034a 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -926,7 +926,7 @@ static const struct pv_init_ops xen_init_ops __initdata = {
 };
 
 static const struct pv_time_ops xen_time_ops __initdata = {
-	.sched_clock = xen_sched_clock,
+	.sched_clock = xen_clocksource_read,
 };
 
 static const struct pv_cpu_ops xen_cpu_ops __initdata = {
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 32764b8..e90360f 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -155,45 +155,6 @@ static void do_stolen_accounting(void)
 	account_idle_ticks(ticks);
 }
 
-/*
- * Xen sched_clock implementation.  Returns the number of unstolen
- * nanoseconds, which is nanoseconds the VCPU spent in RUNNING+BLOCKED
- * states.
- */
-unsigned long long xen_sched_clock(void)
-{
-	struct vcpu_runstate_info state;
-	cycle_t now;
-	u64 ret;
-	s64 offset;
-
-	/*
-	 * Ideally sched_clock should be called on a per-cpu basis
-	 * anyway, so preempt should already be disabled, but that's
-	 * not current practice at the moment.
-	 */
-	preempt_disable();
-
-	now = xen_clocksource_read();
-
-	get_runstate_snapshot(&state);
-
-	WARN_ON(state.state != RUNSTATE_running);
-
-	offset = now - state.state_entry_time;
-	if (offset < 0)
-		offset = 0;
-
-	ret = state.time[RUNSTATE_blocked] +
-		state.time[RUNSTATE_running] +
-		offset;
-
-	preempt_enable();
-
-	return ret;
-}
-
-
 /* Get the TSC speed from Xen */
 unsigned long xen_tsc_khz(void)
 {
-- 
cgit v0.10.2


From c06ee78d73fd24e8d8a65f16380f6a0551107e1b Mon Sep 17 00:00:00 2001
From: Mukesh Rathor <mukesh.rathor@oracle.com>
Date: Mon, 19 Jul 2010 10:25:08 -0700
Subject: xen: support large numbers of CPUs with vcpu info placement

When vcpu info placement is supported, we're not limited to MAX_VIRT_CPUS
vcpus.  However, if it isn't supported, then ignore any excess vcpus.

Signed-off-by: Mukesh Rathor <mukesh.rathor@oracle.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index fef034a..90a3e80 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -97,6 +97,14 @@ struct shared_info *HYPERVISOR_shared_info = (void *)&xen_dummy_shared_info;
  */
 static int have_vcpu_info_placement = 1;
 
+static void clamp_max_cpus(void)
+{
+#ifdef CONFIG_SMP
+	if (setup_max_cpus > MAX_VIRT_CPUS)
+		setup_max_cpus = MAX_VIRT_CPUS;
+#endif
+}
+
 static void xen_vcpu_setup(int cpu)
 {
 	struct vcpu_register_vcpu_info info;
@@ -104,13 +112,17 @@ static void xen_vcpu_setup(int cpu)
 	struct vcpu_info *vcpup;
 
 	BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);
-	per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
 
-	if (!have_vcpu_info_placement)
-		return;		/* already tested, not available */
+	if (cpu < MAX_VIRT_CPUS)
+		per_cpu(xen_vcpu,cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
 
-	vcpup = &per_cpu(xen_vcpu_info, cpu);
+	if (!have_vcpu_info_placement) {
+		if (cpu >= MAX_VIRT_CPUS)
+			clamp_max_cpus();
+		return;
+	}
 
+	vcpup = &per_cpu(xen_vcpu_info, cpu);
 	info.mfn = arbitrary_virt_to_mfn(vcpup);
 	info.offset = offset_in_page(vcpup);
 
@@ -125,6 +137,7 @@ static void xen_vcpu_setup(int cpu)
 	if (err) {
 		printk(KERN_DEBUG "register_vcpu_info failed: err=%d\n", err);
 		have_vcpu_info_placement = 0;
+		clamp_max_cpus();
 	} else {
 		/* This cpu is using the registered vcpu info, even if
 		   later ones fail to. */
-- 
cgit v0.10.2


From f09f6d194d85043e0eb105a577e7ad6d8170ab66 Mon Sep 17 00:00:00 2001
From: Donald Dutile <ddutile@redhat.com>
Date: Thu, 15 Jul 2010 14:56:49 -0400
Subject: Xen: register panic notifier to take crashes of xen guests on panic

Register a panic notifier so that when the guest crashes it can shut
down the domain and indicate it was a crash to the host.

Signed-off-by: Donald Dutile <ddutile@redhat.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 90a3e80..d99522e 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1040,6 +1040,26 @@ static void xen_crash_shutdown(struct pt_regs *regs)
 	xen_reboot(SHUTDOWN_crash);
 }
 
+static int
+xen_panic_event(struct notifier_block *this, unsigned long event, void *ptr)
+{
+	struct sched_shutdown r = { .reason = SHUTDOWN_crash};
+
+	if (HYPERVISOR_sched_op(SCHEDOP_shutdown, &r))
+		BUG();
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block xen_panic_block = {
+	.notifier_call= xen_panic_event,
+};
+
+int xen_panic_handler_init(void)
+{
+	atomic_notifier_chain_register(&panic_notifier_list, &xen_panic_block);
+	return 0;
+}
+
 static const struct machine_ops __initdata xen_machine_ops = {
 	.restart = xen_restart,
 	.halt = xen_machine_halt,
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 9deb6ba..328b003 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -226,6 +226,8 @@ void __init xen_arch_setup(void)
 	struct physdev_set_iopl set_iopl;
 	int rc;
 
+	xen_panic_handler_init();
+
 	HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments);
 	HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables);
 
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index f9153a3..00d59d6 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -101,4 +101,6 @@ void xen_sysret32(void);
 void xen_sysret64(void);
 void xen_adjust_exception_frame(void);
 
+extern int xen_panic_handler_init(void);
+
 #endif /* XEN_OPS_H */
-- 
cgit v0.10.2


From 086748e52fb072ff0935ba4512e29c421bd5b716 Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Tue, 3 Aug 2010 14:55:14 -0700
Subject: xen/panic: use xen_reboot and fix smp_send_stop

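Have the panic notifier call xen_reboot(SHUTDOWN_crash) rather than
issuing the shutdown hypercall itself, and offline the vcpu when using
stop_self.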

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index d99522e..3c4da8b 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1043,10 +1043,7 @@ static void xen_crash_shutdown(struct pt_regs *regs)
 static int
 xen_panic_event(struct notifier_block *this, unsigned long event, void *ptr)
 {
-	struct sched_shutdown r = { .reason = SHUTDOWN_crash};
-
-	if (HYPERVISOR_sched_op(SCHEDOP_shutdown, &r))
-		BUG();
+	xen_reboot(SHUTDOWN_crash);
 	return NOTIFY_DONE;
 }
 
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index a29693f..25f232b 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -394,6 +394,8 @@ static void stop_self(void *v)
 	load_cr3(swapper_pg_dir);
 	/* should set up a minimal gdt */
 
+	set_cpu_online(cpu, false);
+
 	HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL);
 	BUG();
 }
-- 
cgit v0.10.2