From f10cd522c5fbfec9ae3cc01967868c9c2401ed23 Mon Sep 17 00:00:00 2001
From: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Date: Tue, 6 Sep 2011 17:41:47 +0100
Subject: xen: disable PV spinlocks on HVM

PV spinlocks cannot possibly work with the current code because they are
enabled after pvops patching has already been done, and because PV
spinlocks use a different data structure than native spinlocks so we
cannot switch between them dynamically. A spinlock that has been taken
once by the native code (__ticket_spin_lock) cannot be taken by
__xen_spin_lock even after it has been released.

Reported-and-Tested-by: Stefan Bader <stefan.bader@canonical.com>
Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>

diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index d4fc6d4..041d4fe 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -532,7 +532,6 @@ static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus)
 	WARN_ON(xen_smp_intr_init(0));
 
 	xen_init_lock_cpu(0);
-	xen_init_spinlocks();
 }
 
 static int __cpuinit xen_hvm_cpu_up(unsigned int cpu)
-- 
cgit v0.10.2


From e3b73c4a25e9a5705b4ef28b91676caf01f9bc9f Mon Sep 17 00:00:00 2001
From: David Vrabel <david.vrabel@citrix.com>
Date: Tue, 13 Sep 2011 10:17:32 -0400
Subject: xen/e820: if there is no dom0_mem=, don't tweak extra_pages.

The patch "xen: use maximum reservation to limit amount of usable RAM"
(d312ae878b6aed3912e1acaaf5d0b2a9d08a4f11) breaks machines that
do not use 'dom0_mem=' argument with:

reserve RAM buffer: 000000133f2e2000 - 000000133fffffff
(XEN) mm.c:4976:d0 Global bit is set to kernel page fffff8117e
(XEN) domain_crash_sync called from entry.S
(XEN) Domain 0 (vcpu#0) crashed on cpu#0:
...

The reason being that the last E820 entry is created using the
'extra_pages' (which is based on how many pages have been freed).
The mentioned git commit sets the initial value of 'extra_pages'
using a hypercall which returns the number of pages (if dom0_mem
has been used) or -1 otherwise. If the later we return with
MAX_DOMAIN_PAGES as basis for calculation:

    return min(max_pages, MAX_DOMAIN_PAGES);

and use it:

     extra_limit = xen_get_max_pages();
     if (extra_limit >= max_pfn)
             extra_pages = extra_limit - max_pfn;
     else
             extra_pages = 0;

which means we end up with extra_pages = 128GB in PFNs (33554432)
- 8GB in PFNs (2097152, on this specific box, can be larger or smaller),
and then we add that value to the E820 making it:

  Xen: 00000000ff000000 - 0000000100000000 (reserved)
  Xen: 0000000100000000 - 000000133f2e2000 (usable)

which is clearly wrong. It should look as so:

  Xen: 00000000ff000000 - 0000000100000000 (reserved)
  Xen: 0000000100000000 - 000000027fbda000 (usable)

Naturally this problem does not present itself if dom0_mem=max:X
is used.

CC: stable@kernel.org
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>

diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index ff3dfa1..09688eb 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -305,10 +305,12 @@ char * __init xen_memory_setup(void)
 	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
 
 	extra_limit = xen_get_max_pages();
-	if (extra_limit >= max_pfn)
-		extra_pages = extra_limit - max_pfn;
-	else
-		extra_pages = 0;
+	if (max_pfn + extra_pages > extra_limit) {
+		if (extra_limit > max_pfn)
+			extra_pages = extra_limit - max_pfn;
+		else
+			extra_pages = 0;
+	}
 
 	extra_pages += xen_return_unused_memory(xen_start_info->nr_pages, &e820);
 
-- 
cgit v0.10.2


From 773659483685d652970583384a0294948e57f8b3 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Wed, 14 Sep 2011 05:10:00 -0400
Subject: xen/irq: Alter the locking to use a mutex instead of a spinlock.

When we allocate/change the IRQ informations, we do not
need to use spinlocks. We can use a mutex (which is
what the generic IRQ code does for allocations/changes).
Fixes a slew of:

BUG: sleeping function called from invalid context at /linux/kernel/mutex.c:271
in_atomic(): 1, irqs_disabled(): 0, pid: 3216, name: xenstored
2 locks held by xenstored/3216:
 #0:  (&u->bind_mutex){......}, at: [<ffffffffa02e0920>] evtchn_ioctl+0x30/0x3a0 [xen_evtchn]
 #1:  (irq_mapping_update_lock){......}, at: [<ffffffff8138b274>] bind_evtchn_to_irq+0x24/0x90
Pid: 3216, comm: xenstored Not tainted 3.1.0-rc6-00021-g437a3d1 #2
Call Trace:
 [<ffffffff81088d10>] __might_sleep+0x100/0x130
 [<ffffffff81645c2f>] mutex_lock_nested+0x2f/0x50
 [<ffffffff81627529>] __irq_alloc_descs+0x49/0x200
 [<ffffffffa02e0920>] ? evtchn_ioctl+0x30/0x3a0 [xen_evtchn]
 [<ffffffff8138b214>] xen_allocate_irq_dynamic+0x34/0x70
 [<ffffffff8138b2ad>] bind_evtchn_to_irq+0x5d/0x90
 [<ffffffffa02e03c0>] ? evtchn_bind_to_user+0x60/0x60 [xen_evtchn]
 [<ffffffff8138c282>] bind_evtchn_to_irqhandler+0x32/0x80
 [<ffffffffa02e03a9>] evtchn_bind_to_user+0x49/0x60 [xen_evtchn]
 [<ffffffffa02e0a34>] evtchn_ioctl+0x144/0x3a0 [xen_evtchn]
 [<ffffffff811b4070>] ? vfsmount_lock_local_unlock+0x50/0x80
 [<ffffffff811a6a1a>] do_vfs_ioctl+0x9a/0x5e0
 [<ffffffff811b476f>] ? mntput+0x1f/0x30
 [<ffffffff81196259>] ? fput+0x199/0x240
 [<ffffffff811a7001>] sys_ioctl+0xa1/0xb0
 [<ffffffff8164ea82>] system_call_fastpath+0x16/0x1b

Reported-by: Jim Burns <jim_burn@bellsouth.net>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>

diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index da70f5c..7523719 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -54,7 +54,7 @@
  * This lock protects updates to the following mapping and reference-count
  * arrays. The lock does not need to be acquired to read the mapping tables.
  */
-static DEFINE_SPINLOCK(irq_mapping_update_lock);
+static DEFINE_MUTEX(irq_mapping_update_lock);
 
 static LIST_HEAD(xen_irq_list_head);
 
@@ -631,7 +631,7 @@ int xen_bind_pirq_gsi_to_irq(unsigned gsi,
 	int irq = -1;
 	struct physdev_irq irq_op;
 
-	spin_lock(&irq_mapping_update_lock);
+	mutex_lock(&irq_mapping_update_lock);
 
 	irq = find_irq_by_gsi(gsi);
 	if (irq != -1) {
@@ -684,7 +684,7 @@ int xen_bind_pirq_gsi_to_irq(unsigned gsi,
 				handle_edge_irq, name);
 
 out:
-	spin_unlock(&irq_mapping_update_lock);
+	mutex_unlock(&irq_mapping_update_lock);
 
 	return irq;
 }
@@ -710,7 +710,7 @@ int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
 {
 	int irq, ret;
 
-	spin_lock(&irq_mapping_update_lock);
+	mutex_lock(&irq_mapping_update_lock);
 
 	irq = xen_allocate_irq_dynamic();
 	if (irq == -1)
@@ -724,10 +724,10 @@ int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
 	if (ret < 0)
 		goto error_irq;
 out:
-	spin_unlock(&irq_mapping_update_lock);
+	mutex_unlock(&irq_mapping_update_lock);
 	return irq;
 error_irq:
-	spin_unlock(&irq_mapping_update_lock);
+	mutex_unlock(&irq_mapping_update_lock);
 	xen_free_irq(irq);
 	return -1;
 }
@@ -740,7 +740,7 @@ int xen_destroy_irq(int irq)
 	struct irq_info *info = info_for_irq(irq);
 	int rc = -ENOENT;
 
-	spin_lock(&irq_mapping_update_lock);
+	mutex_lock(&irq_mapping_update_lock);
 
 	desc = irq_to_desc(irq);
 	if (!desc)
@@ -766,7 +766,7 @@ int xen_destroy_irq(int irq)
 	xen_free_irq(irq);
 
 out:
-	spin_unlock(&irq_mapping_update_lock);
+	mutex_unlock(&irq_mapping_update_lock);
 	return rc;
 }
 
@@ -776,7 +776,7 @@ int xen_irq_from_pirq(unsigned pirq)
 
 	struct irq_info *info;
 
-	spin_lock(&irq_mapping_update_lock);
+	mutex_lock(&irq_mapping_update_lock);
 
 	list_for_each_entry(info, &xen_irq_list_head, list) {
 		if (info == NULL || info->type != IRQT_PIRQ)
@@ -787,7 +787,7 @@ int xen_irq_from_pirq(unsigned pirq)
 	}
 	irq = -1;
 out:
-	spin_unlock(&irq_mapping_update_lock);
+	mutex_unlock(&irq_mapping_update_lock);
 
 	return irq;
 }
@@ -802,7 +802,7 @@ int bind_evtchn_to_irq(unsigned int evtchn)
 {
 	int irq;
 
-	spin_lock(&irq_mapping_update_lock);
+	mutex_lock(&irq_mapping_update_lock);
 
 	irq = evtchn_to_irq[evtchn];
 
@@ -818,7 +818,7 @@ int bind_evtchn_to_irq(unsigned int evtchn)
 	}
 
 out:
-	spin_unlock(&irq_mapping_update_lock);
+	mutex_unlock(&irq_mapping_update_lock);
 
 	return irq;
 }
@@ -829,7 +829,7 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
 	struct evtchn_bind_ipi bind_ipi;
 	int evtchn, irq;
 
-	spin_lock(&irq_mapping_update_lock);
+	mutex_lock(&irq_mapping_update_lock);
 
 	irq = per_cpu(ipi_to_irq, cpu)[ipi];
 
@@ -853,7 +853,7 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
 	}
 
  out:
-	spin_unlock(&irq_mapping_update_lock);
+	mutex_unlock(&irq_mapping_update_lock);
 	return irq;
 }
 
@@ -878,7 +878,7 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
 	struct evtchn_bind_virq bind_virq;
 	int evtchn, irq;
 
-	spin_lock(&irq_mapping_update_lock);
+	mutex_lock(&irq_mapping_update_lock);
 
 	irq = per_cpu(virq_to_irq, cpu)[virq];
 
@@ -903,7 +903,7 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
 	}
 
 out:
-	spin_unlock(&irq_mapping_update_lock);
+	mutex_unlock(&irq_mapping_update_lock);
 
 	return irq;
 }
@@ -913,7 +913,7 @@ static void unbind_from_irq(unsigned int irq)
 	struct evtchn_close close;
 	int evtchn = evtchn_from_irq(irq);
 
-	spin_lock(&irq_mapping_update_lock);
+	mutex_lock(&irq_mapping_update_lock);
 
 	if (VALID_EVTCHN(evtchn)) {
 		close.port = evtchn;
@@ -943,7 +943,7 @@ static void unbind_from_irq(unsigned int irq)
 
 	xen_free_irq(irq);
 
-	spin_unlock(&irq_mapping_update_lock);
+	mutex_unlock(&irq_mapping_update_lock);
 }
 
 int bind_evtchn_to_irqhandler(unsigned int evtchn,
@@ -1279,7 +1279,7 @@ void rebind_evtchn_irq(int evtchn, int irq)
 	   will also be masked. */
 	disable_irq(irq);
 
-	spin_lock(&irq_mapping_update_lock);
+	mutex_lock(&irq_mapping_update_lock);
 
 	/* After resume the irq<->evtchn mappings are all cleared out */
 	BUG_ON(evtchn_to_irq[evtchn] != -1);
@@ -1289,7 +1289,7 @@ void rebind_evtchn_irq(int evtchn, int irq)
 
 	xen_irq_info_evtchn_init(irq, evtchn);
 
-	spin_unlock(&irq_mapping_update_lock);
+	mutex_unlock(&irq_mapping_update_lock);
 
 	/* new event channels are always bound to cpu 0 */
 	irq_set_affinity(irq, cpumask_of(0));
-- 
cgit v0.10.2


From 61cca2fab7ecba18f9b9680cd736ef5fa82ad3b1 Mon Sep 17 00:00:00 2001
From: Jan Beulich <JBeulich@suse.com>
Date: Thu, 15 Sep 2011 08:52:40 +0100
Subject: xen/i386: follow-up to "replace order-based range checking of M2P
 table by linear one"

The numbers obtained from the hypervisor really can't ever lead to an
overflow here, only the original calculation going through the order
of the range could have. This avoids the (as Jeremy points outs)
somewhat ugly NULL-based calculation here.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 24abc1f..a3872f7 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1721,10 +1721,8 @@ void __init xen_setup_machphys_mapping(void)
 		machine_to_phys_nr = MACH2PHYS_NR_ENTRIES;
 	}
 #ifdef CONFIG_X86_32
-	if ((machine_to_phys_mapping + machine_to_phys_nr)
-	    < machine_to_phys_mapping)
-		machine_to_phys_nr = (unsigned long *)NULL
-				     - machine_to_phys_mapping;
+	WARN_ON((machine_to_phys_mapping + (machine_to_phys_nr - 1))
+		< machine_to_phys_mapping);
 #endif
 }
 
-- 
cgit v0.10.2