From 6efa20e49b9cb1db1ab66870cc37323474a75a13 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad@kernel.org>
Date: Fri, 19 Jul 2013 11:51:31 -0400
Subject: xen: Support 64-bit PV guest receiving NMIs

This is based on a patch that Zhenzhong Duan had sent - which
was missing some of the remaining pieces. The kernel has the
logic to handle Xen-type-exceptions using the paravirt interface
in the assembler code (see PARAVIRT_ADJUST_EXCEPTION_FRAME -
pv_irq_ops.adjust_exception_frame and and INTERRUPT_RETURN -
pv_cpu_ops.iret).

That means the nmi handler (and other exception handlers) use
the hypervisor iret.

The other changes that would be neccessary for this would
be to translate the NMI_VECTOR to one of the entries on the
ipi_vector and make xen_send_IPI_mask_allbutself use different
events.

Fortunately for us commit 1db01b4903639fcfaec213701a494fe3fb2c490b
(xen: Clean up apic ipi interface) implemented this and we piggyback
on the cleanup such that the apic IPI interface will pass the right
vector value for NMI.

With this patch we can trigger NMIs within a PV guest (only tested
x86_64).

For this to work with normal PV guests (not initial domain)
we need the domain to be able to use the APIC ops - they are
already implemented to use the Xen event channels. For that
to be turned on in a PV domU we need to remove the masking
of X86_FEATURE_APIC.

Incidentally that means kgdb will also now work within
a PV guest without using the 'nokgdbroundup' workaround.

Note that the 32-bit version is different and this patch
does not enable that.

CC: Lisa Nguyen <lisa@xenapiadmin.com>
CC: Ben Guthro <benjamin.guthro@citrix.com>
CC: Zhenzhong Duan <zhenzhong.duan@oracle.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
[v1: Fixed up per David Vrabel comments]
Reviewed-by: Ben Guthro <benjamin.guthro@citrix.com>
Reviewed-by: David Vrabel <david.vrabel@citrix.com>

diff --git a/arch/x86/include/asm/xen/events.h b/arch/x86/include/asm/xen/events.h
index ca842f2..608a79d 100644
--- a/arch/x86/include/asm/xen/events.h
+++ b/arch/x86/include/asm/xen/events.h
@@ -7,6 +7,7 @@ enum ipi_vector {
 	XEN_CALL_FUNCTION_SINGLE_VECTOR,
 	XEN_SPIN_UNLOCK_VECTOR,
 	XEN_IRQ_WORK_VECTOR,
+	XEN_NMI_VECTOR,
 
 	XEN_NR_IPIS,
 };
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 193097e..b5a22fa 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -427,8 +427,7 @@ static void __init xen_init_cpuid_mask(void)
 
 	if (!xen_initial_domain())
 		cpuid_leaf1_edx_mask &=
-			~((1 << X86_FEATURE_APIC) |  /* disable local APIC */
-			  (1 << X86_FEATURE_ACPI));  /* disable ACPI */
+			~((1 << X86_FEATURE_ACPI));  /* disable ACPI */
 
 	cpuid_leaf1_ecx_mask &= ~(1 << (X86_FEATURE_X2APIC % 32));
 
@@ -735,8 +734,7 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val,
 		addr = (unsigned long)xen_int3;
 	else if (addr == (unsigned long)stack_segment)
 		addr = (unsigned long)xen_stack_segment;
-	else if (addr == (unsigned long)double_fault ||
-		 addr == (unsigned long)nmi) {
+	else if (addr == (unsigned long)double_fault) {
 		/* Don't need to handle these */
 		return 0;
 #ifdef CONFIG_X86_MCE
@@ -747,7 +745,12 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val,
 		 */
 		;
 #endif
-	} else {
+	} else if (addr == (unsigned long)nmi)
+		/*
+		 * Use the native version as well.
+		 */
+		;
+	else {
 		/* Some other trap using IST? */
 		if (WARN_ON(val->ist != 0))
 			return 0;
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 056d11f..403f5cc 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -33,6 +33,9 @@
 /* These are code, but not functions.  Defined in entry.S */
 extern const char xen_hypervisor_callback[];
 extern const char xen_failsafe_callback[];
+#ifdef CONFIG_X86_64
+extern const char nmi[];
+#endif
 extern void xen_sysenter_target(void);
 extern void xen_syscall_target(void);
 extern void xen_syscall32_target(void);
@@ -525,7 +528,13 @@ void xen_enable_syscall(void)
 	}
 #endif /* CONFIG_X86_64 */
 }
-
+void __cpuinit xen_enable_nmi(void)
+{
+#ifdef CONFIG_X86_64
+	if (register_callback(CALLBACKTYPE_nmi, nmi))
+		BUG();
+#endif
+}
 void __init xen_arch_setup(void)
 {
 	xen_panic_handler_init();
@@ -543,7 +552,7 @@ void __init xen_arch_setup(void)
 
 	xen_enable_sysenter();
 	xen_enable_syscall();
-
+	xen_enable_nmi();
 #ifdef CONFIG_ACPI
 	if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
 		printk(KERN_INFO "ACPI in unprivileged domain disabled\n");
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index ca92754..22759c6 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -572,6 +572,12 @@ static inline int xen_map_vector(int vector)
 	case IRQ_WORK_VECTOR:
 		xen_vector = XEN_IRQ_WORK_VECTOR;
 		break;
+#ifdef CONFIG_X86_64
+	case NMI_VECTOR:
+	case APIC_DM_NMI: /* Some use that instead of NMI_VECTOR */
+		xen_vector = XEN_NMI_VECTOR;
+		break;
+#endif
 	default:
 		xen_vector = -1;
 		printk(KERN_ERR "xen: vector 0x%x is not implemented\n",
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index a58ac43..c8b9d9f 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -56,6 +56,7 @@
 #include <xen/interface/hvm/params.h>
 #include <xen/interface/physdev.h>
 #include <xen/interface/sched.h>
+#include <xen/interface/vcpu.h>
 #include <asm/hw_irq.h>
 
 /*
@@ -1212,7 +1213,15 @@ EXPORT_SYMBOL_GPL(evtchn_put);
 
 void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector)
 {
-	int irq = per_cpu(ipi_to_irq, cpu)[vector];
+	int irq;
+
+	if (unlikely(vector == XEN_NMI_VECTOR)) {
+		int rc =  HYPERVISOR_vcpu_op(VCPUOP_send_nmi, cpu, NULL);
+		if (rc < 0)
+			printk(KERN_WARNING "Sending nmi to CPU%d failed (rc:%d)\n", cpu, rc);
+		return;
+	}
+	irq = per_cpu(ipi_to_irq, cpu)[vector];
 	BUG_ON(irq < 0);
 	notify_remote_via_irq(irq);
 }
diff --git a/include/xen/interface/vcpu.h b/include/xen/interface/vcpu.h
index 87e6f8a..b05288c 100644
--- a/include/xen/interface/vcpu.h
+++ b/include/xen/interface/vcpu.h
@@ -170,4 +170,6 @@ struct vcpu_register_vcpu_info {
 };
 DEFINE_GUEST_HANDLE_STRUCT(vcpu_register_vcpu_info);
 
+/* Send an NMI to the specified VCPU. @extra_arg == NULL. */
+#define VCPUOP_send_nmi             11
 #endif /* __XEN_PUBLIC_VCPU_H__ */
-- 
cgit v0.10.2


From e2683957fb268c6b29316fd9e7191e13239a30a5 Mon Sep 17 00:00:00 2001
From: Daniel De Graaf <dgdegra@tycho.nsa.gov>
Date: Tue, 30 Jul 2013 13:29:47 -0400
Subject: drivers/tpm: add xen tpmfront interface

This is a complete rewrite of the Xen TPM frontend driver, taking
advantage of a simplified frontend/backend interface and adding support
for cancellation and timeouts.  The backend for this driver is provided
by a vTPM stub domain using the interface in Xen 4.3.

Signed-off-by: Daniel De Graaf <dgdegra@tycho.nsa.gov>
Acked-by: Matthew Fioravante <matthew.fioravante@jhuapl.edu>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Acked-by: Peter Huewe <peterhuewe@gmx.de>
Reviewed-by: Peter Huewe <peterhuewe@gmx.de>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>

diff --git a/Documentation/tpm/xen-tpmfront.txt b/Documentation/tpm/xen-tpmfront.txt
new file mode 100644
index 0000000..69346de
--- /dev/null
+++ b/Documentation/tpm/xen-tpmfront.txt
@@ -0,0 +1,113 @@
+Virtual TPM interface for Xen
+
+Authors: Matthew Fioravante (JHUAPL), Daniel De Graaf (NSA)
+
+This document describes the virtual Trusted Platform Module (vTPM) subsystem for
+Xen. The reader is assumed to have familiarity with building and installing Xen,
+Linux, and a basic understanding of the TPM and vTPM concepts.
+
+INTRODUCTION
+
+The goal of this work is to provide a TPM functionality to a virtual guest
+operating system (in Xen terms, a DomU).  This allows programs to interact with
+a TPM in a virtual system the same way they interact with a TPM on the physical
+system.  Each guest gets its own unique, emulated, software TPM.  However, each
+of the vTPM's secrets (Keys, NVRAM, etc) are managed by a vTPM Manager domain,
+which seals the secrets to the Physical TPM.  If the process of creating each of
+these domains (manager, vTPM, and guest) is trusted, the vTPM subsystem extends
+the chain of trust rooted in the hardware TPM to virtual machines in Xen. Each
+major component of vTPM is implemented as a separate domain, providing secure
+separation guaranteed by the hypervisor. The vTPM domains are implemented in
+mini-os to reduce memory and processor overhead.
+
+This mini-os vTPM subsystem was built on top of the previous vTPM work done by
+IBM and Intel corporation.
+
+
+DESIGN OVERVIEW
+---------------
+
+The architecture of vTPM is described below:
+
++------------------+
+|    Linux DomU    | ...
+|       |  ^       |
+|       v  |       |
+|   xen-tpmfront   |
++------------------+
+        |  ^
+        v  |
++------------------+
+| mini-os/tpmback  |
+|       |  ^       |
+|       v  |       |
+|  vtpm-stubdom    | ...
+|       |  ^       |
+|       v  |       |
+| mini-os/tpmfront |
++------------------+
+        |  ^
+        v  |
++------------------+
+| mini-os/tpmback  |
+|       |  ^       |
+|       v  |       |
+| vtpmmgr-stubdom  |
+|       |  ^       |
+|       v  |       |
+| mini-os/tpm_tis  |
++------------------+
+        |  ^
+        v  |
++------------------+
+|   Hardware TPM   |
++------------------+
+
+ * Linux DomU: The Linux based guest that wants to use a vTPM. There may be
+	       more than one of these.
+
+ * xen-tpmfront.ko: Linux kernel virtual TPM frontend driver. This driver
+                    provides vTPM access to a Linux-based DomU.
+
+ * mini-os/tpmback: Mini-os TPM backend driver. The Linux frontend driver
+		    connects to this backend driver to facilitate communications
+		    between the Linux DomU and its vTPM. This driver is also
+		    used by vtpmmgr-stubdom to communicate with vtpm-stubdom.
+
+ * vtpm-stubdom: A mini-os stub domain that implements a vTPM. There is a
+		 one to one mapping between running vtpm-stubdom instances and
+                 logical vtpms on the system. The vTPM Platform Configuration
+                 Registers (PCRs) are normally all initialized to zero.
+
+ * mini-os/tpmfront: Mini-os TPM frontend driver. The vTPM mini-os domain
+		     vtpm-stubdom uses this driver to communicate with
+		     vtpmmgr-stubdom. This driver is also used in mini-os
+		     domains such as pv-grub that talk to the vTPM domain.
+
+ * vtpmmgr-stubdom: A mini-os domain that implements the vTPM manager. There is
+		    only one vTPM manager and it should be running during the
+		    entire lifetime of the machine.  This domain regulates
+		    access to the physical TPM on the system and secures the
+		    persistent state of each vTPM.
+
+ * mini-os/tpm_tis: Mini-os TPM version 1.2 TPM Interface Specification (TIS)
+                    driver. This driver used by vtpmmgr-stubdom to talk directly to
+                    the hardware TPM. Communication is facilitated by mapping
+                    hardware memory pages into vtpmmgr-stubdom.
+
+ * Hardware TPM: The physical TPM that is soldered onto the motherboard.
+
+
+INTEGRATION WITH XEN
+--------------------
+
+Support for the vTPM driver was added in Xen using the libxl toolstack in Xen
+4.3.  See the Xen documentation (docs/misc/vtpm.txt) for details on setting up
+the vTPM and vTPM Manager stub domains.  Once the stub domains are running, a
+vTPM device is set up in the same manner as a disk or network device in the
+domain's configuration file.
+
+In order to use features such as IMA that require a TPM to be loaded prior to
+the initrd, the xen-tpmfront driver must be compiled in to the kernel.  If not
+using such features, the driver can be compiled as a module and will be loaded
+as usual.
diff --git a/drivers/char/tpm/Kconfig b/drivers/char/tpm/Kconfig
index dbfd564..205ed35 100644
--- a/drivers/char/tpm/Kconfig
+++ b/drivers/char/tpm/Kconfig
@@ -91,4 +91,15 @@ config TCG_ST33_I2C
         To compile this driver as a module, choose M here; the module will be
         called tpm_stm_st33_i2c.
 
+config TCG_XEN
+	tristate "XEN TPM Interface"
+	depends on TCG_TPM && XEN
+	---help---
+	  If you want to make TPM support available to a Xen user domain,
+	  say Yes and it will be accessible from within Linux. See
+	  the manpages for xl, xl.conf, and docs/misc/vtpm.txt in
+	  the Xen source repository for more details.
+	  To compile this driver as a module, choose M here; the module
+	  will be called xen-tpmfront.
+
 endif # TCG_TPM
diff --git a/drivers/char/tpm/Makefile b/drivers/char/tpm/Makefile
index a3736c9..eb41ff9 100644
--- a/drivers/char/tpm/Makefile
+++ b/drivers/char/tpm/Makefile
@@ -18,3 +18,4 @@ obj-$(CONFIG_TCG_ATMEL) += tpm_atmel.o
 obj-$(CONFIG_TCG_INFINEON) += tpm_infineon.o
 obj-$(CONFIG_TCG_IBMVTPM) += tpm_ibmvtpm.o
 obj-$(CONFIG_TCG_ST33_I2C) += tpm_i2c_stm_st33.o
+obj-$(CONFIG_TCG_XEN) += xen-tpmfront.o
diff --git a/drivers/char/tpm/xen-tpmfront.c b/drivers/char/tpm/xen-tpmfront.c
new file mode 100644
index 0000000..7a7929b
--- /dev/null
+++ b/drivers/char/tpm/xen-tpmfront.c
@@ -0,0 +1,473 @@
+/*
+ * Implementation of the Xen vTPM device frontend
+ *
+ * Author:  Daniel De Graaf <dgdegra@tycho.nsa.gov>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2,
+ * as published by the Free Software Foundation.
+ */
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <xen/events.h>
+#include <xen/interface/io/tpmif.h>
+#include <xen/grant_table.h>
+#include <xen/xenbus.h>
+#include <xen/page.h>
+#include "tpm.h"
+
+struct tpm_private {
+	struct tpm_chip *chip;
+	struct xenbus_device *dev;
+
+	struct vtpm_shared_page *shr;
+
+	unsigned int evtchn;
+	int ring_ref;
+	domid_t backend_id;
+};
+
+enum status_bits {
+	VTPM_STATUS_RUNNING  = 0x1,
+	VTPM_STATUS_IDLE     = 0x2,
+	VTPM_STATUS_RESULT   = 0x4,
+	VTPM_STATUS_CANCELED = 0x8,
+};
+
+static u8 vtpm_status(struct tpm_chip *chip)
+{
+	struct tpm_private *priv = TPM_VPRIV(chip);
+	switch (priv->shr->state) {
+	case VTPM_STATE_IDLE:
+		return VTPM_STATUS_IDLE | VTPM_STATUS_CANCELED;
+	case VTPM_STATE_FINISH:
+		return VTPM_STATUS_IDLE | VTPM_STATUS_RESULT;
+	case VTPM_STATE_SUBMIT:
+	case VTPM_STATE_CANCEL: /* cancel requested, not yet canceled */
+		return VTPM_STATUS_RUNNING;
+	default:
+		return 0;
+	}
+}
+
+static bool vtpm_req_canceled(struct tpm_chip *chip, u8 status)
+{
+	return status & VTPM_STATUS_CANCELED;
+}
+
+static void vtpm_cancel(struct tpm_chip *chip)
+{
+	struct tpm_private *priv = TPM_VPRIV(chip);
+	priv->shr->state = VTPM_STATE_CANCEL;
+	wmb();
+	notify_remote_via_evtchn(priv->evtchn);
+}
+
+static unsigned int shr_data_offset(struct vtpm_shared_page *shr)
+{
+	return sizeof(*shr) + sizeof(u32) * shr->nr_extra_pages;
+}
+
+static int vtpm_send(struct tpm_chip *chip, u8 *buf, size_t count)
+{
+	struct tpm_private *priv = TPM_VPRIV(chip);
+	struct vtpm_shared_page *shr = priv->shr;
+	unsigned int offset = shr_data_offset(shr);
+
+	u32 ordinal;
+	unsigned long duration;
+
+	if (offset > PAGE_SIZE)
+		return -EINVAL;
+
+	if (offset + count > PAGE_SIZE)
+		return -EINVAL;
+
+	/* Wait for completion of any existing command or cancellation */
+	if (wait_for_tpm_stat(chip, VTPM_STATUS_IDLE, chip->vendor.timeout_c,
+			&chip->vendor.read_queue, true) < 0) {
+		vtpm_cancel(chip);
+		return -ETIME;
+	}
+
+	memcpy(offset + (u8 *)shr, buf, count);
+	shr->length = count;
+	barrier();
+	shr->state = VTPM_STATE_SUBMIT;
+	wmb();
+	notify_remote_via_evtchn(priv->evtchn);
+
+	ordinal = be32_to_cpu(((struct tpm_input_header*)buf)->ordinal);
+	duration = tpm_calc_ordinal_duration(chip, ordinal);
+
+	if (wait_for_tpm_stat(chip, VTPM_STATUS_IDLE, duration,
+			&chip->vendor.read_queue, true) < 0) {
+		/* got a signal or timeout, try to cancel */
+		vtpm_cancel(chip);
+		return -ETIME;
+	}
+
+	return count;
+}
+
+static int vtpm_recv(struct tpm_chip *chip, u8 *buf, size_t count)
+{
+	struct tpm_private *priv = TPM_VPRIV(chip);
+	struct vtpm_shared_page *shr = priv->shr;
+	unsigned int offset = shr_data_offset(shr);
+	size_t length = shr->length;
+
+	if (shr->state == VTPM_STATE_IDLE)
+		return -ECANCELED;
+
+	/* In theory the wait at the end of _send makes this one unnecessary */
+	if (wait_for_tpm_stat(chip, VTPM_STATUS_RESULT, chip->vendor.timeout_c,
+			&chip->vendor.read_queue, true) < 0) {
+		vtpm_cancel(chip);
+		return -ETIME;
+	}
+
+	if (offset > PAGE_SIZE)
+		return -EIO;
+
+	if (offset + length > PAGE_SIZE)
+		length = PAGE_SIZE - offset;
+
+	if (length > count)
+		length = count;
+
+	memcpy(buf, offset + (u8 *)shr, length);
+
+	return length;
+}
+
+ssize_t tpm_show_locality(struct device *dev, struct device_attribute *attr,
+			  char *buf)
+{
+	struct tpm_chip *chip = dev_get_drvdata(dev);
+	struct tpm_private *priv = TPM_VPRIV(chip);
+	u8 locality = priv->shr->locality;
+
+	return sprintf(buf, "%d\n", locality);
+}
+
+ssize_t tpm_store_locality(struct device *dev, struct device_attribute *attr,
+			const char *buf, size_t len)
+{
+	struct tpm_chip *chip = dev_get_drvdata(dev);
+	struct tpm_private *priv = TPM_VPRIV(chip);
+	u8 val;
+
+	int rv = kstrtou8(buf, 0, &val);
+	if (rv)
+		return rv;
+
+	priv->shr->locality = val;
+
+	return len;
+}
+
+static const struct file_operations vtpm_ops = {
+	.owner = THIS_MODULE,
+	.llseek = no_llseek,
+	.open = tpm_open,
+	.read = tpm_read,
+	.write = tpm_write,
+	.release = tpm_release,
+};
+
+static DEVICE_ATTR(pubek, S_IRUGO, tpm_show_pubek, NULL);
+static DEVICE_ATTR(pcrs, S_IRUGO, tpm_show_pcrs, NULL);
+static DEVICE_ATTR(enabled, S_IRUGO, tpm_show_enabled, NULL);
+static DEVICE_ATTR(active, S_IRUGO, tpm_show_active, NULL);
+static DEVICE_ATTR(owned, S_IRUGO, tpm_show_owned, NULL);
+static DEVICE_ATTR(temp_deactivated, S_IRUGO, tpm_show_temp_deactivated,
+		NULL);
+static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps, NULL);
+static DEVICE_ATTR(cancel, S_IWUSR | S_IWGRP, NULL, tpm_store_cancel);
+static DEVICE_ATTR(durations, S_IRUGO, tpm_show_durations, NULL);
+static DEVICE_ATTR(timeouts, S_IRUGO, tpm_show_timeouts, NULL);
+static DEVICE_ATTR(locality, S_IRUGO | S_IWUSR, tpm_show_locality,
+		tpm_store_locality);
+
+static struct attribute *vtpm_attrs[] = {
+	&dev_attr_pubek.attr,
+	&dev_attr_pcrs.attr,
+	&dev_attr_enabled.attr,
+	&dev_attr_active.attr,
+	&dev_attr_owned.attr,
+	&dev_attr_temp_deactivated.attr,
+	&dev_attr_caps.attr,
+	&dev_attr_cancel.attr,
+	&dev_attr_durations.attr,
+	&dev_attr_timeouts.attr,
+	&dev_attr_locality.attr,
+	NULL,
+};
+
+static struct attribute_group vtpm_attr_grp = {
+	.attrs = vtpm_attrs,
+};
+
+#define TPM_LONG_TIMEOUT   (10 * 60 * HZ)
+
+static const struct tpm_vendor_specific tpm_vtpm = {
+	.status = vtpm_status,
+	.recv = vtpm_recv,
+	.send = vtpm_send,
+	.cancel = vtpm_cancel,
+	.req_complete_mask = VTPM_STATUS_IDLE | VTPM_STATUS_RESULT,
+	.req_complete_val  = VTPM_STATUS_IDLE | VTPM_STATUS_RESULT,
+	.req_canceled      = vtpm_req_canceled,
+	.attr_group = &vtpm_attr_grp,
+	.miscdev = {
+		.fops = &vtpm_ops,
+	},
+	.duration = {
+		TPM_LONG_TIMEOUT,
+		TPM_LONG_TIMEOUT,
+		TPM_LONG_TIMEOUT,
+	},
+};
+
+static irqreturn_t tpmif_interrupt(int dummy, void *dev_id)
+{
+	struct tpm_private *priv = dev_id;
+
+	switch (priv->shr->state) {
+	case VTPM_STATE_IDLE:
+	case VTPM_STATE_FINISH:
+		wake_up_interruptible(&priv->chip->vendor.read_queue);
+		break;
+	case VTPM_STATE_SUBMIT:
+	case VTPM_STATE_CANCEL:
+	default:
+		break;
+	}
+	return IRQ_HANDLED;
+}
+
+static int setup_chip(struct device *dev, struct tpm_private *priv)
+{
+	struct tpm_chip *chip;
+
+	chip = tpm_register_hardware(dev, &tpm_vtpm);
+	if (!chip)
+		return -ENODEV;
+
+	init_waitqueue_head(&chip->vendor.read_queue);
+
+	priv->chip = chip;
+	TPM_VPRIV(chip) = priv;
+
+	return 0;
+}
+
+/* caller must clean up in case of errors */
+static int setup_ring(struct xenbus_device *dev, struct tpm_private *priv)
+{
+	struct xenbus_transaction xbt;
+	const char *message = NULL;
+	int rv;
+
+	priv->shr = (void *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
+	if (!priv->shr) {
+		xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring");
+		return -ENOMEM;
+	}
+
+	rv = xenbus_grant_ring(dev, virt_to_mfn(priv->shr));
+	if (rv < 0)
+		return rv;
+
+	priv->ring_ref = rv;
+
+	rv = xenbus_alloc_evtchn(dev, &priv->evtchn);
+	if (rv)
+		return rv;
+
+	rv = bind_evtchn_to_irqhandler(priv->evtchn, tpmif_interrupt, 0,
+				       "tpmif", priv);
+	if (rv <= 0) {
+		xenbus_dev_fatal(dev, rv, "allocating TPM irq");
+		return rv;
+	}
+	priv->chip->vendor.irq = rv;
+
+ again:
+	rv = xenbus_transaction_start(&xbt);
+	if (rv) {
+		xenbus_dev_fatal(dev, rv, "starting transaction");
+		return rv;
+	}
+
+	rv = xenbus_printf(xbt, dev->nodename,
+			"ring-ref", "%u", priv->ring_ref);
+	if (rv) {
+		message = "writing ring-ref";
+		goto abort_transaction;
+	}
+
+	rv = xenbus_printf(xbt, dev->nodename, "event-channel", "%u",
+			priv->evtchn);
+	if (rv) {
+		message = "writing event-channel";
+		goto abort_transaction;
+	}
+
+	rv = xenbus_printf(xbt, dev->nodename, "feature-protocol-v2", "1");
+	if (rv) {
+		message = "writing feature-protocol-v2";
+		goto abort_transaction;
+	}
+
+	rv = xenbus_transaction_end(xbt, 0);
+	if (rv == -EAGAIN)
+		goto again;
+	if (rv) {
+		xenbus_dev_fatal(dev, rv, "completing transaction");
+		return rv;
+	}
+
+	xenbus_switch_state(dev, XenbusStateInitialised);
+
+	return 0;
+
+ abort_transaction:
+	xenbus_transaction_end(xbt, 1);
+	if (message)
+		xenbus_dev_error(dev, rv, "%s", message);
+
+	return rv;
+}
+
+static void ring_free(struct tpm_private *priv)
+{
+	if (!priv)
+		return;
+
+	if (priv->ring_ref)
+		gnttab_end_foreign_access(priv->ring_ref, 0,
+				(unsigned long)priv->shr);
+	else
+		free_page((unsigned long)priv->shr);
+
+	if (priv->chip && priv->chip->vendor.irq)
+		unbind_from_irqhandler(priv->chip->vendor.irq, priv);
+
+	kfree(priv);
+}
+
+static int tpmfront_probe(struct xenbus_device *dev,
+		const struct xenbus_device_id *id)
+{
+	struct tpm_private *priv;
+	int rv;
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv) {
+		xenbus_dev_fatal(dev, -ENOMEM, "allocating priv structure");
+		return -ENOMEM;
+	}
+
+	rv = setup_chip(&dev->dev, priv);
+	if (rv) {
+		kfree(priv);
+		return rv;
+	}
+
+	rv = setup_ring(dev, priv);
+	if (rv) {
+		tpm_remove_hardware(&dev->dev);
+		ring_free(priv);
+		return rv;
+	}
+
+	tpm_get_timeouts(priv->chip);
+
+	dev_set_drvdata(&dev->dev, priv->chip);
+
+	return rv;
+}
+
+static int tpmfront_remove(struct xenbus_device *dev)
+{
+	struct tpm_chip *chip = dev_get_drvdata(&dev->dev);
+	struct tpm_private *priv = TPM_VPRIV(chip);
+	tpm_remove_hardware(&dev->dev);
+	ring_free(priv);
+	TPM_VPRIV(chip) = NULL;
+	return 0;
+}
+
+static int tpmfront_resume(struct xenbus_device *dev)
+{
+	/* A suspend/resume/migrate will interrupt a vTPM anyway */
+	tpmfront_remove(dev);
+	return tpmfront_probe(dev, NULL);
+}
+
+static void backend_changed(struct xenbus_device *dev,
+		enum xenbus_state backend_state)
+{
+	int val;
+
+	switch (backend_state) {
+	case XenbusStateInitialised:
+	case XenbusStateConnected:
+		if (dev->state == XenbusStateConnected)
+			break;
+
+		if (xenbus_scanf(XBT_NIL, dev->otherend,
+				"feature-protocol-v2", "%d", &val) < 0)
+			val = 0;
+		if (!val) {
+			xenbus_dev_fatal(dev, -EINVAL,
+					"vTPM protocol 2 required");
+			return;
+		}
+		xenbus_switch_state(dev, XenbusStateConnected);
+		break;
+
+	case XenbusStateClosing:
+	case XenbusStateClosed:
+		device_unregister(&dev->dev);
+		xenbus_frontend_closed(dev);
+		break;
+	default:
+		break;
+	}
+}
+
+static const struct xenbus_device_id tpmfront_ids[] = {
+	{ "vtpm" },
+	{ "" }
+};
+MODULE_ALIAS("xen:vtpm");
+
+static DEFINE_XENBUS_DRIVER(tpmfront, ,
+		.probe = tpmfront_probe,
+		.remove = tpmfront_remove,
+		.resume = tpmfront_resume,
+		.otherend_changed = backend_changed,
+	);
+
+static int __init xen_tpmfront_init(void)
+{
+	if (!xen_domain())
+		return -ENODEV;
+
+	return xenbus_register_frontend(&tpmfront_driver);
+}
+module_init(xen_tpmfront_init);
+
+static void __exit xen_tpmfront_exit(void)
+{
+	xenbus_unregister_driver(&tpmfront_driver);
+}
+module_exit(xen_tpmfront_exit);
+
+MODULE_AUTHOR("Daniel De Graaf <dgdegra@tycho.nsa.gov>");
+MODULE_DESCRIPTION("Xen vTPM Driver");
+MODULE_LICENSE("GPL");
diff --git a/include/xen/interface/io/tpmif.h b/include/xen/interface/io/tpmif.h
new file mode 100644
index 0000000..28e7dcd
--- /dev/null
+++ b/include/xen/interface/io/tpmif.h
@@ -0,0 +1,52 @@
+/******************************************************************************
+ * tpmif.h
+ *
+ * TPM I/O interface for Xen guest OSes, v2
+ *
+ * This file is in the public domain.
+ *
+ */
+
+#ifndef __XEN_PUBLIC_IO_TPMIF_H__
+#define __XEN_PUBLIC_IO_TPMIF_H__
+
+/*
+ * Xenbus state machine
+ *
+ * Device open:
+ *   1. Both ends start in XenbusStateInitialising
+ *   2. Backend transitions to InitWait (frontend does not wait on this step)
+ *   3. Frontend populates ring-ref, event-channel, feature-protocol-v2
+ *   4. Frontend transitions to Initialised
+ *   5. Backend maps grant and event channel, verifies feature-protocol-v2
+ *   6. Backend transitions to Connected
+ *   7. Frontend verifies feature-protocol-v2, transitions to Connected
+ *
+ * Device close:
+ *   1. State is changed to XenbusStateClosing
+ *   2. Frontend transitions to Closed
+ *   3. Backend unmaps grant and event, changes state to InitWait
+ */
+
+enum vtpm_shared_page_state {
+	VTPM_STATE_IDLE,         /* no contents / vTPM idle / cancel complete */
+	VTPM_STATE_SUBMIT,       /* request ready / vTPM working */
+	VTPM_STATE_FINISH,       /* response ready / vTPM idle */
+	VTPM_STATE_CANCEL,       /* cancel requested / vTPM working */
+};
+/* The backend should only change state to IDLE or FINISH, while the
+ * frontend should only change to SUBMIT or CANCEL. */
+
+
+struct vtpm_shared_page {
+	uint32_t length;         /* request/response length in bytes */
+
+	uint8_t state;           /* enum vtpm_shared_page_state */
+	uint8_t locality;        /* for the current request */
+	uint8_t pad;
+
+	uint8_t nr_extra_pages;  /* extra pages for long packets; may be zero */
+	uint32_t extra_pages[0]; /* grant IDs; length in nr_extra_pages */
+};
+
+#endif
-- 
cgit v0.10.2


From 5f338d9001094a56cf87bd8a280b4e7ff953bb59 Mon Sep 17 00:00:00 2001
From: Roger Pau Monne <roger.pau@citrix.com>
Date: Wed, 31 Jul 2013 17:00:42 +0200
Subject: xen-gnt: prevent adding duplicate gnt callbacks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

With the current implementation, the callback in the tail of the list
can be added twice, because the check done in
gnttab_request_free_callback is bogus, callback->next can be NULL if
it is the last callback in the list. If we add the same callback twice
we end up with an infinite loop, were callback == callback->next.

Replace this check with a proper one that iterates over the list to
see if the callback has already been added.

Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: David Vrabel <david.vrabel@citrix.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Acked-by: Matt Wilson <msw@amazon.com>
Reviewed-by: David Vrabel <david.vrabel@citrix.com>
CC: stable@vger.kernel.org

diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index 04cdeb8..c4d2298 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -730,9 +730,18 @@ void gnttab_request_free_callback(struct gnttab_free_callback *callback,
 				  void (*fn)(void *), void *arg, u16 count)
 {
 	unsigned long flags;
+	struct gnttab_free_callback *cb;
+
 	spin_lock_irqsave(&gnttab_list_lock, flags);
-	if (callback->next)
-		goto out;
+
+	/* Check if the callback is already on the list */
+	cb = gnttab_free_callback_list;
+	while (cb) {
+		if (cb == callback)
+			goto out;
+		cb = cb->next;
+	}
+
 	callback->fn = fn;
 	callback->arg = arg;
 	callback->count = count;
-- 
cgit v0.10.2


From d3dbd93db54f477e066926394e3e4ae3ac06cd3b Mon Sep 17 00:00:00 2001
From: Jingoo Han <jg1.han@samsung.com>
Date: Fri, 19 Jul 2013 16:20:15 +0900
Subject: xen: replace strict_strtoul() with kstrtoul()

The usage of strict_strtoul() is not preferred, because
strict_strtoul() is obsolete. Thus, kstrtoul() should be
used.

Signed-off-by: Jingoo Han <jg1.han@samsung.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>

diff --git a/drivers/xen/xen-selfballoon.c b/drivers/xen/xen-selfballoon.c
index 02817a8..21e18c1 100644
--- a/drivers/xen/xen-selfballoon.c
+++ b/drivers/xen/xen-selfballoon.c
@@ -265,8 +265,10 @@ static ssize_t store_selfballooning(struct device *dev,
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	err = strict_strtoul(buf, 10, &tmp);
-	if (err || ((tmp != 0) && (tmp != 1)))
+	err = kstrtoul(buf, 10, &tmp);
+	if (err)
+		return err;
+	if ((tmp != 0) && (tmp != 1))
 		return -EINVAL;
 
 	xen_selfballooning_enabled = !!tmp;
@@ -292,8 +294,10 @@ static ssize_t store_selfballoon_interval(struct device *dev,
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
-	err = strict_strtoul(buf, 10, &val);
-	if (err || val == 0)
+	err = kstrtoul(buf, 10, &val);
+	if (err)
+		return err;
+	if (val == 0)
 		return -EINVAL;
 	selfballoon_interval = val;
 	return count;
@@ -314,8 +318,10 @@ static ssize_t store_selfballoon_downhys(struct device *dev,
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
-	err = strict_strtoul(buf, 10, &val);
-	if (err || val == 0)
+	err = kstrtoul(buf, 10, &val);
+	if (err)
+		return err;
+	if (val == 0)
 		return -EINVAL;
 	selfballoon_downhysteresis = val;
 	return count;
@@ -337,8 +343,10 @@ static ssize_t store_selfballoon_uphys(struct device *dev,
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
-	err = strict_strtoul(buf, 10, &val);
-	if (err || val == 0)
+	err = kstrtoul(buf, 10, &val);
+	if (err)
+		return err;
+	if (val == 0)
 		return -EINVAL;
 	selfballoon_uphysteresis = val;
 	return count;
@@ -360,8 +368,10 @@ static ssize_t store_selfballoon_min_usable_mb(struct device *dev,
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
-	err = strict_strtoul(buf, 10, &val);
-	if (err || val == 0)
+	err = kstrtoul(buf, 10, &val);
+	if (err)
+		return err;
+	if (val == 0)
 		return -EINVAL;
 	selfballoon_min_usable_mb = val;
 	return count;
@@ -384,8 +394,10 @@ static ssize_t store_selfballoon_reserved_mb(struct device *dev,
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
-	err = strict_strtoul(buf, 10, &val);
-	if (err || val == 0)
+	err = kstrtoul(buf, 10, &val);
+	if (err)
+		return err;
+	if (val == 0)
 		return -EINVAL;
 	selfballoon_reserved_mb = val;
 	return count;
@@ -410,8 +422,10 @@ static ssize_t store_frontswap_selfshrinking(struct device *dev,
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
-	err = strict_strtoul(buf, 10, &tmp);
-	if (err || ((tmp != 0) && (tmp != 1)))
+	err = kstrtoul(buf, 10, &tmp);
+	if (err)
+		return err;
+	if ((tmp != 0) && (tmp != 1))
 		return -EINVAL;
 	frontswap_selfshrinking = !!tmp;
 	if (!was_enabled && !xen_selfballooning_enabled &&
@@ -437,8 +451,10 @@ static ssize_t store_frontswap_inertia(struct device *dev,
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
-	err = strict_strtoul(buf, 10, &val);
-	if (err || val == 0)
+	err = kstrtoul(buf, 10, &val);
+	if (err)
+		return err;
+	if (val == 0)
 		return -EINVAL;
 	frontswap_inertia = val;
 	frontswap_inertia_counter = val;
@@ -460,8 +476,10 @@ static ssize_t store_frontswap_hysteresis(struct device *dev,
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
-	err = strict_strtoul(buf, 10, &val);
-	if (err || val == 0)
+	err = kstrtoul(buf, 10, &val);
+	if (err)
+		return err;
+	if (val == 0)
 		return -EINVAL;
 	frontswap_hysteresis = val;
 	return count;
-- 
cgit v0.10.2


From 11dbb52bb413580e9eaec9116259d1c62a054c39 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Mon, 5 Aug 2013 13:59:36 -0400
Subject: MAINTAINERS: Update the Xen subsystem's with proper mailing list.

And also drop the virtualization one since we don't really use it.

Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Acked-by: Ian Campbell <ian.campbell@citrix.com> [for netback]
Acked-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>

diff --git a/MAINTAINERS b/MAINTAINERS
index defc053..aaf47df 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9238,8 +9238,7 @@ F:	drivers/media/tuners/tuner-xc2028.*
 XEN HYPERVISOR INTERFACE
 M:	Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
 M:	Jeremy Fitzhardinge <jeremy@goop.org>
-L:	xen-devel@lists.xensource.com (moderated for non-subscribers)
-L:	virtualization@lists.linux-foundation.org
+L:	xen-devel@lists.xenproject.org (moderated for non-subscribers)
 S:	Supported
 F:	arch/x86/xen/
 F:	drivers/*/xen-*front.c
@@ -9250,35 +9249,35 @@ F:	include/uapi/xen/
 
 XEN HYPERVISOR ARM
 M:	Stefano Stabellini <stefano.stabellini@eu.citrix.com>
-L:	xen-devel@lists.xensource.com (moderated for non-subscribers)
+L:	xen-devel@lists.xenproject.org (moderated for non-subscribers)
 S:	Supported
 F:	arch/arm/xen/
 F:	arch/arm/include/asm/xen/
 
 XEN HYPERVISOR ARM64
 M:	Stefano Stabellini <stefano.stabellini@eu.citrix.com>
-L:	xen-devel@lists.xensource.com (moderated for non-subscribers)
+L:	xen-devel@lists.xenproject.org (moderated for non-subscribers)
 S:	Supported
 F:	arch/arm64/xen/
 F:	arch/arm64/include/asm/xen/
 
 XEN NETWORK BACKEND DRIVER
 M:	Ian Campbell <ian.campbell@citrix.com>
-L:	xen-devel@lists.xensource.com (moderated for non-subscribers)
+L:	xen-devel@lists.xenproject.org (moderated for non-subscribers)
 L:	netdev@vger.kernel.org
 S:	Supported
 F:	drivers/net/xen-netback/*
 
 XEN PCI SUBSYSTEM
 M:	Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
-L:	xen-devel@lists.xensource.com (moderated for non-subscribers)
+L:	xen-devel@lists.xenproject.org (moderated for non-subscribers)
 S:	Supported
 F:	arch/x86/pci/*xen*
 F:	drivers/pci/*xen*
 
 XEN SWIOTLB SUBSYSTEM
 M:	Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
-L:	xen-devel@lists.xensource.com (moderated for non-subscribers)
+L:	xen-devel@lists.xenproject.org (moderated for non-subscribers)
 S:	Supported
 F:	arch/x86/xen/*swiotlb*
 F:	drivers/xen/*swiotlb*
-- 
cgit v0.10.2


From 3eeef8f72a9365fe20f2c9d84b0afe60a20788cd Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Mon, 5 Aug 2013 14:01:49 -0400
Subject: MAINTAINERS: Add in two extra co-maintainers of the Xen tree.

Both Boris and David have graciously volunteered to help in
maintaining the Xen subsystem tree. Cementing this in the
MAINTAINERS file so they are copied on Xen related patches.

Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Acked-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Acked-by: David Vrabel <david.vrabel@citrix.com>

diff --git a/MAINTAINERS b/MAINTAINERS
index aaf47df..a97428f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9238,6 +9238,8 @@ F:	drivers/media/tuners/tuner-xc2028.*
 XEN HYPERVISOR INTERFACE
 M:	Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
 M:	Jeremy Fitzhardinge <jeremy@goop.org>
+M:	Boris Ostrovsky <boris.ostrovsky@oracle.com>
+M:	David Vrabel <david.vrabel@citrix.com>
 L:	xen-devel@lists.xenproject.org (moderated for non-subscribers)
 S:	Supported
 F:	arch/x86/xen/
-- 
cgit v0.10.2


From 65a45fa2f640f72757ef00f2bc83f9654d65a62b Mon Sep 17 00:00:00 2001
From: David Vrabel <david.vrabel@citrix.com>
Date: Fri, 19 Jul 2013 15:51:59 +0100
Subject: xen/p2m: avoid unneccesary TLB flush in m2p_remove_override()

In m2p_remove_override() when removing the grant map from the kernel
mapping and replacing with a mapping to the original page, the grant
unmap will already have flushed the TLB and it is not necessary to do
it again after updating the mapping.

Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Cc: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Reviewed-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>

diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 95fb2aa..74672ee 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -1003,7 +1003,6 @@ int m2p_remove_override(struct page *page,
 
 			set_pte_at(&init_mm, address, ptep,
 					pfn_pte(pfn, PAGE_KERNEL));
-			__flush_tlb_single(address);
 			kmap_op->host_addr = 0;
 		}
 	}
-- 
cgit v0.10.2


From 73cc4bb0c79eebe1f0e92b700d9fe8d1c9b061bb Mon Sep 17 00:00:00 2001
From: David Vrabel <david.vrabel@citrix.com>
Date: Fri, 19 Jul 2013 15:52:00 +0100
Subject: xen/evtchn: improve scalability by using per-user locks

The global array of port users and the port_user_lock limits
scalability of the evtchn device.  Instead of the global array lookup,
use a per-use (per-fd) tree of event channels bound by that user and
protect the tree with a per-user lock.

This is also a prerequiste for extended the number of supported event
channels, by removing the fixed size, per-event channel array.

Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>

diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c
index b6165e0..f328f12 100644
--- a/drivers/xen/evtchn.c
+++ b/drivers/xen/evtchn.c
@@ -57,6 +57,7 @@
 
 struct per_user_data {
 	struct mutex bind_mutex; /* serialize bind/unbind operations */
+	struct rb_root evtchns;
 
 	/* Notification ring, accessed via /dev/xen/evtchn. */
 #define EVTCHN_RING_SIZE     (PAGE_SIZE / sizeof(evtchn_port_t))
@@ -64,6 +65,7 @@ struct per_user_data {
 	evtchn_port_t *ring;
 	unsigned int ring_cons, ring_prod, ring_overflow;
 	struct mutex ring_cons_mutex; /* protect against concurrent readers */
+	spinlock_t ring_prod_lock; /* product against concurrent interrupts */
 
 	/* Processes wait on this queue when ring is empty. */
 	wait_queue_head_t evtchn_wait;
@@ -71,54 +73,79 @@ struct per_user_data {
 	const char *name;
 };
 
-/*
- * Who's bound to each port?  This is logically an array of struct
- * per_user_data *, but we encode the current enabled-state in bit 0.
- */
-static unsigned long *port_user;
-static DEFINE_SPINLOCK(port_user_lock); /* protects port_user[] and ring_prod */
+struct user_evtchn {
+	struct rb_node node;
+	struct per_user_data *user;
+	unsigned port;
+	bool enabled;
+};
 
-static inline struct per_user_data *get_port_user(unsigned port)
+static int add_evtchn(struct per_user_data *u, struct user_evtchn *evtchn)
 {
-	return (struct per_user_data *)(port_user[port] & ~1);
-}
+	struct rb_node **new = &(u->evtchns.rb_node), *parent = NULL;
 
-static inline void set_port_user(unsigned port, struct per_user_data *u)
-{
-	port_user[port] = (unsigned long)u;
+	while (*new) {
+		struct user_evtchn *this;
+
+		this = container_of(*new, struct user_evtchn, node);
+
+		parent = *new;
+		if (this->port < evtchn->port)
+			new = &((*new)->rb_left);
+		else if (this->port > evtchn->port)
+			new = &((*new)->rb_right);
+		else
+			return -EEXIST;
+	}
+
+	/* Add new node and rebalance tree. */
+	rb_link_node(&evtchn->node, parent, new);
+	rb_insert_color(&evtchn->node, &u->evtchns);
+
+	return 0;
 }
 
-static inline bool get_port_enabled(unsigned port)
+static void del_evtchn(struct per_user_data *u, struct user_evtchn *evtchn)
 {
-	return port_user[port] & 1;
+	rb_erase(&evtchn->node, &u->evtchns);
+	kfree(evtchn);
 }
 
-static inline void set_port_enabled(unsigned port, bool enabled)
+static struct user_evtchn *find_evtchn(struct per_user_data *u, unsigned port)
 {
-	if (enabled)
-		port_user[port] |= 1;
-	else
-		port_user[port] &= ~1;
+	struct rb_node *node = u->evtchns.rb_node;
+
+	while (node) {
+		struct user_evtchn *evtchn;
+
+		evtchn = container_of(node, struct user_evtchn, node);
+
+		if (evtchn->port < port)
+			node = node->rb_left;
+		else if (evtchn->port > port)
+			node = node->rb_right;
+		else
+			return evtchn;
+	}
+	return NULL;
 }
 
 static irqreturn_t evtchn_interrupt(int irq, void *data)
 {
-	unsigned int port = (unsigned long)data;
-	struct per_user_data *u;
-
-	spin_lock(&port_user_lock);
-
-	u = get_port_user(port);
+	struct user_evtchn *evtchn = data;
+	struct per_user_data *u = evtchn->user;
 
-	WARN(!get_port_enabled(port),
+	WARN(!evtchn->enabled,
 	     "Interrupt for port %d, but apparently not enabled; per-user %p\n",
-	     port, u);
+	     evtchn->port, u);
 
 	disable_irq_nosync(irq);
-	set_port_enabled(port, false);
+	evtchn->enabled = false;
+
+	spin_lock(&u->ring_prod_lock);
 
 	if ((u->ring_prod - u->ring_cons) < EVTCHN_RING_SIZE) {
-		u->ring[EVTCHN_RING_MASK(u->ring_prod)] = port;
+		u->ring[EVTCHN_RING_MASK(u->ring_prod)] = evtchn->port;
 		wmb(); /* Ensure ring contents visible */
 		if (u->ring_cons == u->ring_prod++) {
 			wake_up_interruptible(&u->evtchn_wait);
@@ -128,7 +155,7 @@ static irqreturn_t evtchn_interrupt(int irq, void *data)
 	} else
 		u->ring_overflow = 1;
 
-	spin_unlock(&port_user_lock);
+	spin_unlock(&u->ring_prod_lock);
 
 	return IRQ_HANDLED;
 }
@@ -229,20 +256,20 @@ static ssize_t evtchn_write(struct file *file, const char __user *buf,
 	if (copy_from_user(kbuf, buf, count) != 0)
 		goto out;
 
-	spin_lock_irq(&port_user_lock);
+	mutex_lock(&u->bind_mutex);
 
 	for (i = 0; i < (count/sizeof(evtchn_port_t)); i++) {
 		unsigned port = kbuf[i];
+		struct user_evtchn *evtchn;
 
-		if (port < NR_EVENT_CHANNELS &&
-		    get_port_user(port) == u &&
-		    !get_port_enabled(port)) {
-			set_port_enabled(port, true);
+		evtchn = find_evtchn(u, port);
+		if (evtchn && !evtchn->enabled) {
+			evtchn->enabled = true;
 			enable_irq(irq_from_evtchn(port));
 		}
 	}
 
-	spin_unlock_irq(&port_user_lock);
+	mutex_unlock(&u->bind_mutex);
 
 	rc = count;
 
@@ -253,6 +280,8 @@ static ssize_t evtchn_write(struct file *file, const char __user *buf,
 
 static int evtchn_bind_to_user(struct per_user_data *u, int port)
 {
+	struct user_evtchn *evtchn;
+	struct evtchn_close close;
 	int rc = 0;
 
 	/*
@@ -263,35 +292,47 @@ static int evtchn_bind_to_user(struct per_user_data *u, int port)
 	 * interrupt handler yet, and our caller has already
 	 * serialized bind operations.)
 	 */
-	BUG_ON(get_port_user(port) != NULL);
-	set_port_user(port, u);
-	set_port_enabled(port, true); /* start enabled */
+
+	evtchn = kzalloc(sizeof(*evtchn), GFP_KERNEL);
+	if (!evtchn)
+		return -ENOMEM;
+
+	evtchn->user = u;
+	evtchn->port = port;
+	evtchn->enabled = true; /* start enabled */
+
+	rc = add_evtchn(u, evtchn);
+	if (rc < 0)
+		goto err;
 
 	rc = bind_evtchn_to_irqhandler(port, evtchn_interrupt, IRQF_DISABLED,
-				       u->name, (void *)(unsigned long)port);
-	if (rc >= 0)
-		rc = evtchn_make_refcounted(port);
-	else {
-		/* bind failed, should close the port now */
-		struct evtchn_close close;
-		close.port = port;
-		if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
-			BUG();
-		set_port_user(port, NULL);
-	}
+				       u->name, evtchn);
+	if (rc < 0)
+		goto err;
 
+	rc = evtchn_make_refcounted(port);
+	return rc;
+
+err:
+	/* bind failed, should close the port now */
+	close.port = port;
+	if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
+		BUG();
+	del_evtchn(u, evtchn);
+	kfree(evtchn);
 	return rc;
 }
 
-static void evtchn_unbind_from_user(struct per_user_data *u, int port)
+static void evtchn_unbind_from_user(struct per_user_data *u,
+				    struct user_evtchn *evtchn)
 {
-	int irq = irq_from_evtchn(port);
+	int irq = irq_from_evtchn(evtchn->port);
 
 	BUG_ON(irq < 0);
 
-	unbind_from_irqhandler(irq, (void *)(unsigned long)port);
+	unbind_from_irqhandler(irq, evtchn);
 
-	set_port_user(port, NULL);
+	del_evtchn(u, evtchn);
 }
 
 static long evtchn_ioctl(struct file *file,
@@ -370,6 +411,7 @@ static long evtchn_ioctl(struct file *file,
 
 	case IOCTL_EVTCHN_UNBIND: {
 		struct ioctl_evtchn_unbind unbind;
+		struct user_evtchn *evtchn;
 
 		rc = -EFAULT;
 		if (copy_from_user(&unbind, uarg, sizeof(unbind)))
@@ -380,29 +422,27 @@ static long evtchn_ioctl(struct file *file,
 			break;
 
 		rc = -ENOTCONN;
-		if (get_port_user(unbind.port) != u)
+		evtchn = find_evtchn(u, unbind.port);
+		if (!evtchn)
 			break;
 
 		disable_irq(irq_from_evtchn(unbind.port));
-
-		evtchn_unbind_from_user(u, unbind.port);
-
+		evtchn_unbind_from_user(u, evtchn);
 		rc = 0;
 		break;
 	}
 
 	case IOCTL_EVTCHN_NOTIFY: {
 		struct ioctl_evtchn_notify notify;
+		struct user_evtchn *evtchn;
 
 		rc = -EFAULT;
 		if (copy_from_user(&notify, uarg, sizeof(notify)))
 			break;
 
-		if (notify.port >= NR_EVENT_CHANNELS) {
-			rc = -EINVAL;
-		} else if (get_port_user(notify.port) != u) {
-			rc = -ENOTCONN;
-		} else {
+		rc = -ENOTCONN;
+		evtchn = find_evtchn(u, notify.port);
+		if (evtchn) {
 			notify_remote_via_evtchn(notify.port);
 			rc = 0;
 		}
@@ -412,9 +452,9 @@ static long evtchn_ioctl(struct file *file,
 	case IOCTL_EVTCHN_RESET: {
 		/* Initialise the ring to empty. Clear errors. */
 		mutex_lock(&u->ring_cons_mutex);
-		spin_lock_irq(&port_user_lock);
+		spin_lock_irq(&u->ring_prod_lock);
 		u->ring_cons = u->ring_prod = u->ring_overflow = 0;
-		spin_unlock_irq(&port_user_lock);
+		spin_unlock_irq(&u->ring_prod_lock);
 		mutex_unlock(&u->ring_cons_mutex);
 		rc = 0;
 		break;
@@ -473,6 +513,7 @@ static int evtchn_open(struct inode *inode, struct file *filp)
 
 	mutex_init(&u->bind_mutex);
 	mutex_init(&u->ring_cons_mutex);
+	spin_lock_init(&u->ring_prod_lock);
 
 	filp->private_data = u;
 
@@ -481,15 +522,15 @@ static int evtchn_open(struct inode *inode, struct file *filp)
 
 static int evtchn_release(struct inode *inode, struct file *filp)
 {
-	int i;
 	struct per_user_data *u = filp->private_data;
+	struct rb_node *node;
 
-	for (i = 0; i < NR_EVENT_CHANNELS; i++) {
-		if (get_port_user(i) != u)
-			continue;
+	while ((node = u->evtchns.rb_node)) {
+		struct user_evtchn *evtchn;
 
-		disable_irq(irq_from_evtchn(i));
-		evtchn_unbind_from_user(get_port_user(i), i);
+		evtchn = rb_entry(node, struct user_evtchn, node);
+		disable_irq(irq_from_evtchn(evtchn->port));
+		evtchn_unbind_from_user(u, evtchn);
 	}
 
 	free_page((unsigned long)u->ring);
@@ -523,12 +564,6 @@ static int __init evtchn_init(void)
 	if (!xen_domain())
 		return -ENODEV;
 
-	port_user = kcalloc(NR_EVENT_CHANNELS, sizeof(*port_user), GFP_KERNEL);
-	if (port_user == NULL)
-		return -ENOMEM;
-
-	spin_lock_init(&port_user_lock);
-
 	/* Create '/dev/xen/evtchn'. */
 	err = misc_register(&evtchn_miscdev);
 	if (err != 0) {
@@ -543,9 +578,6 @@ static int __init evtchn_init(void)
 
 static void __exit evtchn_cleanup(void)
 {
-	kfree(port_user);
-	port_user = NULL;
-
 	misc_deregister(&evtchn_miscdev);
 }
 
-- 
cgit v0.10.2


From cd9151e26d31048b2b5e00fd02e110e07d2200c9 Mon Sep 17 00:00:00 2001
From: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Date: Sun, 4 Aug 2013 15:39:40 +0100
Subject: xen/balloon: set a mapping for ballooned out pages

Currently ballooned out pages are mapped to 0 and have INVALID_P2M_ENTRY
in the p2m. These ballooned out pages are used to map foreign grants
by gntdev and blkback (see alloc_xenballooned_pages).

Allocate a page per cpu and map all the ballooned out pages to the
corresponding mfn. Set the p2m accordingly. This way reading from a
ballooned out page won't cause a kernel crash (see
http://lists.xen.org/archives/html/xen-devel/2012-12/msg01154.html).

Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Reviewed-by: David Vrabel <david.vrabel@citrix.com>
CC: alex@alex.org.uk
CC: dcrisan@flexiant.com
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>

diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index 2a2ef97..a3dc75d 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -38,6 +38,7 @@
 
 #define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt
 
+#include <linux/cpu.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
 #include <linux/errno.h>
@@ -52,6 +53,7 @@
 #include <linux/notifier.h>
 #include <linux/memory.h>
 #include <linux/memory_hotplug.h>
+#include <linux/percpu-defs.h>
 
 #include <asm/page.h>
 #include <asm/pgalloc.h>
@@ -90,6 +92,8 @@ EXPORT_SYMBOL_GPL(balloon_stats);
 
 /* We increase/decrease in batches which fit in a page */
 static xen_pfn_t frame_list[PAGE_SIZE / sizeof(unsigned long)];
+static DEFINE_PER_CPU(struct page *, balloon_scratch_page);
+
 
 /* List of ballooned pages, threaded through the mem_map array. */
 static LIST_HEAD(ballooned_pages);
@@ -412,7 +416,8 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
 		if (xen_pv_domain() && !PageHighMem(page)) {
 			ret = HYPERVISOR_update_va_mapping(
 				(unsigned long)__va(pfn << PAGE_SHIFT),
-				__pte_ma(0), 0);
+				pfn_pte(page_to_pfn(__get_cpu_var(balloon_scratch_page)),
+					PAGE_KERNEL_RO), 0);
 			BUG_ON(ret);
 		}
 #endif
@@ -425,7 +430,8 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
 	/* No more mappings: invalidate P2M and add to balloon. */
 	for (i = 0; i < nr_pages; i++) {
 		pfn = mfn_to_pfn(frame_list[i]);
-		__set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
+		__set_phys_to_machine(pfn,
+				pfn_to_mfn(page_to_pfn(__get_cpu_var(balloon_scratch_page))));
 		balloon_append(pfn_to_page(pfn));
 	}
 
@@ -480,6 +486,18 @@ static void balloon_process(struct work_struct *work)
 	mutex_unlock(&balloon_mutex);
 }
 
+struct page *get_balloon_scratch_page(void)
+{
+	struct page *ret = get_cpu_var(balloon_scratch_page);
+	BUG_ON(ret == NULL);
+	return ret;
+}
+
+void put_balloon_scratch_page(void)
+{
+	put_cpu_var(balloon_scratch_page);
+}
+
 /* Resets the Xen limit, sets new target, and kicks off processing. */
 void balloon_set_new_target(unsigned long target)
 {
@@ -573,13 +591,47 @@ static void __init balloon_add_region(unsigned long start_pfn,
 	}
 }
 
+static int __cpuinit balloon_cpu_notify(struct notifier_block *self,
+				    unsigned long action, void *hcpu)
+{
+	int cpu = (long)hcpu;
+	switch (action) {
+	case CPU_UP_PREPARE:
+		if (per_cpu(balloon_scratch_page, cpu) != NULL)
+			break;
+		per_cpu(balloon_scratch_page, cpu) = alloc_page(GFP_KERNEL);
+		if (per_cpu(balloon_scratch_page, cpu) == NULL) {
+			pr_warn("Failed to allocate balloon_scratch_page for cpu %d\n", cpu);
+			return NOTIFY_BAD;
+		}
+		break;
+	default:
+		break;
+	}
+	return NOTIFY_OK;
+}
+
+static struct notifier_block balloon_cpu_notifier __cpuinitdata = {
+	.notifier_call	= balloon_cpu_notify,
+};
+
 static int __init balloon_init(void)
 {
-	int i;
+	int i, cpu;
 
 	if (!xen_domain())
 		return -ENODEV;
 
+	for_each_online_cpu(cpu)
+	{
+		per_cpu(balloon_scratch_page, cpu) = alloc_page(GFP_KERNEL);
+		if (per_cpu(balloon_scratch_page, cpu) == NULL) {
+			pr_warn("Failed to allocate balloon_scratch_page for cpu %d\n", cpu);
+			return -ENOMEM;
+		}
+	}
+	register_cpu_notifier(&balloon_cpu_notifier);
+
 	pr_info("Initialising balloon driver\n");
 
 	balloon_stats.current_pages = xen_pv_domain()
@@ -616,4 +668,15 @@ static int __init balloon_init(void)
 
 subsys_initcall(balloon_init);
 
+static int __init balloon_clear(void)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu)
+		per_cpu(balloon_scratch_page, cpu) = NULL;
+
+	return 0;
+}
+early_initcall(balloon_clear);
+
 MODULE_LICENSE("GPL");
diff --git a/include/xen/balloon.h b/include/xen/balloon.h
index cc2e1a7..a4c1c6a 100644
--- a/include/xen/balloon.h
+++ b/include/xen/balloon.h
@@ -29,6 +29,9 @@ int alloc_xenballooned_pages(int nr_pages, struct page **pages,
 		bool highmem);
 void free_xenballooned_pages(int nr_pages, struct page **pages);
 
+struct page *get_balloon_scratch_page(void);
+void put_balloon_scratch_page(void);
+
 struct device;
 #ifdef CONFIG_XEN_SELFBALLOONING
 extern int register_xen_selfballooning(struct device *dev);
-- 
cgit v0.10.2


From 4d86ec7a8a3f9c135fdc34d7aed2a9467e2e6ff9 Mon Sep 17 00:00:00 2001
From: EunBong Song <eunb.song@samsung.com>
Date: Mon, 5 Aug 2013 17:30:47 +0100
Subject: swiotlb: replace dma_length with sg_dma_len() macro

This patch replace dma_length in "lib/swiotlb.c" to sg_dma_len() macro,
because the build error can occur if CONFIG_NEED_SG_DMA_LENGTH is not
set, and CONFIG_SWIOTLB is set.

Singed-off-by: EunBong Song <eunb.song@samsung.com>
Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>

diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index d23762e..4e8686c 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -870,13 +870,13 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
 				swiotlb_full(hwdev, sg->length, dir, 0);
 				swiotlb_unmap_sg_attrs(hwdev, sgl, i, dir,
 						       attrs);
-				sgl[0].dma_length = 0;
+				sg_dma_len(sgl) = 0;
 				return 0;
 			}
 			sg->dma_address = phys_to_dma(hwdev, map);
 		} else
 			sg->dma_address = dev_addr;
-		sg->dma_length = sg->length;
+		sg_dma_len(sg) = sg->length;
 	}
 	return nelems;
 }
@@ -904,7 +904,7 @@ swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
 	BUG_ON(dir == DMA_NONE);
 
 	for_each_sg(sgl, sg, nelems, i)
-		unmap_single(hwdev, sg->dma_address, sg->dma_length, dir);
+		unmap_single(hwdev, sg->dma_address, sg_dma_len(sg), dir);
 
 }
 EXPORT_SYMBOL(swiotlb_unmap_sg_attrs);
@@ -934,7 +934,7 @@ swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl,
 
 	for_each_sg(sgl, sg, nelems, i)
 		swiotlb_sync_single(hwdev, sg->dma_address,
-				    sg->dma_length, dir, target);
+				    sg_dma_len(sg), dir, target);
 }
 
 void
-- 
cgit v0.10.2


From 781575cd8127c30a0444953bcd0b6d1e882df13e Mon Sep 17 00:00:00 2001
From: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Date: Mon, 5 Aug 2013 17:30:48 +0100
Subject: swiotlb-xen: replace dma_length with sg_dma_len() macro

swiotlb-xen has an implicit dependency on CONFIG_NEED_SG_DMA_LENGTH.
Remove it by replacing dma_length with sg_dma_len.

Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>

diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index aadffcf..1b2277c 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -506,13 +506,13 @@ xen_swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
 				   to do proper error handling. */
 				xen_swiotlb_unmap_sg_attrs(hwdev, sgl, i, dir,
 							   attrs);
-				sgl[0].dma_length = 0;
+				sg_dma_len(sgl) = 0;
 				return DMA_ERROR_CODE;
 			}
 			sg->dma_address = xen_phys_to_bus(map);
 		} else
 			sg->dma_address = dev_addr;
-		sg->dma_length = sg->length;
+		sg_dma_len(sg) = sg->length;
 	}
 	return nelems;
 }
@@ -533,7 +533,7 @@ xen_swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
 	BUG_ON(dir == DMA_NONE);
 
 	for_each_sg(sgl, sg, nelems, i)
-		xen_unmap_single(hwdev, sg->dma_address, sg->dma_length, dir);
+		xen_unmap_single(hwdev, sg->dma_address, sg_dma_len(sg), dir);
 
 }
 EXPORT_SYMBOL_GPL(xen_swiotlb_unmap_sg_attrs);
@@ -555,7 +555,7 @@ xen_swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl,
 
 	for_each_sg(sgl, sg, nelems, i)
 		xen_swiotlb_sync_single(hwdev, sg->dma_address,
-					sg->dma_length, dir, target);
+					sg_dma_len(sg), dir, target);
 }
 
 void
-- 
cgit v0.10.2


From fb58e30091c74967f6b8e98b3c1f292782f92b41 Mon Sep 17 00:00:00 2001
From: David Vrabel <david.vrabel@citrix.com>
Date: Thu, 15 Aug 2013 13:21:04 +0100
Subject: x86/xen: disable premption when enabling local irqs

If CONFIG_PREEMPT is enabled then xen_enable_irq() (and
xen_restore_fl()) could be preempted and rescheduled on a different
VCPU in between the clear of the mask and the check for pending
events.  This may result in events being lost as the upcall will check
for pending events on the wrong VCPU.

Fix this by disabling preemption around the unmask and check for
events.

Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>

diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c
index 01a4dc0..0da7f86 100644
--- a/arch/x86/xen/irq.c
+++ b/arch/x86/xen/irq.c
@@ -47,23 +47,18 @@ static void xen_restore_fl(unsigned long flags)
 	/* convert from IF type flag */
 	flags = !(flags & X86_EFLAGS_IF);
 
-	/* There's a one instruction preempt window here.  We need to
-	   make sure we're don't switch CPUs between getting the vcpu
-	   pointer and updating the mask. */
+	/* See xen_irq_enable() for why preemption must be disabled. */
 	preempt_disable();
 	vcpu = this_cpu_read(xen_vcpu);
 	vcpu->evtchn_upcall_mask = flags;
-	preempt_enable_no_resched();
-
-	/* Doesn't matter if we get preempted here, because any
-	   pending event will get dealt with anyway. */
 
 	if (flags == 0) {
-		preempt_check_resched();
 		barrier(); /* unmask then check (avoid races) */
 		if (unlikely(vcpu->evtchn_upcall_pending))
 			xen_force_evtchn_callback();
-	}
+		preempt_enable();
+	} else
+		preempt_enable_no_resched();
 }
 PV_CALLEE_SAVE_REGS_THUNK(xen_restore_fl);
 
@@ -82,10 +77,12 @@ static void xen_irq_enable(void)
 {
 	struct vcpu_info *vcpu;
 
-	/* We don't need to worry about being preempted here, since
-	   either a) interrupts are disabled, so no preemption, or b)
-	   the caller is confused and is trying to re-enable interrupts
-	   on an indeterminate processor. */
+	/*
+	 * We may be preempted as soon as vcpu->evtchn_upcall_mask is
+	 * cleared, so disable preemption to ensure we check for
+	 * events on the VCPU we are still running on.
+	 */
+	preempt_disable();
 
 	vcpu = this_cpu_read(xen_vcpu);
 	vcpu->evtchn_upcall_mask = 0;
@@ -96,6 +93,8 @@ static void xen_irq_enable(void)
 	barrier(); /* unmask then check (avoid races) */
 	if (unlikely(vcpu->evtchn_upcall_pending))
 		xen_force_evtchn_callback();
+
+	preempt_enable();
 }
 PV_CALLEE_SAVE_REGS_THUNK(xen_irq_enable);
 
-- 
cgit v0.10.2


From e201bfcc5c4fe6d58ce2b21fb4f4c2a2f0ab7ab8 Mon Sep 17 00:00:00 2001
From: David Vrabel <david.vrabel@citrix.com>
Date: Mon, 22 Jul 2013 15:29:00 +0100
Subject: x86/xen: during early setup, only 1:1 map the ISA region

During early setup, when the reserved regions and MMIO holes are being
setup as 1:1 in the p2m, clear any mappings instead of making them 1:1
(execept for the ISA region which is expected to be mapped).

This fixes a regression introduced in 3.5 by 83d51ab473dd (xen/setup:
update VA mapping when releasing memory during setup) which caused
hosts with tboot to fail to boot.

tboot marks a region in the e820 map as unusable and the dom0 kernel
would attempt to map this region and Xen does not permit unusable
regions to be mapped by guests.

(XEN)  0000000000000000 - 0000000000060000 (usable)
(XEN)  0000000000060000 - 0000000000068000 (reserved)
(XEN)  0000000000068000 - 000000000009e000 (usable)
(XEN)  0000000000100000 - 0000000000800000 (usable)
(XEN)  0000000000800000 - 0000000000972000 (unusable)

tboot marked this region as unusable.

(XEN)  0000000000972000 - 00000000cf200000 (usable)
(XEN)  00000000cf200000 - 00000000cf38f000 (reserved)
(XEN)  00000000cf38f000 - 00000000cf3ce000 (ACPI data)
(XEN)  00000000cf3ce000 - 00000000d0000000 (reserved)
(XEN)  00000000e0000000 - 00000000f0000000 (reserved)
(XEN)  00000000fe000000 - 0000000100000000 (reserved)
(XEN)  0000000100000000 - 0000000630000000 (usable)

Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>

diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 403f5cc..6243651 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -218,13 +218,19 @@ static void __init xen_set_identity_and_release_chunk(
 	unsigned long pfn;
 
 	/*
-	 * If the PFNs are currently mapped, the VA mapping also needs
-	 * to be updated to be 1:1.
+	 * If the PFNs are currently mapped, clear the mappings
+	 * (except for the ISA region which must be 1:1 mapped) to
+	 * release the refcounts (in Xen) on the original frames.
 	 */
-	for (pfn = start_pfn; pfn <= max_pfn_mapped && pfn < end_pfn; pfn++)
+	for (pfn = start_pfn; pfn <= max_pfn_mapped && pfn < end_pfn; pfn++) {
+		pte_t pte = __pte_ma(0);
+
+		if (pfn < PFN_UP(ISA_END_ADDRESS))
+			pte = mfn_pte(pfn, PAGE_KERNEL_IO);
+
 		(void)HYPERVISOR_update_va_mapping(
-			(unsigned long)__va(pfn << PAGE_SHIFT),
-			mfn_pte(pfn, PAGE_KERNEL_IO), 0);
+			(unsigned long)__va(pfn << PAGE_SHIFT), pte, 0);
+	}
 
 	if (start_pfn < nr_pages)
 		*released += xen_release_chunk(
-- 
cgit v0.10.2


From 3ef0296a0b77d70585e23be93a5144e764a2781e Mon Sep 17 00:00:00 2001
From: David Vrabel <david.vrabel@citrix.com>
Date: Thu, 15 Aug 2013 13:21:05 +0100
Subject: xen/events: document behaviour when scanning the start word for
 events

The original comment on the scanning of the start word on the 2nd pass
did not reflect the actual behaviour (the code was incorrectly masking
bit_idx instead of the pending word itself).

The documented behaviour is not actually required since if event were
pending in the MSBs, they would be immediately scanned anyway as we go
through the loop again.

Update the documentation to reflect this (instead of trying to change
the behaviour).

Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>

diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index c8b9d9f..0b14a9bb 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -1388,14 +1388,21 @@ static void __xen_evtchn_do_upcall(void)
 
 			pending_bits = active_evtchns(cpu, s, word_idx);
 			bit_idx = 0; /* usually scan entire word from start */
+			/*
+			 * We scan the starting word in two parts.
+			 *
+			 * 1st time: start in the middle, scanning the
+			 * upper bits.
+			 *
+			 * 2nd time: scan the whole word (not just the
+			 * parts skipped in the first pass) -- if an
+			 * event in the previously scanned bits is
+			 * pending again it would just be scanned on
+			 * the next loop anyway.
+			 */
 			if (word_idx == start_word_idx) {
-				/* We scan the starting word in two parts */
 				if (i == 0)
-					/* 1st time: start in the middle */
 					bit_idx = start_bit_idx;
-				else
-					/* 2nd time: mask bits done already */
-					bit_idx &= (1UL << start_bit_idx) - 1;
 			}
 
 			do {
-- 
cgit v0.10.2


From c65a8370d7354720c74ba0c7427a48000b366bda Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Mon, 5 Aug 2013 13:57:01 -0400
Subject: MAINTAINERS: Remove Jeremy from the Xen subsystem.

Jeremy has been a key person in making Linux work with Xen.
He has been enjoying the last year working on something
different so reflect that in the maintainers file.

CC: Jeremy Fitzhardinge <jeremy@goop.org>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Acked-by: Jeremy Fitzhardinge <jeremy@goop.org>

diff --git a/CREDITS b/CREDITS
index 206d0fc..646a0a9 100644
--- a/CREDITS
+++ b/CREDITS
@@ -1120,6 +1120,7 @@ D: author of userfs filesystem
 D: Improved mmap and munmap handling
 D: General mm minor tidyups
 D: autofs v4 maintainer
+D: Xen subsystem
 S: 987 Alabama St
 S: San Francisco
 S: CA, 94110
diff --git a/MAINTAINERS b/MAINTAINERS
index a97428f..0161dad 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9237,7 +9237,6 @@ F:	drivers/media/tuners/tuner-xc2028.*
 
 XEN HYPERVISOR INTERFACE
 M:	Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
-M:	Jeremy Fitzhardinge <jeremy@goop.org>
 M:	Boris Ostrovsky <boris.ostrovsky@oracle.com>
 M:	David Vrabel <david.vrabel@citrix.com>
 L:	xen-devel@lists.xenproject.org (moderated for non-subscribers)
-- 
cgit v0.10.2


From 072b2064b88f709c53a83c6ec1f1cb17bf7c0abf Mon Sep 17 00:00:00 2001
From: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Date: Tue, 13 Aug 2013 16:57:06 +0000
Subject: xen: fix ARM build after 6efa20e4

The following commit:

commit 6efa20e49b9cb1db1ab66870cc37323474a75a13
Author: Konrad Rzeszutek Wilk <konrad@kernel.org>
Date:   Fri Jul 19 11:51:31 2013 -0400

    xen: Support 64-bit PV guest receiving NMIs

breaks the Xen ARM build:

CC      drivers/xen/events.o
drivers/xen/events.c: In function 'xen_send_IPI_one':
drivers/xen/events.c:1218:6: error: 'XEN_NMI_VECTOR' undeclared (first use in this function)

Simply ifdef the undeclared symbol in the code.

Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>

diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 0b14a9bb..090a3b7 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -1215,12 +1215,14 @@ void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector)
 {
 	int irq;
 
+#ifdef CONFIG_X86
 	if (unlikely(vector == XEN_NMI_VECTOR)) {
 		int rc =  HYPERVISOR_vcpu_op(VCPUOP_send_nmi, cpu, NULL);
 		if (rc < 0)
 			printk(KERN_WARNING "Sending nmi to CPU%d failed (rc:%d)\n", cpu, rc);
 		return;
 	}
+#endif
 	irq = per_cpu(ipi_to_irq, cpu)[vector];
 	BUG_ON(irq < 0);
 	notify_remote_via_irq(irq);
-- 
cgit v0.10.2


From ee0726407feaf504dff304fb603652fb2d778b42 Mon Sep 17 00:00:00 2001
From: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Date: Tue, 23 Jul 2013 17:23:54 +0000
Subject: xen/m2p: use GNTTABOP_unmap_and_replace to reinstate the original
 mapping

GNTTABOP_unmap_grant_ref unmaps a grant and replaces it with a 0
mapping instead of reinstating the original mapping.
Doing so separately would be racy.

To unmap a grant and reinstate the original mapping atomically we use
GNTTABOP_unmap_and_replace.
GNTTABOP_unmap_and_replace doesn't work with GNTMAP_contains_pte, so
don't use it for kmaps.  GNTTABOP_unmap_and_replace zeroes the mapping
passed in new_addr so we have to reinstate it, however that is a
per-cpu mapping only used for balloon scratch pages, so we can be sure that
it's not going to be accessed while the mapping is not valid.

Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Reviewed-by: David Vrabel <david.vrabel@citrix.com>
Acked-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
CC: alex@alex.org.uk
CC: dcrisan@flexiant.com

[v1: Konrad fixed up the conflicts]
Conflicts:
	arch/x86/xen/p2m.c

diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 74672ee..0d4ec35 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -161,6 +161,7 @@
 #include <asm/xen/page.h>
 #include <asm/xen/hypercall.h>
 #include <asm/xen/hypervisor.h>
+#include <xen/balloon.h>
 #include <xen/grant_table.h>
 
 #include "multicalls.h"
@@ -967,7 +968,10 @@ int m2p_remove_override(struct page *page,
 	if (kmap_op != NULL) {
 		if (!PageHighMem(page)) {
 			struct multicall_space mcs;
-			struct gnttab_unmap_grant_ref *unmap_op;
+			struct gnttab_unmap_and_replace *unmap_op;
+			struct page *scratch_page = get_balloon_scratch_page();
+			unsigned long scratch_page_address = (unsigned long)
+				__va(page_to_pfn(scratch_page) << PAGE_SHIFT);
 
 			/*
 			 * It might be that we queued all the m2p grant table
@@ -990,20 +994,25 @@ int m2p_remove_override(struct page *page,
 			}
 
 			mcs = xen_mc_entry(
-					sizeof(struct gnttab_unmap_grant_ref));
+					sizeof(struct gnttab_unmap_and_replace));
 			unmap_op = mcs.args;
 			unmap_op->host_addr = kmap_op->host_addr;
+			unmap_op->new_addr = scratch_page_address;
 			unmap_op->handle = kmap_op->handle;
-			unmap_op->dev_bus_addr = 0;
 
 			MULTI_grant_table_op(mcs.mc,
-					GNTTABOP_unmap_grant_ref, unmap_op, 1);
+					GNTTABOP_unmap_and_replace, unmap_op, 1);
 
 			xen_mc_issue(PARAVIRT_LAZY_MMU);
 
-			set_pte_at(&init_mm, address, ptep,
-					pfn_pte(pfn, PAGE_KERNEL));
+			mcs = __xen_mc_entry(0);
+			MULTI_update_va_mapping(mcs.mc, scratch_page_address,
+					pfn_pte(page_to_pfn(get_balloon_scratch_page()),
+					PAGE_KERNEL_RO), 0);
+			xen_mc_issue(PARAVIRT_LAZY_MMU);
+
 			kmap_op->host_addr = 0;
+			put_balloon_scratch_page();
 		}
 	}
 
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index eab5427..e41c79c 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -272,19 +272,12 @@ static int map_grant_pages(struct grant_map *map)
 		 * with find_grant_ptes.
 		 */
 		for (i = 0; i < map->count; i++) {
-			unsigned level;
 			unsigned long address = (unsigned long)
 				pfn_to_kaddr(page_to_pfn(map->pages[i]));
-			pte_t *ptep;
-			u64 pte_maddr = 0;
 			BUG_ON(PageHighMem(map->pages[i]));
 
-			ptep = lookup_address(address, &level);
-			pte_maddr = arbitrary_virt_to_machine(ptep).maddr;
-			gnttab_set_map_op(&map->kmap_ops[i], pte_maddr,
-				map->flags |
-				GNTMAP_host_map |
-				GNTMAP_contains_pte,
+			gnttab_set_map_op(&map->kmap_ops[i], address,
+				map->flags | GNTMAP_host_map,
 				map->grants[i].ref,
 				map->grants[i].domid);
 		}
-- 
cgit v0.10.2


From 669b0ae961e87c824233475e987b2d39996d4849 Mon Sep 17 00:00:00 2001
From: Vaughan Cao <vaughan.cao@oracle.com>
Date: Fri, 16 Aug 2013 16:10:56 +0800
Subject: xen/pvhvm: Initialize xen panic handler for PVHVM guests

kernel use callback linked in panic_notifier_list to notice others when panic
happens.
NORET_TYPE void panic(const char * fmt, ...){
    ...
    atomic_notifier_call_chain(&panic_notifier_list, 0, buf);
}
When Xen becomes aware of this, it will call xen_reboot(SHUTDOWN_crash) to
send out an event with reason code - SHUTDOWN_crash.

xen_panic_handler_init() is defined to register on panic_notifier_list but
we only call it in xen_arch_setup which only be called by PV, this patch is
necessary for PVHVM.

Without this patch, setting 'on_crash=coredump-restart' in PVHVM guest config
file won't lead a vmcore to be generate when the guest panics. It can be
reproduced with 'echo c > /proc/sysrq-trigger'.

Signed-off-by: Vaughan Cao <vaughan.cao@oracle.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Acked-by: Joe Jin <joe.jin@oracle.com>

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index b5a22fa..15939e8 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1713,6 +1713,8 @@ static void __init xen_hvm_guest_init(void)
 
 	xen_hvm_init_shared_info();
 
+	xen_panic_handler_init();
+
 	if (xen_feature(XENFEAT_hvm_callback_vector))
 		xen_have_vector_callback = 1;
 	xen_hvm_smp_init();
-- 
cgit v0.10.2


From a5deabe0e62203350369020687c3fc3b7445a0d0 Mon Sep 17 00:00:00 2001
From: Andres Lagar-Cavilla <andres@lagarcavilla.org>
Date: Fri, 23 Aug 2013 18:10:06 +0100
Subject: Xen: Fix retry calls into PRIVCMD_MMAPBATCH*.

When a foreign mapper attempts to map guest frames that are paged out,
the mapper receives an ENOENT response and will have to try again
while a helper process pages the target frame back in.

Gating checks on PRIVCMD_MMAPBATCH* ioctl args were preventing retries
of mapping calls.

Permit subsequent calls to update a sub-range of the VMA, iff nothing
is yet mapped in that range.

Since it is now valid to call PRIVCMD_MMAPBATCH* multiple times, only
set vma->vm_private_data if the parameters are valid and (if
necessary) the pages for the auto_translated_physmap case have been
allocated.  This prevents subsequent calls from incorrectly entering
the 'retry' path when there are no pages allocated etc.

Signed-off-by: Andres Lagar-Cavilla <andres@lagarcavilla.org>
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>

diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
index f8e5dd70..8e74590 100644
--- a/drivers/xen/privcmd.c
+++ b/drivers/xen/privcmd.c
@@ -43,9 +43,10 @@ MODULE_LICENSE("GPL");
 
 #define PRIV_VMA_LOCKED ((void *)1)
 
-#ifndef HAVE_ARCH_PRIVCMD_MMAP
-static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma);
-#endif
+static int privcmd_vma_range_is_mapped(
+               struct vm_area_struct *vma,
+               unsigned long addr,
+               unsigned long nr_pages);
 
 static long privcmd_ioctl_hypercall(void __user *udata)
 {
@@ -225,9 +226,9 @@ static long privcmd_ioctl_mmap(void __user *udata)
 		vma = find_vma(mm, msg->va);
 		rc = -EINVAL;
 
-		if (!vma || (msg->va != vma->vm_start) ||
-		    !privcmd_enforce_singleshot_mapping(vma))
+		if (!vma || (msg->va != vma->vm_start) || vma->vm_private_data)
 			goto out_up;
+		vma->vm_private_data = PRIV_VMA_LOCKED;
 	}
 
 	state.va = vma->vm_start;
@@ -358,7 +359,7 @@ static int alloc_empty_pages(struct vm_area_struct *vma, int numpgs)
 		kfree(pages);
 		return -ENOMEM;
 	}
-	BUG_ON(vma->vm_private_data != PRIV_VMA_LOCKED);
+	BUG_ON(vma->vm_private_data != NULL);
 	vma->vm_private_data = pages;
 
 	return 0;
@@ -421,19 +422,43 @@ static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
 
 	vma = find_vma(mm, m.addr);
 	if (!vma ||
-	    vma->vm_ops != &privcmd_vm_ops ||
-	    (m.addr != vma->vm_start) ||
-	    ((m.addr + (nr_pages << PAGE_SHIFT)) != vma->vm_end) ||
-	    !privcmd_enforce_singleshot_mapping(vma)) {
-		up_write(&mm->mmap_sem);
+	    vma->vm_ops != &privcmd_vm_ops) {
 		ret = -EINVAL;
-		goto out;
+		goto out_unlock;
 	}
-	if (xen_feature(XENFEAT_auto_translated_physmap)) {
-		ret = alloc_empty_pages(vma, m.num);
-		if (ret < 0) {
-			up_write(&mm->mmap_sem);
-			goto out;
+
+	/*
+	 * Caller must either:
+	 *
+	 * Map the whole VMA range, which will also allocate all the
+	 * pages required for the auto_translated_physmap case.
+	 *
+	 * Or
+	 *
+	 * Map unmapped holes left from a previous map attempt (e.g.,
+	 * because those foreign frames were previously paged out).
+	 */
+	if (vma->vm_private_data == NULL) {
+		if (m.addr != vma->vm_start ||
+		    m.addr + (nr_pages << PAGE_SHIFT) != vma->vm_end) {
+			ret = -EINVAL;
+			goto out_unlock;
+		}
+		if (xen_feature(XENFEAT_auto_translated_physmap)) {
+			ret = alloc_empty_pages(vma, m.num);
+			if (ret < 0)
+				goto out_unlock;
+		} else
+			vma->vm_private_data = PRIV_VMA_LOCKED;
+	} else {
+		if (m.addr < vma->vm_start ||
+		    m.addr + (nr_pages << PAGE_SHIFT) > vma->vm_end) {
+			ret = -EINVAL;
+			goto out_unlock;
+		}
+		if (privcmd_vma_range_is_mapped(vma, m.addr, nr_pages)) {
+			ret = -EINVAL;
+			goto out_unlock;
 		}
 	}
 
@@ -466,8 +491,11 @@ static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
 
 out:
 	free_page_list(&pagelist);
-
 	return ret;
+
+out_unlock:
+	up_write(&mm->mmap_sem);
+	goto out;
 }
 
 static long privcmd_ioctl(struct file *file,
@@ -540,9 +568,24 @@ static int privcmd_mmap(struct file *file, struct vm_area_struct *vma)
 	return 0;
 }
 
-static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma)
+/*
+ * For MMAPBATCH*. This allows asserting the singleshot mapping
+ * on a per pfn/pte basis. Mapping calls that fail with ENOENT
+ * can be then retried until success.
+ */
+static int is_mapped_fn(pte_t *pte, struct page *pmd_page,
+	                unsigned long addr, void *data)
+{
+	return pte_none(*pte) ? 0 : -EBUSY;
+}
+
+static int privcmd_vma_range_is_mapped(
+	           struct vm_area_struct *vma,
+	           unsigned long addr,
+	           unsigned long nr_pages)
 {
-	return !cmpxchg(&vma->vm_private_data, NULL, PRIV_VMA_LOCKED);
+	return apply_to_page_range(vma->vm_mm, addr, nr_pages << PAGE_SHIFT,
+				   is_mapped_fn, NULL) != 0;
 }
 
 const struct file_operations xen_privcmd_fops = {
-- 
cgit v0.10.2


From 06b35d0072d2a126d8cd134b253e5ec65beb9e66 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Mon, 26 Aug 2013 19:18:17 +0300
Subject: xen/evtchn: double free on error

The call to del_evtchn() frees "evtchn".

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>

diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c
index f328f12..8b3a69a 100644
--- a/drivers/xen/evtchn.c
+++ b/drivers/xen/evtchn.c
@@ -319,7 +319,6 @@ err:
 	if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
 		BUG();
 	del_evtchn(u, evtchn);
-	kfree(evtchn);
 	return rc;
 }
 
-- 
cgit v0.10.2


From 04660bb5d0e4104e01a8fc34e5d51de1d84c09bf Mon Sep 17 00:00:00 2001
From: Wei Liu <wei.liu2@citrix.com>
Date: Tue, 27 Aug 2013 16:17:25 +0100
Subject: xen/balloon: don't set P2M entry for auto translated guest

In commit cd9151e2: xen/balloon: set a mapping for ballooned out pages
we have the ballooned out page's mapping set to a scratch page.

That commit also sets the P2M entry of ballooned out page to the scratch
page's MFN. This is necessary for PV guest but not for HVM guest. On the
other hand, setting the P2M entry would trigger BUG_ON in
__set_phys_to_machine.

The correct thing to do here is to avoid calling __set_phys_to_machine
for auto translated guest.

Signed-off-by: Wei Liu <wei.liu2@citrix.com>
Cc: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Acked-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>

diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index a3dc75d..3101cf6 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -430,8 +430,13 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
 	/* No more mappings: invalidate P2M and add to balloon. */
 	for (i = 0; i < nr_pages; i++) {
 		pfn = mfn_to_pfn(frame_list[i]);
-		__set_phys_to_machine(pfn,
-				pfn_to_mfn(page_to_pfn(__get_cpu_var(balloon_scratch_page))));
+		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+			unsigned long p;
+			struct page *pg;
+			pg = __get_cpu_var(balloon_scratch_page);
+			p = page_to_pfn(pg);
+			__set_phys_to_machine(pfn, pfn_to_mfn(p));
+		}
 		balloon_append(pfn_to_page(pfn));
 	}
 
-- 
cgit v0.10.2


From 713efcabca1fffbbb253a33ff0f8fda532633064 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Fri, 30 Aug 2013 09:02:27 -0400
Subject: drivers/xen-tpmfront: Fix compile issue with missing option.

Randy reports:

x86_64:

drivers/built-in.o: In function `xen_tpmfront_init':
xen-tpmfront.c:(.init.text+0x257c): undefined reference to `xenbus_register_frontend'

This is nicely fixed by selecting the XenBus frontend module.

Reported-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>

diff --git a/drivers/char/tpm/Kconfig b/drivers/char/tpm/Kconfig
index 205ed35..94c0c74 100644
--- a/drivers/char/tpm/Kconfig
+++ b/drivers/char/tpm/Kconfig
@@ -94,6 +94,7 @@ config TCG_ST33_I2C
 config TCG_XEN
 	tristate "XEN TPM Interface"
 	depends on TCG_TPM && XEN
+	select XEN_XENBUS_FRONTEND
 	---help---
 	  If you want to make TPM support available to a Xen user domain,
 	  say Yes and it will be accessible from within Linux. See
-- 
cgit v0.10.2


From 23b7eaf8220721892975610dd0ae5c846a34dcb4 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Thu, 29 Aug 2013 14:08:18 -0700
Subject: hvc_xen: Remove unnecessary __GFP_ZERO from kzalloc

kzalloc already adds this __GFP_ZERO.

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>

diff --git a/drivers/tty/hvc/hvc_xen.c b/drivers/tty/hvc/hvc_xen.c
index 682210d..e61c36c 100644
--- a/drivers/tty/hvc/hvc_xen.c
+++ b/drivers/tty/hvc/hvc_xen.c
@@ -208,7 +208,7 @@ static int xen_hvm_console_init(void)
 
 	info = vtermno_to_xencons(HVC_COOKIE);
 	if (!info) {
-		info = kzalloc(sizeof(struct xencons_info), GFP_KERNEL | __GFP_ZERO);
+		info = kzalloc(sizeof(struct xencons_info), GFP_KERNEL);
 		if (!info)
 			return -ENOMEM;
 	} else if (info->intf != NULL) {
@@ -257,7 +257,7 @@ static int xen_pv_console_init(void)
 
 	info = vtermno_to_xencons(HVC_COOKIE);
 	if (!info) {
-		info = kzalloc(sizeof(struct xencons_info), GFP_KERNEL | __GFP_ZERO);
+		info = kzalloc(sizeof(struct xencons_info), GFP_KERNEL);
 		if (!info)
 			return -ENOMEM;
 	} else if (info->intf != NULL) {
@@ -284,7 +284,7 @@ static int xen_initial_domain_console_init(void)
 
 	info = vtermno_to_xencons(HVC_COOKIE);
 	if (!info) {
-		info = kzalloc(sizeof(struct xencons_info), GFP_KERNEL | __GFP_ZERO);
+		info = kzalloc(sizeof(struct xencons_info), GFP_KERNEL);
 		if (!info)
 			return -ENOMEM;
 	}
-- 
cgit v0.10.2