From 2c5816b4beccc8ba709144539f6fdd764f8fa49c Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <miklos@szeredi.hu>
Date: Tue, 10 Nov 2015 10:32:36 +0100
Subject: cuse: fix memory leak

The problem is that fuse_dev_alloc() acquires an extra reference to cc.fc,
and the original ref count is never dropped.

Reported-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>
Fixes: cc080e9e9be1 ("fuse: introduce per-instance fuse_dev structure")
Cc: <stable@vger.kernel.org> # v4.2+

diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
index eae2c11..8e3ee19 100644
--- a/fs/fuse/cuse.c
+++ b/fs/fuse/cuse.c
@@ -549,6 +549,8 @@ static int cuse_channel_release(struct inode *inode, struct file *file)
 		unregister_chrdev_region(cc->cdev->dev, 1);
 		cdev_del(cc->cdev);
 	}
+	/* Base reference is now owned by "fud" */
+	fuse_conn_put(&cc->fc);
 
 	rc = fuse_dev_release(inode, file);	/* puts the base reference */
 
-- 
cgit v0.10.2


From 3ca8138f014a913f98e6ef40e939868e1e9ea876 Mon Sep 17 00:00:00 2001
From: Roman Gushchin <klamm@yandex-team.ru>
Date: Mon, 12 Oct 2015 16:33:44 +0300
Subject: fuse: break infinite loop in fuse_fill_write_pages()

I got a report about unkillable task eating CPU. Further
investigation shows, that the problem is in the fuse_fill_write_pages()
function. If iov's first segment has zero length, we get an infinite
loop, because we never reach iov_iter_advance() call.

Fix this by calling iov_iter_advance() before repeating an attempt to
copy data from userspace.

A similar problem is described in 124d3b7041f ("fix writev regression:
pan hanging unkillable and un-straceable"). If zero-length segmend
is followed by segment with invalid address,
iov_iter_fault_in_readable() checks only first segment (zero-length),
iov_iter_copy_from_user_atomic() skips it, fails at second and
returns zero -> goto again without skipping zero-length segment.

Patch calls iov_iter_advance() before goto again: we'll skip zero-length
segment at second iteraction and iov_iter_fault_in_readable() will detect
invalid address.

Special thanks to Konstantin Khlebnikov, who helped a lot with the commit
description.

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Maxim Patlasov <mpatlasov@parallels.com>
Cc: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Signed-off-by: Roman Gushchin <klamm@yandex-team.ru>
Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>
Fixes: ea9b9907b82a ("fuse: implement perform_write")
Cc: <stable@vger.kernel.org>

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index f523f2f..195476a 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1049,6 +1049,7 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req,
 		tmp = iov_iter_copy_from_user_atomic(page, ii, offset, bytes);
 		flush_dcache_page(page);
 
+		iov_iter_advance(ii, tmp);
 		if (!tmp) {
 			unlock_page(page);
 			page_cache_release(page);
@@ -1061,7 +1062,6 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req,
 		req->page_descs[req->num_pages].length = tmp;
 		req->num_pages++;
 
-		iov_iter_advance(ii, tmp);
 		count += tmp;
 		pos += tmp;
 		offset += tmp;
-- 
cgit v0.10.2


From 34c06254ff82a815fdccdfae7517a06c9b768cee Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 5 Nov 2015 00:12:24 -0500
Subject: cgroup: fix cftype->file_offset handling

6f60eade2433 ("cgroup: generalize obtaining the handles of and
notifying cgroup files") introduced cftype->file_offset so that the
handles for per-css file instances can be recorded.  These handles
then can be used, for example, to generate file modified
notifications.

Unfortunately, it made the wrong assumption that files are created
once for a given css and removed on its destruction.  Due to the
dependencies among subsystems, a css may be hidden from userland and
then later shown again.  This is implemented by removing and
re-creating the affected files, so the associated kernfs_node for a
given cgroup file may change over time.  This incorrect assumption led
to the corruption of css->files lists.

Reimplement cftype->file_offset handling so that cgroup_file->kn is
protected by a lock and updated as files are created and destroyed.
This also makes keeping them on per-cgroup list unnecessary.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: James Sedgwick <jsedgwick@fb.com>
Fixes: 6f60eade2433 ("cgroup: generalize obtaining the handles of and notifying cgroup files")
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Zefan Li <lizefan@huawei.com>

diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 60d44b2..869fd4a 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -90,7 +90,6 @@ enum {
  */
 struct cgroup_file {
 	/* do not access any fields from outside cgroup core */
-	struct list_head node;			/* anchored at css->files */
 	struct kernfs_node *kn;
 };
 
@@ -134,9 +133,6 @@ struct cgroup_subsys_state {
 	 */
 	u64 serial_nr;
 
-	/* all cgroup_files associated with this css */
-	struct list_head files;
-
 	/* percpu_ref killing and RCU release */
 	struct rcu_head rcu_head;
 	struct work_struct destroy_work;
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 22e3754..f640830 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -88,6 +88,7 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from);
 int cgroup_add_dfl_cftypes(struct cgroup_subsys *ss, struct cftype *cfts);
 int cgroup_add_legacy_cftypes(struct cgroup_subsys *ss, struct cftype *cfts);
 int cgroup_rm_cftypes(struct cftype *cfts);
+void cgroup_file_notify(struct cgroup_file *cfile);
 
 char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen);
 int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry);
@@ -516,19 +517,6 @@ static inline void pr_cont_cgroup_path(struct cgroup *cgrp)
 	pr_cont_kernfs_path(cgrp->kn);
 }
 
-/**
- * cgroup_file_notify - generate a file modified event for a cgroup_file
- * @cfile: target cgroup_file
- *
- * @cfile must have been obtained by setting cftype->file_offset.
- */
-static inline void cgroup_file_notify(struct cgroup_file *cfile)
-{
-	/* might not have been created due to one of the CFTYPE selector flags */
-	if (cfile->kn)
-		kernfs_notify(cfile->kn);
-}
-
 #else /* !CONFIG_CGROUPS */
 
 struct cgroup_subsys_state;
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index f1603c1..b316deb 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -98,6 +98,12 @@ static DEFINE_SPINLOCK(css_set_lock);
 static DEFINE_SPINLOCK(cgroup_idr_lock);
 
 /*
+ * Protects cgroup_file->kn for !self csses.  It synchronizes notifications
+ * against file removal/re-creation across css hiding.
+ */
+static DEFINE_SPINLOCK(cgroup_file_kn_lock);
+
+/*
  * Protects cgroup_subsys->release_agent_path.  Modifying it also requires
  * cgroup_mutex.  Reading requires either cgroup_mutex or this spinlock.
  */
@@ -1393,6 +1399,16 @@ static void cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft)
 	char name[CGROUP_FILE_NAME_MAX];
 
 	lockdep_assert_held(&cgroup_mutex);
+
+	if (cft->file_offset) {
+		struct cgroup_subsys_state *css = cgroup_css(cgrp, cft->ss);
+		struct cgroup_file *cfile = (void *)css + cft->file_offset;
+
+		spin_lock_irq(&cgroup_file_kn_lock);
+		cfile->kn = NULL;
+		spin_unlock_irq(&cgroup_file_kn_lock);
+	}
+
 	kernfs_remove_by_name(cgrp->kn, cgroup_file_name(cgrp, cft, name));
 }
 
@@ -1856,7 +1872,6 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
 
 	INIT_LIST_HEAD(&cgrp->self.sibling);
 	INIT_LIST_HEAD(&cgrp->self.children);
-	INIT_LIST_HEAD(&cgrp->self.files);
 	INIT_LIST_HEAD(&cgrp->cset_links);
 	INIT_LIST_HEAD(&cgrp->pidlists);
 	mutex_init(&cgrp->pidlist_mutex);
@@ -3313,9 +3328,9 @@ static int cgroup_add_file(struct cgroup_subsys_state *css, struct cgroup *cgrp,
 	if (cft->file_offset) {
 		struct cgroup_file *cfile = (void *)css + cft->file_offset;
 
-		kernfs_get(kn);
+		spin_lock_irq(&cgroup_file_kn_lock);
 		cfile->kn = kn;
-		list_add(&cfile->node, &css->files);
+		spin_unlock_irq(&cgroup_file_kn_lock);
 	}
 
 	return 0;
@@ -3553,6 +3568,22 @@ int cgroup_add_legacy_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
 }
 
 /**
+ * cgroup_file_notify - generate a file modified event for a cgroup_file
+ * @cfile: target cgroup_file
+ *
+ * @cfile must have been obtained by setting cftype->file_offset.
+ */
+void cgroup_file_notify(struct cgroup_file *cfile)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&cgroup_file_kn_lock, flags);
+	if (cfile->kn)
+		kernfs_notify(cfile->kn);
+	spin_unlock_irqrestore(&cgroup_file_kn_lock, flags);
+}
+
+/**
  * cgroup_task_count - count the number of tasks in a cgroup.
  * @cgrp: the cgroup in question
  *
@@ -4613,13 +4644,9 @@ static void css_free_work_fn(struct work_struct *work)
 		container_of(work, struct cgroup_subsys_state, destroy_work);
 	struct cgroup_subsys *ss = css->ss;
 	struct cgroup *cgrp = css->cgroup;
-	struct cgroup_file *cfile;
 
 	percpu_ref_exit(&css->refcnt);
 
-	list_for_each_entry(cfile, &css->files, node)
-		kernfs_put(cfile->kn);
-
 	if (ss) {
 		/* css free path */
 		int id = css->id;
@@ -4724,7 +4751,6 @@ static void init_and_link_css(struct cgroup_subsys_state *css,
 	css->ss = ss;
 	INIT_LIST_HEAD(&css->sibling);
 	INIT_LIST_HEAD(&css->children);
-	INIT_LIST_HEAD(&css->files);
 	css->serial_nr = css_serial_nr_next++;
 
 	if (cgroup_parent(cgrp)) {
-- 
cgit v0.10.2


From c5967b79ecabe2baca40658d9073e28b30d7f6cf Mon Sep 17 00:00:00 2001
From: "Charles_Rose@Dell.com" <Charles_Rose@Dell.com>
Date: Fri, 6 Nov 2015 14:18:56 -0600
Subject: ahci: Add Device ID for Intel Sunrise Point PCH

This patch adds missing AHCI RAID SATA Device IDs for the Intel Sunrise
Point PCH.

Signed-off-by: Nanda Kishore Chinna <nanda_kishore_chinna@dell.com>
Signed-off-by: Charles Rose <charles_rose@dell.com>
Signed-off-by: Tejun Heo <tj@kernel.org>

diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index ff02bb4..ee3fc84 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -350,8 +350,10 @@ static const struct pci_device_id ahci_pci_tbl[] = {
 	{ PCI_VDEVICE(INTEL, 0x9d03), board_ahci }, /* Sunrise Point-LP AHCI */
 	{ PCI_VDEVICE(INTEL, 0x9d05), board_ahci }, /* Sunrise Point-LP RAID */
 	{ PCI_VDEVICE(INTEL, 0x9d07), board_ahci }, /* Sunrise Point-LP RAID */
+	{ PCI_VDEVICE(INTEL, 0xa102), board_ahci }, /* Sunrise Point-H AHCI */
 	{ PCI_VDEVICE(INTEL, 0xa103), board_ahci }, /* Sunrise Point-H AHCI */
 	{ PCI_VDEVICE(INTEL, 0xa105), board_ahci }, /* Sunrise Point-H RAID */
+	{ PCI_VDEVICE(INTEL, 0xa106), board_ahci }, /* Sunrise Point-H RAID */
 	{ PCI_VDEVICE(INTEL, 0xa107), board_ahci }, /* Sunrise Point-H RAID */
 	{ PCI_VDEVICE(INTEL, 0xa10f), board_ahci }, /* Sunrise Point-H RAID */
 
-- 
cgit v0.10.2


From 4d92f0099a06ef0e36c7673f7c090f1a448b2d1b Mon Sep 17 00:00:00 2001
From: Alexandra Yates <alexandra.yates@linux.intel.com>
Date: Mon, 16 Nov 2015 11:22:16 -0500
Subject: ahci: Order SATA device IDs for codename Lewisburg

This change was to preserve the ascending order of device IDs.
There was an exception with the first two Lewisburg device IDs to
keep all device IDs of the same kind grouped by code name.

Signed-off-by: Alexandra Yates <alexandra.yates@linux.intel.com>
signed-off-by: Tejun Heo <tj@kernel.org>

diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index ee3fc84..cdfbcc5 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -314,16 +314,6 @@ static const struct pci_device_id ahci_pci_tbl[] = {
 	{ PCI_VDEVICE(INTEL, 0x1f37), board_ahci_avn }, /* Avoton RAID */
 	{ PCI_VDEVICE(INTEL, 0x1f3e), board_ahci_avn }, /* Avoton RAID */
 	{ PCI_VDEVICE(INTEL, 0x1f3f), board_ahci_avn }, /* Avoton RAID */
-	{ PCI_VDEVICE(INTEL, 0xa182), board_ahci }, /* Lewisburg AHCI*/
-	{ PCI_VDEVICE(INTEL, 0xa202), board_ahci }, /* Lewisburg AHCI*/
-	{ PCI_VDEVICE(INTEL, 0xa184), board_ahci }, /* Lewisburg RAID*/
-	{ PCI_VDEVICE(INTEL, 0xa204), board_ahci }, /* Lewisburg RAID*/
-	{ PCI_VDEVICE(INTEL, 0xa186), board_ahci }, /* Lewisburg RAID*/
-	{ PCI_VDEVICE(INTEL, 0xa206), board_ahci }, /* Lewisburg RAID*/
-	{ PCI_VDEVICE(INTEL, 0x2822), board_ahci }, /* Lewisburg RAID*/
-	{ PCI_VDEVICE(INTEL, 0x2826), board_ahci }, /* Lewisburg RAID*/
-	{ PCI_VDEVICE(INTEL, 0xa18e), board_ahci }, /* Lewisburg RAID*/
-	{ PCI_VDEVICE(INTEL, 0xa20e), board_ahci }, /* Lewisburg RAID*/
 	{ PCI_VDEVICE(INTEL, 0x2823), board_ahci }, /* Wellsburg RAID */
 	{ PCI_VDEVICE(INTEL, 0x2827), board_ahci }, /* Wellsburg RAID */
 	{ PCI_VDEVICE(INTEL, 0x8d02), board_ahci }, /* Wellsburg AHCI */
@@ -356,6 +346,16 @@ static const struct pci_device_id ahci_pci_tbl[] = {
 	{ PCI_VDEVICE(INTEL, 0xa106), board_ahci }, /* Sunrise Point-H RAID */
 	{ PCI_VDEVICE(INTEL, 0xa107), board_ahci }, /* Sunrise Point-H RAID */
 	{ PCI_VDEVICE(INTEL, 0xa10f), board_ahci }, /* Sunrise Point-H RAID */
+	{ PCI_VDEVICE(INTEL, 0x2822), board_ahci }, /* Lewisburg RAID*/
+	{ PCI_VDEVICE(INTEL, 0x2826), board_ahci }, /* Lewisburg RAID*/
+	{ PCI_VDEVICE(INTEL, 0xa182), board_ahci }, /* Lewisburg AHCI*/
+	{ PCI_VDEVICE(INTEL, 0xa184), board_ahci }, /* Lewisburg RAID*/
+	{ PCI_VDEVICE(INTEL, 0xa186), board_ahci }, /* Lewisburg RAID*/
+	{ PCI_VDEVICE(INTEL, 0xa18e), board_ahci }, /* Lewisburg RAID*/
+	{ PCI_VDEVICE(INTEL, 0xa202), board_ahci }, /* Lewisburg AHCI*/
+	{ PCI_VDEVICE(INTEL, 0xa204), board_ahci }, /* Lewisburg RAID*/
+	{ PCI_VDEVICE(INTEL, 0xa206), board_ahci }, /* Lewisburg RAID*/
+	{ PCI_VDEVICE(INTEL, 0xa20e), board_ahci }, /* Lewisburg RAID*/
 
 	/* JMicron 360/1/3/5/6, match class to avoid IDE function */
 	{ PCI_VENDOR_ID_JMICRON, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID,
-- 
cgit v0.10.2


From ce36f6ad862fc3bc2466c89de17694e582a778b5 Mon Sep 17 00:00:00 2001
From: Pankaj Dubey <pankaj.dubey@samsung.com>
Date: Fri, 13 Nov 2015 14:59:36 +0530
Subject: ARM: EXYNOS: Fix potential NULL pointer access in
 exynos_sys_powerdown_conf

If no platform devices binded to the driver but driver itself loaded and
exynos_sys_powerdown_conf is called from
arch/arm/mach-exynos/{suspend.c, pm.c} it will result in NULL pointer access,
to prevent this added check on pmu_context for NULL.

Signed-off-by: Pankaj Dubey <pankaj.dubey@samsung.com>
Reviewed-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Tested-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Signed-off-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>

diff --git a/arch/arm/mach-exynos/pmu.c b/arch/arm/mach-exynos/pmu.c
index de68938..c21e41d 100644
--- a/arch/arm/mach-exynos/pmu.c
+++ b/arch/arm/mach-exynos/pmu.c
@@ -748,8 +748,12 @@ static void exynos5_powerdown_conf(enum sys_powerdown mode)
 void exynos_sys_powerdown_conf(enum sys_powerdown mode)
 {
 	unsigned int i;
+	const struct exynos_pmu_data *pmu_data;
+
+	if (!pmu_context)
+		return;
 
-	const struct exynos_pmu_data *pmu_data = pmu_context->pmu_data;
+	pmu_data = pmu_context->pmu_data;
 
 	if (pmu_data->powerdown_conf)
 		pmu_data->powerdown_conf(mode);
-- 
cgit v0.10.2


From 183e53e8ddf4165c3763181682189362d6b403f7 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 18 Nov 2015 17:18:40 +0100
Subject: usb: musb: USB_TI_CPPI41_DMA requires dmaengine support

The CPPI-4.1 driver selects TI_CPPI41, which is a dmaengine
driver and that may not be available when CONFIG_DMADEVICES
is not set:

warning: (USB_TI_CPPI41_DMA) selects TI_CPPI41 which has unmet direct dependencies (DMADEVICES && ARCH_OMAP)

This adds an extra dependency to avoid generating warnings in randconfig
builds. Ideally we'd remove the 'select' statement, but that has the
potential to break defconfig files.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Fixes: 411dd19c682d ("usb: musb: Kconfig: Select the DMA driver if DMA mode of MUSB is enabled")
Signed-off-by: Felipe Balbi <balbi@ti.com>

diff --git a/drivers/usb/musb/Kconfig b/drivers/usb/musb/Kconfig
index 1f2037b..45c83ba 100644
--- a/drivers/usb/musb/Kconfig
+++ b/drivers/usb/musb/Kconfig
@@ -159,7 +159,7 @@ config USB_TI_CPPI_DMA
 
 config USB_TI_CPPI41_DMA
 	bool 'TI CPPI 4.1 (AM335x)'
-	depends on ARCH_OMAP
+	depends on ARCH_OMAP && DMADEVICES
 	select TI_CPPI41
 
 config USB_TUSB_OMAP_DMA
-- 
cgit v0.10.2


From 7fe9a937d5156761cf79dc354f616c726b5c09e2 Mon Sep 17 00:00:00 2001
From: Daniel Walter <dwalter@sigma-star.at>
Date: Wed, 18 Nov 2015 17:15:49 +0100
Subject: usb: gadget: functionfs: fix missing access_ok checks

use safe copy_*_user instead of unsafe __copy_*_user
functions when accessing userland memory.

Signed-off-by: Daniel Walter <dwalter@sigma-star.at>
Signed-off-by: Felipe Balbi <balbi@ti.com>

diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
index adc6d52..cf43e9e 100644
--- a/drivers/usb/gadget/function/f_fs.c
+++ b/drivers/usb/gadget/function/f_fs.c
@@ -423,7 +423,7 @@ static ssize_t __ffs_ep0_read_events(struct ffs_data *ffs, char __user *buf,
 	spin_unlock_irq(&ffs->ev.waitq.lock);
 	mutex_unlock(&ffs->mutex);
 
-	return unlikely(__copy_to_user(buf, events, size)) ? -EFAULT : size;
+	return unlikely(copy_to_user(buf, events, size)) ? -EFAULT : size;
 }
 
 static ssize_t ffs_ep0_read(struct file *file, char __user *buf,
@@ -513,7 +513,7 @@ static ssize_t ffs_ep0_read(struct file *file, char __user *buf,
 
 		/* unlocks spinlock */
 		ret = __ffs_ep0_queue_wait(ffs, data, len);
-		if (likely(ret > 0) && unlikely(__copy_to_user(buf, data, len)))
+		if (likely(ret > 0) && unlikely(copy_to_user(buf, data, len)))
 			ret = -EFAULT;
 		goto done_mutex;
 
@@ -3493,7 +3493,7 @@ static char *ffs_prepare_buffer(const char __user *buf, size_t len)
 	if (unlikely(!data))
 		return ERR_PTR(-ENOMEM);
 
-	if (unlikely(__copy_from_user(data, buf, len))) {
+	if (unlikely(copy_from_user(data, buf, len))) {
 		kfree(data);
 		return ERR_PTR(-EFAULT);
 	}
-- 
cgit v0.10.2


From f66541ba02d5cb619b84fa4c5b43b3bcc8764716 Mon Sep 17 00:00:00 2001
From: Jyri Sarha <jsarha@ti.com>
Date: Tue, 17 Nov 2015 11:56:44 +0200
Subject: clk: gpio: Get parent clk names in of_gpio_clk_setup()

Get parent clk names in of_gpio_clk_setup() and store the names
in struct clk_gpio_delayed_register_data instead of doing it from
the clk provider's get() callback. of_clk_get_parent_name() can't
be called in struct of_clk_provider's get() callback since it may
make a call to of_clk_get_from_provider() and this in turn tries
to recursively lock of_clk_mutex.

Signed-off-by: Jyri Sarha <jsarha@ti.com>
Cc: Sergej Sawazki <ce3a@gmx.de>
Fixes: 0a4807c2f9a4 ("clk: Make of_clk_get_parent_name() robust with #clock-cells = 1")
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>

diff --git a/drivers/clk/clk-gpio.c b/drivers/clk/clk-gpio.c
index 10819e2..335322d 100644
--- a/drivers/clk/clk-gpio.c
+++ b/drivers/clk/clk-gpio.c
@@ -209,6 +209,8 @@ EXPORT_SYMBOL_GPL(clk_register_gpio_mux);
 
 struct clk_gpio_delayed_register_data {
 	const char *gpio_name;
+	int num_parents;
+	const char **parent_names;
 	struct device_node *node;
 	struct mutex lock;
 	struct clk *clk;
@@ -222,8 +224,6 @@ static struct clk *of_clk_gpio_delayed_register_get(
 {
 	struct clk_gpio_delayed_register_data *data = _data;
 	struct clk *clk;
-	const char **parent_names;
-	int i, num_parents;
 	int gpio;
 	enum of_gpio_flags of_flags;
 
@@ -248,26 +248,14 @@ static struct clk *of_clk_gpio_delayed_register_get(
 		return ERR_PTR(gpio);
 	}
 
-	num_parents = of_clk_get_parent_count(data->node);
-
-	parent_names = kcalloc(num_parents, sizeof(char *), GFP_KERNEL);
-	if (!parent_names) {
-		clk = ERR_PTR(-ENOMEM);
-		goto out;
-	}
-
-	for (i = 0; i < num_parents; i++)
-		parent_names[i] = of_clk_get_parent_name(data->node, i);
-
-	clk = data->clk_register_get(data->node->name, parent_names,
-			num_parents, gpio, of_flags & OF_GPIO_ACTIVE_LOW);
+	clk = data->clk_register_get(data->node->name, data->parent_names,
+			data->num_parents, gpio, of_flags & OF_GPIO_ACTIVE_LOW);
 	if (IS_ERR(clk))
 		goto out;
 
 	data->clk = clk;
 out:
 	mutex_unlock(&data->lock);
-	kfree(parent_names);
 
 	return clk;
 }
@@ -296,11 +284,24 @@ static void __init of_gpio_clk_setup(struct device_node *node,
 				unsigned gpio, bool active_low))
 {
 	struct clk_gpio_delayed_register_data *data;
+	const char **parent_names;
+	int i, num_parents;
 
 	data = kzalloc(sizeof(*data), GFP_KERNEL);
 	if (!data)
 		return;
 
+	num_parents = of_clk_get_parent_count(node);
+
+	parent_names = kcalloc(num_parents, sizeof(char *), GFP_KERNEL);
+	if (!parent_names)
+		return;
+
+	for (i = 0; i < num_parents; i++)
+		parent_names[i] = of_clk_get_parent_name(node, i);
+
+	data->num_parents = num_parents;
+	data->parent_names = parent_names;
 	data->node = node;
 	data->gpio_name = gpio_name;
 	data->clk_register_get = clk_register_get;
-- 
cgit v0.10.2


From 391e6dcb37857d5659b53def2f41e2f56850d33c Mon Sep 17 00:00:00 2001
From: Felipe Balbi <balbi@ti.com>
Date: Wed, 18 Nov 2015 17:06:00 -0600
Subject: usb: gadget: pxa27x: fix suspend callback

pxa27x disconnects pullups on suspend but doesn't
notify the gadget driver about it, so gadget driver
can't disable the endpoints it was using.

This causes problems on resume because gadget core
will think endpoints are still enabled and just
ignore the following usb_ep_enable().

Fix this problem by calling
gadget_driver->disconnect().

Cc: <stable@vger.kernel.org> # v3.10+
Tested-by: Robert Jarzmik <robert.jarzmik@free.fr>
Signed-off-by: Felipe Balbi <balbi@ti.com>

diff --git a/drivers/usb/gadget/udc/pxa27x_udc.c b/drivers/usb/gadget/udc/pxa27x_udc.c
index 670ac0b..001a3b7 100644
--- a/drivers/usb/gadget/udc/pxa27x_udc.c
+++ b/drivers/usb/gadget/udc/pxa27x_udc.c
@@ -2536,6 +2536,9 @@ static int pxa_udc_suspend(struct platform_device *_dev, pm_message_t state)
 	udc->pullup_resume = udc->pullup_on;
 	dplus_pullup(udc, 0);
 
+	if (udc->driver)
+		udc->driver->disconnect(&udc->gadget);
+
 	return 0;
 }
 
-- 
cgit v0.10.2


From ac722e302cc057a65cb1d975f8b17e8e11144946 Mon Sep 17 00:00:00 2001
From: Peter Chen <peter.chen@freescale.com>
Date: Thu, 19 Nov 2015 11:47:47 +0800
Subject: usb: kconfig: fix warning of select USB_OTG

When choose randconfig for kernel build, it reports below warning:
"warning: (USB_OTG_FSM && FSL_USB2_OTG && USB_MV_OTG) selects USB_OTG
which has unmet direct dependencies (USB_SUPPORT && USB && PM)"

In fact, USB_OTG is visible symbol and depends on PM, so the driver
needs to depend on it to reduce dependency problem.

Signed-off-by: Peter Chen <peter.chen@freescale.com>
Reported-by: Arnd Bergmann <arnd@arndb.de>
Cc: Felipe Balbi <balbi@ti.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Felipe Balbi <balbi@ti.com>

diff --git a/drivers/usb/core/Kconfig b/drivers/usb/core/Kconfig
index a99c89e..dd28010 100644
--- a/drivers/usb/core/Kconfig
+++ b/drivers/usb/core/Kconfig
@@ -77,8 +77,7 @@ config USB_OTG_BLACKLIST_HUB
 
 config USB_OTG_FSM
 	tristate "USB 2.0 OTG FSM implementation"
-	depends on USB
-	select USB_OTG
+	depends on USB && USB_OTG
 	select USB_PHY
 	help
 	  Implements OTG Finite State Machine as specified in On-The-Go
diff --git a/drivers/usb/phy/Kconfig b/drivers/usb/phy/Kconfig
index 1731324..22e8ecb 100644
--- a/drivers/usb/phy/Kconfig
+++ b/drivers/usb/phy/Kconfig
@@ -21,7 +21,6 @@ config AB8500_USB
 config FSL_USB2_OTG
 	bool "Freescale USB OTG Transceiver Driver"
 	depends on USB_EHCI_FSL && USB_FSL_USB2 && USB_OTG_FSM && PM
-	select USB_OTG
 	select USB_PHY
 	help
 	  Enable this to support Freescale USB OTG transceiver.
@@ -168,8 +167,7 @@ config USB_QCOM_8X16_PHY
 
 config USB_MV_OTG
 	tristate "Marvell USB OTG support"
-	depends on USB_EHCI_MV && USB_MV_UDC && PM
-	select USB_OTG
+	depends on USB_EHCI_MV && USB_MV_UDC && PM && USB_OTG
 	select USB_PHY
 	help
 	  Say Y here if you want to build Marvell USB OTG transciever
-- 
cgit v0.10.2


From c9eb29503e9655e70448bbbf3697d08a56d24854 Mon Sep 17 00:00:00 2001
From: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Date: Wed, 18 Nov 2015 14:33:35 +0900
Subject: usb: renesas_usbhs: gadget: Fix NULL pointer dereference in
 usbhsg_ep_dequeue()

This patch fixes an issue that NULL pointer dereference happens when
a gadget driver calls usb_ep_dequeue() for ep0 after disconnected
a usb cable. This is because that usbhsg_try_stop() will call
usbhsg_ep_disable(&dcp->ep) when a usb cable is disconnected and
the pipe of dcp (ep0) is set to NULL.

Signed-off-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>

diff --git a/drivers/usb/renesas_usbhs/mod_gadget.c b/drivers/usb/renesas_usbhs/mod_gadget.c
index de4f97d..8f7a78e 100644
--- a/drivers/usb/renesas_usbhs/mod_gadget.c
+++ b/drivers/usb/renesas_usbhs/mod_gadget.c
@@ -131,7 +131,8 @@ static void __usbhsg_queue_pop(struct usbhsg_uep *uep,
 	struct device *dev = usbhsg_gpriv_to_dev(gpriv);
 	struct usbhs_priv *priv = usbhsg_gpriv_to_priv(gpriv);
 
-	dev_dbg(dev, "pipe %d : queue pop\n", usbhs_pipe_number(pipe));
+	if (pipe)
+		dev_dbg(dev, "pipe %d : queue pop\n", usbhs_pipe_number(pipe));
 
 	ureq->req.status = status;
 	spin_unlock(usbhs_priv_to_lock(priv));
@@ -685,7 +686,13 @@ static int usbhsg_ep_dequeue(struct usb_ep *ep, struct usb_request *req)
 	struct usbhsg_request *ureq = usbhsg_req_to_ureq(req);
 	struct usbhs_pipe *pipe = usbhsg_uep_to_pipe(uep);
 
-	usbhs_pkt_pop(pipe, usbhsg_ureq_to_pkt(ureq));
+	if (pipe)
+		usbhs_pkt_pop(pipe, usbhsg_ureq_to_pkt(ureq));
+
+	/*
+	 * To dequeue a request, this driver should call the usbhsg_queue_pop()
+	 * even if the pipe is NULL.
+	 */
 	usbhsg_queue_pop(uep, ureq, -ECONNRESET);
 
 	return 0;
-- 
cgit v0.10.2


From d0464bcf12af54bafe02dd23ac5bd75d825f9fdd Mon Sep 17 00:00:00 2001
From: John Youn <John.Youn@synopsys.com>
Date: Fri, 13 Nov 2015 17:02:10 +0000
Subject: usb: dwc2: Make PHY optional

Fixes commit 09a75e857790
("usb: dwc2: refactor common low-level hw code to platform.c")

The above commit consolidated the low-level phy access into a common
location. This change introduced a check from the gadget requiring
that a PHY is specified. This requirement never existed on the host
side and broke some platforms when it was moved into platform.c.

The gadget doesn't require the PHY either so remove the check.

Acked-by: Eric Anholt <eric@anholt.net>
Reported-by: Stefan Wahren <info@lategoodbye.de>
Cc: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: John Youn <johnyoun@synopsys.com>
Tested-by: Marek Szyprowski <m.szyprowski@samsung.com>
Fixes: 09a75e857790 ("usb: dwc2: refactor common low-level hw code to platform.c")
Signed-off-by: Felipe Balbi <balbi@ti.com>

diff --git a/drivers/usb/dwc2/platform.c b/drivers/usb/dwc2/platform.c
index e61d773..60b12a4 100644
--- a/drivers/usb/dwc2/platform.c
+++ b/drivers/usb/dwc2/platform.c
@@ -229,11 +229,6 @@ static int dwc2_lowlevel_hw_init(struct dwc2_hsotg *hsotg)
 			hsotg->phyif = GUSBCFG_PHYIF8;
 	}
 
-	if (!hsotg->phy && !hsotg->uphy && !hsotg->plat) {
-		dev_err(hsotg->dev, "no platform data or transceiver defined\n");
-		return -EPROBE_DEFER;
-	}
-
 	/* Clock */
 	hsotg->clk = devm_clk_get(hsotg->dev, "otg");
 	if (IS_ERR(hsotg->clk)) {
-- 
cgit v0.10.2


From 6c2dad69163fdb4ea82344dcba360fc00b4adda4 Mon Sep 17 00:00:00 2001
From: Stefan Wahren <stefan.wahren@i2se.com>
Date: Fri, 13 Nov 2015 17:02:11 +0000
Subject: usb: dwc2: Return errors from PHY

When searching for PHYs, any error was treated as if the PHY did not
exist or was not specified. Thus the probe function did not
correctly return error conditions such as -EPROBE_DEFER.

Fixed so that only a non-existing PHY is ignored and any other error
is returned.

Acked-by: Eric Anholt <eric@anholt.net>
Reported-by: Alexander Aring <alex.aring@gmail.com>
Signed-off-by: John Youn <johnyoun@synopsys.com>
Signed-off-by: Stefan Wahren <stefan.wahren@i2se.com>
Tested-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>

diff --git a/drivers/usb/dwc2/platform.c b/drivers/usb/dwc2/platform.c
index 60b12a4..d28f72e 100644
--- a/drivers/usb/dwc2/platform.c
+++ b/drivers/usb/dwc2/platform.c
@@ -212,14 +212,41 @@ static int dwc2_lowlevel_hw_init(struct dwc2_hsotg *hsotg)
 	 */
 	hsotg->phy = devm_phy_get(hsotg->dev, "usb2-phy");
 	if (IS_ERR(hsotg->phy)) {
-		hsotg->phy = NULL;
+		ret = PTR_ERR(hsotg->phy);
+		switch (ret) {
+		case -ENODEV:
+		case -ENOSYS:
+			hsotg->phy = NULL;
+			break;
+		case -EPROBE_DEFER:
+			return ret;
+		default:
+			dev_err(hsotg->dev, "error getting phy %d\n", ret);
+			return ret;
+		}
+	}
+
+	if (!hsotg->phy) {
 		hsotg->uphy = devm_usb_get_phy(hsotg->dev, USB_PHY_TYPE_USB2);
-		if (IS_ERR(hsotg->uphy))
-			hsotg->uphy = NULL;
-		else
-			hsotg->plat = dev_get_platdata(hsotg->dev);
+		if (IS_ERR(hsotg->uphy)) {
+			ret = PTR_ERR(hsotg->uphy);
+			switch (ret) {
+			case -ENODEV:
+			case -ENXIO:
+				hsotg->uphy = NULL;
+				break;
+			case -EPROBE_DEFER:
+				return ret;
+			default:
+				dev_err(hsotg->dev, "error getting usb phy %d\n",
+					ret);
+				return ret;
+			}
+		}
 	}
 
+	hsotg->plat = dev_get_platdata(hsotg->dev);
+
 	if (hsotg->phy) {
 		/*
 		 * If using the generic PHY framework, check if the PHY bus
-- 
cgit v0.10.2


From 8aa90cf2a28645c6733f8879c5fe1848c5d510b7 Mon Sep 17 00:00:00 2001
From: Stefan Wahren <stefan.wahren@i2se.com>
Date: Fri, 13 Nov 2015 17:02:12 +0000
Subject: usb: dwc2: make otg clk optional

Fixes commit 09a75e857790
("usb: dwc2: refactor common low-level hw code to platform.c")

The above commit consolidated the low-level phy access into a common
location. This change made the otg clk a requirement and broke some
platforms when it was moved into platform.c.

So make clk handling optional again.

Acked-by: Eric Anholt <eric@anholt.net>
Signed-off-by: Stefan Wahren <stefan.wahren@i2se.com>
Cc: Marek Szyprowski <m.szyprowski@samsung.com>
Acked-by: John Youn <johnyoun@synopsys.com>
Tested-by: Marek Szyprowski <m.szyprowski@samsung.com>
Fixes: 09a75e857790 ("usb: dwc2: refactor common low-level hw code to platform.c")
Signed-off-by: Felipe Balbi <balbi@ti.com>

diff --git a/drivers/usb/dwc2/platform.c b/drivers/usb/dwc2/platform.c
index d28f72e..91e2e6b 100644
--- a/drivers/usb/dwc2/platform.c
+++ b/drivers/usb/dwc2/platform.c
@@ -125,9 +125,11 @@ static int __dwc2_lowlevel_hw_enable(struct dwc2_hsotg *hsotg)
 	if (ret)
 		return ret;
 
-	ret = clk_prepare_enable(hsotg->clk);
-	if (ret)
-		return ret;
+	if (hsotg->clk) {
+		ret = clk_prepare_enable(hsotg->clk);
+		if (ret)
+			return ret;
+	}
 
 	if (hsotg->uphy)
 		ret = usb_phy_init(hsotg->uphy);
@@ -175,7 +177,8 @@ static int __dwc2_lowlevel_hw_disable(struct dwc2_hsotg *hsotg)
 	if (ret)
 		return ret;
 
-	clk_disable_unprepare(hsotg->clk);
+	if (hsotg->clk)
+		clk_disable_unprepare(hsotg->clk);
 
 	ret = regulator_bulk_disable(ARRAY_SIZE(hsotg->supplies),
 				     hsotg->supplies);
-- 
cgit v0.10.2


From e9ca7e4bb30cbf4c8d2e2e34681a967c10b3ff54 Mon Sep 17 00:00:00 2001
From: "Felipe F. Tonello" <eu@felipetonello.com>
Date: Tue, 10 Nov 2015 17:52:03 +0000
Subject: usb: gadget: f_midi: Transmit data only when IN ep is enabled

This makes sure f_midi doesn't try to enqueue data when the IN endpoint is
disabled, ie, USB cable is disconnected.

Reviewed-by: Robert Baldyga <r.baldyga@samsung.com>
Signed-off-by: Felipe F. Tonello <eu@felipetonello.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>

diff --git a/drivers/usb/gadget/function/f_midi.c b/drivers/usb/gadget/function/f_midi.c
index 42acb45..7877aa8 100644
--- a/drivers/usb/gadget/function/f_midi.c
+++ b/drivers/usb/gadget/function/f_midi.c
@@ -545,7 +545,7 @@ static void f_midi_transmit(struct f_midi *midi, struct usb_request *req)
 		}
 	}
 
-	if (req->length > 0) {
+	if (req->length > 0 && ep->enabled) {
 		int err;
 
 		err = usb_ep_queue(ep, req, GFP_ATOMIC);
-- 
cgit v0.10.2


From ad0d1a058eac46503edbc510d1ce44c5df8e0c91 Mon Sep 17 00:00:00 2001
From: "Felipe F. Tonello" <eu@felipetonello.com>
Date: Tue, 10 Nov 2015 17:52:06 +0000
Subject: usb: gadget: f_midi: fix leak on failed to enqueue out requests

This patch fixes a memory leak that occurs when an endpoint fails to enqueue
the request. If that happens the complete function will never be called, thus
never freeing the request.

Signed-off-by: Felipe F. Tonello <eu@felipetonello.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>

diff --git a/drivers/usb/gadget/function/f_midi.c b/drivers/usb/gadget/function/f_midi.c
index 7877aa8..898a570 100644
--- a/drivers/usb/gadget/function/f_midi.c
+++ b/drivers/usb/gadget/function/f_midi.c
@@ -370,6 +370,7 @@ static int f_midi_set_alt(struct usb_function *f, unsigned intf, unsigned alt)
 		if (err) {
 			ERROR(midi, "%s queue req: %d\n",
 				    midi->out_ep->name, err);
+			free_ep_req(midi->out_ep, req);
 		}
 	}
 
-- 
cgit v0.10.2


From 5c256b68348e9717c8ba23521ef52ef7f69d4448 Mon Sep 17 00:00:00 2001
From: Peter Chen <peter.chen@freescale.com>
Date: Wed, 16 Sep 2015 15:52:33 +0800
Subject: usb: phy: mxs: add "fsl,imx6ul-usbphy" compatible string

Add "fsl,imx6ul-usbphy" compatible string for iMX6ul usb phy

Signed-off-by: Peter Chen <peter.chen@freescale.com>
Signed-off-by: Li Jun <jun.li@freescale.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>

diff --git a/drivers/usb/phy/phy-mxs-usb.c b/drivers/usb/phy/phy-mxs-usb.c
index b7536af..c2936dc 100644
--- a/drivers/usb/phy/phy-mxs-usb.c
+++ b/drivers/usb/phy/phy-mxs-usb.c
@@ -143,12 +143,17 @@ static const struct mxs_phy_data imx6sx_phy_data = {
 	.flags = MXS_PHY_DISCONNECT_LINE_WITHOUT_VBUS,
 };
 
+static const struct mxs_phy_data imx6ul_phy_data = {
+	.flags = MXS_PHY_DISCONNECT_LINE_WITHOUT_VBUS,
+};
+
 static const struct of_device_id mxs_phy_dt_ids[] = {
 	{ .compatible = "fsl,imx6sx-usbphy", .data = &imx6sx_phy_data, },
 	{ .compatible = "fsl,imx6sl-usbphy", .data = &imx6sl_phy_data, },
 	{ .compatible = "fsl,imx6q-usbphy", .data = &imx6q_phy_data, },
 	{ .compatible = "fsl,imx23-usbphy", .data = &imx23_phy_data, },
 	{ .compatible = "fsl,vf610-usbphy", .data = &vf610_phy_data, },
+	{ .compatible = "fsl,imx6ul-usbphy", .data = &imx6ul_phy_data, },
 	{ /* sentinel */ }
 };
 MODULE_DEVICE_TABLE(of, mxs_phy_dt_ids);
-- 
cgit v0.10.2


From 7fe941422339c96072fc1eec4e8b484ccf7ccd0e Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Date: Thu, 19 Nov 2015 13:00:14 +0900
Subject: vfio: Drop owner assignment from platform_driver

platform_driver does not need to set an owner because
platform_driver_register() will set it.

Signed-off-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Acked-by: Baptiste Reynal <b.reynal@virtualopensystems.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>

diff --git a/drivers/vfio/platform/vfio_platform.c b/drivers/vfio/platform/vfio_platform.c
index f1625dc..b1cc3a7 100644
--- a/drivers/vfio/platform/vfio_platform.c
+++ b/drivers/vfio/platform/vfio_platform.c
@@ -92,7 +92,6 @@ static struct platform_driver vfio_platform_driver = {
 	.remove		= vfio_platform_remove,
 	.driver	= {
 		.name	= "vfio-platform",
-		.owner	= THIS_MODULE,
 	},
 };
 
-- 
cgit v0.10.2


From 7d10f4e0797597a31f2724b46e619ee10d8eb501 Mon Sep 17 00:00:00 2001
From: Julia Lawall <Julia.Lawall@lip6.fr>
Date: Sat, 14 Nov 2015 11:07:01 +0100
Subject: vfio-pci: constify pci_error_handlers structures

This pci_error_handlers structure is never modified, like all the other
pci_error_handlers structures, so declare it as const.

Done with the help of Coccinelle.

Signed-off-by: Julia Lawall <Julia.Lawall@lip6.fr>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>

diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 32b88bd..2760a7b 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -1035,7 +1035,7 @@ static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev,
 	return PCI_ERS_RESULT_CAN_RECOVER;
 }
 
-static struct pci_error_handlers vfio_err_handlers = {
+static const struct pci_error_handlers vfio_err_handlers = {
 	.error_detected = vfio_pci_aer_err_detected,
 };
 
-- 
cgit v0.10.2


From f74875dc36135ebae82a8e005f4b7f52289d2c40 Mon Sep 17 00:00:00 2001
From: Stefan Wahren <stefan.wahren@i2se.com>
Date: Thu, 12 Nov 2015 21:08:34 +0000
Subject: usb: dwc2: fix kernel oops during driver probe

This patch make sure that all necessary members of dwc2_hsotg
are initialized before the irq handler is requested. So
the kernel oops triggered by dwc2_handle_common_intr has
been fixed.

  dwc2 20980000.usb: Configuration mismatch. Forcing host mode
  dwc2 20980000.usb: no platform data or transceiver defined
  Unable to handle kernel paging request at virtual address cc860040
  pgd = c0004000
  [cc860040] *pgd=0b41e811, *pte=00000000, *ppte=00000000
  Internal error: Oops: 7 [#1] ARM
  CPU: 0 PID: 1 Comm: swapper Not tainted 4.3.0-rc3+ #19
  Hardware name: BCM2835
  task: cb494000 ti: cb4d0000 task.ti: cb4d0000
  PC is at dwc2_is_controller_alive+0x18/0x34
  LR is at dwc2_handle_common_intr+0x24/0xb60

Acked-by: John Youn <johnyoun@synopsys.com>
Tested-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Stefan Wahren <stefan.wahren@i2se.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>

diff --git a/drivers/usb/dwc2/platform.c b/drivers/usb/dwc2/platform.c
index 91e2e6b..39c1cbf 100644
--- a/drivers/usb/dwc2/platform.c
+++ b/drivers/usb/dwc2/platform.c
@@ -367,20 +367,6 @@ static int dwc2_driver_probe(struct platform_device *dev)
 	if (retval)
 		return retval;
 
-	irq = platform_get_irq(dev, 0);
-	if (irq < 0) {
-		dev_err(&dev->dev, "missing IRQ resource\n");
-		return irq;
-	}
-
-	dev_dbg(hsotg->dev, "registering common handler for irq%d\n",
-		irq);
-	retval = devm_request_irq(hsotg->dev, irq,
-				  dwc2_handle_common_intr, IRQF_SHARED,
-				  dev_name(hsotg->dev), hsotg);
-	if (retval)
-		return retval;
-
 	res = platform_get_resource(dev, IORESOURCE_MEM, 0);
 	hsotg->regs = devm_ioremap_resource(&dev->dev, res);
 	if (IS_ERR(hsotg->regs))
@@ -415,6 +401,20 @@ static int dwc2_driver_probe(struct platform_device *dev)
 
 	dwc2_set_all_params(hsotg->core_params, -1);
 
+	irq = platform_get_irq(dev, 0);
+	if (irq < 0) {
+		dev_err(&dev->dev, "missing IRQ resource\n");
+		return irq;
+	}
+
+	dev_dbg(hsotg->dev, "registering common handler for irq%d\n",
+		irq);
+	retval = devm_request_irq(hsotg->dev, irq,
+				  dwc2_handle_common_intr, IRQF_SHARED,
+				  dev_name(hsotg->dev), hsotg);
+	if (retval)
+		return retval;
+
 	retval = dwc2_lowlevel_hw_enable(hsotg);
 	if (retval)
 		return retval;
-- 
cgit v0.10.2


From 7200be7c81cc1a1c54a79348a724c16345c5242f Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Thu, 19 Nov 2015 16:17:24 -0800
Subject: vfio: platform: remove needless stack usage

request_module already takes format strings, so no need to duplicate
the effort.

Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>

diff --git a/drivers/vfio/platform/vfio_platform_common.c b/drivers/vfio/platform/vfio_platform_common.c
index a1c50d6..418cdd9 100644
--- a/drivers/vfio/platform/vfio_platform_common.c
+++ b/drivers/vfio/platform/vfio_platform_common.c
@@ -51,13 +51,10 @@ static vfio_platform_reset_fn_t vfio_platform_lookup_reset(const char *compat,
 
 static void vfio_platform_get_reset(struct vfio_platform_device *vdev)
 {
-	char modname[256];
-
 	vdev->reset = vfio_platform_lookup_reset(vdev->compat,
 						&vdev->reset_module);
 	if (!vdev->reset) {
-		snprintf(modname, 256, "vfio-reset:%s", vdev->compat);
-		request_module(modname);
+		request_module("vfio-reset:%s", vdev->compat);
 		vdev->reset = vfio_platform_lookup_reset(vdev->compat,
 							 &vdev->reset_module);
 	}
-- 
cgit v0.10.2


From 4f1dd973acffeb67142d39f3bfbc4dd0569f2da6 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 19 Nov 2015 15:16:30 +0100
Subject: sata/mvebu: use #ifdef around suspend/resume code

The newly added suspend/resume implementation for ahci_mvebu causes
a link error when CONFIG_PM_SLEEP is disabled:

ERROR: "ahci_platform_suspend_host" [drivers/ata/ahci_mvebu.ko] undefined!
ERROR: "ahci_platform_resume_host" [drivers/ata/ahci_mvebu.ko] undefined!

This adds the same #ifdef here that exists in the ahci_platform driver
which defines the above functions.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Fixes: d6ecf1581488 ("ata: ahci_mvebu: add suspend/resume support")
Acked-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Signed-off-by: Tejun Heo <tj@kernel.org>

diff --git a/drivers/ata/ahci_mvebu.c b/drivers/ata/ahci_mvebu.c
index 8490d37..f7a7fa8 100644
--- a/drivers/ata/ahci_mvebu.c
+++ b/drivers/ata/ahci_mvebu.c
@@ -62,6 +62,7 @@ static void ahci_mvebu_regret_option(struct ahci_host_priv *hpriv)
 	writel(0x80, hpriv->mmio + AHCI_VENDOR_SPECIFIC_0_DATA);
 }
 
+#ifdef CONFIG_PM_SLEEP
 static int ahci_mvebu_suspend(struct platform_device *pdev, pm_message_t state)
 {
 	return ahci_platform_suspend_host(&pdev->dev);
@@ -81,6 +82,10 @@ static int ahci_mvebu_resume(struct platform_device *pdev)
 
 	return ahci_platform_resume_host(&pdev->dev);
 }
+#else
+#define ahci_mvebu_suspend NULL
+#define ahci_mvebu_resume NULL
+#endif
 
 static const struct ata_port_info ahci_mvebu_port_info = {
 	.flags	   = AHCI_FLAG_COMMON,
-- 
cgit v0.10.2


From 049af1060bb81532f2700762a8ba71eb3fa81f5a Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Sat, 21 Nov 2015 13:32:21 +0300
Subject: vfio: fix a warning message

The first argument to the WARN() macro has to be a condition.  I'm sort
of disappointed that this code doesn't generate a compiler warning.  I
guess -Wformat-extra-args doesn't work in the kernel.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>

diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index de632da..9da0703 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -682,7 +682,7 @@ static int vfio_group_nb_add_dev(struct vfio_group *group, struct device *dev)
 		return 0;
 
 	/* TODO Prevent device auto probing */
-	WARN("Device %s added to live group %d!\n", dev_name(dev),
+	WARN(1, "Device %s added to live group %d!\n", dev_name(dev),
 	     iommu_group_id(group->iommu_group));
 
 	return 0;
-- 
cgit v0.10.2


From 2e9fed42209b17116c6221e136ccbd3f252f5f86 Mon Sep 17 00:00:00 2001
From: Ioana Ciornei <ciorneiioana@gmail.com>
Date: Tue, 27 Oct 2015 20:40:56 +0200
Subject: staging: iio: dummy: complete IIO events delivery to userspace

Starting with commit fd2bb310ca (Staging: iio: Move evgen interrupt
generation to irq_work) event processing is handled by calling
both the top half and the threaded part properly simulating real
hardware interrupts making use of threaded interrupts.
This way the processing is split in 2 parts:

* the IRQ handler that runs in IRQ context and only saves the event
timestamp
* the threaded handler that runs in process context, reads the events
and pushes the in the userspace.

If the IRQ handler returns IRQ_HANDLED the threaded handler is not
even being called since the interrupt is considered to be processed.
Because the iio dummy driver processes the events in the threaded
handler the IRQ handler must return IRQ_WAKE_THREAD so that the
threaded part would be awakened and called.

Signed-off-by: Ioana Ciornei <ciorneiioana@gmail.com>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>

diff --git a/drivers/staging/iio/iio_simple_dummy_events.c b/drivers/staging/iio/iio_simple_dummy_events.c
index bfbf1c5..6eb600f 100644
--- a/drivers/staging/iio/iio_simple_dummy_events.c
+++ b/drivers/staging/iio/iio_simple_dummy_events.c
@@ -159,7 +159,7 @@ static irqreturn_t iio_simple_dummy_get_timestamp(int irq, void *private)
 	struct iio_dummy_state *st = iio_priv(indio_dev);
 
 	st->event_timestamp = iio_get_time_ns();
-	return IRQ_HANDLED;
+	return IRQ_WAKE_THREAD;
 }
 
 /**
-- 
cgit v0.10.2


From 45a6b8218df54087c3bb8dc731424d8789d31790 Mon Sep 17 00:00:00 2001
From: Matt Ranostay <mranostay@gmail.com>
Date: Mon, 26 Oct 2015 20:18:23 -0700
Subject: iio: lidar: return -EINVAL on invalid signal

Returning zero from the measurment function has the side effect of
corrupting the triggered buffer readings, better to use -EINVAL than
a zero measurement reading.

The INVALID status happens even it isn't out of range
sometimes roughly once every second or two. This can be from an
invalid second signal return path. Hence there are spurious zero
readings from the triggered buffer, and warning messages in the kernel
log.

Signed-off-by: Matt Ranostay <mranostay@gmail.com>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>

diff --git a/drivers/iio/proximity/pulsedlight-lidar-lite-v2.c b/drivers/iio/proximity/pulsedlight-lidar-lite-v2.c
index 961f9f99..e544fcf 100644
--- a/drivers/iio/proximity/pulsedlight-lidar-lite-v2.c
+++ b/drivers/iio/proximity/pulsedlight-lidar-lite-v2.c
@@ -130,10 +130,10 @@ static int lidar_get_measurement(struct lidar_data *data, u16 *reg)
 		if (ret < 0)
 			break;
 
-		/* return 0 since laser is likely pointed out of range */
+		/* return -EINVAL since laser is likely pointed out of range */
 		if (ret & LIDAR_REG_STATUS_INVALID) {
 			*reg = 0;
-			ret = 0;
+			ret = -EINVAL;
 			break;
 		}
 
@@ -197,7 +197,7 @@ static irqreturn_t lidar_trigger_handler(int irq, void *private)
 	if (!ret) {
 		iio_push_to_buffers_with_timestamp(indio_dev, data->buffer,
 						   iio_get_time_ns());
-	} else {
+	} else if (ret != -EINVAL) {
 		dev_err(&data->client->dev, "cannot read LIDAR measurement");
 	}
 
-- 
cgit v0.10.2


From 8386c27587594899e4b638010fa0187fe068b295 Mon Sep 17 00:00:00 2001
From: Matt Ranostay <mranostay@gmail.com>
Date: Sat, 7 Nov 2015 20:21:28 -0800
Subject: iio: light: apds9960: correct ->last_busy count

Add missing pm_runtime_mark_last_busy to apds9960_set_power_state
function.

Unless pm_runtime_mark_last_busy is called the
pm_runtime_put_autosuspend may put the device into suspend before the
delay time requested.

Signed-off-by: Matt Ranostay <mranostay@gmail.com>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>

diff --git a/drivers/iio/light/apds9960.c b/drivers/iio/light/apds9960.c
index 7d269ef..f6a07dc 100644
--- a/drivers/iio/light/apds9960.c
+++ b/drivers/iio/light/apds9960.c
@@ -453,6 +453,7 @@ static int apds9960_set_power_state(struct apds9960_data *data, bool on)
 			usleep_range(data->als_adc_int_us,
 				     APDS9960_MAX_INT_TIME_IN_US);
 	} else {
+		pm_runtime_mark_last_busy(dev);
 		ret = pm_runtime_put_autosuspend(dev);
 	}
 
-- 
cgit v0.10.2


From 231bfe53c57e89857753c940192acba933cba56c Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Sat, 21 Nov 2015 13:33:00 +0300
Subject: iio: fix some warning messages

WARN_ON() only takes a condition argument.  I have changed these to
WARN() instead.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Cc: <Stable@vger.kernel.org>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>

diff --git a/drivers/iio/industrialio-buffer.c b/drivers/iio/industrialio-buffer.c
index d7e908a..0f6f63b 100644
--- a/drivers/iio/industrialio-buffer.c
+++ b/drivers/iio/industrialio-buffer.c
@@ -302,7 +302,7 @@ static int iio_scan_mask_set(struct iio_dev *indio_dev,
 	if (trialmask == NULL)
 		return -ENOMEM;
 	if (!indio_dev->masklength) {
-		WARN_ON("Trying to set scanmask prior to registering buffer\n");
+		WARN(1, "Trying to set scanmask prior to registering buffer\n");
 		goto err_invalid_mask;
 	}
 	bitmap_copy(trialmask, buffer->scan_mask, indio_dev->masklength);
diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c
index 208358f..159ede6 100644
--- a/drivers/iio/industrialio-core.c
+++ b/drivers/iio/industrialio-core.c
@@ -655,7 +655,7 @@ int __iio_device_attr_init(struct device_attribute *dev_attr,
 			break;
 		case IIO_SEPARATE:
 			if (!chan->indexed) {
-				WARN_ON("Differential channels must be indexed\n");
+				WARN(1, "Differential channels must be indexed\n");
 				ret = -EINVAL;
 				goto error_free_full_postfix;
 			}
-- 
cgit v0.10.2


From d4c65fe4ed69a62a30a680789322ed677e3438af Mon Sep 17 00:00:00 2001
From: Julia Lawall <Julia.Lawall@lip6.fr>
Date: Wed, 18 Nov 2015 23:04:13 +0100
Subject: iio: adc: spmi-vadc: add missing of_node_put

for_each_available_child_of_node performs an of_node_get on each iteration,
so a break out of the loop requires an of_node_put.

A simplified version of the semantic patch that fixes this problem is as
follows (http://coccinelle.lip6.fr):

// <smpl>
@@
expression root,e;
local idexpression child;
@@

 for_each_available_child_of_node(root, child) {
   ... when != of_node_put(child)
       when != e = child
(
   return child;
|
+  of_node_put(child);
?  return ...;
)
   ...
 }
// </smpl>

Signed-off-by: Julia Lawall <Julia.Lawall@lip6.fr>
Cc: <Stable@vger.kernel.org>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>

diff --git a/drivers/iio/adc/qcom-spmi-vadc.c b/drivers/iio/adc/qcom-spmi-vadc.c
index 0c4618b..c2babe5 100644
--- a/drivers/iio/adc/qcom-spmi-vadc.c
+++ b/drivers/iio/adc/qcom-spmi-vadc.c
@@ -839,8 +839,10 @@ static int vadc_get_dt_data(struct vadc_priv *vadc, struct device_node *node)
 
 	for_each_available_child_of_node(node, child) {
 		ret = vadc_get_dt_channel_data(vadc->dev, &prop, child);
-		if (ret)
+		if (ret) {
+			of_node_put(child);
 			return ret;
+		}
 
 		vadc->chan_props[index] = prop;
 
-- 
cgit v0.10.2


From ddaaf4e291dd63db0667991e4a335fcf3a7df13e Mon Sep 17 00:00:00 2001
From: Stephane Eranian <eranian@google.com>
Date: Thu, 12 Nov 2015 11:00:03 +0100
Subject: perf/core: Fix RCU problem with cgroup context switching code

The RCU checker detected RCU violation in the cgroup switching routines
perf_cgroup_sched_in() and perf_cgroup_sched_out(). We were dereferencing
cgroup from task without holding the RCU lock.

Fix this by holding the RCU read lock. We move the locking from
perf_cgroup_switch() to avoid double locking.

Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: edumazet@google.com
Link: http://lkml.kernel.org/r/1447322404-10920-2-git-send-email-eranian@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 36babfd..60e71ca 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -489,7 +489,6 @@ static void perf_cgroup_switch(struct task_struct *task, int mode)
 	 * we reschedule only in the presence of cgroup
 	 * constrained events.
 	 */
-	rcu_read_lock();
 
 	list_for_each_entry_rcu(pmu, &pmus, entry) {
 		cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
@@ -531,8 +530,6 @@ static void perf_cgroup_switch(struct task_struct *task, int mode)
 		}
 	}
 
-	rcu_read_unlock();
-
 	local_irq_restore(flags);
 }
 
@@ -542,6 +539,7 @@ static inline void perf_cgroup_sched_out(struct task_struct *task,
 	struct perf_cgroup *cgrp1;
 	struct perf_cgroup *cgrp2 = NULL;
 
+	rcu_read_lock();
 	/*
 	 * we come here when we know perf_cgroup_events > 0
 	 */
@@ -561,6 +559,8 @@ static inline void perf_cgroup_sched_out(struct task_struct *task,
 	 */
 	if (cgrp1 != cgrp2)
 		perf_cgroup_switch(task, PERF_CGROUP_SWOUT);
+
+	rcu_read_unlock();
 }
 
 static inline void perf_cgroup_sched_in(struct task_struct *prev,
@@ -569,6 +569,7 @@ static inline void perf_cgroup_sched_in(struct task_struct *prev,
 	struct perf_cgroup *cgrp1;
 	struct perf_cgroup *cgrp2 = NULL;
 
+	rcu_read_lock();
 	/*
 	 * we come here when we know perf_cgroup_events > 0
 	 */
@@ -584,6 +585,8 @@ static inline void perf_cgroup_sched_in(struct task_struct *prev,
 	 */
 	if (cgrp1 != cgrp2)
 		perf_cgroup_switch(task, PERF_CGROUP_SWIN);
+
+	rcu_read_unlock();
 }
 
 static inline int perf_cgroup_connect(int fd, struct perf_event *event,
@@ -9452,7 +9455,9 @@ static void perf_cgroup_css_free(struct cgroup_subsys_state *css)
 static int __perf_cgroup_move(void *info)
 {
 	struct task_struct *task = info;
+	rcu_read_lock();
 	perf_cgroup_switch(task, PERF_CGROUP_SWOUT | PERF_CGROUP_SWIN);
+	rcu_read_unlock();
 	return 0;
 }
 
-- 
cgit v0.10.2


From 614e4c4ebc75517295bccd29b20ddbc5b52af6fc Mon Sep 17 00:00:00 2001
From: Stephane Eranian <eranian@google.com>
Date: Thu, 12 Nov 2015 11:00:04 +0100
Subject: perf/core: Robustify the perf_cgroup_from_task() RCU checks

This patch reinforces the lockdep checks performed by
perf_cgroup_from_tsk() by passing the perf_event_context
whenever possible. It is okay to not hold the RCU read lock
when we know we hold the ctx->lock. This patch makes sure this
property holds.

In some functions, such as perf_cgroup_sched_in(), we do not
pass the context because we are sure we are holding the RCU
read lock.

Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: edumazet@google.com
Link: http://lkml.kernel.org/r/1447322404-10920-3-git-send-email-eranian@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/arch/x86/kernel/cpu/perf_event_intel_cqm.c b/arch/x86/kernel/cpu/perf_event_intel_cqm.c
index 377e8f8..a316ca9 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_cqm.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_cqm.c
@@ -298,7 +298,7 @@ static bool __match_event(struct perf_event *a, struct perf_event *b)
 static inline struct perf_cgroup *event_to_cgroup(struct perf_event *event)
 {
 	if (event->attach_state & PERF_ATTACH_TASK)
-		return perf_cgroup_from_task(event->hw.target);
+		return perf_cgroup_from_task(event->hw.target, event->ctx);
 
 	return event->cgrp;
 }
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index d841d33..f9828a4 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -697,9 +697,11 @@ struct perf_cgroup {
  * if there is no cgroup event for the current CPU context.
  */
 static inline struct perf_cgroup *
-perf_cgroup_from_task(struct task_struct *task)
+perf_cgroup_from_task(struct task_struct *task, struct perf_event_context *ctx)
 {
-	return container_of(task_css(task, perf_event_cgrp_id),
+	return container_of(task_css_check(task, perf_event_cgrp_id,
+					   ctx ? lockdep_is_held(&ctx->lock)
+					       : true),
 			    struct perf_cgroup, css);
 }
 #endif /* CONFIG_CGROUP_PERF */
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 60e71ca..1ac857a 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -435,7 +435,7 @@ static inline void update_cgrp_time_from_event(struct perf_event *event)
 	if (!is_cgroup_event(event))
 		return;
 
-	cgrp = perf_cgroup_from_task(current);
+	cgrp = perf_cgroup_from_task(current, event->ctx);
 	/*
 	 * Do not update time when cgroup is not active
 	 */
@@ -458,7 +458,7 @@ perf_cgroup_set_timestamp(struct task_struct *task,
 	if (!task || !ctx->nr_cgroups)
 		return;
 
-	cgrp = perf_cgroup_from_task(task);
+	cgrp = perf_cgroup_from_task(task, ctx);
 	info = this_cpu_ptr(cgrp->info);
 	info->timestamp = ctx->timestamp;
 }
@@ -521,8 +521,10 @@ static void perf_cgroup_switch(struct task_struct *task, int mode)
 				 * set cgrp before ctxsw in to allow
 				 * event_filter_match() to not have to pass
 				 * task around
+				 * we pass the cpuctx->ctx to perf_cgroup_from_task()
+				 * because cgorup events are only per-cpu
 				 */
-				cpuctx->cgrp = perf_cgroup_from_task(task);
+				cpuctx->cgrp = perf_cgroup_from_task(task, &cpuctx->ctx);
 				cpu_ctx_sched_in(cpuctx, EVENT_ALL, task);
 			}
 			perf_pmu_enable(cpuctx->ctx.pmu);
@@ -542,15 +544,17 @@ static inline void perf_cgroup_sched_out(struct task_struct *task,
 	rcu_read_lock();
 	/*
 	 * we come here when we know perf_cgroup_events > 0
+	 * we do not need to pass the ctx here because we know
+	 * we are holding the rcu lock
 	 */
-	cgrp1 = perf_cgroup_from_task(task);
+	cgrp1 = perf_cgroup_from_task(task, NULL);
 
 	/*
 	 * next is NULL when called from perf_event_enable_on_exec()
 	 * that will systematically cause a cgroup_switch()
 	 */
 	if (next)
-		cgrp2 = perf_cgroup_from_task(next);
+		cgrp2 = perf_cgroup_from_task(next, NULL);
 
 	/*
 	 * only schedule out current cgroup events if we know
@@ -572,11 +576,13 @@ static inline void perf_cgroup_sched_in(struct task_struct *prev,
 	rcu_read_lock();
 	/*
 	 * we come here when we know perf_cgroup_events > 0
+	 * we do not need to pass the ctx here because we know
+	 * we are holding the rcu lock
 	 */
-	cgrp1 = perf_cgroup_from_task(task);
+	cgrp1 = perf_cgroup_from_task(task, NULL);
 
 	/* prev can never be NULL */
-	cgrp2 = perf_cgroup_from_task(prev);
+	cgrp2 = perf_cgroup_from_task(prev, NULL);
 
 	/*
 	 * only need to schedule in cgroup events if we are changing
-- 
cgit v0.10.2


From daecbd267daad36c1cd90254f502610f82efcb30 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 16 Nov 2015 11:07:44 +0100
Subject: perf: Update email address in MAINTAINERS

While still valid, I'm trying to phase out this email address.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/MAINTAINERS b/MAINTAINERS
index 050d0e7..a45fe35 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8284,7 +8284,7 @@ F:	include/linux/delayacct.h
 F:	kernel/delayacct.c
 
 PERFORMANCE EVENTS SUBSYSTEM
-M:	Peter Zijlstra <a.p.zijlstra@chello.nl>
+M:	Peter Zijlstra <peterz@infradead.org>
 M:	Ingo Molnar <mingo@redhat.com>
 M:	Arnaldo Carvalho de Melo <acme@kernel.org>
 L:	linux-kernel@vger.kernel.org
-- 
cgit v0.10.2


From b28ae9560b693bcd2e9f4d6d9c415d5380b7c3c5 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Tue, 20 Oct 2015 11:46:33 -0700
Subject: perf/x86: Fix LBR call stack save/restore

This fixes a bug I added in the following commit:

  90405aa02247 ("perf/x86/intel/lbr: Limit LBR accesses to TOS in callstack mode")

The bug could lead to lost LBR call stacks. When restoring the LBR state
we need to use the TOS of the previous context, not the current context.
To do that we need to save/restore the TOS.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: acme@kernel.org
Cc: jolsa@kernel.org
Link: http://lkml.kernel.org/r/1445366797-30894-1-git-send-email-andi@firstfloor.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 499f533..ffa7a92 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -627,6 +627,7 @@ struct x86_perf_task_context {
 	u64 lbr_from[MAX_LBR_ENTRIES];
 	u64 lbr_to[MAX_LBR_ENTRIES];
 	u64 lbr_info[MAX_LBR_ENTRIES];
+	int tos;
 	int lbr_callstack_users;
 	int lbr_stack_state;
 };
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index bfd0b71..659f01e 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -239,7 +239,7 @@ static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx)
 	}
 
 	mask = x86_pmu.lbr_nr - 1;
-	tos = intel_pmu_lbr_tos();
+	tos = task_ctx->tos;
 	for (i = 0; i < tos; i++) {
 		lbr_idx = (tos - i) & mask;
 		wrmsrl(x86_pmu.lbr_from + lbr_idx, task_ctx->lbr_from[i]);
@@ -247,6 +247,7 @@ static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx)
 		if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
 			wrmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]);
 	}
+	wrmsrl(x86_pmu.lbr_tos, tos);
 	task_ctx->lbr_stack_state = LBR_NONE;
 }
 
@@ -270,6 +271,7 @@ static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx)
 		if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
 			rdmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]);
 	}
+	task_ctx->tos = tos;
 	task_ctx->lbr_stack_state = LBR_VALID;
 }
 
-- 
cgit v0.10.2


From 90eec103b96e30401c0b846045bf8a1c7159b6da Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 16 Nov 2015 11:08:45 +0100
Subject: treewide: Remove old email address

There were still a number of references to my old Red Hat email
address in the kernel source. Remove these while keeping the
Red Hat copyright notices intact.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/arch/blackfin/kernel/perf_event.c b/arch/blackfin/kernel/perf_event.c
index 1e9c8b0..170d786 100644
--- a/arch/blackfin/kernel/perf_event.c
+++ b/arch/blackfin/kernel/perf_event.c
@@ -14,7 +14,7 @@
  *  Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
  *  Copyright (C) 2009 Jaswinder Singh Rajput
  *  Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
- *  Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ *  Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra
  *  Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
  *
  * ppc:
diff --git a/arch/sh/kernel/perf_event.c b/arch/sh/kernel/perf_event.c
index 7cfd7f1..4dca183 100644
--- a/arch/sh/kernel/perf_event.c
+++ b/arch/sh/kernel/perf_event.c
@@ -10,7 +10,7 @@
  *  Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
  *  Copyright (C) 2009 Jaswinder Singh Rajput
  *  Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
- *  Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ *  Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra
  *  Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
  *
  * ppc:
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index b0da5ae..3091267 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -9,7 +9,7 @@
  *  Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
  *  Copyright (C) 2009 Jaswinder Singh Rajput
  *  Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
- *  Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ *  Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra
  */
 
 #include <linux/perf_event.h>
diff --git a/arch/tile/kernel/perf_event.c b/arch/tile/kernel/perf_event.c
index bb509ce..8767060 100644
--- a/arch/tile/kernel/perf_event.c
+++ b/arch/tile/kernel/perf_event.c
@@ -21,7 +21,7 @@
  *  Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
  *  Copyright (C) 2009 Jaswinder Singh Rajput
  *  Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
- *  Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ *  Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra
  *  Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
  *  Copyright (C) 2009 Google, Inc., Stephane Eranian
  */
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 4562cf0..2bf79d7 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -5,7 +5,7 @@
  *  Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
  *  Copyright (C) 2009 Jaswinder Singh Rajput
  *  Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
- *  Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ *  Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra
  *  Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
  *  Copyright (C) 2009 Google, Inc., Stephane Eranian
  *
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index ffa7a92..ab18b8a 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -5,7 +5,7 @@
  *  Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
  *  Copyright (C) 2009 Jaswinder Singh Rajput
  *  Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
- *  Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ *  Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra
  *  Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
  *  Copyright (C) 2009 Google, Inc., Stephane Eranian
  *
diff --git a/arch/x86/kernel/irq_work.c b/arch/x86/kernel/irq_work.c
index dc5fa6a..3512ba6 100644
--- a/arch/x86/kernel/irq_work.c
+++ b/arch/x86/kernel/irq_work.c
@@ -1,7 +1,7 @@
 /*
  * x86 specific code for irq_work
  *
- * Copyright (C) 2010 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ * Copyright (C) 2010 Red Hat, Inc., Peter Zijlstra
  */
 
 #include <linux/kernel.h>
diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index db284bf..9dbb739 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -5,7 +5,7 @@
  * Copyright 2001 Red Hat, Inc.
  * Based on code from mm/memory.c Copyright Linus Torvalds and others.
  *
- * Copyright 2011 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ * Copyright 2011 Red Hat, Inc., Peter Zijlstra
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index 8dde559..0536524 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -5,7 +5,7 @@
  * Jump label support
  *
  * Copyright (C) 2009-2012 Jason Baron <jbaron@redhat.com>
- * Copyright (C) 2011-2012 Peter Zijlstra <pzijlstr@redhat.com>
+ * Copyright (C) 2011-2012 Red Hat, Inc., Peter Zijlstra
  *
  * DEPRECATED API:
  *
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 70400dc..c57e424 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -2,7 +2,7 @@
  * Runtime locking correctness validator
  *
  *  Copyright (C) 2006,2007 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
- *  Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ *  Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
  *
  * see Documentation/locking/lockdep-design.txt for more details.
  */
diff --git a/include/linux/proportions.h b/include/linux/proportions.h
index 5440f64..2122133 100644
--- a/include/linux/proportions.h
+++ b/include/linux/proportions.h
@@ -1,7 +1,7 @@
 /*
  * FLoating proportions
  *
- *  Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ *  Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
  *
  * This file contains the public data structure and API definitions.
  */
diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index 0bdc72f..4a29c75 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -21,7 +21,7 @@
  * Authors:
  *	Srikar Dronamraju
  *	Jim Keniston
- * Copyright (C) 2011-2012 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ * Copyright (C) 2011-2012 Red Hat, Inc., Peter Zijlstra
  */
 
 #include <linux/errno.h>
diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c
index d659487..9c41800 100644
--- a/kernel/events/callchain.c
+++ b/kernel/events/callchain.c
@@ -3,7 +3,7 @@
  *
  *  Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
  *  Copyright (C) 2008-2011 Red Hat, Inc., Ingo Molnar
- *  Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ *  Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra
  *  Copyright  �  2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
  *
  * For licensing details see kernel-base/COPYING
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 1ac857a..5854fcf 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3,7 +3,7 @@
  *
  *  Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
  *  Copyright (C) 2008-2011 Red Hat, Inc., Ingo Molnar
- *  Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ *  Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra
  *  Copyright  ©  2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
  *
  * For licensing details see kernel-base/COPYING
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index b5d1ea7..adfdc05 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -3,7 +3,7 @@
  *
  *  Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
  *  Copyright (C) 2008-2011 Red Hat, Inc., Ingo Molnar
- *  Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ *  Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra
  *  Copyright  ©  2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
  *
  * For licensing details see kernel-base/COPYING
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 4e5e979..7dad849 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -19,7 +19,7 @@
  * Authors:
  *	Srikar Dronamraju
  *	Jim Keniston
- * Copyright (C) 2011-2012 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ * Copyright (C) 2011-2012 Red Hat, Inc., Peter Zijlstra
  */
 
 #include <linux/kernel.h>
diff --git a/kernel/irq_work.c b/kernel/irq_work.c
index cbf9fb8..bcf107c 100644
--- a/kernel/irq_work.c
+++ b/kernel/irq_work.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2010 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ * Copyright (C) 2010 Red Hat, Inc., Peter Zijlstra
  *
  * Provides a framework for enqueueing and running callbacks from hardirq
  * context. The enqueueing is NMI-safe.
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index f7dd15d..05254ee 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -2,7 +2,7 @@
  * jump label support
  *
  * Copyright (C) 2009 Jason Baron <jbaron@redhat.com>
- * Copyright (C) 2011 Peter Zijlstra <pzijlstr@redhat.com>
+ * Copyright (C) 2011 Peter Zijlstra
  *
  */
 #include <linux/memory.h>
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index deae390..60ace56 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -6,7 +6,7 @@
  * Started by Ingo Molnar:
  *
  *  Copyright (C) 2006,2007 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
- *  Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ *  Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
  *
  * this code maps all the lock dependencies as they occur in a live kernel
  * and will warn about the following classes of locking bugs:
diff --git a/kernel/locking/lockdep_proc.c b/kernel/locking/lockdep_proc.c
index d83d798..dbb61a3 100644
--- a/kernel/locking/lockdep_proc.c
+++ b/kernel/locking/lockdep_proc.c
@@ -6,7 +6,7 @@
  * Started by Ingo Molnar:
  *
  *  Copyright (C) 2006,2007 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
- *  Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ *  Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
  *
  * Code for /proc/lockdep and /proc/lockdep_stats:
  *
diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c
index c0a2051..caf4041 100644
--- a/kernel/sched/clock.c
+++ b/kernel/sched/clock.c
@@ -1,7 +1,7 @@
 /*
  * sched_clock for unstable cpu clocks
  *
- *  Copyright (C) 2008 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ *  Copyright (C) 2008 Red Hat, Inc., Peter Zijlstra
  *
  *  Updates and enhancements:
  *    Copyright (C) 2008 Red Hat, Inc. Steven Rostedt <srostedt@redhat.com>
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index f04fda8..90e26b1 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -17,7 +17,7 @@
  *  Copyright (C) 2007, Thomas Gleixner <tglx@linutronix.de>
  *
  *  Adaptive scheduling granularity, math enhancements by Peter Zijlstra
- *  Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ *  Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
  */
 
 #include <linux/latencytop.h>
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index abfc903..cc9f7a9 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -1,7 +1,7 @@
 /*
  * trace event based perf event profiling/tracing
  *
- * Copyright (C) 2009 Red Hat Inc, Peter Zijlstra <pzijlstr@redhat.com>
+ * Copyright (C) 2009 Red Hat Inc, Peter Zijlstra
  * Copyright (C) 2009-2010 Frederic Weisbecker <fweisbec@gmail.com>
  */
 
diff --git a/lib/btree.c b/lib/btree.c
index 4264871..f93a945 100644
--- a/lib/btree.c
+++ b/lib/btree.c
@@ -5,7 +5,7 @@
  *
  * Copyright (c) 2007-2008 Joern Engel <joern@logfs.org>
  * Bits and pieces stolen from Peter Zijlstra's code, which is
- * Copyright 2007, Red Hat Inc. Peter Zijlstra <pzijlstr@redhat.com>
+ * Copyright 2007, Red Hat Inc. Peter Zijlstra
  * GPLv2
  *
  * see http://programming.kicks-ass.net/kernel-patches/vma_lookup/btree.patch
diff --git a/lib/proportions.c b/lib/proportions.c
index 6f72429..efa54f25 100644
--- a/lib/proportions.c
+++ b/lib/proportions.c
@@ -1,7 +1,7 @@
 /*
  * Floating proportions
  *
- *  Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ *  Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
  *
  * Description:
  *
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 3e4d654..d15d88c 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2,7 +2,7 @@
  * mm/page-writeback.c
  *
  * Copyright (C) 2002, Linus Torvalds.
- * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
  *
  * Contains functions related to writing back dirty pages at the
  * address_space level.
-- 
cgit v0.10.2


From 7c90e610b60cd1ed6abafd806acfaedccbbe52d1 Mon Sep 17 00:00:00 2001
From: Konstantin Shkolnyy <konstantin.shkolnyy@gmail.com>
Date: Tue, 10 Nov 2015 16:40:13 -0600
Subject: USB: cp210x: Remove CP2110 ID from compatibility list

CP2110 ID (0x10c4, 0xea80) doesn't belong here because it's a HID
and completely different from CP210x devices.

Signed-off-by: Konstantin Shkolnyy <konstantin.shkolnyy@gmail.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Johan Hovold <johan@kernel.org>

diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c
index eac7cca..7d4f51a 100644
--- a/drivers/usb/serial/cp210x.c
+++ b/drivers/usb/serial/cp210x.c
@@ -132,7 +132,6 @@ static const struct usb_device_id id_table[] = {
 	{ USB_DEVICE(0x10C4, 0xEA60) }, /* Silicon Labs factory default */
 	{ USB_DEVICE(0x10C4, 0xEA61) }, /* Silicon Labs factory default */
 	{ USB_DEVICE(0x10C4, 0xEA70) }, /* Silicon Labs factory default */
-	{ USB_DEVICE(0x10C4, 0xEA80) }, /* Silicon Labs factory default */
 	{ USB_DEVICE(0x10C4, 0xEA71) }, /* Infinity GPS-MIC-1 Radio Monophone */
 	{ USB_DEVICE(0x10C4, 0xF001) }, /* Elan Digital Systems USBscope50 */
 	{ USB_DEVICE(0x10C4, 0xF002) }, /* Elan Digital Systems USBwave12 */
-- 
cgit v0.10.2


From f33a7f72e5fc033daccbb8d4753d7c5c41a4d67b Mon Sep 17 00:00:00 2001
From: Jonas Jonsson <jonas@ludd.ltu.se>
Date: Sun, 22 Nov 2015 11:47:17 +0100
Subject: USB: cdc_acm: Ignore Infineon Flash Loader utility

Some modems, such as the Telit UE910, are using an Infineon Flash Loader
utility. It has two interfaces, 2/2/0 (Abstract Modem) and 10/0/0 (CDC
Data). The latter can be used as a serial interface to upgrade the
firmware of the modem. However, that isn't possible when the cdc-acm
driver takes control of the device.

The following is an explanation of the behaviour by Daniele Palmas during
discussion on linux-usb.

"This is what happens when the device is turned on (without modifying
the drivers):

[155492.352031] usb 1-3: new high-speed USB device number 27 using ehci-pci
[155492.485429] usb 1-3: config 1 interface 0 altsetting 0 endpoint 0x81 has an invalid bInterval 255, changing to 11
[155492.485436] usb 1-3: New USB device found, idVendor=058b, idProduct=0041
[155492.485439] usb 1-3: New USB device strings: Mfr=0, Product=0, SerialNumber=0
[155492.485952] cdc_acm 1-3:1.0: ttyACM0: USB ACM device

This is the flashing device that is caught by the cdc-acm driver. Once
the ttyACM appears, the application starts sending a magic string
(simple write on the file descriptor) to keep the device in flashing
mode. If this magic string is not properly received in a certain time
interval, the modem goes on in normal operative mode:

[155493.748094] usb 1-3: USB disconnect, device number 27
[155494.916025] usb 1-3: new high-speed USB device number 28 using ehci-pci
[155495.059978] usb 1-3: New USB device found, idVendor=1bc7, idProduct=0021
[155495.059983] usb 1-3: New USB device strings: Mfr=1, Product=2, SerialNumber=3
[155495.059986] usb 1-3: Product: 6 CDC-ACM + 1 CDC-ECM
[155495.059989] usb 1-3: Manufacturer: Telit
[155495.059992] usb 1-3: SerialNumber: 359658044004697
[155495.138958] cdc_acm 1-3:1.0: ttyACM0: USB ACM device
[155495.140832] cdc_acm 1-3:1.2: ttyACM1: USB ACM device
[155495.142827] cdc_acm 1-3:1.4: ttyACM2: USB ACM device
[155495.144462] cdc_acm 1-3:1.6: ttyACM3: USB ACM device
[155495.145967] cdc_acm 1-3:1.8: ttyACM4: USB ACM device
[155495.147588] cdc_acm 1-3:1.10: ttyACM5: USB ACM device
[155495.154322] cdc_ether 1-3:1.12 wwan0: register 'cdc_ether' at usb-0000:00:1a.7-3, Mobile Broadband Network Device, 00:00:11:12:13:14

Using the cdc-acm driver, the string, though being sent in the same way
than using the usb-serial-simple driver (I can confirm that the data is
passing properly since I used an hw usb sniffer), does not make the
device to stay in flashing mode."

Signed-off-by: Jonas Jonsson <jonas@ludd.ltu.se>
Tested-by: Daniele Palmas <dnlplm@gmail.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Johan Hovold <johan@kernel.org>

diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c
index b30e742..26ca4f9 100644
--- a/drivers/usb/class/cdc-acm.c
+++ b/drivers/usb/class/cdc-acm.c
@@ -1838,6 +1838,11 @@ static const struct usb_device_id acm_ids[] = {
 	},
 #endif
 
+	/* Exclude Infineon Flash Loader utility */
+	{ USB_DEVICE(0x058b, 0x0041),
+	.driver_info = IGNORE_DEVICE,
+	},
+
 	/* control interfaces without any protocol set */
 	{ USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ACM,
 		USB_CDC_PROTO_NONE) },
-- 
cgit v0.10.2


From a0e80fbd56b4573de997c9a088a33abbc1121400 Mon Sep 17 00:00:00 2001
From: Jonas Jonsson <jonas@ludd.ltu.se>
Date: Sun, 22 Nov 2015 11:47:18 +0100
Subject: USB: serial: Another Infineon flash loader USB ID

The flash loader has been seen on a Telit UE910 modem. The flash loader
is a bit special, it presents both an ACM and CDC Data interface but
only the latter is useful. Unless a magic string is sent to the device
it will disappear and the regular modem device appears instead.

Signed-off-by: Jonas Jonsson <jonas@ludd.ltu.se>
Tested-by: Daniele Palmas <dnlplm@gmail.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Johan Hovold <johan@kernel.org>

diff --git a/drivers/usb/serial/usb-serial-simple.c b/drivers/usb/serial/usb-serial-simple.c
index 3658662..a204782 100644
--- a/drivers/usb/serial/usb-serial-simple.c
+++ b/drivers/usb/serial/usb-serial-simple.c
@@ -53,6 +53,7 @@ DEVICE(funsoft, FUNSOFT_IDS);
 
 /* Infineon Flashloader driver */
 #define FLASHLOADER_IDS()		\
+	{ USB_DEVICE_INTERFACE_CLASS(0x058b, 0x0041, USB_CLASS_CDC_DATA) }, \
 	{ USB_DEVICE(0x8087, 0x0716) }
 DEVICE(flashloader, FLASHLOADER_IDS);
 
-- 
cgit v0.10.2


From df976f5d95028e26b4c967b24518fe340f8a726d Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nicolas.pitre@linaro.org>
Date: Tue, 3 Nov 2015 23:09:58 -0500
Subject: clk: ti: clkt_dpll: fix wrong do_div() usage

do_div() is meant to be used with an unsigned dividend.

Signed-off-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Tero Kristo <t-kristo@ti.com>

diff --git a/drivers/clk/ti/clkt_dpll.c b/drivers/clk/ti/clkt_dpll.c
index 9023ca9..b5cc6f6 100644
--- a/drivers/clk/ti/clkt_dpll.c
+++ b/drivers/clk/ti/clkt_dpll.c
@@ -240,7 +240,7 @@ u8 omap2_init_dpll_parent(struct clk_hw *hw)
  */
 unsigned long omap2_get_dpll_rate(struct clk_hw_omap *clk)
 {
-	long long dpll_clk;
+	u64 dpll_clk;
 	u32 dpll_mult, dpll_div, v;
 	struct dpll_data *dd;
 
@@ -262,7 +262,7 @@ unsigned long omap2_get_dpll_rate(struct clk_hw_omap *clk)
 	dpll_div = v & dd->div1_mask;
 	dpll_div >>= __ffs(dd->div1_mask);
 
-	dpll_clk = (long long)clk_get_rate(dd->clk_ref) * dpll_mult;
+	dpll_clk = (u64)clk_get_rate(dd->clk_ref) * dpll_mult;
 	do_div(dpll_clk, dpll_div + 1);
 
 	return dpll_clk;
-- 
cgit v0.10.2


From c51185b45c43737faca4574d790489a1bd8cfd11 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nicolas.pitre@linaro.org>
Date: Tue, 3 Nov 2015 23:17:11 -0500
Subject: clk: ti: fapll: fix wrong do_div() usage

do_div() is meant to be used with an unsigned dividend.

Signed-off-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Tero Kristo <t-kristo@ti.com>

diff --git a/drivers/clk/ti/fapll.c b/drivers/clk/ti/fapll.c
index f4b2e98..66a0d0e 100644
--- a/drivers/clk/ti/fapll.c
+++ b/drivers/clk/ti/fapll.c
@@ -168,7 +168,7 @@ static unsigned long ti_fapll_recalc_rate(struct clk_hw *hw,
 {
 	struct fapll_data *fd = to_fapll(hw);
 	u32 fapll_n, fapll_p, v;
-	long long rate;
+	u64 rate;
 
 	if (ti_fapll_clock_is_bypass(fd))
 		return parent_rate;
@@ -314,7 +314,7 @@ static unsigned long ti_fapll_synth_recalc_rate(struct clk_hw *hw,
 {
 	struct fapll_synth *synth = to_synth(hw);
 	u32 synth_div_m;
-	long long rate;
+	u64 rate;
 
 	/* The audio_pll_clk1 is hardwired to produce 32.768KiHz clock */
 	if (!synth->div)
-- 
cgit v0.10.2


From 3a5b1dc4a3237870cfb53d31bc4cbc2a9c7aa16e Mon Sep 17 00:00:00 2001
From: Neil Armstrong <narmstrong@baylibre.com>
Date: Fri, 13 Nov 2015 17:29:58 +0100
Subject: clk: ti816x: Add missing dmtimer clkdev entries

Add missing clkdev dmtimer related entries for dm816x.
32Khz and ext sources were missing.

Signed-off-by: Neil Armstrong <narmstrong@baylibre.com>
Cc: Brian Hutchinson <b.hutchman@gmail.com>
Acked-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Tero Kristo <t-kristo@ti.com>

diff --git a/drivers/clk/ti/clk-816x.c b/drivers/clk/ti/clk-816x.c
index 1dfad0c..2a5d84f 100644
--- a/drivers/clk/ti/clk-816x.c
+++ b/drivers/clk/ti/clk-816x.c
@@ -20,6 +20,8 @@ static struct ti_dt_clk dm816x_clks[] = {
 	DT_CLK(NULL, "sys_clkin", "sys_clkin_ck"),
 	DT_CLK(NULL, "timer_sys_ck", "sys_clkin_ck"),
 	DT_CLK(NULL, "sys_32k_ck", "sys_32k_ck"),
+	DT_CLK(NULL, "timer_32k_ck", "sysclk18_ck"),
+	DT_CLK(NULL, "timer_ext_ck", "tclkin_ck"),
 	DT_CLK(NULL, "mpu_ck", "mpu_ck"),
 	DT_CLK(NULL, "timer1_fck", "timer1_fck"),
 	DT_CLK(NULL, "timer2_fck", "timer2_fck"),
-- 
cgit v0.10.2


From 167af5ef2cdba14ff14a13c91e5532ed479083d8 Mon Sep 17 00:00:00 2001
From: Grygorii Strashko <grygorii.strashko@ti.com>
Date: Thu, 1 Oct 2015 14:20:37 -0500
Subject: clk: ti: drop locking code from mux/divider drivers

TI's mux and divider clock drivers do not require locking and they do
not initialize internal spinlocks. This code was occasionally
copy-posted from generic mux/divider drivers. So remove it.

Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com>
Cc: Tony Lindgren <tony@atomide.com>
Cc: Sekhar Nori <nsekhar@ti.com>
Signed-off-by: Tero Kristo <t-kristo@ti.com>

diff --git a/drivers/clk/ti/divider.c b/drivers/clk/ti/divider.c
index 5b17268..df25583 100644
--- a/drivers/clk/ti/divider.c
+++ b/drivers/clk/ti/divider.c
@@ -214,7 +214,6 @@ static int ti_clk_divider_set_rate(struct clk_hw *hw, unsigned long rate,
 {
 	struct clk_divider *divider;
 	unsigned int div, value;
-	unsigned long flags = 0;
 	u32 val;
 
 	if (!hw || !rate)
@@ -228,9 +227,6 @@ static int ti_clk_divider_set_rate(struct clk_hw *hw, unsigned long rate,
 	if (value > div_mask(divider))
 		value = div_mask(divider);
 
-	if (divider->lock)
-		spin_lock_irqsave(divider->lock, flags);
-
 	if (divider->flags & CLK_DIVIDER_HIWORD_MASK) {
 		val = div_mask(divider) << (divider->shift + 16);
 	} else {
@@ -240,9 +236,6 @@ static int ti_clk_divider_set_rate(struct clk_hw *hw, unsigned long rate,
 	val |= value << divider->shift;
 	ti_clk_ll_ops->clk_writel(val, divider->reg);
 
-	if (divider->lock)
-		spin_unlock_irqrestore(divider->lock, flags);
-
 	return 0;
 }
 
@@ -256,8 +249,7 @@ static struct clk *_register_divider(struct device *dev, const char *name,
 				     const char *parent_name,
 				     unsigned long flags, void __iomem *reg,
 				     u8 shift, u8 width, u8 clk_divider_flags,
-				     const struct clk_div_table *table,
-				     spinlock_t *lock)
+				     const struct clk_div_table *table)
 {
 	struct clk_divider *div;
 	struct clk *clk;
@@ -288,7 +280,6 @@ static struct clk *_register_divider(struct device *dev, const char *name,
 	div->shift = shift;
 	div->width = width;
 	div->flags = clk_divider_flags;
-	div->lock = lock;
 	div->hw.init = &init;
 	div->table = table;
 
@@ -421,7 +412,7 @@ struct clk *ti_clk_register_divider(struct ti_clk *setup)
 
 	clk = _register_divider(NULL, setup->name, div->parent,
 				flags, (void __iomem *)reg, div->bit_shift,
-				width, div_flags, table, NULL);
+				width, div_flags, table);
 
 	if (IS_ERR(clk))
 		kfree(table);
@@ -584,8 +575,7 @@ static void __init of_ti_divider_clk_setup(struct device_node *node)
 		goto cleanup;
 
 	clk = _register_divider(NULL, node->name, parent_name, flags, reg,
-				shift, width, clk_divider_flags, table,
-				NULL);
+				shift, width, clk_divider_flags, table);
 
 	if (!IS_ERR(clk)) {
 		of_clk_add_provider(node, of_clk_src_simple_get, clk);
diff --git a/drivers/clk/ti/mux.c b/drivers/clk/ti/mux.c
index 69f08a1..dab9ba8 100644
--- a/drivers/clk/ti/mux.c
+++ b/drivers/clk/ti/mux.c
@@ -69,7 +69,6 @@ static int ti_clk_mux_set_parent(struct clk_hw *hw, u8 index)
 {
 	struct clk_mux *mux = to_clk_mux(hw);
 	u32 val;
-	unsigned long flags = 0;
 
 	if (mux->table) {
 		index = mux->table[index];
@@ -81,9 +80,6 @@ static int ti_clk_mux_set_parent(struct clk_hw *hw, u8 index)
 			index++;
 	}
 
-	if (mux->lock)
-		spin_lock_irqsave(mux->lock, flags);
-
 	if (mux->flags & CLK_MUX_HIWORD_MASK) {
 		val = mux->mask << (mux->shift + 16);
 	} else {
@@ -93,9 +89,6 @@ static int ti_clk_mux_set_parent(struct clk_hw *hw, u8 index)
 	val |= index << mux->shift;
 	ti_clk_ll_ops->clk_writel(val, mux->reg);
 
-	if (mux->lock)
-		spin_unlock_irqrestore(mux->lock, flags);
-
 	return 0;
 }
 
@@ -109,7 +102,7 @@ static struct clk *_register_mux(struct device *dev, const char *name,
 				 const char **parent_names, u8 num_parents,
 				 unsigned long flags, void __iomem *reg,
 				 u8 shift, u32 mask, u8 clk_mux_flags,
-				 u32 *table, spinlock_t *lock)
+				 u32 *table)
 {
 	struct clk_mux *mux;
 	struct clk *clk;
@@ -133,7 +126,6 @@ static struct clk *_register_mux(struct device *dev, const char *name,
 	mux->shift = shift;
 	mux->mask = mask;
 	mux->flags = clk_mux_flags;
-	mux->lock = lock;
 	mux->table = table;
 	mux->hw.init = &init;
 
@@ -175,7 +167,7 @@ struct clk *ti_clk_register_mux(struct ti_clk *setup)
 
 	return _register_mux(NULL, setup->name, mux->parents, mux->num_parents,
 			     flags, (void __iomem *)reg, mux->bit_shift, mask,
-			     mux_flags, NULL, NULL);
+			     mux_flags, NULL);
 }
 
 /**
@@ -227,8 +219,7 @@ static void of_mux_clk_setup(struct device_node *node)
 	mask = (1 << fls(mask)) - 1;
 
 	clk = _register_mux(NULL, node->name, parent_names, num_parents,
-			    flags, reg, shift, mask, clk_mux_flags, NULL,
-			    NULL);
+			    flags, reg, shift, mask, clk_mux_flags, NULL);
 
 	if (!IS_ERR(clk))
 		of_clk_add_provider(node, of_clk_src_simple_get, clk);
-- 
cgit v0.10.2


From a4dad1ae24f850410c4e60f22823cba1289b8d52 Mon Sep 17 00:00:00 2001
From: David Turner <novalis@novalis.org>
Date: Tue, 24 Nov 2015 14:34:37 -0500
Subject: ext4: Fix handling of extended tv_sec

In ext4, the bottom two bits of {a,c,m}time_extra are used to extend
the {a,c,m}time fields, deferring the year 2038 problem to the year
2446.

When decoding these extended fields, for times whose bottom 32 bits
would represent a negative number, sign extension causes the 64-bit
extended timestamp to be negative as well, which is not what's
intended.  This patch corrects that issue, so that the only negative
{a,c,m}times are those between 1901 and 1970 (as per 32-bit signed
timestamps).

Some older kernels might have written pre-1970 dates with 1,1 in the
extra bits.  This patch treats those incorrectly-encoded dates as
pre-1970, instead of post-2311, until kernel 4.20 is released.
Hopefully by then e2fsck will have fixed up the bad data.

Also add a comment explaining the encoding of ext4's extra {a,c,m}time
bits.

Signed-off-by: David Turner <novalis@novalis.org>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Reported-by: Mark Harris <mh8928@yahoo.com>
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=23732
Cc: stable@vger.kernel.org

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 750063f..cc7ca4e 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -26,6 +26,7 @@
 #include <linux/seqlock.h>
 #include <linux/mutex.h>
 #include <linux/timer.h>
+#include <linux/version.h>
 #include <linux/wait.h>
 #include <linux/blockgroup_lock.h>
 #include <linux/percpu_counter.h>
@@ -727,19 +728,55 @@ struct move_extent {
 	<= (EXT4_GOOD_OLD_INODE_SIZE +			\
 	    (einode)->i_extra_isize))			\
 
+/*
+ * We use an encoding that preserves the times for extra epoch "00":
+ *
+ * extra  msb of                         adjust for signed
+ * epoch  32-bit                         32-bit tv_sec to
+ * bits   time    decoded 64-bit tv_sec  64-bit tv_sec      valid time range
+ * 0 0    1    -0x80000000..-0x00000001  0x000000000 1901-12-13..1969-12-31
+ * 0 0    0    0x000000000..0x07fffffff  0x000000000 1970-01-01..2038-01-19
+ * 0 1    1    0x080000000..0x0ffffffff  0x100000000 2038-01-19..2106-02-07
+ * 0 1    0    0x100000000..0x17fffffff  0x100000000 2106-02-07..2174-02-25
+ * 1 0    1    0x180000000..0x1ffffffff  0x200000000 2174-02-25..2242-03-16
+ * 1 0    0    0x200000000..0x27fffffff  0x200000000 2242-03-16..2310-04-04
+ * 1 1    1    0x280000000..0x2ffffffff  0x300000000 2310-04-04..2378-04-22
+ * 1 1    0    0x300000000..0x37fffffff  0x300000000 2378-04-22..2446-05-10
+ *
+ * Note that previous versions of the kernel on 64-bit systems would
+ * incorrectly use extra epoch bits 1,1 for dates between 1901 and
+ * 1970.  e2fsck will correct this, assuming that it is run on the
+ * affected filesystem before 2242.
+ */
+
 static inline __le32 ext4_encode_extra_time(struct timespec *time)
 {
-       return cpu_to_le32((sizeof(time->tv_sec) > 4 ?
-			   (time->tv_sec >> 32) & EXT4_EPOCH_MASK : 0) |
-                          ((time->tv_nsec << EXT4_EPOCH_BITS) & EXT4_NSEC_MASK));
+	u32 extra = sizeof(time->tv_sec) > 4 ?
+		((time->tv_sec - (s32)time->tv_sec) >> 32) & EXT4_EPOCH_MASK : 0;
+	return cpu_to_le32(extra | (time->tv_nsec << EXT4_EPOCH_BITS));
 }
 
 static inline void ext4_decode_extra_time(struct timespec *time, __le32 extra)
 {
-       if (sizeof(time->tv_sec) > 4)
-	       time->tv_sec |= (__u64)(le32_to_cpu(extra) & EXT4_EPOCH_MASK)
-			       << 32;
-       time->tv_nsec = (le32_to_cpu(extra) & EXT4_NSEC_MASK) >> EXT4_EPOCH_BITS;
+	if (unlikely(sizeof(time->tv_sec) > 4 &&
+			(extra & cpu_to_le32(EXT4_EPOCH_MASK)))) {
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4,20,0)
+		/* Handle legacy encoding of pre-1970 dates with epoch
+		 * bits 1,1.  We assume that by kernel version 4.20,
+		 * everyone will have run fsck over the affected
+		 * filesystems to correct the problem.  (This
+		 * backwards compatibility may be removed before this
+		 * time, at the discretion of the ext4 developers.)
+		 */
+		u64 extra_bits = le32_to_cpu(extra) & EXT4_EPOCH_MASK;
+		if (extra_bits == 3 && ((time->tv_sec) & 0x80000000) != 0)
+			extra_bits = 0;
+		time->tv_sec += extra_bits << 32;
+#else
+		time->tv_sec += (u64)(le32_to_cpu(extra) & EXT4_EPOCH_MASK) << 32;
+#endif
+	}
+	time->tv_nsec = (le32_to_cpu(extra) & EXT4_NSEC_MASK) >> EXT4_EPOCH_BITS;
 }
 
 #define EXT4_INODE_SET_XTIME(xtime, inode, raw_inode)			       \
-- 
cgit v0.10.2


From bc23f0c8d7ccd8d924c4e70ce311288cb3e61ea8 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 24 Nov 2015 15:34:35 -0500
Subject: jbd2: Fix unreclaimed pages after truncate in data=journal mode

Ted and Namjae have reported that truncated pages don't get timely
reclaimed after being truncated in data=journal mode. The following test
triggers the issue easily:

for (i = 0; i < 1000; i++) {
	pwrite(fd, buf, 1024*1024, 0);
	fsync(fd);
	fsync(fd);
	ftruncate(fd, 0);
}

The reason is that journal_unmap_buffer() finds that truncated buffers
are not journalled (jh->b_transaction == NULL), they are part of
checkpoint list of a transaction (jh->b_cp_transaction != NULL) and have
been already written out (!buffer_dirty(bh)). We clean such buffers but
we leave them in the checkpoint list. Since checkpoint transaction holds
a reference to the journal head, these buffers cannot be released until
the checkpoint transaction is cleaned up. And at that point we don't
call release_buffer_page() anymore so pages detached from mapping are
lingering in the system waiting for reclaim to find them and free them.

Fix the problem by removing buffers from transaction checkpoint lists
when journal_unmap_buffer() finds out they don't have to be there
anymore.

Reported-and-tested-by: Namjae Jeon <namjae.jeon@samsung.com>
Fixes: de1b794130b130e77ffa975bb58cb843744f9ae5
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@vger.kernel.org

diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 6b8338e..b996212 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -2152,6 +2152,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh,
 
 		if (!buffer_dirty(bh)) {
 			/* bdflush has written it.  We can drop it now */
+			__jbd2_journal_remove_checkpoint(jh);
 			goto zap_buffer;
 		}
 
@@ -2181,6 +2182,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh,
 				/* The orphan record's transaction has
 				 * committed.  We can cleanse this buffer */
 				clear_buffer_jbddirty(bh);
+				__jbd2_journal_remove_checkpoint(jh);
 				goto zap_buffer;
 			}
 		}
-- 
cgit v0.10.2


From 07687c031d14a1f36613231c0ea13ce3c4025615 Mon Sep 17 00:00:00 2001
From: Alan Tull <atull@opensource.altera.com>
Date: Thu, 29 Oct 2015 14:39:56 -0500
Subject: fpga manager: remove label

Remove implementation of 'label' DT binding.

Signed-off-by: Alan Tull <atull@opensource.altera.com>
Reviewed-by: Moritz Fischer <moritz.fischer@ettus.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

diff --git a/drivers/fpga/fpga-mgr.c b/drivers/fpga/fpga-mgr.c
index a24f5cb..bfbf8aa 100644
--- a/drivers/fpga/fpga-mgr.c
+++ b/drivers/fpga/fpga-mgr.c
@@ -256,7 +256,6 @@ int fpga_mgr_register(struct device *dev, const char *name,
 		      void *priv)
 {
 	struct fpga_manager *mgr;
-	const char *dt_label;
 	int id, ret;
 
 	if (!mops || !mops->write_init || !mops->write ||
@@ -300,11 +299,9 @@ int fpga_mgr_register(struct device *dev, const char *name,
 	mgr->dev.id = id;
 	dev_set_drvdata(dev, mgr);
 
-	dt_label = of_get_property(mgr->dev.of_node, "label", NULL);
-	if (dt_label)
-		ret = dev_set_name(&mgr->dev, "%s", dt_label);
-	else
-		ret = dev_set_name(&mgr->dev, "fpga%d", id);
+	ret = dev_set_name(&mgr->dev, "fpga%d", id);
+	if (ret)
+		goto error_device;
 
 	ret = device_add(&mgr->dev);
 	if (ret)
-- 
cgit v0.10.2


From e8c77bda05e5d93cce6f38cfdde4192307951dea Mon Sep 17 00:00:00 2001
From: Tobias Klauser <tklauser@distanz.ch>
Date: Wed, 18 Nov 2015 10:48:16 +0100
Subject: fpga manager: Fix firmware resource leak on error

If fpga_mgr_buf_load() fails, the firmware resource previously allocated
by request_firmware() is leaked. Fix it by calling release_firmware()
regardless of the return value of fpga_mgr_buf_load().

Found by the Coverity scanner (CID 1339653).

Fixes: 6a8c3be7ec8e ("add FPGA manager core")
Signed-off-by: Tobias Klauser <tklauser@distanz.ch>
Acked-by: Alan Tull <atull@opensource.altera.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

diff --git a/drivers/fpga/fpga-mgr.c b/drivers/fpga/fpga-mgr.c
index bfbf8aa..953dc91 100644
--- a/drivers/fpga/fpga-mgr.c
+++ b/drivers/fpga/fpga-mgr.c
@@ -122,12 +122,10 @@ int fpga_mgr_firmware_load(struct fpga_manager *mgr, u32 flags,
 	}
 
 	ret = fpga_mgr_buf_load(mgr, flags, fw->data, fw->size);
-	if (ret)
-		return ret;
 
 	release_firmware(fw);
 
-	return 0;
+	return ret;
 }
 EXPORT_SYMBOL_GPL(fpga_mgr_firmware_load);
 
-- 
cgit v0.10.2


From c21ac06648a7a9985fa81a53ba3a65569a1de388 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 25 Nov 2015 23:03:49 +0100
Subject: clk: mmp: add linux/clk.h includes

The common clk implementation for MMP broke without anyone noticing
when we stopped including linux/clk.h from the clk-provider header.

This did not show up in the defconfig builds because those use the
legacy MMP clk drivers, and it did not show up in my randconfig tests
either because I was testing with my mmp multiplatform series
applied, which at some point gained the fixup.

This fixes the three broken files.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Fixes: 61ae76563ec3 ("clk: Remove clk.h from clk-provider.h")
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>

diff --git a/drivers/clk/mmp/clk-mmp2.c b/drivers/clk/mmp/clk-mmp2.c
index 09d2832..71fd293 100644
--- a/drivers/clk/mmp/clk-mmp2.c
+++ b/drivers/clk/mmp/clk-mmp2.c
@@ -9,6 +9,7 @@
  * warranty of any kind, whether express or implied.
  */
 
+#include <linux/clk.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/spinlock.h>
diff --git a/drivers/clk/mmp/clk-pxa168.c b/drivers/clk/mmp/clk-pxa168.c
index 93e967c..7524491 100644
--- a/drivers/clk/mmp/clk-pxa168.c
+++ b/drivers/clk/mmp/clk-pxa168.c
@@ -9,6 +9,7 @@
  * warranty of any kind, whether express or implied.
  */
 
+#include <linux/clk.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/spinlock.h>
diff --git a/drivers/clk/mmp/clk-pxa910.c b/drivers/clk/mmp/clk-pxa910.c
index 993abcd..37ba04b 100644
--- a/drivers/clk/mmp/clk-pxa910.c
+++ b/drivers/clk/mmp/clk-pxa910.c
@@ -9,6 +9,7 @@
  * warranty of any kind, whether express or implied.
  */
 
+#include <linux/clk.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/spinlock.h>
-- 
cgit v0.10.2


From 025af189fb44250206dd8a32fa4a682392af3301 Mon Sep 17 00:00:00 2001
From: Thomas Hellstrom <thellstrom@vmware.com>
Date: Fri, 20 Nov 2015 11:43:50 -0800
Subject: drm/ttm: Fixed a read/write lock imbalance

In ttm_write_lock(), the uninterruptible path should call
__ttm_write_lock() not __ttm_read_lock().  This fixes a vmwgfx hang
on F23 start up.

syeh: Extracted this from one of Thomas' internal patches.

Cc: <stable@vger.kernel.org>
Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
Reviewed-by: Sinclair Yeh <syeh@vmware.com>

diff --git a/drivers/gpu/drm/ttm/ttm_lock.c b/drivers/gpu/drm/ttm/ttm_lock.c
index 6a95454..f154fb1 100644
--- a/drivers/gpu/drm/ttm/ttm_lock.c
+++ b/drivers/gpu/drm/ttm/ttm_lock.c
@@ -180,7 +180,7 @@ int ttm_write_lock(struct ttm_lock *lock, bool interruptible)
 			spin_unlock(&lock->lock);
 		}
 	} else
-		wait_event(lock->queue, __ttm_read_lock(lock));
+		wait_event(lock->queue, __ttm_write_lock(lock));
 
 	return ret;
 }
-- 
cgit v0.10.2


From 99f9be4c73cff20ec64793be68f03f927474298c Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Sat, 21 Nov 2015 13:29:39 +0300
Subject: drm/vmwgfx: fix a warning message

The WARN_ON() macro only takes a condition argument, it doesn't take
a message.  I have converted this to WARN() instead.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Sinclair Yeh <syeh@vmware.com>

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
index a8baf5f..b6a0806 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
@@ -390,7 +390,7 @@ void *vmw_fifo_reserve_dx(struct vmw_private *dev_priv, uint32_t bytes,
 	else if (ctx_id == SVGA3D_INVALID_ID)
 		ret = vmw_local_fifo_reserve(dev_priv, bytes);
 	else {
-		WARN_ON("Command buffer has not been allocated.\n");
+		WARN(1, "Command buffer has not been allocated.\n");
 		ret = NULL;
 	}
 	if (IS_ERR_OR_NULL(ret)) {
-- 
cgit v0.10.2


From e2c9e0b28e146c9a3bce21408f3c02e24ac7ac31 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ZenIV.linux.org.uk>
Date: Thu, 26 Nov 2015 15:20:19 -0500
Subject: ext4: fix an endianness bug in ext4_encrypted_zeroout()

ex->ee_block is not host-endian (note that accesses of other fields
of *ex right next to that line go through the helpers that do proper
conversion from little-endian to host-endian; it might make sense
to add similar for ->ee_block to avoid reintroducing that kind of
bugs...)

Cc: stable@vger.kernel.org # v4.1+
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>

diff --git a/fs/ext4/crypto.c b/fs/ext4/crypto.c
index af06830..1a08350 100644
--- a/fs/ext4/crypto.c
+++ b/fs/ext4/crypto.c
@@ -389,7 +389,7 @@ int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex)
 	struct ext4_crypto_ctx	*ctx;
 	struct page		*ciphertext_page = NULL;
 	struct bio		*bio;
-	ext4_lblk_t		lblk = ex->ee_block;
+	ext4_lblk_t		lblk = le32_to_cpu(ex->ee_block);
 	ext4_fsblk_t		pblk = ext4_ext_pblock(ex);
 	unsigned int		len = ext4_ext_get_actual_len(ex);
 	int			ret, err = 0;
-- 
cgit v0.10.2


From 5a1c7f47da9b32d0671e776b0f388095b7f91e2e Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ZenIV.linux.org.uk>
Date: Thu, 26 Nov 2015 15:20:50 -0500
Subject: ext4: fix an endianness bug in ext4_encrypted_follow_link()

applying le32_to_cpu() to 16bit value is a bad idea...

Cc: stable@vger.kernel.org # v4.1+
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>

diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c
index abe2401..e8e7af6 100644
--- a/fs/ext4/symlink.c
+++ b/fs/ext4/symlink.c
@@ -52,7 +52,7 @@ static const char *ext4_encrypted_follow_link(struct dentry *dentry, void **cook
 	/* Symlink is encrypted */
 	sd = (struct ext4_encrypted_symlink_data *)caddr;
 	cstr.name = sd->encrypted_path;
-	cstr.len  = le32_to_cpu(sd->len);
+	cstr.len  = le16_to_cpu(sd->len);
 	if ((cstr.len +
 	     sizeof(struct ext4_encrypted_symlink_data) - 1) >
 	    max_size) {
-- 
cgit v0.10.2


From 681c46b164d79aaa5d18f7519b39e29f9b441e40 Mon Sep 17 00:00:00 2001
From: Xu Cang <cangxumu@gmail.com>
Date: Thu, 26 Nov 2015 15:52:24 -0500
Subject: ext4: add "static" to ext4_seq_##name##_fops struct

to fix sparse warning, add static to ext4_seq_##name##_fops struct.

Signed-off-by: Theodore Ts'o <tytso@mit.edu>

diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
index 1b57c72..1420a3c 100644
--- a/fs/ext4/sysfs.c
+++ b/fs/ext4/sysfs.c
@@ -358,7 +358,7 @@ static int name##_open(struct inode *inode, struct file *file) \
 	return single_open(file, ext4_seq_##name##_show, PDE_DATA(inode)); \
 } \
 \
-const struct file_operations ext4_seq_##name##_fops = { \
+static const struct file_operations ext4_seq_##name##_fops = { \
 	.owner		= THIS_MODULE, \
 	.open		= name##_open, \
 	.read		= seq_read, \
-- 
cgit v0.10.2


From 62f49ee26f14753bd7fa634f51d537a79061bf0f Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 16 Nov 2015 22:26:43 +0100
Subject: cpufreq: s3c24xx: Do not mark s3c2410_plls_add as __init

s3c2410_plls_add is a device notifier that may be called at runtime and
is correctly not marked __init. However it calls s3c_plltab_register()
which is marked __init, and that triggers a build error when we are
checking for section mismatches:

WARNING: vmlinux.o(.text+0x195e0): Section mismatch in reference from the function s3c2410_plls_add() to the function .init.text:s3c_plltab_register()
The function s3c2410_plls_add() references
the function __init s3c_plltab_register().
This is often because s3c2410_plls_add lacks a __init
annotation or the annotation of s3c_plltab_register is wrong.

This removes the __init annotation from s3c2410_plls_add as well as the
__initdata section annotations from s3c2440_plls_12 and s3c2440_plls_169344,
which in turn are referenced from s3c2410_plls_add.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>

diff --git a/arch/arm/mach-s3c24xx/pll-s3c2440-12000000.c b/arch/arm/mach-s3c24xx/pll-s3c2440-12000000.c
index a19460e..b355fca 100644
--- a/arch/arm/mach-s3c24xx/pll-s3c2440-12000000.c
+++ b/arch/arm/mach-s3c24xx/pll-s3c2440-12000000.c
@@ -20,7 +20,7 @@
 #include <plat/cpu.h>
 #include <plat/cpu-freq-core.h>
 
-static struct cpufreq_frequency_table s3c2440_plls_12[] __initdata = {
+static struct cpufreq_frequency_table s3c2440_plls_12[] = {
 	{ .frequency = 75000000,	.driver_data = PLLVAL(0x75, 3, 3),  }, 	/* FVco 600.000000 */
 	{ .frequency = 80000000,	.driver_data = PLLVAL(0x98, 4, 3),  }, 	/* FVco 640.000000 */
 	{ .frequency = 90000000,	.driver_data = PLLVAL(0x70, 2, 3),  }, 	/* FVco 720.000000 */
diff --git a/arch/arm/mach-s3c24xx/pll-s3c2440-16934400.c b/arch/arm/mach-s3c24xx/pll-s3c2440-16934400.c
index 1191b29..be9a248 100644
--- a/arch/arm/mach-s3c24xx/pll-s3c2440-16934400.c
+++ b/arch/arm/mach-s3c24xx/pll-s3c2440-16934400.c
@@ -20,7 +20,7 @@
 #include <plat/cpu.h>
 #include <plat/cpu-freq-core.h>
 
-static struct cpufreq_frequency_table s3c2440_plls_169344[] __initdata = {
+static struct cpufreq_frequency_table s3c2440_plls_169344[] = {
 	{ .frequency = 78019200,	.driver_data = PLLVAL(121, 5, 3), 	}, 	/* FVco 624.153600 */
 	{ .frequency = 84067200,	.driver_data = PLLVAL(131, 5, 3), 	}, 	/* FVco 672.537600 */
 	{ .frequency = 90115200,	.driver_data = PLLVAL(141, 5, 3), 	}, 	/* FVco 720.921600 */
diff --git a/drivers/cpufreq/s3c24xx-cpufreq.c b/drivers/cpufreq/s3c24xx-cpufreq.c
index 733aa51..68ef8fd 100644
--- a/drivers/cpufreq/s3c24xx-cpufreq.c
+++ b/drivers/cpufreq/s3c24xx-cpufreq.c
@@ -648,7 +648,7 @@ late_initcall(s3c_cpufreq_initcall);
  *
  * Register the given set of PLLs with the system.
  */
-int __init s3c_plltab_register(struct cpufreq_frequency_table *plls,
+int s3c_plltab_register(struct cpufreq_frequency_table *plls,
 			       unsigned int plls_no)
 {
 	struct cpufreq_frequency_table *vals;
-- 
cgit v0.10.2


From 53254f900bd9ff1e3cc5628e76126bb403d9d160 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 23 Nov 2015 14:55:41 -0500
Subject: cgroup: make css_set pin its css's to avoid use-afer-free

A css_set represents the relationship between a set of tasks and
css's.  css_set never pinned the associated css's.  This was okay
because tasks used to always disassociate immediately (in RCU sense) -
either a task is moved to a different css_set or exits and never
accesses css_set again.

Unfortunately, afcf6c8b7544 ("cgroup: add cgroup_subsys->free() method
and use it to fix pids controller") and patches leading up to it made
a zombie hold onto its css_set and deref the associated css's on its
release.  Nothing pins the css's after exit and it might have already
been freed leading to use-after-free.

 general protection fault: 0000 [#1] PREEMPT SMP
 task: ffffffff81bf2500 ti: ffffffff81be4000 task.ti: ffffffff81be4000
 RIP: 0010:[<ffffffff810fa205>]  [<ffffffff810fa205>] pids_cancel.constprop.4+0x5/0x40
 ...
 Call Trace:
  <IRQ>
  [<ffffffff810fb02d>] ? pids_free+0x3d/0xa0
  [<ffffffff810f8893>] cgroup_free+0x53/0xe0
  [<ffffffff8104ed62>] __put_task_struct+0x42/0x130
  [<ffffffff81053557>] delayed_put_task_struct+0x77/0x130
  [<ffffffff810c6b34>] rcu_process_callbacks+0x2f4/0x820
  [<ffffffff810c6af3>] ? rcu_process_callbacks+0x2b3/0x820
  [<ffffffff81056e54>] __do_softirq+0xd4/0x460
  [<ffffffff81057369>] irq_exit+0x89/0xa0
  [<ffffffff81876212>] smp_apic_timer_interrupt+0x42/0x50
  [<ffffffff818747f4>] apic_timer_interrupt+0x84/0x90
  <EOI>
 ...
 Code: 5b 5d c3 48 89 df 48 c7 c2 c9 f9 ae 81 48 c7 c6 91 2c ae 81 e8 1d 94 0e 00 31 c0 5b 5d c3 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 <f0> 48 83 87 e0 00 00 00 ff 78 01 c3 80 3d 08 7a c1 00 00 74 02
 RIP  [<ffffffff810fa205>] pids_cancel.constprop.4+0x5/0x40
  RSP <ffff88001fc03e20>
 ---[ end trace 89a4a4b916b90c49 ]---
 Kernel panic - not syncing: Fatal exception in interrupt
 Kernel Offset: disabled
 ---[ end Kernel panic - not syncing: Fatal exception in interrupt

Fix it by making css_set pin the associate css's until its release.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Dave Jones <davej@codemonkey.org.uk>
Reported-by: Daniel Wagner <daniel.wagner@bmw-carit.de>
Link: http://lkml.kernel.org/g/20151120041836.GA18390@codemonkey.org.uk
Link: http://lkml.kernel.org/g/5652D448.3080002@bmw-carit.de
Fixes: afcf6c8b7544 ("cgroup: add cgroup_subsys->free() method and use it to fix pids controller")

diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index b316deb..5cea63f 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -760,9 +760,11 @@ static void put_css_set_locked(struct css_set *cset)
 	if (!atomic_dec_and_test(&cset->refcount))
 		return;
 
-	/* This css_set is dead. unlink it and release cgroup refcounts */
-	for_each_subsys(ss, ssid)
+	/* This css_set is dead. unlink it and release cgroup and css refs */
+	for_each_subsys(ss, ssid) {
 		list_del(&cset->e_cset_node[ssid]);
+		css_put(cset->subsys[ssid]);
+	}
 	hash_del(&cset->hlist);
 	css_set_count--;
 
@@ -1062,9 +1064,13 @@ static struct css_set *find_css_set(struct css_set *old_cset,
 	key = css_set_hash(cset->subsys);
 	hash_add(css_set_table, &cset->hlist, key);
 
-	for_each_subsys(ss, ssid)
+	for_each_subsys(ss, ssid) {
+		struct cgroup_subsys_state *css = cset->subsys[ssid];
+
 		list_add_tail(&cset->e_cset_node[ssid],
-			      &cset->subsys[ssid]->cgroup->e_csets[ssid]);
+			      &css->cgroup->e_csets[ssid]);
+		css_get(css);
+	}
 
 	spin_unlock_bh(&css_set_lock);
 
-- 
cgit v0.10.2


From c9e75f0492b248aeaa7af8991a6fc9a21506bc96 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Fri, 27 Nov 2015 19:57:19 +0100
Subject: cgroup: pids: fix race between cgroup_post_fork() and
 cgroup_migrate()

If the new child migrates to another cgroup before cgroup_post_fork() calls
subsys->fork(), then both pids_can_attach() and pids_fork() will do the same
pids_uncharge(old_pids) + pids_charge(pids) sequence twice.

Change copy_process() to call threadgroup_change_begin/threadgroup_change_end
unconditionally. percpu_down_read() is cheap and this allows other cleanups,
see the next changes.

Also, this way we can unify cgroup_threadgroup_rwsem and dup_mmap_sem.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Zefan Li <lizefan@huawei.com>
Signed-off-by: Tejun Heo <tj@kernel.org>

diff --git a/kernel/cgroup_pids.c b/kernel/cgroup_pids.c
index cdd8df4..15ef2e4 100644
--- a/kernel/cgroup_pids.c
+++ b/kernel/cgroup_pids.c
@@ -243,27 +243,10 @@ static void pids_cancel_fork(struct task_struct *task, void *priv)
 
 static void pids_fork(struct task_struct *task, void *priv)
 {
-	struct cgroup_subsys_state *css;
-	struct cgroup_subsys_state *old_css = priv;
-	struct pids_cgroup *pids;
-	struct pids_cgroup *old_pids = css_pids(old_css);
-
-	css = task_get_css(task, pids_cgrp_id);
-	pids = css_pids(css);
-
-	/*
-	 * If the association has changed, we have to revert and reapply the
-	 * charge/uncharge on the wrong hierarchy to the current one. Since
-	 * the association can only change due to an organisation event, its
-	 * okay for us to ignore the limit in this case.
-	 */
-	if (pids != old_pids) {
-		pids_uncharge(old_pids, 1);
-		pids_charge(pids, 1);
-	}
+	struct cgroup_subsys_state *css = priv;
 
+	WARN_ON(task_css_check(task, pids_cgrp_id, true) != css);
 	css_put(css);
-	css_put(old_css);
 }
 
 static void pids_free(struct task_struct *task)
diff --git a/kernel/fork.c b/kernel/fork.c
index f97f2c4..fce002e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1368,8 +1368,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	p->real_start_time = ktime_get_boot_ns();
 	p->io_context = NULL;
 	p->audit_context = NULL;
-	if (clone_flags & CLONE_THREAD)
-		threadgroup_change_begin(current);
+	threadgroup_change_begin(current);
 	cgroup_fork(p);
 #ifdef CONFIG_NUMA
 	p->mempolicy = mpol_dup(p->mempolicy);
@@ -1610,8 +1609,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 
 	proc_fork_connector(p);
 	cgroup_post_fork(p, cgrp_ss_priv);
-	if (clone_flags & CLONE_THREAD)
-		threadgroup_change_end(current);
+	threadgroup_change_end(current);
 	perf_event_fork(p);
 
 	trace_task_newtask(p, clone_flags);
@@ -1652,8 +1650,7 @@ bad_fork_cleanup_policy:
 	mpol_put(p->mempolicy);
 bad_fork_cleanup_threadgroup_lock:
 #endif
-	if (clone_flags & CLONE_THREAD)
-		threadgroup_change_end(current);
+	threadgroup_change_end(current);
 	delayacct_tsk_free(p);
 bad_fork_cleanup_count:
 	atomic_dec(&p->cred->user->processes);
-- 
cgit v0.10.2


From afbcb364bee9e7cf46c94257a82cb9760b6d254f Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Fri, 27 Nov 2015 19:57:22 +0100
Subject: cgroup: pids: kill pids_fork(), simplify pids_can_fork() and
 pids_cancel_fork()

Now that we know that the forking task can't migrate amd the child is always
moved to the same cgroup by cgroup_post_fork()->css_set_move_task() we can
change pids_can_fork() and pids_cancel_fork() to just use task_css(current).
And since we no longer need to pin this css, we can remove pid_fork().

Note: the patch uses task_css_check(true), perhaps it makes sense to add a
helper or change task_css_set_check() to take cgroup_threadgroup_rwsem into
account.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Zefan Li <lizefan@huawei.com>
Signed-off-by: Tejun Heo <tj@kernel.org>

diff --git a/kernel/cgroup_pids.c b/kernel/cgroup_pids.c
index 15ef2e4..de3359a 100644
--- a/kernel/cgroup_pids.c
+++ b/kernel/cgroup_pids.c
@@ -205,48 +205,28 @@ static void pids_cancel_attach(struct cgroup_subsys_state *css,
 	}
 }
 
+/*
+ * task_css_check(true) in pids_can_fork() and pids_cancel_fork() relies
+ * on threadgroup_change_begin() held by the copy_process().
+ */
 static int pids_can_fork(struct task_struct *task, void **priv_p)
 {
 	struct cgroup_subsys_state *css;
 	struct pids_cgroup *pids;
-	int err;
 
-	/*
-	 * Use the "current" task_css for the pids subsystem as the tentative
-	 * css. It is possible we will charge the wrong hierarchy, in which
-	 * case we will forcefully revert/reapply the charge on the right
-	 * hierarchy after it is committed to the task proper.
-	 */
-	css = task_get_css(current, pids_cgrp_id);
+	css = task_css_check(current, pids_cgrp_id, true);
 	pids = css_pids(css);
-
-	err = pids_try_charge(pids, 1);
-	if (err)
-		goto err_css_put;
-
-	*priv_p = css;
-	return 0;
-
-err_css_put:
-	css_put(css);
-	return err;
+	return pids_try_charge(pids, 1);
 }
 
 static void pids_cancel_fork(struct task_struct *task, void *priv)
 {
-	struct cgroup_subsys_state *css = priv;
-	struct pids_cgroup *pids = css_pids(css);
+	struct cgroup_subsys_state *css;
+	struct pids_cgroup *pids;
 
+	css = task_css_check(current, pids_cgrp_id, true);
+	pids = css_pids(css);
 	pids_uncharge(pids, 1);
-	css_put(css);
-}
-
-static void pids_fork(struct task_struct *task, void *priv)
-{
-	struct cgroup_subsys_state *css = priv;
-
-	WARN_ON(task_css_check(task, pids_cgrp_id, true) != css);
-	css_put(css);
 }
 
 static void pids_free(struct task_struct *task)
@@ -329,7 +309,6 @@ struct cgroup_subsys pids_cgrp_subsys = {
 	.cancel_attach 	= pids_cancel_attach,
 	.can_fork	= pids_can_fork,
 	.cancel_fork	= pids_cancel_fork,
-	.fork		= pids_fork,
 	.free		= pids_free,
 	.legacy_cftypes	= pids_files,
 	.dfl_cftypes	= pids_files,
-- 
cgit v0.10.2


From 023113d24ef9e1d2b44cb2446872b17e2b01d8b1 Mon Sep 17 00:00:00 2001
From: Xiangliang Yu <Xiangliang.Yu@amd.com>
Date: Thu, 26 Nov 2015 20:27:02 +0800
Subject: AHCI: Fix softreset failed issue of Port Multiplier

Current code doesn't update port value of Port Multiplier(PM) when
sending FIS of softreset to device, command will fail if FBS is
enabled.

There are two ways to fix the issue: the first is to disable FBS
before sending softreset command to PM device and the second is
to update port value of PM when sending command.

For the first way, i can't find any related rule in AHCI Spec. The
second way can avoid disabling FBS and has better performance.

Signed-off-by: Xiangliang Yu <Xiangliang.Yu@amd.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: stable@vger.kernel.org

diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c
index 096064c..4665512 100644
--- a/drivers/ata/libahci.c
+++ b/drivers/ata/libahci.c
@@ -1273,6 +1273,15 @@ static int ahci_exec_polled_cmd(struct ata_port *ap, int pmp,
 	ata_tf_to_fis(tf, pmp, is_cmd, fis);
 	ahci_fill_cmd_slot(pp, 0, cmd_fis_len | flags | (pmp << 12));
 
+	/* set port value for softreset of Port Multiplier */
+	if (pp->fbs_enabled && pp->fbs_last_dev != pmp) {
+		tmp = readl(port_mmio + PORT_FBS);
+		tmp &= ~(PORT_FBS_DEV_MASK | PORT_FBS_DEC);
+		tmp |= pmp << PORT_FBS_DEV_OFFSET;
+		writel(tmp, port_mmio + PORT_FBS);
+		pp->fbs_last_dev = pmp;
+	}
+
 	/* issue & wait */
 	writel(1, port_mmio + PORT_CMD_ISSUE);
 
-- 
cgit v0.10.2


From d98f1cd0a3b70ea91f1dfda3ac36c3b2e1a4d5e2 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Thu, 26 Nov 2015 12:00:59 -0500
Subject: sata_sil: disable trim

When I connect an Intel SSD to SATA SIL controller (PCI ID 1095:3114), any
TRIM command results in I/O errors being reported in the log. There is
other similar error reported with TRIM and the SIL controller:
https://bugs.centos.org/view.php?id=5880

Apparently the controller doesn't support TRIM commands. This patch
disables TRIM support on the SATA SIL controller.

ata7.00: exception Emask 0x0 SAct 0x0 SErr 0x0 action 0x0
ata7.00: BMDMA2 stat 0x50001
ata7.00: failed command: DATA SET MANAGEMENT
ata7.00: cmd 06/01:01:00:00:00/00:00:00:00:00/a0 tag 0 dma 512 out
         res 51/04:01:00:00:00/00:00:00:00:00/a0 Emask 0x1 (device error)
ata7.00: status: { DRDY ERR }
ata7.00: error: { ABRT }
ata7.00: device reported invalid CHS sector 0
sd 8:0:0:0: [sdb] tag#0 FAILED Result: hostbyte=DID_OK driverbyte=DRIVER_SENSE
sd 8:0:0:0: [sdb] tag#0 Sense Key : Illegal Request [current] [descriptor]
sd 8:0:0:0: [sdb] tag#0 Add. Sense: Unaligned write command
sd 8:0:0:0: [sdb] tag#0 CDB: Write same(16) 93 08 00 00 00 00 00 21 95 88 00 20 00 00 00 00
blk_update_request: I/O error, dev sdb, sector 2200968

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Cc: stable@kernel.org
Signed-off-by: Tejun Heo <tj@kernel.org>

diff --git a/drivers/ata/sata_sil.c b/drivers/ata/sata_sil.c
index dea6edcb..29bcff0 100644
--- a/drivers/ata/sata_sil.c
+++ b/drivers/ata/sata_sil.c
@@ -630,6 +630,9 @@ static void sil_dev_config(struct ata_device *dev)
 	unsigned int n, quirks = 0;
 	unsigned char model_num[ATA_ID_PROD_LEN + 1];
 
+	/* This controller doesn't support trim */
+	dev->horkage |= ATA_HORKAGE_NOTRIM;
+
 	ata_id_c_string(dev->id, model_num, ATA_ID_PROD, sizeof(model_num));
 
 	for (n = 0; sil_blacklist[n].product; n++)
-- 
cgit v0.10.2


From d3e41a9fc091bf5f5d4504aead899d73bd67d71e Mon Sep 17 00:00:00 2001
From: Neil Armstrong <narmstrong@baylibre.com>
Date: Thu, 12 Nov 2015 15:07:30 +0100
Subject: ARM: dts: add dm816x missing #mbox-cells

Add missing #mbox-cells for dm816x mbox DT node.

Cc: Brian Hutchinson <b.hutchman@gmail.com>
Signed-off-by: Neil Armstrong <narmstrong@baylibre.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>

diff --git a/arch/arm/boot/dts/dm816x.dtsi b/arch/arm/boot/dts/dm816x.dtsi
index 3c99cfa..a7a34e4 100644
--- a/arch/arm/boot/dts/dm816x.dtsi
+++ b/arch/arm/boot/dts/dm816x.dtsi
@@ -218,6 +218,7 @@
 			reg = <0x480c8000 0x2000>;
 			interrupts = <77>;
 			ti,hwmods = "mailbox";
+			#mbox-cells = <1>;
 			ti,mbox-num-users = <4>;
 			ti,mbox-num-fifos = <12>;
 			mbox_dsp: mbox_dsp {
-- 
cgit v0.10.2


From f1b2e7c29a52309724d13a1eb4de457406697118 Mon Sep 17 00:00:00 2001
From: Neil Armstrong <narmstrong@baylibre.com>
Date: Thu, 12 Nov 2015 15:07:35 +0100
Subject: ARM: dts: add dm816x missing spi DT dma handles

Add the missing SPI controller DMA handler in the dm816x DT
node, only properties for the two channels on four were present.

Cc: Brian Hutchinson <b.hutchman@gmail.com>
Signed-off-by: Neil Armstrong <narmstrong@baylibre.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>

diff --git a/arch/arm/boot/dts/dm816x.dtsi b/arch/arm/boot/dts/dm816x.dtsi
index a7a34e4..eee636d 100644
--- a/arch/arm/boot/dts/dm816x.dtsi
+++ b/arch/arm/boot/dts/dm816x.dtsi
@@ -280,8 +280,11 @@
 			ti,spi-num-cs = <4>;
 			ti,hwmods = "mcspi1";
 			dmas = <&edma 16 &edma 17
-				&edma 18 &edma 19>;
-			dma-names = "tx0", "rx0", "tx1", "rx1";
+				&edma 18 &edma 19
+				&edma 20 &edma 21
+				&edma 22 &edma 23>;
+			dma-names = "tx0", "rx0", "tx1", "rx1",
+				    "tx2", "rx2", "tx3", "rx3";
 		};
 
 		mmc1: mmc@48060000 {
-- 
cgit v0.10.2


From 741e96e8790cbd389d27e29bbf66de2c691fd775 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nicolas.pitre@linaro.org>
Date: Tue, 3 Nov 2015 19:46:23 -0500
Subject: imx/clk-pllv1: fix wrong do_div() usage

do_div() is meant to be used with an unsigned dividend.

Signed-off-by: Nicolas Pitre <nico@linaro.org>
Acked-by: Shawn Guo <shawnguo@kernel.org>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>

diff --git a/drivers/clk/imx/clk-pllv1.c b/drivers/clk/imx/clk-pllv1.c
index 8564e43..82fe366 100644
--- a/drivers/clk/imx/clk-pllv1.c
+++ b/drivers/clk/imx/clk-pllv1.c
@@ -52,7 +52,7 @@ static unsigned long clk_pllv1_recalc_rate(struct clk_hw *hw,
 		unsigned long parent_rate)
 {
 	struct clk_pllv1 *pll = to_clk_pllv1(hw);
-	long long ll;
+	unsigned long long ull;
 	int mfn_abs;
 	unsigned int mfi, mfn, mfd, pd;
 	u32 reg;
@@ -94,16 +94,16 @@ static unsigned long clk_pllv1_recalc_rate(struct clk_hw *hw,
 	rate = parent_rate * 2;
 	rate /= pd + 1;
 
-	ll = (unsigned long long)rate * mfn_abs;
+	ull = (unsigned long long)rate * mfn_abs;
 
-	do_div(ll, mfd + 1);
+	do_div(ull, mfd + 1);
 
 	if (mfn_is_negative(pll, mfn))
-		ll = -ll;
+		ull = (rate * mfi) - ull;
+	else
+		ull = (rate * mfi) + ull;
 
-	ll = (rate * mfi) + ll;
-
-	return ll;
+	return ull;
 }
 
 static struct clk_ops clk_pllv1_ops = {
-- 
cgit v0.10.2


From 0d2681e1f19162e5975b70edd0c2d1d256f38023 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nicolas.pitre@linaro.org>
Date: Tue, 3 Nov 2015 20:01:40 -0500
Subject: imx/clk-pllv2: fix wrong do_div() usage

do_div() is meant to be used with an unsigned dividend.

Signed-off-by: Nicolas Pitre <nico@linaro.org>
Acked-by: Shawn Guo <shawnguo@kernel.org>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>

diff --git a/drivers/clk/imx/clk-pllv2.c b/drivers/clk/imx/clk-pllv2.c
index b18f875..4aeda56 100644
--- a/drivers/clk/imx/clk-pllv2.c
+++ b/drivers/clk/imx/clk-pllv2.c
@@ -79,7 +79,7 @@ static unsigned long __clk_pllv2_recalc_rate(unsigned long parent_rate,
 {
 	long mfi, mfn, mfd, pdf, ref_clk;
 	unsigned long dbl;
-	s64 temp;
+	u64 temp;
 
 	dbl = dp_ctl & MXC_PLL_DP_CTL_DPDCK0_2_EN;
 
@@ -98,8 +98,9 @@ static unsigned long __clk_pllv2_recalc_rate(unsigned long parent_rate,
 	temp = (u64) ref_clk * abs(mfn);
 	do_div(temp, mfd + 1);
 	if (mfn < 0)
-		temp = -temp;
-	temp = (ref_clk * mfi) + temp;
+		temp = (ref_clk * mfi) - temp;
+	else
+		temp = (ref_clk * mfi) + temp;
 
 	return temp;
 }
@@ -126,7 +127,7 @@ static int __clk_pllv2_set_rate(unsigned long rate, unsigned long parent_rate,
 {
 	u32 reg;
 	long mfi, pdf, mfn, mfd = 999999;
-	s64 temp64;
+	u64 temp64;
 	unsigned long quad_parent_rate;
 
 	quad_parent_rate = 4 * parent_rate;
-- 
cgit v0.10.2


From 279104e3ade92b38198fdaead9e84bd80057693a Mon Sep 17 00:00:00 2001
From: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
Date: Mon, 23 Nov 2015 15:36:50 +0530
Subject: clk: qoriq: fix memory leak

If get_pll_div() fails we exited by returning NULL but we missed
releasing hwc.

Signed-off-by: Sudip Mukherjee <sudip@vectorindia.org>
Fixes: 0dfc86b3173f ("clk: qoriq: Move chip-specific knowledge into driver")
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>

diff --git a/drivers/clk/clk-qoriq.c b/drivers/clk/clk-qoriq.c
index 1ab0fb8..7bc1c45 100644
--- a/drivers/clk/clk-qoriq.c
+++ b/drivers/clk/clk-qoriq.c
@@ -778,8 +778,10 @@ static struct clk * __init create_one_cmux(struct clockgen *cg, int idx)
 	 */
 	clksel = (cg_in(cg, hwc->reg) & CLKSEL_MASK) >> CLKSEL_SHIFT;
 	div = get_pll_div(cg, hwc, clksel);
-	if (!div)
+	if (!div) {
+		kfree(hwc);
 		return NULL;
+	}
 
 	pct80_rate = clk_get_rate(div->clk);
 	pct80_rate *= 8;
-- 
cgit v0.10.2


From 62e345ae5b6ed977b02c24d1eaeaece062f9fb17 Mon Sep 17 00:00:00 2001
From: Felipe Balbi <balbi@ti.com>
Date: Mon, 30 Nov 2015 15:24:29 -0600
Subject: usb: dwc3: gadget: don't prestart interrupt endpoints

Because interrupt endpoints usually transmit such
small amounts of data, it seems pointless to prestart
transfers and try to get speed improvements. This
patch also sorts out a problem with CDC ECM function
where its notification endpoint gets stuck in busy
state and we continuously issue Update Transfer
commands.

Fixes: 8a1a9c9e4503 ("usb: dwc3: gadget: start transfer on XFER_COMPLETE")
Signed-off-by: Felipe Balbi <balbi@ti.com>

diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index e24a01c..a58376f 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -1078,6 +1078,7 @@ static int __dwc3_gadget_ep_queue(struct dwc3_ep *dep, struct dwc3_request *req)
 	 * little bit faster.
 	 */
 	if (!usb_endpoint_xfer_isoc(dep->endpoint.desc) &&
+			!usb_endpoint_xfer_int(dep->endpoint.desc) &&
 			!(dep->flags & DWC3_EP_BUSY)) {
 		ret = __dwc3_gadget_kick_transfer(dep, 0, true);
 		goto out;
-- 
cgit v0.10.2


From fc82792851d693e4f6da81b4b97e7af62d596af9 Mon Sep 17 00:00:00 2001
From: Grygorii Strashko <grygorii.strashko@ti.com>
Date: Thu, 26 Nov 2015 17:22:23 +0200
Subject: ARM: OMAP2+: enable REGULATOR_FIXED_VOLTAGE

Enable REGULATOR_FIXED_VOLTAGE for all OMAP2+ platforms
otherwise system can't boot from SD-card when kernel is
built for single SoC (for example, with CONFIG_SOC_DRA7XX=y only).

It's also required for almost all TI SoC's platforms.

Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>

diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig
index 5076d3f..4b4371d 100644
--- a/arch/arm/mach-omap2/Kconfig
+++ b/arch/arm/mach-omap2/Kconfig
@@ -121,6 +121,7 @@ config ARCH_OMAP2PLUS_TYPICAL
 	select NEON if CPU_V7
 	select PM
 	select REGULATOR
+	select REGULATOR_FIXED_VOLTAGE
 	select TWL4030_CORE if ARCH_OMAP3 || ARCH_OMAP4
 	select TWL4030_POWER if ARCH_OMAP3 || ARCH_OMAP4
 	select VFP
@@ -201,7 +202,6 @@ config MACH_OMAP3_PANDORA
 	depends on ARCH_OMAP3
 	default y
 	select OMAP_PACKAGE_CBB
-	select REGULATOR_FIXED_VOLTAGE if REGULATOR
 
 config MACH_NOKIA_N810
        bool
-- 
cgit v0.10.2


From 9eb8cd2b0780bca0719e754e7bdcf5f368f001bd Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Wed, 18 Nov 2015 21:46:38 +1100
Subject: of: Fix comparison of reserved memory regions

In order to check for overlapping reserved memory regions, we first need
to sort the array of memory regions. This is implemented using sort(),
and a custom comparison function __rmem_cmp().

Unfortunatley __rmem_cmp() doesn't work in all cases. Because the two
base values are phys_addr_t, they may be u64 on some platforms, in which
case subtracting one from the other and then (implicitly) casting to int
does not give us the -ve/0/+ve value we need.

This leads to incorrect reports about overlaps, eg:

  ibm,slw-image@1ffe600000 (0x0000001ffe600000--0x0000001ffe700000) overlaps with
  ibm,firmware-allocs-memory@1000000000 (0x0000001000000000--0x0000001000dc0200)

Fix it by just doing the standard double if and return 0 logic.

Fixes: ae1add247bf8 ("of: Check for overlap in reserved memory regions")
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Rob Herring <robh@kernel.org>

diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c
index be77e75..1a3556a 100644
--- a/drivers/of/of_reserved_mem.c
+++ b/drivers/of/of_reserved_mem.c
@@ -206,7 +206,13 @@ static int __init __rmem_cmp(const void *a, const void *b)
 {
 	const struct reserved_mem *ra = a, *rb = b;
 
-	return ra->base - rb->base;
+	if (ra->base < rb->base)
+		return -1;
+
+	if (ra->base > rb->base)
+		return 1;
+
+	return 0;
 }
 
 static void __init __rmem_check_for_overlap(void)
-- 
cgit v0.10.2


From 78bb2abe09d6bd15eeee7250f2fa0cb76432a8a2 Mon Sep 17 00:00:00 2001
From: "Dmitry V. Krivenok" <krivenok.dmitry@gmail.com>
Date: Mon, 30 Nov 2015 23:45:48 +0300
Subject: of: do not use 0x in front of %pa

Signed-off-by: Dmitry V. Krivenok <krivenok.dmitry@gmail.com>
Signed-off-by: Rob Herring <robh@kernel.org>

diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index d243029..1bbe3a9 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -1041,7 +1041,7 @@ void __init __weak early_init_dt_add_memory_arch(u64 base, u64 size)
 int __init __weak early_init_dt_reserve_memory_arch(phys_addr_t base,
 					phys_addr_t size, bool nomap)
 {
-	pr_err("Reserved memory not supported, ignoring range 0x%pa - 0x%pa%s\n",
+	pr_err("Reserved memory not supported, ignoring range %pa - %pa%s\n",
 		  &base, &size, nomap ? " (nomap)" : "");
 	return -ENOSYS;
 }
-- 
cgit v0.10.2


From ba85edbe0d39710dc74a2a9c6f90929b5722a53b Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Mon, 30 Nov 2015 15:22:37 +0900
Subject: of/address: fix typo in comment block of of_translate_one()

Remove the "not" before "cannot".

I am fixing the comment block style while I am here.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Rob Herring <robh@kernel.org>

diff --git a/drivers/of/address.c b/drivers/of/address.c
index cd53fe4..9582c57 100644
--- a/drivers/of/address.c
+++ b/drivers/of/address.c
@@ -485,9 +485,10 @@ static int of_translate_one(struct device_node *parent, struct of_bus *bus,
 	int rone;
 	u64 offset = OF_BAD_ADDR;
 
-	/* Normally, an absence of a "ranges" property means we are
+	/*
+	 * Normally, an absence of a "ranges" property means we are
 	 * crossing a non-translatable boundary, and thus the addresses
-	 * below the current not cannot be converted to CPU physical ones.
+	 * below the current cannot be converted to CPU physical ones.
 	 * Unfortunately, while this is very clear in the spec, it's not
 	 * what Apple understood, and they do have things like /uni-n or
 	 * /ht nodes with no "ranges" property and a lot of perfectly
-- 
cgit v0.10.2


From e80cf2e50bfabb14dd3667b2360a393dda3edc3f Mon Sep 17 00:00:00 2001
From: Julia Lawall <Julia.Lawall@lip6.fr>
Date: Wed, 21 Oct 2015 22:41:40 +0200
Subject: clk: scpi: add missing of_node_put

for_each_available_child_of_node performs an of_node_get on each iteration,
so a break out of the loop requires an of_node_put.

The semantic patch that fixes this problem is as follows
(http://coccinelle.lip6.fr):

// <smpl>
@@
expression root,e;
local idexpression child;
@@

 for_each_available_child_of_node(root, child) {
   ... when != of_node_put(child)
       when != e = child
(
   return child;
|
+  of_node_put(child);
?  return ...;
)
   ...
 }
// </smpl>

Signed-off-by: Julia Lawall <Julia.Lawall@lip6.fr>
Acked-by: Sudeep Holla <sudeep.holla@arm.com>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>

diff --git a/drivers/clk/clk-scpi.c b/drivers/clk/clk-scpi.c
index 0b501a9..cd0f272 100644
--- a/drivers/clk/clk-scpi.c
+++ b/drivers/clk/clk-scpi.c
@@ -292,6 +292,7 @@ static int scpi_clocks_probe(struct platform_device *pdev)
 		ret = scpi_clk_add(dev, child, match);
 		if (ret) {
 			scpi_clocks_remove(pdev);
+			of_node_put(child);
 			return ret;
 		}
 	}
-- 
cgit v0.10.2


From df36c5bede207f734e4750beb2b14fb892050280 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adrien=20Verg=C3=A9?= <adrienverge@gmail.com>
Date: Tue, 24 Nov 2015 16:02:04 +0100
Subject: USB: quirks: Fix another ELAN touchscreen
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Like other buggy models that had their fixes [1], the touchscreen with
id 04f3:21b8 from ELAN Microelectronics needs the device-qualifier
quirk. Otherwise, it fails to respond, blocks the boot for a random
amount of time and pollutes dmesg with:

[ 2887.373196] usb 1-5: new full-speed USB device number 41 using xhci_hcd
[ 2889.502000] usb 1-5: unable to read config index 0 descriptor/start: -71
[ 2889.502005] usb 1-5: can't read configurations, error -71
[ 2889.654571] usb 1-5: new full-speed USB device number 42 using xhci_hcd
[ 2891.783438] usb 1-5: unable to read config index 0 descriptor/start: -71
[ 2891.783443] usb 1-5: can't read configurations, error -71

[1]: See commits c68929f, 876af5d, d749947, a32c99e and dc703ec.

Tested-by: Adrien Vergé <adrienverge@gmail.com>
Signed-off-by: Adrien Vergé <adrienverge@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c
index f5a3819..fcd6ac0 100644
--- a/drivers/usb/core/quirks.c
+++ b/drivers/usb/core/quirks.c
@@ -125,6 +125,9 @@ static const struct usb_device_id usb_quirk_list[] = {
 	{ USB_DEVICE(0x04f3, 0x016f), .driver_info =
 			USB_QUIRK_DEVICE_QUALIFIER },
 
+	{ USB_DEVICE(0x04f3, 0x21b8), .driver_info =
+			USB_QUIRK_DEVICE_QUALIFIER },
+
 	/* Roland SC-8820 */
 	{ USB_DEVICE(0x0582, 0x0007), .driver_info = USB_QUIRK_RESET_RESUME },
 
-- 
cgit v0.10.2


From 9fa62b1a31c96715aef34f25000e882ed4ac4876 Mon Sep 17 00:00:00 2001
From: Dmitry Katsubo <dmitry.katsubo@gmail.com>
Date: Fri, 20 Nov 2015 01:30:44 +0100
Subject: usb-storage: Fix scsi-sd failure "Invalid field in cdb" for USB
 adapter JMicron

The patch extends the family of SATA-to-USB JMicron adapters that need
FUA to be disabled and applies the same policy for uas driver.
See details in http://unix.stackexchange.com/questions/237204/

Signed-off-by: Dmitry Katsubo <dmitry.katsubo@gmail.com>
Tested-by: Dmitry Katsubo <dmitry.katsubo@gmail.com>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c
index e691516..5c66d3f 100644
--- a/drivers/usb/storage/uas.c
+++ b/drivers/usb/storage/uas.c
@@ -796,6 +796,10 @@ static int uas_slave_configure(struct scsi_device *sdev)
 	if (devinfo->flags & US_FL_NO_REPORT_OPCODES)
 		sdev->no_report_opcodes = 1;
 
+	/* A few buggy USB-ATA bridges don't understand FUA */
+	if (devinfo->flags & US_FL_BROKEN_FUA)
+		sdev->broken_fua = 1;
+
 	scsi_change_queue_depth(sdev, devinfo->qdepth - 2);
 	return 0;
 }
diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h
index 6b24791..7ffe420 100644
--- a/drivers/usb/storage/unusual_devs.h
+++ b/drivers/usb/storage/unusual_devs.h
@@ -1987,7 +1987,7 @@ UNUSUAL_DEV(  0x14cd, 0x6600, 0x0201, 0x0201,
 		US_FL_IGNORE_RESIDUE ),
 
 /* Reported by Michael Büsch <m@bues.ch> */
-UNUSUAL_DEV(  0x152d, 0x0567, 0x0114, 0x0114,
+UNUSUAL_DEV(  0x152d, 0x0567, 0x0114, 0x0116,
 		"JMicron",
 		"USB to ATA/ATAPI Bridge",
 		USB_SC_DEVICE, USB_PR_DEVICE, NULL,
diff --git a/drivers/usb/storage/unusual_uas.h b/drivers/usb/storage/unusual_uas.h
index c85ea53..ccc113e 100644
--- a/drivers/usb/storage/unusual_uas.h
+++ b/drivers/usb/storage/unusual_uas.h
@@ -132,7 +132,7 @@ UNUSUAL_DEV(0x152d, 0x0567, 0x0000, 0x9999,
 		"JMicron",
 		"JMS567",
 		USB_SC_DEVICE, USB_PR_DEVICE, NULL,
-		US_FL_NO_REPORT_OPCODES),
+		US_FL_BROKEN_FUA | US_FL_NO_REPORT_OPCODES),
 
 /* Reported-by: Hans de Goede <hdegoede@redhat.com> */
 UNUSUAL_DEV(0x2109, 0x0711, 0x0000, 0x9999,
-- 
cgit v0.10.2


From 9225c0b7b976dd9ceac2b80727a60d8fcb906a62 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ZenIV.linux.org.uk>
Date: Tue, 1 Dec 2015 19:52:12 +0000
Subject: staging: lustre: echo_copy.._lsm() dereferences userland pointers
 directly

missing get_user()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

diff --git a/drivers/staging/lustre/lustre/obdecho/echo_client.c b/drivers/staging/lustre/lustre/obdecho/echo_client.c
index f61ef66..a4a9a76 100644
--- a/drivers/staging/lustre/lustre/obdecho/echo_client.c
+++ b/drivers/staging/lustre/lustre/obdecho/echo_client.c
@@ -1270,6 +1270,7 @@ static int
 echo_copyout_lsm(struct lov_stripe_md *lsm, void *_ulsm, int ulsm_nob)
 {
 	struct lov_stripe_md *ulsm = _ulsm;
+	struct lov_oinfo **p;
 	int nob, i;
 
 	nob = offsetof(struct lov_stripe_md, lsm_oinfo[lsm->lsm_stripe_count]);
@@ -1279,9 +1280,10 @@ echo_copyout_lsm(struct lov_stripe_md *lsm, void *_ulsm, int ulsm_nob)
 	if (copy_to_user(ulsm, lsm, sizeof(*ulsm)))
 		return -EFAULT;
 
-	for (i = 0; i < lsm->lsm_stripe_count; i++) {
-		if (copy_to_user(ulsm->lsm_oinfo[i], lsm->lsm_oinfo[i],
-				      sizeof(lsm->lsm_oinfo[0])))
+	for (i = 0, p = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++, p++) {
+		struct lov_oinfo __user *up;
+		if (get_user(up, ulsm->lsm_oinfo + i) ||
+		    copy_to_user(up, *p, sizeof(struct lov_oinfo)))
 			return -EFAULT;
 	}
 	return 0;
@@ -1289,9 +1291,10 @@ echo_copyout_lsm(struct lov_stripe_md *lsm, void *_ulsm, int ulsm_nob)
 
 static int
 echo_copyin_lsm(struct echo_device *ed, struct lov_stripe_md *lsm,
-		 void *ulsm, int ulsm_nob)
+		struct lov_stripe_md __user *ulsm, int ulsm_nob)
 {
 	struct echo_client_obd *ec = ed->ed_ec;
+	struct lov_oinfo **p;
 	int		     i;
 
 	if (ulsm_nob < sizeof(*lsm))
@@ -1306,11 +1309,10 @@ echo_copyin_lsm(struct echo_device *ed, struct lov_stripe_md *lsm,
 	    ((__u64)lsm->lsm_stripe_size * lsm->lsm_stripe_count > ~0UL))
 		return -EINVAL;
 
-	for (i = 0; i < lsm->lsm_stripe_count; i++) {
-		if (copy_from_user(lsm->lsm_oinfo[i],
-				       ((struct lov_stripe_md *)ulsm)-> \
-				       lsm_oinfo[i],
-				       sizeof(lsm->lsm_oinfo[0])))
+	for (i = 0, p = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++, p++) {
+		struct lov_oinfo __user *up;
+		if (get_user(up, ulsm->lsm_oinfo + i) ||
+		    copy_from_user(*p, up, sizeof(struct lov_oinfo)))
 			return -EFAULT;
 	}
 	return 0;
-- 
cgit v0.10.2


From d5d4fdd86f5759924fe54efa793e22eccf508db6 Mon Sep 17 00:00:00 2001
From: Guillaume Delbergue <guillaume.delbergue@greensocs.com>
Date: Tue, 1 Dec 2015 18:55:51 +0100
Subject: irqchip/versatile-fpga: Fix PCI IRQ mapping on Versatile PB

This patch is specifically for PCI support on the Versatile PB board using
a DT. Currently, the dynamic IRQ mapping is broken when using DTs. For
example, on QEMU, the SCSI driver is unable to request the IRQ. To fix
this issue, this patch replaces the current dynamic mechanism with a
static value as is done in the non-DT case.

Signed-off-by: Guillaume Delbergue <guillaume.delbergue@greensocs.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Cc: stable@vger.kernel.org

diff --git a/drivers/irqchip/irq-versatile-fpga.c b/drivers/irqchip/irq-versatile-fpga.c
index 598ab3f..cadf104 100644
--- a/drivers/irqchip/irq-versatile-fpga.c
+++ b/drivers/irqchip/irq-versatile-fpga.c
@@ -210,7 +210,12 @@ int __init fpga_irq_of_init(struct device_node *node,
 		parent_irq = -1;
 	}
 
+#ifdef CONFIG_ARCH_VERSATILE
+	fpga_irq_init(base, node->name, IRQ_SIC_START, parent_irq, valid_mask,
+				  node);
+#else
 	fpga_irq_init(base, node->name, 0, parent_irq, valid_mask, node);
+#endif
 
 	writel(clear_mask, base + IRQ_ENABLE_CLEAR);
 	writel(clear_mask, base + FIQ_ENABLE_CLEAR);
-- 
cgit v0.10.2


From 33bd2dd03dd0bfa1130d11062a9e5f40d0cf1d3f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adrien=20Verg=C3=A9?= <adrienverge@gmail.com>
Date: Tue, 1 Dec 2015 19:56:48 +0100
Subject: USB: quirks: Apply ALWAYS_POLL to all ELAN devices
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

All ELAN hid devices seem to require the ALWAYS_POLL quirk. Let's use
this quirk for all devices from this vendor, rather than maintaining a
list of all its known product IDs.

Tested-by: Adrien Vergé <adrienverge@gmail.com>
Signed-off-by: Adrien Vergé <adrienverge@gmail.com>
Reviewed-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Reviewed-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index ac1feea..3c7e0c3 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -316,11 +316,6 @@
 #define USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH_A001	0xa001
 
 #define USB_VENDOR_ID_ELAN		0x04f3
-#define USB_DEVICE_ID_ELAN_TOUCHSCREEN	0x0089
-#define USB_DEVICE_ID_ELAN_TOUCHSCREEN_009B	0x009b
-#define USB_DEVICE_ID_ELAN_TOUCHSCREEN_0103	0x0103
-#define USB_DEVICE_ID_ELAN_TOUCHSCREEN_010c	0x010c
-#define USB_DEVICE_ID_ELAN_TOUCHSCREEN_016F	0x016f
 
 #define USB_VENDOR_ID_ELECOM		0x056e
 #define USB_DEVICE_ID_ELECOM_BM084	0x0061
diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c
index 94bb137..78fc94a 100644
--- a/drivers/hid/usbhid/hid-quirks.c
+++ b/drivers/hid/usbhid/hid-quirks.c
@@ -72,11 +72,7 @@ static const struct hid_blacklist {
 	{ USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_PIXART_USB_OPTICAL_MOUSE, HID_QUIRK_ALWAYS_POLL },
 	{ USB_VENDOR_ID_DMI, USB_DEVICE_ID_DMI_ENC, HID_QUIRK_NOGET },
 	{ USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_WIIU, HID_QUIRK_MULTI_INPUT },
-	{ USB_VENDOR_ID_ELAN, USB_DEVICE_ID_ELAN_TOUCHSCREEN, HID_QUIRK_ALWAYS_POLL },
-	{ USB_VENDOR_ID_ELAN, USB_DEVICE_ID_ELAN_TOUCHSCREEN_009B, HID_QUIRK_ALWAYS_POLL },
-	{ USB_VENDOR_ID_ELAN, USB_DEVICE_ID_ELAN_TOUCHSCREEN_0103, HID_QUIRK_ALWAYS_POLL },
-	{ USB_VENDOR_ID_ELAN, USB_DEVICE_ID_ELAN_TOUCHSCREEN_010c, HID_QUIRK_ALWAYS_POLL },
-	{ USB_VENDOR_ID_ELAN, USB_DEVICE_ID_ELAN_TOUCHSCREEN_016F, HID_QUIRK_ALWAYS_POLL },
+	{ USB_VENDOR_ID_ELAN, HID_ANY_ID, HID_QUIRK_ALWAYS_POLL },
 	{ USB_VENDOR_ID_ELO, USB_DEVICE_ID_ELO_TS2700, HID_QUIRK_NOGET },
 	{ USB_VENDOR_ID_FORMOSA, USB_DEVICE_ID_FORMOSA_IR_RECEIVER, HID_QUIRK_NO_INIT_REPORTS },
 	{ USB_VENDOR_ID_FREESCALE, USB_DEVICE_ID_FREESCALE_MX28, HID_QUIRK_NOGET },
@@ -339,7 +335,8 @@ static const struct hid_blacklist *usbhid_exists_squirk(const u16 idVendor,
 
 	for (; hid_blacklist[n].idVendor; n++)
 		if (hid_blacklist[n].idVendor == idVendor &&
-				hid_blacklist[n].idProduct == idProduct)
+			(hid_blacklist[n].idProduct == (__u16) HID_ANY_ID ||
+				hid_blacklist[n].idProduct == idProduct))
 			bl_entry = &hid_blacklist[n];
 
 	if (bl_entry != NULL)
-- 
cgit v0.10.2


From d66e5139aee7d8379fc135b00b9ac7c66f9d4c74 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 20 Nov 2015 23:20:28 +0100
Subject: ARM: ixp4xx: fix read{b,w,l} return types

On ixp4xx, the readl() function returns an 'unsigned long' output
when indirect I/O is used. This is unlike any other platform, and
it causes lots of harmless compiler warnings, such as:

drivers/ata/libahci.c: In function 'ahci_show_host_version':
drivers/ata/libahci.c:254:22: warning: format '%x' expects argument of type 'unsigned int', but argument 3 has type 'long unsigned int' [-Wformat=]
drivers/block/mtip32xx/mtip32xx.c: In function 'mtip_hw_read_registers':
drivers/block/mtip32xx/mtip32xx.c:2602:31: warning: format '%X' expects argument of type 'unsigned int', but argument 3 has type 'long unsigned int' [-Wformat=]
drivers/block/cciss.c: In function 'print_cfg_table':
drivers/block/cciss.c:3845:25: warning: format '%d' expects argument of type 'int', but argument 4 has type 'long unsigned int' [-Wformat=]

This changes all six of the ixp4xx specific I/O read functions
to return the same types that we have in the normal asm/io.h,
to avoid the warnings.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Krzysztof Halasa <khalasa@piap.pl>

diff --git a/arch/arm/mach-ixp4xx/include/mach/io.h b/arch/arm/mach-ixp4xx/include/mach/io.h
index b024390..7a0c13b 100644
--- a/arch/arm/mach-ixp4xx/include/mach/io.h
+++ b/arch/arm/mach-ixp4xx/include/mach/io.h
@@ -143,7 +143,7 @@ static inline void __indirect_writesl(volatile void __iomem *bus_addr,
 		writel(*vaddr++, bus_addr);
 }
 
-static inline unsigned char __indirect_readb(const volatile void __iomem *p)
+static inline u8 __indirect_readb(const volatile void __iomem *p)
 {
 	u32 addr = (u32)p;
 	u32 n, byte_enables, data;
@@ -166,7 +166,7 @@ static inline void __indirect_readsb(const volatile void __iomem *bus_addr,
 		*vaddr++ = readb(bus_addr);
 }
 
-static inline unsigned short __indirect_readw(const volatile void __iomem *p)
+static inline u16 __indirect_readw(const volatile void __iomem *p)
 {
 	u32 addr = (u32)p;
 	u32 n, byte_enables, data;
@@ -189,7 +189,7 @@ static inline void __indirect_readsw(const volatile void __iomem *bus_addr,
 		*vaddr++ = readw(bus_addr);
 }
 
-static inline unsigned long __indirect_readl(const volatile void __iomem *p)
+static inline u32 __indirect_readl(const volatile void __iomem *p)
 {
 	u32 addr = (__force u32)p;
 	u32 data;
@@ -350,7 +350,7 @@ static inline void insl(u32 io_addr, void *p, u32 count)
 					((unsigned long)p <= (PIO_MASK + PIO_OFFSET)))
 
 #define	ioread8(p)			ioread8(p)
-static inline unsigned int ioread8(const void __iomem *addr)
+static inline u8 ioread8(const void __iomem *addr)
 {
 	unsigned long port = (unsigned long __force)addr;
 	if (__is_io_address(port))
@@ -378,7 +378,7 @@ static inline void ioread8_rep(const void __iomem *addr, void *vaddr, u32 count)
 }
 
 #define	ioread16(p)			ioread16(p)
-static inline unsigned int ioread16(const void __iomem *addr)
+static inline u16 ioread16(const void __iomem *addr)
 {
 	unsigned long port = (unsigned long __force)addr;
 	if (__is_io_address(port))
@@ -407,7 +407,7 @@ static inline void ioread16_rep(const void __iomem *addr, void *vaddr,
 }
 
 #define	ioread32(p)			ioread32(p)
-static inline unsigned int ioread32(const void __iomem *addr)
+static inline u32 ioread32(const void __iomem *addr)
 {
 	unsigned long port = (unsigned long __force)addr;
 	if (__is_io_address(port))
-- 
cgit v0.10.2


From 464ad8c43a9ead98c2b0eaed86bea727f2ad106e Mon Sep 17 00:00:00 2001
From: Hans Yang <hansy@nvidia.com>
Date: Tue, 1 Dec 2015 16:54:59 +0800
Subject: usb: core : hub: Fix BOS 'NULL pointer' kernel panic

When a USB 3.0 mass storage device is disconnected in transporting
state, storage device driver may handle it as a transport error and
reset the device by invoking usb_reset_and_verify_device()
and following could happen:

in usb_reset_and_verify_device():
   udev->bos = NULL;

For U1/U2 enabled devices, driver will disable LPM, and in some
conditions:
   from usb_unlocked_disable_lpm()
    --> usb_disable_lpm()
    --> usb_enable_lpm()
        udev->bos->ss_cap->bU1devExitLat;

And it causes 'NULL pointer' and 'kernel panic':

[  157.976257] Unable to handle kernel NULL pointer dereference
at virtual address 00000010
...
[  158.026400] PC is at usb_enable_link_state+0x34/0x2e0
[  158.031442] LR is at usb_enable_lpm+0x98/0xac
...
[  158.137368] [<ffffffc0006a1cac>] usb_enable_link_state+0x34/0x2e0
[  158.143451] [<ffffffc0006a1fec>] usb_enable_lpm+0x94/0xac
[  158.148840] [<ffffffc0006a20e8>] usb_disable_lpm+0xa8/0xb4
...
[  158.214954] Kernel panic - not syncing: Fatal exception

This commit moves 'udev->bos = NULL' behind usb_unlocked_disable_lpm()
to prevent from NULL pointer access.

Issue can be reproduced by following setup:
1) A SS pen drive behind a SS hub connected to the host.
2) Transporting data between the pen drive and the host.
3) Abruptly disconnect hub and pen drive from host.
4) With a chance it crashes.

Signed-off-by: Hans Yang <hansy@nvidia.com>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index bdeadc1..585c3cb 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -5326,9 +5326,6 @@ static int usb_reset_and_verify_device(struct usb_device *udev)
 	if (udev->usb2_hw_lpm_enabled == 1)
 		usb_set_usb2_hardware_lpm(udev, 0);
 
-	bos = udev->bos;
-	udev->bos = NULL;
-
 	/* Disable LPM and LTM while we reset the device and reinstall the alt
 	 * settings.  Device-initiated LPM settings, and system exit latency
 	 * settings are cleared when the device is reset, so we have to set
@@ -5337,15 +5334,18 @@ static int usb_reset_and_verify_device(struct usb_device *udev)
 	ret = usb_unlocked_disable_lpm(udev);
 	if (ret) {
 		dev_err(&udev->dev, "%s Failed to disable LPM\n.", __func__);
-		goto re_enumerate;
+		goto re_enumerate_no_bos;
 	}
 	ret = usb_disable_ltm(udev);
 	if (ret) {
 		dev_err(&udev->dev, "%s Failed to disable LTM\n.",
 				__func__);
-		goto re_enumerate;
+		goto re_enumerate_no_bos;
 	}
 
+	bos = udev->bos;
+	udev->bos = NULL;
+
 	for (i = 0; i < SET_CONFIG_TRIES; ++i) {
 
 		/* ep0 maxpacket size may change; let the HCD know about it.
@@ -5442,10 +5442,11 @@ done:
 	return 0;
 
 re_enumerate:
-	/* LPM state doesn't matter when we're about to destroy the device. */
-	hub_port_logical_disconnect(parent_hub, port1);
 	usb_release_bos_descriptor(udev);
 	udev->bos = bos;
+re_enumerate_no_bos:
+	/* LPM state doesn't matter when we're about to destroy the device. */
+	hub_port_logical_disconnect(parent_hub, port1);
 	return -ENODEV;
 }
 
-- 
cgit v0.10.2


From f9fa1887dcf26bd346665a6ae3d3f53dec54cba1 Mon Sep 17 00:00:00 2001
From: Alexey Khoroshilov <khoroshilov@ispras.ru>
Date: Sat, 21 Nov 2015 00:36:44 +0300
Subject: USB: whci-hcd: add check for dma mapping error

qset_fill_page_list() do not check for dma mapping errors.

Found by Linux Driver Verification project (linuxtesting.org).

Signed-off-by: Alexey Khoroshilov <khoroshilov@ispras.ru>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

diff --git a/drivers/usb/host/whci/qset.c b/drivers/usb/host/whci/qset.c
index dc31c42..9f1c053 100644
--- a/drivers/usb/host/whci/qset.c
+++ b/drivers/usb/host/whci/qset.c
@@ -377,6 +377,10 @@ static int qset_fill_page_list(struct whc *whc, struct whc_std *std, gfp_t mem_f
 	if (std->pl_virt == NULL)
 		return -ENOMEM;
 	std->dma_addr = dma_map_single(whc->wusbhc.dev, std->pl_virt, pl_len, DMA_TO_DEVICE);
+	if (dma_mapping_error(whc->wusbhc.dev, std->dma_addr)) {
+		kfree(std->pl_virt);
+		return -EFAULT;
+	}
 
 	for (p = 0; p < std->num_pointers; p++) {
 		std->pl_virt[p].buf_ptr = cpu_to_le64(dma_addr);
-- 
cgit v0.10.2


From 5377adb092664d336ac212499961cac5e8728794 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Wed, 18 Nov 2015 02:01:21 +0000
Subject: usb: Use the USB_SS_MULT() macro to decode burst multiplier for log
 message

usb_parse_ss_endpoint_companion() now decodes the burst multiplier
correctly in order to check that it's <= 3, but still uses the wrong
expression if warning that it's > 3.

Fixes: ff30cbc8da42 ("usb: Use the USB_SS_MULT() macro to get the ...")
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c
index 7caff02..5050760 100644
--- a/drivers/usb/core/config.c
+++ b/drivers/usb/core/config.c
@@ -115,7 +115,8 @@ static void usb_parse_ss_endpoint_companion(struct device *ddev, int cfgno,
 		   USB_SS_MULT(desc->bmAttributes) > 3) {
 		dev_warn(ddev, "Isoc endpoint has Mult of %d in "
 				"config %d interface %d altsetting %d ep %d: "
-				"setting to 3\n", desc->bmAttributes + 1,
+				"setting to 3\n",
+				USB_SS_MULT(desc->bmAttributes),
 				cfgno, inum, asnum, ep->desc.bEndpointAddress);
 		ep->ss_ep_comp.bmAttributes = 2;
 	}
-- 
cgit v0.10.2


From 3b60a26fdc3b32c00d750458df33d414e8f924ce Mon Sep 17 00:00:00 2001
From: Stefan Agner <stefan@agner.ch>
Date: Sat, 17 Oct 2015 21:05:20 -0700
Subject: ARM: imx: clk-vf610: fix SAI clock tree

The Synchronous Audio Interface (SAI) instances are clocked by
independent clocks: The bus clock and the audio clock (as shown in
Figure 51-1 in the Vybrid Reference Manual). The clock gates in
CCGR0/CCGR1 for SAI0 through SAI3 are bus clock gates, as access
tests to the registers with/without gating those clocks have shown.
The audio clock is gated by the SAIx_EN gates in CCM_CSCDR1,
followed by a clock divider (SAIx_DIV). Currently, the parent of
the bus clock gates has been assigned to SAIx_DIV, which is not
involved in the bus clock path for the SAI instances (see chapter
9.10.12, SAI clocking in the Vybrid Reference Manual).

Fix this by define the parent clock of VF610_CLK_SAIx to be the bus
clock.

If the driver needs the audio clock (when used in master mode), a
fixed device tree is required which assign the audio clock properly
to VF610_CLK_SAIx_DIV.

Signed-off-by: Stefan Agner <stefan@agner.ch>
Acked-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>

diff --git a/drivers/clk/imx/clk-vf610.c b/drivers/clk/imx/clk-vf610.c
index d1b1c95..0a94d96 100644
--- a/drivers/clk/imx/clk-vf610.c
+++ b/drivers/clk/imx/clk-vf610.c
@@ -335,22 +335,22 @@ static void __init vf610_clocks_init(struct device_node *ccm_node)
 	clk[VF610_CLK_SAI0_SEL] = imx_clk_mux("sai0_sel", CCM_CSCMR1, 0, 2, sai_sels, 4);
 	clk[VF610_CLK_SAI0_EN] = imx_clk_gate("sai0_en", "sai0_sel", CCM_CSCDR1, 16);
 	clk[VF610_CLK_SAI0_DIV] = imx_clk_divider("sai0_div", "sai0_en", CCM_CSCDR1, 0, 4);
-	clk[VF610_CLK_SAI0] = imx_clk_gate2("sai0", "sai0_div", CCM_CCGR0, CCM_CCGRx_CGn(15));
+	clk[VF610_CLK_SAI0] = imx_clk_gate2("sai0", "ipg_bus", CCM_CCGR0, CCM_CCGRx_CGn(15));
 
 	clk[VF610_CLK_SAI1_SEL] = imx_clk_mux("sai1_sel", CCM_CSCMR1, 2, 2, sai_sels, 4);
 	clk[VF610_CLK_SAI1_EN] = imx_clk_gate("sai1_en", "sai1_sel", CCM_CSCDR1, 17);
 	clk[VF610_CLK_SAI1_DIV] = imx_clk_divider("sai1_div", "sai1_en", CCM_CSCDR1, 4, 4);
-	clk[VF610_CLK_SAI1] = imx_clk_gate2("sai1", "sai1_div", CCM_CCGR1, CCM_CCGRx_CGn(0));
+	clk[VF610_CLK_SAI1] = imx_clk_gate2("sai1", "ipg_bus", CCM_CCGR1, CCM_CCGRx_CGn(0));
 
 	clk[VF610_CLK_SAI2_SEL] = imx_clk_mux("sai2_sel", CCM_CSCMR1, 4, 2, sai_sels, 4);
 	clk[VF610_CLK_SAI2_EN] = imx_clk_gate("sai2_en", "sai2_sel", CCM_CSCDR1, 18);
 	clk[VF610_CLK_SAI2_DIV] = imx_clk_divider("sai2_div", "sai2_en", CCM_CSCDR1, 8, 4);
-	clk[VF610_CLK_SAI2] = imx_clk_gate2("sai2", "sai2_div", CCM_CCGR1, CCM_CCGRx_CGn(1));
+	clk[VF610_CLK_SAI2] = imx_clk_gate2("sai2", "ipg_bus", CCM_CCGR1, CCM_CCGRx_CGn(1));
 
 	clk[VF610_CLK_SAI3_SEL] = imx_clk_mux("sai3_sel", CCM_CSCMR1, 6, 2, sai_sels, 4);
 	clk[VF610_CLK_SAI3_EN] = imx_clk_gate("sai3_en", "sai3_sel", CCM_CSCDR1, 19);
 	clk[VF610_CLK_SAI3_DIV] = imx_clk_divider("sai3_div", "sai3_en", CCM_CSCDR1, 12, 4);
-	clk[VF610_CLK_SAI3] = imx_clk_gate2("sai3", "sai3_div", CCM_CCGR1, CCM_CCGRx_CGn(2));
+	clk[VF610_CLK_SAI3] = imx_clk_gate2("sai3", "ipg_bus", CCM_CCGR1, CCM_CCGRx_CGn(2));
 
 	clk[VF610_CLK_NFC_SEL] = imx_clk_mux("nfc_sel", CCM_CSCMR1, 12, 2, nfc_sels, 4);
 	clk[VF610_CLK_NFC_EN] = imx_clk_gate("nfc_en", "nfc_sel", CCM_CSCDR2, 9);
-- 
cgit v0.10.2


From 531ee1f4ea7fb5634cb79824e6df6be83596a054 Mon Sep 17 00:00:00 2001
From: Stefan Agner <stefan@agner.ch>
Date: Sat, 17 Oct 2015 21:05:21 -0700
Subject: ARM: dts: vf610: fix clock definition for SAI2

So far, only the bus clock has been assigned, but in reality the
SAI IP has for clock inputs. The driver has been updated to
make use of the additional clock inputs by c3ecef21c3f2 ("ASoC:
fsl_sai: add sai master mode support"). Due to a bug in the
clock tree, the audio clock has been enabled none the less by
the specified bus clock (see "ARM: imx: clk-vf610: fix SAI
clock tree"), which made master mode even without the proper
clock assigned working.

This patch completes the clock definition for SAI2. On Vybrid,
only two MCLK out of the four options are available (the first
being the bus clock itself). See chapter 8.10.1.2.3 of the
Vybrid Reference manual ("SAI transmitter and receiver options
for MCLK selection"). Note: The audio clocks are only required
in master mode.

Signed-off-by: Stefan Agner <stefan@agner.ch>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>

diff --git a/arch/arm/boot/dts/vfxxx.dtsi b/arch/arm/boot/dts/vfxxx.dtsi
index 0d5acc2..3cd1b27 100644
--- a/arch/arm/boot/dts/vfxxx.dtsi
+++ b/arch/arm/boot/dts/vfxxx.dtsi
@@ -178,8 +178,10 @@
 				compatible = "fsl,vf610-sai";
 				reg = <0x40031000 0x1000>;
 				interrupts = <86 IRQ_TYPE_LEVEL_HIGH>;
-				clocks = <&clks VF610_CLK_SAI2>;
-				clock-names = "sai";
+				clocks = <&clks VF610_CLK_SAI2>,
+					<&clks VF610_CLK_SAI2_DIV>,
+					<&clks 0>, <&clks 0>;
+				clock-names = "bus", "mclk1", "mclk2", "mclk3";
 				dma-names = "tx", "rx";
 				dmas = <&edma0 0 21>,
 					<&edma0 0 20>;
-- 
cgit v0.10.2


From 30ce6e1cc5a0f781d60227e9096c86e188d2c2bd Mon Sep 17 00:00:00 2001
From: Mike Snitzer <snitzer@redhat.com>
Date: Mon, 23 Nov 2015 16:24:45 -0500
Subject: dm btree: fix leak of bufio-backed block in btree_split_sibling error
 path

The block allocated at the start of btree_split_sibling() is never
released if later insert_at() fails.

Fix this by releasing the previously allocated bufio block using
unlock_block().

Reported-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Cc: stable@vger.kernel.org

diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c
index c573402..0918a7c 100644
--- a/drivers/md/persistent-data/dm-btree.c
+++ b/drivers/md/persistent-data/dm-btree.c
@@ -473,8 +473,10 @@ static int btree_split_sibling(struct shadow_spine *s, unsigned parent_index,
 
 	r = insert_at(sizeof(__le64), pn, parent_index + 1,
 		      le64_to_cpu(rn->keys[0]), &location);
-	if (r)
+	if (r) {
+		unlock_block(s->info, right);
 		return r;
+	}
 
 	if (key < le64_to_cpu(rn->keys[0])) {
 		unlock_block(s->info, right);
-- 
cgit v0.10.2


From 993ceab91986e2e737ce9a3e23bebc8cce649240 Mon Sep 17 00:00:00 2001
From: Joe Thornber <ejt@redhat.com>
Date: Wed, 2 Dec 2015 12:24:39 +0000
Subject: dm thin metadata: fix bug in dm_thin_remove_range()

dm_btree_remove_leaves() only unmaps a contiguous region so we need a
loop, in __remove_range(), to handle ranges that contain multiple
regions.

A new btree function, dm_btree_lookup_next(), is introduced which is
more efficiently able to skip over regions of the thin device which
aren't mapped.  __remove_range() uses dm_btree_lookup_next() for each
iteration of __remove_range()'s loop.

Also, improve description of dm_btree_remove_leaves().

Fixes: 6550f075 ("dm thin metadata: add dm_thin_remove_range()")
Signed-off-by: Joe Thornber <ejt@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Cc: stable@vger.kernel.org # 4.1+

diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c
index 1fa4569..67871e7 100644
--- a/drivers/md/dm-thin-metadata.c
+++ b/drivers/md/dm-thin-metadata.c
@@ -1538,7 +1538,7 @@ static int __remove(struct dm_thin_device *td, dm_block_t block)
 static int __remove_range(struct dm_thin_device *td, dm_block_t begin, dm_block_t end)
 {
 	int r;
-	unsigned count;
+	unsigned count, total_count = 0;
 	struct dm_pool_metadata *pmd = td->pmd;
 	dm_block_t keys[1] = { td->id };
 	__le64 value;
@@ -1561,11 +1561,29 @@ static int __remove_range(struct dm_thin_device *td, dm_block_t begin, dm_block_
 	if (r)
 		return r;
 
-	r = dm_btree_remove_leaves(&pmd->bl_info, mapping_root, &begin, end, &mapping_root, &count);
-	if (r)
-		return r;
+	/*
+	 * Remove leaves stops at the first unmapped entry, so we have to
+	 * loop round finding mapped ranges.
+	 */
+	while (begin < end) {
+		r = dm_btree_lookup_next(&pmd->bl_info, mapping_root, &begin, &begin, &value);
+		if (r == -ENODATA)
+			break;
+
+		if (r)
+			return r;
+
+		if (begin >= end)
+			break;
+
+		r = dm_btree_remove_leaves(&pmd->bl_info, mapping_root, &begin, end, &mapping_root, &count);
+		if (r)
+			return r;
+
+		total_count += count;
+	}
 
-	td->mapped_blocks -= count;
+	td->mapped_blocks -= total_count;
 	td->changed = 1;
 
 	/*
diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c
index 0918a7c..7e5b7f1 100644
--- a/drivers/md/persistent-data/dm-btree.c
+++ b/drivers/md/persistent-data/dm-btree.c
@@ -63,6 +63,11 @@ int lower_bound(struct btree_node *n, uint64_t key)
 	return bsearch(n, key, 0);
 }
 
+static int upper_bound(struct btree_node *n, uint64_t key)
+{
+	return bsearch(n, key, 1);
+}
+
 void inc_children(struct dm_transaction_manager *tm, struct btree_node *n,
 		  struct dm_btree_value_type *vt)
 {
@@ -392,6 +397,82 @@ int dm_btree_lookup(struct dm_btree_info *info, dm_block_t root,
 }
 EXPORT_SYMBOL_GPL(dm_btree_lookup);
 
+static int dm_btree_lookup_next_single(struct dm_btree_info *info, dm_block_t root,
+				       uint64_t key, uint64_t *rkey, void *value_le)
+{
+	int r, i;
+	uint32_t flags, nr_entries;
+	struct dm_block *node;
+	struct btree_node *n;
+
+	r = bn_read_lock(info, root, &node);
+	if (r)
+		return r;
+
+	n = dm_block_data(node);
+	flags = le32_to_cpu(n->header.flags);
+	nr_entries = le32_to_cpu(n->header.nr_entries);
+
+	if (flags & INTERNAL_NODE) {
+		i = lower_bound(n, key);
+		if (i < 0 || i >= nr_entries) {
+			r = -ENODATA;
+			goto out;
+		}
+
+		r = dm_btree_lookup_next_single(info, value64(n, i), key, rkey, value_le);
+		if (r == -ENODATA && i < (nr_entries - 1)) {
+			i++;
+			r = dm_btree_lookup_next_single(info, value64(n, i), key, rkey, value_le);
+		}
+
+	} else {
+		i = upper_bound(n, key);
+		if (i < 0 || i >= nr_entries) {
+			r = -ENODATA;
+			goto out;
+		}
+
+		*rkey = le64_to_cpu(n->keys[i]);
+		memcpy(value_le, value_ptr(n, i), info->value_type.size);
+	}
+out:
+	dm_tm_unlock(info->tm, node);
+	return r;
+}
+
+int dm_btree_lookup_next(struct dm_btree_info *info, dm_block_t root,
+			 uint64_t *keys, uint64_t *rkey, void *value_le)
+{
+	unsigned level;
+	int r = -ENODATA;
+	__le64 internal_value_le;
+	struct ro_spine spine;
+
+	init_ro_spine(&spine, info);
+	for (level = 0; level < info->levels - 1u; level++) {
+		r = btree_lookup_raw(&spine, root, keys[level],
+				     lower_bound, rkey,
+				     &internal_value_le, sizeof(uint64_t));
+		if (r)
+			goto out;
+
+		if (*rkey != keys[level]) {
+			r = -ENODATA;
+			goto out;
+		}
+
+		root = le64_to_cpu(internal_value_le);
+	}
+
+	r = dm_btree_lookup_next_single(info, root, keys[level], rkey, value_le);
+out:
+	exit_ro_spine(&spine);
+	return r;
+}
+
+EXPORT_SYMBOL_GPL(dm_btree_lookup_next);
+
 /*
  * Splits a node by creating a sibling node and shifting half the nodes
  * contents across.  Assumes there is a parent node, and it has room for
diff --git a/drivers/md/persistent-data/dm-btree.h b/drivers/md/persistent-data/dm-btree.h
index 11d8cf7..c74301f 100644
--- a/drivers/md/persistent-data/dm-btree.h
+++ b/drivers/md/persistent-data/dm-btree.h
@@ -110,6 +110,13 @@ int dm_btree_lookup(struct dm_btree_info *info, dm_block_t root,
 		    uint64_t *keys, void *value_le);
 
 /*
+ * Tries to find the first key where the bottom level key is >= to that
+ * given.  Useful for skipping empty sections of the btree.
+ */
+int dm_btree_lookup_next(struct dm_btree_info *info, dm_block_t root,
+			 uint64_t *keys, uint64_t *rkey, void *value_le);
+
+/*
  * Insertion (or overwrite an existing value).  O(ln(n))
  */
 int dm_btree_insert(struct dm_btree_info *info, dm_block_t root,
@@ -135,9 +142,10 @@ int dm_btree_remove(struct dm_btree_info *info, dm_block_t root,
 		    uint64_t *keys, dm_block_t *new_root);
 
 /*
- * Removes values between 'keys' and keys2, where keys2 is keys with the
- * final key replaced with 'end_key'.  'end_key' is the one-past-the-end
- * value.  'keys' may be altered.
+ * Removes a _contiguous_ run of values starting from 'keys' and not
+ * reaching keys2 (where keys2 is keys with the final key replaced with
+ * 'end_key').  'end_key' is the one-past-the-end value.  'keys' may be
+ * altered.
  */
 int dm_btree_remove_leaves(struct dm_btree_info *info, dm_block_t root,
 			   uint64_t *keys, uint64_t end_key,
-- 
cgit v0.10.2


From 59f0ec231f397001801264063db3b6dcc3eef590 Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@free-electrons.com>
Date: Tue, 1 Dec 2015 12:14:52 +0100
Subject: clk: sunxi: pll2: Fix clock running too fast

Contrary to what the datasheet says, the pre divider doesn't seem to be
incremented by one in the PLL2, but just uses the value from the register,
with 0 being a bypass.

This fixes the audio playing too fast.

Since we now have the same pre-divider flags, and the only difference with
the A10 is the post-divider offset, also remove the structure to just pass
the offset as an argument.

Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
Fixes: eb662f854710 ("clk: sunxi: pll2: Add A13 support")
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>

diff --git a/drivers/clk/sunxi/clk-a10-pll2.c b/drivers/clk/sunxi/clk-a10-pll2.c
index 5484c31e..0ee1f36 100644
--- a/drivers/clk/sunxi/clk-a10-pll2.c
+++ b/drivers/clk/sunxi/clk-a10-pll2.c
@@ -41,15 +41,10 @@
 
 #define SUN4I_PLL2_OUTPUTS		4
 
-struct sun4i_pll2_data {
-	u32	post_div_offset;
-	u32	pre_div_flags;
-};
-
 static DEFINE_SPINLOCK(sun4i_a10_pll2_lock);
 
 static void __init sun4i_pll2_setup(struct device_node *node,
-				    struct sun4i_pll2_data *data)
+				    int post_div_offset)
 {
 	const char *clk_name = node->name, *parent;
 	struct clk **clks, *base_clk, *prediv_clk;
@@ -76,7 +71,7 @@ static void __init sun4i_pll2_setup(struct device_node *node,
 					  parent, 0, reg,
 					  SUN4I_PLL2_PRE_DIV_SHIFT,
 					  SUN4I_PLL2_PRE_DIV_WIDTH,
-					  data->pre_div_flags,
+					  CLK_DIVIDER_ONE_BASED | CLK_DIVIDER_ALLOW_ZERO,
 					  &sun4i_a10_pll2_lock);
 	if (!prediv_clk) {
 		pr_err("Couldn't register the prediv clock\n");
@@ -127,7 +122,7 @@ static void __init sun4i_pll2_setup(struct device_node *node,
 	 */
 	val = readl(reg);
 	val &= ~(SUN4I_PLL2_POST_DIV_MASK << SUN4I_PLL2_POST_DIV_SHIFT);
-	val |= (SUN4I_PLL2_POST_DIV_VALUE - data->post_div_offset) << SUN4I_PLL2_POST_DIV_SHIFT;
+	val |= (SUN4I_PLL2_POST_DIV_VALUE - post_div_offset) << SUN4I_PLL2_POST_DIV_SHIFT;
 	writel(val, reg);
 
 	of_property_read_string_index(node, "clock-output-names",
@@ -191,25 +186,17 @@ err_unmap:
 	iounmap(reg);
 }
 
-static struct sun4i_pll2_data sun4i_a10_pll2_data = {
-	.pre_div_flags	= CLK_DIVIDER_ONE_BASED | CLK_DIVIDER_ALLOW_ZERO,
-};
-
 static void __init sun4i_a10_pll2_setup(struct device_node *node)
 {
-	sun4i_pll2_setup(node, &sun4i_a10_pll2_data);
+	sun4i_pll2_setup(node, 0);
 }
 
 CLK_OF_DECLARE(sun4i_a10_pll2, "allwinner,sun4i-a10-pll2-clk",
 	       sun4i_a10_pll2_setup);
 
-static struct sun4i_pll2_data sun5i_a13_pll2_data = {
-	.post_div_offset	= 1,
-};
-
 static void __init sun5i_a13_pll2_setup(struct device_node *node)
 {
-	sun4i_pll2_setup(node, &sun5i_a13_pll2_data);
+	sun4i_pll2_setup(node, 1);
 }
 
 CLK_OF_DECLARE(sun5i_a13_pll2, "allwinner,sun5i-a13-pll2-clk",
-- 
cgit v0.10.2


From 599c963a0f19b14132065788322207eaa58bc7f8 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 3 Dec 2015 10:18:21 -0500
Subject: cgroup_freezer: simplify propagation of CGROUP_FROZEN clearing in
 freezer_attach()

If one or more tasks get moved into a frozen css, the frozen state is
cleared up from the destination css so that it can be reasserted once
the migrated tasks are frozen.  freezer_attach() implements this in
two separate steps - clearing CGROUP_FROZEN on the target css while
processing each task and propagating the clearing upwards after the
task loop is done if necessary.

This patch merges the two steps.  Propagation now takes place inside
the task loop.  This simplifies the code and prepares it for the fix
of multi-destination migration.

Signed-off-by: Tejun Heo <tj@kernel.org>

diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index f1b30ad..ff02a8e 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -158,9 +158,7 @@ static void freezer_css_free(struct cgroup_subsys_state *css)
 static void freezer_attach(struct cgroup_subsys_state *new_css,
 			   struct cgroup_taskset *tset)
 {
-	struct freezer *freezer = css_freezer(new_css);
 	struct task_struct *task;
-	bool clear_frozen = false;
 
 	mutex_lock(&freezer_mutex);
 
@@ -175,21 +173,20 @@ static void freezer_attach(struct cgroup_subsys_state *new_css,
 	 * be visible in a FROZEN cgroup and frozen tasks in a THAWED one.
 	 */
 	cgroup_taskset_for_each(task, tset) {
+		struct freezer *freezer = css_freezer(new_css);
+
 		if (!(freezer->state & CGROUP_FREEZING)) {
 			__thaw_task(task);
 		} else {
 			freeze_task(task);
-			freezer->state &= ~CGROUP_FROZEN;
-			clear_frozen = true;
+			/* clear FROZEN and propagate upwards */
+			while (freezer && (freezer->state & CGROUP_FROZEN)) {
+				freezer->state &= ~CGROUP_FROZEN;
+				freezer = parent_freezer(freezer);
+			}
 		}
 	}
 
-	/* propagate FROZEN clearing upwards */
-	while (clear_frozen && (freezer = parent_freezer(freezer))) {
-		freezer->state &= ~CGROUP_FROZEN;
-		clear_frozen = freezer->state & CGROUP_FREEZING;
-	}
-
 	mutex_unlock(&freezer_mutex);
 }
 
-- 
cgit v0.10.2


From 1f7dd3e5a6e4f093017fff12232572ee1aa4639b Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 3 Dec 2015 10:18:21 -0500
Subject: cgroup: fix handling of multi-destination migration from
 subtree_control enabling

Consider the following v2 hierarchy.

  P0 (+memory) --- P1 (-memory) --- A
                                 \- B

P0 has memory enabled in its subtree_control while P1 doesn't.  If
both A and B contain processes, they would belong to the memory css of
P1.  Now if memory is enabled on P1's subtree_control, memory csses
should be created on both A and B and A's processes should be moved to
the former and B's processes the latter.  IOW, enabling controllers
can cause atomic migrations into different csses.

The core cgroup migration logic has been updated accordingly but the
controller migration methods haven't and still assume that all tasks
migrate to a single target css; furthermore, the methods were fed the
css in which subtree_control was updated which is the parent of the
target csses.  pids controller depends on the migration methods to
move charges and this made the controller attribute charges to the
wrong csses often triggering the following warning by driving a
counter negative.

 WARNING: CPU: 1 PID: 1 at kernel/cgroup_pids.c:97 pids_cancel.constprop.6+0x31/0x40()
 Modules linked in:
 CPU: 1 PID: 1 Comm: systemd Not tainted 4.4.0-rc1+ #29
 ...
  ffffffff81f65382 ffff88007c043b90 ffffffff81551ffc 0000000000000000
  ffff88007c043bc8 ffffffff810de202 ffff88007a752000 ffff88007a29ab00
  ffff88007c043c80 ffff88007a1d8400 0000000000000001 ffff88007c043bd8
 Call Trace:
  [<ffffffff81551ffc>] dump_stack+0x4e/0x82
  [<ffffffff810de202>] warn_slowpath_common+0x82/0xc0
  [<ffffffff810de2fa>] warn_slowpath_null+0x1a/0x20
  [<ffffffff8118e031>] pids_cancel.constprop.6+0x31/0x40
  [<ffffffff8118e0fd>] pids_can_attach+0x6d/0xf0
  [<ffffffff81188a4c>] cgroup_taskset_migrate+0x6c/0x330
  [<ffffffff81188e05>] cgroup_migrate+0xf5/0x190
  [<ffffffff81189016>] cgroup_attach_task+0x176/0x200
  [<ffffffff8118949d>] __cgroup_procs_write+0x2ad/0x460
  [<ffffffff81189684>] cgroup_procs_write+0x14/0x20
  [<ffffffff811854e5>] cgroup_file_write+0x35/0x1c0
  [<ffffffff812e26f1>] kernfs_fop_write+0x141/0x190
  [<ffffffff81265f88>] __vfs_write+0x28/0xe0
  [<ffffffff812666fc>] vfs_write+0xac/0x1a0
  [<ffffffff81267019>] SyS_write+0x49/0xb0
  [<ffffffff81bcef32>] entry_SYSCALL_64_fastpath+0x12/0x76

This patch fixes the bug by removing @css parameter from the three
migration methods, ->can_attach, ->cancel_attach() and ->attach() and
updating cgroup_taskset iteration helpers also return the destination
css in addition to the task being migrated.  All controllers are
updated accordingly.

* Controllers which don't care whether there are one or multiple
  target csses can be converted trivially.  cpu, io, freezer, perf,
  netclassid and netprio fall in this category.

* cpuset's current implementation assumes that there's single source
  and destination and thus doesn't support v2 hierarchy already.  The
  only change made by this patchset is how that single destination css
  is obtained.

* memory migration path already doesn't do anything on v2.  How the
  single destination css is obtained is updated and the prep stage of
  mem_cgroup_can_attach() is reordered to accomodate the change.

* pids is the only controller which was affected by this bug.  It now
  correctly handles multi-destination migrations and no longer causes
  counter underflow from incorrect accounting.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-and-tested-by: Daniel Wagner <daniel.wagner@bmw-carit.de>
Cc: Aleksa Sarai <cyphar@cyphar.com>

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 5bcdfc1..5a37188 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1127,15 +1127,15 @@ void blkcg_exit_queue(struct request_queue *q)
  * of the main cic data structures.  For now we allow a task to change
  * its cgroup only if it's the only owner of its ioc.
  */
-static int blkcg_can_attach(struct cgroup_subsys_state *css,
-			    struct cgroup_taskset *tset)
+static int blkcg_can_attach(struct cgroup_taskset *tset)
 {
 	struct task_struct *task;
+	struct cgroup_subsys_state *dst_css;
 	struct io_context *ioc;
 	int ret = 0;
 
 	/* task_lock() is needed to avoid races with exit_io_context() */
-	cgroup_taskset_for_each(task, tset) {
+	cgroup_taskset_for_each(task, dst_css, tset) {
 		task_lock(task);
 		ioc = task->io_context;
 		if (ioc && atomic_read(&ioc->nr_tasks) > 1)
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 869fd4a..06b77f9d 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -422,12 +422,9 @@ struct cgroup_subsys {
 	void (*css_reset)(struct cgroup_subsys_state *css);
 	void (*css_e_css_changed)(struct cgroup_subsys_state *css);
 
-	int (*can_attach)(struct cgroup_subsys_state *css,
-			  struct cgroup_taskset *tset);
-	void (*cancel_attach)(struct cgroup_subsys_state *css,
-			      struct cgroup_taskset *tset);
-	void (*attach)(struct cgroup_subsys_state *css,
-		       struct cgroup_taskset *tset);
+	int (*can_attach)(struct cgroup_taskset *tset);
+	void (*cancel_attach)(struct cgroup_taskset *tset);
+	void (*attach)(struct cgroup_taskset *tset);
 	int (*can_fork)(struct task_struct *task, void **priv_p);
 	void (*cancel_fork)(struct task_struct *task, void *priv);
 	void (*fork)(struct task_struct *task, void *priv);
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index f640830..cb91b44 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -120,8 +120,10 @@ struct cgroup_subsys_state *css_rightmost_descendant(struct cgroup_subsys_state
 struct cgroup_subsys_state *css_next_descendant_post(struct cgroup_subsys_state *pos,
 						     struct cgroup_subsys_state *css);
 
-struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset);
-struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset);
+struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset,
+					 struct cgroup_subsys_state **dst_cssp);
+struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset,
+					struct cgroup_subsys_state **dst_cssp);
 
 void css_task_iter_start(struct cgroup_subsys_state *css,
 			 struct css_task_iter *it);
@@ -236,30 +238,39 @@ void css_task_iter_end(struct css_task_iter *it);
 /**
  * cgroup_taskset_for_each - iterate cgroup_taskset
  * @task: the loop cursor
+ * @dst_css: the destination css
  * @tset: taskset to iterate
  *
  * @tset may contain multiple tasks and they may belong to multiple
- * processes.  When there are multiple tasks in @tset, if a task of a
- * process is in @tset, all tasks of the process are in @tset.  Also, all
- * are guaranteed to share the same source and destination csses.
+ * processes.
+ *
+ * On the v2 hierarchy, there may be tasks from multiple processes and they
+ * may not share the source or destination csses.
+ *
+ * On traditional hierarchies, when there are multiple tasks in @tset, if a
+ * task of a process is in @tset, all tasks of the process are in @tset.
+ * Also, all are guaranteed to share the same source and destination csses.
  *
  * Iteration is not in any specific order.
  */
-#define cgroup_taskset_for_each(task, tset)				\
-	for ((task) = cgroup_taskset_first((tset)); (task);		\
-	     (task) = cgroup_taskset_next((tset)))
+#define cgroup_taskset_for_each(task, dst_css, tset)			\
+	for ((task) = cgroup_taskset_first((tset), &(dst_css));		\
+	     (task);							\
+	     (task) = cgroup_taskset_next((tset), &(dst_css)))
 
 /**
  * cgroup_taskset_for_each_leader - iterate group leaders in a cgroup_taskset
  * @leader: the loop cursor
+ * @dst_css: the destination css
  * @tset: takset to iterate
  *
  * Iterate threadgroup leaders of @tset.  For single-task migrations, @tset
  * may not contain any.
  */
-#define cgroup_taskset_for_each_leader(leader, tset)			\
-	for ((leader) = cgroup_taskset_first((tset)); (leader);		\
-	     (leader) = cgroup_taskset_next((tset)))			\
+#define cgroup_taskset_for_each_leader(leader, dst_css, tset)		\
+	for ((leader) = cgroup_taskset_first((tset), &(dst_css));	\
+	     (leader);							\
+	     (leader) = cgroup_taskset_next((tset), &(dst_css)))	\
 		if ((leader) != (leader)->group_leader)			\
 			;						\
 		else
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 5cea63f..470f653 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2237,6 +2237,9 @@ struct cgroup_taskset {
 	struct list_head	src_csets;
 	struct list_head	dst_csets;
 
+	/* the subsys currently being processed */
+	int			ssid;
+
 	/*
 	 * Fields for cgroup_taskset_*() iteration.
 	 *
@@ -2299,25 +2302,29 @@ static void cgroup_taskset_add(struct task_struct *task,
 /**
  * cgroup_taskset_first - reset taskset and return the first task
  * @tset: taskset of interest
+ * @dst_cssp: output variable for the destination css
  *
  * @tset iteration is initialized and the first task is returned.
  */
-struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset)
+struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset,
+					 struct cgroup_subsys_state **dst_cssp)
 {
 	tset->cur_cset = list_first_entry(tset->csets, struct css_set, mg_node);
 	tset->cur_task = NULL;
 
-	return cgroup_taskset_next(tset);
+	return cgroup_taskset_next(tset, dst_cssp);
 }
 
 /**
  * cgroup_taskset_next - iterate to the next task in taskset
  * @tset: taskset of interest
+ * @dst_cssp: output variable for the destination css
  *
  * Return the next task in @tset.  Iteration must have been initialized
  * with cgroup_taskset_first().
  */
-struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset)
+struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset,
+					struct cgroup_subsys_state **dst_cssp)
 {
 	struct css_set *cset = tset->cur_cset;
 	struct task_struct *task = tset->cur_task;
@@ -2332,6 +2339,18 @@ struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset)
 		if (&task->cg_list != &cset->mg_tasks) {
 			tset->cur_cset = cset;
 			tset->cur_task = task;
+
+			/*
+			 * This function may be called both before and
+			 * after cgroup_taskset_migrate().  The two cases
+			 * can be distinguished by looking at whether @cset
+			 * has its ->mg_dst_cset set.
+			 */
+			if (cset->mg_dst_cset)
+				*dst_cssp = cset->mg_dst_cset->subsys[tset->ssid];
+			else
+				*dst_cssp = cset->subsys[tset->ssid];
+
 			return task;
 		}
 
@@ -2367,7 +2386,8 @@ static int cgroup_taskset_migrate(struct cgroup_taskset *tset,
 	/* check that we can legitimately attach to the cgroup */
 	for_each_e_css(css, i, dst_cgrp) {
 		if (css->ss->can_attach) {
-			ret = css->ss->can_attach(css, tset);
+			tset->ssid = i;
+			ret = css->ss->can_attach(tset);
 			if (ret) {
 				failed_css = css;
 				goto out_cancel_attach;
@@ -2400,9 +2420,12 @@ static int cgroup_taskset_migrate(struct cgroup_taskset *tset,
 	 */
 	tset->csets = &tset->dst_csets;
 
-	for_each_e_css(css, i, dst_cgrp)
-		if (css->ss->attach)
-			css->ss->attach(css, tset);
+	for_each_e_css(css, i, dst_cgrp) {
+		if (css->ss->attach) {
+			tset->ssid = i;
+			css->ss->attach(tset);
+		}
+	}
 
 	ret = 0;
 	goto out_release_tset;
@@ -2411,8 +2434,10 @@ out_cancel_attach:
 	for_each_e_css(css, i, dst_cgrp) {
 		if (css == failed_css)
 			break;
-		if (css->ss->cancel_attach)
-			css->ss->cancel_attach(css, tset);
+		if (css->ss->cancel_attach) {
+			tset->ssid = i;
+			css->ss->cancel_attach(tset);
+		}
 	}
 out_release_tset:
 	spin_lock_bh(&css_set_lock);
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index ff02a8e..2d3df82 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -155,10 +155,10 @@ static void freezer_css_free(struct cgroup_subsys_state *css)
  * @freezer->lock.  freezer_attach() makes the new tasks conform to the
  * current state and all following state changes can see the new tasks.
  */
-static void freezer_attach(struct cgroup_subsys_state *new_css,
-			   struct cgroup_taskset *tset)
+static void freezer_attach(struct cgroup_taskset *tset)
 {
 	struct task_struct *task;
+	struct cgroup_subsys_state *new_css;
 
 	mutex_lock(&freezer_mutex);
 
@@ -172,7 +172,7 @@ static void freezer_attach(struct cgroup_subsys_state *new_css,
 	 * current state before executing the following - !frozen tasks may
 	 * be visible in a FROZEN cgroup and frozen tasks in a THAWED one.
 	 */
-	cgroup_taskset_for_each(task, tset) {
+	cgroup_taskset_for_each(task, new_css, tset) {
 		struct freezer *freezer = css_freezer(new_css);
 
 		if (!(freezer->state & CGROUP_FREEZING)) {
diff --git a/kernel/cgroup_pids.c b/kernel/cgroup_pids.c
index de3359a..8e27fc5 100644
--- a/kernel/cgroup_pids.c
+++ b/kernel/cgroup_pids.c
@@ -162,13 +162,13 @@ revert:
 	return -EAGAIN;
 }
 
-static int pids_can_attach(struct cgroup_subsys_state *css,
-			   struct cgroup_taskset *tset)
+static int pids_can_attach(struct cgroup_taskset *tset)
 {
-	struct pids_cgroup *pids = css_pids(css);
 	struct task_struct *task;
+	struct cgroup_subsys_state *dst_css;
 
-	cgroup_taskset_for_each(task, tset) {
+	cgroup_taskset_for_each(task, dst_css, tset) {
+		struct pids_cgroup *pids = css_pids(dst_css);
 		struct cgroup_subsys_state *old_css;
 		struct pids_cgroup *old_pids;
 
@@ -187,13 +187,13 @@ static int pids_can_attach(struct cgroup_subsys_state *css,
 	return 0;
 }
 
-static void pids_cancel_attach(struct cgroup_subsys_state *css,
-			       struct cgroup_taskset *tset)
+static void pids_cancel_attach(struct cgroup_taskset *tset)
 {
-	struct pids_cgroup *pids = css_pids(css);
 	struct task_struct *task;
+	struct cgroup_subsys_state *dst_css;
 
-	cgroup_taskset_for_each(task, tset) {
+	cgroup_taskset_for_each(task, dst_css, tset) {
+		struct pids_cgroup *pids = css_pids(dst_css);
 		struct cgroup_subsys_state *old_css;
 		struct pids_cgroup *old_pids;
 
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 10ae736..02a8ea5 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1429,15 +1429,16 @@ static int fmeter_getrate(struct fmeter *fmp)
 static struct cpuset *cpuset_attach_old_cs;
 
 /* Called by cgroups to determine if a cpuset is usable; cpuset_mutex held */
-static int cpuset_can_attach(struct cgroup_subsys_state *css,
-			     struct cgroup_taskset *tset)
+static int cpuset_can_attach(struct cgroup_taskset *tset)
 {
-	struct cpuset *cs = css_cs(css);
+	struct cgroup_subsys_state *css;
+	struct cpuset *cs;
 	struct task_struct *task;
 	int ret;
 
 	/* used later by cpuset_attach() */
-	cpuset_attach_old_cs = task_cs(cgroup_taskset_first(tset));
+	cpuset_attach_old_cs = task_cs(cgroup_taskset_first(tset, &css));
+	cs = css_cs(css);
 
 	mutex_lock(&cpuset_mutex);
 
@@ -1447,7 +1448,7 @@ static int cpuset_can_attach(struct cgroup_subsys_state *css,
 	    (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)))
 		goto out_unlock;
 
-	cgroup_taskset_for_each(task, tset) {
+	cgroup_taskset_for_each(task, css, tset) {
 		ret = task_can_attach(task, cs->cpus_allowed);
 		if (ret)
 			goto out_unlock;
@@ -1467,9 +1468,14 @@ out_unlock:
 	return ret;
 }
 
-static void cpuset_cancel_attach(struct cgroup_subsys_state *css,
-				 struct cgroup_taskset *tset)
+static void cpuset_cancel_attach(struct cgroup_taskset *tset)
 {
+	struct cgroup_subsys_state *css;
+	struct cpuset *cs;
+
+	cgroup_taskset_first(tset, &css);
+	cs = css_cs(css);
+
 	mutex_lock(&cpuset_mutex);
 	css_cs(css)->attach_in_progress--;
 	mutex_unlock(&cpuset_mutex);
@@ -1482,16 +1488,19 @@ static void cpuset_cancel_attach(struct cgroup_subsys_state *css,
  */
 static cpumask_var_t cpus_attach;
 
-static void cpuset_attach(struct cgroup_subsys_state *css,
-			  struct cgroup_taskset *tset)
+static void cpuset_attach(struct cgroup_taskset *tset)
 {
 	/* static buf protected by cpuset_mutex */
 	static nodemask_t cpuset_attach_nodemask_to;
 	struct task_struct *task;
 	struct task_struct *leader;
-	struct cpuset *cs = css_cs(css);
+	struct cgroup_subsys_state *css;
+	struct cpuset *cs;
 	struct cpuset *oldcs = cpuset_attach_old_cs;
 
+	cgroup_taskset_first(tset, &css);
+	cs = css_cs(css);
+
 	mutex_lock(&cpuset_mutex);
 
 	/* prepare for attach */
@@ -1502,7 +1511,7 @@ static void cpuset_attach(struct cgroup_subsys_state *css,
 
 	guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
 
-	cgroup_taskset_for_each(task, tset) {
+	cgroup_taskset_for_each(task, css, tset) {
 		/*
 		 * can_attach beforehand should guarantee that this doesn't
 		 * fail.  TODO: have a better way to handle failure here
@@ -1518,7 +1527,7 @@ static void cpuset_attach(struct cgroup_subsys_state *css,
 	 * sleep and should be moved outside migration path proper.
 	 */
 	cpuset_attach_nodemask_to = cs->effective_mems;
-	cgroup_taskset_for_each_leader(leader, tset) {
+	cgroup_taskset_for_each_leader(leader, css, tset) {
 		struct mm_struct *mm = get_task_mm(leader);
 
 		if (mm) {
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 36babfd..026305d 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -9456,12 +9456,12 @@ static int __perf_cgroup_move(void *info)
 	return 0;
 }
 
-static void perf_cgroup_attach(struct cgroup_subsys_state *css,
-			       struct cgroup_taskset *tset)
+static void perf_cgroup_attach(struct cgroup_taskset *tset)
 {
 	struct task_struct *task;
+	struct cgroup_subsys_state *css;
 
-	cgroup_taskset_for_each(task, tset)
+	cgroup_taskset_for_each(task, css, tset)
 		task_function_call(task, __perf_cgroup_move, task);
 }
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 4d568ac..a9db4819 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -8217,12 +8217,12 @@ static void cpu_cgroup_fork(struct task_struct *task, void *private)
 	sched_move_task(task);
 }
 
-static int cpu_cgroup_can_attach(struct cgroup_subsys_state *css,
-				 struct cgroup_taskset *tset)
+static int cpu_cgroup_can_attach(struct cgroup_taskset *tset)
 {
 	struct task_struct *task;
+	struct cgroup_subsys_state *css;
 
-	cgroup_taskset_for_each(task, tset) {
+	cgroup_taskset_for_each(task, css, tset) {
 #ifdef CONFIG_RT_GROUP_SCHED
 		if (!sched_rt_can_attach(css_tg(css), task))
 			return -EINVAL;
@@ -8235,12 +8235,12 @@ static int cpu_cgroup_can_attach(struct cgroup_subsys_state *css,
 	return 0;
 }
 
-static void cpu_cgroup_attach(struct cgroup_subsys_state *css,
-			      struct cgroup_taskset *tset)
+static void cpu_cgroup_attach(struct cgroup_taskset *tset)
 {
 	struct task_struct *task;
+	struct cgroup_subsys_state *css;
 
-	cgroup_taskset_for_each(task, tset)
+	cgroup_taskset_for_each(task, css, tset)
 		sched_move_task(task);
 }
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 9acfb16..c92a65b 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4779,23 +4779,18 @@ static void mem_cgroup_clear_mc(void)
 	spin_unlock(&mc.lock);
 }
 
-static int mem_cgroup_can_attach(struct cgroup_subsys_state *css,
-				 struct cgroup_taskset *tset)
+static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
 {
-	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
+	struct cgroup_subsys_state *css;
+	struct mem_cgroup *memcg;
 	struct mem_cgroup *from;
 	struct task_struct *leader, *p;
 	struct mm_struct *mm;
 	unsigned long move_flags;
 	int ret = 0;
 
-	/*
-	 * We are now commited to this value whatever it is. Changes in this
-	 * tunable will only affect upcoming migrations, not the current one.
-	 * So we need to save it, and keep it going.
-	 */
-	move_flags = READ_ONCE(memcg->move_charge_at_immigrate);
-	if (!move_flags)
+	/* charge immigration isn't supported on the default hierarchy */
+	if (cgroup_subsys_on_dfl(memory_cgrp_subsys))
 		return 0;
 
 	/*
@@ -4805,13 +4800,23 @@ static int mem_cgroup_can_attach(struct cgroup_subsys_state *css,
 	 * multiple.
 	 */
 	p = NULL;
-	cgroup_taskset_for_each_leader(leader, tset) {
+	cgroup_taskset_for_each_leader(leader, css, tset) {
 		WARN_ON_ONCE(p);
 		p = leader;
+		memcg = mem_cgroup_from_css(css);
 	}
 	if (!p)
 		return 0;
 
+	/*
+	 * We are now commited to this value whatever it is. Changes in this
+	 * tunable will only affect upcoming migrations, not the current one.
+	 * So we need to save it, and keep it going.
+	 */
+	move_flags = READ_ONCE(memcg->move_charge_at_immigrate);
+	if (!move_flags)
+		return 0;
+
 	from = mem_cgroup_from_task(p);
 
 	VM_BUG_ON(from == memcg);
@@ -4842,8 +4847,7 @@ static int mem_cgroup_can_attach(struct cgroup_subsys_state *css,
 	return ret;
 }
 
-static void mem_cgroup_cancel_attach(struct cgroup_subsys_state *css,
-				     struct cgroup_taskset *tset)
+static void mem_cgroup_cancel_attach(struct cgroup_taskset *tset)
 {
 	if (mc.to)
 		mem_cgroup_clear_mc();
@@ -4985,10 +4989,10 @@ retry:
 	atomic_dec(&mc.from->moving_account);
 }
 
-static void mem_cgroup_move_task(struct cgroup_subsys_state *css,
-				 struct cgroup_taskset *tset)
+static void mem_cgroup_move_task(struct cgroup_taskset *tset)
 {
-	struct task_struct *p = cgroup_taskset_first(tset);
+	struct cgroup_subsys_state *css;
+	struct task_struct *p = cgroup_taskset_first(tset, &css);
 	struct mm_struct *mm = get_task_mm(p);
 
 	if (mm) {
@@ -5000,17 +5004,14 @@ static void mem_cgroup_move_task(struct cgroup_subsys_state *css,
 		mem_cgroup_clear_mc();
 }
 #else	/* !CONFIG_MMU */
-static int mem_cgroup_can_attach(struct cgroup_subsys_state *css,
-				 struct cgroup_taskset *tset)
+static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
 {
 	return 0;
 }
-static void mem_cgroup_cancel_attach(struct cgroup_subsys_state *css,
-				     struct cgroup_taskset *tset)
+static void mem_cgroup_cancel_attach(struct cgroup_taskset *tset)
 {
 }
-static void mem_cgroup_move_task(struct cgroup_subsys_state *css,
-				 struct cgroup_taskset *tset)
+static void mem_cgroup_move_task(struct cgroup_taskset *tset)
 {
 }
 #endif
diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c
index 6441f47..81cb3c7 100644
--- a/net/core/netclassid_cgroup.c
+++ b/net/core/netclassid_cgroup.c
@@ -67,14 +67,15 @@ static int update_classid(const void *v, struct file *file, unsigned n)
 	return 0;
 }
 
-static void cgrp_attach(struct cgroup_subsys_state *css,
-			struct cgroup_taskset *tset)
+static void cgrp_attach(struct cgroup_taskset *tset)
 {
-	struct cgroup_cls_state *cs = css_cls_state(css);
-	void *v = (void *)(unsigned long)cs->classid;
 	struct task_struct *p;
+	struct cgroup_subsys_state *css;
+
+	cgroup_taskset_for_each(p, css, tset) {
+		struct cgroup_cls_state *cs = css_cls_state(css);
+		void *v = (void *)(unsigned long)cs->classid;
 
-	cgroup_taskset_for_each(p, tset) {
 		task_lock(p);
 		iterate_fd(p->files, 0, update_classid, v);
 		task_unlock(p);
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index cbd0a19..40fd09f 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -218,13 +218,14 @@ static int update_netprio(const void *v, struct file *file, unsigned n)
 	return 0;
 }
 
-static void net_prio_attach(struct cgroup_subsys_state *css,
-			    struct cgroup_taskset *tset)
+static void net_prio_attach(struct cgroup_taskset *tset)
 {
 	struct task_struct *p;
-	void *v = (void *)(unsigned long)css->cgroup->id;
+	struct cgroup_subsys_state *css;
+
+	cgroup_taskset_for_each(p, css, tset) {
+		void *v = (void *)(unsigned long)css->cgroup->id;
 
-	cgroup_taskset_for_each(p, tset) {
 		task_lock(p);
 		iterate_fd(p->files, 0, update_netprio, v);
 		task_unlock(p);
-- 
cgit v0.10.2


From 67cde9c4938945b9510730c64e68d2f1dd7bc0aa Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 3 Dec 2015 10:18:21 -0500
Subject: cgroup_pids: don't account for the root cgroup

Because accounting resources for the root cgroup sometimes incurs
measureable overhead for workloads which don't care about cgroup and
often ends up calculating a number which is available elsewhere in a
slightly different form, cgroup is not in the business of providing
system-wide statistics.  The pids controller which was introduced
recently was exposing "pids.current" at the root.  This patch disable
accounting for root cgroup and removes the file from the root
directory.

While this is a userland visible behavior change, pids has been
available only in one version and was badly broken there, so I don't
think this will be noticeable.  If it turns out to be a problem, we
can reinstate it for v1 hierarchies.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Aleksa Sarai <cyphar@cyphar.com>

diff --git a/kernel/cgroup_pids.c b/kernel/cgroup_pids.c
index 8e27fc5..b50d5a1 100644
--- a/kernel/cgroup_pids.c
+++ b/kernel/cgroup_pids.c
@@ -106,7 +106,7 @@ static void pids_uncharge(struct pids_cgroup *pids, int num)
 {
 	struct pids_cgroup *p;
 
-	for (p = pids; p; p = parent_pids(p))
+	for (p = pids; parent_pids(p); p = parent_pids(p))
 		pids_cancel(p, num);
 }
 
@@ -123,7 +123,7 @@ static void pids_charge(struct pids_cgroup *pids, int num)
 {
 	struct pids_cgroup *p;
 
-	for (p = pids; p; p = parent_pids(p))
+	for (p = pids; parent_pids(p); p = parent_pids(p))
 		atomic64_add(num, &p->counter);
 }
 
@@ -140,7 +140,7 @@ static int pids_try_charge(struct pids_cgroup *pids, int num)
 {
 	struct pids_cgroup *p, *q;
 
-	for (p = pids; p; p = parent_pids(p)) {
+	for (p = pids; parent_pids(p); p = parent_pids(p)) {
 		int64_t new = atomic64_add_return(num, &p->counter);
 
 		/*
@@ -298,6 +298,7 @@ static struct cftype pids_files[] = {
 	{
 		.name = "current",
 		.read_s64 = pids_current_read,
+		.flags = CFTYPE_NOT_ON_ROOT,
 	},
 	{ }	/* terminate */
 };
-- 
cgit v0.10.2


From 4fd41a8552afc01054d9d9fc7f1a63c324867d27 Mon Sep 17 00:00:00 2001
From: Ken Xue <ken.xue@amd.com>
Date: Tue, 1 Dec 2015 14:45:46 +0800
Subject: SCSI: Fix NULL pointer dereference in runtime PM

The routines in scsi_pm.c assume that if a runtime-PM callback is
invoked for a SCSI device, it can only mean that the device's driver
has asked the block layer to handle the runtime power management (by
calling blk_pm_runtime_init(), which among other things sets q->dev).

However, this assumption turns out to be wrong for things like the ses
driver.  Normally ses devices are not allowed to do runtime PM, but
userspace can override this setting.  If this happens, the kernel gets
a NULL pointer dereference when blk_post_runtime_resume() tries to use
the uninitialized q->dev pointer.

This patch fixes the problem by checking q->dev in block layer before
handle runtime PM. Since ses doesn't define any PM callbacks and call
blk_pm_runtime_init(), the crash won't occur.

This fixes Bugzilla #101371.
https://bugzilla.kernel.org/show_bug.cgi?id=101371

More discussion can be found from below link.
http://marc.info/?l=linux-scsi&m=144163730531875&w=2

Signed-off-by: Ken Xue <Ken.Xue@amd.com>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Cc: Xiangliang Yu <Xiangliang.Yu@amd.com>
Cc: James E.J. Bottomley <JBottomley@odin.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Michael Terry <Michael.terry@canonical.com>
Cc: stable@vger.kernel.org
Signed-off-by: Jens Axboe <axboe@fb.com>

diff --git a/block/blk-core.c b/block/blk-core.c
index a0af404..3636be4 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -3405,6 +3405,9 @@ int blk_pre_runtime_suspend(struct request_queue *q)
 {
 	int ret = 0;
 
+	if (!q->dev)
+		return ret;
+
 	spin_lock_irq(q->queue_lock);
 	if (q->nr_pending) {
 		ret = -EBUSY;
@@ -3432,6 +3435,9 @@ EXPORT_SYMBOL(blk_pre_runtime_suspend);
  */
 void blk_post_runtime_suspend(struct request_queue *q, int err)
 {
+	if (!q->dev)
+		return;
+
 	spin_lock_irq(q->queue_lock);
 	if (!err) {
 		q->rpm_status = RPM_SUSPENDED;
@@ -3456,6 +3462,9 @@ EXPORT_SYMBOL(blk_post_runtime_suspend);
  */
 void blk_pre_runtime_resume(struct request_queue *q)
 {
+	if (!q->dev)
+		return;
+
 	spin_lock_irq(q->queue_lock);
 	q->rpm_status = RPM_RESUMING;
 	spin_unlock_irq(q->queue_lock);
@@ -3478,6 +3487,9 @@ EXPORT_SYMBOL(blk_pre_runtime_resume);
  */
 void blk_post_runtime_resume(struct request_queue *q, int err)
 {
+	if (!q->dev)
+		return;
+
 	spin_lock_irq(q->queue_lock);
 	if (!err) {
 		q->rpm_status = RPM_ACTIVE;
-- 
cgit v0.10.2


From 642c2d671ceff40e9453203ea0c66e991e11e249 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 30 Nov 2015 12:56:15 +0100
Subject: perf: Fix PERF_EVENT_IOC_PERIOD deadlock

Dmitry reported a fairly silly recursive lock deadlock for
PERF_EVENT_IOC_PERIOD, fix this by explicitly doing the inactive part of
__perf_event_period() instead of calling that function.

Reported-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: <stable@vger.kernel.org>
Cc: Alexander Potapenko <glider@google.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Kostya Serebryany <kcc@google.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sasha Levin <sasha.levin@oracle.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Fixes: c7999c6f3fed ("perf: Fix PERF_EVENT_IOC_PERIOD migration race")
Link: http://lkml.kernel.org/r/20151130115615.GJ17308@twins.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 5854fcf..49a5118 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4225,7 +4225,14 @@ retry:
 		goto retry;
 	}
 
-	__perf_event_period(&pe);
+	if (event->attr.freq) {
+		event->attr.sample_freq = value;
+	} else {
+		event->attr.sample_period = value;
+		event->hw.sample_period = value;
+	}
+
+	local64_set(&event->hw.period_left, 0);
 	raw_spin_unlock_irq(&ctx->lock);
 
 	return 0;
-- 
cgit v0.10.2


From ae5515d66362b9d96cdcfce504567f0b8b7bd83e Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Fri, 4 Dec 2015 08:38:42 -0700
Subject: Revert: "vfio: Include No-IOMMU mode"

Revert commit 033291eccbdb ("vfio: Include No-IOMMU mode") due to lack
of a user.  This was originally intended to fill a need for the DPDK
driver, but uptake has been slow so rather than support an unproven
kernel interface revert it and revisit when userspace catches up.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>

diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig
index da6e2ce..850d86c 100644
--- a/drivers/vfio/Kconfig
+++ b/drivers/vfio/Kconfig
@@ -31,21 +31,6 @@ menuconfig VFIO
 
 	  If you don't know what to do here, say N.
 
-menuconfig VFIO_NOIOMMU
-	bool "VFIO No-IOMMU support"
-	depends on VFIO
-	help
-	  VFIO is built on the ability to isolate devices using the IOMMU.
-	  Only with an IOMMU can userspace access to DMA capable devices be
-	  considered secure.  VFIO No-IOMMU mode enables IOMMU groups for
-	  devices without IOMMU backing for the purpose of re-using the VFIO
-	  infrastructure in a non-secure mode.  Use of this mode will result
-	  in an unsupportable kernel and will therefore taint the kernel.
-	  Device assignment to virtual machines is also not possible with
-	  this mode since there is no IOMMU to provide DMA translation.
-
-	  If you don't know what to do here, say N.
-
 source "drivers/vfio/pci/Kconfig"
 source "drivers/vfio/platform/Kconfig"
 source "virt/lib/Kconfig"
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 2760a7b..56bf6db 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -940,13 +940,13 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	if (pdev->hdr_type != PCI_HEADER_TYPE_NORMAL)
 		return -EINVAL;
 
-	group = vfio_iommu_group_get(&pdev->dev);
+	group = iommu_group_get(&pdev->dev);
 	if (!group)
 		return -EINVAL;
 
 	vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
 	if (!vdev) {
-		vfio_iommu_group_put(group, &pdev->dev);
+		iommu_group_put(group);
 		return -ENOMEM;
 	}
 
@@ -957,7 +957,7 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
 	ret = vfio_add_group_dev(&pdev->dev, &vfio_pci_ops, vdev);
 	if (ret) {
-		vfio_iommu_group_put(group, &pdev->dev);
+		iommu_group_put(group);
 		kfree(vdev);
 		return ret;
 	}
@@ -993,7 +993,7 @@ static void vfio_pci_remove(struct pci_dev *pdev)
 	if (!vdev)
 		return;
 
-	vfio_iommu_group_put(pdev->dev.iommu_group, &pdev->dev);
+	iommu_group_put(pdev->dev.iommu_group);
 	kfree(vdev);
 
 	if (vfio_pci_is_vga(pdev)) {
diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 9da0703..6070b79 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -62,7 +62,6 @@ struct vfio_container {
 	struct rw_semaphore		group_lock;
 	struct vfio_iommu_driver	*iommu_driver;
 	void				*iommu_data;
-	bool				noiommu;
 };
 
 struct vfio_unbound_dev {
@@ -85,7 +84,6 @@ struct vfio_group {
 	struct list_head		unbound_list;
 	struct mutex			unbound_lock;
 	atomic_t			opened;
-	bool				noiommu;
 };
 
 struct vfio_device {
@@ -97,147 +95,6 @@ struct vfio_device {
 	void				*device_data;
 };
 
-#ifdef CONFIG_VFIO_NOIOMMU
-static bool noiommu __read_mostly;
-module_param_named(enable_unsafe_noiommu_support,
-		   noiommu, bool, S_IRUGO | S_IWUSR);
-MODULE_PARM_DESC(enable_unsafe_noiommu_mode, "Enable UNSAFE, no-IOMMU mode.  This mode provides no device isolation, no DMA translation, no host kernel protection, cannot be used for device assignment to virtual machines, requires RAWIO permissions, and will taint the kernel.  If you do not know what this is for, step away. (default: false)");
-#endif
-
-/*
- * vfio_iommu_group_{get,put} are only intended for VFIO bus driver probe
- * and remove functions, any use cases other than acquiring the first
- * reference for the purpose of calling vfio_add_group_dev() or removing
- * that symmetric reference after vfio_del_group_dev() should use the raw
- * iommu_group_{get,put} functions.  In particular, vfio_iommu_group_put()
- * removes the device from the dummy group and cannot be nested.
- */
-struct iommu_group *vfio_iommu_group_get(struct device *dev)
-{
-	struct iommu_group *group;
-	int __maybe_unused ret;
-
-	group = iommu_group_get(dev);
-
-#ifdef CONFIG_VFIO_NOIOMMU
-	/*
-	 * With noiommu enabled, an IOMMU group will be created for a device
-	 * that doesn't already have one and doesn't have an iommu_ops on their
-	 * bus.  We use iommu_present() again in the main code to detect these
-	 * fake groups.
-	 */
-	if (group || !noiommu || iommu_present(dev->bus))
-		return group;
-
-	group = iommu_group_alloc();
-	if (IS_ERR(group))
-		return NULL;
-
-	iommu_group_set_name(group, "vfio-noiommu");
-	ret = iommu_group_add_device(group, dev);
-	iommu_group_put(group);
-	if (ret)
-		return NULL;
-
-	/*
-	 * Where to taint?  At this point we've added an IOMMU group for a
-	 * device that is not backed by iommu_ops, therefore any iommu_
-	 * callback using iommu_ops can legitimately Oops.  So, while we may
-	 * be about to give a DMA capable device to a user without IOMMU
-	 * protection, which is clearly taint-worthy, let's go ahead and do
-	 * it here.
-	 */
-	add_taint(TAINT_USER, LOCKDEP_STILL_OK);
-	dev_warn(dev, "Adding kernel taint for vfio-noiommu group on device\n");
-#endif
-
-	return group;
-}
-EXPORT_SYMBOL_GPL(vfio_iommu_group_get);
-
-void vfio_iommu_group_put(struct iommu_group *group, struct device *dev)
-{
-#ifdef CONFIG_VFIO_NOIOMMU
-	if (!iommu_present(dev->bus))
-		iommu_group_remove_device(dev);
-#endif
-
-	iommu_group_put(group);
-}
-EXPORT_SYMBOL_GPL(vfio_iommu_group_put);
-
-#ifdef CONFIG_VFIO_NOIOMMU
-static void *vfio_noiommu_open(unsigned long arg)
-{
-	if (arg != VFIO_NOIOMMU_IOMMU)
-		return ERR_PTR(-EINVAL);
-	if (!capable(CAP_SYS_RAWIO))
-		return ERR_PTR(-EPERM);
-
-	return NULL;
-}
-
-static void vfio_noiommu_release(void *iommu_data)
-{
-}
-
-static long vfio_noiommu_ioctl(void *iommu_data,
-			       unsigned int cmd, unsigned long arg)
-{
-	if (cmd == VFIO_CHECK_EXTENSION)
-		return arg == VFIO_NOIOMMU_IOMMU ? 1 : 0;
-
-	return -ENOTTY;
-}
-
-static int vfio_iommu_present(struct device *dev, void *unused)
-{
-	return iommu_present(dev->bus) ? 1 : 0;
-}
-
-static int vfio_noiommu_attach_group(void *iommu_data,
-				     struct iommu_group *iommu_group)
-{
-	return iommu_group_for_each_dev(iommu_group, NULL,
-					vfio_iommu_present) ? -EINVAL : 0;
-}
-
-static void vfio_noiommu_detach_group(void *iommu_data,
-				      struct iommu_group *iommu_group)
-{
-}
-
-static struct vfio_iommu_driver_ops vfio_noiommu_ops = {
-	.name = "vfio-noiommu",
-	.owner = THIS_MODULE,
-	.open = vfio_noiommu_open,
-	.release = vfio_noiommu_release,
-	.ioctl = vfio_noiommu_ioctl,
-	.attach_group = vfio_noiommu_attach_group,
-	.detach_group = vfio_noiommu_detach_group,
-};
-
-static struct vfio_iommu_driver vfio_noiommu_driver = {
-	.ops = &vfio_noiommu_ops,
-};
-
-/*
- * Wrap IOMMU drivers, the noiommu driver is the one and only driver for
- * noiommu groups (and thus containers) and not available for normal groups.
- */
-#define vfio_for_each_iommu_driver(con, pos)				\
-	for (pos = con->noiommu ? &vfio_noiommu_driver :		\
-	     list_first_entry(&vfio.iommu_drivers_list,			\
-			      struct vfio_iommu_driver, vfio_next);	\
-	     (con->noiommu ? pos != NULL :				\
-			&pos->vfio_next != &vfio.iommu_drivers_list);	\
-	      pos = con->noiommu ? NULL : list_next_entry(pos, vfio_next))
-#else
-#define vfio_for_each_iommu_driver(con, pos)				\
-	list_for_each_entry(pos, &vfio.iommu_drivers_list, vfio_next)
-#endif
-
-
 /**
  * IOMMU driver registration
  */
@@ -342,8 +199,7 @@ static void vfio_group_unlock_and_free(struct vfio_group *group)
 /**
  * Group objects - create, release, get, put, search
  */
-static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group,
-					    bool noiommu)
+static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group)
 {
 	struct vfio_group *group, *tmp;
 	struct device *dev;
@@ -361,7 +217,6 @@ static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group,
 	atomic_set(&group->container_users, 0);
 	atomic_set(&group->opened, 0);
 	group->iommu_group = iommu_group;
-	group->noiommu = noiommu;
 
 	group->nb.notifier_call = vfio_iommu_group_notifier;
 
@@ -397,8 +252,7 @@ static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group,
 
 	dev = device_create(vfio.class, NULL,
 			    MKDEV(MAJOR(vfio.group_devt), minor),
-			    group, "%s%d", noiommu ? "noiommu-" : "",
-			    iommu_group_id(iommu_group));
+			    group, "%d", iommu_group_id(iommu_group));
 	if (IS_ERR(dev)) {
 		vfio_free_group_minor(minor);
 		vfio_group_unlock_and_free(group);
@@ -786,8 +640,7 @@ int vfio_add_group_dev(struct device *dev,
 
 	group = vfio_group_get_from_iommu(iommu_group);
 	if (!group) {
-		group = vfio_create_group(iommu_group,
-					  !iommu_present(dev->bus));
+		group = vfio_create_group(iommu_group);
 		if (IS_ERR(group)) {
 			iommu_group_put(iommu_group);
 			return PTR_ERR(group);
@@ -999,7 +852,8 @@ static long vfio_ioctl_check_extension(struct vfio_container *container,
 		 */
 		if (!driver) {
 			mutex_lock(&vfio.iommu_drivers_lock);
-			vfio_for_each_iommu_driver(container, driver) {
+			list_for_each_entry(driver, &vfio.iommu_drivers_list,
+					    vfio_next) {
 				if (!try_module_get(driver->ops->owner))
 					continue;
 
@@ -1068,7 +922,7 @@ static long vfio_ioctl_set_iommu(struct vfio_container *container,
 	}
 
 	mutex_lock(&vfio.iommu_drivers_lock);
-	vfio_for_each_iommu_driver(container, driver) {
+	list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
 		void *data;
 
 		if (!try_module_get(driver->ops->owner))
@@ -1333,9 +1187,6 @@ static int vfio_group_set_container(struct vfio_group *group, int container_fd)
 	if (atomic_read(&group->container_users))
 		return -EINVAL;
 
-	if (group->noiommu && !capable(CAP_SYS_RAWIO))
-		return -EPERM;
-
 	f = fdget(container_fd);
 	if (!f.file)
 		return -EBADF;
@@ -1351,13 +1202,6 @@ static int vfio_group_set_container(struct vfio_group *group, int container_fd)
 
 	down_write(&container->group_lock);
 
-	/* Real groups and fake groups cannot mix */
-	if (!list_empty(&container->group_list) &&
-	    container->noiommu != group->noiommu) {
-		ret = -EPERM;
-		goto unlock_out;
-	}
-
 	driver = container->iommu_driver;
 	if (driver) {
 		ret = driver->ops->attach_group(container->iommu_data,
@@ -1367,7 +1211,6 @@ static int vfio_group_set_container(struct vfio_group *group, int container_fd)
 	}
 
 	group->container = container;
-	container->noiommu = group->noiommu;
 	list_add(&group->container_next, &container->group_list);
 
 	/* Get a reference on the container and mark a user within the group */
@@ -1398,9 +1241,6 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf)
 	    !group->container->iommu_driver || !vfio_group_viable(group))
 		return -EINVAL;
 
-	if (group->noiommu && !capable(CAP_SYS_RAWIO))
-		return -EPERM;
-
 	device = vfio_device_get_from_name(group, buf);
 	if (!device)
 		return -ENODEV;
@@ -1443,10 +1283,6 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf)
 
 	fd_install(ret, filep);
 
-	if (group->noiommu)
-		dev_warn(device->dev, "vfio-noiommu device opened by user "
-			 "(%s:%d)\n", current->comm, task_pid_nr(current));
-
 	return ret;
 }
 
@@ -1535,11 +1371,6 @@ static int vfio_group_fops_open(struct inode *inode, struct file *filep)
 	if (!group)
 		return -ENODEV;
 
-	if (group->noiommu && !capable(CAP_SYS_RAWIO)) {
-		vfio_group_put(group);
-		return -EPERM;
-	}
-
 	/* Do we need multiple instances of the group open?  Seems not. */
 	opened = atomic_cmpxchg(&group->opened, 0, 1);
 	if (opened) {
@@ -1702,11 +1533,6 @@ struct vfio_group *vfio_group_get_external_user(struct file *filep)
 	if (!atomic_inc_not_zero(&group->container_users))
 		return ERR_PTR(-EINVAL);
 
-	if (group->noiommu) {
-		atomic_dec(&group->container_users);
-		return ERR_PTR(-EPERM);
-	}
-
 	if (!group->container->iommu_driver ||
 			!vfio_group_viable(group)) {
 		atomic_dec(&group->container_users);
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index 610a86a..ddb4409 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -44,9 +44,6 @@ struct vfio_device_ops {
 	void	(*request)(void *device_data, unsigned int count);
 };
 
-extern struct iommu_group *vfio_iommu_group_get(struct device *dev);
-extern void vfio_iommu_group_put(struct iommu_group *group, struct device *dev);
-
 extern int vfio_add_group_dev(struct device *dev,
 			      const struct vfio_device_ops *ops,
 			      void *device_data);
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 751b69f..9fd7b5d 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -39,13 +39,6 @@
 #define VFIO_SPAPR_TCE_v2_IOMMU		7
 
 /*
- * The No-IOMMU IOMMU offers no translation or isolation for devices and
- * supports no ioctls outside of VFIO_CHECK_EXTENSION.  Use of VFIO's No-IOMMU
- * code will taint the host kernel and should be used with extreme caution.
- */
-#define VFIO_NOIOMMU_IOMMU		8
-
-/*
  * The IOCTL interface is designed for extensibility by embedding the
  * structure length (argsz) and flags into structures passed between
  * kernel and userspace.  We therefore use the _IO() macro for these
-- 
cgit v0.10.2


From 84ed91526f9881886d70a082032236edaa20e7d4 Mon Sep 17 00:00:00 2001
From: Mika Westerberg <mika.westerberg@linux.intel.com>
Date: Fri, 4 Dec 2015 15:53:42 +0200
Subject: xhci: Fix memory leak in xhci_pme_acpi_rtd3_enable()

There is a memory leak because acpi_evaluate_dsm() actually returns an
object which the caller is supposed to release. Fix this by calling
ACPI_FREE() for the returned object (this expands to kfree() so passing
NULL there is fine as well).

While there correct indentation in !CONFIG_ACPI case.

Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Cc: stable <stable@vger.kernel.org> # v4.2
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index 17f6897..c621090 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -188,10 +188,14 @@ static void xhci_pme_acpi_rtd3_enable(struct pci_dev *dev)
 		0xb7, 0x0c, 0x34, 0xac,	0x01, 0xe9, 0xbf, 0x45,
 		0xb7, 0xe6, 0x2b, 0x34, 0xec, 0x93, 0x1e, 0x23,
 	};
-	acpi_evaluate_dsm(ACPI_HANDLE(&dev->dev), intel_dsm_uuid, 3, 1, NULL);
+	union acpi_object *obj;
+
+	obj = acpi_evaluate_dsm(ACPI_HANDLE(&dev->dev), intel_dsm_uuid, 3, 1,
+				NULL);
+	ACPI_FREE(obj);
 }
 #else
-	static void xhci_pme_acpi_rtd3_enable(struct pci_dev *dev) { }
+static void xhci_pme_acpi_rtd3_enable(struct pci_dev *dev) { }
 #endif /* CONFIG_ACPI */
 
 /* called during probe() after chip reset completes */
-- 
cgit v0.10.2


From 096b110a3dd3c868e4610937c80d2e3f3357c1a9 Mon Sep 17 00:00:00 2001
From: Chunfeng Yun <chunfeng.yun@mediatek.com>
Date: Fri, 4 Dec 2015 15:53:43 +0200
Subject: usb: xhci: fix config fail of FS hub behind a HS hub with MTT

if a full speed hub connects to a high speed hub which
supports MTT, the MTT field of its slot context will be set
to 1 when xHCI driver setups an xHCI virtual device in
xhci_setup_addressable_virt_dev(); once usb core fetch its
hub descriptor, and need to update the xHC's internal data
structures for the device, the HUB field of its slot context
will be set to 1 too, meanwhile MTT is also set before,
this will cause configure endpoint command fail, so in the
case, we should clear MTT to 0 for full speed hub according
to section 6.2.2

Signed-off-by: Chunfeng Yun <chunfeng.yun@mediatek.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index dfa44d3..3f91270 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -4778,8 +4778,16 @@ int xhci_update_hub_device(struct usb_hcd *hcd, struct usb_device *hdev,
 	ctrl_ctx->add_flags |= cpu_to_le32(SLOT_FLAG);
 	slot_ctx = xhci_get_slot_ctx(xhci, config_cmd->in_ctx);
 	slot_ctx->dev_info |= cpu_to_le32(DEV_HUB);
+	/*
+	 * refer to section 6.2.2: MTT should be 0 for full speed hub,
+	 * but it may be already set to 1 when setup an xHCI virtual
+	 * device, so clear it anyway.
+	 */
 	if (tt->multi)
 		slot_ctx->dev_info |= cpu_to_le32(DEV_MTT);
+	else if (hdev->speed == USB_SPEED_FULL)
+		slot_ctx->dev_info &= cpu_to_le32(~DEV_MTT);
+
 	if (xhci->hci_version > 0x95) {
 		xhci_dbg(xhci, "xHCI version %x needs hub "
 				"TT think time and number of ports\n",
-- 
cgit v0.10.2


From 6406eeb3f5bb376c7d9674e61f8da34ce7f05e8d Mon Sep 17 00:00:00 2001
From: Don Zickus <dzickus@redhat.com>
Date: Thu, 3 Dec 2015 17:26:27 -0500
Subject: usb: Quiet down false peer failure messages

My recent Intel box is spewing these messages:

xhci_hcd 0000:00:14.0: xHCI Host Controller
xhci_hcd 0000:00:14.0: new USB bus registered, assigned bus number 2
usb usb2: New USB device found, idVendor=1d6b, idProduct=0003
usb usb2: New USB device strings: Mfr=3, Product=2, SerialNumber=1
usb usb2: Product: xHCI Host Controller
usb usb2: Manufacturer: Linux 4.3.0+ xhci-hcd
usb usb2: SerialNumber: 0000:00:14.0
hub 2-0:1.0: USB hub found
hub 2-0:1.0: 6 ports detected
usb: failed to peer usb2-port2 and usb1-port1 by location (usb2-port2:none) (usb1-port1:usb2-port1)
usb usb2-port2: failed to peer to usb1-port1 (-16)
usb: port power management may be unreliable
usb: failed to peer usb2-port3 and usb1-port1 by location (usb2-port3:none) (usb1-port1:usb2-port1)
usb usb2-port3: failed to peer to usb1-port1 (-16)
usb: failed to peer usb2-port5 and usb1-port1 by location (usb2-port5:none) (usb1-port1:usb2-port1)
usb usb2-port5: failed to peer to usb1-port1 (-16)
usb: failed to peer usb2-port6 and usb1-port1 by location (usb2-port6:none) (usb1-port1:usb2-port1)
usb usb2-port6: failed to peer to usb1-port1 (-16)

Diving into the acpi tables, I noticed the EHCI hub has 12 ports while the XHCI
hub has 8 ports.  Most of those ports are of connect type USB_PORT_NOT_USED
(including port 1 of the EHCI hub).

Further the unused ports have location data initialized to 0x80000000.

Now each unused port on the xhci hub walks the port list and finds a matching
peer with port1 of the EHCI hub because the zero'd out group id bits falsely match.
After port1 of the XHCI hub, each following matching peer will generate the
above warning.

These warnings seem to be harmless for this scenario as I don't think it
matters that unused ports could not create a peer link.

The attached patch utilizes that assumption and just turns the pr_warn into
pr_debug to quiet things down.

Tested on my Intel box.

Signed-off-by: Don Zickus <dzickus@redhat.com>
Acked-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

diff --git a/drivers/usb/core/port.c b/drivers/usb/core/port.c
index 2106183..5487fe308 100644
--- a/drivers/usb/core/port.c
+++ b/drivers/usb/core/port.c
@@ -206,7 +206,7 @@ static int link_peers(struct usb_port *left, struct usb_port *right)
 		else
 			method = "default";
 
-		pr_warn("usb: failed to peer %s and %s by %s (%s:%s) (%s:%s)\n",
+		pr_debug("usb: failed to peer %s and %s by %s (%s:%s) (%s:%s)\n",
 			dev_name(&left->dev), dev_name(&right->dev), method,
 			dev_name(&left->dev),
 			lpeer ? dev_name(&lpeer->dev) : "none",
@@ -265,7 +265,7 @@ static void link_peers_report(struct usb_port *left, struct usb_port *right)
 	if (rc == 0) {
 		dev_dbg(&left->dev, "peered to %s\n", dev_name(&right->dev));
 	} else {
-		dev_warn(&left->dev, "failed to peer to %s (%d)\n",
+		dev_dbg(&left->dev, "failed to peer to %s (%d)\n",
 				dev_name(&right->dev), rc);
 		pr_warn_once("usb: port power management may be unreliable\n");
 		usb_port_block_power_off = 1;
-- 
cgit v0.10.2


From 4a0c4c36094cefd85cbe489590382ef69516ccef Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Date: Wed, 2 Dec 2015 20:36:55 +0100
Subject: USB: host: ohci-at91: fix a crash in ohci_hcd_at91_overcurrent_irq

The interrupt handler, ohci_hcd_at91_overcurrent_irq may be called right
after registration. At that time, pdev->dev.platform_data is not yet set,
leading to a NULL pointer dereference.

Fixes: e4df92279fd9 (USB: host: ohci-at91: merge loops in ohci_hcd_at91_drv_probe)
Reported-by: Peter Rosin <peda@axentia.se>
Tested-by: Peter Rosin <peda@axentia.se>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Cc: stable@vger.kernel.org # 4.3+
Acked-by: Nicolas Ferre <nicolas.ferre@atmel.com>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

diff --git a/drivers/usb/host/ohci-at91.c b/drivers/usb/host/ohci-at91.c
index 342ffd1..8c6e15b 100644
--- a/drivers/usb/host/ohci-at91.c
+++ b/drivers/usb/host/ohci-at91.c
@@ -473,6 +473,8 @@ static int ohci_hcd_at91_drv_probe(struct platform_device *pdev)
 	if (!pdata)
 		return -ENOMEM;
 
+	pdev->dev.platform_data = pdata;
+
 	if (!of_property_read_u32(np, "num-ports", &ports))
 		pdata->ports = ports;
 
@@ -483,6 +485,7 @@ static int ohci_hcd_at91_drv_probe(struct platform_device *pdev)
 		 */
 		if (i >= pdata->ports) {
 			pdata->vbus_pin[i] = -EINVAL;
+			pdata->overcurrent_pin[i] = -EINVAL;
 			continue;
 		}
 
@@ -513,10 +516,8 @@ static int ohci_hcd_at91_drv_probe(struct platform_device *pdev)
 	}
 
 	at91_for_each_port(i) {
-		if (i >= pdata->ports) {
-			pdata->overcurrent_pin[i] = -EINVAL;
-			continue;
-		}
+		if (i >= pdata->ports)
+			break;
 
 		pdata->overcurrent_pin[i] =
 			of_get_named_gpio_flags(np, "atmel,oc-gpio", i, &flags);
@@ -552,8 +553,6 @@ static int ohci_hcd_at91_drv_probe(struct platform_device *pdev)
 		}
 	}
 
-	pdev->dev.platform_data = pdata;
-
 	device_init_wakeup(&pdev->dev, 1);
 	return usb_hcd_at91_probe(&ohci_at91_hc_driver, pdev);
 }
-- 
cgit v0.10.2


From 3845d2953aacf00ad069806ba8d1495675069f23 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 4 Dec 2015 10:28:14 -0600
Subject: PCI/MSI: Only use the generic MSI layer when domain is hierarchical

Since d8a1cb757550 ("PCI/MSI: Let pci_msi_get_domain use struct
device::msi_domain"), we use the MSI domain associated with the PCI device.

But finding an MSI domain doesn't mean that the domain is implemented using
the generic MSI domain API, and a number of MSI controllers are still using
arch_setup_msi_irq() and arch_teardown_msi_irqs().

Check that the domain we just obtained is hierarchical.  If it is, we can
use the new generic MSI stuff.  Otherwise we have to fall back to the old
arch_setup_msi_irq() and arch_teardown_msi_irqs() interfaces.

This avoids an oops in msi_domain_alloc_irqs() on systems with R-Car,
Tegra, Armada 370, and probably other DesignWare-based host controllers.

Fixes: d8a1cb757550 ("PCI/MSI: Let pci_msi_get_domain use struct device::msi_domain")
Reported-by: Phil Edworthy <phil.edworthy@renesas.com>
Tested-by: Phil Edworthy <phil.edworthy@renesas.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
CC: stable@vger.kernel.org	# v4.3+

diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 53e4632..7eaa4c8 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -54,7 +54,7 @@ static int pci_msi_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 	struct irq_domain *domain;
 
 	domain = pci_msi_get_domain(dev);
-	if (domain)
+	if (domain && irq_domain_is_hierarchy(domain))
 		return pci_msi_domain_alloc_irqs(domain, dev, nvec, type);
 
 	return arch_setup_msi_irqs(dev, nvec, type);
@@ -65,7 +65,7 @@ static void pci_msi_teardown_msi_irqs(struct pci_dev *dev)
 	struct irq_domain *domain;
 
 	domain = pci_msi_get_domain(dev);
-	if (domain)
+	if (domain && irq_domain_is_hierarchy(domain))
 		pci_msi_domain_free_irqs(domain, dev);
 	else
 		arch_teardown_msi_irqs(dev);
-- 
cgit v0.10.2


From 863c1ca73f56d3473ff1d06357b31b0063538a96 Mon Sep 17 00:00:00 2001
From: Nicolas Ferre <nicolas.ferre@atmel.com>
Date: Tue, 17 Nov 2015 16:25:45 +0100
Subject: ARM: at91/dt: add always-on to 1.8V regulator

As the SDHCI controller needs the 1.8V line to be always enabled for some eMMC
configurations, set the proper "regulator-always-on" property to the board DTS
files.
Note that the sdhci classical regulator definitions doesn't suit our controller
for this 1.8V purpose.

Signed-off-by: Nicolas Ferre <nicolas.ferre@atmel.com>
Acked-by: Ludovic Desroches <ludovic.desroches@atmel.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>

diff --git a/arch/arm/boot/dts/at91-sama5d2_xplained.dts b/arch/arm/boot/dts/at91-sama5d2_xplained.dts
index ad6de73..e74df32 100644
--- a/arch/arm/boot/dts/at91-sama5d2_xplained.dts
+++ b/arch/arm/boot/dts/at91-sama5d2_xplained.dts
@@ -184,6 +184,7 @@
 							regulator-name = "VDD_SDHC_1V8";
 							regulator-min-microvolt = <1800000>;
 							regulator-max-microvolt = <1800000>;
+							regulator-always-on;
 						};
 					};
 				};
-- 
cgit v0.10.2


From 087ffd4eae9929afd06f6a709861df3c3508492a Mon Sep 17 00:00:00 2001
From: Junxiao Bi <junxiao.bi@oracle.com>
Date: Fri, 4 Dec 2015 12:29:28 -0500
Subject: jbd2: fix null committed data return in undo_access

introduced jbd2_write_access_granted() to improve write|undo_access
speed, but missed to check the status of b_committed_data which caused
a kernel panic on ocfs2.

[ 6538.405938] ------------[ cut here ]------------
[ 6538.406686] kernel BUG at fs/ocfs2/suballoc.c:2400!
[ 6538.406686] invalid opcode: 0000 [#1] SMP
[ 6538.406686] Modules linked in: ocfs2 nfsd lockd grace nfs_acl auth_rpcgss sunrpc autofs4 ocfs2_dlmfs ocfs2_stack_o2cb ocfs2_dlm ocfs2_nodemanager ocfs2_stackglue configfs sd_mod sg ip6t_REJECT nf_reject_ipv6 nf_conntrack_ipv6 nf_defrag_ipv6 xt_state nf_conntrack ip6table_filter ip6_tables be2iscsi iscsi_boot_sysfs bnx2i cnic uio cxgb4i cxgb4 cxgb3i libcxgbi cxgb3 mdio ib_iser rdma_cm ib_cm iw_cm ib_sa ib_mad ib_core ib_addr ipv6 iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi ppdev xen_kbdfront xen_netfront xen_fbfront parport_pc parport pcspkr i2c_piix4 acpi_cpufreq ext4 jbd2 mbcache xen_blkfront floppy pata_acpi ata_generic ata_piix cirrus ttm drm_kms_helper drm fb_sys_fops sysimgblt sysfillrect i2c_core syscopyarea dm_mirror dm_region_hash dm_log dm_mod
[ 6538.406686] CPU: 1 PID: 16265 Comm: mmap_truncate Not tainted 4.3.0 #1
[ 6538.406686] Hardware name: Xen HVM domU, BIOS 4.3.1OVM 05/14/2014
[ 6538.406686] task: ffff88007c2bab00 ti: ffff880075b78000 task.ti: ffff880075b78000
[ 6538.406686] RIP: 0010:[<ffffffffa06a286b>]  [<ffffffffa06a286b>] ocfs2_block_group_clear_bits+0x23b/0x250 [ocfs2]
[ 6538.406686] RSP: 0018:ffff880075b7b7f8  EFLAGS: 00010246
[ 6538.406686] RAX: ffff8800760c5b40 RBX: ffff88006c06a000 RCX: ffffffffa06e6df0
[ 6538.406686] RDX: 0000000000000000 RSI: ffff88007a6f6ea0 RDI: ffff88007a760430
[ 6538.406686] RBP: ffff880075b7b878 R08: 0000000000000002 R09: 0000000000000001
[ 6538.406686] R10: ffffffffa06769be R11: 0000000000000000 R12: 0000000000000001
[ 6538.406686] R13: ffffffffa06a1750 R14: 0000000000000001 R15: ffff88007a6f6ea0
[ 6538.406686] FS:  00007f17fde30720(0000) GS:ffff88007f040000(0000) knlGS:0000000000000000
[ 6538.406686] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 6538.406686] CR2: 0000000000601730 CR3: 000000007aea0000 CR4: 00000000000406e0
[ 6538.406686] Stack:
[ 6538.406686]  ffff88007c2bb5b0 ffff880075b7b8e0 ffff88007a7604b0 ffff88006c640800
[ 6538.406686]  ffff88007a7604b0 ffff880075d77390 0000000075b7b878 ffffffffa06a309d
[ 6538.406686]  ffff880075d752d8 ffff880075b7b990 ffff880075b7b898 0000000000000000
[ 6538.406686] Call Trace:
[ 6538.406686]  [<ffffffffa06a309d>] ? ocfs2_read_group_descriptor+0x6d/0xa0 [ocfs2]
[ 6538.406686]  [<ffffffffa06a3654>] _ocfs2_free_suballoc_bits+0xe4/0x320 [ocfs2]
[ 6538.406686]  [<ffffffffa06a1750>] ? ocfs2_put_slot+0xf0/0xf0 [ocfs2]
[ 6538.406686]  [<ffffffffa06a397e>] _ocfs2_free_clusters+0xee/0x210 [ocfs2]
[ 6538.406686]  [<ffffffffa06a1750>] ? ocfs2_put_slot+0xf0/0xf0 [ocfs2]
[ 6538.406686]  [<ffffffffa06a1750>] ? ocfs2_put_slot+0xf0/0xf0 [ocfs2]
[ 6538.406686]  [<ffffffffa0682d50>] ? ocfs2_extend_trans+0x50/0x1a0 [ocfs2]
[ 6538.406686]  [<ffffffffa06a3ad5>] ocfs2_free_clusters+0x15/0x20 [ocfs2]
[ 6538.406686]  [<ffffffffa065072c>] ocfs2_replay_truncate_records+0xfc/0x290 [ocfs2]
[ 6538.406686]  [<ffffffffa06843ac>] ? ocfs2_start_trans+0xec/0x1d0 [ocfs2]
[ 6538.406686]  [<ffffffffa0654600>] __ocfs2_flush_truncate_log+0x140/0x2d0 [ocfs2]
[ 6538.406686]  [<ffffffffa0654394>] ? ocfs2_reserve_blocks_for_rec_trunc.clone.0+0x44/0x170 [ocfs2]
[ 6538.406686]  [<ffffffffa065acd4>] ocfs2_remove_btree_range+0x374/0x630 [ocfs2]
[ 6538.406686]  [<ffffffffa017486b>] ? jbd2_journal_stop+0x25b/0x470 [jbd2]
[ 6538.406686]  [<ffffffffa065d5b5>] ocfs2_commit_truncate+0x305/0x670 [ocfs2]
[ 6538.406686]  [<ffffffffa0683430>] ? ocfs2_journal_access_eb+0x20/0x20 [ocfs2]
[ 6538.406686]  [<ffffffffa067adb7>] ocfs2_truncate_file+0x297/0x380 [ocfs2]
[ 6538.406686]  [<ffffffffa01759e4>] ? jbd2_journal_begin_ordered_truncate+0x64/0xc0 [jbd2]
[ 6538.406686]  [<ffffffffa067c7a2>] ocfs2_setattr+0x572/0x860 [ocfs2]
[ 6538.406686]  [<ffffffff810e4a3f>] ? current_fs_time+0x3f/0x50
[ 6538.406686]  [<ffffffff812124b7>] notify_change+0x1d7/0x340
[ 6538.406686]  [<ffffffff8121abf9>] ? generic_getxattr+0x79/0x80
[ 6538.406686]  [<ffffffff811f5876>] do_truncate+0x66/0x90
[ 6538.406686]  [<ffffffff81120e30>] ? __audit_syscall_entry+0xb0/0x110
[ 6538.406686]  [<ffffffff811f5bb3>] do_sys_ftruncate.clone.0+0xf3/0x120
[ 6538.406686]  [<ffffffff811f5bee>] SyS_ftruncate+0xe/0x10
[ 6538.406686]  [<ffffffff816aa2ae>] entry_SYSCALL_64_fastpath+0x12/0x71
[ 6538.406686] Code: 28 48 81 ee b0 04 00 00 48 8b 92 50 fb ff ff 48 8b 80 b0 03 00 00 48 39 90 88 00 00 00 0f 84 30 fe ff ff 0f 0b eb fe 0f 0b eb fe <0f> 0b 0f 1f 00 eb fb 66 66 66 66 66 2e 0f 1f 84 00 00 00 00 00
[ 6538.406686] RIP  [<ffffffffa06a286b>] ocfs2_block_group_clear_bits+0x23b/0x250 [ocfs2]
[ 6538.406686]  RSP <ffff880075b7b7f8>
[ 6538.691128] ---[ end trace 31cd7011d6770d7e ]---
[ 6538.694492] Kernel panic - not syncing: Fatal exception
[ 6538.695484] Kernel Offset: disabled

Fixes: de92c8caf16c("jbd2: speedup jbd2_journal_get_[write|undo]_access()")
Cc: <stable@vger.kernel.org>
Signed-off-by: Junxiao Bi <junxiao.bi@oracle.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>

diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index b996212..1498ad9 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1009,7 +1009,8 @@ out:
 }
 
 /* Fast check whether buffer is already attached to the required transaction */
-static bool jbd2_write_access_granted(handle_t *handle, struct buffer_head *bh)
+static bool jbd2_write_access_granted(handle_t *handle, struct buffer_head *bh,
+							bool undo)
 {
 	struct journal_head *jh;
 	bool ret = false;
@@ -1036,6 +1037,9 @@ static bool jbd2_write_access_granted(handle_t *handle, struct buffer_head *bh)
 	jh = READ_ONCE(bh->b_private);
 	if (!jh)
 		goto out;
+	/* For undo access buffer must have data copied */
+	if (undo && !jh->b_committed_data)
+		goto out;
 	if (jh->b_transaction != handle->h_transaction &&
 	    jh->b_next_transaction != handle->h_transaction)
 		goto out;
@@ -1073,7 +1077,7 @@ int jbd2_journal_get_write_access(handle_t *handle, struct buffer_head *bh)
 	struct journal_head *jh;
 	int rc;
 
-	if (jbd2_write_access_granted(handle, bh))
+	if (jbd2_write_access_granted(handle, bh, false))
 		return 0;
 
 	jh = jbd2_journal_add_journal_head(bh);
@@ -1210,7 +1214,7 @@ int jbd2_journal_get_undo_access(handle_t *handle, struct buffer_head *bh)
 	char *committed_data = NULL;
 
 	JBUFFER_TRACE(jh, "entry");
-	if (jbd2_write_access_granted(handle, bh))
+	if (jbd2_write_access_granted(handle, bh, true))
 		return 0;
 
 	jh = jbd2_journal_add_journal_head(bh);
-- 
cgit v0.10.2


From 8423536fe453b3623f4ed7c2c3cca9b86af1ace5 Mon Sep 17 00:00:00 2001
From: Ludovic Desroches <ludovic.desroches@atmel.com>
Date: Tue, 1 Dec 2015 11:44:40 +0100
Subject: ARM: at91: fix pinctrl driver selection

Move the selection of the pinctrl driver to SoC family level since we
have two pinctrl drivers. It is useless to select one which is not
compatible with the SoC.

[abelloni: fixed pm.c when only sama2d2 is selected]
Signed-off-by: Ludovic Desroches <ludovic.desroches@atmel.com>
Acked-by: Nicolas Ferre <nicolas.ferre@atmel.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>

diff --git a/arch/arm/mach-at91/Kconfig b/arch/arm/mach-at91/Kconfig
index 9267300..28656c2 100644
--- a/arch/arm/mach-at91/Kconfig
+++ b/arch/arm/mach-at91/Kconfig
@@ -4,7 +4,6 @@ menuconfig ARCH_AT91
 	select ARCH_REQUIRE_GPIOLIB
 	select COMMON_CLK_AT91
 	select PINCTRL
-	select PINCTRL_AT91
 	select SOC_BUS
 
 if ARCH_AT91
@@ -17,6 +16,7 @@ config SOC_SAMA5D2
 	select HAVE_AT91_USB_CLK
 	select HAVE_AT91_H32MX
 	select HAVE_AT91_GENERATED_CLK
+	select PINCTRL_AT91PIO4
 	help
 	  Select this if ou are using one of Atmel's SAMA5D2 family SoC.
 
@@ -27,6 +27,7 @@ config SOC_SAMA5D3
 	select HAVE_AT91_UTMI
 	select HAVE_AT91_SMD
 	select HAVE_AT91_USB_CLK
+	select PINCTRL_AT91
 	help
 	  Select this if you are using one of Atmel's SAMA5D3 family SoC.
 	  This support covers SAMA5D31, SAMA5D33, SAMA5D34, SAMA5D35, SAMA5D36.
@@ -40,6 +41,7 @@ config SOC_SAMA5D4
 	select HAVE_AT91_SMD
 	select HAVE_AT91_USB_CLK
 	select HAVE_AT91_H32MX
+	select PINCTRL_AT91
 	help
 	  Select this if you are using one of Atmel's SAMA5D4 family SoC.
 
@@ -50,6 +52,7 @@ config SOC_AT91RM9200
 	select CPU_ARM920T
 	select HAVE_AT91_USB_CLK
 	select MIGHT_HAVE_PCI
+	select PINCTRL_AT91
 	select SOC_SAM_V4_V5
 	select SRAM if PM
 	help
@@ -65,6 +68,7 @@ config SOC_AT91SAM9
 	select HAVE_AT91_UTMI
 	select HAVE_FB_ATMEL
 	select MEMORY
+	select PINCTRL_AT91
 	select SOC_SAM_V4_V5
 	select SRAM if PM
 	help
diff --git a/arch/arm/mach-at91/pm.c b/arch/arm/mach-at91/pm.c
index 80e277c..23726fb 100644
--- a/arch/arm/mach-at91/pm.c
+++ b/arch/arm/mach-at91/pm.c
@@ -41,8 +41,10 @@
  * implementation should be moved down into the pinctrl driver and get
  * called as part of the generic suspend/resume path.
  */
+#ifdef CONFIG_PINCTRL_AT91
 extern void at91_pinctrl_gpio_suspend(void);
 extern void at91_pinctrl_gpio_resume(void);
+#endif
 
 static struct {
 	unsigned long uhp_udp_mask;
@@ -151,8 +153,9 @@ static void at91_pm_suspend(suspend_state_t state)
 
 static int at91_pm_enter(suspend_state_t state)
 {
+#ifdef CONFIG_PINCTRL_AT91
 	at91_pinctrl_gpio_suspend();
-
+#endif
 	switch (state) {
 	/*
 	 * Suspend-to-RAM is like STANDBY plus slow clock mode, so
@@ -192,7 +195,9 @@ static int at91_pm_enter(suspend_state_t state)
 error:
 	target_state = PM_SUSPEND_ON;
 
+#ifdef CONFIG_PINCTRL_AT91
 	at91_pinctrl_gpio_resume();
+#endif
 	return 0;
 }
 
-- 
cgit v0.10.2


From 43d1c0eb7e11919f85200d2fce211173526f7304 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Fri, 20 Nov 2015 22:22:34 +0100
Subject: block: detach bdev inode from its wb in __blkdev_put()

Since 52ebea749aae ("writeback: make backing_dev_info host
cgroup-specific bdi_writebacks") inode, at some point in its lifetime,
gets attached to a wb (struct bdi_writeback).  Detaching happens on
evict, in inode_detach_wb() called from __destroy_inode(), and involves
updating wb.

However, detaching an internal bdev inode from its wb in
__destroy_inode() is too late.  Its bdi and by extension root wb are
embedded into struct request_queue, which has different lifetime rules
and can be freed long before the final bdput() is called (can be from
__fput() of a corresponding /dev inode, through dput() - evict() -
bd_forget().  bdevs hold onto the underlying disk/queue pair only while
opened; as soon as bdev is closed all bets are off.  In fact,
disk/queue can be gone before __blkdev_put() even returns:

1499 static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
1500 {
...
1518         if (bdev->bd_contains == bdev) {
1519                 if (disk->fops->release)
1520                         disk->fops->release(disk, mode);

[ Driver puts its references to disk/queue ]

1521         }
1522         if (!bdev->bd_openers) {
1523                 struct module *owner = disk->fops->owner;
1524
1525                 disk_put_part(bdev->bd_part);
1526                 bdev->bd_part = NULL;
1527                 bdev->bd_disk = NULL;
1528                 if (bdev != bdev->bd_contains)
1529                         victim = bdev->bd_contains;
1530                 bdev->bd_contains = NULL;
1531
1532                 put_disk(disk);

[ We put ours, the queue is gone
  The last bdput() would result in a write to invalid memory ]

1533                 module_put(owner);
...
1539 }

Since bdev inodes are special anyway, detach them in __blkdev_put()
after clearing inode's dirty bits, turning the problematic
inode_detach_wb() in __destroy_inode() into a noop.

add_disk() grabs its disk->queue since 523e1d399ce0 ("block: make
gendisk hold a reference to its queue"), so the old ->release comment
is removed in favor of the new inode_detach_wb() comment.

Cc: stable@vger.kernel.org # 4.2+, needs backporting
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Acked-by: Tejun Heo <tj@kernel.org>
Tested-by: Raghavendra K T <raghavendra.kt@linux.vnet.ibm.com>
Signed-off-by: Jens Axboe <axboe@fb.com>

diff --git a/fs/block_dev.c b/fs/block_dev.c
index c25639e..44d4a1e 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1523,11 +1523,14 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
 		WARN_ON_ONCE(bdev->bd_holders);
 		sync_blockdev(bdev);
 		kill_bdev(bdev);
+
+		bdev_write_inode(bdev);
 		/*
-		 * ->release can cause the queue to disappear, so flush all
-		 * dirty data before.
+		 * Detaching bdev inode from its wb in __destroy_inode()
+		 * is too late: the queue which embeds its bdi (along with
+		 * root wb) can be gone as soon as we put_disk() below.
 		 */
-		bdev_write_inode(bdev);
+		inode_detach_wb(bdev->bd_inode);
 	}
 	if (bdev->bd_contains == bdev) {
 		if (disk->fops->release)
-- 
cgit v0.10.2


From a74a821624c0c75388a193337babd17a8c02c740 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Fri, 4 Dec 2015 16:44:24 +0100
Subject: ALSA: rme96: Fix unexpected volume reset after rate changes

rme96 driver needs to reset DAC depending on the sample rate, and this
results in resetting to the max volume suddenly.  It's because of the
missing call of snd_rme96_apply_dac_volume().

However, calling this function right after the DAC reset still may not
work, and we need some delay before this call.  Since the DAC reset
and the procedure after that are performed in the spinlock, we delay
the DAC volume restore at the end after the spinlock.

Reported-and-tested-by: Sylvain LABOISNE <maeda1@free.fr>
Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/rme96.c b/sound/pci/rme96.c
index 714df90..41c31db 100644
--- a/sound/pci/rme96.c
+++ b/sound/pci/rme96.c
@@ -741,10 +741,11 @@ snd_rme96_playback_setrate(struct rme96 *rme96,
 	{
 		/* change to/from double-speed: reset the DAC (if available) */
 		snd_rme96_reset_dac(rme96);
+		return 1; /* need to restore volume */
 	} else {
 		writel(rme96->wcreg, rme96->iobase + RME96_IO_CONTROL_REGISTER);
+		return 0;
 	}
-	return 0;
 }
 
 static int
@@ -980,6 +981,7 @@ snd_rme96_playback_hw_params(struct snd_pcm_substream *substream,
 	struct rme96 *rme96 = snd_pcm_substream_chip(substream);
 	struct snd_pcm_runtime *runtime = substream->runtime;
 	int err, rate, dummy;
+	bool apply_dac_volume = false;
 
 	runtime->dma_area = (void __force *)(rme96->iobase +
 					     RME96_IO_PLAY_BUFFER);
@@ -993,24 +995,26 @@ snd_rme96_playback_hw_params(struct snd_pcm_substream *substream,
 	{
                 /* slave clock */
                 if ((int)params_rate(params) != rate) {
-			spin_unlock_irq(&rme96->lock);
-			return -EIO;                    
-                }
-	} else if ((err = snd_rme96_playback_setrate(rme96, params_rate(params))) < 0) {
-		spin_unlock_irq(&rme96->lock);
-		return err;
-	}
-	if ((err = snd_rme96_playback_setformat(rme96, params_format(params))) < 0) {
-		spin_unlock_irq(&rme96->lock);
-		return err;
+			err = -EIO;
+			goto error;
+		}
+	} else {
+		err = snd_rme96_playback_setrate(rme96, params_rate(params));
+		if (err < 0)
+			goto error;
+		apply_dac_volume = err > 0; /* need to restore volume later? */
 	}
+
+	err = snd_rme96_playback_setformat(rme96, params_format(params));
+	if (err < 0)
+		goto error;
 	snd_rme96_setframelog(rme96, params_channels(params), 1);
 	if (rme96->capture_periodsize != 0) {
 		if (params_period_size(params) << rme96->playback_frlog !=
 		    rme96->capture_periodsize)
 		{
-			spin_unlock_irq(&rme96->lock);
-			return -EBUSY;
+			err = -EBUSY;
+			goto error;
 		}
 	}
 	rme96->playback_periodsize =
@@ -1021,9 +1025,16 @@ snd_rme96_playback_hw_params(struct snd_pcm_substream *substream,
 		rme96->wcreg &= ~(RME96_WCR_PRO | RME96_WCR_DOLBY | RME96_WCR_EMP);
 		writel(rme96->wcreg |= rme96->wcreg_spdif_stream, rme96->iobase + RME96_IO_CONTROL_REGISTER);
 	}
+
+	err = 0;
+ error:
 	spin_unlock_irq(&rme96->lock);
-		
-	return 0;
+	if (apply_dac_volume) {
+		usleep_range(3000, 10000);
+		snd_rme96_apply_dac_volume(rme96);
+	}
+
+	return err;
 }
 
 static int
-- 
cgit v0.10.2


From 7f52f31443ae9b2e6a71ec54cfc5a2c89006ea27 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Fri, 4 Dec 2015 16:21:08 -0600
Subject: PCI: altera: Fix loop in tlp_read_packet()

TLP_LOOP is 500 and the "loop" variable was a u8 so "loop < TLP_LOOP" is
always true.  We only need this condition to work if there is a problem so
it would have been easy to miss this in testing.

Make it a normal for loop with "int i" instead of over thinking things and
making it complicated.

Fixes: 6bb4dd154ae8 ("PCI: altera: Add Altera PCIe host controller driver")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Ley Foon Tan <lftan@altera.com>

diff --git a/drivers/pci/host/pcie-altera.c b/drivers/pci/host/pcie-altera.c
index e5dda38..f0820d3 100644
--- a/drivers/pci/host/pcie-altera.c
+++ b/drivers/pci/host/pcie-altera.c
@@ -166,7 +166,7 @@ static bool altera_pcie_valid_config(struct altera_pcie *pcie,
 
 static int tlp_read_packet(struct altera_pcie *pcie, u32 *value)
 {
-	u8 loop;
+	int i;
 	bool sop = 0;
 	u32 ctrl;
 	u32 reg0, reg1;
@@ -175,7 +175,7 @@ static int tlp_read_packet(struct altera_pcie *pcie, u32 *value)
 	 * Minimum 2 loops to read TLP headers and 1 loop to read data
 	 * payload.
 	 */
-	for (loop = 0; loop < TLP_LOOP; loop++) {
+	for (i = 0; i < TLP_LOOP; i++) {
 		ctrl = cra_readl(pcie, RP_RXCPL_STATUS);
 		if ((ctrl & RP_RXCPL_SOP) || (ctrl & RP_RXCPL_EOP) || sop) {
 			reg0 = cra_readl(pcie, RP_RXCPL_REG0);
-- 
cgit v0.10.2


From 23ec56708af7155414a979675dc8132a5d6e16ef Mon Sep 17 00:00:00 2001
From: Ley Foon Tan <lftan@altera.com>
Date: Fri, 4 Dec 2015 16:21:12 -0600
Subject: PCI: altera: Fix Requester ID for config accesses

The Requester ID should use the Root Port devfn and it should be always 0.
Previously we constructed the Requester ID using the *Completer* devfn,
i.e., the devfn of the Function we expect to respond to the config access.
This causes issues when accessing configuration space for devices other
than the Root Port.

Build the Requester ID using the Root Port devfn.

Tested on Ethernet adapter card with multi-functions.

Signed-off-by: Ley Foon Tan <lftan@altera.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>

diff --git a/drivers/pci/host/pcie-altera.c b/drivers/pci/host/pcie-altera.c
index f0820d3..ed736a9 100644
--- a/drivers/pci/host/pcie-altera.c
+++ b/drivers/pci/host/pcie-altera.c
@@ -57,6 +57,7 @@
 #define TLP_REQ_ID(bus, devfn)		(((bus) << 8) | (devfn))
 #define TLP_HDR_SIZE			3
 #define TLP_LOOP			500
+#define RP_DEVFN			0
 
 #define INTX_NUM			4
 
@@ -233,7 +234,7 @@ static int tlp_cfg_dword_read(struct altera_pcie *pcie, u8 bus, u32 devfn,
 	else
 		headers[0] = TLP_CFG_DW0(TLP_FMTTYPE_CFGRD1);
 
-	headers[1] = TLP_CFG_DW1(TLP_REQ_ID(pcie->root_bus_nr, devfn),
+	headers[1] = TLP_CFG_DW1(TLP_REQ_ID(pcie->root_bus_nr, RP_DEVFN),
 					TLP_READ_TAG, byte_en);
 	headers[2] = TLP_CFG_DW2(bus, devfn, where);
 
@@ -253,7 +254,7 @@ static int tlp_cfg_dword_write(struct altera_pcie *pcie, u8 bus, u32 devfn,
 	else
 		headers[0] = TLP_CFG_DW0(TLP_FMTTYPE_CFGWR1);
 
-	headers[1] = TLP_CFG_DW1(TLP_REQ_ID(pcie->root_bus_nr, devfn),
+	headers[1] = TLP_CFG_DW1(TLP_REQ_ID(pcie->root_bus_nr, RP_DEVFN),
 					TLP_WRITE_TAG, byte_en);
 	headers[2] = TLP_CFG_DW2(bus, devfn, where);
 
-- 
cgit v0.10.2


From ea1d3795f65e14619642b5f67382800f58e43f3d Mon Sep 17 00:00:00 2001
From: Ley Foon Tan <lftan@altera.com>
Date: Fri, 4 Dec 2015 16:21:16 -0600
Subject: PCI: altera: Check TLP completion status

Check TLP packet successful completion status.  This fix the issue when
accessing multi-function devices in enumeration process, TLP will return
error when accessing non-exist function number.  Returns PCI error code
instead of generic errno.

Tested on Ethernet adapter card with multi-functions.

[bhelgaas: simplify completion status checking code]
Signed-off-by: Ley Foon Tan <lftan@altera.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>

diff --git a/drivers/pci/host/pcie-altera.c b/drivers/pci/host/pcie-altera.c
index ed736a9..072255d 100644
--- a/drivers/pci/host/pcie-altera.c
+++ b/drivers/pci/host/pcie-altera.c
@@ -55,6 +55,7 @@
 #define TLP_CFG_DW2(bus, devfn, offset)	\
 				(((bus) << 24) | ((devfn) << 16) | (offset))
 #define TLP_REQ_ID(bus, devfn)		(((bus) << 8) | (devfn))
+#define TLP_COMP_STATUS(s)		(((s) >> 12) & 7)
 #define TLP_HDR_SIZE			3
 #define TLP_LOOP			500
 #define RP_DEVFN			0
@@ -171,6 +172,7 @@ static int tlp_read_packet(struct altera_pcie *pcie, u32 *value)
 	bool sop = 0;
 	u32 ctrl;
 	u32 reg0, reg1;
+	u32 comp_status = 1;
 
 	/*
 	 * Minimum 2 loops to read TLP headers and 1 loop to read data
@@ -182,19 +184,25 @@ static int tlp_read_packet(struct altera_pcie *pcie, u32 *value)
 			reg0 = cra_readl(pcie, RP_RXCPL_REG0);
 			reg1 = cra_readl(pcie, RP_RXCPL_REG1);
 
-			if (ctrl & RP_RXCPL_SOP)
+			if (ctrl & RP_RXCPL_SOP) {
 				sop = true;
+				comp_status = TLP_COMP_STATUS(reg1);
+			}
 
 			if (ctrl & RP_RXCPL_EOP) {
+				if (comp_status)
+					return PCIBIOS_DEVICE_NOT_FOUND;
+
 				if (value)
 					*value = reg0;
+
 				return PCIBIOS_SUCCESSFUL;
 			}
 		}
 		udelay(5);
 	}
 
-	return -ENOENT;
+	return PCIBIOS_DEVICE_NOT_FOUND;
 }
 
 static void tlp_write_packet(struct altera_pcie *pcie, u32 *headers,
-- 
cgit v0.10.2


From 99496bd2971fc378226ad4413e5b72c4545714bd Mon Sep 17 00:00:00 2001
From: Ley Foon Tan <lftan@altera.com>
Date: Fri, 4 Dec 2015 16:21:21 -0600
Subject: PCI: altera: Fix error when INTx is 4

PCI interrupt lines start at 1, not at 0.  So, creates additional one
interrupt when register for irq domain.

Error when PCIe devices have 4 INTx:

  WARNING: CPU: 1 PID: 1 at kernel/irq/irqdomain.c:280
    irq_domain_associate+0x17c/0x1cc()
  error: hwirq 0x4 is too large for dummy

Tested on Ethernet adapter card with multi-functions.

Signed-off-by: Ley Foon Tan <lftan@altera.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>

diff --git a/drivers/pci/host/pcie-altera.c b/drivers/pci/host/pcie-altera.c
index 072255d..99da549 100644
--- a/drivers/pci/host/pcie-altera.c
+++ b/drivers/pci/host/pcie-altera.c
@@ -467,7 +467,7 @@ static int altera_pcie_init_irq_domain(struct altera_pcie *pcie)
 	struct device_node *node = dev->of_node;
 
 	/* Setup INTx */
-	pcie->irq_domain = irq_domain_add_linear(node, INTX_NUM,
+	pcie->irq_domain = irq_domain_add_linear(node, INTX_NUM + 1,
 					&intx_domain_ops, pcie);
 	if (!pcie->irq_domain) {
 		dev_err(dev, "Failed to get a INTx IRQ domain\n");
-- 
cgit v0.10.2


From e0fbac1cd49c5ca6cb6a0aa918d0943ce95d906c Mon Sep 17 00:00:00 2001
From: Yuanfang Chen <cheny@udel.edu>
Date: Tue, 24 Nov 2015 12:05:01 -0500
Subject: perf/x86/intel: Make L1D_PEND_MISS.FB_FULL not constrained on Haswell

There was a mistake in the Haswell constraints table.

Signed-off-by: Yuanfang Chen <cheny@udel.edu>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Andi Kleen <ak@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Link: http://lkml.kernel.org/r/1448384701-9110-1-git-send-email-cheny@udel.edu
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index f63360b..e2a4300 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -232,7 +232,7 @@ static struct event_constraint intel_hsw_event_constraints[] = {
 	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
 	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
 	FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
-	INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.* */
+	INTEL_UEVENT_CONSTRAINT(0x148, 0x4),	/* L1D_PEND_MISS.PENDING */
 	INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
 	INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
 	/* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
-- 
cgit v0.10.2


From 169b932a15318e8e9f2f0f12eeb55dda09c8737f Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Mon, 9 Nov 2015 10:24:31 +0100
Subject: perf/x86/intel: Fix INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA macro

We need to add rest of the flags to the constraint mask
instead of another INTEL_ARCH_EVENT_MASK, fixing a typo.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Link: http://lkml.kernel.org/r/1447061071-28085-1-git-send-email-jolsa@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index ab18b8a..d0e35eb 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -387,7 +387,7 @@ struct cpu_hw_events {
 /* Check flags and event code/umask, and set the HSW N/A flag */
 #define INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(code, n) \
 	__EVENT_CONSTRAINT(code, n, 			\
-			  INTEL_ARCH_EVENT_MASK|INTEL_ARCH_EVENT_MASK, \
+			  INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
 			  HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_NA_HSW)
 
 
-- 
cgit v0.10.2


From 4e93ad601a4308d4a67673c81556580817d56940 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Wed, 4 Nov 2015 16:00:05 +0100
Subject: perf: Do not send exit event twice

In case we monitor events system wide, we get EXIT event
(when configured) twice for each task that exited.

Note doubled lines with same pid/tid in following example:

  $ sudo ./perf record -a
  ^C[ perf record: Woken up 1 times to write data ]
  [ perf record: Captured and wrote 0.480 MB perf.data (2518 samples) ]
  $ sudo ./perf report -D | grep EXIT

  0 60290687567581 0x59910 [0x38]: PERF_RECORD_EXIT(1250:1250):(1250:1250)
  0 60290687568354 0x59948 [0x38]: PERF_RECORD_EXIT(1250:1250):(1250:1250)
  0 60290687988744 0x59ad8 [0x38]: PERF_RECORD_EXIT(1250:1250):(1250:1250)
  0 60290687989198 0x59b10 [0x38]: PERF_RECORD_EXIT(1250:1250):(1250:1250)
  1 60290692567895 0x62af0 [0x38]: PERF_RECORD_EXIT(1253:1253):(1253:1253)
  1 60290692568322 0x62b28 [0x38]: PERF_RECORD_EXIT(1253:1253):(1253:1253)
  2 60290692739276 0x69a18 [0x38]: PERF_RECORD_EXIT(1252:1252):(1252:1252)
  2 60290692739910 0x69a50 [0x38]: PERF_RECORD_EXIT(1252:1252):(1252:1252)

The reason is that the cpu contexts are processes each time
we call perf_event_task. I'm changing the perf_event_aux logic
to serve task_ctx and cpu contexts separately, which ensure we
don't get EXIT event generated twice on same cpu context.

This does not affect other auxiliary events, as they don't
use task_ctx at all.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Link: http://lkml.kernel.org/r/1446649205-5822-1-git-send-email-jolsa@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 49a5118..39cf4a4 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -5683,6 +5683,17 @@ perf_event_aux_ctx(struct perf_event_context *ctx,
 }
 
 static void
+perf_event_aux_task_ctx(perf_event_aux_output_cb output, void *data,
+			struct perf_event_context *task_ctx)
+{
+	rcu_read_lock();
+	preempt_disable();
+	perf_event_aux_ctx(task_ctx, output, data);
+	preempt_enable();
+	rcu_read_unlock();
+}
+
+static void
 perf_event_aux(perf_event_aux_output_cb output, void *data,
 	       struct perf_event_context *task_ctx)
 {
@@ -5691,14 +5702,23 @@ perf_event_aux(perf_event_aux_output_cb output, void *data,
 	struct pmu *pmu;
 	int ctxn;
 
+	/*
+	 * If we have task_ctx != NULL we only notify
+	 * the task context itself. The task_ctx is set
+	 * only for EXIT events before releasing task
+	 * context.
+	 */
+	if (task_ctx) {
+		perf_event_aux_task_ctx(output, data, task_ctx);
+		return;
+	}
+
 	rcu_read_lock();
 	list_for_each_entry_rcu(pmu, &pmus, entry) {
 		cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
 		if (cpuctx->unique_pmu != pmu)
 			goto next;
 		perf_event_aux_ctx(&cpuctx->ctx, output, data);
-		if (task_ctx)
-			goto next;
 		ctxn = pmu->task_ctx_nr;
 		if (ctxn < 0)
 			goto next;
@@ -5708,12 +5728,6 @@ perf_event_aux(perf_event_aux_output_cb output, void *data,
 next:
 		put_cpu_ptr(pmu->pmu_cpu_context);
 	}
-
-	if (task_ctx) {
-		preempt_disable();
-		perf_event_aux_ctx(task_ctx, output, data);
-		preempt_enable();
-	}
 	rcu_read_unlock();
 }
 
@@ -8803,10 +8817,8 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
 	struct perf_event_context *child_ctx, *clone_ctx = NULL;
 	unsigned long flags;
 
-	if (likely(!child->perf_event_ctxp[ctxn])) {
-		perf_event_task(child, NULL, 0);
+	if (likely(!child->perf_event_ctxp[ctxn]))
 		return;
-	}
 
 	local_irq_save(flags);
 	/*
@@ -8890,6 +8902,14 @@ void perf_event_exit_task(struct task_struct *child)
 
 	for_each_task_context_nr(ctxn)
 		perf_event_exit_task_context(child, ctxn);
+
+	/*
+	 * The perf_event_exit_task_context calls perf_event_task
+	 * with child's task_ctx, which generates EXIT events for
+	 * child contexts and sets child->perf_event_ctxp[] to NULL.
+	 * At this point we need to send EXIT events to cpu contexts.
+	 */
+	perf_event_task(child, NULL, 0);
 }
 
 static void perf_free_event(struct perf_event *event,
-- 
cgit v0.10.2


From 7c23b7c1996597dd9d60bb282fb5fa1be6ebd18b Mon Sep 17 00:00:00 2001
From: "Lu, Han" <han.lu@intel.com>
Date: Mon, 7 Dec 2015 15:59:13 +0800
Subject: ALSA: hda - Fix playback noise with 24/32 bit sample size on BXT

In BXT-P A0, HD-Audio DMA requests is later than expected,
and makes an audio stream sensitive to system latencies when
24/32 bits are playing.
Adjusting threshold of DMA fifo to force the DMA request
sooner to improve latency tolerance at the expense of power.

v2: move Intel specific code to hda_intel.c

Signed-off-by: Lu, Han <han.lu@intel.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/include/sound/hda_register.h b/include/sound/hda_register.h
index 2ae8812..94dc6a9 100644
--- a/include/sound/hda_register.h
+++ b/include/sound/hda_register.h
@@ -93,6 +93,9 @@ enum { SDI0, SDI1, SDI2, SDI3, SDO0, SDO1, SDO2, SDO3 };
 #define AZX_REG_HSW_EM4			0x100c
 #define AZX_REG_HSW_EM5			0x1010
 
+/* Skylake/Broxton display HD-A controller Extended Mode registers */
+#define AZX_REG_SKL_EM4L		0x1040
+
 /* PCI space */
 #define AZX_PCIREG_TCSEL		0x44
 
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index 963f824..bff5c8b 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -355,6 +355,8 @@ enum {
 					((pci)->device == 0x0d0c) || \
 					((pci)->device == 0x160c))
 
+#define IS_BROXTON(pci)	((pci)->device == 0x5a98)
+
 static char *driver_short_names[] = {
 	[AZX_DRIVER_ICH] = "HDA Intel",
 	[AZX_DRIVER_PCH] = "HDA Intel PCH",
@@ -506,15 +508,36 @@ static void azx_init_pci(struct azx *chip)
         }
 }
 
+/*
+ * In BXT-P A0, HD-Audio DMA requests is later than expected,
+ * and makes an audio stream sensitive to system latencies when
+ * 24/32 bits are playing.
+ * Adjusting threshold of DMA fifo to force the DMA request
+ * sooner to improve latency tolerance at the expense of power.
+ */
+static void bxt_reduce_dma_latency(struct azx *chip)
+{
+	u32 val;
+
+	val = azx_readl(chip, SKL_EM4L);
+	val &= (0x3 << 20);
+	azx_writel(chip, SKL_EM4L, val);
+}
+
 static void hda_intel_init_chip(struct azx *chip, bool full_reset)
 {
 	struct hdac_bus *bus = azx_bus(chip);
+	struct pci_dev *pci = chip->pci;
 
 	if (chip->driver_caps & AZX_DCAPS_I915_POWERWELL)
 		snd_hdac_set_codec_wakeup(bus, true);
 	azx_init_chip(chip, full_reset);
 	if (chip->driver_caps & AZX_DCAPS_I915_POWERWELL)
 		snd_hdac_set_codec_wakeup(bus, false);
+
+	/* reduce dma latency to avoid noise */
+	if (IS_BROXTON(pci))
+		bxt_reduce_dma_latency(chip);
 }
 
 /* calculate runtime delay from LPIB */
-- 
cgit v0.10.2


From 02f6ff90400d055f08b0ba0b5f0707630b6faed7 Mon Sep 17 00:00:00 2001
From: David Henningsson <david.henningsson@canonical.com>
Date: Mon, 7 Dec 2015 11:29:31 +0100
Subject: ALSA: hda - Add inverted dmic for Packard Bell DOTS

On the internal mic of the Packard Bell DOTS, one channel
has an inverted signal. Add a quirk to fix this up.

Cc: stable@vger.kernel.org
BugLink: https://bugs.launchpad.net/bugs/1523232
Signed-off-by: David Henningsson <david.henningsson@canonical.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 9bedf7c..ebc5362 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -6409,6 +6409,7 @@ static const struct hda_fixup alc662_fixups[] = {
 static const struct snd_pci_quirk alc662_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x1019, 0x9087, "ECS", ALC662_FIXUP_ASUS_MODE2),
 	SND_PCI_QUIRK(0x1025, 0x022f, "Acer Aspire One", ALC662_FIXUP_INV_DMIC),
+	SND_PCI_QUIRK(0x1025, 0x0241, "Packard Bell DOTS", ALC662_FIXUP_INV_DMIC),
 	SND_PCI_QUIRK(0x1025, 0x0308, "Acer Aspire 8942G", ALC662_FIXUP_ASPIRE),
 	SND_PCI_QUIRK(0x1025, 0x031c, "Gateway NV79", ALC662_FIXUP_SKU_IGNORE),
 	SND_PCI_QUIRK(0x1025, 0x0349, "eMachines eM250", ALC662_FIXUP_INV_DMIC),
-- 
cgit v0.10.2


From 2ac46030331e2952a56c05bd15d99082fc410088 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Sun, 15 Nov 2015 15:11:00 +0200
Subject: virtio-net: Stop doing DMA from the stack

Once virtio starts using the DMA API, we won't be able to safely DMA
from the stack.  virtio-net does a couple of config DMA requests
from small stack buffers -- switch to using dynamically-allocated
memory.

This should have no effect on any performance-critical code paths.

Reported-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Tested-by: Andy Lutomirski <luto@kernel.org>

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index d8838ded..f94ab78 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -140,6 +140,12 @@ struct virtnet_info {
 
 	/* CPU hot plug notifier */
 	struct notifier_block nb;
+
+	/* Control VQ buffers: protected by the rtnl lock */
+	struct virtio_net_ctrl_hdr ctrl_hdr;
+	virtio_net_ctrl_ack ctrl_status;
+	u8 ctrl_promisc;
+	u8 ctrl_allmulti;
 };
 
 struct padded_vnet_hdr {
@@ -976,31 +982,30 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
 				 struct scatterlist *out)
 {
 	struct scatterlist *sgs[4], hdr, stat;
-	struct virtio_net_ctrl_hdr ctrl;
-	virtio_net_ctrl_ack status = ~0;
 	unsigned out_num = 0, tmp;
 
 	/* Caller should know better */
 	BUG_ON(!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ));
 
-	ctrl.class = class;
-	ctrl.cmd = cmd;
+	vi->ctrl_status = ~0;
+	vi->ctrl_hdr.class = class;
+	vi->ctrl_hdr.cmd = cmd;
 	/* Add header */
-	sg_init_one(&hdr, &ctrl, sizeof(ctrl));
+	sg_init_one(&hdr, &vi->ctrl_hdr, sizeof(vi->ctrl_hdr));
 	sgs[out_num++] = &hdr;
 
 	if (out)
 		sgs[out_num++] = out;
 
 	/* Add return status. */
-	sg_init_one(&stat, &status, sizeof(status));
+	sg_init_one(&stat, &vi->ctrl_status, sizeof(vi->ctrl_status));
 	sgs[out_num] = &stat;
 
 	BUG_ON(out_num + 1 > ARRAY_SIZE(sgs));
 	virtqueue_add_sgs(vi->cvq, sgs, out_num, 1, vi, GFP_ATOMIC);
 
 	if (unlikely(!virtqueue_kick(vi->cvq)))
-		return status == VIRTIO_NET_OK;
+		return vi->ctrl_status == VIRTIO_NET_OK;
 
 	/* Spin for a response, the kick causes an ioport write, trapping
 	 * into the hypervisor, so the request should be handled immediately.
@@ -1009,7 +1014,7 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
 	       !virtqueue_is_broken(vi->cvq))
 		cpu_relax();
 
-	return status == VIRTIO_NET_OK;
+	return vi->ctrl_status == VIRTIO_NET_OK;
 }
 
 static int virtnet_set_mac_address(struct net_device *dev, void *p)
@@ -1151,7 +1156,6 @@ static void virtnet_set_rx_mode(struct net_device *dev)
 {
 	struct virtnet_info *vi = netdev_priv(dev);
 	struct scatterlist sg[2];
-	u8 promisc, allmulti;
 	struct virtio_net_ctrl_mac *mac_data;
 	struct netdev_hw_addr *ha;
 	int uc_count;
@@ -1163,22 +1167,22 @@ static void virtnet_set_rx_mode(struct net_device *dev)
 	if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_RX))
 		return;
 
-	promisc = ((dev->flags & IFF_PROMISC) != 0);
-	allmulti = ((dev->flags & IFF_ALLMULTI) != 0);
+	vi->ctrl_promisc = ((dev->flags & IFF_PROMISC) != 0);
+	vi->ctrl_allmulti = ((dev->flags & IFF_ALLMULTI) != 0);
 
-	sg_init_one(sg, &promisc, sizeof(promisc));
+	sg_init_one(sg, &vi->ctrl_promisc, sizeof(vi->ctrl_promisc));
 
 	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
 				  VIRTIO_NET_CTRL_RX_PROMISC, sg))
 		dev_warn(&dev->dev, "Failed to %sable promisc mode.\n",
-			 promisc ? "en" : "dis");
+			 vi->ctrl_promisc ? "en" : "dis");
 
-	sg_init_one(sg, &allmulti, sizeof(allmulti));
+	sg_init_one(sg, &vi->ctrl_allmulti, sizeof(vi->ctrl_allmulti));
 
 	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
 				  VIRTIO_NET_CTRL_RX_ALLMULTI, sg))
 		dev_warn(&dev->dev, "Failed to %sable allmulti mode.\n",
-			 allmulti ? "en" : "dis");
+			 vi->ctrl_allmulti ? "en" : "dis");
 
 	uc_count = netdev_uc_count(dev);
 	mc_count = netdev_mc_count(dev);
-- 
cgit v0.10.2


From ea013a9b205b47b1fcbc72522146fad560af0712 Mon Sep 17 00:00:00 2001
From: Andreas Werner <andreas.werner@men.de>
Date: Fri, 4 Dec 2015 18:12:49 +0100
Subject: libata-eh.c: Introduce new ata port flag for controller which lockup
 on read log page

Some controller lockup on a ata_read_log_page.
Add new ata port flag ATA_FLAG_NO_LOG_PAGE which can used
to blacklist a controller.

If this flag is set, any attempt to read a log page returns an error
without actually issuing the command.

Signed-off-by: Andreas Werner <andreas.werner@men.de>
Signed-off-by: Tejun Heo <tj@kernel.org>

diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index cb0508a..961acc7 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -1505,12 +1505,20 @@ static const char *ata_err_string(unsigned int err_mask)
 unsigned int ata_read_log_page(struct ata_device *dev, u8 log,
 			       u8 page, void *buf, unsigned int sectors)
 {
+	unsigned long ap_flags = dev->link->ap->flags;
 	struct ata_taskfile tf;
 	unsigned int err_mask;
 	bool dma = false;
 
 	DPRINTK("read log page - log 0x%x, page 0x%x\n", log, page);
 
+	/*
+	 * Return error without actually issuing the command on controllers
+	 * which e.g. lockup on a read log page.
+	 */
+	if (ap_flags & ATA_FLAG_NO_LOG_PAGE)
+		return AC_ERR_DEV;
+
 retry:
 	ata_tf_init(dev, &tf);
 	if (dev->dma_mode && ata_id_has_read_log_dma_ext(dev->id) &&
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 83577f8..600c1e0 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -210,6 +210,7 @@ enum {
 	ATA_FLAG_SLAVE_POSS	= (1 << 0), /* host supports slave dev */
 					    /* (doesn't imply presence) */
 	ATA_FLAG_SATA		= (1 << 1),
+	ATA_FLAG_NO_LOG_PAGE	= (1 << 5), /* do not issue log page read */
 	ATA_FLAG_NO_ATAPI	= (1 << 6), /* No ATAPI support */
 	ATA_FLAG_PIO_DMA	= (1 << 7), /* PIO cmds via DMA */
 	ATA_FLAG_PIO_LBA48	= (1 << 8), /* Host DMA engine is LBA28 only */
-- 
cgit v0.10.2


From 4f2568f5cb475529aa2894adc7c7912517c83cb0 Mon Sep 17 00:00:00 2001
From: Andreas Werner <andreas.werner@men.de>
Date: Fri, 4 Dec 2015 18:14:14 +0100
Subject: ata/sata_fsl.c: add ATA_FLAG_NO_LOG_PAGE to blacklist the controller
 for log page reads

Every attempt to issue a read log page command lockup the controller.
The command is currently sent if the sata device includes the devlsp feature
to read out the timing data.
This attempt to read the data, locks up the controller and the device
is not recognzied correctly (failed to set xfermode) and cannot be accessed.

This was found on Freescale P1013/P1022 and T4240 CPUs
using a ATP IG mSATA 4GB with the devslp feature.

fsl-sata ff718000.sata: Sata FSL Platform/CSB Driver init
[    1.254195] scsi0 : sata_fsl
[    1.256004] ata1: SATA max UDMA/133 irq 74
[    1.370666] fsl-gianfar ethernet.3: enabled errata workarounds, flags: 0x4
[    1.470671] fsl-gianfar ethernet.4: enabled errata workarounds, flags: 0x4
[    1.775584] ata1: Signature Update detected @ 504 msecs
[    1.947594] ata1: SATA link up 3.0 Gbps (SStatus 123 SControl 300)
[    1.948366] ata1.00: ATA-8: ATP IG mSATA, 20150311, max UDMA/133
[    1.948371] ata1.00: 7732368 sectors, multi 0: LBA
[    1.948843] ata1.00: failed to get Identify Device Data, Emask 0x1
[    1.948857] ata1.00: failed to set xfermode (err_mask=0x40)
[    7.467557] ata1: Signature Update detected @ 504 msecs
[    7.639560] ata1: SATA link up 3.0 Gbps (SStatus 123 SControl 300)
[    7.651320] ata1.00: failed to get Identify Device Data, Emask 0x1
[    7.651360] ata1.00: failed to set xfermode (err_mask=0x40)
[    7.655628] ata1: limiting SATA link speed to 1.5 Gbps
[    7.659458] ata1.00: limiting speed to UDMA/133:PIO3
[   13.163554] ata1: Signature Update detected @ 504 msecs
[   13.335558] ata1: SATA link up 3.0 Gbps (SStatus 123 SControl 300)
[   13.347298] ata1.00: failed to get Identify Device Data, Emask 0x1
[   13.347334] ata1.00: failed to set xfermode (err_mask=0x40)
[   13.351601] ata1.00: disabled
[   13.353278] ata1: exception Emask 0x50 SAct 0x0 SErr 0x800 action 0x6 frozen t4
[   13.359281] ata1: SError: { HostInt }
[   13.361644] ata1: hard resetting link

Signed-off-by: Andreas Werner <andreas.werner@men.de>
Signed-off-by: Tejun Heo <tj@kernel.org>

diff --git a/drivers/ata/sata_fsl.c b/drivers/ata/sata_fsl.c
index 5389579..a723ae9 100644
--- a/drivers/ata/sata_fsl.c
+++ b/drivers/ata/sata_fsl.c
@@ -45,7 +45,8 @@ enum {
 	SATA_FSL_MAX_PRD_DIRECT	= 16,	/* Direct PRDT entries */
 
 	SATA_FSL_HOST_FLAGS	= (ATA_FLAG_SATA | ATA_FLAG_PIO_DMA |
-				ATA_FLAG_PMP | ATA_FLAG_NCQ | ATA_FLAG_AN),
+				   ATA_FLAG_PMP | ATA_FLAG_NCQ |
+				   ATA_FLAG_AN | ATA_FLAG_NO_LOG_PAGE),
 
 	SATA_FSL_MAX_CMDS	= SATA_FSL_QUEUE_DEPTH,
 	SATA_FSL_CMD_HDR_SIZE	= 16,	/* 4 DWORDS */
-- 
cgit v0.10.2


From d542483876f6ed720f573de3fbb1d8e60ccd0d6e Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Mon, 16 Nov 2015 16:57:08 +0200
Subject: vhost: relax log address alignment

commit 5d9a07b0de512b77bf28d2401e5fe3351f00a240 ("vhost: relax used
address alignment") fixed the alignment for the used virtual address,
but not for the physical address used for logging.

That's a mistake: alignment should clearly be the same for virtual and
physical addresses,

Cc: stable@vger.kernel.org
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index eec2f11..080422f 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -819,7 +819,7 @@ long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp)
 		BUILD_BUG_ON(__alignof__ *vq->used > VRING_USED_ALIGN_SIZE);
 		if ((a.avail_user_addr & (VRING_AVAIL_ALIGN_SIZE - 1)) ||
 		    (a.used_user_addr & (VRING_USED_ALIGN_SIZE - 1)) ||
-		    (a.log_guest_addr & (sizeof(u64) - 1))) {
+		    (a.log_guest_addr & (VRING_USED_ALIGN_SIZE - 1))) {
 			r = -EINVAL;
 			break;
 		}
-- 
cgit v0.10.2


From c13f99b7e945dad5273a8b7ee230f4d1f22d3354 Mon Sep 17 00:00:00 2001
From: Suman Anna <s-anna@ti.com>
Date: Wed, 16 Sep 2015 19:29:17 -0500
Subject: virtio: fix memory leak of virtio ida cache layers

The virtio core uses a static ida named virtio_index_ida for
assigning index numbers to virtio devices during registration.
The ida core may allocate some internal idr cache layers and
an ida bitmap upon any ida allocation, and all these layers are
truely freed only upon the ida destruction. The virtio_index_ida
is not destroyed at present, leading to a memory leak when using
the virtio core as a module and atleast one virtio device is
registered and unregistered.

Fix this by invoking ida_destroy() in the virtio core module
exit.

Cc: stable@vger.kernel.org
Signed-off-by: Suman Anna <s-anna@ti.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index b1877d7..7062bb0 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -412,6 +412,7 @@ static int virtio_init(void)
 static void __exit virtio_exit(void)
 {
 	bus_unregister(&virtio_bus);
+	ida_destroy(&virtio_index_ida);
 }
 core_initcall(virtio_init);
 module_exit(virtio_exit);
-- 
cgit v0.10.2


From 40c172e5d99220c8a99dbc4468e792c74494a596 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Sun, 29 Nov 2015 12:43:48 +0200
Subject: tools/virtio: move list macro stubs

Makes them more generally available.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

diff --git a/tools/virtio/linux/kernel.h b/tools/virtio/linux/kernel.h
index 0a3da64..4db7d56 100644
--- a/tools/virtio/linux/kernel.h
+++ b/tools/virtio/linux/kernel.h
@@ -110,4 +110,10 @@ static inline void free_page(unsigned long addr)
 	(void) (&_min1 == &_min2);		\
 	_min1 < _min2 ? _min1 : _min2; })
 
+/* TODO: empty stubs for now. Broken but enough for virtio_ring.c */
+#define list_add_tail(a, b) do {} while (0)
+#define list_del(a) do {} while (0)
+#define list_for_each_entry(a, b, c) while (0)
+/* end of stubs */
+
 #endif /* KERNEL_H */
diff --git a/tools/virtio/linux/virtio.h b/tools/virtio/linux/virtio.h
index a3e0701..ee125e7 100644
--- a/tools/virtio/linux/virtio.h
+++ b/tools/virtio/linux/virtio.h
@@ -3,12 +3,6 @@
 #include <linux/scatterlist.h>
 #include <linux/kernel.h>
 
-/* TODO: empty stubs for now. Broken but enough for virtio_ring.c */
-#define list_add_tail(a, b) do {} while (0)
-#define list_del(a) do {} while (0)
-#define list_for_each_entry(a, b, c) while (0)
-/* end of stubs */
-
 struct virtio_device {
 	void *dev;
 	u64 features;
-- 
cgit v0.10.2


From 55564a02b2ad4dab0c29fd9391d5118592c7fdc8 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Sun, 29 Nov 2015 13:03:23 +0200
Subject: tools/virtio: fix byteswap logic

commit cf561f0d2eb74574ad9985a2feab134267a9d298 ("virtio: introduce
virtio_is_little_endian() helper") changed byteswap logic to
skip feature bit checks for LE platforms, but didn't
update tools/virtio, so vring_bench started failing.

Update the copy under tools/virtio/ (TODO: find a way to avoid this code
duplication).

Cc: Greg Kurz <gkurz@linux.vnet.ibm.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

diff --git a/tools/virtio/linux/virtio_config.h b/tools/virtio/linux/virtio_config.h
index 806d683..57a6964 100644
--- a/tools/virtio/linux/virtio_config.h
+++ b/tools/virtio/linux/virtio_config.h
@@ -40,33 +40,39 @@ static inline void __virtio_clear_bit(struct virtio_device *vdev,
 #define virtio_has_feature(dev, feature) \
 	(__virtio_test_bit((dev), feature))
 
+static inline bool virtio_is_little_endian(struct virtio_device *vdev)
+{
+	return virtio_has_feature(vdev, VIRTIO_F_VERSION_1) ||
+		virtio_legacy_is_little_endian();
+}
+
+/* Memory accessors */
 static inline u16 virtio16_to_cpu(struct virtio_device *vdev, __virtio16 val)
 {
-	return __virtio16_to_cpu(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val);
+	return __virtio16_to_cpu(virtio_is_little_endian(vdev), val);
 }
 
 static inline __virtio16 cpu_to_virtio16(struct virtio_device *vdev, u16 val)
 {
-	return __cpu_to_virtio16(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val);
+	return __cpu_to_virtio16(virtio_is_little_endian(vdev), val);
 }
 
 static inline u32 virtio32_to_cpu(struct virtio_device *vdev, __virtio32 val)
 {
-	return __virtio32_to_cpu(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val);
+	return __virtio32_to_cpu(virtio_is_little_endian(vdev), val);
 }
 
 static inline __virtio32 cpu_to_virtio32(struct virtio_device *vdev, u32 val)
 {
-	return __cpu_to_virtio32(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val);
+	return __cpu_to_virtio32(virtio_is_little_endian(vdev), val);
 }
 
 static inline u64 virtio64_to_cpu(struct virtio_device *vdev, __virtio64 val)
 {
-	return __virtio64_to_cpu(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val);
+	return __virtio64_to_cpu(virtio_is_little_endian(vdev), val);
 }
 
 static inline __virtio64 cpu_to_virtio64(struct virtio_device *vdev, u64 val)
 {
-	return __cpu_to_virtio64(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val);
+	return __cpu_to_virtio64(virtio_is_little_endian(vdev), val);
 }
-
-- 
cgit v0.10.2


From 5fba13b5cf5856e725de35665c37b647323d3b9a Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Sun, 29 Nov 2015 13:34:44 +0200
Subject: vhost: replace % with & on data path

We know vring num is a power of 2, so use &
to mask the high bits.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 080422f..ad2146a 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -1369,7 +1369,7 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
 	/* Grab the next descriptor number they're advertising, and increment
 	 * the index we've seen. */
 	if (unlikely(__get_user(ring_head,
-				&vq->avail->ring[last_avail_idx % vq->num]))) {
+				&vq->avail->ring[last_avail_idx & (vq->num - 1)]))) {
 		vq_err(vq, "Failed to read head: idx %d address %p\n",
 		       last_avail_idx,
 		       &vq->avail->ring[last_avail_idx % vq->num]);
@@ -1489,7 +1489,7 @@ static int __vhost_add_used_n(struct vhost_virtqueue *vq,
 	u16 old, new;
 	int start;
 
-	start = vq->last_used_idx % vq->num;
+	start = vq->last_used_idx & (vq->num - 1);
 	used = vq->used->ring + start;
 	if (count == 1) {
 		if (__put_user(heads[0].id, &used->id)) {
@@ -1531,7 +1531,7 @@ int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads,
 {
 	int start, n, r;
 
-	start = vq->last_used_idx % vq->num;
+	start = vq->last_used_idx & (vq->num - 1);
 	n = vq->num - start;
 	if (n < count) {
 		r = __vhost_add_used_n(vq, heads, n);
-- 
cgit v0.10.2


From 82107539bbb9db303fb6676c78c836add5680bb0 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Tue, 1 Dec 2015 15:32:49 +0100
Subject: virtio: Do not drop __GFP_HIGH in alloc_indirect

b92b1b89a33c ("virtio: force vring descriptors to be allocated from
lowmem") tried to exclude highmem pages for descriptors so it cleared
__GFP_HIGHMEM from a given gfp mask. The patch also cleared __GFP_HIGH
which doesn't make much sense for this fix because __GFP_HIGH only
controls access to memory reserves and it doesn't have any influence
on the zone selection. Some of the call paths use GFP_ATOMIC and
dropping __GFP_HIGH will reduce their changes for success because the
lack of access to memory reserves.

Signed-off-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Reviewed-by: Mel Gorman <mgorman@techsingularity.net>

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 096b857..abdb341 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -109,7 +109,7 @@ static struct vring_desc *alloc_indirect(struct virtqueue *_vq,
 	 * otherwise virt_to_phys will give us bogus addresses in the
 	 * virtqueue.
 	 */
-	gfp &= ~(__GFP_HIGHMEM | __GFP_HIGH);
+	gfp &= ~__GFP_HIGHMEM;
 
 	desc = kmalloc(total_sg * sizeof(struct vring_desc), gfp);
 	if (!desc)
-- 
cgit v0.10.2


From f277ec42f38f406ed073569b24b04f1c291fec0f Mon Sep 17 00:00:00 2001
From: Venkatesh Srinivas <venkateshs@google.com>
Date: Tue, 10 Nov 2015 16:21:07 -0800
Subject: virtio_ring: shadow available ring flags & index

Improves cacheline transfer flow of available ring header.

Virtqueues are implemented as a pair of rings, one producer->consumer
avail ring and one consumer->producer used ring; preceding the
avail ring in memory are two contiguous u16 fields -- avail->flags
and avail->idx. A producer posts work by writing to avail->idx and
a consumer reads avail->idx.

The flags and idx fields only need to be written by a producer CPU
and only read by a consumer CPU; when the producer and consumer are
running on different CPUs and the virtio_ring code is structured to
only have source writes/sink reads, we can continuously transfer the
avail header cacheline between 'M' states between cores. This flow
optimizes core -> core bandwidth on certain CPUs.

(see: "Software Optimization Guide for AMD Family 15h Processors",
Section 11.6; similar language appears in the 10h guide and should
apply to CPUs w/ exclusive caches, using LLC as a transfer cache)

Unfortunately the existing virtio_ring code issued reads to the
avail->idx and read-modify-writes to avail->flags on the producer.

This change shadows the flags and index fields in producer memory;
the vring code now reads from the shadows and only ever writes to
avail->flags and avail->idx, allowing the cacheline to transfer
core -> core optimally.

In a concurrent version of vring_bench, the time required for
10,000,000 buffer checkout/returns was reduced by ~2% (average
across many runs) on an AMD Piledriver (15h) CPU:

(w/o shadowing):
 Performance counter stats for './vring_bench':
     5,451,082,016      L1-dcache-loads
     ...
       2.221477739 seconds time elapsed

(w/ shadowing):
 Performance counter stats for './vring_bench':
     5,405,701,361      L1-dcache-loads
     ...
       2.168405376 seconds time elapsed

The further away (in a NUMA sense) virtio producers and consumers are
from each other, the more we expect to benefit. Physical implementations
of virtio devices and implementations of virtio where the consumer polls
vring avail indexes (vhost) should also benefit.

Signed-off-by: Venkatesh Srinivas <venkateshs@google.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index abdb341..ee663c4 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -80,6 +80,12 @@ struct vring_virtqueue {
 	/* Last used index we've seen. */
 	u16 last_used_idx;
 
+	/* Last written value to avail->flags */
+	u16 avail_flags_shadow;
+
+	/* Last written value to avail->idx in guest byte order */
+	u16 avail_idx_shadow;
+
 	/* How to notify other side. FIXME: commonalize hcalls! */
 	bool (*notify)(struct virtqueue *vq);
 
@@ -235,13 +241,14 @@ static inline int virtqueue_add(struct virtqueue *_vq,
 
 	/* Put entry in available array (but don't update avail->idx until they
 	 * do sync). */
-	avail = virtio16_to_cpu(_vq->vdev, vq->vring.avail->idx) & (vq->vring.num - 1);
+	avail = vq->avail_idx_shadow & (vq->vring.num - 1);
 	vq->vring.avail->ring[avail] = cpu_to_virtio16(_vq->vdev, head);
 
 	/* Descriptors and available array need to be set before we expose the
 	 * new available array entries. */
 	virtio_wmb(vq->weak_barriers);
-	vq->vring.avail->idx = cpu_to_virtio16(_vq->vdev, virtio16_to_cpu(_vq->vdev, vq->vring.avail->idx) + 1);
+	vq->avail_idx_shadow++;
+	vq->vring.avail->idx = cpu_to_virtio16(_vq->vdev, vq->avail_idx_shadow);
 	vq->num_added++;
 
 	pr_debug("Added buffer head %i to %p\n", head, vq);
@@ -354,8 +361,8 @@ bool virtqueue_kick_prepare(struct virtqueue *_vq)
 	 * event. */
 	virtio_mb(vq->weak_barriers);
 
-	old = virtio16_to_cpu(_vq->vdev, vq->vring.avail->idx) - vq->num_added;
-	new = virtio16_to_cpu(_vq->vdev, vq->vring.avail->idx);
+	old = vq->avail_idx_shadow - vq->num_added;
+	new = vq->avail_idx_shadow;
 	vq->num_added = 0;
 
 #ifdef DEBUG
@@ -510,7 +517,7 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
 	/* If we expect an interrupt for the next entry, tell host
 	 * by writing event index and flush out the write before
 	 * the read in the next get_buf call. */
-	if (!(vq->vring.avail->flags & cpu_to_virtio16(_vq->vdev, VRING_AVAIL_F_NO_INTERRUPT))) {
+	if (!(vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
 		vring_used_event(&vq->vring) = cpu_to_virtio16(_vq->vdev, vq->last_used_idx);
 		virtio_mb(vq->weak_barriers);
 	}
@@ -537,7 +544,11 @@ void virtqueue_disable_cb(struct virtqueue *_vq)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
 
-	vq->vring.avail->flags |= cpu_to_virtio16(_vq->vdev, VRING_AVAIL_F_NO_INTERRUPT);
+	if (!(vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
+		vq->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
+		vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev, vq->avail_flags_shadow);
+	}
+
 }
 EXPORT_SYMBOL_GPL(virtqueue_disable_cb);
 
@@ -565,7 +576,10 @@ unsigned virtqueue_enable_cb_prepare(struct virtqueue *_vq)
 	/* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
 	 * either clear the flags bit or point the event index at the next
 	 * entry. Always do both to keep code simple. */
-	vq->vring.avail->flags &= cpu_to_virtio16(_vq->vdev, ~VRING_AVAIL_F_NO_INTERRUPT);
+	if (vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
+		vq->avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
+		vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev, vq->avail_flags_shadow);
+	}
 	vring_used_event(&vq->vring) = cpu_to_virtio16(_vq->vdev, last_used_idx = vq->last_used_idx);
 	END_USE(vq);
 	return last_used_idx;
@@ -633,9 +647,12 @@ bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
 	/* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to
 	 * either clear the flags bit or point the event index at the next
 	 * entry. Always do both to keep code simple. */
-	vq->vring.avail->flags &= cpu_to_virtio16(_vq->vdev, ~VRING_AVAIL_F_NO_INTERRUPT);
+	if (vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
+		vq->avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
+		vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev, vq->avail_flags_shadow);
+	}
 	/* TODO: tune this threshold */
-	bufs = (u16)(virtio16_to_cpu(_vq->vdev, vq->vring.avail->idx) - vq->last_used_idx) * 3 / 4;
+	bufs = (u16)(vq->avail_idx_shadow - vq->last_used_idx) * 3 / 4;
 	vring_used_event(&vq->vring) = cpu_to_virtio16(_vq->vdev, vq->last_used_idx + bufs);
 	virtio_mb(vq->weak_barriers);
 	if (unlikely((u16)(virtio16_to_cpu(_vq->vdev, vq->vring.used->idx) - vq->last_used_idx) > bufs)) {
@@ -670,7 +687,8 @@ void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
 		/* detach_buf clears data, so grab it now. */
 		buf = vq->data[i];
 		detach_buf(vq, i);
-		vq->vring.avail->idx = cpu_to_virtio16(_vq->vdev, virtio16_to_cpu(_vq->vdev, vq->vring.avail->idx) - 1);
+		vq->avail_idx_shadow--;
+		vq->vring.avail->idx = cpu_to_virtio16(_vq->vdev, vq->avail_idx_shadow);
 		END_USE(vq);
 		return buf;
 	}
@@ -735,6 +753,8 @@ struct virtqueue *vring_new_virtqueue(unsigned int index,
 	vq->weak_barriers = weak_barriers;
 	vq->broken = false;
 	vq->last_used_idx = 0;
+	vq->avail_flags_shadow = 0;
+	vq->avail_idx_shadow = 0;
 	vq->num_added = 0;
 	list_add_tail(&vq->vq.list, &vdev->vqs);
 #ifdef DEBUG
@@ -746,8 +766,10 @@ struct virtqueue *vring_new_virtqueue(unsigned int index,
 	vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);
 
 	/* No callback?  Tell other side not to bother us. */
-	if (!callback)
-		vq->vring.avail->flags |= cpu_to_virtio16(vdev, VRING_AVAIL_F_NO_INTERRUPT);
+	if (!callback) {
+		vq->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
+		vq->vring.avail->flags = cpu_to_virtio16(vdev, vq->avail_flags_shadow);
+	}
 
 	/* Put everything in free lists. */
 	vq->free_head = 0;
-- 
cgit v0.10.2


From f27a62995330fdc5f804acf2ebf1444f9025c879 Mon Sep 17 00:00:00 2001
From: Wenwei Tao <ww.tao0320@gmail.com>
Date: Sun, 6 Dec 2015 11:25:43 +0100
Subject: lightnvm: use flags in rrpc_get_blk
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

rrpc_get_blk use constant 0 as the input parameter
of nvm_get_blk, this may result in getting gc block
failed unexpectedly.

Signed-off-by: Wenwei Tao <ww.tao0320@gmail.com>
Signed-off-by: Matias Bjørling <m@bjorling.me>
Signed-off-by: Jens Axboe <axboe@fb.com>

diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c
index 75e59c3..d606c7a 100644
--- a/drivers/lightnvm/rrpc.c
+++ b/drivers/lightnvm/rrpc.c
@@ -182,7 +182,7 @@ static struct rrpc_block *rrpc_get_blk(struct rrpc *rrpc, struct rrpc_lun *rlun,
 	struct nvm_block *blk;
 	struct rrpc_block *rblk;
 
-	blk = nvm_get_blk(rrpc->dev, rlun->parent, 0);
+	blk = nvm_get_blk(rrpc->dev, rlun->parent, flags);
 	if (!blk)
 		return NULL;
 
-- 
cgit v0.10.2


From d3d1a43842077c53e22dedaebfe7cb52022b18e1 Mon Sep 17 00:00:00 2001
From: Wenwei Tao <ww.tao0320@gmail.com>
Date: Sun, 6 Dec 2015 11:25:44 +0100
Subject: lightnvm: put blks when luns configure failed
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Put the allocated blocks back to the free list
when the luns configure failed, to make these
blocks useable to others.

Signed-off-by: Wenwei Tao <ww.tao0320@gmail.com>
Signed-off-by: Matias Bjørling <m@bjorling.me>
Signed-off-by: Jens Axboe <axboe@fb.com>

diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c
index d606c7a..cf1a4a5 100644
--- a/drivers/lightnvm/rrpc.c
+++ b/drivers/lightnvm/rrpc.c
@@ -202,6 +202,20 @@ static void rrpc_put_blk(struct rrpc *rrpc, struct rrpc_block *rblk)
 	nvm_put_blk(rrpc->dev, rblk->parent);
 }
 
+static void rrpc_put_blks(struct rrpc *rrpc)
+{
+	struct rrpc_lun *rlun;
+	int i;
+
+	for (i = 0; i < rrpc->nr_luns; i++) {
+		rlun = &rrpc->luns[i];
+		if (rlun->cur)
+			rrpc_put_blk(rrpc, rlun->cur);
+		if (rlun->gc_cur)
+			rrpc_put_blk(rrpc, rlun->gc_cur);
+	}
+}
+
 static struct rrpc_lun *get_next_lun(struct rrpc *rrpc)
 {
 	int next = atomic_inc_return(&rrpc->next_lun);
@@ -1224,18 +1238,21 @@ static int rrpc_luns_configure(struct rrpc *rrpc)
 
 		rblk = rrpc_get_blk(rrpc, rlun, 0);
 		if (!rblk)
-			return -EINVAL;
+			goto err;
 
 		rrpc_set_lun_cur(rlun, rblk);
 
 		/* Emergency gc block */
 		rblk = rrpc_get_blk(rrpc, rlun, 1);
 		if (!rblk)
-			return -EINVAL;
+			goto err;
 		rlun->gc_cur = rblk;
 	}
 
 	return 0;
+err:
+	rrpc_put_blks(rrpc);
+	return -EINVAL;
 }
 
 static struct nvm_tgt_type tt_rrpc;
-- 
cgit v0.10.2


From e9b76a80f1f165cb031f62128dc07d219e29f5a3 Mon Sep 17 00:00:00 2001
From: Wenwei Tao <ww.tao0320@gmail.com>
Date: Sun, 6 Dec 2015 11:25:45 +0100
Subject: lightnvm: refactor spin_unlock in gennvm_get_blk
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The spin_unlock is duplicated multiple times. Jump to a single unlock
to improve the code flow.

Signed-off-by: Wenwei Tao <ww.tao0320@gmail.com>
Signed-off-by: Matias Bjørling <m@bjorling.me>
Signed-off-by: Jens Axboe <axboe@fb.com>

diff --git a/drivers/lightnvm/gennvm.c b/drivers/lightnvm/gennvm.c
index 35dde84..ce60254 100644
--- a/drivers/lightnvm/gennvm.c
+++ b/drivers/lightnvm/gennvm.c
@@ -262,14 +262,11 @@ static struct nvm_block *gennvm_get_blk(struct nvm_dev *dev,
 	if (list_empty(&lun->free_list)) {
 		pr_err_ratelimited("gennvm: lun %u have no free pages available",
 								lun->vlun.id);
-		spin_unlock(&vlun->lock);
 		goto out;
 	}
 
-	while (!is_gc && lun->vlun.nr_free_blocks < lun->reserved_blocks) {
-		spin_unlock(&vlun->lock);
+	if (!is_gc && lun->vlun.nr_free_blocks < lun->reserved_blocks)
 		goto out;
-	}
 
 	blk = list_first_entry(&lun->free_list, struct nvm_block, list);
 	list_move_tail(&blk->list, &lun->used_list);
@@ -278,8 +275,8 @@ static struct nvm_block *gennvm_get_blk(struct nvm_dev *dev,
 	lun->vlun.nr_free_blocks--;
 	lun->vlun.nr_inuse_blocks++;
 
-	spin_unlock(&vlun->lock);
 out:
+	spin_unlock(&vlun->lock);
 	return blk;
 }
 
-- 
cgit v0.10.2


From 437f8f9a1e923deab6c61822c36f29f4a2198cfe Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= <m@bjorling.me>
Date: Sun, 6 Dec 2015 11:25:46 +0100
Subject: lightnvm: check mm before use
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The core can may issue I/Os before a media manager is registered with
the lightnvm subsystem. Make sure that we don't call the media manager
->end_io prematurely with a null pointer.

Signed-off-by: Matias Bjørling <m@bjorling.me>
Signed-off-by: Jens Axboe <axboe@fb.com>

diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c
index 06c3364..762c9a7 100644
--- a/drivers/nvme/host/lightnvm.c
+++ b/drivers/nvme/host/lightnvm.c
@@ -455,7 +455,7 @@ static void nvme_nvm_end_io(struct request *rq, int error)
 	struct nvm_rq *rqd = rq->end_io_data;
 	struct nvm_dev *dev = rqd->dev;
 
-	if (dev->mt->end_io(rqd, error))
+	if (dev->mt && dev->mt->end_io(rqd, error))
 		pr_err("nvme: err status: %x result: %lx\n",
 				rq->errors, (unsigned long)rq->special);
 
-- 
cgit v0.10.2


From 57b4bd06ff0372fe1e3617889c4b37fbd500364a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= <m@bjorling.me>
Date: Sun, 6 Dec 2015 11:25:47 +0100
Subject: lightnvm: comments on constants
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It is not obvious what NVM_IO_* and NVM_BLK_T_* are used for. Make sure
to comment them appropriately as the other constants.

Signed-off-by: Matias Bjørling <m@bjorling.me>
Signed-off-by: Jens Axboe <axboe@fb.com>

diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index c6916ae..935ef38 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -50,9 +50,16 @@ enum {
 	NVM_IO_DUAL_ACCESS	= 0x1,
 	NVM_IO_QUAD_ACCESS	= 0x2,
 
+	/* NAND Access Modes */
 	NVM_IO_SUSPEND		= 0x80,
 	NVM_IO_SLC_MODE		= 0x100,
 	NVM_IO_SCRAMBLE_DISABLE	= 0x200,
+
+	/* Block Types */
+	NVM_BLK_T_FREE		= 0x0,
+	NVM_BLK_T_BAD		= 0x1,
+	NVM_BLK_T_DEV		= 0x2,
+	NVM_BLK_T_HOST		= 0x4,
 };
 
 struct nvm_id_group {
-- 
cgit v0.10.2


From 16f26c3aa9b9c36a9d1092ae3258461d1008481e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= <m@bjorling.me>
Date: Sun, 6 Dec 2015 11:25:48 +0100
Subject: lightnvm: replace req queue with nvmdev for lld
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In the case where a request queue is passed to the low lever lightnvm
device drive integration, the device driver might pass its admin
commands through another queue. Instead pass nvm_dev, and let the
low level drive the appropriate queue.

Reported-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Matias Bjørling <m@bjorling.me>
Signed-off-by: Jens Axboe <axboe@fb.com>

diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index 0c3940e..7981b74 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -444,8 +444,9 @@ static void null_lnvm_end_io(struct request *rq, int error)
 	blk_put_request(rq);
 }
 
-static int null_lnvm_submit_io(struct request_queue *q, struct nvm_rq *rqd)
+static int null_lnvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd)
 {
+	struct request_queue *q = dev->q;
 	struct request *rq;
 	struct bio *bio = rqd->bio;
 
@@ -470,7 +471,7 @@ static int null_lnvm_submit_io(struct request_queue *q, struct nvm_rq *rqd)
 	return 0;
 }
 
-static int null_lnvm_id(struct request_queue *q, struct nvm_id *id)
+static int null_lnvm_id(struct nvm_dev *dev, struct nvm_id *id)
 {
 	sector_t size = gb * 1024 * 1024 * 1024ULL;
 	sector_t blksize;
@@ -523,7 +524,7 @@ static int null_lnvm_id(struct request_queue *q, struct nvm_id *id)
 	return 0;
 }
 
-static void *null_lnvm_create_dma_pool(struct request_queue *q, char *name)
+static void *null_lnvm_create_dma_pool(struct nvm_dev *dev, char *name)
 {
 	mempool_t *virtmem_pool;
 
@@ -541,7 +542,7 @@ static void null_lnvm_destroy_dma_pool(void *pool)
 	mempool_destroy(pool);
 }
 
-static void *null_lnvm_dev_dma_alloc(struct request_queue *q, void *pool,
+static void *null_lnvm_dev_dma_alloc(struct nvm_dev *dev, void *pool,
 				gfp_t mem_flags, dma_addr_t *dma_handler)
 {
 	return mempool_alloc(pool, mem_flags);
diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
index 86ce887..4a8d1fe 100644
--- a/drivers/lightnvm/core.c
+++ b/drivers/lightnvm/core.c
@@ -74,7 +74,7 @@ EXPORT_SYMBOL(nvm_unregister_target);
 void *nvm_dev_dma_alloc(struct nvm_dev *dev, gfp_t mem_flags,
 							dma_addr_t *dma_handler)
 {
-	return dev->ops->dev_dma_alloc(dev->q, dev->ppalist_pool, mem_flags,
+	return dev->ops->dev_dma_alloc(dev, dev->ppalist_pool, mem_flags,
 								dma_handler);
 }
 EXPORT_SYMBOL(nvm_dev_dma_alloc);
@@ -246,7 +246,7 @@ static int nvm_init(struct nvm_dev *dev)
 	if (!dev->q || !dev->ops)
 		return ret;
 
-	if (dev->ops->identity(dev->q, &dev->identity)) {
+	if (dev->ops->identity(dev, &dev->identity)) {
 		pr_err("nvm: device could not be identified\n");
 		goto err;
 	}
@@ -326,8 +326,7 @@ int nvm_register(struct request_queue *q, char *disk_name,
 	}
 
 	if (dev->ops->max_phys_sect > 1) {
-		dev->ppalist_pool = dev->ops->create_dma_pool(dev->q,
-								"ppalist");
+		dev->ppalist_pool = dev->ops->create_dma_pool(dev, "ppalist");
 		if (!dev->ppalist_pool) {
 			pr_err("nvm: could not create ppa pool\n");
 			ret = -ENOMEM;
diff --git a/drivers/lightnvm/gennvm.c b/drivers/lightnvm/gennvm.c
index ce60254..52b513a 100644
--- a/drivers/lightnvm/gennvm.c
+++ b/drivers/lightnvm/gennvm.c
@@ -195,7 +195,7 @@ static int gennvm_blocks_init(struct nvm_dev *dev, struct gen_nvm *gn)
 	}
 
 	if (dev->ops->get_l2p_tbl) {
-		ret = dev->ops->get_l2p_tbl(dev->q, 0, dev->total_pages,
+		ret = dev->ops->get_l2p_tbl(dev, 0, dev->total_pages,
 							gennvm_block_map, dev);
 		if (ret) {
 			pr_err("gennvm: could not read L2P table.\n");
@@ -346,7 +346,7 @@ static int gennvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd)
 	gennvm_generic_to_addr_mode(dev, rqd);
 
 	rqd->dev = dev;
-	return dev->ops->submit_io(dev->q, rqd);
+	return dev->ops->submit_io(dev, rqd);
 }
 
 static void gennvm_blk_set_type(struct nvm_dev *dev, struct ppa_addr *ppa,
@@ -382,7 +382,7 @@ static void gennvm_mark_blk_bad(struct nvm_dev *dev, struct nvm_rq *rqd)
 	if (!dev->ops->set_bb_tbl)
 		return;
 
-	if (dev->ops->set_bb_tbl(dev->q, rqd, 1))
+	if (dev->ops->set_bb_tbl(dev, rqd, 1))
 		return;
 
 	gennvm_addr_to_generic_mode(dev, rqd);
@@ -450,7 +450,7 @@ static int gennvm_erase_blk(struct nvm_dev *dev, struct nvm_block *blk,
 
 	gennvm_generic_to_addr_mode(dev, &rqd);
 
-	ret = dev->ops->erase_block(dev->q, &rqd);
+	ret = dev->ops->erase_block(dev, &rqd);
 
 	if (plane_cnt)
 		nvm_dev_dma_free(dev, rqd.ppa_list, rqd.dma_ppa_list);
diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c
index cf1a4a5..134e4fa 100644
--- a/drivers/lightnvm/rrpc.c
+++ b/drivers/lightnvm/rrpc.c
@@ -1016,7 +1016,7 @@ static int rrpc_map_init(struct rrpc *rrpc)
 		return 0;
 
 	/* Bring up the mapping table from device */
-	ret = dev->ops->get_l2p_tbl(dev->q, 0, dev->total_pages,
+	ret = dev->ops->get_l2p_tbl(dev, 0, dev->total_pages,
 							rrpc_l2p_update, rrpc);
 	if (ret) {
 		pr_err("nvm: rrpc: could not read L2P table.\n");
diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c
index 762c9a7..15f2acb 100644
--- a/drivers/nvme/host/lightnvm.c
+++ b/drivers/nvme/host/lightnvm.c
@@ -271,9 +271,9 @@ static int init_grps(struct nvm_id *nvm_id, struct nvme_nvm_id *nvme_nvm_id)
 	return 0;
 }
 
-static int nvme_nvm_identity(struct request_queue *q, struct nvm_id *nvm_id)
+static int nvme_nvm_identity(struct nvm_dev *nvmdev, struct nvm_id *nvm_id)
 {
-	struct nvme_ns *ns = q->queuedata;
+	struct nvme_ns *ns = nvmdev->q->queuedata;
 	struct nvme_dev *dev = ns->dev;
 	struct nvme_nvm_id *nvme_nvm_id;
 	struct nvme_nvm_command c = {};
@@ -308,10 +308,10 @@ out:
 	return ret;
 }
 
-static int nvme_nvm_get_l2p_tbl(struct request_queue *q, u64 slba, u32 nlb,
+static int nvme_nvm_get_l2p_tbl(struct nvm_dev *nvmdev, u64 slba, u32 nlb,
 				nvm_l2p_update_fn *update_l2p, void *priv)
 {
-	struct nvme_ns *ns = q->queuedata;
+	struct nvme_ns *ns = nvmdev->q->queuedata;
 	struct nvme_dev *dev = ns->dev;
 	struct nvme_nvm_command c = {};
 	u32 len = queue_max_hw_sectors(dev->admin_q) << 9;
@@ -415,10 +415,10 @@ out:
 	return ret;
 }
 
-static int nvme_nvm_set_bb_tbl(struct request_queue *q, struct nvm_rq *rqd,
+static int nvme_nvm_set_bb_tbl(struct nvm_dev *nvmdev, struct nvm_rq *rqd,
 								int type)
 {
-	struct nvme_ns *ns = q->queuedata;
+	struct nvme_ns *ns = nvmdev->q->queuedata;
 	struct nvme_dev *dev = ns->dev;
 	struct nvme_nvm_command c = {};
 	int ret = 0;
@@ -463,8 +463,9 @@ static void nvme_nvm_end_io(struct request *rq, int error)
 	blk_mq_free_request(rq);
 }
 
-static int nvme_nvm_submit_io(struct request_queue *q, struct nvm_rq *rqd)
+static int nvme_nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd)
 {
+	struct request_queue *q = dev->q;
 	struct nvme_ns *ns = q->queuedata;
 	struct request *rq;
 	struct bio *bio = rqd->bio;
@@ -502,8 +503,9 @@ static int nvme_nvm_submit_io(struct request_queue *q, struct nvm_rq *rqd)
 	return 0;
 }
 
-static int nvme_nvm_erase_block(struct request_queue *q, struct nvm_rq *rqd)
+static int nvme_nvm_erase_block(struct nvm_dev *dev, struct nvm_rq *rqd)
 {
+	struct request_queue *q = dev->q;
 	struct nvme_ns *ns = q->queuedata;
 	struct nvme_nvm_command c = {};
 
@@ -515,9 +517,9 @@ static int nvme_nvm_erase_block(struct request_queue *q, struct nvm_rq *rqd)
 	return nvme_submit_sync_cmd(q, (struct nvme_command *)&c, NULL, 0);
 }
 
-static void *nvme_nvm_create_dma_pool(struct request_queue *q, char *name)
+static void *nvme_nvm_create_dma_pool(struct nvm_dev *nvmdev, char *name)
 {
-	struct nvme_ns *ns = q->queuedata;
+	struct nvme_ns *ns = nvmdev->q->queuedata;
 	struct nvme_dev *dev = ns->dev;
 
 	return dma_pool_create(name, dev->dev, PAGE_SIZE, PAGE_SIZE, 0);
@@ -530,7 +532,7 @@ static void nvme_nvm_destroy_dma_pool(void *pool)
 	dma_pool_destroy(dma_pool);
 }
 
-static void *nvme_nvm_dev_dma_alloc(struct request_queue *q, void *pool,
+static void *nvme_nvm_dev_dma_alloc(struct nvm_dev *dev, void *pool,
 				    gfp_t mem_flags, dma_addr_t *dma_handler)
 {
 	return dma_pool_alloc(pool, mem_flags, dma_handler);
diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index 935ef38..034117b 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -183,17 +183,17 @@ struct nvm_block;
 
 typedef int (nvm_l2p_update_fn)(u64, u32, __le64 *, void *);
 typedef int (nvm_bb_update_fn)(struct ppa_addr, int, u8 *, void *);
-typedef int (nvm_id_fn)(struct request_queue *, struct nvm_id *);
-typedef int (nvm_get_l2p_tbl_fn)(struct request_queue *, u64, u32,
+typedef int (nvm_id_fn)(struct nvm_dev *, struct nvm_id *);
+typedef int (nvm_get_l2p_tbl_fn)(struct nvm_dev *, u64, u32,
 				nvm_l2p_update_fn *, void *);
 typedef int (nvm_op_bb_tbl_fn)(struct nvm_dev *, struct ppa_addr, int,
 				nvm_bb_update_fn *, void *);
-typedef int (nvm_op_set_bb_fn)(struct request_queue *, struct nvm_rq *, int);
-typedef int (nvm_submit_io_fn)(struct request_queue *, struct nvm_rq *);
-typedef int (nvm_erase_blk_fn)(struct request_queue *, struct nvm_rq *);
-typedef void *(nvm_create_dma_pool_fn)(struct request_queue *, char *);
+typedef int (nvm_op_set_bb_fn)(struct nvm_dev *, struct nvm_rq *, int);
+typedef int (nvm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *);
+typedef int (nvm_erase_blk_fn)(struct nvm_dev *, struct nvm_rq *);
+typedef void *(nvm_create_dma_pool_fn)(struct nvm_dev *, char *);
 typedef void (nvm_destroy_dma_pool_fn)(void *);
-typedef void *(nvm_dev_dma_alloc_fn)(struct request_queue *, void *, gfp_t,
+typedef void *(nvm_dev_dma_alloc_fn)(struct nvm_dev *, void *, gfp_t,
 								dma_addr_t *);
 typedef void (nvm_dev_dma_free_fn)(void *, void*, dma_addr_t);
 
-- 
cgit v0.10.2


From 762796bc9e5758df6c72253e4ec19657055cea1f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= <m@bjorling.me>
Date: Sun, 6 Dec 2015 11:25:49 +0100
Subject: lightnvm: fix media mgr registration
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch fixes two issues during media manager registration.

1. The ppa pool can be used at media manager registration. Allocate the
ppa pool before that.

2. If a media manager can't be found, this should not lead to the
device being unallocated. A media manager can be registered later, that
can manage the device. Only warn if a media manager fails
initialization.

Signed-off-by: Matias Bjørling <m@bjorling.me>
Signed-off-by: Jens Axboe <axboe@fb.com>

diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
index 4a8d1fe..8f41b24 100644
--- a/drivers/lightnvm/core.c
+++ b/drivers/lightnvm/core.c
@@ -97,15 +97,47 @@ static struct nvmm_type *nvm_find_mgr_type(const char *name)
 	return NULL;
 }
 
+struct nvmm_type *nvm_init_mgr(struct nvm_dev *dev)
+{
+	struct nvmm_type *mt;
+	int ret;
+
+	lockdep_assert_held(&nvm_lock);
+
+	list_for_each_entry(mt, &nvm_mgrs, list) {
+		ret = mt->register_mgr(dev);
+		if (ret < 0) {
+			pr_err("nvm: media mgr failed to init (%d) on dev %s\n",
+								ret, dev->name);
+			return NULL; /* initialization failed */
+		} else if (ret > 0)
+			return mt;
+	}
+
+	return NULL;
+}
+
 int nvm_register_mgr(struct nvmm_type *mt)
 {
+	struct nvm_dev *dev;
 	int ret = 0;
 
 	down_write(&nvm_lock);
-	if (nvm_find_mgr_type(mt->name))
+	if (nvm_find_mgr_type(mt->name)) {
 		ret = -EEXIST;
-	else
+		goto finish;
+	} else {
 		list_add(&mt->list, &nvm_mgrs);
+	}
+
+	/* try to register media mgr if any device have none configured */
+	list_for_each_entry(dev, &nvm_devices, devices) {
+		if (dev->mt)
+			continue;
+
+		dev->mt = nvm_init_mgr(dev);
+	}
+finish:
 	up_write(&nvm_lock);
 
 	return ret;
@@ -123,26 +155,6 @@ void nvm_unregister_mgr(struct nvmm_type *mt)
 }
 EXPORT_SYMBOL(nvm_unregister_mgr);
 
-/* register with device with a supported manager */
-static int register_mgr(struct nvm_dev *dev)
-{
-	struct nvmm_type *mt;
-	int ret = 0;
-
-	list_for_each_entry(mt, &nvm_mgrs, list) {
-		ret = mt->register_mgr(dev);
-		if (ret > 0) {
-			dev->mt = mt;
-			break; /* successfully initialized */
-		}
-	}
-
-	if (!ret)
-		pr_info("nvm: no compatible nvm manager found.\n");
-
-	return ret;
-}
-
 static struct nvm_dev *nvm_find_nvm_dev(const char *name)
 {
 	struct nvm_dev *dev;
@@ -271,14 +283,6 @@ static int nvm_init(struct nvm_dev *dev)
 		goto err;
 	}
 
-	down_write(&nvm_lock);
-	ret = register_mgr(dev);
-	up_write(&nvm_lock);
-	if (ret < 0)
-		goto err;
-	if (!ret)
-		return 0;
-
 	pr_info("nvm: registered %s [%u/%u/%u/%u/%u/%u]\n",
 			dev->name, dev->sec_per_pg, dev->nr_planes,
 			dev->pgs_per_blk, dev->blks_per_lun, dev->nr_luns,
@@ -334,7 +338,9 @@ int nvm_register(struct request_queue *q, char *disk_name,
 		}
 	}
 
+	/* register device with a supported media manager */
 	down_write(&nvm_lock);
+	dev->mt = nvm_init_mgr(dev);
 	list_add(&dev->devices, &nvm_devices);
 	up_write(&nvm_lock);
 
@@ -379,19 +385,13 @@ static int nvm_create_target(struct nvm_dev *dev,
 	struct nvm_tgt_type *tt;
 	struct nvm_target *t;
 	void *targetdata;
-	int ret = 0;
 
-	down_write(&nvm_lock);
 	if (!dev->mt) {
-		ret = register_mgr(dev);
-		if (!ret)
-			ret = -ENODEV;
-		if (ret < 0) {
-			up_write(&nvm_lock);
-			return ret;
-		}
+		pr_info("nvm: device has no media manager registered.\n");
+		return -ENODEV;
 	}
 
+	down_write(&nvm_lock);
 	tt = nvm_find_target_type(create->tgttype);
 	if (!tt) {
 		pr_err("nvm: target type %s not found\n", create->tgttype);
-- 
cgit v0.10.2


From 008b74438269b368d57a14452afbc194f7f9ff2f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= <m@bjorling.me>
Date: Sun, 6 Dec 2015 11:25:50 +0100
Subject: lightnvm: prevent gennvm module unload on use
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

After the gennvm module has been initialized. It might be attached to
one or several devices. In that case, the module is in use. Make sure
that it can not be unloaded.

Signed-off-by: Matias Bjørling <m@bjorling.me>
Signed-off-by: Jens Axboe <axboe@fb.com>

diff --git a/drivers/lightnvm/gennvm.c b/drivers/lightnvm/gennvm.c
index 52b513a..f434e89 100644
--- a/drivers/lightnvm/gennvm.c
+++ b/drivers/lightnvm/gennvm.c
@@ -219,6 +219,9 @@ static int gennvm_register(struct nvm_dev *dev)
 	struct gen_nvm *gn;
 	int ret;
 
+	if (!try_module_get(THIS_MODULE))
+		return -ENODEV;
+
 	gn = kzalloc(sizeof(struct gen_nvm), GFP_KERNEL);
 	if (!gn)
 		return -ENOMEM;
@@ -242,12 +245,14 @@ static int gennvm_register(struct nvm_dev *dev)
 	return 1;
 err:
 	gennvm_free(dev);
+	module_put(THIS_MODULE);
 	return ret;
 }
 
 static void gennvm_unregister(struct nvm_dev *dev)
 {
 	gennvm_free(dev);
+	module_put(THIS_MODULE);
 }
 
 static struct nvm_block *gennvm_get_blk(struct nvm_dev *dev,
-- 
cgit v0.10.2


From 4158624454db3756c1f1b3641f1ab4215e697936 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= <m@bjorling.me>
Date: Sun, 6 Dec 2015 11:25:51 +0100
Subject: lightnvm: do not compile in debugging by default
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The LightNVM module exposes a debug interface when CONFIG_NVM_DEBUG is
set. This interfaces takes a string to configure media managers and
targets. Make sure this interface is only exposed when chosen
deliberately.

Signed-off-by: Matias Bjørling <m@bjorling.me>
Signed-off-by: Jens Axboe <axboe@fb.com>

diff --git a/drivers/lightnvm/Kconfig b/drivers/lightnvm/Kconfig
index a16bf56..85a3390 100644
--- a/drivers/lightnvm/Kconfig
+++ b/drivers/lightnvm/Kconfig
@@ -18,6 +18,7 @@ if NVM
 
 config NVM_DEBUG
 	bool "Open-Channel SSD debugging support"
+	default n
 	---help---
 	Exposes a debug management interface to create/remove targets at:
 
-- 
cgit v0.10.2


From 98fb754831e1d139f1151e968bdebb6eefb9b581 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 1 Dec 2015 13:20:40 +0100
Subject: arm64: update linker script to increased L1_CACHE_BYTES value

Bring the linker script in line with the recent increase of
L1_CACHE_BYTES to 128. Replace the hardcoded value of 64 with the
symbolic constant.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Acked-by: Mark Rutland <mark.rutland@arm.com>
[catalin.marinas@arm.com: fix up RW_DATA_SECTION as well]
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 1ee2c39..71426a7 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -5,6 +5,7 @@
  */
 
 #include <asm-generic/vmlinux.lds.h>
+#include <asm/cache.h>
 #include <asm/kernel-pgtable.h>
 #include <asm/thread_info.h>
 #include <asm/memory.h>
@@ -140,7 +141,7 @@ SECTIONS
 		ARM_EXIT_KEEP(EXIT_DATA)
 	}
 
-	PERCPU_SECTION(64)
+	PERCPU_SECTION(L1_CACHE_BYTES)
 
 	. = ALIGN(PAGE_SIZE);
 	__init_end = .;
@@ -158,7 +159,7 @@ SECTIONS
 	. = ALIGN(PAGE_SIZE);
 	_data = .;
 	_sdata = .;
-	RW_DATA_SECTION(64, PAGE_SIZE, THREAD_SIZE)
+	RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
 	PECOFF_EDATA_PADDING
 	_edata = .;
 
-- 
cgit v0.10.2


From 756b9b37cfb2e3dc76b2e43a8c097402ac736e07 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Mon, 7 Dec 2015 12:52:23 -0800
Subject: SUNRPC: Fix callback channel

The NFSv4.1 callback channel is currently broken because the receive
message will keep shrinking because the backchannel receive buffer size
never gets reset.
The easiest solution to this problem is instead of changing the receive
buffer, to rather adjust the copied request.

Fixes: 38b7631fbe42 ("nfs4: limit callback decoding to received bytes")
Cc: Benjamin Coddington <bcodding@redhat.com>
Cc: stable@vger.kernel.org
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>

diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index beac58b..646cdac 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -78,8 +78,7 @@ static __be32 *read_buf(struct xdr_stream *xdr, int nbytes)
 
 	p = xdr_inline_decode(xdr, nbytes);
 	if (unlikely(p == NULL))
-		printk(KERN_WARNING "NFS: NFSv4 callback reply buffer overflowed "
-							"or truncated request.\n");
+		printk(KERN_WARNING "NFS: NFSv4 callback reply buffer overflowed!\n");
 	return p;
 }
 
@@ -890,7 +889,6 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
 	struct cb_compound_hdr_arg hdr_arg = { 0 };
 	struct cb_compound_hdr_res hdr_res = { NULL };
 	struct xdr_stream xdr_in, xdr_out;
-	struct xdr_buf *rq_arg = &rqstp->rq_arg;
 	__be32 *p, status;
 	struct cb_process_state cps = {
 		.drc_status = 0,
@@ -902,8 +900,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
 
 	dprintk("%s: start\n", __func__);
 
-	rq_arg->len = rq_arg->head[0].iov_len + rq_arg->page_len;
-	xdr_init_decode(&xdr_in, rq_arg, rq_arg->head[0].iov_base);
+	xdr_init_decode(&xdr_in, &rqstp->rq_arg, rqstp->rq_arg.head[0].iov_base);
 
 	p = (__be32*)((char *)rqstp->rq_res.head[0].iov_base + rqstp->rq_res.head[0].iov_len);
 	xdr_init_encode(&xdr_out, &rqstp->rq_res, p);
diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c
index 95f82d8..229956b 100644
--- a/net/sunrpc/backchannel_rqst.c
+++ b/net/sunrpc/backchannel_rqst.c
@@ -353,20 +353,12 @@ void xprt_complete_bc_request(struct rpc_rqst *req, uint32_t copied)
 {
 	struct rpc_xprt *xprt = req->rq_xprt;
 	struct svc_serv *bc_serv = xprt->bc_serv;
-	struct xdr_buf *rq_rcv_buf = &req->rq_rcv_buf;
 
 	spin_lock(&xprt->bc_pa_lock);
 	list_del(&req->rq_bc_pa_list);
 	xprt_dec_alloc_count(xprt, 1);
 	spin_unlock(&xprt->bc_pa_lock);
 
-	if (copied <= rq_rcv_buf->head[0].iov_len) {
-		rq_rcv_buf->head[0].iov_len = copied;
-		rq_rcv_buf->page_len = 0;
-	} else {
-		rq_rcv_buf->page_len = copied - rq_rcv_buf->head[0].iov_len;
-	}
-
 	req->rq_private_buf.len = copied;
 	set_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state);
 
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 7fccf96..cc98528 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1363,7 +1363,19 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req,
 	memcpy(&rqstp->rq_addr, &req->rq_xprt->addr, rqstp->rq_addrlen);
 	memcpy(&rqstp->rq_arg, &req->rq_rcv_buf, sizeof(rqstp->rq_arg));
 	memcpy(&rqstp->rq_res, &req->rq_snd_buf, sizeof(rqstp->rq_res));
+
+	/* Adjust the argument buffer length */
 	rqstp->rq_arg.len = req->rq_private_buf.len;
+	if (rqstp->rq_arg.len <= rqstp->rq_arg.head[0].iov_len) {
+		rqstp->rq_arg.head[0].iov_len = rqstp->rq_arg.len;
+		rqstp->rq_arg.page_len = 0;
+	} else if (rqstp->rq_arg.len <= rqstp->rq_arg.head[0].iov_len +
+			rqstp->rq_arg.page_len)
+		rqstp->rq_arg.page_len = rqstp->rq_arg.len -
+			rqstp->rq_arg.head[0].iov_len;
+	else
+		rqstp->rq_arg.len = rqstp->rq_arg.head[0].iov_len +
+			rqstp->rq_arg.page_len;
 
 	/* reset result send buffer "put" position */
 	resv->iov_len = 0;
-- 
cgit v0.10.2


From 785f742223c03ec70f9dcf36b87f59e9df2067e0 Mon Sep 17 00:00:00 2001
From: Ira Weiny <ira.weiny@intel.com>
Date: Mon, 30 Nov 2015 09:34:26 -0500
Subject: IB/qib: Fix qib_mr structure

struct qib_mr requires the mr member be the last because struct
qib_mregion contains a dynamic array at the end.  The additions
of members should have been placed before this structure as the
comment noted.

Failure to do so was causing random memory corruption.  Reproducing
this bug was easy to do by running the client and server of
ib_write_bw -s 8 -n 5 on the same node.

This BUG() was tripped in a slab debug kernel:

kernel BUG at mm/slab.c:2572!

Fixes: 38071a461f0a ("IB/qib: Support the new memory registration API")
Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Ira Weiny <ira.weiny@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h
index 2baf5ad..bc803f3 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.h
+++ b/drivers/infiniband/hw/qib/qib_verbs.h
@@ -329,9 +329,9 @@ struct qib_sge {
 struct qib_mr {
 	struct ib_mr ibmr;
 	struct ib_umem *umem;
-	struct qib_mregion mr;  /* must be last */
 	u64 *pages;
 	u32 npages;
+	struct qib_mregion mr;  /* must be last */
 };
 
 /*
-- 
cgit v0.10.2


From 1d784b890c0101db2556b981e864c2282a3c1b02 Mon Sep 17 00:00:00 2001
From: Mike Marciniszyn <mike.marciniszyn@intel.com>
Date: Tue, 1 Dec 2015 10:13:51 -0500
Subject: IB/core: Fix user mode post wr corruption

Commit e622f2f4ad21 ("IB: split struct ib_send_wr")
introduced a regression for HCAs whose user mode post
sends go through ib_uverbs_post_send().

The code didn't account for the fact that the first sge is
offset by an operation dependent length.  The allocation did,
but the pointer to the destination sge list is computed without
that knowledge.  The sge list copy_from_user() then corrupts
fields in the work request

Store the operation dependent length in a local variable and
compute the sge list copy_from_user() destination using that length.

Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 94816ae..4cb8e9d 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -2446,6 +2446,7 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
 	int                             i, sg_ind;
 	int				is_ud;
 	ssize_t                         ret = -EINVAL;
+	size_t                          next_size;
 
 	if (copy_from_user(&cmd, buf, sizeof cmd))
 		return -EFAULT;
@@ -2490,7 +2491,8 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
 				goto out_put;
 			}
 
-			ud = alloc_wr(sizeof(*ud), user_wr->num_sge);
+			next_size = sizeof(*ud);
+			ud = alloc_wr(next_size, user_wr->num_sge);
 			if (!ud) {
 				ret = -ENOMEM;
 				goto out_put;
@@ -2511,7 +2513,8 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
 			   user_wr->opcode == IB_WR_RDMA_READ) {
 			struct ib_rdma_wr *rdma;
 
-			rdma = alloc_wr(sizeof(*rdma), user_wr->num_sge);
+			next_size = sizeof(*rdma);
+			rdma = alloc_wr(next_size, user_wr->num_sge);
 			if (!rdma) {
 				ret = -ENOMEM;
 				goto out_put;
@@ -2525,7 +2528,8 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
 			   user_wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
 			struct ib_atomic_wr *atomic;
 
-			atomic = alloc_wr(sizeof(*atomic), user_wr->num_sge);
+			next_size = sizeof(*atomic);
+			atomic = alloc_wr(next_size, user_wr->num_sge);
 			if (!atomic) {
 				ret = -ENOMEM;
 				goto out_put;
@@ -2540,7 +2544,8 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
 		} else if (user_wr->opcode == IB_WR_SEND ||
 			   user_wr->opcode == IB_WR_SEND_WITH_IMM ||
 			   user_wr->opcode == IB_WR_SEND_WITH_INV) {
-			next = alloc_wr(sizeof(*next), user_wr->num_sge);
+			next_size = sizeof(*next);
+			next = alloc_wr(next_size, user_wr->num_sge);
 			if (!next) {
 				ret = -ENOMEM;
 				goto out_put;
@@ -2572,7 +2577,7 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
 
 		if (next->num_sge) {
 			next->sg_list = (void *) next +
-				ALIGN(sizeof *next, sizeof (struct ib_sge));
+				ALIGN(next_size, sizeof(struct ib_sge));
 			if (copy_from_user(next->sg_list,
 					   buf + sizeof cmd +
 					   cmd.wr_count * cmd.wqe_size +
-- 
cgit v0.10.2


From 57ab2512138205fe7836332fb4742441e53907ff Mon Sep 17 00:00:00 2001
From: Easwar Hariharan <easwar.hariharan@intel.com>
Date: Mon, 2 Nov 2015 12:13:14 -0500
Subject: IB/qib: Minor fixes to qib per SFF 8636

Minor errors found via code inspection during future development.
SFF 8636 defines bit position 2 to hold the status indication of
QSFP memory paging. The mask used to test for the value was
incorrect and is fixed in this patch. Additionally, the dump
function had a mismatch between the field being printed out and
the field used to source the data which was fixed.

Reviewed-by: Mitko Haralanov <mitko.haralanov@intel.com>
Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Reported-by: Easwar Hariharan <easwar.hariharan@intel.com>
Signed-off-by: Easwar Hariharan <easwar.hariharan@intel.com>
Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/hw/qib/qib_qsfp.c b/drivers/infiniband/hw/qib/qib_qsfp.c
index 5e27f76..4c7c3c8 100644
--- a/drivers/infiniband/hw/qib/qib_qsfp.c
+++ b/drivers/infiniband/hw/qib/qib_qsfp.c
@@ -292,7 +292,7 @@ int qib_refresh_qsfp_cache(struct qib_pportdata *ppd, struct qib_qsfp_cache *cp)
 		qib_dev_porterr(ppd->dd, ppd->port,
 				"QSFP byte0 is 0x%02X, S/B 0x0C/D\n", peek[0]);
 
-	if ((peek[2] & 2) == 0) {
+	if ((peek[2] & 4) == 0) {
 		/*
 		 * If cable is paged, rather than "flat memory", we need to
 		 * set the page to zero, Even if it already appears to be zero.
@@ -538,7 +538,7 @@ int qib_qsfp_dump(struct qib_pportdata *ppd, char *buf, int len)
 	sofar += scnprintf(buf + sofar, len - sofar, "Date:%.*s\n",
 			   QSFP_DATE_LEN, cd.date);
 	sofar += scnprintf(buf + sofar, len - sofar, "Lot:%.*s\n",
-			   QSFP_LOT_LEN, cd.date);
+			   QSFP_LOT_LEN, cd.lot);
 
 	while (bidx < QSFP_DEFAULT_HDR_CNT) {
 		int iidx;
-- 
cgit v0.10.2


From d144da8c6f51f48ec39d891ea9dff80169c45f3b Mon Sep 17 00:00:00 2001
From: Mike Marciniszyn <mike.marciniszyn@intel.com>
Date: Mon, 2 Nov 2015 12:13:25 -0500
Subject: IB/core: use RCU for uverbs id lookup

The current implementation gets a spin_lock, and at any scale with
qib and hfi1 post send, the lock contention grows exponentially
with the number of QPs.

idr_find() is RCU compatibile, so read doesn't need the lock.

Change to use rcu_read_lock() and rcu_read_unlock() in
__idr_get_uobj().

kfree_rcu() is used to insure a grace period between the
idr removal and actual free.

Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Reviewed-By: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 4cb8e9d..1c02dea 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -62,9 +62,11 @@ static struct uverbs_lock_class rule_lock_class = { .name = "RULE-uobj" };
  * The ib_uobject locking scheme is as follows:
  *
  * - ib_uverbs_idr_lock protects the uverbs idrs themselves, so it
- *   needs to be held during all idr operations.  When an object is
+ *   needs to be held during all idr write operations.  When an object is
  *   looked up, a reference must be taken on the object's kref before
- *   dropping this lock.
+ *   dropping this lock.  For read operations, the rcu_read_lock()
+ *   and rcu_write_lock() but similarly the kref reference is grabbed
+ *   before the rcu_read_unlock().
  *
  * - Each object also has an rwsem.  This rwsem must be held for
  *   reading while an operation that uses the object is performed.
@@ -96,7 +98,7 @@ static void init_uobj(struct ib_uobject *uobj, u64 user_handle,
 
 static void release_uobj(struct kref *kref)
 {
-	kfree(container_of(kref, struct ib_uobject, ref));
+	kfree_rcu(container_of(kref, struct ib_uobject, ref), rcu);
 }
 
 static void put_uobj(struct ib_uobject *uobj)
@@ -145,7 +147,7 @@ static struct ib_uobject *__idr_get_uobj(struct idr *idr, int id,
 {
 	struct ib_uobject *uobj;
 
-	spin_lock(&ib_uverbs_idr_lock);
+	rcu_read_lock();
 	uobj = idr_find(idr, id);
 	if (uobj) {
 		if (uobj->context == context)
@@ -153,7 +155,7 @@ static struct ib_uobject *__idr_get_uobj(struct idr *idr, int id,
 		else
 			uobj = NULL;
 	}
-	spin_unlock(&ib_uverbs_idr_lock);
+	rcu_read_unlock();
 
 	return uobj;
 }
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 9a68a19..120da1d 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -1271,6 +1271,7 @@ struct ib_uobject {
 	int			id;		/* index into kernel idr */
 	struct kref		ref;
 	struct rw_semaphore	mutex;		/* protects .live */
+	struct rcu_head		rcu;		/* kfree_rcu() overhead */
 	int			live;
 };
 
-- 
cgit v0.10.2


From 2c63d1072ad7cf1059333ef5cfa06075bead4a39 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 20 Nov 2015 17:41:36 +0100
Subject: IB/iser: use sector_div instead of do_div

do_div is the wrong way to divide a sector_t, as it is less
efficient when sector_t is 32-bit wide. With the upcoming
do_div optimizations, the kernel starts warning about this:

drivers/infiniband/ulp/iser/iser_verbs.c:1296:4: note: in expansion of macro 'do_div'
include/asm-generic/div64.h:224:22: warning: passing argument 1 of '__div64_32' from incompatible pointer type

This changes the code to use sector_div instead, which always
produces optimal code.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Sagi Grimberg <sagig@mellanox.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index a930702..42f4da6 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -1293,7 +1293,7 @@ u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task,
 		if (mr_status.fail_status & IB_MR_CHECK_SIG_STATUS) {
 			sector_t sector_off = mr_status.sig_err.sig_err_offset;
 
-			do_div(sector_off, sector_size + 8);
+			sector_div(sector_off, sector_size + 8);
 			*sector = scsi_get_lba(iser_task->sc) + sector_off;
 
 			pr_err("PI error found type %d at sector %llx "
-- 
cgit v0.10.2


From 3ebd2fd0d0119a5ac7906bf17be637b527f63d31 Mon Sep 17 00:00:00 2001
From: Kaike Wan <kaike.wan@intel.com>
Date: Fri, 30 Oct 2015 08:23:45 -0400
Subject: IB/sa: Put netlink request into the request list before sending

It was found by Saurabh Sengar that the netlink code tried to allocate
memory with GFP_KERNEL while holding a spinlock. While it is possible
to fix the issue by replacing GFP_KERNEL with GFP_ATOMIC, it is better
to get rid of the spinlock while sending the packet. However, in order
to protect against a race condition that a quick response may be received
before the request is put on the request list, we need to put the request
on the list first.

Signed-off-by: Kaike Wan <kaike.wan@intel.com>
Reviewed-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Reported-by: Saurabh Sengar <saurabh.truth@gmail.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 2aba774..a95a32b 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -512,7 +512,7 @@ static int ib_nl_get_path_rec_attrs_len(ib_sa_comp_mask comp_mask)
 	return len;
 }
 
-static int ib_nl_send_msg(struct ib_sa_query *query)
+static int ib_nl_send_msg(struct ib_sa_query *query, gfp_t gfp_mask)
 {
 	struct sk_buff *skb = NULL;
 	struct nlmsghdr *nlh;
@@ -526,7 +526,7 @@ static int ib_nl_send_msg(struct ib_sa_query *query)
 	if (len <= 0)
 		return -EMSGSIZE;
 
-	skb = nlmsg_new(len, GFP_KERNEL);
+	skb = nlmsg_new(len, gfp_mask);
 	if (!skb)
 		return -ENOMEM;
 
@@ -544,7 +544,7 @@ static int ib_nl_send_msg(struct ib_sa_query *query)
 	/* Repair the nlmsg header length */
 	nlmsg_end(skb, nlh);
 
-	ret = ibnl_multicast(skb, nlh, RDMA_NL_GROUP_LS, GFP_KERNEL);
+	ret = ibnl_multicast(skb, nlh, RDMA_NL_GROUP_LS, gfp_mask);
 	if (!ret)
 		ret = len;
 	else
@@ -553,7 +553,7 @@ static int ib_nl_send_msg(struct ib_sa_query *query)
 	return ret;
 }
 
-static int ib_nl_make_request(struct ib_sa_query *query)
+static int ib_nl_make_request(struct ib_sa_query *query, gfp_t gfp_mask)
 {
 	unsigned long flags;
 	unsigned long delay;
@@ -562,25 +562,27 @@ static int ib_nl_make_request(struct ib_sa_query *query)
 	INIT_LIST_HEAD(&query->list);
 	query->seq = (u32)atomic_inc_return(&ib_nl_sa_request_seq);
 
+	/* Put the request on the list first.*/
 	spin_lock_irqsave(&ib_nl_request_lock, flags);
-	ret = ib_nl_send_msg(query);
-	if (ret <= 0) {
-		ret = -EIO;
-		goto request_out;
-	} else {
-		ret = 0;
-	}
-
 	delay = msecs_to_jiffies(sa_local_svc_timeout_ms);
 	query->timeout = delay + jiffies;
 	list_add_tail(&query->list, &ib_nl_request_list);
 	/* Start the timeout if this is the only request */
 	if (ib_nl_request_list.next == &query->list)
 		queue_delayed_work(ib_nl_wq, &ib_nl_timed_work, delay);
-
-request_out:
 	spin_unlock_irqrestore(&ib_nl_request_lock, flags);
 
+	ret = ib_nl_send_msg(query, gfp_mask);
+	if (ret <= 0) {
+		ret = -EIO;
+		/* Remove the request */
+		spin_lock_irqsave(&ib_nl_request_lock, flags);
+		list_del(&query->list);
+		spin_unlock_irqrestore(&ib_nl_request_lock, flags);
+	} else {
+		ret = 0;
+	}
+
 	return ret;
 }
 
@@ -1108,7 +1110,7 @@ static int send_mad(struct ib_sa_query *query, int timeout_ms, gfp_t gfp_mask)
 
 	if (query->flags & IB_SA_ENABLE_LOCAL_SERVICE) {
 		if (!ibnl_chk_listeners(RDMA_NL_GROUP_LS)) {
-			if (!ib_nl_make_request(query))
+			if (!ib_nl_make_request(query, gfp_mask))
 				return id;
 		}
 		ib_sa_disable_local_svc(query);
-- 
cgit v0.10.2


From 4d59ad2995e4128c06d889f9e223099b1f19548e Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@sandisk.com>
Date: Tue, 1 Dec 2015 10:17:32 -0800
Subject: IB/srp: Fix a memory leak

If srp_connect_ch() returns a positive value then that is considered
by its caller as a connection failure but this does not result in a
scsi_host_put() call and additionally causes the srp_create_target()
function to return a positive value while it should return a negative
value. Avoid all this confusion and additionally fix a memory leak by
ensuring that srp_connect_ch() always returns a value that is <= 0.
This patch avoids that a rejected login triggers the following memory
leak:

unreferenced object 0xffff88021b24a220 (size 8):
  comm "srp_daemon", pid 56421, jiffies 4295006762 (age 4240.750s)
  hex dump (first 8 bytes):
    68 6f 73 74 35 38 00 a5                          host58..
  backtrace:
    [<ffffffff8151014a>] kmemleak_alloc+0x7a/0xc0
    [<ffffffff81165c1e>] __kmalloc_track_caller+0xfe/0x160
    [<ffffffff81260d2b>] kvasprintf+0x5b/0x90
    [<ffffffff81260e2d>] kvasprintf_const+0x8d/0xb0
    [<ffffffff81254b0c>] kobject_set_name_vargs+0x3c/0xa0
    [<ffffffff81337e3c>] dev_set_name+0x3c/0x40
    [<ffffffff81355757>] scsi_host_alloc+0x327/0x4b0
    [<ffffffffa03edc8e>] srp_create_target+0x4e/0x8a0 [ib_srp]
    [<ffffffff8133778b>] dev_attr_store+0x1b/0x20
    [<ffffffff811f27fa>] sysfs_kf_write+0x4a/0x60
    [<ffffffff811f1e8e>] kernfs_fop_write+0x14e/0x180
    [<ffffffff81176eef>] __vfs_write+0x2f/0xf0
    [<ffffffff811771e4>] vfs_write+0xa4/0x100
    [<ffffffff81177c64>] SyS_write+0x54/0xc0
    [<ffffffff8151b257>] entry_SYSCALL_64_fastpath+0x12/0x6f

Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Sagi Grimberg <sagig@mellanox.com>
Cc: Sebastian Parschauer <sebastian.riemer@profitbricks.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 9909022..a074dad 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -994,16 +994,16 @@ static int srp_connect_ch(struct srp_rdma_ch *ch, bool multich)
 
 	ret = srp_lookup_path(ch);
 	if (ret)
-		return ret;
+		goto out;
 
 	while (1) {
 		init_completion(&ch->done);
 		ret = srp_send_req(ch, multich);
 		if (ret)
-			return ret;
+			goto out;
 		ret = wait_for_completion_interruptible(&ch->done);
 		if (ret < 0)
-			return ret;
+			goto out;
 
 		/*
 		 * The CM event handling code will set status to
@@ -1011,15 +1011,16 @@ static int srp_connect_ch(struct srp_rdma_ch *ch, bool multich)
 		 * back, or SRP_DLID_REDIRECT if we get a lid/qp
 		 * redirect REJ back.
 		 */
-		switch (ch->status) {
+		ret = ch->status;
+		switch (ret) {
 		case 0:
 			ch->connected = true;
-			return 0;
+			goto out;
 
 		case SRP_PORT_REDIRECT:
 			ret = srp_lookup_path(ch);
 			if (ret)
-				return ret;
+				goto out;
 			break;
 
 		case SRP_DLID_REDIRECT:
@@ -1028,13 +1029,16 @@ static int srp_connect_ch(struct srp_rdma_ch *ch, bool multich)
 		case SRP_STALE_CONN:
 			shost_printk(KERN_ERR, target->scsi_host, PFX
 				     "giving up on stale connection\n");
-			ch->status = -ECONNRESET;
-			return ch->status;
+			ret = -ECONNRESET;
+			goto out;
 
 		default:
-			return ch->status;
+			goto out;
 		}
 	}
+
+out:
+	return ret <= 0 ? ret : -ENODEV;
 }
 
 static int srp_inv_rkey(struct srp_rdma_ch *ch, u32 rkey)
-- 
cgit v0.10.2


From 09c0c0bea500a9ad362589990ee316c9b2482f44 Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagig@mellanox.com>
Date: Tue, 1 Dec 2015 10:18:03 -0800
Subject: IB/srp: Fix possible send queue overflow

When using work request based memory registration (fast_reg)
we must reserve SQ entries for registration and invalidation
in addition to send operations. Each IO consumes 3 SQ entries
(registration, send, invalidation) so we need to allocate 3x
larger send-queue instead of 2x.

Signed-off-by: Sagi Grimberg <sagig@mellanox.com>
CC: Stable <stable@vger.kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index a074dad..c7a95d2 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -488,7 +488,7 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch)
 	struct ib_qp *qp;
 	struct ib_fmr_pool *fmr_pool = NULL;
 	struct srp_fr_pool *fr_pool = NULL;
-	const int m = 1 + dev->use_fast_reg;
+	const int m = dev->use_fast_reg ? 3 : 1;
 	struct ib_cq_init_attr cq_attr = {};
 	int ret;
 
-- 
cgit v0.10.2


From fc925518aa04ee9f03c4e0cadd9c59f1ef48bcfa Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 1 Dec 2015 10:18:30 -0800
Subject: IB/srp: Initialize dma_length in srp_map_idb

Without this sg_dma_len will return 0 on architectures tha have
the dma_length field.

Fixes: commit f7f7aab1a5c0 ("IB/srp: Convert to new registration API")
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Sagi Grimberg <sagig@mellanox.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index c7a95d2..72fac20 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -1524,6 +1524,9 @@ static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req,
 		state.sg_nents = 1;
 		sg_set_buf(idb_sg, req->indirect_desc, idb_len);
 		idb_sg->dma_address = req->indirect_dma_addr; /* hack! */
+#ifdef CONFIG_NEED_SG_DMA_LENGTH
+		idb_sg->dma_length = idb_sg->length;	      /* hack^2 */
+#endif
 		ret = srp_map_finish_fr(&state, ch);
 		if (ret < 0)
 			return ret;
-- 
cgit v0.10.2


From a745f4f410082fedc17be75e10e4098f300943db Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@sandisk.com>
Date: Tue, 1 Dec 2015 10:18:47 -0800
Subject: IB/srp: Fix indirect data buffer rkey endianness

Detected by sparse.

Fixes: commit 330179f2fa93 ("IB/srp: Register the indirect data buffer descriptor")
Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Cc: stable <stable@vger.kernel.org> # v4.3+
Cc: Sagi Grimberg <sagig@mellanox.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Sebastian Parschauer <sebastian.riemer@profitbricks.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 72fac20..fac1423 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -1662,7 +1662,7 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
 			return ret;
 		req->nmdesc++;
 	} else {
-		idb_rkey = target->global_mr->rkey;
+		idb_rkey = cpu_to_be32(target->global_mr->rkey);
 	}
 
 	indirect_hdr->table_desc.va = cpu_to_be64(req->indirect_dma_addr);
-- 
cgit v0.10.2


From 57b0be9c0fb0ba3a35683c6ce21db7162d6758c5 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@sandisk.com>
Date: Tue, 1 Dec 2015 10:19:38 -0800
Subject: IB/srp: Fix srp_map_sg_fr()

After dma_map_sg() has been called the return value of that function
must be used as the number of elements in the scatterlist instead of
scsi_sg_count().

Fixes: commit f7f7aab1a5c0 ("IB/srp: Convert to new registration API")
Reported-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Cc: stable <stable@vger.kernel.org> # v4.4+
Cc: Sagi Grimberg <sagig@mellanox.com>
Cc: Sebastian Parschauer <sebastian.riemer@profitbricks.com>
Reviewed-by: Sagi Grimberg <sagig@mellanox.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index fac1423..3db9a65 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -1313,7 +1313,7 @@ reset_state:
 }
 
 static int srp_map_finish_fr(struct srp_map_state *state,
-			     struct srp_rdma_ch *ch)
+			     struct srp_rdma_ch *ch, int sg_nents)
 {
 	struct srp_target_port *target = ch->target;
 	struct srp_device *dev = target->srp_host->srp_dev;
@@ -1328,10 +1328,10 @@ static int srp_map_finish_fr(struct srp_map_state *state,
 
 	WARN_ON_ONCE(!dev->use_fast_reg);
 
-	if (state->sg_nents == 0)
+	if (sg_nents == 0)
 		return 0;
 
-	if (state->sg_nents == 1 && target->global_mr) {
+	if (sg_nents == 1 && target->global_mr) {
 		srp_map_desc(state, sg_dma_address(state->sg),
 			     sg_dma_len(state->sg),
 			     target->global_mr->rkey);
@@ -1345,8 +1345,7 @@ static int srp_map_finish_fr(struct srp_map_state *state,
 	rkey = ib_inc_rkey(desc->mr->rkey);
 	ib_update_fast_reg_key(desc->mr, rkey);
 
-	n = ib_map_mr_sg(desc->mr, state->sg, state->sg_nents,
-			 dev->mr_page_size);
+	n = ib_map_mr_sg(desc->mr, state->sg, sg_nents, dev->mr_page_size);
 	if (unlikely(n < 0))
 		return n;
 
@@ -1452,16 +1451,15 @@ static int srp_map_sg_fr(struct srp_map_state *state, struct srp_rdma_ch *ch,
 	state->fr.next = req->fr_list;
 	state->fr.end = req->fr_list + ch->target->cmd_sg_cnt;
 	state->sg = scat;
-	state->sg_nents = scsi_sg_count(req->scmnd);
 
-	while (state->sg_nents) {
+	while (count) {
 		int i, n;
 
-		n = srp_map_finish_fr(state, ch);
+		n = srp_map_finish_fr(state, ch, count);
 		if (unlikely(n < 0))
 			return n;
 
-		state->sg_nents -= n;
+		count -= n;
 		for (i = 0; i < n; i++)
 			state->sg = sg_next(state->sg);
 	}
@@ -1521,13 +1519,12 @@ static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req,
 
 	if (dev->use_fast_reg) {
 		state.sg = idb_sg;
-		state.sg_nents = 1;
 		sg_set_buf(idb_sg, req->indirect_desc, idb_len);
 		idb_sg->dma_address = req->indirect_dma_addr; /* hack! */
 #ifdef CONFIG_NEED_SG_DMA_LENGTH
 		idb_sg->dma_length = idb_sg->length;	      /* hack^2 */
 #endif
-		ret = srp_map_finish_fr(&state, ch);
+		ret = srp_map_finish_fr(&state, ch, 1);
 		if (ret < 0)
 			return ret;
 	} else if (dev->use_fmr) {
diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h
index 87a2a91..f6af531 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.h
+++ b/drivers/infiniband/ulp/srp/ib_srp.h
@@ -300,10 +300,7 @@ struct srp_map_state {
 	dma_addr_t		base_dma_addr;
 	u32			dma_len;
 	u32			total_len;
-	union {
-		unsigned int	npages;
-		int		sg_nents;
-	};
+	unsigned int		npages;
 	unsigned int		nmdesc;
 	unsigned int		ndesc;
 };
-- 
cgit v0.10.2


From 8f5ba10ed40a9d3ffe84854984227d011a7428bd Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@sandisk.com>
Date: Thu, 3 Dec 2015 16:04:17 -0800
Subject: IB core: Fix ib_sg_to_pages()

On 12/03/2015 01:18 AM, Christoph Hellwig wrote:
> The patch looks good to me, but while we touch this area, how about
> throwing in a few cosmetic fixes as well?

How about the patch below ? In that version of the ib_sg_to_pages() fix
these concerns have been addressed and additionally to more bugs have been fixed.

------------

[PATCH] IB core: Fix ib_sg_to_pages()

Fix the code for detecting gaps. A gap occurs not only if the
second or later scatterlist element is not aligned but also if
any scatterlist element other than the last does not end at a
page boundary.

In the code for coalescing contiguous elements, ensure that
mr->length is correct and that last_page_addr is up-to-date.

Ensure that this function returns a negative
error code instead of zero if the first set_page() call fails.

Fixes: commit 4c67e2bfc8b7 ("IB/core: Introduce new fast registration API")
Reported-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Sagi Grimberg <sagig@mellanox.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>

Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 043a60e..545906d 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -1516,7 +1516,7 @@ EXPORT_SYMBOL(ib_map_mr_sg);
  * @sg_nents:      number of entries in sg
  * @set_page:      driver page assignment function pointer
  *
- * Core service helper for drivers to covert the largest
+ * Core service helper for drivers to convert the largest
  * prefix of given sg list to a page vector. The sg list
  * prefix converted is the prefix that meet the requirements
  * of ib_map_mr_sg.
@@ -1533,7 +1533,7 @@ int ib_sg_to_pages(struct ib_mr *mr,
 	u64 last_end_dma_addr = 0, last_page_addr = 0;
 	unsigned int last_page_off = 0;
 	u64 page_mask = ~((u64)mr->page_size - 1);
-	int i;
+	int i, ret;
 
 	mr->iova = sg_dma_address(&sgl[0]);
 	mr->length = 0;
@@ -1544,27 +1544,29 @@ int ib_sg_to_pages(struct ib_mr *mr,
 		u64 end_dma_addr = dma_addr + dma_len;
 		u64 page_addr = dma_addr & page_mask;
 
-		if (i && page_addr != dma_addr) {
-			if (last_end_dma_addr != dma_addr) {
-				/* gap */
-				goto done;
-
-			} else if (last_page_off + dma_len <= mr->page_size) {
-				/* chunk this fragment with the last */
-				mr->length += dma_len;
-				last_end_dma_addr += dma_len;
-				last_page_off += dma_len;
-				continue;
-			} else {
-				/* map starting from the next page */
-				page_addr = last_page_addr + mr->page_size;
-				dma_len -= mr->page_size - last_page_off;
-			}
+		/*
+		 * For the second and later elements, check whether either the
+		 * end of element i-1 or the start of element i is not aligned
+		 * on a page boundary.
+		 */
+		if (i && (last_page_off != 0 || page_addr != dma_addr)) {
+			/* Stop mapping if there is a gap. */
+			if (last_end_dma_addr != dma_addr)
+				break;
+
+			/*
+			 * Coalesce this element with the last. If it is small
+			 * enough just update mr->length. Otherwise start
+			 * mapping from the next page.
+			 */
+			goto next_page;
 		}
 
 		do {
-			if (unlikely(set_page(mr, page_addr)))
-				goto done;
+			ret = set_page(mr, page_addr);
+			if (unlikely(ret < 0))
+				return i ? : ret;
+next_page:
 			page_addr += mr->page_size;
 		} while (page_addr < end_dma_addr);
 
@@ -1574,7 +1576,6 @@ int ib_sg_to_pages(struct ib_mr *mr,
 		last_page_off = end_dma_addr & ~page_mask;
 	}
 
-done:
 	return i;
 }
 EXPORT_SYMBOL(ib_sg_to_pages);
-- 
cgit v0.10.2


From 25642e1459ace29f6ce5a171efc8b7b59a52a2d4 Mon Sep 17 00:00:00 2001
From: Alistair Popple <alistair@popple.id.au>
Date: Mon, 7 Dec 2015 11:28:28 +1100
Subject: powerpc/opal-irqchip: Fix double endian conversion

The OPAL event calls return a mask of events that are active in big
endian format. This is checked when unmasking the events in the
irqchip by comparison with a cached value. The cached value was stored
in big endian format but should've been converted to CPU endian
first.

This bug leads to OPAL event delivery being delayed or dropped on some
systems. Symptoms may include a non-functional console.

The bug is fixed by calling opal_handle_events(...) instead of
duplicating code in opal_event_unmask(...).

Fixes: 9f0fd0499d30 ("powerpc/powernv: Add a virtual irqchip for opal events")
Cc: stable@vger.kernel.org # v4.2+
Reported-by: Douglas L Lehr <dllehr@us.ibm.com>
Signed-off-by: Alistair Popple <alistair@popple.id.au>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>

diff --git a/arch/powerpc/platforms/powernv/opal-irqchip.c b/arch/powerpc/platforms/powernv/opal-irqchip.c
index 6ccfb6c..0a00e2a 100644
--- a/arch/powerpc/platforms/powernv/opal-irqchip.c
+++ b/arch/powerpc/platforms/powernv/opal-irqchip.c
@@ -43,11 +43,34 @@ static unsigned int opal_irq_count;
 static unsigned int *opal_irqs;
 
 static void opal_handle_irq_work(struct irq_work *work);
-static __be64 last_outstanding_events;
+static u64 last_outstanding_events;
 static struct irq_work opal_event_irq_work = {
 	.func = opal_handle_irq_work,
 };
 
+void opal_handle_events(uint64_t events)
+{
+	int virq, hwirq = 0;
+	u64 mask = opal_event_irqchip.mask;
+
+	if (!in_irq() && (events & mask)) {
+		last_outstanding_events = events;
+		irq_work_queue(&opal_event_irq_work);
+		return;
+	}
+
+	while (events & mask) {
+		hwirq = fls64(events) - 1;
+		if (BIT_ULL(hwirq) & mask) {
+			virq = irq_find_mapping(opal_event_irqchip.domain,
+						hwirq);
+			if (virq)
+				generic_handle_irq(virq);
+		}
+		events &= ~BIT_ULL(hwirq);
+	}
+}
+
 static void opal_event_mask(struct irq_data *d)
 {
 	clear_bit(d->hwirq, &opal_event_irqchip.mask);
@@ -55,12 +78,12 @@ static void opal_event_mask(struct irq_data *d)
 
 static void opal_event_unmask(struct irq_data *d)
 {
+	__be64 events;
+
 	set_bit(d->hwirq, &opal_event_irqchip.mask);
 
-	opal_poll_events(&last_outstanding_events);
-	if (last_outstanding_events & opal_event_irqchip.mask)
-		/* Need to retrigger the interrupt */
-		irq_work_queue(&opal_event_irq_work);
+	opal_poll_events(&events);
+	opal_handle_events(be64_to_cpu(events));
 }
 
 static int opal_event_set_type(struct irq_data *d, unsigned int flow_type)
@@ -96,29 +119,6 @@ static int opal_event_map(struct irq_domain *d, unsigned int irq,
 	return 0;
 }
 
-void opal_handle_events(uint64_t events)
-{
-	int virq, hwirq = 0;
-	u64 mask = opal_event_irqchip.mask;
-
-	if (!in_irq() && (events & mask)) {
-		last_outstanding_events = events;
-		irq_work_queue(&opal_event_irq_work);
-		return;
-	}
-
-	while (events & mask) {
-		hwirq = fls64(events) - 1;
-		if (BIT_ULL(hwirq) & mask) {
-			virq = irq_find_mapping(opal_event_irqchip.domain,
-						hwirq);
-			if (virq)
-				generic_handle_irq(virq);
-		}
-		events &= ~BIT_ULL(hwirq);
-	}
-}
-
 static irqreturn_t opal_interrupt(int irq, void *data)
 {
 	__be64 events;
@@ -131,7 +131,7 @@ static irqreturn_t opal_interrupt(int irq, void *data)
 
 static void opal_handle_irq_work(struct irq_work *work)
 {
-	opal_handle_events(be64_to_cpu(last_outstanding_events));
+	opal_handle_events(last_outstanding_events);
 }
 
 static int opal_event_match(struct irq_domain *h, struct device_node *node,
-- 
cgit v0.10.2


From e606e035cc7293a3824527d97359711fdda00663 Mon Sep 17 00:00:00 2001
From: Frederic Barrat <fbarrat@linux.vnet.ibm.com>
Date: Mon, 7 Dec 2015 14:34:40 +0100
Subject: cxl: Set endianess of kernel contexts

A process element (defined in CAIA) keeps track of the endianess of
contexts through the Little Endian (LE) bit of the State Register. It
is currently set for user contexts, but was somehow forgotten for
kernel contexts, so this patch fixes it.
It could lead to erratic behavior from an AFU when the context is
attached through the kernel API.

Fixes: 2f663527bd6a ("cxl: Configure PSL for kernel contexts and merge code")
Cc: stable@vger.kernel.org # 4.2+
Signed-off-by: Frederic Barrat <fbarrat@linux.vnet.ibm.com>
Suggested-by: Michael Neuling <mikey@neuling.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>

diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c
index d2e75c8..f409097 100644
--- a/drivers/misc/cxl/native.c
+++ b/drivers/misc/cxl/native.c
@@ -497,6 +497,7 @@ static u64 calculate_sr(struct cxl_context *ctx)
 {
 	u64 sr = 0;
 
+	set_endian(sr);
 	if (ctx->master)
 		sr |= CXL_PSL_SR_An_MP;
 	if (mfspr(SPRN_LPCR) & LPCR_TC)
@@ -506,7 +507,6 @@ static u64 calculate_sr(struct cxl_context *ctx)
 		sr |= CXL_PSL_SR_An_HV;
 	} else {
 		sr |= CXL_PSL_SR_An_PR | CXL_PSL_SR_An_R;
-		set_endian(sr);
 		sr &= ~(CXL_PSL_SR_An_HV);
 		if (!test_tsk_thread_flag(current, TIF_32BIT))
 			sr |= CXL_PSL_SR_An_SF;
-- 
cgit v0.10.2


From 23adc192b862b69ad80a40bd5206e337f41264ac Mon Sep 17 00:00:00 2001
From: Hui Wang <hui.wang@canonical.com>
Date: Tue, 8 Dec 2015 12:27:18 +0800
Subject: ALSA: hda - Fixing speaker noise on the two latest thinkpad models

We have two latest thinkpad laptop models which are all based on the
Intel skylake platforms, and all of them have the codec alc293 on
them. When the machines boot to the desktop, an greeting dialogue
shows up with the notification sound. But on these two models, there
is noise with the notification sound. We have 3 SKUs for each of
the models, all of them have this problem.

So far, this problem is only specific to these two thinkpad models,
we did not find this problem on the old thinkpad models with the
codec alc293 or alc292.

A workaround for this problem is disabling the aamix.

Cc: stable@vger.kernel.org
BugLink: https://bugs.launchpad.net/bugs/1523517
Signed-off-by: Hui Wang <hui.wang@canonical.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index ebc5362..49b1d36 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -4596,6 +4596,7 @@ enum {
 	ALC298_FIXUP_DELL1_MIC_NO_PRESENCE,
 	ALC275_FIXUP_DELL_XPS,
 	ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE,
+	ALC293_FIXUP_LENOVO_SPK_NOISE,
 };
 
 static const struct hda_fixup alc269_fixups[] = {
@@ -5187,6 +5188,12 @@ static const struct hda_fixup alc269_fixups[] = {
 		.chained = true,
 		.chain_id = ALC255_FIXUP_DELL1_MIC_NO_PRESENCE
 	},
+	[ALC293_FIXUP_LENOVO_SPK_NOISE] = {
+		.type = HDA_FIXUP_FUNC,
+		.v.func = alc_fixup_disable_aamix,
+		.chained = true,
+		.chain_id = ALC269_FIXUP_THINKPAD_ACPI
+	},
 };
 
 static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -5334,6 +5341,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x17aa, 0x2215, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
 	SND_PCI_QUIRK(0x17aa, 0x2223, "ThinkPad T550", ALC292_FIXUP_TPT440_DOCK),
 	SND_PCI_QUIRK(0x17aa, 0x2226, "ThinkPad X250", ALC292_FIXUP_TPT440_DOCK),
+	SND_PCI_QUIRK(0x17aa, 0x2233, "Thinkpad", ALC293_FIXUP_LENOVO_SPK_NOISE),
 	SND_PCI_QUIRK(0x17aa, 0x3977, "IdeaPad S210", ALC283_FIXUP_INT_MIC),
 	SND_PCI_QUIRK(0x17aa, 0x3978, "IdeaPad Y410P", ALC269_FIXUP_NO_SHUTUP),
 	SND_PCI_QUIRK(0x17aa, 0x5013, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
@@ -5343,6 +5351,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x17aa, 0x5034, "Thinkpad T450", ALC292_FIXUP_TPT440_DOCK),
 	SND_PCI_QUIRK(0x17aa, 0x5036, "Thinkpad T450s", ALC292_FIXUP_TPT440_DOCK),
 	SND_PCI_QUIRK(0x17aa, 0x503c, "Thinkpad L450", ALC292_FIXUP_TPT440_DOCK),
+	SND_PCI_QUIRK(0x17aa, 0x504b, "Thinkpad", ALC293_FIXUP_LENOVO_SPK_NOISE),
 	SND_PCI_QUIRK(0x17aa, 0x5109, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
 	SND_PCI_QUIRK(0x17aa, 0x3bf8, "Quanta FL1", ALC269_FIXUP_PCM_44K),
 	SND_PCI_QUIRK(0x17aa, 0x9e54, "LENOVO NB", ALC269_FIXUP_LENOVO_EAPD),
-- 
cgit v0.10.2


From 8fbf9d92a7bc4cadd3a0139698cf17031dfcdfca Mon Sep 17 00:00:00 2001
From: Thomas Hellstrom <thellstrom@vmware.com>
Date: Thu, 26 Nov 2015 19:45:16 +0100
Subject: drm/vmwgfx: Implement the cursor_set2 callback v2

Fixes native drm clients like Fedora 23 Wayland which now appears to
be able to use cursor hotspots without strange cursor offsets.
Also fixes a couple of ignored error paths.

Since the core drm cursor hotspot is incompatible with the legacy vmwgfx
hotspot (the core drm hotspot is reset when the drm_mode_cursor ioctl
is used), we need to keep track of both and add them when the device
hotspot is set. We assume that either is always zero.

Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
Reviewed-by: Sinclair Yeh <syeh@vmware.com>

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index a09cf85..c49812b 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -1233,6 +1233,7 @@ static void vmw_master_drop(struct drm_device *dev,
 
 	vmw_fp->locked_master = drm_master_get(file_priv->master);
 	ret = ttm_vt_lock(&vmaster->lock, false, vmw_fp->tfile);
+	vmw_kms_legacy_hotspot_clear(dev_priv);
 	if (unlikely((ret != 0))) {
 		DRM_ERROR("Unable to lock TTM at VT switch.\n");
 		drm_master_put(&vmw_fp->locked_master);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index a8ae9df..469cdd5 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -925,6 +925,7 @@ int vmw_kms_present(struct vmw_private *dev_priv,
 		    uint32_t num_clips);
 int vmw_kms_update_layout_ioctl(struct drm_device *dev, void *data,
 				struct drm_file *file_priv);
+void vmw_kms_legacy_hotspot_clear(struct vmw_private *dev_priv);
 
 int vmw_dumb_create(struct drm_file *file_priv,
 		    struct drm_device *dev,
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
index 9fcd7f8..9b4bb9e 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
@@ -133,13 +133,19 @@ void vmw_cursor_update_position(struct vmw_private *dev_priv,
 	vmw_mmio_write(++count, fifo_mem + SVGA_FIFO_CURSOR_COUNT);
 }
 
-int vmw_du_crtc_cursor_set(struct drm_crtc *crtc, struct drm_file *file_priv,
-			   uint32_t handle, uint32_t width, uint32_t height)
+
+/*
+ * vmw_du_crtc_cursor_set2 - Driver cursor_set2 callback.
+ */
+int vmw_du_crtc_cursor_set2(struct drm_crtc *crtc, struct drm_file *file_priv,
+			    uint32_t handle, uint32_t width, uint32_t height,
+			    int32_t hot_x, int32_t hot_y)
 {
 	struct vmw_private *dev_priv = vmw_priv(crtc->dev);
 	struct vmw_display_unit *du = vmw_crtc_to_du(crtc);
 	struct vmw_surface *surface = NULL;
 	struct vmw_dma_buffer *dmabuf = NULL;
+	s32 hotspot_x, hotspot_y;
 	int ret;
 
 	/*
@@ -151,6 +157,8 @@ int vmw_du_crtc_cursor_set(struct drm_crtc *crtc, struct drm_file *file_priv,
 	 */
 	drm_modeset_unlock_crtc(crtc);
 	drm_modeset_lock_all(dev_priv->dev);
+	hotspot_x = hot_x + du->hotspot_x;
+	hotspot_y = hot_y + du->hotspot_y;
 
 	/* A lot of the code assumes this */
 	if (handle && (width != 64 || height != 64)) {
@@ -187,31 +195,34 @@ int vmw_du_crtc_cursor_set(struct drm_crtc *crtc, struct drm_file *file_priv,
 		vmw_dmabuf_unreference(&du->cursor_dmabuf);
 
 	/* setup new image */
+	ret = 0;
 	if (surface) {
 		/* vmw_user_surface_lookup takes one reference */
 		du->cursor_surface = surface;
 
 		du->cursor_surface->snooper.crtc = crtc;
 		du->cursor_age = du->cursor_surface->snooper.age;
-		vmw_cursor_update_image(dev_priv, surface->snooper.image,
-					64, 64, du->hotspot_x, du->hotspot_y);
+		ret = vmw_cursor_update_image(dev_priv, surface->snooper.image,
+					      64, 64, hotspot_x, hotspot_y);
 	} else if (dmabuf) {
 		/* vmw_user_surface_lookup takes one reference */
 		du->cursor_dmabuf = dmabuf;
 
 		ret = vmw_cursor_update_dmabuf(dev_priv, dmabuf, width, height,
-					       du->hotspot_x, du->hotspot_y);
+					       hotspot_x, hotspot_y);
 	} else {
 		vmw_cursor_update_position(dev_priv, false, 0, 0);
-		ret = 0;
 		goto out;
 	}
 
-	vmw_cursor_update_position(dev_priv, true,
-				   du->cursor_x + du->hotspot_x,
-				   du->cursor_y + du->hotspot_y);
+	if (!ret) {
+		vmw_cursor_update_position(dev_priv, true,
+					   du->cursor_x + hotspot_x,
+					   du->cursor_y + hotspot_y);
+		du->core_hotspot_x = hot_x;
+		du->core_hotspot_y = hot_y;
+	}
 
-	ret = 0;
 out:
 	drm_modeset_unlock_all(dev_priv->dev);
 	drm_modeset_lock_crtc(crtc, crtc->cursor);
@@ -239,8 +250,10 @@ int vmw_du_crtc_cursor_move(struct drm_crtc *crtc, int x, int y)
 	drm_modeset_lock_all(dev_priv->dev);
 
 	vmw_cursor_update_position(dev_priv, shown,
-				   du->cursor_x + du->hotspot_x,
-				   du->cursor_y + du->hotspot_y);
+				   du->cursor_x + du->hotspot_x +
+				   du->core_hotspot_x,
+				   du->cursor_y + du->hotspot_y +
+				   du->core_hotspot_y);
 
 	drm_modeset_unlock_all(dev_priv->dev);
 	drm_modeset_lock_crtc(crtc, crtc->cursor);
@@ -334,6 +347,29 @@ err_unreserve:
 	ttm_bo_unreserve(bo);
 }
 
+/**
+ * vmw_kms_legacy_hotspot_clear - Clear legacy hotspots
+ *
+ * @dev_priv: Pointer to the device private struct.
+ *
+ * Clears all legacy hotspots.
+ */
+void vmw_kms_legacy_hotspot_clear(struct vmw_private *dev_priv)
+{
+	struct drm_device *dev = dev_priv->dev;
+	struct vmw_display_unit *du;
+	struct drm_crtc *crtc;
+
+	drm_modeset_lock_all(dev);
+	drm_for_each_crtc(crtc, dev) {
+		du = vmw_crtc_to_du(crtc);
+
+		du->hotspot_x = 0;
+		du->hotspot_y = 0;
+	}
+	drm_modeset_unlock_all(dev);
+}
+
 void vmw_kms_cursor_post_execbuf(struct vmw_private *dev_priv)
 {
 	struct drm_device *dev = dev_priv->dev;
@@ -351,7 +387,9 @@ void vmw_kms_cursor_post_execbuf(struct vmw_private *dev_priv)
 		du->cursor_age = du->cursor_surface->snooper.age;
 		vmw_cursor_update_image(dev_priv,
 					du->cursor_surface->snooper.image,
-					64, 64, du->hotspot_x, du->hotspot_y);
+					64, 64,
+					du->hotspot_x + du->core_hotspot_x,
+					du->hotspot_y + du->core_hotspot_y);
 	}
 
 	mutex_unlock(&dev->mode_config.mutex);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
index 782df7c..edd8150 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
@@ -159,6 +159,8 @@ struct vmw_display_unit {
 
 	int hotspot_x;
 	int hotspot_y;
+	s32 core_hotspot_x;
+	s32 core_hotspot_y;
 
 	unsigned unit;
 
@@ -193,8 +195,9 @@ void vmw_du_crtc_restore(struct drm_crtc *crtc);
 void vmw_du_crtc_gamma_set(struct drm_crtc *crtc,
 			   u16 *r, u16 *g, u16 *b,
 			   uint32_t start, uint32_t size);
-int vmw_du_crtc_cursor_set(struct drm_crtc *crtc, struct drm_file *file_priv,
-			   uint32_t handle, uint32_t width, uint32_t height);
+int vmw_du_crtc_cursor_set2(struct drm_crtc *crtc, struct drm_file *file_priv,
+			    uint32_t handle, uint32_t width, uint32_t height,
+			    int32_t hot_x, int32_t hot_y);
 int vmw_du_crtc_cursor_move(struct drm_crtc *crtc, int x, int y);
 int vmw_du_connector_dpms(struct drm_connector *connector, int mode);
 void vmw_du_connector_save(struct drm_connector *connector);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c
index bb63e4d..52caecb 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c
@@ -297,7 +297,7 @@ static int vmw_ldu_crtc_set_config(struct drm_mode_set *set)
 static struct drm_crtc_funcs vmw_legacy_crtc_funcs = {
 	.save = vmw_du_crtc_save,
 	.restore = vmw_du_crtc_restore,
-	.cursor_set = vmw_du_crtc_cursor_set,
+	.cursor_set2 = vmw_du_crtc_cursor_set2,
 	.cursor_move = vmw_du_crtc_cursor_move,
 	.gamma_set = vmw_du_crtc_gamma_set,
 	.destroy = vmw_ldu_crtc_destroy,
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
index b96d1ab..13926ff 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
@@ -533,7 +533,7 @@ out_no_fence:
 static struct drm_crtc_funcs vmw_screen_object_crtc_funcs = {
 	.save = vmw_du_crtc_save,
 	.restore = vmw_du_crtc_restore,
-	.cursor_set = vmw_du_crtc_cursor_set,
+	.cursor_set2 = vmw_du_crtc_cursor_set2,
 	.cursor_move = vmw_du_crtc_cursor_move,
 	.gamma_set = vmw_du_crtc_gamma_set,
 	.destroy = vmw_sou_crtc_destroy,
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
index b1fc1c0..f823fc3 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
@@ -1043,7 +1043,7 @@ out_finish:
 static struct drm_crtc_funcs vmw_stdu_crtc_funcs = {
 	.save = vmw_du_crtc_save,
 	.restore = vmw_du_crtc_restore,
-	.cursor_set = vmw_du_crtc_cursor_set,
+	.cursor_set2 = vmw_du_crtc_cursor_set2,
 	.cursor_move = vmw_du_crtc_cursor_move,
 	.gamma_set = vmw_du_crtc_gamma_set,
 	.destroy = vmw_stdu_crtc_destroy,
-- 
cgit v0.10.2


From 928c75fbeb6b62edbdff7c52ee1e9a3591faee4f Mon Sep 17 00:00:00 2001
From: LABBE Corentin <clabbe.montjoie@gmail.com>
Date: Tue, 24 Nov 2015 15:34:09 +0100
Subject: usb: phy: msm: fix a possible NULL dereference

of_match_device could return NULL, and so cause a NULL pointer
dereference later.
Even if the probability of this case is very low, fixing it made
static analyzers happy.
Solving this with of_device_get_match_data made also code simplier.

Reported-by: coverity (CID 1324133)
Signed-off-by: LABBE Corentin <clabbe.montjoie@gmail.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>

diff --git a/drivers/usb/phy/phy-msm-usb.c b/drivers/usb/phy/phy-msm-usb.c
index 80eb991..0d19a6d 100644
--- a/drivers/usb/phy/phy-msm-usb.c
+++ b/drivers/usb/phy/phy-msm-usb.c
@@ -1506,7 +1506,6 @@ static int msm_otg_read_dt(struct platform_device *pdev, struct msm_otg *motg)
 {
 	struct msm_otg_platform_data *pdata;
 	struct extcon_dev *ext_id, *ext_vbus;
-	const struct of_device_id *id;
 	struct device_node *node = pdev->dev.of_node;
 	struct property *prop;
 	int len, ret, words;
@@ -1518,8 +1517,9 @@ static int msm_otg_read_dt(struct platform_device *pdev, struct msm_otg *motg)
 
 	motg->pdata = pdata;
 
-	id = of_match_device(msm_otg_dt_match, &pdev->dev);
-	pdata->phy_type = (enum msm_usb_phy_type) id->data;
+	pdata->phy_type = (enum msm_usb_phy_type)of_device_get_match_data(&pdev->dev);
+	if (!pdata->phy_type)
+		return 1;
 
 	motg->link_rst = devm_reset_control_get(&pdev->dev, "link");
 	if (IS_ERR(motg->link_rst))
-- 
cgit v0.10.2


From 4d05591094979d24a0e728f246589963c40102e9 Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Mon, 7 Dec 2015 21:23:15 -0800
Subject: usb: musb: core: Fix pm runtime for deferred probe

If musb_init_controller fails at musb_platform_init, we have already
called pm_runtime_irq_safe for musb and that causes the pm runtime count
to be enabled for parent before the parent has completed initialization.
This causes pm to stop working as on unload nothing gets idled.

This issue can be reproduced at least with:

# modprobe omap2430
HS USB OTG: no transceiver configured
musb-hdrc musb-hdrc.0.auto: musb_init_controller failed with status -517
# modprobe phy-twl4030-usb
# rmmod omap2430

And after the steps above omap2430 will block deeper idle states on
omap3.

To fix this, let's not enable pm runtime until we need to and the
parent has been initialized. Note that this does not fix the issue of
PM being broken for musb during runtime.

Signed-off-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>

diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c
index 18cfc0a..3c07ffd 100644
--- a/drivers/usb/musb/musb_core.c
+++ b/drivers/usb/musb/musb_core.c
@@ -2017,7 +2017,6 @@ musb_init_controller(struct device *dev, int nIrq, void __iomem *ctrl)
 	/* We need musb_read/write functions initialized for PM */
 	pm_runtime_use_autosuspend(musb->controller);
 	pm_runtime_set_autosuspend_delay(musb->controller, 200);
-	pm_runtime_irq_safe(musb->controller);
 	pm_runtime_enable(musb->controller);
 
 	/* The musb_platform_init() call:
@@ -2218,6 +2217,12 @@ musb_init_controller(struct device *dev, int nIrq, void __iomem *ctrl)
 
 	pm_runtime_put(musb->controller);
 
+	/*
+	 * For why this is currently needed, see commit 3e43a0725637
+	 * ("usb: musb: core: add pm_runtime_irq_safe()")
+	 */
+	pm_runtime_irq_safe(musb->controller);
+
 	return 0;
 
 fail5:
-- 
cgit v0.10.2


From 27681abc429554622a188a3a0b213fb1c20040f7 Mon Sep 17 00:00:00 2001
From: Mian Yousaf Kaukab <yousaf.kaukab@intel.com>
Date: Sat, 28 Nov 2015 18:35:44 +0100
Subject: usb: gadget: uvc: fix permissions of configfs attributes

76e0da3 "usb-gadget/uvc: use per-attribute show and store methods"
removed write permission for writeable attributes. Correct attribute
permissions.

Fixes: 76e0da3 "usb-gadget/uvc: use per-attribute show and store methods"
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Mian Yousaf Kaukab <yousaf.kaukab@intel.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>

diff --git a/drivers/usb/gadget/function/uvc_configfs.c b/drivers/usb/gadget/function/uvc_configfs.c
index 289ebca..ad8c9b0 100644
--- a/drivers/usb/gadget/function/uvc_configfs.c
+++ b/drivers/usb/gadget/function/uvc_configfs.c
@@ -20,7 +20,7 @@
 #define UVC_ATTR(prefix, cname, aname) \
 static struct configfs_attribute prefix##attr_##cname = { \
 	.ca_name	= __stringify(aname),				\
-	.ca_mode	= S_IRUGO,					\
+	.ca_mode	= S_IRUGO | S_IWUGO,				\
 	.ca_owner	= THIS_MODULE,					\
 	.show		= prefix##cname##_show,				\
 	.store		= prefix##cname##_store,			\
-- 
cgit v0.10.2


From f8062386671a596ca7022c61727a14a25679a0a1 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Sat, 5 Dec 2015 16:13:53 -0800
Subject: of/fdt: Add mutex protection for calls to __unflatten_device_tree()

__unflatten_device_tree() calls unflatten_dt_node(), which declares
a static variable. It is therefore not reentrant.

One of the callers of __unflatten_device_tree(), unflatten_device_tree(),
is only called once during early initialization and does not need to be
protected. The other caller, of_fdt_unflatten_tree(), can be called at
any time, possibly multiple times in parallel. This can happen, for
example, if multiple devicetree overlays have to be loaded and installed.

Without this protection, errors such as the following may be seen.

kernel: End of tree marker overwritten: e6a3a458
kernel: find_target_node:
	Failed to find target-indirect node at /fragment@0
kernel: __of_overlay_create: of_build_overlay_info() failed for tree@/

Add a mutex to of_fdt_unflatten_tree() to make the call reentrant.

Cc: Pantelis Antoniou <pantelis.antoniou@konsulko.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Cc: stable@vger.kernel.org # v4.1+
Signed-off-by: Rob Herring <robh@kernel.org>

diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index 1bbe3a9..655f79d 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -13,6 +13,7 @@
 #include <linux/kernel.h>
 #include <linux/initrd.h>
 #include <linux/memblock.h>
+#include <linux/mutex.h>
 #include <linux/of.h>
 #include <linux/of_fdt.h>
 #include <linux/of_reserved_mem.h>
@@ -436,6 +437,8 @@ static void *kernel_tree_alloc(u64 size, u64 align)
 	return kzalloc(size, GFP_KERNEL);
 }
 
+static DEFINE_MUTEX(of_fdt_unflatten_mutex);
+
 /**
  * of_fdt_unflatten_tree - create tree of device_nodes from flat blob
  *
@@ -447,7 +450,9 @@ static void *kernel_tree_alloc(u64 size, u64 align)
 void of_fdt_unflatten_tree(const unsigned long *blob,
 			struct device_node **mynodes)
 {
+	mutex_lock(&of_fdt_unflatten_mutex);
 	__unflatten_device_tree(blob, mynodes, &kernel_tree_alloc);
+	mutex_unlock(&of_fdt_unflatten_mutex);
 }
 EXPORT_SYMBOL_GPL(of_fdt_unflatten_tree);
 
-- 
cgit v0.10.2


From d3632493c70b6a866a77264cd8cfdeb89958b906 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@sandisk.com>
Date: Fri, 20 Nov 2015 11:04:12 -0800
Subject: IB/cma: Add a missing rcu_read_unlock()

Ensure that validate_ipv4_net_dev() calls rcu_read_unlock() if
fib_lookup() fails. Detected by sparse. Compile-tested only.

Fixes: "IB/cma: Validate routing of incoming requests" (commit f887f2ac87c2).
Cc: Haggai Eran <haggaie@mellanox.com>
Cc: stable <stable@vger.kernel.org>
Reviewed-by: Sagi Grimberg <sagig@mellanox.com>
Reviewed-by: Haggai Eran <haggaie@mellanox.com>
Reviewed-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>

Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 944cd90..d2d5d00 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -1126,10 +1126,7 @@ static bool validate_ipv4_net_dev(struct net_device *net_dev,
 
 	rcu_read_lock();
 	err = fib_lookup(dev_net(net_dev), &fl4, &res, 0);
-	if (err)
-		return false;
-
-	ret = FIB_RES_DEV(res) == net_dev;
+	ret = err == 0 && FIB_RES_DEV(res) == net_dev;
 	rcu_read_unlock();
 
 	return ret;
-- 
cgit v0.10.2


From 533708867dd6388f643f12c87465b59e732d729d Mon Sep 17 00:00:00 2001
From: Hal Rosenstock <hal@dev.mellanox.co.il>
Date: Fri, 13 Nov 2015 15:22:22 -0500
Subject: IB/mad: Require CM send method for everything except ClassPortInfo

Receipt of CM MAD with other than the Send method for an attribute
other than the ClassPortInfo attribute is invalid.

CM attributes other than ClassPortInfo only use the send method.

The SRP initiator does not maintain a timeout policy for CM connect
requests relies on the CM layer to do that. The result was that
the SRP initiator hung as the connect request never completed.

A new SRP target has been observed to respond to Send CM REQ
with GetResp of CM REQ with bad status. This is non conformant
with IBA spec but exposes a vulnerability in the current MAD/CM
code which will respond to the incoming GetResp of CM REQ as if
it was a valid incoming Send of CM REQ rather than tossing
this on the floor. It also causes the MAD layer not to
retransmit the original REQ even though it has not received a REP.

Reviewed-by: Sagi Grimberg <sagig@mellanox.com>
Signed-off-by: Hal Rosenstock <hal@mellanox.com>
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index 8d8af7a..2281de1 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -1811,6 +1811,11 @@ static int validate_mad(const struct ib_mad_hdr *mad_hdr,
 		if (qp_num == 0)
 			valid = 1;
 	} else {
+		/* CM attributes other than ClassPortInfo only use Send method */
+		if ((mad_hdr->mgmt_class == IB_MGMT_CLASS_CM) &&
+		    (mad_hdr->attr_id != IB_MGMT_CLASSPORTINFO_ATTR_ID) &&
+		    (mad_hdr->method != IB_MGMT_METHOD_SEND))
+			goto out;
 		/* Filter GSI packets sent to QP0 */
 		if (qp_num != 0)
 			valid = 1;
diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h
index 188df91..ec9b44d 100644
--- a/include/rdma/ib_mad.h
+++ b/include/rdma/ib_mad.h
@@ -237,6 +237,8 @@ struct ib_vendor_mad {
 	u8			data[IB_MGMT_VENDOR_DATA];
 };
 
+#define IB_MGMT_CLASSPORTINFO_ATTR_ID	cpu_to_be16(0x0001)
+
 struct ib_class_port_info {
 	u8			base_version;
 	u8			class_version;
-- 
cgit v0.10.2


From a5e14ba334e202c58e45ef47414ec94c585c1a8c Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagig@mellanox.com>
Date: Wed, 28 Oct 2015 13:28:15 +0200
Subject: mlx4: Expose correct max_sge_rd limit

mlx4 devices (ConnectX-2, ConnectX-3) has a limitation
where rdma read work queue entries cannot exceed 512 bytes.
A rdma_read wqe needs to fit in 512 bytes:
- wqe control segment (16 bytes)
- rdma segment (16 bytes)
- scatter elements (16 bytes each)

So max_sge_rd should be: (512 - 16 - 16) / 16 = 30.

Signed-off-by: Sagi Grimberg <sagig@mellanox.com>
Reviewed-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index f567160..97d6878 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -456,7 +456,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
 	props->max_qp_wr	   = dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE;
 	props->max_sge		   = min(dev->dev->caps.max_sq_sg,
 					 dev->dev->caps.max_rq_sg);
-	props->max_sge_rd = props->max_sge;
+	props->max_sge_rd	   = MLX4_MAX_SGE_RD;
 	props->max_cq		   = dev->dev->quotas.cq;
 	props->max_cqe		   = dev->dev->caps.max_cqes;
 	props->max_mr		   = dev->dev->quotas.mpt;
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 7501626..d3133be 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -427,6 +427,17 @@ enum {
 };
 
 enum {
+	/*
+	 * Max wqe size for rdma read is 512 bytes, so this
+	 * limits our max_sge_rd as the wqe needs to fit:
+	 * - ctrl segment (16 bytes)
+	 * - rdma segment (16 bytes)
+	 * - scatter elements (16 bytes each)
+	 */
+	MLX4_MAX_SGE_RD	= (512 - 16 - 16) / 16
+};
+
+enum {
 	MLX4_DEV_PMC_SUBTYPE_GUID_INFO	 = 0x14,
 	MLX4_DEV_PMC_SUBTYPE_PORT_INFO	 = 0x15,
 	MLX4_DEV_PMC_SUBTYPE_PKEY_TABLE	 = 0x16,
-- 
cgit v0.10.2


From f1a47d37fbc65bc49d32f23ab61c3e09fefeab73 Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagig@mellanox.com>
Date: Wed, 28 Oct 2015 13:28:16 +0200
Subject: iser-target: Remove explicit mlx4 work-around

The driver now exposes sufficient limits so we can
avoid having mlx4 specific work-around.

Signed-off-by: Sagi Grimberg <sagig@mellanox.com>
Reviewed-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index dfbbbb2..8a51c3b 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -157,16 +157,9 @@ isert_create_qp(struct isert_conn *isert_conn,
 	attr.recv_cq = comp->cq;
 	attr.cap.max_send_wr = ISERT_QP_MAX_REQ_DTOS;
 	attr.cap.max_recv_wr = ISERT_QP_MAX_RECV_DTOS + 1;
-	/*
-	 * FIXME: Use devattr.max_sge - 2 for max_send_sge as
-	 * work-around for RDMA_READs with ConnectX-2.
-	 *
-	 * Also, still make sure to have at least two SGEs for
-	 * outgoing control PDU responses.
-	 */
-	attr.cap.max_send_sge = max(2, device->dev_attr.max_sge - 2);
-	isert_conn->max_sge = attr.cap.max_send_sge;
-
+	attr.cap.max_send_sge = device->dev_attr.max_sge;
+	isert_conn->max_sge = min(device->dev_attr.max_sge,
+				  device->dev_attr.max_sge_rd);
 	attr.cap.max_recv_sge = 1;
 	attr.sq_sig_type = IB_SIGNAL_REQ_WR;
 	attr.qp_type = IB_QPT_RC;
-- 
cgit v0.10.2


From 4ad78628445d26e5e9487b2e8f23274ad7b0f5d3 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 8 Dec 2015 03:07:22 -0500
Subject: 9p: ->evict_inode() should kick out ->i_data, not ->i_mapping

For block devices the pagecache is associated with the inode
on bdevfs, not with the aliasing ones on the mountable filesystems.
The latter have its own ->i_data empty and ->i_mapping pointing
to the (unique per major/minor) bdevfs inode.  That guarantees
cache coherence between all block device inodes with the same
device number.

Eviction of an alias inode has no business trying to evict the
pages belonging to bdevfs one; moreover, ->i_mapping is only
safe to access when the thing is opened.  At the time of
->evict_inode() the victim is definitely *not* opened.  We are
about to kill the address space embedded into struct inode
(inode->i_data) and that's what we need to empty of any pages.

9p instance tries to empty inode->i_mapping instead, which is
both unsafe and bogus - if we have several device nodes with
the same device number in different places, closing one of them
should not try to empty the (shared) page cache.

Fortunately, other instances in the tree are OK; they are
evicting from &inode->i_data instead, as 9p one should.

Cc: stable@vger.kernel.org # v2.6.32+, ones prior to 2.6.36 need only half of that
Reported-by: "Suzuki K. Poulose" <Suzuki.Poulose@arm.com>
Tested-by: "Suzuki K. Poulose" <Suzuki.Poulose@arm.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 699941e..5110785 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -451,9 +451,9 @@ void v9fs_evict_inode(struct inode *inode)
 {
 	struct v9fs_inode *v9inode = V9FS_I(inode);
 
-	truncate_inode_pages_final(inode->i_mapping);
+	truncate_inode_pages_final(&inode->i_data);
 	clear_inode(inode);
-	filemap_fdatawrite(inode->i_mapping);
+	filemap_fdatawrite(&inode->i_data);
 
 	v9fs_cache_inode_put_cookie(inode);
 	/* clunk the fid stashed in writeback_fid */
-- 
cgit v0.10.2


From 2d4594acbf6d8f75a27f3578476b6a27d8b13ebb Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 8 Dec 2015 12:22:47 -0500
Subject: fix the regression from "direct-io: Fix negative return from dio read
 beyond eof"

Sure, it's better to bail out of past-the-eof read and return 0 than return
a bogus negative value on such.  Only we'd better make sure we are bailing out
with 0 and not -ENOMEM...

Cc: stable@vger.kernel.org
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/direct-io.c b/fs/direct-io.c
index 1c75a3a..602e844 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -1175,6 +1175,7 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
 		if (dio->flags & DIO_LOCKING)
 			mutex_unlock(&inode->i_mutex);
 		kmem_cache_free(dio_cache, dio);
+		retval = 0;
 		goto out;
 	}
 
-- 
cgit v0.10.2


From af096e2235c5de76af7e8749f59a90de07f5e943 Mon Sep 17 00:00:00 2001
From: Minfei Huang <mhuang@redhat.com>
Date: Tue, 8 Dec 2015 13:47:34 -0700
Subject: null_blk: Fix error path in module initialization

Module couldn't release resource properly during the initialization. To
fix this issue, we will clean up the proper resource before returning.

Signed-off-by: Minfei Huang <mnfhuang@gmail.com>
Signed-off-by: Jens Axboe <axboe@fb.com>

diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index 7981b74..8162475 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -766,7 +766,9 @@ out:
 
 static int __init null_init(void)
 {
+	int ret = 0;
 	unsigned int i;
+	struct nullb *nullb;
 
 	if (bs > PAGE_SIZE) {
 		pr_warn("null_blk: invalid block size\n");
@@ -808,22 +810,29 @@ static int __init null_init(void)
 								0, 0, NULL);
 		if (!ppa_cache) {
 			pr_err("null_blk: unable to create ppa cache\n");
-			return -ENOMEM;
+			ret = -ENOMEM;
+			goto err_ppa;
 		}
 	}
 
 	for (i = 0; i < nr_devices; i++) {
-		if (null_add_dev()) {
-			unregister_blkdev(null_major, "nullb");
-			goto err_ppa;
-		}
+		ret = null_add_dev();
+		if (ret)
+			goto err_dev;
 	}
 
 	pr_info("null: module loaded\n");
 	return 0;
-err_ppa:
+
+err_dev:
+	while (!list_empty(&nullb_list)) {
+		nullb = list_entry(nullb_list.next, struct nullb, list);
+		null_del_dev(nullb);
+	}
 	kmem_cache_destroy(ppa_cache);
-	return -EINVAL;
+err_ppa:
+	unregister_blkdev(null_major, "nullb");
+	return ret;
 }
 
 static void __exit null_exit(void)
-- 
cgit v0.10.2


From db2f24dc240856fb1d78005307f1523b7b3c121b Mon Sep 17 00:00:00 2001
From: Richard Weinberger <richard@nod.at>
Date: Wed, 18 Nov 2015 09:37:15 +0100
Subject: um: Fix get_signal() usage

If get_signal() returns us a signal to post
we must not call it again, otherwise the already
posted signal will be overridden.
Before commit a610d6e672d this was the case as we stopped
the while after a successful handle_signal().

Cc: <stable@vger.kernel.org> # 3.10-
Fixes: a610d6e672d ("pull clearing RESTORE_SIGMASK into block_sigmask()")
Signed-off-by: Richard Weinberger <richard@nod.at>

diff --git a/arch/um/kernel/signal.c b/arch/um/kernel/signal.c
index 57acbd6..fc8be0e 100644
--- a/arch/um/kernel/signal.c
+++ b/arch/um/kernel/signal.c
@@ -69,7 +69,7 @@ void do_signal(struct pt_regs *regs)
 	struct ksignal ksig;
 	int handled_sig = 0;
 
-	while (get_signal(&ksig)) {
+	if (get_signal(&ksig)) {
 		handled_sig = 1;
 		/* Whee!  Actually deliver the signal.  */
 		handle_signal(&ksig, regs);
-- 
cgit v0.10.2


From fb1770aa78a43530940d0c2dd161e77bc705bdac Mon Sep 17 00:00:00 2001
From: Lorenzo Colitti <lorenzo@google.com>
Date: Wed, 18 Nov 2015 23:12:09 +0900
Subject: arch: um: fix error when linking vmlinux.

On gcc Ubuntu 4.8.4-2ubuntu1~14.04, linking vmlinux fails with:

arch/um/os-Linux/built-in.o: In function `os_timer_create':
/android/kernel/android/arch/um/os-Linux/time.c:51: undefined reference to `timer_create'
arch/um/os-Linux/built-in.o: In function `os_timer_set_interval':
/android/kernel/android/arch/um/os-Linux/time.c:84: undefined reference to `timer_settime'
arch/um/os-Linux/built-in.o: In function `os_timer_remain':
/android/kernel/android/arch/um/os-Linux/time.c:109: undefined reference to `timer_gettime'
arch/um/os-Linux/built-in.o: In function `os_timer_one_shot':
/android/kernel/android/arch/um/os-Linux/time.c:132: undefined reference to `timer_settime'
arch/um/os-Linux/built-in.o: In function `os_timer_disable':
/android/kernel/android/arch/um/os-Linux/time.c:145: undefined reference to `timer_settime'

This is because -lrt appears in the generated link commandline
after arch/um/os-Linux/built-in.o. Fix this by removing -lrt from
arch/um/Makefile and adding it to the UM-specific section of
scripts/link-vmlinux.sh.

Signed-off-by: Lorenzo Colitti <lorenzo@google.com>
Signed-off-by: Richard Weinberger <richard@nod.at>

diff --git a/arch/um/Makefile b/arch/um/Makefile
index 25ed409..e3abe6f 100644
--- a/arch/um/Makefile
+++ b/arch/um/Makefile
@@ -131,7 +131,7 @@ export LDS_ELF_FORMAT := $(ELF_FORMAT)
 # The wrappers will select whether using "malloc" or the kernel allocator.
 LINK_WRAPS = -Wl,--wrap,malloc -Wl,--wrap,free -Wl,--wrap,calloc
 
-LD_FLAGS_CMDLINE = $(foreach opt,$(LDFLAGS),-Wl,$(opt)) -lrt
+LD_FLAGS_CMDLINE = $(foreach opt,$(LDFLAGS),-Wl,$(opt))
 
 # Used by link-vmlinux.sh which has special support for um link
 export CFLAGS_vmlinux := $(LINK-y) $(LINK_WRAPS) $(LD_FLAGS_CMDLINE)
diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh
index 1a10d8a..dacf71a 100755
--- a/scripts/link-vmlinux.sh
+++ b/scripts/link-vmlinux.sh
@@ -62,7 +62,7 @@ vmlinux_link()
 			-Wl,--start-group                                    \
 				 ${KBUILD_VMLINUX_MAIN}                      \
 			-Wl,--end-group                                      \
-			-lutil ${1}
+			-lutil -lrt ${1}
 		rm -f linux
 	fi
 }
-- 
cgit v0.10.2


From 8090bfd2bb9abc6293f5cedef8ec9be84a913d2f Mon Sep 17 00:00:00 2001
From: Richard Weinberger <richard@nod.at>
Date: Sun, 29 Nov 2015 21:13:50 +0100
Subject: um: Fix fpstate handling

The x86 FPU cleanup changed fpstate to a plain integer.
UML on x86 has to deal with that too.

Signed-off-by: Richard Weinberger <richard@nod.at>

diff --git a/arch/x86/um/signal.c b/arch/x86/um/signal.c
index 06934a8..e5f854c 100644
--- a/arch/x86/um/signal.c
+++ b/arch/x86/um/signal.c
@@ -211,7 +211,7 @@ static int copy_sc_from_user(struct pt_regs *regs,
 		if (err)
 			return 1;
 
-		err = convert_fxsr_from_user(&fpx, sc.fpstate);
+		err = convert_fxsr_from_user(&fpx, (void *)sc.fpstate);
 		if (err)
 			return 1;
 
@@ -227,7 +227,7 @@ static int copy_sc_from_user(struct pt_regs *regs,
 	{
 		struct user_i387_struct fp;
 
-		err = copy_from_user(&fp, sc.fpstate,
+		err = copy_from_user(&fp, (void *)sc.fpstate,
 				     sizeof(struct user_i387_struct));
 		if (err)
 			return 1;
@@ -291,7 +291,7 @@ static int copy_sc_to_user(struct sigcontext __user *to,
 #endif
 #undef PUTREG
 	sc.oldmask = mask;
-	sc.fpstate = to_fp;
+	sc.fpstate = (unsigned long)to_fp;
 
 	err = copy_to_user(to, &sc, sizeof(struct sigcontext));
 	if (err)
@@ -468,12 +468,10 @@ long sys_sigreturn(void)
 	struct sigframe __user *frame = (struct sigframe __user *)(sp - 8);
 	sigset_t set;
 	struct sigcontext __user *sc = &frame->sc;
-	unsigned long __user *oldmask = &sc->oldmask;
-	unsigned long __user *extramask = frame->extramask;
 	int sig_size = (_NSIG_WORDS - 1) * sizeof(unsigned long);
 
-	if (copy_from_user(&set.sig[0], oldmask, sizeof(set.sig[0])) ||
-	    copy_from_user(&set.sig[1], extramask, sig_size))
+	if (copy_from_user(&set.sig[0], (void *)sc->oldmask, sizeof(set.sig[0])) ||
+	    copy_from_user(&set.sig[1], frame->extramask, sig_size))
 		goto segfault;
 
 	set_current_blocked(&set);
@@ -505,6 +503,7 @@ int setup_signal_stack_si(unsigned long stack_top, struct ksignal *ksig,
 {
 	struct rt_sigframe __user *frame;
 	int err = 0, sig = ksig->sig;
+	unsigned long fp_to;
 
 	frame = (struct rt_sigframe __user *)
 		round_down(stack_top - sizeof(struct rt_sigframe), 16);
@@ -526,7 +525,10 @@ int setup_signal_stack_si(unsigned long stack_top, struct ksignal *ksig,
 	err |= __save_altstack(&frame->uc.uc_stack, PT_REGS_SP(regs));
 	err |= copy_sc_to_user(&frame->uc.uc_mcontext, &frame->fpstate, regs,
 			       set->sig[0]);
-	err |= __put_user(&frame->fpstate, &frame->uc.uc_mcontext.fpstate);
+
+	fp_to = (unsigned long)&frame->fpstate;
+
+	err |= __put_user(fp_to, &frame->uc.uc_mcontext.fpstate);
 	if (sizeof(*set) == 16) {
 		err |= __put_user(set->sig[0], &frame->uc.uc_sigmask.sig[0]);
 		err |= __put_user(set->sig[1], &frame->uc.uc_sigmask.sig[1]);
-- 
cgit v0.10.2


From 887a9853092c09e20598f4a7f91ac1cfb762be50 Mon Sep 17 00:00:00 2001
From: "Geyslan G. Bem" <geyslan@gmail.com>
Date: Tue, 1 Dec 2015 17:18:55 -0300
Subject: um: fix returns without va_end

When using va_list ensure that va_start will be followed by va_end.

Signed-off-by: Geyslan G. Bem <geyslan@gmail.com>
Signed-off-by: Richard Weinberger <richard@nod.at>

diff --git a/arch/um/drivers/net_user.c b/arch/um/drivers/net_user.c
index e697a41..e9f8445 100644
--- a/arch/um/drivers/net_user.c
+++ b/arch/um/drivers/net_user.c
@@ -249,21 +249,23 @@ void close_addr(unsigned char *addr, unsigned char *netmask, void *arg)
 
 char *split_if_spec(char *str, ...)
 {
-	char **arg, *end;
+	char **arg, *end, *ret = NULL;
 	va_list ap;
 
 	va_start(ap, str);
 	while ((arg = va_arg(ap, char **)) != NULL) {
 		if (*str == '\0')
-			return NULL;
+			goto out;
 		end = strchr(str, ',');
 		if (end != str)
 			*arg = str;
 		if (end == NULL)
-			return NULL;
+			goto out;
 		*end++ = '\0';
 		str = end;
 	}
+	ret = str;
+out:
 	va_end(ap);
-	return str;
+	return ret;
 }
-- 
cgit v0.10.2


From 73d4da7b9fec13cbddcf548888c4d0e46f1d5087 Mon Sep 17 00:00:00 2001
From: Wengang Wang <wen.gang.wang@oracle.com>
Date: Thu, 8 Oct 2015 13:21:33 +0800
Subject: IB/mlx4: Use correct order of variables in log message

There is a mis-order in mlx4 log. Fix it.

Signed-off-by: Wengang Wang <wen.gang.wang@oracle.com>
Acked-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c
index 2177e56..d48d579 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -1010,7 +1010,7 @@ static int mlx4_MAD_IFC_wrapper(struct mlx4_dev *dev, int slave,
 		if (!(smp->mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED &&
 		      smp->method == IB_MGMT_METHOD_GET) || network_view) {
 			mlx4_err(dev, "Unprivileged slave %d is trying to execute a Subnet MGMT MAD, class 0x%x, method 0x%x, view=%s for attr 0x%x. Rejecting\n",
-				 slave, smp->method, smp->mgmt_class,
+				 slave, smp->mgmt_class, smp->method,
 				 network_view ? "Network" : "Host",
 				 be16_to_cpu(smp->attr_id));
 			return -EPERM;
-- 
cgit v0.10.2


From 0ef2f05c7e02ff99c0b5b583d7dee2cd12b053f2 Mon Sep 17 00:00:00 2001
From: Wengang Wang <wen.gang.wang@oracle.com>
Date: Thu, 8 Oct 2015 13:27:04 +0800
Subject: IB/mlx4: Use vmalloc for WR buffers when needed

There are several hits that WR buffer allocation(kmalloc) failed.
It failed at order 3 and/or 4 contigous pages allocation. At the same time
there are actually 100MB+ free memory but well fragmented.
So try vmalloc when kmalloc failed.

Signed-off-by: Wengang Wang <wen.gang.wang@oracle.com>
Acked-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index a2e4ca5..13eaaf4 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -34,6 +34,7 @@
 #include <linux/log2.h>
 #include <linux/slab.h>
 #include <linux/netdevice.h>
+#include <linux/vmalloc.h>
 
 #include <rdma/ib_cache.h>
 #include <rdma/ib_pack.h>
@@ -795,8 +796,14 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
 		if (err)
 			goto err_mtt;
 
-		qp->sq.wrid  = kmalloc(qp->sq.wqe_cnt * sizeof (u64), gfp);
-		qp->rq.wrid  = kmalloc(qp->rq.wqe_cnt * sizeof (u64), gfp);
+		qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof(u64), gfp);
+		if (!qp->sq.wrid)
+			qp->sq.wrid = __vmalloc(qp->sq.wqe_cnt * sizeof(u64),
+						gfp, PAGE_KERNEL);
+		qp->rq.wrid = kmalloc(qp->rq.wqe_cnt * sizeof(u64), gfp);
+		if (!qp->rq.wrid)
+			qp->rq.wrid = __vmalloc(qp->rq.wqe_cnt * sizeof(u64),
+						gfp, PAGE_KERNEL);
 		if (!qp->sq.wrid || !qp->rq.wrid) {
 			err = -ENOMEM;
 			goto err_wrid;
@@ -886,8 +893,8 @@ err_wrid:
 		if (qp_has_rq(init_attr))
 			mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &qp->db);
 	} else {
-		kfree(qp->sq.wrid);
-		kfree(qp->rq.wrid);
+		kvfree(qp->sq.wrid);
+		kvfree(qp->rq.wrid);
 	}
 
 err_mtt:
@@ -1062,8 +1069,8 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
 					      &qp->db);
 		ib_umem_release(qp->umem);
 	} else {
-		kfree(qp->sq.wrid);
-		kfree(qp->rq.wrid);
+		kvfree(qp->sq.wrid);
+		kvfree(qp->rq.wrid);
 		if (qp->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER |
 		    MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI))
 			free_proxy_bufs(&dev->ib_dev, qp);
diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c
index dce5dfe..8d133c4 100644
--- a/drivers/infiniband/hw/mlx4/srq.c
+++ b/drivers/infiniband/hw/mlx4/srq.c
@@ -34,6 +34,7 @@
 #include <linux/mlx4/qp.h>
 #include <linux/mlx4/srq.h>
 #include <linux/slab.h>
+#include <linux/vmalloc.h>
 
 #include "mlx4_ib.h"
 #include "user.h"
@@ -172,8 +173,12 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
 
 		srq->wrid = kmalloc(srq->msrq.max * sizeof (u64), GFP_KERNEL);
 		if (!srq->wrid) {
-			err = -ENOMEM;
-			goto err_mtt;
+			srq->wrid = __vmalloc(srq->msrq.max * sizeof(u64),
+					      GFP_KERNEL, PAGE_KERNEL);
+			if (!srq->wrid) {
+				err = -ENOMEM;
+				goto err_mtt;
+			}
 		}
 	}
 
@@ -204,7 +209,7 @@ err_wrid:
 	if (pd->uobject)
 		mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &srq->db);
 	else
-		kfree(srq->wrid);
+		kvfree(srq->wrid);
 
 err_mtt:
 	mlx4_mtt_cleanup(dev->dev, &srq->mtt);
-- 
cgit v0.10.2


From ab5cdc31630c7596d81ca8fbe7d695f10666f39b Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Wed, 21 Oct 2015 09:21:17 +0300
Subject: IB/mlx5: Postpone remove_keys under knowledge of coming preemption

The remove_keys() logic is performed as garbage collection task. Such
task is intended to be run when no other active processes are running.

The need_resched() will return TRUE if there are user tasks to be
activated in near future.

In such case, we don't execute remove_keys() and postpone
the garbage collection work to try to run in next cycle,
in order to free CPU resources to other tasks.

The possible pseudo-code to trigger such scenario:
1. Allocate a lot of MR to fill the cache above the limit.
2. Wait a small amount of time "to calm" the system.
3. Start CPU extensive operations on multi-node cluster.
4. Expect performance degradation during MR cache shrink operation.

Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Eli Cohen <eli@mellanox.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index ec8993a..6000f7a 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -381,7 +381,19 @@ static void __cache_work_func(struct mlx5_cache_ent *ent)
 			}
 		}
 	} else if (ent->cur > 2 * ent->limit) {
-		if (!someone_adding(cache) &&
+		/*
+		 * The remove_keys() logic is performed as garbage collection
+		 * task. Such task is intended to be run when no other active
+		 * processes are running.
+		 *
+		 * The need_resched() will return TRUE if there are user tasks
+		 * to be activated in near future.
+		 *
+		 * In such case, we don't execute remove_keys() and postpone
+		 * the garbage collection work to try to run in next cycle,
+		 * in order to free CPU resources to other tasks.
+		 */
+		if (!need_resched() && !someone_adding(cache) &&
 		    time_after(jiffies, cache->last_add + 300 * HZ)) {
 			remove_keys(dev, i, 1);
 			if (ent->cur > ent->limit)
-- 
cgit v0.10.2


From 714a98fc3f20934ce8098667b809de954ebab93c Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Wed, 9 Dec 2015 10:06:05 +1000
Subject: drm/nouveau/pmu: remove whitelist for PGOB-exit WAR, enable by
 default

NVIDIA have indicated that the workaround is required on all GK10[467]
boards that have the PGOB fuse set.

I've left the commandline option in place for now, as paranoia.

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>

diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/device.h b/drivers/gpu/drm/nouveau/include/nvkm/core/device.h
index 8f76000..913192c 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/core/device.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/core/device.h
@@ -159,7 +159,6 @@ struct nvkm_device_func {
 struct nvkm_device_quirk {
 	u8 tv_pin_mask;
 	u8 tv_gpio;
-	bool War00C800_0;
 };
 
 struct nvkm_device_chip {
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c
index caf22b5..62ad030 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c
@@ -259,12 +259,6 @@ nvkm_device_pci_10de_0df4[] = {
 };
 
 static const struct nvkm_device_pci_vendor
-nvkm_device_pci_10de_0fcd[] = {
-	{ 0x17aa, 0x3801, NULL, { .War00C800_0 = true } }, /* Lenovo Y510P */
-	{}
-};
-
-static const struct nvkm_device_pci_vendor
 nvkm_device_pci_10de_0fd2[] = {
 	{ 0x1028, 0x0595, "GeForce GT 640M LE" },
 	{ 0x1028, 0x05b2, "GeForce GT 640M LE" },
@@ -279,12 +273,6 @@ nvkm_device_pci_10de_0fe3[] = {
 };
 
 static const struct nvkm_device_pci_vendor
-nvkm_device_pci_10de_0fe4[] = {
-	{ 0x144d, 0xc740, NULL, { .War00C800_0 = true } },
-	{}
-};
-
-static const struct nvkm_device_pci_vendor
 nvkm_device_pci_10de_104b[] = {
 	{ 0x1043, 0x844c, "GeForce GT 625" },
 	{ 0x1043, 0x846b, "GeForce GT 625" },
@@ -690,13 +678,6 @@ nvkm_device_pci_10de_1189[] = {
 static const struct nvkm_device_pci_vendor
 nvkm_device_pci_10de_1199[] = {
 	{ 0x1458, 0xd001, "GeForce GTX 760" },
-	{ 0x1462, 0x1106, "GeForce GTX 780M", { .War00C800_0 = true } }, /* Medion Erazer X7827 */
-	{}
-};
-
-static const struct nvkm_device_pci_vendor
-nvkm_device_pci_10de_11e0[] = {
-	{ 0x1558, 0x5106, NULL, { .War00C800_0 = true } },
 	{}
 };
 
@@ -707,14 +688,6 @@ nvkm_device_pci_10de_11e3[] = {
 };
 
 static const struct nvkm_device_pci_vendor
-nvkm_device_pci_10de_11fc[] = {
-	{ 0x1179, 0x0001, NULL, { .War00C800_0 = true } }, /* Toshiba Tecra W50 */
-	{ 0x17aa, 0x2211, NULL, { .War00C800_0 = true } }, /* Lenovo W541 */
-	{ 0x17aa, 0x221e, NULL, { .War00C800_0 = true } }, /* Lenovo W541 */
-	{}
-};
-
-static const struct nvkm_device_pci_vendor
 nvkm_device_pci_10de_1247[] = {
 	{ 0x1043, 0x212a, "GeForce GT 635M" },
 	{ 0x1043, 0x212b, "GeForce GT 635M" },
@@ -1368,7 +1341,7 @@ nvkm_device_pci_10de[] = {
 	{ 0x0fc6, "GeForce GTX 650" },
 	{ 0x0fc8, "GeForce GT 740" },
 	{ 0x0fc9, "GeForce GT 730" },
-	{ 0x0fcd, "GeForce GT 755M", nvkm_device_pci_10de_0fcd },
+	{ 0x0fcd, "GeForce GT 755M" },
 	{ 0x0fce, "GeForce GT 640M LE" },
 	{ 0x0fd1, "GeForce GT 650M" },
 	{ 0x0fd2, "GeForce GT 640M", nvkm_device_pci_10de_0fd2 },
@@ -1382,7 +1355,7 @@ nvkm_device_pci_10de[] = {
 	{ 0x0fe1, "GeForce GT 730M" },
 	{ 0x0fe2, "GeForce GT 745M" },
 	{ 0x0fe3, "GeForce GT 745M", nvkm_device_pci_10de_0fe3 },
-	{ 0x0fe4, "GeForce GT 750M", nvkm_device_pci_10de_0fe4 },
+	{ 0x0fe4, "GeForce GT 750M" },
 	{ 0x0fe9, "GeForce GT 750M" },
 	{ 0x0fea, "GeForce GT 755M" },
 	{ 0x0fec, "GeForce 710A" },
@@ -1497,12 +1470,12 @@ nvkm_device_pci_10de[] = {
 	{ 0x11c6, "GeForce GTX 650 Ti" },
 	{ 0x11c8, "GeForce GTX 650" },
 	{ 0x11cb, "GeForce GT 740" },
-	{ 0x11e0, "GeForce GTX 770M", nvkm_device_pci_10de_11e0 },
+	{ 0x11e0, "GeForce GTX 770M" },
 	{ 0x11e1, "GeForce GTX 765M" },
 	{ 0x11e2, "GeForce GTX 765M" },
 	{ 0x11e3, "GeForce GTX 760M", nvkm_device_pci_10de_11e3 },
 	{ 0x11fa, "Quadro K4000" },
-	{ 0x11fc, "Quadro K2100M", nvkm_device_pci_10de_11fc },
+	{ 0x11fc, "Quadro K2100M" },
 	{ 0x1200, "GeForce GTX 560 Ti" },
 	{ 0x1201, "GeForce GTX 560" },
 	{ 0x1203, "GeForce GTX 460 SE v2" },
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gk104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gk104.c
index d942fa7..86f9f3b 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gk104.c
@@ -81,9 +81,7 @@ gk104_pmu_pgob(struct nvkm_pmu *pmu, bool enable)
 	nvkm_mask(device, 0x000200, 0x00001000, 0x00001000);
 	nvkm_rd32(device, 0x000200);
 
-	if ( nvkm_boolopt(device->cfgopt, "War00C800_0",
-	    device->quirk ? device->quirk->War00C800_0 : false)) {
-		nvkm_info(&pmu->subdev, "hw bug workaround enabled\n");
+	if (nvkm_boolopt(device->cfgopt, "War00C800_0", true)) {
 		switch (device->chipset) {
 		case 0xe4:
 			magic(device, 0x04000000);
-- 
cgit v0.10.2


From 5b01310cfc8d2302dcca1d8da42873edab2ef784 Mon Sep 17 00:00:00 2001
From: Paul Gortmaker <paul.gortmaker@windriver.com>
Date: Tue, 8 Dec 2015 17:44:02 -0500
Subject: powerpc/sbc8641: drop bogus PHY IRQ entries from DTS file

This file was originally cloned off of the MPC8641D-HPCN reference
platform, which actually had a PHY IRQ line connected. However this
board does not. The bogus entry was largely inert and went undetected
until commit 321beec5047af83db90c88114b7e664b156f49fe ("net: phy: Use
interrupts when available in NOLINK state") was added to the tree.

With the above commit, the board fails to NFS boot since it sits waiting
for a PHY IRQ event that of course never arrives. Removing the bogus
entries from the DTS file fixes the issue.

Cc: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>

diff --git a/arch/powerpc/boot/dts/sbc8641d.dts b/arch/powerpc/boot/dts/sbc8641d.dts
index 631ede7..68f0ed7 100644
--- a/arch/powerpc/boot/dts/sbc8641d.dts
+++ b/arch/powerpc/boot/dts/sbc8641d.dts
@@ -227,23 +227,15 @@
 				reg = <0x520 0x20>;
 
 				phy0: ethernet-phy@1f {
-					interrupt-parent = <&mpic>;
-					interrupts = <10 1>;
 					reg = <0x1f>;
 				};
 				phy1: ethernet-phy@0 {
-					interrupt-parent = <&mpic>;
-					interrupts = <10 1>;
 					reg = <0>;
 				};
 				phy2: ethernet-phy@1 {
-					interrupt-parent = <&mpic>;
-					interrupts = <10 1>;
 					reg = <1>;
 				};
 				phy3: ethernet-phy@2 {
-					interrupt-parent = <&mpic>;
-					interrupts = <10 1>;
 					reg = <2>;
 				};
 				tbi0: tbi-phy@11 {
-- 
cgit v0.10.2


From dc9c41bd9ece090b54eb8f1bbdfb1930e10d3ae7 Mon Sep 17 00:00:00 2001
From: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
Date: Tue, 8 Dec 2015 16:59:25 +1100
Subject: Revert "powerpc/eeh: Don't unfreeze PHB PE after reset"

This reverts commit 527d10ef3a315d3cb9dc098dacd61889a6c26439.

The reverted commit breaks cxlflash devices following an EEH reset (and
possibly other cxl devices, however this has not been tested).

The reverted commit changed the behaviour of eeh_reset_device() so that PHB
PEs are not unfrozen following the completion of the reset. This should not
be problematic, as no device resources should have been associated with the
PHB PE.

However, when attempting to load the cxlflash driver after a reset, the
driver attempts to read Vital Product Data through a call to
pci_read_vpd() (which is called on the physical cxl device, not on the
virtual AFU device). pci_read_vpd() in turn attempts to read from the cxl
device's config space. This fails, as the PE it's trying to read from is
still frozen. In turn, the driver gets an -ENODEV and fails to initialise.

It appears this issue only affects some parts of the VPD area, as "lspci
-vvv", which only reads a subset of the VPD bytes, is not broken by the
original patch.

At this stage, we don't fully understand why we're trying to read a frozen
PE, and we don't know how this affects other cxl devices. It is possible
that there is an underlying bug in the cxl driver or the powerpc CAPI
support code, or alternatively a bug in the PCI resource allocation/mapping
code that is incorrectly mapping resources to PE#0.

As such, this fix is incomplete, however it is necessary to prevent a
serious regression in CAPI support.

In the meantime, revert the commit, especially as it was intended to be a
non-functional change.

Cc: Gavin Shan <gwshan@linux.vnet.ibm.com>
Cc: Ian Munsie <imunsie@au1.ibm.com>
Cc: Daniel Axtens <dja@axtens.net>
Signed-off-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>

diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 80dfe89..8d14feb 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -590,16 +590,10 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
 	eeh_ops->configure_bridge(pe);
 	eeh_pe_restore_bars(pe);
 
-	/*
-	 * If it's PHB PE, the frozen state on all available PEs should have
-	 * been cleared by the PHB reset. Otherwise, we unfreeze the PE and its
-	 * child PEs because they might be in frozen state.
-	 */
-	if (!(pe->type & EEH_PE_PHB)) {
-		rc = eeh_clear_pe_frozen_state(pe, false);
-		if (rc)
-			return rc;
-	}
+	/* Clear frozen state */
+	rc = eeh_clear_pe_frozen_state(pe, false);
+	if (rc)
+		return rc;
 
 	/* Give the system 5 seconds to finish running the user-space
 	 * hotplug shutdown scripts, e.g. ifdown for ethernet.  Yes,
-- 
cgit v0.10.2


From e410b5cbabe70b1f1da08146481f6bd7e904643c Mon Sep 17 00:00:00 2001
From: Chunming Zhou <David1.Zhou@amd.com>
Date: Mon, 7 Dec 2015 15:02:52 +0800
Subject: drm/amdgpu: fix the lost duplicates checking
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Chunming Zhou <David1.Zhou@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Jammy Zhou <Jammy.Zhou@amd.com>
Cc: stable@vger.kernel.org

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index f6ea4b4..9c253c5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -477,6 +477,14 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
 		if (domain == AMDGPU_GEM_DOMAIN_CPU)
 			goto error_unreserve;
 	}
+	list_for_each_entry(entry, &duplicates, head) {
+		domain = amdgpu_mem_type_to_domain(entry->bo->mem.mem_type);
+		/* if anything is swapped out don't swap it in here,
+		   just abort and wait for the next CS */
+		if (domain == AMDGPU_GEM_DOMAIN_CPU)
+			goto error_unreserve;
+	}
+
 	r = amdgpu_vm_update_page_directory(adev, bo_va->vm);
 	if (r)
 		goto error_unreserve;
-- 
cgit v0.10.2


From 5f3e226f511ec98d70c970f09cdb4ec46511cc5e Mon Sep 17 00:00:00 2001
From: Oded Gabbay <oded.gabbay@gmail.com>
Date: Fri, 4 Dec 2015 23:09:03 +0200
Subject: radeon/cik: Fix GFX IB test on Big-Endian
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch makes the IB test on the GFX ring pass for CI-based cards
installed in Big-Endian machines.

Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
Cc: stable@vger.kernel.org
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
index 0154db4..f81fb26 100644
--- a/drivers/gpu/drm/radeon/cik.c
+++ b/drivers/gpu/drm/radeon/cik.c
@@ -4173,11 +4173,7 @@ void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
 	control |= ib->length_dw | (vm_id << 24);
 
 	radeon_ring_write(ring, header);
-	radeon_ring_write(ring,
-#ifdef __BIG_ENDIAN
-			  (2 << 0) |
-#endif
-			  (ib->gpu_addr & 0xFFFFFFFC));
+	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFFC));
 	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
 	radeon_ring_write(ring, control);
 }
-- 
cgit v0.10.2


From 687f4b98d1f4e27508f7ad4bcce787c1ba58b289 Mon Sep 17 00:00:00 2001
From: Oded Gabbay <oded.gabbay@gmail.com>
Date: Fri, 4 Dec 2015 23:09:04 +0200
Subject: radeon: Fix VCE ring test for Big-Endian systems
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch fixes the VCE ring test when running on Big-Endian machines.
Every write to the ring needs to be translated to little-endian.

Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
Cc: stable@vger.kernel.org
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

diff --git a/drivers/gpu/drm/radeon/radeon_vce.c b/drivers/gpu/drm/radeon/radeon_vce.c
index 574f62b..86f57e4 100644
--- a/drivers/gpu/drm/radeon/radeon_vce.c
+++ b/drivers/gpu/drm/radeon/radeon_vce.c
@@ -699,12 +699,12 @@ bool radeon_vce_semaphore_emit(struct radeon_device *rdev,
 {
 	uint64_t addr = semaphore->gpu_addr;
 
-	radeon_ring_write(ring, VCE_CMD_SEMAPHORE);
-	radeon_ring_write(ring, (addr >> 3) & 0x000FFFFF);
-	radeon_ring_write(ring, (addr >> 23) & 0x000FFFFF);
-	radeon_ring_write(ring, 0x01003000 | (emit_wait ? 1 : 0));
+	radeon_ring_write(ring, cpu_to_le32(VCE_CMD_SEMAPHORE));
+	radeon_ring_write(ring, cpu_to_le32((addr >> 3) & 0x000FFFFF));
+	radeon_ring_write(ring, cpu_to_le32((addr >> 23) & 0x000FFFFF));
+	radeon_ring_write(ring, cpu_to_le32(0x01003000 | (emit_wait ? 1 : 0)));
 	if (!emit_wait)
-		radeon_ring_write(ring, VCE_CMD_END);
+		radeon_ring_write(ring, cpu_to_le32(VCE_CMD_END));
 
 	return true;
 }
@@ -719,10 +719,10 @@ bool radeon_vce_semaphore_emit(struct radeon_device *rdev,
 void radeon_vce_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
 {
 	struct radeon_ring *ring = &rdev->ring[ib->ring];
-	radeon_ring_write(ring, VCE_CMD_IB);
-	radeon_ring_write(ring, ib->gpu_addr);
-	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr));
-	radeon_ring_write(ring, ib->length_dw);
+	radeon_ring_write(ring, cpu_to_le32(VCE_CMD_IB));
+	radeon_ring_write(ring, cpu_to_le32(ib->gpu_addr));
+	radeon_ring_write(ring, cpu_to_le32(upper_32_bits(ib->gpu_addr)));
+	radeon_ring_write(ring, cpu_to_le32(ib->length_dw));
 }
 
 /**
@@ -738,12 +738,12 @@ void radeon_vce_fence_emit(struct radeon_device *rdev,
 	struct radeon_ring *ring = &rdev->ring[fence->ring];
 	uint64_t addr = rdev->fence_drv[fence->ring].gpu_addr;
 
-	radeon_ring_write(ring, VCE_CMD_FENCE);
-	radeon_ring_write(ring, addr);
-	radeon_ring_write(ring, upper_32_bits(addr));
-	radeon_ring_write(ring, fence->seq);
-	radeon_ring_write(ring, VCE_CMD_TRAP);
-	radeon_ring_write(ring, VCE_CMD_END);
+	radeon_ring_write(ring, cpu_to_le32(VCE_CMD_FENCE));
+	radeon_ring_write(ring, cpu_to_le32(addr));
+	radeon_ring_write(ring, cpu_to_le32(upper_32_bits(addr)));
+	radeon_ring_write(ring, cpu_to_le32(fence->seq));
+	radeon_ring_write(ring, cpu_to_le32(VCE_CMD_TRAP));
+	radeon_ring_write(ring, cpu_to_le32(VCE_CMD_END));
 }
 
 /**
@@ -765,7 +765,7 @@ int radeon_vce_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
 			  ring->idx, r);
 		return r;
 	}
-	radeon_ring_write(ring, VCE_CMD_END);
+	radeon_ring_write(ring, cpu_to_le32(VCE_CMD_END));
 	radeon_ring_unlock_commit(rdev, ring, false);
 
 	for (i = 0; i < rdev->usec_timeout; i++) {
-- 
cgit v0.10.2


From 361c32d39087e7caa99e629c0d7fb00643cb2190 Mon Sep 17 00:00:00 2001
From: Oded Gabbay <oded.gabbay@gmail.com>
Date: Fri, 4 Dec 2015 23:09:05 +0200
Subject: radeon: Fix VCE IB test on Big-Endian systems
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch makes the VCE IB test pass on Big-Endian systems. It converts
to little-endian the contents of the VCE message.

Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
Cc: stable@vger.kernel.org
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

diff --git a/drivers/gpu/drm/radeon/radeon_vce.c b/drivers/gpu/drm/radeon/radeon_vce.c
index 86f57e4..7eb1ae7 100644
--- a/drivers/gpu/drm/radeon/radeon_vce.c
+++ b/drivers/gpu/drm/radeon/radeon_vce.c
@@ -361,31 +361,31 @@ int radeon_vce_get_create_msg(struct radeon_device *rdev, int ring,
 
 	/* stitch together an VCE create msg */
 	ib.length_dw = 0;
-	ib.ptr[ib.length_dw++] = 0x0000000c; /* len */
-	ib.ptr[ib.length_dw++] = 0x00000001; /* session cmd */
-	ib.ptr[ib.length_dw++] = handle;
-
-	ib.ptr[ib.length_dw++] = 0x00000030; /* len */
-	ib.ptr[ib.length_dw++] = 0x01000001; /* create cmd */
-	ib.ptr[ib.length_dw++] = 0x00000000;
-	ib.ptr[ib.length_dw++] = 0x00000042;
-	ib.ptr[ib.length_dw++] = 0x0000000a;
-	ib.ptr[ib.length_dw++] = 0x00000001;
-	ib.ptr[ib.length_dw++] = 0x00000080;
-	ib.ptr[ib.length_dw++] = 0x00000060;
-	ib.ptr[ib.length_dw++] = 0x00000100;
-	ib.ptr[ib.length_dw++] = 0x00000100;
-	ib.ptr[ib.length_dw++] = 0x0000000c;
-	ib.ptr[ib.length_dw++] = 0x00000000;
-
-	ib.ptr[ib.length_dw++] = 0x00000014; /* len */
-	ib.ptr[ib.length_dw++] = 0x05000005; /* feedback buffer */
-	ib.ptr[ib.length_dw++] = upper_32_bits(dummy);
-	ib.ptr[ib.length_dw++] = dummy;
-	ib.ptr[ib.length_dw++] = 0x00000001;
+	ib.ptr[ib.length_dw++] = cpu_to_le32(0x0000000c); /* len */
+	ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000001); /* session cmd */
+	ib.ptr[ib.length_dw++] = cpu_to_le32(handle);
+
+	ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000030); /* len */
+	ib.ptr[ib.length_dw++] = cpu_to_le32(0x01000001); /* create cmd */
+	ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000000);
+	ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000042);
+	ib.ptr[ib.length_dw++] = cpu_to_le32(0x0000000a);
+	ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000001);
+	ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000080);
+	ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000060);
+	ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000100);
+	ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000100);
+	ib.ptr[ib.length_dw++] = cpu_to_le32(0x0000000c);
+	ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000000);
+
+	ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000014); /* len */
+	ib.ptr[ib.length_dw++] = cpu_to_le32(0x05000005); /* feedback buffer */
+	ib.ptr[ib.length_dw++] = cpu_to_le32(upper_32_bits(dummy));
+	ib.ptr[ib.length_dw++] = cpu_to_le32(dummy);
+	ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000001);
 
 	for (i = ib.length_dw; i < ib_size_dw; ++i)
-		ib.ptr[i] = 0x0;
+		ib.ptr[i] = cpu_to_le32(0x0);
 
 	r = radeon_ib_schedule(rdev, &ib, NULL, false);
 	if (r) {
@@ -428,21 +428,21 @@ int radeon_vce_get_destroy_msg(struct radeon_device *rdev, int ring,
 
 	/* stitch together an VCE destroy msg */
 	ib.length_dw = 0;
-	ib.ptr[ib.length_dw++] = 0x0000000c; /* len */
-	ib.ptr[ib.length_dw++] = 0x00000001; /* session cmd */
-	ib.ptr[ib.length_dw++] = handle;
+	ib.ptr[ib.length_dw++] = cpu_to_le32(0x0000000c); /* len */
+	ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000001); /* session cmd */
+	ib.ptr[ib.length_dw++] = cpu_to_le32(handle);
 
-	ib.ptr[ib.length_dw++] = 0x00000014; /* len */
-	ib.ptr[ib.length_dw++] = 0x05000005; /* feedback buffer */
-	ib.ptr[ib.length_dw++] = upper_32_bits(dummy);
-	ib.ptr[ib.length_dw++] = dummy;
-	ib.ptr[ib.length_dw++] = 0x00000001;
+	ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000014); /* len */
+	ib.ptr[ib.length_dw++] = cpu_to_le32(0x05000005); /* feedback buffer */
+	ib.ptr[ib.length_dw++] = cpu_to_le32(upper_32_bits(dummy));
+	ib.ptr[ib.length_dw++] = cpu_to_le32(dummy);
+	ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000001);
 
-	ib.ptr[ib.length_dw++] = 0x00000008; /* len */
-	ib.ptr[ib.length_dw++] = 0x02000001; /* destroy cmd */
+	ib.ptr[ib.length_dw++] = cpu_to_le32(0x00000008); /* len */
+	ib.ptr[ib.length_dw++] = cpu_to_le32(0x02000001); /* destroy cmd */
 
 	for (i = ib.length_dw; i < ib_size_dw; ++i)
-		ib.ptr[i] = 0x0;
+		ib.ptr[i] = cpu_to_le32(0x0);
 
 	r = radeon_ib_schedule(rdev, &ib, NULL, false);
 	if (r) {
-- 
cgit v0.10.2


From 9a811230481243f384b8036c6a558bfdbd961f78 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Wed, 9 Dec 2015 15:17:43 +0100
Subject: ALSA: hda - Fix noise problems on Thinkpad T440s

Lenovo Thinkpad T440s suffers from constant background noises, and it
seems to be a generic hardware issue on this model:
  https://forums.lenovo.com/t5/ThinkPad-T400-T500-and-newer-T/T440s-speaker-noise/td-p/1339883

As the noise comes from the analog loopback path, disabling the path
is the easy workaround.

Also, the machine gives significant cracking noises at PM suspend.  A
workaround found by trial-and-error is to disable the shutup callback
currently used for ALC269-variant.

This patch addresses these noise issues by introducing a new fixup
chain.  Although the same workaround might be applicable to other
Thinkpad models, it's applied only to T440s (17aa:220c) in this patch,
so far, just to be safe (you chicken!).  As a compromise, a new model
option string "tp440" is provided now, though, so that owners of other
Thinkpad models can test it more easily.

Bugzilla: https://bugzilla.opensuse.org/show_bug.cgi?id=958504
Reported-and-tested-by: Tim Hardeck <thardeck@suse.de>
Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 49b1d36..8dd2ac1 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -4204,6 +4204,18 @@ static void alc_fixup_tpt440_dock(struct hda_codec *codec,
 	}
 }
 
+/* additional fixup for Thinkpad T440s noise problem */
+static void alc_fixup_tpt440(struct hda_codec *codec,
+				  const struct hda_fixup *fix, int action)
+{
+	struct alc_spec *spec = codec->spec;
+
+	if (action == HDA_FIXUP_ACT_PRE_PROBE) {
+		spec->shutup = alc_no_shutup; /* reduce click noise */
+		spec->gen.mixer_nid = 0; /* reduce background noise */
+	}
+}
+
 static void alc_shutup_dell_xps13(struct hda_codec *codec)
 {
 	struct alc_spec *spec = codec->spec;
@@ -4578,6 +4590,7 @@ enum {
 	ALC255_FIXUP_HEADSET_MODE_NO_HP_MIC,
 	ALC293_FIXUP_DELL1_MIC_NO_PRESENCE,
 	ALC292_FIXUP_TPT440_DOCK,
+	ALC292_FIXUP_TPT440,
 	ALC283_FIXUP_BXBT2807_MIC,
 	ALC255_FIXUP_DELL_WMI_MIC_MUTE_LED,
 	ALC282_FIXUP_ASPIRE_V5_PINS,
@@ -5051,6 +5064,12 @@ static const struct hda_fixup alc269_fixups[] = {
 		.chained = true,
 		.chain_id = ALC269_FIXUP_LIMIT_INT_MIC_BOOST
 	},
+	[ALC292_FIXUP_TPT440] = {
+		.type = HDA_FIXUP_FUNC,
+		.v.func = alc_fixup_tpt440,
+		.chained = true,
+		.chain_id = ALC292_FIXUP_TPT440_DOCK,
+	},
 	[ALC283_FIXUP_BXBT2807_MIC] = {
 		.type = HDA_FIXUP_PINS,
 		.v.pins = (const struct hda_pintbl[]) {
@@ -5332,7 +5351,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x17aa, 0x21fb, "Thinkpad T430s", ALC269_FIXUP_LENOVO_DOCK),
 	SND_PCI_QUIRK(0x17aa, 0x2203, "Thinkpad X230 Tablet", ALC269_FIXUP_LENOVO_DOCK),
 	SND_PCI_QUIRK(0x17aa, 0x2208, "Thinkpad T431s", ALC269_FIXUP_LENOVO_DOCK),
-	SND_PCI_QUIRK(0x17aa, 0x220c, "Thinkpad T440s", ALC292_FIXUP_TPT440_DOCK),
+	SND_PCI_QUIRK(0x17aa, 0x220c, "Thinkpad T440s", ALC292_FIXUP_TPT440),
 	SND_PCI_QUIRK(0x17aa, 0x220e, "Thinkpad T440p", ALC292_FIXUP_TPT440_DOCK),
 	SND_PCI_QUIRK(0x17aa, 0x2210, "Thinkpad T540p", ALC292_FIXUP_TPT440_DOCK),
 	SND_PCI_QUIRK(0x17aa, 0x2211, "Thinkpad W541", ALC292_FIXUP_TPT440_DOCK),
@@ -5432,6 +5451,7 @@ static const struct hda_model_fixup alc269_fixup_models[] = {
 	{.id = ALC283_FIXUP_CHROME_BOOK, .name = "alc283-dac-wcaps"},
 	{.id = ALC283_FIXUP_SENSE_COMBO_JACK, .name = "alc283-sense-combo"},
 	{.id = ALC292_FIXUP_TPT440_DOCK, .name = "tpt440-dock"},
+	{.id = ALC292_FIXUP_TPT440, .name = "tpt440"},
 	{}
 };
 
-- 
cgit v0.10.2


From 4c3141e09cfa6460bfcd5e90f73e498db654c917 Mon Sep 17 00:00:00 2001
From: Carlo Caione <carlo@endlessm.com>
Date: Tue, 1 Dec 2015 17:24:17 +0100
Subject: of/irq: Export of_irq_find_parent again

of_irq_find_parent was made static since it had no users outside of
of_irq.c. Export it again since we are going to use it again.

Signed-off-by: Carlo Caione <carlo@endlessm.com>
[robh: move of_irq_find_parent to correct ifdef section]
Signed-off-by: Rob Herring <robh@kernel.org>

diff --git a/drivers/of/irq.c b/drivers/of/irq.c
index 902b89b..4fa916d 100644
--- a/drivers/of/irq.c
+++ b/drivers/of/irq.c
@@ -53,7 +53,7 @@ EXPORT_SYMBOL_GPL(irq_of_parse_and_map);
  * Returns a pointer to the interrupt parent node, or NULL if the interrupt
  * parent could not be determined.
  */
-static struct device_node *of_irq_find_parent(struct device_node *child)
+struct device_node *of_irq_find_parent(struct device_node *child)
 {
 	struct device_node *p;
 	const __be32 *parp;
@@ -77,6 +77,7 @@ static struct device_node *of_irq_find_parent(struct device_node *child)
 
 	return p;
 }
+EXPORT_SYMBOL_GPL(of_irq_find_parent);
 
 /**
  * of_irq_parse_raw - Low level interrupt tree parsing
diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h
index 039f2ee..f648acf 100644
--- a/include/linux/of_irq.h
+++ b/include/linux/of_irq.h
@@ -46,6 +46,7 @@ extern int of_irq_get(struct device_node *dev, int index);
 extern int of_irq_get_byname(struct device_node *dev, const char *name);
 extern int of_irq_to_resource_table(struct device_node *dev,
 		struct resource *res, int nr_irqs);
+extern struct device_node *of_irq_find_parent(struct device_node *child);
 extern struct irq_domain *of_msi_get_domain(struct device *dev,
 					    struct device_node *np,
 					    enum irq_domain_bus_token token);
@@ -70,6 +71,11 @@ static inline int of_irq_to_resource_table(struct device_node *dev,
 {
 	return 0;
 }
+static inline void *of_irq_find_parent(struct device_node *child)
+{
+	return NULL;
+}
+
 static inline struct irq_domain *of_msi_get_domain(struct device *dev,
 						   struct device_node *np,
 						   enum irq_domain_bus_token token)
-- 
cgit v0.10.2


From eaddb5725357e9f05ffe5d271630f8197d089da4 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Wed, 9 Dec 2015 09:11:10 -0600
Subject: of/irq: move of_msi_map_rid declaration to the correct ifdef section

In checking fixes for of_irq_find_parent declaration location, I found
that of_msi_map_rid is also wrong. of_msi_map_rid is not implemented for
Sparc, so it should not be in the Sparc specific section of the header.
Move it to just depend on OF_IRQ.

Cc: Frank Rowand <frowand.list@gmail.com>
Signed-off-by: Rob Herring <robh@kernel.org>

diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h
index f648acf..1e0deb8 100644
--- a/include/linux/of_irq.h
+++ b/include/linux/of_irq.h
@@ -53,6 +53,7 @@ extern struct irq_domain *of_msi_get_domain(struct device *dev,
 extern struct irq_domain *of_msi_map_get_device_domain(struct device *dev,
 						       u32 rid);
 extern void of_msi_configure(struct device *dev, struct device_node *np);
+u32 of_msi_map_rid(struct device *dev, struct device_node *msi_np, u32 rid_in);
 #else
 static inline int of_irq_count(struct device_node *dev)
 {
@@ -90,6 +91,11 @@ static inline struct irq_domain *of_msi_map_get_device_domain(struct device *dev
 static inline void of_msi_configure(struct device *dev, struct device_node *np)
 {
 }
+static inline u32 of_msi_map_rid(struct device *dev,
+				 struct device_node *msi_np, u32 rid_in)
+{
+	return rid_in;
+}
 #endif
 
 #if defined(CONFIG_OF_IRQ) || defined(CONFIG_SPARC)
@@ -99,7 +105,6 @@ static inline void of_msi_configure(struct device *dev, struct device_node *np)
  * so declare it here regardless of the CONFIG_OF_IRQ setting.
  */
 extern unsigned int irq_of_parse_and_map(struct device_node *node, int index);
-u32 of_msi_map_rid(struct device *dev, struct device_node *msi_np, u32 rid_in);
 
 #else /* !CONFIG_OF && !CONFIG_SPARC */
 static inline unsigned int irq_of_parse_and_map(struct device_node *dev,
@@ -107,12 +112,6 @@ static inline unsigned int irq_of_parse_and_map(struct device_node *dev,
 {
 	return 0;
 }
-
-static inline u32 of_msi_map_rid(struct device *dev,
-				 struct device_node *msi_np, u32 rid_in)
-{
-	return rid_in;
-}
 #endif /* !CONFIG_OF */
 
 #endif /* __OF_IRQ_H */
-- 
cgit v0.10.2


From 7d32cdef535622c0aea39807989f62cdddea207e Mon Sep 17 00:00:00 2001
From: Aaro Koskinen <aaro.koskinen@iki.fi>
Date: Mon, 23 Nov 2015 21:50:11 +0200
Subject: usb: musb: fail with error when no DMA controller set

Fail with error when no DMA controller is set.

Signed-off-by: Aaro Koskinen <aaro.koskinen@iki.fi>
Signed-off-by: Felipe Balbi <balbi@ti.com>

diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c
index 3c07ffd..ee9ff70 100644
--- a/drivers/usb/musb/musb_core.c
+++ b/drivers/usb/musb/musb_core.c
@@ -2094,6 +2094,7 @@ musb_init_controller(struct device *dev, int nIrq, void __iomem *ctrl)
 #ifndef CONFIG_MUSB_PIO_ONLY
 	if (!musb->ops->dma_init || !musb->ops->dma_exit) {
 		dev_err(dev, "DMA controller not set\n");
+		status = -ENODEV;
 		goto fail2;
 	}
 	musb_dma_controller_create = musb->ops->dma_init;
-- 
cgit v0.10.2


From 5328e1ea87fb2b5cf695115df4325c1913209e97 Mon Sep 17 00:00:00 2001
From: Gabriele Martino <g.martino@gmx.com>
Date: Wed, 9 Dec 2015 17:05:58 +0100
Subject: ALSA: hda/ca0132 - quirk for Alienware 17 2015

The Alienware 17 (2015) has the same card and pin configuration of the
Alienware 15, so the same quirks must be applied.

Signed-off-by: Gabriele Martino <g.martino@gmx.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/hda/patch_ca0132.c b/sound/pci/hda/patch_ca0132.c
index f8a12ca4..4ef2259 100644
--- a/sound/pci/hda/patch_ca0132.c
+++ b/sound/pci/hda/patch_ca0132.c
@@ -778,7 +778,8 @@ static const struct hda_pintbl alienware_pincfgs[] = {
 };
 
 static const struct snd_pci_quirk ca0132_quirks[] = {
-	SND_PCI_QUIRK(0x1028, 0x0685, "Alienware 15", QUIRK_ALIENWARE),
+	SND_PCI_QUIRK(0x1028, 0x0685, "Alienware 15 2015", QUIRK_ALIENWARE),
+	SND_PCI_QUIRK(0x1028, 0x0688, "Alienware 17 2015", QUIRK_ALIENWARE),
 	{}
 };
 
-- 
cgit v0.10.2


From 49e99fc717f624aa75ca755d6e7bc029efd3f0e9 Mon Sep 17 00:00:00 2001
From: Joe Thornber <ejt@redhat.com>
Date: Wed, 9 Dec 2015 16:23:24 +0000
Subject: dm thin metadata: fix bug when taking a metadata snapshot

When you take a metadata snapshot the btree roots for the mapping and
details tree need to have their reference counts incremented so they
persist for the lifetime of the metadata snap.

The roots being incremented were those currently written in the
superblock, which could possibly be out of date if concurrent IO is
triggering new mappings, breaking of sharing, etc.

Fix this by performing a commit with the metadata lock held while taking
a metadata snapshot.

Signed-off-by: Joe Thornber <ejt@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Cc: stable@vger.kernel.org

diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c
index 67871e7..c219a05 100644
--- a/drivers/md/dm-thin-metadata.c
+++ b/drivers/md/dm-thin-metadata.c
@@ -1207,6 +1207,12 @@ static int __reserve_metadata_snap(struct dm_pool_metadata *pmd)
 	dm_block_t held_root;
 
 	/*
+	 * We commit to ensure the btree roots which we increment in a
+	 * moment are up to date.
+	 */
+	__commit_transaction(pmd);
+
+	/*
 	 * Copy the superblock.
 	 */
 	dm_sm_inc_block(pmd->metadata_sm, THIN_SUPERBLOCK_LOCATION);
-- 
cgit v0.10.2


From 50dd842ad83b43bed71790efb31cfb2f6c05c9c1 Mon Sep 17 00:00:00 2001
From: Joe Thornber <ejt@redhat.com>
Date: Wed, 9 Dec 2015 16:38:12 +0000
Subject: dm space map metadata: fix ref counting bug when bootstrapping a new
 space map

When applying block operations (BOPs) do not remove them from the
uncommitted BOP ring-buffer until after they've been applied -- in case
we recurse.

Also, perform BOP_INC operation, in dm_sm_metadata_create() and
sm_metadata_extend(), in terms of the uncommitted BOP ring-buffer rather
than using direct calls to sm_ll_inc().

Signed-off-by: Joe Thornber <ejt@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Cc: stable@vger.kernel.org

diff --git a/drivers/md/persistent-data/dm-space-map-metadata.c b/drivers/md/persistent-data/dm-space-map-metadata.c
index 5309129..fca6dbc 100644
--- a/drivers/md/persistent-data/dm-space-map-metadata.c
+++ b/drivers/md/persistent-data/dm-space-map-metadata.c
@@ -136,7 +136,7 @@ static int brb_push(struct bop_ring_buffer *brb,
 	return 0;
 }
 
-static int brb_pop(struct bop_ring_buffer *brb, struct block_op *result)
+static int brb_peek(struct bop_ring_buffer *brb, struct block_op *result)
 {
 	struct block_op *bop;
 
@@ -147,6 +147,17 @@ static int brb_pop(struct bop_ring_buffer *brb, struct block_op *result)
 	result->type = bop->type;
 	result->block = bop->block;
 
+	return 0;
+}
+
+static int brb_pop(struct bop_ring_buffer *brb)
+{
+	struct block_op *bop;
+
+	if (brb_empty(brb))
+		return -ENODATA;
+
+	bop = brb->bops + brb->begin;
 	brb->begin = brb_next(brb, brb->begin);
 
 	return 0;
@@ -211,7 +222,7 @@ static int apply_bops(struct sm_metadata *smm)
 	while (!brb_empty(&smm->uncommitted)) {
 		struct block_op bop;
 
-		r = brb_pop(&smm->uncommitted, &bop);
+		r = brb_peek(&smm->uncommitted, &bop);
 		if (r) {
 			DMERR("bug in bop ring buffer");
 			break;
@@ -220,6 +231,8 @@ static int apply_bops(struct sm_metadata *smm)
 		r = commit_bop(smm, &bop);
 		if (r)
 			break;
+
+		brb_pop(&smm->uncommitted);
 	}
 
 	return r;
@@ -683,7 +696,6 @@ static struct dm_space_map bootstrap_ops = {
 static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks)
 {
 	int r, i;
-	enum allocation_event ev;
 	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
 	dm_block_t old_len = smm->ll.nr_blocks;
 
@@ -705,11 +717,12 @@ static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks)
 	 * allocate any new blocks.
 	 */
 	do {
-		for (i = old_len; !r && i < smm->begin; i++) {
-			r = sm_ll_inc(&smm->ll, i, &ev);
-			if (r)
-				goto out;
-		}
+		for (i = old_len; !r && i < smm->begin; i++)
+			r = add_bop(smm, BOP_INC, i);
+
+		if (r)
+			goto out;
+
 		old_len = smm->begin;
 
 		r = apply_bops(smm);
@@ -754,7 +767,6 @@ int dm_sm_metadata_create(struct dm_space_map *sm,
 {
 	int r;
 	dm_block_t i;
-	enum allocation_event ev;
 	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
 
 	smm->begin = superblock + 1;
@@ -782,7 +794,7 @@ int dm_sm_metadata_create(struct dm_space_map *sm,
 	 * allocated blocks that they were built from.
 	 */
 	for (i = superblock; !r && i < smm->begin; i++)
-		r = sm_ll_inc(&smm->ll, i, &ev);
+		r = add_bop(smm, BOP_INC, i);
 
 	if (r)
 		return r;
-- 
cgit v0.10.2


From d7e35dfa2531b53618b9e6edcd8752ce988ac555 Mon Sep 17 00:00:00 2001
From: Sasha Levin <sasha.levin@oracle.com>
Date: Thu, 3 Dec 2015 22:04:01 -0500
Subject: bitops.h: correctly handle rol32 with 0 byte shift

ROL on a 32 bit integer with a shift of 32 or more is undefined and the
result is arch-dependent. Avoid this by handling the trivial case of
roling by 0 correctly.

The trivial solution of checking if shift is 0 breaks gcc's detection
of this code as a ROL instruction, which is unacceptable.

This bug was reported and fixed in GCC
(https://gcc.gnu.org/bugzilla/show_bug.cgi?id=57157):

	The standard rotate idiom,

	  (x << n) | (x >> (32 - n))

	is recognized by gcc (for concreteness, I discuss only the case that x
	is an uint32_t here).

	However, this is portable C only for n in the range 0 < n < 32. For n
	== 0, we get x >> 32 which gives undefined behaviour according to the
	C standard (6.5.7, Bitwise shift operators). To portably support n ==
	0, one has to write the rotate as something like

	  (x << n) | (x >> ((-n) & 31))

	And this is apparently not recognized by gcc.

Note that this is broken on older GCCs and will result in slower ROL.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Sasha Levin <sasha.levin@oracle.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index 2b8ed12..defeaac 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -107,7 +107,7 @@ static inline __u64 ror64(__u64 word, unsigned int shift)
  */
 static inline __u32 rol32(__u32 word, unsigned int shift)
 {
-	return (word << shift) | (word >> (32 - shift));
+	return (word << shift) | (word >> ((-shift) & 31));
 }
 
 /**
-- 
cgit v0.10.2


From 27f972d3e00b50639deb4cc1392afaeb08d3cecc Mon Sep 17 00:00:00 2001
From: Jan Stancek <jstancek@redhat.com>
Date: Tue, 8 Dec 2015 13:57:51 -0500
Subject: ipmi: move timer init to before irq is setup

We encountered a panic on boot in ipmi_si on a dell per320 due to an
uninitialized timer as follows.

static int smi_start_processing(void       *send_info,
                                ipmi_smi_t intf)
{
        /* Try to claim any interrupts. */
        if (new_smi->irq_setup)
                new_smi->irq_setup(new_smi);

 --> IRQ arrives here and irq handler tries to modify uninitialized timer

    which triggers BUG_ON(!timer->function) in __mod_timer().

 Call Trace:
   <IRQ>
   [<ffffffffa0532617>] start_new_msg+0x47/0x80 [ipmi_si]
   [<ffffffffa053269e>] start_check_enables+0x4e/0x60 [ipmi_si]
   [<ffffffffa0532bd8>] smi_event_handler+0x1e8/0x640 [ipmi_si]
   [<ffffffff810f5584>] ? __rcu_process_callbacks+0x54/0x350
   [<ffffffffa053327c>] si_irq_handler+0x3c/0x60 [ipmi_si]
   [<ffffffff810efaf0>] handle_IRQ_event+0x60/0x170
   [<ffffffff810f245e>] handle_edge_irq+0xde/0x180
   [<ffffffff8100fc59>] handle_irq+0x49/0xa0
   [<ffffffff8154643c>] do_IRQ+0x6c/0xf0
   [<ffffffff8100ba53>] ret_from_intr+0x0/0x11

        /* Set up the timer that drives the interface. */
        setup_timer(&new_smi->si_timer, smi_timeout, (long)new_smi);

The following patch fixes the problem.

To: Openipmi-developer@lists.sourceforge.net
To: Corey Minyard <minyard@acm.org>
CC: linux-kernel@vger.kernel.org

Signed-off-by: Jan Stancek <jstancek@redhat.com>
Signed-off-by: Tony Camuso <tcamuso@redhat.com>
Signed-off-by: Corey Minyard <cminyard@mvista.com>
Cc: stable@vger.kernel.org # Applies cleanly to 3.10-, needs small rework before

diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c
index 55fe902..4cc72fa 100644
--- a/drivers/char/ipmi/ipmi_si_intf.c
+++ b/drivers/char/ipmi/ipmi_si_intf.c
@@ -1230,14 +1230,14 @@ static int smi_start_processing(void       *send_info,
 
 	new_smi->intf = intf;
 
-	/* Try to claim any interrupts. */
-	if (new_smi->irq_setup)
-		new_smi->irq_setup(new_smi);
-
 	/* Set up the timer that drives the interface. */
 	setup_timer(&new_smi->si_timer, smi_timeout, (long)new_smi);
 	smi_mod_timer(new_smi, jiffies + SI_TIMEOUT_JIFFIES);
 
+	/* Try to claim any interrupts. */
+	if (new_smi->irq_setup)
+		new_smi->irq_setup(new_smi);
+
 	/*
 	 * Check if the user forcefully enabled the daemon.
 	 */
-- 
cgit v0.10.2


From 14054fb1da099fd89208b8b319a00e0b902c7645 Mon Sep 17 00:00:00 2001
From: Grygorii Strashko <grygorii.strashko@ti.com>
Date: Mon, 30 Nov 2015 17:56:38 +0200
Subject: ARM: dts: am4372: fix clock source for arm twd and global timers

ARM TWD and Global timer are clocked by PERIPHCLK which is MPU_CLK/2.
But now they are clocked by dpll_mpu_m2_ck == MPU_CLK and, as result.
Timekeeping core misbehaves. For example, execution of command
"sleep 5" will take 10 sec instead of 5.

Hence, fix it by adding mpu_periphclk ("fixed-factor-clock") and use
it for clocking ARM TWD and Global timer (same way as on OMAP4).

Cc: Tony Lindgren <tony@atomide.com>
Cc: Felipe Balbi <balbi@ti.com>
Cc: Tero Kristo <t-kristo@ti.com>
Fixes:commit 8cbd4c2f6a99 ("arm: boot: dts: am4372: add ARM timers and SCU nodes")
Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com>
Reviewed-by: Felipe Balbi <balbi@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>

diff --git a/arch/arm/boot/dts/am4372.dtsi b/arch/arm/boot/dts/am4372.dtsi
index d83ff9c..de8791a 100644
--- a/arch/arm/boot/dts/am4372.dtsi
+++ b/arch/arm/boot/dts/am4372.dtsi
@@ -74,7 +74,7 @@
 		reg = <0x48240200 0x100>;
 		interrupts = <GIC_PPI 11 IRQ_TYPE_LEVEL_HIGH>;
 		interrupt-parent = <&gic>;
-		clocks = <&dpll_mpu_m2_ck>;
+		clocks = <&mpu_periphclk>;
 	};
 
 	local_timer: timer@48240600 {
@@ -82,7 +82,7 @@
 		reg = <0x48240600 0x100>;
 		interrupts = <GIC_PPI 13 IRQ_TYPE_LEVEL_HIGH>;
 		interrupt-parent = <&gic>;
-		clocks = <&dpll_mpu_m2_ck>;
+		clocks = <&mpu_periphclk>;
 	};
 
 	l2-cache-controller@48242000 {
diff --git a/arch/arm/boot/dts/am43xx-clocks.dtsi b/arch/arm/boot/dts/am43xx-clocks.dtsi
index cc88728..a38af2b 100644
--- a/arch/arm/boot/dts/am43xx-clocks.dtsi
+++ b/arch/arm/boot/dts/am43xx-clocks.dtsi
@@ -259,6 +259,14 @@
 		ti,invert-autoidle-bit;
 	};
 
+	mpu_periphclk: mpu_periphclk {
+		#clock-cells = <0>;
+		compatible = "fixed-factor-clock";
+		clocks = <&dpll_mpu_m2_ck>;
+		clock-mult = <1>;
+		clock-div = <2>;
+	};
+
 	dpll_ddr_ck: dpll_ddr_ck {
 		#clock-cells = <0>;
 		compatible = "ti,am3-dpll-clock";
-- 
cgit v0.10.2


From ed8b45a3679eb49069b094c0711b30833f27c734 Mon Sep 17 00:00:00 2001
From: Joe Thornber <ejt@redhat.com>
Date: Thu, 10 Dec 2015 14:37:53 +0000
Subject: dm btree: fix bufio buffer leaks in dm_btree_del() error path

If dm_btree_del()'s call to push_frame() fails, e.g. due to
btree_node_validator finding invalid metadata, the dm_btree_del() error
path must unlock all frames (which have active dm-bufio buffers) that
were pushed onto the del_stack.

Otherwise, dm_bufio_client_destroy() will BUG_ON() because dm-bufio
buffers have leaked, e.g.:
  device-mapper: bufio: leaked buffer 3, hold count 1, list 0

Signed-off-by: Joe Thornber <ejt@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Cc: stable@vger.kernel.org

diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c
index 7e5b7f1..b1ced58 100644
--- a/drivers/md/persistent-data/dm-btree.c
+++ b/drivers/md/persistent-data/dm-btree.c
@@ -257,6 +257,16 @@ static void pop_frame(struct del_stack *s)
 	dm_tm_unlock(s->tm, f->b);
 }
 
+static void unlock_all_frames(struct del_stack *s)
+{
+	struct frame *f;
+
+	while (unprocessed_frames(s)) {
+		f = s->spine + s->top--;
+		dm_tm_unlock(s->tm, f->b);
+	}
+}
+
 int dm_btree_del(struct dm_btree_info *info, dm_block_t root)
 {
 	int r;
@@ -313,9 +323,13 @@ int dm_btree_del(struct dm_btree_info *info, dm_block_t root)
 			pop_frame(s);
 		}
 	}
-
 out:
+	if (r) {
+		/* cleanup all frames of del_stack */
+		unlock_all_frames(s);
+	}
 	kfree(s);
+
 	return r;
 }
 EXPORT_SYMBOL_GPL(dm_btree_del);
-- 
cgit v0.10.2


From 059393c5bdd1420bdf1bed2972f33196dff263ae Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Mon, 7 Dec 2015 10:11:11 +0000
Subject: irqchip/gic-v3: Add missing struct device_node declaration

When the GICv3 header file is used in a C file that doesn't include
any of the OF stuff, we end up with a bunch of ugly warnings.

Let's keep GCC quiet by adding a forward declaration.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Cc: <linux-arm-kernel@lists.infradead.org>
Cc: Jason Cooper <jason@lakedaemon.net>
Link: http://lkml.kernel.org/r/1449483072-17694-2-git-send-email-marc.zyngier@arm.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index c9ae0c6..d5d798b 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -330,6 +330,7 @@ struct rdists {
 };
 
 struct irq_domain;
+struct device_node;
 int its_cpu_init(void);
 int its_init(struct device_node *node, struct rdists *rdists,
 	     struct irq_domain *domain);
-- 
cgit v0.10.2


From 8e31ed9c189788463c3b6c1f950cd90755f05f76 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Mon, 7 Dec 2015 10:11:12 +0000
Subject: irqchip/gic-v3: Add missing include for barrier.h

Both the 32bit and 64bit versions of the GICv3 header file are using
barriers, but neglect to include barrier.h, leading to an interesting
splat in some circumstances.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Cc: <linux-arm-kernel@lists.infradead.org>
Cc: Jason Cooper <jason@lakedaemon.net>
Link: http://lkml.kernel.org/r/1449483072-17694-3-git-send-email-marc.zyngier@arm.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/arm/include/asm/arch_gicv3.h b/arch/arm/include/asm/arch_gicv3.h
index 6607d97..7da5503 100644
--- a/arch/arm/include/asm/arch_gicv3.h
+++ b/arch/arm/include/asm/arch_gicv3.h
@@ -21,6 +21,7 @@
 #ifndef __ASSEMBLY__
 
 #include <linux/io.h>
+#include <asm/barrier.h>
 
 #define __ACCESS_CP15(CRn, Op1, CRm, Op2)	p15, Op1, %0, CRn, CRm, Op2
 #define __ACCESS_CP15_64(Op1, CRm)		p15, Op1, %Q0, %R0, CRm
diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h
index 030cdcb..2731d3b 100644
--- a/arch/arm64/include/asm/arch_gicv3.h
+++ b/arch/arm64/include/asm/arch_gicv3.h
@@ -77,6 +77,7 @@
 #ifndef __ASSEMBLY__
 
 #include <linux/stringify.h>
+#include <asm/barrier.h>
 
 /*
  * Low-level accessors
-- 
cgit v0.10.2


From e2bf3e6ecaff79c5479682da2dc7b2035e52c5b8 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Thu, 10 Dec 2015 18:21:41 +0100
Subject: clocksource: Mmio: remove artificial 32bit limitation

The EP93xx is registering a clocksource of 40 bits with
clocksource_mmio_init() but this is not working because of this
artificial limitation. It works fine to lift the uppe limit to
64 bits, and since cycle_t is u64, it should intuitively have been
like that from the beginning.

Fixes: 000bc17817bf "ARM: ep93xx: switch to GENERIC_CLOCKEVENTS"
Reported-by: Alexander Sverdlin <alexander.sverdlin@gmail.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
Link: http://lkml.kernel.org/r/1449768101-6879-1-git-send-email-linus.walleij@linaro.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/drivers/clocksource/mmio.c b/drivers/clocksource/mmio.c
index 1593ade..c4f7d7a 100644
--- a/drivers/clocksource/mmio.c
+++ b/drivers/clocksource/mmio.c
@@ -55,7 +55,7 @@ int __init clocksource_mmio_init(void __iomem *base, const char *name,
 {
 	struct clocksource_mmio *cs;
 
-	if (bits > 32 || bits < 16)
+	if (bits > 64 || bits < 16)
 		return -EINVAL;
 
 	cs = kzalloc(sizeof(struct clocksource_mmio), GFP_KERNEL);
-- 
cgit v0.10.2


From 5d756147f7d356c3e85372373a0054dcf25a370e Mon Sep 17 00:00:00 2001
From: Jisheng Zhang <jszhang@marvell.com>
Date: Mon, 7 Dec 2015 21:09:24 +0800
Subject: ARM: dts: berlin: correct BG2Q's sdhci2 2nd clock

The optional 2nd clock is CLKID_SDIO. We removed CLK_IGNORE_UNUSED
from CLKID_SDIO's flag, so the sdhci2 doesn't work. This patch fixes
this issue by correcting the sdhci2's 2nd clock.

Signed-off-by: Jisheng Zhang <jszhang@marvell.com>
Signed-off-by: Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com>

diff --git a/arch/arm/boot/dts/berlin2q.dtsi b/arch/arm/boot/dts/berlin2q.dtsi
index 8ea177f..5b10129 100644
--- a/arch/arm/boot/dts/berlin2q.dtsi
+++ b/arch/arm/boot/dts/berlin2q.dtsi
@@ -135,7 +135,7 @@
 			compatible = "mrvl,pxav3-mmc";
 			reg = <0xab1000 0x200>;
 			interrupts = <GIC_SPI 28 IRQ_TYPE_LEVEL_HIGH>;
-			clocks = <&chip_clk CLKID_NFC_ECC>, <&chip_clk CLKID_NFC>;
+			clocks = <&chip_clk CLKID_NFC_ECC>, <&chip_clk CLKID_SDIO>;
 			clock-names = "io", "core";
 			status = "disabled";
 		};
-- 
cgit v0.10.2


From b5010d2081331e1c95358c4eb500bd30f5d532ce Mon Sep 17 00:00:00 2001
From: Jisheng Zhang <jszhang@marvell.com>
Date: Mon, 7 Dec 2015 21:09:25 +0800
Subject: ARM: dts: berlin: add 2nd clock for BG2Q sdhci0 and sdhci1

We removed CLK_IGNORE_UNUSED from CLKID_SDIO's flag, so the sdhci0 and
sdhci1 don't work. We fix this by adding the optional 2nd clock for
BG2Q's sdhci0 and sdhci1. This patch brings another benefit: the 2nd
clock can be disabled during runtime pm, so saves power a bit.

Signed-off-by: Jisheng Zhang <jszhang@marvell.com>
Signed-off-by: Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com>

diff --git a/arch/arm/boot/dts/berlin2q.dtsi b/arch/arm/boot/dts/berlin2q.dtsi
index 5b10129..fb1da99 100644
--- a/arch/arm/boot/dts/berlin2q.dtsi
+++ b/arch/arm/boot/dts/berlin2q.dtsi
@@ -118,7 +118,8 @@
 		sdhci0: sdhci@ab0000 {
 			compatible = "mrvl,pxav3-mmc";
 			reg = <0xab0000 0x200>;
-			clocks = <&chip_clk CLKID_SDIO1XIN>;
+			clocks = <&chip_clk CLKID_SDIO1XIN>, <&chip_clk CLKID_SDIO>;
+			clock-names = "io", "core";
 			interrupts = <GIC_SPI 17 IRQ_TYPE_LEVEL_HIGH>;
 			status = "disabled";
 		};
@@ -126,7 +127,8 @@
 		sdhci1: sdhci@ab0800 {
 			compatible = "mrvl,pxav3-mmc";
 			reg = <0xab0800 0x200>;
-			clocks = <&chip_clk CLKID_SDIO1XIN>;
+			clocks = <&chip_clk CLKID_SDIO1XIN>, <&chip_clk CLKID_SDIO>;
+			clock-names = "io", "core";
 			interrupts = <GIC_SPI 20 IRQ_TYPE_LEVEL_HIGH>;
 			status = "disabled";
 		};
-- 
cgit v0.10.2


From ddd47fbcf68dd07487fd86f5e3d9f594f8349980 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 9 Dec 2015 15:11:12 +0100
Subject: ARM: pxa: use PWM lookup table for all machines

The recent change to use a pwm lookup table for the ezx machines
was incomplete and only changed the a780 model, but not the
other ones in the same file.

This adds the missing calls to pwm_add_table().

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Fixes: c3322022897c ("ARM: pxa: ezx: Use PWM lookup table")
Acked-by: Thierry Reding <thierry.reding@gmail.com>
Acked-by: Robert Jarzmik <robert.jarzmik@free.fr>

diff --git a/arch/arm/mach-pxa/ezx.c b/arch/arm/mach-pxa/ezx.c
index 9a9c15b..7c0d561 100644
--- a/arch/arm/mach-pxa/ezx.c
+++ b/arch/arm/mach-pxa/ezx.c
@@ -889,6 +889,7 @@ static void __init e680_init(void)
 
 	pxa_set_keypad_info(&e680_keypad_platform_data);
 
+	pwm_add_table(ezx_pwm_lookup, ARRAY_SIZE(ezx_pwm_lookup));
 	platform_add_devices(ARRAY_AND_SIZE(ezx_devices));
 	platform_add_devices(ARRAY_AND_SIZE(e680_devices));
 }
@@ -956,6 +957,7 @@ static void __init a1200_init(void)
 
 	pxa_set_keypad_info(&a1200_keypad_platform_data);
 
+	pwm_add_table(ezx_pwm_lookup, ARRAY_SIZE(ezx_pwm_lookup));
 	platform_add_devices(ARRAY_AND_SIZE(ezx_devices));
 	platform_add_devices(ARRAY_AND_SIZE(a1200_devices));
 }
@@ -1148,6 +1150,7 @@ static void __init a910_init(void)
 		platform_device_register(&a910_camera);
 	}
 
+	pwm_add_table(ezx_pwm_lookup, ARRAY_SIZE(ezx_pwm_lookup));
 	platform_add_devices(ARRAY_AND_SIZE(ezx_devices));
 	platform_add_devices(ARRAY_AND_SIZE(a910_devices));
 }
@@ -1215,6 +1218,7 @@ static void __init e6_init(void)
 
 	pxa_set_keypad_info(&e6_keypad_platform_data);
 
+	pwm_add_table(ezx_pwm_lookup, ARRAY_SIZE(ezx_pwm_lookup));
 	platform_add_devices(ARRAY_AND_SIZE(ezx_devices));
 	platform_add_devices(ARRAY_AND_SIZE(e6_devices));
 }
@@ -1256,6 +1260,7 @@ static void __init e2_init(void)
 
 	pxa_set_keypad_info(&e2_keypad_platform_data);
 
+	pwm_add_table(ezx_pwm_lookup, ARRAY_SIZE(ezx_pwm_lookup));
 	platform_add_devices(ARRAY_AND_SIZE(ezx_devices));
 	platform_add_devices(ARRAY_AND_SIZE(e2_devices));
 }
-- 
cgit v0.10.2


From 9f5bd30818c42c6c36a51f93b4df75a2ea2bd85e Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill@shutemov.name>
Date: Mon, 30 Nov 2015 04:17:31 +0200
Subject: vgaarb: fix signal handling in vga_get()

There are few defects in vga_get() related to signal hadning:

  - we shouldn't check for pending signals for TASK_UNINTERRUPTIBLE
    case;

  - if we found pending signal we must remove ourself from wait queue
    and change task state back to running;

  - -ERESTARTSYS is more appropriate, I guess.

Signed-off-by: Kirill A. Shutemov <kirill@shutemov.name>
Cc: stable@vger.kernel.org
Reviewed-by: David Herrmann <dh.herrmann@gmail.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>

diff --git a/drivers/gpu/vga/vgaarb.c b/drivers/gpu/vga/vgaarb.c
index 3166e4b..9abcaa5 100644
--- a/drivers/gpu/vga/vgaarb.c
+++ b/drivers/gpu/vga/vgaarb.c
@@ -395,8 +395,10 @@ int vga_get(struct pci_dev *pdev, unsigned int rsrc, int interruptible)
 		set_current_state(interruptible ?
 				  TASK_INTERRUPTIBLE :
 				  TASK_UNINTERRUPTIBLE);
-		if (signal_pending(current)) {
-			rc = -EINTR;
+		if (interruptible && signal_pending(current)) {
+			__set_current_state(TASK_RUNNING);
+			remove_wait_queue(&vga_wait_queue, &wait);
+			rc = -ERESTARTSYS;
 			break;
 		}
 		schedule();
-- 
cgit v0.10.2


From 9c17190595840b4ed30e8d5f286636ceb28aae4f Mon Sep 17 00:00:00 2001
From: Stefan Agner <stefan@agner.ch>
Date: Mon, 30 Nov 2015 17:59:26 -0800
Subject: ARM: dts: vf610: use reset values for L2 cache latencies

Linux on Vybrid used several different L2 latencies so far, none
of them seem to be the right ones. According to the application note
AN4947 ("Understanding Vybrid Architecture"), the tag portion runs
on CPU clock and is inside the L2 cache controller, whereas the data
portion is stored in the external SRAM running on platform clock.
Hence it is likely that the correct value requires a higher data
latency then tag latency.

These are the values which have been used so far:
- The mainline values:
  arm,data-latency = <1 1 1>;
  arm,tag-latency = <2 2 2>;
  Those values have lead to problems on higher clocks. They look
  like a poor translation from the reset values (missing +1 offset
  and a mix up between tag/latency values).
- The Linux 3.0 (SoC vendor BSP) values (converted to DT notation):
  arm,data-latency = <4 2 3>
  arm,tag-latency = <4 2 3>
  The cache initialization function along with the value matches the
  i.MX6 code from the same kernel, so it seems that those values have
  just been copied.
- The Colibri values:
  arm,data-latency = <2 1 2>;
  arm,tag-latency = <3 2 3>;
  Those were a mix between the values of the Linux 3.0 based BSP and
  the mainline values above.
- The SoC Reset values (converted to DT notation):
  arm,data-latency = <3 3 3>;
  arm,tag-latency = <2 2 2>;

So far there is no official statement on what the correct values are.
See also the related Freescale community thread:
https://community.freescale.com/message/579785#579785

For now, the reset values seem to be the best bet. Remove all other
"bogus" values and use the reset value on vf610.dtsi level.

Signed-off-by: Stefan Agner <stefan@agner.ch>
Cc: <stable@vger.kernel.org>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>

diff --git a/arch/arm/boot/dts/vf610-colibri.dtsi b/arch/arm/boot/dts/vf610-colibri.dtsi
index 19fe045..2d7eab7 100644
--- a/arch/arm/boot/dts/vf610-colibri.dtsi
+++ b/arch/arm/boot/dts/vf610-colibri.dtsi
@@ -18,8 +18,3 @@
 		reg = <0x80000000 0x10000000>;
 	};
 };
-
-&L2 {
-	arm,data-latency = <2 1 2>;
-	arm,tag-latency = <3 2 3>;
-};
diff --git a/arch/arm/boot/dts/vf610.dtsi b/arch/arm/boot/dts/vf610.dtsi
index 5f8eb1b..58bc6e4 100644
--- a/arch/arm/boot/dts/vf610.dtsi
+++ b/arch/arm/boot/dts/vf610.dtsi
@@ -19,7 +19,7 @@
 		reg = <0x40006000 0x1000>;
 		cache-unified;
 		cache-level = <2>;
-		arm,data-latency = <1 1 1>;
+		arm,data-latency = <3 3 3>;
 		arm,tag-latency = <2 2 2>;
 	};
 };
-- 
cgit v0.10.2


From 82d340081b6f71237373d1452e3573a5a122794c Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Tue, 8 Dec 2015 17:39:15 +0000
Subject: arm64: Improve error reporting on set_pte_at() checks

Currently the BUG_ON() checks do not give enough information about the
PTEs being set. This patch changes BUG_ON to WARN_ONCE and dumps the
values of the old and new PTEs. In addition, the checks are only made if
the new PTE entry is valid.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Reported-by: Ming Lei <tom.leiming@gmail.com>
Cc: Will Deacon <will.deacon@arm.com>

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 7e074f9..63f52b5 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -276,10 +276,14 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
 	 * hardware updates of the pte (ptep_set_access_flags safely changes
 	 * valid ptes without going through an invalid entry).
 	 */
-	if (IS_ENABLED(CONFIG_DEBUG_VM) && IS_ENABLED(CONFIG_ARM64_HW_AFDBM) &&
-	    pte_valid(*ptep)) {
-		BUG_ON(!pte_young(pte));
-		BUG_ON(pte_write(*ptep) && !pte_dirty(pte));
+	if (IS_ENABLED(CONFIG_ARM64_HW_AFDBM) &&
+	    pte_valid(*ptep) && pte_valid(pte)) {
+		VM_WARN_ONCE(!pte_young(pte),
+			     "%s: racy access flag clearing: 0x%016llx -> 0x%016llx",
+			     __func__, pte_val(*ptep), pte_val(pte));
+		VM_WARN_ONCE(pte_write(*ptep) && !pte_dirty(pte),
+			     "%s: racy dirty state clearing: 0x%016llx -> 0x%016llx",
+			     __func__, pte_val(*ptep), pte_val(pte));
 	}
 
 	set_pte(ptep, pte);
-- 
cgit v0.10.2


From f69115fdbc1ac0718e7d19ad3caa3da2ecfe1c96 Mon Sep 17 00:00:00 2001
From: Mathias Nyman <mathias.nyman@linux.intel.com>
Date: Fri, 11 Dec 2015 14:38:06 +0200
Subject: xhci: fix usb2 resume timing and races.

According to USB 2 specs ports need to signal resume for at least 20ms,
in practice even longer, before moving to U0 state.
Both host and devices can initiate resume.

On device initiated resume, a port status interrupt with the port in resume
state in issued. The interrupt handler tags a resume_done[port]
timestamp with current time + USB_RESUME_TIMEOUT, and kick roothub timer.
Root hub timer requests for port status, finds the port in resume state,
checks if resume_done[port] timestamp passed, and set port to U0 state.

On host initiated resume, current code sets the port to resume state,
sleep 20ms, and finally sets the port to U0 state. This should also
be changed to work in a similar way as the device initiated resume, with
timestamp tagging, but that is not yet tested and will be a separate
fix later.

There are a few issues with this approach

1. A host initiated resume will also generate a resume event. The event
   handler will find the port in resume state, believe it's a device
   initiated resume, and act accordingly.

2. A port status request might cut the resume signalling short if a
   get_port_status request is handled during the host resume signalling.
   The port will be found in resume state. The timestamp is not set leading
   to time_after_eq(jiffies, timestamp) returning true, as timestamp = 0.
   get_port_status will proceed with moving the port to U0.

3. If an error, or anything else happens to the port during device
   initiated resume signalling it will leave all the device resume
   parameters hanging uncleared, preventing further suspend, returning
   -EBUSY, and cause the pm thread to busyloop trying to enter suspend.

Fix this by using the existing resuming_ports bitfield to indicate that
resume signalling timing is taken care of.
Check if the resume_done[port] is set before using it for timestamp
comparison, and also clear out any resume signalling related variables
if port is not in U0 or Resume state

This issue was discovered when a PM thread busylooped, trying to runtime
suspend the xhci USB 2 roothub on a Dell XPS

Cc: stable <stable@vger.kernel.org>
Reported-by: Daniel J Blueman <daniel@quora.org>
Tested-by: Daniel J Blueman <daniel@quora.org>
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c
index 0230965..f980c23 100644
--- a/drivers/usb/host/xhci-hub.c
+++ b/drivers/usb/host/xhci-hub.c
@@ -733,8 +733,30 @@ static u32 xhci_get_port_status(struct usb_hcd *hcd,
 		if ((raw_port_status & PORT_RESET) ||
 				!(raw_port_status & PORT_PE))
 			return 0xffffffff;
-		if (time_after_eq(jiffies,
-					bus_state->resume_done[wIndex])) {
+		/* did port event handler already start resume timing? */
+		if (!bus_state->resume_done[wIndex]) {
+			/* If not, maybe we are in a host initated resume? */
+			if (test_bit(wIndex, &bus_state->resuming_ports)) {
+				/* Host initated resume doesn't time the resume
+				 * signalling using resume_done[].
+				 * It manually sets RESUME state, sleeps 20ms
+				 * and sets U0 state. This should probably be
+				 * changed, but not right now.
+				 */
+			} else {
+				/* port resume was discovered now and here,
+				 * start resume timing
+				 */
+				unsigned long timeout = jiffies +
+					msecs_to_jiffies(USB_RESUME_TIMEOUT);
+
+				set_bit(wIndex, &bus_state->resuming_ports);
+				bus_state->resume_done[wIndex] = timeout;
+				mod_timer(&hcd->rh_timer, timeout);
+			}
+		/* Has resume been signalled for USB_RESUME_TIME yet? */
+		} else if (time_after_eq(jiffies,
+					 bus_state->resume_done[wIndex])) {
 			int time_left;
 
 			xhci_dbg(xhci, "Resume USB2 port %d\n",
@@ -775,13 +797,26 @@ static u32 xhci_get_port_status(struct usb_hcd *hcd,
 		} else {
 			/*
 			 * The resume has been signaling for less than
-			 * 20ms. Report the port status as SUSPEND,
-			 * let the usbcore check port status again
-			 * and clear resume signaling later.
+			 * USB_RESUME_TIME. Report the port status as SUSPEND,
+			 * let the usbcore check port status again and clear
+			 * resume signaling later.
 			 */
 			status |= USB_PORT_STAT_SUSPEND;
 		}
 	}
+	/*
+	 * Clear stale usb2 resume signalling variables in case port changed
+	 * state during resume signalling. For example on error
+	 */
+	if ((bus_state->resume_done[wIndex] ||
+	     test_bit(wIndex, &bus_state->resuming_ports)) &&
+	    (raw_port_status & PORT_PLS_MASK) != XDEV_U3 &&
+	    (raw_port_status & PORT_PLS_MASK) != XDEV_RESUME) {
+		bus_state->resume_done[wIndex] = 0;
+		clear_bit(wIndex, &bus_state->resuming_ports);
+	}
+
+
 	if ((raw_port_status & PORT_PLS_MASK) == XDEV_U0 &&
 	    (raw_port_status & PORT_POWER)) {
 		if (bus_state->suspended_ports & (1 << wIndex)) {
@@ -1115,6 +1150,7 @@ int xhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue,
 				if ((temp & PORT_PE) == 0)
 					goto error;
 
+				set_bit(wIndex, &bus_state->resuming_ports);
 				xhci_set_link_state(xhci, port_array, wIndex,
 							XDEV_RESUME);
 				spin_unlock_irqrestore(&xhci->lock, flags);
@@ -1122,6 +1158,7 @@ int xhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue,
 				spin_lock_irqsave(&xhci->lock, flags);
 				xhci_set_link_state(xhci, port_array, wIndex,
 							XDEV_U0);
+				clear_bit(wIndex, &bus_state->resuming_ports);
 			}
 			bus_state->port_c_suspend |= 1 << wIndex;
 
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 6c5e813..eeaa6c6 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -1583,7 +1583,8 @@ static void handle_port_status(struct xhci_hcd *xhci,
 			 */
 			bogus_port_status = true;
 			goto cleanup;
-		} else {
+		} else if (!test_bit(faked_port_index,
+				     &bus_state->resuming_ports)) {
 			xhci_dbg(xhci, "resume HS port %d\n", port_id);
 			bus_state->resume_done[faked_port_index] = jiffies +
 				msecs_to_jiffies(USB_RESUME_TIMEOUT);
-- 
cgit v0.10.2


From ad87e03213b552a5c33d5e1e7a19a73768397010 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Thu, 10 Dec 2015 15:27:21 -0500
Subject: USB: add quirk for devices with broken LPM

Some USB device / host controller combinations seem to have problems
with Link Power Management.  For example, Steinar found that his xHCI
controller wouldn't handle bandwidth calculations correctly for two
video cards simultaneously when LPM was enabled, even though the bus
had plenty of bandwidth available.

This patch introduces a new quirk flag for devices that should remain
disabled for LPM, and creates quirk entries for Steinar's devices.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Reported-by: Steinar H. Gunderson <sgunderson@bigfoot.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index 585c3cb..a5cc032 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -124,6 +124,10 @@ struct usb_hub *usb_hub_to_struct_hub(struct usb_device *hdev)
 
 int usb_device_supports_lpm(struct usb_device *udev)
 {
+	/* Some devices have trouble with LPM */
+	if (udev->quirks & USB_QUIRK_NO_LPM)
+		return 0;
+
 	/* USB 2.1 (and greater) devices indicate LPM support through
 	 * their USB 2.0 Extended Capabilities BOS descriptor.
 	 */
@@ -4512,6 +4516,8 @@ hub_port_init(struct usb_hub *hub, struct usb_device *udev, int port1,
 		goto fail;
 	}
 
+	usb_detect_quirks(udev);
+
 	if (udev->wusb == 0 && le16_to_cpu(udev->descriptor.bcdUSB) >= 0x0201) {
 		retval = usb_get_bos_descriptor(udev);
 		if (!retval) {
@@ -4710,7 +4716,6 @@ static void hub_port_connect(struct usb_hub *hub, int port1, u16 portstatus,
 		if (status < 0)
 			goto loop;
 
-		usb_detect_quirks(udev);
 		if (udev->quirks & USB_QUIRK_DELAY_INIT)
 			msleep(1000);
 
diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c
index fcd6ac0..6dc810b 100644
--- a/drivers/usb/core/quirks.c
+++ b/drivers/usb/core/quirks.c
@@ -202,6 +202,12 @@ static const struct usb_device_id usb_quirk_list[] = {
 	{ USB_DEVICE(0x1a0a, 0x0200), .driver_info =
 			USB_QUIRK_LINEAR_UFRAME_INTR_BINTERVAL },
 
+	/* Blackmagic Design Intensity Shuttle */
+	{ USB_DEVICE(0x1edb, 0xbd3b), .driver_info = USB_QUIRK_NO_LPM },
+
+	/* Blackmagic Design UltraStudio SDI */
+	{ USB_DEVICE(0x1edb, 0xbd4f), .driver_info = USB_QUIRK_NO_LPM },
+
 	{ }  /* terminating entry must be last */
 };
 
diff --git a/include/linux/usb/quirks.h b/include/linux/usb/quirks.h
index 9948c87..1d0043d 100644
--- a/include/linux/usb/quirks.h
+++ b/include/linux/usb/quirks.h
@@ -47,4 +47,7 @@
 /* device generates spurious wakeup, ignore remote wakeup capability */
 #define USB_QUIRK_IGNORE_REMOTE_WAKEUP		BIT(9)
 
+/* device can't handle Link Power Management */
+#define USB_QUIRK_NO_LPM			BIT(10)
+
 #endif /* __LINUX_USB_QUIRKS_H */
-- 
cgit v0.10.2


From 2dd2e4d1eaadfc39eae0399c5be1a122a73ed13b Mon Sep 17 00:00:00 2001
From: yangbo lu <yangbo.lu@freescale.com>
Date: Fri, 4 Dec 2015 16:55:03 -0600
Subject: ARM64: dts: ls2080a: fix eSDHC endianness

Add the "little-endian" property to fix the issue that eSDHC
is not working and dumping out "mmc0: Controller never released
inhibit bit(s)." error messages constantly.

Fixes: 5461597f6ce0 ("dts/ls2080a: Update DTSI to add support of various peripherals")
Signed-off-by: Yangbo Lu <yangbo.lu@freescale.com>
Signed-off-by: Li Yang <leoli@freescale.com>
Signed-off-by: Kevin Hilman <khilman@linaro.org>

diff --git a/arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi
index e81cd48..ed45e25 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi
+++ b/arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi
@@ -269,6 +269,7 @@
 			clock-frequency = <0>;	/* Updated by bootloader */
 			voltage-ranges = <1800 1800 3300 3300>;
 			sdhci,auto-cmd12;
+			little-endian;
 			bus-width = <4>;
 		};
 
-- 
cgit v0.10.2


From edb70e29f3c3e3b1064a7b7a5d129c8f32762e6f Mon Sep 17 00:00:00 2001
From: Li Yang <leoli@freescale.com>
Date: Fri, 4 Dec 2015 16:55:04 -0600
Subject: dt-bindings: define little-endian property for QorIQ GPIO

The GPIO block on different QorIQ chips could have registers in different
endianess.  Define the property to specify which endian is used by the
hardware.

Signed-off-by: Liu Gang <Gang.Liu@freescale.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Li Yang <leoli@freescale.com>
Signed-off-by: Kevin Hilman <khilman@linaro.org>

diff --git a/Documentation/devicetree/bindings/gpio/gpio-mpc8xxx.txt b/Documentation/devicetree/bindings/gpio/gpio-mpc8xxx.txt
index f2455c5..120bc49 100644
--- a/Documentation/devicetree/bindings/gpio/gpio-mpc8xxx.txt
+++ b/Documentation/devicetree/bindings/gpio/gpio-mpc8xxx.txt
@@ -11,6 +11,10 @@ Required properties:
       0 = active high
       1 = active low
 
+Optional properties:
+- little-endian : GPIO registers are used as little endian. If not
+                  present registers are used as big endian by default.
+
 Example:
 
 gpio0: gpio@1100 {
-- 
cgit v0.10.2


From 6534778371b157e375a28a70290201d9aa20ab08 Mon Sep 17 00:00:00 2001
From: Liu Gang <Gang.Liu@freescale.com>
Date: Fri, 4 Dec 2015 16:55:05 -0600
Subject: ls2080a/dts: Add little endian property for GPIO IP block

The GPIO block for ls2080a platform has little endian registers,
the GPIO driver needs this property to read/write registers by
right interface.

Signed-off-by: Liu Gang <Gang.Liu@freescale.com>
Signed-off-by: Li Yang <leoli@freescale.com>
Signed-off-by: Kevin Hilman <khilman@linaro.org>

diff --git a/arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi
index ed45e25..925552e 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi
+++ b/arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi
@@ -278,6 +278,7 @@
 			reg = <0x0 0x2300000 0x0 0x10000>;
 			interrupts = <0 36 0x4>; /* Level high type */
 			gpio-controller;
+			little-endian;
 			#gpio-cells = <2>;
 			interrupt-controller;
 			#interrupt-cells = <2>;
@@ -288,6 +289,7 @@
 			reg = <0x0 0x2310000 0x0 0x10000>;
 			interrupts = <0 36 0x4>; /* Level high type */
 			gpio-controller;
+			little-endian;
 			#gpio-cells = <2>;
 			interrupt-controller;
 			#interrupt-cells = <2>;
@@ -298,6 +300,7 @@
 			reg = <0x0 0x2320000 0x0 0x10000>;
 			interrupts = <0 37 0x4>; /* Level high type */
 			gpio-controller;
+			little-endian;
 			#gpio-cells = <2>;
 			interrupt-controller;
 			#interrupt-cells = <2>;
@@ -308,6 +311,7 @@
 			reg = <0x0 0x2330000 0x0 0x10000>;
 			interrupts = <0 37 0x4>; /* Level high type */
 			gpio-controller;
+			little-endian;
 			#gpio-cells = <2>;
 			interrupt-controller;
 			#interrupt-cells = <2>;
-- 
cgit v0.10.2


From e46e31a3696ae2d66f32c207df3969613726e636 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Mon, 30 Nov 2015 14:47:46 -0500
Subject: parisc iommu: fix panic due to trying to allocate too large region

When using the Promise TX2+ SATA controller on PA-RISC, the system often
crashes with kernel panic, for example just writing data with the dd
utility will make it crash.

Kernel panic - not syncing: drivers/parisc/sba_iommu.c: I/O MMU @ 000000000000a000 is out of mapping resources

CPU: 0 PID: 18442 Comm: mkspadfs Not tainted 4.4.0-rc2 #2
Backtrace:
 [<000000004021497c>] show_stack+0x14/0x20
 [<0000000040410bf0>] dump_stack+0x88/0x100
 [<000000004023978c>] panic+0x124/0x360
 [<0000000040452c18>] sba_alloc_range+0x698/0x6a0
 [<0000000040453150>] sba_map_sg+0x260/0x5b8
 [<000000000c18dbb4>] ata_qc_issue+0x264/0x4a8 [libata]
 [<000000000c19535c>] ata_scsi_translate+0xe4/0x220 [libata]
 [<000000000c19a93c>] ata_scsi_queuecmd+0xbc/0x320 [libata]
 [<0000000040499bbc>] scsi_dispatch_cmd+0xfc/0x130
 [<000000004049da34>] scsi_request_fn+0x6e4/0x970
 [<00000000403e95a8>] __blk_run_queue+0x40/0x60
 [<00000000403e9d8c>] blk_run_queue+0x3c/0x68
 [<000000004049a534>] scsi_run_queue+0x2a4/0x360
 [<000000004049be68>] scsi_end_request+0x1a8/0x238
 [<000000004049de84>] scsi_io_completion+0xfc/0x688
 [<0000000040493c74>] scsi_finish_command+0x17c/0x1d0

The cause of the crash is not exhaustion of the IOMMU space, there is
plenty of free pages. The function sba_alloc_range is called with size
0x11000, thus the pages_needed variable is 0x11. The function
sba_search_bitmap is called with bits_wanted 0x11 and boundary size is
0x10 (because dma_get_seg_boundary(dev) returns 0xffff).

The function sba_search_bitmap attempts to allocate 17 pages that must not
cross 16-page boundary - it can't satisfy this requirement
(iommu_is_span_boundary always returns true) and fails even if there are
many free entries in the IOMMU space.

How did it happen that we try to allocate 17 pages that don't cross
16-page boundary? The cause is in the function iommu_coalesce_chunks. This
function tries to coalesce adjacent entries in the scatterlist. The
function does several checks if it may coalesce one entry with the next,
one of those checks is this:

	if (startsg->length + dma_len > max_seg_size)
		break;

When it finishes coalescing adjacent entries, it allocates the mapping:

sg_dma_len(contig_sg) = dma_len;
dma_len = ALIGN(dma_len + dma_offset, IOVP_SIZE);
sg_dma_address(contig_sg) =
	PIDE_FLAG
	| (iommu_alloc_range(ioc, dev, dma_len) << IOVP_SHIFT)
	| dma_offset;

It is possible that (startsg->length + dma_len > max_seg_size) is false
(we are just near the 0x10000 max_seg_size boundary), so the funcion
decides to coalesce this entry with the next entry. When the coalescing
succeeds, the function performs
	dma_len = ALIGN(dma_len + dma_offset, IOVP_SIZE);
And now, because of non-zero dma_offset, dma_len is greater than 0x10000.
iommu_alloc_range (a pointer to sba_alloc_range) is called and it attempts
to allocate 17 pages for a device that must not cross 16-page boundary.

To fix the bug, we must make sure that dma_len after addition of
dma_offset and alignment doesn't cross the segment boundary. I.e. change
	if (startsg->length + dma_len > max_seg_size)
		break;
to
	if (ALIGN(dma_len + dma_offset + startsg->length, IOVP_SIZE) > max_seg_size)
		break;

This patch makes this change (it precalculates max_seg_boundary at the
beginning of the function iommu_coalesce_chunks). I also added a check
that the mapping length doesn't exceed dma_get_seg_boundary(dev) (it is
not needed for Promise TX2+ SATA, but it may be needed for other devices
that have dma_get_seg_boundary lower than dma_get_max_seg_size).

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Cc: stable@vger.kernel.org
Signed-off-by: Helge Deller <deller@gmx.de>

diff --git a/drivers/parisc/iommu-helpers.h b/drivers/parisc/iommu-helpers.h
index 761e77b..e56f156 100644
--- a/drivers/parisc/iommu-helpers.h
+++ b/drivers/parisc/iommu-helpers.h
@@ -104,7 +104,11 @@ iommu_coalesce_chunks(struct ioc *ioc, struct device *dev,
 	struct scatterlist *contig_sg;	   /* contig chunk head */
 	unsigned long dma_offset, dma_len; /* start/len of DMA stream */
 	unsigned int n_mappings = 0;
-	unsigned int max_seg_size = dma_get_max_seg_size(dev);
+	unsigned int max_seg_size = min(dma_get_max_seg_size(dev),
+					(unsigned)DMA_CHUNK_SIZE);
+	unsigned int max_seg_boundary = dma_get_seg_boundary(dev) + 1;
+	if (max_seg_boundary)	/* check if the addition above didn't overflow */
+		max_seg_size = min(max_seg_size, max_seg_boundary);
 
 	while (nents > 0) {
 
@@ -138,14 +142,11 @@ iommu_coalesce_chunks(struct ioc *ioc, struct device *dev,
 
 			/*
 			** First make sure current dma stream won't
-			** exceed DMA_CHUNK_SIZE if we coalesce the
+			** exceed max_seg_size if we coalesce the
 			** next entry.
 			*/   
-			if(unlikely(ALIGN(dma_len + dma_offset + startsg->length,
-					    IOVP_SIZE) > DMA_CHUNK_SIZE))
-				break;
-
-			if (startsg->length + dma_len > max_seg_size)
+			if (unlikely(ALIGN(dma_len + dma_offset + startsg->length, IOVP_SIZE) >
+				     max_seg_size))
 				break;
 
 			/*
-- 
cgit v0.10.2


From 5f0e9b4c301742c860b8aedc3e7c471d4a611043 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Tue, 1 Dec 2015 10:41:47 -0600
Subject: parisc: Remove unused pcibios_init_bus()

There are no callers of pcibios_init_bus(), so remove it.

Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Signed-off-by: Helge Deller <deller@gmx.de>

diff --git a/arch/parisc/kernel/pci.c b/arch/parisc/kernel/pci.c
index 64f2764..c99f3dd 100644
--- a/arch/parisc/kernel/pci.c
+++ b/arch/parisc/kernel/pci.c
@@ -171,24 +171,6 @@ void pcibios_set_master(struct pci_dev *dev)
 }
 
 
-void __init pcibios_init_bus(struct pci_bus *bus)
-{
-	struct pci_dev *dev = bus->self;
-	unsigned short bridge_ctl;
-
-	/* We deal only with pci controllers and pci-pci bridges. */
-	if (!dev || (dev->class >> 8) != PCI_CLASS_BRIDGE_PCI)
-		return;
-
-	/* PCI-PCI bridge - set the cache line and default latency
-	   (32) for primary and secondary buses. */
-	pci_write_config_byte(dev, PCI_SEC_LATENCY_TIMER, 32);
-
-	pci_read_config_word(dev, PCI_BRIDGE_CONTROL, &bridge_ctl);
-	bridge_ctl |= PCI_BRIDGE_CTL_PARITY | PCI_BRIDGE_CTL_SERR;
-	pci_write_config_word(dev, PCI_BRIDGE_CONTROL, bridge_ctl);
-}
-
 /*
  * pcibios align resources() is called every time generic PCI code
  * wants to generate a new address. The process of looking for
-- 
cgit v0.10.2


From 5c477b45792fb8ccd279162483aa57cc8ee40ae1 Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Sun, 6 Dec 2015 21:56:26 +0100
Subject: parisc: Wire up mlock2 syscall

Signed-off-by: Helge Deller <deller@gmx.de>

diff --git a/arch/parisc/include/uapi/asm/unistd.h b/arch/parisc/include/uapi/asm/unistd.h
index 3317038..35bdccb 100644
--- a/arch/parisc/include/uapi/asm/unistd.h
+++ b/arch/parisc/include/uapi/asm/unistd.h
@@ -360,8 +360,9 @@
 #define __NR_execveat		(__NR_Linux + 342)
 #define __NR_membarrier		(__NR_Linux + 343)
 #define __NR_userfaultfd	(__NR_Linux + 344)
+#define __NR_mlock2		(__NR_Linux + 345)
 
-#define __NR_Linux_syscalls	(__NR_userfaultfd + 1)
+#define __NR_Linux_syscalls	(__NR_mlock2 + 1)
 
 
 #define __IGNORE_select		/* newselect */
diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S
index 78c3ef8..d4ffcfb 100644
--- a/arch/parisc/kernel/syscall_table.S
+++ b/arch/parisc/kernel/syscall_table.S
@@ -440,6 +440,7 @@
 	ENTRY_COMP(execveat)
 	ENTRY_SAME(membarrier)
 	ENTRY_SAME(userfaultfd)
+	ENTRY_SAME(mlock2)		/* 345 */
 
 
 .ifne (. - 90b) - (__NR_Linux_syscalls * (91b - 90b))
-- 
cgit v0.10.2


From 78c0cbffebe60e1182c8f077de2c06a1a48dc5ae Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Sun, 6 Dec 2015 21:25:20 +0100
Subject: parisc: Disable huge pages on Mako machines

Mako-based machines (PA8800 and PA8900 CPUs) don't allow aliasing on
non-equaivalent addresses.

Signed-off-by: Helge Deller <deller@gmx.de>

diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h
index d8534f9..291cee2 100644
--- a/arch/parisc/include/asm/pgtable.h
+++ b/arch/parisc/include/asm/pgtable.h
@@ -372,7 +372,8 @@ static inline pte_t pte_mkspecial(pte_t pte)	{ return pte; }
  */
 #ifdef CONFIG_HUGETLB_PAGE
 #define pte_huge(pte)           (pte_val(pte) & _PAGE_HUGE)
-#define pte_mkhuge(pte)         (__pte(pte_val(pte) | _PAGE_HUGE))
+#define pte_mkhuge(pte)         (__pte(pte_val(pte) | \
+				 (parisc_requires_coherency() ? 0 : _PAGE_HUGE)))
 #else
 #define pte_huge(pte)           (0)
 #define pte_mkhuge(pte)         (pte)
-- 
cgit v0.10.2


From a88c769548047b21f76fd71e04b6a3300ff17160 Mon Sep 17 00:00:00 2001
From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Date: Fri, 11 Dec 2015 13:40:24 -0800
Subject: mm: hugetlb: fix hugepage memory leak caused by wrong reserve count

When dequeue_huge_page_vma() in alloc_huge_page() fails, we fall back on
alloc_buddy_huge_page() to directly create a hugepage from the buddy
allocator.

In that case, however, if alloc_buddy_huge_page() succeeds we don't
decrement h->resv_huge_pages, which means that successful
hugetlb_fault() returns without releasing the reserve count.  As a
result, subsequent hugetlb_fault() might fail despite that there are
still free hugepages.

This patch simply adds decrementing code on that code path.

I reproduced this problem when testing v4.3 kernel in the following situation:
 - the test machine/VM is a NUMA system,
 - hugepage overcommiting is enabled,
 - most of hugepages are allocated and there's only one free hugepage
   which is on node 0 (for example),
 - another program, which calls set_mempolicy(MPOL_BIND) to bind itself to
   node 1, tries to allocate a hugepage,
 - the allocation should fail but the reserve count is still hold.

Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: <stable@vger.kernel.org> [3.16+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 827bb02..4fe4340 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1886,7 +1886,10 @@ struct page *alloc_huge_page(struct vm_area_struct *vma,
 		page = __alloc_buddy_huge_page_with_mpol(h, vma, addr);
 		if (!page)
 			goto out_uncharge_cgroup;
-
+		if (!avoid_reserve && vma_has_reserves(vma, gbl_chg)) {
+			SetPagePrivate(page);
+			h->resv_huge_pages--;
+		}
 		spin_lock(&hugetlb_lock);
 		list_move(&page->lru, &h->hugepage_activelist);
 		/* Fall through */
-- 
cgit v0.10.2


From 9516a18a9a253a94292e74f11f92083126c5a237 Mon Sep 17 00:00:00 2001
From: Vladimir Davydov <vdavydov@virtuozzo.com>
Date: Fri, 11 Dec 2015 13:40:24 -0800
Subject: memcg: fix memory.high target

When the memory.high threshold is exceeded, try_charge() schedules a
task_work to reclaim the excess.  The reclaim target is set to the
number of pages requested by try_charge().

This is wrong, because try_charge() usually charges more pages than
requested (batch > nr_pages) in order to refill per cpu stocks.  As a
result, a process in a cgroup can easily exceed memory.high
significantly when doing a lot of charges w/o returning to userspace
(e.g.  reading a file in big chunks).

Fix this issue by assuring that when exceeding memory.high a process
reclaims as many pages as were actually charged (i.e.  batch).

Signed-off-by: Vladimir Davydov <vdavydov@virtuozzo.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c92a65b..32259e3 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2128,7 +2128,7 @@ done_restock:
 	 */
 	do {
 		if (page_counter_read(&memcg->memory) > memcg->high) {
-			current->memcg_nr_pages_over_high += nr_pages;
+			current->memcg_nr_pages_over_high += batch;
 			set_notify_resume(current);
 			break;
 		}
-- 
cgit v0.10.2


From 475a2f905d5a41d5fc569ef21841be67d0a7f788 Mon Sep 17 00:00:00 2001
From: Vlastimil Babka <vbabka@suse.cz>
Date: Fri, 11 Dec 2015 13:40:29 -0800
Subject: mm: fix swapped Movable and Reclaimable in /proc/pagetypeinfo

Commit 016c13daa5c9 ("mm, page_alloc: use masks and shifts when
converting GFP flags to migrate types") has swapped MIGRATE_MOVABLE and
MIGRATE_RECLAIMABLE in the enum definition.  However, migratetype_names
wasn't updated to reflect that.

As a result, the file /proc/pagetypeinfo shows the counts for Movable as
Reclaimable and vice versa.

Additionally, commit 0aaa29a56e4f ("mm, page_alloc: reserve pageblocks
for high-order atomic allocations on demand") introduced
MIGRATE_HIGHATOMIC, but did not add a letter to distinguish it into
show_migration_types(), so it doesn't appear in the listing of free
areas during page alloc failures or oom kills.

This patch fixes both problems.  The atomic reserves will show with a
letter 'H' in the free areas listings.

Fixes: 016c13daa5c9 ("mm, page_alloc: use masks and shifts when converting GFP flags to migrate types")
Fixes: 0aaa29a56e4f ("mm, page_alloc: reserve pageblocks for high-order atomic allocations on demand")
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Mel Gorman <mgorman@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 17a3c66..9d666df 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3647,8 +3647,9 @@ static void show_migration_types(unsigned char type)
 {
 	static const char types[MIGRATE_TYPES] = {
 		[MIGRATE_UNMOVABLE]	= 'U',
-		[MIGRATE_RECLAIMABLE]	= 'E',
 		[MIGRATE_MOVABLE]	= 'M',
+		[MIGRATE_RECLAIMABLE]	= 'E',
+		[MIGRATE_HIGHATOMIC]	= 'H',
 #ifdef CONFIG_CMA
 		[MIGRATE_CMA]		= 'C',
 #endif
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 879a2be..2ec3434 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -921,8 +921,8 @@ static void walk_zones_in_node(struct seq_file *m, pg_data_t *pgdat,
 #ifdef CONFIG_PROC_FS
 static char * const migratetype_names[MIGRATE_TYPES] = {
 	"Unmovable",
-	"Reclaimable",
 	"Movable",
+	"Reclaimable",
 	"HighAtomic",
 #ifdef CONFIG_CMA
 	"CMA",
-- 
cgit v0.10.2


From 373ccbe5927034b55bdc80b0f8b54d6e13fe8d12 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Fri, 11 Dec 2015 13:40:32 -0800
Subject: mm, vmstat: allow WQ concurrency to discover memory reclaim doesn't
 make any progress

Tetsuo Handa has reported that the system might basically livelock in
OOM condition without triggering the OOM killer.

The issue is caused by internal dependency of the direct reclaim on
vmstat counter updates (via zone_reclaimable) which are performed from
the workqueue context.  If all the current workers get assigned to an
allocation request, though, they will be looping inside the allocator
trying to reclaim memory but zone_reclaimable can see stalled numbers so
it will consider a zone reclaimable even though it has been scanned way
too much.  WQ concurrency logic will not consider this situation as a
congested workqueue because it relies that worker would have to sleep in
such a situation.  This also means that it doesn't try to spawn new
workers or invoke the rescuer thread if the one is assigned to the
queue.

In order to fix this issue we need to do two things.  First we have to
let wq concurrency code know that we are in trouble so we have to do a
short sleep.  In order to prevent from issues handled by 0e093d99763e
("writeback: do not sleep on the congestion queue if there are no
congested BDIs or if significant congestion is not being encountered in
the current zone") we limit the sleep only to worker threads which are
the ones of the interest anyway.

The second thing to do is to create a dedicated workqueue for vmstat and
mark it WQ_MEM_RECLAIM to note it participates in the reclaim and to
have a spare worker thread for it.

Signed-off-by: Michal Hocko <mhocko@suse.com>
Reported-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Tejun Heo <tj@kernel.org>
Cc: Cristopher Lameter <clameter@sgi.com>
Cc: Joonsoo Kim <js1304@gmail.com>
Cc: Arkadiusz Miskiewicz <arekm@maven.pl>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 8ed2ffd..7340353 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -957,8 +957,9 @@ EXPORT_SYMBOL(congestion_wait);
  * jiffies for either a BDI to exit congestion of the given @sync queue
  * or a write to complete.
  *
- * In the absence of zone congestion, cond_resched() is called to yield
- * the processor if necessary but otherwise does not sleep.
+ * In the absence of zone congestion, a short sleep or a cond_resched is
+ * performed to yield the processor and to allow other subsystems to make
+ * a forward progress.
  *
  * The return value is 0 if the sleep is for the full timeout. Otherwise,
  * it is the number of jiffies that were still remaining when the function
@@ -978,7 +979,19 @@ long wait_iff_congested(struct zone *zone, int sync, long timeout)
 	 */
 	if (atomic_read(&nr_wb_congested[sync]) == 0 ||
 	    !test_bit(ZONE_CONGESTED, &zone->flags)) {
-		cond_resched();
+
+		/*
+		 * Memory allocation/reclaim might be called from a WQ
+		 * context and the current implementation of the WQ
+		 * concurrency control doesn't recognize that a particular
+		 * WQ is congested if the worker thread is looping without
+		 * ever sleeping. Therefore we have to do a short sleep
+		 * here rather than calling cond_resched().
+		 */
+		if (current->flags & PF_WQ_WORKER)
+			schedule_timeout(1);
+		else
+			cond_resched();
 
 		/* In case we scheduled, work out time remaining */
 		ret = timeout - (jiffies - start);
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 2ec3434..0d5712b 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1379,6 +1379,7 @@ static const struct file_operations proc_vmstat_file_operations = {
 #endif /* CONFIG_PROC_FS */
 
 #ifdef CONFIG_SMP
+static struct workqueue_struct *vmstat_wq;
 static DEFINE_PER_CPU(struct delayed_work, vmstat_work);
 int sysctl_stat_interval __read_mostly = HZ;
 static cpumask_var_t cpu_stat_off;
@@ -1391,7 +1392,7 @@ static void vmstat_update(struct work_struct *w)
 		 * to occur in the future. Keep on running the
 		 * update worker thread.
 		 */
-		schedule_delayed_work_on(smp_processor_id(),
+		queue_delayed_work_on(smp_processor_id(), vmstat_wq,
 			this_cpu_ptr(&vmstat_work),
 			round_jiffies_relative(sysctl_stat_interval));
 	} else {
@@ -1460,7 +1461,7 @@ static void vmstat_shepherd(struct work_struct *w)
 		if (need_update(cpu) &&
 			cpumask_test_and_clear_cpu(cpu, cpu_stat_off))
 
-			schedule_delayed_work_on(cpu,
+			queue_delayed_work_on(cpu, vmstat_wq,
 				&per_cpu(vmstat_work, cpu), 0);
 
 	put_online_cpus();
@@ -1549,6 +1550,7 @@ static int __init setup_vmstat(void)
 
 	start_shepherd_timer();
 	cpu_notifier_register_done();
+	vmstat_wq = alloc_workqueue("vmstat", WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
 #endif
 #ifdef CONFIG_PROC_FS
 	proc_create("buddyinfo", S_IRUGO, NULL, &fragmentation_file_operations);
-- 
cgit v0.10.2


From ed0f1e2102b6f245210be5f93a08d708b6ac070f Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Fri, 11 Dec 2015 13:40:35 -0800
Subject: MAINTAINERS: make Vladimir co-maintainer of the memory controller

Vladimir architected and authored much of the current state of the
memcg's slab memory accounting and tracking.  Make sure he gets CC'd on
bug reports ;-)

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Vladimir Davydov <vdavydov@virtuozzo.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/MAINTAINERS b/MAINTAINERS
index 38df53f..9bff63c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2975,6 +2975,7 @@ F:	kernel/cpuset.c
 CONTROL GROUP - MEMORY RESOURCE CONTROLLER (MEMCG)
 M:	Johannes Weiner <hannes@cmpxchg.org>
 M:	Michal Hocko <mhocko@kernel.org>
+M:	Vladimir Davydov <vdavydov@virtuozzo.com>
 L:	cgroups@vger.kernel.org
 L:	linux-mm@kvack.org
 S:	Maintained
-- 
cgit v0.10.2


From 3066a9670ba6f57dd5046640a2494912a2110f1e Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Fri, 11 Dec 2015 13:40:38 -0800
Subject: osd fs: __r4w_get_page rely on PageUptodate for uptodate

Commit 42cb14b110a5 ("mm: migrate dirty page without
clear_page_dirty_for_io etc") simplified the migration of a PageDirty
pagecache page: one stat needs moving from zone to zone and that's about
all.

It's convenient and safest for it to shift the PageDirty bit from old
page to new, just before updating the zone stats: before copying data
and marking the new PageUptodate.  This is all done while both pages are
isolated and locked, just as before; and just as before, there's a
moment when the new page is visible in the radix_tree, but not yet
PageUptodate.  What's new is that it may now be briefly visible as
PageDirty before it is PageUptodate.

When I scoured the tree to see if this could cause a problem anywhere,
the only places I found were in two similar functions __r4w_get_page():
which look up a page with find_get_page() (not using page lock), then
claim it's uptodate if it's PageDirty or PageWriteback or PageUptodate.

I'm not sure whether that was right before, but now it might be wrong
(on rare occasions): only claim the page is uptodate if PageUptodate.
Or perhaps the page in question could never be migratable anyway?

Signed-off-by: Hugh Dickins <hughd@google.com>
Tested-by: Boaz Harrosh <ooo@electrozaur.com>
Cc: Benny Halevy <bhalevy@panasas.com>
Cc: Trond Myklebust <trond.myklebust@primarydata.com>
Cc: Christoph Lameter <cl@linux.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index 73c64da..60f03b7 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -592,10 +592,7 @@ static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate)
 			}
 			unlock_page(page);
 		}
-		if (PageDirty(page) || PageWriteback(page))
-			*uptodate = true;
-		else
-			*uptodate = PageUptodate(page);
+		*uptodate = PageUptodate(page);
 		EXOFS_DBGMSG2("index=0x%lx uptodate=%d\n", index, *uptodate);
 		return page;
 	} else {
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index 5c0c6b5..9aebffb 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -476,10 +476,7 @@ static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate)
 		}
 		unlock_page(page);
 	}
-	if (PageDirty(page) || PageWriteback(page))
-		*uptodate = true;
-	else
-		*uptodate = PageUptodate(page);
+	*uptodate = PageUptodate(page);
 	dprintk("%s: index=0x%lx uptodate=%d\n", __func__, index, *uptodate);
 	return page;
 }
-- 
cgit v0.10.2


From 25be6a659598791d511c09ed71dca24e4844d128 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Fri, 11 Dec 2015 13:40:40 -0800
Subject: mm: fix kerneldoc on mem_cgroup_replace_page

Whoops, I missed removing the kerneldoc comment of the lrucare arg
removed from mem_cgroup_replace_page; but it's a good comment, keep it.

Signed-off-by: Hugh Dickins <hughd@google.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 32259e3..e234c21 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5512,11 +5512,11 @@ void mem_cgroup_uncharge_list(struct list_head *page_list)
  * mem_cgroup_replace_page - migrate a charge to another page
  * @oldpage: currently charged page
  * @newpage: page to transfer the charge to
- * @lrucare: either or both pages might be on the LRU already
  *
  * Migrate the charge from @oldpage to @newpage.
  *
  * Both pages must be locked, @newpage->mapping must be set up.
+ * Either or both pages might be on the LRU already.
  */
 void mem_cgroup_replace_page(struct page *oldpage, struct page *newpage)
 {
-- 
cgit v0.10.2


From 98e89cf02aed11166698dd53c6f14865613babb3 Mon Sep 17 00:00:00 2001
From: Nicolas Iooss <nicolas.iooss_linux@m4x.org>
Date: Fri, 11 Dec 2015 13:40:43 -0800
Subject: mm: kmemleak: mark kmemleak_init prototype as __init

The kmemleak_init() definition in mm/kmemleak.c is marked __init but its
prototype in include/linux/kmemleak.h is marked __ref since commit
a6186d89c913 ("kmemleak: Mark the early log buffer as __initdata").

This causes a section mismatch which is reported as a warning when
building with clang -Wsection, because kmemleak_init() is declared in
section .ref.text but defined in .init.text.

Fix this by marking kmemleak_init() prototype __init.

Signed-off-by: Nicolas Iooss <nicolas.iooss_linux@m4x.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/include/linux/kmemleak.h b/include/linux/kmemleak.h
index d0a1f99..4894c68 100644
--- a/include/linux/kmemleak.h
+++ b/include/linux/kmemleak.h
@@ -25,7 +25,7 @@
 
 #ifdef CONFIG_DEBUG_KMEMLEAK
 
-extern void kmemleak_init(void) __ref;
+extern void kmemleak_init(void) __init;
 extern void kmemleak_alloc(const void *ptr, size_t size, int min_count,
 			   gfp_t gfp) __ref;
 extern void kmemleak_alloc_percpu(const void __percpu *ptr, size_t size,
-- 
cgit v0.10.2


From 86fffe4a61dd972d5a4e23260d530be6da02f614 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Fri, 11 Dec 2015 13:40:46 -0800
Subject: kernel: remove stop_machine() Kconfig dependency

Currently the full stop_machine() routine is only enabled on SMP if
module unloading is enabled, or if the CPUs are hotpluggable.  This
leads to configurations where stop_machine() is broken as it will then
only run the callback on the local CPU with irqs disabled, and not stop
the other CPUs or run the callback on them.

For example, this breaks MTRR setup on x86 in certain configs since
ea8596bb2d8d379 ("kprobes/x86: Remove unused text_poke_smp() and
text_poke_smp_batch() functions") as the MTRR is only established on the
boot CPU.

This patch removes the Kconfig option for STOP_MACHINE and uses the SMP
and HOTPLUG_CPU config options to compile the correct stop_machine() for
the architecture, removing the false dependency on MODULE_UNLOAD in the
process.

Link: https://lkml.org/lkml/2014/10/8/124
References: https://bugs.freedesktop.org/show_bug.cgi?id=84794
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Acked-by: Ingo Molnar <mingo@kernel.org>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Pranith Kumar <bobby.prani@gmail.com>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Vladimir Davydov <vdavydov@parallels.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: H. Peter Anvin <hpa@linux.intel.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Iulia Manda <iulia.manda21@gmail.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Chuck Ebbert <cebbert.lkml@gmail.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h
index 0adedca..0e1b154 100644
--- a/include/linux/stop_machine.h
+++ b/include/linux/stop_machine.h
@@ -99,7 +99,7 @@ static inline int try_stop_cpus(const struct cpumask *cpumask,
  * grabbing every spinlock (and more).  So the "read" side to such a
  * lock is anything which disables preemption.
  */
-#if defined(CONFIG_STOP_MACHINE) && defined(CONFIG_SMP)
+#if defined(CONFIG_SMP) || defined(CONFIG_HOTPLUG_CPU)
 
 /**
  * stop_machine: freeze the machine on all CPUs and run this function
@@ -118,7 +118,7 @@ int stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus);
 
 int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data,
 				   const struct cpumask *cpus);
-#else	 /* CONFIG_STOP_MACHINE && CONFIG_SMP */
+#else	/* CONFIG_SMP || CONFIG_HOTPLUG_CPU */
 
 static inline int stop_machine(cpu_stop_fn_t fn, void *data,
 				 const struct cpumask *cpus)
@@ -137,5 +137,5 @@ static inline int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data,
 	return stop_machine(fn, data, cpus);
 }
 
-#endif	/* CONFIG_STOP_MACHINE && CONFIG_SMP */
+#endif	/* CONFIG_SMP || CONFIG_HOTPLUG_CPU */
 #endif	/* _LINUX_STOP_MACHINE */
diff --git a/init/Kconfig b/init/Kconfig
index c24b6f7..235c7a2 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -2030,13 +2030,6 @@ config INIT_ALL_POSSIBLE
 	  it was better to provide this option than to break all the archs
 	  and have several arch maintainers pursuing me down dark alleys.
 
-config STOP_MACHINE
-	bool
-	default y
-	depends on (SMP && MODULE_UNLOAD) || HOTPLUG_CPU
-	help
-	  Need stop_machine() primitive.
-
 source "block/Kconfig"
 
 config PREEMPT_NOTIFIERS
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 867bc20..a3bbaee 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -531,7 +531,7 @@ static int __init cpu_stop_init(void)
 }
 early_initcall(cpu_stop_init);
 
-#ifdef CONFIG_STOP_MACHINE
+#if defined(CONFIG_SMP) || defined(CONFIG_HOTPLUG_CPU)
 
 static int __stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus)
 {
@@ -631,4 +631,4 @@ int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data,
 	return ret ?: done.ret;
 }
 
-#endif	/* CONFIG_STOP_MACHINE */
+#endif	/* CONFIG_SMP || CONFIG_HOTPLUG_CPU */
-- 
cgit v0.10.2


From 0d777df5d8953293be090d9ab5a355db893e8357 Mon Sep 17 00:00:00 2001
From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Date: Fri, 11 Dec 2015 13:40:49 -0800
Subject: mm: hugetlb: call huge_pte_alloc() only if ptep is null

Currently at the beginning of hugetlb_fault(), we call huge_pte_offset()
and check whether the obtained *ptep is a migration/hwpoison entry or
not.  And if not, then we get to call huge_pte_alloc().  This is racy
because the *ptep could turn into migration/hwpoison entry after the
huge_pte_offset() check.  This race results in BUG_ON in
huge_pte_alloc().

We don't have to call huge_pte_alloc() when the huge_pte_offset()
returns non-NULL, so let's fix this bug with moving the code into else
block.

Note that the *ptep could turn into a migration/hwpoison entry after
this block, but that's not a problem because we have another
!pte_present check later (we never go into hugetlb_no_page() in that
case.)

Fixes: 290408d4a250 ("hugetlb: hugepage migration core")
Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Acked-by: Hillf Danton <hillf.zj@alibaba-inc.com>
Acked-by: David Rientjes <rientjes@google.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: <stable@vger.kernel.org>	[2.6.36+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 4fe4340..9e3fa71 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3696,12 +3696,12 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 		} else if (unlikely(is_hugetlb_entry_hwpoisoned(entry)))
 			return VM_FAULT_HWPOISON_LARGE |
 				VM_FAULT_SET_HINDEX(hstate_index(h));
+	} else {
+		ptep = huge_pte_alloc(mm, address, huge_page_size(h));
+		if (!ptep)
+			return VM_FAULT_OOM;
 	}
 
-	ptep = huge_pte_alloc(mm, address, huge_page_size(h));
-	if (!ptep)
-		return VM_FAULT_OOM;
-
 	mapping = vma->vm_file->f_mapping;
 	idx = vma_hugecache_offset(h, vma, address);
 
-- 
cgit v0.10.2


From dbe409e4f5e5075bd9ff7f8dd5c627abf3ee38c1 Mon Sep 17 00:00:00 2001
From: Mike Kravetz <mike.kravetz@oracle.com>
Date: Fri, 11 Dec 2015 13:40:52 -0800
Subject: mm/hugetlb.c: fix resv map memory leak for placeholder entries

Dmitry Vyukov reported the following memory leak

unreferenced object 0xffff88002eaafd88 (size 32):
  comm "a.out", pid 5063, jiffies 4295774645 (age 15.810s)
  hex dump (first 32 bytes):
    28 e9 4e 63 00 88 ff ff 28 e9 4e 63 00 88 ff ff  (.Nc....(.Nc....
    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
  backtrace:
     kmalloc include/linux/slab.h:458
     region_chg+0x2d4/0x6b0 mm/hugetlb.c:398
     __vma_reservation_common+0x2c3/0x390 mm/hugetlb.c:1791
     vma_needs_reservation mm/hugetlb.c:1813
     alloc_huge_page+0x19e/0xc70 mm/hugetlb.c:1845
     hugetlb_no_page mm/hugetlb.c:3543
     hugetlb_fault+0x7a1/0x1250 mm/hugetlb.c:3717
     follow_hugetlb_page+0x339/0xc70 mm/hugetlb.c:3880
     __get_user_pages+0x542/0xf30 mm/gup.c:497
     populate_vma_page_range+0xde/0x110 mm/gup.c:919
     __mm_populate+0x1c7/0x310 mm/gup.c:969
     do_mlock+0x291/0x360 mm/mlock.c:637
     SYSC_mlock2 mm/mlock.c:658
     SyS_mlock2+0x4b/0x70 mm/mlock.c:648

Dmitry identified a potential memory leak in the routine region_chg,
where a region descriptor is not free'ed on an error path.

However, the root cause for the above memory leak resides in region_del.
In this specific case, a "placeholder" entry is created in region_chg.
The associated page allocation fails, and the placeholder entry is left
in the reserve map.  This is "by design" as the entry should be deleted
when the map is released.  The bug is in the region_del routine which is
used to delete entries within a specific range (and when the map is
released).  region_del did not handle the case where a placeholder entry
exactly matched the start of the range range to be deleted.  In this
case, the entry would not be deleted and leaked.  The fix is to take
these special placeholder entries into account in region_del.

The region_chg error path leak is also fixed.

Fixes: feba16e25a57 ("mm/hugetlb: add region_del() to delete a specific range of entries")
Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Acked-by: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: <stable@vger.kernel.org>	[4.3+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 9e3fa71..ef6963b 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -372,8 +372,10 @@ retry_locked:
 		spin_unlock(&resv->lock);
 
 		trg = kmalloc(sizeof(*trg), GFP_KERNEL);
-		if (!trg)
+		if (!trg) {
+			kfree(nrg);
 			return -ENOMEM;
+		}
 
 		spin_lock(&resv->lock);
 		list_add(&trg->link, &resv->region_cache);
@@ -483,8 +485,16 @@ static long region_del(struct resv_map *resv, long f, long t)
 retry:
 	spin_lock(&resv->lock);
 	list_for_each_entry_safe(rg, trg, head, link) {
-		if (rg->to <= f)
+		/*
+		 * Skip regions before the range to be deleted.  file_region
+		 * ranges are normally of the form [from, to).  However, there
+		 * may be a "placeholder" entry in the map which is of the form
+		 * (from, to) with from == to.  Check for placeholder entries
+		 * at the beginning of the range to be deleted.
+		 */
+		if (rg->to <= f && (rg->to != rg->from || rg->to != f))
 			continue;
+
 		if (rg->from >= t)
 			break;
 
-- 
cgit v0.10.2


From 267a4c76bbdb950688d3aeb020976c2918064584 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Fri, 11 Dec 2015 13:40:55 -0800
Subject: tmpfs: fix shmem_evict_inode() warnings on i_blocks

Dmitry Vyukov provides a little program, autogenerated by syzkaller,
which races a fault on a mapping of a sparse memfd object, against
truncation of that object below the fault address: run repeatedly for a
few minutes, it reliably generates shmem_evict_inode()'s
WARN_ON(inode->i_blocks).

(But there's nothing specific to memfd here, nor to the fstat which it
happened to use to generate the fault: though that looked suspicious,
since a shmem_recalc_inode() had been added there recently.  The same
problem can be reproduced with open+unlink in place of memfd_create, and
with fstatfs in place of fstat.)

v3.7 commit 0f3c42f522dc ("tmpfs: change final i_blocks BUG to WARNING")
explains one cause of such a warning (a race with shmem_writepage to
swap), and possible solutions; but we never took it further, and this
syzkaller incident turns out to have a different cause.

shmem_getpage_gfp()'s error recovery, when a freshly allocated page is
then found to be beyond eof, looks plausible - decrementing the alloced
count that was just before incremented - but in fact can go wrong, if a
racing thread (the truncator, for example) gets its shmem_recalc_inode()
in just after our delete_from_page_cache().  delete_from_page_cache()
decrements nrpages, that shmem_recalc_inode() will balance the books by
decrementing alloced itself, then our decrement of alloced take it one
too low: leading to the WARNING when the object is finally evicted.

Once the new page has been exposed in the page cache,
shmem_getpage_gfp() must leave it to shmem_recalc_inode() itself to get
the accounting right in all cases (and not fall through from "trunc:" to
"decused:").  Adjust that error recovery block; and the reinitialization
of info and sbinfo can be removed too.

While we're here, fix shmem_writepage() to avoid the original issue: it
will be safe against a racing shmem_recalc_inode(), if it merely
increments swapped before the shmem_delete_from_page_cache() which
decrements nrpages (but it must then do its own shmem_recalc_inode()
before that, while still in balance, instead of after).  (Aside: why do
we shmem_recalc_inode() here in the swap path? Because its raison d'etre
is to cope with clean sparse shmem pages being reclaimed behind our
back: so here when swapping is a good place to look for that case.) But
I've not now managed to reproduce this bug, even without the patch.

I don't see why I didn't do that earlier: perhaps inhibited by the
preference to eliminate shmem_recalc_inode() altogether.  Driven by this
incident, I do now have a patch to do so at last; but still want to sit
on it for a bit, there's a couple of questions yet to be resolved.

Signed-off-by: Hugh Dickins <hughd@google.com>
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/shmem.c b/mm/shmem.c
index 9187eee..2afcdbb 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -843,14 +843,14 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
 		list_add_tail(&info->swaplist, &shmem_swaplist);
 
 	if (add_to_swap_cache(page, swap, GFP_ATOMIC) == 0) {
-		swap_shmem_alloc(swap);
-		shmem_delete_from_page_cache(page, swp_to_radix_entry(swap));
-
 		spin_lock(&info->lock);
-		info->swapped++;
 		shmem_recalc_inode(inode);
+		info->swapped++;
 		spin_unlock(&info->lock);
 
+		swap_shmem_alloc(swap);
+		shmem_delete_from_page_cache(page, swp_to_radix_entry(swap));
+
 		mutex_unlock(&shmem_swaplist_mutex);
 		BUG_ON(page_mapped(page));
 		swap_writepage(page, wbc);
@@ -1078,7 +1078,7 @@ repeat:
 	if (sgp != SGP_WRITE && sgp != SGP_FALLOC &&
 	    ((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
 		error = -EINVAL;
-		goto failed;
+		goto unlock;
 	}
 
 	if (page && sgp == SGP_WRITE)
@@ -1246,11 +1246,15 @@ clear:
 	/* Perhaps the file has been truncated since we checked */
 	if (sgp != SGP_WRITE && sgp != SGP_FALLOC &&
 	    ((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
+		if (alloced) {
+			ClearPageDirty(page);
+			delete_from_page_cache(page);
+			spin_lock(&info->lock);
+			shmem_recalc_inode(inode);
+			spin_unlock(&info->lock);
+		}
 		error = -EINVAL;
-		if (alloced)
-			goto trunc;
-		else
-			goto failed;
+		goto unlock;
 	}
 	*pagep = page;
 	return 0;
@@ -1258,23 +1262,13 @@ clear:
 	/*
 	 * Error recovery.
 	 */
-trunc:
-	info = SHMEM_I(inode);
-	ClearPageDirty(page);
-	delete_from_page_cache(page);
-	spin_lock(&info->lock);
-	info->alloced--;
-	inode->i_blocks -= BLOCKS_PER_PAGE;
-	spin_unlock(&info->lock);
 decused:
-	sbinfo = SHMEM_SB(inode->i_sb);
 	if (sbinfo->max_blocks)
 		percpu_counter_add(&sbinfo->used_blocks, -1);
 unacct:
 	shmem_unacct_blocks(info->flags, 1);
 failed:
-	if (swap.val && error != -EINVAL &&
-	    !shmem_confirm_swap(mapping, index, swap))
+	if (swap.val && !shmem_confirm_swap(mapping, index, swap))
 		error = -EEXIST;
 unlock:
 	if (page) {
-- 
cgit v0.10.2


From 26bbe7ef6d5cdc7ec08cba6d433fca4060f258f3 Mon Sep 17 00:00:00 2001
From: Seth Jennings <sjennings@variantweb.net>
Date: Fri, 11 Dec 2015 13:40:57 -0800
Subject: drivers/base/memory.c: prohibit offlining of memory blocks with
 missing sections

Commit bdee237c0343 ("x86: mm: Use 2GB memory block size on large-memory
x86-64 systems") and 982792c782ef ("x86, mm: probe memory block size for
generic x86 64bit") introduced large block sizes for x86.  This made it
possible to have multiple sections per memory block where previously,
there was a only every one section per block.

Since blocks consist of contiguous ranges of section, there can be holes
in the blocks where sections are not present.  If one attempts to
offline such a block, a crash occurs since the code is not designed to
deal with this.

This patch is a quick fix to gaurd against the crash by not allowing
blocks with non-present sections to be offlined.

Addresses https://bugzilla.kernel.org/show_bug.cgi?id=107781

Signed-off-by: Seth Jennings <sjennings@variantweb.net>
Reported-by: Andrew Banman <abanman@sgi.com>
Cc: Daniel J Blueman <daniel@numascale.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Greg KH <greg@kroah.com>
Cc: Russ Anderson <rja@sgi.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 2804aed..25425d3 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -303,6 +303,10 @@ static int memory_subsys_offline(struct device *dev)
 	if (mem->state == MEM_OFFLINE)
 		return 0;
 
+	/* Can't offline block with non-present sections */
+	if (mem->section_count != sections_per_block)
+		return -EINVAL;
+
 	return memory_block_change_state(mem, MEM_OFFLINE, MEM_ONLINE);
 }
 
-- 
cgit v0.10.2


From a2b829d95958da2025ef844c0f53ac15ad720fac Mon Sep 17 00:00:00 2001
From: Chen Jie <chenjie6@huawei.com>
Date: Fri, 11 Dec 2015 13:41:00 -0800
Subject: mm/oom_kill.c: avoid attempting to kill init sharing same memory

It's possible that an oom killed victim shares an ->mm with the init
process and thus oom_kill_process() would end up trying to kill init as
well.

This has been shown in practice:

	Out of memory: Kill process 9134 (init) score 3 or sacrifice child
	Killed process 9134 (init) total-vm:1868kB, anon-rss:84kB, file-rss:572kB
	Kill process 1 (init) sharing same memory
	...
	Kernel panic - not syncing: Attempted to kill init! exitcode=0x00000009

And this will result in a kernel panic.

If a process is forked by init and selected for oom kill while still
sharing init_mm, then it's likely this system is in a recoverable state.
However, it's better not to try to kill init and allow the machine to
panic due to unkillable processes.

[rientjes@google.com: rewrote changelog]
[akpm@linux-foundation.org: fix inverted test, per Ben]
Signed-off-by: Chen Jie <chenjie6@huawei.com>
Signed-off-by: David Rientjes <rientjes@google.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Ben Hutchings <ben@decadent.org.uk>
Cc: Li Zefan <lizefan@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index d13a339..c126809 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -608,6 +608,8 @@ void oom_kill_process(struct oom_control *oc, struct task_struct *p,
 			continue;
 		if (unlikely(p->flags & PF_KTHREAD))
 			continue;
+		if (is_global_init(p))
+			continue;
 		if (p->signal->oom_score_adj == OOM_SCORE_ADJ_MIN)
 			continue;
 
-- 
cgit v0.10.2


From 854ee2e944b4daf795e32562a7d2f9e90ab5a6a8 Mon Sep 17 00:00:00 2001
From: Junxiao Bi <junxiao.bi@oracle.com>
Date: Fri, 11 Dec 2015 13:41:03 -0800
Subject: ocfs2: fix SGID not inherited issue

Commit 8f1eb48758aa ("ocfs2: fix umask ignored issue") introduced an
issue, SGID of sub dir was not inherited from its parents dir.  It is
because SGID is set into "inode->i_mode" in ocfs2_get_init_inode(), but
is overwritten by "mode" which don't have SGID set later.

Fixes: 8f1eb48758aa ("ocfs2: fix umask ignored issue")
Signed-off-by: Junxiao Bi <junxiao.bi@oracle.com>
Cc: Mark Fasheh <mfasheh@suse.de>
Cc: Joel Becker <jlbec@evilplan.org>
Acked-by: Srinivas Eeda <srinivas.eeda@oracle.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index a03f6f4..3123408 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -367,13 +367,11 @@ static int ocfs2_mknod(struct inode *dir,
 		goto leave;
 	}
 
-	status = posix_acl_create(dir, &mode, &default_acl, &acl);
+	status = posix_acl_create(dir, &inode->i_mode, &default_acl, &acl);
 	if (status) {
 		mlog_errno(status);
 		goto leave;
 	}
-	/* update inode->i_mode after mask with "umask". */
-	inode->i_mode = mode;
 
 	handle = ocfs2_start_trans(osb, ocfs2_mknod_credits(osb->sb,
 							    S_ISDIR(mode),
-- 
cgit v0.10.2


From 2d33fa1059da4c8e816627a688d950b613ec0474 Mon Sep 17 00:00:00 2001
From: "Dmitry V. Levin" <ldv@altlinux.org>
Date: Fri, 11 Dec 2015 13:41:06 -0800
Subject: sh64: fix __NR_fgetxattr

According to arch/sh/kernel/syscalls_64.S and common sense, __NR_fgetxattr
has to be defined to 259, but it doesn't.  Instead, it's defined to 269,
which is of course used by another syscall, __NR_sched_setaffinity in this
case.

This bug was found by strace test suite.

Signed-off-by: Dmitry V. Levin <ldv@altlinux.org>
Acked-by: Geert Uytterhoeven <geert+renesas@glider.be>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/sh/include/uapi/asm/unistd_64.h b/arch/sh/include/uapi/asm/unistd_64.h
index e6820c8..47ebd5b 100644
--- a/arch/sh/include/uapi/asm/unistd_64.h
+++ b/arch/sh/include/uapi/asm/unistd_64.h
@@ -278,7 +278,7 @@
 #define __NR_fsetxattr		256
 #define __NR_getxattr		257
 #define __NR_lgetxattr		258
-#define __NR_fgetxattr		269
+#define __NR_fgetxattr		259
 #define __NR_listxattr		260
 #define __NR_llistxattr		261
 #define __NR_flistxattr		262
-- 
cgit v0.10.2


From 9530d0fe129c0197d5df13319ccefd08a827383b Mon Sep 17 00:00:00 2001
From: Qais Yousef <qais.yousef@imgtec.com>
Date: Fri, 11 Dec 2015 13:41:09 -0800
Subject: MIPS: fix DMA contiguous allocation

Recent changes to how GFP_ATOMIC is defined seems to have broken the
condition to use mips_alloc_from_contiguous() in
mips_dma_alloc_coherent().

I couldn't bottom out the exact change but I think it's this commit
d0164adc89f6 ("mm, page_alloc: distinguish between being unable to
sleep, unwilling to sleep and avoiding waking kswapd").

GFP_ATOMIC has multiple bits set and the check for !(gfp & GFP_ATOMIC)
isn't enough.

The reason behind this condition is to check whether we can potentially
do a sleeping memory allocation.  Use gfpflags_allow_blocking() instead
which should be more robust.

Signed-off-by: Qais Yousef <qais.yousef@imgtec.com>
Acked-by: Mel Gorman <mgorman@techsingularity.net>
Cc: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/mips/mm/dma-default.c b/arch/mips/mm/dma-default.c
index d8117be..730d394 100644
--- a/arch/mips/mm/dma-default.c
+++ b/arch/mips/mm/dma-default.c
@@ -145,7 +145,7 @@ static void *mips_dma_alloc_coherent(struct device *dev, size_t size,
 
 	gfp = massage_gfp_flags(dev, gfp);
 
-	if (IS_ENABLED(CONFIG_DMA_CMA) && !(gfp & GFP_ATOMIC))
+	if (IS_ENABLED(CONFIG_DMA_CMA) && gfpflags_allow_blocking(gfp))
 		page = dma_alloc_from_contiguous(dev,
 					count, get_order(size));
 	if (!page)
-- 
cgit v0.10.2


From dfd01f026058a59a513f8a365b439a0681b803af Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Sun, 13 Dec 2015 22:11:16 +0100
Subject: sched/wait: Fix the signal handling fix

Jan Stancek reported that I wrecked things for him by fixing things for
Vladimir :/

His report was due to an UNINTERRUPTIBLE wait getting -EINTR, which
should not be possible, however my previous patch made this possible by
unconditionally checking signal_pending().

We cannot use current->state as was done previously, because the
instruction after the store to that variable it can be changed.  We must
instead pass the initial state along and use that.

Fixes: 68985633bccb ("sched/wait: Fix signal handling in bit wait helpers")
Reported-by: Jan Stancek <jstancek@redhat.com>
Reported-by: Chris Mason <clm@fb.com>
Tested-by: Jan Stancek <jstancek@redhat.com>
Tested-by: Vladimir Murzin <vladimir.murzin@arm.com>
Tested-by: Chris Mason <clm@fb.com>
Reviewed-by: Paul Turner <pjt@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: tglx@linutronix.de
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: hpa@zytor.com
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 6b66dd5..a329f5b 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1831,11 +1831,11 @@ cifs_invalidate_mapping(struct inode *inode)
  * @word: long word containing the bit lock
  */
 static int
-cifs_wait_bit_killable(struct wait_bit_key *key)
+cifs_wait_bit_killable(struct wait_bit_key *key, int mode)
 {
-	if (fatal_signal_pending(current))
-		return -ERESTARTSYS;
 	freezable_schedule_unsafe();
+	if (signal_pending_state(mode, current))
+		return -ERESTARTSYS;
 	return 0;
 }
 
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 31b0a52..c7e8b87 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -75,11 +75,11 @@ nfs_fattr_to_ino_t(struct nfs_fattr *fattr)
  * nfs_wait_bit_killable - helper for functions that are sleeping on bit locks
  * @word: long word containing the bit lock
  */
-int nfs_wait_bit_killable(struct wait_bit_key *key)
+int nfs_wait_bit_killable(struct wait_bit_key *key, int mode)
 {
-	if (fatal_signal_pending(current))
-		return -ERESTARTSYS;
 	freezable_schedule_unsafe();
+	if (signal_pending_state(mode, current))
+		return -ERESTARTSYS;
 	return 0;
 }
 EXPORT_SYMBOL_GPL(nfs_wait_bit_killable);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 56cfde2..9dea85f 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -379,7 +379,7 @@ extern int nfs_drop_inode(struct inode *);
 extern void nfs_clear_inode(struct inode *);
 extern void nfs_evict_inode(struct inode *);
 void nfs_zap_acl_cache(struct inode *inode);
-extern int nfs_wait_bit_killable(struct wait_bit_key *key);
+extern int nfs_wait_bit_killable(struct wait_bit_key *key, int mode);
 
 /* super.c */
 extern const struct super_operations nfs_sops;
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index fe3ddd2..452a011 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -129,7 +129,7 @@ __nfs_iocounter_wait(struct nfs_io_counter *c)
 		set_bit(NFS_IO_INPROGRESS, &c->flags);
 		if (atomic_read(&c->io_count) == 0)
 			break;
-		ret = nfs_wait_bit_killable(&q.key);
+		ret = nfs_wait_bit_killable(&q.key, TASK_KILLABLE);
 	} while (atomic_read(&c->io_count) != 0 && !ret);
 	finish_wait(wq, &q.wait);
 	return ret;
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 5a8ae21..bec0384 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1466,11 +1466,11 @@ static bool pnfs_within_mdsthreshold(struct nfs_open_context *ctx,
 }
 
 /* stop waiting if someone clears NFS_LAYOUT_RETRY_LAYOUTGET bit. */
-static int pnfs_layoutget_retry_bit_wait(struct wait_bit_key *key)
+static int pnfs_layoutget_retry_bit_wait(struct wait_bit_key *key, int mode)
 {
 	if (!test_bit(NFS_LAYOUT_RETRY_LAYOUTGET, key->flags))
 		return 1;
-	return nfs_wait_bit_killable(key);
+	return nfs_wait_bit_killable(key, mode);
 }
 
 static bool pnfs_prepare_to_retry_layoutget(struct pnfs_layout_hdr *lo)
diff --git a/include/linux/wait.h b/include/linux/wait.h
index 1e1bf9f..513b36f 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -145,7 +145,7 @@ __remove_wait_queue(wait_queue_head_t *head, wait_queue_t *old)
 	list_del(&old->task_list);
 }
 
-typedef int wait_bit_action_f(struct wait_bit_key *);
+typedef int wait_bit_action_f(struct wait_bit_key *, int mode);
 void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr, void *key);
 void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key);
 void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode, int nr, void *key);
@@ -960,10 +960,10 @@ int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
 	} while (0)
 
 
-extern int bit_wait(struct wait_bit_key *);
-extern int bit_wait_io(struct wait_bit_key *);
-extern int bit_wait_timeout(struct wait_bit_key *);
-extern int bit_wait_io_timeout(struct wait_bit_key *);
+extern int bit_wait(struct wait_bit_key *, int);
+extern int bit_wait_io(struct wait_bit_key *, int);
+extern int bit_wait_timeout(struct wait_bit_key *, int);
+extern int bit_wait_io_timeout(struct wait_bit_key *, int);
 
 /**
  * wait_on_bit - wait for a bit to be cleared
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index f10bd87..f15d6b6 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -392,7 +392,7 @@ __wait_on_bit(wait_queue_head_t *wq, struct wait_bit_queue *q,
 	do {
 		prepare_to_wait(wq, &q->wait, mode);
 		if (test_bit(q->key.bit_nr, q->key.flags))
-			ret = (*action)(&q->key);
+			ret = (*action)(&q->key, mode);
 	} while (test_bit(q->key.bit_nr, q->key.flags) && !ret);
 	finish_wait(wq, &q->wait);
 	return ret;
@@ -431,7 +431,7 @@ __wait_on_bit_lock(wait_queue_head_t *wq, struct wait_bit_queue *q,
 		prepare_to_wait_exclusive(wq, &q->wait, mode);
 		if (!test_bit(q->key.bit_nr, q->key.flags))
 			continue;
-		ret = action(&q->key);
+		ret = action(&q->key, mode);
 		if (!ret)
 			continue;
 		abort_exclusive_wait(wq, &q->wait, mode, &q->key);
@@ -581,43 +581,43 @@ void wake_up_atomic_t(atomic_t *p)
 }
 EXPORT_SYMBOL(wake_up_atomic_t);
 
-__sched int bit_wait(struct wait_bit_key *word)
+__sched int bit_wait(struct wait_bit_key *word, int mode)
 {
 	schedule();
-	if (signal_pending(current))
+	if (signal_pending_state(mode, current))
 		return -EINTR;
 	return 0;
 }
 EXPORT_SYMBOL(bit_wait);
 
-__sched int bit_wait_io(struct wait_bit_key *word)
+__sched int bit_wait_io(struct wait_bit_key *word, int mode)
 {
 	io_schedule();
-	if (signal_pending(current))
+	if (signal_pending_state(mode, current))
 		return -EINTR;
 	return 0;
 }
 EXPORT_SYMBOL(bit_wait_io);
 
-__sched int bit_wait_timeout(struct wait_bit_key *word)
+__sched int bit_wait_timeout(struct wait_bit_key *word, int mode)
 {
 	unsigned long now = READ_ONCE(jiffies);
 	if (time_after_eq(now, word->timeout))
 		return -EAGAIN;
 	schedule_timeout(word->timeout - now);
-	if (signal_pending(current))
+	if (signal_pending_state(mode, current))
 		return -EINTR;
 	return 0;
 }
 EXPORT_SYMBOL_GPL(bit_wait_timeout);
 
-__sched int bit_wait_io_timeout(struct wait_bit_key *word)
+__sched int bit_wait_io_timeout(struct wait_bit_key *word, int mode)
 {
 	unsigned long now = READ_ONCE(jiffies);
 	if (time_after_eq(now, word->timeout))
 		return -EAGAIN;
 	io_schedule_timeout(word->timeout - now);
-	if (signal_pending(current))
+	if (signal_pending_state(mode, current))
 		return -EINTR;
 	return 0;
 }
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index f14f24e..73ad57a 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -250,11 +250,11 @@ void rpc_destroy_wait_queue(struct rpc_wait_queue *queue)
 }
 EXPORT_SYMBOL_GPL(rpc_destroy_wait_queue);
 
-static int rpc_wait_bit_killable(struct wait_bit_key *key)
+static int rpc_wait_bit_killable(struct wait_bit_key *key, int mode)
 {
-	if (fatal_signal_pending(current))
-		return -ERESTARTSYS;
 	freezable_schedule_unsafe();
+	if (signal_pending_state(mode, current))
+		return -ERESTARTSYS;
 	return 0;
 }
 
-- 
cgit v0.10.2


From 9f9499ae8e6415cefc4fe0a96ad0e27864353c89 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 13 Dec 2015 17:42:58 -0800
Subject: Linux 4.4-rc5


diff --git a/Makefile b/Makefile
index d644f6e..bc0165d 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 4
 PATCHLEVEL = 4
 SUBLEVEL = 0
-EXTRAVERSION = -rc4
+EXTRAVERSION = -rc5
 NAME = Blurry Fish Butt
 
 # *DOCUMENTATION*
-- 
cgit v0.10.2


From 21266be9ed542f13436bd9c75316d43e1e84f6ae Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Thu, 19 Nov 2015 18:19:29 -0800
Subject: arch: consolidate CONFIG_STRICT_DEVM in lib/Kconfig.debug

Let all the archs that implement devmem_is_allowed() opt-in to a common
definition of CONFIG_STRICT_DEVM in lib/Kconfig.debug.

Cc: Kees Cook <keescook@chromium.org>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: "David S. Miller" <davem@davemloft.net>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Heiko Carstens <heiko.carstens@de.ibm.com>
[heiko: drop 'default y' for s390]
Acked-by: Ingo Molnar <mingo@redhat.com>
Suggested-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 34e1569..b8a4797 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -2,6 +2,7 @@ config ARM
 	bool
 	default y
 	select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
+	select ARCH_HAS_DEVMEM_IS_ALLOWED
 	select ARCH_HAS_ELF_RANDOMIZE
 	select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
 	select ARCH_HAVE_CUSTOM_GPIO_H
diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug
index 259c0ca..e356357 100644
--- a/arch/arm/Kconfig.debug
+++ b/arch/arm/Kconfig.debug
@@ -15,20 +15,6 @@ config ARM_PTDUMP
 	  kernel.
 	  If in doubt, say "N"
 
-config STRICT_DEVMEM
-	bool "Filter access to /dev/mem"
-	depends on MMU
-	---help---
-	  If this option is disabled, you allow userspace (root) access to all
-	  of memory, including kernel and userspace memory. Accidental
-	  access to this is obviously disastrous, but specific access can
-	  be used by people debugging the kernel.
-
-	  If this option is switched on, the /dev/mem file only allows
-	  userspace access to memory mapped peripherals.
-
-          If in doubt, say Y.
-
 # RMK wants arm kernels compiled with frame pointers or stack unwinding.
 # If you know what you are doing and are willing to live without stack
 # traces, you can get a slightly smaller kernel by setting this option to
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 871f217..08f64b4 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -3,6 +3,7 @@ config ARM64
 	select ACPI_CCA_REQUIRED if ACPI
 	select ACPI_GENERIC_GSI if ACPI
 	select ACPI_REDUCED_HARDWARE_ONLY if ACPI
+	select ARCH_HAS_DEVMEM_IS_ALLOWED
 	select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
 	select ARCH_HAS_ELF_RANDOMIZE
 	select ARCH_HAS_GCOV_PROFILE_ALL
diff --git a/arch/arm64/Kconfig.debug b/arch/arm64/Kconfig.debug
index 04fb73b..e13c4bf 100644
--- a/arch/arm64/Kconfig.debug
+++ b/arch/arm64/Kconfig.debug
@@ -14,20 +14,6 @@ config ARM64_PTDUMP
 	  kernel.
 	  If in doubt, say "N"
 
-config STRICT_DEVMEM
-	bool "Filter access to /dev/mem"
-	depends on MMU
-	help
-	  If this option is disabled, you allow userspace (root) access to all
-	  of memory, including kernel and userspace memory. Accidental
-	  access to this is obviously disastrous, but specific access can
-	  be used by people debugging the kernel.
-
-	  If this option is switched on, the /dev/mem file only allows
-	  userspace access to memory mapped peripherals.
-
-	  If in doubt, say Y.
-
 config PID_IN_CONTEXTIDR
 	bool "Write the current PID to the CONTEXTIDR register"
 	help
diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig
index 34aa193..03bfd6b 100644
--- a/arch/frv/Kconfig
+++ b/arch/frv/Kconfig
@@ -10,6 +10,7 @@ config FRV
 	select HAVE_DEBUG_BUGVERBOSE
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	select GENERIC_CPU_DEVICES
+	select ARCH_HAS_DEVMEM_IS_ALLOWED
 	select ARCH_WANT_IPC_PARSE_VERSION
 	select OLD_SIGSUSPEND3
 	select OLD_SIGACTION
diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig
index 9e44bbd..836ac5a 100644
--- a/arch/m32r/Kconfig
+++ b/arch/m32r/Kconfig
@@ -13,6 +13,7 @@ config M32R
 	select GENERIC_IRQ_PROBE
 	select GENERIC_IRQ_SHOW
 	select GENERIC_ATOMIC64
+	select ARCH_HAS_DEVMEM_IS_ALLOWED
 	select ARCH_USES_GETTIMEOFFSET
 	select MODULES_USE_ELF_RELA
 	select HAVE_DEBUG_STACKOVERFLOW
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index db49e0d..85eabc4 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -159,6 +159,7 @@ config PPC
 	select EDAC_SUPPORT
 	select EDAC_ATOMIC_SCRUB
 	select ARCH_HAS_DMA_SET_COHERENT_MASK
+	select ARCH_HAS_DEVMEM_IS_ALLOWED
 	select HAVE_ARCH_SECCOMP_FILTER
 
 config GENERIC_CSUM
diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug
index 3a510f4..a0e44a9 100644
--- a/arch/powerpc/Kconfig.debug
+++ b/arch/powerpc/Kconfig.debug
@@ -335,18 +335,6 @@ config PPC_EARLY_DEBUG_CPM_ADDR
 	  platform probing is done, all platforms selected must
 	  share the same address.
 
-config STRICT_DEVMEM
-	def_bool y
-	prompt "Filter access to /dev/mem"
-	help
-	  This option restricts access to /dev/mem.  If this option is
-	  disabled, you allow userspace access to all memory, including
-	  kernel and userspace memory. Accidental memory access is likely
-	  to be disastrous.
-	  Memory access is required for experts who want to debug the kernel.
-
-	  If you are unsure, say Y.
-
 config FAIL_IOMMU
 	bool "Fault-injection capability for IOMMU"
 	depends on FAULT_INJECTION
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 3a55f49..779becb 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -66,6 +66,7 @@ config S390
 	def_bool y
 	select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
 	select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS
+	select ARCH_HAS_DEVMEM_IS_ALLOWED
 	select ARCH_HAS_ELF_RANDOMIZE
 	select ARCH_HAS_GCOV_PROFILE_ALL
 	select ARCH_HAS_SG_CHAIN
diff --git a/arch/s390/Kconfig.debug b/arch/s390/Kconfig.debug
index c56878e..26c5d5be 100644
--- a/arch/s390/Kconfig.debug
+++ b/arch/s390/Kconfig.debug
@@ -5,18 +5,6 @@ config TRACE_IRQFLAGS_SUPPORT
 
 source "lib/Kconfig.debug"
 
-config STRICT_DEVMEM
-	def_bool y
-	prompt "Filter access to /dev/mem"
-	---help---
-	  This option restricts access to /dev/mem.  If this option is
-	  disabled, you allow userspace access to all memory, including
-	  kernel and userspace memory. Accidental memory access is likely
-	  to be disastrous.
-	  Memory access is required for experts who want to debug the kernel.
-
-	  If you are unsure, say Y.
-
 config S390_PTDUMP
 	bool "Export kernel pagetable layout to userspace via debugfs"
 	depends on DEBUG_KERNEL
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index 106c21b..cf31168 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -19,6 +19,7 @@ config TILE
 	select VIRT_TO_BUS
 	select SYS_HYPERVISOR
 	select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS
+	select ARCH_HAS_DEVMEM_IS_ALLOWED
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	select GENERIC_CLOCKEVENTS
 	select MODULES_USE_ELF_RELA
@@ -116,9 +117,6 @@ config ARCH_DISCONTIGMEM_DEFAULT
 config TRACE_IRQFLAGS_SUPPORT
 	def_bool y
 
-config STRICT_DEVMEM
-	def_bool y
-
 # SMP is required for Tilera Linux.
 config SMP
 	def_bool y
diff --git a/arch/unicore32/Kconfig b/arch/unicore32/Kconfig
index c9faddc..5dc4c0a 100644
--- a/arch/unicore32/Kconfig
+++ b/arch/unicore32/Kconfig
@@ -1,5 +1,6 @@
 config UNICORE32
 	def_bool y
+	select ARCH_HAS_DEVMEM_IS_ALLOWED
 	select ARCH_MIGHT_HAVE_PC_PARPORT
 	select ARCH_MIGHT_HAVE_PC_SERIO
 	select HAVE_MEMBLOCK
diff --git a/arch/unicore32/Kconfig.debug b/arch/unicore32/Kconfig.debug
index 1a36262..f075bbe 100644
--- a/arch/unicore32/Kconfig.debug
+++ b/arch/unicore32/Kconfig.debug
@@ -2,20 +2,6 @@ menu "Kernel hacking"
 
 source "lib/Kconfig.debug"
 
-config STRICT_DEVMEM
-	bool "Filter access to /dev/mem"
-	depends on MMU
-	---help---
-	  If this option is disabled, you allow userspace (root) access to all
-	  of memory, including kernel and userspace memory. Accidental
-	  access to this is obviously disastrous, but specific access can
-	  be used by people debugging the kernel.
-
-	  If this option is switched on, the /dev/mem file only allows
-	  userspace access to memory mapped peripherals.
-
-          If in doubt, say Y.
-
 config EARLY_PRINTK
 	def_bool DEBUG_OCD
 	help
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index db3622f..75fba1f 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -24,6 +24,7 @@ config X86
 	select ARCH_DISCARD_MEMBLOCK
 	select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
 	select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS
+	select ARCH_HAS_DEVMEM_IS_ALLOWED
 	select ARCH_HAS_ELF_RANDOMIZE
 	select ARCH_HAS_FAST_MULTIPLIER
 	select ARCH_HAS_GCOV_PROFILE_ALL
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 137dfa9..1116452 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -5,23 +5,6 @@ config TRACE_IRQFLAGS_SUPPORT
 
 source "lib/Kconfig.debug"
 
-config STRICT_DEVMEM
-	bool "Filter access to /dev/mem"
-	---help---
-	  If this option is disabled, you allow userspace (root) access to all
-	  of memory, including kernel and userspace memory. Accidental
-	  access to this is obviously disastrous, but specific access can
-	  be used by people debugging the kernel. Note that with PAT support
-	  enabled, even in this case there are restrictions on /dev/mem
-	  use due to the cache aliasing requirements.
-
-	  If this option is switched on, the /dev/mem file only allows
-	  userspace access to PCI space and the BIOS code and data regions.
-	  This is sufficient for dosemu and X and all common users of
-	  /dev/mem.
-
-	  If in doubt, say Y.
-
 config X86_VERBOSE_BOOTUP
 	bool "Enable verbose x86 bootup info messages"
 	default y
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 8c15b29..289dfcb 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1853,3 +1853,25 @@ source "samples/Kconfig"
 
 source "lib/Kconfig.kgdb"
 
+config ARCH_HAS_DEVMEM_IS_ALLOWED
+	bool
+
+config STRICT_DEVMEM
+	bool "Filter access to /dev/mem"
+	depends on MMU
+	depends on ARCH_HAS_DEVMEM_IS_ALLOWED
+	default y if TILE || PPC
+	---help---
+	  If this option is disabled, you allow userspace (root) access to all
+	  of memory, including kernel and userspace memory. Accidental
+	  access to this is obviously disastrous, but specific access can
+	  be used by people debugging the kernel. Note that with PAT support
+	  enabled, even in this case there are restrictions on /dev/mem
+	  use due to the cache aliasing requirements.
+
+	  If this option is switched on, the /dev/mem file only allows
+	  userspace access to PCI space and the BIOS code and data regions.
+	  This is sufficient for dosemu and X and all common users of
+	  /dev/mem.
+
+	  If in doubt, say Y.
-- 
cgit v0.10.2


From 90a545e981267e917b9d698ce07affd69787db87 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Mon, 23 Nov 2015 15:49:03 -0800
Subject: restrict /dev/mem to idle io memory ranges

This effectively promotes IORESOURCE_BUSY to IORESOURCE_EXCLUSIVE
semantics by default.  If userspace really believes it is safe to access
the memory region it can also perform the extra step of disabling an
active driver.  This protects device address ranges with read side
effects and otherwise directs userspace to use the driver.

Persistent memory presents a large "mistake surface" to /dev/mem as now
accidental writes can corrupt a filesystem.

In general if a device driver is busily using a memory region it already
informs other parts of the kernel to not touch it via
request_mem_region().  /dev/mem should honor the same safety restriction
by default.  Debugging a device driver from userspace becomes more
difficult with this enabled.  Any application using /dev/mem or mmap of
sysfs pci resources will now need to perform the extra step of either:

1/ Disabling the driver, for example:

   echo <device id> > /dev/bus/<parent bus>/drivers/<driver name>/unbind

2/ Rebooting with "iomem=relaxed" on the command line

3/ Recompiling with CONFIG_IO_STRICT_DEVMEM=n

Traditional users of /dev/mem like dosemu are unaffected because the
first 1MB of memory is not subject to the IO_STRICT_DEVMEM restriction.
Legacy X configurations use /dev/mem to talk to graphics hardware, but
that functionality has since moved to kernel graphics drivers.

Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: Kees Cook <keescook@chromium.org>
Acked-by: Ingo Molnar <mingo@redhat.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>

diff --git a/kernel/resource.c b/kernel/resource.c
index f150dbb..09c0597 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -1498,8 +1498,15 @@ int iomem_is_exclusive(u64 addr)
 			break;
 		if (p->end < addr)
 			continue;
-		if (p->flags & IORESOURCE_BUSY &&
-		     p->flags & IORESOURCE_EXCLUSIVE) {
+		/*
+		 * A resource is exclusive if IORESOURCE_EXCLUSIVE is set
+		 * or CONFIG_IO_STRICT_DEVMEM is enabled and the
+		 * resource is busy.
+		 */
+		if ((p->flags & IORESOURCE_BUSY) == 0)
+			continue;
+		if (IS_ENABLED(CONFIG_IO_STRICT_DEVMEM)
+				|| p->flags & IORESOURCE_EXCLUSIVE) {
 			err = 1;
 			break;
 		}
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 289dfcb..073496d 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1869,9 +1869,26 @@ config STRICT_DEVMEM
 	  enabled, even in this case there are restrictions on /dev/mem
 	  use due to the cache aliasing requirements.
 
+	  If this option is switched on, and IO_STRICT_DEVMEM=n, the /dev/mem
+	  file only allows userspace access to PCI space and the BIOS code and
+	  data regions.  This is sufficient for dosemu and X and all common
+	  users of /dev/mem.
+
+	  If in doubt, say Y.
+
+config IO_STRICT_DEVMEM
+	bool "Filter I/O access to /dev/mem"
+	depends on STRICT_DEVMEM
+	default STRICT_DEVMEM
+	---help---
+	  If this option is disabled, you allow userspace (root) access to all
+	  io-memory regardless of whether a driver is actively using that
+	  range.  Accidental access to this is obviously disastrous, but
+	  specific access can be used by people debugging kernel drivers.
+
 	  If this option is switched on, the /dev/mem file only allows
-	  userspace access to PCI space and the BIOS code and data regions.
-	  This is sufficient for dosemu and X and all common users of
-	  /dev/mem.
+	  userspace access to *idle* io-memory ranges (see /proc/iomem) This
+	  may break traditional users of /dev/mem (dosemu, legacy X, etc...)
+	  if the driver using a given range cannot be disabled.
 
 	  If in doubt, say Y.
-- 
cgit v0.10.2


From 4ebb16ca9a06a54cdb2e7f2ce1e506fa4d432445 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Wed, 28 Oct 2015 07:48:19 +0900
Subject: block: introduce bdev_file_inode()

Similar to the file_inode() helper, provide a helper to lookup the inode for a
raw block device itself.

Cc: Al Viro <viro@zeniv.linux.org.uk>
Suggested-by: Jan Kara <jack@suse.cz>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>

diff --git a/fs/block_dev.c b/fs/block_dev.c
index 44d4a1e..52248bc 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -156,11 +156,16 @@ blkdev_get_block(struct inode *inode, sector_t iblock,
 	return 0;
 }
 
+static struct inode *bdev_file_inode(struct file *file)
+{
+	return file->f_mapping->host;
+}
+
 static ssize_t
 blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
 {
 	struct file *file = iocb->ki_filp;
-	struct inode *inode = file->f_mapping->host;
+	struct inode *inode = bdev_file_inode(file);
 
 	if (IS_DAX(inode))
 		return dax_do_io(iocb, inode, iter, offset, blkdev_get_block,
@@ -338,7 +343,7 @@ static int blkdev_write_end(struct file *file, struct address_space *mapping,
  */
 static loff_t block_llseek(struct file *file, loff_t offset, int whence)
 {
-	struct inode *bd_inode = file->f_mapping->host;
+	struct inode *bd_inode = bdev_file_inode(file);
 	loff_t retval;
 
 	mutex_lock(&bd_inode->i_mutex);
@@ -349,7 +354,7 @@ static loff_t block_llseek(struct file *file, loff_t offset, int whence)
 	
 int blkdev_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
 {
-	struct inode *bd_inode = filp->f_mapping->host;
+	struct inode *bd_inode = bdev_file_inode(filp);
 	struct block_device *bdev = I_BDEV(bd_inode);
 	int error;
 	
@@ -1605,14 +1610,14 @@ EXPORT_SYMBOL(blkdev_put);
 
 static int blkdev_close(struct inode * inode, struct file * filp)
 {
-	struct block_device *bdev = I_BDEV(filp->f_mapping->host);
+	struct block_device *bdev = I_BDEV(bdev_file_inode(filp));
 	blkdev_put(bdev, filp->f_mode);
 	return 0;
 }
 
 static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
 {
-	struct block_device *bdev = I_BDEV(file->f_mapping->host);
+	struct block_device *bdev = I_BDEV(bdev_file_inode(file));
 	fmode_t mode = file->f_mode;
 
 	/*
@@ -1637,7 +1642,7 @@ static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
 ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
 	struct file *file = iocb->ki_filp;
-	struct inode *bd_inode = file->f_mapping->host;
+	struct inode *bd_inode = bdev_file_inode(file);
 	loff_t size = i_size_read(bd_inode);
 	struct blk_plug plug;
 	ssize_t ret;
@@ -1669,7 +1674,7 @@ EXPORT_SYMBOL_GPL(blkdev_write_iter);
 ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
 {
 	struct file *file = iocb->ki_filp;
-	struct inode *bd_inode = file->f_mapping->host;
+	struct inode *bd_inode = bdev_file_inode(file);
 	loff_t size = i_size_read(bd_inode);
 	loff_t pos = iocb->ki_pos;
 
-- 
cgit v0.10.2


From 5a023cdba50c5f5f2bc351783b3131699deb3937 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Mon, 30 Nov 2015 10:20:29 -0800
Subject: block: enable dax for raw block devices

If an application wants exclusive access to all of the persistent memory
provided by an NVDIMM namespace it can use this raw-block-dax facility
to forgo establishing a filesystem.  This capability is targeted
primarily to hypervisors wanting to provision persistent memory for
guests.  It can be disabled / enabled dynamically via the new BLKDAXSET
ioctl.

Cc: Jeff Moyer <jmoyer@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Reported-by: kbuild test robot <fengguang.wu@intel.com>
Reviewed-by: Jan Kara <jack@suse.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>

diff --git a/block/ioctl.c b/block/ioctl.c
index 0918aed..7a964d8 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -406,6 +406,62 @@ static inline int is_unrecognized_ioctl(int ret)
 		ret == -ENOIOCTLCMD;
 }
 
+#ifdef CONFIG_FS_DAX
+bool blkdev_dax_capable(struct block_device *bdev)
+{
+	struct gendisk *disk = bdev->bd_disk;
+
+	if (!disk->fops->direct_access)
+		return false;
+
+	/*
+	 * If the partition is not aligned on a page boundary, we can't
+	 * do dax I/O to it.
+	 */
+	if ((bdev->bd_part->start_sect % (PAGE_SIZE / 512))
+			|| (bdev->bd_part->nr_sects % (PAGE_SIZE / 512)))
+		return false;
+
+	return true;
+}
+
+static int blkdev_daxset(struct block_device *bdev, unsigned long argp)
+{
+	unsigned long arg;
+	int rc = 0;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EACCES;
+
+	if (get_user(arg, (int __user *)(argp)))
+		return -EFAULT;
+	arg = !!arg;
+	if (arg == !!(bdev->bd_inode->i_flags & S_DAX))
+		return 0;
+
+	if (arg)
+		arg = S_DAX;
+
+	if (arg && !blkdev_dax_capable(bdev))
+		return -ENOTTY;
+
+	mutex_lock(&bdev->bd_inode->i_mutex);
+	if (bdev->bd_map_count == 0)
+		inode_set_flags(bdev->bd_inode, arg, S_DAX);
+	else
+		rc = -EBUSY;
+	mutex_unlock(&bdev->bd_inode->i_mutex);
+	return rc;
+}
+#else
+static int blkdev_daxset(struct block_device *bdev, int arg)
+{
+	if (arg)
+		return -ENOTTY;
+	return 0;
+}
+#endif
+
 static int blkdev_flushbuf(struct block_device *bdev, fmode_t mode,
 		unsigned cmd, unsigned long arg)
 {
@@ -568,6 +624,11 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
 	case BLKTRACESETUP:
 	case BLKTRACETEARDOWN:
 		return blk_trace_ioctl(bdev, cmd, argp);
+	case BLKDAXSET:
+		return blkdev_daxset(bdev, arg);
+	case BLKDAXGET:
+		return put_int(arg, !!(bdev->bd_inode->i_flags & S_DAX));
+		break;
 	case IOC_PR_REGISTER:
 		return blkdev_pr_register(bdev, argp);
 	case IOC_PR_RESERVE:
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 52248bc..5c0b2cb 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1235,8 +1235,11 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 				}
 			}
 
-			if (!ret)
+			if (!ret) {
 				bd_set_size(bdev,(loff_t)get_capacity(disk)<<9);
+				if (!blkdev_dax_capable(bdev))
+					bdev->bd_inode->i_flags &= ~S_DAX;
+			}
 
 			/*
 			 * If the device is invalidated, rescan partition
@@ -1250,6 +1253,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 				else if (ret == -ENOMEDIUM)
 					invalidate_partitions(disk, bdev);
 			}
+
 			if (ret)
 				goto out_clear;
 		} else {
@@ -1270,12 +1274,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 				goto out_clear;
 			}
 			bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9);
-			/*
-			 * If the partition is not aligned on a page
-			 * boundary, we can't do dax I/O to it.
-			 */
-			if ((bdev->bd_part->start_sect % (PAGE_SIZE / 512)) ||
-			    (bdev->bd_part->nr_sects % (PAGE_SIZE / 512)))
+			if (!blkdev_dax_capable(bdev))
 				bdev->bd_inode->i_flags &= ~S_DAX;
 		}
 	} else {
@@ -1713,13 +1712,101 @@ static const struct address_space_operations def_blk_aops = {
 	.is_dirty_writeback = buffer_check_dirty_writeback,
 };
 
+#ifdef CONFIG_FS_DAX
+/*
+ * In the raw block case we do not need to contend with truncation nor
+ * unwritten file extents.  Without those concerns there is no need for
+ * additional locking beyond the mmap_sem context that these routines
+ * are already executing under.
+ *
+ * Note, there is no protection if the block device is dynamically
+ * resized (partition grow/shrink) during a fault. A stable block device
+ * size is already not enforced in the blkdev_direct_IO path.
+ *
+ * For DAX, it is the responsibility of the block device driver to
+ * ensure the whole-disk device size is stable while requests are in
+ * flight.
+ *
+ * Finally, unlike the filemap_page_mkwrite() case there is no
+ * filesystem superblock to sync against freezing.  We still include a
+ * pfn_mkwrite callback for dax drivers to receive write fault
+ * notifications.
+ */
+static int blkdev_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	return __dax_fault(vma, vmf, blkdev_get_block, NULL);
+}
+
+static int blkdev_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
+		pmd_t *pmd, unsigned int flags)
+{
+	return __dax_pmd_fault(vma, addr, pmd, flags, blkdev_get_block, NULL);
+}
+
+static void blkdev_vm_open(struct vm_area_struct *vma)
+{
+	struct inode *bd_inode = bdev_file_inode(vma->vm_file);
+	struct block_device *bdev = I_BDEV(bd_inode);
+
+	mutex_lock(&bd_inode->i_mutex);
+	bdev->bd_map_count++;
+	mutex_unlock(&bd_inode->i_mutex);
+}
+
+static void blkdev_vm_close(struct vm_area_struct *vma)
+{
+	struct inode *bd_inode = bdev_file_inode(vma->vm_file);
+	struct block_device *bdev = I_BDEV(bd_inode);
+
+	mutex_lock(&bd_inode->i_mutex);
+	bdev->bd_map_count--;
+	mutex_unlock(&bd_inode->i_mutex);
+}
+
+static const struct vm_operations_struct blkdev_dax_vm_ops = {
+	.open		= blkdev_vm_open,
+	.close		= blkdev_vm_close,
+	.fault		= blkdev_dax_fault,
+	.pmd_fault	= blkdev_dax_pmd_fault,
+	.pfn_mkwrite	= blkdev_dax_fault,
+};
+
+static const struct vm_operations_struct blkdev_default_vm_ops = {
+	.open		= blkdev_vm_open,
+	.close		= blkdev_vm_close,
+	.fault		= filemap_fault,
+	.map_pages	= filemap_map_pages,
+};
+
+static int blkdev_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	struct inode *bd_inode = bdev_file_inode(file);
+	struct block_device *bdev = I_BDEV(bd_inode);
+
+	file_accessed(file);
+	mutex_lock(&bd_inode->i_mutex);
+	bdev->bd_map_count++;
+	if (IS_DAX(bd_inode)) {
+		vma->vm_ops = &blkdev_dax_vm_ops;
+		vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE;
+	} else {
+		vma->vm_ops = &blkdev_default_vm_ops;
+	}
+	mutex_unlock(&bd_inode->i_mutex);
+
+	return 0;
+}
+#else
+#define blkdev_mmap generic_file_mmap
+#endif
+
 const struct file_operations def_blk_fops = {
 	.open		= blkdev_open,
 	.release	= blkdev_close,
 	.llseek		= block_llseek,
 	.read_iter	= blkdev_read_iter,
 	.write_iter	= blkdev_write_iter,
-	.mmap		= generic_file_mmap,
+	.mmap		= blkdev_mmap,
 	.fsync		= blkdev_fsync,
 	.unlocked_ioctl	= block_ioctl,
 #ifdef CONFIG_COMPAT
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 3aa5142..96fabc9 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -482,6 +482,9 @@ struct block_device {
 	int			bd_fsfreeze_count;
 	/* Mutex for freeze */
 	struct mutex		bd_fsfreeze_mutex;
+#ifdef CONFIG_FS_DAX
+	int			bd_map_count;
+#endif
 };
 
 /*
@@ -2264,6 +2267,14 @@ extern struct super_block *freeze_bdev(struct block_device *);
 extern void emergency_thaw_all(void);
 extern int thaw_bdev(struct block_device *bdev, struct super_block *sb);
 extern int fsync_bdev(struct block_device *);
+#ifdef CONFIG_FS_DAX
+extern bool blkdev_dax_capable(struct block_device *bdev);
+#else
+static inline bool blkdev_dax_capable(struct block_device *bdev)
+{
+	return false;
+}
+#endif
 
 extern struct super_block *blockdev_superblock;
 
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index f15d980..401c409 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -152,6 +152,8 @@ struct inodes_stat_t {
 #define BLKSECDISCARD _IO(0x12,125)
 #define BLKROTATIONAL _IO(0x12,126)
 #define BLKZEROOUT _IO(0x12,127)
+#define BLKDAXSET _IO(0x12,128)
+#define BLKDAXGET _IO(0x12,129)
 
 #define BMAP_IOCTL 1		/* obsolete - kept for compatibility */
 #define FIBMAP	   _IO(0x00,1)	/* bmap access */
-- 
cgit v0.10.2


From ac34f15e0c6d2fd58480052b6985f6991fb53bcc Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Tue, 29 Dec 2015 14:02:29 -0800
Subject: block: fix del_gendisk() vs blkdev_ioctl crash

When tearing down a block device early in its lifetime, userspace may
still be performing discovery actions like blkdev_ioctl() to re-read
partitions.

The nvdimm_revalidate_disk() implementation depends on
disk->driverfs_dev to be valid at entry.  However, it is set to NULL in
del_gendisk() and fatally this is happening *before* the disk device is
deleted from userspace view.

There's no reason for del_gendisk() to clear ->driverfs_dev.  That
device is the parent of the disk.  It is guaranteed to not be freed
until the disk, as a child, drops its ->parent reference.

We could also fix this issue locally in nvdimm_revalidate_disk() by
using disk_to_dev(disk)->parent, but lets fix it globally since
->driverfs_dev follows the lifetime of the parent.  Longer term we
should probably just add a @parent parameter to add_disk(), and stop
carrying this pointer in the gendisk.

 BUG: unable to handle kernel NULL pointer dereference at           (null)
 IP: [<ffffffffa00340a8>] nvdimm_revalidate_disk+0x18/0x90 [libnvdimm]
 CPU: 2 PID: 538 Comm: systemd-udevd Tainted: G           O    4.4.0-rc5 #2257
 [..]
 Call Trace:
  [<ffffffff8143e5c7>] rescan_partitions+0x87/0x2c0
  [<ffffffff810f37f9>] ? __lock_is_held+0x49/0x70
  [<ffffffff81438c62>] __blkdev_reread_part+0x72/0xb0
  [<ffffffff81438cc5>] blkdev_reread_part+0x25/0x40
  [<ffffffff8143982d>] blkdev_ioctl+0x4fd/0x9c0
  [<ffffffff811246c9>] ? current_kernel_time64+0x69/0xd0
  [<ffffffff812916dd>] block_ioctl+0x3d/0x50
  [<ffffffff81264c38>] do_vfs_ioctl+0x308/0x560
  [<ffffffff8115dbd1>] ? __audit_syscall_entry+0xb1/0x100
  [<ffffffff810031d6>] ? do_audit_syscall_entry+0x66/0x70
  [<ffffffff81264f09>] SyS_ioctl+0x79/0x90
  [<ffffffff81902672>] entry_SYSCALL_64_fastpath+0x12/0x76

Reported-by: Robert Hu <robert.hu@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>

diff --git a/block/genhd.c b/block/genhd.c
index e5cafa5..1a971be 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -664,7 +664,6 @@ void del_gendisk(struct gendisk *disk)
 
 	kobject_put(disk->part0.holder_dir);
 	kobject_put(disk->slave_dir);
-	disk->driverfs_dev = NULL;
 	if (!sysfs_deprecated)
 		sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk)));
 	pm_runtime_set_memalloc_noio(disk_to_dev(disk), false);
-- 
cgit v0.10.2


From 9e0e252a048b0ba5066f0dc15c3b2468ffe5c422 Mon Sep 17 00:00:00 2001
From: Vishal Verma <vishal.l.verma@intel.com>
Date: Thu, 24 Dec 2015 19:20:32 -0700
Subject: badblocks: Add core badblock management code

Take the core badblocks implementation from md, and make it generally
available. This follows the same style as kernel implementations of
linked lists, rb-trees etc, where you can have a structure that can be
embedded anywhere, and accessor functions to manipulate the data.

The only changes in this copy of the code are ones to generalize
function/variable names from md-specific ones. Also add init and free
functions.

Signed-off-by: Vishal Verma <vishal.l.verma@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>

diff --git a/block/Makefile b/block/Makefile
index 00ecc97..db5f622 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -8,7 +8,7 @@ obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-tag.o blk-sysfs.o \
 			blk-iopoll.o blk-lib.o blk-mq.o blk-mq-tag.o \
 			blk-mq-sysfs.o blk-mq-cpu.o blk-mq-cpumap.o ioctl.o \
 			genhd.o scsi_ioctl.o partition-generic.o ioprio.o \
-			partitions/
+			badblocks.o partitions/
 
 obj-$(CONFIG_BOUNCE)	+= bounce.o
 obj-$(CONFIG_BLK_DEV_BSG)	+= bsg.o
diff --git a/block/badblocks.c b/block/badblocks.c
new file mode 100644
index 0000000..96aeb91
--- /dev/null
+++ b/block/badblocks.c
@@ -0,0 +1,561 @@
+/*
+ * Bad block management
+ *
+ * - Heavily based on MD badblocks code from Neil Brown
+ *
+ * Copyright (c) 2015, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/badblocks.h>
+#include <linux/seqlock.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/stddef.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+
+/**
+ * badblocks_check() - check a given range for bad sectors
+ * @bb:		the badblocks structure that holds all badblock information
+ * @s:		sector (start) at which to check for badblocks
+ * @sectors:	number of sectors to check for badblocks
+ * @first_bad:	pointer to store location of the first badblock
+ * @bad_sectors: pointer to store number of badblocks after @first_bad
+ *
+ * We can record which blocks on each device are 'bad' and so just
+ * fail those blocks, or that stripe, rather than the whole device.
+ * Entries in the bad-block table are 64bits wide.  This comprises:
+ * Length of bad-range, in sectors: 0-511 for lengths 1-512
+ * Start of bad-range, sector offset, 54 bits (allows 8 exbibytes)
+ *  A 'shift' can be set so that larger blocks are tracked and
+ *  consequently larger devices can be covered.
+ * 'Acknowledged' flag - 1 bit. - the most significant bit.
+ *
+ * Locking of the bad-block table uses a seqlock so badblocks_check
+ * might need to retry if it is very unlucky.
+ * We will sometimes want to check for bad blocks in a bi_end_io function,
+ * so we use the write_seqlock_irq variant.
+ *
+ * When looking for a bad block we specify a range and want to
+ * know if any block in the range is bad.  So we binary-search
+ * to the last range that starts at-or-before the given endpoint,
+ * (or "before the sector after the target range")
+ * then see if it ends after the given start.
+ *
+ * Return:
+ *  0: there are no known bad blocks in the range
+ *  1: there are known bad block which are all acknowledged
+ * -1: there are bad blocks which have not yet been acknowledged in metadata.
+ * plus the start/length of the first bad section we overlap.
+ */
+int badblocks_check(struct badblocks *bb, sector_t s, int sectors,
+			sector_t *first_bad, int *bad_sectors)
+{
+	int hi;
+	int lo;
+	u64 *p = bb->page;
+	int rv;
+	sector_t target = s + sectors;
+	unsigned seq;
+
+	if (bb->shift > 0) {
+		/* round the start down, and the end up */
+		s >>= bb->shift;
+		target += (1<<bb->shift) - 1;
+		target >>= bb->shift;
+		sectors = target - s;
+	}
+	/* 'target' is now the first block after the bad range */
+
+retry:
+	seq = read_seqbegin(&bb->lock);
+	lo = 0;
+	rv = 0;
+	hi = bb->count;
+
+	/* Binary search between lo and hi for 'target'
+	 * i.e. for the last range that starts before 'target'
+	 */
+	/* INVARIANT: ranges before 'lo' and at-or-after 'hi'
+	 * are known not to be the last range before target.
+	 * VARIANT: hi-lo is the number of possible
+	 * ranges, and decreases until it reaches 1
+	 */
+	while (hi - lo > 1) {
+		int mid = (lo + hi) / 2;
+		sector_t a = BB_OFFSET(p[mid]);
+
+		if (a < target)
+			/* This could still be the one, earlier ranges
+			 * could not.
+			 */
+			lo = mid;
+		else
+			/* This and later ranges are definitely out. */
+			hi = mid;
+	}
+	/* 'lo' might be the last that started before target, but 'hi' isn't */
+	if (hi > lo) {
+		/* need to check all range that end after 's' to see if
+		 * any are unacknowledged.
+		 */
+		while (lo >= 0 &&
+		       BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) {
+			if (BB_OFFSET(p[lo]) < target) {
+				/* starts before the end, and finishes after
+				 * the start, so they must overlap
+				 */
+				if (rv != -1 && BB_ACK(p[lo]))
+					rv = 1;
+				else
+					rv = -1;
+				*first_bad = BB_OFFSET(p[lo]);
+				*bad_sectors = BB_LEN(p[lo]);
+			}
+			lo--;
+		}
+	}
+
+	if (read_seqretry(&bb->lock, seq))
+		goto retry;
+
+	return rv;
+}
+EXPORT_SYMBOL_GPL(badblocks_check);
+
+/**
+ * badblocks_set() - Add a range of bad blocks to the table.
+ * @bb:		the badblocks structure that holds all badblock information
+ * @s:		first sector to mark as bad
+ * @sectors:	number of sectors to mark as bad
+ * @acknowledged: weather to mark the bad sectors as acknowledged
+ *
+ * This might extend the table, or might contract it if two adjacent ranges
+ * can be merged. We binary-search to find the 'insertion' point, then
+ * decide how best to handle it.
+ *
+ * Return:
+ *  0: success
+ *  1: failed to set badblocks (out of space)
+ */
+int badblocks_set(struct badblocks *bb, sector_t s, int sectors,
+			int acknowledged)
+{
+	u64 *p;
+	int lo, hi;
+	int rv = 0;
+	unsigned long flags;
+
+	if (bb->shift < 0)
+		/* badblocks are disabled */
+		return 0;
+
+	if (bb->shift) {
+		/* round the start down, and the end up */
+		sector_t next = s + sectors;
+
+		s >>= bb->shift;
+		next += (1<<bb->shift) - 1;
+		next >>= bb->shift;
+		sectors = next - s;
+	}
+
+	write_seqlock_irqsave(&bb->lock, flags);
+
+	p = bb->page;
+	lo = 0;
+	hi = bb->count;
+	/* Find the last range that starts at-or-before 's' */
+	while (hi - lo > 1) {
+		int mid = (lo + hi) / 2;
+		sector_t a = BB_OFFSET(p[mid]);
+
+		if (a <= s)
+			lo = mid;
+		else
+			hi = mid;
+	}
+	if (hi > lo && BB_OFFSET(p[lo]) > s)
+		hi = lo;
+
+	if (hi > lo) {
+		/* we found a range that might merge with the start
+		 * of our new range
+		 */
+		sector_t a = BB_OFFSET(p[lo]);
+		sector_t e = a + BB_LEN(p[lo]);
+		int ack = BB_ACK(p[lo]);
+
+		if (e >= s) {
+			/* Yes, we can merge with a previous range */
+			if (s == a && s + sectors >= e)
+				/* new range covers old */
+				ack = acknowledged;
+			else
+				ack = ack && acknowledged;
+
+			if (e < s + sectors)
+				e = s + sectors;
+			if (e - a <= BB_MAX_LEN) {
+				p[lo] = BB_MAKE(a, e-a, ack);
+				s = e;
+			} else {
+				/* does not all fit in one range,
+				 * make p[lo] maximal
+				 */
+				if (BB_LEN(p[lo]) != BB_MAX_LEN)
+					p[lo] = BB_MAKE(a, BB_MAX_LEN, ack);
+				s = a + BB_MAX_LEN;
+			}
+			sectors = e - s;
+		}
+	}
+	if (sectors && hi < bb->count) {
+		/* 'hi' points to the first range that starts after 's'.
+		 * Maybe we can merge with the start of that range
+		 */
+		sector_t a = BB_OFFSET(p[hi]);
+		sector_t e = a + BB_LEN(p[hi]);
+		int ack = BB_ACK(p[hi]);
+
+		if (a <= s + sectors) {
+			/* merging is possible */
+			if (e <= s + sectors) {
+				/* full overlap */
+				e = s + sectors;
+				ack = acknowledged;
+			} else
+				ack = ack && acknowledged;
+
+			a = s;
+			if (e - a <= BB_MAX_LEN) {
+				p[hi] = BB_MAKE(a, e-a, ack);
+				s = e;
+			} else {
+				p[hi] = BB_MAKE(a, BB_MAX_LEN, ack);
+				s = a + BB_MAX_LEN;
+			}
+			sectors = e - s;
+			lo = hi;
+			hi++;
+		}
+	}
+	if (sectors == 0 && hi < bb->count) {
+		/* we might be able to combine lo and hi */
+		/* Note: 's' is at the end of 'lo' */
+		sector_t a = BB_OFFSET(p[hi]);
+		int lolen = BB_LEN(p[lo]);
+		int hilen = BB_LEN(p[hi]);
+		int newlen = lolen + hilen - (s - a);
+
+		if (s >= a && newlen < BB_MAX_LEN) {
+			/* yes, we can combine them */
+			int ack = BB_ACK(p[lo]) && BB_ACK(p[hi]);
+
+			p[lo] = BB_MAKE(BB_OFFSET(p[lo]), newlen, ack);
+			memmove(p + hi, p + hi + 1,
+				(bb->count - hi - 1) * 8);
+			bb->count--;
+		}
+	}
+	while (sectors) {
+		/* didn't merge (it all).
+		 * Need to add a range just before 'hi'
+		 */
+		if (bb->count >= MAX_BADBLOCKS) {
+			/* No room for more */
+			rv = 1;
+			break;
+		} else {
+			int this_sectors = sectors;
+
+			memmove(p + hi + 1, p + hi,
+				(bb->count - hi) * 8);
+			bb->count++;
+
+			if (this_sectors > BB_MAX_LEN)
+				this_sectors = BB_MAX_LEN;
+			p[hi] = BB_MAKE(s, this_sectors, acknowledged);
+			sectors -= this_sectors;
+			s += this_sectors;
+		}
+	}
+
+	bb->changed = 1;
+	if (!acknowledged)
+		bb->unacked_exist = 1;
+	write_sequnlock_irqrestore(&bb->lock, flags);
+
+	return rv;
+}
+EXPORT_SYMBOL_GPL(badblocks_set);
+
+/**
+ * badblocks_clear() - Remove a range of bad blocks to the table.
+ * @bb:		the badblocks structure that holds all badblock information
+ * @s:		first sector to mark as bad
+ * @sectors:	number of sectors to mark as bad
+ *
+ * This may involve extending the table if we spilt a region,
+ * but it must not fail.  So if the table becomes full, we just
+ * drop the remove request.
+ *
+ * Return:
+ *  0: success
+ *  1: failed to clear badblocks
+ */
+int badblocks_clear(struct badblocks *bb, sector_t s, int sectors)
+{
+	u64 *p;
+	int lo, hi;
+	sector_t target = s + sectors;
+	int rv = 0;
+
+	if (bb->shift > 0) {
+		/* When clearing we round the start up and the end down.
+		 * This should not matter as the shift should align with
+		 * the block size and no rounding should ever be needed.
+		 * However it is better the think a block is bad when it
+		 * isn't than to think a block is not bad when it is.
+		 */
+		s += (1<<bb->shift) - 1;
+		s >>= bb->shift;
+		target >>= bb->shift;
+		sectors = target - s;
+	}
+
+	write_seqlock_irq(&bb->lock);
+
+	p = bb->page;
+	lo = 0;
+	hi = bb->count;
+	/* Find the last range that starts before 'target' */
+	while (hi - lo > 1) {
+		int mid = (lo + hi) / 2;
+		sector_t a = BB_OFFSET(p[mid]);
+
+		if (a < target)
+			lo = mid;
+		else
+			hi = mid;
+	}
+	if (hi > lo) {
+		/* p[lo] is the last range that could overlap the
+		 * current range.  Earlier ranges could also overlap,
+		 * but only this one can overlap the end of the range.
+		 */
+		if (BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > target) {
+			/* Partial overlap, leave the tail of this range */
+			int ack = BB_ACK(p[lo]);
+			sector_t a = BB_OFFSET(p[lo]);
+			sector_t end = a + BB_LEN(p[lo]);
+
+			if (a < s) {
+				/* we need to split this range */
+				if (bb->count >= MAX_BADBLOCKS) {
+					rv = -ENOSPC;
+					goto out;
+				}
+				memmove(p+lo+1, p+lo, (bb->count - lo) * 8);
+				bb->count++;
+				p[lo] = BB_MAKE(a, s-a, ack);
+				lo++;
+			}
+			p[lo] = BB_MAKE(target, end - target, ack);
+			/* there is no longer an overlap */
+			hi = lo;
+			lo--;
+		}
+		while (lo >= 0 &&
+		       BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) {
+			/* This range does overlap */
+			if (BB_OFFSET(p[lo]) < s) {
+				/* Keep the early parts of this range. */
+				int ack = BB_ACK(p[lo]);
+				sector_t start = BB_OFFSET(p[lo]);
+
+				p[lo] = BB_MAKE(start, s - start, ack);
+				/* now low doesn't overlap, so.. */
+				break;
+			}
+			lo--;
+		}
+		/* 'lo' is strictly before, 'hi' is strictly after,
+		 * anything between needs to be discarded
+		 */
+		if (hi - lo > 1) {
+			memmove(p+lo+1, p+hi, (bb->count - hi) * 8);
+			bb->count -= (hi - lo - 1);
+		}
+	}
+
+	bb->changed = 1;
+out:
+	write_sequnlock_irq(&bb->lock);
+	return rv;
+}
+EXPORT_SYMBOL_GPL(badblocks_clear);
+
+/**
+ * ack_all_badblocks() - Acknowledge all bad blocks in a list.
+ * @bb:		the badblocks structure that holds all badblock information
+ *
+ * This only succeeds if ->changed is clear.  It is used by
+ * in-kernel metadata updates
+ */
+void ack_all_badblocks(struct badblocks *bb)
+{
+	if (bb->page == NULL || bb->changed)
+		/* no point even trying */
+		return;
+	write_seqlock_irq(&bb->lock);
+
+	if (bb->changed == 0 && bb->unacked_exist) {
+		u64 *p = bb->page;
+		int i;
+
+		for (i = 0; i < bb->count ; i++) {
+			if (!BB_ACK(p[i])) {
+				sector_t start = BB_OFFSET(p[i]);
+				int len = BB_LEN(p[i]);
+
+				p[i] = BB_MAKE(start, len, 1);
+			}
+		}
+		bb->unacked_exist = 0;
+	}
+	write_sequnlock_irq(&bb->lock);
+}
+EXPORT_SYMBOL_GPL(ack_all_badblocks);
+
+/**
+ * badblocks_show() - sysfs access to bad-blocks list
+ * @bb:		the badblocks structure that holds all badblock information
+ * @page:	buffer received from sysfs
+ * @unack:	weather to show unacknowledged badblocks
+ *
+ * Return:
+ *  Length of returned data
+ */
+ssize_t badblocks_show(struct badblocks *bb, char *page, int unack)
+{
+	size_t len;
+	int i;
+	u64 *p = bb->page;
+	unsigned seq;
+
+	if (bb->shift < 0)
+		return 0;
+
+retry:
+	seq = read_seqbegin(&bb->lock);
+
+	len = 0;
+	i = 0;
+
+	while (len < PAGE_SIZE && i < bb->count) {
+		sector_t s = BB_OFFSET(p[i]);
+		unsigned int length = BB_LEN(p[i]);
+		int ack = BB_ACK(p[i]);
+
+		i++;
+
+		if (unack && ack)
+			continue;
+
+		len += snprintf(page+len, PAGE_SIZE-len, "%llu %u\n",
+				(unsigned long long)s << bb->shift,
+				length << bb->shift);
+	}
+	if (unack && len == 0)
+		bb->unacked_exist = 0;
+
+	if (read_seqretry(&bb->lock, seq))
+		goto retry;
+
+	return len;
+}
+EXPORT_SYMBOL_GPL(badblocks_show);
+
+/**
+ * badblocks_store() - sysfs access to bad-blocks list
+ * @bb:		the badblocks structure that holds all badblock information
+ * @page:	buffer received from sysfs
+ * @len:	length of data received from sysfs
+ * @unack:	weather to show unacknowledged badblocks
+ *
+ * Return:
+ *  Length of the buffer processed or -ve error.
+ */
+ssize_t badblocks_store(struct badblocks *bb, const char *page, size_t len,
+			int unack)
+{
+	unsigned long long sector;
+	int length;
+	char newline;
+
+	switch (sscanf(page, "%llu %d%c", &sector, &length, &newline)) {
+	case 3:
+		if (newline != '\n')
+			return -EINVAL;
+	case 2:
+		if (length <= 0)
+			return -EINVAL;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (badblocks_set(bb, sector, length, !unack))
+		return -ENOSPC;
+	else
+		return len;
+}
+EXPORT_SYMBOL_GPL(badblocks_store);
+
+/**
+ * badblocks_init() - initialize the badblocks structure
+ * @bb:		the badblocks structure that holds all badblock information
+ * @enable:	weather to enable badblocks accounting
+ *
+ * Return:
+ *  0: success
+ *  -ve errno: on error
+ */
+int badblocks_init(struct badblocks *bb, int enable)
+{
+	bb->count = 0;
+	if (enable)
+		bb->shift = 0;
+	else
+		bb->shift = -1;
+	bb->page = kzalloc(PAGE_SIZE, GFP_KERNEL);
+	if (bb->page == (u64 *)0) {
+		bb->shift = -1;
+		return -ENOMEM;
+	}
+	seqlock_init(&bb->lock);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(badblocks_init);
+
+/**
+ * badblocks_free() - free the badblocks structure
+ * @bb:		the badblocks structure that holds all badblock information
+ */
+void badblocks_free(struct badblocks *bb)
+{
+	kfree(bb->page);
+	bb->page = NULL;
+}
+EXPORT_SYMBOL_GPL(badblocks_free);
diff --git a/include/linux/badblocks.h b/include/linux/badblocks.h
new file mode 100644
index 0000000..9293446
--- /dev/null
+++ b/include/linux/badblocks.h
@@ -0,0 +1,53 @@
+#ifndef _LINUX_BADBLOCKS_H
+#define _LINUX_BADBLOCKS_H
+
+#include <linux/seqlock.h>
+#include <linux/kernel.h>
+#include <linux/stddef.h>
+#include <linux/types.h>
+
+#define BB_LEN_MASK	(0x00000000000001FFULL)
+#define BB_OFFSET_MASK	(0x7FFFFFFFFFFFFE00ULL)
+#define BB_ACK_MASK	(0x8000000000000000ULL)
+#define BB_MAX_LEN	512
+#define BB_OFFSET(x)	(((x) & BB_OFFSET_MASK) >> 9)
+#define BB_LEN(x)	(((x) & BB_LEN_MASK) + 1)
+#define BB_ACK(x)	(!!((x) & BB_ACK_MASK))
+#define BB_MAKE(a, l, ack) (((a)<<9) | ((l)-1) | ((u64)(!!(ack)) << 63))
+
+/* Bad block numbers are stored sorted in a single page.
+ * 64bits is used for each block or extent.
+ * 54 bits are sector number, 9 bits are extent size,
+ * 1 bit is an 'acknowledged' flag.
+ */
+#define MAX_BADBLOCKS	(PAGE_SIZE/8)
+
+struct badblocks {
+	int count;		/* count of bad blocks */
+	int unacked_exist;	/* there probably are unacknowledged
+				 * bad blocks.  This is only cleared
+				 * when a read discovers none
+				 */
+	int shift;		/* shift from sectors to block size
+				 * a -ve shift means badblocks are
+				 * disabled.*/
+	u64 *page;		/* badblock list */
+	int changed;
+	seqlock_t lock;
+	sector_t sector;
+	sector_t size;		/* in sectors */
+};
+
+int badblocks_check(struct badblocks *bb, sector_t s, int sectors,
+		   sector_t *first_bad, int *bad_sectors);
+int badblocks_set(struct badblocks *bb, sector_t s, int sectors,
+			int acknowledged);
+int badblocks_clear(struct badblocks *bb, sector_t s, int sectors);
+void ack_all_badblocks(struct badblocks *bb);
+ssize_t badblocks_show(struct badblocks *bb, char *page, int unack);
+ssize_t badblocks_store(struct badblocks *bb, const char *page, size_t len,
+			int unack);
+int badblocks_init(struct badblocks *bb, int enable);
+void badblocks_free(struct badblocks *bb);
+
+#endif
-- 
cgit v0.10.2


From 99e6608c9e7414ae4f2168df8bf8fae3eb49e41f Mon Sep 17 00:00:00 2001
From: Vishal Verma <vishal.l.verma@intel.com>
Date: Sat, 9 Jan 2016 08:36:51 -0800
Subject: block: Add badblock management for gendisks

NVDIMM devices, which can behave more like DRAM rather than block
devices, may develop bad cache lines, or 'poison'. A block device
exposed by the pmem driver can then consume poison via a read (or
write), and cause a machine check. On platforms without machine
check recovery features, this would mean a crash.

The block device maintaining a runtime list of all known sectors that
have poison can directly avoid this, and also provide a path forward
to enable proper handling/recovery for DAX faults on such a device.

Use the new badblock management interfaces to add a badblocks list to
gendisks.

Signed-off-by: Vishal Verma <vishal.l.verma@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>

diff --git a/block/genhd.c b/block/genhd.c
index 1a971be..88579cf 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -20,6 +20,7 @@
 #include <linux/idr.h>
 #include <linux/log2.h>
 #include <linux/pm_runtime.h>
+#include <linux/badblocks.h>
 
 #include "blk.h"
 
@@ -505,6 +506,16 @@ static int exact_lock(dev_t devt, void *data)
 	return 0;
 }
 
+int disk_alloc_badblocks(struct gendisk *disk)
+{
+	disk->bb = kzalloc(sizeof(*(disk->bb)), GFP_KERNEL);
+	if (!disk->bb)
+		return -ENOMEM;
+
+	return badblocks_init(disk->bb, 1);
+}
+EXPORT_SYMBOL(disk_alloc_badblocks);
+
 static void register_disk(struct gendisk *disk)
 {
 	struct device *ddev = disk_to_dev(disk);
@@ -659,6 +670,11 @@ void del_gendisk(struct gendisk *disk)
 	blk_unregister_queue(disk);
 	blk_unregister_region(disk_devt(disk), disk->minors);
 
+	if (disk->bb) {
+		badblocks_free(disk->bb);
+		kfree(disk->bb);
+	}
+
 	part_stat_set_all(&disk->part0, 0);
 	disk->part0.stamp = 0;
 
@@ -671,6 +687,63 @@ void del_gendisk(struct gendisk *disk)
 }
 EXPORT_SYMBOL(del_gendisk);
 
+/*
+ * The gendisk usage of badblocks does not track acknowledgements for
+ * badblocks. We always assume they are acknowledged.
+ */
+int disk_check_badblocks(struct gendisk *disk, sector_t s, int sectors,
+		   sector_t *first_bad, int *bad_sectors)
+{
+	if (!disk->bb)
+		return 0;
+
+	return badblocks_check(disk->bb, s, sectors, first_bad, bad_sectors);
+}
+EXPORT_SYMBOL(disk_check_badblocks);
+
+int disk_set_badblocks(struct gendisk *disk, sector_t s, int sectors)
+{
+	if (!disk->bb)
+		return 0;
+
+	return badblocks_set(disk->bb, s, sectors, 1);
+}
+EXPORT_SYMBOL(disk_set_badblocks);
+
+int disk_clear_badblocks(struct gendisk *disk, sector_t s, int sectors)
+{
+	if (!disk->bb)
+		return 0;
+
+	return badblocks_clear(disk->bb, s, sectors);
+}
+EXPORT_SYMBOL(disk_clear_badblocks);
+
+/* sysfs access to bad-blocks list. */
+static ssize_t disk_badblocks_show(struct device *dev,
+					struct device_attribute *attr,
+					char *page)
+{
+	struct gendisk *disk = dev_to_disk(dev);
+
+	if (!disk->bb)
+		return sprintf(page, "\n");
+
+	return badblocks_show(disk->bb, page, 0);
+}
+
+static ssize_t disk_badblocks_store(struct device *dev,
+					struct device_attribute *attr,
+					const char *page, size_t len)
+{
+	struct gendisk *disk = dev_to_disk(dev);
+
+	if (!disk->bb)
+		return -ENXIO;
+
+	return badblocks_store(disk->bb, page, len, 0);
+}
+
 /**
  * get_gendisk - get partitioning information for a given device
  * @devt: device to get partitioning information for
@@ -989,6 +1062,8 @@ static DEVICE_ATTR(discard_alignment, S_IRUGO, disk_discard_alignment_show,
 static DEVICE_ATTR(capability, S_IRUGO, disk_capability_show, NULL);
 static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL);
 static DEVICE_ATTR(inflight, S_IRUGO, part_inflight_show, NULL);
+static DEVICE_ATTR(badblocks, S_IRUGO | S_IWUSR, disk_badblocks_show,
+		disk_badblocks_store);
 #ifdef CONFIG_FAIL_MAKE_REQUEST
 static struct device_attribute dev_attr_fail =
 	__ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store);
@@ -1010,6 +1085,7 @@ static struct attribute *disk_attrs[] = {
 	&dev_attr_capability.attr,
 	&dev_attr_stat.attr,
 	&dev_attr_inflight.attr,
+	&dev_attr_badblocks.attr,
 #ifdef CONFIG_FAIL_MAKE_REQUEST
 	&dev_attr_fail.attr,
 #endif
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 847cc1d..0bbec68 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -162,6 +162,7 @@ struct disk_part_tbl {
 };
 
 struct disk_events;
+struct badblocks;
 
 #if defined(CONFIG_BLK_DEV_INTEGRITY)
 
@@ -213,6 +214,7 @@ struct gendisk {
 	struct kobject integrity_kobj;
 #endif	/* CONFIG_BLK_DEV_INTEGRITY */
 	int node_id;
+	struct badblocks *bb;
 };
 
 static inline struct gendisk *part_to_disk(struct hd_struct *part)
@@ -433,6 +435,11 @@ extern void add_disk(struct gendisk *disk);
 extern void del_gendisk(struct gendisk *gp);
 extern struct gendisk *get_gendisk(dev_t dev, int *partno);
 extern struct block_device *bdget_disk(struct gendisk *disk, int partno);
+int disk_alloc_badblocks(struct gendisk *disk);
+extern int disk_check_badblocks(struct gendisk *disk, sector_t s, int sectors,
+		   sector_t *first_bad, int *bad_sectors);
+extern int disk_set_badblocks(struct gendisk *disk, sector_t s, int sectors);
+extern int disk_clear_badblocks(struct gendisk *disk, sector_t s, int sectors);
 
 extern void set_device_ro(struct block_device *bdev, int flag);
 extern void set_disk_ro(struct gendisk *disk, int flag);
-- 
cgit v0.10.2


From fc974ee2bffdde47d1e4b220cf326952cc2c4794 Mon Sep 17 00:00:00 2001
From: Vishal Verma <vishal.l.verma@intel.com>
Date: Thu, 24 Dec 2015 19:20:34 -0700
Subject: md: convert to use the generic badblocks code

Retain badblocks as part of rdev, but use the accessor functions from
include/linux/badblocks for all manipulation.

Signed-off-by: Vishal Verma <vishal.l.verma@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 807095f..1e48aa9 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -34,6 +34,7 @@
 
 #include <linux/kthread.h>
 #include <linux/blkdev.h>
+#include <linux/badblocks.h>
 #include <linux/sysctl.h>
 #include <linux/seq_file.h>
 #include <linux/fs.h>
@@ -709,8 +710,7 @@ void md_rdev_clear(struct md_rdev *rdev)
 		put_page(rdev->bb_page);
 		rdev->bb_page = NULL;
 	}
-	kfree(rdev->badblocks.page);
-	rdev->badblocks.page = NULL;
+	badblocks_free(&rdev->badblocks);
 }
 EXPORT_SYMBOL_GPL(md_rdev_clear);
 
@@ -1360,8 +1360,6 @@ static __le32 calc_sb_1_csum(struct mdp_superblock_1 *sb)
 	return cpu_to_le32(csum);
 }
 
-static int md_set_badblocks(struct badblocks *bb, sector_t s, int sectors,
-			    int acknowledged);
 static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_version)
 {
 	struct mdp_superblock_1 *sb;
@@ -1486,8 +1484,7 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_
 			count <<= sb->bblog_shift;
 			if (bb + 1 == 0)
 				break;
-			if (md_set_badblocks(&rdev->badblocks,
-					     sector, count, 1) == 0)
+			if (badblocks_set(&rdev->badblocks, sector, count, 1))
 				return -EINVAL;
 		}
 	} else if (sb->bblog_offset != 0)
@@ -2319,7 +2316,7 @@ repeat:
 			rdev_for_each(rdev, mddev) {
 				if (rdev->badblocks.changed) {
 					rdev->badblocks.changed = 0;
-					md_ack_all_badblocks(&rdev->badblocks);
+					ack_all_badblocks(&rdev->badblocks);
 					md_error(mddev, rdev);
 				}
 				clear_bit(Blocked, &rdev->flags);
@@ -2445,7 +2442,7 @@ repeat:
 			clear_bit(Blocked, &rdev->flags);
 
 		if (any_badblocks_changed)
-			md_ack_all_badblocks(&rdev->badblocks);
+			ack_all_badblocks(&rdev->badblocks);
 		clear_bit(BlockedBadBlocks, &rdev->flags);
 		wake_up(&rdev->blocked_wait);
 	}
@@ -3046,11 +3043,17 @@ static ssize_t recovery_start_store(struct md_rdev *rdev, const char *buf, size_
 static struct rdev_sysfs_entry rdev_recovery_start =
 __ATTR(recovery_start, S_IRUGO|S_IWUSR, recovery_start_show, recovery_start_store);
 
-static ssize_t
-badblocks_show(struct badblocks *bb, char *page, int unack);
-static ssize_t
-badblocks_store(struct badblocks *bb, const char *page, size_t len, int unack);
-
+/* sysfs access to bad-blocks list.
+ * We present two files.
+ * 'bad-blocks' lists sector numbers and lengths of ranges that
+ *    are recorded as bad.  The list is truncated to fit within
+ *    the one-page limit of sysfs.
+ *    Writing "sector length" to this file adds an acknowledged
+ *    bad block list.
+ * 'unacknowledged-bad-blocks' lists bad blocks that have not yet
+ *    been acknowledged.  Writing to this file adds bad blocks
+ *    without acknowledging them.  This is largely for testing.
+ */
 static ssize_t bb_show(struct md_rdev *rdev, char *page)
 {
 	return badblocks_show(&rdev->badblocks, page, 0);
@@ -3165,14 +3168,7 @@ int md_rdev_init(struct md_rdev *rdev)
 	 * This reserves the space even on arrays where it cannot
 	 * be used - I wonder if that matters
 	 */
-	rdev->badblocks.count = 0;
-	rdev->badblocks.shift = -1; /* disabled until explicitly enabled */
-	rdev->badblocks.page = kmalloc(PAGE_SIZE, GFP_KERNEL);
-	seqlock_init(&rdev->badblocks.lock);
-	if (rdev->badblocks.page == NULL)
-		return -ENOMEM;
-
-	return 0;
+	return badblocks_init(&rdev->badblocks, 0);
 }
 EXPORT_SYMBOL_GPL(md_rdev_init);
 /*
@@ -8478,254 +8474,9 @@ void md_finish_reshape(struct mddev *mddev)
 }
 EXPORT_SYMBOL(md_finish_reshape);
 
-/* Bad block management.
- * We can record which blocks on each device are 'bad' and so just
- * fail those blocks, or that stripe, rather than the whole device.
- * Entries in the bad-block table are 64bits wide.  This comprises:
- * Length of bad-range, in sectors: 0-511 for lengths 1-512
- * Start of bad-range, sector offset, 54 bits (allows 8 exbibytes)
- *  A 'shift' can be set so that larger blocks are tracked and
- *  consequently larger devices can be covered.
- * 'Acknowledged' flag - 1 bit. - the most significant bit.
- *
- * Locking of the bad-block table uses a seqlock so md_is_badblock
- * might need to retry if it is very unlucky.
- * We will sometimes want to check for bad blocks in a bi_end_io function,
- * so we use the write_seqlock_irq variant.
- *
- * When looking for a bad block we specify a range and want to
- * know if any block in the range is bad.  So we binary-search
- * to the last range that starts at-or-before the given endpoint,
- * (or "before the sector after the target range")
- * then see if it ends after the given start.
- * We return
- *  0 if there are no known bad blocks in the range
- *  1 if there are known bad block which are all acknowledged
- * -1 if there are bad blocks which have not yet been acknowledged in metadata.
- * plus the start/length of the first bad section we overlap.
- */
-int md_is_badblock(struct badblocks *bb, sector_t s, int sectors,
-		   sector_t *first_bad, int *bad_sectors)
-{
-	int hi;
-	int lo;
-	u64 *p = bb->page;
-	int rv;
-	sector_t target = s + sectors;
-	unsigned seq;
-
-	if (bb->shift > 0) {
-		/* round the start down, and the end up */
-		s >>= bb->shift;
-		target += (1<<bb->shift) - 1;
-		target >>= bb->shift;
-		sectors = target - s;
-	}
-	/* 'target' is now the first block after the bad range */
-
-retry:
-	seq = read_seqbegin(&bb->lock);
-	lo = 0;
-	rv = 0;
-	hi = bb->count;
-
-	/* Binary search between lo and hi for 'target'
-	 * i.e. for the last range that starts before 'target'
-	 */
-	/* INVARIANT: ranges before 'lo' and at-or-after 'hi'
-	 * are known not to be the last range before target.
-	 * VARIANT: hi-lo is the number of possible
-	 * ranges, and decreases until it reaches 1
-	 */
-	while (hi - lo > 1) {
-		int mid = (lo + hi) / 2;
-		sector_t a = BB_OFFSET(p[mid]);
-		if (a < target)
-			/* This could still be the one, earlier ranges
-			 * could not. */
-			lo = mid;
-		else
-			/* This and later ranges are definitely out. */
-			hi = mid;
-	}
-	/* 'lo' might be the last that started before target, but 'hi' isn't */
-	if (hi > lo) {
-		/* need to check all range that end after 's' to see if
-		 * any are unacknowledged.
-		 */
-		while (lo >= 0 &&
-		       BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) {
-			if (BB_OFFSET(p[lo]) < target) {
-				/* starts before the end, and finishes after
-				 * the start, so they must overlap
-				 */
-				if (rv != -1 && BB_ACK(p[lo]))
-					rv = 1;
-				else
-					rv = -1;
-				*first_bad = BB_OFFSET(p[lo]);
-				*bad_sectors = BB_LEN(p[lo]);
-			}
-			lo--;
-		}
-	}
-
-	if (read_seqretry(&bb->lock, seq))
-		goto retry;
-
-	return rv;
-}
-EXPORT_SYMBOL_GPL(md_is_badblock);
-
-/*
- * Add a range of bad blocks to the table.
- * This might extend the table, or might contract it
- * if two adjacent ranges can be merged.
- * We binary-search to find the 'insertion' point, then
- * decide how best to handle it.
- */
-static int md_set_badblocks(struct badblocks *bb, sector_t s, int sectors,
-			    int acknowledged)
-{
-	u64 *p;
-	int lo, hi;
-	int rv = 1;
-	unsigned long flags;
-
-	if (bb->shift < 0)
-		/* badblocks are disabled */
-		return 0;
-
-	if (bb->shift) {
-		/* round the start down, and the end up */
-		sector_t next = s + sectors;
-		s >>= bb->shift;
-		next += (1<<bb->shift) - 1;
-		next >>= bb->shift;
-		sectors = next - s;
-	}
-
-	write_seqlock_irqsave(&bb->lock, flags);
-
-	p = bb->page;
-	lo = 0;
-	hi = bb->count;
-	/* Find the last range that starts at-or-before 's' */
-	while (hi - lo > 1) {
-		int mid = (lo + hi) / 2;
-		sector_t a = BB_OFFSET(p[mid]);
-		if (a <= s)
-			lo = mid;
-		else
-			hi = mid;
-	}
-	if (hi > lo && BB_OFFSET(p[lo]) > s)
-		hi = lo;
-
-	if (hi > lo) {
-		/* we found a range that might merge with the start
-		 * of our new range
-		 */
-		sector_t a = BB_OFFSET(p[lo]);
-		sector_t e = a + BB_LEN(p[lo]);
-		int ack = BB_ACK(p[lo]);
-		if (e >= s) {
-			/* Yes, we can merge with a previous range */
-			if (s == a && s + sectors >= e)
-				/* new range covers old */
-				ack = acknowledged;
-			else
-				ack = ack && acknowledged;
-
-			if (e < s + sectors)
-				e = s + sectors;
-			if (e - a <= BB_MAX_LEN) {
-				p[lo] = BB_MAKE(a, e-a, ack);
-				s = e;
-			} else {
-				/* does not all fit in one range,
-				 * make p[lo] maximal
-				 */
-				if (BB_LEN(p[lo]) != BB_MAX_LEN)
-					p[lo] = BB_MAKE(a, BB_MAX_LEN, ack);
-				s = a + BB_MAX_LEN;
-			}
-			sectors = e - s;
-		}
-	}
-	if (sectors && hi < bb->count) {
-		/* 'hi' points to the first range that starts after 's'.
-		 * Maybe we can merge with the start of that range */
-		sector_t a = BB_OFFSET(p[hi]);
-		sector_t e = a + BB_LEN(p[hi]);
-		int ack = BB_ACK(p[hi]);
-		if (a <= s + sectors) {
-			/* merging is possible */
-			if (e <= s + sectors) {
-				/* full overlap */
-				e = s + sectors;
-				ack = acknowledged;
-			} else
-				ack = ack && acknowledged;
-
-			a = s;
-			if (e - a <= BB_MAX_LEN) {
-				p[hi] = BB_MAKE(a, e-a, ack);
-				s = e;
-			} else {
-				p[hi] = BB_MAKE(a, BB_MAX_LEN, ack);
-				s = a + BB_MAX_LEN;
-			}
-			sectors = e - s;
-			lo = hi;
-			hi++;
-		}
-	}
-	if (sectors == 0 && hi < bb->count) {
-		/* we might be able to combine lo and hi */
-		/* Note: 's' is at the end of 'lo' */
-		sector_t a = BB_OFFSET(p[hi]);
-		int lolen = BB_LEN(p[lo]);
-		int hilen = BB_LEN(p[hi]);
-		int newlen = lolen + hilen - (s - a);
-		if (s >= a && newlen < BB_MAX_LEN) {
-			/* yes, we can combine them */
-			int ack = BB_ACK(p[lo]) && BB_ACK(p[hi]);
-			p[lo] = BB_MAKE(BB_OFFSET(p[lo]), newlen, ack);
-			memmove(p + hi, p + hi + 1,
-				(bb->count - hi - 1) * 8);
-			bb->count--;
-		}
-	}
-	while (sectors) {
-		/* didn't merge (it all).
-		 * Need to add a range just before 'hi' */
-		if (bb->count >= MD_MAX_BADBLOCKS) {
-			/* No room for more */
-			rv = 0;
-			break;
-		} else {
-			int this_sectors = sectors;
-			memmove(p + hi + 1, p + hi,
-				(bb->count - hi) * 8);
-			bb->count++;
-
-			if (this_sectors > BB_MAX_LEN)
-				this_sectors = BB_MAX_LEN;
-			p[hi] = BB_MAKE(s, this_sectors, acknowledged);
-			sectors -= this_sectors;
-			s += this_sectors;
-		}
-	}
-
-	bb->changed = 1;
-	if (!acknowledged)
-		bb->unacked_exist = 1;
-	write_sequnlock_irqrestore(&bb->lock, flags);
-
-	return rv;
-}
+/* Bad block management */
 
+/* Returns 1 on success, 0 on failure */
 int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
 		       int is_new)
 {
@@ -8734,114 +8485,19 @@ int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
 		s += rdev->new_data_offset;
 	else
 		s += rdev->data_offset;
-	rv = md_set_badblocks(&rdev->badblocks,
-			      s, sectors, 0);
-	if (rv) {
+	rv = badblocks_set(&rdev->badblocks, s, sectors, 0);
+	if (rv == 0) {
 		/* Make sure they get written out promptly */
 		sysfs_notify_dirent_safe(rdev->sysfs_state);
 		set_bit(MD_CHANGE_CLEAN, &rdev->mddev->flags);
 		set_bit(MD_CHANGE_PENDING, &rdev->mddev->flags);
 		md_wakeup_thread(rdev->mddev->thread);
-	}
-	return rv;
+		return 1;
+	} else
+		return 0;
 }
 EXPORT_SYMBOL_GPL(rdev_set_badblocks);
 
-/*
- * Remove a range of bad blocks from the table.
- * This may involve extending the table if we spilt a region,
- * but it must not fail.  So if the table becomes full, we just
- * drop the remove request.
- */
-static int md_clear_badblocks(struct badblocks *bb, sector_t s, int sectors)
-{
-	u64 *p;
-	int lo, hi;
-	sector_t target = s + sectors;
-	int rv = 0;
-
-	if (bb->shift > 0) {
-		/* When clearing we round the start up and the end down.
-		 * This should not matter as the shift should align with
-		 * the block size and no rounding should ever be needed.
-		 * However it is better the think a block is bad when it
-		 * isn't than to think a block is not bad when it is.
-		 */
-		s += (1<<bb->shift) - 1;
-		s >>= bb->shift;
-		target >>= bb->shift;
-		sectors = target - s;
-	}
-
-	write_seqlock_irq(&bb->lock);
-
-	p = bb->page;
-	lo = 0;
-	hi = bb->count;
-	/* Find the last range that starts before 'target' */
-	while (hi - lo > 1) {
-		int mid = (lo + hi) / 2;
-		sector_t a = BB_OFFSET(p[mid]);
-		if (a < target)
-			lo = mid;
-		else
-			hi = mid;
-	}
-	if (hi > lo) {
-		/* p[lo] is the last range that could overlap the
-		 * current range.  Earlier ranges could also overlap,
-		 * but only this one can overlap the end of the range.
-		 */
-		if (BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > target) {
-			/* Partial overlap, leave the tail of this range */
-			int ack = BB_ACK(p[lo]);
-			sector_t a = BB_OFFSET(p[lo]);
-			sector_t end = a + BB_LEN(p[lo]);
-
-			if (a < s) {
-				/* we need to split this range */
-				if (bb->count >= MD_MAX_BADBLOCKS) {
-					rv = -ENOSPC;
-					goto out;
-				}
-				memmove(p+lo+1, p+lo, (bb->count - lo) * 8);
-				bb->count++;
-				p[lo] = BB_MAKE(a, s-a, ack);
-				lo++;
-			}
-			p[lo] = BB_MAKE(target, end - target, ack);
-			/* there is no longer an overlap */
-			hi = lo;
-			lo--;
-		}
-		while (lo >= 0 &&
-		       BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) {
-			/* This range does overlap */
-			if (BB_OFFSET(p[lo]) < s) {
-				/* Keep the early parts of this range. */
-				int ack = BB_ACK(p[lo]);
-				sector_t start = BB_OFFSET(p[lo]);
-				p[lo] = BB_MAKE(start, s - start, ack);
-				/* now low doesn't overlap, so.. */
-				break;
-			}
-			lo--;
-		}
-		/* 'lo' is strictly before, 'hi' is strictly after,
-		 * anything between needs to be discarded
-		 */
-		if (hi - lo > 1) {
-			memmove(p+lo+1, p+hi, (bb->count - hi) * 8);
-			bb->count -= (hi - lo - 1);
-		}
-	}
-
-	bb->changed = 1;
-out:
-	write_sequnlock_irq(&bb->lock);
-	return rv;
-}
-
 int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
 			 int is_new)
 {
@@ -8849,133 +8505,11 @@ int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
 		s += rdev->new_data_offset;
 	else
 		s += rdev->data_offset;
-	return md_clear_badblocks(&rdev->badblocks,
+	return badblocks_clear(&rdev->badblocks,
 				  s, sectors);
 }
 EXPORT_SYMBOL_GPL(rdev_clear_badblocks);
 
-/*
- * Acknowledge all bad blocks in a list.
- * This only succeeds if ->changed is clear.  It is used by
- * in-kernel metadata updates
- */
-void md_ack_all_badblocks(struct badblocks *bb)
-{
-	if (bb->page == NULL || bb->changed)
-		/* no point even trying */
-		return;
-	write_seqlock_irq(&bb->lock);
-
-	if (bb->changed == 0 && bb->unacked_exist) {
-		u64 *p = bb->page;
-		int i;
-		for (i = 0; i < bb->count ; i++) {
-			if (!BB_ACK(p[i])) {
-				sector_t start = BB_OFFSET(p[i]);
-				int len = BB_LEN(p[i]);
-				p[i] = BB_MAKE(start, len, 1);
-			}
-		}
-		bb->unacked_exist = 0;
-	}
-	write_sequnlock_irq(&bb->lock);
-}
-EXPORT_SYMBOL_GPL(md_ack_all_badblocks);
-
-/* sysfs access to bad-blocks list.
- * We present two files.
- * 'bad-blocks' lists sector numbers and lengths of ranges that
- *    are recorded as bad.  The list is truncated to fit within
- *    the one-page limit of sysfs.
- *    Writing "sector length" to this file adds an acknowledged
- *    bad block list.
- * 'unacknowledged-bad-blocks' lists bad blocks that have not yet
- *    been acknowledged.  Writing to this file adds bad blocks
- *    without acknowledging them.  This is largely for testing.
- */
-
-static ssize_t
-badblocks_show(struct badblocks *bb, char *page, int unack)
-{
-	size_t len;
-	int i;
-	u64 *p = bb->page;
-	unsigned seq;
-
-	if (bb->shift < 0)
-		return 0;
-
-retry:
-	seq = read_seqbegin(&bb->lock);
-
-	len = 0;
-	i = 0;
-
-	while (len < PAGE_SIZE && i < bb->count) {
-		sector_t s = BB_OFFSET(p[i]);
-		unsigned int length = BB_LEN(p[i]);
-		int ack = BB_ACK(p[i]);
-		i++;
-
-		if (unack && ack)
-			continue;
-
-		len += snprintf(page+len, PAGE_SIZE-len, "%llu %u\n",
-				(unsigned long long)s << bb->shift,
-				length << bb->shift);
-	}
-	if (unack && len == 0)
-		bb->unacked_exist = 0;
-
-	if (read_seqretry(&bb->lock, seq))
-		goto retry;
-
-	return len;
-}
-
-#define DO_DEBUG 1
-
-static ssize_t
-badblocks_store(struct badblocks *bb, const char *page, size_t len, int unack)
-{
-	unsigned long long sector;
-	int length;
-	char newline;
-#ifdef DO_DEBUG
-	/* Allow clearing via sysfs *only* for testing/debugging.
-	 * Normally only a successful write may clear a badblock
-	 */
-	int clear = 0;
-	if (page[0] == '-') {
-		clear = 1;
-		page++;
-	}
-#endif /* DO_DEBUG */
-
-	switch (sscanf(page, "%llu %d%c", &sector, &length, &newline)) {
-	case 3:
-		if (newline != '\n')
-			return -EINVAL;
-	case 2:
-		if (length <= 0)
-			return -EINVAL;
-		break;
-	default:
-		return -EINVAL;
-	}
-
-#ifdef DO_DEBUG
-	if (clear) {
-		md_clear_badblocks(bb, sector, length);
-		return len;
-	}
-#endif /* DO_DEBUG */
-	if (md_set_badblocks(bb, sector, length, !unack))
-		return len;
-	else
-		return -ENOSPC;
-}
-
 static int md_notify_reboot(struct notifier_block *this,
 			    unsigned long code, void *x)
 {
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 2bea51e..389afc4 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -17,6 +17,7 @@
 
 #include <linux/blkdev.h>
 #include <linux/backing-dev.h>
+#include <linux/badblocks.h>
 #include <linux/kobject.h>
 #include <linux/list.h>
 #include <linux/mm.h>
@@ -28,13 +29,6 @@
 
 #define MaxSector (~(sector_t)0)
 
-/* Bad block numbers are stored sorted in a single page.
- * 64bits is used for each block or extent.
- * 54 bits are sector number, 9 bits are extent size,
- * 1 bit is an 'acknowledged' flag.
- */
-#define MD_MAX_BADBLOCKS	(PAGE_SIZE/8)
-
 /*
  * MD's 'extended' device
  */
@@ -117,22 +111,7 @@ struct md_rdev {
 	struct kernfs_node *sysfs_state; /* handle for 'state'
 					   * sysfs entry */
 
-	struct badblocks {
-		int	count;		/* count of bad blocks */
-		int	unacked_exist;	/* there probably are unacknowledged
-					 * bad blocks.  This is only cleared
-					 * when a read discovers none
-					 */
-		int	shift;		/* shift from sectors to block size
-					 * a -ve shift means badblocks are
-					 * disabled.*/
-		u64	*page;		/* badblock list */
-		int	changed;
-		seqlock_t lock;
-
-		sector_t sector;
-		sector_t size;		/* in sectors */
-	} badblocks;
+	struct badblocks badblocks;
 };
 enum flag_bits {
 	Faulty,			/* device is known to have a fault */
@@ -185,22 +164,11 @@ enum flag_bits {
 				 */
 };
 
-#define BB_LEN_MASK	(0x00000000000001FFULL)
-#define BB_OFFSET_MASK	(0x7FFFFFFFFFFFFE00ULL)
-#define BB_ACK_MASK	(0x8000000000000000ULL)
-#define BB_MAX_LEN	512
-#define BB_OFFSET(x)	(((x) & BB_OFFSET_MASK) >> 9)
-#define BB_LEN(x)	(((x) & BB_LEN_MASK) + 1)
-#define BB_ACK(x)	(!!((x) & BB_ACK_MASK))
-#define BB_MAKE(a, l, ack) (((a)<<9) | ((l)-1) | ((u64)(!!(ack)) << 63))
-
-extern int md_is_badblock(struct badblocks *bb, sector_t s, int sectors,
-			  sector_t *first_bad, int *bad_sectors);
 static inline int is_badblock(struct md_rdev *rdev, sector_t s, int sectors,
 			      sector_t *first_bad, int *bad_sectors)
 {
 	if (unlikely(rdev->badblocks.count)) {
-		int rv = md_is_badblock(&rdev->badblocks, rdev->data_offset + s,
+		int rv = badblocks_check(&rdev->badblocks, rdev->data_offset + s,
 					sectors,
 					first_bad, bad_sectors);
 		if (rv)
@@ -213,8 +181,6 @@ extern int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
 			      int is_new);
 extern int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
 				int is_new);
-extern void md_ack_all_badblocks(struct badblocks *bb);
-
 struct md_cluster_info;
 
 struct mddev {
-- 
cgit v0.10.2


From d26f73f083ed6fbea7fd3fdbacb527b7f3e75ac0 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Wed, 30 Dec 2015 15:01:19 -0800
Subject: nfit_test: Enable DSMs for all test NFITs

In preparation for getting a poison list using ARS DSMs, enable DSMs for
all manufactured NFITs supplied by the test framework.  Also, supply
valid response data for ars_status.

Signed-off-by: Vishal Verma <vishal.l.verma@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>

diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index 51cf825..90bd2ea 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -248,6 +248,8 @@ static int nfit_test_cmd_ars_status(struct nd_cmd_ars_status *nd_cmd,
 
 	nd_cmd->out_length = 256;
 	nd_cmd->num_records = 0;
+	nd_cmd->address = 0;
+	nd_cmd->length = -1ULL;
 	nd_cmd->status = 0;
 
 	return 0;
@@ -1088,6 +1090,8 @@ static void nfit_test1_setup(struct nfit_test *t)
 	struct acpi_nfit_memory_map *memdev;
 	struct acpi_nfit_control_region *dcr;
 	struct acpi_nfit_system_address *spa;
+	struct nvdimm_bus_descriptor *nd_desc;
+	struct acpi_nfit_desc *acpi_desc;
 
 	offset = 0;
 	/* spa0 (flat range with no bdw aliasing) */
@@ -1135,6 +1139,13 @@ static void nfit_test1_setup(struct nfit_test *t)
 	dcr->command_size = 0;
 	dcr->status_offset = 0;
 	dcr->status_size = 0;
+
+	acpi_desc = &t->acpi_desc;
+	set_bit(ND_CMD_ARS_CAP, &acpi_desc->bus_dsm_force_en);
+	set_bit(ND_CMD_ARS_START, &acpi_desc->bus_dsm_force_en);
+	set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_dsm_force_en);
+	nd_desc = &acpi_desc->nd_desc;
+	nd_desc->ndctl = nfit_test_ctl;
 }
 
 static int nfit_test_blk_do_io(struct nd_blk_region *ndbr, resource_size_t dpa,
-- 
cgit v0.10.2


From 0caeef63e6d2f866d85bb507bf63e0ce8ec91cef Mon Sep 17 00:00:00 2001
From: Vishal Verma <vishal.l.verma@intel.com>
Date: Thu, 24 Dec 2015 19:21:43 -0700
Subject: libnvdimm: Add a poison list and export badblocks

During region creation, perform Address Range Scrubs (ARS) for the SPA
(System Physical Address) ranges to retrieve known poison locations from
firmware. Add a new data structure 'nd_poison' which is used as a list
in nvdimm_bus to store these poison locations.

When creating a pmem namespace, if there is any known poison associated
with its physical address space, convert the poison ranges to bad sectors
that are exposed using the badblocks interface.

Signed-off-by: Vishal Verma <vishal.l.verma@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>

diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c
index e7ed39b..e1dbc8d 100644
--- a/drivers/acpi/nfit.c
+++ b/drivers/acpi/nfit.c
@@ -15,6 +15,7 @@
 #include <linux/module.h>
 #include <linux/mutex.h>
 #include <linux/ndctl.h>
+#include <linux/delay.h>
 #include <linux/list.h>
 #include <linux/acpi.h>
 #include <linux/sort.h>
@@ -1473,6 +1474,201 @@ static void acpi_nfit_blk_region_disable(struct nvdimm_bus *nvdimm_bus,
 	/* devm will free nfit_blk */
 }
 
+static int ars_get_cap(struct nvdimm_bus_descriptor *nd_desc,
+		struct nd_cmd_ars_cap *cmd, u64 addr, u64 length)
+{
+	cmd->address = addr;
+	cmd->length = length;
+
+	return nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_CAP, cmd,
+			sizeof(*cmd));
+}
+
+static int ars_do_start(struct nvdimm_bus_descriptor *nd_desc,
+		struct nd_cmd_ars_start *cmd, u64 addr, u64 length)
+{
+	int rc;
+
+	cmd->address = addr;
+	cmd->length = length;
+	cmd->type = ND_ARS_PERSISTENT;
+
+	while (1) {
+		rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_START, cmd,
+				sizeof(*cmd));
+		if (rc)
+			return rc;
+		switch (cmd->status) {
+		case 0:
+			return 0;
+		case 1:
+			/* ARS unsupported, but we should never get here */
+			return 0;
+		case 2:
+			return -EINVAL;
+		case 3:
+			/* ARS is in progress */
+			msleep(1000);
+			break;
+		default:
+			return -ENXIO;
+		}
+	}
+}
+
+static int ars_get_status(struct nvdimm_bus_descriptor *nd_desc,
+		struct nd_cmd_ars_status *cmd)
+{
+	int rc;
+
+	while (1) {
+		rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_STATUS, cmd,
+			sizeof(*cmd));
+		if (rc || cmd->status & 0xffff)
+			return -ENXIO;
+
+		/* Check extended status (Upper two bytes) */
+		switch (cmd->status >> 16) {
+		case 0:
+			return 0;
+		case 1:
+			/* ARS is in progress */
+			msleep(1000);
+			break;
+		case 2:
+			/* No ARS performed for the current boot */
+			return 0;
+		default:
+			return -ENXIO;
+		}
+	}
+}
+
+static int ars_status_process_records(struct nvdimm_bus *nvdimm_bus,
+		struct nd_cmd_ars_status *ars_status, u64 start)
+{
+	int rc;
+	u32 i;
+
+	/*
+	 * The address field returned by ars_status should be either
+	 * less than or equal to the address we last started ARS for.
+	 * The (start, length) returned by ars_status should also have
+	 * non-zero overlap with the range we started ARS for.
+	 * If this is not the case, bail.
+	 */
+	if (ars_status->address > start ||
+			(ars_status->address + ars_status->length < start))
+		return -ENXIO;
+
+	for (i = 0; i < ars_status->num_records; i++) {
+		rc = nvdimm_bus_add_poison(nvdimm_bus,
+				ars_status->records[i].err_address,
+				ars_status->records[i].length);
+		if (rc)
+			return rc;
+	}
+
+	return 0;
+}
+
+static int acpi_nfit_find_poison(struct acpi_nfit_desc *acpi_desc,
+		struct nd_region_desc *ndr_desc)
+{
+	struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
+	struct nvdimm_bus *nvdimm_bus = acpi_desc->nvdimm_bus;
+	struct nd_cmd_ars_status *ars_status = NULL;
+	struct nd_cmd_ars_start *ars_start = NULL;
+	struct nd_cmd_ars_cap *ars_cap = NULL;
+	u64 start, len, cur, remaining;
+	int rc;
+
+	ars_cap = kzalloc(sizeof(*ars_cap), GFP_KERNEL);
+	if (!ars_cap)
+		return -ENOMEM;
+
+	start = ndr_desc->res->start;
+	len = ndr_desc->res->end - ndr_desc->res->start + 1;
+
+	rc = ars_get_cap(nd_desc, ars_cap, start, len);
+	if (rc)
+		goto out;
+
+	/*
+	 * If ARS is unsupported, or if the 'Persistent Memory Scrub' flag in
+	 * extended status is not set, skip this but continue initialization
+	 */
+	if ((ars_cap->status & 0xffff) ||
+		!(ars_cap->status >> 16 & ND_ARS_PERSISTENT)) {
+		dev_warn(acpi_desc->dev,
+			"ARS unsupported (status: 0x%x), won't create an error list\n",
+			ars_cap->status);
+		goto out;
+	}
+
+	/*
+	 * Check if a full-range ARS has been run. If so, use those results
+	 * without having to start a new ARS.
+	 */
+	ars_status = kzalloc(ars_cap->max_ars_out + sizeof(*ars_status),
+			GFP_KERNEL);
+	if (!ars_status) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	rc = ars_get_status(nd_desc, ars_status);
+	if (rc)
+		goto out;
+
+	if (ars_status->address <= start &&
+		(ars_status->address + ars_status->length >= start + len)) {
+		rc = ars_status_process_records(nvdimm_bus, ars_status, start);
+		goto out;
+	}
+
+	/*
+	 * ARS_STATUS can overflow if the number of poison entries found is
+	 * greater than the maximum buffer size (ars_cap->max_ars_out)
+	 * To detect overflow, check if the length field of ars_status
+	 * is less than the length we supplied. If so, process the
+	 * error entries we got, adjust the start point, and start again
+	 */
+	ars_start = kzalloc(sizeof(*ars_start), GFP_KERNEL);
+	if (!ars_start)
+		return -ENOMEM;
+
+	cur = start;
+	remaining = len;
+	do {
+		u64 done, end;
+
+		rc = ars_do_start(nd_desc, ars_start, cur, remaining);
+		if (rc)
+			goto out;
+
+		rc = ars_get_status(nd_desc, ars_status);
+		if (rc)
+			goto out;
+
+		rc = ars_status_process_records(nvdimm_bus, ars_status, cur);
+		if (rc)
+			goto out;
+
+		end = min(cur + remaining,
+			ars_status->address + ars_status->length);
+		done = end - cur;
+		cur += done;
+		remaining -= done;
+	} while (remaining);
+
+ out:
+	kfree(ars_cap);
+	kfree(ars_start);
+	kfree(ars_status);
+	return rc;
+}
+
 static int acpi_nfit_init_mapping(struct acpi_nfit_desc *acpi_desc,
 		struct nd_mapping *nd_mapping, struct nd_region_desc *ndr_desc,
 		struct acpi_nfit_memory_map *memdev,
@@ -1585,6 +1781,13 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc,
 
 	nvdimm_bus = acpi_desc->nvdimm_bus;
 	if (nfit_spa_type(spa) == NFIT_SPA_PM) {
+		rc = acpi_nfit_find_poison(acpi_desc, ndr_desc);
+		if (rc) {
+			dev_err(acpi_desc->dev,
+				"error while performing ARS to find poison: %d\n",
+				rc);
+			return rc;
+		}
 		if (!nvdimm_pmem_region_create(nvdimm_bus, ndr_desc))
 			return -ENOMEM;
 	} else if (nfit_spa_type(spa) == NFIT_SPA_VOLATILE) {
diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c
index 82c49bb..21003b7 100644
--- a/drivers/nvdimm/core.c
+++ b/drivers/nvdimm/core.c
@@ -325,6 +325,7 @@ struct nvdimm_bus *__nvdimm_bus_register(struct device *parent,
 	if (!nvdimm_bus)
 		return NULL;
 	INIT_LIST_HEAD(&nvdimm_bus->list);
+	INIT_LIST_HEAD(&nvdimm_bus->poison_list);
 	init_waitqueue_head(&nvdimm_bus->probe_wait);
 	nvdimm_bus->id = ida_simple_get(&nd_ida, 0, 0, GFP_KERNEL);
 	mutex_init(&nvdimm_bus->reconfig_mutex);
@@ -359,6 +360,191 @@ struct nvdimm_bus *__nvdimm_bus_register(struct device *parent,
 }
 EXPORT_SYMBOL_GPL(__nvdimm_bus_register);
 
+/**
+ * __add_badblock_range() - Convert a physical address range to bad sectors
+ * @disk:	the disk associated with the namespace
+ * @ns_offset:	namespace offset where the error range begins (in bytes)
+ * @len:	number of bytes of poison to be added
+ *
+ * This assumes that the range provided with (ns_offset, len) is within
+ * the bounds of physical addresses for this namespace, i.e. lies in the
+ * interval [ns_start, ns_start + ns_size)
+ */
+static int __add_badblock_range(struct gendisk *disk, u64 ns_offset, u64 len)
+{
+	unsigned int sector_size = queue_logical_block_size(disk->queue);
+	sector_t start_sector;
+	u64 num_sectors;
+	u32 rem;
+	int rc;
+
+	start_sector = div_u64(ns_offset, sector_size);
+	num_sectors = div_u64_rem(len, sector_size, &rem);
+	if (rem)
+		num_sectors++;
+
+	if (!disk->bb) {
+		rc = disk_alloc_badblocks(disk);
+		if (rc)
+			return rc;
+	}
+
+	if (unlikely(num_sectors > (u64)INT_MAX)) {
+		u64 remaining = num_sectors;
+		sector_t s = start_sector;
+
+		while (remaining) {
+			int done = min_t(u64, remaining, INT_MAX);
+
+			rc = disk_set_badblocks(disk, s, done);
+			if (rc)
+				return rc;
+			remaining -= done;
+			s += done;
+		}
+		return 0;
+	} else
+		return disk_set_badblocks(disk, start_sector, num_sectors);
+}
+
+/**
+ * nvdimm_namespace_add_poison() - Convert a list of poison ranges to badblocks
+ * @disk:	the gendisk associated with the namespace where badblocks
+ *		will be stored
+ * @offset:	offset at the start of the namespace before 'sector 0'
+ * @ndns:	the namespace containing poison ranges
+ *
+ * The poison list generated during NFIT initialization may contain multiple,
+ * possibly overlapping ranges in the SPA (System Physical Address) space.
+ * Compare each of these ranges to the namespace currently being initialized,
+ * and add badblocks to the gendisk for all matching sub-ranges
+ *
+ * Return:
+ * 0 - Success
+ */
+int nvdimm_namespace_add_poison(struct gendisk *disk, resource_size_t offset,
+		struct nd_namespace_common *ndns)
+{
+	struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
+	struct nd_region *nd_region = to_nd_region(ndns->dev.parent);
+	struct nvdimm_bus *nvdimm_bus;
+	struct list_head *poison_list;
+	u64 ns_start, ns_end, ns_size;
+	struct nd_poison *pl;
+	int rc;
+
+	ns_size = nvdimm_namespace_capacity(ndns) - offset;
+	ns_start = nsio->res.start + offset;
+	ns_end = nsio->res.end;
+
+	nvdimm_bus = to_nvdimm_bus(nd_region->dev.parent);
+	poison_list = &nvdimm_bus->poison_list;
+	if (list_empty(poison_list))
+		return 0;
+
+	list_for_each_entry(pl, poison_list, list) {
+		u64 pl_end = pl->start + pl->length - 1;
+
+		/* Discard intervals with no intersection */
+		if (pl_end < ns_start)
+			continue;
+		if (pl->start > ns_end)
+			continue;
+		/* Deal with any overlap after start of the namespace */
+		if (pl->start >= ns_start) {
+			u64 start = pl->start;
+			u64 len;
+
+			if (pl_end <= ns_end)
+				len = pl->length;
+			else
+				len = ns_start + ns_size - pl->start;
+
+			rc = __add_badblock_range(disk, start - ns_start, len);
+			if (rc)
+				return rc;
+			dev_info(&nvdimm_bus->dev,
+				"Found a poison range (0x%llx, 0x%llx)\n",
+				start, len);
+			continue;
+		}
+		/* Deal with overlap for poison starting before the namespace */
+		if (pl->start < ns_start) {
+			u64 len;
+
+			if (pl_end < ns_end)
+				len = pl->start + pl->length - ns_start;
+			else
+				len = ns_size;
+
+			rc = __add_badblock_range(disk, 0, len);
+			if (rc)
+				return rc;
+			dev_info(&nvdimm_bus->dev,
+				"Found a poison range (0x%llx, 0x%llx)\n",
+				pl->start, len);
+		}
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nvdimm_namespace_add_poison);
+
+static int __add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length)
+{
+	struct nd_poison *pl;
+
+	pl = kzalloc(sizeof(*pl), GFP_KERNEL);
+	if (!pl)
+		return -ENOMEM;
+
+	pl->start = addr;
+	pl->length = length;
+	list_add_tail(&pl->list, &nvdimm_bus->poison_list);
+
+	return 0;
+}
+
+int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length)
+{
+	struct nd_poison *pl;
+
+	if (list_empty(&nvdimm_bus->poison_list))
+		return __add_poison(nvdimm_bus, addr, length);
+
+	/*
+	 * There is a chance this is a duplicate, check for those first.
+	 * This will be the common case as ARS_STATUS returns all known
+	 * errors in the SPA space, and we can't query it per region
+	 */
+	list_for_each_entry(pl, &nvdimm_bus->poison_list, list)
+		if (pl->start == addr) {
+			/* If length has changed, update this list entry */
+			if (pl->length != length)
+				pl->length = length;
+			return 0;
+		}
+
+	/*
+	 * If not a duplicate or a simple length update, add the entry as is,
+	 * as any overlapping ranges will get resolved when the list is consumed
+	 * and converted to badblocks
+	 */
+	return __add_poison(nvdimm_bus, addr, length);
+}
+EXPORT_SYMBOL_GPL(nvdimm_bus_add_poison);
+
+static void free_poison_list(struct list_head *poison_list)
+{
+	struct nd_poison *pl, *next;
+
+	list_for_each_entry_safe(pl, next, poison_list, list) {
+		list_del(&pl->list);
+		kfree(pl);
+	}
+	list_del_init(poison_list);
+}
+
 static int child_unregister(struct device *dev, void *data)
 {
 	/*
@@ -385,6 +571,7 @@ void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus)
 
 	nd_synchronize();
 	device_for_each_child(&nvdimm_bus->dev, NULL, child_unregister);
+	free_poison_list(&nvdimm_bus->poison_list);
 	nvdimm_bus_destroy_ndctl(nvdimm_bus);
 
 	device_unregister(&nvdimm_bus->dev);
diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h
index 159aed5..d3b7ea7 100644
--- a/drivers/nvdimm/nd-core.h
+++ b/drivers/nvdimm/nd-core.h
@@ -30,6 +30,7 @@ struct nvdimm_bus {
 	struct list_head list;
 	struct device dev;
 	int id, probe_active;
+	struct list_head poison_list;
 	struct mutex reconfig_mutex;
 };
 
@@ -89,4 +90,6 @@ bool __nd_attach_ndns(struct device *dev, struct nd_namespace_common *attach,
 ssize_t nd_namespace_store(struct device *dev,
 		struct nd_namespace_common **_ndns, const char *buf,
 		size_t len);
+int nvdimm_namespace_add_poison(struct gendisk *disk, resource_size_t offset,
+		struct nd_namespace_common *ndns);
 #endif /* __ND_CORE_H__ */
diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h
index 417e521..ba91fcd 100644
--- a/drivers/nvdimm/nd.h
+++ b/drivers/nvdimm/nd.h
@@ -38,6 +38,12 @@ enum {
 #endif
 };
 
+struct nd_poison {
+	u64 start;
+	u64 length;
+	struct list_head list;
+};
+
 struct nvdimm_drvdata {
 	struct device *dev;
 	int nsindex_size;
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 8ee7989..5b95043 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -27,6 +27,7 @@
 #include <linux/slab.h>
 #include <linux/pmem.h>
 #include <linux/nd.h>
+#include "nd-core.h"
 #include "pfn.h"
 #include "nd.h"
 
@@ -168,6 +169,7 @@ static int pmem_attach_disk(struct device *dev,
 {
 	int nid = dev_to_node(dev);
 	struct gendisk *disk;
+	int ret;
 
 	pmem->pmem_queue = blk_alloc_queue_node(GFP_KERNEL, nid);
 	if (!pmem->pmem_queue)
@@ -196,6 +198,10 @@ static int pmem_attach_disk(struct device *dev,
 	set_capacity(disk, (pmem->size - pmem->data_offset) / 512);
 	pmem->pmem_disk = disk;
 
+	ret = nvdimm_namespace_add_poison(disk, pmem->data_offset, ndns);
+	if (ret)
+		return ret;
+
 	add_disk(disk);
 	revalidate_disk(disk);
 
diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h
index 3f021dc..bed40df 100644
--- a/include/linux/libnvdimm.h
+++ b/include/linux/libnvdimm.h
@@ -116,6 +116,7 @@ static inline struct nd_blk_region_desc *to_blk_region_desc(
 
 }
 
+int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length);
 struct nvdimm_bus *__nvdimm_bus_register(struct device *parent,
 		struct nvdimm_bus_descriptor *nfit_desc, struct module *module);
 #define nvdimm_bus_register(parent, desc) \
-- 
cgit v0.10.2


From ad9a8bde2cb19f6876f964fc48acc8b6a2f325ff Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Wed, 6 Jan 2016 12:03:41 -0800
Subject: libnvdimm, pmem: move definition of nvdimm_namespace_add_poison to
 nd.h

nd-core.h is private to the libnvdimm core internals and should not be
used by drivers.

Signed-off-by: Dan Williams <dan.j.williams@intel.com>

diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h
index d3b7ea7..29acdaa 100644
--- a/drivers/nvdimm/nd-core.h
+++ b/drivers/nvdimm/nd-core.h
@@ -90,6 +90,4 @@ bool __nd_attach_ndns(struct device *dev, struct nd_namespace_common *attach,
 ssize_t nd_namespace_store(struct device *dev,
 		struct nd_namespace_common **_ndns, const char *buf,
 		size_t len);
-int nvdimm_namespace_add_poison(struct gendisk *disk, resource_size_t offset,
-		struct nd_namespace_common *ndns);
 #endif /* __ND_CORE_H__ */
diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h
index ba91fcd..198933d 100644
--- a/drivers/nvdimm/nd.h
+++ b/drivers/nvdimm/nd.h
@@ -268,6 +268,8 @@ int nvdimm_namespace_attach_btt(struct nd_namespace_common *ndns);
 int nvdimm_namespace_detach_btt(struct nd_namespace_common *ndns);
 const char *nvdimm_namespace_disk_name(struct nd_namespace_common *ndns,
 		char *name);
+int nvdimm_namespace_add_poison(struct gendisk *disk, resource_size_t offset,
+		struct nd_namespace_common *ndns);
 int nd_blk_region_init(struct nd_region *nd_region);
 void __nd_iostat_start(struct bio *bio, unsigned long *start);
 static inline bool nd_iostat_start(struct bio *bio, unsigned long *start)
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 5b95043..65b2056 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -27,7 +27,6 @@
 #include <linux/slab.h>
 #include <linux/pmem.h>
 #include <linux/nd.h>
-#include "nd-core.h"
 #include "pfn.h"
 #include "nd.h"
 
-- 
cgit v0.10.2


From d3b407fb3f782bd915db64e266010ea30a2d381e Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Wed, 6 Jan 2016 12:19:22 -0800
Subject: badblocks: rename badblocks_free to badblocks_exit

For symmetry with badblocks_init() make it clear that this path only
destroys incremental allocations of a badblocks instance, and does not
free the badblocks instance itself.

Signed-off-by: Dan Williams <dan.j.williams@intel.com>

diff --git a/block/badblocks.c b/block/badblocks.c
index 96aeb91..fabf6b6 100644
--- a/block/badblocks.c
+++ b/block/badblocks.c
@@ -550,12 +550,12 @@ int badblocks_init(struct badblocks *bb, int enable)
 EXPORT_SYMBOL_GPL(badblocks_init);
 
 /**
- * badblocks_free() - free the badblocks structure
+ * badblocks_exit() - free the badblocks structure
  * @bb:		the badblocks structure that holds all badblock information
  */
-void badblocks_free(struct badblocks *bb)
+void badblocks_exit(struct badblocks *bb)
 {
 	kfree(bb->page);
 	bb->page = NULL;
 }
-EXPORT_SYMBOL_GPL(badblocks_free);
+EXPORT_SYMBOL_GPL(badblocks_exit);
diff --git a/block/genhd.c b/block/genhd.c
index 88579cf..f463c67 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -671,7 +671,7 @@ void del_gendisk(struct gendisk *disk)
 	blk_unregister_region(disk_devt(disk), disk->minors);
 
 	if (disk->bb) {
-		badblocks_free(disk->bb);
+		badblocks_exit(disk->bb);
 		kfree(disk->bb);
 	}
 
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 1e48aa9..96a9918 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -710,7 +710,7 @@ void md_rdev_clear(struct md_rdev *rdev)
 		put_page(rdev->bb_page);
 		rdev->bb_page = NULL;
 	}
-	badblocks_free(&rdev->badblocks);
+	badblocks_exit(&rdev->badblocks);
 }
 EXPORT_SYMBOL_GPL(md_rdev_clear);
 
diff --git a/include/linux/badblocks.h b/include/linux/badblocks.h
index 9293446..2d98c02 100644
--- a/include/linux/badblocks.h
+++ b/include/linux/badblocks.h
@@ -48,6 +48,6 @@ ssize_t badblocks_show(struct badblocks *bb, char *page, int unack);
 ssize_t badblocks_store(struct badblocks *bb, const char *page, size_t len,
 			int unack);
 int badblocks_init(struct badblocks *bb, int enable);
-void badblocks_free(struct badblocks *bb);
+void badblocks_exit(struct badblocks *bb);
 
 #endif
-- 
cgit v0.10.2


From 20a308f09e0d29ce6f5a4114cc476a998d569bfb Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Wed, 6 Jan 2016 12:03:41 -0800
Subject: block: clarify badblocks lifetime

The badblocks list attached to a gendisk is allocated by the driver
which equates to the driver owning the lifetime of the object.  Do not
automatically free it in del_gendisk(). This is in preparation for
expanding the use of badblocks in libnvdimm drivers and introducing
devm_init_badblocks().

Signed-off-by: Dan Williams <dan.j.williams@intel.com>

diff --git a/block/badblocks.c b/block/badblocks.c
index fabf6b6..37e5c0a 100644
--- a/block/badblocks.c
+++ b/block/badblocks.c
@@ -555,6 +555,8 @@ EXPORT_SYMBOL_GPL(badblocks_init);
  */
 void badblocks_exit(struct badblocks *bb)
 {
+	if (!bb)
+		return;
 	kfree(bb->page);
 	bb->page = NULL;
 }
diff --git a/block/genhd.c b/block/genhd.c
index f463c67..aa38cd0 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -670,11 +670,6 @@ void del_gendisk(struct gendisk *disk)
 	blk_unregister_queue(disk);
 	blk_unregister_region(disk_devt(disk), disk->minors);
 
-	if (disk->bb) {
-		badblocks_exit(disk->bb);
-		kfree(disk->bb);
-	}
-
 	part_stat_set_all(&disk->part0, 0);
 	disk->part0.stamp = 0;
 
-- 
cgit v0.10.2


From 16263ff6c72eb4cc00aa287230144dda12ccad12 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Mon, 4 Jan 2016 23:50:23 -0800
Subject: block, badblocks: introduce devm_init_badblocks

Provide a devres interface for initializing a badblocks instance.  The
pmem driver has several scenarios where it will be beneficial to have
this structure automatically freed when the device is disabled / fails
probe.

Signed-off-by: Dan Williams <dan.j.williams@intel.com>

diff --git a/block/badblocks.c b/block/badblocks.c
index 37e5c0a..7be53cb 100644
--- a/block/badblocks.c
+++ b/block/badblocks.c
@@ -17,6 +17,7 @@
 
 #include <linux/badblocks.h>
 #include <linux/seqlock.h>
+#include <linux/device.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/stddef.h>
@@ -522,24 +523,20 @@ ssize_t badblocks_store(struct badblocks *bb, const char *page, size_t len,
 }
 EXPORT_SYMBOL_GPL(badblocks_store);
 
-/**
- * badblocks_init() - initialize the badblocks structure
- * @bb:		the badblocks structure that holds all badblock information
- * @enable:	weather to enable badblocks accounting
- *
- * Return:
- *  0: success
- *  -ve errno: on error
- */
-int badblocks_init(struct badblocks *bb, int enable)
+static int __badblocks_init(struct device *dev, struct badblocks *bb,
+		int enable)
 {
+	bb->dev = dev;
 	bb->count = 0;
 	if (enable)
 		bb->shift = 0;
 	else
 		bb->shift = -1;
-	bb->page = kzalloc(PAGE_SIZE, GFP_KERNEL);
-	if (bb->page == (u64 *)0) {
+	if (dev)
+		bb->page = devm_kzalloc(dev, PAGE_SIZE, GFP_KERNEL);
+	else
+		bb->page = kzalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!bb->page) {
 		bb->shift = -1;
 		return -ENOMEM;
 	}
@@ -547,8 +544,30 @@ int badblocks_init(struct badblocks *bb, int enable)
 
 	return 0;
 }
+
+/**
+ * badblocks_init() - initialize the badblocks structure
+ * @bb:		the badblocks structure that holds all badblock information
+ * @enable:	weather to enable badblocks accounting
+ *
+ * Return:
+ *  0: success
+ *  -ve errno: on error
+ */
+int badblocks_init(struct badblocks *bb, int enable)
+{
+	return __badblocks_init(NULL, bb, enable);
+}
 EXPORT_SYMBOL_GPL(badblocks_init);
 
+int devm_init_badblocks(struct device *dev, struct badblocks *bb)
+{
+	if (!bb)
+		return -EINVAL;
+	return __badblocks_init(dev, bb, 1);
+}
+EXPORT_SYMBOL_GPL(devm_init_badblocks);
+
 /**
  * badblocks_exit() - free the badblocks structure
  * @bb:		the badblocks structure that holds all badblock information
@@ -557,7 +576,10 @@ void badblocks_exit(struct badblocks *bb)
 {
 	if (!bb)
 		return;
-	kfree(bb->page);
+	if (bb->dev)
+		devm_kfree(bb->dev, bb->page);
+	else
+		kfree(bb->page);
 	bb->page = NULL;
 }
 EXPORT_SYMBOL_GPL(badblocks_exit);
diff --git a/include/linux/badblocks.h b/include/linux/badblocks.h
index 2d98c02..c3bdf8c 100644
--- a/include/linux/badblocks.h
+++ b/include/linux/badblocks.h
@@ -2,6 +2,7 @@
 #define _LINUX_BADBLOCKS_H
 
 #include <linux/seqlock.h>
+#include <linux/device.h>
 #include <linux/kernel.h>
 #include <linux/stddef.h>
 #include <linux/types.h>
@@ -23,6 +24,7 @@
 #define MAX_BADBLOCKS	(PAGE_SIZE/8)
 
 struct badblocks {
+	struct device *dev;	/* set by devm_init_badblocks */
 	int count;		/* count of bad blocks */
 	int unacked_exist;	/* there probably are unacknowledged
 				 * bad blocks.  This is only cleared
@@ -49,5 +51,15 @@ ssize_t badblocks_store(struct badblocks *bb, const char *page, size_t len,
 			int unack);
 int badblocks_init(struct badblocks *bb, int enable);
 void badblocks_exit(struct badblocks *bb);
-
+struct device;
+int devm_init_badblocks(struct device *dev, struct badblocks *bb);
+static inline void devm_exit_badblocks(struct device *dev, struct badblocks *bb)
+{
+	if (bb->dev != dev) {
+		dev_WARN_ONCE(dev, 1, "%s: badblocks instance not associated\n",
+				__func__);
+		return;
+	}
+	badblocks_exit(bb);
+}
 #endif
-- 
cgit v0.10.2


From 87ba05dff3510f9e058b35d3c3fa222b6f406ecc Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Sat, 9 Jan 2016 07:48:43 -0800
Subject: libnvdimm: don't fail init for full badblocks list

If the badblocks list runs out of space it simply means that software is
unable to intercept all errors.  This is no different than the latent
discovery of new badblocks case and should not be an initialization
failure condition.

Signed-off-by: Dan Williams <dan.j.williams@intel.com>

diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c
index 21003b7..e419d66 100644
--- a/drivers/nvdimm/core.c
+++ b/drivers/nvdimm/core.c
@@ -360,6 +360,18 @@ struct nvdimm_bus *__nvdimm_bus_register(struct device *parent,
 }
 EXPORT_SYMBOL_GPL(__nvdimm_bus_register);
 
+static void set_badblock(struct gendisk *disk, sector_t s, int num)
+{
+	struct device *dev = disk->driverfs_dev;
+
+	dev_dbg(dev, "Found a poison range (0x%llx, 0x%llx)\n",
+			(u64) s * 512, (u64) num * 512);
+	/* this isn't an error as the hardware will still throw an exception */
+	if (disk_set_badblocks(disk, s, num))
+		dev_info_once(dev, "%s: failed for sector %llx\n",
+				__func__, (u64) s);
+}
+
 /**
  * __add_badblock_range() - Convert a physical address range to bad sectors
  * @disk:	the disk associated with the namespace
@@ -396,15 +408,14 @@ static int __add_badblock_range(struct gendisk *disk, u64 ns_offset, u64 len)
 		while (remaining) {
 			int done = min_t(u64, remaining, INT_MAX);
 
-			rc = disk_set_badblocks(disk, s, done);
-			if (rc)
-				return rc;
+			set_badblock(disk, s, done);
 			remaining -= done;
 			s += done;
 		}
-		return 0;
 	} else
-		return disk_set_badblocks(disk, start_sector, num_sectors);
+		set_badblock(disk, start_sector, num_sectors);
+
+	return 0;
 }
 
 /**
@@ -463,9 +474,6 @@ int nvdimm_namespace_add_poison(struct gendisk *disk, resource_size_t offset,
 			rc = __add_badblock_range(disk, start - ns_start, len);
 			if (rc)
 				return rc;
-			dev_info(&nvdimm_bus->dev,
-				"Found a poison range (0x%llx, 0x%llx)\n",
-				start, len);
 			continue;
 		}
 		/* Deal with overlap for poison starting before the namespace */
@@ -480,9 +488,6 @@ int nvdimm_namespace_add_poison(struct gendisk *disk, resource_size_t offset,
 			rc = __add_badblock_range(disk, 0, len);
 			if (rc)
 				return rc;
-			dev_info(&nvdimm_bus->dev,
-				"Found a poison range (0x%llx, 0x%llx)\n",
-				pl->start, len);
 		}
 	}
 
-- 
cgit v0.10.2


From b95f5f4391fad65f1819c2404080b05ca95bdd92 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Mon, 4 Jan 2016 23:50:23 -0800
Subject: libnvdimm: convert to statically allocated badblocks

If a device will ever have badblocks it should always have a badblocks
instance available.  So, similar to md, embed a badblocks instance in
pmem_device.  This reduces pointer chasing in the i/o fast path, and
simplifies the init path.

Reported-by: Vishal Verma <vishal.l.verma@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>

diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c
index e419d66..2e2832b 100644
--- a/drivers/nvdimm/core.c
+++ b/drivers/nvdimm/core.c
@@ -11,6 +11,7 @@
  * General Public License for more details.
  */
 #include <linux/libnvdimm.h>
+#include <linux/badblocks.h>
 #include <linux/export.h>
 #include <linux/module.h>
 #include <linux/blkdev.h>
@@ -360,21 +361,19 @@ struct nvdimm_bus *__nvdimm_bus_register(struct device *parent,
 }
 EXPORT_SYMBOL_GPL(__nvdimm_bus_register);
 
-static void set_badblock(struct gendisk *disk, sector_t s, int num)
+static void set_badblock(struct badblocks *bb, sector_t s, int num)
 {
-	struct device *dev = disk->driverfs_dev;
-
-	dev_dbg(dev, "Found a poison range (0x%llx, 0x%llx)\n",
+	dev_dbg(bb->dev, "Found a poison range (0x%llx, 0x%llx)\n",
 			(u64) s * 512, (u64) num * 512);
 	/* this isn't an error as the hardware will still throw an exception */
-	if (disk_set_badblocks(disk, s, num))
-		dev_info_once(dev, "%s: failed for sector %llx\n",
+	if (badblocks_set(bb, s, num, 1))
+		dev_info_once(bb->dev, "%s: failed for sector %llx\n",
 				__func__, (u64) s);
 }
 
 /**
  * __add_badblock_range() - Convert a physical address range to bad sectors
- * @disk:	the disk associated with the namespace
+ * @bb:		badblocks instance to populate
  * @ns_offset:	namespace offset where the error range begins (in bytes)
  * @len:	number of bytes of poison to be added
  *
@@ -382,25 +381,18 @@ static void set_badblock(struct gendisk *disk, sector_t s, int num)
  * the bounds of physical addresses for this namespace, i.e. lies in the
  * interval [ns_start, ns_start + ns_size)
  */
-static int __add_badblock_range(struct gendisk *disk, u64 ns_offset, u64 len)
+static void __add_badblock_range(struct badblocks *bb, u64 ns_offset, u64 len)
 {
-	unsigned int sector_size = queue_logical_block_size(disk->queue);
+	const unsigned int sector_size = 512;
 	sector_t start_sector;
 	u64 num_sectors;
 	u32 rem;
-	int rc;
 
 	start_sector = div_u64(ns_offset, sector_size);
 	num_sectors = div_u64_rem(len, sector_size, &rem);
 	if (rem)
 		num_sectors++;
 
-	if (!disk->bb) {
-		rc = disk_alloc_badblocks(disk);
-		if (rc)
-			return rc;
-	}
-
 	if (unlikely(num_sectors > (u64)INT_MAX)) {
 		u64 remaining = num_sectors;
 		sector_t s = start_sector;
@@ -408,33 +400,27 @@ static int __add_badblock_range(struct gendisk *disk, u64 ns_offset, u64 len)
 		while (remaining) {
 			int done = min_t(u64, remaining, INT_MAX);
 
-			set_badblock(disk, s, done);
+			set_badblock(bb, s, done);
 			remaining -= done;
 			s += done;
 		}
 	} else
-		set_badblock(disk, start_sector, num_sectors);
-
-	return 0;
+		set_badblock(bb, start_sector, num_sectors);
 }
 
 /**
  * nvdimm_namespace_add_poison() - Convert a list of poison ranges to badblocks
- * @disk:	the gendisk associated with the namespace where badblocks
- *		will be stored
- * @offset:	offset at the start of the namespace before 'sector 0'
  * @ndns:	the namespace containing poison ranges
+ * @bb:		badblocks instance to populate
+ * @offset:	offset at the start of the namespace before 'sector 0'
  *
  * The poison list generated during NFIT initialization may contain multiple,
  * possibly overlapping ranges in the SPA (System Physical Address) space.
  * Compare each of these ranges to the namespace currently being initialized,
  * and add badblocks to the gendisk for all matching sub-ranges
- *
- * Return:
- * 0 - Success
  */
-int nvdimm_namespace_add_poison(struct gendisk *disk, resource_size_t offset,
-		struct nd_namespace_common *ndns)
+void nvdimm_namespace_add_poison(struct nd_namespace_common *ndns,
+		struct badblocks *bb, resource_size_t offset)
 {
 	struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
 	struct nd_region *nd_region = to_nd_region(ndns->dev.parent);
@@ -442,7 +428,6 @@ int nvdimm_namespace_add_poison(struct gendisk *disk, resource_size_t offset,
 	struct list_head *poison_list;
 	u64 ns_start, ns_end, ns_size;
 	struct nd_poison *pl;
-	int rc;
 
 	ns_size = nvdimm_namespace_capacity(ndns) - offset;
 	ns_start = nsio->res.start + offset;
@@ -451,7 +436,7 @@ int nvdimm_namespace_add_poison(struct gendisk *disk, resource_size_t offset,
 	nvdimm_bus = to_nvdimm_bus(nd_region->dev.parent);
 	poison_list = &nvdimm_bus->poison_list;
 	if (list_empty(poison_list))
-		return 0;
+		return;
 
 	list_for_each_entry(pl, poison_list, list) {
 		u64 pl_end = pl->start + pl->length - 1;
@@ -470,10 +455,7 @@ int nvdimm_namespace_add_poison(struct gendisk *disk, resource_size_t offset,
 				len = pl->length;
 			else
 				len = ns_start + ns_size - pl->start;
-
-			rc = __add_badblock_range(disk, start - ns_start, len);
-			if (rc)
-				return rc;
+			__add_badblock_range(bb, start - ns_start, len);
 			continue;
 		}
 		/* Deal with overlap for poison starting before the namespace */
@@ -484,14 +466,9 @@ int nvdimm_namespace_add_poison(struct gendisk *disk, resource_size_t offset,
 				len = pl->start + pl->length - ns_start;
 			else
 				len = ns_size;
-
-			rc = __add_badblock_range(disk, 0, len);
-			if (rc)
-				return rc;
+			__add_badblock_range(bb, 0, len);
 		}
 	}
-
-	return 0;
 }
 EXPORT_SYMBOL_GPL(nvdimm_namespace_add_poison);
 
diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h
index 198933d..288d96e 100644
--- a/drivers/nvdimm/nd.h
+++ b/drivers/nvdimm/nd.h
@@ -268,8 +268,8 @@ int nvdimm_namespace_attach_btt(struct nd_namespace_common *ndns);
 int nvdimm_namespace_detach_btt(struct nd_namespace_common *ndns);
 const char *nvdimm_namespace_disk_name(struct nd_namespace_common *ndns,
 		char *name);
-int nvdimm_namespace_add_poison(struct gendisk *disk, resource_size_t offset,
-		struct nd_namespace_common *ndns);
+void nvdimm_namespace_add_poison(struct nd_namespace_common *ndns,
+		struct badblocks *bb, resource_size_t offset);
 int nd_blk_region_init(struct nd_region *nd_region);
 void __nd_iostat_start(struct bio *bio, unsigned long *start);
 static inline bool nd_iostat_start(struct bio *bio, unsigned long *start)
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 65b2056..2b1f300 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -23,6 +23,7 @@
 #include <linux/module.h>
 #include <linux/memory_hotplug.h>
 #include <linux/moduleparam.h>
+#include <linux/badblocks.h>
 #include <linux/vmalloc.h>
 #include <linux/slab.h>
 #include <linux/pmem.h>
@@ -41,6 +42,7 @@ struct pmem_device {
 	phys_addr_t		data_offset;
 	void __pmem		*virt_addr;
 	size_t			size;
+	struct badblocks	bb;
 };
 
 static int pmem_major;
@@ -168,7 +170,6 @@ static int pmem_attach_disk(struct device *dev,
 {
 	int nid = dev_to_node(dev);
 	struct gendisk *disk;
-	int ret;
 
 	pmem->pmem_queue = blk_alloc_queue_node(GFP_KERNEL, nid);
 	if (!pmem->pmem_queue)
@@ -196,10 +197,9 @@ static int pmem_attach_disk(struct device *dev,
 	disk->driverfs_dev = dev;
 	set_capacity(disk, (pmem->size - pmem->data_offset) / 512);
 	pmem->pmem_disk = disk;
-
-	ret = nvdimm_namespace_add_poison(disk, pmem->data_offset, ndns);
-	if (ret)
-		return ret;
+	if (devm_init_badblocks(dev, &pmem->bb))
+		return -ENOMEM;
+	nvdimm_namespace_add_poison(ndns, &pmem->bb, pmem->data_offset);
 
 	add_disk(disk);
 	revalidate_disk(disk);
-- 
cgit v0.10.2


From e10624f8c09710b3b0740ea3847627ea02f55c39 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Wed, 6 Jan 2016 12:03:41 -0800
Subject: pmem: fail io-requests to known bad blocks

Check the sectors specified in a read bio to see if they hit a known bad
block, and return an error code pmem_do_bvec().

Note that the ->rw_page() is not in a position to return errors.  For
now, copy the same layering violation present in zram_rw_page() to avoid
crashes of the form:

 kernel BUG at mm/filemap.c:822!
 [..]
 Call Trace:
  [<ffffffff811c540e>] page_endio+0x1e/0x60
  [<ffffffff81290d29>] mpage_end_io+0x39/0x60
  [<ffffffff8141c4ef>] bio_endio+0x3f/0x60
  [<ffffffffa005c491>] pmem_make_request+0x111/0x230 [nd_pmem]

...i.e. unlock a page that was already unlocked via pmem_rw_page() =>
page_endio().

Reported-by: Vishal Verma <vishal.l.verma@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>

diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 2b1f300..d00c659 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -47,7 +47,20 @@ struct pmem_device {
 
 static int pmem_major;
 
-static void pmem_do_bvec(struct pmem_device *pmem, struct page *page,
+static bool is_bad_pmem(struct badblocks *bb, sector_t sector, unsigned int len)
+{
+	if (bb->count) {
+		sector_t first_bad;
+		int num_bad;
+
+		return !!badblocks_check(bb, sector, len / 512, &first_bad,
+				&num_bad);
+	}
+
+	return false;
+}
+
+static int pmem_do_bvec(struct pmem_device *pmem, struct page *page,
 			unsigned int len, unsigned int off, int rw,
 			sector_t sector)
 {
@@ -56,6 +69,8 @@ static void pmem_do_bvec(struct pmem_device *pmem, struct page *page,
 	void __pmem *pmem_addr = pmem->virt_addr + pmem_off;
 
 	if (rw == READ) {
+		if (unlikely(is_bad_pmem(&pmem->bb, sector, len)))
+			return -EIO;
 		memcpy_from_pmem(mem + off, pmem_addr, len);
 		flush_dcache_page(page);
 	} else {
@@ -64,10 +79,12 @@ static void pmem_do_bvec(struct pmem_device *pmem, struct page *page,
 	}
 
 	kunmap_atomic(mem);
+	return 0;
 }
 
 static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
 {
+	int rc = 0;
 	bool do_acct;
 	unsigned long start;
 	struct bio_vec bvec;
@@ -76,9 +93,15 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
 	struct pmem_device *pmem = bdev->bd_disk->private_data;
 
 	do_acct = nd_iostat_start(bio, &start);
-	bio_for_each_segment(bvec, bio, iter)
-		pmem_do_bvec(pmem, bvec.bv_page, bvec.bv_len, bvec.bv_offset,
-				bio_data_dir(bio), iter.bi_sector);
+	bio_for_each_segment(bvec, bio, iter) {
+		rc = pmem_do_bvec(pmem, bvec.bv_page, bvec.bv_len,
+				bvec.bv_offset, bio_data_dir(bio),
+				iter.bi_sector);
+		if (rc) {
+			bio->bi_error = rc;
+			break;
+		}
+	}
 	if (do_acct)
 		nd_iostat_end(bio, start);
 
@@ -93,13 +116,22 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector,
 		       struct page *page, int rw)
 {
 	struct pmem_device *pmem = bdev->bd_disk->private_data;
+	int rc;
 
-	pmem_do_bvec(pmem, page, PAGE_CACHE_SIZE, 0, rw, sector);
+	rc = pmem_do_bvec(pmem, page, PAGE_CACHE_SIZE, 0, rw, sector);
 	if (rw & WRITE)
 		wmb_pmem();
-	page_endio(page, rw & WRITE, 0);
 
-	return 0;
+	/*
+	 * The ->rw_page interface is subtle and tricky.  The core
+	 * retries on any error, so we can only invoke page_endio() in
+	 * the successful completion case.  Otherwise, we'll see crashes
+	 * caused by double completion.
+	 */
+	if (rc == 0)
+		page_endio(page, rw & WRITE, 0);
+
+	return rc;
 }
 
 static long pmem_direct_access(struct block_device *bdev, sector_t sector,
-- 
cgit v0.10.2


From 57f7f317abdd07954cb116280c88d18378afb33e Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Wed, 6 Jan 2016 12:03:42 -0800
Subject: pmem, dax: disable dax in the presence of bad blocks

Longer term teach dax to punch "error" holes in mapping requests and
deliver SIGBUS to applications that consume a bad pmem page.  For now,
simply disable the dax performance optimization in the presence of known
errors.

Signed-off-by: Dan Williams <dan.j.williams@intel.com>

diff --git a/block/ioctl.c b/block/ioctl.c
index 7a964d8..2c84683 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -4,6 +4,7 @@
 #include <linux/gfp.h>
 #include <linux/blkpg.h>
 #include <linux/hdreg.h>
+#include <linux/badblocks.h>
 #include <linux/backing-dev.h>
 #include <linux/fs.h>
 #include <linux/blktrace_api.h>
@@ -422,6 +423,15 @@ bool blkdev_dax_capable(struct block_device *bdev)
 			|| (bdev->bd_part->nr_sects % (PAGE_SIZE / 512)))
 		return false;
 
+	/*
+	 * If the device has known bad blocks, force all I/O through the
+	 * driver / page cache.
+	 *
+	 * TODO: support finer grained dax error handling
+	 */
+	if (disk->bb && disk->bb->count)
+		return false;
+
 	return true;
 }
 
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index d00c659..6a1832b 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -233,6 +233,7 @@ static int pmem_attach_disk(struct device *dev,
 		return -ENOMEM;
 	nvdimm_namespace_add_poison(ndns, &pmem->bb, pmem->data_offset);
 
+	disk->bb = &pmem->bb;
 	add_disk(disk);
 	revalidate_disk(disk);
 
-- 
cgit v0.10.2


From 710d69cc99507803ed91b4ec7368fbd66d59f014 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Mon, 4 Jan 2016 23:31:24 -0800
Subject: libnvdimm, pmem: nvdimm_read_bytes() badblocks support

Support badblock checking in all the pmem read paths that do not go
through the block layer.  This protects info block reads (btt or pfn) as
well as data reads to a pmem namespace via a btt instance.

Signed-off-by: Dan Williams <dan.j.williams@intel.com>

diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 6a1832b..a88762d 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -229,6 +229,7 @@ static int pmem_attach_disk(struct device *dev,
 	disk->driverfs_dev = dev;
 	set_capacity(disk, (pmem->size - pmem->data_offset) / 512);
 	pmem->pmem_disk = disk;
+	devm_exit_badblocks(dev, &pmem->bb);
 	if (devm_init_badblocks(dev, &pmem->bb))
 		return -ENOMEM;
 	nvdimm_namespace_add_poison(ndns, &pmem->bb, pmem->data_offset);
@@ -250,9 +251,13 @@ static int pmem_rw_bytes(struct nd_namespace_common *ndns,
 		return -EFAULT;
 	}
 
-	if (rw == READ)
+	if (rw == READ) {
+		unsigned int sz_align = ALIGN(size + (offset & (512 - 1)), 512);
+
+		if (unlikely(is_bad_pmem(&pmem->bb, offset / 512, sz_align)))
+			return -EIO;
 		memcpy_from_pmem(buf, pmem->virt_addr + offset, size);
-	else {
+	} else {
 		memcpy_to_pmem(pmem->virt_addr + offset, buf, size);
 		wmb_pmem();
 	}
@@ -427,6 +432,9 @@ static int nd_pmem_probe(struct device *dev)
 	pmem->ndns = ndns;
 	dev_set_drvdata(dev, pmem);
 	ndns->rw_bytes = pmem_rw_bytes;
+	if (devm_init_badblocks(dev, &pmem->bb))
+		return -ENOMEM;
+	nvdimm_namespace_add_poison(ndns, &pmem->bb, 0);
 
 	if (is_nd_btt(dev))
 		return nvdimm_namespace_attach_btt(ndns);
-- 
cgit v0.10.2


From 55f5560d8c18fe33fc169f8d244a9247dcac7612 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Tue, 5 Jan 2016 00:28:18 -0800
Subject: block: kill disk_{check|set|clear|alloc}_badblocks

These actions are completely managed by a block driver or can use the
badblocks api directly.

Signed-off-by: Dan Williams <dan.j.williams@intel.com>

diff --git a/block/genhd.c b/block/genhd.c
index aa38cd0..5aaeb2a 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -506,16 +506,6 @@ static int exact_lock(dev_t devt, void *data)
 	return 0;
 }
 
-int disk_alloc_badblocks(struct gendisk *disk)
-{
-	disk->bb = kzalloc(sizeof(*(disk->bb)), GFP_KERNEL);
-	if (!disk->bb)
-		return -ENOMEM;
-
-	return badblocks_init(disk->bb, 1);
-}
-EXPORT_SYMBOL(disk_alloc_badblocks);
-
 static void register_disk(struct gendisk *disk)
 {
 	struct device *ddev = disk_to_dev(disk);
@@ -682,38 +672,6 @@ void del_gendisk(struct gendisk *disk)
 }
 EXPORT_SYMBOL(del_gendisk);
 
-/*
- * The gendisk usage of badblocks does not track acknowledgements for
- * badblocks. We always assume they are acknowledged.
- */
-int disk_check_badblocks(struct gendisk *disk, sector_t s, int sectors,
-		   sector_t *first_bad, int *bad_sectors)
-{
-	if (!disk->bb)
-		return 0;
-
-	return badblocks_check(disk->bb, s, sectors, first_bad, bad_sectors);
-}
-EXPORT_SYMBOL(disk_check_badblocks);
-
-int disk_set_badblocks(struct gendisk *disk, sector_t s, int sectors)
-{
-	if (!disk->bb)
-		return 0;
-
-	return badblocks_set(disk->bb, s, sectors, 1);
-}
-EXPORT_SYMBOL(disk_set_badblocks);
-
-int disk_clear_badblocks(struct gendisk *disk, sector_t s, int sectors)
-{
-	if (!disk->bb)
-		return 0;
-
-	return badblocks_clear(disk->bb, s, sectors);
-}
-EXPORT_SYMBOL(disk_clear_badblocks);
-
 /* sysfs access to bad-blocks list. */
 static ssize_t disk_badblocks_show(struct device *dev,
 					struct device_attribute *attr,
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 0bbec68..5c70676 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -435,11 +435,6 @@ extern void add_disk(struct gendisk *disk);
 extern void del_gendisk(struct gendisk *gp);
 extern struct gendisk *get_gendisk(dev_t dev, int *partno);
 extern struct block_device *bdget_disk(struct gendisk *disk, int partno);
-int disk_alloc_badblocks(struct gendisk *disk);
-extern int disk_check_badblocks(struct gendisk *disk, sector_t s, int sectors,
-		   sector_t *first_bad, int *bad_sectors);
-extern int disk_set_badblocks(struct gendisk *disk, sector_t s, int sectors);
-extern int disk_clear_badblocks(struct gendisk *disk, sector_t s, int sectors);
 
 extern void set_device_ro(struct block_device *bdev, int flag);
 extern void set_disk_ro(struct gendisk *disk, int flag);
-- 
cgit v0.10.2