From a259d5320537576c0744238f01ca6e75ad776674 Mon Sep 17 00:00:00 2001 From: Michael Schmitz Date: Sat, 1 Feb 2014 13:48:13 +1300 Subject: m68k/atari - ide: do not register interrupt if host->get_lock is set On m68k, host->get_lock is used to both lock and register the interrupt that the IDE host shares with other device drivers. Registering the IDE interrupt handler in ide-probe.c results in duplicating the interrupt registered (once via host->get lock, and also via init_irq()), and may result in IDE accepting interrupts even when another driver has locked the interrupt hardware. This opens the whole locking scheme up to races. host->get_lock is set on m68k only, so other drivers' behaviour is not changed. Signed-off-by: Michael Schmitz Cc: Geert Uytterhoeven Cc: David S. Miller Cc: linux-ide@vger.kernel.org Signed-off-by: David S. Miller diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c index 2a744a9..a3d3b17 100644 --- a/drivers/ide/ide-probe.c +++ b/drivers/ide/ide-probe.c @@ -853,8 +853,9 @@ static int init_irq (ide_hwif_t *hwif) if (irq_handler == NULL) irq_handler = ide_intr; - if (request_irq(hwif->irq, irq_handler, sa, hwif->name, hwif)) - goto out_up; + if (!host->get_lock) + if (request_irq(hwif->irq, irq_handler, sa, hwif->name, hwif)) + goto out_up; #if !defined(__mc68000__) printk(KERN_INFO "%s at 0x%03lx-0x%03lx,0x%03lx on irq %d", hwif->name, @@ -1533,7 +1534,8 @@ static void ide_unregister(ide_hwif_t *hwif) ide_proc_unregister_port(hwif); - free_irq(hwif->irq, hwif); + if (!hwif->host->get_lock) + free_irq(hwif->irq, hwif); device_unregister(hwif->portdev); device_unregister(&hwif->gendev); -- cgit v0.10.2 From 2f2d4dd63d4e0db6d3a9a246624a7ea335957e98 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Tue, 11 Mar 2014 12:44:54 +0100 Subject: ide: Fix CS5520 and CS5530 dependencies As far as I know, the CS5520 and CS5530 chipsets were only used with 32-bit x86 Geode processors, so I think their drivers are only needed on this architecture, except for build testing purpose. While we're here, simplify the dependencies for the CS5535 driver. Signed-off-by: Jean Delvare Cc: "David S. Miller" Signed-off-by: David S. Miller diff --git a/drivers/ide/Kconfig b/drivers/ide/Kconfig index 8fb46aa..1bbf48e 100644 --- a/drivers/ide/Kconfig +++ b/drivers/ide/Kconfig @@ -416,6 +416,7 @@ config BLK_DEV_CY82C693 config BLK_DEV_CS5520 tristate "Cyrix CS5510/20 MediaGX chipset support (VERY EXPERIMENTAL)" + depends on X86_32 || COMPILE_TEST select BLK_DEV_IDEDMA_PCI help Include support for PIO tuning and virtual DMA on the Cyrix MediaGX @@ -426,6 +427,7 @@ config BLK_DEV_CS5520 config BLK_DEV_CS5530 tristate "Cyrix/National Semiconductor CS5530 MediaGX chipset support" + depends on X86_32 || COMPILE_TEST select BLK_DEV_IDEDMA_PCI help Include support for UDMA on the Cyrix MediaGX 5530 chipset. This @@ -435,7 +437,7 @@ config BLK_DEV_CS5530 config BLK_DEV_CS5535 tristate "AMD CS5535 chipset support" - depends on X86 && !X86_64 + depends on X86_32 select BLK_DEV_IDEDMA_PCI help Include support for UDMA on the NSC/AMD CS5535 companion chipset. -- cgit v0.10.2 From 5b40dd30bbfaa7fcba0cd945a4852a146c552ea7 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Fri, 14 Mar 2014 17:54:31 +0100 Subject: ide: Fix SC1200 dependencies The SC1200 is a SoC based on the Geode GX1 32-bit x86 processor, so its drivers are only needed on this architecture, except for build testing purpose. Signed-off-by: Jean Delvare Cc: "David S. Miller" Signed-off-by: David S. 
Miller diff --git a/drivers/ide/Kconfig b/drivers/ide/Kconfig index 1bbf48e..a04c49f 100644 --- a/drivers/ide/Kconfig +++ b/drivers/ide/Kconfig @@ -488,6 +488,7 @@ config BLK_DEV_JMICRON config BLK_DEV_SC1200 tristate "National SCx200 chipset support" + depends on X86_32 || COMPILE_TEST select BLK_DEV_IDEDMA_PCI help This driver adds support for the on-board IDE controller on the -- cgit v0.10.2 From 4856fbd12d69965d3ab680c686222db93872728d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 11 Jun 2014 11:49:31 -0300 Subject: [media] staging: tighten omap4iss dependencies The OMAP4 camera support depends on I2C and VIDEO_V4L2, both of which can be loadable modules. This causes build failures if we want the camera driver to be built-in. This can be solved by turning the option into "tristate", which unfortunately causes another problem, because the driver incorrectly calls a platform-internal interface for omap4_ctrl_pad_readl/omap4_ctrl_pad_writel. Instead, this patch just forbids the invalid configurations and ensures that the driver can only be built if all its dependencies are built-in. Cc: stable@vger.kernel.org Signed-off-by: Arnd Bergmann Signed-off-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab diff --git a/drivers/staging/media/omap4iss/Kconfig b/drivers/staging/media/omap4iss/Kconfig index 78b0fba..8afc6fe 100644 --- a/drivers/staging/media/omap4iss/Kconfig +++ b/drivers/staging/media/omap4iss/Kconfig @@ -1,6 +1,6 @@ config VIDEO_OMAP4 bool "OMAP 4 Camera support" - depends on VIDEO_V4L2 && VIDEO_V4L2_SUBDEV_API && I2C && ARCH_OMAP4 + depends on VIDEO_V4L2=y && VIDEO_V4L2_SUBDEV_API && I2C=y && ARCH_OMAP4 select VIDEOBUF2_DMA_CONTIG ---help--- Driver for an OMAP 4 ISS controller. -- cgit v0.10.2 From eefae30a1b3aabab6085be2ca0e314021253daa2 Mon Sep 17 00:00:00 2001 From: Antti Palosaari Date: Fri, 13 Jun 2014 11:08:25 -0300 Subject: [media] si2168: add one missing parenthesis Fix following warnings: si2168_cmd_execute() warn: add some parenthesis here? si2168_cmd_execute() warn: maybe use && instead of & Reported-by: Dan Carpenter Signed-off-by: Antti Palosaari Signed-off-by: Mauro Carvalho Chehab diff --git a/drivers/media/dvb-frontends/si2168.c b/drivers/media/dvb-frontends/si2168.c index 8637d2e..f205736 100644 --- a/drivers/media/dvb-frontends/si2168.c +++ b/drivers/media/dvb-frontends/si2168.c @@ -60,7 +60,7 @@ static int si2168_cmd_execute(struct si2168 *s, struct si2168_cmd *cmd) jiffies_to_msecs(jiffies) - (jiffies_to_msecs(timeout) - TIMEOUT)); - if (!(cmd->args[0] >> 7) & 0x01) { + if (!((cmd->args[0] >> 7) & 0x01)) { ret = -ETIMEDOUT; goto err_mutex_unlock; } -- cgit v0.10.2 From a811e6ec87d910faceda561fae9b0088d70ee831 Mon Sep 17 00:00:00 2001 From: Antti Palosaari Date: Fri, 13 Jun 2014 11:19:07 -0300 Subject: [media] si2157: add one missing parenthesis Fix following warnings: si2157_cmd_execute() warn: add some parenthesis here? 
si2157_cmd_execute() warn: maybe use && instead of & Reported-by: Dan Carpenter Signed-off-by: Antti Palosaari Signed-off-by: Mauro Carvalho Chehab diff --git a/drivers/media/tuners/si2157.c b/drivers/media/tuners/si2157.c index 271a752..fa4cc7b 100644 --- a/drivers/media/tuners/si2157.c +++ b/drivers/media/tuners/si2157.c @@ -57,7 +57,7 @@ static int si2157_cmd_execute(struct si2157 *s, struct si2157_cmd *cmd) jiffies_to_msecs(jiffies) - (jiffies_to_msecs(timeout) - TIMEOUT)); - if (!(buf[0] >> 7) & 0x01) { + if (!((buf[0] >> 7) & 0x01)) { ret = -ETIMEDOUT; goto err_mutex_unlock; } else { -- cgit v0.10.2 From 0c76e68d6ec6ade4dd0ae15fb08a827525fec3a2 Mon Sep 17 00:00:00 2001 From: Antti Palosaari Date: Fri, 13 Jun 2014 19:29:55 -0300 Subject: [media] si2168: firmware download fix First 8 bytes belonging to firmware image were hard-coded and uploaded by the driver mistakenly. Introduce new corrected firmware file and remove those 8 bytes from the driver. New firmware image could be extracted from the PCTV 292e driver CD using following command: $ dd if=/TVC 6.4.8/Driver/PCTV Empia/emOEM.sys ibs=1 skip=1089408 count=2728 of=dvb-demod-si2168-02.fw $ md5sum dvb-demod-si2168-02.fw d8da7ff67cd56cd8aa4e101aea45e052 dvb-demod-si2168-02.fw $ sudo cp dvb-demod-si2168-02.fw /lib/firmware/ Signed-off-by: Antti Palosaari Signed-off-by: Mauro Carvalho Chehab diff --git a/drivers/media/dvb-frontends/si2168.c b/drivers/media/dvb-frontends/si2168.c index f205736..2e3cdcf 100644 --- a/drivers/media/dvb-frontends/si2168.c +++ b/drivers/media/dvb-frontends/si2168.c @@ -485,20 +485,6 @@ static int si2168_init(struct dvb_frontend *fe) if (ret) goto err; - cmd.args[0] = 0x05; - cmd.args[1] = 0x00; - cmd.args[2] = 0xaa; - cmd.args[3] = 0x4d; - cmd.args[4] = 0x56; - cmd.args[5] = 0x40; - cmd.args[6] = 0x00; - cmd.args[7] = 0x00; - cmd.wlen = 8; - cmd.rlen = 1; - ret = si2168_cmd_execute(s, &cmd); - if (ret) - goto err; - /* cold state - try to download firmware */ dev_info(&s->client->dev, "%s: found a '%s' in cold state\n", KBUILD_MODNAME, si2168_ops.info.name); diff --git a/drivers/media/dvb-frontends/si2168_priv.h b/drivers/media/dvb-frontends/si2168_priv.h index 2a343e8..53f7f06 100644 --- a/drivers/media/dvb-frontends/si2168_priv.h +++ b/drivers/media/dvb-frontends/si2168_priv.h @@ -22,7 +22,7 @@ #include #include -#define SI2168_FIRMWARE "dvb-demod-si2168-01.fw" +#define SI2168_FIRMWARE "dvb-demod-si2168-02.fw" /* state struct */ struct si2168 { -- cgit v0.10.2 From f71920efb1066d71d74811e1dbed658173adf9bf Mon Sep 17 00:00:00 2001 From: Rickard Strandqvist Date: Sat, 14 Jun 2014 08:37:09 -0300 Subject: [media] media: v4l2-core: v4l2-dv-timings.c: Cleaning up code wrong value used in aspect ratio Wrong value used in same cases for the aspect ratio. 
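The slip is a copy-and-paste one: the second assignment in each branch was meant to set the denominator, so the 4:3 and 15:9 cases overwrote the numerator and left the denominator stale. A minimal sketch of the intended mapping, using a hypothetical struct and helper name and showing only the branches visible in the hunk below:

struct fract { unsigned numerator, denominator; };	/* stand-in for struct v4l2_fract */

static struct fract decode_ratio_sketch(unsigned ratio, unsigned hor_landscape)
{
	struct fract aspect;

	if (ratio == 34) {			/* 4:3 */
		aspect.numerator = 4;
		aspect.denominator = 3;		/* the bug wrote .numerator here a second time */
	} else if (ratio == 68) {		/* 15:9 */
		aspect.numerator = 15;
		aspect.denominator = 9;
	} else {				/* fallback, as in the hunk below */
		aspect.numerator = hor_landscape + 99;
		aspect.denominator = 100;
	}
	return aspect;
}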
Signed-off-by: Rickard Strandqvist Acked-by: Lad, Prabhakar Cc: stable@vger.kernel.org # for v3.12 and up Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab diff --git a/drivers/media/v4l2-core/v4l2-dv-timings.c b/drivers/media/v4l2-core/v4l2-dv-timings.c index 4ae54ca..ce1c9f5 100644 --- a/drivers/media/v4l2-core/v4l2-dv-timings.c +++ b/drivers/media/v4l2-core/v4l2-dv-timings.c @@ -610,10 +610,10 @@ struct v4l2_fract v4l2_calc_aspect_ratio(u8 hor_landscape, u8 vert_portrait) aspect.denominator = 9; } else if (ratio == 34) { aspect.numerator = 4; - aspect.numerator = 3; + aspect.denominator = 3; } else if (ratio == 68) { aspect.numerator = 15; - aspect.numerator = 9; + aspect.denominator = 9; } else { aspect.numerator = hor_landscape + 99; aspect.denominator = 100; -- cgit v0.10.2 From 13936af3d2f04f173a83cc050dbc4b20d8562b81 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Mon, 16 Jun 2014 04:32:37 -0300 Subject: [media] saa7134: use unlocked_ioctl instead of ioctl The saa7134 driver uses core-locking, so there is no longer any need to use the ioctl op instead of the unlocked_ioctl op. This change was forgotten for saa7134-empress.c, so fix this. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab diff --git a/drivers/media/pci/saa7134/saa7134-empress.c b/drivers/media/pci/saa7134/saa7134-empress.c index e65c760..0006d6b 100644 --- a/drivers/media/pci/saa7134/saa7134-empress.c +++ b/drivers/media/pci/saa7134/saa7134-empress.c @@ -179,7 +179,7 @@ static const struct v4l2_file_operations ts_fops = .read = vb2_fop_read, .poll = vb2_fop_poll, .mmap = vb2_fop_mmap, - .ioctl = video_ioctl2, + .unlocked_ioctl = video_ioctl2, }; static const struct v4l2_ioctl_ops ts_ioctl_ops = { -- cgit v0.10.2 From 546a9d8519ed137b2804a3f5a3659003039dd49c Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 19 Jun 2014 14:57:10 -0700 Subject: rcu: Export debug_init_rcu_head() and debug_rcu_head_free() Currently, call_rcu() relies on implicit allocation and initialization for the debug-objects handling of RCU callbacks. If you hammer the kernel hard enough with Sasha's modified version of trinity, you can end up with the sl*b allocators recursing into themselves via this implicit call_rcu() allocation. This commit therefore exports the debug_init_rcu_head() and debug_rcu_head_free() functions, which permits the allocators to allocate and pre-initialize the debug-objects information, so that there is no longer any need for call_rcu() to do that initialization, which in turn prevents the recursion into the memory allocators. Reported-by: Sasha Levin Suggested-by: Thomas Gleixner Signed-off-by: Paul E. McKenney Acked-by: Thomas Gleixner Looks-good-to: Christoph Lameter diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 5a75d19..13bbfbd 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -358,9 +358,19 @@ void wait_rcu_gp(call_rcu_func_t crf); * initialization.
*/ #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD +void init_rcu_head(struct rcu_head *head); +void destroy_rcu_head(struct rcu_head *head); void init_rcu_head_on_stack(struct rcu_head *head); void destroy_rcu_head_on_stack(struct rcu_head *head); #else /* !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ +static inline void init_rcu_head(struct rcu_head *head) +{ +} + +static inline void destroy_rcu_head(struct rcu_head *head) +{ +} + static inline void init_rcu_head_on_stack(struct rcu_head *head) { } diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index a2aeb4d..0fb691e 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -200,12 +200,12 @@ void wait_rcu_gp(call_rcu_func_t crf) EXPORT_SYMBOL_GPL(wait_rcu_gp); #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD -static inline void debug_init_rcu_head(struct rcu_head *head) +void init_rcu_head(struct rcu_head *head) { debug_object_init(head, &rcuhead_debug_descr); } -static inline void debug_rcu_head_free(struct rcu_head *head) +void destroy_rcu_head(struct rcu_head *head) { debug_object_free(head, &rcuhead_debug_descr); } -- cgit v0.10.2 From 4a81e8328d3791a4f99bf5b436d050f6dc5ffea3 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 20 Jun 2014 16:49:01 -0700 Subject: rcu: Reduce overhead of cond_resched() checks for RCU Commit ac1bea85781e (Make cond_resched() report RCU quiescent states) fixed a problem where a CPU looping in the kernel with but one runnable task would give RCU CPU stall warnings, even if the in-kernel loop contained cond_resched() calls. Unfortunately, in so doing, it introduced performance regressions in Anton Blanchard's will-it-scale "open1" test. The problem appears to be not so much the increased cond_resched() path length as an increase in the rate at which grace periods complete, which increased per-update grace-period overhead. This commit takes a different approach to fixing this bug, mainly by moving the RCU-visible quiescent state from cond_resched() to rcu_note_context_switch(), and by further reducing the check to a simple non-zero test of a single per-CPU variable. However, this approach requires that the force-quiescent-state processing send resched IPIs to the offending CPUs. These will be sent only once the grace period has reached an age specified by the boot/sysfs parameter rcutree.jiffies_till_sched_qs, or once the grace period reaches an age halfway to the point at which RCU CPU stall warnings will be emitted, whichever comes first. Reported-by: Dave Hansen Signed-off-by: Paul E. McKenney Cc: Andi Kleen Cc: Christoph Lameter Cc: Mike Galbraith Cc: Eric Dumazet Reviewed-by: Josh Triplett [ paulmck: Made rcu_momentary_dyntick_idle() as suggested by the ktest build robot. Also fixed smp_mb() comment as noted by Oleg Nesterov. ] Merge with e552592e (Reduce overhead of cond_resched() checks for RCU) Signed-off-by: Paul E. McKenney diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 6eaa9cd..910c382 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2785,6 +2785,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted. leaf rcu_node structure. Useful for very large systems. + rcutree.jiffies_till_sched_qs= [KNL] + Set required age in jiffies for a + given grace period before RCU starts + soliciting quiescent-state help from + rcu_note_context_switch(). + rcutree.jiffies_till_first_fqs= [KNL] Set delay from grace-period initialization to first attempt to force quiescent states. 
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 13bbfbd..6a94cc8 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -44,7 +44,6 @@ #include #include #include -#include #include extern int rcu_expedited; /* for sysctl */ @@ -300,41 +299,6 @@ bool __rcu_is_watching(void); #endif /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) || defined(CONFIG_SMP) */ /* - * Hooks for cond_resched() and friends to avoid RCU CPU stall warnings. - */ - -#define RCU_COND_RESCHED_LIM 256 /* ms vs. 100s of ms. */ -DECLARE_PER_CPU(int, rcu_cond_resched_count); -void rcu_resched(void); - -/* - * Is it time to report RCU quiescent states? - * - * Note unsynchronized access to rcu_cond_resched_count. Yes, we might - * increment some random CPU's count, and possibly also load the result from - * yet another CPU's count. We might even clobber some other CPU's attempt - * to zero its counter. This is all OK because the goal is not precision, - * but rather reasonable amortization of rcu_note_context_switch() overhead - * and extremely high probability of avoiding RCU CPU stall warnings. - * Note that this function has to be preempted in just the wrong place, - * many thousands of times in a row, for anything bad to happen. - */ -static inline bool rcu_should_resched(void) -{ - return raw_cpu_inc_return(rcu_cond_resched_count) >= - RCU_COND_RESCHED_LIM; -} - -/* - * Report quiscent states to RCU if it is time to do so. - */ -static inline void rcu_cond_resched(void) -{ - if (unlikely(rcu_should_resched())) - rcu_resched(); -} - -/* * Infrastructure to implement the synchronize_() primitives in * TREE_RCU and rcu_barrier_() primitives in TINY_RCU. */ diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index f1ba773..625d0b0 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -206,6 +206,70 @@ void rcu_bh_qs(int cpu) rdp->passed_quiesce = 1; } +static DEFINE_PER_CPU(int, rcu_sched_qs_mask); + +static DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { + .dynticks_nesting = DYNTICK_TASK_EXIT_IDLE, + .dynticks = ATOMIC_INIT(1), +#ifdef CONFIG_NO_HZ_FULL_SYSIDLE + .dynticks_idle_nesting = DYNTICK_TASK_NEST_VALUE, + .dynticks_idle = ATOMIC_INIT(1), +#endif /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */ +}; + +/* + * Let the RCU core know that this CPU has gone through the scheduler, + * which is a quiescent state. This is called when the need for a + * quiescent state is urgent, so we burn an atomic operation and full + * memory barriers to let the RCU core know about it, regardless of what + * this CPU might (or might not) do in the near future. + * + * We inform the RCU core by emulating a zero-duration dyntick-idle + * period, which we in turn do by incrementing the ->dynticks counter + * by two. + */ +static void rcu_momentary_dyntick_idle(void) +{ + unsigned long flags; + struct rcu_data *rdp; + struct rcu_dynticks *rdtp; + int resched_mask; + struct rcu_state *rsp; + + local_irq_save(flags); + + /* + * Yes, we can lose flag-setting operations. This is OK, because + * the flag will be set again after some delay. + */ + resched_mask = raw_cpu_read(rcu_sched_qs_mask); + raw_cpu_write(rcu_sched_qs_mask, 0); + + /* Find the flavor that needs a quiescent state. */ + for_each_rcu_flavor(rsp) { + rdp = raw_cpu_ptr(rsp->rda); + if (!(resched_mask & rsp->flavor_mask)) + continue; + smp_mb(); /* rcu_sched_qs_mask before cond_resched_completed. 
*/ + if (ACCESS_ONCE(rdp->mynode->completed) != + ACCESS_ONCE(rdp->cond_resched_completed)) + continue; + + /* + * Pretend to be momentarily idle for the quiescent state. + * This allows the grace-period kthread to record the + * quiescent state, with no need for this CPU to do anything + * further. + */ + rdtp = this_cpu_ptr(&rcu_dynticks); + smp_mb__before_atomic(); /* Earlier stuff before QS. */ + atomic_add(2, &rdtp->dynticks); /* QS. */ + smp_mb__after_atomic(); /* Later stuff after QS. */ + break; + } + local_irq_restore(flags); +} + /* * Note a context switch. This is a quiescent state for RCU-sched, * and requires special handling for preemptible RCU. @@ -216,19 +280,12 @@ void rcu_note_context_switch(int cpu) trace_rcu_utilization(TPS("Start context switch")); rcu_sched_qs(cpu); rcu_preempt_note_context_switch(cpu); + if (unlikely(raw_cpu_read(rcu_sched_qs_mask))) + rcu_momentary_dyntick_idle(); trace_rcu_utilization(TPS("End context switch")); } EXPORT_SYMBOL_GPL(rcu_note_context_switch); -static DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { - .dynticks_nesting = DYNTICK_TASK_EXIT_IDLE, - .dynticks = ATOMIC_INIT(1), -#ifdef CONFIG_NO_HZ_FULL_SYSIDLE - .dynticks_idle_nesting = DYNTICK_TASK_NEST_VALUE, - .dynticks_idle = ATOMIC_INIT(1), -#endif /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */ -}; - static long blimit = 10; /* Maximum callbacks per rcu_do_batch. */ static long qhimark = 10000; /* If this many pending, ignore blimit. */ static long qlowmark = 100; /* Once only this many pending, use blimit. */ @@ -243,6 +300,13 @@ static ulong jiffies_till_next_fqs = ULONG_MAX; module_param(jiffies_till_first_fqs, ulong, 0644); module_param(jiffies_till_next_fqs, ulong, 0644); +/* + * How long the grace period must be before we start recruiting + * quiescent-state help from rcu_note_context_switch(). + */ +static ulong jiffies_till_sched_qs = HZ / 20; +module_param(jiffies_till_sched_qs, ulong, 0644); + static bool rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp); static void force_qs_rnp(struct rcu_state *rsp, @@ -853,6 +917,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp, bool *isidle, unsigned long *maxj) { unsigned int curr; + int *rcrmp; unsigned int snap; curr = (unsigned int)atomic_add_return(0, &rdp->dynticks->dynticks); @@ -893,27 +958,43 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp, } /* - * There is a possibility that a CPU in adaptive-ticks state - * might run in the kernel with the scheduling-clock tick disabled - * for an extended time period. Invoke rcu_kick_nohz_cpu() to - * force the CPU to restart the scheduling-clock tick in this - * CPU is in this state. - */ - rcu_kick_nohz_cpu(rdp->cpu); - - /* - * Alternatively, the CPU might be running in the kernel - * for an extended period of time without a quiescent state. - * Attempt to force the CPU through the scheduler to gain the - * needed quiescent state, but only if the grace period has gone - * on for an uncommonly long time. If there are many stuck CPUs, - * we will beat on the first one until it gets unstuck, then move - * to the next. Only do this for the primary flavor of RCU. + * A CPU running for an extended time within the kernel can + * delay RCU grace periods. When the CPU is in NO_HZ_FULL mode, + * even context-switching back and forth between a pair of + * in-kernel CPU-bound tasks cannot advance grace periods. + * So if the grace period is old enough, make the CPU pay attention. 
+ * Note that the unsynchronized assignments to the per-CPU + * rcu_sched_qs_mask variable are safe. Yes, setting of + * bits can be lost, but they will be set again on the next + * force-quiescent-state pass. So lost bit sets do not result + * in incorrect behavior, merely in a grace period lasting + * a few jiffies longer than it might otherwise. Because + * there are at most four threads involved, and because the + * updates are only once every few jiffies, the probability of + * lossage (and thus of slight grace-period extension) is + * quite low. + * + * Note that if the jiffies_till_sched_qs boot/sysfs parameter + * is set too high, we override with half of the RCU CPU stall + * warning delay. */ - if (rdp->rsp == rcu_state_p && + rcrmp = &per_cpu(rcu_sched_qs_mask, rdp->cpu); + if (ULONG_CMP_GE(jiffies, + rdp->rsp->gp_start + jiffies_till_sched_qs) || ULONG_CMP_GE(jiffies, rdp->rsp->jiffies_resched)) { - rdp->rsp->jiffies_resched += 5; - resched_cpu(rdp->cpu); + if (!(ACCESS_ONCE(*rcrmp) & rdp->rsp->flavor_mask)) { + ACCESS_ONCE(rdp->cond_resched_completed) = + ACCESS_ONCE(rdp->mynode->completed); + smp_mb(); /* ->cond_resched_completed before *rcrmp. */ + ACCESS_ONCE(*rcrmp) = + ACCESS_ONCE(*rcrmp) + rdp->rsp->flavor_mask; + resched_cpu(rdp->cpu); /* Force CPU into scheduler. */ + rdp->rsp->jiffies_resched += 5; /* Enable beating. */ + } else if (ULONG_CMP_GE(jiffies, rdp->rsp->jiffies_resched)) { + /* Time to beat on that CPU again! */ + resched_cpu(rdp->cpu); /* Force CPU into scheduler. */ + rdp->rsp->jiffies_resched += 5; /* Re-enable beating. */ + } } return 0; @@ -3491,6 +3572,7 @@ static void __init rcu_init_one(struct rcu_state *rsp, "rcu_node_fqs_1", "rcu_node_fqs_2", "rcu_node_fqs_3" }; /* Match MAX_RCU_LVLS */ + static u8 fl_mask = 0x1; int cpustride = 1; int i; int j; @@ -3509,6 +3591,8 @@ static void __init rcu_init_one(struct rcu_state *rsp, for (i = 1; i < rcu_num_lvls; i++) rsp->level[i] = rsp->level[i - 1] + rsp->levelcnt[i - 1]; rcu_init_levelspread(rsp); + rsp->flavor_mask = fl_mask; + fl_mask <<= 1; /* Initialize the elements themselves, starting from the leaves. */ diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index bf2c1e6..0f69a79 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -307,6 +307,9 @@ struct rcu_data { /* 4) reasons this CPU needed to be kicked by force_quiescent_state */ unsigned long dynticks_fqs; /* Kicked due to dynticks idle. */ unsigned long offline_fqs; /* Kicked due to being offline. */ + unsigned long cond_resched_completed; + /* Grace period that needs help */ + /* from cond_resched(). */ /* 5) __rcu_pending() statistics. */ unsigned long n_rcu_pending; /* rcu_pending() calls since boot. */ @@ -392,6 +395,7 @@ struct rcu_state { struct rcu_node *level[RCU_NUM_LVLS]; /* Hierarchy levels. */ u32 levelcnt[MAX_RCU_LVLS + 1]; /* # nodes in each level. */ u8 levelspread[RCU_NUM_LVLS]; /* kids/node in each level. */ + u8 flavor_mask; /* bit in flavor mask. */ struct rcu_data __percpu *rda; /* pointer of percu rcu_data. */ void (*call)(struct rcu_head *head, /* call_rcu() flavor. 
*/ void (*func)(struct rcu_head *head)); @@ -563,7 +567,7 @@ static bool rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp); static void do_nocb_deferred_wakeup(struct rcu_data *rdp); static void rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp); static void rcu_spawn_nocb_kthreads(struct rcu_state *rsp); -static void rcu_kick_nohz_cpu(int cpu); +static void __maybe_unused rcu_kick_nohz_cpu(int cpu); static bool init_nocb_callback_list(struct rcu_data *rdp); static void rcu_sysidle_enter(struct rcu_dynticks *rdtp, int irq); static void rcu_sysidle_exit(struct rcu_dynticks *rdtp, int irq); diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index cbc2c45..02ac0fb 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -2404,7 +2404,7 @@ static bool init_nocb_callback_list(struct rcu_data *rdp) * if an adaptive-ticks CPU is failing to respond to the current grace * period and has not be idle from an RCU perspective, kick it. */ -static void rcu_kick_nohz_cpu(int cpu) +static void __maybe_unused rcu_kick_nohz_cpu(int cpu) { #ifdef CONFIG_NO_HZ_FULL if (tick_nohz_full_cpu(cpu)) diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index 0fb691e..bc78835 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -350,21 +350,3 @@ static int __init check_cpu_stall_init(void) early_initcall(check_cpu_stall_init); #endif /* #ifdef CONFIG_RCU_STALL_COMMON */ - -/* - * Hooks for cond_resched() and friends to avoid RCU CPU stall warnings. - */ - -DEFINE_PER_CPU(int, rcu_cond_resched_count); - -/* - * Report a set of RCU quiescent states, for use by cond_resched() - * and friends. Out of line due to being called infrequently. - */ -void rcu_resched(void) -{ - preempt_disable(); - __this_cpu_write(rcu_cond_resched_count, 0); - rcu_note_context_switch(smp_processor_id()); - preempt_enable(); -} diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 3bdf01b..bc1638b 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4147,7 +4147,6 @@ static void __cond_resched(void) int __sched _cond_resched(void) { - rcu_cond_resched(); if (should_resched()) { __cond_resched(); return 1; @@ -4166,18 +4165,15 @@ EXPORT_SYMBOL(_cond_resched); */ int __cond_resched_lock(spinlock_t *lock) { - bool need_rcu_resched = rcu_should_resched(); int resched = should_resched(); int ret = 0; lockdep_assert_held(lock); - if (spin_needbreak(lock) || resched || need_rcu_resched) { + if (spin_needbreak(lock) || resched) { spin_unlock(lock); if (resched) __cond_resched(); - else if (unlikely(need_rcu_resched)) - rcu_resched(); else cpu_relax(); ret = 1; @@ -4191,7 +4187,6 @@ int __sched __cond_resched_softirq(void) { BUG_ON(!in_softirq()); - rcu_cond_resched(); /* BH disabled OK, just recording QSes. */ if (should_resched()) { local_bh_enable(); __cond_resched(); -- cgit v0.10.2 From 511c66818d87db2a8931e7f7f92c7904bdd84f72 Mon Sep 17 00:00:00 2001 From: Mihai Caraman Date: Wed, 18 Jun 2014 18:45:05 +0300 Subject: KVM: PPC: Book3E: Unlock mmu_lock when setting caching atttribute The patch 08c9a188d0d0fc0f0c5e17d89a06bb59c493110f kvm: powerpc: use caching attributes as per linux pte do not handle properly the error case, letting mmu_lock locked. The lock will further generate a RCU stall from kvmppc_e500_emul_tlbwe() caller. In case of an error go to out label. 
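The shape of the fix is the usual single-exit unlock; a minimal sketch with hypothetical names (not the kvmppc_e500_shadow_map() body itself):

#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/errno.h>

static DEFINE_SPINLOCK(demo_mmu_lock);

/* Every early exit taken while the lock is held must leave through the
 * label that releases it. */
static int map_with_lock_sketch(bool pte_present)
{
	int ret = 0;

	spin_lock(&demo_mmu_lock);

	if (!pte_present) {		/* the "pte not present" case */
		ret = -EINVAL;		/* was: return -EINVAL, leaking the lock */
		goto out;
	}

	/* ... set up the shadow TLB entry ... */

out:
	spin_unlock(&demo_mmu_lock);
	return ret;
}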
Signed-off-by: Mihai Caraman Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index dd2cc03..86903d3 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -473,7 +473,8 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, if (printk_ratelimit()) pr_err("%s: pte not present: gfn %lx, pfn %lx\n", __func__, (long)gfn, pfn); - return -EINVAL; + ret = -EINVAL; + goto out; } kvmppc_e500_ref_setup(ref, gtlbe, pfn, wimg); -- cgit v0.10.2 From 341acbb3aabbcfbf069d7de4ad35f51b58176faf Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 16 Jun 2014 00:17:07 +0530 Subject: KVM: PPC: BOOK3S: HV: Use base page size when comparing against slb value With guests supporting Multiple page size per segment (MPSS), hpte_page_size returns the actual page size used. Add a new function to return base page size and use that to compare against the the page size calculated from SLB. Without this patch a hpte lookup can fail since we are comparing wrong page size in kvmppc_hv_find_lock_hpte. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index fddb72b..d645428 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -198,8 +198,10 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r, return rb; } -static inline unsigned long hpte_page_size(unsigned long h, unsigned long l) +static inline unsigned long __hpte_page_size(unsigned long h, unsigned long l, + bool is_base_size) { + int size, a_psize; /* Look at the 8 bit LP value */ unsigned int lp = (l >> LP_SHIFT) & ((1 << LP_BITS) - 1); @@ -214,14 +216,27 @@ static inline unsigned long hpte_page_size(unsigned long h, unsigned long l) continue; a_psize = __hpte_actual_psize(lp, size); - if (a_psize != -1) + if (a_psize != -1) { + if (is_base_size) + return 1ul << mmu_psize_defs[size].shift; return 1ul << mmu_psize_defs[a_psize].shift; + } } } return 0; } +static inline unsigned long hpte_page_size(unsigned long h, unsigned long l) +{ + return __hpte_page_size(h, l, 0); +} + +static inline unsigned long hpte_base_page_size(unsigned long h, unsigned long l) +{ + return __hpte_page_size(h, l, 1); +} + static inline unsigned long hpte_rpn(unsigned long ptel, unsigned long psize) { return ((ptel & HPTE_R_RPN) & ~(psize - 1)) >> PAGE_SHIFT; diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 8056107..68468d6 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -1562,7 +1562,7 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf, goto out; } if (!rma_setup && is_vrma_hpte(v)) { - unsigned long psize = hpte_page_size(v, r); + unsigned long psize = hpte_base_page_size(v, r); unsigned long senc = slb_pgsize_encoding(psize); unsigned long lpcr; diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 6e62243..5a24d3c 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -814,13 +814,10 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v, r = hpte[i+1]; /* - * Check the HPTE again, including large page size - * Since we don't currently allow any MPSS (mixed - * page-size segment) page sizes, it is sufficient - * to check against the actual page size. 
+ * Check the HPTE again, including base page size */ if ((v & valid) && (v & mask) == val && - hpte_page_size(v, r) == (1ul << pshift)) + hpte_base_page_size(v, r) == (1ul << pshift)) /* Return with the HPTE still locked */ return (hash << 3) + (i >> 1); -- cgit v0.10.2 From 9715a2e8515217206ebf53040c979fdbeb805a21 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 26 Jun 2014 13:19:40 +0200 Subject: PPC: Add _GLOBAL_TOC for 32bit Commit ac5a8ee8 started using _GLOBAL_TOC on ppc32 code. Unfortunately it's only defined for 64bit targets though. Define it for ppc32 as well, fixing the build breakage that commit introduced. Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index 9ea266e..7e46125 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -277,6 +277,8 @@ n: .globl n; \ n: +#define _GLOBAL_TOC(name) _GLOBAL(name) + #define _KPROBE(n) \ .section ".kprobes.text","a"; \ .globl n; \ -- cgit v0.10.2 From 63283dd21ed2bf25a71909a820ed3e8fe412e15d Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 27 Jun 2014 18:51:39 +0200 Subject: netfilter: nf_tables: skip transaction if no update flags in tables Skip transaction handling for table updates with no changes in the flags. This fixes a crash when passing the table flag with all bits unset. Reported-by: Ana Rey Signed-off-by: Pablo Neira Ayuso diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index ab4566c..da5dc37 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -407,6 +407,9 @@ static int nf_tables_updtable(struct nft_ctx *ctx) if (flags & ~NFT_TABLE_F_DORMANT) return -EINVAL; + if (flags == ctx->table->flags) + return 0; + trans = nft_trans_alloc(ctx, NFT_MSG_NEWTABLE, sizeof(struct nft_trans_table)); if (trans == NULL) -- cgit v0.10.2 From e9110361a9a4e258b072b14bd44eb78cf11453cb Mon Sep 17 00:00:00 2001 From: Heiko Schocher Date: Tue, 24 Jun 2014 09:25:18 +0200 Subject: UBI: fix the volumes tree sorting criteria Commig "604b592 UBI: fix rb_tree node comparison in add_map" broke fastmap backward compatibility and older fastmap images cannot be mounted anymore. The reason is that it changes the volumes RB-tree sorting criteria. This patch fixes the problem. Artem: re-write the commit message Signed-off-by: Heiko Schocher Acked-by: Richard Weinberger Signed-off-by: Artem Bityutskiy diff --git a/drivers/mtd/ubi/fastmap.c b/drivers/mtd/ubi/fastmap.c index b04e7d0..72f39da 100644 --- a/drivers/mtd/ubi/fastmap.c +++ b/drivers/mtd/ubi/fastmap.c @@ -125,7 +125,7 @@ static struct ubi_ainf_volume *add_vol(struct ubi_attach_info *ai, int vol_id, parent = *p; av = rb_entry(parent, struct ubi_ainf_volume, rb); - if (vol_id < av->vol_id) + if (vol_id > av->vol_id) p = &(*p)->rb_left; else p = &(*p)->rb_right; -- cgit v0.10.2 From 0e2be4c1121ae3dc2771c4d9b99d4c39ea9577d8 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Tue, 1 Jul 2014 17:23:06 +0200 Subject: ARM: mvebu: fix SMP boot for Armada 38x and Armada 375 Z1 in big endian The SMP boot on Armada 38x and Armada 375 Z1 is currently broken in big-endian configurations, and this commit fixes it for both platforms. For Armada 375 Z1, the problem was in the armada_375_smp_cpu1_enable_code part of the code that gets copied to the Crypto SRAM as a work-around for an issue of the Z1 stepping. 
This piece of code was not switching the CPU core to big-endian, and not endian-swapping the value read from the Resume Address register (the value is stored little-endian). Due to the introduction of the conditional 'rev r1, r1' instruction, the offset between the 'ldr r0, [pc, #4]' instruction and the value it was looking is different between LE and BE configurations. To solve this, we instead use one 'adr' instruction followed by one 'ldr'. For Armada 38x, the problem was simply that the CPU core was not switched to big endian in the secondary CPU startup function. This change was tested in LE and BE configurations on Armada 385, Armada 375 Z1 and Armada 375 A0. Signed-off-by: Thomas Petazzoni Link: https://lkml.kernel.org/r/1404228186-21203-1-git-send-email-thomas.petazzoni@free-electrons.com Signed-off-by: Jason Cooper diff --git a/arch/arm/mach-mvebu/headsmp-a9.S b/arch/arm/mach-mvebu/headsmp-a9.S index 5925366..da5bb29 100644 --- a/arch/arm/mach-mvebu/headsmp-a9.S +++ b/arch/arm/mach-mvebu/headsmp-a9.S @@ -15,6 +15,8 @@ #include #include +#include + __CPUINIT #define CPU_RESUME_ADDR_REG 0xf10182d4 @@ -22,13 +24,18 @@ .global armada_375_smp_cpu1_enable_code_end armada_375_smp_cpu1_enable_code_start: - ldr r0, [pc, #4] +ARM_BE8(setend be) + adr r0, 1f + ldr r0, [r0] ldr r1, [r0] +ARM_BE8(rev r1, r1) mov pc, r1 +1: .word CPU_RESUME_ADDR_REG armada_375_smp_cpu1_enable_code_end: ENTRY(mvebu_cortex_a9_secondary_startup) +ARM_BE8(setend be) bl v7_invalidate_l1 b secondary_startup ENDPROC(mvebu_cortex_a9_secondary_startup) -- cgit v0.10.2 From 1b6478231c6f5f844185acb32045cf195028cfce Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Wed, 2 Jul 2014 17:25:23 +0100 Subject: xen/manage: fix potential deadlock when resuming the console Calling xen_console_resume() in xen_suspend() causes a warning because it locks irq_mapping_update_lock (a mutex) and this may sleep. If a userspace process is using the evtchn device then this mutex may be locked at the point of the stop_machine() call and xen_console_resume() would then deadlock. Resuming the console after stop_machine() returns avoids this deadlock. Signed-off-by: David Vrabel Reviewed-by: Boris Ostrovsky Cc: diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c index c3667b2..5f1e1f3 100644 --- a/drivers/xen/manage.c +++ b/drivers/xen/manage.c @@ -88,7 +88,6 @@ static int xen_suspend(void *data) if (!si->cancelled) { xen_irq_resume(); - xen_console_resume(); xen_timer_resume(); } @@ -135,6 +134,10 @@ static void do_suspend(void) err = stop_machine(xen_suspend, &si, cpumask_of(0)); + /* Resume console as early as possible. */ + if (!si.cancelled) + xen_console_resume(); + raw_notifier_call_chain(&xen_resume_notifier, 0, NULL); dpm_resume_start(si.cancelled ? PMSG_THAW : PMSG_RESTORE); -- cgit v0.10.2 From fb9a0c443691ceaab3daba966bbbd9f5ff3aa26f Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Fri, 27 Jun 2014 10:42:03 +0100 Subject: xen/balloon: set ballooned out pages as invalid in p2m Since cd9151e26d31048b2b5e00fd02e110e07d2200c9 (xen/balloon: set a mapping for ballooned out pages), a ballooned out page had its entry in the p2m set to the MFN of one of the scratch pages. This means that the p2m will contain many entries pointing to the same MFN. During a domain save, these many-to-one entries are not identified as such and the scratch page is saved multiple times. On restore the ballooned pages are populated with new frames and the domain may use up its allocation before all pages can be restored. 
Since the original fix only needed to keep a mapping for the ballooned page it is safe to set ballooned out pages as INVALID_P2M_ENTRY in the p2m (as they were before). Thus preventing them from being saved and re-populated on restore. Signed-off-by: David Vrabel Reported-by: Marek Marczykowski Tested-by: Marek Marczykowski Acked-by: Stefano Stabellini Cc: diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index b7a506f..5c660c7 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -426,20 +426,18 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp) * p2m are consistent. */ if (!xen_feature(XENFEAT_auto_translated_physmap)) { - unsigned long p; - struct page *scratch_page = get_balloon_scratch_page(); - if (!PageHighMem(page)) { + struct page *scratch_page = get_balloon_scratch_page(); + ret = HYPERVISOR_update_va_mapping( (unsigned long)__va(pfn << PAGE_SHIFT), pfn_pte(page_to_pfn(scratch_page), PAGE_KERNEL_RO), 0); BUG_ON(ret); - } - p = page_to_pfn(scratch_page); - __set_phys_to_machine(pfn, pfn_to_mfn(p)); - put_balloon_scratch_page(); + put_balloon_scratch_page(); + } + __set_phys_to_machine(pfn, INVALID_P2M_ENTRY); } #endif -- cgit v0.10.2 From 1cbbf90d0406913ad4b44194b07f4f41bde84e54 Mon Sep 17 00:00:00 2001 From: Antti Palosaari Date: Tue, 24 Jun 2014 10:03:59 -0300 Subject: [media] af9035: override tuner id when bad value set into eeprom Tuner ID set into EEPROM is wrong in some cases, which causes driver to select wrong tuner profile. That leads device non-working. Fix issue by overriding known bad tuner IDs with suitable default value. Thanks to MX-NET Telekomunikace s.r.o. for providing non-working DTV stick, that I could fix the bug! Cc: stable@vger.kernel.org # v3.15+ Signed-off-by: Antti Palosaari Signed-off-by: Mauro Carvalho Chehab diff --git a/drivers/media/usb/dvb-usb-v2/af9035.c b/drivers/media/usb/dvb-usb-v2/af9035.c index 021e4d3..7b9b75f 100644 --- a/drivers/media/usb/dvb-usb-v2/af9035.c +++ b/drivers/media/usb/dvb-usb-v2/af9035.c @@ -704,15 +704,41 @@ static int af9035_read_config(struct dvb_usb_device *d) if (ret < 0) goto err; - if (tmp == 0x00) - dev_dbg(&d->udev->dev, - "%s: [%d]tuner not set, using default\n", - __func__, i); - else + dev_dbg(&d->udev->dev, "%s: [%d]tuner=%02x\n", + __func__, i, tmp); + + /* tuner sanity check */ + if (state->chip_type == 0x9135) { + if (state->chip_version == 0x02) { + /* IT9135 BX (v2) */ + switch (tmp) { + case AF9033_TUNER_IT9135_60: + case AF9033_TUNER_IT9135_61: + case AF9033_TUNER_IT9135_62: + state->af9033_config[i].tuner = tmp; + break; + } + } else { + /* IT9135 AX (v1) */ + switch (tmp) { + case AF9033_TUNER_IT9135_38: + case AF9033_TUNER_IT9135_51: + case AF9033_TUNER_IT9135_52: + state->af9033_config[i].tuner = tmp; + break; + } + } + } else { + /* AF9035 */ state->af9033_config[i].tuner = tmp; + } - dev_dbg(&d->udev->dev, "%s: [%d]tuner=%02x\n", - __func__, i, state->af9033_config[i].tuner); + if (state->af9033_config[i].tuner != tmp) { + dev_info(&d->udev->dev, + "%s: [%d] overriding tuner from %02x to %02x\n", + KBUILD_MODNAME, i, tmp, + state->af9033_config[i].tuner); + } switch (state->af9033_config[i].tuner) { case AF9033_TUNER_TUA9001: -- cgit v0.10.2 From 9249196867803227dfb6b5f01f7683ce9b5572fd Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 12 Jun 2014 04:01:45 -0300 Subject: [media] davinci: vpif: missing unlocks on error We recently changed some locking around so we need some new unlocks on the error paths. 
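Both hunks below follow the same rule: the buffer-rollback path runs with the queue lock taken via spin_lock_irqsave(), so it has to restore it before returning. A condensed sketch of that path (hypothetical names, not the vpif code):

#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/errno.h>

static DEFINE_SPINLOCK(demo_irqlock);

static int start_streaming_sketch(bool hw_ok)
{
	unsigned long flags;

	spin_lock_irqsave(&demo_irqlock, flags);

	if (!hw_ok) {
		/* ... walk the DMA queue and give each buffer back ... */
		spin_unlock_irqrestore(&demo_irqlock, flags);	/* the missing unlock */
		return -EIO;
	}

	spin_unlock_irqrestore(&demo_irqlock, flags);
	return 0;
}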
Signed-off-by: Dan Carpenter Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab diff --git a/drivers/media/platform/davinci/vpif_capture.c b/drivers/media/platform/davinci/vpif_capture.c index a7ed164..1e4ec69 100644 --- a/drivers/media/platform/davinci/vpif_capture.c +++ b/drivers/media/platform/davinci/vpif_capture.c @@ -269,6 +269,7 @@ err: list_del(&buf->list); vb2_buffer_done(&buf->vb, VB2_BUF_STATE_QUEUED); } + spin_unlock_irqrestore(&common->irqlock, flags); return ret; } diff --git a/drivers/media/platform/davinci/vpif_display.c b/drivers/media/platform/davinci/vpif_display.c index 5bb085b..b431b58 100644 --- a/drivers/media/platform/davinci/vpif_display.c +++ b/drivers/media/platform/davinci/vpif_display.c @@ -233,6 +233,7 @@ err: list_del(&buf->list); vb2_buffer_done(&buf->vb, VB2_BUF_STATE_QUEUED); } + spin_unlock_irqrestore(&common->irqlock, flags); return ret; } -- cgit v0.10.2 From 3445857b22eafb70a6ac258979e955b116bfd2c6 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Mon, 16 Jun 2014 09:08:29 -0300 Subject: [media] hdpvr: fix two audio bugs When the audio encoding is changed the driver calls hdpvr_set_audio with the current opt->audio_input value. However, that should have been opt->audio_input + 1. So changing the audio encoding inadvertently changes the input as well. This bug has always been there. The second bug was introduced in kernel 3.10 and that broke the default_audio_input module option handling: the audio encoding was never switched to AC3 if default_audio_input was set to 2 (SPDIF input). In addition, since starting with 3.10 the audio encoding is always set at the start the first bug now always happens when the driver is loaded. In the past this bug would only surface if the user would change the audio encoding after the driver was loaded. Also fixes a small trivial typo (bufffer -> buffer). Signed-off-by: Hans Verkuil Reported-by: Scott Doty Cc: stable@vger.kernel.org # for v3.10 and up Signed-off-by: Mauro Carvalho Chehab diff --git a/drivers/media/usb/hdpvr/hdpvr-video.c b/drivers/media/usb/hdpvr/hdpvr-video.c index 0500c417..6bce01a 100644 --- a/drivers/media/usb/hdpvr/hdpvr-video.c +++ b/drivers/media/usb/hdpvr/hdpvr-video.c @@ -82,7 +82,7 @@ static void hdpvr_read_bulk_callback(struct urb *urb) } /*=========================================================================*/ -/* bufffer bits */ +/* buffer bits */ /* function expects dev->io_mutex to be hold by caller */ int hdpvr_cancel_queue(struct hdpvr_device *dev) @@ -926,7 +926,7 @@ static int hdpvr_s_ctrl(struct v4l2_ctrl *ctrl) case V4L2_CID_MPEG_AUDIO_ENCODING: if (dev->flags & HDPVR_FLAG_AC3_CAP) { opt->audio_codec = ctrl->val; - return hdpvr_set_audio(dev, opt->audio_input, + return hdpvr_set_audio(dev, opt->audio_input + 1, opt->audio_codec); } return 0; @@ -1198,7 +1198,7 @@ int hdpvr_register_videodev(struct hdpvr_device *dev, struct device *parent, v4l2_ctrl_new_std_menu(hdl, &hdpvr_ctrl_ops, V4L2_CID_MPEG_AUDIO_ENCODING, ac3 ? V4L2_MPEG_AUDIO_ENCODING_AC3 : V4L2_MPEG_AUDIO_ENCODING_AAC, - 0x7, V4L2_MPEG_AUDIO_ENCODING_AAC); + 0x7, ac3 ? 
dev->options.audio_codec : V4L2_MPEG_AUDIO_ENCODING_AAC); v4l2_ctrl_new_std_menu(hdl, &hdpvr_ctrl_ops, V4L2_CID_MPEG_VIDEO_ENCODING, V4L2_MPEG_VIDEO_ENCODING_MPEG_4_AVC, 0x3, -- cgit v0.10.2 From a55c072dfe520f8fa03cf11b07b9268a8a17820a Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 13 Jun 2014 13:11:51 +0200 Subject: efi/arm64: efistub: remove local copy of linux_banner The shared efistub code for ARM and arm64 contains a local copy of linux_banner, allowing it to be referenced from separate executables such as the ARM decompressor. However, this introduces a dependency on generated header files, causing unnecessary rebuilds of the stub itself and, in case of arm64, vmlinux which contains it. On arm64, the copy is not actually needed since we can reference the original symbol directly, and as it turns out, there may be better ways to deal with this for ARM as well, so let's remove it from the shared code. If it still needs to be reintroduced for ARM later, it should live under arch/arm anyway and not in shared code. Signed-off-by: Ard Biesheuvel Acked-by: Will Deacon Signed-off-by: Matt Fleming diff --git a/arch/arm64/kernel/efi-stub.c b/arch/arm64/kernel/efi-stub.c index 60e98a63..e786e6c 100644 --- a/arch/arm64/kernel/efi-stub.c +++ b/arch/arm64/kernel/efi-stub.c @@ -12,8 +12,6 @@ #include #include #include -#include -#include /* * AArch64 requires the DTB to be 8-byte aligned in the first 512MiB from diff --git a/drivers/firmware/efi/fdt.c b/drivers/firmware/efi/fdt.c index 82d7741..507a3df 100644 --- a/drivers/firmware/efi/fdt.c +++ b/drivers/firmware/efi/fdt.c @@ -23,16 +23,6 @@ static efi_status_t update_fdt(efi_system_table_t *sys_table, void *orig_fdt, u32 fdt_val32; u64 fdt_val64; - /* - * Copy definition of linux_banner here. Since this code is - * built as part of the decompressor for ARM v7, pulling - * in version.c where linux_banner is defined for the - * kernel brings other kernel dependencies with it. - */ - const char linux_banner[] = - "Linux version " UTS_RELEASE " (" LINUX_COMPILE_BY "@" - LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION "\n"; - /* Do some checks on provided FDT, if it exists*/ if (orig_fdt) { if (fdt_check_header(orig_fdt)) { -- cgit v0.10.2 From d033f48f3a4a9279c7475891fbb060d4881c22da Mon Sep 17 00:00:00 2001 From: Varun Sethi Date: Tue, 24 Jun 2014 19:27:15 +0530 Subject: iommu/fsl: Fix PAMU window size check. is_power_of_2 requires an unsigned long parameter which would lead to truncation of 64 bit values on 32 bit architectures. __ffs also expects an unsigned long parameter thus won't work for 64 bit values on 32 bit architectures. 
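Open-coding the check keeps the test on the full 64-bit value, and fls64() gives the window-size encoding without truncation. A worked example of the failure mode on a 32-bit build (illustrative helpers, not the PAMU driver code):

#include <linux/bitops.h>	/* fls64() */
#include <linux/types.h>

static bool win_is_pow2(u64 size)
{
	return size && !(size & (size - 1));	/* valid for the full 64 bits */
}

static unsigned int win_to_wse(u64 size)
{
	/* window size is 2^(WSE+1) bytes, so WSE = log2(size) - 1 */
	return fls64(size) - 2;
}

/*
 * For a 4 GiB window, size = 1ULL << 32:
 *   win_is_pow2(size)    -> true
 *   fls64(size)          -> 33, so WSE = 31 and 2^(31+1) = 4 GiB
 *   (unsigned long)size  -> 0 on a 32-bit kernel, which is what broke
 *                           is_power_of_2() and __ffs() here
 */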
Signed-off-by: Varun Sethi Tested-by: Emil Medve Signed-off-by: Joerg Roedel diff --git a/drivers/iommu/fsl_pamu.c b/drivers/iommu/fsl_pamu.c index b99dd88..bb446d7 100644 --- a/drivers/iommu/fsl_pamu.c +++ b/drivers/iommu/fsl_pamu.c @@ -170,10 +170,10 @@ int pamu_disable_liodn(int liodn) static unsigned int map_addrspace_size_to_wse(phys_addr_t addrspace_size) { /* Bug if not a power of 2 */ - BUG_ON(!is_power_of_2(addrspace_size)); + BUG_ON((addrspace_size & (addrspace_size - 1))); /* window size is 2^(WSE+1) bytes */ - return __ffs(addrspace_size) - 1; + return fls64(addrspace_size) - 2; } /* Derive the PAACE window count encoding for the subwindow count */ @@ -351,7 +351,7 @@ int pamu_config_ppaace(int liodn, phys_addr_t win_addr, phys_addr_t win_size, struct paace *ppaace; unsigned long fspi; - if (!is_power_of_2(win_size) || win_size < PAMU_PAGE_SIZE) { + if ((win_size & (win_size - 1)) || win_size < PAMU_PAGE_SIZE) { pr_debug("window size too small or not a power of two %llx\n", win_size); return -EINVAL; } @@ -464,7 +464,7 @@ int pamu_config_spaace(int liodn, u32 subwin_cnt, u32 subwin, return -ENOENT; } - if (!is_power_of_2(subwin_size) || subwin_size < PAMU_PAGE_SIZE) { + if ((subwin_size & (subwin_size - 1)) || subwin_size < PAMU_PAGE_SIZE) { pr_debug("subwindow size out of range, or not a power of 2\n"); return -EINVAL; } diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c index 93072ba..3dd0b8e 100644 --- a/drivers/iommu/fsl_pamu_domain.c +++ b/drivers/iommu/fsl_pamu_domain.c @@ -301,7 +301,7 @@ static int check_size(u64 size, dma_addr_t iova) * Size must be a power of two and at least be equal * to PAMU page size. */ - if (!is_power_of_2(size) || size < PAMU_PAGE_SIZE) { + if ((size & (size - 1)) || size < PAMU_PAGE_SIZE) { pr_debug("%s: size too small or not a power of two\n", __func__); return -EINVAL; } -- cgit v0.10.2 From 75f0e4615dc328e67634eccd86bc71597da9f8a3 Mon Sep 17 00:00:00 2001 From: Varun Sethi Date: Tue, 24 Jun 2014 19:27:16 +0530 Subject: iommu/fsl: Fix the device domain attach condition. Store the domain information for the device, only if it's not already attached to a domain. Signed-off-by: Varun Sethi Signed-off-by: Joerg Roedel diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c index 3dd0b8e..54060d1 100644 --- a/drivers/iommu/fsl_pamu_domain.c +++ b/drivers/iommu/fsl_pamu_domain.c @@ -335,11 +335,6 @@ static struct fsl_dma_domain *iommu_alloc_dma_domain(void) return domain; } -static inline struct device_domain_info *find_domain(struct device *dev) -{ - return dev->archdata.iommu_domain; -} - static void remove_device_ref(struct device_domain_info *info, u32 win_cnt) { unsigned long flags; @@ -380,7 +375,7 @@ static void attach_device(struct fsl_dma_domain *dma_domain, int liodn, struct d * Check here if the device is already attached to domain or not. * If the device is already attached to a domain detach it. 
*/ - old_domain_info = find_domain(dev); + old_domain_info = dev->archdata.iommu_domain; if (old_domain_info && old_domain_info->domain != dma_domain) { spin_unlock_irqrestore(&device_domain_lock, flags); detach_device(dev, old_domain_info->domain); @@ -399,7 +394,7 @@ static void attach_device(struct fsl_dma_domain *dma_domain, int liodn, struct d * the info for the first LIODN as all * LIODNs share the same domain */ - if (!old_domain_info) + if (!dev->archdata.iommu_domain) dev->archdata.iommu_domain = info; spin_unlock_irqrestore(&device_domain_lock, flags); -- cgit v0.10.2 From 3170447c1f264d51b8d1f3898bf2588588a64fdc Mon Sep 17 00:00:00 2001 From: Varun Sethi Date: Tue, 24 Jun 2014 19:27:17 +0530 Subject: iommu/fsl: Fix the error condition during iommu group Earlier PTR_ERR was being returned even if group was set to null. Now, we explicitly set an ERR_PTR value in case the group pointer is NULL. Signed-off-by: Varun Sethi Signed-off-by: Joerg Roedel diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c index 54060d1..af47648 100644 --- a/drivers/iommu/fsl_pamu_domain.c +++ b/drivers/iommu/fsl_pamu_domain.c @@ -1037,12 +1037,15 @@ root_bus: group = get_shared_pci_device_group(pdev); } + if (!group) + group = ERR_PTR(-ENODEV); + return group; } static int fsl_pamu_add_device(struct device *dev) { - struct iommu_group *group = NULL; + struct iommu_group *group = ERR_PTR(-ENODEV); struct pci_dev *pdev; const u32 *prop; int ret, len; @@ -1065,7 +1068,7 @@ static int fsl_pamu_add_device(struct device *dev) group = get_device_iommu_group(dev); } - if (!group || IS_ERR(group)) + if (IS_ERR(group)) return PTR_ERR(group); ret = iommu_group_add_device(group, dev); -- cgit v0.10.2 From 6ed179b67ca1a05034728ab160905900416b1835 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 12 Jun 2014 18:16:53 +1000 Subject: KVM: PPC: Assembly functions exported to modules need _GLOBAL_TOC() Both kvmppc_hv_entry_trampoline and kvmppc_entry_trampoline are assembly functions that are exported to modules and also require a valid r2. As such we need to use _GLOBAL_TOC so we provide a global entry point that establishes the TOC (r2). Signed-off-by: Anton Blanchard Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 77356fd..8d9c5d2 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -48,7 +48,7 @@ * * LR = return address to continue at after eventually re-enabling MMU */ -_GLOBAL(kvmppc_hv_entry_trampoline) +_GLOBAL_TOC(kvmppc_hv_entry_trampoline) mflr r0 std r0, PPC_LR_STKOFF(r1) stdu r1, -112(r1) diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S index 9eec675..4850a22 100644 --- a/arch/powerpc/kvm/book3s_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_rmhandlers.S @@ -146,7 +146,7 @@ kvmppc_handler_skip_ins: * On entry, r4 contains the guest shadow MSR * MSR.EE has to be 0 when calling this function */ -_GLOBAL(kvmppc_entry_trampoline) +_GLOBAL_TOC(kvmppc_entry_trampoline) mfmsr r5 LOAD_REG_ADDR(r7, kvmppc_handler_trampoline_enter) toreal(r7) -- cgit v0.10.2 From 55ab169b7b9276e6e1e01a88531bcf34803dcde2 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 16 Jun 2014 14:37:53 +0200 Subject: KVM: PPC: Book3S PR: Fix ABIv2 on LE We switched to ABIv2 on Little Endian systems now which gets rid of the dotted function names. Branch to the actual functions when we see such a system. 
Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/book3s_interrupts.S b/arch/powerpc/kvm/book3s_interrupts.S index e2c29e3..d044b8b 100644 --- a/arch/powerpc/kvm/book3s_interrupts.S +++ b/arch/powerpc/kvm/book3s_interrupts.S @@ -25,7 +25,11 @@ #include #include #if defined(CONFIG_PPC_BOOK3S_64) +#if defined(_CALL_ELF) && _CALL_ELF == 2 +#define FUNC(name) name +#else #define FUNC(name) GLUE(.,name) +#endif #define GET_SHADOW_VCPU(reg) addi reg, r13, PACA_SVCPU #elif defined(CONFIG_PPC_BOOK3S_32) diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S index 4850a22..16c4d88 100644 --- a/arch/powerpc/kvm/book3s_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_rmhandlers.S @@ -36,7 +36,11 @@ #if defined(CONFIG_PPC_BOOK3S_64) +#if defined(_CALL_ELF) && _CALL_ELF == 2 +#define FUNC(name) name +#else #define FUNC(name) GLUE(.,name) +#endif #elif defined(CONFIG_PPC_BOOK3S_32) -- cgit v0.10.2 From 6c642e442e99af1ca026af55a16f23b5f8ee612a Mon Sep 17 00:00:00 2001 From: zhangdianfang Date: Fri, 30 May 2014 08:37:28 +0800 Subject: tools/liblockdep: Fix comparison of a boolean value with a value of 2 Comparison of a boolean value (!__init_state) with a value of 2 (done) as currently happens in the code is unlikely to succeed and causes repeated initialization of the pthread function pointers. Instead, remove the boolean comparison so that we would initialize said function pointers only once. Ref: https://bugzilla.kernel.org/show_bug.cgi?id=76741 Cc: Jean Delvare Reported-by: David Binderman Signed-off-by: Dianfang Zhang Signed-off-by: Sasha Levin diff --git a/tools/lib/lockdep/preload.c b/tools/lib/lockdep/preload.c index 23bd69c..b5e52af 100644 --- a/tools/lib/lockdep/preload.c +++ b/tools/lib/lockdep/preload.c @@ -92,7 +92,7 @@ enum { none, prepare, done, } __init_state; static void init_preload(void); static void try_init_preload(void) { - if (!__init_state != done) + if (__init_state != done) init_preload(); } -- cgit v0.10.2 From 0c37c686b336aeead2f0b982f24eafaeb19435b1 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Mon, 7 Jul 2014 12:14:27 -0400 Subject: tools/liblockdep: Remove debug print left over from development Remove a debug print in init_preload() which was left over from development and isn't useful at all currently. It was also causing false positive test results. Reported-by: S. Lockwood-Childs Signed-off-by: Sasha Levin diff --git a/tools/lib/lockdep/preload.c b/tools/lib/lockdep/preload.c index b5e52af..51ec2ec8 100644 --- a/tools/lib/lockdep/preload.c +++ b/tools/lib/lockdep/preload.c @@ -439,8 +439,6 @@ __attribute__((constructor)) static void init_preload(void) ll_pthread_rwlock_unlock = dlsym(RTLD_NEXT, "pthread_rwlock_unlock"); #endif - printf("%p\n", ll_pthread_mutex_trylock);fflush(stdout); - lockdep_init(); __init_state = done; -- cgit v0.10.2 From b10827814e9c81c5a14fb73c5a6e06bd85df3f94 Mon Sep 17 00:00:00 2001 From: "S. Lockwood-Childs" Date: Mon, 7 Jul 2014 00:17:33 -0700 Subject: tools/liblockdep: Account for bitfield changes in lockdep's lock_acquire Commit fb9edbe984 shortened held_lock->check from a 2-bit field to a 1-bit field. Make liblockdep compatible with the new definition by passing check=1 to lock_acquire() calls, rather than the old value check=2 (which inadvertently disabled checks by overflowing to 0). Without this fix, several of the test cases in liblockdep run_tests.sh were failing. Signed-off-by: S.
Lockwood-Childs Signed-off-by: Sasha Levin diff --git a/tools/lib/lockdep/include/liblockdep/mutex.h b/tools/lib/lockdep/include/liblockdep/mutex.h index c342f70..ee53a42 100644 --- a/tools/lib/lockdep/include/liblockdep/mutex.h +++ b/tools/lib/lockdep/include/liblockdep/mutex.h @@ -35,7 +35,7 @@ static inline int __mutex_init(liblockdep_pthread_mutex_t *lock, static inline int liblockdep_pthread_mutex_lock(liblockdep_pthread_mutex_t *lock) { - lock_acquire(&lock->dep_map, 0, 0, 0, 2, NULL, (unsigned long)_RET_IP_); + lock_acquire(&lock->dep_map, 0, 0, 0, 1, NULL, (unsigned long)_RET_IP_); return pthread_mutex_lock(&lock->mutex); } @@ -47,7 +47,7 @@ static inline int liblockdep_pthread_mutex_unlock(liblockdep_pthread_mutex_t *lo static inline int liblockdep_pthread_mutex_trylock(liblockdep_pthread_mutex_t *lock) { - lock_acquire(&lock->dep_map, 0, 1, 0, 2, NULL, (unsigned long)_RET_IP_); + lock_acquire(&lock->dep_map, 0, 1, 0, 1, NULL, (unsigned long)_RET_IP_); return pthread_mutex_trylock(&lock->mutex) == 0 ? 1 : 0; } diff --git a/tools/lib/lockdep/include/liblockdep/rwlock.h b/tools/lib/lockdep/include/liblockdep/rwlock.h index a680ab8..4ec03f8 100644 --- a/tools/lib/lockdep/include/liblockdep/rwlock.h +++ b/tools/lib/lockdep/include/liblockdep/rwlock.h @@ -36,7 +36,7 @@ static inline int __rwlock_init(liblockdep_pthread_rwlock_t *lock, static inline int liblockdep_pthread_rwlock_rdlock(liblockdep_pthread_rwlock_t *lock) { - lock_acquire(&lock->dep_map, 0, 0, 2, 2, NULL, (unsigned long)_RET_IP_); + lock_acquire(&lock->dep_map, 0, 0, 2, 1, NULL, (unsigned long)_RET_IP_); return pthread_rwlock_rdlock(&lock->rwlock); } @@ -49,19 +49,19 @@ static inline int liblockdep_pthread_rwlock_unlock(liblockdep_pthread_rwlock_t * static inline int liblockdep_pthread_rwlock_wrlock(liblockdep_pthread_rwlock_t *lock) { - lock_acquire(&lock->dep_map, 0, 0, 0, 2, NULL, (unsigned long)_RET_IP_); + lock_acquire(&lock->dep_map, 0, 0, 0, 1, NULL, (unsigned long)_RET_IP_); return pthread_rwlock_wrlock(&lock->rwlock); } static inline int liblockdep_pthread_rwlock_tryrdlock(liblockdep_pthread_rwlock_t *lock) { - lock_acquire(&lock->dep_map, 0, 1, 2, 2, NULL, (unsigned long)_RET_IP_); + lock_acquire(&lock->dep_map, 0, 1, 2, 1, NULL, (unsigned long)_RET_IP_); return pthread_rwlock_tryrdlock(&lock->rwlock) == 0 ? 1 : 0; } static inline int liblockdep_pthread_rwlock_trywlock(liblockdep_pthread_rwlock_t *lock) { - lock_acquire(&lock->dep_map, 0, 1, 0, 2, NULL, (unsigned long)_RET_IP_); + lock_acquire(&lock->dep_map, 0, 1, 0, 1, NULL, (unsigned long)_RET_IP_); return pthread_rwlock_trywlock(&lock->rwlock) == 0 ? 
1 : 0; } diff --git a/tools/lib/lockdep/preload.c b/tools/lib/lockdep/preload.c index 51ec2ec8..6f80360 100644 --- a/tools/lib/lockdep/preload.c +++ b/tools/lib/lockdep/preload.c @@ -252,7 +252,7 @@ int pthread_mutex_lock(pthread_mutex_t *mutex) try_init_preload(); - lock_acquire(&__get_lock(mutex)->dep_map, 0, 0, 0, 2, NULL, + lock_acquire(&__get_lock(mutex)->dep_map, 0, 0, 0, 1, NULL, (unsigned long)_RET_IP_); /* * Here's the thing with pthread mutexes: unlike the kernel variant, @@ -281,7 +281,7 @@ int pthread_mutex_trylock(pthread_mutex_t *mutex) try_init_preload(); - lock_acquire(&__get_lock(mutex)->dep_map, 0, 1, 0, 2, NULL, (unsigned long)_RET_IP_); + lock_acquire(&__get_lock(mutex)->dep_map, 0, 1, 0, 1, NULL, (unsigned long)_RET_IP_); r = ll_pthread_mutex_trylock(mutex); if (r) lock_release(&__get_lock(mutex)->dep_map, 0, (unsigned long)_RET_IP_); @@ -303,7 +303,7 @@ int pthread_mutex_unlock(pthread_mutex_t *mutex) */ r = ll_pthread_mutex_unlock(mutex); if (r) - lock_acquire(&__get_lock(mutex)->dep_map, 0, 0, 0, 2, NULL, (unsigned long)_RET_IP_); + lock_acquire(&__get_lock(mutex)->dep_map, 0, 0, 0, 1, NULL, (unsigned long)_RET_IP_); return r; } @@ -352,7 +352,7 @@ int pthread_rwlock_rdlock(pthread_rwlock_t *rwlock) init_preload(); - lock_acquire(&__get_lock(rwlock)->dep_map, 0, 0, 2, 2, NULL, (unsigned long)_RET_IP_); + lock_acquire(&__get_lock(rwlock)->dep_map, 0, 0, 2, 1, NULL, (unsigned long)_RET_IP_); r = ll_pthread_rwlock_rdlock(rwlock); if (r) lock_release(&__get_lock(rwlock)->dep_map, 0, (unsigned long)_RET_IP_); @@ -366,7 +366,7 @@ int pthread_rwlock_tryrdlock(pthread_rwlock_t *rwlock) init_preload(); - lock_acquire(&__get_lock(rwlock)->dep_map, 0, 1, 2, 2, NULL, (unsigned long)_RET_IP_); + lock_acquire(&__get_lock(rwlock)->dep_map, 0, 1, 2, 1, NULL, (unsigned long)_RET_IP_); r = ll_pthread_rwlock_tryrdlock(rwlock); if (r) lock_release(&__get_lock(rwlock)->dep_map, 0, (unsigned long)_RET_IP_); @@ -380,7 +380,7 @@ int pthread_rwlock_trywrlock(pthread_rwlock_t *rwlock) init_preload(); - lock_acquire(&__get_lock(rwlock)->dep_map, 0, 1, 0, 2, NULL, (unsigned long)_RET_IP_); + lock_acquire(&__get_lock(rwlock)->dep_map, 0, 1, 0, 1, NULL, (unsigned long)_RET_IP_); r = ll_pthread_rwlock_trywrlock(rwlock); if (r) lock_release(&__get_lock(rwlock)->dep_map, 0, (unsigned long)_RET_IP_); @@ -394,7 +394,7 @@ int pthread_rwlock_wrlock(pthread_rwlock_t *rwlock) init_preload(); - lock_acquire(&__get_lock(rwlock)->dep_map, 0, 0, 0, 2, NULL, (unsigned long)_RET_IP_); + lock_acquire(&__get_lock(rwlock)->dep_map, 0, 0, 0, 1, NULL, (unsigned long)_RET_IP_); r = ll_pthread_rwlock_wrlock(rwlock); if (r) lock_release(&__get_lock(rwlock)->dep_map, 0, (unsigned long)_RET_IP_); @@ -411,7 +411,7 @@ int pthread_rwlock_unlock(pthread_rwlock_t *rwlock) lock_release(&__get_lock(rwlock)->dep_map, 0, (unsigned long)_RET_IP_); r = ll_pthread_rwlock_unlock(rwlock); if (r) - lock_acquire(&__get_lock(rwlock)->dep_map, 0, 0, 0, 2, NULL, (unsigned long)_RET_IP_); + lock_acquire(&__get_lock(rwlock)->dep_map, 0, 0, 0, 1, NULL, (unsigned long)_RET_IP_); return r; } -- cgit v0.10.2 From 19a44ecff52fd67d77d49fb4d43b289c53cdc392 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 7 Jul 2014 21:05:33 +0200 Subject: KVM: PPC: RTAS: Do byte swaps explicitly In commit b59d9d26b we introduced implicit byte swaps for RTAS calls. Unfortunately we messed up and didn't swizzle return values properly. Also the old approach wasn't "sparse" compatible - we were randomly reading __be32 values on an LE system. 
Let's just do all of the swizzling explicitly with byte swaps right where values get used. That way we can at least catch bugs using sparse. This patch fixes XICS RTAS emulation on little endian hosts for me. Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/book3s_rtas.c b/arch/powerpc/kvm/book3s_rtas.c index edb14ba..ef27fbd 100644 --- a/arch/powerpc/kvm/book3s_rtas.c +++ b/arch/powerpc/kvm/book3s_rtas.c @@ -23,20 +23,20 @@ static void kvm_rtas_set_xive(struct kvm_vcpu *vcpu, struct rtas_args *args) u32 irq, server, priority; int rc; - if (args->nargs != 3 || args->nret != 1) { + if (be32_to_cpu(args->nargs) != 3 || be32_to_cpu(args->nret) != 1) { rc = -3; goto out; } - irq = args->args[0]; - server = args->args[1]; - priority = args->args[2]; + irq = be32_to_cpu(args->args[0]); + server = be32_to_cpu(args->args[1]); + priority = be32_to_cpu(args->args[2]); rc = kvmppc_xics_set_xive(vcpu->kvm, irq, server, priority); if (rc) rc = -3; out: - args->rets[0] = rc; + args->rets[0] = cpu_to_be32(rc); } static void kvm_rtas_get_xive(struct kvm_vcpu *vcpu, struct rtas_args *args) @@ -44,12 +44,12 @@ static void kvm_rtas_get_xive(struct kvm_vcpu *vcpu, struct rtas_args *args) u32 irq, server, priority; int rc; - if (args->nargs != 1 || args->nret != 3) { + if (be32_to_cpu(args->nargs) != 1 || be32_to_cpu(args->nret) != 3) { rc = -3; goto out; } - irq = args->args[0]; + irq = be32_to_cpu(args->args[0]); server = priority = 0; rc = kvmppc_xics_get_xive(vcpu->kvm, irq, &server, &priority); @@ -58,10 +58,10 @@ static void kvm_rtas_get_xive(struct kvm_vcpu *vcpu, struct rtas_args *args) goto out; } - args->rets[1] = server; - args->rets[2] = priority; + args->rets[1] = cpu_to_be32(server); + args->rets[2] = cpu_to_be32(priority); out: - args->rets[0] = rc; + args->rets[0] = cpu_to_be32(rc); } static void kvm_rtas_int_off(struct kvm_vcpu *vcpu, struct rtas_args *args) @@ -69,18 +69,18 @@ static void kvm_rtas_int_off(struct kvm_vcpu *vcpu, struct rtas_args *args) u32 irq; int rc; - if (args->nargs != 1 || args->nret != 1) { + if (be32_to_cpu(args->nargs) != 1 || be32_to_cpu(args->nret) != 1) { rc = -3; goto out; } - irq = args->args[0]; + irq = be32_to_cpu(args->args[0]); rc = kvmppc_xics_int_off(vcpu->kvm, irq); if (rc) rc = -3; out: - args->rets[0] = rc; + args->rets[0] = cpu_to_be32(rc); } static void kvm_rtas_int_on(struct kvm_vcpu *vcpu, struct rtas_args *args) @@ -88,18 +88,18 @@ static void kvm_rtas_int_on(struct kvm_vcpu *vcpu, struct rtas_args *args) u32 irq; int rc; - if (args->nargs != 1 || args->nret != 1) { + if (be32_to_cpu(args->nargs) != 1 || be32_to_cpu(args->nret) != 1) { rc = -3; goto out; } - irq = args->args[0]; + irq = be32_to_cpu(args->args[0]); rc = kvmppc_xics_int_on(vcpu->kvm, irq); if (rc) rc = -3; out: - args->rets[0] = rc; + args->rets[0] = cpu_to_be32(rc); } #endif /* CONFIG_KVM_XICS */ @@ -205,32 +205,6 @@ int kvm_vm_ioctl_rtas_define_token(struct kvm *kvm, void __user *argp) return rc; } -static void kvmppc_rtas_swap_endian_in(struct rtas_args *args) -{ -#ifdef __LITTLE_ENDIAN__ - int i; - - args->token = be32_to_cpu(args->token); - args->nargs = be32_to_cpu(args->nargs); - args->nret = be32_to_cpu(args->nret); - for (i = 0; i < args->nargs; i++) - args->args[i] = be32_to_cpu(args->args[i]); -#endif -} - -static void kvmppc_rtas_swap_endian_out(struct rtas_args *args) -{ -#ifdef __LITTLE_ENDIAN__ - int i; - - for (i = 0; i < args->nret; i++) - args->args[i] = cpu_to_be32(args->args[i]); - args->token = cpu_to_be32(args->token); - args->nargs = 
cpu_to_be32(args->nargs); - args->nret = cpu_to_be32(args->nret); -#endif -} - int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu) { struct rtas_token_definition *d; @@ -249,8 +223,6 @@ int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu) if (rc) goto fail; - kvmppc_rtas_swap_endian_in(&args); - /* * args->rets is a pointer into args->args. Now that we've * copied args we need to fix it up to point into our copy, * not the guest args. We also need to save the original * value so we can restore it on the way out. */ orig_rets = args.rets; - args.rets = &args.args[args.nargs]; + args.rets = &args.args[be32_to_cpu(args.nargs)]; mutex_lock(&vcpu->kvm->lock); rc = -ENOENT; list_for_each_entry(d, &vcpu->kvm->arch.rtas_tokens, list) { - if (d->token == args.token) { + if (d->token == be32_to_cpu(args.token)) { d->handler->handler(vcpu, &args); rc = 0; break; @@ -275,7 +247,6 @@ if (rc == 0) { args.rets = orig_rets; - kvmppc_rtas_swap_endian_out(&args); rc = kvm_write_guest(vcpu->kvm, args_phys, &args, sizeof(args)); if (rc) goto fail; -- cgit v0.10.2 From 9242b5b60df8b13b469bc6b7be08ff6ebb551ad3 Mon Sep 17 00:00:00 2001 From: Bandan Das Date: Tue, 8 Jul 2014 00:30:23 -0400 Subject: KVM: x86: Check for nested events if there is an injectable interrupt With commit b6b8a1451fc40412c57d1, which introduced vmx_check_nested_events, checks for injectable interrupts happen at different points in time for L1 and L2, which could potentially cause a race. The regression occurs because KVM_REQ_EVENT is always set when nested_run_pending is set, even if there's no pending interrupt. Consequently, there could be a small window when check_nested_events returns without exiting to L1, but an interrupt comes through soon after and incorrectly gets injected to L2 by inject_pending_event. Fix this by also checking for nested events when a check for an injectable interrupt returns true. Signed-off-by: Bandan Das Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f644933..ef432f8 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5887,6 +5887,18 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win) kvm_x86_ops->set_nmi(vcpu); } } else if (kvm_cpu_has_injectable_intr(vcpu)) { + /* + * Because interrupts can be injected asynchronously, we are + * calling check_nested_events again here to avoid a race condition. + * See https://lkml.org/lkml/2014/7/2/60 for discussion about this + * proposal and current concerns. Perhaps we should be setting + * KVM_REQ_EVENT only on certain events and not unconditionally? + */ + if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) { + r = kvm_x86_ops->check_nested_events(vcpu, req_int_win); + if (r != 0) + return r; + } if (kvm_x86_ops->interrupt_allowed(vcpu)) { kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu), false); -- cgit v0.10.2 From 16927776ae757d0d132bdbfabbfe2c498342bd59 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Mon, 7 Jul 2014 14:06:11 -0700 Subject: alarmtimer: Fix bug where relative alarm timers were treated as absolute Sharvil noticed that with the posix timer_settime interface, using the CLOCK_REALTIME_ALARM or CLOCK_BOOTTIME_ALARM clockid, if the user tried to specify a relative timer, it would incorrectly be treated as absolute regardless of the state of the flags argument. This patch corrects this, properly checking the absolute/relative flag, and adds further error checking that no invalid flag bits are set.
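As a userspace illustration of the bug (hypothetical program, not from the patch; needs CAP_WAKE_ALARM, may need _GNU_SOURCE, and links with -lrt): with flags == 0 the it_value below requests a relative five-second expiry, but before this fix it was interpreted as the absolute boottime "five seconds after boot", so the timer usually fired immediately:

	#include <signal.h>
	#include <time.h>
	#include <unistd.h>

	int main(void)
	{
		timer_t timerid;
		struct sigevent sev = { .sigev_notify = SIGEV_SIGNAL, .sigev_signo = SIGALRM };
		struct itimerspec its = { .it_value = { .tv_sec = 5 } };

		timer_create(CLOCK_BOOTTIME_ALARM, &sev, &timerid);
		/* flags == 0 means relative; TIMER_ABSTIME would mean absolute */
		timer_settime(timerid, 0, &its, NULL);
		pause(); /* SIGALRM should arrive ~5s from now, not ~5s after boot */
		return 0;
	}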
Reported-by: Sharvil Nanavati Signed-off-by: John Stultz Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Prarit Bhargava Cc: Sharvil Nanavati Cc: stable #3.0+ Link: http://lkml.kernel.org/r/1404767171-6902-1-git-send-email-john.stultz@linaro.org Signed-off-by: Thomas Gleixner diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 88c9c65..fe75444 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -585,9 +585,14 @@ static int alarm_timer_set(struct k_itimer *timr, int flags, struct itimerspec *new_setting, struct itimerspec *old_setting) { + ktime_t exp; + if (!rtcdev) return -ENOTSUPP; + if (flags & ~TIMER_ABSTIME) + return -EINVAL; + if (old_setting) alarm_timer_get(timr, old_setting); @@ -597,8 +602,16 @@ static int alarm_timer_set(struct k_itimer *timr, int flags, /* start the timer */ timr->it.alarm.interval = timespec_to_ktime(new_setting->it_interval); - alarm_start(&timr->it.alarm.alarmtimer, - timespec_to_ktime(new_setting->it_value)); + exp = timespec_to_ktime(new_setting->it_value); + /* Convert (if necessary) to absolute time */ + if (flags != TIMER_ABSTIME) { + ktime_t now; + + now = alarm_bases[timr->it.alarm.alarmtimer.type].gettime(); + exp = ktime_add(now, exp); + } + + alarm_start(&timr->it.alarm.alarmtimer, exp); return 0; } @@ -730,6 +743,9 @@ static int alarm_timer_nsleep(const clockid_t which_clock, int flags, if (!alarmtimer_get_rtcdev()) return -ENOTSUPP; + if (flags & ~TIMER_ABSTIME) + return -EINVAL; + if (!capable(CAP_WAKE_ALARM)) return -EPERM; -- cgit v0.10.2 From d45b3279a5a2252cafcd665bbf2db8c9b31ef783 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 8 Jul 2014 12:25:28 +0200 Subject: block: don't assume last put of shared tags is for the host There is no inherent reason why the last put of a tag structure must be the one for the Scsi_Host, as device model objects can be held for arbitrary periods. Merge blk_free_tags and __blk_free_tags into a single function that just releases a reference, and get rid of the BUG() when the host reference wasn't the last. Signed-off-by: Christoph Hellwig Cc: stable@kernel.org Signed-off-by: Jens Axboe diff --git a/block/blk-tag.c b/block/blk-tag.c index 3f33d86..a185b86 100644 --- a/block/blk-tag.c +++ b/block/blk-tag.c @@ -27,18 +27,15 @@ struct request *blk_queue_find_tag(struct request_queue *q, int tag) EXPORT_SYMBOL(blk_queue_find_tag); /** - * __blk_free_tags - release a given set of tag maintenance info + * blk_free_tags - release a given set of tag maintenance info * @bqt: the tag map to free * - * Tries to free the specified @bqt. Returns true if it was - * actually freed and false if there are still references using it + * Drop the reference count on @bqt and frees it when the last reference + * is dropped.
*/ -static int __blk_free_tags(struct blk_queue_tag *bqt) +void blk_free_tags(struct blk_queue_tag *bqt) { - int retval; - - retval = atomic_dec_and_test(&bqt->refcnt); - if (retval) { + if (atomic_dec_and_test(&bqt->refcnt)) { BUG_ON(find_first_bit(bqt->tag_map, bqt->max_depth) < bqt->max_depth); @@ -50,9 +47,8 @@ static int __blk_free_tags(struct blk_queue_tag *bqt) kfree(bqt); } - - return retval; } +EXPORT_SYMBOL(blk_free_tags); /** * __blk_queue_free_tags - release tag maintenance info @@ -69,28 +65,13 @@ void __blk_queue_free_tags(struct request_queue *q) if (!bqt) return; - __blk_free_tags(bqt); + blk_free_tags(bqt); q->queue_tags = NULL; queue_flag_clear_unlocked(QUEUE_FLAG_QUEUED, q); } /** - * blk_free_tags - release a given set of tag maintenance info - * @bqt: the tag map to free - * - * For externally managed @bqt frees the map. Callers of this - * function must guarantee to have released all the queues that - * might have been using this tag map. - */ -void blk_free_tags(struct blk_queue_tag *bqt) -{ - if (unlikely(!__blk_free_tags(bqt))) - BUG(); -} -EXPORT_SYMBOL(blk_free_tags); - -/** * blk_queue_free_tags - release tag maintenance info * @q: the request queue for the device * -- cgit v0.10.2 From 0d461e1b087048b0cc37c9d7b351649578c507b4 Mon Sep 17 00:00:00 2001 From: Gregory CLEMENT Date: Fri, 4 Jul 2014 16:22:16 +0200 Subject: ARM: mvebu: Fix the operand list in the inline asm of armada_370_xp_pmsu_idle_enter In the inline asm part of the function armada_370_xp_pmsu_idle_enter() an input operand was used. The intent was to let the compiler choose the register so it could optimize as needed. However, an input operand is not supposed to be modified by the inline asm code, which can lead to improperly generated instructions. In some cases the compiler chose to reuse that same register to store the return value, but the assembly part modified the register, so the function could return a wrong value. The fix is to use a clobber. Thanks to this, the compiler will know that the value of this register will be modified. Signed-off-by: Gregory CLEMENT Reviewed-by: Thomas Petazzoni Link: https://lkml.kernel.org/r/1404483736-16938-1-git-send-email-gregory.clement@free-electrons.com Signed-off-by: Jason Cooper diff --git a/arch/arm/mach-mvebu/pmsu.c b/arch/arm/mach-mvebu/pmsu.c index a1d407c..25aa823 100644 --- a/arch/arm/mach-mvebu/pmsu.c +++ b/arch/arm/mach-mvebu/pmsu.c @@ -201,12 +201,12 @@ static noinline int do_armada_370_xp_cpu_suspend(unsigned long deepidle) /* Test the CR_C bit and set it if it was cleared */ asm volatile( - "mrc p15, 0, %0, c1, c0, 0 \n\t" - "tst %0, #(1 << 2) \n\t" - "orreq %0, %0, #(1 << 2) \n\t" - "mcreq p15, 0, %0, c1, c0, 0 \n\t" + "mrc p15, 0, r0, c1, c0, 0 \n\t" + "tst r0, #(1 << 2) \n\t" + "orreq r0, r0, #(1 << 2) \n\t" + "mcreq p15, 0, r0, c1, c0, 0 \n\t" "isb " - : : "r" (0)); + : : : "r0"); pr_warn("Failed to suspend the system\n"); -- cgit v0.10.2 From a728b977429383b3fe92b6e3bff9e69365609e0f Mon Sep 17 00:00:00 2001 From: Ezequiel Garcia Date: Tue, 8 Jul 2014 10:37:37 -0300 Subject: ARM: mvebu: Fix coherency bus notifiers by using separate notifiers Currently, the coherency fabric support registers two bus notifiers; one for platform, one for pci bus types, with the same notifier block. However, this is illegal and can cause serious issues: the notifier block is also a link in the notifier list and cannot be inserted twice.
This commit fixes this by using different notifier blocks (with the same notifier callback) to set the platform and pci bus types notifiers. Fixes: b0063aad5dd8 ("ARM: mvebu: use hardware I/O coherency also for PCI devices") Reported-by: Paolo Pisati Signed-off-by: Ezequiel Garcia Link: https://lkml.kernel.org/r/1404826657-6977-1-git-send-email-ezequiel.garcia@free-electrons.com Signed-off-by: Jason Cooper diff --git a/arch/arm/mach-mvebu/coherency.c b/arch/arm/mach-mvebu/coherency.c index 477202f..2bdc323 100644 --- a/arch/arm/mach-mvebu/coherency.c +++ b/arch/arm/mach-mvebu/coherency.c @@ -292,6 +292,10 @@ static struct notifier_block mvebu_hwcc_nb = { .notifier_call = mvebu_hwcc_notifier, }; +static struct notifier_block mvebu_hwcc_pci_nb = { + .notifier_call = mvebu_hwcc_notifier, +}; + static void __init armada_370_coherency_init(struct device_node *np) { struct resource res; @@ -427,7 +431,7 @@ static int __init coherency_pci_init(void) { if (coherency_available()) bus_register_notifier(&pci_bus_type, - &mvebu_hwcc_nb); + &mvebu_hwcc_pci_nb); return 0; } -- cgit v0.10.2 From 74adf83f5d7720925499b4938f930591f947b660 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 18 Jun 2014 11:07:03 +0200 Subject: nfs: only show Posix ACLs in listxattr if actually present The big ACL switched nfs to use generic_listxattr, which calls all existing ->list handlers. Add a custom .listxattr implementation that only lists the ACLs if they actually are present on the given inode. Signed-off-by: Christoph Hellwig Reported-by: Philippe Troin Tested-by: Philippe Troin Fixes: 013cdf1088d7 (nfs: use generic posix ACL infrastructure ...) Cc: stable@vger.kernel.org # 3.14+ Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index 871d6ed..8f854dd 100644 --- a/fs/nfs/nfs3acl.c +++ b/fs/nfs/nfs3acl.c @@ -247,3 +247,46 @@ const struct xattr_handler *nfs3_xattr_handlers[] = { &posix_acl_default_xattr_handler, NULL, }; + +static int +nfs3_list_one_acl(struct inode *inode, int type, const char *name, void *data, + size_t size, ssize_t *result) +{ + struct posix_acl *acl; + char *p = data + *result; + + acl = get_acl(inode, type); + if (!acl) + return 0; + + posix_acl_release(acl); + + *result += strlen(name); + *result += 1; + if (!size) + return 0; + if (*result > size) + return -ERANGE; + + strcpy(p, name); + return 0; +} + +ssize_t +nfs3_listxattr(struct dentry *dentry, char *data, size_t size) +{ + struct inode *inode = dentry->d_inode; + ssize_t result = 0; + int error; + + error = nfs3_list_one_acl(inode, ACL_TYPE_ACCESS, + POSIX_ACL_XATTR_ACCESS, data, size, &result); + if (error) + return error; + + error = nfs3_list_one_acl(inode, ACL_TYPE_DEFAULT, + POSIX_ACL_XATTR_DEFAULT, data, size, &result); + if (error) + return error; + return result; +} diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index e7daa42..f0afa29 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -885,7 +885,7 @@ static const struct inode_operations nfs3_dir_inode_operations = { .getattr = nfs_getattr, .setattr = nfs_setattr, #ifdef CONFIG_NFS_V3_ACL - .listxattr = generic_listxattr, + .listxattr = nfs3_listxattr, .getxattr = generic_getxattr, .setxattr = generic_setxattr, .removexattr = generic_removexattr, @@ -899,7 +899,7 @@ static const struct inode_operations nfs3_file_inode_operations = { .getattr = nfs_getattr, .setattr = nfs_setattr, #ifdef CONFIG_NFS_V3_ACL - .listxattr = generic_listxattr, + .listxattr = nfs3_listxattr, .getxattr = generic_getxattr, .setxattr = generic_setxattr, 
.removexattr = generic_removexattr, -- cgit v0.10.2 From a97e8027b1d28eafe6bafe062556c1ec926a49c6 Mon Sep 17 00:00:00 2001 From: Matthias Brugger Date: Thu, 3 Jul 2014 13:58:52 +0200 Subject: irqchip: gic: Add support for cortex a7 compatible string Patch 0a68214b "ARM: DT: Add binding for GIC virtualization extentions (VGIC)" added the "arm,cortex-a7-gic" compatible string, but the corresponding IRQCHIP_DECLARE was never added to the gic driver. To let real Cortex-A7 SoCs use it, add the necessary declaration to the device driver. Signed-off-by: Matthias Brugger Link: https://lkml.kernel.org/r/1404388732-28890-1-git-send-email-matthias.bgg@gmail.com Fixes: 0a68214b76ca ("ARM: DT: Add binding for GIC virtualization extentions (VGIC)") Cc: # v3.5+ Signed-off-by: Jason Cooper diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index 7e11c9d..aadd6f5 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -1073,6 +1073,7 @@ gic_of_init(struct device_node *node, struct device_node *parent) } IRQCHIP_DECLARE(cortex_a15_gic, "arm,cortex-a15-gic", gic_of_init); IRQCHIP_DECLARE(cortex_a9_gic, "arm,cortex-a9-gic", gic_of_init); +IRQCHIP_DECLARE(cortex_a7_gic, "arm,cortex-a7-gic", gic_of_init); IRQCHIP_DECLARE(msm_8660_qgic, "qcom,msm-8660-qgic", gic_of_init); IRQCHIP_DECLARE(msm_qgic2, "qcom,msm-qgic2", gic_of_init); -- cgit v0.10.2 From 233b43010330ed8cf39cf636880017df3e33f102 Mon Sep 17 00:00:00 2001 From: Hariprasad S Date: Mon, 23 Jun 2014 19:12:35 +0530 Subject: RDMA/cxgb4: Fix skb leak in reject_cr() Based on original work by Steve Wise Signed-off-by: Steve Wise Signed-off-by: Hariprasad Shenai Signed-off-by: Roland Dreier diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 5e153f6..cc36e9b 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -2180,7 +2180,6 @@ static void reject_cr(struct c4iw_dev *dev, u32 hwtid, struct sk_buff *skb) PDBG("%s c4iw_dev %p tid %u\n", __func__, dev, hwtid); BUG_ON(skb_cloned(skb)); skb_trim(skb, sizeof(struct cpl_tid_release)); - skb_get(skb); release_tid(&dev->rdev, hwtid, skb); return; } -- cgit v0.10.2 From 5dab6d3ab1abed99be6166b844af58237d52a135 Mon Sep 17 00:00:00 2001 From: Hariprasad S Date: Mon, 23 Jun 2014 19:12:36 +0530 Subject: RDMA/cxgb4: Clean up connection on ARP error Based on original work by Steve Wise Signed-off-by: Steve Wise Signed-off-by: Hariprasad Shenai Signed-off-by: Roland Dreier diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index cc36e9b..6a93280 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -432,8 +432,17 @@ static void arp_failure_discard(void *handle, struct sk_buff *skb) */ static void act_open_req_arp_failure(void *handle, struct sk_buff *skb) { + struct c4iw_ep *ep = handle; + printk(KERN_ERR MOD "ARP failure duing connect\n"); kfree_skb(skb); + connect_reply_upcall(ep, -EHOSTUNREACH); + state_set(&ep->com, DEAD); + remove_handle(ep->com.dev, &ep->com.dev->atid_idr, ep->atid); + cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid); + dst_release(ep->dst); + cxgb4_l2t_release(ep->l2t); + c4iw_put_ep(&ep->com); } /* @@ -658,7 +667,7 @@ static int send_connect(struct c4iw_ep *ep) opt2 |= T5_OPT_2_VALID; opt2 |= V_CONG_CNTRL(CONG_ALG_TAHOE); } - t4_set_arp_err_handler(skb, NULL, act_open_req_arp_failure); + t4_set_arp_err_handler(skb, ep, act_open_req_arp_failure); if (is_t4(ep->com.dev->rdev.lldi.adapter_type)) { if (ep->com.remote_addr.ss_family ==
AF_INET) { -- cgit v0.10.2 From 6b54d54dea82ae214e4a45a503c4ef755a8ecee8 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Tue, 8 Jul 2014 10:20:35 -0500 Subject: RDMA/cxgb4: Initialize the device status page The status page is mapped to user processes and allows sharing the device state between the kernel and user processes. This state isn't getting initialized and thus intermittently causes problems. Namely, the user process can mistakenly think the user doorbell writes are disabled which causes SQ work requests to never get fetched by HW. Fixes: 05eb23893c2c ("cxgb4/iw_cxgb4: Doorbell Drop Avoidance Bug Fixes"). Signed-off-by: Steve Wise Cc: # v3.15 Signed-off-by: Roland Dreier diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index dd93aad..16b75de 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -696,6 +696,7 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev) pr_err(MOD "error allocating status page\n"); goto err4; } + rdev->status_page->db_off = 0; return 0; err4: c4iw_rqtpool_destroy(rdev); -- cgit v0.10.2 From 6ef07a9f369742a7b18c77484411cff0bd790291 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 8 Jun 2014 10:00:59 +0300 Subject: mlx5_core: Fix possible race between mr tree insert/delete In mlx5_core_destroy_mkey(), we must first remove the mr from the radix tree and then destroy it. Otherwise we might hit a race if the key was reallocated and we attempted to insert it to the radix tree. Also handle radix tree insert/delete failures. Signed-off-by: Sagi Grimberg Reviewed-by: Eli Cohen Signed-off-by: Roland Dreier diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c index ba0401d4..184c361 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c @@ -94,6 +94,11 @@ int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr, write_lock_irq(&table->lock); err = radix_tree_insert(&table->tree, mlx5_base_mkey(mr->key), mr); write_unlock_irq(&table->lock); + if (err) { + mlx5_core_warn(dev, "failed radix tree insert of mr 0x%x, %d\n", + mlx5_base_mkey(mr->key), err); + mlx5_core_destroy_mkey(dev, mr); + } return err; } @@ -104,12 +109,22 @@ int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr) struct mlx5_mr_table *table = &dev->priv.mr_table; struct mlx5_destroy_mkey_mbox_in in; struct mlx5_destroy_mkey_mbox_out out; + struct mlx5_core_mr *deleted_mr; unsigned long flags; int err; memset(&in, 0, sizeof(in)); memset(&out, 0, sizeof(out)); + write_lock_irqsave(&table->lock, flags); + deleted_mr = radix_tree_delete(&table->tree, mlx5_base_mkey(mr->key)); + write_unlock_irqrestore(&table->lock, flags); + if (!deleted_mr) { + mlx5_core_warn(dev, "failed radix tree delete of mr 0x%x\n", + mlx5_base_mkey(mr->key)); + return -ENOENT; + } + in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_MKEY); in.mkey = cpu_to_be32(mlx5_mkey_to_idx(mr->key)); err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); @@ -119,10 +134,6 @@ int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr) if (out.hdr.status) return mlx5_cmd_status_to_err(&out.hdr); - write_lock_irqsave(&table->lock, flags); - radix_tree_delete(&table->tree, mlx5_base_mkey(mr->key)); - write_unlock_irqrestore(&table->lock, flags); - return err; } EXPORT_SYMBOL(mlx5_core_destroy_mkey); -- cgit v0.10.2 From bbc1c5e8ad6dfebf9d13b8a4ccdf66c92913eac9 Mon Sep 17 00:00:00 2001 From: 
Lars Ellenberg Date: Wed, 9 Jul 2014 21:18:32 +0200 Subject: drbd: fix regression 'out of mem, failed to invoke fence-peer helper' Since linux kernel 3.13, kthread_run() internally uses wait_for_completion_killable(). We may sometimes call kthread_run() while we still have a signal pending, which we used to kick our threads out of potentially blocking network functions, causing kthread_run() to mistake that as a new fatal signal and fail. Fix: flush_signals() before kthread_run(). Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 1b35c45..3f2e167 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -544,6 +544,12 @@ void conn_try_outdate_peer_async(struct drbd_connection *connection) struct task_struct *opa; kref_get(&connection->kref); + /* We may just have force_sig()'ed this thread + * to get it out of some blocking network function. + * Clear signals; otherwise kthread_run(), which internally uses + * wait_on_completion_killable(), will mistake our pending signal + * for a new fatal signal and fail. */ + flush_signals(current); opa = kthread_run(_try_outdate_peer_async, connection, "drbd_async_h"); if (IS_ERR(opa)) { drbd_err(connection, "out of mem, failed to invoke fence-peer helper\n"); -- cgit v0.10.2 From 29e2435fd6d71e0136e2c2ff0433b7dbeeaaccfd Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 8 Jul 2014 16:54:18 +0100 Subject: efi: fdt: Do not report an error during boot if UEFI is not available Currently, fdt_find_uefi_params() reports an error if no EFI parameters are found in the DT. This is, however, a valid case for non-UEFI kernel booting. This patch changes the error reporting to a pr_info("UEFI not found") when no EFI parameters are found in the DT.
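For reference, a sketch of the properties the scan walks under /chosen; the names below are an assumption based on the dt_params[] table in drivers/firmware/efi/efi.c, and absence of all of them is the normal non-UEFI case that now only logs "UEFI not found.":

	/* assumed contents of dt_params[].propname in efi.c */
	static const char *const uefi_chosen_props[] = {
		"linux,uefi-system-table",
		"linux,uefi-mmap-start",
		"linux,uefi-mmap-size",
		"linux,uefi-mmap-desc-size",
		"linux,uefi-mmap-desc-ver",
	};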
Signed-off-by: Catalin Marinas Acked-by: Leif Lindholm Tested-by: Leif Lindholm Signed-off-by: Matt Fleming diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index eff1a2f..dc79346 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -346,6 +346,7 @@ static __initdata struct { struct param_info { int verbose; + int found; void *params; }; @@ -362,16 +363,12 @@ static int __init fdt_find_uefi_params(unsigned long node, const char *uname, (strcmp(uname, "chosen") != 0 && strcmp(uname, "chosen@0") != 0)) return 0; - pr_info("Getting parameters from FDT:\n"); - for (i = 0; i < ARRAY_SIZE(dt_params); i++) { prop = of_get_flat_dt_prop(node, dt_params[i].propname, &len); - if (!prop) { - pr_err("Can't find %s in device tree!\n", - dt_params[i].name); + if (!prop) return 0; - } dest = info->params + dt_params[i].offset; + info->found++; val = of_read_number(prop, len / sizeof(u32)); @@ -390,10 +387,21 @@ static int __init fdt_find_uefi_params(unsigned long node, const char *uname, int __init efi_get_fdt_params(struct efi_fdt_params *params, int verbose) { struct param_info info; + int ret; + + pr_info("Getting EFI parameters from FDT:\n"); info.verbose = verbose; + info.found = 0; info.params = params; - return of_scan_flat_dt(fdt_find_uefi_params, &info); + ret = of_scan_flat_dt(fdt_find_uefi_params, &info); + if (!info.found) + pr_info("UEFI not found.\n"); + else if (!ret) + pr_err("Can't find '%s' in device tree!\n", + dt_params[info.found].name); + + return ret; } #endif /* CONFIG_EFI_PARAMS_FROM_FDT */ -- cgit v0.10.2 From c7fb93ec51d462ec3540a729ba446663c26a0505 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Thu, 10 Jul 2014 12:26:20 +0100 Subject: x86/efi: Include a .bss section within the PE/COFF headers The PE/COFF headers currently describe only the initialised-data portions of the image, and result in no space being allocated for the uninitialised-data portions. Consequently, the EFI boot stub will end up overwriting unexpected areas of memory, with unpredictable results. Fix by including a .bss section in the PE/COFF headers (functionally equivalent to the init_size field in the bzImage header). Signed-off-by: Michael Brown Cc: Thomas Bächler Cc: Josh Boyer Cc: Signed-off-by: Matt Fleming diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index 84c2234..7a6d43a 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -91,10 +91,9 @@ bs_die: .section ".bsdata", "a" bugger_off_msg: - .ascii "Direct floppy boot is not supported. " - .ascii "Use a boot loader program instead.\r\n" + .ascii "Use a boot loader.\r\n" .ascii "\n" - .ascii "Remove disk and press any key to reboot ...\r\n" + .ascii "Remove disk and press any key to reboot...\r\n" .byte 0 #ifdef CONFIG_EFI_STUB @@ -108,7 +107,7 @@ coff_header: #else .word 0x8664 # x86-64 #endif - .word 3 # nr_sections + .word 4 # nr_sections .long 0 # TimeDateStamp .long 0 # PointerToSymbolTable .long 1 # NumberOfSymbols @@ -250,6 +249,25 @@ section_table: .word 0 # NumberOfLineNumbers .long 0x60500020 # Characteristics (section flags) + # + # The offset & size fields are filled in by build.c.
+ # + .ascii ".bss" + .byte 0 + .byte 0 + .byte 0 + .byte 0 + .long 0 + .long 0x0 + .long 0 # Size of initialized data + # on disk + .long 0x0 + .long 0 # PointerToRelocations + .long 0 # PointerToLineNumbers + .word 0 # NumberOfRelocations + .word 0 # NumberOfLineNumbers + .long 0xc8000080 # Characteristics (section flags) + #endif /* CONFIG_EFI_STUB */ # Kernel attributes; used by setup. This is part 1 of the diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c index 1a2f212..a7661c4 100644 --- a/arch/x86/boot/tools/build.c +++ b/arch/x86/boot/tools/build.c @@ -143,7 +143,7 @@ static void usage(void) #ifdef CONFIG_EFI_STUB -static void update_pecoff_section_header(char *section_name, u32 offset, u32 size) +static void update_pecoff_section_header_fields(char *section_name, u32 vma, u32 size, u32 datasz, u32 offset) { unsigned int pe_header; unsigned short num_sections; @@ -164,10 +164,10 @@ static void update_pecoff_section_header(char *section_name, u32 offset, u32 siz put_unaligned_le32(size, section + 0x8); /* section header vma field */ - put_unaligned_le32(offset, section + 0xc); + put_unaligned_le32(vma, section + 0xc); /* section header 'size of initialised data' field */ - put_unaligned_le32(size, section + 0x10); + put_unaligned_le32(datasz, section + 0x10); /* section header 'file offset' field */ put_unaligned_le32(offset, section + 0x14); @@ -179,6 +179,11 @@ static void update_pecoff_section_header(char *section_name, u32 offset, u32 siz } } +static void update_pecoff_section_header(char *section_name, u32 offset, u32 size) +{ + update_pecoff_section_header_fields(section_name, offset, size, size, offset); +} + static void update_pecoff_setup_and_reloc(unsigned int size) { u32 setup_offset = 0x200; @@ -203,9 +208,6 @@ static void update_pecoff_text(unsigned int text_start, unsigned int file_sz) pe_header = get_unaligned_le32(&buf[0x3c]); - /* Size of image */ - put_unaligned_le32(file_sz, &buf[pe_header + 0x50]); - /* * Size of code: Subtract the size of the first sector (512 bytes) * which includes the header. 
@@ -220,6 +222,22 @@ static void update_pecoff_text(unsigned int text_start, unsigned int file_sz) update_pecoff_section_header(".text", text_start, text_sz); } +static void update_pecoff_bss(unsigned int file_sz, unsigned int init_sz) +{ + unsigned int pe_header; + unsigned int bss_sz = init_sz - file_sz; + + pe_header = get_unaligned_le32(&buf[0x3c]); + + /* Size of uninitialized data */ + put_unaligned_le32(bss_sz, &buf[pe_header + 0x24]); + + /* Size of image */ + put_unaligned_le32(init_sz, &buf[pe_header + 0x50]); + + update_pecoff_section_header_fields(".bss", file_sz, bss_sz, 0, 0); +} + static int reserve_pecoff_reloc_section(int c) { /* Reserve 0x20 bytes for .reloc section */ @@ -259,6 +277,8 @@ static void efi_stub_entry_update(void) static inline void update_pecoff_setup_and_reloc(unsigned int size) {} static inline void update_pecoff_text(unsigned int text_start, unsigned int file_sz) {} +static inline void update_pecoff_bss(unsigned int file_sz, + unsigned int init_sz) {} static inline void efi_stub_defaults(void) {} static inline void efi_stub_entry_update(void) {} @@ -310,7 +330,7 @@ static void parse_zoffset(char *fname) int main(int argc, char ** argv) { - unsigned int i, sz, setup_sectors; + unsigned int i, sz, setup_sectors, init_sz; int c; u32 sys_size; struct stat sb; @@ -376,7 +396,9 @@ int main(int argc, char ** argv) buf[0x1f1] = setup_sectors-1; put_unaligned_le32(sys_size, &buf[0x1f4]); - update_pecoff_text(setup_sectors * 512, sz + i + ((sys_size * 16) - sz)); + update_pecoff_text(setup_sectors * 512, i + (sys_size * 16)); + init_sz = get_unaligned_le32(&buf[0x260]); + update_pecoff_bss(i + (sys_size * 16), init_sz); efi_stub_entry_update(); -- cgit v0.10.2 From 78b3321610bf920d7fceb1a0236faa881be0bcf3 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 7 Aug 2014 22:03:00 +0100 Subject: iio:core: Handle error when mask type is not separate When an event spec that defines mask_shared_by_type is shared by multiple channels, iio_device_register_eventset() fails. For example: static const struct iio_event_spec iio_dummy_events[] = { { .type = IIO_EV_TYPE_THRESH, .dir = IIO_EV_DIR_RISING, .mask_separate = BIT(IIO_EV_INFO_ENABLE), .mask_shared_by_type = BIT(IIO_EV_INFO_VALUE), }, { .type = IIO_EV_TYPE_THRESH, .dir = IIO_EV_DIR_FALLING, .mask_separate = BIT(IIO_EV_INFO_ENABLE), .mask_shared_by_type = BIT(IIO_EV_INFO_VALUE), } }; If two channels use this event spec, this will result in an error. This change handles the EBUSY error similarly to iio_device_add_info_mask_type().
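To make the failure concrete, a hypothetical pair of channels sharing that event spec (the channel layout is invented for illustration; only .event_spec and .num_event_specs matter here):

	static const struct iio_chan_spec dummy_channels[] = {
		{
			.type = IIO_VOLTAGE,
			.indexed = 1,
			.channel = 0,
			.event_spec = iio_dummy_events,
			.num_event_specs = ARRAY_SIZE(iio_dummy_events),
		},
		{
			.type = IIO_VOLTAGE,
			.indexed = 1,
			.channel = 1,
			.event_spec = iio_dummy_events,
			.num_event_specs = ARRAY_SIZE(iio_dummy_events),
		},
	};
	/* adding the shared-by-type attribute for the second channel returns
	 * -EBUSY because it already exists; with this change that case is
	 * skipped instead of aborting registration */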
Signed-off-by: Srinivas Pandruvada Signed-off-by: Jonathan Cameron Cc: Stable@vger.kernel.org diff --git a/drivers/iio/industrialio-event.c b/drivers/iio/industrialio-event.c index 258a973..bfbf4d4 100644 --- a/drivers/iio/industrialio-event.c +++ b/drivers/iio/industrialio-event.c @@ -345,6 +345,9 @@ static int iio_device_add_event(struct iio_dev *indio_dev, &indio_dev->event_interface->dev_attr_list); kfree(postfix); + if ((ret == -EBUSY) && (shared_by != IIO_SEPARATE)) + continue; + if (ret) return ret; -- cgit v0.10.2 From 1d9dbf154295aa33819b08340464ff15495e83d8 Mon Sep 17 00:00:00 2001 From: Darren Hart Date: Fri, 11 Jul 2014 11:03:39 +0000 Subject: ACPI / documentation: Remove reference to acpi_platform_device_ids from enumeration.txt As of: 4845934 ACPI / scan: use platform bus type by default for _HID enumeration ACPI uses the platform bus by default, changing the opt-in to an opt-out policy, eliminating the acpi_platform_device_ids table and replacing it with forbidden_id_list[]. Remove the qualifying paragraph from the acpi/enumeration documentation as it no longer applies. Reported-by: Max Eliaser Signed-off-by: Darren Hart Signed-off-by: Rafael J. Wysocki diff --git a/Documentation/acpi/enumeration.txt b/Documentation/acpi/enumeration.txt index fd786ea..e182be5 100644 --- a/Documentation/acpi/enumeration.txt +++ b/Documentation/acpi/enumeration.txt @@ -60,12 +60,6 @@ If the driver needs to perform more complex initialization like getting and configuring GPIOs it can get its ACPI handle and extract this information from ACPI tables. -Currently the kernel is not able to automatically determine from which ACPI -device it should make the corresponding platform device so we need to add -the ACPI device explicitly to acpi_platform_device_ids list defined in -drivers/acpi/acpi_platform.c. This limitation is only for the platform -devices, SPI and I2C devices are created automatically as described below. - DMA support ~~~~~~~~~~~ DMA controllers enumerated via ACPI should be registered in the system to -- cgit v0.10.2 From aff008ad813c7cf3cfe7b532e7ba2c526c136f22 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 17 Jun 2014 15:51:02 -0700 Subject: platform_get_irq: Revert to platform_get_resource if of_irq_get fails Commits 9ec36ca (of/irq: do irq resolution in platform_get_irq) and ad69674 (of/irq: do irq resolution in platform_get_irq_byname) change the semantics of platform_get_irq and platform_get_irq_byname to always rely on devicetree information if devicetree is enabled and if a devicetree node is attached to the device. The functions now return an error if the devicetree data does not include interrupt information, even if the information is available as platform resource data. This causes mfd client drivers to fail if the interrupt number is passed via platform resources. Therefore, if of_irq_get fails, try platform_get_resource as a method of last resort. This restores the original functionality for drivers depending on platform resources to get irq information.
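A sketch of the affected pattern (the device name and IRQ number are made up): a board file or mfd core registers the interrupt as a plain platform resource, and the driver still expects platform_get_irq() to find it even on a DT-booted system:

	static struct resource foo_resources[] = {
		DEFINE_RES_IRQ(42),	/* hypothetical IRQ number */
	};

	static struct platform_device foo_device = {
		.name		= "foo",
		.id		= -1,
		.resource	= foo_resources,
		.num_resources	= ARRAY_SIZE(foo_resources),
	};

	/* in foo's probe(): with this change, when of_irq_get() fails with
	 * anything other than -EPROBE_DEFER, the call falls back to the
	 * IORESOURCE_IRQ resource above instead of returning the error */
	int irq = platform_get_irq(pdev, 0);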
Cc: Russell King Cc: Tony Lindgren Cc: Grant Likely Cc: Grygorii Strashko Signed-off-by: Guenter Roeck Acked-by: Rob Herring Cc: stable Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/base/platform.c b/drivers/base/platform.c index 9e9227e..eee48c4 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -89,8 +89,13 @@ int platform_get_irq(struct platform_device *dev, unsigned int num) return dev->archdata.irqs[num]; #else struct resource *r; - if (IS_ENABLED(CONFIG_OF_IRQ) && dev->dev.of_node) - return of_irq_get(dev->dev.of_node, num); + if (IS_ENABLED(CONFIG_OF_IRQ) && dev->dev.of_node) { + int ret; + + ret = of_irq_get(dev->dev.of_node, num); + if (ret >= 0 || ret == -EPROBE_DEFER) + return ret; + } r = platform_get_resource(dev, IORESOURCE_IRQ, num); @@ -133,8 +138,13 @@ int platform_get_irq_byname(struct platform_device *dev, const char *name) { struct resource *r; - if (IS_ENABLED(CONFIG_OF_IRQ) && dev->dev.of_node) - return of_irq_get_byname(dev->dev.of_node, name); + if (IS_ENABLED(CONFIG_OF_IRQ) && dev->dev.of_node) { + int ret; + + ret = of_irq_get_byname(dev->dev.of_node, name); + if (ret >= 0 || ret == -EPROBE_DEFER) + return ret; + } r = platform_get_resource_byname(dev, IORESOURCE_IRQ, name); return r ? r->start : -ENXIO; -- cgit v0.10.2 From 71702e6e52c8312f4c6797a9787d0f8b5656156f Mon Sep 17 00:00:00 2001 From: Martin Fuzzey Date: Fri, 7 Nov 2014 13:54:00 +0000 Subject: iio: mma8452: Use correct acceleration units. The userspace interface for acceleration sensors is documented as using m/s^2 units [Documentation/ABI/testing/sysfs-bus-iio]. The fullscale raw value for the mma8452 (-2048) corresponds to -2G, -4G or -8G depending on the selected mode. The scale table was converting to G rather than m/s^2. Change the scaling table to match the documented interface. Signed-off-by: Martin Fuzzey Signed-off-by: Jonathan Cameron Cc: stable@vger.kernel.org diff --git a/drivers/iio/accel/mma8452.c b/drivers/iio/accel/mma8452.c index 17aeea1..2a5fa9a 100644 --- a/drivers/iio/accel/mma8452.c +++ b/drivers/iio/accel/mma8452.c @@ -111,8 +111,14 @@ static const int mma8452_samp_freq[8][2] = { {6, 250000}, {1, 560000} };
NULL pointer dereference at 0000000000000028 IP: [] blk_throtl_drain+0x84/0x230 PGD e4a1067 PUD b773067 PMD 0 Oops: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC Modules linked in: cfq_iosched(-) [last unloaded: cfq_iosched] CPU: 1 PID: 537 Comm: bash Not tainted 3.16.0-rc3-work+ #2 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 task: ffff88000e222250 ti: ffff88000efd4000 task.ti: ffff88000efd4000 RIP: 0010:[] [] blk_throtl_drain+0x84/0x230 RSP: 0018:ffff88000efd7bf0 EFLAGS: 00010046 RAX: 0000000000000000 RBX: ffff880015091450 RCX: 0000000000000001 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: ffff88000efd7c10 R08: 0000000000000000 R09: 0000000000000001 R10: ffff88000e222250 R11: 0000000000000000 R12: ffff880015091450 R13: ffff880015092e00 R14: ffff880015091d70 R15: ffff88001508fc28 FS: 00007f1332650740(0000) GS:ffff88001fa80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 0000000000000028 CR3: 0000000009446000 CR4: 00000000000006e0 Stack: ffffffff8144e8f6 ffff880015091450 0000000000000000 ffff880015091d80 ffff88000efd7c28 ffffffff8144ae2f ffff880015091450 ffff88000efd7c58 ffffffff81427641 ffff880015091450 ffffffff82401f00 ffff880015091450 Call Trace: [] blkcg_drain_queue+0x1f/0x60 [] __blk_drain_queue+0x71/0x180 [] blk_queue_bypass_start+0x6e/0xb0 [] blkcg_deactivate_policy+0x38/0x120 [] blk_throtl_exit+0x34/0x50 [] blkcg_exit_queue+0x35/0x40 [] blk_release_queue+0x26/0xd0 [] kobject_cleanup+0x38/0x70 [] kobject_put+0x28/0x60 [] blk_put_queue+0x15/0x20 [] scsi_device_dev_release_usercontext+0x16b/0x1c0 [] execute_in_process_context+0x89/0xa0 [] scsi_device_dev_release+0x1c/0x20 [] device_release+0x32/0xa0 [] kobject_cleanup+0x38/0x70 [] kobject_put+0x28/0x60 [] put_device+0x17/0x20 [] __scsi_remove_device+0xa9/0xe0 [] scsi_remove_device+0x2b/0x40 [] sdev_store_delete+0x27/0x30 [] dev_attr_store+0x18/0x30 [] sysfs_kf_write+0x3e/0x50 [] kernfs_fop_write+0xe7/0x170 [] vfs_write+0xaf/0x1d0 [] SyS_write+0x4d/0xc0 [] system_call_fastpath+0x16/0x1b 776687bce42b ("block, blk-mq: draining can't be skipped even if bypass_depth was non-zero") made it easier to trigger this bug by making blk_queue_bypass_start() drain even when it loses the first bypass test to blk_cleanup_queue(); however, the bug has always been there even before the commit as blk_queue_bypass_start() could race against queue destruction, win the initial bypass test but perform the actual draining after blk_cleanup_queue() already destroyed all blkgs. Fix it by skipping calling into policy draining if all the blkgs are already gone. Signed-off-by: Tejun Heo Reported-by: Shirish Pargaonkar Reported-by: Sasha Levin Reported-by: Jet Chen Cc: stable@vger.kernel.org Tested-by: Shirish Pargaonkar Signed-off-by: Jens Axboe diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index b9f4cc4..28d227c 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -872,6 +872,13 @@ void blkcg_drain_queue(struct request_queue *q) { lockdep_assert_held(q->queue_lock); + /* + * @q could be exiting and already have destroyed all blkgs as + * indicated by NULL root_blkg. If so, don't confuse policies.
+ */ + if (!q->root_blkg) + return; + blk_throtl_drain(q); } -- cgit v0.10.2 From 17089a29a25a3bfe8d14520cd866b7d635ffe5ba Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Fri, 11 Jul 2014 10:20:45 -0400 Subject: nfs: mark nfs_page reqs with flag for extra ref Change the use of PG_INODE_REF - set it when taking an extra reference on subrequests and take care to release it only once for each request. Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index b6ee3a6..7368b21 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -251,8 +251,10 @@ nfs_page_group_init(struct nfs_page *req, struct nfs_page *prev) /* grab extra ref if head request has extra ref from * the write/commit path to handle handoff between write * and commit lists */ - if (test_bit(PG_INODE_REF, &prev->wb_head->wb_flags)) + if (test_bit(PG_INODE_REF, &prev->wb_head->wb_flags)) { + set_bit(PG_INODE_REF, &req->wb_flags); kref_get(&req->wb_kref); + } } } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 98ff061..8e5745a 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -448,7 +448,9 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req) set_page_private(req->wb_page, (unsigned long)req); } nfsi->npages++; - set_bit(PG_INODE_REF, &req->wb_flags); + /* this a head request for a page group - mark it as having an + * extra reference so sub groups can follow suit */ + WARN_ON(test_and_set_bit(PG_INODE_REF, &req->wb_flags)); kref_get(&req->wb_kref); spin_unlock(&inode->i_lock); } @@ -474,7 +476,9 @@ static void nfs_inode_remove_request(struct nfs_page *req) nfsi->npages--; spin_unlock(&inode->i_lock); } - nfs_release_request(req); + + if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags)) + nfs_release_request(req); } static void -- cgit v0.10.2 From 85710a837c2026aae80b7c64187edf1f10027b0b Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Fri, 11 Jul 2014 10:20:46 -0400 Subject: nfs: nfs_page should take a ref on the head req nfs_pages that aren't the head of a group must take a reference on the head as long as ->wb_head is set to it. This stops the head from hitting a refcount of 0 while there is still an active nfs_page for the page group. This avoids kref warnings in the writeback code when the page group head is found and referenced.
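Schematically, the invariant this patch introduces (a sketch of the lifetime rule, not the patch code itself):

	/* while a subrequest's wb_head points at the head request, the
	 * subrequest also holds a reference on it (taken in
	 * nfs_page_group_init)... */
	subreq->wb_head = head;
	kref_get(&head->wb_kref);

	/* ...and that reference is dropped only when the subrequest itself
	 * is destroyed (in nfs_page_group_destroy), so the head cannot reach
	 * a refcount of 0 while any subrequest is still live */
	if (subreq->wb_head != subreq)
		nfs_release_request(subreq->wb_head);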
Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 7368b21..05a6359 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -239,15 +239,21 @@ nfs_page_group_init(struct nfs_page *req, struct nfs_page *prev) WARN_ON_ONCE(prev == req); if (!prev) { + /* a head request */ req->wb_head = req; req->wb_this_page = req; } else { + /* a subrequest */ WARN_ON_ONCE(prev->wb_this_page != prev->wb_head); WARN_ON_ONCE(!test_bit(PG_HEADLOCK, &prev->wb_head->wb_flags)); req->wb_head = prev->wb_head; req->wb_this_page = prev->wb_this_page; prev->wb_this_page = req; + /* All subrequests take a ref on the head request until + * nfs_page_group_destroy is called */ + kref_get(&req->wb_head->wb_kref); + /* grab extra ref if head request has extra ref from * the write/commit path to handle handoff between write * and commit lists */ @@ -271,6 +277,10 @@ nfs_page_group_destroy(struct kref *kref) struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref); struct nfs_page *tmp, *next; + /* subrequests must release the ref on the head request */ + if (req->wb_head != req) + nfs_release_request(req->wb_head); + if (!nfs_page_group_sync_on_bit(req, PG_TEARDOWN)) return; -- cgit v0.10.2 From 84d3a9a913ba6a90c79b7763d063bb42554a8906 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Fri, 11 Jul 2014 10:20:47 -0400 Subject: nfs: change find_request to find_head_request nfs_page_find_request_locked* should find the head request for that page. Rename the functions and add comments to make this clear, and fix a bug that could return a subrequest when page_private isn't set on the page. Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 8e5745a..53c4a99 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -91,8 +91,15 @@ static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error) set_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); } +/* + * nfs_page_find_head_request_locked - find head request associated with @page + * + * must be called while holding the inode lock. + * + * returns matching head request with reference held, or NULL if not found. + */ static struct nfs_page * -nfs_page_find_request_locked(struct nfs_inode *nfsi, struct page *page) +nfs_page_find_head_request_locked(struct nfs_inode *nfsi, struct page *page) { struct nfs_page *req = NULL; @@ -104,25 +111,33 @@ nfs_page_find_request_locked(struct nfs_inode *nfsi, struct page *page) /* Linearly search the commit list for the correct req */ list_for_each_entry_safe(freq, t, &nfsi->commit_info.list, wb_list) { if (freq->wb_page == page) { - req = freq; + req = freq->wb_head; break; } } } - if (req) + if (req) { + WARN_ON_ONCE(req->wb_head != req); + kref_get(&req->wb_kref); + } return req; } -static struct nfs_page *nfs_page_find_request(struct page *page) +/* + * nfs_page_find_head_request - find head request associated with @page + * + * returns matching head request with reference held, or NULL if not found. 
+ */ +static struct nfs_page *nfs_page_find_head_request(struct page *page) { struct inode *inode = page_file_mapping(page)->host; struct nfs_page *req = NULL; spin_lock(&inode->i_lock); - req = nfs_page_find_request_locked(NFS_I(inode), page); + req = nfs_page_find_head_request_locked(NFS_I(inode), page); spin_unlock(&inode->i_lock); return req; } @@ -282,7 +297,7 @@ static struct nfs_page *nfs_find_and_lock_request(struct page *page, bool nonblo spin_lock(&inode->i_lock); for (;;) { - req = nfs_page_find_request_locked(NFS_I(inode), page); + req = nfs_page_find_head_request_locked(NFS_I(inode), page); if (req == NULL) break; if (nfs_lock_request(req)) @@ -773,7 +788,7 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode, spin_lock(&inode->i_lock); for (;;) { - req = nfs_page_find_request_locked(NFS_I(inode), page); + req = nfs_page_find_head_request_locked(NFS_I(inode), page); if (req == NULL) goto out_unlock; @@ -881,7 +896,7 @@ int nfs_flush_incompatible(struct file *file, struct page *page) * dropped page. */ do { - req = nfs_page_find_request(page); + req = nfs_page_find_head_request(page); if (req == NULL) return 0; l_ctx = req->wb_lock_context; @@ -1575,7 +1590,7 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page) for (;;) { wait_on_page_writeback(page); - req = nfs_page_find_request(page); + req = nfs_page_find_head_request(page); if (req == NULL) break; if (nfs_lock_request(req)) { -- cgit v0.10.2 From d458138353726ea6dcbc53ae3597e489d0432c25 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Fri, 11 Jul 2014 10:20:48 -0400 Subject: nfs: handle multiple reqs in nfs_page_async_flush Change nfs_find_and_lock_request so nfs_page_async_flush can handle multiple requests in a page. There is only one request for a page the first time nfs_page_async_flush is called, but if a write or commit fails, async_flush is called again and there may be multiple requests associated with the page. The solution is to merge all the requests in a page group into a single request before calling nfs_pageio_add_request. Rename nfs_find_and_lock_request to nfs_lock_and_join_requests and change it to first lock all requests for the page, then cancel and merge all subrequests into the head request. Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 82ddbf4..f415cbf 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -244,6 +244,7 @@ void nfs_pgio_data_release(struct nfs_pgio_data *); int nfs_generic_pgio(struct nfs_pageio_descriptor *, struct nfs_pgio_header *); int nfs_initiate_pgio(struct rpc_clnt *, struct nfs_pgio_data *, const struct rpc_call_ops *, int, int); +void nfs_free_request(struct nfs_page *req); static inline void nfs_iocounter_init(struct nfs_io_counter *c) { diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 05a6359..0aefc81 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -29,8 +29,6 @@ static struct kmem_cache *nfs_page_cachep; static const struct rpc_call_ops nfs_pgio_common_ops; -static void nfs_free_request(struct nfs_page *); - static bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount) { p->npages = pagecount; @@ -406,7 +404,7 @@ static void nfs_clear_request(struct nfs_page *req) * * Note: Should never be called with the spinlock held! 
*/ -static void nfs_free_request(struct nfs_page *req) +void nfs_free_request(struct nfs_page *req) { WARN_ON_ONCE(req->wb_this_page != req); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 53c4a99..9f4424c 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -46,6 +46,7 @@ static const struct rpc_call_ops nfs_commit_ops; static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops; static const struct nfs_commit_completion_ops nfs_commit_completion_ops; static const struct nfs_rw_ops nfs_rw_write_ops; +static void nfs_clear_request_commit(struct nfs_page *req); static struct kmem_cache *nfs_wdata_cachep; static mempool_t *nfs_wdata_mempool; @@ -289,36 +290,246 @@ static void nfs_end_page_writeback(struct nfs_page *req) clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC); } -static struct nfs_page *nfs_find_and_lock_request(struct page *page, bool nonblock) + +/* nfs_page_group_clear_bits + * @req - an nfs request + * clears all page group related bits from @req + */ +static void +nfs_page_group_clear_bits(struct nfs_page *req) +{ + clear_bit(PG_TEARDOWN, &req->wb_flags); + clear_bit(PG_UNLOCKPAGE, &req->wb_flags); + clear_bit(PG_UPTODATE, &req->wb_flags); + clear_bit(PG_WB_END, &req->wb_flags); + clear_bit(PG_REMOVE, &req->wb_flags); +} + + +/* + * nfs_unroll_locks_and_wait - unlock all newly locked reqs and wait on @req + * + * this is a helper function for nfs_lock_and_join_requests + * + * @inode - inode associated with request page group, must be holding inode lock + * @head - head request of page group, must be holding head lock + * @req - request that couldn't lock and needs to wait on the req bit lock + * @nonblock - if true, don't actually wait + * + * NOTE: this must be called holding page_group bit lock and inode spin lock + * and BOTH will be released before returning. + * + * returns 0 on success, < 0 on error. + */ +static int +nfs_unroll_locks_and_wait(struct inode *inode, struct nfs_page *head, + struct nfs_page *req, bool nonblock) + __releases(&inode->i_lock) +{ + struct nfs_page *tmp; + int ret; + + /* relinquish all the locks successfully grabbed this run */ + for (tmp = head ; tmp != req; tmp = tmp->wb_this_page) + nfs_unlock_request(tmp); + + WARN_ON_ONCE(test_bit(PG_TEARDOWN, &req->wb_flags)); + + /* grab a ref on the request that will be waited on */ + kref_get(&req->wb_kref); + + nfs_page_group_unlock(head); + spin_unlock(&inode->i_lock); + + /* release ref from nfs_page_find_head_request_locked */ + nfs_release_request(head); + + if (!nonblock) + ret = nfs_wait_on_request(req); + else + ret = -EAGAIN; + nfs_release_request(req); + + return ret; +} + +/* + * nfs_destroy_unlinked_subrequests - destroy recently unlinked subrequests + * + * @destroy_list - request list (using wb_this_page) terminated by @old_head + * @old_head - the old head of the list + * + * All subrequests must be locked and removed from all lists, so at this point + * they are only "active" in this function, and possibly in nfs_wait_on_request + * with a reference held by some other context. + */ +static void +nfs_destroy_unlinked_subrequests(struct nfs_page *destroy_list, + struct nfs_page *old_head) +{ + while (destroy_list) { + struct nfs_page *subreq = destroy_list; + + destroy_list = (subreq->wb_this_page == old_head) ? 
+ NULL : subreq->wb_this_page; + + WARN_ON_ONCE(old_head != subreq->wb_head); + + /* make sure old group is not used */ + subreq->wb_head = subreq; + subreq->wb_this_page = subreq; + + nfs_clear_request_commit(subreq); + + /* subreq is now totally disconnected from page group or any + * write / commit lists. last chance to wake any waiters */ + nfs_unlock_request(subreq); + + if (!test_bit(PG_TEARDOWN, &subreq->wb_flags)) { + /* release ref on old head request */ + nfs_release_request(old_head); + + nfs_page_group_clear_bits(subreq); + + /* release the PG_INODE_REF reference */ + if (test_and_clear_bit(PG_INODE_REF, &subreq->wb_flags)) + nfs_release_request(subreq); + else + WARN_ON_ONCE(1); + } else { + WARN_ON_ONCE(test_bit(PG_CLEAN, &subreq->wb_flags)); + /* zombie requests have already released the last + * reference and were waiting on the rest of the + * group to complete. Since it's no longer part of a + * group, simply free the request */ + nfs_page_group_clear_bits(subreq); + nfs_free_request(subreq); + } + } +} + +/* + * nfs_lock_and_join_requests - join all subreqs to the head req and return + * a locked reference, cancelling any pending + * operations for this page. + * + * @page - the page used to lookup the "page group" of nfs_page structures + * @nonblock - if true, don't block waiting for request locks + * + * This function joins all sub requests to the head request by first + * locking all requests in the group, cancelling any pending operations + * and finally updating the head request to cover the whole range covered by + * the (former) group. All subrequests are removed from any write or commit + * lists, unlinked from the group and destroyed. + * + * Returns a locked, referenced pointer to the head request - which after + * this call is guaranteed to be the only request associated with the page. + * Returns NULL if no requests are found for @page, or a ERR_PTR if an + * error was encountered. + */ +static struct nfs_page * +nfs_lock_and_join_requests(struct page *page, bool nonblock) { struct inode *inode = page_file_mapping(page)->host; - struct nfs_page *req; + struct nfs_page *head, *subreq; + struct nfs_page *destroy_list = NULL; + unsigned int total_bytes; int ret; +try_again: + total_bytes = 0; + + WARN_ON_ONCE(destroy_list); + spin_lock(&inode->i_lock); - for (;;) { - req = nfs_page_find_head_request_locked(NFS_I(inode), page); - if (req == NULL) - break; - if (nfs_lock_request(req)) - break; - /* Note: If we hold the page lock, as is the case in nfs_writepage, - * then the call to nfs_lock_request() will always - * succeed provided that someone hasn't already marked the - * request as dirty (in which case we don't care). - */ + + /* + * A reference is taken only on the head request which acts as a + * reference to the whole page group - the group will not be destroyed + * until the head reference is released. + */ + head = nfs_page_find_head_request_locked(NFS_I(inode), page); + + if (!head) { spin_unlock(&inode->i_lock); - if (!nonblock) - ret = nfs_wait_on_request(req); - else - ret = -EAGAIN; - nfs_release_request(req); - if (ret != 0) + return NULL; + } + + /* lock each request in the page group */ + nfs_page_group_lock(head); + subreq = head; + do { + /* + * Subrequests are always contiguous, non overlapping + * and in order. If not, it's a programming error. 
+ */ + WARN_ON_ONCE(subreq->wb_offset != + (head->wb_offset + total_bytes)); + + /* keep track of how many bytes this group covers */ + total_bytes += subreq->wb_bytes; + + if (!nfs_lock_request(subreq)) { + /* releases page group bit lock and + * inode spin lock and all references */ + ret = nfs_unroll_locks_and_wait(inode, head, + subreq, nonblock); + + if (ret == 0) + goto try_again; + return ERR_PTR(ret); - spin_lock(&inode->i_lock); + } + + subreq = subreq->wb_this_page; + } while (subreq != head); + + /* Now that all requests are locked, make sure they aren't on any list. + * Commit list removal accounting is done after locks are dropped */ + subreq = head; + do { + nfs_list_remove_request(subreq); + subreq = subreq->wb_this_page; + } while (subreq != head); + + /* unlink subrequests from head, destroy them later */ + if (head->wb_this_page != head) { + /* destroy list will be terminated by head */ + destroy_list = head->wb_this_page; + head->wb_this_page = head; + + /* change head request to cover whole range that + * the former page group covered */ + head->wb_bytes = total_bytes; } + + /* + * prepare head request to be added to new pgio descriptor + */ + nfs_page_group_clear_bits(head); + + /* + * some part of the group was still on the inode list - otherwise + * the group wouldn't be involved in async write. + * grab a reference for the head request, iff it needs one. + */ + if (!test_and_set_bit(PG_INODE_REF, &head->wb_flags)) + kref_get(&head->wb_kref); + + nfs_page_group_unlock(head); + + /* drop lock to clear_request_commit the head req and clean up + * requests on destroy list */ spin_unlock(&inode->i_lock); - return req; + + nfs_destroy_unlinked_subrequests(destroy_list, head); + + /* clean up commit list state */ + nfs_clear_request_commit(head); + + /* still holds ref on head from nfs_page_find_head_request_locked + * and still has lock on head from lock loop */ + return head; } /* @@ -331,7 +542,7 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, struct nfs_page *req; int ret = 0; - req = nfs_find_and_lock_request(page, nonblock); + req = nfs_lock_and_join_requests(page, nonblock); if (!req) goto out; ret = PTR_ERR(req); -- cgit v0.10.2 From 3e2170451e91327bfa8a82040fea78043847533a Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Fri, 11 Jul 2014 10:20:49 -0400 Subject: nfs: handle multiple reqs in nfs_wb_page_cancel Use nfs_lock_and_join_requests to merge all subrequests into the head request - this cancels and dereferences all subrequests. 
Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 9f4424c..bdc4db2 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1799,27 +1799,28 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page) struct nfs_page *req; int ret = 0; - for (;;) { - wait_on_page_writeback(page); - req = nfs_page_find_head_request(page); - if (req == NULL) - break; - if (nfs_lock_request(req)) { - nfs_clear_request_commit(req); - nfs_inode_remove_request(req); - /* - * In case nfs_inode_remove_request has marked the - * page as being dirty - */ - cancel_dirty_page(page, PAGE_CACHE_SIZE); - nfs_unlock_and_release_request(req); - break; - } - ret = nfs_wait_on_request(req); - nfs_release_request(req); - if (ret < 0) - break; + wait_on_page_writeback(page); + + /* blocking call to cancel all requests and join to a single (head) + * request */ + req = nfs_lock_and_join_requests(page, false); + + if (IS_ERR(req)) { + ret = PTR_ERR(req); + } else if (req) { + /* all requests from this page have been cancelled by + * nfs_lock_and_join_requests, so just remove the head + * request from the inode / page_private pointer and + * release it */ + nfs_inode_remove_request(req); + /* + * In case nfs_inode_remove_request has marked the + * page as being dirty + */ + cancel_dirty_page(page, PAGE_CACHE_SIZE); + nfs_unlock_and_release_request(req); } + return ret; } -- cgit v0.10.2 From aafe37504c70954fc104c88d9d15d553572dae69 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 12 Jul 2014 17:23:39 -0400 Subject: NFS: Remove 2 unused variables Cc: Weston Andros Adamson Signed-off-by: Trond Myklebust diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 8f98138..f11b9ee 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -756,7 +756,6 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) spin_unlock(&dreq->lock); while (!list_empty(&hdr->pages)) { - bool do_destroy = true; req = nfs_list_entry(hdr->pages.next); nfs_list_remove_request(req); @@ -765,7 +764,6 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) case NFS_IOHDR_NEED_COMMIT: kref_get(&req->wb_kref); nfs_mark_request_commit(req, hdr->lseg, &cinfo); - do_destroy = false; } nfs_unlock_and_release_request(req); } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index bdc4db2..5e2f1030 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -868,7 +868,6 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr) { struct nfs_commit_info cinfo; unsigned long bytes = 0; - bool do_destroy; if (test_bit(NFS_IOHDR_REDO, &hdr->flags)) goto out; @@ -898,7 +897,6 @@ remove_req: next: nfs_unlock_request(req); nfs_end_page_writeback(req); - do_destroy = !test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags); nfs_release_request(req); } out: -- cgit v0.10.2 From 46c1376db1b85ae412a7917cec148c6e60f79428 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Fri, 20 Jun 2014 14:26:25 -0500 Subject: RDMA/cxgb4: Call iwpm_init() only once We need to only register with the iwpm core once. Currently it is being done for every adapter, which causes a failure for each adapter but the first, making multiple adapters unusable. 
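The essence of the fix is the register-once pattern: shared infrastructure such as the port mapper belongs in module init/exit, not in per-adapter alloc/dealloc. A rough sketch of that shape follows (illustrative only; example_module_init/exit are invented names, and the real error path also unwinds the netlink client, CM state and debugfs as the diff below shows):

    static int __init example_module_init(void)
    {
            int err;

            /* ... per-module setup (CM init, netlink client, debugfs) ... */

            err = iwpm_init(RDMA_NL_C4IW);  /* exactly once for the module */
            if (err) {
                    pr_err("port mapper initialization failed with %d\n", err);
                    /* undo the setup done above before failing the load */
                    return err;
            }

            /* only now register with the lower-level driver */
            return 0;
        }

    static void __exit example_module_exit(void)
    {
            /* tear down in reverse order; iwpm_exit() pairs with iwpm_init() */
            iwpm_exit(RDMA_NL_C4IW);
    }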
Fixes: 9eccfe109b27 ("RDMA/cxgb4: Add support for iWARP Port Mapper user space service") Signed-off-by: Steve Wise Signed-off-by: Roland Dreier diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 6a93280..768a0fb 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -3925,7 +3925,7 @@ int __init c4iw_cm_init(void) return 0; } -void __exit c4iw_cm_term(void) +void c4iw_cm_term(void) { WARN_ON(!list_empty(&timeout_list)); flush_workqueue(workq); diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index 16b75de..7db82b2 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -730,7 +730,6 @@ static void c4iw_dealloc(struct uld_ctx *ctx) if (ctx->dev->rdev.oc_mw_kva) iounmap(ctx->dev->rdev.oc_mw_kva); ib_dealloc_device(&ctx->dev->ibdev); - iwpm_exit(RDMA_NL_C4IW); ctx->dev = NULL; } @@ -827,12 +826,6 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop) setup_debugfs(devp); } - ret = iwpm_init(RDMA_NL_C4IW); - if (ret) { - pr_err("port mapper initialization failed with %d\n", ret); - ib_dealloc_device(&devp->ibdev); - return ERR_PTR(ret); - } return devp; } @@ -1333,6 +1326,15 @@ static int __init c4iw_init_module(void) pr_err("%s[%u]: Failed to add netlink callback\n" , __func__, __LINE__); + err = iwpm_init(RDMA_NL_C4IW); + if (err) { + pr_err("port mapper initialization failed with %d\n", err); + ibnl_remove_client(RDMA_NL_C4IW); + c4iw_cm_term(); + debugfs_remove_recursive(c4iw_debugfs_root); + return err; + } + cxgb4_register_uld(CXGB4_ULD_RDMA, &c4iw_uld_info); return 0; @@ -1350,6 +1352,7 @@ static void __exit c4iw_exit_module(void) } mutex_unlock(&dev_mutex); cxgb4_unregister_uld(CXGB4_ULD_RDMA); + iwpm_exit(RDMA_NL_C4IW); ibnl_remove_client(RDMA_NL_C4IW); c4iw_cm_term(); debugfs_remove_recursive(c4iw_debugfs_root); diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 125bc5d1..361fff7 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -908,7 +908,7 @@ int c4iw_destroy_ctrl_qp(struct c4iw_rdev *rdev); int c4iw_register_device(struct c4iw_dev *dev); void c4iw_unregister_device(struct c4iw_dev *dev); int __init c4iw_cm_init(void); -void __exit c4iw_cm_term(void); +void c4iw_cm_term(void); void c4iw_release_dev_ucontext(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx); void c4iw_init_dev_ucontext(struct c4iw_rdev *rdev, -- cgit v0.10.2 From f563b89b182594f827b4100bd34f916339785a77 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 13 Jul 2014 15:13:19 -0400 Subject: NFS: Don't reset pg_moreio in __nfs_pageio_add_request Once we've started sending unstable NFS writes, we do not want to clear pg_moreio, or we may end up sending the very last request as a stable write if the commit lists are still empty. Do, however, reset pg_moreio in the case where we end up having to recoalesce the write if an attempt to use pNFS failed. 
Signed-off-by: Trond Myklebust diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 0aefc81..17fab89 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -935,7 +935,6 @@ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, nfs_pageio_doio(desc); if (desc->pg_error < 0) return 0; - desc->pg_moreio = 0; if (desc->pg_recoalesce) return 0; /* retry add_request for this subreq */ @@ -982,6 +981,7 @@ static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc) desc->pg_count = 0; desc->pg_base = 0; desc->pg_recoalesce = 0; + desc->pg_moreio = 0; while (!list_empty(&head)) { struct nfs_page *req; -- cgit v0.10.2 From 6b89cddee051945a83cc67436ad1680ba7d9f766 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 9 Jul 2014 22:35:53 +0200 Subject: Revert "drm/i915: Don't set the 8to6 dither flag when not scaling" This reverts commit 773875bfb6737982903c42d1ee88cf60af80089c. It is very much needed and the lack of dithering has been reported by a large list of people with various gen2/3 hardware. Also, the original patch was complete non-sense since the WARNING backtraces in the references bugzilla are about gmch_pfit.lvds_border_bits mismatch, not at all about the dither bit. That one seems to work. Cc: Jiri Kosina Cc: Pavel Machek Cc: Hans de Bruin Cc: stable@vger.kernel.org Acked-by: Pavel Machek Signed-off-by: Daniel Vetter diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index 2312602..5e5a72f 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -111,6 +111,13 @@ static void intel_lvds_get_config(struct intel_encoder *encoder, pipe_config->adjusted_mode.flags |= flags; + /* gen2/3 store dither state in pfit control, needs to match */ + if (INTEL_INFO(dev)->gen < 4) { + tmp = I915_READ(PFIT_CONTROL); + + pipe_config->gmch_pfit.control |= tmp & PANEL_8TO6_DITHER_ENABLE; + } + dotclock = pipe_config->port_clock; if (HAS_PCH_SPLIT(dev_priv->dev)) diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c index 628cd89..12b02fe 100644 --- a/drivers/gpu/drm/i915/intel_panel.c +++ b/drivers/gpu/drm/i915/intel_panel.c @@ -361,16 +361,16 @@ void intel_gmch_panel_fitting(struct intel_crtc *intel_crtc, pfit_control |= ((intel_crtc->pipe << PFIT_PIPE_SHIFT) | PFIT_FILTER_FUZZY); - /* Make sure pre-965 set dither correctly for 18bpp panels. */ - if (INTEL_INFO(dev)->gen < 4 && pipe_config->pipe_bpp == 18) - pfit_control |= PANEL_8TO6_DITHER_ENABLE; - out: if ((pfit_control & PFIT_ENABLE) == 0) { pfit_control = 0; pfit_pgm_ratios = 0; } + /* Make sure pre-965 set dither correctly for 18bpp panels. */ + if (INTEL_INFO(dev)->gen < 4 && pipe_config->pipe_bpp == 18) + pfit_control |= PANEL_8TO6_DITHER_ENABLE; + pipe_config->gmch_pfit.control = pfit_control; pipe_config->gmch_pfit.pgm_ratios = pfit_pgm_ratios; pipe_config->gmch_pfit.lvds_border_bits = border; -- cgit v0.10.2 From 724cb06fa9b1e1ffd98188275543fdb3b8eaca4f Mon Sep 17 00:00:00 2001 From: Scot Doyle Date: Fri, 11 Jul 2014 22:16:30 +0000 Subject: drm/i915: Ignore VBT backlight presence check on HP Chromebook 14 commit c675949ec58ca50d5a3ae3c757892f1560f6e896 drm/i915: do not setup backlight if not available according to VBT caused a regression on the HP Chromebook 14 (with Celeron 2955U CPU), which has a misconfigured VBT. Apply quirk to ignore the VBT backlight presence check during backlight setup. 
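For context, the quirk mechanism used here keys on the PCI device ID plus the machine's subsystem vendor/device IDs and runs a hook when all three match. A condensed sketch of that idea (struct and function names approximated here, not copied from i915; the IDs and quirk_backlight_present come from the diff below):

    struct panel_quirk {                    /* stands in for i915's quirk entry */
            u16 device;                     /* PCI device ID, e.g. 0x0a06 */
            u16 subsystem_vendor;           /* 0x103c = Hewlett-Packard */
            u16 subsystem_device;           /* 0x21ed = Chromebook 14 board */
            void (*hook)(struct drm_device *dev);
    };

    static const struct panel_quirk panel_quirks[] = {
            { 0x0a06, 0x103c, 0x21ed, quirk_backlight_present },
    };

    static void apply_panel_quirks(struct drm_device *dev, struct pci_dev *pdev)
    {
            int i;

            for (i = 0; i < ARRAY_SIZE(panel_quirks); i++) {
                    const struct panel_quirk *q = &panel_quirks[i];

                    if (q->device == pdev->device &&
                        q->subsystem_vendor == pdev->subsystem_vendor &&
                        q->subsystem_device == pdev->subsystem_device)
                            q->hook(dev);   /* e.g. mark the backlight as present */
            }
    }

The new table entry added by the patch is exactly such a triple for the HP Chromebook 14's Celeron 2955U graphics device.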
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=79813 Signed-off-by: Scot Doyle Tested-by: Stefan Nagy Cc: Jani Nikula Cc: Daniel Vetter Signed-off-by: Daniel Vetter diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index e27e780..82e7d57 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -11673,6 +11673,9 @@ static struct intel_quirk intel_quirks[] = { /* Toshiba CB35 Chromebook (Celeron 2955U) */ { 0x0a06, 0x1179, 0x0a88, quirk_backlight_present }, + + /* HP Chromebook 14 (Celeron 2955U) */ + { 0x0a06, 0x103c, 0x21ed, quirk_backlight_present }, }; static void intel_init_quirks(struct drm_device *dev) -- cgit v0.10.2 From e688a7f8c6cb7a18aae7e55ccdd175f0ad9e69c0 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 1 Jul 2014 11:49:18 +0200 Subject: netfilter: nf_tables: safe RCU iteration on list when dumping The dump operation through netlink is not protected by the nfnl_lock. Thus, a reader process can be dumping any of the existing object lists while another process can be updating the list content. This patch resolves this situation by protecting all the object lists with RCU in the netlink dump path which is the reader side. The updater path is already protected via nfnl_lock, so use list manipulation RCU-safe operations. Signed-off-by: Pablo Neira Ayuso diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index da5dc37..a27a7c5 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -35,7 +35,7 @@ int nft_register_afinfo(struct net *net, struct nft_af_info *afi) { INIT_LIST_HEAD(&afi->tables); nfnl_lock(NFNL_SUBSYS_NFTABLES); - list_add_tail(&afi->list, &net->nft.af_info); + list_add_tail_rcu(&afi->list, &net->nft.af_info); nfnl_unlock(NFNL_SUBSYS_NFTABLES); return 0; } @@ -51,7 +51,7 @@ EXPORT_SYMBOL_GPL(nft_register_afinfo); void nft_unregister_afinfo(struct nft_af_info *afi) { nfnl_lock(NFNL_SUBSYS_NFTABLES); - list_del(&afi->list); + list_del_rcu(&afi->list); nfnl_unlock(NFNL_SUBSYS_NFTABLES); } EXPORT_SYMBOL_GPL(nft_unregister_afinfo); @@ -277,11 +277,12 @@ static int nf_tables_dump_tables(struct sk_buff *skb, struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; - list_for_each_entry(afi, &net->nft.af_info, list) { + rcu_read_lock(); + list_for_each_entry_rcu(afi, &net->nft.af_info, list) { if (family != NFPROTO_UNSPEC && family != afi->family) continue; - list_for_each_entry(table, &afi->tables, list) { + list_for_each_entry_rcu(table, &afi->tables, list) { if (idx < s_idx) goto cont; if (idx > s_idx) @@ -299,6 +300,7 @@ cont: } } done: + rcu_read_unlock(); cb->args[0] = idx; return skb->len; } @@ -517,7 +519,7 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb, module_put(afi->owner); return err; } - list_add_tail(&table->list, &afi->tables); + list_add_tail_rcu(&table->list, &afi->tables); return 0; } @@ -549,7 +551,7 @@ static int nf_tables_deltable(struct sock *nlsk, struct sk_buff *skb, if (err < 0) return err; - list_del(&table->list); + list_del_rcu(&table->list); return 0; } @@ -764,12 +766,13 @@ static int nf_tables_dump_chains(struct sk_buff *skb, struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; - list_for_each_entry(afi, &net->nft.af_info, list) { + rcu_read_lock(); + list_for_each_entry_rcu(afi, &net->nft.af_info, list) { if (family != NFPROTO_UNSPEC && family != afi->family) continue; - list_for_each_entry(table, &afi->tables, list) { - 
list_for_each_entry(chain, &table->chains, list) { + list_for_each_entry_rcu(table, &afi->tables, list) { + list_for_each_entry_rcu(chain, &table->chains, list) { if (idx < s_idx) goto cont; if (idx > s_idx) @@ -787,11 +790,11 @@ cont: } } done: + rcu_read_unlock(); cb->args[0] = idx; return skb->len; } - static int nf_tables_getchain(struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) @@ -1133,7 +1136,7 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, goto err2; table->use++; - list_add_tail(&chain->list, &table->chains); + list_add_tail_rcu(&chain->list, &table->chains); return 0; err2: if (!(table->flags & NFT_TABLE_F_DORMANT) && @@ -1183,7 +1186,7 @@ static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb, return err; table->use--; - list_del(&chain->list); + list_del_rcu(&chain->list); return 0; } @@ -1202,9 +1205,9 @@ int nft_register_expr(struct nft_expr_type *type) { nfnl_lock(NFNL_SUBSYS_NFTABLES); if (type->family == NFPROTO_UNSPEC) - list_add_tail(&type->list, &nf_tables_expressions); + list_add_tail_rcu(&type->list, &nf_tables_expressions); else - list_add(&type->list, &nf_tables_expressions); + list_add_rcu(&type->list, &nf_tables_expressions); nfnl_unlock(NFNL_SUBSYS_NFTABLES); return 0; } @@ -1219,7 +1222,7 @@ EXPORT_SYMBOL_GPL(nft_register_expr); void nft_unregister_expr(struct nft_expr_type *type) { nfnl_lock(NFNL_SUBSYS_NFTABLES); - list_del(&type->list); + list_del_rcu(&type->list); nfnl_unlock(NFNL_SUBSYS_NFTABLES); } EXPORT_SYMBOL_GPL(nft_unregister_expr); @@ -1555,13 +1558,14 @@ static int nf_tables_dump_rules(struct sk_buff *skb, u8 genctr = ACCESS_ONCE(net->nft.genctr); u8 gencursor = ACCESS_ONCE(net->nft.gencursor); - list_for_each_entry(afi, &net->nft.af_info, list) { + rcu_read_lock(); + list_for_each_entry_rcu(afi, &net->nft.af_info, list) { if (family != NFPROTO_UNSPEC && family != afi->family) continue; - list_for_each_entry(table, &afi->tables, list) { - list_for_each_entry(chain, &table->chains, list) { - list_for_each_entry(rule, &chain->rules, list) { + list_for_each_entry_rcu(table, &afi->tables, list) { + list_for_each_entry_rcu(chain, &table->chains, list) { + list_for_each_entry_rcu(rule, &chain->rules, list) { if (!nft_rule_is_active(net, rule)) goto cont; if (idx < s_idx) @@ -1582,6 +1586,8 @@ cont: } } done: + rcu_read_unlock(); + /* Invalidate this dump, a transition to the new generation happened */ if (gencursor != net->nft.gencursor || genctr != net->nft.genctr) return -EBUSY; @@ -1935,7 +1941,7 @@ static LIST_HEAD(nf_tables_set_ops); int nft_register_set(struct nft_set_ops *ops) { nfnl_lock(NFNL_SUBSYS_NFTABLES); - list_add_tail(&ops->list, &nf_tables_set_ops); + list_add_tail_rcu(&ops->list, &nf_tables_set_ops); nfnl_unlock(NFNL_SUBSYS_NFTABLES); return 0; } @@ -1944,7 +1950,7 @@ EXPORT_SYMBOL_GPL(nft_register_set); void nft_unregister_set(struct nft_set_ops *ops) { nfnl_lock(NFNL_SUBSYS_NFTABLES); - list_del(&ops->list); + list_del_rcu(&ops->list); nfnl_unlock(NFNL_SUBSYS_NFTABLES); } EXPORT_SYMBOL_GPL(nft_unregister_set); @@ -2237,7 +2243,8 @@ static int nf_tables_dump_sets_table(struct nft_ctx *ctx, struct sk_buff *skb, if (cb->args[1]) return skb->len; - list_for_each_entry(set, &ctx->table->sets, list) { + rcu_read_lock(); + list_for_each_entry_rcu(set, &ctx->table->sets, list) { if (idx < s_idx) goto cont; if (nf_tables_fill_set(skb, ctx, set, NFT_MSG_NEWSET, @@ -2250,6 +2257,7 @@ cont: } cb->args[1] = 1; done: + rcu_read_unlock(); return skb->len; } 
@@ -2263,7 +2271,8 @@ static int nf_tables_dump_sets_family(struct nft_ctx *ctx, struct sk_buff *skb, if (cb->args[1]) return skb->len; - list_for_each_entry(table, &ctx->afi->tables, list) { + rcu_read_lock(); + list_for_each_entry_rcu(table, &ctx->afi->tables, list) { if (cur_table) { if (cur_table != table) continue; @@ -2272,7 +2281,7 @@ static int nf_tables_dump_sets_family(struct nft_ctx *ctx, struct sk_buff *skb, } ctx->table = table; idx = 0; - list_for_each_entry(set, &ctx->table->sets, list) { + list_for_each_entry_rcu(set, &ctx->table->sets, list) { if (idx < s_idx) goto cont; if (nf_tables_fill_set(skb, ctx, set, NFT_MSG_NEWSET, @@ -2287,6 +2296,7 @@ cont: } cb->args[1] = 1; done: + rcu_read_unlock(); return skb->len; } @@ -2303,7 +2313,8 @@ static int nf_tables_dump_sets_all(struct nft_ctx *ctx, struct sk_buff *skb, if (cb->args[1]) return skb->len; - list_for_each_entry(afi, &net->nft.af_info, list) { + rcu_read_lock(); + list_for_each_entry_rcu(afi, &net->nft.af_info, list) { if (cur_family) { if (afi->family != cur_family) continue; @@ -2311,7 +2322,7 @@ static int nf_tables_dump_sets_all(struct nft_ctx *ctx, struct sk_buff *skb, cur_family = 0; } - list_for_each_entry(table, &afi->tables, list) { + list_for_each_entry_rcu(table, &afi->tables, list) { if (cur_table) { if (cur_table != table) continue; @@ -2322,7 +2333,7 @@ static int nf_tables_dump_sets_all(struct nft_ctx *ctx, struct sk_buff *skb, ctx->table = table; ctx->afi = afi; idx = 0; - list_for_each_entry(set, &ctx->table->sets, list) { + list_for_each_entry_rcu(set, &ctx->table->sets, list) { if (idx < s_idx) goto cont; if (nf_tables_fill_set(skb, ctx, set, @@ -2342,6 +2353,7 @@ cont: } cb->args[1] = 1; done: + rcu_read_unlock(); return skb->len; } @@ -2600,7 +2612,7 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb, if (err < 0) goto err2; - list_add_tail(&set->list, &table->sets); + list_add_tail_rcu(&set->list, &table->sets); table->use++; return 0; @@ -2620,7 +2632,7 @@ static void nft_set_destroy(struct nft_set *set) static void nf_tables_set_destroy(const struct nft_ctx *ctx, struct nft_set *set) { - list_del(&set->list); + list_del_rcu(&set->list); nf_tables_set_notify(ctx, set, NFT_MSG_DELSET, GFP_ATOMIC); nft_set_destroy(set); } @@ -2655,7 +2667,7 @@ static int nf_tables_delset(struct sock *nlsk, struct sk_buff *skb, if (err < 0) return err; - list_del(&set->list); + list_del_rcu(&set->list); ctx.table->use--; return 0; } @@ -2707,14 +2719,14 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, } bind: binding->chain = ctx->chain; - list_add_tail(&binding->list, &set->bindings); + list_add_tail_rcu(&binding->list, &set->bindings); return 0; } void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_binding *binding) { - list_del(&binding->list); + list_del_rcu(&binding->list); if (list_empty(&set->bindings) && set->flags & NFT_SET_ANONYMOUS && !(set->flags & NFT_SET_INACTIVE)) @@ -3494,12 +3506,12 @@ static int nf_tables_abort(struct sk_buff *skb) } nft_trans_destroy(trans); } else { - list_del(&trans->ctx.table->list); + list_del_rcu(&trans->ctx.table->list); } break; case NFT_MSG_DELTABLE: - list_add_tail(&trans->ctx.table->list, - &trans->ctx.afi->tables); + list_add_tail_rcu(&trans->ctx.table->list, + &trans->ctx.afi->tables); nft_trans_destroy(trans); break; case NFT_MSG_NEWCHAIN: @@ -3510,7 +3522,7 @@ static int nf_tables_abort(struct sk_buff *skb) nft_trans_destroy(trans); } else { trans->ctx.table->use--; - 
list_del(&trans->ctx.chain->list); + list_del_rcu(&trans->ctx.chain->list); if (!(trans->ctx.table->flags & NFT_TABLE_F_DORMANT) && trans->ctx.chain->flags & NFT_BASE_CHAIN) { nf_unregister_hooks(nft_base_chain(trans->ctx.chain)->ops, @@ -3520,8 +3532,8 @@ static int nf_tables_abort(struct sk_buff *skb) break; case NFT_MSG_DELCHAIN: trans->ctx.table->use++; - list_add_tail(&trans->ctx.chain->list, - &trans->ctx.table->chains); + list_add_tail_rcu(&trans->ctx.chain->list, + &trans->ctx.table->chains); nft_trans_destroy(trans); break; case NFT_MSG_NEWRULE: @@ -3535,12 +3547,12 @@ static int nf_tables_abort(struct sk_buff *skb) break; case NFT_MSG_NEWSET: trans->ctx.table->use--; - list_del(&nft_trans_set(trans)->list); + list_del_rcu(&nft_trans_set(trans)->list); break; case NFT_MSG_DELSET: trans->ctx.table->use++; - list_add_tail(&nft_trans_set(trans)->list, - &trans->ctx.table->sets); + list_add_tail_rcu(&nft_trans_set(trans)->list, + &trans->ctx.table->sets); nft_trans_destroy(trans); break; case NFT_MSG_NEWSETELEM: -- cgit v0.10.2 From 38e029f14a9702f71d5953246df9f722bca49017 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 1 Jul 2014 12:23:12 +0200 Subject: netfilter: nf_tables: set NLM_F_DUMP_INTR if netlink dumping is stale An updater may interfer with the dumping of any of the object lists. Fix this by using a per-net generation counter and use the nl_dump_check_consistent() interface so the NLM_F_DUMP_INTR flag is set to notify userspace that it has to restart the dump since an updater has interfered. This patch also replaces the existing consistency checking code in the rule dumping path since it is broken. Basically, the value that the dump callback returns is not propagated to userspace via netlink_dump_start(). Signed-off-by: Pablo Neira Ayuso diff --git a/include/net/netns/nftables.h b/include/net/netns/nftables.h index 26a394c..eee608b 100644 --- a/include/net/netns/nftables.h +++ b/include/net/netns/nftables.h @@ -13,8 +13,8 @@ struct netns_nftables { struct nft_af_info *inet; struct nft_af_info *arp; struct nft_af_info *bridge; + unsigned int base_seq; u8 gencursor; - u8 genctr; }; #endif diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index a27a7c5..ac03d74 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -278,6 +278,8 @@ static int nf_tables_dump_tables(struct sk_buff *skb, int family = nfmsg->nfgen_family; rcu_read_lock(); + cb->seq = net->nft.base_seq; + list_for_each_entry_rcu(afi, &net->nft.af_info, list) { if (family != NFPROTO_UNSPEC && family != afi->family) continue; @@ -295,6 +297,8 @@ static int nf_tables_dump_tables(struct sk_buff *skb, NLM_F_MULTI, afi->family, table) < 0) goto done; + + nl_dump_check_consistent(cb, nlmsg_hdr(skb)); cont: idx++; } @@ -767,6 +771,8 @@ static int nf_tables_dump_chains(struct sk_buff *skb, int family = nfmsg->nfgen_family; rcu_read_lock(); + cb->seq = net->nft.base_seq; + list_for_each_entry_rcu(afi, &net->nft.af_info, list) { if (family != NFPROTO_UNSPEC && family != afi->family) continue; @@ -784,6 +790,8 @@ static int nf_tables_dump_chains(struct sk_buff *skb, NLM_F_MULTI, afi->family, table, chain) < 0) goto done; + + nl_dump_check_consistent(cb, nlmsg_hdr(skb)); cont: idx++; } @@ -1555,10 +1563,10 @@ static int nf_tables_dump_rules(struct sk_buff *skb, unsigned int idx = 0, s_idx = cb->args[0]; struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; - u8 genctr = ACCESS_ONCE(net->nft.genctr); - u8 gencursor = ACCESS_ONCE(net->nft.gencursor); 
rcu_read_lock(); + cb->seq = net->nft.base_seq; + list_for_each_entry_rcu(afi, &net->nft.af_info, list) { if (family != NFPROTO_UNSPEC && family != afi->family) continue; @@ -1579,6 +1587,8 @@ static int nf_tables_dump_rules(struct sk_buff *skb, NLM_F_MULTI | NLM_F_APPEND, afi->family, table, chain, rule) < 0) goto done; + + nl_dump_check_consistent(cb, nlmsg_hdr(skb)); cont: idx++; } @@ -1588,10 +1598,6 @@ cont: done: rcu_read_unlock(); - /* Invalidate this dump, a transition to the new generation happened */ - if (gencursor != net->nft.gencursor || genctr != net->nft.genctr) - return -EBUSY; - cb->args[0] = idx; return skb->len; } @@ -2244,6 +2250,8 @@ static int nf_tables_dump_sets_table(struct nft_ctx *ctx, struct sk_buff *skb, return skb->len; rcu_read_lock(); + cb->seq = ctx->net->nft.base_seq; + list_for_each_entry_rcu(set, &ctx->table->sets, list) { if (idx < s_idx) goto cont; @@ -2252,6 +2260,7 @@ static int nf_tables_dump_sets_table(struct nft_ctx *ctx, struct sk_buff *skb, cb->args[0] = idx; goto done; } + nl_dump_check_consistent(cb, nlmsg_hdr(skb)); cont: idx++; } @@ -2272,6 +2281,8 @@ static int nf_tables_dump_sets_family(struct nft_ctx *ctx, struct sk_buff *skb, return skb->len; rcu_read_lock(); + cb->seq = ctx->net->nft.base_seq; + list_for_each_entry_rcu(table, &ctx->afi->tables, list) { if (cur_table) { if (cur_table != table) @@ -2290,6 +2301,7 @@ static int nf_tables_dump_sets_family(struct nft_ctx *ctx, struct sk_buff *skb, cb->args[2] = (unsigned long) table; goto done; } + nl_dump_check_consistent(cb, nlmsg_hdr(skb)); cont: idx++; } @@ -2314,6 +2326,8 @@ static int nf_tables_dump_sets_all(struct nft_ctx *ctx, struct sk_buff *skb, return skb->len; rcu_read_lock(); + cb->seq = net->nft.base_seq; + list_for_each_entry_rcu(afi, &net->nft.af_info, list) { if (cur_family) { if (afi->family != cur_family) @@ -2344,6 +2358,7 @@ static int nf_tables_dump_sets_all(struct nft_ctx *ctx, struct sk_buff *skb, cb->args[3] = afi->family; goto done; } + nl_dump_check_consistent(cb, nlmsg_hdr(skb)); cont: idx++; } @@ -3361,7 +3376,7 @@ static int nf_tables_commit(struct sk_buff *skb) struct nft_set *set; /* Bump generation counter, invalidate any dump in progress */ - net->nft.genctr++; + while (++net->nft.base_seq == 0); /* A new generation has just started */ net->nft.gencursor = gencursor_next(net); @@ -3966,6 +3981,7 @@ static int nf_tables_init_net(struct net *net) { INIT_LIST_HEAD(&net->nft.af_info); INIT_LIST_HEAD(&net->nft.commit_list); + net->nft.base_seq = 1; return 0; } -- cgit v0.10.2 From ce355e209feb030945dae4c358c02f29a84f3f8b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 9 Jul 2014 15:14:06 +0200 Subject: netfilter: nf_tables: 64bit stats need some extra synchronization Use generic u64_stats_sync infrastructure to get proper 64bit stats, even on 32bit arches, at no extra cost for 64bit arches. Without this fix, 32bit arches can have some wrong counters at the time the carry is propagated into upper word. 
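The protocol the patch adopts is the generic one from <linux/u64_stats_sync.h>: writers bracket their updates, readers retry until they see a snapshot that no writer touched. A minimal sketch of that pattern (the pkt_stats struct and helpers are invented here for illustration; only the u64_stats_* calls are the real API used by the patch):

    #include <linux/u64_stats_sync.h>

    struct pkt_stats {
            u64                     pkts;
            u64                     bytes;
            struct u64_stats_sync   syncp;
    };

    /* writer (hot path): plain stores on 64-bit, seqcount-protected on 32-bit */
    static void pkt_stats_add(struct pkt_stats *s, unsigned int len)
    {
            u64_stats_update_begin(&s->syncp);
            s->pkts++;
            s->bytes += len;
            u64_stats_update_end(&s->syncp);
    }

    /* reader: loop until both 64-bit words were read without a racing writer */
    static void pkt_stats_read(const struct pkt_stats *s, u64 *pkts, u64 *bytes)
    {
            unsigned int seq;

            do {
                    seq = u64_stats_fetch_begin_irq(&s->syncp);
                    *pkts = s->pkts;
                    *bytes = s->bytes;
            } while (u64_stats_fetch_retry_irq(&s->syncp, seq));
    }

Allocating the per-cpu copies with netdev_alloc_pcpu_stats(), as the patch switches to, also initializes every CPU's syncp, which a bare alloc_percpu() does not.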
Signed-off-by: Eric Dumazet Signed-off-by: Pablo Neira Ayuso diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 713b0b8..c4d8619 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -6,6 +6,7 @@ #include #include #include +#include #include #define NFT_JUMP_STACK_SIZE 16 @@ -528,8 +529,9 @@ enum nft_chain_type { }; struct nft_stats { - u64 bytes; - u64 pkts; + u64 bytes; + u64 pkts; + struct u64_stats_sync syncp; }; #define NFT_HOOK_OPS_MAX 2 diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index ac03d74..8746ff9 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -644,13 +644,20 @@ static int nft_dump_stats(struct sk_buff *skb, struct nft_stats __percpu *stats) { struct nft_stats *cpu_stats, total; struct nlattr *nest; + unsigned int seq; + u64 pkts, bytes; int cpu; memset(&total, 0, sizeof(total)); for_each_possible_cpu(cpu) { cpu_stats = per_cpu_ptr(stats, cpu); - total.pkts += cpu_stats->pkts; - total.bytes += cpu_stats->bytes; + do { + seq = u64_stats_fetch_begin_irq(&cpu_stats->syncp); + pkts = cpu_stats->pkts; + bytes = cpu_stats->bytes; + } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, seq)); + total.pkts += pkts; + total.bytes += bytes; } nest = nla_nest_start(skb, NFTA_CHAIN_COUNTERS); if (nest == NULL) @@ -875,7 +882,7 @@ static struct nft_stats __percpu *nft_stats_alloc(const struct nlattr *attr) if (!tb[NFTA_COUNTER_BYTES] || !tb[NFTA_COUNTER_PACKETS]) return ERR_PTR(-EINVAL); - newstats = alloc_percpu(struct nft_stats); + newstats = netdev_alloc_pcpu_stats(struct nft_stats); if (newstats == NULL) return ERR_PTR(-ENOMEM); @@ -1091,7 +1098,7 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, } basechain->stats = stats; } else { - stats = alloc_percpu(struct nft_stats); + stats = netdev_alloc_pcpu_stats(struct nft_stats); if (IS_ERR(stats)) { module_put(type->owner); kfree(basechain); diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 345acfb..3b90eb2 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -109,7 +109,7 @@ nft_do_chain(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops) struct nft_data data[NFT_REG_MAX + 1]; unsigned int stackptr = 0; struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE]; - struct nft_stats __percpu *stats; + struct nft_stats *stats; int rulenum; /* * Cache cursor to avoid problems in case that the cursor is updated @@ -205,9 +205,11 @@ next_rule: nft_trace_packet(pkt, basechain, -1, NFT_TRACE_POLICY); rcu_read_lock_bh(); - stats = rcu_dereference(nft_base_chain(basechain)->stats); - __this_cpu_inc(stats->pkts); - __this_cpu_add(stats->bytes, pkt->skb->len); + stats = this_cpu_ptr(rcu_dereference(nft_base_chain(basechain)->stats)); + u64_stats_update_begin(&stats->syncp); + stats->pkts++; + stats->bytes += pkt->skb->len; + u64_stats_update_end(&stats->syncp); rcu_read_unlock_bh(); return nft_base_chain(basechain)->policy; -- cgit v0.10.2 From 3b3a1814d1703027f9867d0f5cbbfaf6c7482474 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 2 Jul 2014 12:46:23 -0400 Subject: block: provide compat ioctl for BLKZEROOUT This patch provides the compat BLKZEROOUT ioctl. The argument is a pointer to two uint64_t values, so there is no need to translate it. Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org # 3.7+ Acked-by: Martin K. 
Petersen Signed-off-by: Jens Axboe diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c index fbd5a67..a0926a6 100644 --- a/block/compat_ioctl.c +++ b/block/compat_ioctl.c @@ -690,6 +690,7 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) case BLKROSET: case BLKDISCARD: case BLKSECDISCARD: + case BLKZEROOUT: /* * the ones below are implemented in blkdev_locked_ioctl, * but we call blkdev_ioctl, which gets the lock for us -- cgit v0.10.2 From d3cc7996473a7bdd33256029988ea690754e4e2a Mon Sep 17 00:00:00 2001 From: Amit Shah Date: Thu, 10 Jul 2014 15:42:34 +0530 Subject: hwrng: fetch randomness only after device init Commit d9e7972619334 "hwrng: add randomness to system from rng sources" added a call to rng_get_data() from the hwrng_register() function. However, some rng devices need initialization before data can be read from them. This commit makes the call to rng_get_data() depend on no init fn pointer being registered by the device. If an init function is registered, this call is made after device init. CC: Kees Cook CC: Jason Cooper CC: Herbert Xu CC: # For v3.15+ Signed-off-by: Amit Shah Reviewed-by: Jason Cooper Signed-off-by: Herbert Xu diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c index 334601c..2a451b1 100644 --- a/drivers/char/hw_random/core.c +++ b/drivers/char/hw_random/core.c @@ -55,16 +55,35 @@ static DEFINE_MUTEX(rng_mutex); static int data_avail; static u8 *rng_buffer; +static inline int rng_get_data(struct hwrng *rng, u8 *buffer, size_t size, + int wait); + static size_t rng_buffer_size(void) { return SMP_CACHE_BYTES < 32 ? 32 : SMP_CACHE_BYTES; } +static void add_early_randomness(struct hwrng *rng) +{ + unsigned char bytes[16]; + int bytes_read; + + bytes_read = rng_get_data(rng, bytes, sizeof(bytes), 1); + if (bytes_read > 0) + add_device_randomness(bytes, bytes_read); +} + static inline int hwrng_init(struct hwrng *rng) { - if (!rng->init) - return 0; - return rng->init(rng); + if (rng->init) { + int ret; + + ret = rng->init(rng); + if (ret) + return ret; + } + add_early_randomness(rng); + return 0; } static inline void hwrng_cleanup(struct hwrng *rng) @@ -304,8 +323,6 @@ int hwrng_register(struct hwrng *rng) { int err = -EINVAL; struct hwrng *old_rng, *tmp; - unsigned char bytes[16]; - int bytes_read; if (rng->name == NULL || (rng->data_read == NULL && rng->read == NULL)) @@ -347,9 +364,17 @@ int hwrng_register(struct hwrng *rng) INIT_LIST_HEAD(&rng->list); list_add_tail(&rng->list, &rng_list); - bytes_read = rng_get_data(rng, bytes, sizeof(bytes), 1); - if (bytes_read > 0) - add_device_randomness(bytes, bytes_read); + if (old_rng && !rng->init) { + /* + * Use a new device's input to add some randomness to + * the system. If this rng device isn't going to be + * used right away, its init function hasn't been + * called yet; so only use the randomness from devices + * that don't need an init callback. + */ + add_early_randomness(rng); + } + out_unlock: mutex_unlock(&rng_mutex); out: -- cgit v0.10.2 From e052dbf554610e2104c5a7518c4d8374bed701bb Mon Sep 17 00:00:00 2001 From: Amit Shah Date: Thu, 10 Jul 2014 15:42:35 +0530 Subject: hwrng: virtio - ensure reads happen after successful probe The hwrng core asks for random data in the hwrng_register() call itself from commit d9e7972619. This doesn't play well with virtio -- the DRIVER_OK bit is only set by virtio core on a successful probe, and we're not yet out of our probe routine when this call is made. 
This causes the host to not acknowledge any requests we put in the virtqueue, and the insmod or kernel boot process just waits for data to arrive from the host, which never happens. CC: Kees Cook CC: Jason Cooper CC: Herbert Xu CC: # For v3.15+ Reviewed-by: Jason Cooper Signed-off-by: Amit Shah Signed-off-by: Herbert Xu diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c index 2a451b1..c4419ea 100644 --- a/drivers/char/hw_random/core.c +++ b/drivers/char/hw_random/core.c @@ -68,6 +68,12 @@ static void add_early_randomness(struct hwrng *rng) unsigned char bytes[16]; int bytes_read; + /* + * Currently only virtio-rng cannot return data during device + * probe, and that's handled in virtio-rng.c itself. If there + * are more such devices, this call to rng_get_data can be + * made conditional here instead of doing it per-device. + */ bytes_read = rng_get_data(rng, bytes, sizeof(bytes), 1); if (bytes_read > 0) add_device_randomness(bytes, bytes_read); diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c index f3e7150..e9b15bc 100644 --- a/drivers/char/hw_random/virtio-rng.c +++ b/drivers/char/hw_random/virtio-rng.c @@ -38,6 +38,8 @@ struct virtrng_info { int index; }; +static bool probe_done; + static void random_recv_done(struct virtqueue *vq) { struct virtrng_info *vi = vq->vdev->priv; @@ -67,6 +69,13 @@ static int virtio_read(struct hwrng *rng, void *buf, size_t size, bool wait) int ret; struct virtrng_info *vi = (struct virtrng_info *)rng->priv; + /* + * Don't ask host for data till we're setup. This call can + * happen during hwrng_register(), after commit d9e7972619. + */ + if (unlikely(!probe_done)) + return 0; + if (!vi->busy) { vi->busy = true; init_completion(&vi->have_data); @@ -137,6 +146,7 @@ static int probe_common(struct virtio_device *vdev) return err; } + probe_done = true; return 0; } -- cgit v0.10.2 From 7188b067576db95445bf4e9498f1bdb2e612dd2f Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 14 Jul 2014 15:41:25 +0200 Subject: MAINTAINERS: Add Hans de Goede as ahci-platform maintainer Signed-off-by: Hans de Goede Signed-off-by: Tejun Heo diff --git a/MAINTAINERS b/MAINTAINERS index 134483f..c92bb80 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7993,6 +7993,16 @@ F: drivers/ata/ F: include/linux/ata.h F: include/linux/libata.h +SERIAL ATA AHCI PLATFORM devices support +M: Hans de Goede +M: Tejun Heo +L: linux-ide@vger.kernel.org +T: git git://git.kernel.org/pub/scm/linux/kernel/git/tj/libata.git +S: Supported +F: drivers/ata/ahci_platform.c +F: drivers/ata/libahci_platform.c +F: include/linux/ahci_platform.h + SERVER ENGINES 10Gbps iSCSI - BladeEngine 2 DRIVER M: Jayamohan Kallickal L: linux-scsi@vger.kernel.org -- cgit v0.10.2 From 1871ee134b73fb4cadab75752a7152ed2813c751 Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Sat, 12 Jul 2014 12:08:24 +0800 Subject: libata: support the ata host which implements a queue depth less than 32 The sata on fsl mpc8315e is broken after the commit 8a4aeec8d2d6 ("libata/ahci: accommodate tag ordered controllers"). The reason is that the ata controller on this SoC only implement a queue depth of 16. When issuing the commands in tag order, all the commands in tag 16 ~ 31 are mapped to tag 0 unconditionally and then causes the sata malfunction. It makes no senses to use a 32 queue in software while the hardware has less queue depth. So consider the queue depth implemented by the hardware when requesting a command tag. 
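The resulting tag-allocation rule is simple to state: round-robin over the tags, but wrap at the depth the host actually advertises (sht->can_queue) instead of ATA_MAX_QUEUE. A simplified stand-alone sketch of that search (invented names; the real code also reserves ATA_TAG_INTERNAL and inspects the queued-command state rather than a bitmap):

    /*
     * Return the next usable tag after @last_tag, trying at most @max_queue
     * slots, where @max_queue is the hardware queue depth (<= ATA_MAX_QUEUE).
     */
    static int pick_next_tag(unsigned long *in_use, unsigned int last_tag,
                             unsigned int max_queue)
    {
            unsigned int i, tag;

            for (i = 0, tag = last_tag + 1; i < max_queue; i++, tag++) {
                    tag = tag < max_queue ? tag : 0;        /* wrap inside HW depth */
                    if (!test_and_set_bit(tag, in_use))
                            return tag;                     /* grabbed a free tag */
            }
            return -EBUSY;                                  /* every tag is busy */
    }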
Fixes: 8a4aeec8d2d6 ("libata/ahci: accommodate tag ordered controllers") Cc: stable@vger.kernel.org Signed-off-by: Kevin Hao Acked-by: Dan Williams Signed-off-by: Tejun Heo diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 18d97d5..d19c37a7 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4787,6 +4787,10 @@ void swap_buf_le16(u16 *buf, unsigned int buf_words) * ata_qc_new - Request an available ATA command, for queueing * @ap: target port * + * Some ATA host controllers may implement a queue depth which is less + * than ATA_MAX_QUEUE. So we shouldn't allocate a tag which is beyond + * the hardware limitation. + * * LOCKING: * None. */ @@ -4794,14 +4798,16 @@ void swap_buf_le16(u16 *buf, unsigned int buf_words) static struct ata_queued_cmd *ata_qc_new(struct ata_port *ap) { struct ata_queued_cmd *qc = NULL; - unsigned int i, tag; + unsigned int i, tag, max_queue; + + max_queue = ap->scsi_host->can_queue; /* no command while frozen */ if (unlikely(ap->pflags & ATA_PFLAG_FROZEN)) return NULL; - for (i = 0; i < ATA_MAX_QUEUE; i++) { - tag = (i + ap->last_tag + 1) % ATA_MAX_QUEUE; + for (i = 0, tag = ap->last_tag + 1; i < max_queue; i++, tag++) { + tag = tag < max_queue ? tag : 0; /* the last tag is reserved for internal command. */ if (tag == ATA_TAG_INTERNAL) @@ -6169,6 +6175,16 @@ int ata_host_register(struct ata_host *host, struct scsi_host_template *sht) { int i, rc; + /* + * The max queue supported by hardware must not be greater than + * ATA_MAX_QUEUE. + */ + if (sht->can_queue > ATA_MAX_QUEUE) { + dev_err(host->dev, "BUG: the hardware max queue is too large\n"); + WARN_ON(1); + return -EINVAL; + } + /* host must have been started */ if (!(host->flags & ATA_HOST_STARTED)) { dev_err(host->dev, "BUG: trying to register unstarted host\n"); -- cgit v0.10.2 From 9c8958bc24e241de2c0d4740e6dea861fe47daa5 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 14 Jul 2014 19:35:31 +0200 Subject: drm/i915: Track the primary plane correctly when reassigning planes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 98ec77397a5c68ce753dc283aaa6f4742328bcdd Author: Ville Syrjälä Date: Wed Apr 30 17:43:01 2014 +0300 drm/i915: Make primary_enabled match the actual hardware state introduced more accurate tracking of the primary plane and some checks. It missed the plane->pipe reassignement code for gen2/3 though, which the checks caught and resulted in WARNING backtraces. Since we only use this path if the plane is on and on the wrong pipe we can just always set the tracking bit to "enabled". Reported-and-tested-by: Paul Bolle Cc: Paul Bolle Signed-off-by: Daniel Vetter diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 82e7d57..f0be855 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -11914,6 +11914,7 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc) * ... */ plane = crtc->plane; crtc->plane = !plane; + crtc->primary_enabled = true; dev_priv->display.crtc_disable(&crtc->base); crtc->plane = plane; -- cgit v0.10.2 From ee14b644daaa58afe1e91bb9ebd9cf1b18d1f5fa Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Wed, 9 Jul 2014 09:18:59 +0800 Subject: hwmon: (da9052) Don't use dash in the name attribute Dashes are not allowed in hwmon name attributes. Use "da9052" instead of "da9052-hwmon". 
Signed-off-by: Axel Lin Cc: stable@vger.kernel.org Signed-off-by: Guenter Roeck diff --git a/drivers/hwmon/da9052-hwmon.c b/drivers/hwmon/da9052-hwmon.c index afd3104..d14ab3c 100644 --- a/drivers/hwmon/da9052-hwmon.c +++ b/drivers/hwmon/da9052-hwmon.c @@ -194,7 +194,7 @@ static ssize_t da9052_hwmon_show_name(struct device *dev, struct device_attribute *devattr, char *buf) { - return sprintf(buf, "da9052-hwmon\n"); + return sprintf(buf, "da9052\n"); } static ssize_t show_label(struct device *dev, -- cgit v0.10.2 From 6b00f440dd678d786389a7100a2e03fe44478431 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Wed, 9 Jul 2014 09:22:54 +0800 Subject: hwmon: (da9055) Don't use dash in the name attribute Dashes are not allowed in hwmon name attributes. Use "da9055" instead of "da9055-hwmon". Signed-off-by: Axel Lin Cc: stable@vger.kernel.org Signed-off-by: Guenter Roeck diff --git a/drivers/hwmon/da9055-hwmon.c b/drivers/hwmon/da9055-hwmon.c index 73b3865..35eb773 100644 --- a/drivers/hwmon/da9055-hwmon.c +++ b/drivers/hwmon/da9055-hwmon.c @@ -204,7 +204,7 @@ static ssize_t da9055_hwmon_show_name(struct device *dev, struct device_attribute *devattr, char *buf) { - return sprintf(buf, "da9055-hwmon\n"); + return sprintf(buf, "da9055\n"); } static ssize_t show_label(struct device *dev, -- cgit v0.10.2 From 2843768b701971ab10e62c77d5c75ad7c306f1bd Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 14 Jul 2014 19:35:45 +0200 Subject: Revert "ACPI / video: change acpi-video brightness_switch_enabled default to 0" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 886129a8eebeb (ACPI / video: change acpi-video brightness_switch_enabled default to 0) as it is reported to cause problems to happen. Fixes: 886129a8eebeb (ACPI / video: change acpi-video brightness_switch_enabled default to 0) Link: http://marc.info/?l=linux-acpi&m=140534286826819&w=2 Reported by: Bjørn Mork Signed-off-by: Rafael J. Wysocki diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index c1b9aa8..e332e71 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -3526,7 +3526,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted. the allocated input device; If set to 0, video driver will only send out the event without touching backlight brightness level. - default: 0 + default: 1 virtio_mmio.device= [VMMIO] Memory mapped virtio (platform) device. diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c index 071c1df..29649c1 100644 --- a/drivers/acpi/video.c +++ b/drivers/acpi/video.c @@ -68,7 +68,7 @@ MODULE_AUTHOR("Bruno Ducrot"); MODULE_DESCRIPTION("ACPI Video Driver"); MODULE_LICENSE("GPL"); -static bool brightness_switch_enabled; +static bool brightness_switch_enabled = 1; module_param(brightness_switch_enabled, bool, 0644); /* -- cgit v0.10.2 From 2448e3493cb3874baa90725c87869455ebf11cd2 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 11 Jul 2014 21:06:38 +0200 Subject: tracing: instance_rmdir() leaks ftrace_event_file->filter instance_rmdir() path destroys the event files but forgets to free file->filter. Change remove_event_file_dir() to free_event_filter(). 
Link: http://lkml.kernel.org/p/20140711190638.GA19517@redhat.com Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Srikar Dronamraju Cc: Tom Zanussi Cc: "zhangwei(Jovi)" Cc: stable@vger.kernel.org # 3.11+ Fixes: f6a84bdc75b5 "tracing: Introduce remove_event_file_dir()" Signed-off-by: Oleg Nesterov Signed-off-by: Steven Rostedt diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index f99e0b3..2de5362 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -470,6 +470,7 @@ static void remove_event_file_dir(struct ftrace_event_file *file) list_del(&file->list); remove_subsystem(file->system); + free_event_filter(file->filter); kmem_cache_free(file_cachep, file); } -- cgit v0.10.2 From 8762e5092828c4dc0f49da5a47a644c670df77f3 Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Wed, 9 Jul 2014 13:18:18 -0400 Subject: x86/espfix/xen: Fix allocation of pages for paravirt page tables init_espfix_ap() is currently off by one level when informing hypervisor that allocated pages will be used for ministacks' page tables. The most immediate effect of this on a PV guest is that if 'stack_page = __get_free_page()' returns a non-zeroed-out page the hypervisor will refuse to use it for a page table (which it shouldn't be anyway). This will result in warnings by both Xen and Linux. More importantly, a subsequent write to that page (again, by a PV guest) is likely to result in fatal page fault. Signed-off-by: Boris Ostrovsky Link: http://lkml.kernel.org/r/1404926298-5565-1-git-send-email-boris.ostrovsky@oracle.com Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: H. Peter Anvin Cc: diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c index 6afbb16..94d857f 100644 --- a/arch/x86/kernel/espfix_64.c +++ b/arch/x86/kernel/espfix_64.c @@ -175,7 +175,7 @@ void init_espfix_ap(void) if (!pud_present(pud)) { pmd_p = (pmd_t *)__get_free_page(PGALLOC_GFP); pud = __pud(__pa(pmd_p) | (PGTABLE_PROT & ptemask)); - paravirt_alloc_pud(&init_mm, __pa(pmd_p) >> PAGE_SHIFT); + paravirt_alloc_pmd(&init_mm, __pa(pmd_p) >> PAGE_SHIFT); for (n = 0; n < ESPFIX_PUD_CLONES; n++) set_pud(&pud_p[n], pud); } @@ -185,7 +185,7 @@ void init_espfix_ap(void) if (!pmd_present(pmd)) { pte_p = (pte_t *)__get_free_page(PGALLOC_GFP); pmd = __pmd(__pa(pte_p) | (PGTABLE_PROT & ptemask)); - paravirt_alloc_pmd(&init_mm, __pa(pte_p) >> PAGE_SHIFT); + paravirt_alloc_pte(&init_mm, __pa(pte_p) >> PAGE_SHIFT); for (n = 0; n < ESPFIX_PMD_CLONES; n++) set_pmd(&pmd_p[n], pmd); } @@ -193,7 +193,6 @@ void init_espfix_ap(void) pte_p = pte_offset_kernel(&pmd, addr); stack_page = (void *)__get_free_page(GFP_KERNEL); pte = __pte(__pa(stack_page) | (__PAGE_KERNEL_RO & ptemask)); - paravirt_alloc_pte(&init_mm, __pa(stack_page) >> PAGE_SHIFT); for (n = 0; n < ESPFIX_PTE_CLONES; n++) set_pte(&pte_p[n*PTE_STRIDE], pte); -- cgit v0.10.2 From aa182e64f16fc29a4984c2d79191b161888bbd9b Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 15 Jul 2014 07:08:10 +1000 Subject: Revert "xfs: block allocation work needs to be kswapd aware" This reverts commit 1f6d64829db78a7e1d63e15c9f48f0a5d2b5a679. This commit resulted in regressions in performance in low memory situations where kswapd was doing writeback of delayed allocation blocks. It resulted in significant parallelism of the kswapd work and with the special kswapd flags meant that hundreds of active allocation could dip into kswapd specific memory reserves and avoid being throttled. 
This cause a large amount of performance variation, as well as random OOM-killer invocations that didn't previously exist. Signed-off-by: Dave Chinner Reviewed-by: Brian Foster Signed-off-by: Dave Chinner diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 703b3ec..057f671 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -258,23 +258,14 @@ xfs_bmapi_allocate_worker( struct xfs_bmalloca *args = container_of(work, struct xfs_bmalloca, work); unsigned long pflags; - unsigned long new_pflags = PF_FSTRANS; - /* - * we are in a transaction context here, but may also be doing work - * in kswapd context, and hence we may need to inherit that state - * temporarily to ensure that we don't block waiting for memory reclaim - * in any way. - */ - if (args->kswapd) - new_pflags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD; - - current_set_flags_nested(&pflags, new_pflags); + /* we are in a transaction context here */ + current_set_flags_nested(&pflags, PF_FSTRANS); args->result = __xfs_bmapi_allocate(args); complete(args->done); - current_restore_flags_nested(&pflags, new_pflags); + current_restore_flags_nested(&pflags, PF_FSTRANS); } /* @@ -293,7 +284,6 @@ xfs_bmapi_allocate( args->done = &done; - args->kswapd = current_is_kswapd(); INIT_WORK_ONSTACK(&args->work, xfs_bmapi_allocate_worker); queue_work(xfs_alloc_wq, &args->work); wait_for_completion(&done); diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h index 075f722..935ed2b 100644 --- a/fs/xfs/xfs_bmap_util.h +++ b/fs/xfs/xfs_bmap_util.h @@ -50,13 +50,12 @@ struct xfs_bmalloca { xfs_extlen_t total; /* total blocks needed for xaction */ xfs_extlen_t minlen; /* minimum allocation size (blocks) */ xfs_extlen_t minleft; /* amount must be left after alloc */ - bool eof; /* set if allocating past last extent */ - bool wasdel; /* replacing a delayed allocation */ - bool userdata;/* set if is user data */ - bool aeof; /* allocated space at eof */ - bool conv; /* overwriting unwritten extents */ - bool stack_switch; - bool kswapd; /* allocation in kswapd context */ + char eof; /* set if allocating past last extent */ + char wasdel; /* replacing a delayed allocation */ + char userdata;/* set if is user data */ + char aeof; /* allocated space at eof */ + char conv; /* overwriting unwritten extents */ + char stack_switch; int flags; struct completion *done; struct work_struct work; -- cgit v0.10.2 From cf11da9c5d374962913ca5ba0ce0886b58286224 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 15 Jul 2014 07:08:24 +1000 Subject: xfs: refine the allocation stack switch The allocation stack switch at xfs_bmapi_allocate() has served it's purpose, but is no longer a sufficient solution to the stack usage problem we have in the XFS allocation path. Whilst the kernel stack size is now 16k, that is not a valid reason for undoing all our "keep stack usage down" modifications. What it does allow us to do is have the freedom to refine and perfect the modifications knowing that if we get it wrong it won't blow up in our faces - we have a safety net now. This is important because we still have the issue of older kernels having smaller stacks and that they are still supported and are demonstrating a wide range of different stack overflows. Red Hat has several open bugs for allocation based stack overflows from directory modifications and direct IO block allocation and these problems still need to be solved. If we can solve them upstream, then distro's won't need to bake their own unique solutions. 
To that end, I've observed that every allocation-based stack overflow report has had a specific characteristic - it has happened during or directly after a bmap btree block split. That event requires a new block to be allocated to the tree, and so we effectively stack one allocation stack on top of another, and that's when we get into trouble. A further observation is that bmap btree block splits are much rarer than writeback allocation - over a range of different workloads I've observed the ratio of bmap btree inserts to splits ranges from 100:1 (xfstests run) to 10000:1 (local VM image server with sparse files that range in the hundreds of thousands to millions of extents). Either way, bmap btree split events are much, much rarer than allocation events. Finally, we have to move the kswapd state to the allocation workqueue work when allocation is done on behalf of kswapd. This is proving to cause significant perturbation in performance under memory pressure and appears to be generating allocation deadlock warnings under some workloads, so avoiding the use of a workqueue for the majority of kswapd writeback allocation will minimise the impact of such behaviour. Hence it makes sense to move the stack switch to xfs_btree_split() and only do it for bmap btree splits. Stack switches during allocation will be much rarer, so there won't be significant performance overhead caused by switching stacks. The worst-case stack from all allocation paths will be split, not just writeback. And the majority of memory allocations will be done in the correct context (e.g. kswapd) without causing additional latency, and so we simplify the memory reclaim interactions between processes, workqueues and kswapd. The worst stack I've been able to generate with this patch in place is 5600 bytes deep. It's very revealing because we exit XFS at: 37) 1768 64 kmem_cache_alloc+0x13b/0x170 about 1800 bytes of stack consumed, and the remaining 3800 bytes (and 36 functions) is memory reclaim, swap and the IO stack. And this occurs in the inode allocation from an open(O_CREAT) syscall, not writeback. The amount of stack being used is much less than I've previously been able to generate - fs_mark testing has been able to generate stack usage of around 7k without too much trouble; with this patch it's only just getting to 5.5k. This is primarily because the metadata allocation paths (e.g. directory blocks) are no longer causing double splits on the same stack, and hence now stack tracing is showing swapping being the worst stack consumer rather than XFS. Performance of fs_mark inode create workloads is unchanged. Performance of fs_mark async fsync workloads is consistently good with context switches reduced by around 150,000/s (30%). Performance of dbench, streaming IO and postmark is unchanged. Allocation deadlock warnings have not been seen on the workloads that generated them since adding this patch.
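The mechanism being relocated is the familiar "run the deep-stack work on a workqueue and wait for it" idiom, which the diff below installs in xfs_btree_split(). As a generic, simplified sketch of that idiom (the function and struct names here are hypothetical stand-ins, not the XFS code):

#include <linux/workqueue.h>
#include <linux/completion.h>

struct fresh_stack_args {
	struct completion *done;
	struct work_struct work;
	int result;
};

static int do_deep_stack_work(void)
{
	return 0;	/* stand-in for the stack-hungry operation */
}

static void fresh_stack_worker(struct work_struct *work)
{
	struct fresh_stack_args *args =
		container_of(work, struct fresh_stack_args, work);

	/* Runs on the workqueue thread's (mostly) empty stack. */
	args->result = do_deep_stack_work();
	complete(args->done);
}

static int run_on_fresh_stack(void)
{
	struct fresh_stack_args args;
	DECLARE_COMPLETION_ONSTACK(done);

	args.done = &done;
	INIT_WORK_ONSTACK(&args.work, fresh_stack_worker);
	queue_work(system_wq, &args.work);
	wait_for_completion(&done);	/* caller sleeps while the worker runs */
	destroy_work_on_stack(&args.work);
	return args.result;
}

The cost of this pattern is a pair of context switches per invocation, which is exactly why the patch restricts it to the rare bmap btree split rather than every allocation.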
Signed-off-by: Dave Chinner Reviewed-by: Brian Foster Signed-off-by: Dave Chinner diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 96175df..75c3fe5 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -4298,8 +4298,8 @@ xfs_bmapi_delay( } -int -__xfs_bmapi_allocate( +static int +xfs_bmapi_allocate( struct xfs_bmalloca *bma) { struct xfs_mount *mp = bma->ip->i_mount; @@ -4578,9 +4578,6 @@ xfs_bmapi_write( bma.flist = flist; bma.firstblock = firstblock; - if (flags & XFS_BMAPI_STACK_SWITCH) - bma.stack_switch = 1; - while (bno < end && n < *nmap) { inhole = eof || bma.got.br_startoff > bno; wasdelay = !inhole && isnullstartblock(bma.got.br_startblock); diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h index 38ba36e..b879ca5 100644 --- a/fs/xfs/xfs_bmap.h +++ b/fs/xfs/xfs_bmap.h @@ -77,7 +77,6 @@ typedef struct xfs_bmap_free * from written to unwritten, otherwise convert from unwritten to written. */ #define XFS_BMAPI_CONVERT 0x040 -#define XFS_BMAPI_STACK_SWITCH 0x080 #define XFS_BMAPI_FLAGS \ { XFS_BMAPI_ENTIRE, "ENTIRE" }, \ @@ -86,8 +85,7 @@ typedef struct xfs_bmap_free { XFS_BMAPI_PREALLOC, "PREALLOC" }, \ { XFS_BMAPI_IGSTATE, "IGSTATE" }, \ { XFS_BMAPI_CONTIG, "CONTIG" }, \ - { XFS_BMAPI_CONVERT, "CONVERT" }, \ - { XFS_BMAPI_STACK_SWITCH, "STACK_SWITCH" } + { XFS_BMAPI_CONVERT, "CONVERT" } static inline int xfs_bmapi_aflag(int w) diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 057f671..64731ef 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -249,49 +249,6 @@ xfs_bmap_rtalloc( } /* - * Stack switching interfaces for allocation - */ -static void -xfs_bmapi_allocate_worker( - struct work_struct *work) -{ - struct xfs_bmalloca *args = container_of(work, - struct xfs_bmalloca, work); - unsigned long pflags; - - /* we are in a transaction context here */ - current_set_flags_nested(&pflags, PF_FSTRANS); - - args->result = __xfs_bmapi_allocate(args); - complete(args->done); - - current_restore_flags_nested(&pflags, PF_FSTRANS); -} - -/* - * Some allocation requests often come in with little stack to work on. Push - * them off to a worker thread so there is lots of stack to use. Otherwise just - * call directly to avoid the context switch overhead here. - */ -int -xfs_bmapi_allocate( - struct xfs_bmalloca *args) -{ - DECLARE_COMPLETION_ONSTACK(done); - - if (!args->stack_switch) - return __xfs_bmapi_allocate(args); - - - args->done = &done; - INIT_WORK_ONSTACK(&args->work, xfs_bmapi_allocate_worker); - queue_work(xfs_alloc_wq, &args->work); - wait_for_completion(&done); - destroy_work_on_stack(&args->work); - return args->result; -} - -/* * Check if the endoff is outside the last extent. If so the caller will grow * the allocation to a stripe unit boundary. All offsets are considered outside * the end of file for an empty fork, so 1 is returned in *eof in that case. 
diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h index 935ed2b..2fdb72d 100644 --- a/fs/xfs/xfs_bmap_util.h +++ b/fs/xfs/xfs_bmap_util.h @@ -50,12 +50,11 @@ struct xfs_bmalloca { xfs_extlen_t total; /* total blocks needed for xaction */ xfs_extlen_t minlen; /* minimum allocation size (blocks) */ xfs_extlen_t minleft; /* amount must be left after alloc */ - char eof; /* set if allocating past last extent */ - char wasdel; /* replacing a delayed allocation */ - char userdata;/* set if is user data */ - char aeof; /* allocated space at eof */ - char conv; /* overwriting unwritten extents */ - char stack_switch; + bool eof; /* set if allocating past last extent */ + bool wasdel; /* replacing a delayed allocation */ + bool userdata;/* set if is user data */ + bool aeof; /* allocated space at eof */ + bool conv; /* overwriting unwritten extents */ int flags; struct completion *done; struct work_struct work; @@ -65,8 +64,6 @@ struct xfs_bmalloca { int xfs_bmap_finish(struct xfs_trans **tp, struct xfs_bmap_free *flist, int *committed); int xfs_bmap_rtalloc(struct xfs_bmalloca *ap); -int xfs_bmapi_allocate(struct xfs_bmalloca *args); -int __xfs_bmapi_allocate(struct xfs_bmalloca *args); int xfs_bmap_eof(struct xfs_inode *ip, xfs_fileoff_t endoff, int whichfork, int *eof); int xfs_bmap_count_blocks(struct xfs_trans *tp, struct xfs_inode *ip, diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index bf810c6..cf893bc 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -33,6 +33,7 @@ #include "xfs_error.h" #include "xfs_trace.h" #include "xfs_cksum.h" +#include "xfs_alloc.h" /* * Cursor allocation zone. @@ -2323,7 +2324,7 @@ error1: * record (to be inserted into parent). */ STATIC int /* error */ -xfs_btree_split( +__xfs_btree_split( struct xfs_btree_cur *cur, int level, union xfs_btree_ptr *ptrp, @@ -2503,6 +2504,85 @@ error0: return error; } +struct xfs_btree_split_args { + struct xfs_btree_cur *cur; + int level; + union xfs_btree_ptr *ptrp; + union xfs_btree_key *key; + struct xfs_btree_cur **curp; + int *stat; /* success/failure */ + int result; + bool kswapd; /* allocation in kswapd context */ + struct completion *done; + struct work_struct work; +}; + +/* + * Stack switching interfaces for allocation + */ +static void +xfs_btree_split_worker( + struct work_struct *work) +{ + struct xfs_btree_split_args *args = container_of(work, + struct xfs_btree_split_args, work); + unsigned long pflags; + unsigned long new_pflags = PF_FSTRANS; + + /* + * we are in a transaction context here, but may also be doing work + * in kswapd context, and hence we may need to inherit that state + * temporarily to ensure that we don't block waiting for memory reclaim + * in any way. + */ + if (args->kswapd) + new_pflags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD; + + current_set_flags_nested(&pflags, new_pflags); + + args->result = __xfs_btree_split(args->cur, args->level, args->ptrp, + args->key, args->curp, args->stat); + complete(args->done); + + current_restore_flags_nested(&pflags, new_pflags); +} + +/* + * BMBT split requests often come in with little stack to work on. Push + * them off to a worker thread so there is lots of stack to use. For the other + * btree types, just call directly to avoid the context switch overhead here. 
+ */ +STATIC int /* error */ +xfs_btree_split( + struct xfs_btree_cur *cur, + int level, + union xfs_btree_ptr *ptrp, + union xfs_btree_key *key, + struct xfs_btree_cur **curp, + int *stat) /* success/failure */ +{ + struct xfs_btree_split_args args; + DECLARE_COMPLETION_ONSTACK(done); + + if (cur->bc_btnum != XFS_BTNUM_BMAP) + return __xfs_btree_split(cur, level, ptrp, key, curp, stat); + + args.cur = cur; + args.level = level; + args.ptrp = ptrp; + args.key = key; + args.curp = curp; + args.stat = stat; + args.done = &done; + args.kswapd = current_is_kswapd(); + INIT_WORK_ONSTACK(&args.work, xfs_btree_split_worker); + queue_work(xfs_alloc_wq, &args.work); + wait_for_completion(&done); + destroy_work_on_stack(&args.work); + return args.result; +} + + /* * Copy the old inode root contents into a real block and make the * broot point to it. diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 6c5eb4c..6d3ec2b 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -749,8 +749,7 @@ xfs_iomap_write_allocate( * pointer that the caller gave to us. */ error = xfs_bmapi_write(tp, ip, map_start_fsb, - count_fsb, - XFS_BMAPI_STACK_SWITCH, + count_fsb, 0, &first_block, 1, imap, &nimaps, &free_list); if (error) -- cgit v0.10.2 From c6930992948adf0f8fc1f6ff1da51c5002a2cf95 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 14 Jul 2014 11:04:39 +1000 Subject: Revert "drm/i915: reverse dp link param selection, prefer fast over wide again" This reverts commit 38aecea0ccbb909d635619cba22f1891e589b434. This breaks Haswell Thinkpad + Lenovo dock in SST mode with a HDMI monitor attached. Before this we could get a 1920x1200 mode; after this we only ever get 1024x768, and a lot of deferring. This didn't revert cleanly, but this should be fine. bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1117008 Cc: stable@vger.kernel.org # v3.15 Signed-off-by: Dave Airlie Signed-off-by: Daniel Vetter diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 075170d..8a1a4fb 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -906,8 +906,8 @@ intel_dp_compute_config(struct intel_encoder *encoder, mode_rate = intel_dp_link_required(adjusted_mode->crtc_clock, bpp); - for (lane_count = min_lane_count; lane_count <= max_lane_count; lane_count <<= 1) { - for (clock = min_clock; clock <= max_clock; clock++) { + for (clock = min_clock; clock <= max_clock; clock++) { + for (lane_count = min_lane_count; lane_count <= max_lane_count; lane_count <<= 1) { link_clock = drm_dp_bw_code_to_link_rate(bws[clock]); link_avail = intel_dp_max_data_rate(link_clock, lane_count); -- cgit v0.10.2 From 03e01349c654fbdea80d3d9b4ab599244eb55bb7 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 15 Jul 2014 07:28:41 +1000 Subject: xfs: null unused quota inodes when quota is on When quota is on, it is expected that unused quota inodes have a value of NULLFSINO. The changes to support a separate project quota in 3.12 broke this rule for filesystems without a project quota inode enabled, as the code now refuses to write the group quota inode if neither group nor project quotas are enabled. This regression was introduced by commit d892d58 ("xfs: Start using pquotaino from the superblock"). In this case, we should be writing NULLFSINO rather than nothing to ensure that we leave the group quota inode in a valid state while quotas are enabled.
Failure to do so doesn't cause a current kernel to break - the separate project quota inodes introduced translation code to always treat a zero inode as NULLFSINO. This was introduced by commit 0102629 ("xfs: Initialize all quota inodes to be NULLFSINO"), which is also in 3.12, but older kernels do not do this and hence taking a filesystem back to an older kernel can result in quotas failing initialisation at mount time. When that happens, we see this in dmesg: [ 1649.215390] XFS (sdb): Mounting Filesystem [ 1649.316894] XFS (sdb): Failed to initialize disk quotas. [ 1649.316902] XFS (sdb): Ending clean mount By ensuring that we write NULLFSINO to quota inodes that aren't active, we avoid this problem. We have to be really careful when determining if the quota inodes are active or not, because we don't want to write a NULLFSINO if the quota inodes are active and we simply aren't updating them. Signed-off-by: Dave Chinner Reviewed-by: Brian Foster Signed-off-by: Dave Chinner diff --git a/fs/xfs/xfs_sb.c b/fs/xfs/xfs_sb.c index c3453b1..7703fa6 100644 --- a/fs/xfs/xfs_sb.c +++ b/fs/xfs/xfs_sb.c @@ -483,10 +483,16 @@ xfs_sb_quota_to_disk( } /* - * GQUOTINO and PQUOTINO cannot be used together in versions - * of superblock that do not have pquotino. from->sb_flags - * tells us which quota is active and should be copied to - * disk. + * GQUOTINO and PQUOTINO cannot be used together in versions of + * superblock that do not have pquotino. from->sb_flags tells us which + * quota is active and should be copied to disk. If neither are active, + * make sure we write NULLFSINO to the sb_gquotino field as a quota + * inode value of "0" is invalid when the XFS_SB_VERSION_QUOTA feature + * bit is set. + * + * Note that we don't need to handle the sb_uquotino or sb_pquotino here + * as they do not require any translation. Hence the main sb field loop + * will write them appropriately from the in-core superblock. */ if ((*fields & XFS_SB_GQUOTINO) && (from->sb_qflags & XFS_GQUOTA_ACCT)) @@ -494,6 +500,17 @@ xfs_sb_quota_to_disk( else if ((*fields & XFS_SB_PQUOTINO) && (from->sb_qflags & XFS_PQUOTA_ACCT)) to->sb_gquotino = cpu_to_be64(from->sb_pquotino); + else { + /* + * We can't rely on just the fields being logged to tell us + * that it is safe to write NULLFSINO - we should only do that + * if quotas are not actually enabled. Hence only write + * NULLFSINO if both in-core quota inodes are NULL. + */ + if (from->sb_gquotino == NULLFSINO && + from->sb_pquotino == NULLFSINO) + to->sb_gquotino = cpu_to_be64(NULLFSINO); + } *fields &= ~(XFS_SB_PQUOTINO | XFS_SB_GQUOTINO); } -- cgit v0.10.2 From db4175ae2095634dbecd4c847da439f9c83e1b3b Mon Sep 17 00:00:00 2001 From: Antti Palosaari Date: Fri, 4 Jul 2014 05:44:39 -0300 Subject: [media] tda10071: force modulation to QPSK on DVB-S The only supported modulation for DVB-S is QPSK. In some cases the modulation parameter contains an invalid value for DVB-S, which leads to the driver refusing the tuning attempt. Due to that, hard-code the modulation to QPSK in the DVB-S case.
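One plausible way the cache ends up with an invalid value, as I read this commit: the modulation the driver checks comes from the frontend's dtv property cache, which persists across tuning calls, so a value set for an earlier DVB-S2 tune can linger into a later DVB-S tune. A hypothetical userspace sequence illustrating that (standard Linux DVB v5 API; the scenario itself is my inference, not stated in the commit):

#include <sys/ioctl.h>
#include <linux/dvb/frontend.h>

/* A prior DVB-S2/8PSK tune left DTV_MODULATION = PSK_8 in the cache.
 * This DVB-S tune sets no DTV_MODULATION of its own, so the pre-patch
 * driver sees PSK_8 for DVB-S and refuses to tune. */
static int tune_dvbs(int fd, unsigned int freq_khz, unsigned int srate)
{
	struct dtv_property props[] = {
		{ .cmd = DTV_DELIVERY_SYSTEM, .u.data = SYS_DVBS },
		{ .cmd = DTV_FREQUENCY,       .u.data = freq_khz },
		{ .cmd = DTV_SYMBOL_RATE,     .u.data = srate },
		/* note: no DTV_MODULATION property here */
		{ .cmd = DTV_TUNE },
	};
	struct dtv_properties cmdseq = {
		.num = sizeof(props) / sizeof(props[0]),
		.props = props,
	};

	return ioctl(fd, FE_SET_PROPERTY, &cmdseq);
}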
Cc: stable@vger.kernel.org Signed-off-by: Antti Palosaari Signed-off-by: Mauro Carvalho Chehab diff --git a/drivers/media/dvb-frontends/tda10071.c b/drivers/media/dvb-frontends/tda10071.c index 522fe00..49874e7 100644 --- a/drivers/media/dvb-frontends/tda10071.c +++ b/drivers/media/dvb-frontends/tda10071.c @@ -668,6 +668,7 @@ static int tda10071_set_frontend(struct dvb_frontend *fe) struct dtv_frontend_properties *c = &fe->dtv_property_cache; int ret, i; u8 mode, rolloff, pilot, inversion, div; + fe_modulation_t modulation; dev_dbg(&priv->i2c->dev, "%s: delivery_system=%d modulation=%d frequency=%d symbol_rate=%d inversion=%d pilot=%d rolloff=%d\n", @@ -702,10 +703,13 @@ static int tda10071_set_frontend(struct dvb_frontend *fe) switch (c->delivery_system) { case SYS_DVBS: + modulation = QPSK; rolloff = 0; pilot = 2; break; case SYS_DVBS2: + modulation = c->modulation; + switch (c->rolloff) { case ROLLOFF_20: rolloff = 2; @@ -750,7 +754,7 @@ static int tda10071_set_frontend(struct dvb_frontend *fe) for (i = 0, mode = 0xff; i < ARRAY_SIZE(TDA10071_MODCOD); i++) { if (c->delivery_system == TDA10071_MODCOD[i].delivery_system && - c->modulation == TDA10071_MODCOD[i].modulation && + modulation == TDA10071_MODCOD[i].modulation && c->fec_inner == TDA10071_MODCOD[i].fec) { mode = TDA10071_MODCOD[i].val; dev_dbg(&priv->i2c->dev, "%s: mode found=%02x\n", -- cgit v0.10.2 From bc760cdae9b67e689ed29c66c9c2d78d6f5f8c4b Mon Sep 17 00:00:00 2001 From: Antti Palosaari Date: Mon, 7 Jul 2014 09:05:15 -0300 Subject: [media] tda10071: add missing DVB-S2/PSK-8 FEC AUTO FEC AUTO is valid for PSK-8 modulation too. Add it. Signed-off-by: Antti Palosaari Signed-off-by: Mauro Carvalho Chehab diff --git a/drivers/media/dvb-frontends/tda10071_priv.h b/drivers/media/dvb-frontends/tda10071_priv.h index 4baf14b..4204861 100644 --- a/drivers/media/dvb-frontends/tda10071_priv.h +++ b/drivers/media/dvb-frontends/tda10071_priv.h @@ -55,6 +55,7 @@ static struct tda10071_modcod { { SYS_DVBS2, QPSK, FEC_8_9, 0x0a }, { SYS_DVBS2, QPSK, FEC_9_10, 0x0b }, /* 8PSK */ + { SYS_DVBS2, PSK_8, FEC_AUTO, 0x00 }, { SYS_DVBS2, PSK_8, FEC_3_5, 0x0c }, { SYS_DVBS2, PSK_8, FEC_2_3, 0x0d }, { SYS_DVBS2, PSK_8, FEC_3_4, 0x0e }, -- cgit v0.10.2 From b32725e84c02b4d01472770b96d1b33737b23b6d Mon Sep 17 00:00:00 2001 From: Antti Palosaari Date: Mon, 7 Jul 2014 09:52:28 -0300 Subject: [media] tda10071: fix spec inversion reporting Inversion ON was reported as inversion OFF and vice versa. Signed-off-by: Antti Palosaari Signed-off-by: Mauro Carvalho Chehab diff --git a/drivers/media/dvb-frontends/tda10071.c b/drivers/media/dvb-frontends/tda10071.c index 49874e7..d590798 100644 --- a/drivers/media/dvb-frontends/tda10071.c +++ b/drivers/media/dvb-frontends/tda10071.c @@ -838,10 +838,10 @@ static int tda10071_get_frontend(struct dvb_frontend *fe) switch ((buf[1] >> 0) & 0x01) { case 0: - c->inversion = INVERSION_OFF; + c->inversion = INVERSION_ON; break; case 1: - c->inversion = INVERSION_ON; + c->inversion = INVERSION_OFF; break; } -- cgit v0.10.2 From c2c1a6e5851fe354d39e5b7907c6c9d0a997ec16 Mon Sep 17 00:00:00 2001 From: Antti Palosaari Date: Tue, 8 Jul 2014 02:48:28 -0300 Subject: [media] tda10071: fix returned symbol rate calculation Detected symbol rate value was returned too small. 
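The fix that follows is plain unit scaling: judging from the one-line change, the three bytes read back from the demod report the symbol rate in units of 1000 symbols per second, while the DVB core expects plain Sps. A small distilled sketch (the helper name is hypothetical; in the driver the math is done inline in the get_frontend path):

/* Convert the demod's 3-byte symbol-rate readback (apparently in kSps,
 * inferred from the fix) to the Sps value the DVB core expects. */
static u32 symbol_rate_sps(const u8 buf[3])
{
	u32 ksps = (buf[0] << 16) | (buf[1] << 8) | (buf[2] << 0);

	return ksps * 1000;	/* e.g. 27500 -> 27500000 (27.5 MSps) */
}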
Signed-off-by: Antti Palosaari Signed-off-by: Mauro Carvalho Chehab diff --git a/drivers/media/dvb-frontends/tda10071.c b/drivers/media/dvb-frontends/tda10071.c index d590798..9619be5 100644 --- a/drivers/media/dvb-frontends/tda10071.c +++ b/drivers/media/dvb-frontends/tda10071.c @@ -860,7 +860,7 @@ static int tda10071_get_frontend(struct dvb_frontend *fe) if (ret) goto error; - c->symbol_rate = (buf[0] << 16) | (buf[1] << 8) | (buf[2] << 0); + c->symbol_rate = ((buf[0] << 16) | (buf[1] << 8) | (buf[2] << 0)) * 1000; return ret; error: -- cgit v0.10.2 From 242841d3d71191348f98310e2d2001e1001d8630 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 9 Jul 2014 06:20:44 -0300 Subject: [media] gspca_pac7302: Add new usb-id for Genius i-Look 317 Tested-and-reported-by: yullaw Cc: stable@vger.kernel.org Signed-off-by: Hans de Goede Signed-off-by: Mauro Carvalho Chehab diff --git a/drivers/media/usb/gspca/pac7302.c b/drivers/media/usb/gspca/pac7302.c index 2fd1c5e..339adce 100644 --- a/drivers/media/usb/gspca/pac7302.c +++ b/drivers/media/usb/gspca/pac7302.c @@ -928,6 +928,7 @@ static const struct usb_device_id device_table[] = { {USB_DEVICE(0x093a, 0x2620)}, {USB_DEVICE(0x093a, 0x2621)}, {USB_DEVICE(0x093a, 0x2622), .driver_info = FL_VFLIP}, + {USB_DEVICE(0x093a, 0x2623), .driver_info = FL_VFLIP}, {USB_DEVICE(0x093a, 0x2624), .driver_info = FL_VFLIP}, {USB_DEVICE(0x093a, 0x2625)}, {USB_DEVICE(0x093a, 0x2626)}, -- cgit v0.10.2 From bb78e7a12aa4898901d44bcb9d418f4188594427 Mon Sep 17 00:00:00 2001 From: Martin Peres Date: Mon, 14 Jul 2014 11:12:56 +0200 Subject: drm/nouveau/therm: fix a potential deadlock in the therm monitoring code Signed-off-by: Martin Peres Tested-by: Stefan Ringel Signed-off-by: Ben Skeggs diff --git a/drivers/gpu/drm/nouveau/core/subdev/therm/temp.c b/drivers/gpu/drm/nouveau/core/subdev/therm/temp.c index cfde9eb..6212537 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/therm/temp.c +++ b/drivers/gpu/drm/nouveau/core/subdev/therm/temp.c @@ -192,11 +192,11 @@ alarm_timer_callback(struct nouveau_alarm *alarm) nouveau_therm_threshold_hyst_polling(therm, &sensor->thrs_shutdown, NOUVEAU_THERM_THRS_SHUTDOWN); + spin_unlock_irqrestore(&priv->sensor.alarm_program_lock, flags); + /* schedule the next poll in one second */ if (therm->temp_get(therm) >= 0 && list_empty(&alarm->head)) - ptimer->alarm(ptimer, 1000 * 1000 * 1000, alarm); - - spin_unlock_irqrestore(&priv->sensor.alarm_program_lock, flags); + ptimer->alarm(ptimer, 1000000000ULL, alarm); } void -- cgit v0.10.2 From 4320f6b1d9db4ca912c5eb6ecb328b2e090e1586 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 15 Jul 2014 08:51:27 +0200 Subject: PM / sleep: Fix request_firmware() error at resume The commit [247bc037: PM / Sleep: Mitigate race between the freezer and request_firmware()] introduced the finer state control, but it also leads to a new bug; for example, a bug report regarding the firmware loading of intel BT device at suspend/resume: https://bugzilla.novell.com/show_bug.cgi?id=873790 The root cause seems to be a small window between the process resume and the clear of usermodehelper lock. The request_firmware() function checks the UMH lock and gives up when it's in UMH_DISABLE state. This is for avoiding the invalid f/w loading during suspend/resume phase. The problem is, however, that usermodehelper_enable() is called at the end of thaw_processes(). Thus, a thawed process in between can kick off the f/w loader code path (in this case, via btusb_setup_intel()) even before the call of usermodehelper_enable(). 
Then usermodehelper_read_trylock() returns an error and request_firmware() spews WARN_ON() in the end. This one-liner patch fixes the issue by setting the UMH_FREEZING state again before restarting tasks, so that the call of request_firmware() will be blocked until the end of this function instead of returning an error. Fixes: 247bc0374254 (PM / Sleep: Mitigate race between the freezer and request_firmware()) Link: https://bugzilla.novell.com/show_bug.cgi?id=873790 Cc: 3.4+ # 3.4+ Signed-off-by: Takashi Iwai Signed-off-by: Rafael J. Wysocki diff --git a/kernel/power/process.c b/kernel/power/process.c index 0ca8d83..4ee194e 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -186,6 +186,7 @@ void thaw_processes(void) printk("Restarting tasks ... "); + __usermodehelper_set_disable_depth(UMH_FREEZING); thaw_workqueues(); read_lock(&tasklist_lock); -- cgit v0.10.2 From 653a3538f865d26350727df25397bee2bacde773 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Tue, 15 Jul 2014 14:26:13 +0200 Subject: PM / sleep: fix freeze_ops NULL pointer dereferences This patch fixes a NULL pointer dereference issue introduced by commit 1f0b63866fc1 (ACPI / PM: Hold ACPI scan lock over the "freeze" sleep state). Fixes: 1f0b63866fc1 (ACPI / PM: Hold ACPI scan lock over the "freeze" sleep state) Link: http://marc.info/?l=linux-pm&m=140541182017443&w=2 Reported-and-tested-by: Alexander Stein Signed-off-by: Zhang Rui Signed-off-by: Rafael J. Wysocki diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 4dd8822..ed35a47 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -306,7 +306,7 @@ int suspend_devices_and_enter(suspend_state_t state) error = suspend_ops->begin(state); if (error) goto Close; - } else if (state == PM_SUSPEND_FREEZE && freeze_ops->begin) { + } else if (state == PM_SUSPEND_FREEZE && freeze_ops && freeze_ops->begin) { error = freeze_ops->begin(); if (error) goto Close; @@ -335,7 +335,7 @@ int suspend_devices_and_enter(suspend_state_t state) Close: if (need_suspend_ops(state) && suspend_ops->end) suspend_ops->end(); - else if (state == PM_SUSPEND_FREEZE && freeze_ops->end) + else if (state == PM_SUSPEND_FREEZE && freeze_ops && freeze_ops->end) freeze_ops->end(); return error; -- cgit v0.10.2 From 8abfb8727f4a724d31f9ccfd8013fbd16d539445 Mon Sep 17 00:00:00 2001 From: "zhangwei(Jovi)" Date: Thu, 18 Jul 2013 16:31:05 +0800 Subject: tracing: Add ftrace_trace_stack into __trace_puts/__trace_bputs Currently the trace option stacktrace is not applicable to trace_printk() with a constant string argument; the reason is that the ftrace_trace_stack() call is missing from __trace_puts()/__trace_bputs(). In contrast, when using trace_printk() with a non-constant string argument (which calls into __trace_printk()/__trace_bprintk()), the stacktrace option works. This inconsistent behaviour confuses users a lot.
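For reference, which pair of functions a trace_printk() call lands in is decided at compile time by its argument list; a minimal illustration (kernel-module context assumed, 'depth' is a made-up variable):

#include <linux/kernel.h>

static void trace_printk_example(int depth)
{
	/* Constant string, no varargs: routed to trace_puts()/__trace_bputs(),
	 * the paths that were missing the ftrace_trace_stack() call. */
	trace_printk("entering fast path\n");

	/* Format with arguments: routed to __trace_printk()/__trace_bprintk(),
	 * where the stacktrace option already worked. */
	trace_printk("queue depth = %d\n", depth);
}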
Link: http://lkml.kernel.org/p/51E7A7C9.9040401@huawei.com Cc: stable@vger.kernel.org # 3.10+ Signed-off-by: zhangwei(Jovi) Signed-off-by: Steven Rostedt diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index f243444..a6ffc89 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -466,6 +466,9 @@ int __trace_puts(unsigned long ip, const char *str, int size) struct print_entry *entry; unsigned long irq_flags; int alloc; + int pc; + + pc = preempt_count(); if (unlikely(tracing_selftest_running || tracing_disabled)) return 0; @@ -475,7 +478,7 @@ int __trace_puts(unsigned long ip, const char *str, int size) local_save_flags(irq_flags); buffer = global_trace.trace_buffer.buffer; event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, - irq_flags, preempt_count()); + irq_flags, pc); if (!event) return 0; @@ -492,6 +495,7 @@ int __trace_puts(unsigned long ip, const char *str, int size) entry->buf[size] = '\0'; __buffer_unlock_commit(buffer, event); + ftrace_trace_stack(buffer, irq_flags, 4, pc); return size; } @@ -509,6 +513,9 @@ int __trace_bputs(unsigned long ip, const char *str) struct bputs_entry *entry; unsigned long irq_flags; int size = sizeof(struct bputs_entry); + int pc; + + pc = preempt_count(); if (unlikely(tracing_selftest_running || tracing_disabled)) return 0; @@ -516,7 +523,7 @@ int __trace_bputs(unsigned long ip, const char *str) local_save_flags(irq_flags); buffer = global_trace.trace_buffer.buffer; event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size, - irq_flags, preempt_count()); + irq_flags, pc); if (!event) return 0; @@ -525,6 +532,7 @@ int __trace_bputs(unsigned long ip, const char *str) entry->str = str; __buffer_unlock_commit(buffer, event); + ftrace_trace_stack(buffer, irq_flags, 4, pc); return 1; } -- cgit v0.10.2 From 5f8bf2d263a20b986225ae1ed7d6759dc4b93af9 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 15 Jul 2014 11:05:12 -0400 Subject: tracing: Fix graph tracer with stack tracer on other archs Running my ftrace tests on PowerPC, it failed the test that checks if function_graph tracer is affected by the stack tracer. It was. Looking into this, I found that update_function_graph_func() must be called even if the trampoline function is not changed. This is because archs like PowerPC do not support ftrace_ops being passed by assembly and instead use a helper function (what the trampoline function points to). Since this function is not changed even when multiple ftrace_ops are added to the code, the test that falls out before calling update_function_graph_func() will miss that the update must still be done. Call update_function_graph_func() for all calls to update_ftrace_function(). Cc: stable@vger.kernel.org # 3.3+ Signed-off-by: Steven Rostedt diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 5b372e3..ac9d1da 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -265,12 +265,12 @@ static void update_ftrace_function(void) func = ftrace_ops_list_func; } + update_function_graph_func(); + /* If there's no change, then do nothing more here */ if (ftrace_trace_function == func) return; - update_function_graph_func(); - /* * If we are using the list function, it doesn't care * about the function_trace_ops.
-- cgit v0.10.2 From eec7e1c16d2b65e38137686dd9b7e102c2150905 Mon Sep 17 00:00:00 2001 From: Alexey Asemov Date: Tue, 15 Jul 2014 10:28:42 +0400 Subject: libata: EH should handle AMNF error condition as a media error libata-eh.c should handle the AMNF error condition (error byte bit 0, usually code 0x01) along with UNC as a media error so that the SCSI stack can handle it properly (the translation for code 0x01 is already present in libata-scsi.c), but it was never passed down due to lack of handling in EH. While using a Linux-based machine (AMD 6550M-based notebook, PCI IDs for the controller are 1022:7801 subsys 1025:059d) and ddrescue to salvage data from a failing hard drive (WD7500BPVT 2.5" 750G SATA2), I've found that a pure AMNF 0x01 error code generates a generic "device error" that is retried several times by the SCSI stack instead of a "media error" that is passed up to software. So we may assume the deprecated AMNF error code is surely not dead yet, and it's better for it to be handled properly. As we may see, it is used by modern enough devices, and used properly: the drive returned AMNF only when the IDs for a track could not be read completely due to a dying head or positioning; otherwise it returned UNC(orrectables). Not handling it causes a wrong generic error code ("device error") to be reported down the stack, can damage failing drives further because of excessive retries, and slows salvaging down a lot. Also, there is handling code in libata-scsi.c for the 0x01 AMNF error already. https://bugzilla.kernel.org/show_bug.cgi?id=80031 tj: Shortened $SUBJ and moved its content to the first paragraph. Signed-off-by: Alexey Asemov Signed-off-by: Tejun Heo diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index 6760fc4..dad83df 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -1811,7 +1811,7 @@ static unsigned int ata_eh_analyze_tf(struct ata_queued_cmd *qc, case ATA_DEV_ATA: if (err & ATA_ICRC) qc->err_mask |= AC_ERR_ATA_BUS; - if (err & ATA_UNC) + if (err & (ATA_UNC | ATA_AMNF)) qc->err_mask |= AC_ERR_MEDIA; if (err & ATA_IDNF) qc->err_mask |= AC_ERR_INVALID; @@ -2556,11 +2556,12 @@ static void ata_eh_link_report(struct ata_link *link) } if (cmd->command != ATA_CMD_PACKET && - (res->feature & (ATA_ICRC | ATA_UNC | ATA_IDNF | - ATA_ABORTED))) - ata_dev_err(qc->dev, "error: { %s%s%s%s}\n", + (res->feature & (ATA_ICRC | ATA_UNC | ATA_AMNF | + ATA_IDNF | ATA_ABORTED))) + ata_dev_err(qc->dev, "error: { %s%s%s%s%s}\n", res->feature & ATA_ICRC ? "ICRC " : "", res->feature & ATA_UNC ? "UNC " : "", + res->feature & ATA_AMNF ? "AMNF " : "", res->feature & ATA_IDNF ? "IDNF " : "", res->feature & ATA_ABORTED ? "ABRT " : ""); #endif -- cgit v0.10.2 From f0160a5a2912267c02cfe692eac955c360de5fdf Mon Sep 17 00:00:00 2001 From: "zhangwei(Jovi)" Date: Thu, 18 Jul 2013 16:31:18 +0800 Subject: tracing: Add TRACE_ITER_PRINTK flag check in __trace_puts/__trace_bputs The TRACE_ITER_PRINTK check in __trace_puts/__trace_bputs is missing, so add it, to be consistent with __trace_printk/__trace_bprintk. Those functions are all called by the same function: trace_printk().
Link: http://lkml.kernel.org/p/51E7A7D6.8090900@huawei.com Cc: stable@vger.kernel.org # 3.11+ Signed-off-by: zhangwei(Jovi) Signed-off-by: Steven Rostedt diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index a6ffc89..bda9621 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -468,6 +468,9 @@ int __trace_puts(unsigned long ip, const char *str, int size) int alloc; int pc; + if (!(trace_flags & TRACE_ITER_PRINTK)) + return 0; + pc = preempt_count(); if (unlikely(tracing_selftest_running || tracing_disabled)) @@ -515,6 +518,9 @@ int __trace_bputs(unsigned long ip, const char *str) int size = sizeof(struct bputs_entry); int pc; + if (!(trace_flags & TRACE_ITER_PRINTK)) + return 0; + pc = preempt_count(); if (unlikely(tracing_selftest_running || tracing_disabled)) -- cgit v0.10.2 From 9aec8629ec829fc9403788cd959e05dd87988bd1 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Mon, 14 Jul 2014 16:35:54 -0400 Subject: dm thin metadata: do not allow the data block size to change The block size for the thin-pool's data device must remain fixed for the life of the thin-pool. Disallow any attempt to change the thin-pool's data block size. It should be noted that attempting to change the data block size via thin-pool table reload will be ignored as a side-effect of the thin-pool handover that the thin-pool target does during thin-pool table reload. Here is an example outcome of attempting to load a thin-pool table that reduced the thin-pool's data block size from 1024K to 512K. Before: kernel: device-mapper: thin: 253:4: growing the data device from 204800 to 409600 blocks After: kernel: device-mapper: thin metadata: changing the data block size (from 2048 to 1024) is not supported kernel: device-mapper: table: 253:4: thin-pool: Error creating metadata object kernel: device-mapper: ioctl: error adding target to table Signed-off-by: Mike Snitzer Acked-by: Joe Thornber Cc: stable@vger.kernel.org diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index b086a94..e9d33ad 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -613,6 +613,15 @@ static int __open_metadata(struct dm_pool_metadata *pmd) disk_super = dm_block_data(sblock); + /* Verify the data block size hasn't changed */ + if (le32_to_cpu(disk_super->data_block_size) != pmd->data_block_size) { + DMERR("changing the data block size (from %u to %llu) is not supported", + le32_to_cpu(disk_super->data_block_size), + (unsigned long long)pmd->data_block_size); + r = -EINVAL; + goto bad_unlock_sblock; + } + r = __check_incompat_features(disk_super, pmd); if (r < 0) goto bad_unlock_sblock; -- cgit v0.10.2 From 048e5a07f282c57815b3901d4a68a77fa131ce0a Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Mon, 14 Jul 2014 16:59:39 -0400 Subject: dm cache metadata: do not allow the data block size to change The block size for the dm-cache's data device must remain fixed for the life of the cache. Disallow any attempt to change the cache's data block size.
Signed-off-by: Mike Snitzer Acked-by: Joe Thornber Cc: stable@vger.kernel.org diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c index 4ead4ba..d2899e7 100644 --- a/drivers/md/dm-cache-metadata.c +++ b/drivers/md/dm-cache-metadata.c @@ -425,6 +425,15 @@ static int __open_metadata(struct dm_cache_metadata *cmd) disk_super = dm_block_data(sblock); + /* Verify the data block size hasn't changed */ + if (le32_to_cpu(disk_super->data_block_size) != cmd->data_block_size) { + DMERR("changing the data block size (from %u to %llu) is not supported", + le32_to_cpu(disk_super->data_block_size), + (unsigned long long)cmd->data_block_size); + r = -EINVAL; + goto bad; + } + r = __check_incompat_features(disk_super, cmd); if (r < 0) goto bad; -- cgit v0.10.2 From 97b8ee845393701edc06e27ccec2876ff9596019 Mon Sep 17 00:00:00 2001 From: Martin Lau Date: Mon, 9 Jun 2014 23:06:42 -0700 Subject: ring-buffer: Fix polling on trace_pipe ring_buffer_poll_wait() should always put the poll_table to its wait_queue even if there is immediate data available. Otherwise, the following epoll and read sequence will eventually hang forever: 1. Put some data to make the trace_pipe ring_buffer read ready first 2. epoll_ctl(efd, EPOLL_CTL_ADD, trace_pipe_fd, ee) 3. epoll_wait() 4. read(trace_pipe_fd) till EAGAIN 5. Add some more data to the trace_pipe ring_buffer 6. epoll_wait() -> this epoll_wait() will block forever ~ During the epoll_ctl(efd, EPOLL_CTL_ADD,...) call in step 2, ring_buffer_poll_wait() returns immediately without adding poll_table, which has poll_table->_qproc pointing to ep_poll_callback(), to its wait_queue. ~ During the epoll_wait() call in step 3 and step 6, ring_buffer_poll_wait() cannot add ep_poll_callback() to its wait_queue because the poll_table->_qproc is NULL and it is how epoll works. ~ When there is new data available in step 6, ring_buffer does not know it has to call ep_poll_callback() because it is not in its wait queue. Hence, block forever. Other poll implementations seem to call poll_wait() unconditionally as the very first thing to do. For example, tcp_poll() in tcp.c. Link: http://lkml.kernel.org/p/20140610060637.GA14045@devbig242.prn2.facebook.com Cc: stable@vger.kernel.org # 2.6.27 Fixes: 2a2cc8f7c4d0 "ftrace: allow the event pipe to be polled" Reviewed-by: Chris Mason Signed-off-by: Martin Lau Signed-off-by: Steven Rostedt diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 7c56c3d..ff70271 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -616,10 +616,6 @@ int ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu, struct ring_buffer_per_cpu *cpu_buffer; struct rb_irq_work *work; - if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) || - (cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, cpu))) - return POLLIN | POLLRDNORM; - if (cpu == RING_BUFFER_ALL_CPUS) work = &buffer->irq_work; else { -- cgit v0.10.2 From 44305ebde243a7cce2c592cc89afe5041d8bf884 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Tue, 20 May 2014 22:35:38 -0700 Subject: UBI: fastmap: do not miss bit-flips The return value from 'ubi_io_read_ec_hdr()' was stored in 'err', not in 'ret'. This fix makes sure Fastmap-enabled UBI does not miss bit-flip events while reading EC headers, and scrubs the affected PEBs. This issue was reported by Coverity Scan. Artem: improved the commit message.
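The bug pattern here is testing a different variable than the one the return value was stored in. A simplified sketch of the corrected flow (condensed from scan_pool(); the real code maps other positive codes to UBI_BAD_FASTMAP in the same branch):

	/* Test the variable the helper's return value actually went
	 * into ('err'), not the function-level 'ret'. */
	err = ubi_io_read_ec_hdr(ubi, pnum, ech, 0);
	if (err && err != UBI_IO_BITFLIPS) {
		ret = err > 0 ? UBI_BAD_FASTMAP : err;	/* bad or fatal */
		goto out;
	} else if (err == UBI_IO_BITFLIPS) {
		scrub = 1;	/* header read OK, but schedule scrubbing */
	}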
Signed-off-by: Brian Norris Acked-by: Richard Weinberger Signed-off-by: Artem Bityutskiy diff --git a/drivers/mtd/ubi/fastmap.c b/drivers/mtd/ubi/fastmap.c index 72f39da..0431b46 100644 --- a/drivers/mtd/ubi/fastmap.c +++ b/drivers/mtd/ubi/fastmap.c @@ -423,7 +423,7 @@ static int scan_pool(struct ubi_device *ubi, struct ubi_attach_info *ai, pnum, err); ret = err > 0 ? UBI_BAD_FASTMAP : err; goto out; - } else if (ret == UBI_IO_BITFLIPS) + } else if (err == UBI_IO_BITFLIPS) scrub = 1; /* -- cgit v0.10.2 From 4a36b44c77515ca1ad799577d3f9e2fa4d68bffa Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 18 Jun 2014 12:32:19 +0200 Subject: s390: require mvcos facility, not tod clock steering facility Inlined uaccess functions require the mvcos facility (bit 27), not the tod clock steering facility (bit 28) for z10 and newer machines. Signed-off-by: David Hildenbrand Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S index 7ba7d67..e88d35d 100644 --- a/arch/s390/kernel/head.S +++ b/arch/s390/kernel/head.S @@ -437,11 +437,11 @@ ENTRY(startup_kdump) #if defined(CONFIG_64BIT) #if defined(CONFIG_MARCH_ZEC12) - .long 3, 0xc100efea, 0xf46ce800, 0x00400000 + .long 3, 0xc100eff2, 0xf46ce800, 0x00400000 #elif defined(CONFIG_MARCH_Z196) - .long 2, 0xc100efea, 0xf46c0000 + .long 2, 0xc100eff2, 0xf46c0000 #elif defined(CONFIG_MARCH_Z10) - .long 2, 0xc100efea, 0xf0680000 + .long 2, 0xc100eff2, 0xf0680000 #elif defined(CONFIG_MARCH_Z9_109) .long 1, 0xc100efc2 #elif defined(CONFIG_MARCH_Z990) -- cgit v0.10.2 From 7cbe4afe854a9a352d9562106449bc55d17d5e5b Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Thu, 26 Jun 2014 10:48:56 +0200 Subject: s390/3270: correct size detection with the read-partition command The size detection for 3270 terminals with the read-partition command is broken. The raw3270_reset_device_cb function clears the init_data array, but if raw3270_writesf_readpart has been called the read-partition command is queued, which needs the init_data array. In this case the size detection will fail and the invalid command does funny things to the terminal. Signed-off-by: Martin Schwidefsky diff --git a/drivers/s390/char/raw3270.c b/drivers/s390/char/raw3270.c index 15b3459..220acb4 100644 --- a/drivers/s390/char/raw3270.c +++ b/drivers/s390/char/raw3270.c @@ -633,7 +633,6 @@ raw3270_reset_device_cb(struct raw3270_request *rq, void *data) } else raw3270_writesf_readpart(rp); memset(&rp->init_reset, 0, sizeof(rp->init_reset)); - memset(&rp->init_data, 0, sizeof(rp->init_data)); } static int -- cgit v0.10.2 From 8fb878c5f12bf7fd6099d466139bd4564418e583 Mon Sep 17 00:00:00 2001 From: Yijing Wang Date: Tue, 8 Jul 2014 10:08:05 +0800 Subject: s390/MSI: Use standard mask and unmask functions The MSI irqchip on s390 has its own mask and unmask MSI irq functions, zpci_enable_irq() and zpci_disable_irq(). They mask and unmask MSI irqs in standard ways, with nothing arch-specific. The MSI driver provides two standard global functions, mask_msi_irq() and unmask_msi_irq(). The local zpci_enable_irq() and zpci_disable_irq() are almost the same as the standard two; the difference is that the local mask/unmask functions read the mask status from the hardware every time before masking and unmasking, then change the value and write it back. The standard functions save the mask status after masking and unmasking the MSI irq, and use the cached status to change the mask state.
When we mask or unmask an MSI irq, we always cache its mask status, except when we know we need not cache it, as in pci_msi_shutdown(). So it is safe to replace the local functions with the standard ones. Signed-off-by: Yijing Wang [sebott: fixed inverted function pointers] Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 9ddc51e..30de427 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -48,13 +48,10 @@ static LIST_HEAD(zpci_list); static DEFINE_SPINLOCK(zpci_list_lock); -static void zpci_enable_irq(struct irq_data *data); -static void zpci_disable_irq(struct irq_data *data); - static struct irq_chip zpci_irq_chip = { .name = "zPCI", - .irq_unmask = zpci_enable_irq, - .irq_mask = zpci_disable_irq, + .irq_unmask = unmask_msi_irq, + .irq_mask = mask_msi_irq, }; static DECLARE_BITMAP(zpci_domain, ZPCI_NR_DEVICES); @@ -244,43 +241,6 @@ static int zpci_cfg_store(struct zpci_dev *zdev, int offset, u32 val, u8 len) return rc; } -static int zpci_msi_set_mask_bits(struct msi_desc *msi, u32 mask, u32 flag) -{ - int offset, pos; - u32 mask_bits; - - if (msi->msi_attrib.is_msix) { - offset = msi->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE + - PCI_MSIX_ENTRY_VECTOR_CTRL; - msi->masked = readl(msi->mask_base + offset); - writel(flag, msi->mask_base + offset); - } else if (msi->msi_attrib.maskbit) { - pos = (long) msi->mask_base; - pci_read_config_dword(msi->dev, pos, &mask_bits); - mask_bits &= ~(mask); - mask_bits |= flag & mask; - pci_write_config_dword(msi->dev, pos, mask_bits); - } else - return 0; - - msi->msi_attrib.maskbit = !!flag; - return 1; -} - -static void zpci_enable_irq(struct irq_data *data) -{ - struct msi_desc *msi = irq_get_msi_desc(data->irq); - - zpci_msi_set_mask_bits(msi, 1, 0); -} - -static void zpci_disable_irq(struct irq_data *data) -{ - struct msi_desc *msi = irq_get_msi_desc(data->irq); - - zpci_msi_set_mask_bits(msi, 1, 1); -} - void pcibios_fixup_bus(struct pci_bus *bus) { } @@ -487,7 +447,10 @@ void arch_teardown_msi_irqs(struct pci_dev *pdev) /* Release MSI interrupts */ list_for_each_entry(msi, &pdev->msi_list, list) { - zpci_msi_set_mask_bits(msi, 1, 1); + if (msi->msi_attrib.is_msix) + default_msix_mask_irq(msi, 1); + else + default_msi_mask_irq(msi, 1, 1); irq_set_msi_desc(msi->irq, NULL); irq_free_desc(msi->irq); msi->msg.address_lo = 0; -- cgit v0.10.2 From dab6cf55f81a6e16b8147aed9a843e1691dcd318 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 23 Jun 2014 15:29:40 +0200 Subject: s390/ptrace: fix PSW mask check The PSW mask check of the PTRACE_POKEUSR_AREA command is incorrect. The PSW_MASK_USER define contains the PSW_MASK_ASC bits; the ptrace interface accepts all combinations for the address-space-control bits. To protect the kernel space, the PSW mask check in ptrace needs to reject the address-space-control bit combination for home space. Fixes CVE-2014-3534 Cc: stable@vger.kernel.org Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 2d716734..5dc7ad9 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -334,9 +334,14 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data) unsigned long mask = PSW_MASK_USER; mask |= is_ri_task(child) ? PSW_MASK_RI : 0; - if ((data & ~mask) != PSW_USER_BITS) + if ((data ^ PSW_USER_BITS) & ~mask) + /* Invalid psw mask.
*/ + return -EINVAL; + if ((data & PSW_MASK_ASC) == PSW_ASC_HOME) + /* Invalid address-space-control bits */ return -EINVAL; if ((data & PSW_MASK_EA) && !(data & PSW_MASK_BA)) + /* Invalid addressing mode bits */ return -EINVAL; } *(addr_t *)((addr_t) &task_pt_regs(child)->psw + addr) = data; @@ -672,9 +677,12 @@ static int __poke_user_compat(struct task_struct *child, mask |= is_ri_task(child) ? PSW32_MASK_RI : 0; /* Build a 64 bit psw mask from 31 bit mask. */ - if ((tmp & ~mask) != PSW32_USER_BITS) + if ((tmp ^ PSW32_USER_BITS) & ~mask) /* Invalid psw mask. */ return -EINVAL; + if ((data & PSW32_MASK_ASC) == PSW32_ASC_HOME) + /* Invalid address-space-control bits */ + return -EINVAL; regs->psw.mask = (regs->psw.mask & ~PSW_MASK_USER) | (regs->psw.mask & PSW_MASK_BA) | (__u64)(tmp & mask) << 32; -- cgit v0.10.2 From 666e68e0dde826ae146b980099f1719f74fa968c Mon Sep 17 00:00:00 2001 From: Ingo Tuchscherer Date: Mon, 14 Jul 2014 19:11:48 +0200 Subject: s390/zcrypt: improve device probing for zcrypt adapter cards Improve the device probing process for zcrypt adapters so that a service request can be transmitted during the registration process. Signed-off-by: Ingo Tuchscherer Signed-off-by: Martin Schwidefsky diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index 69ef4f8..4038437 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -901,10 +901,15 @@ static int ap_device_probe(struct device *dev) int rc; ap_dev->drv = ap_drv; + + spin_lock_bh(&ap_device_list_lock); + list_add(&ap_dev->list, &ap_device_list); + spin_unlock_bh(&ap_device_list_lock); + rc = ap_drv->probe ? ap_drv->probe(ap_dev) : -ENODEV; - if (!rc) { + if (rc) { spin_lock_bh(&ap_device_list_lock); - list_add(&ap_dev->list, &ap_device_list); + list_del_init(&ap_dev->list); spin_unlock_bh(&ap_device_list_lock); } return rc; -- cgit v0.10.2 From 9f86745722d95bc7f855069bd82285bd10dc97ff Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 15 Jul 2014 10:41:37 +0200 Subject: s390: fix restore of invalid floating-point-control The fixup of the inline assembly to restore the floating-point-control register needs to check for an instruction address *after* the lfpc instruction, as the specification and data exceptions are suppressing. Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h index df38c70..18ea9e3 100644 --- a/arch/s390/include/asm/switch_to.h +++ b/arch/s390/include/asm/switch_to.h @@ -51,8 +51,8 @@ static inline int restore_fp_ctl(u32 *fpc) return 0; asm volatile( - "0: lfpc %1\n" - " la %0,0\n" + " lfpc %1\n" + "0: la %0,0\n" "1:\n" EX_TABLE(0b,1b) : "=d" (rc) : "Q" (*fpc), "0" (-EINVAL)); -- cgit v0.10.2 From d3f44fbabe55132832e152606365adb640296378 Mon Sep 17 00:00:00 2001 From: Paul Bolle Date: Mon, 30 Jun 2014 16:32:29 +0200 Subject: x86: Remove unused variable "polling" Compile tested. "polling" is unused since commit f80c5b39b80a ("sched/idle, x86: Switch from TS_POLLING to TIF_POLLING_NRFLAG").
Signed-off-by: Paul Bolle Signed-off-by: Peter Zijlstra Cc: Jiri Kosina Link: http://lkml.kernel.org/r/1404138749.2978.6.camel@x41 Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index f3a1f04..5848744 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -841,7 +841,6 @@ static int apm_do_idle(void) u32 eax; u8 ret = 0; int idled = 0; - int polling; int err = 0; if (!need_resched()) { -- cgit v0.10.2 From 37e9562453b813d2ea527bd9531fef2c3c592847 Mon Sep 17 00:00:00 2001 From: Jason Low Date: Fri, 4 Jul 2014 20:49:32 -0700 Subject: locking/rwsem: Allow conservative optimistic spinning when readers have lock Commit 4fc828e24cd9 ("locking/rwsem: Support optimistic spinning") introduced a major performance regression for workloads such as xfs_repair which mix read and write locking of the mmap_sem across many threads. The result was xfs_repair ran 5x slower on 3.16-rc2 than on 3.15 and used 20x more system CPU time. Perf profiles indicate in some workloads that significant time can be spent spinning on !owner. This is because we don't set the lock owner when reader(s) obtain the rwsem. In this patch, we'll modify rwsem_can_spin_on_owner() such that we'll return false if there is no lock owner. The rationale is that if we just entered the slowpath, yet there is no lock owner, then there is a possibility that a reader has the lock. To be conservative, we'll avoid spinning in these situations. This patch reduced the total run time of the xfs_repair workload from about 4 minutes 24 seconds down to approximately 1 minute 26 seconds, back to close to the same performance as on 3.15. Retesting of AIM7, which were some of the workloads used to test the original optimistic spinning code, confirmed that we still get big performance gains with optimistic spinning, even with this additional regression fix. Davidlohr found that while the 'custom' workload took a performance hit of ~-14% to throughput for >300 users with this additional patch, the overall gain with optimistic spinning is still ~+45%. The 'disk' workload even improved by ~+15% at >1000 users. Tested-by: Dave Chinner Acked-by: Davidlohr Bueso Signed-off-by: Jason Low Signed-off-by: Peter Zijlstra Cc: Tim Chen Cc: Linus Torvalds Link: http://lkml.kernel.org/r/1404532172.2572.30.camel@j-VirtualBox Signed-off-by: Ingo Molnar diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index dacc321..c40c7d2 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c @@ -285,10 +285,10 @@ static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem) static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem) { struct task_struct *owner; - bool on_cpu = true; + bool on_cpu = false; if (need_resched()) - return 0; + return false; rcu_read_lock(); owner = ACCESS_ONCE(sem->owner); @@ -297,9 +297,9 @@ static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem) rcu_read_unlock(); /* - * If sem->owner is not set, the rwsem owner may have - * just acquired it and not set the owner yet or the rwsem - * has been released. + * If sem->owner is not set, yet we have just recently entered the + * slowpath, then there is a possibility reader(s) may have the lock. + * To be safe, avoid spinning in these situations.
*/ return on_cpu; } -- cgit v0.10.2 From 046a619d8e9746fa4c0e29e8c6b78e16efc008a8 Mon Sep 17 00:00:00 2001 From: Jason Low Date: Mon, 14 Jul 2014 10:27:48 -0700 Subject: locking/spinlocks/mcs: Rename optimistic_spin_queue() to optimistic_spin_node() Currently, the per-cpu nodes structure for the cancellable MCS spinlock is named "optimistic_spin_queue". However, in a follow up patch in the series we will be introducing a new structure that serves as the new "handle" for the lock. It would make more sense if that structure is named "optimistic_spin_queue". Additionally, since the current use of the "optimistic_spin_queue" structure are "nodes", it might be better if we rename them to "node" anyway. This preparatory patch renames all current "optimistic_spin_queue" to "optimistic_spin_node". Signed-off-by: Jason Low Signed-off-by: Peter Zijlstra Cc: Scott Norton Cc: "Paul E. McKenney" Cc: Dave Chinner Cc: Waiman Long Cc: Davidlohr Bueso Cc: Rik van Riel Cc: Andrew Morton Cc: "H. Peter Anvin" Cc: Steven Rostedt Cc: Tim Chen Cc: Konrad Rzeszutek Wilk Cc: Aswin Chandramouleeswaran Cc: Linus Torvalds Cc: Chris Mason Cc: Heiko Carstens Cc: Josef Bacik Link: http://lkml.kernel.org/r/1405358872-3732-2-git-send-email-jason.low2@hp.com Signed-off-by: Ingo Molnar diff --git a/include/linux/mutex.h b/include/linux/mutex.h index 11692de..885f3f5 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -46,7 +46,7 @@ * - detects multi-task circular deadlocks and prints out all affected * locks and tasks (and only those tasks) */ -struct optimistic_spin_queue; +struct optimistic_spin_node; struct mutex { /* 1: unlocked, 0: locked, negative: locked, possible waiters */ atomic_t count; @@ -56,7 +56,7 @@ struct mutex { struct task_struct *owner; #endif #ifdef CONFIG_MUTEX_SPIN_ON_OWNER - struct optimistic_spin_queue *osq; /* Spinner MCS lock */ + struct optimistic_spin_node *osq; /* Spinner MCS lock */ #endif #ifdef CONFIG_DEBUG_MUTEXES const char *name; diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index 8d79708..ba3f108 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -16,7 +16,7 @@ #include -struct optimistic_spin_queue; +struct optimistic_spin_node; struct rw_semaphore; #ifdef CONFIG_RWSEM_GENERIC_SPINLOCK @@ -33,7 +33,7 @@ struct rw_semaphore { * if the owner is running on the cpu. */ struct task_struct *owner; - struct optimistic_spin_queue *osq; /* spinner MCS lock */ + struct optimistic_spin_node *osq; /* spinner MCS lock */ #endif #ifdef CONFIG_DEBUG_LOCK_ALLOC struct lockdep_map dep_map; diff --git a/kernel/locking/mcs_spinlock.c b/kernel/locking/mcs_spinlock.c index 838dc9e..e9866f7 100644 --- a/kernel/locking/mcs_spinlock.c +++ b/kernel/locking/mcs_spinlock.c @@ -14,18 +14,18 @@ * called from interrupt context and we have preemption disabled while * spinning. */ -static DEFINE_PER_CPU_SHARED_ALIGNED(struct optimistic_spin_queue, osq_node); +static DEFINE_PER_CPU_SHARED_ALIGNED(struct optimistic_spin_node, osq_node); /* * Get a stable @node->next pointer, either for unlock() or unqueue() purposes. * Can return NULL in case we were the last queued and we updated @lock instead. 
*/ -static inline struct optimistic_spin_queue * -osq_wait_next(struct optimistic_spin_queue **lock, - struct optimistic_spin_queue *node, - struct optimistic_spin_queue *prev) +static inline struct optimistic_spin_node * +osq_wait_next(struct optimistic_spin_node **lock, + struct optimistic_spin_node *node, + struct optimistic_spin_node *prev) { - struct optimistic_spin_queue *next = NULL; + struct optimistic_spin_node *next = NULL; for (;;) { if (*lock == node && cmpxchg(lock, node, prev) == node) { @@ -59,10 +59,10 @@ osq_wait_next(struct optimistic_spin_queue **lock, return next; } -bool osq_lock(struct optimistic_spin_queue **lock) +bool osq_lock(struct optimistic_spin_node **lock) { - struct optimistic_spin_queue *node = this_cpu_ptr(&osq_node); - struct optimistic_spin_queue *prev, *next; + struct optimistic_spin_node *node = this_cpu_ptr(&osq_node); + struct optimistic_spin_node *prev, *next; node->locked = 0; node->next = NULL; @@ -149,10 +149,10 @@ unqueue: return false; } -void osq_unlock(struct optimistic_spin_queue **lock) +void osq_unlock(struct optimistic_spin_node **lock) { - struct optimistic_spin_queue *node = this_cpu_ptr(&osq_node); - struct optimistic_spin_queue *next; + struct optimistic_spin_node *node = this_cpu_ptr(&osq_node); + struct optimistic_spin_node *next; /* * Fast path for the uncontended case. diff --git a/kernel/locking/mcs_spinlock.h b/kernel/locking/mcs_spinlock.h index a2dbac4..c99dc00 100644 --- a/kernel/locking/mcs_spinlock.h +++ b/kernel/locking/mcs_spinlock.h @@ -118,12 +118,12 @@ void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) * mutex_lock()/rwsem_down_{read,write}() etc. */ -struct optimistic_spin_queue { - struct optimistic_spin_queue *next, *prev; +struct optimistic_spin_node { + struct optimistic_spin_node *next, *prev; int locked; /* 1 if lock acquired */ }; -extern bool osq_lock(struct optimistic_spin_queue **lock); -extern void osq_unlock(struct optimistic_spin_queue **lock); +extern bool osq_lock(struct optimistic_spin_node **lock); +extern void osq_unlock(struct optimistic_spin_node **lock); #endif /* __LINUX_MCS_SPINLOCK_H */ -- cgit v0.10.2 From 90631822c5d307b5410500806e8ac3e63928aa3e Mon Sep 17 00:00:00 2001 From: Jason Low Date: Mon, 14 Jul 2014 10:27:49 -0700 Subject: locking/spinlocks/mcs: Convert osq lock to atomic_t to reduce overhead The cancellable MCS spinlock is currently used to queue threads that are doing optimistic spinning. It uses per-cpu nodes, where a thread obtaining the lock would access and queue the local node corresponding to the CPU that it's running on. Currently, the cancellable MCS lock is implemented by using pointers to these nodes. In this patch, instead of operating on pointers to the per-cpu nodes, we store the CPU numbers in which the per-cpu nodes correspond to in atomic_t. A similar concept is used with the qspinlock. By operating on the CPU # of the nodes using atomic_t instead of pointers to those nodes, this can reduce the overhead of the cancellable MCS spinlock by 32 bits (on 64 bit systems). Signed-off-by: Jason Low Signed-off-by: Peter Zijlstra Cc: Scott Norton Cc: "Paul E. McKenney" Cc: Dave Chinner Cc: Waiman Long Cc: Davidlohr Bueso Cc: Rik van Riel Cc: Andrew Morton Cc: "H. 
Peter Anvin" Cc: Steven Rostedt Cc: Tim Chen Cc: Konrad Rzeszutek Wilk Cc: Aswin Chandramouleeswaran Cc: Linus Torvalds Cc: Chris Mason Cc: Heiko Carstens Cc: Josef Bacik Link: http://lkml.kernel.org/r/1405358872-3732-3-git-send-email-jason.low2@hp.com Signed-off-by: Ingo Molnar diff --git a/include/linux/mutex.h b/include/linux/mutex.h index 885f3f5..42aa9b9 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -17,6 +17,7 @@ #include #include #include +#include /* * Simple, straightforward mutexes with strict semantics: @@ -46,7 +47,6 @@ * - detects multi-task circular deadlocks and prints out all affected * locks and tasks (and only those tasks) */ -struct optimistic_spin_node; struct mutex { /* 1: unlocked, 0: locked, negative: locked, possible waiters */ atomic_t count; @@ -56,7 +56,7 @@ struct mutex { struct task_struct *owner; #endif #ifdef CONFIG_MUTEX_SPIN_ON_OWNER - struct optimistic_spin_node *osq; /* Spinner MCS lock */ + struct optimistic_spin_queue osq; /* Spinner MCS lock */ #endif #ifdef CONFIG_DEBUG_MUTEXES const char *name; diff --git a/include/linux/osq_lock.h b/include/linux/osq_lock.h new file mode 100644 index 0000000..b001682 --- /dev/null +++ b/include/linux/osq_lock.h @@ -0,0 +1,19 @@ +#ifndef __LINUX_OSQ_LOCK_H +#define __LINUX_OSQ_LOCK_H + +/* + * An MCS like lock especially tailored for optimistic spinning for sleeping + * lock implementations (mutex, rwsem, etc). + */ + +#define OSQ_UNLOCKED_VAL (0) + +struct optimistic_spin_queue { + /* + * Stores an encoded value of the CPU # of the tail node in the queue. + * If the queue is empty, then it's set to OSQ_UNLOCKED_VAL. + */ + atomic_t tail; +}; + +#endif diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index ba3f108..9fdcdd0 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -13,10 +13,9 @@ #include #include #include - #include +#include -struct optimistic_spin_node; struct rw_semaphore; #ifdef CONFIG_RWSEM_GENERIC_SPINLOCK @@ -33,7 +32,7 @@ struct rw_semaphore { * if the owner is running on the cpu. */ struct task_struct *owner; - struct optimistic_spin_node *osq; /* spinner MCS lock */ + struct optimistic_spin_queue osq; /* spinner MCS lock */ #endif #ifdef CONFIG_DEBUG_LOCK_ALLOC struct lockdep_map dep_map; @@ -70,7 +69,7 @@ static inline int rwsem_is_locked(struct rw_semaphore *sem) __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock), \ LIST_HEAD_INIT((name).wait_list), \ NULL, /* owner */ \ - NULL /* mcs lock */ \ + { ATOMIC_INIT(OSQ_UNLOCKED_VAL) } /* osq */ \ __RWSEM_DEP_MAP_INIT(name) } #else #define __RWSEM_INITIALIZER(name) \ diff --git a/kernel/locking/mcs_spinlock.c b/kernel/locking/mcs_spinlock.c index e9866f7..32fc16c 100644 --- a/kernel/locking/mcs_spinlock.c +++ b/kernel/locking/mcs_spinlock.c @@ -17,18 +17,44 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(struct optimistic_spin_node, osq_node); /* + * We use the value 0 to represent "no CPU", thus the encoded value + * will be the CPU number incremented by 1. + */ +static inline int encode_cpu(int cpu_nr) +{ + return cpu_nr + 1; +} + +static inline struct optimistic_spin_node *decode_cpu(int encoded_cpu_val) +{ + int cpu_nr = encoded_cpu_val - 1; + + return per_cpu_ptr(&osq_node, cpu_nr); +} + +/* * Get a stable @node->next pointer, either for unlock() or unqueue() purposes. * Can return NULL in case we were the last queued and we updated @lock instead. 
*/ static inline struct optimistic_spin_node * -osq_wait_next(struct optimistic_spin_node **lock, +osq_wait_next(struct optimistic_spin_queue *lock, struct optimistic_spin_node *node, struct optimistic_spin_node *prev) { struct optimistic_spin_node *next = NULL; + int curr = encode_cpu(smp_processor_id()); + int old; + + /* + * If there is a prev node in queue, then the 'old' value will be + * the prev node's CPU #, else it's set to OSQ_UNLOCKED_VAL since if + * we're currently last in queue, then the queue will then become empty. + */ + old = prev ? prev->cpu : OSQ_UNLOCKED_VAL; for (;;) { - if (*lock == node && cmpxchg(lock, node, prev) == node) { + if (atomic_read(&lock->tail) == curr && + atomic_cmpxchg(&lock->tail, curr, old) == curr) { /* * We were the last queued, we moved @lock back. @prev * will now observe @lock and will complete its @@ -59,18 +85,23 @@ osq_wait_next(struct optimistic_spin_node **lock, return next; } -bool osq_lock(struct optimistic_spin_node **lock) +bool osq_lock(struct optimistic_spin_queue *lock) { struct optimistic_spin_node *node = this_cpu_ptr(&osq_node); struct optimistic_spin_node *prev, *next; + int curr = encode_cpu(smp_processor_id()); + int old; node->locked = 0; node->next = NULL; + node->cpu = curr; - node->prev = prev = xchg(lock, node); - if (likely(prev == NULL)) + old = atomic_xchg(&lock->tail, curr); + if (old == OSQ_UNLOCKED_VAL) return true; + prev = decode_cpu(old); + node->prev = prev; ACCESS_ONCE(prev->next) = node; /* @@ -149,15 +180,16 @@ unqueue: return false; } -void osq_unlock(struct optimistic_spin_node **lock) +void osq_unlock(struct optimistic_spin_queue *lock) { struct optimistic_spin_node *node = this_cpu_ptr(&osq_node); struct optimistic_spin_node *next; + int curr = encode_cpu(smp_processor_id()); /* * Fast path for the uncontended case. 
*/ - if (likely(cmpxchg(lock, node, NULL) == node)) + if (likely(atomic_cmpxchg(&lock->tail, curr, OSQ_UNLOCKED_VAL) == curr)) return; /* diff --git a/kernel/locking/mcs_spinlock.h b/kernel/locking/mcs_spinlock.h index c99dc00..74356dc 100644 --- a/kernel/locking/mcs_spinlock.h +++ b/kernel/locking/mcs_spinlock.h @@ -121,9 +121,10 @@ void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) struct optimistic_spin_node { struct optimistic_spin_node *next, *prev; int locked; /* 1 if lock acquired */ + int cpu; /* encoded CPU # value */ }; -extern bool osq_lock(struct optimistic_spin_node **lock); -extern void osq_unlock(struct optimistic_spin_node **lock); +extern bool osq_lock(struct optimistic_spin_queue *lock); +extern void osq_unlock(struct optimistic_spin_queue *lock); #endif /* __LINUX_MCS_SPINLOCK_H */ diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index bc73d33..d9b3139 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -60,7 +60,7 @@ __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key) INIT_LIST_HEAD(&lock->wait_list); mutex_clear_owner(lock); #ifdef CONFIG_MUTEX_SPIN_ON_OWNER - lock->osq = NULL; + atomic_set(&lock->osq.tail, OSQ_UNLOCKED_VAL); #endif debug_mutex_init(lock, name, key); diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index c40c7d2..b77a623 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c @@ -84,7 +84,7 @@ void __init_rwsem(struct rw_semaphore *sem, const char *name, INIT_LIST_HEAD(&sem->wait_list); #ifdef CONFIG_SMP sem->owner = NULL; - sem->osq = NULL; + atomic_set(&sem->osq.tail, OSQ_UNLOCKED_VAL); #endif } -- cgit v0.10.2 From 4d9d951e6b5df85ccfca2c5bd8b4f5c71d256b65 Mon Sep 17 00:00:00 2001 From: Jason Low Date: Mon, 14 Jul 2014 10:27:50 -0700 Subject: locking/spinlocks/mcs: Introduce and use init macro and function for osq locks Currently, we initialize the osq lock by directly setting the lock's values. It would be preferable if we use an init macro to do the initialization like we do with other locks. This patch introduces and uses a macro and function for initializing the osq lock. Signed-off-by: Jason Low Signed-off-by: Peter Zijlstra Cc: Scott Norton Cc: "Paul E. McKenney" Cc: Dave Chinner Cc: Waiman Long Cc: Davidlohr Bueso Cc: Rik van Riel Cc: Andrew Morton Cc: "H. Peter Anvin" Cc: Steven Rostedt Cc: Tim Chen Cc: Konrad Rzeszutek Wilk Cc: Aswin Chandramouleeswaran Cc: Linus Torvalds Cc: Chris Mason Cc: Josef Bacik Link: http://lkml.kernel.org/r/1405358872-3732-4-git-send-email-jason.low2@hp.com Signed-off-by: Ingo Molnar diff --git a/include/linux/osq_lock.h b/include/linux/osq_lock.h index b001682..90230d5 100644 --- a/include/linux/osq_lock.h +++ b/include/linux/osq_lock.h @@ -16,4 +16,12 @@ struct optimistic_spin_queue { atomic_t tail; }; +/* Init macro and function. 
*/ +#define OSQ_LOCK_UNLOCKED { ATOMIC_INIT(OSQ_UNLOCKED_VAL) } + +static inline void osq_lock_init(struct optimistic_spin_queue *lock) +{ + atomic_set(&lock->tail, OSQ_UNLOCKED_VAL); +} + #endif diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index 9fdcdd0..25cd9aa 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -69,7 +69,7 @@ static inline int rwsem_is_locked(struct rw_semaphore *sem) __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock), \ LIST_HEAD_INIT((name).wait_list), \ NULL, /* owner */ \ - { ATOMIC_INIT(OSQ_UNLOCKED_VAL) } /* osq */ \ + OSQ_LOCK_UNLOCKED /* osq */ \ __RWSEM_DEP_MAP_INIT(name) } #else #define __RWSEM_INITIALIZER(name) \ diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index d9b3139..acca2c1 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -60,7 +60,7 @@ __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key) INIT_LIST_HEAD(&lock->wait_list); mutex_clear_owner(lock); #ifdef CONFIG_MUTEX_SPIN_ON_OWNER - atomic_set(&lock->osq.tail, OSQ_UNLOCKED_VAL); + osq_lock_init(&lock->osq); #endif debug_mutex_init(lock, name, key); diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index b77a623..7190592 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c @@ -84,7 +84,7 @@ void __init_rwsem(struct rw_semaphore *sem, const char *name, INIT_LIST_HEAD(&sem->wait_list); #ifdef CONFIG_SMP sem->owner = NULL; - atomic_set(&sem->osq.tail, OSQ_UNLOCKED_VAL); + osq_lock_init(&sem->osq); #endif } -- cgit v0.10.2 From 33ecd2083a9560fbc1ef1b1279ef3ecb4c012a4f Mon Sep 17 00:00:00 2001 From: Jason Low Date: Mon, 14 Jul 2014 10:27:51 -0700 Subject: locking/spinlocks/mcs: Micro-optimize osq_unlock() In the unlock function of the cancellable MCS spinlock, the first thing we do is retrieve the current CPU's osq node. However, due to the changes made in the previous patch, in the common case where the lock is not contended, we wouldn't need to access the current CPU's osq node anymore. This patch optimizes this by retrieving this CPU's osq node only after we attempt the initial cmpxchg to unlock the osq and find that it's contended. Signed-off-by: Jason Low Signed-off-by: Peter Zijlstra Cc: Scott Norton Cc: "Paul E. McKenney" Cc: Dave Chinner Cc: Waiman Long Cc: Davidlohr Bueso Cc: Rik van Riel Cc: Andrew Morton Cc: "H. Peter Anvin" Cc: Steven Rostedt Cc: Tim Chen Cc: Konrad Rzeszutek Wilk Cc: Aswin Chandramouleeswaran Cc: Linus Torvalds Link: http://lkml.kernel.org/r/1405358872-3732-5-git-send-email-jason.low2@hp.com Signed-off-by: Ingo Molnar diff --git a/kernel/locking/mcs_spinlock.c b/kernel/locking/mcs_spinlock.c index 32fc16c..be9ee15 100644 --- a/kernel/locking/mcs_spinlock.c +++ b/kernel/locking/mcs_spinlock.c @@ -182,8 +182,7 @@ unqueue: void osq_unlock(struct optimistic_spin_queue *lock) { - struct optimistic_spin_node *node = this_cpu_ptr(&osq_node); - struct optimistic_spin_node *next; + struct optimistic_spin_node *node, *next; int curr = encode_cpu(smp_processor_id()); /* @@ -195,6 +194,7 @@ void osq_unlock(struct optimistic_spin_queue *lock) /* * Second most likely case. 
*/ + node = this_cpu_ptr(&osq_node); next = xchg(&node->next, NULL); if (next) { ACCESS_ONCE(next->locked) = 1; -- cgit v0.10.2 From b0ab99e7736af88b8ac1b7ae50ea287fffa2badc Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Sat, 14 Jun 2014 15:00:09 +0200 Subject: sched: Fix possible divide by zero in avg_atom() calculation proc_sched_show_task() does: if (nr_switches) do_div(avg_atom, nr_switches); nr_switches is unsigned long and do_div truncates it to 32 bits, which means it can test non-zero on e.g. x86-64 and be truncated to zero for division. Fix the problem by using div64_ul() instead. As a side effect, calculations of avg_atom for big nr_switches are now correct. Signed-off-by: Mateusz Guzik Signed-off-by: Peter Zijlstra Cc: stable@vger.kernel.org Cc: Linus Torvalds Link: http://lkml.kernel.org/r/1402750809-31991-1-git-send-email-mguzik@redhat.com Signed-off-by: Ingo Molnar diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index 695f977..627b3c3 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -608,7 +608,7 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m) avg_atom = p->se.sum_exec_runtime; if (nr_switches) - do_div(avg_atom, nr_switches); + avg_atom = div64_ul(avg_atom, nr_switches); else avg_atom = -1LL; -- cgit v0.10.2 From 4cf465b579c20bee868464f5d664f8d2d96cd370 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 16 Jul 2014 13:28:34 +0200 Subject: ACPI / video: Add use_native_backlight quirk for HP ProBook 4540s As reported here: https://bugzilla.redhat.com/show_bug.cgi?id=1025690 This is yet another model which needs this quirk. Link: https://bugzilla.redhat.com/show_bug.cgi?id=1025690 Signed-off-by: Hans de Goede Signed-off-by: Rafael J. Wysocki diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c index 29649c1..350d52a 100644 --- a/drivers/acpi/video.c +++ b/drivers/acpi/video.c @@ -581,6 +581,14 @@ static struct dmi_system_id video_dmi_table[] __initdata = { }, { .callback = video_set_use_native_backlight, + .ident = "HP ProBook 4540s", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), + DMI_MATCH(DMI_PRODUCT_VERSION, "HP ProBook 4540s"), + }, + }, + { + .callback = video_set_use_native_backlight, .ident = "HP ProBook 2013 models", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), -- cgit v0.10.2 From 97d496bf1a7934c77f8bb0de9b773dd759def616 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sat, 12 Jul 2014 11:43:33 +0200 Subject: cpufreq: sa1110: set memory type for h3600 The Compaq iPAQ h3600 also has the K4S281632b-1H memory type. Verified by prying apart a broken board. Signed-off-by: Linus Walleij Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki diff --git a/drivers/cpufreq/sa1110-cpufreq.c b/drivers/cpufreq/sa1110-cpufreq.c index 5463767..b5befc2 100644 --- a/drivers/cpufreq/sa1110-cpufreq.c +++ b/drivers/cpufreq/sa1110-cpufreq.c @@ -349,7 +349,7 @@ static int __init sa1110_clk_init(void) name = "K4S641632D"; if (machine_is_h3100()) name = "KM416S4030CT"; - if (machine_is_jornada720()) + if (machine_is_jornada720() || machine_is_h3600()) name = "K4S281632B-1H"; if (machine_is_nanoengine()) name = "MT48LC8M16A2TG-75"; -- cgit v0.10.2 From 7e02168711e70a93a4795870e028a260702bdcb5 Mon Sep 17 00:00:00 2001 From: Nicolas Del Piano Date: Sun, 13 Jul 2014 18:59:00 -0300 Subject: cpufreq: imx6q: Select PM_OPP PM_OPP is a library used by several of the existing cpufreq drivers. The ARM IMX6Q cpufreq driver uses this library for its functionality. Thus, it should be selected in Kconfig, as illustrated by the sketch below. 
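As a rough illustration of why the select is needed (a minimal, hypothetical sketch assuming the 3.16-era dev_pm_opp_* API; the helper name, device pointer, and the frequency/voltage values are made up, not code from the patch), a cpufreq driver typically registers or parses OPPs and then derives its frequency table from them:

#include <linux/cpufreq.h>
#include <linux/pm_opp.h>

static int example_setup_opps(struct device *cpu_dev)
{
	struct cpufreq_frequency_table *freq_table;
	int ret;

	/* OPPs may come from DT or be added at runtime; values are made up. */
	dev_pm_opp_add(cpu_dev, 792000000, 1150000);	/* 792 MHz at 1.150 V */
	dev_pm_opp_add(cpu_dev, 996000000, 1275000);	/* 996 MHz at 1.275 V */

	/* Build the cpufreq table from whatever OPPs are registered. */
	ret = dev_pm_opp_init_cpufreq_table(cpu_dev, &freq_table);
	if (ret)
		return ret;

	/* ... hand freq_table to the cpufreq core; free it on teardown ... */
	return 0;
}

Without PM_OPP in the driver's dependency closure, the dev_pm_opp_* symbols it links against would be unresolved, which is exactly what the added select prevents. 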
Reported-by: Ezequiel Garcia Signed-off-by: Nicolas Del Piano Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index ebac671..8c5cf4b 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -104,6 +104,7 @@ config ARM_IMX6Q_CPUFREQ tristate "Freescale i.MX6 cpufreq support" depends on ARCH_MXC depends on REGULATOR_ANATOP + select PM_OPP help This adds cpufreq driver support for Freescale i.MX6 series SoCs. -- cgit v0.10.2 From 13b9a962a2594ee880c5d50d7f70964da1d4fe5a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 16 Jul 2014 14:54:55 +0200 Subject: locking/rwsem: Rename 'activity' to 'count' There are two definitions of struct rw_semaphore, one in linux/rwsem.h and one in linux/rwsem-spinlock.h. For some reason they have different names for the initial field. This makes it impossible to use C99 named initialization for __RWSEM_INITIALIZER() -- or we have to duplicate that entire thing along with the structure definitions. The simpler patch is renaming the rwsem-spinlock variant to match the regular rwsem. This allows us to switch to C99 named initialization. Signed-off-by: Peter Zijlstra Cc: Linus Torvalds Link: http://lkml.kernel.org/n/tip-bmrZolsbGmautmzrerog27io@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/include/linux/rwsem-spinlock.h b/include/linux/rwsem-spinlock.h index d5b13bc..561e861 100644 --- a/include/linux/rwsem-spinlock.h +++ b/include/linux/rwsem-spinlock.h @@ -15,13 +15,13 @@ #ifdef __KERNEL__ /* * the rw-semaphore definition - * - if activity is 0 then there are no active readers or writers - * - if activity is +ve then that is the number of active readers - * - if activity is -1 then there is one active writer + * - if count is 0 then there are no active readers or writers + * - if count is +ve then that is the number of active readers + * - if count is -1 then there is one active writer * - if wait_list is not empty, then there are processes waiting for the semaphore */ struct rw_semaphore { - __s32 activity; + __s32 count; raw_spinlock_t wait_lock; struct list_head wait_list; #ifdef CONFIG_DEBUG_LOCK_ALLOC diff --git a/kernel/locking/rwsem-spinlock.c b/kernel/locking/rwsem-spinlock.c index 9be8a91..2c93571 100644 --- a/kernel/locking/rwsem-spinlock.c +++ b/kernel/locking/rwsem-spinlock.c @@ -26,7 +26,7 @@ int rwsem_is_locked(struct rw_semaphore *sem) unsigned long flags; if (raw_spin_trylock_irqsave(&sem->wait_lock, flags)) { - ret = (sem->activity != 0); + ret = (sem->count != 0); raw_spin_unlock_irqrestore(&sem->wait_lock, flags); } return ret; @@ -46,7 +46,7 @@ void __init_rwsem(struct rw_semaphore *sem, const char *name, debug_check_no_locks_freed((void *)sem, sizeof(*sem)); lockdep_init_map(&sem->dep_map, name, key, 0); #endif - sem->activity = 0; + sem->count = 0; raw_spin_lock_init(&sem->wait_lock); INIT_LIST_HEAD(&sem->wait_list); } @@ -95,7 +95,7 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wakewrite) waiter = list_entry(next, struct rwsem_waiter, list); } while (waiter->type != RWSEM_WAITING_FOR_WRITE); - sem->activity += woken; + sem->count += woken; out: return sem; @@ -126,9 +126,9 @@ void __sched __down_read(struct rw_semaphore *sem) raw_spin_lock_irqsave(&sem->wait_lock, flags); - if (sem->activity >= 0 && list_empty(&sem->wait_list)) { + if (sem->count >= 0 && list_empty(&sem->wait_list)) { /* granted */ - sem->activity++; + sem->count++; raw_spin_unlock_irqrestore(&sem->wait_lock, flags); goto out; } @@ -170,9 +170,9 @@ int 
__down_read_trylock(struct rw_semaphore *sem) raw_spin_lock_irqsave(&sem->wait_lock, flags); - if (sem->activity >= 0 && list_empty(&sem->wait_list)) { + if (sem->count >= 0 && list_empty(&sem->wait_list)) { /* granted */ - sem->activity++; + sem->count++; ret = 1; } @@ -206,7 +206,7 @@ void __sched __down_write_nested(struct rw_semaphore *sem, int subclass) * itself into sleep and waiting for system woke it or someone * else in the head of the wait list up. */ - if (sem->activity == 0) + if (sem->count == 0) break; set_task_state(tsk, TASK_UNINTERRUPTIBLE); raw_spin_unlock_irqrestore(&sem->wait_lock, flags); @@ -214,7 +214,7 @@ void __sched __down_write_nested(struct rw_semaphore *sem, int subclass) raw_spin_lock_irqsave(&sem->wait_lock, flags); } /* got the lock */ - sem->activity = -1; + sem->count = -1; list_del(&waiter.list); raw_spin_unlock_irqrestore(&sem->wait_lock, flags); @@ -235,9 +235,9 @@ int __down_write_trylock(struct rw_semaphore *sem) raw_spin_lock_irqsave(&sem->wait_lock, flags); - if (sem->activity == 0) { + if (sem->count == 0) { /* got the lock */ - sem->activity = -1; + sem->count = -1; ret = 1; } @@ -255,7 +255,7 @@ void __up_read(struct rw_semaphore *sem) raw_spin_lock_irqsave(&sem->wait_lock, flags); - if (--sem->activity == 0 && !list_empty(&sem->wait_list)) + if (--sem->count == 0 && !list_empty(&sem->wait_list)) sem = __rwsem_wake_one_writer(sem); raw_spin_unlock_irqrestore(&sem->wait_lock, flags); @@ -270,7 +270,7 @@ void __up_write(struct rw_semaphore *sem) raw_spin_lock_irqsave(&sem->wait_lock, flags); - sem->activity = 0; + sem->count = 0; if (!list_empty(&sem->wait_list)) sem = __rwsem_do_wake(sem, 1); @@ -287,7 +287,7 @@ void __downgrade_write(struct rw_semaphore *sem) raw_spin_lock_irqsave(&sem->wait_lock, flags); - sem->activity = 1; + sem->count = 1; if (!list_empty(&sem->wait_list)) sem = __rwsem_do_wake(sem, 0); -- cgit v0.10.2 From ce069fc920e5734558b3d9cbef1ab06cf01ee793 Mon Sep 17 00:00:00 2001 From: Jason Low Date: Mon, 14 Jul 2014 10:27:52 -0700 Subject: locking/rwsem: Reduce the size of struct rw_semaphore Recent optimistic spinning additions to rwsem provide significant performance benefits on many workloads on large machines. The cost of it was increasing the size of the rwsem structure by up to 128 bits. However, now that the previous patches in this series bring the overhead of struct optimistic_spin_queue to 32 bits, this patch reorders some fields in struct rw_semaphore such that we can reduce the overhead of the rwsem structure by 64 bits (on 64 bit systems). The extra overhead required for rwsem optimistic spinning would now be up to 8 additional bytes instead of up to 16 bytes. Additionally, the size of rwsem would now be more in line with mutexes. Signed-off-by: Jason Low Signed-off-by: Peter Zijlstra Cc: Scott Norton Cc: "Paul E. McKenney" Cc: Dave Chinner Cc: Waiman Long Cc: Davidlohr Bueso Cc: Rik van Riel Cc: Andrew Morton Cc: "H. 
Peter Anvin" Cc: Steven Rostedt Cc: Tim Chen Cc: Konrad Rzeszutek Wilk Cc: Aswin Chandramouleeswaran Cc: Linus Torvalds Cc: Chris Mason Cc: Josef Bacik Link: http://lkml.kernel.org/r/1405358872-3732-6-git-send-email-jason.low2@hp.com Signed-off-by: Ingo Molnar diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index 25cd9aa..716807f 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -24,15 +24,15 @@ struct rw_semaphore; /* All arch specific implementations share the same struct */ struct rw_semaphore { long count; - raw_spinlock_t wait_lock; struct list_head wait_list; + raw_spinlock_t wait_lock; #ifdef CONFIG_SMP + struct optimistic_spin_queue osq; /* spinner MCS lock */ /* * Write owner. Used as a speculative check to see * if the owner is running on the cpu. */ struct task_struct *owner; - struct optimistic_spin_queue osq; /* spinner MCS lock */ #endif #ifdef CONFIG_DEBUG_LOCK_ALLOC struct lockdep_map dep_map; @@ -64,21 +64,18 @@ static inline int rwsem_is_locked(struct rw_semaphore *sem) #endif #if defined(CONFIG_SMP) && !defined(CONFIG_RWSEM_GENERIC_SPINLOCK) -#define __RWSEM_INITIALIZER(name) \ - { RWSEM_UNLOCKED_VALUE, \ - __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock), \ - LIST_HEAD_INIT((name).wait_list), \ - NULL, /* owner */ \ - OSQ_LOCK_UNLOCKED /* osq */ \ - __RWSEM_DEP_MAP_INIT(name) } +#define __RWSEM_OPT_INIT(lockname) , .osq = OSQ_LOCK_UNLOCKED, .owner = NULL #else -#define __RWSEM_INITIALIZER(name) \ - { RWSEM_UNLOCKED_VALUE, \ - __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock), \ - LIST_HEAD_INIT((name).wait_list) \ - __RWSEM_DEP_MAP_INIT(name) } +#define __RWSEM_OPT_INIT(lockname) #endif +#define __RWSEM_INITIALIZER(name) \ + { .count = RWSEM_UNLOCKED_VALUE, \ + .wait_list = LIST_HEAD_INIT((name).wait_list), \ + .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock) \ + __RWSEM_OPT_INIT(name) \ + __RWSEM_DEP_MAP_INIT(name) } + #define DECLARE_RWSEM(name) \ struct rw_semaphore name = __RWSEM_INITIALIZER(name) -- cgit v0.10.2 From 4badad352a6bb202ec68afa7a574c0bb961e5ebc Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 6 Jun 2014 19:53:16 +0200 Subject: locking/mutex: Disable optimistic spinning on some architectures The optimistic spin code assumes regular stores and cmpxchg() play nice; this is found to not be true for at least: parisc, sparc32, tile32, metag-lock1, arc-!llsc and hexagon. There is further wreckage, but this in particular seemed easy to trigger, so blacklist this. Opt in for known good archs. Signed-off-by: Peter Zijlstra Reported-by: Mikulas Patocka Cc: David Miller Cc: Chris Metcalf Cc: James Bottomley Cc: Vineet Gupta Cc: Jason Low Cc: Waiman Long Cc: "James E.J. 
Bottomley" Cc: Paul McKenney Cc: John David Anglin Cc: James Hogan Cc: Linus Torvalds Cc: Davidlohr Bueso Cc: stable@vger.kernel.org Cc: Benjamin Herrenschmidt Cc: Catalin Marinas Cc: Russell King Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Cc: linuxppc-dev@lists.ozlabs.org Cc: sparclinux@vger.kernel.org Link: http://lkml.kernel.org/r/20140606175316.GV13930@laptop.programming.kicks-ass.net Signed-off-by: Ingo Molnar diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 245058b..88acf8b 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -6,6 +6,7 @@ config ARM select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAVE_CUSTOM_GPIO_H select ARCH_MIGHT_HAVE_PC_PARPORT + select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_CMPXCHG_LOCKREF select ARCH_WANT_IPC_PARSE_VERSION diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index a474de34..839f48c 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -4,6 +4,7 @@ config ARM64 select ARCH_HAS_OPP select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_USE_CMPXCHG_LOCKREF + select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_WANT_OPTIONAL_GPIOLIB select ARCH_WANT_COMPAT_IPC_PARSE_VERSION select ARCH_WANT_FRAME_POINTERS diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index fefe7c8..80b94b0 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -145,6 +145,7 @@ config PPC select HAVE_IRQ_EXIT_ON_IRQ_STACK select ARCH_USE_CMPXCHG_LOCKREF if PPC64 select HAVE_ARCH_AUDITSYSCALL + select ARCH_SUPPORTS_ATOMIC_RMW config GENERIC_CSUM def_bool CPU_LITTLE_ENDIAN diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 29f2e98..407c87d 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -78,6 +78,7 @@ config SPARC64 select HAVE_C_RECORDMCOUNT select NO_BOOTMEM select HAVE_ARCH_AUDITSYSCALL + select ARCH_SUPPORTS_ATOMIC_RMW config ARCH_DEFCONFIG string diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index a8f749e..d24887b 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -131,6 +131,7 @@ config X86 select HAVE_CC_STACKPROTECTOR select GENERIC_CPU_AUTOPROBE select HAVE_ARCH_AUDITSYSCALL + select ARCH_SUPPORTS_ATOMIC_RMW config INSTRUCTION_DECODER def_bool y diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks index 35536d9..8190794 100644 --- a/kernel/Kconfig.locks +++ b/kernel/Kconfig.locks @@ -220,9 +220,12 @@ config INLINE_WRITE_UNLOCK_IRQRESTORE endif +config ARCH_SUPPORTS_ATOMIC_RMW + bool + config MUTEX_SPIN_ON_OWNER def_bool y - depends on SMP && !DEBUG_MUTEXES + depends on SMP && !DEBUG_MUTEXES && ARCH_SUPPORTS_ATOMIC_RMW config ARCH_USE_QUEUE_RWLOCK bool -- cgit v0.10.2 From 5db6c6fefb1ca0e81e3bd6dd8998bf51c453d823 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Fri, 11 Jul 2014 14:00:06 -0700 Subject: locking/rwsem: Add CONFIG_RWSEM_SPIN_ON_OWNER Just like with mutexes (CONFIG_MUTEX_SPIN_ON_OWNER), encapsulate the dependencies for rwsem optimistic spinning. No logical changes here as it continues to depend on both SMP and the XADD algorithm variant. Signed-off-by: Davidlohr Bueso Acked-by: Jason Low [ Also make it depend on ARCH_SUPPORTS_ATOMIC_RMW. 
] Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1405112406-13052-2-git-send-email-davidlohr@hp.com Cc: aswin@hp.com Cc: Chris Mason Cc: Davidlohr Bueso Cc: Josef Bacik Cc: Linus Torvalds Cc: Waiman Long Signed-off-by: Ingo Molnar diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index 716807f..035d3c5 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -14,7 +14,9 @@ #include #include #include +#ifdef CONFIG_RWSEM_SPIN_ON_OWNER #include +#endif struct rw_semaphore; @@ -26,7 +28,7 @@ struct rw_semaphore { long count; struct list_head wait_list; raw_spinlock_t wait_lock; -#ifdef CONFIG_SMP +#ifdef CONFIG_RWSEM_SPIN_ON_OWNER struct optimistic_spin_queue osq; /* spinner MCS lock */ /* * Write owner. Used as a speculative check to see @@ -63,7 +65,7 @@ static inline int rwsem_is_locked(struct rw_semaphore *sem) # define __RWSEM_DEP_MAP_INIT(lockname) #endif -#if defined(CONFIG_SMP) && !defined(CONFIG_RWSEM_GENERIC_SPINLOCK) +#ifdef CONFIG_RWSEM_SPIN_ON_OWNER #define __RWSEM_OPT_INIT(lockname) , .osq = OSQ_LOCK_UNLOCKED, .owner = NULL #else #define __RWSEM_OPT_INIT(lockname) diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks index 8190794..76768ee 100644 --- a/kernel/Kconfig.locks +++ b/kernel/Kconfig.locks @@ -227,6 +227,10 @@ config MUTEX_SPIN_ON_OWNER def_bool y depends on SMP && !DEBUG_MUTEXES && ARCH_SUPPORTS_ATOMIC_RMW +config RWSEM_SPIN_ON_OWNER + def_bool y + depends on SMP && RWSEM_XCHGADD_ALGORITHM && ARCH_SUPPORTS_ATOMIC_RMW + config ARCH_USE_QUEUE_RWLOCK bool diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index 7190592..a2391ac 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c @@ -82,7 +82,7 @@ void __init_rwsem(struct rw_semaphore *sem, const char *name, sem->count = RWSEM_UNLOCKED_VALUE; raw_spin_lock_init(&sem->wait_lock); INIT_LIST_HEAD(&sem->wait_list); -#ifdef CONFIG_SMP +#ifdef CONFIG_RWSEM_SPIN_ON_OWNER sem->owner = NULL; osq_lock_init(&sem->osq); #endif @@ -262,7 +262,7 @@ static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem) return false; } -#ifdef CONFIG_SMP +#ifdef CONFIG_RWSEM_SPIN_ON_OWNER /* * Try to acquire write lock before the writer has been put on wait queue. */ diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c index 42f806d..e2d3bc7 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c @@ -12,7 +12,7 @@ #include -#if defined(CONFIG_SMP) && defined(CONFIG_RWSEM_XCHGADD_ALGORITHM) +#ifdef CONFIG_RWSEM_SPIN_ON_OWNER static inline void rwsem_set_owner(struct rw_semaphore *sem) { sem->owner = current; -- cgit v0.10.2 From 2fa1adc07089020984d52bb55c5af0b9d86dff21 Mon Sep 17 00:00:00 2001 From: Quentin Armitage Date: Thu, 10 Jul 2014 11:15:55 +0100 Subject: cpufreq: kirkwood: Reinstate cpufreq driver for ARCH_KIRKWOOD Commit ff1f0018cf66080d8e6f59791e552615648a033a ("drivers: Enable building of Kirkwood drivers for mach-mvebu") added Kirkwood into mach-mvebu, adding MACH_KIRKWOOD to ARCH_KIRKWOOD in the Kconfig files. The change for ARM_KIRKWOOD_CPUFREQ replaced ARCH_KIRKWOOD with MACH_KIRKWOOD, whereas all the other changes were ARCH_KIRKWOOD || MACH_KIRKWOOD. As a consequence of this change, the cpufreq driver is no longer enabled for ARCH_KIRKWOOD. This patch reinstates ARM_KIRKWOOD_CPUFREQ for ARCH_KIRKWOOD. Signed-off-by: Quentin Armitage Acked-by: Andrew Lunn Signed-off-by: Rafael J. 
Wysocki diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index 8c5cf4b..7364a53 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -119,7 +119,7 @@ config ARM_INTEGRATOR If in doubt, say Y. config ARM_KIRKWOOD_CPUFREQ - def_bool MACH_KIRKWOOD + def_bool ARCH_KIRKWOOD || MACH_KIRKWOOD help This adds the CPUFreq driver for Marvell Kirkwood SoCs. -- cgit v0.10.2 From 1bf8cc3d017575a38d4361f56ccc0a670a16bcd9 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 11 Jul 2014 20:24:19 +0530 Subject: cpufreq: cpu0: OPPs can be populated at runtime OPPs can be populated statically, via DT, or added at run time with dev_pm_opp_add(). While this driver handles the first case correctly, it would fail when OPPs are added at runtime. That is because the call to of_init_opp_table() would fail, as there are no OPPs in DT, and the probe would return early. To fix this, remove error checking and call dev_pm_opp_init_cpufreq_table() unconditionally. Update bindings as well. Suggested-by: Stephen Boyd Tested-by: Stephen Boyd Signed-off-by: Viresh Kumar Acked-by: Santosh Shilimkar Signed-off-by: Rafael J. Wysocki diff --git a/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt b/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt index f055515..366690c 100644 --- a/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt +++ b/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt @@ -8,10 +8,12 @@ Both required and optional properties listed below must be defined under node /cpus/cpu@0. Required properties: -- operating-points: Refer to Documentation/devicetree/bindings/power/opp.txt - for details +- None Optional properties: +- operating-points: Refer to Documentation/devicetree/bindings/power/opp.txt for + details. OPPs *must* be supplied either via DT, i.e. this property, or + populated at runtime. - clock-latency: Specify the possible maximum transition latency for clock, in unit of nanoseconds. - voltage-tolerance: Specify the CPU voltage tolerance in percentage. diff --git a/drivers/cpufreq/cpufreq-cpu0.c b/drivers/cpufreq/cpufreq-cpu0.c index ee1ae30..86beda9 100644 --- a/drivers/cpufreq/cpufreq-cpu0.c +++ b/drivers/cpufreq/cpufreq-cpu0.c @@ -152,11 +152,8 @@ static int cpu0_cpufreq_probe(struct platform_device *pdev) goto out_put_reg; } - ret = of_init_opp_table(cpu_dev); - if (ret) { - pr_err("failed to init OPP table: %d\n", ret); - goto out_put_clk; - } + /* OPPs might be populated at runtime, don't check for error here */ + of_init_opp_table(cpu_dev); ret = dev_pm_opp_init_cpufreq_table(cpu_dev, &freq_table); if (ret) { -- cgit v0.10.2 From c3caf1192f904de2f1381211f564537235d50de3 Mon Sep 17 00:00:00 2001 From: Jerry Chu Date: Mon, 14 Jul 2014 15:54:46 -0700 Subject: net-gre-gro: Fix a bug that breaks the forwarding path Fixed a bug that was introduced by my GRE-GRO patch (bf5a755f5e9186406bbf50f4087100af5bd68e40 net-gre-gro: Add GRE support to the GRO stack) that breaks the forwarding path because various GSO related fields were not set. On the egress path, the bug will cause either the GSO code to fail or a GRE-TSO capable (NETIF_F_GSO_GRE) NIC to choke. The following fix has been tested for both cases. Signed-off-by: H.K. Jerry Chu Signed-off-by: David S. 
Miller diff --git a/net/core/dev.c b/net/core/dev.c index 7990984..367a586 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4096,6 +4096,8 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb) skb->vlan_tci = 0; skb->dev = napi->dev; skb->skb_iif = 0; + skb->encapsulation = 0; + skb_shinfo(skb)->gso_type = 0; skb->truesize = SKB_TRUESIZE(skb_end_offset(skb)); napi->skb = skb; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index d5e6836..d156b3c 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1429,6 +1429,9 @@ static int inet_gro_complete(struct sk_buff *skb, int nhoff) int proto = iph->protocol; int err = -ENOSYS; + if (skb->encapsulation) + skb_set_inner_network_header(skb, nhoff); + csum_replace2(&iph->check, iph->tot_len, newlen); iph->tot_len = newlen; diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index eb92deb..f0bdd47 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -263,6 +263,9 @@ static int gre_gro_complete(struct sk_buff *skb, int nhoff) int err = -ENOENT; __be16 type; + skb->encapsulation = 1; + skb_shinfo(skb)->gso_type = SKB_GSO_GRE; + type = greh->protocol; if (greh->flags & GRE_KEY) grehlen += GRE_HEADER_SECTION; diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index 4e86c59..55046ec 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -309,7 +309,7 @@ static int tcp4_gro_complete(struct sk_buff *skb, int thoff) th->check = ~tcp_v4_check(skb->len - thoff, iph->saddr, iph->daddr, 0); - skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; + skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4; return tcp_gro_complete(skb); } diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c index 8517d3c..01b0ff9 100644 --- a/net/ipv6/tcpv6_offload.c +++ b/net/ipv6/tcpv6_offload.c @@ -73,7 +73,7 @@ static int tcp6_gro_complete(struct sk_buff *skb, int thoff) th->check = ~tcp_v6_check(skb->len - thoff, &iph->saddr, &iph->daddr, 0); - skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6; + skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV6; return tcp_gro_complete(skb); } -- cgit v0.10.2 From fbb60fe35ad579b511de8604b06a30b43846473b Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Mon, 12 May 2014 16:35:39 +0800 Subject: drm/qxl: return IRQ_NONE if it was not our irq Return IRQ_NONE if it was not our irq. This is necessary for the case when qxl is sharing an irq line with a device A in a crash kernel. If qxl is initialized before A and A's irq was raised during this gap, returning IRQ_HANDLED in this case will cause this irq to be raised again after EOI since the kernel thinks it was handled when in fact it was not. Cc: Gerd Hoffmann Cc: stable@vger.kernel.org Signed-off-by: Jason Wang Signed-off-by: Dave Airlie diff --git a/drivers/gpu/drm/qxl/qxl_irq.c b/drivers/gpu/drm/qxl/qxl_irq.c index 34d6a85..0bf1e20 100644 --- a/drivers/gpu/drm/qxl/qxl_irq.c +++ b/drivers/gpu/drm/qxl/qxl_irq.c @@ -33,6 +33,9 @@ irqreturn_t qxl_irq_handler(int irq, void *arg) pending = xchg(&qdev->ram_header->int_pending, 0); + if (!pending) + return IRQ_NONE; + atomic_inc(&qdev->irq_received); if (pending & QXL_INTERRUPT_DISPLAY) { -- cgit v0.10.2 From de12d6f4b10b21854441f5242dcb29ea96181e58 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 16 Jul 2014 17:40:31 -0700 Subject: hwmon: (adt7470) Fix writes to temperature limit registers Temperature limit registers are signed. Limits therefore need to be clamped to (-128, 127) degrees C and not to (0, 255) degrees C. 
Without this fix, writing a limit of 128 degrees C sets the actual limit to -128 degrees C. Signed-off-by: Guenter Roeck Cc: stable@vger.kernel.org Reviewed-by: Axel Lin diff --git a/drivers/hwmon/adt7470.c b/drivers/hwmon/adt7470.c index 0f4dea5..9ee3913 100644 --- a/drivers/hwmon/adt7470.c +++ b/drivers/hwmon/adt7470.c @@ -515,7 +515,7 @@ static ssize_t set_temp_min(struct device *dev, return -EINVAL; temp = DIV_ROUND_CLOSEST(temp, 1000); - temp = clamp_val(temp, 0, 255); + temp = clamp_val(temp, -128, 127); mutex_lock(&data->lock); data->temp_min[attr->index] = temp; @@ -549,7 +549,7 @@ static ssize_t set_temp_max(struct device *dev, return -EINVAL; temp = DIV_ROUND_CLOSEST(temp, 1000); - temp = clamp_val(temp, 0, 255); + temp = clamp_val(temp, -128, 127); mutex_lock(&data->lock); data->temp_max[attr->index] = temp; @@ -826,7 +826,7 @@ static ssize_t set_pwm_tmin(struct device *dev, return -EINVAL; temp = DIV_ROUND_CLOSEST(temp, 1000); - temp = clamp_val(temp, 0, 255); + temp = clamp_val(temp, -128, 127); mutex_lock(&data->lock); data->pwm_tmin[attr->index] = temp; -- cgit v0.10.2 From 7a9810e7bd99c922d9cedf64dbaa5ef6be412295 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Thu, 17 Jul 2014 12:55:40 +0900 Subject: r8169: Enable RX_MULTI_EN for RTL_GIGA_MAC_VER_40 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ethernet port on my ASUS A88X Pro mainboard stopped working several times a day, with messages like these in dmesg: AMD-Vi: Event logged [IO_PAGE_FAULT device=05:00.0 domain=0x001e address=0x0000000000003000 flags=0x0050] Searching the web for these messages led me to similar reports about different hardware supported by r8169, and eventually to commits 3ced8c955e74d319f3e3997f7169c79d524dfd06 ('r8169: enforce RX_MULTI_EN for the 8168f.') and eb2dc35d99028b698cdedba4f5522bc43e576bd2 ('r8169: RxConfig hack for the 8168evl'). So I tried this change, and it fixes the problem for me. Signed-off-by: Michel Dänzer Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index 06bdc31..61623e9 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -4240,6 +4240,8 @@ static void rtl_init_rxcfg(struct rtl8169_private *tp) RTL_W32(RxConfig, RX128_INT_EN | RX_MULTI_EN | RX_DMA_BURST); break; case RTL_GIGA_MAC_VER_40: + RTL_W32(RxConfig, RX128_INT_EN | RX_MULTI_EN | RX_DMA_BURST | RX_EARLY_OFF); + break; case RTL_GIGA_MAC_VER_41: case RTL_GIGA_MAC_VER_42: case RTL_GIGA_MAC_VER_43: -- cgit v0.10.2 From a4b70a07ed12a71131cab7adce2ce91c71b37060 Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Wed, 16 Jul 2014 10:02:26 -0400 Subject: sunvnet: clean up objects created in vnet_new() on vnet_exit() Nothing cleans up the objects created by vnet_new(); they are completely leaked. vnet_exit(), after doing the vio_unregister_driver() to clean up ports, should call a helper function that iterates over vnet_list and cleans up those objects. This includes unregister_netdevice() as well as free_netdev(). Signed-off-by: Sowmini Varadhan Acked-by: Dave Kleikamp Reviewed-by: Karl Volz Signed-off-by: David S. 
Miller diff --git a/drivers/net/ethernet/sun/sunvnet.c b/drivers/net/ethernet/sun/sunvnet.c index 1c24a8f..fd411d6 100644 --- a/drivers/net/ethernet/sun/sunvnet.c +++ b/drivers/net/ethernet/sun/sunvnet.c @@ -1083,6 +1083,24 @@ static struct vnet *vnet_find_or_create(const u64 *local_mac) return vp; } +static void vnet_cleanup(void) +{ + struct vnet *vp; + struct net_device *dev; + + mutex_lock(&vnet_list_mutex); + while (!list_empty(&vnet_list)) { + vp = list_first_entry(&vnet_list, struct vnet, list); + list_del(&vp->list); + dev = vp->dev; + /* vio_unregister_driver() should have cleaned up port_list */ + BUG_ON(!list_empty(&vp->port_list)); + unregister_netdev(dev); + free_netdev(dev); + } + mutex_unlock(&vnet_list_mutex); +} + static const char *local_mac_prop = "local-mac-address"; static struct vnet *vnet_find_parent(struct mdesc_handle *hp, @@ -1240,7 +1258,6 @@ static int vnet_port_remove(struct vio_dev *vdev) kfree(port); - unregister_netdev(vp->dev); } return 0; } @@ -1268,6 +1285,7 @@ static int __init vnet_init(void) static void __exit vnet_exit(void) { vio_unregister_driver(&vnet_port_driver); + vnet_cleanup(); } module_init(vnet_init); -- cgit v0.10.2 From 652c1a05171695d21b84dd3a723606b50eeb80fd Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Wed, 25 Jun 2014 16:44:14 +0300 Subject: IB/mlx5: Enable "block multicast loopback" for kernel consumers In commit f360d88a2efd, we advertise blocking multicast loopback to both kernel and userspace consumers, but don't allow kernel consumers (e.g. IPoIB) to use it with their UD QPs. Fix that. Fixes: f360d88a2efd ("IB/mlx5: Add block multicast loopback support") Reported-by: Haggai Eran Signed-off-by: Eli Cohen Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index d13ddf1..bbbcf38 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -675,7 +675,7 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev, int err; uuari = &dev->mdev.priv.uuari; - if (init_attr->create_flags & ~IB_QP_CREATE_SIGNATURE_EN) + if (init_attr->create_flags & ~(IB_QP_CREATE_SIGNATURE_EN | IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)) return -EINVAL; if (init_attr->qp_type == MLX5_IB_QPT_REG_UMR) -- cgit v0.10.2 From 858e6c321065344339906672bccd0eafe9622258 Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Wed, 16 Jul 2014 13:33:50 +0300 Subject: net/mlx4_en: cq->irq_desc wasn't set in legacy EQ's Fix a regression introduced by commit 35f6f45 ("net/mlx4_en: Don't use irq_affinity_notifier to track changes in IRQ affinity map"). When the core is started in legacy EQ's (number of IRQ's < rx rings), cq->irq_desc was NULL. This caused a kernel crash under heavy traffic - when there were more completions than the rx NAPI budget. Fixed to have it set for both EQ modes. Signed-off-by: Amir Vadai Signed-off-by: David S. 
Miller diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c index 14c0004..82322b1 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c @@ -129,14 +129,15 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq, name); } - cq->irq_desc = - irq_to_desc(mlx4_eq_get_irq(mdev->dev, - cq->vector)); } } else { cq->vector = (cq->ring + 1 + priv->port) % mdev->dev->caps.num_comp_vectors; } + + cq->irq_desc = + irq_to_desc(mlx4_eq_get_irq(mdev->dev, + cq->vector)); } else { /* For TX we use the same irq per ring we assigned for the RX */ -- cgit v0.10.2 From cc25eaae238ddd693aa5eaa73e565d8ff4915f6e Mon Sep 17 00:00:00 2001 From: Christoph Schulz Date: Wed, 16 Jul 2014 22:10:29 +0200 Subject: net: ppp: fix creating PPP pass and active filters Commit 568f194e8bd16c353ad50f9ab95d98b20578a39d ("net: ppp: use sk_unattached_filter api") inadvertently changed the logic when setting PPP pass and active filters. This applies to both the generic PPP subsystem implemented by drivers/net/ppp/ppp_generic.c and the ISDN PPP subsystem implemented by drivers/isdn/i4l/isdn_ppp.c. The original code in ppp_ioctl() (or isdn_ppp_ioctl(), resp.) handling PPPIOCSPASS and PPPIOCSACTIVE allowed to remove a pass/active filter previously set by using a filter of length zero. However, with the new code this is not possible anymore as this case is not explicitly checked for, which leads to passing NULL as a filter to sk_unattached_filter_create(). This results in returning EINVAL to the caller. Additionally, the variables ppp->pass_filter and ppp->active_filter (or is->pass_filter and is->active_filter, resp.) are not reset to NULL, although the filters they point to may have been destroyed by sk_unattached_filter_destroy(), so in this EINVAL case dangling pointers are left behind (provided the pointers were previously non-NULL). This patch corrects both problems by checking whether the filter passed is empty or non-empty, and prevents sk_unattached_filter_create() from being called in the first case. Moreover, the pointers are always reset to NULL as soon as sk_unattached_filter_destroy() returns. Signed-off-by: Christoph Schulz Signed-off-by: David S. 
Miller diff --git a/drivers/isdn/i4l/isdn_ppp.c b/drivers/isdn/i4l/isdn_ppp.c index a333b7f..62f0688 100644 --- a/drivers/isdn/i4l/isdn_ppp.c +++ b/drivers/isdn/i4l/isdn_ppp.c @@ -638,9 +638,15 @@ isdn_ppp_ioctl(int min, struct file *file, unsigned int cmd, unsigned long arg) fprog.len = len; fprog.filter = code; - if (is->pass_filter) + if (is->pass_filter) { sk_unattached_filter_destroy(is->pass_filter); - err = sk_unattached_filter_create(&is->pass_filter, &fprog); + is->pass_filter = NULL; + } + if (fprog.filter != NULL) + err = sk_unattached_filter_create(&is->pass_filter, + &fprog); + else + err = 0; kfree(code); return err; @@ -657,9 +663,15 @@ isdn_ppp_ioctl(int min, struct file *file, unsigned int cmd, unsigned long arg) fprog.len = len; fprog.filter = code; - if (is->active_filter) + if (is->active_filter) { sk_unattached_filter_destroy(is->active_filter); - err = sk_unattached_filter_create(&is->active_filter, &fprog); + is->active_filter = NULL; + } + if (fprog.filter != NULL) + err = sk_unattached_filter_create(&is->active_filter, + &fprog); + else + err = 0; kfree(code); return err; diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index e2f20f8..d5b77ef 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -757,10 +757,15 @@ static long ppp_ioctl(struct file *file, unsigned int cmd, unsigned long arg) }; ppp_lock(ppp); - if (ppp->pass_filter) + if (ppp->pass_filter) { sk_unattached_filter_destroy(ppp->pass_filter); - err = sk_unattached_filter_create(&ppp->pass_filter, - &fprog); + ppp->pass_filter = NULL; + } + if (fprog.filter != NULL) + err = sk_unattached_filter_create(&ppp->pass_filter, + &fprog); + else + err = 0; kfree(code); ppp_unlock(ppp); } @@ -778,10 +783,15 @@ static long ppp_ioctl(struct file *file, unsigned int cmd, unsigned long arg) }; ppp_lock(ppp); - if (ppp->active_filter) + if (ppp->active_filter) { sk_unattached_filter_destroy(ppp->active_filter); - err = sk_unattached_filter_create(&ppp->active_filter, - &fprog); + ppp->active_filter = NULL; + } + if (fprog.filter != NULL) + err = sk_unattached_filter_create(&ppp->active_filter, + &fprog); + else + err = 0; kfree(code); ppp_unlock(ppp); } -- cgit v0.10.2 From 92c14bd9477a20a83144f08c0ca25b0308bf0730 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 17 Jul 2014 10:48:25 +0530 Subject: cpufreq: move policy kobj to policy->cpu at resume This is only relevant to implementations with multiple clusters, where clusters have separate clock lines but all CPUs within a cluster share it. Consider a dual cluster platform with 2 cores per cluster. During suspend we start hot unplugging CPUs in order 1 to 3. When CPU2 is removed, policy->kobj would be moved to CPU3 and when CPU3 goes down we wouldn't free policy or its kobj as we want to retain permissions/values/etc. Now on resume, we will get CPU2 before CPU3 and will call __cpufreq_add_dev(). We will recover the old policy and update policy->cpu from 3 to 2 from update_policy_cpu(). But the kobj is still tied to CPU3 and isn't moved to CPU2. We wouldn't create a link for CPU2, but would try that for CPU3 while bringing it online, which will report errors as CPU3 already has a kobj assigned to it. This bug got introduced with commit 42f921a, which overlooked this scenario. To fix this, let's move the kobj to the new policy->cpu while bringing the first CPU of a cluster back. Also do a WARN_ON() if kobject_move() fails, as we would reach here only for the first CPU of a non-boot cluster. 
And we can't recover from this situation, if kobject_move() fails. Fixes: 42f921a6f10c (cpufreq: remove sysfs files for CPUs which failed to come back after resume) Cc: 3.13+ # 3.13+ Reported-and-tested-by: Bu Yitian Reported-by: Saravana Kannan Reviewed-by: Srivatsa S. Bhat Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 62259d2..6f02485 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1153,10 +1153,12 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) * the creation of a brand new one. So we need to perform this update * by invoking update_policy_cpu(). */ - if (recover_policy && cpu != policy->cpu) + if (recover_policy && cpu != policy->cpu) { update_policy_cpu(policy, cpu); - else + WARN_ON(kobject_move(&policy->kobj, &dev->kobj)); + } else { policy->cpu = cpu; + } cpumask_copy(policy->cpus, cpumask_of(cpu)); -- cgit v0.10.2 From a2a9e02b5b67a7a32a14ab6c4c331a1a0c23a1db Mon Sep 17 00:00:00 2001 From: Andrey Utkin Date: Thu, 17 Jul 2014 15:13:23 +0300 Subject: drivers/ata/pata_ep93xx.c: use signed int type for result of platform_get_irq() [linux-3.16-rc5/drivers/ata/pata_ep93xx.c:929]: (style) Checking if unsigned variable 'irq' is less than zero. Source code is irq = platform_get_irq(pdev, 0); if (irq < 0) { but unsigned int irq; $ fgrep platform_get_irq `find . -name \*.h -print` ./include/linux/platform_device.h:extern int platform_get_irq(struct platform_device *, unsigned int); Now using "int" type instead of "unsigned int" for "irq" variable. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=80401 Reported-by: David Binderman Signed-off-by: Andrey Utkin Signed-off-by: Tejun Heo diff --git a/drivers/ata/pata_ep93xx.c b/drivers/ata/pata_ep93xx.c index 6ad5c07..4d37c54 100644 --- a/drivers/ata/pata_ep93xx.c +++ b/drivers/ata/pata_ep93xx.c @@ -915,7 +915,7 @@ static int ep93xx_pata_probe(struct platform_device *pdev) struct ep93xx_pata_data *drv_data; struct ata_host *host; struct ata_port *ap; - unsigned int irq; + int irq; struct resource *mem_res; void __iomem *ide_base; int err; -- cgit v0.10.2 From 144cb08864ed44be52d8634ac69cd98e5efcf527 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Tue, 15 Jul 2014 00:03:03 +0200 Subject: irqchip: gic: Add binding probe for ARM GIC400 Commit 3ab72f9156bb "dt-bindings: add GIC-400 binding" added the "arm,gic-400" compatible string, but the corresponding IRQCHIP_DECLARE was never added to the gic driver. Therefore add the missing irqchip declaration for it. Signed-off-by: Suravee Suthikulpanit Removed additional empty line and adapted commit message to mark it as fixing an issue. 
Signed-off-by: Heiko Stuebner Acked-by: Will Deacon Fixes: 3ab72f9156bb ("dt-bindings: add GIC-400 binding") Cc: # v3.14+ Link: https://lkml.kernel.org/r/2621565.f5eISveXXJ@diego Signed-off-by: Jason Cooper diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index aadd6f5..66ed892 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -1071,6 +1071,7 @@ gic_of_init(struct device_node *node, struct device_node *parent) gic_cnt++; return 0; } +IRQCHIP_DECLARE(gic_400, "arm,gic-400", gic_of_init); IRQCHIP_DECLARE(cortex_a15_gic, "arm,cortex-a15-gic", gic_of_init); IRQCHIP_DECLARE(cortex_a9_gic, "arm,cortex-a9-gic", gic_of_init); IRQCHIP_DECLARE(cortex_a7_gic, "arm,cortex-a7-gic", gic_of_init); -- cgit v0.10.2 From 0ac66effe7fcdee55bda6d5d10d3372c95a41920 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 14 Jul 2014 17:57:19 -0400 Subject: drm/radeon: avoid leaking edid data In some cases we fetch the edid in the detect() callback in order to determine what sort of monitor is connected. If that happens, don't fetch the edid again in the get_modes() callback or we will leak the edid. Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index 13896ed..65501af 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -830,6 +830,10 @@ int radeon_ddc_get_modes(struct radeon_connector *radeon_connector) struct radeon_device *rdev = dev->dev_private; int ret = 0; + /* don't leak the edid if we already fetched it in detect() */ + if (radeon_connector->edid) + goto got_edid; + /* on hw with routers, select right port */ if (radeon_connector->router.ddc_valid) radeon_router_select_ddc_port(radeon_connector); @@ -868,6 +872,7 @@ int radeon_ddc_get_modes(struct radeon_connector *radeon_connector) radeon_connector->edid = radeon_bios_get_hardcoded_edid(rdev); } if (radeon_connector->edid) { +got_edid: drm_mode_connector_update_edid_property(&radeon_connector->base, radeon_connector->edid); ret = drm_add_edid_modes(&radeon_connector->base, radeon_connector->edid); drm_edid_to_eld(&radeon_connector->base, radeon_connector->edid); -- cgit v0.10.2 From 201bb62402e0227375c655446ea04fcd0acf7287 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 15 Jul 2014 09:48:53 -0400 Subject: drm/radeon: set default bl level to something reasonable If the value in the scratch register is 0, set it to the max level. This fixes an issue where the console fb blanking code calls back into the backlight driver on unblank and then sets the backlight level to 0 after the driver has already set the mode and enabled the backlight. 
bugs: https://bugs.freedesktop.org/show_bug.cgi?id=81382 https://bugs.freedesktop.org/show_bug.cgi?id=70207 Signed-off-by: Alex Deucher Tested-by: David Heidelberger Cc: stable@vger.kernel.org diff --git a/drivers/gpu/drm/radeon/atombios_encoders.c b/drivers/gpu/drm/radeon/atombios_encoders.c index 2b29084..7d68203 100644 --- a/drivers/gpu/drm/radeon/atombios_encoders.c +++ b/drivers/gpu/drm/radeon/atombios_encoders.c @@ -183,7 +183,6 @@ void radeon_atom_backlight_init(struct radeon_encoder *radeon_encoder, struct backlight_properties props; struct radeon_backlight_privdata *pdata; struct radeon_encoder_atom_dig *dig; - u8 backlight_level; char bl_name[16]; /* Mac laptops with multiple GPUs use the gmux driver for backlight @@ -222,12 +221,17 @@ void radeon_atom_backlight_init(struct radeon_encoder *radeon_encoder, pdata->encoder = radeon_encoder; - backlight_level = radeon_atom_get_backlight_level_from_reg(rdev); - dig = radeon_encoder->enc_priv; dig->bl_dev = bd; bd->props.brightness = radeon_atom_backlight_get_brightness(bd); + /* Set a reasonable default here if the level is 0 otherwise + * fbdev will attempt to turn the backlight on after console + * unblanking and it will try and restore 0 which turns the backlight + * off again. + */ + if (bd->props.brightness == 0) + bd->props.brightness = RADEON_MAX_BL_LEVEL; bd->props.power = FB_BLANK_UNBLANK; backlight_update_status(bd); -- cgit v0.10.2 From f53f81b2576a9bd3af947e2b1c3a46dfab51c5ef Mon Sep 17 00:00:00 2001 From: Mario Kleiner Date: Thu, 3 Jul 2014 03:45:02 +0200 Subject: drm/radeon: Prevent too early kms-pageflips triggered by vblank. Since 3.16-rc1 we have this new failure: When the userspace XOrg ddx schedules vblank events to trigger deferred kms-pageflips, e.g., via the OML_sync_control extension call glXSwapBuffersMscOML(), or if glXSwapBuffers() is called immediately after completion of a previous swapbuffers call, e.g., in a tight rendering loop with minimal rendering, it happens frequently that the pageflip ioctl() is executed within the same vblank in which a previous kms-pageflip completed, or - for deferred swaps - always one vblank earlier than requested by the client app. This causes premature pageflips and detection of failure by the ddx, e.g., XOrg log warnings like... "(WW) RADEON(1): radeon_dri2_flip_event_handler: Pageflip completion event has impossible msc 201025 < target_msc 201026" ... and error/invalid return values of glXWaitForSbcOML() and Intel_swap_events extension. The reason is the new way in which kms-pageflips are programmed since 3.16. This commit changes the time window in which the hw can execute pending programmed pageflips. Before, a pending flip would get executed anywhere within the vblank interval. Now a pending flip only gets executed at the leading edge of vblank (start of front porch), making sure that an invocation of the pageflip ioctl() within a given vblank interval will only lead to pageflip completion in the following vblank. Tested to death on a DCE-4 card. 
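To make the failure mode concrete, here is a hypothetical sketch of the userspace-side sanity check that produced the XOrg warning quoted above (the function name and structure are illustrative assumptions, not the actual DDX code). A flip completing in the same vblank in which it was scheduled arrives with a completion msc below the requested target msc:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical mirror of the DDX flip-completion sanity check. */
static void flip_event_handler(uint64_t completion_msc, uint64_t target_msc)
{
	/* A flip can never complete before the vblank it was aimed at. */
	if (completion_msc < target_msc)
		fprintf(stderr,
			"pageflip completion event has impossible msc %" PRIu64
			" < target_msc %" PRIu64 "\n",
			completion_msc, target_msc);
}

With flips now deferred to the start of the front porch, completion_msc >= target_msc holds for flips requested within a given vblank, and a check of this shape stays quiet. 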
Signed-off-by: Mario Kleiner Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c index a03c734..30d242b 100644 --- a/drivers/gpu/drm/radeon/atombios_crtc.c +++ b/drivers/gpu/drm/radeon/atombios_crtc.c @@ -1414,8 +1414,8 @@ static int dce4_crtc_do_set_base(struct drm_crtc *crtc, tmp &= ~EVERGREEN_GRPH_SURFACE_UPDATE_H_RETRACE_EN; WREG32(EVERGREEN_GRPH_FLIP_CONTROL + radeon_crtc->crtc_offset, tmp); - /* set pageflip to happen anywhere in vblank interval */ - WREG32(EVERGREEN_MASTER_UPDATE_MODE + radeon_crtc->crtc_offset, 0); + /* set pageflip to happen only at start of vblank interval (front porch) */ + WREG32(EVERGREEN_MASTER_UPDATE_MODE + radeon_crtc->crtc_offset, 3); if (!atomic && fb && fb != crtc->primary->fb) { radeon_fb = to_radeon_framebuffer(fb); @@ -1614,8 +1614,8 @@ static int avivo_crtc_do_set_base(struct drm_crtc *crtc, tmp &= ~AVIVO_D1GRPH_SURFACE_UPDATE_H_RETRACE_EN; WREG32(AVIVO_D1GRPH_FLIP_CONTROL + radeon_crtc->crtc_offset, tmp); - /* set pageflip to happen anywhere in vblank interval */ - WREG32(AVIVO_D1MODE_MASTER_UPDATE_MODE + radeon_crtc->crtc_offset, 0); + /* set pageflip to happen only at start of vblank interval (front porch) */ + WREG32(AVIVO_D1MODE_MASTER_UPDATE_MODE + radeon_crtc->crtc_offset, 3); if (!atomic && fb && fb != crtc->primary->fb) { radeon_fb = to_radeon_framebuffer(fb); diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index f7ece0f..250bac3 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -2642,8 +2642,9 @@ void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *s for (i = 0; i < rdev->num_crtc; i++) { if (save->crtc_enabled[i]) { tmp = RREG32(EVERGREEN_MASTER_UPDATE_MODE + crtc_offsets[i]); - if ((tmp & 0x3) != 0) { - tmp &= ~0x3; + if ((tmp & 0x7) != 3) { + tmp &= ~0x7; + tmp |= 0x3; WREG32(EVERGREEN_MASTER_UPDATE_MODE + crtc_offsets[i], tmp); } tmp = RREG32(EVERGREEN_GRPH_UPDATE + crtc_offsets[i]); diff --git a/drivers/gpu/drm/radeon/evergreen_reg.h b/drivers/gpu/drm/radeon/evergreen_reg.h index 333d143..23bff59 100644 --- a/drivers/gpu/drm/radeon/evergreen_reg.h +++ b/drivers/gpu/drm/radeon/evergreen_reg.h @@ -239,7 +239,6 @@ # define EVERGREEN_CRTC_V_BLANK (1 << 0) #define EVERGREEN_CRTC_STATUS_POSITION 0x6e90 #define EVERGREEN_CRTC_STATUS_HV_COUNT 0x6ea0 -#define EVERGREEN_MASTER_UPDATE_MODE 0x6ef8 #define EVERGREEN_CRTC_UPDATE_LOCK 0x6ed4 #define EVERGREEN_MASTER_UPDATE_LOCK 0x6ef4 #define EVERGREEN_MASTER_UPDATE_MODE 0x6ef8 diff --git a/drivers/gpu/drm/radeon/rv515.c b/drivers/gpu/drm/radeon/rv515.c index 237dd29..3e21e86 100644 --- a/drivers/gpu/drm/radeon/rv515.c +++ b/drivers/gpu/drm/radeon/rv515.c @@ -406,8 +406,9 @@ void rv515_mc_resume(struct radeon_device *rdev, struct rv515_mc_save *save) for (i = 0; i < rdev->num_crtc; i++) { if (save->crtc_enabled[i]) { tmp = RREG32(AVIVO_D1MODE_MASTER_UPDATE_MODE + crtc_offsets[i]); - if ((tmp & 0x3) != 0) { - tmp &= ~0x3; + if ((tmp & 0x7) != 3) { + tmp &= ~0x7; + tmp |= 0x3; WREG32(AVIVO_D1MODE_MASTER_UPDATE_MODE + crtc_offsets[i], tmp); } tmp = RREG32(AVIVO_D1GRPH_UPDATE + crtc_offsets[i]); -- cgit v0.10.2 From c60381bd82a54233bb46f93be00a4154bd0cf95d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Mon, 14 Jul 2014 15:48:42 +0900 Subject: drm/radeon: Move pinning the BO back to radeon_crtc_page_flip() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As well as 
enabling the vblank interrupt. These shouldn't take any significant amount of time, but at least pinning the BO has actually been seen to fail in practice before, in which case we need to let userspace know about it. Reviewed-by: Christian König Signed-off-by: Michel Dänzer Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 29d9cc0..b720450 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -684,10 +684,9 @@ struct radeon_flip_work { struct work_struct unpin_work; struct radeon_device *rdev; int crtc_id; - struct drm_framebuffer *fb; + uint64_t base; struct drm_pending_vblank_event *event; struct radeon_bo *old_rbo; - struct radeon_bo *new_rbo; struct radeon_fence *fence; }; diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index 65501af..8de5794 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -386,11 +386,6 @@ static void radeon_flip_work_func(struct work_struct *__work) struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[work->crtc_id]; struct drm_crtc *crtc = &radeon_crtc->base; - struct drm_framebuffer *fb = work->fb; - - uint32_t tiling_flags, pitch_pixels; - uint64_t base; - unsigned long flags; int r; @@ -411,26 +406,94 @@ static void radeon_flip_work_func(struct work_struct *__work) radeon_fence_unref(&work->fence); } + /* do the flip (mmio) */ + radeon_page_flip(rdev, radeon_crtc->crtc_id, work->base); + + /* We borrow the event spin lock for protecting flip_status */ + spin_lock_irqsave(&crtc->dev->event_lock, flags); + + /* set the proper interrupt */ + radeon_irq_kms_pflip_irq_get(rdev, radeon_crtc->crtc_id); + + radeon_crtc->flip_status = RADEON_FLIP_SUBMITTED; + spin_unlock_irqrestore(&crtc->dev->event_lock, flags); + up_read(&rdev->exclusive_lock); + + return; + +cleanup: + drm_gem_object_unreference_unlocked(&work->old_rbo->gem_base); + radeon_fence_unref(&work->fence); + kfree(work); + up_read(&rdev->exclusive_lock); +} + +static int radeon_crtc_page_flip(struct drm_crtc *crtc, + struct drm_framebuffer *fb, + struct drm_pending_vblank_event *event, + uint32_t page_flip_flags) +{ + struct drm_device *dev = crtc->dev; + struct radeon_device *rdev = dev->dev_private; + struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); + struct radeon_framebuffer *old_radeon_fb; + struct radeon_framebuffer *new_radeon_fb; + struct drm_gem_object *obj; + struct radeon_flip_work *work; + struct radeon_bo *new_rbo; + uint32_t tiling_flags, pitch_pixels; + uint64_t base; + unsigned long flags; + int r; + + work = kzalloc(sizeof *work, GFP_KERNEL); + if (work == NULL) + return -ENOMEM; + + INIT_WORK(&work->flip_work, radeon_flip_work_func); + INIT_WORK(&work->unpin_work, radeon_unpin_work_func); + + work->rdev = rdev; + work->crtc_id = radeon_crtc->crtc_id; + work->event = event; + + /* schedule unpin of the old buffer */ + old_radeon_fb = to_radeon_framebuffer(crtc->primary->fb); + obj = old_radeon_fb->obj; + + /* take a reference to the old object */ + drm_gem_object_reference(obj); + work->old_rbo = gem_to_radeon_bo(obj); + + new_radeon_fb = to_radeon_framebuffer(fb); + obj = new_radeon_fb->obj; + new_rbo = gem_to_radeon_bo(obj); + + spin_lock(&new_rbo->tbo.bdev->fence_lock); + if (new_rbo->tbo.sync_obj) + work->fence = radeon_fence_ref(new_rbo->tbo.sync_obj); + spin_unlock(&new_rbo->tbo.bdev->fence_lock); + /* pin the new buffer */ - DRM_DEBUG_DRIVER("flip-ioctl() cur_fbo = %p, cur_bbo = 
%p\n", - work->old_rbo, work->new_rbo); + DRM_DEBUG_DRIVER("flip-ioctl() cur_rbo = %p, new_rbo = %p\n", + work->old_rbo, new_rbo); - r = radeon_bo_reserve(work->new_rbo, false); + r = radeon_bo_reserve(new_rbo, false); if (unlikely(r != 0)) { DRM_ERROR("failed to reserve new rbo buffer before flip\n"); goto cleanup; } /* Only 27 bit offset for legacy CRTC */ - r = radeon_bo_pin_restricted(work->new_rbo, RADEON_GEM_DOMAIN_VRAM, + r = radeon_bo_pin_restricted(new_rbo, RADEON_GEM_DOMAIN_VRAM, ASIC_IS_AVIVO(rdev) ? 0 : 1 << 27, &base); if (unlikely(r != 0)) { - radeon_bo_unreserve(work->new_rbo); + radeon_bo_unreserve(new_rbo); r = -EINVAL; DRM_ERROR("failed to pin new rbo buffer before flip\n"); goto cleanup; } - radeon_bo_get_tiling_flags(work->new_rbo, &tiling_flags, NULL); - radeon_bo_unreserve(work->new_rbo); + radeon_bo_get_tiling_flags(new_rbo, &tiling_flags, NULL); + radeon_bo_unreserve(new_rbo); if (!ASIC_IS_AVIVO(rdev)) { /* crtc offset is from display base addr not FB location */ @@ -467,6 +530,7 @@ static void radeon_flip_work_func(struct work_struct *__work) } base &= ~7; } + work->base = base; r = drm_vblank_get(crtc->dev, radeon_crtc->crtc_id); if (r) { @@ -477,100 +541,39 @@ static void radeon_flip_work_func(struct work_struct *__work) /* We borrow the event spin lock for protecting flip_work */ spin_lock_irqsave(&crtc->dev->event_lock, flags); - /* set the proper interrupt */ - radeon_irq_kms_pflip_irq_get(rdev, radeon_crtc->crtc_id); + if (radeon_crtc->flip_status != RADEON_FLIP_NONE) { + DRM_DEBUG_DRIVER("flip queue: crtc already busy\n"); + spin_unlock_irqrestore(&crtc->dev->event_lock, flags); + r = -EBUSY; + goto pflip_cleanup; + } + radeon_crtc->flip_status = RADEON_FLIP_PENDING; + radeon_crtc->flip_work = work; - /* do the flip (mmio) */ - radeon_page_flip(rdev, radeon_crtc->crtc_id, base); + /* update crtc fb */ + crtc->primary->fb = fb; - radeon_crtc->flip_status = RADEON_FLIP_SUBMITTED; spin_unlock_irqrestore(&crtc->dev->event_lock, flags); - up_read(&rdev->exclusive_lock); - return; + queue_work(radeon_crtc->flip_queue, &work->flip_work); + return 0; pflip_cleanup: - if (unlikely(radeon_bo_reserve(work->new_rbo, false) != 0)) { + if (unlikely(radeon_bo_reserve(new_rbo, false) != 0)) { DRM_ERROR("failed to reserve new rbo in error path\n"); goto cleanup; } - if (unlikely(radeon_bo_unpin(work->new_rbo) != 0)) { + if (unlikely(radeon_bo_unpin(new_rbo) != 0)) { DRM_ERROR("failed to unpin new rbo in error path\n"); } - radeon_bo_unreserve(work->new_rbo); + radeon_bo_unreserve(new_rbo); cleanup: drm_gem_object_unreference_unlocked(&work->old_rbo->gem_base); radeon_fence_unref(&work->fence); kfree(work); - up_read(&rdev->exclusive_lock); -} - -static int radeon_crtc_page_flip(struct drm_crtc *crtc, - struct drm_framebuffer *fb, - struct drm_pending_vblank_event *event, - uint32_t page_flip_flags) -{ - struct drm_device *dev = crtc->dev; - struct radeon_device *rdev = dev->dev_private; - struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); - struct radeon_framebuffer *old_radeon_fb; - struct radeon_framebuffer *new_radeon_fb; - struct drm_gem_object *obj; - struct radeon_flip_work *work; - unsigned long flags; - - work = kzalloc(sizeof *work, GFP_KERNEL); - if (work == NULL) - return -ENOMEM; - - INIT_WORK(&work->flip_work, radeon_flip_work_func); - INIT_WORK(&work->unpin_work, radeon_unpin_work_func); - work->rdev = rdev; - work->crtc_id = radeon_crtc->crtc_id; - work->fb = fb; - work->event = event; - - /* schedule unpin of the old buffer */ - old_radeon_fb = 
to_radeon_framebuffer(crtc->primary->fb); - obj = old_radeon_fb->obj; - - /* take a reference to the old object */ - drm_gem_object_reference(obj); - work->old_rbo = gem_to_radeon_bo(obj); - - new_radeon_fb = to_radeon_framebuffer(fb); - obj = new_radeon_fb->obj; - work->new_rbo = gem_to_radeon_bo(obj); - - spin_lock(&work->new_rbo->tbo.bdev->fence_lock); - if (work->new_rbo->tbo.sync_obj) - work->fence = radeon_fence_ref(work->new_rbo->tbo.sync_obj); - spin_unlock(&work->new_rbo->tbo.bdev->fence_lock); - - /* We borrow the event spin lock for protecting flip_work */ - spin_lock_irqsave(&crtc->dev->event_lock, flags); - - if (radeon_crtc->flip_status != RADEON_FLIP_NONE) { - DRM_DEBUG_DRIVER("flip queue: crtc already busy\n"); - spin_unlock_irqrestore(&crtc->dev->event_lock, flags); - drm_gem_object_unreference_unlocked(&work->old_rbo->gem_base); - radeon_fence_unref(&work->fence); - kfree(work); - return -EBUSY; - } - radeon_crtc->flip_status = RADEON_FLIP_PENDING; - radeon_crtc->flip_work = work; - - /* update crtc fb */ - crtc->primary->fb = fb; - - spin_unlock_irqrestore(&crtc->dev->event_lock, flags); - - queue_work(radeon_crtc->flip_queue, &work->flip_work); - - return 0; + return r; } static int -- cgit v0.10.2 From 306f98d9a1079cc78567b1a6a5e94c272c163e47 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Mon, 14 Jul 2014 15:58:03 +0900 Subject: drm/radeon: Complete page flip even if waiting on the BO fence fails MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise the DRM core and userspace will be confused about which BO the CRTC is scanning out. Reviewed-by: Christian König Signed-off-by: Michel Dänzer Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index 8de5794..97ea465 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -390,20 +390,22 @@ static void radeon_flip_work_func(struct work_struct *__work) int r; down_read(&rdev->exclusive_lock); - while (work->fence) { + if (work->fence) { r = radeon_fence_wait(work->fence, false); if (r == -EDEADLK) { up_read(&rdev->exclusive_lock); r = radeon_gpu_reset(rdev); down_read(&rdev->exclusive_lock); } + if (r) + DRM_ERROR("failed to wait on page flip fence (%d)!\n", r); - if (r) { - DRM_ERROR("failed to wait on page flip fence (%d)!\n", - r); - goto cleanup; - } else - radeon_fence_unref(&work->fence); + /* We continue with the page flip even if we failed to wait on + * the fence, otherwise the DRM core and userspace will be + * confused about which BO the CRTC is scanning out + */ + + radeon_fence_unref(&work->fence); } /* do the flip (mmio) */ @@ -418,14 +420,6 @@ static void radeon_flip_work_func(struct work_struct *__work) radeon_crtc->flip_status = RADEON_FLIP_SUBMITTED; spin_unlock_irqrestore(&crtc->dev->event_lock, flags); up_read(&rdev->exclusive_lock); - - return; - -cleanup: - drm_gem_object_unreference_unlocked(&work->old_rbo->gem_base); - radeon_fence_unref(&work->fence); - kfree(work); - up_read(&rdev->exclusive_lock); } static int radeon_crtc_page_flip(struct drm_crtc *crtc, -- cgit v0.10.2 From c89e5be621e84b7b83650d87a956c52c5654c35b Mon Sep 17 00:00:00 2001 From: Mario Kleiner Date: Thu, 17 Jul 2014 01:27:25 +0200 Subject: drm/radeon: Remove redundant fence unref in pageflip path. 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Not needed anymore, as it is already unreffed within radeon_flip_work_func() after its only use. Signed-off-by: Mario Kleiner Reviewed-by: Michel Dänzer Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index 97ea465..bfa3a6c 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -366,7 +366,6 @@ void radeon_crtc_handle_flip(struct radeon_device *rdev, int crtc_id) spin_unlock_irqrestore(&rdev->ddev->event_lock, flags); drm_vblank_put(rdev->ddev, radeon_crtc->crtc_id); - radeon_fence_unref(&work->fence); radeon_irq_kms_pflip_irq_put(rdev, work->crtc_id); queue_work(radeon_crtc->flip_queue, &work->unpin_work); } -- cgit v0.10.2 From 826484977c29b42c8cb8c42bd41acaa6e152a4bb Mon Sep 17 00:00:00 2001 From: Mario Kleiner Date: Thu, 17 Jul 2014 01:37:53 +0200 Subject: drm/radeon: Add missing vblank_put in pageflip ioctl error path. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Mario Kleiner Reviewed-by: Michel Dänzer Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index bfa3a6c..0a22a1b 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -538,7 +538,7 @@ static int radeon_crtc_page_flip(struct drm_crtc *crtc, DRM_DEBUG_DRIVER("flip queue: crtc already busy\n"); spin_unlock_irqrestore(&crtc->dev->event_lock, flags); r = -EBUSY; - goto pflip_cleanup; + goto vblank_cleanup; } radeon_crtc->flip_status = RADEON_FLIP_PENDING; radeon_crtc->flip_work = work; @@ -551,6 +551,9 @@ static int radeon_crtc_page_flip(struct drm_crtc *crtc, queue_work(radeon_crtc->flip_queue, &work->flip_work); return 0; +vblank_cleanup: + drm_vblank_put(crtc->dev, radeon_crtc->crtc_id); + pflip_cleanup: if (unlikely(radeon_bo_reserve(new_rbo, false) != 0)) { DRM_ERROR("failed to reserve new rbo in error path\n"); -- cgit v0.10.2 From 5f87e090a7368adc2290ae17ffd82a070caadd20 Mon Sep 17 00:00:00 2001 From: Mario Kleiner Date: Thu, 17 Jul 2014 02:24:45 +0200 Subject: drm/radeon: Make classic pageflip completion path less racy. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Need to protect mmio flip programming by event lock as well. Need to also first enable pflip irq, then mmio program, otherwise a flip completion may get unnoticed in the vblank of actual completion if the flip is programmed, but radeon_flip_work_func gets preempted immediately after mmio programming and before vblank. In that case the vblank irq handler wouldn't run radeon_crtc_handle_vblank() with the completion check routine, miss the completed flip, and only notice one vblank after actual completion, causing a false/delayed report of flip completion. 
Signed-off-by: Mario Kleiner Reviewed-by: Michel Dänzer Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index 0a22a1b..bf25061 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -407,15 +407,15 @@ static void radeon_flip_work_func(struct work_struct *__work) radeon_fence_unref(&work->fence); } - /* do the flip (mmio) */ - radeon_page_flip(rdev, radeon_crtc->crtc_id, work->base); - /* We borrow the event spin lock for protecting flip_status */ spin_lock_irqsave(&crtc->dev->event_lock, flags); /* set the proper interrupt */ radeon_irq_kms_pflip_irq_get(rdev, radeon_crtc->crtc_id); + /* do the flip (mmio) */ + radeon_page_flip(rdev, radeon_crtc->crtc_id, work->base); + radeon_crtc->flip_status = RADEON_FLIP_SUBMITTED; spin_unlock_irqrestore(&crtc->dev->event_lock, flags); up_read(&rdev->exclusive_lock); -- cgit v0.10.2 From a28d0e873d2899bd750ae495f84fe9c1a2f53809 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 17 Jul 2014 13:50:45 +0300 Subject: wan/x25_asy: integer overflow in x25_asy_change_mtu() If "newmtu * 2 + 4" is too large then it can cause an integer overflow leading to memory corruption. Eric Dumazet suggests that 65534 is a reasonable upper limit. Btw, "newmtu" is not allowed to be a negative number because of the check in dev_set_mtu(), so that's ok. Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller diff --git a/drivers/net/wan/x25_asy.c b/drivers/net/wan/x25_asy.c index 5895f19..fa9fdfa 100644 --- a/drivers/net/wan/x25_asy.c +++ b/drivers/net/wan/x25_asy.c @@ -122,8 +122,12 @@ static int x25_asy_change_mtu(struct net_device *dev, int newmtu) { struct x25_asy *sl = netdev_priv(dev); unsigned char *xbuff, *rbuff; - int len = 2 * newmtu; + int len; + if (newmtu > 65534) + return -EINVAL; + + len = 2 * newmtu; xbuff = kmalloc(len + 4, GFP_ATOMIC); rbuff = kmalloc(len + 4, GFP_ATOMIC); -- cgit v0.10.2 From 5343330010a892b76a97fd93ad3c455a4a32a7fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Thu, 17 Jul 2014 13:33:51 +0200 Subject: net: qmi_wwan: add two Sierra Wireless/Netgear devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add two device IDs found in an out-of-tree driver downloadable from Netgear. Signed-off-by: Bjørn Mork Signed-off-by: David S. 
Miller diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index c4638c6..22756db 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -667,6 +667,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x05c6, 0x9084, 4)}, {QMI_FIXED_INTF(0x05c6, 0x920d, 0)}, {QMI_FIXED_INTF(0x05c6, 0x920d, 5)}, + {QMI_FIXED_INTF(0x0846, 0x68a2, 8)}, {QMI_FIXED_INTF(0x12d1, 0x140c, 1)}, /* Huawei E173 */ {QMI_FIXED_INTF(0x12d1, 0x14ac, 1)}, /* Huawei E1820 */ {QMI_FIXED_INTF(0x16d8, 0x6003, 0)}, /* CMOTech 6003 */ @@ -757,6 +758,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x1199, 0x9054, 8)}, /* Sierra Wireless Modem */ {QMI_FIXED_INTF(0x1199, 0x9055, 8)}, /* Netgear AirCard 341U */ {QMI_FIXED_INTF(0x1199, 0x9056, 8)}, /* Sierra Wireless Modem */ + {QMI_FIXED_INTF(0x1199, 0x9057, 8)}, {QMI_FIXED_INTF(0x1199, 0x9061, 8)}, /* Sierra Wireless Modem */ {QMI_FIXED_INTF(0x1bbb, 0x011e, 4)}, /* Telekom Speedstick LTE II (Alcatel One Touch L100V LTE) */ {QMI_FIXED_INTF(0x1bbb, 0x0203, 2)}, /* Alcatel L800MA */ -- cgit v0.10.2 From c2a6c7813f1ffae636e369b5d7011c9f518d3cd9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Thu, 17 Jul 2014 13:34:09 +0200 Subject: net: huawei_cdc_ncm: add "subclass 3" devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Huawei's usage of the subclass and protocol fields is not 100% clear to us, but there appears to be a very strict system. A device with the "shared" device ID 12d1:1506 and this NCM function was recently reported (showing only default altsetting): Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 1 bAlternateSetting 0 bNumEndpoints 1 bInterfaceClass 255 Vendor Specific Class bInterfaceSubClass 3 bInterfaceProtocol 22 iInterface 8 CDC Network Control Model (NCM) ** UNRECOGNIZED: 05 24 00 10 01 ** UNRECOGNIZED: 06 24 1a 00 01 1f ** UNRECOGNIZED: 0c 24 1b 00 01 00 04 10 14 dc 05 20 ** UNRECOGNIZED: 0d 24 0f 0a 0f 00 00 00 ea 05 03 00 01 ** UNRECOGNIZED: 05 24 06 01 01 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x85 EP 5 IN bmAttributes 3 Transfer Type Interrupt Synch Type None Usage Type Data wMaxPacketSize 0x0010 1x 16 bytes bInterval 9 Cc: Enrico Mioso Signed-off-by: Bjørn Mork Signed-off-by: David S. 
Miller diff --git a/drivers/net/usb/huawei_cdc_ncm.c b/drivers/net/usb/huawei_cdc_ncm.c index 5d95a13..735f7da 100644 --- a/drivers/net/usb/huawei_cdc_ncm.c +++ b/drivers/net/usb/huawei_cdc_ncm.c @@ -194,6 +194,9 @@ static const struct usb_device_id huawei_cdc_ncm_devs[] = { { USB_VENDOR_AND_INTERFACE_INFO(0x12d1, 0xff, 0x02, 0x76), .driver_info = (unsigned long)&huawei_cdc_ncm_info, }, + { USB_VENDOR_AND_INTERFACE_INFO(0x12d1, 0xff, 0x03, 0x16), + .driver_info = (unsigned long)&huawei_cdc_ncm_info, + }, /* Terminating entry */ { -- cgit v0.10.2 From 953c66469735aed8d2ada639a72b150f01dae605 Mon Sep 17 00:00:00 2001 From: Abbas Raza Date: Thu, 17 Jul 2014 19:34:31 +0800 Subject: usb: chipidea: udc: Disable auto ZLP generation on ep0 There are 2 methods for ZLP (zero-length packet) generation: 1) In software 2) Automatic generation by device controller 1) is implemented in the UDC driver and it attaches a ZLP to the IN packet if descriptor->size < wLength 2) can be enabled/disabled by setting the ZLT bit in the QH When gadget ffs is connected to an Ubuntu host, the host sends a get descriptor request and wLength in the setup packet is 255, while the size of the descriptor which will be sent by the gadget in the IN packet is 64 bytes. So the composite driver sets req->zero = 1. In the UDC driver, the following code will then be executed: if (hwreq->req.zero && hwreq->req.length && (hwreq->req.length % hwep->ep.maxpacket == 0)) add_td_to_list(hwep, hwreq, 0); Case-A: So in the case of an Ubuntu host, the UDC driver will attach a ZLP to the IN packet. The Ubuntu host will request 255 bytes in the IN request, the gadget will send 64 bytes with a ZLP, and the host will come to know that there is no more data. But hold on: by default ZLT=0 for endpoint 0, so hardware also tries to automatically generate the ZLP, which blocks enumeration for ~6 seconds due to an endpoint 0 STALL; NAKs are sent to the host for any requests (OUT/PING) Case-B: In the case when gadget ffs is connected to an Apple device, the Apple device sends a setup packet with wLength=64. So descriptor->size = 64 and wLength=64, therefore req->zero = 0 and the UDC driver will not attach any ZLP to the IN packet. The Apple device requests 64 bytes, gets 64 bytes and doesn't request any further IN data. But ZLT=0 by default for endpoint 0, so hardware tries to automatically generate the ZLP, which blocks enumeration for ~6 seconds due to an endpoint 0 STALL; NAKs are sent to the host for any requests (OUT/PING) According to the USB 2.0 specs: 8.5.3.2 Variable-length Data Stage A control pipe may have a variable-length data phase in which the host requests more data than is contained in the specified data structure. When all of the data structure is returned to the host, the function should indicate that the Data stage is ended by returning a packet that is shorter than the MaxPacketSize for the pipe. If the data structure is an exact multiple of wMaxPacketSize for the pipe, the function will return a zero-length packet to indicate the end of the Data stage. In Case-A mentioned above: If we disable software ZLP generation & ZLT=0 for endpoint 0, OR if software ZLP generation is not disabled but we set ZLT=1 for endpoint 0, then enumeration doesn't block for 6 seconds. In Case-B mentioned above: If we disable software ZLP generation & ZLT=0 for endpoint 0, then enumeration still blocks due to the ZLP automatically generated by hardware and the host not needing it. But if we keep software ZLP generation enabled and we set ZLT=1 for endpoint 0, then enumeration doesn't block for 6 seconds. 
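The software-side rule the driver applies can be restated in isolation as follows (a minimal sketch; need_sw_zlp is a hypothetical helper, the condition mirrors the UDC code quoted above):

/* A software ZLP is needed only when the host asked for more data than
 * we return (req->zero, set by the composite layer) and the reply is a
 * non-empty exact multiple of the endpoint's max packet size. */
static bool need_sw_zlp(const struct usb_request *req, unsigned int maxpacket)
{
	return req->zero && req->length && (req->length % maxpacket == 0);
}
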
So the proper solution for this issue seems to be to disable automatic ZLP generation by hardware (i.e. by setting ZLT=1 for endpoint 0) and let software (the UDC driver) handle ZLP generation based on the req->zero field. Cc: stable@vger.kernel.org Signed-off-by: Abbas Raza Signed-off-by: Peter Chen Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/usb/chipidea/udc.c b/drivers/usb/chipidea/udc.c index 9d2b673..b8125aa 100644 --- a/drivers/usb/chipidea/udc.c +++ b/drivers/usb/chipidea/udc.c @@ -1169,8 +1169,8 @@ static int ep_enable(struct usb_ep *ep, if (hwep->type == USB_ENDPOINT_XFER_CONTROL) cap |= QH_IOS; - if (hwep->num) - cap |= QH_ZLT; + + cap |= QH_ZLT; cap |= (hwep->ep.maxpacket << __ffs(QH_MAX_PKT)) & QH_MAX_PKT; /* * For ISO-TX, we set mult at QH as the largest value, and use -- cgit v0.10.2 From bb86cf569bbd7ad4dce581a37c7fbd748057e9dc Mon Sep 17 00:00:00 2001 From: Gavin Guo Date: Fri, 18 Jul 2014 01:12:13 +0800 Subject: usb: Check if port status is equal to RxDetect When using a USB 3.0 pen drive with the [AMD] FCH USB XHCI Controller [1022:7814], on the second hotplug the USB 3.0 pen drive is recognized as a high-speed device. After bisecting the kernel, I found that commit 41e7e056cdc662f704fa9262e5c6e213b4ab45dd (USB: Allow USB 3.0 ports to be disabled.) causes the bug. After doing some experiments, I found the bug can be fixed by avoiding execution of the function hub_usb3_port_disable(). Because the port status with the [AMD] FCH USB XHCI Controller [1022:7814] is already in RxDetect (I tried printing out the port status before setting it to the Disabled state), it's reasonable to check the port status before really executing hub_usb3_port_disable(). Fixes: 41e7e056cdc6 (USB: Allow USB 3.0 ports to be disabled.) Signed-off-by: Gavin Guo Acked-by: Alan Stern Cc: Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 21b99b4..0e950ad 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -889,6 +889,25 @@ static int hub_usb3_port_disable(struct usb_hub *hub, int port1) if (!hub_is_superspeed(hub->hdev)) return -EINVAL; + ret = hub_port_status(hub, port1, &portstatus, &portchange); + if (ret < 0) + return ret; + + /* + * USB controller Advanced Micro Devices, Inc. [AMD] FCH USB XHCI + * Controller [1022:7814] will have spurious result making the following + * usb 3.0 device hotplugging route to the 2.0 root hub and recognized + * as high-speed device if we set the usb 3.0 port link state to + * Disabled. Since it's already in USB_SS_PORT_LS_RX_DETECT state, we + * check the state here to avoid the bug. + */ + if ((portstatus & USB_PORT_STAT_LINK_STATE) == + USB_SS_PORT_LS_RX_DETECT) { + dev_dbg(&hub->ports[port1 - 1]->dev, + "Not disabling port; link state is RxDetect\n"); + return ret; + } + ret = hub_set_port_link_state(hub, port1, USB_SS_PORT_LS_SS_DISABLED); if (ret) return ret; -- cgit v0.10.2 From 2b1987a9f1e6250c962ca13820d3c69817879266 Mon Sep 17 00:00:00 2001 From: Brian W Hart Date: Fri, 27 Jun 2014 16:09:39 -0500 Subject: cpufreq: make table sentinel macros unsigned to match use MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 5eeaf1f18973 (cpufreq: Fix build error on some platforms that use cpufreq_for_each_*) moved function cpufreq_next_valid() to a public header. 
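The kind of comparison involved is easy to reproduce outside the kernel (a reduced sketch; TABLE_END and is_end are hypothetical stand-ins for the real macro and caller):

/* The sentinel is a plain ~1, i.e. a signed int with value -2, while
 * the table field it is compared against is unsigned int. */
#define TABLE_END ~1

static int is_end(unsigned int frequency)
{
	/* -Wsign-compare warns here: unsigned vs. signed comparison */
	return frequency == TABLE_END;
}

/* Defining the sentinel as ~1u makes both operands unsigned and
 * silences the warning without changing the stored bit pattern. */
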
Warnings are now generated when objects including that header are built with -Wsign-compare (as an out-of-tree module might be): .../include/linux/cpufreq.h: In function ‘cpufreq_next_valid’: .../include/linux/cpufreq.h:519:27: warning: comparison between signed and unsigned integer expressions [-Wsign-compare] while ((*pos)->frequency != CPUFREQ_TABLE_END) ^ .../include/linux/cpufreq.h:520:25: warning: comparison between signed and unsigned integer expressions [-Wsign-compare] if ((*pos)->frequency != CPUFREQ_ENTRY_INVALID) ^ Constants CPUFREQ_ENTRY_INVALID and CPUFREQ_TABLE_END are signed, but are used with the unsigned member 'frequency' of cpufreq_frequency_table. Update the macro definitions to be explicitly unsigned to match their use. This also corrects potentially wrong behavior of clk_rate_table_iter() if unsigned long is wider than unsigned int. Fixes: 5eeaf1f18973 (cpufreq: Fix build error on some platforms that use cpufreq_for_each_*) Signed-off-by: Brian W Hart Reviewed-by: Simon Horman Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index ec4112d..8f8ae95 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -482,8 +482,8 @@ extern struct cpufreq_governor cpufreq_gov_conservative; *********************************************************************/ /* Special Values of .frequency field */ -#define CPUFREQ_ENTRY_INVALID ~0 -#define CPUFREQ_TABLE_END ~1 +#define CPUFREQ_ENTRY_INVALID ~0u +#define CPUFREQ_TABLE_END ~1u /* Special Values of .flags field */ #define CPUFREQ_BOOST_FREQ (1 << 0) -- cgit v0.10.2 From 2ef82d24f445e82f80e235f44eb9d1bc933e3670 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Wed, 16 Jul 2014 00:00:45 -0700 Subject: Drivers: hv: hv_fcopy: fix a race condition for SMP guest We should schedule the 5s "timer work" before starting the data transfer; otherwise, the data transfer code may finish so fast on another virtual cpu that the code (fcopy_write()) trying to cancel the 5s "timer work" can occasionally fail because the "timer work" may not have been scheduled yet, and as a result the fcopy process will be wrongly aborted by fcopy_work_func() in 5s. Thanks to Liz Zhang for the initial investigation of the bug. This addresses https://bugzilla.redhat.com/show_bug.cgi?id=1118123 Tested-by: Liz Zhang Cc: Haiyang Zhang Cc: stable@vger.kernel.org Signed-off-by: Dexuan Cui Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/hv/hv_fcopy.c b/drivers/hv/hv_fcopy.c index eaaa3d8..23b2ce2 100644 --- a/drivers/hv/hv_fcopy.c +++ b/drivers/hv/hv_fcopy.c @@ -246,8 +246,8 @@ void hv_fcopy_onchannelcallback(void *context) /* * Send the information to the user-level daemon. */ - fcopy_send_data(); schedule_delayed_work(&fcopy_work, 5*HZ); + fcopy_send_data(); return; } icmsghdr->icflags = ICMSGHDRFLAG_TRANSACTION | ICMSGHDRFLAG_RESPONSE; -- cgit v0.10.2 From 03e97220b99b8b691ea5b130b7b4c135c9662792 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Thu, 17 Jul 2014 12:20:14 +0200 Subject: ARM: clk-imx6q: parent lvds_sel input from upstream clock gates The i.MX6 reference manual doesn't make a clear distinction between the fixed clock divider and the enable gate for the pcie and sata reference clocks. This led to the lvds mux inputs in the imx6q clk driver being parented to the ref clock (which is the divider) instead of the actual gate, which in turn prevents the upstream clock from actually being enabled when lvds clk out is active. 
This fixes a hard machine hang regression in kernel 3.16 for boards where only pcie is active but no sata, as with this kernel version the imx6-pcie driver is no longer enabling the upstream clock directly but only lvds clk out. Reported-by: Arne Ruhnau Signed-off-by: Lucas Stach Tested-by: Arne Ruhnau Signed-off-by: Shawn Guo diff --git a/arch/arm/mach-imx/clk-imx6q.c b/arch/arm/mach-imx/clk-imx6q.c index 8e795de..8556c78 100644 --- a/arch/arm/mach-imx/clk-imx6q.c +++ b/arch/arm/mach-imx/clk-imx6q.c @@ -70,7 +70,7 @@ static const char *cko_sels[] = { "cko1", "cko2", }; static const char *lvds_sels[] = { "dummy", "dummy", "dummy", "dummy", "dummy", "dummy", "pll4_audio", "pll5_video", "pll8_mlb", "enet_ref", - "pcie_ref", "sata_ref", + "pcie_ref_125m", "sata_ref_100m", }; enum mx6q_clks { @@ -491,7 +491,7 @@ static void __init imx6q_clocks_init(struct device_node *ccm_node) /* All existing boards with PCIe use LVDS1 */ if (IS_ENABLED(CONFIG_PCI_IMX6)) - clk_set_parent(clk[lvds1_sel], clk[sata_ref]); + clk_set_parent(clk[lvds1_sel], clk[sata_ref_100m]); /* Set initial power mode */ imx6q_set_lpm(WAIT_CLOCKED); -- cgit v0.10.2 From 79272b3562bb44ce7dc720cd13136f5a4a53c618 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Fri, 20 Jun 2014 09:36:41 -0400 Subject: GFS2: Only wait for demote when last holder is dequeued Function gfs2_glock_dq_wait is supposed to dequeue a glock and then wait for the lock to be demoted. The problem is, if this is a shared lock, its demote will depend on the other holders, which means you might end up waiting forever because the other process is blocked. This problem is especially apparent when dealing with nested flocks. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index c355f73..278fae5 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1128,7 +1128,9 @@ void gfs2_glock_dq_wait(struct gfs2_holder *gh) struct gfs2_glock *gl = gh->gh_gl; gfs2_glock_dq(gh); might_sleep(); - wait_on_bit(&gl->gl_flags, GLF_DEMOTE, gfs2_glock_demote_wait, TASK_UNINTERRUPTIBLE); + if (!find_first_holder(gl)) + wait_on_bit(&gl->gl_flags, GLF_DEMOTE, gfs2_glock_demote_wait, + TASK_UNINTERRUPTIBLE); } /** -- cgit v0.10.2 From 94a09a3999ee978e097b5aad74034ed43bae56db Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Mon, 23 Jun 2014 14:43:32 +0100 Subject: GFS2: Fix race in glock lru glock disposal We must not leave items on the LRU list with GLF_LOCK set, since they can be removed if the glock is brought back into use, which may then potentially result in a hang, waiting for GLF_LOCK to clear. It doesn't happen very often, since it requires a glock that has not been used for a long time to be brought back into use at the same moment that the shrinker is part way through disposing of glocks. The fix is to set GLF_LOCK at a later time, when we already know that the other locks can be obtained. Also, we now only release the lru_lock in case a resched is needed, rather than on every iteration. 
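The locking pattern this moves to can be shown in isolation (a generic sketch, not the GFS2 code; drain_list is a hypothetical function):

/* cond_resched_lock() drops and re-takes the spinlock only when a
 * reschedule is pending or the lock is contended, so the common case
 * now keeps lru_lock held across iterations instead of cycling it
 * on every pass through the loop. */
static void drain_list(struct list_head *head, spinlock_t *lock)
{
	spin_lock(lock);
	while (!list_empty(head)) {
		struct list_head *entry = head->next;

		list_del_init(entry);
		/* ... dispose of the entry ... */
		cond_resched_lock(lock);
	}
	spin_unlock(lock);
}
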
Signed-off-by: Steven Whitehouse diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 278fae5..c1e5b12 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1406,12 +1406,16 @@ __acquires(&lru_lock) gl = list_entry(list->next, struct gfs2_glock, gl_lru); list_del_init(&gl->gl_lru); if (!spin_trylock(&gl->gl_spin)) { +add_back_to_lru: list_add(&gl->gl_lru, &lru_list); atomic_inc(&lru_count); continue; } + if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { + spin_unlock(&gl->gl_spin); + goto add_back_to_lru; + } clear_bit(GLF_LRU, &gl->gl_flags); - spin_unlock(&lru_lock); gl->gl_lockref.count++; if (demote_ok(gl)) handle_callback(gl, LM_ST_UNLOCKED, 0, false); @@ -1419,7 +1423,7 @@ __acquires(&lru_lock) if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) gl->gl_lockref.count--; spin_unlock(&gl->gl_spin); - spin_lock(&lru_lock); + cond_resched_lock(&lru_lock); } } @@ -1444,7 +1448,7 @@ static long gfs2_scan_glock_lru(int nr) gl = list_entry(lru_list.next, struct gfs2_glock, gl_lru); /* Test for being demotable */ - if (!test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { + if (!test_bit(GLF_LOCK, &gl->gl_flags)) { list_move(&gl->gl_lru, &dispose); atomic_dec(&lru_count); freed++; -- cgit v0.10.2 From fe0bbd2986996b9efe3a78bf5a591b0496c7afea Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Mon, 23 Jun 2014 14:50:20 +0100 Subject: GFS2: Use GFP_NOFS when allocating glocks Normally GFP_KERNEL is ok here, but there is now a rarely used code path relating to deallocation of unlinked inodes (in certain corner cases) which if hit at times of memory shortage can cause recursion while trying to free memory. One solution would be to try and move the gfs2_glock_get() call so that it is no longer called while another glock is held, but that doesn't look at all easy, so GFP_NOFS is the best solution for the time being. Signed-off-by: Steven Whitehouse diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index c1e5b12..b703dcc 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -731,14 +731,14 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, cachep = gfs2_glock_aspace_cachep; else cachep = gfs2_glock_cachep; - gl = kmem_cache_alloc(cachep, GFP_KERNEL); + gl = kmem_cache_alloc(cachep, GFP_NOFS); if (!gl) return -ENOMEM; memset(&gl->gl_lksb, 0, sizeof(struct dlm_lksb)); if (glops->go_flags & GLOF_LVB) { - gl->gl_lksb.sb_lvbptr = kzalloc(GFS2_MIN_LVB_SIZE, GFP_KERNEL); + gl->gl_lksb.sb_lvbptr = kzalloc(GFS2_MIN_LVB_SIZE, GFP_NOFS); if (!gl->gl_lksb.sb_lvbptr) { kmem_cache_free(cachep, gl); return -ENOMEM; -- cgit v0.10.2 From 6ec43b1838bd71633ac3f853c63ddf1f5940b1ed Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Wed, 25 Jun 2014 20:40:45 +0200 Subject: GFS2: replace count*size kzalloc by kcalloc kcalloc manages count*sizeof overflow. 
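The difference at the allocation site, in isolation (a sketch; alloc_table is a hypothetical caller):

static u32 *alloc_table(size_t count)
{
	/* kzalloc(count * sizeof(u32), GFP_NOFS) would compute the size
	 * with an unchecked multiplication that can wrap and silently
	 * under-allocate; kcalloc() performs the same zeroed allocation
	 * but returns NULL when count * sizeof(u32) would overflow. */
	return kcalloc(count, sizeof(u32), GFP_NOFS);
}
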
Cc: cluster-devel@redhat.com Signed-off-by: Fabian Frederick Signed-off-by: Steven Whitehouse diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index 91f274d..4fafea1 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -1036,8 +1036,8 @@ static int set_recover_size(struct gfs2_sbd *sdp, struct dlm_slot *slots, new_size = old_size + RECOVER_SIZE_INC; - submit = kzalloc(new_size * sizeof(uint32_t), GFP_NOFS); - result = kzalloc(new_size * sizeof(uint32_t), GFP_NOFS); + submit = kcalloc(new_size, sizeof(uint32_t), GFP_NOFS); + result = kcalloc(new_size, sizeof(uint32_t), GFP_NOFS); if (!submit || !result) { kfree(submit); kfree(result); -- cgit v0.10.2 From 5bef3e7cf18c56cc733777c61b6b61a0b8a62b35 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Thu, 26 Jun 2014 10:46:25 -0400 Subject: GFS2: Allow flocks to use normal glock dq rather than dq_wait This patch allows flock glocks to use a non-blocking dequeue rather than dq_wait. It also reverts the previous patch I had posted regarding dq_wait. The reverted patch isn't necessarily a bad idea, but I decided this might avoid unforeseen side effects, and was therefore safer. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 4fc3a30..491e8e0 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -991,7 +991,7 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl) goto out; flock_lock_file_wait(file, &(struct file_lock){.fl_type = F_UNLCK}); - gfs2_glock_dq_wait(fl_gh); + gfs2_glock_dq(fl_gh); gfs2_holder_reinit(state, flags, fl_gh); } else { error = gfs2_glock_get(GFS2_SB(&ip->i_inode), ip->i_no_addr, diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index b703dcc..ee4e04f 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1128,9 +1128,7 @@ void gfs2_glock_dq_wait(struct gfs2_holder *gh) struct gfs2_glock *gl = gh->gh_gl; gfs2_glock_dq(gh); might_sleep(); - if (!find_first_holder(gl)) - wait_on_bit(&gl->gl_flags, GLF_DEMOTE, gfs2_glock_demote_wait, - TASK_UNINTERRUPTIBLE); + wait_on_bit(&gl->gl_flags, GLF_DEMOTE, gfs2_glock_demote_wait, TASK_UNINTERRUPTIBLE); } /** -- cgit v0.10.2 From 97a4f1d7653684fff0d50e9328917506f06e9d79 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Thu, 26 Jun 2014 10:47:48 -0400 Subject: GFS2: Allow caching of glocks for flock This patch removes the GLF_NOCACHE flag from the glocks associated with flocks. There should be no good reason not to cache glocks for flocks: they only force the glock to be demoted before they can be reacquired, which can slow down performance and even cause glock hangs, especially in cases where the flocks are held in Shared (SH) mode. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 491e8e0..26b3f95 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -981,7 +981,7 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl) int error = 0; state = (fl->fl_type == F_WRLCK) ? LM_ST_EXCLUSIVE : LM_ST_SHARED; - flags = (IS_SETLKW(cmd) ? 0 : LM_FLAG_TRY) | GL_EXACT | GL_NOCACHE; + flags = (IS_SETLKW(cmd) ? 
0 : LM_FLAG_TRY) | GL_EXACT; mutex_lock(&fp->f_fl_mutex); -- cgit v0.10.2 From 6b49d1d9c3c1088758c6a2758aaa5d236ef609e2 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Sun, 29 Jun 2014 12:21:39 +0200 Subject: GFS2: memcontrol: Spelling s/invlidate/invalidate/ Signed-off-by: Geert Uytterhoeven Cc: cluster-devel@redhat.com Signed-off-by: Steven Whitehouse diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index fc11007..2ffc67d 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -234,8 +234,8 @@ static void inode_go_sync(struct gfs2_glock *gl) * inode_go_inval - prepare a inode glock to be released * @gl: the glock * @flags: - * - * Normally we invlidate everything, but if we are moving into + * + * Normally we invalidate everything, but if we are moving into * LM_ST_DEFERRED from LM_ST_SHARED or LM_ST_EXCLUSIVE then we * can keep hold of the metadata, since it won't have changed. * -- cgit v0.10.2 From 27ff6a0f7f5bf500e9d2a8760c062789b52c551f Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Wed, 2 Jul 2014 22:05:27 +0200 Subject: GFS2: fs/gfs2/rgrp.c: kernel-doc warning fixes Cc: cluster-devel@redhat.com Signed-off-by: Fabian Frederick Signed-off-by: Steven Whitehouse diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index db629d1..f4cb9c0 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -337,7 +337,7 @@ static bool gfs2_unaligned_extlen(struct gfs2_rbm *rbm, u32 n_unaligned, u32 *le /** * gfs2_free_extlen - Return extent length of free blocks - * @rbm: Starting position + * @rrbm: Starting position * @len: Max length to check * * Starting at the block specified by the rbm, see how many free blocks @@ -2522,7 +2522,7 @@ void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state) /** * gfs2_rlist_free - free a resource group list - * @list: the list of resource groups + * @rlist: the list of resource groups * */ -- cgit v0.10.2 From 29e697b11853d3f83b1864ae385abdad4aa2c361 Mon Sep 17 00:00:00 2001 From: Tomasz Figa Date: Thu, 17 Jul 2014 17:23:44 +0200 Subject: irqchip: gic: Fix core ID calculation when topology is read from DT Certain GIC implementations, namely those found on earlier, single-cluster Exynos SoCs, have registers mapped without per-CPU banking, which means that the driver needs to use a different offset for each CPU. Currently the driver calculates the offset by multiplying the value returned by cpu_logical_map() by the CPU offset parsed from DT. This is correct when the CPU topology is not specified in DT and the aforementioned function returns the core ID alone. However, when DT contains the CPU topology, the function changes to return the cluster ID as well, which is non-zero on the mentioned SoCs and so breaks the calculation in the GIC driver. This patch fixes this by masking out the cluster ID in the CPU offset calculation so that only the core ID is considered. Multi-cluster Exynos SoCs already have banked GIC implementations, so this simple fix should be enough. 
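A worked example of what the masking changes (the MPIDR value is hypothetical):

/* With topology in DT, cpu_logical_map() returns the full MPIDR
 * affinity bits, e.g. 0x0102 for core 2 of cluster 1. */
u32 mpidr   = 0x0102;
u32 core_id = MPIDR_AFFINITY_LEVEL(mpidr, 0);	/* (mpidr >> 0) & 0xff == 2 */
/* percpu_offset * mpidr would index far past the mapped registers;
 * percpu_offset * core_id restores the single-cluster behaviour. */
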
Reported-by: Lorenzo Pieralisi Reported-by: Bartlomiej Zolnierkiewicz Signed-off-by: Tomasz Figa Fixes: db0d4db22a78d ("ARM: gic: allow GIC to support non-banked setups") Cc: # v3.3+ Link: https://lkml.kernel.org/r/1405610624-18722-1-git-send-email-t.figa@samsung.com Signed-off-by: Jason Cooper diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index 66ed892..7c131cf 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -42,6 +42,7 @@ #include #include +#include #include #include #include @@ -954,7 +955,9 @@ void __init gic_init_bases(unsigned int gic_nr, int irq_start, } for_each_possible_cpu(cpu) { - unsigned long offset = percpu_offset * cpu_logical_map(cpu); + u32 mpidr = cpu_logical_map(cpu); + u32 core_id = MPIDR_AFFINITY_LEVEL(mpidr, 0); + unsigned long offset = percpu_offset * core_id; *per_cpu_ptr(gic->dist_base.percpu_base, cpu) = dist_base + offset; *per_cpu_ptr(gic->cpu_base.percpu_base, cpu) = cpu_base + offset; } -- cgit v0.10.2 From dba1fd0bff38966f16bbe194fb451f73ddaafb58 Mon Sep 17 00:00:00 2001 From: Bo Shen Date: Mon, 14 Jul 2014 11:08:14 +0800 Subject: ARM: at91: at91sam9x5: correct typo error for ohci clock Correct the typo error for the second "uhphs_clk". Signed-off-by: Bo Shen Acked-by: Boris Brezillon Signed-off-by: Nicolas Ferre diff --git a/arch/arm/boot/dts/at91sam9x5.dtsi b/arch/arm/boot/dts/at91sam9x5.dtsi index d6133f4..ae34c9c 100644 --- a/arch/arm/boot/dts/at91sam9x5.dtsi +++ b/arch/arm/boot/dts/at91sam9x5.dtsi @@ -1153,8 +1153,7 @@ compatible = "atmel,at91rm9200-ohci", "usb-ohci"; reg = <0x00600000 0x100000>; interrupts = <22 IRQ_TYPE_LEVEL_HIGH 2>; - clocks = <&usb>, <&uhphs_clk>, <&udphs_clk>, - <&uhpck>; + clocks = <&usb>, <&uhphs_clk>, <&uhphs_clk>, <&uhpck>; clock-names = "usb_clk", "ohci_clk", "hclk", "uhpck"; status = "disabled"; }; -- cgit v0.10.2 From 043dfc1b624caf67a52412412a7ccce2d7d2b7f5 Mon Sep 17 00:00:00 2001 From: Boris BREZILLON Date: Mon, 14 Jul 2014 08:39:27 +0200 Subject: ARM: at91/dt: fix usb0 clocks definition in sam9n12 dtsi udphs_clk (USB Device Controller clock) is referenced instead of uhphs_clk (USB Host Controller clock). Signed-off-by: Boris BREZILLON Acked-by: Alexandre Belloni Signed-off-by: Nicolas Ferre diff --git a/arch/arm/boot/dts/at91sam9n12.dtsi b/arch/arm/boot/dts/at91sam9n12.dtsi index 2877959..b84bac5 100644 --- a/arch/arm/boot/dts/at91sam9n12.dtsi +++ b/arch/arm/boot/dts/at91sam9n12.dtsi @@ -925,7 +925,7 @@ compatible = "atmel,at91rm9200-ohci", "usb-ohci"; reg = <0x00500000 0x00100000>; interrupts = <22 IRQ_TYPE_LEVEL_HIGH 2>; - clocks = <&usb>, <&uhphs_clk>, <&udphs_clk>, + clocks = <&usb>, <&uhphs_clk>, <&uhphs_clk>, <&uhpck>; clock-names = "usb_clk", "ohci_clk", "hclk", "uhpck"; status = "disabled"; -- cgit v0.10.2 From e0d69e119fc6bf7cc3c9f791478108c1b925bb2e Mon Sep 17 00:00:00 2001 From: Boris BREZILLON Date: Thu, 17 Jul 2014 21:03:58 +0200 Subject: ARM: at91/dt: add missing clocks property to pwm node in sam9x5.dtsi The pwm driver requires a clocks property referencing the pwm peripheral clk. 
Signed-off-by: Boris BREZILLON Signed-off-by: Nicolas Ferre diff --git a/arch/arm/boot/dts/at91sam9x5.dtsi b/arch/arm/boot/dts/at91sam9x5.dtsi index ae34c9c..5ef716d 100644 --- a/arch/arm/boot/dts/at91sam9x5.dtsi +++ b/arch/arm/boot/dts/at91sam9x5.dtsi @@ -1122,6 +1122,7 @@ compatible = "atmel,at91sam9rl-pwm"; reg = <0xf8034000 0x300>; interrupts = <18 IRQ_TYPE_LEVEL_HIGH 4>; + clocks = <&pwm_clk>; #pwm-cells = <3>; status = "disabled"; }; -- cgit v0.10.2 From b32bfc06aefab61acc872dec3222624e6cd867ed Mon Sep 17 00:00:00 2001 From: Romain Degez Date: Fri, 11 Jul 2014 18:08:13 +0200 Subject: ahci: add support for the Promise FastTrak TX8660 SATA HBA (ahci mode) Add support for the Promise FastTrak TX8660 SATA HBA in ahci mode by registering the board in the ahci_pci_tbl[]. Note: this HBA also provides a hardware RAID mode when activated in the BIOS, but specific drivers from the manufacturer are required in this case. Signed-off-by: Romain Degez Tested-by: Romain Degez Signed-off-by: Tejun Heo Cc: stable@vger.kernel.org diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index dae5607..4cd52a4 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -456,6 +456,7 @@ static const struct pci_device_id ahci_pci_tbl[] = { /* Promise */ { PCI_VDEVICE(PROMISE, 0x3f20), board_ahci }, /* PDC42819 */ + { PCI_VDEVICE(PROMISE, 0x3781), board_ahci }, /* FastTrak TX8660 ahci-mode */ /* Asmedia */ { PCI_VDEVICE(ASMEDIA, 0x0601), board_ahci }, /* ASM1060 */ -- cgit v0.10.2 From 9637f30e6b7bc394c08fa9d27d63622f141142e9 Mon Sep 17 00:00:00 2001 From: Tomasz Figa Date: Wed, 16 Jul 2014 02:59:18 +0900 Subject: ARM: EXYNOS: Fix core ID used by platsmp and hotplug code When the CPU topology is specified in the device tree, cpu_logical_map() does not return the core ID anymore, but rather the full MPIDR value. This breaks the existing calculation of PMU register offsets on Exynos SoCs. This patch fixes the problem by adjusting the code to use only the core ID bits of the value returned by cpu_logical_map(), to allow the CPU topology to be specified in the device tree on Exynos SoCs. Signed-off-by: Tomasz Figa Signed-off-by: Kukjin Kim Signed-off-by: Olof Johansson diff --git a/arch/arm/mach-exynos/hotplug.c b/arch/arm/mach-exynos/hotplug.c index 8a134d0..920a4ba 100644 --- a/arch/arm/mach-exynos/hotplug.c +++ b/arch/arm/mach-exynos/hotplug.c @@ -40,15 +40,17 @@ static inline void cpu_leave_lowpower(void) static inline void platform_do_lowpower(unsigned int cpu, int *spurious) { + u32 mpidr = cpu_logical_map(cpu); + u32 core_id = MPIDR_AFFINITY_LEVEL(mpidr, 0); + for (;;) { - /* make cpu1 to be turned off at next WFI command */ - if (cpu == 1) - exynos_cpu_power_down(cpu); + /* Turn the CPU off on next WFI instruction. 
*/ + exynos_cpu_power_down(core_id); wfi(); - if (pen_release == cpu_logical_map(cpu)) { + if (pen_release == core_id) { /* * OK, proper wakeup, we're done */ diff --git a/arch/arm/mach-exynos/platsmp.c b/arch/arm/mach-exynos/platsmp.c index 1c8d31e..50b9aad 100644 --- a/arch/arm/mach-exynos/platsmp.c +++ b/arch/arm/mach-exynos/platsmp.c @@ -90,7 +90,8 @@ static void exynos_secondary_init(unsigned int cpu) static int exynos_boot_secondary(unsigned int cpu, struct task_struct *idle) { unsigned long timeout; - unsigned long phys_cpu = cpu_logical_map(cpu); + u32 mpidr = cpu_logical_map(cpu); + u32 core_id = MPIDR_AFFINITY_LEVEL(mpidr, 0); int ret = -ENOSYS; /* @@ -104,17 +105,18 @@ static int exynos_boot_secondary(unsigned int cpu, struct task_struct *idle) * the holding pen - release it, then wait for it to flag * that it has been released by resetting pen_release. * - * Note that "pen_release" is the hardware CPU ID, whereas + * Note that "pen_release" is the hardware CPU core ID, whereas * "cpu" is Linux's internal ID. */ - write_pen_release(phys_cpu); + write_pen_release(core_id); - if (!exynos_cpu_power_state(cpu)) { - exynos_cpu_power_up(cpu); + if (!exynos_cpu_power_state(core_id)) { + exynos_cpu_power_up(core_id); timeout = 10; /* wait max 10 ms until cpu1 is on */ - while (exynos_cpu_power_state(cpu) != S5P_CORE_LOCAL_PWR_EN) { + while (exynos_cpu_power_state(core_id) + != S5P_CORE_LOCAL_PWR_EN) { if (timeout-- == 0) break; @@ -145,20 +147,20 @@ static int exynos_boot_secondary(unsigned int cpu, struct task_struct *idle) * Try to set boot address using firmware first * and fall back to boot register if it fails. */ - ret = call_firmware_op(set_cpu_boot_addr, phys_cpu, boot_addr); + ret = call_firmware_op(set_cpu_boot_addr, core_id, boot_addr); if (ret && ret != -ENOSYS) goto fail; if (ret == -ENOSYS) { - void __iomem *boot_reg = cpu_boot_reg(phys_cpu); + void __iomem *boot_reg = cpu_boot_reg(core_id); if (IS_ERR(boot_reg)) { ret = PTR_ERR(boot_reg); goto fail; } - __raw_writel(boot_addr, cpu_boot_reg(phys_cpu)); + __raw_writel(boot_addr, cpu_boot_reg(core_id)); } - call_firmware_op(cpu_boot, phys_cpu); + call_firmware_op(cpu_boot, core_id); arch_send_wakeup_ipi_mask(cpumask_of(cpu)); @@ -227,22 +229,24 @@ static void __init exynos_smp_prepare_cpus(unsigned int max_cpus) * boot register if it fails. */ for (i = 1; i < max_cpus; ++i) { - unsigned long phys_cpu; unsigned long boot_addr; + u32 mpidr; + u32 core_id; int ret; - phys_cpu = cpu_logical_map(i); + mpidr = cpu_logical_map(i); + core_id = MPIDR_AFFINITY_LEVEL(mpidr, 0); boot_addr = virt_to_phys(exynos4_secondary_startup); - ret = call_firmware_op(set_cpu_boot_addr, phys_cpu, boot_addr); + ret = call_firmware_op(set_cpu_boot_addr, core_id, boot_addr); if (ret && ret != -ENOSYS) break; if (ret == -ENOSYS) { - void __iomem *boot_reg = cpu_boot_reg(phys_cpu); + void __iomem *boot_reg = cpu_boot_reg(core_id); if (IS_ERR(boot_reg)) break; - __raw_writel(boot_addr, cpu_boot_reg(phys_cpu)); + __raw_writel(boot_addr, cpu_boot_reg(core_id)); } } } -- cgit v0.10.2 From 79a8468747c5f95ed3d5ce8376a3e82e0c5857fc Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Fri, 18 Jul 2014 17:26:41 -0400 Subject: random: check for increase of entropy_count because of signed conversion The expression entropy_count -= ibytes << (ENTROPY_SHIFT + 3) could actually increase entropy_count if during assignment of the unsigned expression on the RHS (mind the -=) we reduce the value modulo 2^width(int) and assign it to entropy_count. Trinity found this. 
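The hazard can be reproduced in isolation (a reduced userspace sketch, not the driver code; the values are chosen to show the wrap):

#include <stdio.h>
#include <stddef.h>

int main(void)
{
	int entropy_count = 100;
	size_t nfrac = 0xFFFFFF00UL;	/* a huge "ibytes << (ENTROPY_SHIFT + 3)" */

	/* The -= converts entropy_count to size_t, subtracts modulo the
	 * unsigned width, then converts the result back to int
	 * (implementation-defined); on common two's-complement ABIs this
	 * yields 356, i.e. the count went UP instead of down. */
	entropy_count -= nfrac;
	printf("%d\n", entropy_count);
	return 0;
}
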
[ Commit modified by tytso to add an additional safety check for a negative entropy_count -- which should never happen, and to also add an additional paranoia check to prevent overly large count values from being passed into urandom_read(). ] Reported-by: Dave Jones Signed-off-by: Hannes Frederic Sowa Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org diff --git a/drivers/char/random.c b/drivers/char/random.c index 0a7ac0a..71529e1 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -641,7 +641,7 @@ retry: } while (unlikely(entropy_count < pool_size-2 && pnfrac)); } - if (entropy_count < 0) { + if (unlikely(entropy_count < 0)) { pr_warn("random: negative entropy/overflow: pool %s count %d\n", r->name, entropy_count); WARN_ON(1); @@ -981,7 +981,7 @@ static size_t account(struct entropy_store *r, size_t nbytes, int min, int reserved) { int entropy_count, orig; - size_t ibytes; + size_t ibytes, nfrac; BUG_ON(r->entropy_count > r->poolinfo->poolfracbits); @@ -999,7 +999,17 @@ retry: } if (ibytes < min) ibytes = 0; - if ((entropy_count -= ibytes << (ENTROPY_SHIFT + 3)) < 0) + + if (unlikely(entropy_count < 0)) { + pr_warn("random: negative entropy count: pool %s count %d\n", + r->name, entropy_count); + WARN_ON(1); + entropy_count = 0; + } + nfrac = ibytes << (ENTROPY_SHIFT + 3); + if ((size_t) entropy_count > nfrac) + entropy_count -= nfrac; + else entropy_count = 0; if (cmpxchg(&r->entropy_count, orig, entropy_count) != orig) @@ -1376,6 +1386,7 @@ urandom_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) "with %d bits of entropy available\n", current->comm, nonblocking_pool.entropy_total); + nbytes = min_t(size_t, nbytes, INT_MAX >> (ENTROPY_SHIFT + 3)); ret = extract_entropy_user(&nonblocking_pool, buf, nbytes); trace_urandom_read(8 * nbytes, ENTROPY_BITS(&nonblocking_pool), -- cgit v0.10.2 From 98ce2deda23a303682a4253f3016a1436f4b2735 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Thu, 17 Jul 2014 16:08:36 +0800 Subject: Btrfs: fix abnormal long waiting in fsync xfstests generic/127 detected this problem. With commit 7fc34a62ca4434a79c68e23e70ed26111b7a4cf8, fsync now only flushes data within the passed range. This is the cause of the above problem: btrfs's fsync has a stage called 'sync log' which will wait for all the ordered extents it has recorded to finish. In xfstests/generic/127, with mixed operations such as truncate, fallocate, punch hole, and mapwrite, we get some pre-allocated extents, and mapwrite will mmap, and then msync. And I found that msync will wait for quite a long time (about 20s in my case); thanks to ftrace, it turns out that the previous fallocate calls 'btrfs_wait_ordered_range()' to flush dirty pages, but as the range of dirty pages may be larger than 'btrfs_wait_ordered_range()' wants, there can be some ordered extents created without their corresponding pages getting flushed; they're then left in memory until we fsync, which runs into the 'sync log' stage, and fsync will just wait for the system writeback thread to flush those pages and get the ordered extents finished, so the latency is inevitable. This adds a flush similar to btrfs_start_ordered_extent() in btrfs_wait_logged_extents() to fix that. 
Reviewed-by: Miao Xie Signed-off-by: Liu Bo Signed-off-by: Chris Mason diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index e12441c..7187b14 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -484,8 +484,19 @@ void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid) log_list); list_del_init(&ordered->log_list); spin_unlock_irq(&log->log_extents_lock[index]); + + if (!test_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags) && + !test_bit(BTRFS_ORDERED_DIRECT, &ordered->flags)) { + struct inode *inode = ordered->inode; + u64 start = ordered->file_offset; + u64 end = ordered->file_offset + ordered->len - 1; + + WARN_ON(!inode); + filemap_fdatawrite_range(inode->i_mapping, start, end); + } wait_event(ordered->wait, test_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags)); + btrfs_put_ordered_extent(ordered); spin_lock_irq(&log->log_extents_lock[index]); } -- cgit v0.10.2 From 0bfaa9c5cb479cebc24979b384374fe47500b4c9 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Mon, 7 Jul 2014 12:34:49 -0500 Subject: btrfs: test for valid bdev before kobj removal in btrfs_rm_device commit 99994cd ("btrfs: dev delete should remove sysfs entry") added a btrfs_kobj_rm_device, which dereferences device->bdev... right after we check whether device->bdev might be NULL. I don't honestly know if it's possible to have a NULL device->bdev here, but assuming that it is (given the test), we need to move the kobject removal to be under that test. (Coverity spotted this) Signed-off-by: Eric Sandeen Signed-off-by: Chris Mason diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 6104676..6cb82f6 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1680,11 +1680,11 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) if (device->bdev == root->fs_info->fs_devices->latest_bdev) root->fs_info->fs_devices->latest_bdev = next_device->bdev; - if (device->bdev) + if (device->bdev) { device->fs_devices->open_devices--; - - /* remove sysfs entry */ - btrfs_kobj_rm_device(root->fs_info, device); + /* remove sysfs entry */ + btrfs_kobj_rm_device(root->fs_info, device); + } call_rcu(&device->rcu, free_device); -- cgit v0.10.2 From ae5db6d12341684913a78b6537c0b9c22c999b5c Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Sun, 20 Jul 2014 12:56:34 +0200 Subject: Revert "um: Fix wait_stub_done() error handling" This reverts commit 0974a9cadc7886f7baaa458bb0c89f5c5f9d458e. The real fix for that issue is to release current->mm->mmap_sem in fix_range_common(). 
Signed-off-by: Richard Weinberger diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c index d531879..908579f 100644 --- a/arch/um/os-Linux/skas/process.c +++ b/arch/um/os-Linux/skas/process.c @@ -54,7 +54,7 @@ static int ptrace_dump_regs(int pid) void wait_stub_done(int pid) { - int n, status, err, bad_stop = 0; + int n, status, err; while (1) { CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED | __WALL)); @@ -74,8 +74,6 @@ void wait_stub_done(int pid) if (((1 << WSTOPSIG(status)) & STUB_DONE_MASK) != 0) return; - else - bad_stop = 1; bad_wait: err = ptrace_dump_regs(pid); @@ -85,10 +83,7 @@ bad_wait: printk(UM_KERN_ERR "wait_stub_done : failed to wait for SIGTRAP, " "pid = %d, n = %d, errno = %d, status = 0x%x\n", pid, n, errno, status); - if (bad_stop) - kill(pid, SIGKILL); - else - fatal_sigsegv(); + fatal_sigsegv(); } extern unsigned long current_stub_stack(void); -- cgit v0.10.2 From 284e6d39516cc7f9fbceebb259849fcb41559a7b Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Sun, 20 Jul 2014 13:09:15 +0200 Subject: um: Ensure that a stub page cannot get unmapped MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Trinity discovered an execution path such that a task can unmap its stub page. Reported-by: Toralf Förster Signed-off-by: Richard Weinberger diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c index 9472079..1fc619e 100644 --- a/arch/um/kernel/tlb.c +++ b/arch/um/kernel/tlb.c @@ -124,6 +124,9 @@ static int add_munmap(unsigned long addr, unsigned long len, struct host_vm_op *last; int ret = 0; + if ((addr >= STUB_START) && (addr < STUB_END)) + return -EINVAL; + if (hvc->index != 0) { last = &hvc->ops[hvc->index - 1]; if ((last->type == MUNMAP) && -- cgit v0.10.2 From 468f65976a8d065ee1f27782337f4ee85a9151c5 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Sun, 20 Jul 2014 13:16:20 +0200 Subject: um: Fix hung task in fix_range_common() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If do_ops() fails we have to release current->mm->mmap_sem, otherwise the failing task will never terminate. Reported-by: Toralf Förster Signed-off-by: Richard Weinberger diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c index 1fc619e..f1b3eb1 100644 --- a/arch/um/kernel/tlb.c +++ b/arch/um/kernel/tlb.c @@ -12,6 +12,7 @@ #include #include #include +#include struct host_vm_change { struct host_vm_op { @@ -286,8 +287,11 @@ void fix_range_common(struct mm_struct *mm, unsigned long start_addr, /* This is not an else because ret is modified above */ if (ret) { printk(KERN_ERR "fix_range_common: failed, killing current " - "process\n"); + "process: %d\n", task_tgid_vnr(current)); + /* We are under mmap_sem, release it such that current can terminate */ + up_write(&current->mm->mmap_sem); force_sig(SIGKILL, current); + do_signal(); } } -- cgit v0.10.2 From bb6a1b2e189f797c0e4a116aec7ce77c344f11e0 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Sun, 20 Jul 2014 13:39:27 +0200 Subject: um: segv: Save regs only in case of a kernel mode fault ...otherwise we lose user mode regs and the resulting stack trace is useless.
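As a loose userspace analogy for the fix_range_common() change above (this is not UML code; the lock, the function names, and the output are invented for illustration), the point is that a path about to abort must drop the write lock it holds, because the teardown that follows needs that same lock:

#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t mm_lock = PTHREAD_RWLOCK_INITIALIZER;	/* stand-in for mmap_sem */

static void teardown(void)
{
	/* would never get the lock if the failing path still held it for writing */
	pthread_rwlock_wrlock(&mm_lock);
	puts("teardown ran");
	pthread_rwlock_unlock(&mm_lock);
}

static void fixup_failed_path(void)
{
	pthread_rwlock_wrlock(&mm_lock);
	/* ... the operation protected by the lock fails here ... */
	pthread_rwlock_unlock(&mm_lock);	/* release before terminating, as in the patch */
	teardown();
}

int main(void)
{
	fixup_failed_path();
	return 0;
}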
Signed-off-by: Richard Weinberger diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c index 974b874..5678c35 100644 --- a/arch/um/kernel/trap.c +++ b/arch/um/kernel/trap.c @@ -206,7 +206,7 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user, int is_write = FAULT_WRITE(fi); unsigned long address = FAULT_ADDRESS(fi); - if (regs) + if (!is_user && regs) current->thread.segv_regs = container_of(regs, struct pt_regs, regs); if (!is_user && (address >= start_vm) && (address < end_vm)) { -- cgit v0.10.2 From 7801db8aec957fa6610efe0ee26a6c8bc0f1d73b Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 17 Jul 2014 17:34:53 -0700 Subject: net_sched: avoid generating same handle for u32 filters When the kernel generates a handle for a u32 filter, it tries to start from the max in the bucket. So when we have a filter with the max (fff) handle, the kernel will always generate the same handle for new filters. This can be shown by the following commands: tc qdisc add dev eth0 ingress tc filter add dev eth0 parent ffff: protocol ip pref 770 handle 800::fff u32 match ip protocol 1 0xff tc filter add dev eth0 parent ffff: protocol ip pref 770 u32 match ip protocol 1 0xff ... we will get some u32 filters with the same handle: # tc filter show dev eth0 parent ffff: filter protocol ip pref 770 u32 filter protocol ip pref 770 u32 fh 800: ht divisor 1 filter protocol ip pref 770 u32 fh 800::fff order 4095 key ht 800 bkt 0 match 00010000/00ff0000 at 8 filter protocol ip pref 770 u32 fh 800::fff order 4095 key ht 800 bkt 0 match 00010000/00ff0000 at 8 filter protocol ip pref 770 u32 fh 800::fff order 4095 key ht 800 bkt 0 match 00010000/00ff0000 at 8 filter protocol ip pref 770 u32 fh 800::fff order 4095 key ht 800 bkt 0 match 00010000/00ff0000 at 8 Handles should be unique. This patch fixes it by looking up a bitmap, so that the handle is guaranteed to be as unique as possible. For compatibility, we still start from 0x800. Cc: "David S. Miller" Signed-off-by: Cong Wang Signed-off-by: Cong Wang Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index c39b583..70c0be8 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -460,17 +461,25 @@ static int u32_delete(struct tcf_proto *tp, unsigned long arg) return 0; } +#define NR_U32_NODE (1<<12) static u32 gen_new_kid(struct tc_u_hnode *ht, u32 handle) { struct tc_u_knode *n; - unsigned int i = 0x7FF; + unsigned long i; + unsigned long *bitmap = kzalloc(BITS_TO_LONGS(NR_U32_NODE) * sizeof(unsigned long), + GFP_KERNEL); + if (!bitmap) + return handle | 0xFFF; for (n = ht->ht[TC_U32_HASH(handle)]; n; n = n->next) - if (i < TC_U32_NODE(n->handle)) - i = TC_U32_NODE(n->handle); - i++; + set_bit(TC_U32_NODE(n->handle), bitmap); - return handle | (i > 0xFFF ? 0xFFF : i); + i = find_next_zero_bit(bitmap, NR_U32_NODE, 0x800); + if (i >= NR_U32_NODE) + i = find_next_zero_bit(bitmap, NR_U32_NODE, 1); + + kfree(bitmap); + return handle | (i >= NR_U32_NODE ? 0xFFF : i); } static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = { -- cgit v0.10.2 From 1a998d3e6bc1e44f4c0bc7509bdedef8ed3845ec Mon Sep 17 00:00:00 2001 From: Zoltan Kiss Date: Fri, 18 Jul 2014 19:08:02 +0100 Subject: xen-netback: Fix handling frag_list on grant op error path The error handling for skbs with a frag_list was completely wrong; it caused double unmap attempts if the error was on the first skb.
Move it to the right place in the loop. Signed-off-by: Zoltan Kiss Reported-by: Armin Zentai Cc: netdev@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: xen-devel@lists.xenproject.org Signed-off-by: David S. Miller diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 1844a47..a773f20 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -1030,10 +1030,16 @@ static int xenvif_tx_check_gop(struct xenvif_queue *queue, { struct gnttab_map_grant_ref *gop_map = *gopp_map; u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx; + /* This always points to the shinfo of the skb being checked, which + * could be either the first or the one on the frag_list + */ struct skb_shared_info *shinfo = skb_shinfo(skb); + /* If this is non-NULL, we are currently checking the frag_list skb, and + * this points to the shinfo of the first one + */ + struct skb_shared_info *first_shinfo = NULL; int nr_frags = shinfo->nr_frags; int i, err; - struct sk_buff *first_skb = NULL; /* Check status of header. */ err = (*gopp_copy)->status; @@ -1086,31 +1092,28 @@ check_frags: xenvif_idx_unmap(queue, pending_idx); } + /* And if we found the error while checking the frag_list, unmap + * the first skb's frags + */ + if (first_shinfo) { + for (j = 0; j < first_shinfo->nr_frags; j++) { + pending_idx = frag_get_pending_idx(&first_shinfo->frags[j]); + xenvif_idx_unmap(queue, pending_idx); + } + } + /* Remember the error: invalidate all subsequent fragments. */ err = newerr; } - if (skb_has_frag_list(skb)) { - first_skb = skb; - skb = shinfo->frag_list; - shinfo = skb_shinfo(skb); + if (skb_has_frag_list(skb) && !first_shinfo) { + first_shinfo = skb_shinfo(skb); + shinfo = skb_shinfo(skb_shinfo(skb)->frag_list); nr_frags = shinfo->nr_frags; goto check_frags; } - /* There was a mapping error in the frag_list skb. We have to unmap - * the first skb's frags - */ - if (first_skb && err) { - int j; - shinfo = skb_shinfo(first_skb); - for (j = 0; j < shinfo->nr_frags; j++) { - pending_idx = frag_get_pending_idx(&shinfo->frags[j]); - xenvif_idx_unmap(queue, pending_idx); - } - } - *gopp_map = gop_map; return err; } -- cgit v0.10.2 From b42cc6e421e7bf74e545483aa34b99d2a2ca6d3a Mon Sep 17 00:00:00 2001 From: Zoltan Kiss Date: Fri, 18 Jul 2014 19:08:03 +0100 Subject: xen-netback: Fix releasing frag_list skbs in error path When the grant operations failed, the skb is freed up eventually, and it tries to release the frags, if there is any. For the main skb nr_frags is set to 0 to avoid this, but on the frag_list it iterates through the frags array, and tries to call put_page on the page pointer which contains garbage at that time. Signed-off-by: Zoltan Kiss Reported-by: Armin Zentai Cc: netdev@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: xen-devel@lists.xenproject.org Signed-off-by: David S. Miller diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index a773f20..8cbf60d 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -1521,7 +1521,16 @@ static int xenvif_tx_submit(struct xenvif_queue *queue) /* Check the remap error code. 
*/ if (unlikely(xenvif_tx_check_gop(queue, skb, &gop_map, &gop_copy))) { + /* If there was an error, xenvif_tx_check_gop is + * expected to release all the frags which were mapped, + * so kfree_skb shouldn't do it again + */ skb_shinfo(skb)->nr_frags = 0; + if (skb_has_frag_list(skb)) { + struct sk_buff *nskb = + skb_shinfo(skb)->frag_list; + skb_shinfo(nskb)->nr_frags = 0; + } kfree_skb(skb); continue; } -- cgit v0.10.2 From 1b860da0404a76af8533099ffe0a965490939369 Mon Sep 17 00:00:00 2001 From: Zoltan Kiss Date: Fri, 18 Jul 2014 19:08:04 +0100 Subject: xen-netback: Fix releasing header slot on error path This patch makes this function aware that the first frag and the header might share the same ring slot. That could happen if the first slot is bigger than PKT_PROT_LEN. Due to this the error path might release that slot twice or never, depending on the error scenario. xenvif_idx_release is also removed from xenvif_idx_unmap, and called separately. Signed-off-by: Zoltan Kiss Reported-by: Armin Zentai Cc: netdev@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: xen-devel@lists.xenproject.org Signed-off-by: David S. Miller diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 8cbf60d..6fff911 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -1039,6 +1039,8 @@ static int xenvif_tx_check_gop(struct xenvif_queue *queue, */ struct skb_shared_info *first_shinfo = NULL; int nr_frags = shinfo->nr_frags; + const bool sharedslot = nr_frags && + frag_get_pending_idx(&shinfo->frags[0]) == pending_idx; int i, err; /* Check status of header. */ @@ -1051,7 +1053,10 @@ static int xenvif_tx_check_gop(struct xenvif_queue *queue, (*gopp_copy)->status, pending_idx, (*gopp_copy)->source.u.ref); - xenvif_idx_release(queue, pending_idx, XEN_NETIF_RSP_ERROR); + /* The first frag might still have this slot mapped */ + if (!sharedslot) + xenvif_idx_release(queue, pending_idx, + XEN_NETIF_RSP_ERROR); } check_frags: @@ -1068,8 +1073,19 @@ check_frags: pending_idx, gop_map->handle); /* Had a previous error? Invalidate this fragment. */ - if (unlikely(err)) + if (unlikely(err)) { xenvif_idx_unmap(queue, pending_idx); + /* If the mapping of the first frag was OK, but + * the header's copy failed, and they are + * sharing a slot, send an error + */ + if (i == 0 && sharedslot) + xenvif_idx_release(queue, pending_idx, + XEN_NETIF_RSP_ERROR); + else + xenvif_idx_release(queue, pending_idx, + XEN_NETIF_RSP_OKAY); + } continue; } @@ -1081,15 +1097,27 @@ check_frags: gop_map->status, pending_idx, gop_map->ref); + xenvif_idx_release(queue, pending_idx, XEN_NETIF_RSP_ERROR); /* Not the first error? Preceding frags already invalidated. */ if (err) continue; - /* First error: invalidate preceding fragments. */ + + /* First error: if the header haven't shared a slot with the + * first frag, release it as well. + */ + if (!sharedslot) + xenvif_idx_release(queue, + XENVIF_TX_CB(skb)->pending_idx, + XEN_NETIF_RSP_OKAY); + + /* Invalidate preceding fragments of this skb. 
*/ for (j = 0; j < i; j++) { pending_idx = frag_get_pending_idx(&shinfo->frags[j]); xenvif_idx_unmap(queue, pending_idx); + xenvif_idx_release(queue, pending_idx, + XEN_NETIF_RSP_OKAY); } /* And if we found the error while checking the frag_list, unmap @@ -1099,6 +1127,8 @@ check_frags: for (j = 0; j < first_shinfo->nr_frags; j++) { pending_idx = frag_get_pending_idx(&first_shinfo->frags[j]); xenvif_idx_unmap(queue, pending_idx); + xenvif_idx_release(queue, pending_idx, + XEN_NETIF_RSP_OKAY); } } @@ -1834,8 +1864,6 @@ void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx) tx_unmap_op.status); BUG(); } - - xenvif_idx_release(queue, pending_idx, XEN_NETIF_RSP_OKAY); } static inline int rx_work_todo(struct xenvif_queue *queue) -- cgit v0.10.2 From d8cfbfc4660054150ca1b7c501a8edc0771022f9 Mon Sep 17 00:00:00 2001 From: Zoltan Kiss Date: Fri, 18 Jul 2014 19:08:05 +0100 Subject: xen-netback: Fix pointer incrementation to avoid incorrect logging Because this pointer is incremented prematurely, the error log contains rubbish. Signed-off-by: Zoltan Kiss Reported-by: Armin Zentai Cc: netdev@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: xen-devel@lists.xenproject.org Signed-off-by: David S. Miller diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 6fff911..c65b636 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -1045,7 +1045,6 @@ static int xenvif_tx_check_gop(struct xenvif_queue *queue, /* Check status of header. */ err = (*gopp_copy)->status; - (*gopp_copy)++; if (unlikely(err)) { if (net_ratelimit()) netdev_dbg(queue->vif->dev, @@ -1058,6 +1057,7 @@ static int xenvif_tx_check_gop(struct xenvif_queue *queue, xenvif_idx_release(queue, pending_idx, XEN_NETIF_RSP_ERROR); } + (*gopp_copy)++; check_frags: for (i = 0; i < nr_frags; i++, gop_map++) { -- cgit v0.10.2 From 9a3c4145af32125c5ee39c0272662b47307a8323 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 20 Jul 2014 21:04:16 -0700 Subject: Linux 3.16-rc6 diff --git a/Makefile b/Makefile index f3c543d..6b27741 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 3 PATCHLEVEL = 16 SUBLEVEL = 0 -EXTRAVERSION = -rc5 +EXTRAVERSION = -rc6 NAME = Shuffling Zombie Juror # *DOCUMENTATION* -- cgit v0.10.2 From 640d7efe4c08f06c4ae5d31b79bd8740e7f6790a Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Mon, 21 Jul 2014 00:06:48 +0100 Subject: dns_resolver: Null-terminate the right string *_result[len] is parsed as *(_result[len]), which is not at all what we want to touch here. Signed-off-by: Ben Hutchings Fixes: 84a7c0b1db1c ("dns_resolver: assure that dns_query() result is null-terminated") Signed-off-by: David S. Miller diff --git a/net/dns_resolver/dns_query.c b/net/dns_resolver/dns_query.c index 9acec61f..dd8696a 100644 --- a/net/dns_resolver/dns_query.c +++ b/net/dns_resolver/dns_query.c @@ -150,7 +150,7 @@ int dns_query(const char *type, const char *name, size_t namelen, goto put; memcpy(*_result, upayload->data, len); - *_result[len] = '\0'; + (*_result)[len] = '\0'; if (_expiry) *_expiry = rkey->expiry; -- cgit v0.10.2 From d46b6bfa7628030a93e05f7087b7c638a85b4a35 Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Mon, 23 Jun 2014 15:55:36 +0200 Subject: batman-adv: drop QinQ claim frames in bridge loop avoidance Since bridge loop avoidance only supports untagged or simple 802.1q tagged VLAN claim frames, claim frames with stacked VLAN headers (QinQ) should be detected and dropped.
Transporting them over the mesh may cause problems on the receivers, or create bogus entries in the local tt tables. Reported-by: Antonio Quartulli Signed-off-by: Simon Wunderlich Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 6f0d9ec..a957c81 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -800,11 +800,6 @@ static int batadv_check_claim_group(struct batadv_priv *bat_priv, bla_dst = (struct batadv_bla_claim_dst *)hw_dst; bla_dst_own = &bat_priv->bla.claim_dest; - /* check if it is a claim packet in general */ - if (memcmp(bla_dst->magic, bla_dst_own->magic, - sizeof(bla_dst->magic)) != 0) - return 0; - /* if announcement packet, use the source, * otherwise assume it is in the hw_src */ @@ -866,12 +861,13 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv, struct batadv_hard_iface *primary_if, struct sk_buff *skb) { - struct batadv_bla_claim_dst *bla_dst; + struct batadv_bla_claim_dst *bla_dst, *bla_dst_own; uint8_t *hw_src, *hw_dst; - struct vlan_ethhdr *vhdr; + struct vlan_hdr *vhdr, vhdr_buf; struct ethhdr *ethhdr; struct arphdr *arphdr; unsigned short vid; + int vlan_depth = 0; __be16 proto; int headlen; int ret; @@ -882,9 +878,24 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv, proto = ethhdr->h_proto; headlen = ETH_HLEN; if (vid & BATADV_VLAN_HAS_TAG) { - vhdr = vlan_eth_hdr(skb); - proto = vhdr->h_vlan_encapsulated_proto; - headlen += VLAN_HLEN; + /* Traverse the VLAN/Ethertypes. + * + * At this point it is known that the first protocol is a VLAN + * header, so start checking at the encapsulated protocol. + * + * The depth of the VLAN headers is recorded to drop BLA claim + * frames encapsulated into multiple VLAN headers (QinQ). + */ + do { + vhdr = skb_header_pointer(skb, headlen, VLAN_HLEN, + &vhdr_buf); + if (!vhdr) + return 0; + + proto = vhdr->h_vlan_encapsulated_proto; + headlen += VLAN_HLEN; + vlan_depth++; + } while (proto == htons(ETH_P_8021Q)); } if (proto != htons(ETH_P_ARP)) @@ -914,6 +925,19 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv, hw_src = (uint8_t *)arphdr + sizeof(struct arphdr); hw_dst = hw_src + ETH_ALEN + 4; bla_dst = (struct batadv_bla_claim_dst *)hw_dst; + bla_dst_own = &bat_priv->bla.claim_dest; + + /* check if it is a claim frame in general */ + if (memcmp(bla_dst->magic, bla_dst_own->magic, + sizeof(bla_dst->magic)) != 0) + return 0; + + /* check if there is a claim frame encapsulated deeper in (QinQ) and + * drop that, as this is not supported by BLA but should also not be + * sent via the mesh. + */ + if (vlan_depth > 1) + return 1; /* check if it is a claim frame. */ ret = batadv_check_claim_group(bat_priv, primary_if, hw_src, hw_dst, -- cgit v0.10.2 From 35df3b298fc8779f7edf4b0228c683f7e98edcd5 Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Thu, 8 May 2014 17:13:15 +0200 Subject: batman-adv: fix TT VLAN inconsistency on VLAN re-add When a VLAN interface (on top of batX) is removed and re-added within a short timeframe, TT does not have enough time to properly clean up. This creates an internal TT state mismatch as the newly created softif_vlan will be initialized from scratch with a TT client count of zero (even if TT entries for this VLAN still exist).
The resulting TT messages are bogus due to the counter / tt client listing mismatch, thus creating inconsistencies on every node in the network To fix this issue destroy_vlan() has to not free the VLAN object immediately but it has to be kept alive until all the TT entries for this VLAN have been removed. destroy_vlan() still removes the sysfs folder so that the user has the feeling that everything went fine. If the same VLAN is re-added before the old object is free'd, then the latter is resurrected and re-used. Implement such behaviour by increasing the reference counter of a softif_vlan object every time a new local TT entry for such VLAN is created and remove the object from the list only when all the TT entries have been destroyed. Signed-off-by: Antonio Quartulli Signed-off-by: Marek Lindner diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index e7ee65d..cbd677f 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -448,10 +448,15 @@ out: * possibly free it * @softif_vlan: the vlan object to release */ -void batadv_softif_vlan_free_ref(struct batadv_softif_vlan *softif_vlan) +void batadv_softif_vlan_free_ref(struct batadv_softif_vlan *vlan) { - if (atomic_dec_and_test(&softif_vlan->refcount)) - kfree_rcu(softif_vlan, rcu); + if (atomic_dec_and_test(&vlan->refcount)) { + spin_lock_bh(&vlan->bat_priv->softif_vlan_list_lock); + hlist_del_rcu(&vlan->list); + spin_unlock_bh(&vlan->bat_priv->softif_vlan_list_lock); + + kfree_rcu(vlan, rcu); + } } /** @@ -505,6 +510,7 @@ int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid) if (!vlan) return -ENOMEM; + vlan->bat_priv = bat_priv; vlan->vid = vid; atomic_set(&vlan->refcount, 1); @@ -516,6 +522,10 @@ int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid) return err; } + spin_lock_bh(&bat_priv->softif_vlan_list_lock); + hlist_add_head_rcu(&vlan->list, &bat_priv->softif_vlan_list); + spin_unlock_bh(&bat_priv->softif_vlan_list_lock); + /* add a new TT local entry. This one will be marked with the NOPURGE * flag */ @@ -523,10 +533,6 @@ int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid) bat_priv->soft_iface->dev_addr, vid, BATADV_NULL_IFINDEX, BATADV_NO_MARK); - spin_lock_bh(&bat_priv->softif_vlan_list_lock); - hlist_add_head_rcu(&vlan->list, &bat_priv->softif_vlan_list); - spin_unlock_bh(&bat_priv->softif_vlan_list_lock); - return 0; } @@ -538,18 +544,13 @@ int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid) static void batadv_softif_destroy_vlan(struct batadv_priv *bat_priv, struct batadv_softif_vlan *vlan) { - spin_lock_bh(&bat_priv->softif_vlan_list_lock); - hlist_del_rcu(&vlan->list); - spin_unlock_bh(&bat_priv->softif_vlan_list_lock); - - batadv_sysfs_del_vlan(bat_priv, vlan); - /* explicitly remove the associated TT local entry because it is marked * with the NOPURGE flag */ batadv_tt_local_remove(bat_priv, bat_priv->soft_iface->dev_addr, vlan->vid, "vlan interface destroyed", false); + batadv_sysfs_del_vlan(bat_priv, vlan); batadv_softif_vlan_free_ref(vlan); } @@ -567,6 +568,8 @@ static int batadv_interface_add_vid(struct net_device *dev, __be16 proto, unsigned short vid) { struct batadv_priv *bat_priv = netdev_priv(dev); + struct batadv_softif_vlan *vlan; + int ret; /* only 802.1Q vlans are supported. 
* batman-adv does not know how to handle other types @@ -576,7 +579,36 @@ static int batadv_interface_add_vid(struct net_device *dev, __be16 proto, vid |= BATADV_VLAN_HAS_TAG; - return batadv_softif_create_vlan(bat_priv, vid); + /* if a new vlan is getting created and it already exists, it means that + * it was not deleted yet. batadv_softif_vlan_get() increases the + * refcount in order to revive the object. + * + * if it does not exist then create it. + */ + vlan = batadv_softif_vlan_get(bat_priv, vid); + if (!vlan) + return batadv_softif_create_vlan(bat_priv, vid); + + /* recreate the sysfs object if it was already destroyed (and it should + * be since we received a kill_vid() for this vlan + */ + if (!vlan->kobj) { + ret = batadv_sysfs_add_vlan(bat_priv->soft_iface, vlan); + if (ret) { + batadv_softif_vlan_free_ref(vlan); + return ret; + } + } + + /* add a new TT local entry. This one will be marked with the NOPURGE + * flag. This must be added again, even if the vlan object already + * exists, because the entry was deleted by kill_vid() + */ + batadv_tt_local_add(bat_priv->soft_iface, + bat_priv->soft_iface->dev_addr, vid, + BATADV_NULL_IFINDEX, BATADV_NO_MARK); + + return 0; } /** diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index d636bde..5f59e7f 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -511,6 +511,7 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr, struct batadv_priv *bat_priv = netdev_priv(soft_iface); struct batadv_tt_local_entry *tt_local; struct batadv_tt_global_entry *tt_global = NULL; + struct batadv_softif_vlan *vlan; struct net_device *in_dev = NULL; struct hlist_head *head; struct batadv_tt_orig_list_entry *orig_entry; @@ -572,6 +573,9 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr, if (!tt_local) goto out; + /* increase the refcounter of the related vlan */ + vlan = batadv_softif_vlan_get(bat_priv, vid); + batadv_dbg(BATADV_DBG_TT, bat_priv, "Creating new local tt entry: %pM (vid: %d, ttvn: %d)\n", addr, BATADV_PRINT_VID(vid), @@ -604,6 +608,7 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr, if (unlikely(hash_added != 0)) { /* remove the reference for the hash */ batadv_tt_local_entry_free_ref(tt_local); + batadv_softif_vlan_free_ref(vlan); goto out; } @@ -1009,6 +1014,7 @@ uint16_t batadv_tt_local_remove(struct batadv_priv *bat_priv, { struct batadv_tt_local_entry *tt_local_entry; uint16_t flags, curr_flags = BATADV_NO_FLAGS; + struct batadv_softif_vlan *vlan; tt_local_entry = batadv_tt_local_hash_find(bat_priv, addr, vid); if (!tt_local_entry) @@ -1039,6 +1045,11 @@ uint16_t batadv_tt_local_remove(struct batadv_priv *bat_priv, hlist_del_rcu(&tt_local_entry->common.hash_entry); batadv_tt_local_entry_free_ref(tt_local_entry); + /* decrease the reference held for this vlan */ + vlan = batadv_softif_vlan_get(bat_priv, vid); + batadv_softif_vlan_free_ref(vlan); + batadv_softif_vlan_free_ref(vlan); + out: if (tt_local_entry) batadv_tt_local_entry_free_ref(tt_local_entry); @@ -1111,6 +1122,7 @@ static void batadv_tt_local_table_free(struct batadv_priv *bat_priv) spinlock_t *list_lock; /* protects write access to the hash lists */ struct batadv_tt_common_entry *tt_common_entry; struct batadv_tt_local_entry *tt_local; + struct batadv_softif_vlan *vlan; struct hlist_node *node_tmp; struct hlist_head *head; uint32_t i; @@ -1131,6 +1143,13 @@ static void batadv_tt_local_table_free(struct batadv_priv 
*bat_priv) tt_local = container_of(tt_common_entry, struct batadv_tt_local_entry, common); + + /* decrease the reference held for this vlan */ + vlan = batadv_softif_vlan_get(bat_priv, + tt_common_entry->vid); + batadv_softif_vlan_free_ref(vlan); + batadv_softif_vlan_free_ref(vlan); + batadv_tt_local_entry_free_ref(tt_local); } spin_unlock_bh(list_lock); @@ -3139,6 +3158,7 @@ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv) struct batadv_hashtable *hash = bat_priv->tt.local_hash; struct batadv_tt_common_entry *tt_common; struct batadv_tt_local_entry *tt_local; + struct batadv_softif_vlan *vlan; struct hlist_node *node_tmp; struct hlist_head *head; spinlock_t *list_lock; /* protects write access to the hash lists */ @@ -3167,6 +3187,12 @@ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv) tt_local = container_of(tt_common, struct batadv_tt_local_entry, common); + + /* decrease the reference held for this vlan */ + vlan = batadv_softif_vlan_get(bat_priv, tt_common->vid); + batadv_softif_vlan_free_ref(vlan); + batadv_softif_vlan_free_ref(vlan); + batadv_tt_local_entry_free_ref(tt_local); } spin_unlock_bh(list_lock); diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 34891a5..8854c05 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -687,6 +687,7 @@ struct batadv_priv_nc { /** * struct batadv_softif_vlan - per VLAN attributes set + * @bat_priv: pointer to the mesh object * @vid: VLAN identifier * @kobj: kobject for sysfs vlan subdirectory * @ap_isolation: AP isolation state @@ -696,6 +697,7 @@ struct batadv_priv_nc { * @rcu: struct used for freeing in a RCU-safe manner */ struct batadv_softif_vlan { + struct batadv_priv *bat_priv; unsigned short vid; struct kobject *kobj; atomic_t ap_isolation; /* boolean */ -- cgit v0.10.2 From 58d4e21e50ff3cc57910a8abc20d7e14375d2f61 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Fri, 18 Jul 2014 11:43:01 -0700 Subject: tracing: Fix wraparound problems in "uptime" trace clock The "uptime" trace clock added in: commit 8aacf017b065a805d27467843490c976835eb4a5 tracing: Add "uptime" trace clock that uses jiffies has wraparound problems when the system has been up more than 1 hour 11 minutes and 34 seconds. It converts jiffies to nanoseconds using: (u64)jiffies_to_usecs(jiffy) * 1000ULL but since jiffies_to_usecs() only returns a 32-bit value, it truncates at 2^32 microseconds. An additional problem on 32-bit systems is that the argument is "unsigned long", so fixing the return value only helps until 2^32 jiffies (49.7 days on a HZ=1000 system). Avoid these problems by using jiffies_64 as our basis, and not converting to nanoseconds (we do convert to clock_t because user facing API must not be dependent on internal kernel HZ values). 
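To make the wraparound concrete: jiffies_to_usecs() returning only 32 bits means the old conversion truncates at 2^32 microseconds, which is about 4295 seconds, i.e. the 1 hour 11 minutes 34 seconds quoted above. A small userspace sketch of that truncation (illustrative constants, not kernel code):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t jiffies64 = 5000000;	/* ~83 minutes of uptime at HZ=1000 */

	/* jiffies_to_usecs()-style truncation to 32 bits, then the old *1000 conversion */
	uint32_t usecs32 = (uint32_t)(jiffies64 * 1000);
	uint64_t old_clock_ns = (uint64_t)usecs32 * 1000ULL;

	printf("real uptime: %llu us, truncated clock: %llu ns\n",
	       (unsigned long long)(jiffies64 * 1000),
	       (unsigned long long)old_clock_ns);
	return 0;
}

Here 5,000,000,000 us of real uptime would be reported as 705,032,704 us worth of nanoseconds, because the value already wrapped once past 2^32.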
Link: http://lkml.kernel.org/p/99d63c5bfe9b320a3b428d773825a37095bf6a51.1405708254.git.tony.luck@intel.com Cc: stable@vger.kernel.org # 3.10+ Fixes: 8aacf017b065 "tracing: Add "uptime" trace clock that uses jiffies" Signed-off-by: Tony Luck Signed-off-by: Steven Rostedt diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index bda9621..291397e 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -823,7 +823,7 @@ static struct { { trace_clock_local, "local", 1 }, { trace_clock_global, "global", 1 }, { trace_clock_counter, "counter", 0 }, - { trace_clock_jiffies, "uptime", 1 }, + { trace_clock_jiffies, "uptime", 0 }, { trace_clock, "perf", 1 }, ARCH_TRACE_CLOCKS }; diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c index 26dc348..57b67b1 100644 --- a/kernel/trace/trace_clock.c +++ b/kernel/trace/trace_clock.c @@ -59,13 +59,14 @@ u64 notrace trace_clock(void) /* * trace_jiffy_clock(): Simply use jiffies as a clock counter. + * Note that this use of jiffies_64 is not completely safe on + * 32-bit systems. But the window is tiny, and the effect if + * we are affected is that we will have an obviously bogus + * timestamp on a trace event - i.e. not life threatening. */ u64 notrace trace_clock_jiffies(void) { - u64 jiffy = jiffies - INITIAL_JIFFIES; - - /* Return nsecs */ - return (u64)jiffies_to_usecs(jiffy) * 1000ULL; + return jiffies_64_to_clock_t(jiffies_64 - INITIAL_JIFFIES); } /* -- cgit v0.10.2 From 10ec9472f05b45c94db3c854d22581a20b97db41 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 21 Jul 2014 07:17:42 +0200 Subject: ipv4: fix buffer overflow in ip_options_compile() There is a benign buffer overflow in ip_options_compile spotted by AddressSanitizer[1] : Its benign because we always can access one extra byte in skb->head (because header is followed by struct skb_shared_info), and in this case this byte is not even used. [28504.910798] ================================================================== [28504.912046] AddressSanitizer: heap-buffer-overflow in ip_options_compile [28504.913170] Read of size 1 by thread T15843: [28504.914026] [] ip_options_compile+0x121/0x9c0 [28504.915394] [] ip_options_get_from_user+0xad/0x120 [28504.916843] [] do_ip_setsockopt.isra.15+0x8df/0x1630 [28504.918175] [] ip_setsockopt+0x30/0xa0 [28504.919490] [] tcp_setsockopt+0x5b/0x90 [28504.920835] [] sock_common_setsockopt+0x5f/0x70 [28504.922208] [] SyS_setsockopt+0xa2/0x140 [28504.923459] [] system_call_fastpath+0x16/0x1b [28504.924722] [28504.925106] Allocated by thread T15843: [28504.925815] [] ip_options_get_from_user+0x35/0x120 [28504.926884] [] do_ip_setsockopt.isra.15+0x8df/0x1630 [28504.927975] [] ip_setsockopt+0x30/0xa0 [28504.929175] [] tcp_setsockopt+0x5b/0x90 [28504.930400] [] sock_common_setsockopt+0x5f/0x70 [28504.931677] [] SyS_setsockopt+0xa2/0x140 [28504.932851] [] system_call_fastpath+0x16/0x1b [28504.934018] [28504.934377] The buggy address ffff880026382828 is located 0 bytes to the right [28504.934377] of 40-byte region [ffff880026382800, ffff880026382828) [28504.937144] [28504.937474] Memory state around the buggy address: [28504.938430] ffff880026382300: ........ 
rrrrrrrr rrrrrrrr rrrrrrrr [28504.939884] ffff880026382400: ffffffff rrrrrrrr rrrrrrrr rrrrrrrr [28504.941294] ffff880026382500: .....rrr rrrrrrrr rrrrrrrr rrrrrrrr [28504.942504] ffff880026382600: ffffffff rrrrrrrr rrrrrrrr rrrrrrrr [28504.943483] ffff880026382700: ffffffff rrrrrrrr rrrrrrrr rrrrrrrr [28504.944511] >ffff880026382800: .....rrr rrrrrrrr rrrrrrrr rrrrrrrr [28504.945573] ^ [28504.946277] ffff880026382900: ffffffff rrrrrrrr rrrrrrrr rrrrrrrr [28505.094949] ffff880026382a00: ffffffff rrrrrrrr rrrrrrrr rrrrrrrr [28505.096114] ffff880026382b00: ffffffff rrrrrrrr rrrrrrrr rrrrrrrr [28505.097116] ffff880026382c00: ffffffff rrrrrrrr rrrrrrrr rrrrrrrr [28505.098472] ffff880026382d00: ffffffff rrrrrrrr rrrrrrrr rrrrrrrr [28505.099804] Legend: [28505.100269] f - 8 freed bytes [28505.100884] r - 8 redzone bytes [28505.101649] . - 8 allocated bytes [28505.102406] x=1..7 - x allocated bytes + (8-x) redzone bytes [28505.103637] ================================================================== [1] https://code.google.com/p/address-sanitizer/wiki/AddressSanitizerForKernel Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index 5e7aece..ad38249 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -288,6 +288,10 @@ int ip_options_compile(struct net *net, optptr++; continue; } + if (unlikely(l < 2)) { + pp_ptr = optptr; + goto error; + } optlen = optptr[1]; if (optlen < 2 || optlen > l) { pp_ptr = optptr; -- cgit v0.10.2 From 26053926feb1c16ade9c30bc7443bf28d829d08e Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 21 Jul 2014 22:27:56 -0700 Subject: sparc: Hook up renameat2 syscall. Signed-off-by: David S. Miller diff --git a/arch/sparc/include/uapi/asm/unistd.h b/arch/sparc/include/uapi/asm/unistd.h index b73274f..42f2bca 100644 --- a/arch/sparc/include/uapi/asm/unistd.h +++ b/arch/sparc/include/uapi/asm/unistd.h @@ -410,8 +410,9 @@ #define __NR_finit_module 342 #define __NR_sched_setattr 343 #define __NR_sched_getattr 344 +#define __NR_renameat2 345 -#define NR_syscalls 345 +#define NR_syscalls 346 /* Bitmask values returned from kern_features system call. 
*/ #define KERN_FEATURE_MIXED_MODE_STACK 0x00000001 diff --git a/arch/sparc/kernel/sys32.S b/arch/sparc/kernel/sys32.S index d066eb1..f834224 100644 --- a/arch/sparc/kernel/sys32.S +++ b/arch/sparc/kernel/sys32.S @@ -48,6 +48,7 @@ SIGN1(sys32_futex, compat_sys_futex, %o1) SIGN1(sys32_recvfrom, compat_sys_recvfrom, %o0) SIGN1(sys32_recvmsg, compat_sys_recvmsg, %o0) SIGN1(sys32_sendmsg, compat_sys_sendmsg, %o0) +SIGN2(sys32_renameat2, sys_renameat2, %o0, %o2) .globl sys32_mmap2 sys32_mmap2: diff --git a/arch/sparc/kernel/systbls_32.S b/arch/sparc/kernel/systbls_32.S index 151ace8..85fe9b1 100644 --- a/arch/sparc/kernel/systbls_32.S +++ b/arch/sparc/kernel/systbls_32.S @@ -86,3 +86,4 @@ sys_call_table: /*330*/ .long sys_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, sys_open_by_handle_at, sys_clock_adjtime /*335*/ .long sys_syncfs, sys_sendmmsg, sys_setns, sys_process_vm_readv, sys_process_vm_writev /*340*/ .long sys_ni_syscall, sys_kcmp, sys_finit_module, sys_sched_setattr, sys_sched_getattr +/*345*/ .long sys_renameat2 diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S index 4bd4e2b..33ecba2 100644 --- a/arch/sparc/kernel/systbls_64.S +++ b/arch/sparc/kernel/systbls_64.S @@ -87,6 +87,7 @@ sys_call_table32: /*330*/ .word compat_sys_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, compat_sys_open_by_handle_at, compat_sys_clock_adjtime .word sys_syncfs, compat_sys_sendmmsg, sys_setns, compat_sys_process_vm_readv, compat_sys_process_vm_writev /*340*/ .word sys_kern_features, sys_kcmp, sys_finit_module, sys_sched_setattr, sys_sched_getattr + .word sys32_renameat2 #endif /* CONFIG_COMPAT */ @@ -165,3 +166,4 @@ sys_call_table: /*330*/ .word sys_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, sys_open_by_handle_at, sys_clock_adjtime .word sys_syncfs, sys_sendmmsg, sys_setns, sys_process_vm_readv, sys_process_vm_writev /*340*/ .word sys_kern_features, sys_kcmp, sys_finit_module, sys_sched_setattr, sys_sched_getattr + .word sys_renameat2 -- cgit v0.10.2
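With the table entries above in place, the new syscall can be exercised from user space even before a libc wrapper exists, for example via syscall(2). The sketch below is a hypothetical test, not part of the patch; 345 is the sparc number added above, and RENAME_NOREPLACE uses the generic renameat2 flag value:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef __NR_renameat2
#define __NR_renameat2 345		/* sparc value from the table above */
#endif

#ifndef RENAME_NOREPLACE
#define RENAME_NOREPLACE (1 << 0)	/* fail instead of overwriting the target */
#endif

int main(void)
{
	long ret = syscall(__NR_renameat2, AT_FDCWD, "old.txt",
			   AT_FDCWD, "new.txt", RENAME_NOREPLACE);
	if (ret) {
		perror("renameat2");
		return 1;
	}
	return 0;
}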