From b00c52dae6d9ee8d0f2407118ef6544ae5524781 Mon Sep 17 00:00:00 2001
From: Wenwei Tao <ww.tao0320@gmail.com>
Date: Fri, 13 May 2016 22:59:20 +0800
Subject: cgroup: remove redundant cleanup in css_create

When create css failed, before call css_free_rcu_fn, we remove the css
id and exit the percpu_ref, but we will do these again in
css_free_work_fn, so they are redundant.  Especially the css id, that
would cause problem if we remove it twice, since it may be assigned to
another css after the first remove.

tj: This was broken by two commits updating the free path without
    synchronizing the creation failure path.  This can be easily
    triggered by trying to create more than 64k memory cgroups.

Signed-off-by: Wenwei Tao <ww.tao0320@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Vladimir Davydov <vdavydov@parallels.com>
Fixes: 9a1049da9bd2 ("percpu-refcount: require percpu_ref to be exited explicitly")
Fixes: 01e586598b22 ("cgroup: release css->id after css_free")
Cc: stable@vger.kernel.org # v3.17+

diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 86cb5c6..789b84f 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -5150,7 +5150,7 @@ static struct cgroup_subsys_state *css_create(struct cgroup *cgrp,
 
 	err = cgroup_idr_alloc(&ss->css_idr, NULL, 2, 0, GFP_KERNEL);
 	if (err < 0)
-		goto err_free_percpu_ref;
+		goto err_free_css;
 	css->id = err;
 
 	/* @css is ready to be brought online now, make it visible */
@@ -5174,9 +5174,6 @@ static struct cgroup_subsys_state *css_create(struct cgroup *cgrp,
 
 err_list_del:
 	list_del_rcu(&css->sibling);
-	cgroup_idr_remove(&ss->css_idr, css->id);
-err_free_percpu_ref:
-	percpu_ref_exit(&css->refcnt);
 err_free_css:
 	call_rcu(&css->rcu_head, css_free_rcu_fn);
 	return ERR_PTR(err);
-- 
cgit v0.10.2


From bebef39842c8615180e711c0ff3e8c9c40c397da Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 2 May 2016 12:59:48 +0200
Subject: drm/sun4i: add COMMON_CLK dependency

The sun4i drm driver uses the clk-provider interfaces, which are not available
when CONFIG_COMMON_CLK is disabled:

drivers/gpu/drm/sun4i/sun4i_dotclock.c:19:16: error: field 'hw' has incomplete type
  struct clk_hw hw;
In file included from ../include/asm-generic/bug.h:13:0,
                 from ../arch/arm/include/asm/bug.h:59,
                 from ../include/linux/bug.h:4,
                 from ../include/linux/io.h:23,
                 from ../include/linux/clk-provider.h:14,
                 from ../drivers/gpu/drm/sun4i/sun4i_dotclock.c:13:
drivers/gpu/drm/sun4i/sun4i_dotclock.c: In function 'hw_to_dclk':
include/linux/kernel.h:831:48: error: initialization from incompatible pointer type [-Werror=incompatible-pointer-types]
...

This adds a Kconfig dependency to prevent the driver from being enabled
in this case.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Fixes: 9026e0d122ac ("drm: Add Allwinner A10 Display Engine support")
Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>

diff --git a/drivers/gpu/drm/sun4i/Kconfig b/drivers/gpu/drm/sun4i/Kconfig
index 99510e6..a4b357d 100644
--- a/drivers/gpu/drm/sun4i/Kconfig
+++ b/drivers/gpu/drm/sun4i/Kconfig
@@ -1,6 +1,6 @@
 config DRM_SUN4I
 	tristate "DRM Support for Allwinner A10 Display Engine"
-	depends on DRM && ARM
+	depends on DRM && ARM && COMMON_CLK
 	depends on ARCH_SUNXI || COMPILE_TEST
 	select DRM_GEM_CMA_HELPER
 	select DRM_KMS_HELPER
-- 
cgit v0.10.2


From f1b78f0e759b174ec4f5e3b0dd27a079a2416b66 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 3 May 2016 17:23:28 +0200
Subject: drm: sun4i: print DMA address correctly

The newly added sun4i drm driver prints a dma address using the %x
format string, which cannot work when dma_addr_t is 64 bit,
and gcc warns about this configuration:

drm/sun4i/sun4i_backend.c: In function 'sun4i_backend_update_layer_buffer':
drm/sun4i/sun4i_backend.c:193:84: error: format '%x' expects argument of type 'unsigned int', but argument 3 has type 'dma_addr_t {aka long long unsigned int}' [-Werror=format=]
  DRM_DEBUG_DRIVER("Using GEM @ 0x%x\n", gem->paddr);
drm/sun4i/sun4i_backend.c:201:84: error: format '%x' expects argument of type 'unsigned int', but argument 3 has type 'dma_addr_t {aka long long unsigned int}' [-Werror=format=]
  DRM_DEBUG_DRIVER("Setting buffer address to 0x%x\n", paddr);

This changes the code to use the explicit %pad format string, which
always prints the right length.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>

diff --git a/drivers/gpu/drm/sun4i/sun4i_backend.c b/drivers/gpu/drm/sun4i/sun4i_backend.c
index f7a15c1..3ab5604 100644
--- a/drivers/gpu/drm/sun4i/sun4i_backend.c
+++ b/drivers/gpu/drm/sun4i/sun4i_backend.c
@@ -190,7 +190,7 @@ int sun4i_backend_update_layer_buffer(struct sun4i_backend *backend,
 	/* Get the physical address of the buffer in memory */
 	gem = drm_fb_cma_get_gem_obj(fb, 0);
 
-	DRM_DEBUG_DRIVER("Using GEM @ 0x%x\n", gem->paddr);
+	DRM_DEBUG_DRIVER("Using GEM @ %pad\n", &gem->paddr);
 
 	/* Compute the start of the displayed memory */
 	bpp = drm_format_plane_cpp(fb->pixel_format, 0);
@@ -198,7 +198,7 @@ int sun4i_backend_update_layer_buffer(struct sun4i_backend *backend,
 	paddr += (state->src_x >> 16) * bpp;
 	paddr += (state->src_y >> 16) * fb->pitches[0];
 
-	DRM_DEBUG_DRIVER("Setting buffer address to 0x%x\n", paddr);
+	DRM_DEBUG_DRIVER("Setting buffer address to %pad\n", &paddr);
 
 	/* Write the 32 lower bits of the address (in bits) */
 	lo_paddr = paddr << 3;
-- 
cgit v0.10.2


From 9fa2568d9fe0c6dbf3253af6840de9389f4e89cb Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 5 May 2016 22:10:52 +0200
Subject: drm: sun4i: fix probe error handling

gcc points out a possible uninitialized variable use in
sun4i_dclk_create():

drivers/gpu/drm/sun4i/sun4i_dotclock.c: In function 'sun4i_dclk_create':
drivers/gpu/drm/sun4i/sun4i_dotclock.c:139:12: error: 'clk_name' may be used uninitialized in this function [-Werror=maybe-uninitialized]
  init.name = clk_name;

The warning only shows up when CONFIG_OF is disabled, and the
property is never filled, but the same bug can show up even
when CONFIG_OF is enabled but of_property_read_string_index
returns another error.

To fix it, this ensures that sun4i_dclk_create propagates
any error from of_property_read_string_index.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Fixes: 9026e0d122ac ("drm: Add Allwinner A10 Display Engine support")
Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>

diff --git a/drivers/gpu/drm/sun4i/sun4i_dotclock.c b/drivers/gpu/drm/sun4i/sun4i_dotclock.c
index 3ff668c..6c9c090 100644
--- a/drivers/gpu/drm/sun4i/sun4i_dotclock.c
+++ b/drivers/gpu/drm/sun4i/sun4i_dotclock.c
@@ -127,10 +127,14 @@ int sun4i_dclk_create(struct device *dev, struct sun4i_tcon *tcon)
 	const char *clk_name, *parent_name;
 	struct clk_init_data init;
 	struct sun4i_dclk *dclk;
+	int ret;
 
 	parent_name = __clk_get_name(tcon->sclk0);
-	of_property_read_string_index(dev->of_node, "clock-output-names", 0,
-				      &clk_name);
+	ret = of_property_read_string_index(dev->of_node,
+					    "clock-output-names", 0,
+					    &clk_name);
+	if (ret)
+		return ret;
 
 	dclk = devm_kzalloc(dev, sizeof(*dclk), GFP_KERNEL);
 	if (!dclk)
-- 
cgit v0.10.2


From 4731a72df273bdbd225445424057c15dbb8d3495 Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@free-electrons.com>
Date: Sat, 2 Apr 2016 12:30:11 +0200
Subject: drm/sun4i: request exact rates to our parents

Our pixel clock currently only tries to deal with the current parent rate.

While that works when the resolution is the same than the one already
program, or when we can compute directly the rate from the current parent
rate, it cannot work in most situation when we want to change the
frequency, and we end up with an improper pixel clock rate, which obviously
doesn't work as expected.

Ask our parent for all the possible dividers if it can reach that
frequency, and return the best parent rate to the clock framework so that
we can use it.

Fixes: 9026e0d122ac ("drm: Add Allwinner A10 Display Engine support")
Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>

diff --git a/drivers/gpu/drm/sun4i/sun4i_dotclock.c b/drivers/gpu/drm/sun4i/sun4i_dotclock.c
index 6c9c090..5b34631 100644
--- a/drivers/gpu/drm/sun4i/sun4i_dotclock.c
+++ b/drivers/gpu/drm/sun4i/sun4i_dotclock.c
@@ -72,14 +72,40 @@ static unsigned long sun4i_dclk_recalc_rate(struct clk_hw *hw,
 static long sun4i_dclk_round_rate(struct clk_hw *hw, unsigned long rate,
 				  unsigned long *parent_rate)
 {
-	return *parent_rate / DIV_ROUND_CLOSEST(*parent_rate, rate);
+	unsigned long best_parent = 0;
+	u8 best_div = 1;
+	int i;
+
+	for (i = 6; i < 127; i++) {
+		unsigned long ideal = rate * i;
+		unsigned long rounded;
+
+		rounded = clk_hw_round_rate(clk_hw_get_parent(hw),
+					    ideal);
+
+		if (rounded == ideal) {
+			best_parent = rounded;
+			best_div = i;
+			goto out;
+		}
+
+		if ((rounded < ideal) && (rounded > best_parent)) {
+			best_parent = rounded;
+			best_div = i;
+		}
+	}
+
+out:
+	*parent_rate = best_parent;
+
+	return best_parent / best_div;
 }
 
 static int sun4i_dclk_set_rate(struct clk_hw *hw, unsigned long rate,
 			       unsigned long parent_rate)
 {
 	struct sun4i_dclk *dclk = hw_to_dclk(hw);
-	int div = DIV_ROUND_CLOSEST(parent_rate, rate);
+	u8 div = parent_rate / rate;
 
 	return regmap_update_bits(dclk->regmap, SUN4I_TCON0_DCLK_REG,
 				  GENMASK(6, 0), div);
@@ -144,6 +170,7 @@ int sun4i_dclk_create(struct device *dev, struct sun4i_tcon *tcon)
 	init.ops = &sun4i_dclk_ops;
 	init.parent_names = &parent_name;
 	init.num_parents = 1;
+	init.flags = CLK_SET_RATE_PARENT;
 
 	dclk->regmap = tcon->regs;
 	dclk->hw.init = &init;
-- 
cgit v0.10.2


From bb43d40d7c830da5f623e3938fef908b003b7523 Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@free-electrons.com>
Date: Thu, 21 Apr 2016 11:25:26 +0200
Subject: drm/sun4i: rgb: Validate the clock rate

Our pixel clock cannot reach a high enough rate for some rather high while
common resolutions (like 1080p60).

Make sure we filter the resolutions we cannot reach in our mode_valid
function.

Fixes: 29e57fab97fc ("drm: sun4i: Add RGB output")
Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>

diff --git a/drivers/gpu/drm/sun4i/sun4i_rgb.c b/drivers/gpu/drm/sun4i/sun4i_rgb.c
index ab64948..fe7ef52 100644
--- a/drivers/gpu/drm/sun4i/sun4i_rgb.c
+++ b/drivers/gpu/drm/sun4i/sun4i_rgb.c
@@ -54,8 +54,13 @@ static int sun4i_rgb_get_modes(struct drm_connector *connector)
 static int sun4i_rgb_mode_valid(struct drm_connector *connector,
 				struct drm_display_mode *mode)
 {
+	struct sun4i_rgb *rgb = drm_connector_to_sun4i_rgb(connector);
+	struct sun4i_drv *drv = rgb->drv;
+	struct sun4i_tcon *tcon = drv->tcon;
 	u32 hsync = mode->hsync_end - mode->hsync_start;
 	u32 vsync = mode->vsync_end - mode->vsync_start;
+	unsigned long rate = mode->clock * 1000;
+	long rounded_rate;
 
 	DRM_DEBUG_DRIVER("Validating modes...\n");
 
@@ -87,6 +92,15 @@ static int sun4i_rgb_mode_valid(struct drm_connector *connector,
 
 	DRM_DEBUG_DRIVER("Vertical parameters OK\n");
 
+	rounded_rate = clk_round_rate(tcon->dclk, rate);
+	if (rounded_rate < rate)
+		return MODE_CLOCK_LOW;
+
+	if (rounded_rate > rate)
+		return MODE_CLOCK_HIGH;
+
+	DRM_DEBUG_DRIVER("Clock rate OK\n");
+
 	return MODE_OK;
 }
 
-- 
cgit v0.10.2


From 0bbbb00bda57e6f091275b0103445596322b9277 Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@free-electrons.com>
Date: Wed, 4 May 2016 17:38:32 +0200
Subject: drm/sun4i: defer only if we didn't find our panel

Our code currently defers our probe on any error, even if we were not
expecting to have one at all.

Make sure we return -EPROBE_DEFER only when we were supposed to have a
panel, but it's not probed yet.

Also fix a typo while we're at it.

Fixes: 29e57fab97fc ("drm: sun4i: Add RGB output")
Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>

diff --git a/drivers/gpu/drm/sun4i/sun4i_tcon.c b/drivers/gpu/drm/sun4i/sun4i_tcon.c
index 9f19b0e..16ab426 100644
--- a/drivers/gpu/drm/sun4i/sun4i_tcon.c
+++ b/drivers/gpu/drm/sun4i/sun4i_tcon.c
@@ -425,11 +425,11 @@ static struct drm_panel *sun4i_tcon_find_panel(struct device_node *node)
 
 	remote = of_graph_get_remote_port_parent(end_node);
 	if (!remote) {
-		DRM_DEBUG_DRIVER("Enable to parse remote node\n");
+		DRM_DEBUG_DRIVER("Unable to parse remote node\n");
 		return ERR_PTR(-EINVAL);
 	}
 
-	return of_drm_find_panel(remote);
+	return of_drm_find_panel(remote) ?: ERR_PTR(-EPROBE_DEFER);
 }
 
 static int sun4i_tcon_bind(struct device *dev, struct device *master,
@@ -522,12 +522,13 @@ static int sun4i_tcon_probe(struct platform_device *pdev)
 	 * Defer the probe.
 	 */
 	panel = sun4i_tcon_find_panel(node);
-	if (IS_ERR(panel)) {
-		/*
-		 * If we don't have a panel endpoint, just go on
-		 */
-		if (PTR_ERR(panel) != -ENODEV)
-			return -EPROBE_DEFER;
+
+	/*
+	 * If we don't have a panel endpoint, just go on
+	 */
+	if (PTR_ERR(panel) == -EPROBE_DEFER) {
+		DRM_DEBUG_DRIVER("Still waiting for our panel. Deferring...\n");
+		return -EPROBE_DEFER;
 	}
 
 	return component_add(&pdev->dev, &sun4i_tcon_ops);
-- 
cgit v0.10.2


From 0de6e914a035076b1241f5738c06c9d3820abd6e Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@free-electrons.com>
Date: Wed, 4 May 2016 17:37:58 +0200
Subject: drm/sun4i: rgb: panel is an error pointer

In case of an error, our pointer to the drm_panel structure attached to our
encoder will hold an error pointer, not a NULL pointer.

Make sure we check the right thing.

Fixes: 29e57fab97fc ("drm: sun4i: Add RGB output")
Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>

diff --git a/drivers/gpu/drm/sun4i/sun4i_rgb.c b/drivers/gpu/drm/sun4i/sun4i_rgb.c
index fe7ef52..aaffe9e 100644
--- a/drivers/gpu/drm/sun4i/sun4i_rgb.c
+++ b/drivers/gpu/drm/sun4i/sun4i_rgb.c
@@ -217,7 +217,7 @@ int sun4i_rgb_init(struct drm_device *drm)
 	int ret;
 
 	/* If we don't have a panel, there's no point in going on */
-	if (!tcon->panel)
+	if (IS_ERR(tcon->panel))
 		return -ENODEV;
 
 	rgb = devm_kzalloc(drm->dev, sizeof(*rgb), GFP_KERNEL);
-- 
cgit v0.10.2


From 3d6bd9065b7acf8499d414d70bdb4b4955856299 Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@free-electrons.com>
Date: Wed, 11 May 2016 16:52:50 +0200
Subject: drm/sun4i: remove simplefb at probe

If simplefb was setup by our bootloader and enabled in the DT, we will have
a first framebuffer loaded in our system.

However, as soon as our DRM driver will load, it will reset the controller,
initialise it and, if the framebuffer emulation is enabled, register a
second framebuffer device.

This is obviously pretty bad, since the first framebuffer will be some kind
of a black hole, with memory still reserved that we can write to safely,
but not displayed anywhere.

Make sure we remove that framebuffer when we probe so we don't end up in
that situation.

Fixes: 9026e0d122ac ("drm: Add Allwinner A10 Display Engine support")
Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>

diff --git a/drivers/gpu/drm/sun4i/sun4i_drv.c b/drivers/gpu/drm/sun4i/sun4i_drv.c
index 76e922b..af62b24 100644
--- a/drivers/gpu/drm/sun4i/sun4i_drv.c
+++ b/drivers/gpu/drm/sun4i/sun4i_drv.c
@@ -125,6 +125,22 @@ static struct drm_driver sun4i_drv_driver = {
 	.disable_vblank		= sun4i_drv_disable_vblank,
 };
 
+static void sun4i_remove_framebuffers(void)
+{
+	struct apertures_struct *ap;
+
+	ap = alloc_apertures(1);
+	if (!ap)
+		return;
+
+	/* The framebuffer can be located anywhere in RAM */
+	ap->ranges[0].base = 0;
+	ap->ranges[0].size = ~0;
+
+	remove_conflicting_framebuffers(ap, "sun4i-drm-fb", false);
+	kfree(ap);
+}
+
 static int sun4i_drv_bind(struct device *dev)
 {
 	struct drm_device *drm;
@@ -172,6 +188,9 @@ static int sun4i_drv_bind(struct device *dev)
 	}
 	drm->irq_enabled = true;
 
+	/* Remove early framebuffers (ie. simplefb) */
+	sun4i_remove_framebuffers();
+
 	/* Create our framebuffer */
 	drv->fbdev = sun4i_framebuffer_init(drm);
 	if (IS_ERR(drv->fbdev)) {
-- 
cgit v0.10.2


From 7aa2e2b731b337a7bc74cd076bbcd6b8b4b95fbf Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@free-electrons.com>
Date: Wed, 11 May 2016 19:04:18 +0200
Subject: drm/sun4i: Convert to connector register helpers

Now that connector register helpers have been created, switch to them.

Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>

diff --git a/drivers/gpu/drm/sun4i/sun4i_drv.c b/drivers/gpu/drm/sun4i/sun4i_drv.c
index af62b24..257d2b4 100644
--- a/drivers/gpu/drm/sun4i/sun4i_drv.c
+++ b/drivers/gpu/drm/sun4i/sun4i_drv.c
@@ -24,34 +24,6 @@
 #include "sun4i_layer.h"
 #include "sun4i_tcon.h"
 
-static int sun4i_drv_connector_plug_all(struct drm_device *drm)
-{
-	struct drm_connector *connector, *failed;
-	int ret;
-
-	mutex_lock(&drm->mode_config.mutex);
-	list_for_each_entry(connector, &drm->mode_config.connector_list, head) {
-		ret = drm_connector_register(connector);
-		if (ret) {
-			failed = connector;
-			goto err;
-		}
-	}
-	mutex_unlock(&drm->mode_config.mutex);
-	return 0;
-
-err:
-	list_for_each_entry(connector, &drm->mode_config.connector_list, head) {
-		if (failed == connector)
-			break;
-
-		drm_connector_unregister(connector);
-	}
-	mutex_unlock(&drm->mode_config.mutex);
-
-	return ret;
-}
-
 static int sun4i_drv_enable_vblank(struct drm_device *drm, unsigned int pipe)
 {
 	struct sun4i_drv *drv = drm->dev_private;
@@ -206,7 +178,7 @@ static int sun4i_drv_bind(struct device *dev)
 	if (ret)
 		goto free_drm;
 
-	ret = sun4i_drv_connector_plug_all(drm);
+	ret = drm_connector_register_all(drm);
 	if (ret)
 		goto unregister_drm;
 
@@ -223,6 +195,7 @@ static void sun4i_drv_unbind(struct device *dev)
 {
 	struct drm_device *drm = dev_get_drvdata(dev);
 
+	drm_connector_unregister_all(drm);
 	drm_dev_unregister(drm);
 	drm_kms_helper_poll_fini(drm);
 	sun4i_framebuffer_free(drm);
-- 
cgit v0.10.2


From 13fef095bde04228316046f997eb963285d8532e Mon Sep 17 00:00:00 2001
From: Chen-Yu Tsai <wens@csie.org>
Date: Tue, 17 May 2016 23:56:06 +0800
Subject: drm: sun4i: do cleanup if RGB output init fails

sun4i_rgb_init() can fail, which results in TCON failing to bind.
In this case we need to do cleanup, specificly unregistering the
dotclock, which is regmap based, and the regmap is registered as
part of the sun4i_tcon_bind().

Failing to do so results in a NULL pointer reference when the CCF
tries to turn off unused clocks.

Fixes: 29e57fab97fc ("drm: sun4i: Add RGB output")
Signed-off-by: Chen-Yu Tsai <wens@csie.org>
Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>

diff --git a/drivers/gpu/drm/sun4i/sun4i_tcon.c b/drivers/gpu/drm/sun4i/sun4i_tcon.c
index 16ab426..652385f 100644
--- a/drivers/gpu/drm/sun4i/sun4i_tcon.c
+++ b/drivers/gpu/drm/sun4i/sun4i_tcon.c
@@ -490,7 +490,11 @@ static int sun4i_tcon_bind(struct device *dev, struct device *master,
 		return 0;
 	}
 
-	return sun4i_rgb_init(drm);
+	ret = sun4i_rgb_init(drm);
+	if (ret < 0)
+		goto err_free_clocks;
+
+	return 0;
 
 err_free_clocks:
 	sun4i_tcon_free_clocks(tcon);
-- 
cgit v0.10.2


From 595144c1141c951a3c6bb9004ae6a2bc29aad66f Mon Sep 17 00:00:00 2001
From: Heiko Stuebner <heiko@sntech.de>
Date: Tue, 17 May 2016 20:57:50 +0200
Subject: clk: rockchip: initialize flags of clk_init_data in mmc-phase clock

The flags element of clk_init_data was never initialized for mmc-
phase-clocks resulting in the element containing a random value
and thus possibly enabling unwanted clock flags.

Fixes: 89bf26cbc1a0 ("clk: rockchip: Add support for the mmc clock phases using the framework")
Cc: stable@vger.kernel.org
Signed-off-by: Heiko Stuebner <heiko@sntech.de>

diff --git a/drivers/clk/rockchip/clk-mmc-phase.c b/drivers/clk/rockchip/clk-mmc-phase.c
index bc856f2..4601130 100644
--- a/drivers/clk/rockchip/clk-mmc-phase.c
+++ b/drivers/clk/rockchip/clk-mmc-phase.c
@@ -154,6 +154,7 @@ struct clk *rockchip_clk_register_mmc(const char *name,
 		return ERR_PTR(-ENOMEM);
 
 	init.name = name;
+	init.flags = 0;
 	init.num_parents = num_parents;
 	init.parent_names = parent_names;
 	init.ops = &rockchip_mmc_clk_ops;
-- 
cgit v0.10.2


From 176df69cb080c8cd813e51f800069e13ee607641 Mon Sep 17 00:00:00 2001
From: Brian Norris <briannorris@chromium.org>
Date: Fri, 13 May 2016 11:42:16 -0700
Subject: clk: rockchip: mark rk3399 GIC clocks as critical

We never want to kill the GIC.

Noticed when making other clock fixups, and seeing the newly-constructed
clock tree try to disable cpll, where we had this parent structure:

  aclk_gic <------\
                  |--- aclk_gic_pre <-- cpll <-- pll_cpll
  aclk_gic_noc <--/

Signed-off-by: Brian Norris <briannorris@chromium.org>
Reviewed-by: Douglas Anderson <dianders@chromium.org>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>

diff --git a/drivers/clk/rockchip/clk-rk3399.c b/drivers/clk/rockchip/clk-rk3399.c
index 291543f..145756c 100644
--- a/drivers/clk/rockchip/clk-rk3399.c
+++ b/drivers/clk/rockchip/clk-rk3399.c
@@ -1466,6 +1466,8 @@ static struct rockchip_clk_branch rk3399_clk_pmu_branches[] __initdata = {
 
 static const char *const rk3399_cru_critical_clocks[] __initconst = {
 	"aclk_cci_pre",
+	"aclk_gic",
+	"aclk_gic_noc",
 	"pclk_perilp0",
 	"pclk_perilp0",
 	"hclk_perilp0",
-- 
cgit v0.10.2


From 3bd14ae9da9194d369334556a70a7ff73ef94491 Mon Sep 17 00:00:00 2001
From: Xing Zheng <zhengxing@rock-chips.com>
Date: Fri, 13 May 2016 11:42:17 -0700
Subject: clk: rockchip: fix incorrect parent for rk3399's
 {c,g}pll_aclk_perihp_src

There was a typo, swapping 'c' <--> 'g'.

Signed-off-by: Xing Zheng <zhengxing@rock-chips.com>
Signed-off-by: Brian Norris <briannorris@chromium.org>
Reviewed-by: Douglas Anderson <dianders@chromium.org>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>

diff --git a/drivers/clk/rockchip/clk-rk3399.c b/drivers/clk/rockchip/clk-rk3399.c
index 145756c..9f86bfe 100644
--- a/drivers/clk/rockchip/clk-rk3399.c
+++ b/drivers/clk/rockchip/clk-rk3399.c
@@ -832,9 +832,9 @@ static struct rockchip_clk_branch rk3399_clk_branches[] __initdata = {
 			RK3399_CLKGATE_CON(13), 1, GFLAGS),
 
 	/* perihp */
-	GATE(0, "cpll_aclk_perihp_src", "gpll", CLK_IGNORE_UNUSED,
+	GATE(0, "cpll_aclk_perihp_src", "cpll", CLK_IGNORE_UNUSED,
 			RK3399_CLKGATE_CON(5), 0, GFLAGS),
-	GATE(0, "gpll_aclk_perihp_src", "cpll", CLK_IGNORE_UNUSED,
+	GATE(0, "gpll_aclk_perihp_src", "gpll", CLK_IGNORE_UNUSED,
 			RK3399_CLKGATE_CON(5), 1, GFLAGS),
 	COMPOSITE(ACLK_PERIHP, "aclk_perihp", mux_aclk_perihp_p, CLK_IGNORE_UNUSED,
 			RK3399_CLKSEL_CON(14), 7, 1, MFLAGS, 0, 5, DFLAGS,
-- 
cgit v0.10.2


From 4715f81afc342996f680b08c944a712d9cbef11b Mon Sep 17 00:00:00 2001
From: Douglas Anderson <dianders@chromium.org>
Date: Thu, 12 May 2016 11:03:16 -0700
Subject: clk: rockchip: Revert "clk: rockchip: reset init state before mmc
 card initialization"

This reverts commit 7a03fe6f48f3 ("clk: rockchip: reset init state
before mmc card initialization").

Though not totally obvious from the commit message nor from the source
code, that commit appears to be trying to reset the "_drv" MMC clocks to
90 degrees (note that the "_sample" MMC clocks have a shift of 0 so are
not touched).

The major problem here is that it doesn't properly reset things.  The
phase is a two bit field and the commit only touches one of the two
bits.  Thus the commit had the following affect:
- phase   0  => phase  90
- phase  90  => phase  90
- phase 180  => phase 270
- phase 270  => phase 270

Things get even weirder if you happen to have a bootloader that was
actually using delay elements (should be no reason to, but you never
know), since those are additional bits that weren't touched by the
original patch.

This is unlikely to be what we actually want.  Checking on rk3288-veyron
devices, I can see that the bootloader leaves these clocks as:
- emmc:  phase 180
- sdmmc: phase 90
- sdio0: phase 90

Thus on rk3288-veyron devices the commit we're reverting had the effect
of changing the eMMC clock to phase 270.  This probably explains the
scattered reports I've heard of eMMC devices not working on some veyron
devices when using the upstream kernel.

The original commit was presumably made because previously the kernel
didn't touch the "_drv" phase at all and relied on whatever value was
there when the kernel started.  If someone was using a bootloader that
touched the "_drv" phase then, indeed, we should have code in the kernel
to fix that.  ...and also, to get ideal timings, we should also have the
kernel change the phase depending on the speed mode.  In fact, that's
the subject of a recent patch I posted at
<https://patchwork.kernel.org/patch/9075141/>.

Ideally, we should take both the patch posted to dw_mmc and this
revert.  Since those will likely go through different trees, here I
describe behavior with the combos:

1. Just this revert: likely will fix rk3288-veyron eMMC on some devices
   + other cases; might break someone with a strange bootloader that
   sets the phase to 0 or one that uses delay elements (pretty
   unpredicable what would happen in that case).
2. Just dw_mmc patch: fixes everyone.  Effectly the dw_mmc patch will
   totally override the broken patch and fix everything.
3. Both patches: fixes everyone.  Once dw_mmc is initting properly then
   any defaults from the clock code doesn't mattery.

Fixes: 7a03fe6f48f3 ("clk: rockchip: reset init state before mmc card initialization")
Signed-off-by: Douglas Anderson <dianders@chromium.org>
Reviewed-by: Shawn Lin <shawn.lin@rock-chips.com>

[emmc and sdmmc still work on all current boards in mainline after this
revert, so they should take precedence over any out-of-tree board that
will hopefully again get fixed with the better upcoming dw_mmc change.]
Signed-off-by: Heiko Stuebner <heiko@sntech.de>

diff --git a/drivers/clk/rockchip/clk-mmc-phase.c b/drivers/clk/rockchip/clk-mmc-phase.c
index 4601130..077fcdc 100644
--- a/drivers/clk/rockchip/clk-mmc-phase.c
+++ b/drivers/clk/rockchip/clk-mmc-phase.c
@@ -41,8 +41,6 @@ static unsigned long rockchip_mmc_recalc(struct clk_hw *hw,
 #define ROCKCHIP_MMC_DEGREE_MASK 0x3
 #define ROCKCHIP_MMC_DELAYNUM_OFFSET 2
 #define ROCKCHIP_MMC_DELAYNUM_MASK (0xff << ROCKCHIP_MMC_DELAYNUM_OFFSET)
-#define ROCKCHIP_MMC_INIT_STATE_RESET 0x1
-#define ROCKCHIP_MMC_INIT_STATE_SHIFT 1
 
 #define PSECS_PER_SEC 1000000000000LL
 
@@ -163,15 +161,6 @@ struct clk *rockchip_clk_register_mmc(const char *name,
 	mmc_clock->reg = reg;
 	mmc_clock->shift = shift;
 
-	/*
-	 * Assert init_state to soft reset the CLKGEN
-	 * for mmc tuning phase and degree
-	 */
-	if (mmc_clock->shift == ROCKCHIP_MMC_INIT_STATE_SHIFT)
-		writel(HIWORD_UPDATE(ROCKCHIP_MMC_INIT_STATE_RESET,
-				     ROCKCHIP_MMC_INIT_STATE_RESET,
-				     mmc_clock->shift), mmc_clock->reg);
-
 	clk = clk_register(NULL, &mmc_clock->hw);
 	if (IS_ERR(clk))
 		kfree(mmc_clock);
-- 
cgit v0.10.2


From 3183c0d519ff83af2926382c11716496f01d34bf Mon Sep 17 00:00:00 2001
From: Xing Zheng <zhengxing@rock-chips.com>
Date: Thu, 26 May 2016 21:49:08 +0800
Subject: clk: rockchip: fix cpuclk registration error handling

It maybe due to a copy-paste error the error handing should be
cclk not clk when checking if the cpuclk registration succeeded.

Reported-by: Lin Huang <lin.huang@rock-chips.com>
Signed-off-by: Xing Zheng <zhengxing@rock-chips.com>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>

diff --git a/drivers/clk/rockchip/clk-cpu.c b/drivers/clk/rockchip/clk-cpu.c
index 4bb130c..05b3d73 100644
--- a/drivers/clk/rockchip/clk-cpu.c
+++ b/drivers/clk/rockchip/clk-cpu.c
@@ -321,9 +321,9 @@ struct clk *rockchip_clk_register_cpuclk(const char *name,
 	}
 
 	cclk = clk_register(NULL, &cpuclk->hw);
-	if (IS_ERR(clk)) {
+	if (IS_ERR(cclk)) {
 		pr_err("%s: could not register cpuclk %s\n", __func__,	name);
-		ret = PTR_ERR(clk);
+		ret = PTR_ERR(cclk);
 		goto free_rate_table;
 	}
 
-- 
cgit v0.10.2


From 3ac066e2227cb272c097f34475247fa0a6cdd2ff Mon Sep 17 00:00:00 2001
From: Jean-Jacques Hiblot <jjhiblot@ti.com>
Date: Tue, 31 May 2016 17:56:23 +0200
Subject: spi: spi-ti-qspi: Suspend the queue before removing the device

Before disabling the pm_runtime, we must ensure that there is no transfer
in progress nor will a new one be started. Otherwise the message pump will
fail and in the end, the process requesting the transfer will be stuck.
This behavior has been observed when transferring data from a SPI flash
with dd while removing the module on a DRA7x-evm.

Signed-off-by: Jean-Jacques Hiblot <jjhiblot@ti.com>
Signed-off-by: Mark Brown <broonie@kernel.org>

diff --git a/drivers/spi/spi-ti-qspi.c b/drivers/spi/spi-ti-qspi.c
index 443f664..29ea8d2 100644
--- a/drivers/spi/spi-ti-qspi.c
+++ b/drivers/spi/spi-ti-qspi.c
@@ -646,6 +646,13 @@ free_master:
 
 static int ti_qspi_remove(struct platform_device *pdev)
 {
+	struct ti_qspi *qspi = platform_get_drvdata(pdev);
+	int rc;
+
+	rc = spi_master_suspend(qspi->master);
+	if (rc)
+		return rc;
+
 	pm_runtime_put_sync(&pdev->dev);
 	pm_runtime_disable(&pdev->dev);
 
-- 
cgit v0.10.2


From 540c26087bfbad6ea72758b76b16ae6282a73fea Mon Sep 17 00:00:00 2001
From: Cameron Gutman <aicommander@gmail.com>
Date: Wed, 1 Jun 2016 11:32:51 -0700
Subject: Input: xpad - fix rumble on Xbox One controllers with 2015 firmware

Xbox One controllers that shipped with or were upgraded to the 2015
firmware discard the current rumble packets we send. This patch changes
the Xbox One rumble packet to a form that both the newer and older
firmware will accept.

It is based on changes made to support newer Xbox One controllers in
the SteamOS brewmaster-4.1 kernel branch.

Signed-off-by: Cameron Gutman <aicommander@gmail.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>

diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c
index 804dbcc..923c572 100644
--- a/drivers/input/joystick/xpad.c
+++ b/drivers/input/joystick/xpad.c
@@ -1031,17 +1031,17 @@ static int xpad_play_effect(struct input_dev *dev, void *data, struct ff_effect
 
 	case XTYPE_XBOXONE:
 		packet->data[0] = 0x09; /* activate rumble */
-		packet->data[1] = 0x08;
+		packet->data[1] = 0x00;
 		packet->data[2] = xpad->odata_serial++;
-		packet->data[3] = 0x08; /* continuous effect */
-		packet->data[4] = 0x00; /* simple rumble mode */
-		packet->data[5] = 0x03; /* L and R actuator only */
-		packet->data[6] = 0x00; /* TODO: LT actuator */
-		packet->data[7] = 0x00; /* TODO: RT actuator */
+		packet->data[3] = 0x09;
+		packet->data[4] = 0x00;
+		packet->data[5] = 0x0F;
+		packet->data[6] = 0x00;
+		packet->data[7] = 0x00;
 		packet->data[8] = strong / 512;	/* left actuator */
 		packet->data[9] = weak / 512;	/* right actuator */
-		packet->data[10] = 0x80;	/* length of pulse */
-		packet->data[11] = 0x00;	/* stop period of pulse */
+		packet->data[10] = 0xFF;
+		packet->data[11] = 0x00;
 		packet->data[12] = 0x00;
 		packet->len = 13;
 		packet->pending = true;
-- 
cgit v0.10.2


From 62d0e71df063101e4551327bd9fa9aaa3535c86b Mon Sep 17 00:00:00 2001
From: Shawn Lin <shawn.lin@rock-chips.com>
Date: Fri, 3 Jun 2016 08:54:18 +0800
Subject: clk: rockchip: release io resource when failing to init clk on rk3399

We should call iounmap to relase reg_base since it's not going
to be used any more if failing to init clk.

This was missing on the newly added rk3399 clock tree.

Signed-off-by: Shawn Lin <shawn.lin@rock-chips.com>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>

diff --git a/drivers/clk/rockchip/clk-rk3399.c b/drivers/clk/rockchip/clk-rk3399.c
index 9f86bfe..8059a8d 100644
--- a/drivers/clk/rockchip/clk-rk3399.c
+++ b/drivers/clk/rockchip/clk-rk3399.c
@@ -1510,6 +1510,7 @@ static void __init rk3399_clk_init(struct device_node *np)
 	ctx = rockchip_clk_init(np, reg_base, CLK_NR_CLKS);
 	if (IS_ERR(ctx)) {
 		pr_err("%s: rockchip clk init failed\n", __func__);
+		iounmap(reg_base);
 		return;
 	}
 
@@ -1555,6 +1556,7 @@ static void __init rk3399_pmu_clk_init(struct device_node *np)
 	ctx = rockchip_clk_init(np, reg_base, CLKPMU_NR_CLKS);
 	if (IS_ERR(ctx)) {
 		pr_err("%s: rockchip pmu clk init failed\n", __func__);
+		iounmap(reg_base);
 		return;
 	}
 
-- 
cgit v0.10.2


From fee48cf8374569a3888fd8c8536283e6067f0cfb Mon Sep 17 00:00:00 2001
From: Ben Greear <greearb@candelatech.com>
Date: Fri, 1 Apr 2016 14:12:12 -0700
Subject: ath10k: fix deadlock when peer cannot be created

We must not attempt to send WMI packets while holding the data-lock,
as it may deadlock:

BUG: sleeping function called from invalid context at drivers/net/wireless/ath/ath10k/wmi.c:1824
in_atomic(): 1, irqs_disabled(): 0, pid: 2878, name: wpa_supplicant

=============================================
[ INFO: possible recursive locking detected ]
4.4.6+ #21 Tainted: G        W  O
---------------------------------------------
wpa_supplicant/2878 is trying to acquire lock:
 (&(&ar->data_lock)->rlock){+.-...}, at: [<ffffffffa0721511>] ath10k_wmi_tx_beacons_iter+0x26/0x11a [ath10k_core]

but task is already holding lock:
 (&(&ar->data_lock)->rlock){+.-...}, at: [<ffffffffa070251b>] ath10k_peer_create+0x122/0x1ae [ath10k_core]

other info that might help us debug this:
 Possible unsafe locking scenario:

       CPU0
       ----
  lock(&(&ar->data_lock)->rlock);
  lock(&(&ar->data_lock)->rlock);

 *** DEADLOCK ***

 May be due to missing lock nesting notation

4 locks held by wpa_supplicant/2878:
 #0:  (rtnl_mutex){+.+.+.}, at: [<ffffffff816493ca>] rtnl_lock+0x12/0x14
 #1:  (&ar->conf_mutex){+.+.+.}, at: [<ffffffffa0706932>] ath10k_add_interface+0x3b/0xbda [ath10k_core]
 #2:  (&(&ar->data_lock)->rlock){+.-...}, at: [<ffffffffa070251b>] ath10k_peer_create+0x122/0x1ae [ath10k_core]
 #3:  (rcu_read_lock){......}, at: [<ffffffffa062f304>] rcu_read_lock+0x0/0x66 [mac80211]

stack backtrace:
CPU: 3 PID: 2878 Comm: wpa_supplicant Tainted: G        W  O    4.4.6+ #21
Hardware name: To be filled by O.E.M. To be filled by O.E.M./ChiefRiver, BIOS 4.6.5 06/07/2013
 0000000000000000 ffff8801fcadf8f0 ffffffff8137086d ffffffff82681720
 ffffffff82681720 ffff8801fcadf9b0 ffffffff8112e3be ffff8801fcadf920
 0000000100000000 ffffffff82681720 ffffffffa0721500 ffff8801fcb8d348
Call Trace:
 [<ffffffff8137086d>] dump_stack+0x81/0xb6
 [<ffffffff8112e3be>] __lock_acquire+0xc5b/0xde7
 [<ffffffffa0721500>] ? ath10k_wmi_tx_beacons_iter+0x15/0x11a [ath10k_core]
 [<ffffffff8112d0d0>] ? mark_lock+0x24/0x201
 [<ffffffff8112e908>] lock_acquire+0x132/0x1cb
 [<ffffffff8112e908>] ? lock_acquire+0x132/0x1cb
 [<ffffffffa0721511>] ? ath10k_wmi_tx_beacons_iter+0x26/0x11a [ath10k_core]
 [<ffffffffa07214eb>] ? ath10k_wmi_cmd_send_nowait+0x1ce/0x1ce [ath10k_core]
 [<ffffffff816f9e2b>] _raw_spin_lock_bh+0x31/0x40
 [<ffffffffa0721511>] ? ath10k_wmi_tx_beacons_iter+0x26/0x11a [ath10k_core]
 [<ffffffffa0721511>] ath10k_wmi_tx_beacons_iter+0x26/0x11a [ath10k_core]
 [<ffffffffa07214eb>] ? ath10k_wmi_cmd_send_nowait+0x1ce/0x1ce [ath10k_core]
 [<ffffffffa062eb18>] __iterate_interfaces+0x9d/0x13d [mac80211]
 [<ffffffffa062f609>] ieee80211_iterate_active_interfaces_atomic+0x32/0x3e [mac80211]
 [<ffffffffa07214eb>] ? ath10k_wmi_cmd_send_nowait+0x1ce/0x1ce [ath10k_core]
 [<ffffffffa071fa9f>] ath10k_wmi_tx_beacons_nowait.isra.13+0x14/0x16 [ath10k_core]
 [<ffffffffa0721676>] ath10k_wmi_cmd_send+0x71/0x242 [ath10k_core]
 [<ffffffffa07023f6>] ath10k_wmi_peer_delete+0x3f/0x42 [ath10k_core]
 [<ffffffffa0702557>] ath10k_peer_create+0x15e/0x1ae [ath10k_core]
 [<ffffffffa0707004>] ath10k_add_interface+0x70d/0xbda [ath10k_core]
 [<ffffffffa05fffcc>] drv_add_interface+0x123/0x1a5 [mac80211]
 [<ffffffffa061554b>] ieee80211_do_open+0x351/0x667 [mac80211]
 [<ffffffffa06158aa>] ieee80211_open+0x49/0x4c [mac80211]
 [<ffffffff8163ecf9>] __dev_open+0x88/0xde
 [<ffffffff8163ef6e>] __dev_change_flags+0xa4/0x13a
 [<ffffffff8163f023>] dev_change_flags+0x1f/0x54
 [<ffffffff816a5532>] devinet_ioctl+0x2b9/0x5c9
 [<ffffffff816514dd>] ? copy_to_user+0x32/0x38
 [<ffffffff816a6115>] inet_ioctl+0x81/0x9d
 [<ffffffff816a6115>] ? inet_ioctl+0x81/0x9d
 [<ffffffff81621cf8>] sock_do_ioctl+0x20/0x3d
 [<ffffffff816223c4>] sock_ioctl+0x222/0x22e
 [<ffffffff8121cf95>] do_vfs_ioctl+0x453/0x4d7
 [<ffffffff81625603>] ? __sys_recvmsg+0x4c/0x5b
 [<ffffffff81225af1>] ? __fget_light+0x48/0x6c
 [<ffffffff8121d06b>] SyS_ioctl+0x52/0x74
 [<ffffffff816fa736>] entry_SYSCALL_64_fastpath+0x16/0x7a

Signed-off-by: Ben Greear <greearb@candelatech.com>
Signed-off-by: Kalle Valo <kvalo@qca.qualcomm.com>

diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c
index 6dd1d26..4040f94 100644
--- a/drivers/net/wireless/ath/ath10k/mac.c
+++ b/drivers/net/wireless/ath/ath10k/mac.c
@@ -679,10 +679,10 @@ static int ath10k_peer_create(struct ath10k *ar,
 
 	peer = ath10k_peer_find(ar, vdev_id, addr);
 	if (!peer) {
+		spin_unlock_bh(&ar->data_lock);
 		ath10k_warn(ar, "failed to find peer %pM on vdev %i after creation\n",
 			    addr, vdev_id);
 		ath10k_wmi_peer_delete(ar, vdev_id, addr);
-		spin_unlock_bh(&ar->data_lock);
 		return -ENOENT;
 	}
 
-- 
cgit v0.10.2


From 8d0a0710ea0d22881fdb40eb79d346a98cc64ae6 Mon Sep 17 00:00:00 2001
From: Ben Greear <greearb@candelatech.com>
Date: Thu, 2 Jun 2016 17:59:54 +0300
Subject: ath10k: fix crash related to printing features

This looks like a regression from commit c4cdf753ed42 ("ath10k: move
fw_features to struct ath10k_fw_file"), we were printing the features from a
wrong struct.

Fixes: c4cdf753ed42 ("ath10k: move fw_features to struct ath10k_fw_file")
Signed-off-by:  Ben Greear <greearb@candelatech.com>
[kvalo@qca.qualcomm.com: improve commit log]
Signed-off-by: Kalle Valo <kvalo@qca.qualcomm.com>

diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c
index 49af624..a92a0ba 100644
--- a/drivers/net/wireless/ath/ath10k/core.c
+++ b/drivers/net/wireless/ath/ath10k/core.c
@@ -1083,7 +1083,7 @@ int ath10k_core_fetch_firmware_api_n(struct ath10k *ar, const char *name,
 			}
 
 			ath10k_dbg_dump(ar, ATH10K_DBG_BOOT, "features", "",
-					ar->running_fw->fw_file.fw_features,
+					fw_file->fw_features,
 					sizeof(fw_file->fw_features));
 			break;
 		case ATH10K_FW_IE_FW_IMAGE:
-- 
cgit v0.10.2


From b36fad65d61fffe4b662d4bfb1ed673c455a36a2 Mon Sep 17 00:00:00 2001
From: Michal Marek <mmarek@suse.com>
Date: Tue, 7 Jun 2016 11:57:02 +0200
Subject: kbuild: Initialize exported variables

The NOSTDINC_FLAGS variable is exported, so it needs to be cleared to
avoid duplicating its content when running make from within make (e.g.
in the packaging targets). This became an issue after commit
9c8fa9bc08f6 ("kbuild: fix if_change and friends to consider argument
order"), which no longer ignores the duplicate options. As Paulo Zanoni
points out, the LDFLAGS_vmlinux variable has the same problem.

Reported-by: "Zanoni, Paulo R" <paulo.r.zanoni@intel.com>
Fixes: 9c8fa9bc08f6 ("kbuild: fix if_change and friends to consider argument order")
Signed-off-by: Michal Marek <mmarek@suse.com>

diff --git a/Makefile b/Makefile
index 0f70de6..af0c463 100644
--- a/Makefile
+++ b/Makefile
@@ -363,11 +363,13 @@ CHECK		= sparse
 
 CHECKFLAGS     := -D__linux__ -Dlinux -D__STDC__ -Dunix -D__unix__ \
 		  -Wbitwise -Wno-return-void $(CF)
+NOSTDINC_FLAGS  =
 CFLAGS_MODULE   =
 AFLAGS_MODULE   =
 LDFLAGS_MODULE  =
 CFLAGS_KERNEL	=
 AFLAGS_KERNEL	=
+LDFLAGS_vmlinux =
 CFLAGS_GCOV	= -fprofile-arcs -ftest-coverage -fno-tree-loop-im -Wno-maybe-uninitialized
 CFLAGS_KCOV	= -fsanitize-coverage=trace-pc
 
-- 
cgit v0.10.2


From 4dc0dd83603f05dc3ae152af33ecb15104c313f3 Mon Sep 17 00:00:00 2001
From: Tomeu Vizoso <tomeu.vizoso@collabora.com>
Date: Wed, 8 Jun 2016 09:32:51 +0200
Subject: spi: rockchip: Signal unfinished DMA transfers

When using DMA, the transfer_one callback should return 1 because the
transfer hasn't finished yet.

A previous commit changed the function to return 0 when the DMA channels
were correctly prepared.

This manifested in Veyron boards with this message:

[ 1.983605] cros-ec-spi spi0.0: EC failed to respond in time

Fixes: ea9849113343 ("spi: rockchip: check return value of dmaengine_prep_slave_sg")
Signed-off-by: Tomeu Vizoso <tomeu.vizoso@collabora.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
Cc: stable@vger.kernel.org

diff --git a/drivers/spi/spi-rockchip.c b/drivers/spi/spi-rockchip.c
index cd89682..1026e18 100644
--- a/drivers/spi/spi-rockchip.c
+++ b/drivers/spi/spi-rockchip.c
@@ -578,7 +578,7 @@ static int rockchip_spi_transfer_one(
 		struct spi_device *spi,
 		struct spi_transfer *xfer)
 {
-	int ret = 1;
+	int ret = 0;
 	struct rockchip_spi *rs = spi_master_get_devdata(master);
 
 	WARN_ON(readl_relaxed(rs->regs + ROCKCHIP_SPI_SSIENR) &&
@@ -627,6 +627,8 @@ static int rockchip_spi_transfer_one(
 			spi_enable_chip(rs, 1);
 			ret = rockchip_spi_prepare_dma(rs);
 		}
+		/* successful DMA prepare means the transfer is in progress */
+		ret = ret ? ret : 1;
 	} else {
 		spi_enable_chip(rs, 1);
 		ret = rockchip_spi_pio_transfer(rs);
-- 
cgit v0.10.2


From 72d8c36ec364c82bf1bf0c64dfa1041cfaf139f7 Mon Sep 17 00:00:00 2001
From: Wei Fang <fangwei1@huawei.com>
Date: Tue, 7 Jun 2016 14:53:56 +0800
Subject: scsi: fix race between simultaneous decrements of ->host_failed

sas_ata_strategy_handler() adds the works of the ata error handler to
system_unbound_wq. This workqueue asynchronously runs work items, so the
ata error handler will be performed concurrently on different CPUs. In
this case, ->host_failed will be decreased simultaneously in
scsi_eh_finish_cmd() on different CPUs, and become abnormal.

It will lead to permanently inequality between ->host_failed and
->host_busy, and scsi error handler thread won't start running. IO
errors after that won't be handled.

Since all scmds must have been handled in the strategy handler, just
remove the decrement in scsi_eh_finish_cmd() and zero ->host_busy after
the strategy handler to fix this race.

Fixes: 50824d6c5657 ("[SCSI] libsas: async ata-eh")
Cc: stable@vger.kernel.org
Signed-off-by: Wei Fang <fangwei1@huawei.com>
Reviewed-by: James Bottomley <jejb@linux.vnet.ibm.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>

diff --git a/Documentation/scsi/scsi_eh.txt b/Documentation/scsi/scsi_eh.txt
index 8638f61..37eca00 100644
--- a/Documentation/scsi/scsi_eh.txt
+++ b/Documentation/scsi/scsi_eh.txt
@@ -263,19 +263,23 @@ scmd->allowed.
 
  3. scmd recovered
     ACTION: scsi_eh_finish_cmd() is invoked to EH-finish scmd
-	- shost->host_failed--
 	- clear scmd->eh_eflags
 	- scsi_setup_cmd_retry()
 	- move from local eh_work_q to local eh_done_q
     LOCKING: none
+    CONCURRENCY: at most one thread per separate eh_work_q to
+		 keep queue manipulation lockless
 
  4. EH completes
     ACTION: scsi_eh_flush_done_q() retries scmds or notifies upper
-	    layer of failure.
+	    layer of failure. May be called concurrently but must have
+	    a no more than one thread per separate eh_work_q to
+	    manipulate the queue locklessly
 	- scmd is removed from eh_done_q and scmd->eh_entry is cleared
 	- if retry is necessary, scmd is requeued using
           scsi_queue_insert()
 	- otherwise, scsi_finish_command() is invoked for scmd
+	- zero shost->host_failed
     LOCKING: queue or finish function performs appropriate locking
 
 
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index 961acc7..91a9e6a 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -606,7 +606,7 @@ void ata_scsi_error(struct Scsi_Host *host)
 	ata_scsi_port_error_handler(host, ap);
 
 	/* finish or retry handled scmd's and clean up */
-	WARN_ON(host->host_failed || !list_empty(&eh_work_q));
+	WARN_ON(!list_empty(&eh_work_q));
 
 	DPRINTK("EXIT\n");
 }
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index 984ddcb..1b9c049 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -1127,7 +1127,6 @@ static int scsi_eh_action(struct scsi_cmnd *scmd, int rtn)
  */
 void scsi_eh_finish_cmd(struct scsi_cmnd *scmd, struct list_head *done_q)
 {
-	scmd->device->host->host_failed--;
 	scmd->eh_eflags = 0;
 	list_move_tail(&scmd->eh_entry, done_q);
 }
@@ -2226,6 +2225,9 @@ int scsi_error_handler(void *data)
 		else
 			scsi_unjam_host(shost);
 
+		/* All scmds have been handled */
+		shost->host_failed = 0;
+
 		/*
 		 * Note - if the above fails completely, the action is to take
 		 * individual devices offline and flush the queue of any
-- 
cgit v0.10.2


From 36194812a4063dd2a72070aec3ee7890d135a587 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Wed, 8 Jun 2016 19:55:50 +0530
Subject: powerpc/mm/radix: Update to tlb functions ric argument

Radix invalidate control (RIC) is used to control which cache to flush
using tlb instructions. When doing a PID flush, we currently flush
everything including page walk cache. For address range flush, we flush
only the TLB. In the next patch, we add support for flushing only the
page walk cache.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>

diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c
index 54efba2..7108362 100644
--- a/arch/powerpc/mm/tlb-radix.c
+++ b/arch/powerpc/mm/tlb-radix.c
@@ -18,16 +18,20 @@
 
 static DEFINE_RAW_SPINLOCK(native_tlbie_lock);
 
-static inline void __tlbiel_pid(unsigned long pid, int set)
+#define RIC_FLUSH_TLB 0
+#define RIC_FLUSH_PWC 1
+#define RIC_FLUSH_ALL 2
+
+static inline void __tlbiel_pid(unsigned long pid, int set,
+				unsigned long ric)
 {
-	unsigned long rb,rs,ric,prs,r;
+	unsigned long rb,rs,prs,r;
 
 	rb = PPC_BIT(53); /* IS = 1 */
 	rb |= set << PPC_BITLSHIFT(51);
 	rs = ((unsigned long)pid) << PPC_BITLSHIFT(31);
 	prs = 1; /* process scoped */
 	r = 1;   /* raidx format */
-	ric = 2;  /* invalidate all the caches */
 
 	asm volatile("ptesync": : :"memory");
 	asm volatile(".long 0x7c000224 | (%0 << 11) | (%1 << 16) |"
@@ -39,25 +43,24 @@ static inline void __tlbiel_pid(unsigned long pid, int set)
 /*
  * We use 128 set in radix mode and 256 set in hpt mode.
  */
-static inline void _tlbiel_pid(unsigned long pid)
+static inline void _tlbiel_pid(unsigned long pid, unsigned long ric)
 {
 	int set;
 
 	for (set = 0; set < POWER9_TLB_SETS_RADIX ; set++) {
-		__tlbiel_pid(pid, set);
+		__tlbiel_pid(pid, set, ric);
 	}
 	return;
 }
 
-static inline void _tlbie_pid(unsigned long pid)
+static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
 {
-	unsigned long rb,rs,ric,prs,r;
+	unsigned long rb,rs,prs,r;
 
 	rb = PPC_BIT(53); /* IS = 1 */
 	rs = pid << PPC_BITLSHIFT(31);
 	prs = 1; /* process scoped */
 	r = 1;   /* raidx format */
-	ric = 2;  /* invalidate all the caches */
 
 	asm volatile("ptesync": : :"memory");
 	asm volatile(".long 0x7c000264 | (%0 << 11) | (%1 << 16) |"
@@ -67,16 +70,15 @@ static inline void _tlbie_pid(unsigned long pid)
 }
 
 static inline void _tlbiel_va(unsigned long va, unsigned long pid,
-			      unsigned long ap)
+			      unsigned long ap, unsigned long ric)
 {
-	unsigned long rb,rs,ric,prs,r;
+	unsigned long rb,rs,prs,r;
 
 	rb = va & ~(PPC_BITMASK(52, 63));
 	rb |= ap << PPC_BITLSHIFT(58);
 	rs = pid << PPC_BITLSHIFT(31);
 	prs = 1; /* process scoped */
 	r = 1;   /* raidx format */
-	ric = 0;  /* no cluster flush yet */
 
 	asm volatile("ptesync": : :"memory");
 	asm volatile(".long 0x7c000224 | (%0 << 11) | (%1 << 16) |"
@@ -86,16 +88,15 @@ static inline void _tlbiel_va(unsigned long va, unsigned long pid,
 }
 
 static inline void _tlbie_va(unsigned long va, unsigned long pid,
-			     unsigned long ap)
+			     unsigned long ap, unsigned long ric)
 {
-	unsigned long rb,rs,ric,prs,r;
+	unsigned long rb,rs,prs,r;
 
 	rb = va & ~(PPC_BITMASK(52, 63));
 	rb |= ap << PPC_BITLSHIFT(58);
 	rs = pid << PPC_BITLSHIFT(31);
 	prs = 1; /* process scoped */
 	r = 1;   /* raidx format */
-	ric = 0;  /* no cluster flush yet */
 
 	asm volatile("ptesync": : :"memory");
 	asm volatile(".long 0x7c000264 | (%0 << 11) | (%1 << 16) |"
@@ -122,7 +123,7 @@ void radix__local_flush_tlb_mm(struct mm_struct *mm)
 	preempt_disable();
 	pid = mm->context.id;
 	if (pid != MMU_NO_CONTEXT)
-		_tlbiel_pid(pid);
+		_tlbiel_pid(pid, RIC_FLUSH_ALL);
 	preempt_enable();
 }
 EXPORT_SYMBOL(radix__local_flush_tlb_mm);
@@ -135,7 +136,7 @@ void radix___local_flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr,
 	preempt_disable();
 	pid = mm ? mm->context.id : 0;
 	if (pid != MMU_NO_CONTEXT)
-		_tlbiel_va(vmaddr, pid, ap);
+		_tlbiel_va(vmaddr, pid, ap, RIC_FLUSH_TLB);
 	preempt_enable();
 }
 
@@ -172,11 +173,11 @@ void radix__flush_tlb_mm(struct mm_struct *mm)
 
 		if (lock_tlbie)
 			raw_spin_lock(&native_tlbie_lock);
-		_tlbie_pid(pid);
+		_tlbie_pid(pid, RIC_FLUSH_ALL);
 		if (lock_tlbie)
 			raw_spin_unlock(&native_tlbie_lock);
 	} else
-		_tlbiel_pid(pid);
+		_tlbiel_pid(pid, RIC_FLUSH_ALL);
 no_context:
 	preempt_enable();
 }
@@ -196,11 +197,11 @@ void radix___flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr,
 
 		if (lock_tlbie)
 			raw_spin_lock(&native_tlbie_lock);
-		_tlbie_va(vmaddr, pid, ap);
+		_tlbie_va(vmaddr, pid, ap, RIC_FLUSH_TLB);
 		if (lock_tlbie)
 			raw_spin_unlock(&native_tlbie_lock);
 	} else
-		_tlbiel_va(vmaddr, pid, ap);
+		_tlbiel_va(vmaddr, pid, ap, RIC_FLUSH_TLB);
 bail:
 	preempt_enable();
 }
@@ -224,7 +225,7 @@ void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end)
 
 	if (lock_tlbie)
 		raw_spin_lock(&native_tlbie_lock);
-	_tlbie_pid(0);
+	_tlbie_pid(0, RIC_FLUSH_ALL);
 	if (lock_tlbie)
 		raw_spin_unlock(&native_tlbie_lock);
 }
-- 
cgit v0.10.2


From a145abf12c9f7d30d8c330c9d8a97428cbf0589b Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Wed, 8 Jun 2016 19:55:51 +0530
Subject: powerpc/mm/radix: Flush page walk cache when freeing page table

Even though a tlb_flush() does a flush with invalidate all cache,
we can end up doing an RCU page table free before calling tlb_flush().
That means we can have page walk cache entries even after we free the
page table pages. This can result in us doing wrong page table walk.

Avoid this by doing pwc flush on every page table free. We can't batch
the pwc flush, because the rcu call back function where we free the
page table pages doesn't have information of the mmu gather. Thus we
have to do a pwc on every page table page freed.

Note: I also removed the dummy tlb_flush_pgtable call functions for
hash 32.

Fixes: 1a472c9dba6b ("powerpc/mm/radix: Add tlbflush routines")
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>

diff --git a/arch/powerpc/include/asm/book3s/32/pgalloc.h b/arch/powerpc/include/asm/book3s/32/pgalloc.h
index a235019..8e21bb4 100644
--- a/arch/powerpc/include/asm/book3s/32/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/32/pgalloc.h
@@ -102,7 +102,6 @@ static inline void pgtable_free_tlb(struct mmu_gather *tlb,
 static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
 				  unsigned long address)
 {
-	tlb_flush_pgtable(tlb, address);
 	pgtable_page_dtor(table);
 	pgtable_free_tlb(tlb, page_address(table), 0);
 }
diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h
index 488279e..26eb2cb 100644
--- a/arch/powerpc/include/asm/book3s/64/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h
@@ -110,6 +110,11 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
 static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
                                   unsigned long address)
 {
+	/*
+	 * By now all the pud entries should be none entries. So go
+	 * ahead and flush the page walk cache
+	 */
+	flush_tlb_pgtable(tlb, address);
         pgtable_free_tlb(tlb, pud, PUD_INDEX_SIZE);
 }
 
@@ -127,6 +132,11 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
                                   unsigned long address)
 {
+	/*
+	 * By now all the pud entries should be none entries. So go
+	 * ahead and flush the page walk cache
+	 */
+	flush_tlb_pgtable(tlb, address);
         return pgtable_free_tlb(tlb, pmd, PMD_CACHE_INDEX);
 }
 
@@ -198,7 +208,11 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
 static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
 				  unsigned long address)
 {
-	tlb_flush_pgtable(tlb, address);
+	/*
+	 * By now all the pud entries should be none entries. So go
+	 * ahead and flush the page walk cache
+	 */
+	flush_tlb_pgtable(tlb, address);
 	pgtable_free_tlb(tlb, table, 0);
 }
 
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
index 13ef388..3fa94fca 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
@@ -18,16 +18,19 @@ extern void radix__local_flush_tlb_mm(struct mm_struct *mm);
 extern void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
 extern void radix___local_flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr,
 				    unsigned long ap, int nid);
+extern void radix__local_flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr);
 extern void radix__tlb_flush(struct mmu_gather *tlb);
 #ifdef CONFIG_SMP
 extern void radix__flush_tlb_mm(struct mm_struct *mm);
 extern void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
 extern void radix___flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr,
 			      unsigned long ap, int nid);
+extern void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr);
 #else
 #define radix__flush_tlb_mm(mm)		radix__local_flush_tlb_mm(mm)
 #define radix__flush_tlb_page(vma,addr)	radix__local_flush_tlb_page(vma,addr)
 #define radix___flush_tlb_page(mm,addr,p,i)	radix___local_flush_tlb_page(mm,addr,p,i)
+#define radix__flush_tlb_pwc(tlb, addr)	radix__local_flush_tlb_pwc(tlb, addr)
 #endif
 
 #endif
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h
index d98424a..96e5769 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h
@@ -72,5 +72,19 @@ static inline void flush_tlb_page(struct vm_area_struct *vma,
 #define flush_tlb_mm(mm)		local_flush_tlb_mm(mm)
 #define flush_tlb_page(vma, addr)	local_flush_tlb_page(vma, addr)
 #endif /* CONFIG_SMP */
+/*
+ * flush the page walk cache for the address
+ */
+static inline void flush_tlb_pgtable(struct mmu_gather *tlb, unsigned long address)
+{
+	/*
+	 * Flush the page table walk cache on freeing a page table. We already
+	 * have marked the upper/higher level page table entry none by now.
+	 * So it is safe to flush PWC here.
+	 */
+	if (!radix_enabled())
+		return;
 
+	radix__flush_tlb_pwc(tlb, address);
+}
 #endif /*  _ASM_POWERPC_BOOK3S_64_TLBFLUSH_H */
diff --git a/arch/powerpc/include/asm/book3s/pgalloc.h b/arch/powerpc/include/asm/book3s/pgalloc.h
index 54f591e..c0a69ae 100644
--- a/arch/powerpc/include/asm/book3s/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/pgalloc.h
@@ -4,11 +4,6 @@
 #include <linux/mm.h>
 
 extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
-static inline void tlb_flush_pgtable(struct mmu_gather *tlb,
-				     unsigned long address)
-{
-
-}
 
 #ifdef CONFIG_PPC64
 #include <asm/book3s/64/pgalloc.h>
diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c
index 7108362..ab2f60e 100644
--- a/arch/powerpc/mm/tlb-radix.c
+++ b/arch/powerpc/mm/tlb-radix.c
@@ -128,6 +128,21 @@ void radix__local_flush_tlb_mm(struct mm_struct *mm)
 }
 EXPORT_SYMBOL(radix__local_flush_tlb_mm);
 
+void radix__local_flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr)
+{
+	unsigned long pid;
+	struct mm_struct *mm = tlb->mm;
+
+	preempt_disable();
+
+	pid = mm->context.id;
+	if (pid != MMU_NO_CONTEXT)
+		_tlbiel_pid(pid, RIC_FLUSH_PWC);
+
+	preempt_enable();
+}
+EXPORT_SYMBOL(radix__local_flush_tlb_pwc);
+
 void radix___local_flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr,
 			    unsigned long ap, int nid)
 {
@@ -183,6 +198,32 @@ no_context:
 }
 EXPORT_SYMBOL(radix__flush_tlb_mm);
 
+void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr)
+{
+	unsigned long pid;
+	struct mm_struct *mm = tlb->mm;
+
+	preempt_disable();
+
+	pid = mm->context.id;
+	if (unlikely(pid == MMU_NO_CONTEXT))
+		goto no_context;
+
+	if (!mm_is_core_local(mm)) {
+		int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
+
+		if (lock_tlbie)
+			raw_spin_lock(&native_tlbie_lock);
+		_tlbie_pid(pid, RIC_FLUSH_PWC);
+		if (lock_tlbie)
+			raw_spin_unlock(&native_tlbie_lock);
+	} else
+		_tlbiel_pid(pid, RIC_FLUSH_PWC);
+no_context:
+	preempt_enable();
+}
+EXPORT_SYMBOL(radix__flush_tlb_pwc);
+
 void radix___flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr,
 		       unsigned long ap, int nid)
 {
-- 
cgit v0.10.2


From 06a84db74c3572cde79eb1b04f301399eafb8226 Mon Sep 17 00:00:00 2001
From: Luca Coelho <luciano.coelho@intel.com>
Date: Mon, 2 May 2016 15:27:34 +0300
Subject: iwlwifi: mvm: increase scan timeout to 20 seconds

The 16 seconds timeout we were using turned out to be too short.
Recalculations by system show that the total time in both bands should
be < 18.5 seconds, even in the slowest cases (e.g. DCM P2P with
DTIM=2).  Rounding it up to 20 seconds for a bit more safety.

Fixes: 728e825f81b1 ("iwlwifi: mvm: add a scan timeout for regular scans")
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
index 6f609dd..e78fc56 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
@@ -1222,7 +1222,7 @@ static int iwl_mvm_check_running_scans(struct iwl_mvm *mvm, int type)
 	return -EIO;
 }
 
-#define SCAN_TIMEOUT (16 * HZ)
+#define SCAN_TIMEOUT (20 * HZ)
 
 void iwl_mvm_scan_timeout(unsigned long data)
 {
-- 
cgit v0.10.2


From 7d6a1ab6a2db180122dee8db6c201f2dcf677813 Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Sun, 15 May 2016 10:20:29 +0300
Subject: iwlwifi: mvm: fix RCU splat in TKIP's update_key

The commit below mistakenly changed an rcu_dereference_check
to a rcu_dereference_protected which introduced the
following RCU warning:

[ INFO: suspicious RCU usage. ]
 4.6.0-rc7-next-20160513-dbg-00004-g8de8b92-dirty #655 Not tainted
 -------------------------------
 drivers/net/wireless/intel/iwlwifi/mvm/mvm.h:1069 suspicious rcu_dereference_protected() usage!
 Call Trace:
  [<ffffffff8106b836>] lockdep_rcu_suspicious+0xf7/0x100
  [<ffffffffa03b2321>] iwl_mvm_get_key_sta.part.0+0x5d/0x80 [iwlmvm]
  [<ffffffffa03b4acb>] iwl_mvm_update_tkip_key+0xd3/0x162 [iwlmvm]
  [<ffffffffa03a2b60>] iwl_mvm_mac_update_tkip_key+0x17/0x19 [iwlmvm]
  [<ffffffffa0329646>] ieee80211_tkip_decrypt_data+0x22c/0x24b [mac80211]
  [<ffffffffa0318bb1>] ieee80211_crypto_tkip_decrypt+0xc5/0x110 [mac80211]
  [<ffffffffa033102e>] ieee80211_rx_handlers+0x9bb/0x1fe1 [mac80211]

Fixes: 13303c0fb148 ("iwlwifi: mvm: use helpers to get iwl_mvm_sta")
Reported-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
index fea4d3437..0454bfe 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
@@ -1852,12 +1852,18 @@ static struct iwl_mvm_sta *iwl_mvm_get_key_sta(struct iwl_mvm *mvm,
 	    mvmvif->ap_sta_id != IWL_MVM_STATION_COUNT) {
 		u8 sta_id = mvmvif->ap_sta_id;
 
+		sta = rcu_dereference_check(mvm->fw_id_to_mac_id[sta_id],
+					    lockdep_is_held(&mvm->mutex));
+
 		/*
 		 * It is possible that the 'sta' parameter is NULL,
 		 * for example when a GTK is removed - the sta_id will then
 		 * be the AP ID, and no station was passed by mac80211.
 		 */
-		return iwl_mvm_sta_from_staid_protected(mvm, sta_id);
+		if (IS_ERR_OR_NULL(sta))
+			return NULL;
+
+		return iwl_mvm_sta_from_mac80211(sta);
 	}
 
 	return NULL;
-- 
cgit v0.10.2


From 1f9788f335d7c3145bcb59bd570c5b9ef7203ef4 Mon Sep 17 00:00:00 2001
From: Luca Coelho <luciano.coelho@intel.com>
Date: Mon, 16 May 2016 14:34:20 +0300
Subject: iwlwifi: mvm: fix potential NULL-dereference in iwl_mvm_reorder()

We try to access sta before we check for IS_ERR_OR_NULL(), so we may
end up accessing a NULL pointer.  To prevent that, move the conversion
from sta to mvm_sta below the check.

Fixes: b915c10174fb ("iwlwifi: mvm: add reorder buffer per queue")
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
index ac2c571..2c61516 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
@@ -581,7 +581,7 @@ static bool iwl_mvm_reorder(struct iwl_mvm *mvm,
 			    struct iwl_rx_mpdu_desc *desc)
 {
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
-	struct iwl_mvm_sta *mvm_sta = iwl_mvm_sta_from_mac80211(sta);
+	struct iwl_mvm_sta *mvm_sta;
 	struct iwl_mvm_baid_data *baid_data;
 	struct iwl_mvm_reorder_buffer *buffer;
 	struct sk_buff *tail;
@@ -604,6 +604,8 @@ static bool iwl_mvm_reorder(struct iwl_mvm *mvm,
 	if (WARN_ON(IS_ERR_OR_NULL(sta)))
 		return false;
 
+	mvm_sta = iwl_mvm_sta_from_mac80211(sta);
+
 	/* not a data packet */
 	if (!ieee80211_is_data_qos(hdr->frame_control) ||
 	    is_multicast_ether_addr(hdr->addr1))
-- 
cgit v0.10.2


From aa950524d501afa28869b7f56e539fd9e744dd9f Mon Sep 17 00:00:00 2001
From: Ayala Beker <ayala.beker@intel.com>
Date: Wed, 1 Jun 2016 00:28:09 +0300
Subject: iwlwifi: mvm: set the encryption type of an IGTK key

The FW expect the driver to set the encryption algorithm type when
installing the IGTK key in the HW.
Currently when installing CMAC IGTK key we don't set the algorithm type
and as a result the FW fails to calculate the MIC of multicast management
frames.
Fix it.

Signed-off-by: Ayala Beker <ayala.beker@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
index 0454bfe..b23ab4a 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
@@ -1961,6 +1961,14 @@ static int iwl_mvm_send_sta_igtk(struct iwl_mvm *mvm,
 		struct ieee80211_key_seq seq;
 		const u8 *pn;
 
+		switch (keyconf->cipher) {
+		case WLAN_CIPHER_SUITE_AES_CMAC:
+			igtk_cmd.ctrl_flags |= cpu_to_le32(STA_KEY_FLG_CCM);
+			break;
+		default:
+			return -EINVAL;
+		}
+
 		memcpy(igtk_cmd.IGTK, keyconf->key, keyconf->keylen);
 		ieee80211_get_key_rx_seq(keyconf, 0, &seq);
 		pn = seq.aes_cmac.pn;
-- 
cgit v0.10.2


From 280a3efa82fccc9532c968a77e5162cb9f0af497 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 7 Jun 2016 14:46:37 +0200
Subject: iwlwifi: mvm: fix a few firmware capability checks

My cleanup in "iwlwifi: prepare for higher API/CAPA bits" accidentally
inverted a few tests - fix them.

Fixes: 859d914c8f5c ("iwlwifi: prepare for higher API/CAPA bits")
Reported-by: Sara Sharon <sara.sharon@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
index e5f267b..18a8474 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
@@ -3851,8 +3851,8 @@ static int iwl_mvm_mac_get_survey(struct ieee80211_hw *hw, int idx,
 	if (idx != 0)
 		return -ENOENT;
 
-	if (fw_has_capa(&mvm->fw->ucode_capa,
-			IWL_UCODE_TLV_CAPA_RADIO_BEACON_STATS))
+	if (!fw_has_capa(&mvm->fw->ucode_capa,
+			 IWL_UCODE_TLV_CAPA_RADIO_BEACON_STATS))
 		return -ENOENT;
 
 	mutex_lock(&mvm->mutex);
@@ -3898,8 +3898,8 @@ static void iwl_mvm_mac_sta_statistics(struct ieee80211_hw *hw,
 	struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
 	struct iwl_mvm_sta *mvmsta = iwl_mvm_sta_from_mac80211(sta);
 
-	if (fw_has_capa(&mvm->fw->ucode_capa,
-			IWL_UCODE_TLV_CAPA_RADIO_BEACON_STATS))
+	if (!fw_has_capa(&mvm->fw->ucode_capa,
+			 IWL_UCODE_TLV_CAPA_RADIO_BEACON_STATS))
 		return;
 
 	/* if beacon filtering isn't on mac80211 does it anyway */
-- 
cgit v0.10.2


From ba65dc5ef16f82fba77869cecf7a7d515f61446b Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 10 Jun 2016 11:32:47 -0400
Subject: much milder d_walk() race

d_walk() relies upon the tree not getting rearranged under it without
rename_lock being touched.  And we do grab rename_lock around the
places that change the tree topology.  Unfortunately, branch reordering
is just as bad from d_walk() POV and we have two places that do it
without touching rename_lock - one in handling of cursors (for ramfs-style
directories) and another in autofs.  autofs one is a separate story; this
commit deals with the cursors.
	* mark cursor dentries explicitly at allocation time
	* make __dentry_kill() leave ->d_child.next pointing to the next
non-cursor sibling, making sure that it won't be moved around unnoticed
before the parent is relocked on ascend-to-parent path in d_walk().
	* make d_walk() skip cursors explicitly; strictly speaking it's
not necessary (all callbacks we pass to d_walk() are no-ops on cursors),
but it makes analysis easier.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/dcache.c b/fs/dcache.c
index 817c243..b7eddfd 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -507,6 +507,44 @@ void d_drop(struct dentry *dentry)
 }
 EXPORT_SYMBOL(d_drop);
 
+static inline void dentry_unlist(struct dentry *dentry, struct dentry *parent)
+{
+	struct dentry *next;
+	/*
+	 * Inform d_walk() and shrink_dentry_list() that we are no longer
+	 * attached to the dentry tree
+	 */
+	dentry->d_flags |= DCACHE_DENTRY_KILLED;
+	if (unlikely(list_empty(&dentry->d_child)))
+		return;
+	__list_del_entry(&dentry->d_child);
+	/*
+	 * Cursors can move around the list of children.  While we'd been
+	 * a normal list member, it didn't matter - ->d_child.next would've
+	 * been updated.  However, from now on it won't be and for the
+	 * things like d_walk() it might end up with a nasty surprise.
+	 * Normally d_walk() doesn't care about cursors moving around -
+	 * ->d_lock on parent prevents that and since a cursor has no children
+	 * of its own, we get through it without ever unlocking the parent.
+	 * There is one exception, though - if we ascend from a child that
+	 * gets killed as soon as we unlock it, the next sibling is found
+	 * using the value left in its ->d_child.next.  And if _that_
+	 * pointed to a cursor, and cursor got moved (e.g. by lseek())
+	 * before d_walk() regains parent->d_lock, we'll end up skipping
+	 * everything the cursor had been moved past.
+	 *
+	 * Solution: make sure that the pointer left behind in ->d_child.next
+	 * points to something that won't be moving around.  I.e. skip the
+	 * cursors.
+	 */
+	while (dentry->d_child.next != &parent->d_subdirs) {
+		next = list_entry(dentry->d_child.next, struct dentry, d_child);
+		if (likely(!(next->d_flags & DCACHE_DENTRY_CURSOR)))
+			break;
+		dentry->d_child.next = next->d_child.next;
+	}
+}
+
 static void __dentry_kill(struct dentry *dentry)
 {
 	struct dentry *parent = NULL;
@@ -532,12 +570,7 @@ static void __dentry_kill(struct dentry *dentry)
 	}
 	/* if it was on the hash then remove it */
 	__d_drop(dentry);
-	__list_del_entry(&dentry->d_child);
-	/*
-	 * Inform d_walk() that we are no longer attached to the
-	 * dentry tree
-	 */
-	dentry->d_flags |= DCACHE_DENTRY_KILLED;
+	dentry_unlist(dentry, parent);
 	if (parent)
 		spin_unlock(&parent->d_lock);
 	dentry_iput(dentry);
@@ -1203,6 +1236,9 @@ resume:
 		struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
 		next = tmp->next;
 
+		if (unlikely(dentry->d_flags & DCACHE_DENTRY_CURSOR))
+			continue;
+
 		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
 
 		ret = enter(data, dentry);
@@ -1651,6 +1687,16 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
 }
 EXPORT_SYMBOL(d_alloc);
 
+struct dentry *d_alloc_cursor(struct dentry * parent)
+{
+	struct dentry *dentry = __d_alloc(parent->d_sb, NULL);
+	if (dentry) {
+		dentry->d_flags |= DCACHE_RCUACCESS | DCACHE_DENTRY_CURSOR;
+		dentry->d_parent = dget(parent);
+	}
+	return dentry;
+}
+
 /**
  * d_alloc_pseudo - allocate a dentry (for lookup-less filesystems)
  * @sb: the superblock
diff --git a/fs/internal.h b/fs/internal.h
index b71deee..f57ced5 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -130,6 +130,7 @@ extern int invalidate_inodes(struct super_block *, bool);
 extern struct dentry *__d_alloc(struct super_block *, const struct qstr *);
 extern int d_set_mounted(struct dentry *dentry);
 extern long prune_dcache_sb(struct super_block *sb, struct shrink_control *sc);
+extern struct dentry *d_alloc_cursor(struct dentry *);
 
 /*
  * read_write.c
diff --git a/fs/libfs.c b/fs/libfs.c
index 3db2721..cedeacb 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -71,9 +71,7 @@ EXPORT_SYMBOL(simple_lookup);
 
 int dcache_dir_open(struct inode *inode, struct file *file)
 {
-	static struct qstr cursor_name = QSTR_INIT(".", 1);
-
-	file->private_data = d_alloc(file->f_path.dentry, &cursor_name);
+	file->private_data = d_alloc_cursor(file->f_path.dentry);
 
 	return file->private_data ? 0 : -ENOMEM;
 }
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 484c879..bcd0c64 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -212,6 +212,7 @@ struct dentry_operations {
 #define DCACHE_OP_REAL			0x08000000
 
 #define DCACHE_PAR_LOOKUP		0x10000000 /* being looked up (with parent locked shared) */
+#define DCACHE_DENTRY_CURSOR		0x20000000
 
 extern seqlock_t rename_lock;
 
-- 
cgit v0.10.2


From fc0f7e3317c5f406e6d5520209b4689a4ffecfdf Mon Sep 17 00:00:00 2001
From: Manfred Schlaegl <manfred.schlaegl@ginzinger.com>
Date: Mon, 6 Jun 2016 10:47:47 +0200
Subject: net: phy: smsc: reintroduced unconditional soft reset

We detected some problems using the smsc lan8720a in combination with
i.MX28 and tracked this down to commit 21009686662f ("net: phy: smsc: move
smsc_phy_config_init reset part in a soft_reset function")
With 2100968666 the generic soft reset is replaced by a specific function
which handles power down state correctly. But additionally the soft reset
itself got conditional and is therefore also only performed if the phy is
in power down state.

This patch keeps the conditional wake up from power down, but
re-introduces the unconditional soft reset using the generic soft reset
function.
It was tested on linux-4.1.25 and linux-4.7.0-rc2.

Signed-off-by: Manfred Schlaegl <manfred.schlaegl@ginzinger.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/phy/smsc.c b/drivers/net/phy/smsc.c
index 2e21e93..b62c4aa 100644
--- a/drivers/net/phy/smsc.c
+++ b/drivers/net/phy/smsc.c
@@ -75,22 +75,13 @@ static int smsc_phy_reset(struct phy_device *phydev)
 	 * in all capable mode before using it.
 	 */
 	if ((rc & MII_LAN83C185_MODE_MASK) == MII_LAN83C185_MODE_POWERDOWN) {
-		int timeout = 50000;
-
-		/* set "all capable" mode and reset the phy */
+		/* set "all capable" mode */
 		rc |= MII_LAN83C185_MODE_ALL;
 		phy_write(phydev, MII_LAN83C185_SPECIAL_MODES, rc);
-		phy_write(phydev, MII_BMCR, BMCR_RESET);
-
-		/* wait end of reset (max 500 ms) */
-		do {
-			udelay(10);
-			if (timeout-- == 0)
-				return -1;
-			rc = phy_read(phydev, MII_BMCR);
-		} while (rc & BMCR_RESET);
 	}
-	return 0;
+
+	/* reset the phy */
+	return genphy_soft_reset(phydev);
 }
 
 static int lan911x_config_init(struct phy_device *phydev)
-- 
cgit v0.10.2


From 56fae404fb2c306db0a35dad0d16fa24c65678f3 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Tue, 7 Jun 2016 12:06:58 +0300
Subject: bridge: Fix incorrect re-injection of STP packets

Commit 8626c56c8279 ("bridge: fix potential use-after-free when hook
returns QUEUE or STOLEN verdict") fixed incorrect usage of NF_HOOK's
return value by consuming packets in okfn via br_pass_frame_up().

However, this function re-injects packets to the Rx path with skb->dev
set to the bridge device, which breaks kernel's STP, as all STP packets
appear to originate from the bridge device itself.

Instead, if STP is enabled and bridge isn't a 802.1ad bridge, then learn
packet's SMAC and inject it back to the Rx path for further processing
by the packet handlers.

The patch also makes netfilter's behavior consistent with regards to
packets destined to the Bridge Group Address, as no hook registered at
LOCAL_IN will ever be called, regardless if STP is enabled or not.

Cc: Florian Westphal <fw@strlen.de>
Cc: Shmulik Ladkani <shmulik.ladkani@gmail.com>
Cc: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Fixes: 8626c56c8279 ("bridge: fix potential use-after-free when hook returns QUEUE or STOLEN verdict")
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 1607977..43d2cd8 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -213,8 +213,7 @@ drop:
 }
 EXPORT_SYMBOL_GPL(br_handle_frame_finish);
 
-/* note: already called with rcu_read_lock */
-static int br_handle_local_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
+static void __br_handle_local_finish(struct sk_buff *skb)
 {
 	struct net_bridge_port *p = br_port_get_rcu(skb->dev);
 	u16 vid = 0;
@@ -222,6 +221,14 @@ static int br_handle_local_finish(struct net *net, struct sock *sk, struct sk_bu
 	/* check if vlan is allowed, to avoid spoofing */
 	if (p->flags & BR_LEARNING && br_should_learn(p, skb, &vid))
 		br_fdb_update(p->br, p, eth_hdr(skb)->h_source, vid, false);
+}
+
+/* note: already called with rcu_read_lock */
+static int br_handle_local_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
+{
+	struct net_bridge_port *p = br_port_get_rcu(skb->dev);
+
+	__br_handle_local_finish(skb);
 
 	BR_INPUT_SKB_CB(skb)->brdev = p->br->dev;
 	br_pass_frame_up(skb);
@@ -274,7 +281,9 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb)
 			if (p->br->stp_enabled == BR_NO_STP ||
 			    fwd_mask & (1u << dest[5]))
 				goto forward;
-			break;
+			*pskb = skb;
+			__br_handle_local_finish(skb);
+			return RX_HANDLER_PASS;
 
 		case 0x01:	/* IEEE MAC (Pause) */
 			goto drop;
-- 
cgit v0.10.2


From c3ec5e5ce9cea1f369a5a8ad69d6471680796bc6 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben.dooks@codethink.co.uk>
Date: Thu, 9 Jun 2016 18:05:09 +0100
Subject: net: diag: add missing declarations

The functions inet_diag_msg_common_fill and inet_diag_msg_attrs_fill
seem to have been missed from the include/linux/inet_diag.h header
file. Add them to fix the following warnings:

net/ipv4/inet_diag.c:69:6: warning: symbol 'inet_diag_msg_common_fill' was not declared. Should it be static?
net/ipv4/inet_diag.c:108:5: warning: symbol 'inet_diag_msg_attrs_fill' was not declared. Should it be static?

Signed-off-by: Ben Dooks <ben.dooks@codethink.co.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h
index 7c27fa1..feb04ea 100644
--- a/include/linux/inet_diag.h
+++ b/include/linux/inet_diag.h
@@ -52,6 +52,12 @@ struct sock *inet_diag_find_one_icsk(struct net *net,
 
 int inet_diag_bc_sk(const struct nlattr *_bc, struct sock *sk);
 
+void inet_diag_msg_common_fill(struct inet_diag_msg *r, struct sock *sk);
+
+int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb,
+			     struct inet_diag_msg *r, int ext,
+			     struct user_namespace *user_ns);
+
 extern int  inet_diag_register(const struct inet_diag_handler *handler);
 extern void inet_diag_unregister(const struct inet_diag_handler *handler);
 #endif /* _INET_DIAG_H_ */
diff --git a/net/sctp/sctp_diag.c b/net/sctp/sctp_diag.c
index 1ce724b..f69edcf 100644
--- a/net/sctp/sctp_diag.c
+++ b/net/sctp/sctp_diag.c
@@ -3,12 +3,6 @@
 #include <linux/sock_diag.h>
 #include <net/sctp/sctp.h>
 
-extern void inet_diag_msg_common_fill(struct inet_diag_msg *r,
-				      struct sock *sk);
-extern int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb,
-				    struct inet_diag_msg *r, int ext,
-				    struct user_namespace *user_ns);
-
 static void sctp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
 			       void *info);
 
-- 
cgit v0.10.2


From 0b392be9a86560dae3af2e7528f226ff465ab549 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben.dooks@codethink.co.uk>
Date: Fri, 10 Jun 2016 12:11:06 +0100
Subject: net: ipconfig: avoid warning by making ic_addrservaddr static

The symbol ic_addrservaddr is not static, but has no declaration
to match so make it static to fix the following warning:

net/ipv4/ipconfig.c:130:8: warning: symbol 'ic_addrservaddr' was not declared. Should it be static?

Signed-off-by: Ben Dooks <ben.dooks@codethink.co.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 2ed9dd2..eccf9fd 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -127,7 +127,7 @@ __be32 ic_myaddr = NONE;		/* My IP address */
 static __be32 ic_netmask = NONE;	/* Netmask for local subnet */
 __be32 ic_gateway = NONE;	/* Gateway IP address */
 
-__be32 ic_addrservaddr = NONE;	/* IP Address of the IP addresses'server */
+static __be32 ic_addrservaddr = NONE;	/* IP Address of the IP addresses'server */
 
 __be32 ic_servaddr = NONE;	/* Boot server IP address */
 
-- 
cgit v0.10.2


From 562c5a70400c75f50d99de631666ac7cc2f03958 Mon Sep 17 00:00:00 2001
From: John Crispin <john@phrozen.org>
Date: Fri, 10 Jun 2016 13:27:58 +0200
Subject: net: mediatek: add missing return code check

The code fails to check if the scratch memory was properly allocated. Add
this check and return with an error if the allocation failed.

Signed-off-by: John Crispin <john@phrozen.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 4763252..11cd730 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -495,6 +495,9 @@ static int mtk_init_fq_dma(struct mtk_eth *eth)
 
 	eth->scratch_head = kcalloc(cnt, MTK_QDMA_PAGE_SIZE,
 				    GFP_KERNEL);
+	if (unlikely(!eth->scratch_head))
+		return -ENOMEM;
+
 	dma_addr = dma_map_single(eth->dev,
 				  eth->scratch_head, cnt * MTK_QDMA_PAGE_SIZE,
 				  DMA_FROM_DEVICE);
-- 
cgit v0.10.2


From 605e4fe476956c67ced8518247e6c9601a31b868 Mon Sep 17 00:00:00 2001
From: John Crispin <john@phrozen.org>
Date: Fri, 10 Jun 2016 13:27:59 +0200
Subject: net: mediatek: fix missing free of scratch memory

Scratch memory gets allocated in mtk_init_fq_dma() but the corresponding
code to free it is missing inside mtk_dma_free() causing a memory leak.
With this patch applied, we can run ifconfig up/down several thousand
times without any problems.

Signed-off-by: John Crispin <john@phrozen.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 11cd730..fb8b17d 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -481,14 +481,14 @@ static inline void mtk_rx_get_desc(struct mtk_rx_dma *rxd,
 /* the qdma core needs scratch memory to be setup */
 static int mtk_init_fq_dma(struct mtk_eth *eth)
 {
-	dma_addr_t phy_ring_head, phy_ring_tail;
+	dma_addr_t phy_ring_tail;
 	int cnt = MTK_DMA_SIZE;
 	dma_addr_t dma_addr;
 	int i;
 
 	eth->scratch_ring = dma_alloc_coherent(eth->dev,
 					       cnt * sizeof(struct mtk_tx_dma),
-					       &phy_ring_head,
+					       &eth->phy_scratch_ring,
 					       GFP_ATOMIC | __GFP_ZERO);
 	if (unlikely(!eth->scratch_ring))
 		return -ENOMEM;
@@ -505,19 +505,19 @@ static int mtk_init_fq_dma(struct mtk_eth *eth)
 		return -ENOMEM;
 
 	memset(eth->scratch_ring, 0x0, sizeof(struct mtk_tx_dma) * cnt);
-	phy_ring_tail = phy_ring_head +
+	phy_ring_tail = eth->phy_scratch_ring +
 			(sizeof(struct mtk_tx_dma) * (cnt - 1));
 
 	for (i = 0; i < cnt; i++) {
 		eth->scratch_ring[i].txd1 =
 					(dma_addr + (i * MTK_QDMA_PAGE_SIZE));
 		if (i < cnt - 1)
-			eth->scratch_ring[i].txd2 = (phy_ring_head +
+			eth->scratch_ring[i].txd2 = (eth->phy_scratch_ring +
 				((i + 1) * sizeof(struct mtk_tx_dma)));
 		eth->scratch_ring[i].txd3 = TX_DMA_SDL(MTK_QDMA_PAGE_SIZE);
 	}
 
-	mtk_w32(eth, phy_ring_head, MTK_QDMA_FQ_HEAD);
+	mtk_w32(eth, eth->phy_scratch_ring, MTK_QDMA_FQ_HEAD);
 	mtk_w32(eth, phy_ring_tail, MTK_QDMA_FQ_TAIL);
 	mtk_w32(eth, (cnt << 16) | cnt, MTK_QDMA_FQ_CNT);
 	mtk_w32(eth, MTK_QDMA_PAGE_SIZE << 16, MTK_QDMA_FQ_BLEN);
@@ -1210,6 +1210,14 @@ static void mtk_dma_free(struct mtk_eth *eth)
 	for (i = 0; i < MTK_MAC_COUNT; i++)
 		if (eth->netdev[i])
 			netdev_reset_queue(eth->netdev[i]);
+	if (eth->scratch_ring) {
+		dma_free_coherent(eth->dev,
+				  MTK_DMA_SIZE * sizeof(struct mtk_tx_dma),
+				  eth->scratch_ring,
+				  eth->phy_scratch_ring);
+		eth->scratch_ring = NULL;
+		eth->phy_scratch_ring = 0;
+	}
 	mtk_tx_clean(eth);
 	mtk_rx_clean(eth);
 	kfree(eth->scratch_head);
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
index eed626d..57f7e8a 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
@@ -357,6 +357,7 @@ struct mtk_rx_ring {
  * @rx_ring:		Pointer to the memore holding info about the RX ring
  * @rx_napi:		The NAPI struct
  * @scratch_ring:	Newer SoCs need memory for a second HW managed TX ring
+ * @phy_scratch_ring:	physical address of scratch_ring
  * @scratch_head:	The scratch memory that scratch_ring points to.
  * @clk_ethif:		The ethif clock
  * @clk_esw:		The switch clock
@@ -384,6 +385,7 @@ struct mtk_eth {
 	struct mtk_rx_ring		rx_ring;
 	struct napi_struct		rx_napi;
 	struct mtk_tx_dma		*scratch_ring;
+	dma_addr_t			phy_scratch_ring;
 	void				*scratch_head;
 	struct clk			*clk_ethif;
 	struct clk			*clk_esw;
-- 
cgit v0.10.2


From 2fae723cefb8bfe712371978288aa0a70b54802e Mon Sep 17 00:00:00 2001
From: John Crispin <john@phrozen.org>
Date: Fri, 10 Jun 2016 13:28:00 +0200
Subject: net: mediatek: invalid buffer lookup in mtk_tx_map()

The lookup of the tx_buffer in the error path inside mtk_tx_map() uses the
wrong descriptor pointer. This looks like a copy & paste error. Change the
code to use the correct pointer.

Signed-off-by: John Crispin <john@phrozen.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index fb8b17d..8c3e543 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -674,7 +674,7 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev,
 
 err_dma:
 	do {
-		tx_buf = mtk_desc_to_tx_buf(ring, txd);
+		tx_buf = mtk_desc_to_tx_buf(ring, itxd);
 
 		/* unmap dma */
 		mtk_tx_unmap(&dev->dev, tx_buf);
-- 
cgit v0.10.2


From 94321a9fc9f5b6c6e949cc7c69741538f556ab74 Mon Sep 17 00:00:00 2001
From: John Crispin <john@phrozen.org>
Date: Fri, 10 Jun 2016 13:28:01 +0200
Subject: net: mediatek: dropped rx packets are not being counted properly

There are two places inside mtk_poll_rx where rx_dropped is not being
incremented properly. Fix this by adding the missing code to increment
the counter.

Signed-off-by: John Crispin <john@phrozen.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 8c3e543..b6d3c21 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -829,6 +829,7 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget,
 					  DMA_FROM_DEVICE);
 		if (unlikely(dma_mapping_error(&netdev->dev, dma_addr))) {
 			skb_free_frag(new_data);
+			netdev->stats.rx_dropped++;
 			goto release_desc;
 		}
 
@@ -836,6 +837,7 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget,
 		skb = build_skb(data, ring->frag_size);
 		if (unlikely(!skb)) {
 			put_page(virt_to_head_page(new_data));
+			netdev->stats.rx_dropped++;
 			goto release_desc;
 		}
 		skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
-- 
cgit v0.10.2


From 6675086d04e7c0748cd5884f7c8611b5f0836250 Mon Sep 17 00:00:00 2001
From: John Crispin <john@phrozen.org>
Date: Fri, 10 Jun 2016 13:28:02 +0200
Subject: net: mediatek: add next data pointer coherency protection

The QDMA engine can fail to update the register pointing to the next TX
descriptor if this bit does not get set in the QDMA configuration register.
Not setting this bit can result in invalid values inside the TX rings
registers which will causes TX stalls.

Signed-off-by: John Crispin <john@phrozen.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index b6d3c21..f30c2e47 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -1282,7 +1282,7 @@ static int mtk_start_dma(struct mtk_eth *eth)
 	mtk_w32(eth,
 		MTK_TX_WB_DDONE | MTK_RX_DMA_EN | MTK_TX_DMA_EN |
 		MTK_RX_2B_OFFSET | MTK_DMA_SIZE_16DWORDS |
-		MTK_RX_BT_32DWORDS,
+		MTK_RX_BT_32DWORDS | MTK_NDP_CO_PRO,
 		MTK_QDMA_GLO_CFG);
 
 	return 0;
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
index 57f7e8a..a5eb7c6 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
@@ -91,6 +91,7 @@
 #define MTK_QDMA_GLO_CFG	0x1A04
 #define MTK_RX_2B_OFFSET	BIT(31)
 #define MTK_RX_BT_32DWORDS	(3 << 11)
+#define MTK_NDP_CO_PRO		BIT(10)
 #define MTK_TX_WB_DDONE		BIT(6)
 #define MTK_DMA_SIZE_16DWORDS	(2 << 4)
 #define MTK_RX_DMA_BUSY		BIT(3)
-- 
cgit v0.10.2


From 2ff0bb61646f286fa97db2904491974302a14f1f Mon Sep 17 00:00:00 2001
From: John Crispin <john@phrozen.org>
Date: Fri, 10 Jun 2016 13:28:03 +0200
Subject: net: mediatek: disable all interrupts during probe

The current code only disables those IRQs that we will later use. To
ensure that we have a predefined state, we really want to disable all IRQs.
Change the code to disable all IRQs to achieve this.

Signed-off-by: John Crispin <john@phrozen.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index f30c2e47..e3dadae9 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -1396,7 +1396,7 @@ static int __init mtk_hw_init(struct mtk_eth *eth)
 
 	/* disable delay and normal interrupt */
 	mtk_w32(eth, 0, MTK_QDMA_DELAY_INT);
-	mtk_irq_disable(eth, MTK_TX_DONE_INT | MTK_RX_DONE_INT);
+	mtk_irq_disable(eth, ~0);
 	mtk_w32(eth, RST_GL_PSE, MTK_RST_GL);
 	mtk_w32(eth, 0, MTK_RST_GL);
 
-- 
cgit v0.10.2


From 04698cccb1de54d5d97fda2e4a1c6ca365da0f70 Mon Sep 17 00:00:00 2001
From: John Crispin <john@phrozen.org>
Date: Fri, 10 Jun 2016 13:28:04 +0200
Subject: net: mediatek: fix threshold value
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The logic to calculate the threshold value for stopping the TX queue is
bad. Currently it will always use 1/2 of the rings size, which is way too
much. Set the threshold to MAX_SKB_FRAGS. This makes sure that the queue
is stopped when there is not enough room to accept an additional segment. 

Signed-off-by: John Crispin <john@phrozen.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index e3dadae9..032939d 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -1033,8 +1033,7 @@ static int mtk_tx_alloc(struct mtk_eth *eth)
 	atomic_set(&ring->free_count, MTK_DMA_SIZE - 2);
 	ring->next_free = &ring->dma[0];
 	ring->last_free = &ring->dma[MTK_DMA_SIZE - 2];
-	ring->thresh = max((unsigned long)MTK_DMA_SIZE >> 2,
-			      MAX_SKB_FRAGS);
+	ring->thresh = MAX_SKB_FRAGS;
 
 	/* make sure that all changes to the dma ring are flushed before we
 	 * continue
-- 
cgit v0.10.2


From eaadf9fd3f6390f6ecbd0dfbd5997a0486fc9d5e Mon Sep 17 00:00:00 2001
From: John Crispin <john@phrozen.org>
Date: Fri, 10 Jun 2016 13:28:05 +0200
Subject: net: mediatek: increase watchdog_timeo

During stress testing, after reducing the threshold value, we have seen
TX timeouts that were caused by the watchdog_timeo value being too low.
Increase the value to 5 * HZ which is a value commonly used by many other
drivers.

Signed-off-by: John Crispin <john@phrozen.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 032939d..3b3ba2e 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -1709,7 +1709,7 @@ static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np)
 	mac->hw_stats->reg_offset = id * MTK_STAT_OFFSET;
 
 	SET_NETDEV_DEV(eth->netdev[id], eth->dev);
-	eth->netdev[id]->watchdog_timeo = HZ;
+	eth->netdev[id]->watchdog_timeo = 5 * HZ;
 	eth->netdev[id]->netdev_ops = &mtk_netdev_ops;
 	eth->netdev[id]->base_addr = (unsigned long)eth->base;
 	eth->netdev[id]->vlan_features = MTK_HW_FEATURES &
-- 
cgit v0.10.2


From 12c97c13ea7174db5b5dc4a1ef91d4e9245bb569 Mon Sep 17 00:00:00 2001
From: John Crispin <john@phrozen.org>
Date: Fri, 10 Jun 2016 13:28:06 +0200
Subject: net: mediatek: fix off by one in the TX ring allocation

The TX ring setup has an off by one error causing it to not utilise all
descriptors. This has the side effect that we need to reset the next
pointer at runtime to make it work. Fix the off by one and remove the
code fixing the ring at runtime.

Signed-off-by: John Crispin <john@phrozen.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 3b3ba2e..c097e9e 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -926,7 +926,6 @@ static int mtk_poll_tx(struct mtk_eth *eth, int budget, bool *tx_again)
 		}
 		mtk_tx_unmap(eth->dev, tx_buf);
 
-		ring->last_free->txd2 = next_cpu;
 		ring->last_free = desc;
 		atomic_inc(&ring->free_count);
 
@@ -1032,7 +1031,7 @@ static int mtk_tx_alloc(struct mtk_eth *eth)
 
 	atomic_set(&ring->free_count, MTK_DMA_SIZE - 2);
 	ring->next_free = &ring->dma[0];
-	ring->last_free = &ring->dma[MTK_DMA_SIZE - 2];
+	ring->last_free = &ring->dma[MTK_DMA_SIZE - 1];
 	ring->thresh = MAX_SKB_FRAGS;
 
 	/* make sure that all changes to the dma ring are flushed before we
-- 
cgit v0.10.2


From ad3cba989e8b1bbefe078eece29f0e8d8aaea1d6 Mon Sep 17 00:00:00 2001
From: John Crispin <john@phrozen.org>
Date: Fri, 10 Jun 2016 13:28:07 +0200
Subject: net: mediatek: only wake the queue if it is stopped

The current code unconditionally wakes up the queue at the end of each
tx_poll action. Change the code to only wake up the queues if any of
them have actually been stopped before.

Signed-off-by: John Crispin <john@phrozen.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index c097e9e..40e37b15 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -704,6 +704,20 @@ static inline int mtk_cal_txd_req(struct sk_buff *skb)
 	return nfrags;
 }
 
+static int mtk_queue_stopped(struct mtk_eth *eth)
+{
+	int i;
+
+	for (i = 0; i < MTK_MAC_COUNT; i++) {
+		if (!eth->netdev[i])
+			continue;
+		if (netif_queue_stopped(eth->netdev[i]))
+			return 1;
+	}
+
+	return 0;
+}
+
 static void mtk_wake_queue(struct mtk_eth *eth)
 {
 	int i;
@@ -950,7 +964,8 @@ static int mtk_poll_tx(struct mtk_eth *eth, int budget, bool *tx_again)
 	if (!total)
 		return 0;
 
-	if (atomic_read(&ring->free_count) > ring->thresh)
+	if (mtk_queue_stopped(eth) &&
+	    (atomic_read(&ring->free_count) > ring->thresh))
 		mtk_wake_queue(eth);
 
 	return total;
-- 
cgit v0.10.2


From 82c6544dddc6c4bd940917af2f987dee6be0fc17 Mon Sep 17 00:00:00 2001
From: John Crispin <john@phrozen.org>
Date: Fri, 10 Jun 2016 13:28:08 +0200
Subject: net: mediatek: remove superfluous queue wake up call

The code checks if the queue should be stopped because we are below the
threshold of free descriptors only to check if it should be started again.
If we do end up in a state where we are at the threshold limit, it makes
more sense to just stop the queue and wait for the next IRQ to trigger the
TX housekeeping again. There is no rush in enqueuing the next packet, it
needs to wait for all the others in the queue to be dispatched first
anyway.

Signed-off-by: John Crispin <john@phrozen.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 40e37b15..d1cdc2d 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -783,12 +783,9 @@ static int mtk_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	if (mtk_tx_map(skb, dev, tx_num, ring, gso) < 0)
 		goto drop;
 
-	if (unlikely(atomic_read(&ring->free_count) <= ring->thresh)) {
+	if (unlikely(atomic_read(&ring->free_count) <= ring->thresh))
 		mtk_stop_queue(eth);
-		if (unlikely(atomic_read(&ring->free_count) >
-			     ring->thresh))
-			mtk_wake_queue(eth);
-	}
+
 	spin_unlock_irqrestore(&eth->page_lock, flags);
 
 	return NETDEV_TX_OK;
-- 
cgit v0.10.2


From a2f27217e4e60e663b5b971b0ccb287a9548b04e Mon Sep 17 00:00:00 2001
From: Manuel Lauss <manuel.lauss@gmail.com>
Date: Fri, 10 Jun 2016 16:53:05 +0200
Subject: net: au1000_eth: fix PHY detection

Commit 7f854420fbfe9d49afe2ffb1df052cfe8e215541
("phy: Add API for {un}registering an mdio device to a bus.")
broke PHY detection on this driver with a copy-paste bug:
The code is looking 32 times for a PHY at address 0.

Fixes ethernet on AMD DB1100/DB1500/DB1550 boards which have
their (autodetected) PHYs at address 31.

Cc: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Manuel Lauss <manuel.lauss@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/amd/au1000_eth.c b/drivers/net/ethernet/amd/au1000_eth.c
index e0fb0f1..d8c77e8 100644
--- a/drivers/net/ethernet/amd/au1000_eth.c
+++ b/drivers/net/ethernet/amd/au1000_eth.c
@@ -508,13 +508,12 @@ static int au1000_mii_probe(struct net_device *dev)
 	/* find the first (lowest address) PHY
 	 * on the current MAC's MII bus
 	 */
-	for (phy_addr = 0; phy_addr < PHY_MAX_ADDR; phy_addr++)
-		if (mdiobus_get_phy(aup->mii_bus, aup->phy_addr)) {
-			phydev = mdiobus_get_phy(aup->mii_bus, aup->phy_addr);
-			if (!aup->phy_search_highest_addr)
-				/* break out with first one found */
-				break;
-		}
+	for (phy_addr = 0; phy_addr < PHY_MAX_ADDR; phy_addr++) {
+		phydev = mdiobus_get_phy(aup->mii_bus, phy_addr);
+		if (phydev && !aup->phy_search_highest_addr)
+			/* break out with first one found */
+			break;
+	}
 
 	if (aup->phy1_search_mac0) {
 		/* try harder to find a PHY */
-- 
cgit v0.10.2


From 86c5fe4c932a8ef2d32f8b3d32cc9f0476fc54f3 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Fri, 10 Jun 2016 23:34:24 -0700
Subject: Revert "net: au1000_eth: fix PHY detection"

This reverts commit a2f27217e4e60e663b5b971b0ccb287a9548b04e.

I applied the wrong version of this.

Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/amd/au1000_eth.c b/drivers/net/ethernet/amd/au1000_eth.c
index d8c77e8..e0fb0f1 100644
--- a/drivers/net/ethernet/amd/au1000_eth.c
+++ b/drivers/net/ethernet/amd/au1000_eth.c
@@ -508,12 +508,13 @@ static int au1000_mii_probe(struct net_device *dev)
 	/* find the first (lowest address) PHY
 	 * on the current MAC's MII bus
 	 */
-	for (phy_addr = 0; phy_addr < PHY_MAX_ADDR; phy_addr++) {
-		phydev = mdiobus_get_phy(aup->mii_bus, phy_addr);
-		if (phydev && !aup->phy_search_highest_addr)
-			/* break out with first one found */
-			break;
-	}
+	for (phy_addr = 0; phy_addr < PHY_MAX_ADDR; phy_addr++)
+		if (mdiobus_get_phy(aup->mii_bus, aup->phy_addr)) {
+			phydev = mdiobus_get_phy(aup->mii_bus, aup->phy_addr);
+			if (!aup->phy_search_highest_addr)
+				/* break out with first one found */
+				break;
+		}
 
 	if (aup->phy1_search_mac0) {
 		/* try harder to find a PHY */
-- 
cgit v0.10.2


From 92ca8241533009e4e05a9f3999a75389678af094 Mon Sep 17 00:00:00 2001
From: Manuel Lauss <manuel.lauss@gmail.com>
Date: Sat, 11 Jun 2016 00:13:04 +0200
Subject: net: au1000_eth: fix PHY detection

Commit 7f854420fbfe9d49afe2ffb1df052cfe8e215541
("phy: Add API for {un}registering an mdio device to a bus.")
broke PHY detection on this driver with a copy-paste bug:
The code is looking 32 times for a PHY at address 0.

Fixes ethernet on AMD DB1100/DB1500/DB1550 boards which have
their (autodetected) PHYs at address 31.

Cc: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Manuel Lauss <manuel.lauss@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/amd/au1000_eth.c b/drivers/net/ethernet/amd/au1000_eth.c
index e0fb0f1..20760e1 100644
--- a/drivers/net/ethernet/amd/au1000_eth.c
+++ b/drivers/net/ethernet/amd/au1000_eth.c
@@ -509,8 +509,8 @@ static int au1000_mii_probe(struct net_device *dev)
 	 * on the current MAC's MII bus
 	 */
 	for (phy_addr = 0; phy_addr < PHY_MAX_ADDR; phy_addr++)
-		if (mdiobus_get_phy(aup->mii_bus, aup->phy_addr)) {
-			phydev = mdiobus_get_phy(aup->mii_bus, aup->phy_addr);
+		if (mdiobus_get_phy(aup->mii_bus, phy_addr)) {
+			phydev = mdiobus_get_phy(aup->mii_bus, phy_addr);
 			if (!aup->phy_search_highest_addr)
 				/* break out with first one found */
 				break;
-- 
cgit v0.10.2


From 6e85dbe4b461e59fa3cad6f6235cb47fa4c6a629 Mon Sep 17 00:00:00 2001
From: Crestez Dan Leonard <leonard.crestez@intel.com>
Date: Fri, 3 Jun 2016 21:30:24 +0300
Subject: iio: inv_mpu6050: Fix use-after-free in ACPI code

In some cases this can result in incorrectly returning a negative value
from asus_acpi_get_sensor_info and the AK8963 magnetometer failing to
show up.

Note cpm is an alias for buffer.pointer which isn't apparent in this
patch on it's own.

Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Crestez Dan Leonard <leonard.crestez@intel.com>
Acked-by: Daniel Baluta <daniel.baluta@intel.com>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>

diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_acpi.c b/drivers/iio/imu/inv_mpu6050/inv_mpu_acpi.c
index f62b8bd..dd6fc6d 100644
--- a/drivers/iio/imu/inv_mpu6050/inv_mpu_acpi.c
+++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_acpi.c
@@ -56,6 +56,7 @@ static int asus_acpi_get_sensor_info(struct acpi_device *adev,
 	int i;
 	acpi_status status;
 	union acpi_object *cpm;
+	int ret;
 
 	status = acpi_evaluate_object(adev->handle, "CNF0", NULL, &buffer);
 	if (ACPI_FAILURE(status))
@@ -82,10 +83,10 @@ static int asus_acpi_get_sensor_info(struct acpi_device *adev,
 			}
 		}
 	}
-
+	ret = cpm->package.count;
 	kfree(buffer.pointer);
 
-	return cpm->package.count;
+	return ret;
 }
 
 static int acpi_i2c_check_resource(struct acpi_resource *ares, void *data)
-- 
cgit v0.10.2


From 7e982555d89cc84b1fa23b5d54c7ffd9f7753908 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 30 May 2016 15:50:24 +0200
Subject: staging: iio: fix ad7606_spi regression
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

As pointed out by Geert Uytterhoeven, the patch was incorrect
and breaks the driver, which was fortunately pointed out by
this gcc warning:

drivers/staging/iio/adc/ad7606_spi.c: In function ‘ad7606_spi_read_block’:
drivers/staging/iio/adc/ad7606_spi.c:34: warning: ‘data’ is used uninitialized in this function

The effect of the patch is that the data is copied into
a random memory location (from the uninitialized pointer)
instead of being byteswapped in place.

This adds the initialization for the 'data' variable back
to restore the original behavior.

Cc: Ksenija Stanojevic <ksenija.stanojevic@gmail.com>
Fixes: 87787e5ef727 ("Staging: iio: Fix sparse endian warning")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>

diff --git a/drivers/staging/iio/adc/ad7606_spi.c b/drivers/staging/iio/adc/ad7606_spi.c
index 825da07..9587fa8 100644
--- a/drivers/staging/iio/adc/ad7606_spi.c
+++ b/drivers/staging/iio/adc/ad7606_spi.c
@@ -21,7 +21,7 @@ static int ad7606_spi_read_block(struct device *dev,
 {
 	struct spi_device *spi = to_spi_device(dev);
 	int i, ret;
-	unsigned short *data;
+	unsigned short *data = buf;
 	__be16 *bdata = buf;
 
 	ret = spi_read(spi, buf, count * 2);
-- 
cgit v0.10.2


From f4070a19142d5ee06f0da0cef56a0e78995f172c Mon Sep 17 00:00:00 2001
From: Luis de Bethencourt <luisbg@osg.samsung.com>
Date: Wed, 1 Jun 2016 20:25:54 +0100
Subject: staging: iio: ad5933: fix order of cycle conditions

Correctly handle the settling time cycles value. The else branch is an
impossible condition, > 1022 in the else branch of > 511. Flipping the order.

Based on the Table 13 at the bottom of Page 25 of the Data Sheet:
http://www.analog.com/media/en/technical-documentation/data-sheets/AD5933.pdf

Signed-off-by: Luis de Bethencourt <luisbg@osg.samsung.com>
Reviewed-by: Lars-Peter Clausen <lars@metafoo.de>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>

diff --git a/drivers/staging/iio/impedance-analyzer/ad5933.c b/drivers/staging/iio/impedance-analyzer/ad5933.c
index 9f43976..170ac98 100644
--- a/drivers/staging/iio/impedance-analyzer/ad5933.c
+++ b/drivers/staging/iio/impedance-analyzer/ad5933.c
@@ -444,10 +444,10 @@ static ssize_t ad5933_store(struct device *dev,
 		st->settling_cycles = val;
 
 		/* 2x, 4x handling, see datasheet */
-		if (val > 511)
-			val = (val >> 1) | (1 << 9);
-		else if (val > 1022)
+		if (val > 1022)
 			val = (val >> 2) | (3 << 9);
+		else if (val > 511)
+			val = (val >> 1) | (1 << 9);
 
 		dat = cpu_to_be16(val);
 		ret = ad5933_i2c_write(st->client,
-- 
cgit v0.10.2


From 86ef7f9cbfd564377028098cf20cc1c3ec2c776d Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sat, 11 Jun 2016 20:40:24 -0700
Subject: ipconfig: Protect ic_addrservaddr with IPCONFIG_DYNAMIC.

>> net/ipv4/ipconfig.c:130:15: warning: 'ic_addrservaddr' defined but not used [-Wunused-variable]
    static __be32 ic_addrservaddr = NONE; /* IP Address of the IP addresses'server */

Reported-by: kbuild test robot <fengguang.wu@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index eccf9fd..1d71c40 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -127,7 +127,9 @@ __be32 ic_myaddr = NONE;		/* My IP address */
 static __be32 ic_netmask = NONE;	/* Netmask for local subnet */
 __be32 ic_gateway = NONE;	/* Gateway IP address */
 
+#ifdef IPCONFIG_DYNAMIC
 static __be32 ic_addrservaddr = NONE;	/* IP Address of the IP addresses'server */
+#endif
 
 __be32 ic_servaddr = NONE;	/* Boot server IP address */
 
-- 
cgit v0.10.2


From ea01a18494b3d7a91b2f1f2a6a5aaef4741bc294 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 12 Jun 2016 11:24:46 -0400
Subject: autofs races

* make autofs4_expire_indirect() skip the dentries being in process of
expiry
* do *not* mess with list_move(); making sure that dentry with
AUTOFS_INF_EXPIRING are not picked for expiry is enough.
* do not remove NO_RCU when we set EXPIRING, don't bother with smp_mb()
there.  Clear it at the same time we clear EXPIRING.  Makes a bunch of
tests simpler.
* rename NO_RCU to WANT_EXPIRE, which is what it really is.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index f0d268b..a439548 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -70,9 +70,13 @@ struct autofs_info {
 };
 
 #define AUTOFS_INF_EXPIRING	(1<<0) /* dentry in the process of expiring */
-#define AUTOFS_INF_NO_RCU	(1<<1) /* the dentry is being considered
+#define AUTOFS_INF_WANT_EXPIRE	(1<<1) /* the dentry is being considered
 					* for expiry, so RCU_walk is
-					* not permitted
+					* not permitted.  If it progresses to
+					* actual expiry attempt, the flag is
+					* not cleared when EXPIRING is set -
+					* in that case it gets cleared only
+					* when it comes to clearing EXPIRING.
 					*/
 #define AUTOFS_INF_PENDING	(1<<2) /* dentry pending mount */
 
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 9510d8d..b493909 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -316,19 +316,17 @@ struct dentry *autofs4_expire_direct(struct super_block *sb,
 	if (ino->flags & AUTOFS_INF_PENDING)
 		goto out;
 	if (!autofs4_direct_busy(mnt, root, timeout, do_now)) {
-		ino->flags |= AUTOFS_INF_NO_RCU;
+		ino->flags |= AUTOFS_INF_WANT_EXPIRE;
 		spin_unlock(&sbi->fs_lock);
 		synchronize_rcu();
 		spin_lock(&sbi->fs_lock);
 		if (!autofs4_direct_busy(mnt, root, timeout, do_now)) {
 			ino->flags |= AUTOFS_INF_EXPIRING;
-			smp_mb();
-			ino->flags &= ~AUTOFS_INF_NO_RCU;
 			init_completion(&ino->expire_complete);
 			spin_unlock(&sbi->fs_lock);
 			return root;
 		}
-		ino->flags &= ~AUTOFS_INF_NO_RCU;
+		ino->flags &= ~AUTOFS_INF_WANT_EXPIRE;
 	}
 out:
 	spin_unlock(&sbi->fs_lock);
@@ -446,7 +444,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
 	while ((dentry = get_next_positive_subdir(dentry, root))) {
 		spin_lock(&sbi->fs_lock);
 		ino = autofs4_dentry_ino(dentry);
-		if (ino->flags & AUTOFS_INF_NO_RCU)
+		if (ino->flags & AUTOFS_INF_WANT_EXPIRE)
 			expired = NULL;
 		else
 			expired = should_expire(dentry, mnt, timeout, how);
@@ -455,7 +453,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
 			continue;
 		}
 		ino = autofs4_dentry_ino(expired);
-		ino->flags |= AUTOFS_INF_NO_RCU;
+		ino->flags |= AUTOFS_INF_WANT_EXPIRE;
 		spin_unlock(&sbi->fs_lock);
 		synchronize_rcu();
 		spin_lock(&sbi->fs_lock);
@@ -465,7 +463,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
 			goto found;
 		}
 
-		ino->flags &= ~AUTOFS_INF_NO_RCU;
+		ino->flags &= ~AUTOFS_INF_WANT_EXPIRE;
 		if (expired != dentry)
 			dput(expired);
 		spin_unlock(&sbi->fs_lock);
@@ -475,17 +473,8 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
 found:
 	pr_debug("returning %p %pd\n", expired, expired);
 	ino->flags |= AUTOFS_INF_EXPIRING;
-	smp_mb();
-	ino->flags &= ~AUTOFS_INF_NO_RCU;
 	init_completion(&ino->expire_complete);
 	spin_unlock(&sbi->fs_lock);
-	spin_lock(&sbi->lookup_lock);
-	spin_lock(&expired->d_parent->d_lock);
-	spin_lock_nested(&expired->d_lock, DENTRY_D_LOCK_NESTED);
-	list_move(&expired->d_parent->d_subdirs, &expired->d_child);
-	spin_unlock(&expired->d_lock);
-	spin_unlock(&expired->d_parent->d_lock);
-	spin_unlock(&sbi->lookup_lock);
 	return expired;
 }
 
@@ -496,7 +485,7 @@ int autofs4_expire_wait(struct dentry *dentry, int rcu_walk)
 	int status;
 
 	/* Block on any pending expire */
-	if (!(ino->flags & (AUTOFS_INF_EXPIRING | AUTOFS_INF_NO_RCU)))
+	if (!(ino->flags & AUTOFS_INF_WANT_EXPIRE))
 		return 0;
 	if (rcu_walk)
 		return -ECHILD;
@@ -554,7 +543,7 @@ int autofs4_expire_run(struct super_block *sb,
 	ino = autofs4_dentry_ino(dentry);
 	/* avoid rapid-fire expire attempts if expiry fails */
 	ino->last_used = now;
-	ino->flags &= ~AUTOFS_INF_EXPIRING;
+	ino->flags &= ~(AUTOFS_INF_EXPIRING|AUTOFS_INF_WANT_EXPIRE);
 	complete_all(&ino->expire_complete);
 	spin_unlock(&sbi->fs_lock);
 
@@ -583,7 +572,7 @@ int autofs4_do_expire_multi(struct super_block *sb, struct vfsmount *mnt,
 		spin_lock(&sbi->fs_lock);
 		/* avoid rapid-fire expire attempts if expiry fails */
 		ino->last_used = now;
-		ino->flags &= ~AUTOFS_INF_EXPIRING;
+		ino->flags &= ~(AUTOFS_INF_EXPIRING|AUTOFS_INF_WANT_EXPIRE);
 		complete_all(&ino->expire_complete);
 		spin_unlock(&sbi->fs_lock);
 		dput(dentry);
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 78bd802..3767f66 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -458,7 +458,7 @@ static int autofs4_d_manage(struct dentry *dentry, bool rcu_walk)
 		 */
 		struct inode *inode;
 
-		if (ino->flags & (AUTOFS_INF_EXPIRING | AUTOFS_INF_NO_RCU))
+		if (ino->flags & AUTOFS_INF_WANT_EXPIRE)
 			return 0;
 		if (d_mountpoint(dentry))
 			return 0;
-- 
cgit v0.10.2


From cbdf451164785c9cf5acd5d2983c1e7c778df4c1 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sun, 12 Jun 2016 16:21:47 -0700
Subject: net_sched: prio: properly report out of memory errors

At Qdisc creation or change time, prio_tune() creates missing
pfifo qdiscs but does not return an error code if one
qdisc could not be allocated.

Leaving a qdisc in non operational state without telling user
anything about this problem is not good.

Also, testing if we replace something different than noop_qdisc
a second time makes no sense so I removed useless code.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 4b0a821..071718b 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -202,26 +202,18 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt)
 	sch_tree_unlock(sch);
 
 	for (i = 0; i < q->bands; i++) {
-		if (q->queues[i] == &noop_qdisc) {
-			struct Qdisc *child, *old;
-
-			child = qdisc_create_dflt(sch->dev_queue,
-						  &pfifo_qdisc_ops,
-						  TC_H_MAKE(sch->handle, i + 1));
-			if (child) {
-				sch_tree_lock(sch);
-				old = q->queues[i];
-				q->queues[i] = child;
-
-				if (old != &noop_qdisc) {
-					qdisc_tree_reduce_backlog(old,
-								  old->q.qlen,
-								  old->qstats.backlog);
-					qdisc_destroy(old);
-				}
-				sch_tree_unlock(sch);
-			}
-		}
+		struct Qdisc *child;
+
+		if (q->queues[i] != &noop_qdisc)
+			continue;
+
+		child = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
+					  TC_H_MAKE(sch->handle, i + 1));
+		if (!child)
+			return -ENOMEM;
+		sch_tree_lock(sch);
+		q->queues[i] = child;
+		sch_tree_unlock(sch);
 	}
 	return 0;
 }
-- 
cgit v0.10.2


From d941ebe88a411aa281cc80477a93feb931a1b50b Mon Sep 17 00:00:00 2001
From: Ivan Khoronzhuk <ivan.khoronzhuk@linaro.org>
Date: Sat, 11 Jun 2016 01:11:54 +0300
Subject: net: ethernet: ti: cpsw: use destroy ctlr to destroy channels

There is no reason to destroy channels that are destroyed while
cpdma_ctlr destroy. In this case no need to remember how much
channels where created and destroy them by one, as cpdma_ctlr
destroys all of them.

Signed-off-by: Ivan Khoronzhuk <ivan.khoronzhuk@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index e6bb0ec..5319089 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -2505,8 +2505,6 @@ static int cpsw_probe(struct platform_device *pdev)
 clean_ale_ret:
 	cpsw_ale_destroy(priv->ale);
 clean_dma_ret:
-	cpdma_chan_destroy(priv->txch);
-	cpdma_chan_destroy(priv->rxch);
 	cpdma_ctlr_destroy(priv->dma);
 clean_runtime_disable_ret:
 	pm_runtime_disable(&pdev->dev);
@@ -2534,8 +2532,6 @@ static int cpsw_remove(struct platform_device *pdev)
 	unregister_netdev(ndev);
 
 	cpsw_ale_destroy(priv->ale);
-	cpdma_chan_destroy(priv->txch);
-	cpdma_chan_destroy(priv->rxch);
 	cpdma_ctlr_destroy(priv->dma);
 	pm_runtime_disable(&pdev->dev);
 	device_for_each_child(&pdev->dev, NULL, cpsw_remove_child_device);
-- 
cgit v0.10.2


From 975f57fdff1d0eb9816806cabd27162a8a1a4038 Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Fri, 10 Jun 2016 16:47:02 +1000
Subject: crypto: vmx - Fix ABI detection

When calling ppc-xlate.pl, we pass it either linux-ppc64 or
linux-ppc64le. The script however was expecting linux64le, a result
of its OpenSSL origins. This means we aren't obeying the ppc64le
ABIv2 rules.

Fix this by checking for linux-ppc64le.

Fixes: 5ca55738201c ("crypto: vmx - comply with ABIs that specify vrsave as reserved.")
Cc: stable@vger.kernel.org
Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

diff --git a/drivers/crypto/vmx/ppc-xlate.pl b/drivers/crypto/vmx/ppc-xlate.pl
index 9f4994c..b18e67d 100644
--- a/drivers/crypto/vmx/ppc-xlate.pl
+++ b/drivers/crypto/vmx/ppc-xlate.pl
@@ -141,7 +141,7 @@ my $vmr = sub {
 
 # Some ABIs specify vrsave, special-purpose register #256, as reserved
 # for system use.
-my $no_vrsave = ($flavour =~ /aix|linux64le/);
+my $no_vrsave = ($flavour =~ /linux-ppc64le/);
 my $mtspr = sub {
     my ($f,$idx,$ra) = @_;
     if ($idx == 256 && $no_vrsave) {
-- 
cgit v0.10.2


From 12d3f49e1ffbbf8cbbb60acae5a21103c5c841ac Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Fri, 10 Jun 2016 16:47:03 +1000
Subject: crypto: vmx - Increase priority of aes-cbc cipher

All of the VMX AES ciphers (AES, AES-CBC and AES-CTR) are set at
priority 1000. Unfortunately this means we never use AES-CBC and
AES-CTR, because the base AES-CBC cipher that is implemented on
top of AES inherits its priority.

To fix this, AES-CBC and AES-CTR have to be a higher priority. Set
them to 2000.

Testing on a POWER8 with:

cryptsetup benchmark --cipher aes --key-size 256

Shows decryption speed increase from 402.4 MB/s to 3069.2 MB/s,
over 7x faster. Thanks to Mike Strosaker for helping me debug
this issue.

Fixes: 8c755ace357c ("crypto: vmx - Adding CBC routines for VMX module")
Cc: stable@vger.kernel.org
Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

diff --git a/drivers/crypto/vmx/aes_cbc.c b/drivers/crypto/vmx/aes_cbc.c
index 495577b..94ad5c0 100644
--- a/drivers/crypto/vmx/aes_cbc.c
+++ b/drivers/crypto/vmx/aes_cbc.c
@@ -182,7 +182,7 @@ struct crypto_alg p8_aes_cbc_alg = {
 	.cra_name = "cbc(aes)",
 	.cra_driver_name = "p8_aes_cbc",
 	.cra_module = THIS_MODULE,
-	.cra_priority = 1000,
+	.cra_priority = 2000,
 	.cra_type = &crypto_blkcipher_type,
 	.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | CRYPTO_ALG_NEED_FALLBACK,
 	.cra_alignmask = 0,
diff --git a/drivers/crypto/vmx/aes_ctr.c b/drivers/crypto/vmx/aes_ctr.c
index 0a3c1b0..38ed10d 100644
--- a/drivers/crypto/vmx/aes_ctr.c
+++ b/drivers/crypto/vmx/aes_ctr.c
@@ -166,7 +166,7 @@ struct crypto_alg p8_aes_ctr_alg = {
 	.cra_name = "ctr(aes)",
 	.cra_driver_name = "p8_aes_ctr",
 	.cra_module = THIS_MODULE,
-	.cra_priority = 1000,
+	.cra_priority = 2000,
 	.cra_type = &crypto_blkcipher_type,
 	.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | CRYPTO_ALG_NEED_FALLBACK,
 	.cra_alignmask = 0,
-- 
cgit v0.10.2


From 19ced623db2fe91604d69f7d86b03144c5107739 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Wed, 8 Jun 2016 14:56:39 +0200
Subject: crypto: ux500 - memmove the right size

The hash buffer is really HASH_BLOCK_SIZE bytes, someone
must have thought that memmove takes n*u32 words by mistake.
Tests work as good/bad as before after this patch.

Cc: Joakim Bech <joakim.bech@linaro.org>
Cc: stable@vger.kernel.org
Reported-by: David Binderman <linuxdev.baldrick@gmail.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

diff --git a/drivers/crypto/ux500/hash/hash_core.c b/drivers/crypto/ux500/hash/hash_core.c
index 574e87c..9acccad 100644
--- a/drivers/crypto/ux500/hash/hash_core.c
+++ b/drivers/crypto/ux500/hash/hash_core.c
@@ -781,7 +781,7 @@ static int hash_process_data(struct hash_device_data *device_data,
 						&device_data->state);
 				memmove(req_ctx->state.buffer,
 					device_data->state.buffer,
-					HASH_BLOCK_SIZE / sizeof(u32));
+					HASH_BLOCK_SIZE);
 				if (ret) {
 					dev_err(device_data->dev,
 						"%s: hash_resume_state() failed!\n",
@@ -832,7 +832,7 @@ static int hash_process_data(struct hash_device_data *device_data,
 
 			memmove(device_data->state.buffer,
 				req_ctx->state.buffer,
-				HASH_BLOCK_SIZE / sizeof(u32));
+				HASH_BLOCK_SIZE);
 			if (ret) {
 				dev_err(device_data->dev, "%s: hash_save_state() failed!\n",
 					__func__);
-- 
cgit v0.10.2


From 053ae6499a5634c0dc5fa18437e1af3d2f2ec98e Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Mon, 13 Jun 2016 13:48:53 +0800
Subject: gpio: 104-idi-48: Fix missing spin_lock_init for ack_lock

Fixes: 9ae482104cb9 ("gpio: 104-idi-48: Clear pending interrupt once in IRQ handler")
Signed-off-by: Axel Lin <axel.lin@ingics.com>
Acked-by: William Breathitt Gray <vilhelm.gray@gmail.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>

diff --git a/drivers/gpio/gpio-104-idi-48.c b/drivers/gpio/gpio-104-idi-48.c
index 6c75c83..2d2763e 100644
--- a/drivers/gpio/gpio-104-idi-48.c
+++ b/drivers/gpio/gpio-104-idi-48.c
@@ -247,6 +247,7 @@ static int idi_48_probe(struct device *dev, unsigned int id)
 	idi48gpio->irq = irq[id];
 
 	spin_lock_init(&idi48gpio->lock);
+	spin_lock_init(&idi48gpio->ack_lock);
 
 	dev_set_drvdata(dev, idi48gpio);
 
-- 
cgit v0.10.2


From 1c343f7b0e177e8ca7f4d4a5dd1fa790f85abbcc Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Mon, 13 Jun 2016 13:14:56 +0200
Subject: KVM: s390/mm: Fix CMMA reset during reboot

commit 1e133ab296f ("s390/mm: split arch/s390/mm/pgtable.c") factored
out the page table handling code from __gmap_zap and  __s390_reset_cmma
into ptep_zap_unused and added a simple flag that tells which one of the
function (reset or not) is to be made. This also changed the behaviour,
as it also zaps unused page table entries on reset.
Turns out that this is wrong as s390_reset_cmma uses the page walker,
which DOES NOT take the ptl lock.

The most simple fix is to not do the zapping part on reset (which uses
the walker)

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Fixes: 1e133ab296f ("s390/mm: split arch/s390/mm/pgtable.c")
Cc: stable@vger.kernel.org # 4.6+
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 4324b87..9f0ce0e 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -437,7 +437,7 @@ void ptep_zap_unused(struct mm_struct *mm, unsigned long addr,
 	pgste = pgste_get_lock(ptep);
 	pgstev = pgste_val(pgste);
 	pte = *ptep;
-	if (pte_swap(pte) &&
+	if (!reset && pte_swap(pte) &&
 	    ((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED ||
 	     (pgstev & _PGSTE_GPS_ZERO))) {
 		ptep_zap_swap_entry(mm, pte_to_swp_entry(pte));
-- 
cgit v0.10.2


From 8550e2fa34f077c8a87cf1ba2453102bbbc9ade9 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Wed, 8 Jun 2016 19:55:55 +0530
Subject: powerpc/mm/hash: Use the correct PPP mask when updating HPTE

With commit e58e87adc8bf9 "powerpc/mm: Update _PAGE_KERNEL_RO" we now
use all the three PPP bits. The top bit is now used to have a PPP value
of 0b110 which will be mapped to kernel read only. When updating the
hpte entry use right mask such that we update the 63rd bit (top 'P' bit)
too.

Prior to e58e87adc8bf we didn't support KERNEL_RO at all (it was ==
KERNEL_RW), so this isn't a regression as such.

Fixes: e58e87adc8bf ("powerpc/mm: Update _PAGE_KERNEL_RO")
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>

diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index 290157e..74839f24 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -88,6 +88,7 @@
 #define HPTE_R_RPN_SHIFT	12
 #define HPTE_R_RPN		ASM_CONST(0x0ffffffffffff000)
 #define HPTE_R_PP		ASM_CONST(0x0000000000000003)
+#define HPTE_R_PPP		ASM_CONST(0x8000000000000003)
 #define HPTE_R_N		ASM_CONST(0x0000000000000004)
 #define HPTE_R_G		ASM_CONST(0x0000000000000008)
 #define HPTE_R_M		ASM_CONST(0x0000000000000010)
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index 40e05e7..f8a871a 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -316,8 +316,8 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
 			DBG_LOW(" -> hit\n");
 			/* Update the HPTE */
 			hptep->r = cpu_to_be64((be64_to_cpu(hptep->r) &
-						~(HPTE_R_PP | HPTE_R_N)) |
-					       (newpp & (HPTE_R_PP | HPTE_R_N |
+						~(HPTE_R_PPP | HPTE_R_N)) |
+					       (newpp & (HPTE_R_PPP | HPTE_R_N |
 							 HPTE_R_C)));
 		}
 		native_unlock_hpte(hptep);
@@ -385,8 +385,8 @@ static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
 
 	/* Update the HPTE */
 	hptep->r = cpu_to_be64((be64_to_cpu(hptep->r) &
-			~(HPTE_R_PP | HPTE_R_N)) |
-		(newpp & (HPTE_R_PP | HPTE_R_N)));
+				~(HPTE_R_PPP | HPTE_R_N)) |
+			       (newpp & (HPTE_R_PPP | HPTE_R_N)));
 	/*
 	 * Ensure it is out of the tlb too. Bolted entries base and
 	 * actual page size will be same.
-- 
cgit v0.10.2


From ea1d39a31d3b1b6060b6e83e5a29c069a124c68a Mon Sep 17 00:00:00 2001
From: Oscar <oscar@naiandei.net>
Date: Tue, 14 Jun 2016 14:14:35 +0800
Subject: usb: common: otg-fsm: add license to usb-otg-fsm

Fix warning about tainted kernel because usb-otg-fsm has no license.
WARNING: with this patch usb-otg-fsm module can be loaded
but then the kernel will hang. Tested with a udoo quad board.

Cc: <stable@vger.kernel.org> #v4.1+
Signed-off-by: Oscar <oscar@naiandei.net>
Signed-off-by: Peter Chen <peter.chen@nxp.com>

diff --git a/drivers/usb/common/usb-otg-fsm.c b/drivers/usb/common/usb-otg-fsm.c
index 9059b7d..2f537bb 100644
--- a/drivers/usb/common/usb-otg-fsm.c
+++ b/drivers/usb/common/usb-otg-fsm.c
@@ -21,6 +21,7 @@
  * 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
+#include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/types.h>
 #include <linux/mutex.h>
@@ -450,3 +451,4 @@ int otg_statemachine(struct otg_fsm *fsm)
 	return fsm->state_changed;
 }
 EXPORT_SYMBOL_GPL(otg_statemachine);
+MODULE_LICENSE("GPL");
-- 
cgit v0.10.2


From b7fa30c9cc48c4f55663420472505d3b4f6e1705 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 9 Jun 2016 15:07:50 +0200
Subject: sched/fair: Fix post_init_entity_util_avg() serialization

Chris Wilson reported a divide by 0 at:

 post_init_entity_util_avg():

 >    725	if (cfs_rq->avg.util_avg != 0) {
 >    726		sa->util_avg  = cfs_rq->avg.util_avg * se->load.weight;
 > -> 727		sa->util_avg /= (cfs_rq->avg.load_avg + 1);
 >    728
 >    729		if (sa->util_avg > cap)
 >    730			sa->util_avg = cap;
 >    731	} else {

Which given the lack of serialization, and the code generated from
update_cfs_rq_load_avg() is entirely possible:

	if (atomic_long_read(&cfs_rq->removed_load_avg)) {
		s64 r = atomic_long_xchg(&cfs_rq->removed_load_avg, 0);
		sa->load_avg = max_t(long, sa->load_avg - r, 0);
		sa->load_sum = max_t(s64, sa->load_sum - r * LOAD_AVG_MAX, 0);
		removed_load = 1;
	}

turns into:

  ffffffff81087064:       49 8b 85 98 00 00 00    mov    0x98(%r13),%rax
  ffffffff8108706b:       48 85 c0                test   %rax,%rax
  ffffffff8108706e:       74 40                   je     ffffffff810870b0
  ffffffff81087070:       4c 89 f8                mov    %r15,%rax
  ffffffff81087073:       49 87 85 98 00 00 00    xchg   %rax,0x98(%r13)
  ffffffff8108707a:       49 29 45 70             sub    %rax,0x70(%r13)
  ffffffff8108707e:       4c 89 f9                mov    %r15,%rcx
  ffffffff81087081:       bb 01 00 00 00          mov    $0x1,%ebx
  ffffffff81087086:       49 83 7d 70 00          cmpq   $0x0,0x70(%r13)
  ffffffff8108708b:       49 0f 49 4d 70          cmovns 0x70(%r13),%rcx

Which you'll note ends up with 'sa->load_avg - r' in memory at
ffffffff8108707a.

By calling post_init_entity_util_avg() under rq->lock we're sure to be
fully serialized against PELT updates and cannot observe intermediate
state like this.

Reported-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Yuyang Du <yuyang.du@intel.com>
Cc: bsegall@google.com
Cc: morten.rasmussen@arm.com
Cc: pjt@google.com
Cc: steve.muckle@linaro.org
Fixes: 2b8c41daba32 ("sched/fair: Initiate a new task's util avg to a bounded value")
Link: http://lkml.kernel.org/r/20160609130750.GQ30909@twins.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 017d539..13d0896 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2535,10 +2535,9 @@ void wake_up_new_task(struct task_struct *p)
 	 */
 	set_task_cpu(p, select_task_rq(p, task_cpu(p), SD_BALANCE_FORK, 0));
 #endif
-	/* Post initialize new task's util average when its cfs_rq is set */
+	rq = __task_rq_lock(p, &rf);
 	post_init_entity_util_avg(&p->se);
 
-	rq = __task_rq_lock(p, &rf);
 	activate_task(rq, p, 0);
 	p->on_rq = TASK_ON_RQ_QUEUED;
 	trace_sched_wakeup_new(p);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 218f8e8..4e33ad1 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -8496,8 +8496,9 @@ void free_fair_sched_group(struct task_group *tg)
 
 int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
 {
-	struct cfs_rq *cfs_rq;
 	struct sched_entity *se;
+	struct cfs_rq *cfs_rq;
+	struct rq *rq;
 	int i;
 
 	tg->cfs_rq = kzalloc(sizeof(cfs_rq) * nr_cpu_ids, GFP_KERNEL);
@@ -8512,6 +8513,8 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
 	init_cfs_bandwidth(tg_cfs_bandwidth(tg));
 
 	for_each_possible_cpu(i) {
+		rq = cpu_rq(i);
+
 		cfs_rq = kzalloc_node(sizeof(struct cfs_rq),
 				      GFP_KERNEL, cpu_to_node(i));
 		if (!cfs_rq)
@@ -8525,7 +8528,10 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
 		init_cfs_rq(cfs_rq);
 		init_tg_cfs_entry(tg, cfs_rq, se, i, parent->se[i]);
 		init_entity_runnable_average(se);
+
+		raw_spin_lock_irq(&rq->lock);
 		post_init_entity_util_avg(se);
+		raw_spin_unlock_irq(&rq->lock);
 	}
 
 	return 1;
-- 
cgit v0.10.2


From 6d9fe44bd73d567d04d3a68a2d2fa521ab9532f2 Mon Sep 17 00:00:00 2001
From: Michal Suchanek <hramrach@gmail.com>
Date: Mon, 13 Jun 2016 17:46:49 +0000
Subject: spi: sun4i: fix FIFO limit

When testing SPI without DMA I noticed that filling the FIFO on the
spi controller causes timeout.

Always leave room for one byte in the FIFO.

Signed-off-by: Michal Suchanek <hramrach@gmail.com>
Acked-by: Maxime Ripard <maxime.ripard@free-electrons.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
Cc: stable@vger.kernel.org

diff --git a/drivers/spi/spi-sun4i.c b/drivers/spi/spi-sun4i.c
index 1ddd9e2..e7e4aec 100644
--- a/drivers/spi/spi-sun4i.c
+++ b/drivers/spi/spi-sun4i.c
@@ -179,7 +179,10 @@ static int sun4i_spi_transfer_one(struct spi_master *master,
 
 	/* We don't support transfer larger than the FIFO */
 	if (tfr->len > SUN4I_FIFO_DEPTH)
-		return -EINVAL;
+		return -EMSGSIZE;
+
+	if (tfr->tx_buf && tfr->len >= SUN4I_FIFO_DEPTH)
+		return -EMSGSIZE;
 
 	reinit_completion(&sspi->done);
 	sspi->tx_buf = tfr->tx_buf;
@@ -269,8 +272,12 @@ static int sun4i_spi_transfer_one(struct spi_master *master,
 	sun4i_spi_write(sspi, SUN4I_BURST_CNT_REG, SUN4I_BURST_CNT(tfr->len));
 	sun4i_spi_write(sspi, SUN4I_XMIT_CNT_REG, SUN4I_XMIT_CNT(tx_len));
 
-	/* Fill the TX FIFO */
-	sun4i_spi_fill_fifo(sspi, SUN4I_FIFO_DEPTH);
+	/*
+	 * Fill the TX FIFO
+	 * Filling the FIFO fully causes timeout for some reason
+	 * at least on spi2 on A10s
+	 */
+	sun4i_spi_fill_fifo(sspi, SUN4I_FIFO_DEPTH - 1);
 
 	/* Enable the interrupts */
 	sun4i_spi_write(sspi, SUN4I_INT_CTL_REG, SUN4I_INT_CTL_TC);
-- 
cgit v0.10.2


From 719bd6542044efd9b338a53dba1bef45f40ca169 Mon Sep 17 00:00:00 2001
From: Michal Suchanek <hramrach@gmail.com>
Date: Mon, 13 Jun 2016 17:46:49 +0000
Subject: spi: sunxi: fix transfer timeout

The trasfer timeout is fixed at 1000 ms. Reading a 4Mbyte flash over
1MHz SPI bus takes way longer than that. Calculate the timeout from the
actual time the transfer is supposed to take and multiply by 2 for good
measure.

Signed-off-by: Michal Suchanek <hramrach@gmail.com>
Acked-by: Maxime Ripard <maxime.ripard@free-electrons.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
Cc: stable@vger.kernel.org

diff --git a/drivers/spi/spi-sun4i.c b/drivers/spi/spi-sun4i.c
index e7e4aec..cf007f3 100644
--- a/drivers/spi/spi-sun4i.c
+++ b/drivers/spi/spi-sun4i.c
@@ -173,6 +173,7 @@ static int sun4i_spi_transfer_one(struct spi_master *master,
 {
 	struct sun4i_spi *sspi = spi_master_get_devdata(master);
 	unsigned int mclk_rate, div, timeout;
+	unsigned int start, end, tx_time;
 	unsigned int tx_len = 0;
 	int ret = 0;
 	u32 reg;
@@ -286,9 +287,16 @@ static int sun4i_spi_transfer_one(struct spi_master *master,
 	reg = sun4i_spi_read(sspi, SUN4I_CTL_REG);
 	sun4i_spi_write(sspi, SUN4I_CTL_REG, reg | SUN4I_CTL_XCH);
 
+	tx_time = max(tfr->len * 8 * 2 / (tfr->speed_hz / 1000), 100U);
+	start = jiffies;
 	timeout = wait_for_completion_timeout(&sspi->done,
-					      msecs_to_jiffies(1000));
+					      msecs_to_jiffies(tx_time));
+	end = jiffies;
 	if (!timeout) {
+		dev_warn(&master->dev,
+			 "%s: timeout transferring %u bytes@%iHz for %i(%i)ms",
+			 dev_name(&spi->dev), tfr->len, tfr->speed_hz,
+			 jiffies_to_msecs(end - start), tx_time);
 		ret = -ETIMEDOUT;
 		goto out;
 	}
diff --git a/drivers/spi/spi-sun6i.c b/drivers/spi/spi-sun6i.c
index 42e2c4b..7fce79a 100644
--- a/drivers/spi/spi-sun6i.c
+++ b/drivers/spi/spi-sun6i.c
@@ -160,6 +160,7 @@ static int sun6i_spi_transfer_one(struct spi_master *master,
 {
 	struct sun6i_spi *sspi = spi_master_get_devdata(master);
 	unsigned int mclk_rate, div, timeout;
+	unsigned int start, end, tx_time;
 	unsigned int tx_len = 0;
 	int ret = 0;
 	u32 reg;
@@ -269,9 +270,16 @@ static int sun6i_spi_transfer_one(struct spi_master *master,
 	reg = sun6i_spi_read(sspi, SUN6I_TFR_CTL_REG);
 	sun6i_spi_write(sspi, SUN6I_TFR_CTL_REG, reg | SUN6I_TFR_CTL_XCH);
 
+	tx_time = max(tfr->len * 8 * 2 / (tfr->speed_hz / 1000), 100U);
+	start = jiffies;
 	timeout = wait_for_completion_timeout(&sspi->done,
-					      msecs_to_jiffies(1000));
+					      msecs_to_jiffies(tx_time));
+	end = jiffies;
 	if (!timeout) {
+		dev_warn(&master->dev,
+			 "%s: timeout transferring %u bytes@%iHz for %i(%i)ms",
+			 dev_name(&spi->dev), tfr->len, tfr->speed_hz,
+			 jiffies_to_msecs(end - start), tx_time);
 		ret = -ETIMEDOUT;
 		goto out;
 	}
-- 
cgit v0.10.2


From 4b2312bd0592708c85ed94368c874819e7013309 Mon Sep 17 00:00:00 2001
From: Harvey Hunt <harvey.hunt@imgtec.com>
Date: Mon, 23 May 2016 12:05:52 +0100
Subject: irqchip/mips-gic: Fix IRQs in gic_dev_domain

When allocating a new device IRQ, gic_dev_domain_alloc() correctly calls
irq_domain_set_hwirq_and_chip(), but gic_irq_domain_alloc() does not. This
means that gic_irq_domain believes all IRQs from the dev domain have an
hwirq of 0 and creates incorrect mappings in the linear_revmap. As
gic_irq_domain is a parent of the gic_dev_domain, this leads to an
inability to boot on devices with a GIC. Excerpt of the error:

[    2.297649] irq 0: nobody cared (try booting with the "irqpoll" option)
...
[    2.436963] handlers:
[    2.439492] Disabling IRQ #0

Fix this by calling irq_domain_set_hwirq_and_chip() for both the dev and
irq domain.

Now that we are modifying the parent domain, be sure to clear it up in
case of an allocation error.

Fixes: c98c1822ee13 ("irqchip/mips-gic: Add device hierarchy domain")
Fixes: 2af70a962070 ("irqchip/mips-gic: Add a IPI hierarchy domain")
Signed-off-by: Harvey Hunt <harvey.hunt@imgtec.com>
Tested-by: Govindraj Raja <Govindraj.Raja@imgtec.com> # On Pistachio SoC
Reviewed-by: Matt Redfearn <matt.redfearn@imgtec.com>
Cc: linux-mips@linux-mips.org
Cc: Qais Yousef <qsyousef@gmail.com>
Cc: jason@lakedaemon.net
Cc: marc.zyngier@arm.com
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/1464001552-31174-1-git-send-email-harvey.hunt@imgtec.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c
index 3b5e10a..8a4adbeb 100644
--- a/drivers/irqchip/irq-mips-gic.c
+++ b/drivers/irqchip/irq-mips-gic.c
@@ -746,6 +746,12 @@ static int gic_irq_domain_alloc(struct irq_domain *d, unsigned int virq,
 		/* verify that it doesn't conflict with an IPI irq */
 		if (test_bit(spec->hwirq, ipi_resrv))
 			return -EBUSY;
+
+		hwirq = GIC_SHARED_TO_HWIRQ(spec->hwirq);
+
+		return irq_domain_set_hwirq_and_chip(d, virq, hwirq,
+						     &gic_level_irq_controller,
+						     NULL);
 	} else {
 		base_hwirq = find_first_bit(ipi_resrv, gic_shared_intrs);
 		if (base_hwirq == gic_shared_intrs) {
@@ -867,10 +873,14 @@ static int gic_dev_domain_alloc(struct irq_domain *d, unsigned int virq,
 						    &gic_level_irq_controller,
 						    NULL);
 		if (ret)
-			return ret;
+			goto error;
 	}
 
 	return 0;
+
+error:
+	irq_domain_free_irqs_parent(d, virq, nr_irqs);
+	return ret;
 }
 
 void gic_dev_domain_free(struct irq_domain *d, unsigned int virq,
-- 
cgit v0.10.2


From dcfc47248d3f7d28df6f531e6426b933de94370d Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Sat, 11 Jun 2016 23:06:53 +0900
Subject: kprobes/x86: Clear TF bit in fault on single-stepping

Fix kprobe_fault_handler() to clear the TF (trap flag) bit of
the flags register in the case of a fault fixup on single-stepping.

If we put a kprobe on the instruction which caused a
page fault (e.g. actual mov instructions in copy_user_*),
that fault happens on the single-stepping buffer. In this
case, kprobes resets running instance so that the CPU can
retry execution on the original ip address.

However, current code forgets to reset the TF bit. Since this
fault happens with TF bit set for enabling single-stepping,
when it retries, it causes a debug exception and kprobes
can not handle it because it already reset itself.

On the most of x86-64 platform, it can be easily reproduced
by using kprobe tracer. E.g.

  # cd /sys/kernel/debug/tracing
  # echo p copy_user_enhanced_fast_string+5 > kprobe_events
  # echo 1 > events/kprobes/enable

And you'll see a kernel panic on do_debug(), since the debug
trap is not handled by kprobes.

To fix this problem, we just need to clear the TF bit when
resetting running kprobe.

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Reviewed-by: Ananth N Mavinakayanahalli <ananth@linux.vnet.ibm.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: systemtap@sourceware.org
Cc: stable@vger.kernel.org # All the way back to ancient kernels
Link: http://lkml.kernel.org/r/20160611140648.25885.37482.stgit@devbox
[ Updated the comments. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 38cf7a7..7847e5c 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -961,7 +961,19 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
 		 * normal page fault.
 		 */
 		regs->ip = (unsigned long)cur->addr;
+		/*
+		 * Trap flag (TF) has been set here because this fault
+		 * happened where the single stepping will be done.
+		 * So clear it by resetting the current kprobe:
+		 */
+		regs->flags &= ~X86_EFLAGS_TF;
+
+		/*
+		 * If the TF flag was set before the kprobe hit,
+		 * don't touch it:
+		 */
 		regs->flags |= kcb->kprobe_old_flags;
+
 		if (kcb->kprobe_status == KPROBE_REENTER)
 			restore_previous_kprobe(kcb);
 		else
-- 
cgit v0.10.2


From eda8dca519269c92a0771668b3d5678792de7b78 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Mon, 13 Jun 2016 02:32:09 -0500
Subject: sched/debug: Fix deadlock when enabling sched events

I see a hang when enabling sched events:

  echo 1 > /sys/kernel/debug/tracing/events/sched/enable

The printk buffer shows:

  BUG: spinlock recursion on CPU#1, swapper/1/0
   lock: 0xffff88007d5d8c00, .magic: dead4ead, .owner: swapper/1/0, .owner_cpu: 1
  CPU: 1 PID: 0 Comm: swapper/1 Not tainted 4.7.0-rc2+ #1
  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.8.1-20150318_183358- 04/01/2014
  ...
  Call Trace:
   <IRQ>  [<ffffffff8143d663>] dump_stack+0x85/0xc2
   [<ffffffff81115948>] spin_dump+0x78/0xc0
   [<ffffffff81115aea>] do_raw_spin_lock+0x11a/0x150
   [<ffffffff81891471>] _raw_spin_lock+0x61/0x80
   [<ffffffff810e5466>] ? try_to_wake_up+0x256/0x4e0
   [<ffffffff810e5466>] try_to_wake_up+0x256/0x4e0
   [<ffffffff81891a0a>] ? _raw_spin_unlock_irqrestore+0x4a/0x80
   [<ffffffff810e5705>] wake_up_process+0x15/0x20
   [<ffffffff810cebb4>] insert_work+0x84/0xc0
   [<ffffffff810ced7f>] __queue_work+0x18f/0x660
   [<ffffffff810cf9a6>] queue_work_on+0x46/0x90
   [<ffffffffa00cd95b>] drm_fb_helper_dirty.isra.11+0xcb/0xe0 [drm_kms_helper]
   [<ffffffffa00cdac0>] drm_fb_helper_sys_imageblit+0x30/0x40 [drm_kms_helper]
   [<ffffffff814babcd>] soft_cursor+0x1ad/0x230
   [<ffffffff814ba379>] bit_cursor+0x649/0x680
   [<ffffffff814b9d30>] ? update_attr.isra.2+0x90/0x90
   [<ffffffff814b5e6a>] fbcon_cursor+0x14a/0x1c0
   [<ffffffff81555ef8>] hide_cursor+0x28/0x90
   [<ffffffff81558b6f>] vt_console_print+0x3bf/0x3f0
   [<ffffffff81122c63>] call_console_drivers.constprop.24+0x183/0x200
   [<ffffffff811241f4>] console_unlock+0x3d4/0x610
   [<ffffffff811247f5>] vprintk_emit+0x3c5/0x610
   [<ffffffff81124bc9>] vprintk_default+0x29/0x40
   [<ffffffff811e965b>] printk+0x57/0x73
   [<ffffffff810f7a9e>] enqueue_entity+0xc2e/0xc70
   [<ffffffff810f7b39>] enqueue_task_fair+0x59/0xab0
   [<ffffffff8106dcd9>] ? kvm_sched_clock_read+0x9/0x20
   [<ffffffff8103fb39>] ? sched_clock+0x9/0x10
   [<ffffffff810e3fcc>] activate_task+0x5c/0xa0
   [<ffffffff810e4514>] ttwu_do_activate+0x54/0xb0
   [<ffffffff810e5cea>] sched_ttwu_pending+0x7a/0xb0
   [<ffffffff810e5e51>] scheduler_ipi+0x61/0x170
   [<ffffffff81059e7f>] smp_trace_reschedule_interrupt+0x4f/0x2a0
   [<ffffffff81893ba6>] trace_reschedule_interrupt+0x96/0xa0
   <EOI>  [<ffffffff8106e0d6>] ? native_safe_halt+0x6/0x10
   [<ffffffff8110fb1d>] ? trace_hardirqs_on+0xd/0x10
   [<ffffffff81040ac0>] default_idle+0x20/0x1a0
   [<ffffffff8104147f>] arch_cpu_idle+0xf/0x20
   [<ffffffff81102f8f>] default_idle_call+0x2f/0x50
   [<ffffffff8110332e>] cpu_startup_entry+0x37e/0x450
   [<ffffffff8105af70>] start_secondary+0x160/0x1a0

Note the hang only occurs when echoing the above from a physical serial
console, not from an ssh session.

The bug is caused by a deadlock where the task is trying to grab the rq
lock twice because printk()'s aren't safe in sched code.

Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Matt Fleming <matt@codeblueprint.co.uk>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: stable@vger.kernel.org
Fixes: cb2517653fcc ("sched/debug: Make schedstats a runtime tunable that is disabled by default")
Link: http://lkml.kernel.org/r/20160613073209.gdvdybiruljbkn3p@treble
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 4e33ad1..a2348de 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3246,7 +3246,7 @@ static inline void check_schedstat_required(void)
 			trace_sched_stat_iowait_enabled()  ||
 			trace_sched_stat_blocked_enabled() ||
 			trace_sched_stat_runtime_enabled())  {
-		pr_warn_once("Scheduler tracepoints stat_sleep, stat_iowait, "
+		printk_deferred_once("Scheduler tracepoints stat_sleep, stat_iowait, "
 			     "stat_blocked and stat_runtime require the "
 			     "kernel parameter schedstats=enabled or "
 			     "kernel.sched_schedstats=1\n");
-- 
cgit v0.10.2


From 57675cb976eff977aefb428e68e4e0236d48a9ff Mon Sep 17 00:00:00 2001
From: Andrey Ryabinin <aryabinin@virtuozzo.com>
Date: Thu, 9 Jun 2016 15:20:05 +0300
Subject: kernel/sysrq, watchdog, sched/core: Reset watchdog on all CPUs while
 processing sysrq-w

Lengthy output of sysrq-w may take a lot of time on slow serial console.

Currently we reset NMI-watchdog on the current CPU to avoid spurious
lockup messages. Sometimes this doesn't work since softlockup watchdog
might trigger on another CPU which is waiting for an IPI to proceed.
We reset softlockup watchdogs on all CPUs, but we do this only after
listing all tasks, and this may be too late on a busy system.

So, reset watchdogs CPUs earlier, in for_each_process_thread() loop.

Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: <stable@vger.kernel.org>
Link: http://lkml.kernel.org/r/1465474805-14641-1-git-send-email-aryabinin@virtuozzo.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 13d0896..4135ac8 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5147,14 +5147,16 @@ void show_state_filter(unsigned long state_filter)
 		/*
 		 * reset the NMI-timeout, listing all files on a slow
 		 * console might take a lot of time:
+		 * Also, reset softlockup watchdogs on all CPUs, because
+		 * another CPU might be blocked waiting for us to process
+		 * an IPI.
 		 */
 		touch_nmi_watchdog();
+		touch_all_softlockup_watchdogs();
 		if (!state_filter || (p->state & state_filter))
 			sched_show_task(p);
 	}
 
-	touch_all_softlockup_watchdogs();
-
 #ifdef CONFIG_SCHED_DEBUG
 	if (!state_filter)
 		sysrq_sched_debug_show();
-- 
cgit v0.10.2


From e50525bef593c3dd0564df676c567d77f7c20322 Mon Sep 17 00:00:00 2001
From: Rajkumar Manoharan <rmanohar@qti.qualcomm.com>
Date: Thu, 9 Jun 2016 11:33:55 +0530
Subject: ath10k: fix deadlock while processing rx_in_ord_ind

commit 5c86d97bcc1d ("ath10k: combine txrx and replenish task")
introduced deadlock while processing rx in order indication message
for qca6174 based devices. While merging replenish and txrx tasklets,
replenish task should be called out of htt rx ring locking since it
is also try to acquire the same lock.

Unfortunately this issue is not exposed by other solutions (qca988x,
qca99x0 & qca4019), as rx_in_ord_ind message is specific to qca6174
based devices. This patch fixes

=============================================
[ INFO: possible recursive locking detected ]
4.7.0-rc2-wt-ath+ #1353 Tainted: G            E
---------------------------------------------
swapper/3/0 is trying to acquire lock:
 (&(&htt->rx_ring.lock)->rlock){+.-...}, at: [<f8d7ef19>]
ath10k_htt_rx_msdu_buff_replenish+0x29/0x90 [ath10k_core]

but task is already holding lock:
 (&(&htt->rx_ring.lock)->rlock){+.-...}, at: [<f8d82cab>]
ath10k_htt_txrx_compl_task+0x21b/0x250 [ath10k_core]

other info that might help us debug this:
 Possible unsafe locking scenario:

       CPU0
       ----
  lock(&(&htt->rx_ring.lock)->rlock);
  lock(&(&htt->rx_ring.lock)->rlock);

 *** DEADLOCK ***

 May be due to missing lock nesting notation

1 lock held by swapper/3/0:
 #0:  (&(&htt->rx_ring.lock)->rlock){+.-...}, at: [<f8d82cab>]
ath10k_htt_txrx_compl_task+0x21b/0x250 [ath10k_core]

Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=119151
Fixes: 5c86d97bcc1d ("ath10k: combine txrx and replenish task")
Reported-by: Mike Lothian <mike@fireburn.co.uk>
Signed-off-by: Rajkumar Manoharan <rmanohar@qti.qualcomm.com>
Signed-off-by: Kalle Valo <kvalo@qca.qualcomm.com>

diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c
index cc979a4..813cdd2 100644
--- a/drivers/net/wireless/ath/ath10k/htt_rx.c
+++ b/drivers/net/wireless/ath/ath10k/htt_rx.c
@@ -1904,7 +1904,6 @@ static void ath10k_htt_rx_in_ord_ind(struct ath10k *ar, struct sk_buff *skb)
 			return;
 		}
 	}
-	ath10k_htt_rx_msdu_buff_replenish(htt);
 }
 
 static void ath10k_htt_rx_tx_fetch_resp_id_confirm(struct ath10k *ar,
-- 
cgit v0.10.2


From e024111f6946f45cf1559a8c6fd48d2d0f696d07 Mon Sep 17 00:00:00 2001
From: Miaoqing Pan <miaoqing@codeaurora.org>
Date: Tue, 7 Jun 2016 15:47:07 +0300
Subject: ath9k: fix GPIO mask for AR9462 and AR9565

The incorrect GPIO mask cause kernel warning, when AR9462 access GPIO11.
Also fix the mask for AR9565.

WARNING: CPU: 1 PID: 199 at ../drivers/net/wireless/ath/ath9k/hw.c:2778 ath9k_hw_gpio_get+0x1a9/0x1b0 [ath9k_hw]
CPU: 1 PID: 199 Comm: kworker/u16:9 Not tainted 4.7.0-rc1-next-20160530+ #5
Hardware name: Acer TravelMate P243/BA40_HC, BIOS V1.01 04/20/2012
Workqueue: events_power_efficient rfkill_poll
 0000000000000000 ffff88002cf73d28 ffffffff813b8ddc 0000000000000000
 0000000000000000 ffff88002cf73d68 ffffffff8107a331 00000ada00000086
 ffff880148d9c018 000000000000000b ffff880147e68720 0000000000000200
Call Trace:
 [<ffffffff813b8ddc>] dump_stack+0x63/0x87
 [<ffffffff8107a331>] __warn+0xd1/0xf0
 [<ffffffff8107a41d>] warn_slowpath_null+0x1d/0x20
 [<ffffffffc0775b19>] ath9k_hw_gpio_get+0x1a9/0x1b0 [ath9k_hw]
 [<ffffffffc047f3e4>] ath9k_rfkill_poll_state+0x34/0x60 [ath9k]
 [<ffffffffc06dbb53>] ieee80211_rfkill_poll+0x33/0x40 [mac80211]
 [<ffffffffc03ad65a>] cfg80211_rfkill_poll+0x2a/0xc0 [cfg80211]
 [<ffffffff817c5514>] rfkill_poll+0x24/0x50
 [<ffffffff81093183>] process_one_work+0x153/0x3f0
 [<ffffffff8109393b>] worker_thread+0x12b/0x4b0
 [<ffffffff81093810>] ? rescuer_thread+0x340/0x340
 [<ffffffff81099129>] kthread+0xc9/0xe0
 [<ffffffff817d8f1f>] ret_from_fork+0x1f/0x40
 [<ffffffff81099060>] ? kthread_park+0x60/0x60

Fixes: a01ab81b09c5 ("ath9k: define correct GPIO numbers and bits mask")
Reported-by: Sudip Mukherjee <sudip.mukherjee@codethink.co.uk>
Tested-by: Sudip Mukherjee <sudip.mukherjee@codethink.co.uk>
Signed-off-by: Miaoqing Pan <miaoqing@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@qca.qualcomm.com>

diff --git a/drivers/net/wireless/ath/ath9k/reg.h b/drivers/net/wireless/ath/ath9k/reg.h
index 9272ca9..80ff69f 100644
--- a/drivers/net/wireless/ath/ath9k/reg.h
+++ b/drivers/net/wireless/ath/ath9k/reg.h
@@ -1122,12 +1122,12 @@ enum {
 #define AR9300_NUM_GPIO                          16
 #define AR9330_NUM_GPIO				 16
 #define AR9340_NUM_GPIO				 23
-#define AR9462_NUM_GPIO				 10
+#define AR9462_NUM_GPIO				 14
 #define AR9485_NUM_GPIO				 12
 #define AR9531_NUM_GPIO				 18
 #define AR9550_NUM_GPIO				 24
 #define AR9561_NUM_GPIO				 23
-#define AR9565_NUM_GPIO				 12
+#define AR9565_NUM_GPIO				 14
 #define AR9580_NUM_GPIO				 16
 #define AR7010_NUM_GPIO                          16
 
@@ -1139,12 +1139,12 @@ enum {
 #define AR9300_GPIO_MASK			 0x0000F4FF
 #define AR9330_GPIO_MASK			 0x0000F4FF
 #define AR9340_GPIO_MASK			 0x0000000F
-#define AR9462_GPIO_MASK			 0x000003FF
+#define AR9462_GPIO_MASK			 0x00003FFF
 #define AR9485_GPIO_MASK			 0x00000FFF
 #define AR9531_GPIO_MASK			 0x0000000F
 #define AR9550_GPIO_MASK			 0x0000000F
 #define AR9561_GPIO_MASK			 0x0000000F
-#define AR9565_GPIO_MASK			 0x00000FFF
+#define AR9565_GPIO_MASK			 0x00003FFF
 #define AR9580_GPIO_MASK			 0x0000F4FF
 #define AR7010_GPIO_MASK			 0x0000FFFF
 
-- 
cgit v0.10.2


From 5bc28b93a36e3cb3acc2870fb75cb6ffb182fece Mon Sep 17 00:00:00 2001
From: Rhyland Klein <rklein@nvidia.com>
Date: Thu, 9 Jun 2016 17:28:39 -0400
Subject: power_supply: power_supply_read_temp only if use_cnt > 0

Change power_supply_read_temp() to use power_supply_get_property()
so that it will check the use_cnt and ensure it is > 0. The use_cnt
will be incremented at the end of __power_supply_register, so this
will block to case where get_property can be called before the supply
is fully registered. This fixes the issue show in the stack below:

[    1.452598] power_supply_read_temp+0x78/0x80
[    1.458680] thermal_zone_get_temp+0x5c/0x11c
[    1.464765] thermal_zone_device_update+0x34/0xb4
[    1.471195] thermal_zone_device_register+0x87c/0x8cc
[    1.477974] __power_supply_register+0x364/0x424
[    1.484317] power_supply_register_no_ws+0x10/0x18
[    1.490833] bq27xxx_battery_setup+0x10c/0x164
[    1.497003] bq27xxx_battery_i2c_probe+0xd0/0x1b0
[    1.503435] i2c_device_probe+0x174/0x240
[    1.509172] driver_probe_device+0x1fc/0x29c
[    1.515167] __driver_attach+0xa4/0xa8
[    1.520643] bus_for_each_dev+0x58/0x98
[    1.526204] driver_attach+0x20/0x28
[    1.531505] bus_add_driver+0x1c8/0x22c
[    1.537067] driver_register+0x68/0x108
[    1.542630] i2c_register_driver+0x38/0x7c
[    1.548457] bq27xxx_battery_i2c_driver_init+0x18/0x20
[    1.555321] do_one_initcall+0x38/0x12c
[    1.560886] kernel_init_freeable+0x148/0x1ec
[    1.566972] kernel_init+0x10/0xfc
[    1.572101] ret_from_fork+0x10/0x40

Also make the same change to ps_get_max_charge_cntl_limit() and
ps_get_cur_chrage_cntl_limit() to be safe. Lastly, change the return
value of power_supply_get_property() to -EAGAIN from -ENODEV if
use_cnt <= 0.

Fixes: 297d716f6260 ("power_supply: Change ownership from driver to core")
Cc: stable@vger.kernel.org
Signed-off-by: Rhyland Klein <rklein@nvidia.com>
Reviewed-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Signed-off-by: Sebastian Reichel <sre@kernel.org>

diff --git a/drivers/power/power_supply_core.c b/drivers/power/power_supply_core.c
index 456987c..b13cd07 100644
--- a/drivers/power/power_supply_core.c
+++ b/drivers/power/power_supply_core.c
@@ -565,11 +565,12 @@ static int power_supply_read_temp(struct thermal_zone_device *tzd,
 
 	WARN_ON(tzd == NULL);
 	psy = tzd->devdata;
-	ret = psy->desc->get_property(psy, POWER_SUPPLY_PROP_TEMP, &val);
+	ret = power_supply_get_property(psy, POWER_SUPPLY_PROP_TEMP, &val);
+	if (ret)
+		return ret;
 
 	/* Convert tenths of degree Celsius to milli degree Celsius. */
-	if (!ret)
-		*temp = val.intval * 100;
+	*temp = val.intval * 100;
 
 	return ret;
 }
@@ -612,10 +613,12 @@ static int ps_get_max_charge_cntl_limit(struct thermal_cooling_device *tcd,
 	int ret;
 
 	psy = tcd->devdata;
-	ret = psy->desc->get_property(psy,
-		POWER_SUPPLY_PROP_CHARGE_CONTROL_LIMIT_MAX, &val);
-	if (!ret)
-		*state = val.intval;
+	ret = power_supply_get_property(psy,
+			POWER_SUPPLY_PROP_CHARGE_CONTROL_LIMIT_MAX, &val);
+	if (ret)
+		return ret;
+
+	*state = val.intval;
 
 	return ret;
 }
@@ -628,10 +631,12 @@ static int ps_get_cur_chrage_cntl_limit(struct thermal_cooling_device *tcd,
 	int ret;
 
 	psy = tcd->devdata;
-	ret = psy->desc->get_property(psy,
-		POWER_SUPPLY_PROP_CHARGE_CONTROL_LIMIT, &val);
-	if (!ret)
-		*state = val.intval;
+	ret = power_supply_get_property(psy,
+			POWER_SUPPLY_PROP_CHARGE_CONTROL_LIMIT, &val);
+	if (ret)
+		return ret;
+
+	*state = val.intval;
 
 	return ret;
 }
-- 
cgit v0.10.2


From fdecf36fcefa9f48c2f2de21c8176f1a8ce2c960 Mon Sep 17 00:00:00 2001
From: Clemens Gruber <clemens.gruber@pqgruber.com>
Date: Sat, 11 Jun 2016 17:21:26 +0200
Subject: phy: marvell: fix LED configuration via marvell,reg-init

Configuring the PHY LED registers for the Marvell 88E1510 and others is
not possible, because regardless of the values in marvell,reg-init, it
is later overridden in m88e1121_config_aneg with a non-standard default.

This patch moves that default configuration to .config_init to allow
setting the LED configuration through marvell,reg-init in the device
tree, which should override said default if it exists.

Signed-off-by: Clemens Gruber <clemens.gruber@pqgruber.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index 280e879..79ccc11 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -407,15 +407,7 @@ static int m88e1121_config_aneg(struct phy_device *phydev)
 	if (err < 0)
 		return err;
 
-	oldpage = phy_read(phydev, MII_MARVELL_PHY_PAGE);
-
-	phy_write(phydev, MII_MARVELL_PHY_PAGE, MII_88E1121_PHY_LED_PAGE);
-	phy_write(phydev, MII_88E1121_PHY_LED_CTRL, MII_88E1121_PHY_LED_DEF);
-	phy_write(phydev, MII_MARVELL_PHY_PAGE, oldpage);
-
-	err = genphy_config_aneg(phydev);
-
-	return err;
+	return genphy_config_aneg(phydev);
 }
 
 static int m88e1318_config_aneg(struct phy_device *phydev)
@@ -636,6 +628,28 @@ static int m88e1111_config_init(struct phy_device *phydev)
 	return phy_write(phydev, MII_BMCR, BMCR_RESET);
 }
 
+static int m88e1121_config_init(struct phy_device *phydev)
+{
+	int err, oldpage;
+
+	oldpage = phy_read(phydev, MII_MARVELL_PHY_PAGE);
+
+	err = phy_write(phydev, MII_MARVELL_PHY_PAGE, MII_88E1121_PHY_LED_PAGE);
+	if (err < 0)
+		return err;
+
+	/* Default PHY LED config: LED[0] .. Link, LED[1] .. Activity */
+	err = phy_write(phydev, MII_88E1121_PHY_LED_CTRL,
+			MII_88E1121_PHY_LED_DEF);
+	if (err < 0)
+		return err;
+
+	phy_write(phydev, MII_MARVELL_PHY_PAGE, oldpage);
+
+	/* Set marvell,reg-init configuration from device tree */
+	return marvell_config_init(phydev);
+}
+
 static int m88e1510_config_init(struct phy_device *phydev)
 {
 	int err;
@@ -668,7 +682,7 @@ static int m88e1510_config_init(struct phy_device *phydev)
 			return err;
 	}
 
-	return marvell_config_init(phydev);
+	return m88e1121_config_init(phydev);
 }
 
 static int m88e1118_config_aneg(struct phy_device *phydev)
@@ -1196,7 +1210,7 @@ static struct phy_driver marvell_drivers[] = {
 		.features = PHY_GBIT_FEATURES,
 		.flags = PHY_HAS_INTERRUPT,
 		.probe = marvell_probe,
-		.config_init = &marvell_config_init,
+		.config_init = &m88e1121_config_init,
 		.config_aneg = &m88e1121_config_aneg,
 		.read_status = &marvell_read_status,
 		.ack_interrupt = &marvell_ack_interrupt,
@@ -1215,7 +1229,7 @@ static struct phy_driver marvell_drivers[] = {
 		.features = PHY_GBIT_FEATURES,
 		.flags = PHY_HAS_INTERRUPT,
 		.probe = marvell_probe,
-		.config_init = &marvell_config_init,
+		.config_init = &m88e1121_config_init,
 		.config_aneg = &m88e1318_config_aneg,
 		.read_status = &marvell_read_status,
 		.ack_interrupt = &marvell_ack_interrupt,
-- 
cgit v0.10.2


From dcb94b88c09ce82a80e188d49bcffdc83ba215a6 Mon Sep 17 00:00:00 2001
From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Sat, 11 Jun 2016 20:32:06 +0200
Subject: ipv6: fix endianness error in icmpv6_err

IPv6 ping socket error handler doesn't correctly convert the new 32 bit
mtu to host endianness before using.

Cc: Lorenzo Colitti <lorenzo@google.com>
Fixes: 6d0bfe22611602f ("net: ipv6: Add IPv6 support to the ping socket.")
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Acked-by: Lorenzo Colitti <lorenzo@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 4527285..a4fa840 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -98,7 +98,7 @@ static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 
 	if (!(type & ICMPV6_INFOMSG_MASK))
 		if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
-			ping_err(skb, offset, info);
+			ping_err(skb, offset, ntohl(info));
 }
 
 static int icmpv6_rcv(struct sk_buff *skb);
-- 
cgit v0.10.2


From 5119bd16815d3f0364390a1369392dcc036790e7 Mon Sep 17 00:00:00 2001
From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Sat, 11 Jun 2016 20:41:38 +0200
Subject: ipv6: tcp: fix endianness annotation in tcp_v6_send_response

Cc: Florent Fourcot <florent.fourcot@enst-bretagne.fr>
Fixes: 1d13a96c74fc ("ipv6: tcp: fix flowlabel value in ACK messages send from TIME_WAIT")
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index f36c2d0..2255d2b 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -738,7 +738,7 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
 				 u32 ack, u32 win, u32 tsval, u32 tsecr,
 				 int oif, struct tcp_md5sig_key *key, int rst,
-				 u8 tclass, u32 label)
+				 u8 tclass, __be32 label)
 {
 	const struct tcphdr *th = tcp_hdr(skb);
 	struct tcphdr *t1;
@@ -911,7 +911,7 @@ out:
 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
 			    u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
 			    struct tcp_md5sig_key *key, u8 tclass,
-			    u32 label)
+			    __be32 label)
 {
 	tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
 			     tclass, label);
-- 
cgit v0.10.2


From c148d16369ff0095eca950d17968ba1d56a47b53 Mon Sep 17 00:00:00 2001
From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Sat, 11 Jun 2016 21:15:37 +0200
Subject: ipv6: fix checksum annotation in udp6_csum_init

Cc: Tom Herbert <tom@herbertland.com>
Fixes: 4068579e1e098fa ("net: Implmement RFC 6936 (zero RX csums for UDP/IPv6")
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/ipv6/ip6_checksum.c b/net/ipv6/ip6_checksum.c
index b2025bf..c0cbcb2 100644
--- a/net/ipv6/ip6_checksum.c
+++ b/net/ipv6/ip6_checksum.c
@@ -78,9 +78,12 @@ int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, int proto)
 	 * we accept a checksum of zero here. When we find the socket
 	 * for the UDP packet we'll check if that socket allows zero checksum
 	 * for IPv6 (set by socket option).
+	 *
+	 * Note, we are only interested in != 0 or == 0, thus the
+	 * force to int.
 	 */
-	return skb_checksum_init_zero_check(skb, proto, uh->check,
-					   ip6_compute_pseudo);
+	return (__force int)skb_checksum_init_zero_check(skb, proto, uh->check,
+							 ip6_compute_pseudo);
 }
 EXPORT_SYMBOL(udp6_csum_init);
 
-- 
cgit v0.10.2


From b46d9f625b07f843c706c2c7d0210a90ccdf143b Mon Sep 17 00:00:00 2001
From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Sun, 12 Jun 2016 12:02:46 +0200
Subject: ipv4: fix checksum annotation in udp4_csum_init

Reported-by: Cong Wang <xiyou.wangcong@gmail.com>
Cc: Cong Wang <xiyou.wangcong@gmail.com>
Cc: Tom Herbert <tom@herbertland.com>
Fixes: 4068579e1e098fa ("net: Implmement RFC 6936 (zero RX csums for UDP/IPv6")
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 0ff31d9..ba0d8b8 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1755,8 +1755,11 @@ static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh,
 			return err;
 	}
 
-	return skb_checksum_init_zero_check(skb, proto, uh->check,
-					    inet_compute_pseudo);
+	/* Note, we are only interested in != 0 or == 0, thus the
+	 * force to int.
+	 */
+	return (__force int)skb_checksum_init_zero_check(skb, proto, uh->check,
+							 inet_compute_pseudo);
 }
 
 /*
-- 
cgit v0.10.2


From 881d0327db37ad917a367c77aff1afa1ee41e0a9 Mon Sep 17 00:00:00 2001
From: Feng Tang <feng.tang@intel.com>
Date: Sun, 12 Jun 2016 17:36:37 +0800
Subject: net: alx: Work around the DMA RX overflow issue

Commit 26c5f03 uses a new skb allocator to avoid the RFD overflow
issue.

But from debugging without datasheet, we found the error always
happen when the DMA RX address is set to 0x....fc0, which is very
likely to be a HW/silicon problem.

So one idea is instead of adding a new allocator, why not just
hitting the right target by avaiding the error-prone DMA address?

This patch will actually
* Remove the commit 26c5f03
* Apply rx skb with 64 bytes longer space, and if the allocated skb
  has a 0x...fc0 address, it will use skb_resever(skb, 64) to
  advance the address, so that the RX overflow can be avoided.

In theory this method should also apply to atl1c driver, which
I can't find anyone who can help to test on real devices.

Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=70761
Signed-off-by: Feng Tang <feng.tang@intel.com>
Suggested-by: Eric Dumazet <edumazet@google.com>
Tested-by: Ole Lukoie <olelukoie@mail.ru>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/atheros/alx/alx.h b/drivers/net/ethernet/atheros/alx/alx.h
index d02c424..8fc93c5 100644
--- a/drivers/net/ethernet/atheros/alx/alx.h
+++ b/drivers/net/ethernet/atheros/alx/alx.h
@@ -96,10 +96,6 @@ struct alx_priv {
 	unsigned int rx_ringsz;
 	unsigned int rxbuf_size;
 
-	struct page  *rx_page;
-	unsigned int rx_page_offset;
-	unsigned int rx_frag_size;
-
 	struct napi_struct napi;
 	struct alx_tx_queue txq;
 	struct alx_rx_queue rxq;
diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c
index c98acdc..e708e36 100644
--- a/drivers/net/ethernet/atheros/alx/main.c
+++ b/drivers/net/ethernet/atheros/alx/main.c
@@ -70,35 +70,6 @@ static void alx_free_txbuf(struct alx_priv *alx, int entry)
 	}
 }
 
-static struct sk_buff *alx_alloc_skb(struct alx_priv *alx, gfp_t gfp)
-{
-	struct sk_buff *skb;
-	struct page *page;
-
-	if (alx->rx_frag_size > PAGE_SIZE)
-		return __netdev_alloc_skb(alx->dev, alx->rxbuf_size, gfp);
-
-	page = alx->rx_page;
-	if (!page) {
-		alx->rx_page = page = alloc_page(gfp);
-		if (unlikely(!page))
-			return NULL;
-		alx->rx_page_offset = 0;
-	}
-
-	skb = build_skb(page_address(page) + alx->rx_page_offset,
-			alx->rx_frag_size);
-	if (likely(skb)) {
-		alx->rx_page_offset += alx->rx_frag_size;
-		if (alx->rx_page_offset >= PAGE_SIZE)
-			alx->rx_page = NULL;
-		else
-			get_page(page);
-	}
-	return skb;
-}
-
-
 static int alx_refill_rx_ring(struct alx_priv *alx, gfp_t gfp)
 {
 	struct alx_rx_queue *rxq = &alx->rxq;
@@ -115,9 +86,22 @@ static int alx_refill_rx_ring(struct alx_priv *alx, gfp_t gfp)
 	while (!cur_buf->skb && next != rxq->read_idx) {
 		struct alx_rfd *rfd = &rxq->rfd[cur];
 
-		skb = alx_alloc_skb(alx, gfp);
+		/*
+		 * When DMA RX address is set to something like
+		 * 0x....fc0, it will be very likely to cause DMA
+		 * RFD overflow issue.
+		 *
+		 * To work around it, we apply rx skb with 64 bytes
+		 * longer space, and offset the address whenever
+		 * 0x....fc0 is detected.
+		 */
+		skb = __netdev_alloc_skb(alx->dev, alx->rxbuf_size + 64, gfp);
 		if (!skb)
 			break;
+
+		if (((unsigned long)skb->data & 0xfff) == 0xfc0)
+			skb_reserve(skb, 64);
+
 		dma = dma_map_single(&alx->hw.pdev->dev,
 				     skb->data, alx->rxbuf_size,
 				     DMA_FROM_DEVICE);
@@ -153,7 +137,6 @@ static int alx_refill_rx_ring(struct alx_priv *alx, gfp_t gfp)
 		alx_write_mem16(&alx->hw, ALX_RFD_PIDX, cur);
 	}
 
-
 	return count;
 }
 
@@ -622,11 +605,6 @@ static void alx_free_rings(struct alx_priv *alx)
 	kfree(alx->txq.bufs);
 	kfree(alx->rxq.bufs);
 
-	if (alx->rx_page) {
-		put_page(alx->rx_page);
-		alx->rx_page = NULL;
-	}
-
 	dma_free_coherent(&alx->hw.pdev->dev,
 			  alx->descmem.size,
 			  alx->descmem.virt,
@@ -681,7 +659,6 @@ static int alx_request_irq(struct alx_priv *alx)
 				  alx->dev->name, alx);
 		if (!err)
 			goto out;
-
 		/* fall back to legacy interrupt */
 		pci_disable_msi(alx->hw.pdev);
 	}
@@ -725,7 +702,6 @@ static int alx_init_sw(struct alx_priv *alx)
 	struct pci_dev *pdev = alx->hw.pdev;
 	struct alx_hw *hw = &alx->hw;
 	int err;
-	unsigned int head_size;
 
 	err = alx_identify_hw(alx);
 	if (err) {
@@ -741,12 +717,7 @@ static int alx_init_sw(struct alx_priv *alx)
 
 	hw->smb_timer = 400;
 	hw->mtu = alx->dev->mtu;
-
 	alx->rxbuf_size = ALX_MAX_FRAME_LEN(hw->mtu);
-	head_size = SKB_DATA_ALIGN(alx->rxbuf_size + NET_SKB_PAD) +
-		    SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
-	alx->rx_frag_size = roundup_pow_of_two(head_size);
-
 	alx->tx_ringsz = 256;
 	alx->rx_ringsz = 512;
 	hw->imt = 200;
@@ -848,7 +819,6 @@ static int alx_change_mtu(struct net_device *netdev, int mtu)
 {
 	struct alx_priv *alx = netdev_priv(netdev);
 	int max_frame = ALX_MAX_FRAME_LEN(mtu);
-	unsigned int head_size;
 
 	if ((max_frame < ALX_MIN_FRAME_SIZE) ||
 	    (max_frame > ALX_MAX_FRAME_SIZE))
@@ -860,9 +830,6 @@ static int alx_change_mtu(struct net_device *netdev, int mtu)
 	netdev->mtu = mtu;
 	alx->hw.mtu = mtu;
 	alx->rxbuf_size = max(max_frame, ALX_DEF_RXBUF_SIZE);
-	head_size = SKB_DATA_ALIGN(alx->rxbuf_size + NET_SKB_PAD) +
-		    SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
-	alx->rx_frag_size = roundup_pow_of_two(head_size);
 	netdev_update_features(netdev);
 	if (netif_running(netdev))
 		alx_reinit(alx);
-- 
cgit v0.10.2


From 6c0d54f1897d229748d4f41ef919078db6db2123 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sun, 12 Jun 2016 20:01:25 -0700
Subject: net_sched: fix pfifo_head_drop behavior vs backlog

When the qdisc is full, we drop a packet at the head of the queue,
queue the current skb and return NET_XMIT_CN

Now we track backlog on upper qdiscs, we need to call
qdisc_tree_reduce_backlog(), even if the qlen did not change.

Fixes: 2ccccf5fb43f ("net_sched: update hierarchical backlog too")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: WANG Cong <xiyou.wangcong@gmail.com>
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Acked-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c
index 2177eac..2e4bd2c 100644
--- a/net/sched/sch_fifo.c
+++ b/net/sched/sch_fifo.c
@@ -37,14 +37,18 @@ static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 
 static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 {
+	unsigned int prev_backlog;
+
 	if (likely(skb_queue_len(&sch->q) < sch->limit))
 		return qdisc_enqueue_tail(skb, sch);
 
+	prev_backlog = sch->qstats.backlog;
 	/* queue full, remove one skb to fulfill the limit */
 	__qdisc_queue_drop_head(sch, &sch->q);
 	qdisc_qstats_drop(sch);
 	qdisc_enqueue_tail(skb, sch);
 
+	qdisc_tree_reduce_backlog(sch, 0, prev_backlog - sch->qstats.backlog);
 	return NET_XMIT_CN;
 }
 
-- 
cgit v0.10.2


From d1e37288c9146dccff830e3253e403af8705b51f Mon Sep 17 00:00:00 2001
From: "Su, Xuemin" <suxm@chinanetcenter.com>
Date: Mon, 13 Jun 2016 11:02:50 +0800
Subject: udp reuseport: fix packet of same flow hashed to different socket

There is a corner case in which udp packets belonging to a same
flow are hashed to different socket when hslot->count changes from 10
to 11:

1) When hslot->count <= 10, __udp_lib_lookup() searches udp_table->hash,
and always passes 'daddr' to udp_ehashfn().

2) When hslot->count > 10, __udp_lib_lookup() searches udp_table->hash2,
but may pass 'INADDR_ANY' to udp_ehashfn() if the sockets are bound to
INADDR_ANY instead of some specific addr.

That means when hslot->count changes from 10 to 11, the hash calculated by
udp_ehashfn() is also changed, and the udp packets belonging to a same
flow will be hashed to different socket.

This is easily reproduced:
1) Create 10 udp sockets and bind all of them to 0.0.0.0:40000.
2) From the same host send udp packets to 127.0.0.1:40000, record the
socket index which receives the packets.
3) Create 1 more udp socket and bind it to 0.0.0.0:44096. The number 44096
is 40000 + UDP_HASH_SIZE(4096), this makes the new socket put into the
same hslot as the aformentioned 10 sockets, and makes the hslot->count
change from 10 to 11.
4) From the same host send udp packets to 127.0.0.1:40000, and the socket
index which receives the packets will be different from the one received
in step 2.
This should not happen as the socket bound to 0.0.0.0:44096 should not
change the behavior of the sockets bound to 0.0.0.0:40000.

It's the same case for IPv6, and this patch also fixes that.

Signed-off-by: Su, Xuemin <suxm@chinanetcenter.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index ba0d8b8..ca5e8ea 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -391,9 +391,9 @@ int udp_v4_get_port(struct sock *sk, unsigned short snum)
 	return udp_lib_get_port(sk, snum, ipv4_rcv_saddr_equal, hash2_nulladdr);
 }
 
-static inline int compute_score(struct sock *sk, struct net *net,
-				__be32 saddr, unsigned short hnum, __be16 sport,
-				__be32 daddr, __be16 dport, int dif)
+static int compute_score(struct sock *sk, struct net *net,
+			 __be32 saddr, __be16 sport,
+			 __be32 daddr, unsigned short hnum, int dif)
 {
 	int score;
 	struct inet_sock *inet;
@@ -434,52 +434,6 @@ static inline int compute_score(struct sock *sk, struct net *net,
 	return score;
 }
 
-/*
- * In this second variant, we check (daddr, dport) matches (inet_rcv_sadd, inet_num)
- */
-static inline int compute_score2(struct sock *sk, struct net *net,
-				 __be32 saddr, __be16 sport,
-				 __be32 daddr, unsigned int hnum, int dif)
-{
-	int score;
-	struct inet_sock *inet;
-
-	if (!net_eq(sock_net(sk), net) ||
-	    ipv6_only_sock(sk))
-		return -1;
-
-	inet = inet_sk(sk);
-
-	if (inet->inet_rcv_saddr != daddr ||
-	    inet->inet_num != hnum)
-		return -1;
-
-	score = (sk->sk_family == PF_INET) ? 2 : 1;
-
-	if (inet->inet_daddr) {
-		if (inet->inet_daddr != saddr)
-			return -1;
-		score += 4;
-	}
-
-	if (inet->inet_dport) {
-		if (inet->inet_dport != sport)
-			return -1;
-		score += 4;
-	}
-
-	if (sk->sk_bound_dev_if) {
-		if (sk->sk_bound_dev_if != dif)
-			return -1;
-		score += 4;
-	}
-
-	if (sk->sk_incoming_cpu == raw_smp_processor_id())
-		score++;
-
-	return score;
-}
-
 static u32 udp_ehashfn(const struct net *net, const __be32 laddr,
 		       const __u16 lport, const __be32 faddr,
 		       const __be16 fport)
@@ -492,11 +446,11 @@ static u32 udp_ehashfn(const struct net *net, const __be32 laddr,
 			      udp_ehash_secret + net_hash_mix(net));
 }
 
-/* called with read_rcu_lock() */
+/* called with rcu_read_lock() */
 static struct sock *udp4_lib_lookup2(struct net *net,
 		__be32 saddr, __be16 sport,
 		__be32 daddr, unsigned int hnum, int dif,
-		struct udp_hslot *hslot2, unsigned int slot2,
+		struct udp_hslot *hslot2,
 		struct sk_buff *skb)
 {
 	struct sock *sk, *result;
@@ -506,7 +460,7 @@ static struct sock *udp4_lib_lookup2(struct net *net,
 	result = NULL;
 	badness = 0;
 	udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
-		score = compute_score2(sk, net, saddr, sport,
+		score = compute_score(sk, net, saddr, sport,
 				      daddr, hnum, dif);
 		if (score > badness) {
 			reuseport = sk->sk_reuseport;
@@ -554,17 +508,22 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
 
 		result = udp4_lib_lookup2(net, saddr, sport,
 					  daddr, hnum, dif,
-					  hslot2, slot2, skb);
+					  hslot2, skb);
 		if (!result) {
+			unsigned int old_slot2 = slot2;
 			hash2 = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
 			slot2 = hash2 & udptable->mask;
+			/* avoid searching the same slot again. */
+			if (unlikely(slot2 == old_slot2))
+				return result;
+
 			hslot2 = &udptable->hash2[slot2];
 			if (hslot->count < hslot2->count)
 				goto begin;
 
 			result = udp4_lib_lookup2(net, saddr, sport,
-						  htonl(INADDR_ANY), hnum, dif,
-						  hslot2, slot2, skb);
+						  daddr, hnum, dif,
+						  hslot2, skb);
 		}
 		return result;
 	}
@@ -572,8 +531,8 @@ begin:
 	result = NULL;
 	badness = 0;
 	sk_for_each_rcu(sk, &hslot->head) {
-		score = compute_score(sk, net, saddr, hnum, sport,
-				      daddr, dport, dif);
+		score = compute_score(sk, net, saddr, sport,
+				      daddr, hnum, dif);
 		if (score > badness) {
 			reuseport = sk->sk_reuseport;
 			if (reuseport) {
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index f421c9f..005dc82 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -115,11 +115,10 @@ static void udp_v6_rehash(struct sock *sk)
 	udp_lib_rehash(sk, new_hash);
 }
 
-static inline int compute_score(struct sock *sk, struct net *net,
-				unsigned short hnum,
-				const struct in6_addr *saddr, __be16 sport,
-				const struct in6_addr *daddr, __be16 dport,
-				int dif)
+static int compute_score(struct sock *sk, struct net *net,
+			 const struct in6_addr *saddr, __be16 sport,
+			 const struct in6_addr *daddr, unsigned short hnum,
+			 int dif)
 {
 	int score;
 	struct inet_sock *inet;
@@ -162,54 +161,11 @@ static inline int compute_score(struct sock *sk, struct net *net,
 	return score;
 }
 
-static inline int compute_score2(struct sock *sk, struct net *net,
-				 const struct in6_addr *saddr, __be16 sport,
-				 const struct in6_addr *daddr,
-				 unsigned short hnum, int dif)
-{
-	int score;
-	struct inet_sock *inet;
-
-	if (!net_eq(sock_net(sk), net) ||
-	    udp_sk(sk)->udp_port_hash != hnum ||
-	    sk->sk_family != PF_INET6)
-		return -1;
-
-	if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr))
-		return -1;
-
-	score = 0;
-	inet = inet_sk(sk);
-
-	if (inet->inet_dport) {
-		if (inet->inet_dport != sport)
-			return -1;
-		score++;
-	}
-
-	if (!ipv6_addr_any(&sk->sk_v6_daddr)) {
-		if (!ipv6_addr_equal(&sk->sk_v6_daddr, saddr))
-			return -1;
-		score++;
-	}
-
-	if (sk->sk_bound_dev_if) {
-		if (sk->sk_bound_dev_if != dif)
-			return -1;
-		score++;
-	}
-
-	if (sk->sk_incoming_cpu == raw_smp_processor_id())
-		score++;
-
-	return score;
-}
-
-/* called with read_rcu_lock() */
+/* called with rcu_read_lock() */
 static struct sock *udp6_lib_lookup2(struct net *net,
 		const struct in6_addr *saddr, __be16 sport,
 		const struct in6_addr *daddr, unsigned int hnum, int dif,
-		struct udp_hslot *hslot2, unsigned int slot2,
+		struct udp_hslot *hslot2,
 		struct sk_buff *skb)
 {
 	struct sock *sk, *result;
@@ -219,7 +175,7 @@ static struct sock *udp6_lib_lookup2(struct net *net,
 	result = NULL;
 	badness = -1;
 	udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
-		score = compute_score2(sk, net, saddr, sport,
+		score = compute_score(sk, net, saddr, sport,
 				      daddr, hnum, dif);
 		if (score > badness) {
 			reuseport = sk->sk_reuseport;
@@ -268,17 +224,22 @@ struct sock *__udp6_lib_lookup(struct net *net,
 
 		result = udp6_lib_lookup2(net, saddr, sport,
 					  daddr, hnum, dif,
-					  hslot2, slot2, skb);
+					  hslot2, skb);
 		if (!result) {
+			unsigned int old_slot2 = slot2;
 			hash2 = udp6_portaddr_hash(net, &in6addr_any, hnum);
 			slot2 = hash2 & udptable->mask;
+			/* avoid searching the same slot again. */
+			if (unlikely(slot2 == old_slot2))
+				return result;
+
 			hslot2 = &udptable->hash2[slot2];
 			if (hslot->count < hslot2->count)
 				goto begin;
 
 			result = udp6_lib_lookup2(net, saddr, sport,
-						  &in6addr_any, hnum, dif,
-						  hslot2, slot2, skb);
+						  daddr, hnum, dif,
+						  hslot2, skb);
 		}
 		return result;
 	}
@@ -286,7 +247,7 @@ begin:
 	result = NULL;
 	badness = -1;
 	sk_for_each_rcu(sk, &hslot->head) {
-		score = compute_score(sk, net, hnum, saddr, sport, daddr, dport, dif);
+		score = compute_score(sk, net, saddr, sport, daddr, hnum, dif);
 		if (score > badness) {
 			reuseport = sk->sk_reuseport;
 			if (reuseport) {
-- 
cgit v0.10.2


From 8beb330044d0d1878c7b92290e91c0b889e92633 Mon Sep 17 00:00:00 2001
From: James Bottomley <jejb@linux.vnet.ibm.com>
Date: Mon, 13 Jun 2016 22:00:07 -0700
Subject: 53c700: fix BUG on untagged commands

The untagged command case in the 53c700 driver has been broken since
host wide tags were enabled because the replaced scsi_find_tag()
function had a special case for the tag value SCSI_NO_TAG to retrieve
sdev->current_cmnd.  The replacement function scsi_host_find_tag() has
no such special case and returns NULL causing untagged commands to
trigger a BUG() in the driver.  Inspection shows that the 53c700 is the
only driver using this SCSI_NO_TAG case, so a local fix in the driver
suffices to fix this problem globally.

Fixes: 64d513ac31b - "scsi: use host wide tags by default"
Cc: stable@vger.kernel.org	# 4.4+
Reported-by: Helge Deller <deller@gmx.de>
Tested-by: Helge Deller <deller@gmx.de>
Signed-off-by: James Bottomley <jejb@linux.vnet.ibm.com>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Reviewed-by: Ewan D. Milne <emilne@redhat.com>
Acked-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>

diff --git a/drivers/scsi/53c700.c b/drivers/scsi/53c700.c
index d4c2856..3ddc85e 100644
--- a/drivers/scsi/53c700.c
+++ b/drivers/scsi/53c700.c
@@ -1122,7 +1122,7 @@ process_script_interrupt(__u32 dsps, __u32 dsp, struct scsi_cmnd *SCp,
 		} else {
 			struct scsi_cmnd *SCp;
 
-			SCp = scsi_host_find_tag(SDp->host, SCSI_NO_TAG);
+			SCp = SDp->current_cmnd;
 			if(unlikely(SCp == NULL)) {
 				sdev_printk(KERN_ERR, SDp,
 					"no saved request for untagged cmd\n");
@@ -1826,7 +1826,7 @@ NCR_700_queuecommand_lck(struct scsi_cmnd *SCp, void (*done)(struct scsi_cmnd *)
 		       slot->tag, slot);
 	} else {
 		slot->tag = SCSI_NO_TAG;
-		/* must populate current_cmnd for scsi_host_find_tag to work */
+		/* save current command for reselection */
 		SCp->device->current_cmnd = SCp;
 	}
 	/* sanity check: some of the commands generated by the mid-layer
-- 
cgit v0.10.2


From 106da663ff495e0aea3ac15b8317aa410754fcac Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Mon, 13 Jun 2016 10:31:04 +0200
Subject: ovs/gre,geneve: fix error path when creating an iface

After ipgre_newlink()/geneve_configure() call, the netdev is registered.

Fixes: 7e059158d57b ("vxlan, gre, geneve: Set a large MTU on ovs-created tunnel devices")
CC: David Wragg <david@weave.works>
Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index cadefe4..086c2da 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -1508,6 +1508,7 @@ struct net_device *geneve_dev_create_fb(struct net *net, const char *name,
 {
 	struct nlattr *tb[IFLA_MAX + 1];
 	struct net_device *dev;
+	LIST_HEAD(list_kill);
 	int err;
 
 	memset(tb, 0, sizeof(tb));
@@ -1519,8 +1520,10 @@ struct net_device *geneve_dev_create_fb(struct net *net, const char *name,
 	err = geneve_configure(net, dev, &geneve_remote_unspec,
 			       0, 0, 0, 0, htons(dst_port), true,
 			       GENEVE_F_UDP_ZERO_CSUM6_RX);
-	if (err)
-		goto err;
+	if (err) {
+		free_netdev(dev);
+		return ERR_PTR(err);
+	}
 
 	/* openvswitch users expect packet sizes to be unrestricted,
 	 * so set the largest MTU we can.
@@ -1532,7 +1535,8 @@ struct net_device *geneve_dev_create_fb(struct net *net, const char *name,
 	return dev;
 
  err:
-	free_netdev(dev);
+	geneve_dellink(dev, &list_kill);
+	unregister_netdevice_many(&list_kill);
 	return ERR_PTR(err);
 }
 EXPORT_SYMBOL_GPL(geneve_dev_create_fb);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 4d2025f..08deba6 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -1121,6 +1121,7 @@ struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
 {
 	struct nlattr *tb[IFLA_MAX + 1];
 	struct net_device *dev;
+	LIST_HEAD(list_kill);
 	struct ip_tunnel *t;
 	int err;
 
@@ -1136,8 +1137,10 @@ struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
 	t->collect_md = true;
 
 	err = ipgre_newlink(net, dev, tb, NULL);
-	if (err < 0)
-		goto out;
+	if (err < 0) {
+		free_netdev(dev);
+		return ERR_PTR(err);
+	}
 
 	/* openvswitch users expect packet sizes to be unrestricted,
 	 * so set the largest MTU we can.
@@ -1148,7 +1151,8 @@ struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
 
 	return dev;
 out:
-	free_netdev(dev);
+	ip_tunnel_dellink(dev, &list_kill);
+	unregister_netdevice_many(&list_kill);
 	return ERR_PTR(err);
 }
 EXPORT_SYMBOL_GPL(gretap_fb_dev_create);
-- 
cgit v0.10.2


From cf5da330bbdd0c06b05c525a3d1d58ccd82c87a6 Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Mon, 13 Jun 2016 10:31:05 +0200
Subject: ovs/vxlan: fix rtnl notifications on iface deletion

The function vxlan_dev_create() (only used by ovs) never calls
rtnl_configure_link(). The consequence is that dev->rtnl_link_state is
never set to RTNL_LINK_INITIALIZED.
During the deletion phase, the function rollback_registered_many() sends
a RTM_DELLINK only if dev->rtnl_link_state is set to RTNL_LINK_INITIALIZED.

Note that the function vxlan_dev_create() is moved after the rtnl stuff so
that vxlan_dellink() can be called in this function.

Fixes: dcc38c033b32 ("openvswitch: Re-add CONFIG_OPENVSWITCH_VXLAN")
CC: Thomas Graf <tgraf@suug.ch>
CC: Pravin B Shelar <pshelar@nicira.com>
Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index f999db2..b3b9db6 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -2952,30 +2952,6 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
 	return 0;
 }
 
-struct net_device *vxlan_dev_create(struct net *net, const char *name,
-				    u8 name_assign_type, struct vxlan_config *conf)
-{
-	struct nlattr *tb[IFLA_MAX+1];
-	struct net_device *dev;
-	int err;
-
-	memset(&tb, 0, sizeof(tb));
-
-	dev = rtnl_create_link(net, name, name_assign_type,
-			       &vxlan_link_ops, tb);
-	if (IS_ERR(dev))
-		return dev;
-
-	err = vxlan_dev_configure(net, dev, conf);
-	if (err < 0) {
-		free_netdev(dev);
-		return ERR_PTR(err);
-	}
-
-	return dev;
-}
-EXPORT_SYMBOL_GPL(vxlan_dev_create);
-
 static int vxlan_newlink(struct net *src_net, struct net_device *dev,
 			 struct nlattr *tb[], struct nlattr *data[])
 {
@@ -3268,6 +3244,40 @@ static struct rtnl_link_ops vxlan_link_ops __read_mostly = {
 	.get_link_net	= vxlan_get_link_net,
 };
 
+struct net_device *vxlan_dev_create(struct net *net, const char *name,
+				    u8 name_assign_type,
+				    struct vxlan_config *conf)
+{
+	struct nlattr *tb[IFLA_MAX + 1];
+	struct net_device *dev;
+	int err;
+
+	memset(&tb, 0, sizeof(tb));
+
+	dev = rtnl_create_link(net, name, name_assign_type,
+			       &vxlan_link_ops, tb);
+	if (IS_ERR(dev))
+		return dev;
+
+	err = vxlan_dev_configure(net, dev, conf);
+	if (err < 0) {
+		free_netdev(dev);
+		return ERR_PTR(err);
+	}
+
+	err = rtnl_configure_link(dev, NULL);
+	if (err < 0) {
+		LIST_HEAD(list_kill);
+
+		vxlan_dellink(dev, &list_kill);
+		unregister_netdevice_many(&list_kill);
+		return ERR_PTR(err);
+	}
+
+	return dev;
+}
+EXPORT_SYMBOL_GPL(vxlan_dev_create);
+
 static void vxlan_handle_lowerdev_unregister(struct vxlan_net *vn,
 					     struct net_device *dev)
 {
-- 
cgit v0.10.2


From da6f1da819d4b9c081a477dec74dc468a0b44290 Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Mon, 13 Jun 2016 10:31:06 +0200
Subject: ovs/gre: fix rtnl notifications on iface deletion

The function gretap_fb_dev_create() (only used by ovs) never calls
rtnl_configure_link(). The consequence is that dev->rtnl_link_state is
never set to RTNL_LINK_INITIALIZED.
During the deletion phase, the function rollback_registered_many() sends
a RTM_DELLINK only if dev->rtnl_link_state is set to RTNL_LINK_INITIALIZED.

Fixes: b2acd1dc3949 ("openvswitch: Use regular GRE net_device instead of vport")
CC: Thomas Graf <tgraf@suug.ch>
CC: Pravin B Shelar <pshelar@nicira.com>
Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 08deba6..07c5cf1 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -1149,6 +1149,10 @@ struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
 	if (err)
 		goto out;
 
+	err = rtnl_configure_link(dev, NULL);
+	if (err < 0)
+		goto out;
+
 	return dev;
 out:
 	ip_tunnel_dellink(dev, &list_kill);
-- 
cgit v0.10.2


From 41009481b690493c169ce85f591b9d32c6fd9422 Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Mon, 13 Jun 2016 10:31:07 +0200
Subject: ovs/geneve: fix rtnl notifications on iface deletion

The function geneve_dev_create_fb() (only used by ovs) never calls
rtnl_configure_link(). The consequence is that dev->rtnl_link_state is
never set to RTNL_LINK_INITIALIZED.
During the deletion phase, the function rollback_registered_many() sends
a RTM_DELLINK only if dev->rtnl_link_state is set to RTNL_LINK_INITIALIZED.

Fixes: e305ac6cf5a1 ("geneve: Add support to collect tunnel metadata.")
CC: Pravin B Shelar <pshelar@nicira.com>
CC: Jesse Gross <jesse@nicira.com>
CC: Thomas Graf <tgraf@suug.ch>
Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index 086c2da..305a04e 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -1532,6 +1532,10 @@ struct net_device *geneve_dev_create_fb(struct net *net, const char *name,
 	if (err)
 		goto err;
 
+	err = rtnl_configure_link(dev, NULL);
+	if (err < 0)
+		goto err;
+
 	return dev;
 
  err:
-- 
cgit v0.10.2


From 775711497202fe376368c25b0c7296ed8803e0ba Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 10 Jun 2016 17:25:19 +0200
Subject: netfilter: conntrack: destroy kmemcache on module removal

I forgot to move the kmem_cache_destroy into the exit path.

Fixes: 0c5366b3a8c7 ("netfilter: conntrack: use single slab cache)
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>

diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index db2312e..f204274 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1544,6 +1544,8 @@ void nf_conntrack_cleanup_end(void)
 	nf_conntrack_tstamp_fini();
 	nf_conntrack_acct_fini();
 	nf_conntrack_expect_fini();
+
+	kmem_cache_destroy(nf_conntrack_cachep);
 }
 
 /*
-- 
cgit v0.10.2


From a46844021f6182cca7b575295ba33a4734b1b9d9 Mon Sep 17 00:00:00 2001
From: Liping Zhang <liping.zhang@spreadtrum.com>
Date: Sat, 11 Jun 2016 12:20:26 +0800
Subject: netfilter: nf_tables: fix wrong check of NFT_SET_MAP in
 nf_tables_bind_set

We should check "i" is used as a dictionary or not, "binding" is already
checked before.

Signed-off-by: Liping Zhang <liping.zhang@spreadtrum.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 7b7aa87..492f6f8 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2946,7 +2946,7 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
 		 * jumps are already validated for that chain.
 		 */
 		list_for_each_entry(i, &set->bindings, list) {
-			if (binding->flags & NFT_SET_MAP &&
+			if (i->flags & NFT_SET_MAP &&
 			    i->chain == binding->chain)
 				goto bind;
 		}
-- 
cgit v0.10.2


From 8588ac097b49ce8802f11541d9cd6f6667badb34 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Sat, 11 Jun 2016 12:20:27 +0800
Subject: netfilter: nf_tables: reject loops from set element jump to chain

Liping Zhang says:

"Users may add such a wrong nft rules successfully, which will cause an
endless jump loop:

  # nft add rule filter test tcp dport vmap {1: jump test}

This is because before we commit, the element in the current anonymous
set is inactive, so osp->walk will skip this element and miss the
validate check."

To resolve this problem, this patch passes the generation mask to the
walk function through the iter container structure depending on the code
path:

1) If we're dumping the elements, then we have to check if the element
   is active in the current generation. Thus, we check for the current
   bit in the genmask.

2) If we're checking for loops, then we have to check if the element is
   active in the next generation, as we're in the middle of a
   transaction. Thus, we check for the next bit in the genmask.

Based on original patch from Liping Zhang.

Reported-by: Liping Zhang <liping.zhang@spreadtrum.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Tested-by: Liping Zhang <liping.zhang@spreadtrum.com>

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 0922354..f7c291f 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -167,6 +167,7 @@ struct nft_set_elem {
 
 struct nft_set;
 struct nft_set_iter {
+	u8		genmask;
 	unsigned int	count;
 	unsigned int	skip;
 	int		err;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 492f6f8..0fd6998 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2951,6 +2951,7 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
 				goto bind;
 		}
 
+		iter.genmask	= nft_genmask_next(ctx->net);
 		iter.skip 	= 0;
 		iter.count	= 0;
 		iter.err	= 0;
@@ -3192,12 +3193,13 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
 	if (nest == NULL)
 		goto nla_put_failure;
 
-	args.cb		= cb;
-	args.skb	= skb;
-	args.iter.skip	= cb->args[0];
-	args.iter.count	= 0;
-	args.iter.err   = 0;
-	args.iter.fn	= nf_tables_dump_setelem;
+	args.cb			= cb;
+	args.skb		= skb;
+	args.iter.genmask	= nft_genmask_cur(ctx.net);
+	args.iter.skip		= cb->args[0];
+	args.iter.count		= 0;
+	args.iter.err		= 0;
+	args.iter.fn		= nf_tables_dump_setelem;
 	set->ops->walk(&ctx, set, &args.iter);
 
 	nla_nest_end(skb, nest);
@@ -4284,6 +4286,7 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx,
 			    binding->chain != chain)
 				continue;
 
+			iter.genmask	= nft_genmask_next(ctx->net);
 			iter.skip 	= 0;
 			iter.count	= 0;
 			iter.err	= 0;
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index 6fa0165..f39c53a 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -189,7 +189,6 @@ static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set,
 	struct nft_hash_elem *he;
 	struct rhashtable_iter hti;
 	struct nft_set_elem elem;
-	u8 genmask = nft_genmask_cur(read_pnet(&set->pnet));
 	int err;
 
 	err = rhashtable_walk_init(&priv->ht, &hti, GFP_KERNEL);
@@ -218,7 +217,7 @@ static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set,
 			goto cont;
 		if (nft_set_elem_expired(&he->ext))
 			goto cont;
-		if (!nft_set_elem_active(&he->ext, genmask))
+		if (!nft_set_elem_active(&he->ext, iter->genmask))
 			goto cont;
 
 		elem.priv = he;
diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c
index f762094..7201d57 100644
--- a/net/netfilter/nft_rbtree.c
+++ b/net/netfilter/nft_rbtree.c
@@ -211,7 +211,6 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx,
 	struct nft_rbtree_elem *rbe;
 	struct nft_set_elem elem;
 	struct rb_node *node;
-	u8 genmask = nft_genmask_cur(read_pnet(&set->pnet));
 
 	spin_lock_bh(&nft_rbtree_lock);
 	for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) {
@@ -219,7 +218,7 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx,
 
 		if (iter->count < iter->skip)
 			goto cont;
-		if (!nft_set_elem_active(&rbe->ext, genmask))
+		if (!nft_set_elem_active(&rbe->ext, iter->genmask))
 			goto cont;
 
 		elem.priv = rbe;
-- 
cgit v0.10.2


From a02f424863610a0a7abd80c468839e59cfa4d0d8 Mon Sep 17 00:00:00 2001
From: Liping Zhang <liping.zhang@spreadtrum.com>
Date: Sat, 11 Jun 2016 12:20:28 +0800
Subject: netfilter: nf_tables: fix wrong destroy anonymous sets if binding
 fails

When we add a nft rule like follows:
  # nft add rule filter test tcp dport vmap {1: jump test}
-ELOOP error will be returned, and the anonymous set will be
destroyed.

But after that, nf_tables_abort will also try to remove the
element and destroy the set, which was already destroyed and
freed.

If we add a nft wrong rule, nft_tables_abort will do the cleanup
work rightly, so nf_tables_set_destroy call here is redundant and
wrong, remove it.

Signed-off-by: Liping Zhang <liping.zhang@spreadtrum.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 0fd6998..2c88187 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2958,13 +2958,8 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
 		iter.fn		= nf_tables_bind_check_setelem;
 
 		set->ops->walk(ctx, set, &iter);
-		if (iter.err < 0) {
-			/* Destroy anonymous sets if binding fails */
-			if (set->flags & NFT_SET_ANONYMOUS)
-				nf_tables_set_destroy(ctx, set);
-
+		if (iter.err < 0)
 			return iter.err;
-		}
 	}
 bind:
 	binding->chain = ctx->chain;
-- 
cgit v0.10.2


From 8fff1722f705ce5023a0d6d77a31a9d013be2a34 Mon Sep 17 00:00:00 2001
From: Liping Zhang <liping.zhang@spreadtrum.com>
Date: Tue, 14 Jun 2016 20:13:04 +0800
Subject: netfilter: nf_tables: fix a wrong check to skip the inactive rules

nft_genmask_cur has already done left-shift operator on the gencursor,
so there's no need to do left-shift operator on it again.

Fixes: ea4bd995b0f2 ("netfilter: nf_tables: add transaction helper functions")
Cc: Patrick McHardy <kaber@trash.net>
Signed-off-by: Liping Zhang <liping.zhang@spreadtrum.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>

diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index e9f8dff..fb8b589 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -143,7 +143,7 @@ next_rule:
 	list_for_each_entry_continue_rcu(rule, &chain->rules, list) {
 
 		/* This rule is not active, skip. */
-		if (unlikely(rule->genmask & (1 << gencursor)))
+		if (unlikely(rule->genmask & gencursor))
 			continue;
 
 		rulenum++;
-- 
cgit v0.10.2


From 695e9df010e40f407f4830dc11d53dce957710ba Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 10 Jun 2016 12:21:40 -0500
Subject: mnt: Account for MS_RDONLY in fs_fully_visible

In rare cases it is possible for s_flags & MS_RDONLY to be set but
MNT_READONLY to be clear.  This starting combination can cause
fs_fully_visible to fail to ensure that the new mount is readonly.
Therefore force MNT_LOCK_READONLY in the new mount if MS_RDONLY
is set on the source filesystem of the mount.

In general both MS_RDONLY and MNT_READONLY are set at the same for
mounts so I don't expect any programs to care.  Nor do I expect
MS_RDONLY to be set on proc or sysfs in the initial user namespace,
which further decreases the likelyhood of problems.

Which means this change should only affect system configurations by
paranoid sysadmins who should welcome the additional protection
as it keeps people from wriggling out of their policies.

Cc: stable@vger.kernel.org
Fixes: 8c6cf9cc829f ("mnt: Modify fs_fully_visible to deal with locked ro nodev and atime")
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>

diff --git a/fs/namespace.c b/fs/namespace.c
index a7ec92c..783004a 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -3247,6 +3247,10 @@ static bool fs_fully_visible(struct file_system_type *type, int *new_mnt_flags)
 		if (mnt->mnt.mnt_sb->s_iflags & SB_I_NOEXEC)
 			mnt_flags &= ~(MNT_LOCK_NOSUID | MNT_LOCK_NOEXEC);
 
+		/* Don't miss readonly hidden in the superblock flags */
+		if (mnt->mnt.mnt_sb->s_flags & MS_RDONLY)
+			mnt_flags |= MNT_LOCK_READONLY;
+
 		/* Verify the mount flags are equal to or more permissive
 		 * than the proposed new mount.
 		 */
-- 
cgit v0.10.2


From f52a4c74efbb61195490535e9d65d964c4ebfee3 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <yongjun_wei@trendmicro.com.cn>
Date: Tue, 14 Jun 2016 00:26:49 +0000
Subject: ata: fix return value check in ahci_seattle_get_port_info()

In case of error, the function devm_kzalloc() returns NULL pointer
not ERR_PTR(). The IS_ERR() test in the return value check should
be replaced with NULL test.

Signed-off-by: Wei Yongjun <yongjun_wei@trendmicro.com.cn>
Acked-by: Brijesh Singh <brijesh.singh@amd.com>
Signed-off-by: Tejun Heo <tj@kernel.org>

diff --git a/drivers/ata/ahci_seattle.c b/drivers/ata/ahci_seattle.c
index 6e702ab..1d31c0c 100644
--- a/drivers/ata/ahci_seattle.c
+++ b/drivers/ata/ahci_seattle.c
@@ -137,7 +137,7 @@ static const struct ata_port_info *ahci_seattle_get_port_info(
 	u32 val;
 
 	plat_data = devm_kzalloc(dev, sizeof(*plat_data), GFP_KERNEL);
-	if (IS_ERR(plat_data))
+	if (!plat_data)
 		return &ahci_port_info;
 
 	plat_data->sgpio_ctrl = devm_ioremap_resource(dev,
-- 
cgit v0.10.2


From 0c5ddb51e8f7be7170600f95a4ea92e5a32afad8 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 13 Jun 2016 07:50:25 -0700
Subject: net/mlx4_en: initialize cmd.context_lock spinlock earlier
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Maciej Żenczykowski reported lockdep warning a spinlock
was not registered before being held in mlx4_cmd_wake_completions()

cmd.context_lock initialization is not at the right place.

1) mlx4_cmd_use_events() can be called multiple times.
   Calling spin_lock_init() on a live spinlock can lead
   to hangs.

2) mlx4_cmd_wake_completions() can be called while lock
   has not been initialized.
   Lockdep complains, and current logic is not race prone.

It seems better to move the initialization earlier in
mlx4_load_one()

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Maciej Żenczykowski <maze@google.com>
Cc: Eugenia Emantayev <eugenia@mellanox.com>
Cc: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c
index e94ca1c..f04a423 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -2597,7 +2597,6 @@ int mlx4_cmd_use_events(struct mlx4_dev *dev)
 	priv->cmd.free_head = 0;
 
 	sema_init(&priv->cmd.event_sem, priv->cmd.max_cmds);
-	spin_lock_init(&priv->cmd.context_lock);
 
 	for (priv->cmd.token_mask = 1;
 	     priv->cmd.token_mask < priv->cmd.max_cmds;
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 12c77a7..372ebfa 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -3222,6 +3222,7 @@ static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data,
 
 	INIT_LIST_HEAD(&priv->pgdir_list);
 	mutex_init(&priv->pgdir_mutex);
+	spin_lock_init(&priv->cmd.context_lock);
 
 	INIT_LIST_HEAD(&priv->bf_list);
 	mutex_init(&priv->bf_mutex);
-- 
cgit v0.10.2


From 3d7c8257d999bdf8fa77ffd9be775c7b58cc7b69 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 13 Jun 2016 11:33:32 -0700
Subject: net_sched: prio: insure proper transactional behavior

Now prio_init() can return -ENOMEM, it also has to make sure
any allocated qdiscs are freed, since the caller (qdisc_create()) wont
call ->destroy() handler for us.

More generally, we want a transactional behavior for "tc qdisc
change ...", so prio_tune() should not make modifications if
any error is returned.

It means that we must validate parameters and allocate missing qdisc(s)
before taking root qdisc lock exactly once, to not leave the prio qdisc
in an intermediate state.

Fixes: cbdf45116478 ("net_sched: prio: properly report out of memory errors")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Cong Wang <xiyou.wangcong@gmail.com>
Acked-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 071718b..a356450 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -172,8 +172,9 @@ prio_destroy(struct Qdisc *sch)
 static int prio_tune(struct Qdisc *sch, struct nlattr *opt)
 {
 	struct prio_sched_data *q = qdisc_priv(sch);
+	struct Qdisc *queues[TCQ_PRIO_BANDS];
+	int oldbands = q->bands, i;
 	struct tc_prio_qopt *qopt;
-	int i;
 
 	if (nla_len(opt) < sizeof(*qopt))
 		return -EINVAL;
@@ -187,54 +188,42 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt)
 			return -EINVAL;
 	}
 
+	/* Before commit, make sure we can allocate all new qdiscs */
+	for (i = oldbands; i < qopt->bands; i++) {
+		queues[i] = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
+					      TC_H_MAKE(sch->handle, i + 1));
+		if (!queues[i]) {
+			while (i > oldbands)
+				qdisc_destroy(queues[--i]);
+			return -ENOMEM;
+		}
+	}
+
 	sch_tree_lock(sch);
 	q->bands = qopt->bands;
 	memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1);
 
-	for (i = q->bands; i < TCQ_PRIO_BANDS; i++) {
+	for (i = q->bands; i < oldbands; i++) {
 		struct Qdisc *child = q->queues[i];
-		q->queues[i] = &noop_qdisc;
-		if (child != &noop_qdisc) {
-			qdisc_tree_reduce_backlog(child, child->q.qlen, child->qstats.backlog);
-			qdisc_destroy(child);
-		}
-	}
-	sch_tree_unlock(sch);
 
-	for (i = 0; i < q->bands; i++) {
-		struct Qdisc *child;
+		qdisc_tree_reduce_backlog(child, child->q.qlen,
+					  child->qstats.backlog);
+		qdisc_destroy(child);
+	}
 
-		if (q->queues[i] != &noop_qdisc)
-			continue;
+	for (i = oldbands; i < q->bands; i++)
+		q->queues[i] = queues[i];
 
-		child = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
-					  TC_H_MAKE(sch->handle, i + 1));
-		if (!child)
-			return -ENOMEM;
-		sch_tree_lock(sch);
-		q->queues[i] = child;
-		sch_tree_unlock(sch);
-	}
+	sch_tree_unlock(sch);
 	return 0;
 }
 
 static int prio_init(struct Qdisc *sch, struct nlattr *opt)
 {
-	struct prio_sched_data *q = qdisc_priv(sch);
-	int i;
-
-	for (i = 0; i < TCQ_PRIO_BANDS; i++)
-		q->queues[i] = &noop_qdisc;
-
-	if (opt == NULL) {
+	if (!opt)
 		return -EINVAL;
-	} else {
-		int err;
 
-		if ((err = prio_tune(sch, opt)) != 0)
-			return err;
-	}
-	return 0;
+	return prio_tune(sch, opt);
 }
 
 static int prio_dump(struct Qdisc *sch, struct sk_buff *skb)
-- 
cgit v0.10.2


From d15eccea69b96a5116169688dcc9baf6d1ce2751 Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Mon, 13 Jun 2016 13:44:14 -0700
Subject: act_ipt: fix a bind refcnt leak

And avoid calling tcf_hash_check() twice.

Fixes: a57f19d30b2d ("net sched: ipt action fix late binding")
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 9f002ad..d4bd19e 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -121,10 +121,13 @@ static int __tcf_ipt_init(struct tc_action_net *tn, struct nlattr *nla,
 	}
 
 	td = (struct xt_entry_target *)nla_data(tb[TCA_IPT_TARG]);
-	if (nla_len(tb[TCA_IPT_TARG]) < td->u.target_size)
+	if (nla_len(tb[TCA_IPT_TARG]) < td->u.target_size) {
+		if (exists)
+			tcf_hash_release(a, bind);
 		return -EINVAL;
+	}
 
-	if (!tcf_hash_check(tn, index, a, bind)) {
+	if (!exists) {
 		ret = tcf_hash_create(tn, index, est, a, sizeof(*ipt), bind,
 				      false);
 		if (ret)
-- 
cgit v0.10.2


From ebecaa6662b0a9c3590bd644a4cec6f9d96818b7 Mon Sep 17 00:00:00 2001
From: Jamal Hadi Salim <jhs@mojatatu.com>
Date: Mon, 13 Jun 2016 18:08:42 -0400
Subject: net sched actions: bug fix dumping actions directly didnt produce
 NLMSG_DONE

This refers to commands to direct action access as follows:

sudo tc actions add action drop index 12
sudo tc actions add action pipe index 10

And then dumping them like so:
sudo tc actions ls action gact

iproute2 worked because it depended on absence of TCA_ACT_TAB TLV
as end of message.
This fix has been tested with iproute2 and is backward compatible.

Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
Acked-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 336774a..c7a0b0d 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -1118,7 +1118,7 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
 		nla_nest_end(skb, nest);
 		ret = skb->len;
 	} else
-		nla_nest_cancel(skb, nest);
+		nlmsg_trim(skb, b);
 
 	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 	if (NETLINK_CB(cb->skb).portid && ret)
-- 
cgit v0.10.2


From 0ee13627f963f9c5c9544ed19d82854836d5e676 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 14 Jun 2016 06:16:27 +0200
Subject: htb: call qdisc_root with rcu read lock held

saw a debug splat:
net/include/net/sch_generic.h:287 suspicious rcu_dereference_check() usage!
other info that might help us debug this:
rcu_scheduler_active = 1, debug_locks = 0
 2 locks held by kworker/2:1/710:
  #0:  ("events"){.+.+.+}, at: [<ffffffff8106ca1d>]
  #1:  ((&q->work)){+.+...}, at: [<ffffffff8106ca1d>] process_one_work+0x14d/0x690
Workqueue: events htb_work_func
Call Trace:
 [<ffffffff812dc763>] dump_stack+0x85/0xc2
 [<ffffffff8109fee7>] lockdep_rcu_suspicious+0xe7/0x120
 [<ffffffff814ced47>] htb_work_func+0x67/0x70

Signed-off-by: Florian Westphal <fw@strlen.de>
Acked-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index d4b4218..62f9d81 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1007,7 +1007,9 @@ static void htb_work_func(struct work_struct *work)
 	struct htb_sched *q = container_of(work, struct htb_sched, work);
 	struct Qdisc *sch = q->watchdog.qdisc;
 
+	rcu_read_lock();
 	__netif_schedule(qdisc_root(sch));
+	rcu_read_unlock();
 }
 
 static int htb_init(struct Qdisc *sch, struct nlattr *opt)
-- 
cgit v0.10.2


From b196c22af5c3ff784c472c80f6fb4e5fad67b2ac Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Tue, 14 Jun 2016 15:25:14 +0200
Subject: macsec: add rcu_barrier() on module exit

Without this, the various uses of call_rcu could cause a kernel panic.

Fixes: c09440f7dcb3 ("macsec: introduce IEEE 802.1AE driver")
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
index 47ee2c8..e80736f 100644
--- a/drivers/net/macsec.c
+++ b/drivers/net/macsec.c
@@ -3361,6 +3361,7 @@ static void __exit macsec_exit(void)
 	genl_unregister_family(&macsec_fam);
 	rtnl_link_unregister(&macsec_link_ops);
 	unregister_netdevice_notifier(&macsec_notifier);
+	rcu_barrier();
 }
 
 module_init(macsec_init);
-- 
cgit v0.10.2


From 5d9649b3a524df9ccd15ac25ad6591089260fdbd Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Tue, 14 Jun 2016 15:25:15 +0200
Subject: macsec: allocate sg and iv on the heap

For the crypto callbacks to work properly, we cannot have sg and iv on
the stack.  Use kmalloc instead, with a single allocation for
aead_request + scatterlist + iv.

Fixes: c09440f7dcb3 ("macsec: introduce IEEE 802.1AE driver")
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
index e80736f..189ea3e 100644
--- a/drivers/net/macsec.c
+++ b/drivers/net/macsec.c
@@ -605,12 +605,41 @@ static void macsec_encrypt_done(struct crypto_async_request *base, int err)
 	dev_put(dev);
 }
 
+static struct aead_request *macsec_alloc_req(struct crypto_aead *tfm,
+					     unsigned char **iv,
+					     struct scatterlist **sg)
+{
+	size_t size, iv_offset, sg_offset;
+	struct aead_request *req;
+	void *tmp;
+
+	size = sizeof(struct aead_request) + crypto_aead_reqsize(tfm);
+	iv_offset = size;
+	size += GCM_AES_IV_LEN;
+
+	size = ALIGN(size, __alignof__(struct scatterlist));
+	sg_offset = size;
+	size += sizeof(struct scatterlist) * (MAX_SKB_FRAGS + 1);
+
+	tmp = kmalloc(size, GFP_ATOMIC);
+	if (!tmp)
+		return NULL;
+
+	*iv = (unsigned char *)(tmp + iv_offset);
+	*sg = (struct scatterlist *)(tmp + sg_offset);
+	req = tmp;
+
+	aead_request_set_tfm(req, tfm);
+
+	return req;
+}
+
 static struct sk_buff *macsec_encrypt(struct sk_buff *skb,
 				      struct net_device *dev)
 {
 	int ret;
-	struct scatterlist sg[MAX_SKB_FRAGS + 1];
-	unsigned char iv[GCM_AES_IV_LEN];
+	struct scatterlist *sg;
+	unsigned char *iv;
 	struct ethhdr *eth;
 	struct macsec_eth_header *hh;
 	size_t unprotected_len;
@@ -668,8 +697,6 @@ static struct sk_buff *macsec_encrypt(struct sk_buff *skb,
 	macsec_fill_sectag(hh, secy, pn);
 	macsec_set_shortlen(hh, unprotected_len - 2 * ETH_ALEN);
 
-	macsec_fill_iv(iv, secy->sci, pn);
-
 	skb_put(skb, secy->icv_len);
 
 	if (skb->len - ETH_HLEN > macsec_priv(dev)->real_dev->mtu) {
@@ -684,13 +711,15 @@ static struct sk_buff *macsec_encrypt(struct sk_buff *skb,
 		return ERR_PTR(-EINVAL);
 	}
 
-	req = aead_request_alloc(tx_sa->key.tfm, GFP_ATOMIC);
+	req = macsec_alloc_req(tx_sa->key.tfm, &iv, &sg);
 	if (!req) {
 		macsec_txsa_put(tx_sa);
 		kfree_skb(skb);
 		return ERR_PTR(-ENOMEM);
 	}
 
+	macsec_fill_iv(iv, secy->sci, pn);
+
 	sg_init_table(sg, MAX_SKB_FRAGS + 1);
 	skb_to_sgvec(skb, sg, 0, skb->len);
 
@@ -861,7 +890,6 @@ static void macsec_decrypt_done(struct crypto_async_request *base, int err)
 out:
 	macsec_rxsa_put(rx_sa);
 	dev_put(dev);
-	return;
 }
 
 static struct sk_buff *macsec_decrypt(struct sk_buff *skb,
@@ -871,8 +899,8 @@ static struct sk_buff *macsec_decrypt(struct sk_buff *skb,
 				      struct macsec_secy *secy)
 {
 	int ret;
-	struct scatterlist sg[MAX_SKB_FRAGS + 1];
-	unsigned char iv[GCM_AES_IV_LEN];
+	struct scatterlist *sg;
+	unsigned char *iv;
 	struct aead_request *req;
 	struct macsec_eth_header *hdr;
 	u16 icv_len = secy->icv_len;
@@ -882,7 +910,7 @@ static struct sk_buff *macsec_decrypt(struct sk_buff *skb,
 	if (!skb)
 		return ERR_PTR(-ENOMEM);
 
-	req = aead_request_alloc(rx_sa->key.tfm, GFP_ATOMIC);
+	req = macsec_alloc_req(rx_sa->key.tfm, &iv, &sg);
 	if (!req) {
 		kfree_skb(skb);
 		return ERR_PTR(-ENOMEM);
-- 
cgit v0.10.2


From 6052f7fbce857e327218a9d8a040e210ea7cc718 Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Tue, 14 Jun 2016 15:25:16 +0200
Subject: macsec: fix SA initialization

The ASYNC flag prevents initialization on some physical machines.

Fixes: c09440f7dcb3 ("macsec: introduce IEEE 802.1AE driver")
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
index 189ea3e..0e7eff7 100644
--- a/drivers/net/macsec.c
+++ b/drivers/net/macsec.c
@@ -1262,7 +1262,7 @@ static struct crypto_aead *macsec_alloc_tfm(char *key, int key_len, int icv_len)
 	struct crypto_aead *tfm;
 	int ret;
 
-	tfm = crypto_alloc_aead("gcm(aes)", 0, CRYPTO_ALG_ASYNC);
+	tfm = crypto_alloc_aead("gcm(aes)", 0, 0);
 	if (!tfm || IS_ERR(tfm))
 		return NULL;
 
-- 
cgit v0.10.2


From 66d95b6705a6347f7b2645e042874ec0bb03b726 Mon Sep 17 00:00:00 2001
From: Ying Xue <ying.xue@windriver.com>
Date: Wed, 15 Jun 2016 14:10:57 +0800
Subject: tipc: fix suspicious RCU usage

When run tipcTS&tipcTC test suite, the following complaint appears:

[   56.926168] ===============================
[   56.926169] [ INFO: suspicious RCU usage. ]
[   56.926171] 4.7.0-rc1+ #160 Not tainted
[   56.926173] -------------------------------
[   56.926174] net/tipc/bearer.c:408 suspicious rcu_dereference_protected() usage!
[   56.926175]
[   56.926175] other info that might help us debug this:
[   56.926175]
[   56.926177]
[   56.926177] rcu_scheduler_active = 1, debug_locks = 1
[   56.926179] 3 locks held by swapper/4/0:
[   56.926180]  #0:  (((&req->timer))){+.-...}, at: [<ffffffff810e79b5>] call_timer_fn+0x5/0x340
[   56.926203]  #1:  (&(&req->lock)->rlock){+.-...}, at: [<ffffffffa000c29b>] disc_timeout+0x1b/0xd0 [tipc]
[   56.926212]  #2:  (rcu_read_lock){......}, at: [<ffffffffa00055e0>] tipc_bearer_xmit_skb+0xb0/0x2e0 [tipc]
[   56.926218]
[   56.926218] stack backtrace:
[   56.926221] CPU: 4 PID: 0 Comm: swapper/4 Not tainted 4.7.0-rc1+ #160
[   56.926222] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2007
[   56.926224]  0000000000000000 ffff880016803d28 ffffffff813c4423 ffff8800154252c0
[   56.926227]  0000000000000001 ffff880016803d58 ffffffff810b7512 ffff8800124d8120
[   56.926230]  ffff880013f8a160 ffff8800132b5ccc ffff8800124d8120 ffff880016803d88
[   56.926234] Call Trace:
[   56.926235]  <IRQ>  [<ffffffff813c4423>] dump_stack+0x67/0x94
[   56.926250]  [<ffffffff810b7512>] lockdep_rcu_suspicious+0xe2/0x120
[   56.926256]  [<ffffffffa00051f1>] tipc_l2_send_msg+0x131/0x1c0 [tipc]
[   56.926261]  [<ffffffffa000567c>] tipc_bearer_xmit_skb+0x14c/0x2e0 [tipc]
[   56.926266]  [<ffffffffa00055e0>] ? tipc_bearer_xmit_skb+0xb0/0x2e0 [tipc]
[   56.926273]  [<ffffffffa000c280>] ? tipc_disc_init_msg+0x1f0/0x1f0 [tipc]
[   56.926278]  [<ffffffffa000c280>] ? tipc_disc_init_msg+0x1f0/0x1f0 [tipc]
[   56.926283]  [<ffffffffa000c2d6>] disc_timeout+0x56/0xd0 [tipc]
[   56.926288]  [<ffffffff810e7a68>] call_timer_fn+0xb8/0x340
[   56.926291]  [<ffffffff810e79b5>] ? call_timer_fn+0x5/0x340
[   56.926296]  [<ffffffffa000c280>] ? tipc_disc_init_msg+0x1f0/0x1f0 [tipc]
[   56.926300]  [<ffffffff810e8f4a>] run_timer_softirq+0x23a/0x390
[   56.926306]  [<ffffffff810f89ff>] ? clockevents_program_event+0x7f/0x130
[   56.926316]  [<ffffffff819727c3>] __do_softirq+0xc3/0x4a2
[   56.926323]  [<ffffffff8106ba5a>] irq_exit+0x8a/0xb0
[   56.926327]  [<ffffffff81972456>] smp_apic_timer_interrupt+0x46/0x60
[   56.926331]  [<ffffffff81970a49>] apic_timer_interrupt+0x89/0x90
[   56.926333]  <EOI>  [<ffffffff81027fda>] ? default_idle+0x2a/0x1a0
[   56.926340]  [<ffffffff81027fd8>] ? default_idle+0x28/0x1a0
[   56.926342]  [<ffffffff810289cf>] arch_cpu_idle+0xf/0x20
[   56.926345]  [<ffffffff810adf0f>] default_idle_call+0x2f/0x50
[   56.926347]  [<ffffffff810ae145>] cpu_startup_entry+0x215/0x3e0
[   56.926353]  [<ffffffff81040ad9>] start_secondary+0xf9/0x100

The warning appears as rtnl_dereference() is wrongly used in
tipc_l2_send_msg() under RCU read lock protection. Instead the proper
usage should be that rcu_dereference_rtnl() is called here.

Fixes: 5b7066c3dd24 ("tipc: stricter filtering of packets in bearer layer")
Acked-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 6f11c62..bf8f05c 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -405,7 +405,7 @@ int tipc_l2_send_msg(struct net *net, struct sk_buff *skb,
 		return 0;
 
 	/* Send RESET message even if bearer is detached from device */
-	tipc_ptr = rtnl_dereference(dev->tipc_ptr);
+	tipc_ptr = rcu_dereference_rtnl(dev->tipc_ptr);
 	if (unlikely(!tipc_ptr && !msg_is_reset(buf_msg(skb))))
 		goto drop;
 
-- 
cgit v0.10.2


From c91522f860bb9dd4178c8280bbebd4f4321b7199 Mon Sep 17 00:00:00 2001
From: Ying Xue <ying.xue@windriver.com>
Date: Wed, 15 Jun 2016 14:11:31 +0800
Subject: tipc: eliminate uninitialized variable warning
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

net/tipc/link.c: In function ‘tipc_link_timeout’:
net/tipc/link.c:744:28: warning: ‘mtyp’ may be used uninitialized in this function [-Wuninitialized]

Fixes: 42b18f605fea ("tipc: refactor function tipc_link_timeout()")
Acked-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/tipc/link.c b/net/tipc/link.c
index 7059c94..67b6ab9 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -704,7 +704,8 @@ static void link_profile_stats(struct tipc_link *l)
  */
 int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq)
 {
-	int mtyp, rc = 0;
+	int mtyp = 0;
+	int rc = 0;
 	bool state = false;
 	bool probe = false;
 	bool setup = false;
-- 
cgit v0.10.2


From 4a1836701fd59476ee1331379e00a9ab51ba4f61 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 15 Jun 2016 17:45:51 +0200
Subject: net: skfb: remove obsolete -I cflag

The skfp driver has been moved to drivers/net/fddi/skfp a long time
ago, but we still attempt to include headers from the old location,
which causes a warning when building with W=1:

cc1: error: /git/arm-soc/drivers/net/skfp: No such file or directory [-Werror=missing-include-dirs]
cc1: error: drivers/net/skfp: No such file or directory [-Werror=missing-include-dirs]

Clearly this include directive is not needed any more, so we can
just remove it now.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/fddi/skfp/Makefile b/drivers/net/fddi/skfp/Makefile
index b0be023..a957a1c 100644
--- a/drivers/net/fddi/skfp/Makefile
+++ b/drivers/net/fddi/skfp/Makefile
@@ -17,4 +17,4 @@ skfp-objs :=  skfddi.o    hwmtm.o    fplustm.o  smt.o      cfm.o     \
 #   projects. To keep the source common for all those drivers (and
 #   thus simplify fixes to it), please do not clean it up!
 
-ccflags-y := -Idrivers/net/skfp -DPCI -DMEM_MAPPED_IO -Wno-strict-prototypes
+ccflags-y := -DPCI -DMEM_MAPPED_IO -Wno-strict-prototypes
-- 
cgit v0.10.2


From daddef76c3deaaa7922f9d7b18edbf0a061215c3 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Wed, 15 Jun 2016 11:14:53 +0200
Subject: net: Don't forget pr_fmt on net_dbg_ratelimited for
 CONFIG_DYNAMIC_DEBUG

The implementation of net_dbg_ratelimited in the CONFIG_DYNAMIC_DEBUG
case was added with 2c94b5373 ("net: Implement net_dbg_ratelimited() for
CONFIG_DYNAMIC_DEBUG case"). The implementation strategy was to take the
usual definition of the dynamic_pr_debug macro, but alter it by adding a
call to "net_ratelimit()" in the if statement. This is, in fact, the
correct approach.

However, while doing this, the author of the commit forgot to surround
fmt by pr_fmt, resulting in unprefixed log messages appearing in the
console. So, this commit adds back the pr_fmt(fmt) invocation, making
net_dbg_ratelimited properly consistent across DEBUG, no DEBUG, and
DYNAMIC_DEBUG cases, and bringing parity with the behavior of
dynamic_pr_debug as well.

Fixes: 2c94b5373 ("net: Implement net_dbg_ratelimited() for CONFIG_DYNAMIC_DEBUG case")
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Cc: Tim Bingham <tbingham@akamai.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/linux/net.h b/include/linux/net.h
index 9aa49a0..25aa03b 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -251,7 +251,8 @@ do {									\
 	DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, fmt);			\
 	if (unlikely(descriptor.flags & _DPRINTK_FLAGS_PRINT) &&	\
 	    net_ratelimit())						\
-		__dynamic_pr_debug(&descriptor, fmt, ##__VA_ARGS__);	\
+		__dynamic_pr_debug(&descriptor, pr_fmt(fmt),		\
+		                   ##__VA_ARGS__);			\
 } while (0)
 #elif defined(DEBUG)
 #define net_dbg_ratelimited(fmt, ...)				\
-- 
cgit v0.10.2


From e582615ad33dbd39623084a02e95567b116e1eea Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 15 Jun 2016 06:24:00 -0700
Subject: gre: fix error handler
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

1) gre_parse_header() can be called from gre_err()

   At this point transport header points to ICMP header, not the inner
header.

2) We can not really change transport header as ipgre_err() will later
assume transport header still points to ICMP header (using icmp_hdr())

3) pskb_may_pull() logic in gre_parse_header() really works
  if we are interested at zone pointed by skb->data

4) As Jiri explained in commit b7f8fe251e46 ("gre: do not pull header in
ICMP error processing") we should not pull headers in error handler.

So this fix :

A) changes gre_parse_header() to use skb->data instead of
skb_transport_header()

B) Adds a nhs parameter to gre_parse_header() so that we can skip the
not pulled IP header from error path.
  This offset is 0 for normal receive path.

C) remove obsolete IPV6 includes

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Tom Herbert <tom@herbertland.com>
Cc: Maciej Żenczykowski <maze@google.com>
Cc: Jiri Benc <jbenc@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/net/gre.h b/include/net/gre.h
index 5dce30a..7a54a31 100644
--- a/include/net/gre.h
+++ b/include/net/gre.h
@@ -26,7 +26,7 @@ int gre_del_protocol(const struct gre_protocol *proto, u8 version);
 struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
 				       u8 name_assign_type);
 int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
-		     bool *csum_err, __be16 proto);
+		     bool *csum_err, __be16 proto, int nhs);
 
 static inline int gre_calc_hlen(__be16 o_flags)
 {
diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c
index 4c39f4f..de1d119 100644
--- a/net/ipv4/gre_demux.c
+++ b/net/ipv4/gre_demux.c
@@ -62,26 +62,26 @@ EXPORT_SYMBOL_GPL(gre_del_protocol);
 
 /* Fills in tpi and returns header length to be pulled. */
 int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
-		     bool *csum_err, __be16 proto)
+		     bool *csum_err, __be16 proto, int nhs)
 {
 	const struct gre_base_hdr *greh;
 	__be32 *options;
 	int hdr_len;
 
-	if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr))))
+	if (unlikely(!pskb_may_pull(skb, nhs + sizeof(struct gre_base_hdr))))
 		return -EINVAL;
 
-	greh = (struct gre_base_hdr *)skb_transport_header(skb);
+	greh = (struct gre_base_hdr *)(skb->data + nhs);
 	if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING)))
 		return -EINVAL;
 
 	tpi->flags = gre_flags_to_tnl_flags(greh->flags);
 	hdr_len = gre_calc_hlen(tpi->flags);
 
-	if (!pskb_may_pull(skb, hdr_len))
+	if (!pskb_may_pull(skb, nhs + hdr_len))
 		return -EINVAL;
 
-	greh = (struct gre_base_hdr *)skb_transport_header(skb);
+	greh = (struct gre_base_hdr *)(skb->data + nhs);
 	tpi->proto = greh->protocol;
 
 	options = (__be32 *)(greh + 1);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 07c5cf1..1d000af 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -49,12 +49,6 @@
 #include <net/gre.h>
 #include <net/dst_metadata.h>
 
-#if IS_ENABLED(CONFIG_IPV6)
-#include <net/ipv6.h>
-#include <net/ip6_fib.h>
-#include <net/ip6_route.h>
-#endif
-
 /*
    Problems & solutions
    --------------------
@@ -217,12 +211,14 @@ static void gre_err(struct sk_buff *skb, u32 info)
 	 * by themselves???
 	 */
 
+	const struct iphdr *iph = (struct iphdr *)skb->data;
 	const int type = icmp_hdr(skb)->type;
 	const int code = icmp_hdr(skb)->code;
 	struct tnl_ptk_info tpi;
 	bool csum_err = false;
 
-	if (gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP)) < 0) {
+	if (gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP),
+			     iph->ihl * 4) < 0) {
 		if (!csum_err)		/* ignore csum errors. */
 			return;
 	}
@@ -338,7 +334,7 @@ static int gre_rcv(struct sk_buff *skb)
 	}
 #endif
 
-	hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP));
+	hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP), 0);
 	if (hdr_len < 0)
 		goto drop;
 
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index fdc9de2..776d145 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -468,7 +468,7 @@ static int gre_rcv(struct sk_buff *skb)
 	bool csum_err = false;
 	int hdr_len;
 
-	hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IPV6));
+	hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IPV6), 0);
 	if (hdr_len < 0)
 		goto drop;
 
-- 
cgit v0.10.2


From 19de99f70b87fcc3338da52a89c439b088cbff71 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@fb.com>
Date: Wed, 15 Jun 2016 18:25:38 -0700
Subject: bpf: fix matching of data/data_end in verifier

The ctx structure passed into bpf programs is different depending on bpf
program type. The verifier incorrectly marked ctx->data and ctx->data_end
access based on ctx offset only. That caused loads in tracing programs
int bpf_prog(struct pt_regs *ctx) { .. ctx->ax .. }
to be incorrectly marked as PTR_TO_PACKET which later caused verifier
to reject the program that was actually valid in tracing context.
Fix this by doing program type specific matching of ctx offsets.

Fixes: 969bf05eb3ce ("bpf: direct packet access")
Reported-by: Sasha Goldshtein <goldshtn@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 8ee27b8..8269caf 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -111,6 +111,31 @@ enum bpf_access_type {
 	BPF_WRITE = 2
 };
 
+/* types of values stored in eBPF registers */
+enum bpf_reg_type {
+	NOT_INIT = 0,		 /* nothing was written into register */
+	UNKNOWN_VALUE,		 /* reg doesn't contain a valid pointer */
+	PTR_TO_CTX,		 /* reg points to bpf_context */
+	CONST_PTR_TO_MAP,	 /* reg points to struct bpf_map */
+	PTR_TO_MAP_VALUE,	 /* reg points to map element value */
+	PTR_TO_MAP_VALUE_OR_NULL,/* points to map elem value or NULL */
+	FRAME_PTR,		 /* reg == frame_pointer */
+	PTR_TO_STACK,		 /* reg == frame_pointer + imm */
+	CONST_IMM,		 /* constant integer value */
+
+	/* PTR_TO_PACKET represents:
+	 * skb->data
+	 * skb->data + imm
+	 * skb->data + (u16) var
+	 * skb->data + (u16) var + imm
+	 * if (range > 0) then [ptr, ptr + range - off) is safe to access
+	 * if (id > 0) means that some 'var' was added
+	 * if (off > 0) menas that 'imm' was added
+	 */
+	PTR_TO_PACKET,
+	PTR_TO_PACKET_END,	 /* skb->data + headlen */
+};
+
 struct bpf_prog;
 
 struct bpf_verifier_ops {
@@ -120,7 +145,8 @@ struct bpf_verifier_ops {
 	/* return true if 'size' wide access at offset 'off' within bpf_context
 	 * with 'type' (read or write) is allowed
 	 */
-	bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
+	bool (*is_valid_access)(int off, int size, enum bpf_access_type type,
+				enum bpf_reg_type *reg_type);
 
 	u32 (*convert_ctx_access)(enum bpf_access_type type, int dst_reg,
 				  int src_reg, int ctx_off,
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 668e079..eec9f90 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -126,31 +126,6 @@
  * are set to NOT_INIT to indicate that they are no longer readable.
  */
 
-/* types of values stored in eBPF registers */
-enum bpf_reg_type {
-	NOT_INIT = 0,		 /* nothing was written into register */
-	UNKNOWN_VALUE,		 /* reg doesn't contain a valid pointer */
-	PTR_TO_CTX,		 /* reg points to bpf_context */
-	CONST_PTR_TO_MAP,	 /* reg points to struct bpf_map */
-	PTR_TO_MAP_VALUE,	 /* reg points to map element value */
-	PTR_TO_MAP_VALUE_OR_NULL,/* points to map elem value or NULL */
-	FRAME_PTR,		 /* reg == frame_pointer */
-	PTR_TO_STACK,		 /* reg == frame_pointer + imm */
-	CONST_IMM,		 /* constant integer value */
-
-	/* PTR_TO_PACKET represents:
-	 * skb->data
-	 * skb->data + imm
-	 * skb->data + (u16) var
-	 * skb->data + (u16) var + imm
-	 * if (range > 0) then [ptr, ptr + range - off) is safe to access
-	 * if (id > 0) means that some 'var' was added
-	 * if (off > 0) menas that 'imm' was added
-	 */
-	PTR_TO_PACKET,
-	PTR_TO_PACKET_END,	 /* skb->data + headlen */
-};
-
 struct reg_state {
 	enum bpf_reg_type type;
 	union {
@@ -695,10 +670,10 @@ static int check_packet_access(struct verifier_env *env, u32 regno, int off,
 
 /* check access to 'struct bpf_context' fields */
 static int check_ctx_access(struct verifier_env *env, int off, int size,
-			    enum bpf_access_type t)
+			    enum bpf_access_type t, enum bpf_reg_type *reg_type)
 {
 	if (env->prog->aux->ops->is_valid_access &&
-	    env->prog->aux->ops->is_valid_access(off, size, t)) {
+	    env->prog->aux->ops->is_valid_access(off, size, t, reg_type)) {
 		/* remember the offset of last byte accessed in ctx */
 		if (env->prog->aux->max_ctx_offset < off + size)
 			env->prog->aux->max_ctx_offset = off + size;
@@ -798,21 +773,19 @@ static int check_mem_access(struct verifier_env *env, u32 regno, int off,
 			mark_reg_unknown_value(state->regs, value_regno);
 
 	} else if (reg->type == PTR_TO_CTX) {
+		enum bpf_reg_type reg_type = UNKNOWN_VALUE;
+
 		if (t == BPF_WRITE && value_regno >= 0 &&
 		    is_pointer_value(env, value_regno)) {
 			verbose("R%d leaks addr into ctx\n", value_regno);
 			return -EACCES;
 		}
-		err = check_ctx_access(env, off, size, t);
+		err = check_ctx_access(env, off, size, t, &reg_type);
 		if (!err && t == BPF_READ && value_regno >= 0) {
 			mark_reg_unknown_value(state->regs, value_regno);
-			if (off == offsetof(struct __sk_buff, data) &&
-			    env->allow_ptr_leaks)
+			if (env->allow_ptr_leaks)
 				/* note that reg.[id|off|range] == 0 */
-				state->regs[value_regno].type = PTR_TO_PACKET;
-			else if (off == offsetof(struct __sk_buff, data_end) &&
-				 env->allow_ptr_leaks)
-				state->regs[value_regno].type = PTR_TO_PACKET_END;
+				state->regs[value_regno].type = reg_type;
 		}
 
 	} else if (reg->type == FRAME_PTR || reg->type == PTR_TO_STACK) {
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 720b7bb..e7af6cb 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -349,7 +349,8 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func
 }
 
 /* bpf+kprobe programs can access fields of 'struct pt_regs' */
-static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type type)
+static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
+					enum bpf_reg_type *reg_type)
 {
 	/* check bounds */
 	if (off < 0 || off >= sizeof(struct pt_regs))
@@ -427,7 +428,8 @@ static const struct bpf_func_proto *tp_prog_func_proto(enum bpf_func_id func_id)
 	}
 }
 
-static bool tp_prog_is_valid_access(int off, int size, enum bpf_access_type type)
+static bool tp_prog_is_valid_access(int off, int size, enum bpf_access_type type,
+				    enum bpf_reg_type *reg_type)
 {
 	if (off < sizeof(void *) || off >= PERF_MAX_TRACE_SIZE)
 		return false;
diff --git a/net/core/filter.c b/net/core/filter.c
index 68adb5f..c4b330c 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2085,7 +2085,8 @@ static bool __is_valid_access(int off, int size, enum bpf_access_type type)
 }
 
 static bool sk_filter_is_valid_access(int off, int size,
-				      enum bpf_access_type type)
+				      enum bpf_access_type type,
+				      enum bpf_reg_type *reg_type)
 {
 	switch (off) {
 	case offsetof(struct __sk_buff, tc_classid):
@@ -2108,7 +2109,8 @@ static bool sk_filter_is_valid_access(int off, int size,
 }
 
 static bool tc_cls_act_is_valid_access(int off, int size,
-				       enum bpf_access_type type)
+				       enum bpf_access_type type,
+				       enum bpf_reg_type *reg_type)
 {
 	if (type == BPF_WRITE) {
 		switch (off) {
@@ -2123,6 +2125,16 @@ static bool tc_cls_act_is_valid_access(int off, int size,
 			return false;
 		}
 	}
+
+	switch (off) {
+	case offsetof(struct __sk_buff, data):
+		*reg_type = PTR_TO_PACKET;
+		break;
+	case offsetof(struct __sk_buff, data_end):
+		*reg_type = PTR_TO_PACKET_END;
+		break;
+	}
+
 	return __is_valid_access(off, size, type);
 }
 
-- 
cgit v0.10.2


From ad572d174787daa59e24b8b5c83028c09cdb5ddb Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@fb.com>
Date: Wed, 15 Jun 2016 18:25:39 -0700
Subject: bpf, trace: check event type in bpf_perf_event_read

similar to bpf_perf_event_output() the bpf_perf_event_read() helper
needs to check the type of the perf_event before reading the counter.

Fixes: a43eec304259 ("bpf: introduce bpf_perf_event_output() helper")
Reported-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index e7af6cb..26f603d 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -209,6 +209,10 @@ static u64 bpf_perf_event_read(u64 r1, u64 index, u64 r3, u64 r4, u64 r5)
 	    event->pmu->count)
 		return -EINVAL;
 
+	if (unlikely(event->attr.type != PERF_TYPE_HARDWARE &&
+		     event->attr.type != PERF_TYPE_RAW))
+		return -EINVAL;
+
 	/*
 	 * we don't know if the function is run successfully by the
 	 * return value. It can be judged in other places, such as
-- 
cgit v0.10.2


From 4e384ac19b5be6ad084b215d298b82b3a91ad24b Mon Sep 17 00:00:00 2001
From: hayeswang <hayeswang@realtek.com>
Date: Thu, 16 Jun 2016 10:55:17 +0800
Subject: r8152: disable MAC clock speed down

Disable MAC clock speed down. It may casue the first control
transfer to contain the wrong data, when the power state change
from U1 to U0.

Signed-off-by: Hayes Wang <hayeswang@realtek.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index 3f9f6ed..89dc752 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -3382,15 +3382,11 @@ static void r8153_init(struct r8152 *tp)
 	r8153_power_cut_en(tp, false);
 	r8153_u1u2en(tp, true);
 
-	ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL, ALDPS_SPDWN_RATIO);
-	ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL2, EEE_SPDWN_RATIO);
-	ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3,
-		       PKT_AVAIL_SPDWN_EN | SUSPEND_SPDWN_EN |
-		       U1U2_SPDWN_EN | L1_SPDWN_EN);
-	ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL4,
-		       PWRSAVE_SPDWN_EN | RXDV_SPDWN_EN | TX10MIDLE_EN |
-		       TP100_SPDWN_EN | TP500_SPDWN_EN | TP1000_SPDWN_EN |
-		       EEE_SPDWN_EN);
+	/* MAC clock speed down */
+	ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL, 0);
+	ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL2, 0);
+	ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3, 0);
+	ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL4, 0);
 
 	r8153_enable_eee(tp);
 	r8153_aldps_en(tp, true);
-- 
cgit v0.10.2


From 93fe9b1838404fcc3bea320b704bfa461d8e57a6 Mon Sep 17 00:00:00 2001
From: hayeswang <hayeswang@realtek.com>
Date: Thu, 16 Jun 2016 10:55:18 +0800
Subject: r8152: reset the bmu

Reset the BMU to clear the rx/tx fifo. This avoids that the unexpected
data remains in the hw.

Signed-off-by: Hayes Wang <hayeswang@realtek.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index 89dc752..f5bc351 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -116,6 +116,7 @@
 #define USB_TX_DMA		0xd434
 #define USB_TOLERANCE		0xd490
 #define USB_LPM_CTRL		0xd41a
+#define USB_BMU_RESET		0xd4b0
 #define USB_UPS_CTRL		0xd800
 #define USB_MISC_0		0xd81a
 #define USB_POWER_CUT		0xd80a
@@ -338,6 +339,10 @@
 #define TEST_MODE_DISABLE	0x00000001
 #define TX_SIZE_ADJUST1		0x00000100
 
+/* USB_BMU_RESET */
+#define BMU_RESET_EP_IN		0x01
+#define BMU_RESET_EP_OUT	0x02
+
 /* USB_UPS_CTRL */
 #define POWER_CUT		0x0100
 
@@ -2456,6 +2461,17 @@ static void r8153_teredo_off(struct r8152 *tp)
 	ocp_write_dword(tp, MCU_TYPE_PLA, PLA_TEREDO_TIMER, 0);
 }
 
+static void rtl_reset_bmu(struct r8152 *tp)
+{
+	u32 ocp_data;
+
+	ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, USB_BMU_RESET);
+	ocp_data &= ~(BMU_RESET_EP_IN | BMU_RESET_EP_OUT);
+	ocp_write_byte(tp, MCU_TYPE_USB, USB_BMU_RESET, ocp_data);
+	ocp_data |= BMU_RESET_EP_IN | BMU_RESET_EP_OUT;
+	ocp_write_byte(tp, MCU_TYPE_USB, USB_BMU_RESET, ocp_data);
+}
+
 static void r8152_aldps_en(struct r8152 *tp, bool enable)
 {
 	if (enable) {
@@ -2681,6 +2697,7 @@ static void r8153_first_init(struct r8152 *tp)
 	r8153_hw_phy_cfg(tp);
 
 	rtl8152_nic_reset(tp);
+	rtl_reset_bmu(tp);
 
 	ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL);
 	ocp_data &= ~NOW_IS_OOB;
@@ -2742,6 +2759,7 @@ static void r8153_enter_oob(struct r8152 *tp)
 	ocp_write_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL, ocp_data);
 
 	rtl_disable(tp);
+	rtl_reset_bmu(tp);
 
 	for (i = 0; i < 1000; i++) {
 		ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL);
@@ -2803,6 +2821,7 @@ static void rtl8153_disable(struct r8152 *tp)
 {
 	r8153_aldps_en(tp, false);
 	rtl_disable(tp);
+	rtl_reset_bmu(tp);
 	r8153_aldps_en(tp, true);
 	usb_enable_lpm(tp->udev);
 }
-- 
cgit v0.10.2


From a59e6d815226b70780427648e359da9bbee07171 Mon Sep 17 00:00:00 2001
From: hayeswang <hayeswang@realtek.com>
Date: Thu, 16 Jun 2016 10:55:19 +0800
Subject: r8152: correct the rx early size

The rx early size should be

	(agg_buf_sz - packet size) / 8

Signed-off-by: Hayes Wang <hayeswang@realtek.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index f5bc351..4e257b8 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -31,7 +31,7 @@
 #define NETNEXT_VERSION		"08"
 
 /* Information for net */
-#define NET_VERSION		"3"
+#define NET_VERSION		"4"
 
 #define DRIVER_VERSION		"v1." NETNEXT_VERSION "." NET_VERSION
 #define DRIVER_AUTHOR "Realtek linux nic maintainers <nic_swsd@realtek.com>"
@@ -2174,7 +2174,7 @@ static void r8153_set_rx_early_timeout(struct r8152 *tp)
 static void r8153_set_rx_early_size(struct r8152 *tp)
 {
 	u32 mtu = tp->netdev->mtu;
-	u32 ocp_data = (agg_buf_sz - mtu - VLAN_ETH_HLEN - VLAN_HLEN) / 4;
+	u32 ocp_data = (agg_buf_sz - mtu - VLAN_ETH_HLEN - VLAN_HLEN) / 8;
 
 	ocp_write_word(tp, MCU_TYPE_USB, USB_RX_EARLY_SIZE, ocp_data);
 }
-- 
cgit v0.10.2


From 9254e70c4ef1fee2e5c43feded4433d19cbb6177 Mon Sep 17 00:00:00 2001
From: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Date: Thu, 9 Jun 2016 12:28:13 +0200
Subject: s390/cpum_cf: use perf software context for hardware counters

On s390, there are two different hardware PMUs for counting and
sampling.  Previously, both PMUs have shared the perf_hw_context
which is not correct and, recently, results in this warning:

    ------------[ cut here ]------------
    WARNING: CPU: 5 PID: 1 at kernel/events/core.c:8485 perf_pmu_register+0x420/0x428
    Modules linked in:
    CPU: 5 PID: 1 Comm: swapper/0 Not tainted 4.7.0-rc1+ #2
    task: 00000009c5240000 ti: 00000009c5234000 task.ti: 00000009c5234000
    Krnl PSW : 0704c00180000000 0000000000220c50 (perf_pmu_register+0x420/0x428)
               R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:0 PM:0 RI:0 EA:3
    Krnl GPRS: ffffffffffffffff 0000000000b15ac6 0000000000000000 00000009cb440000
               000000000022087a 0000000000000000 0000000000b78fa0 0000000000000000
               0000000000a9aa90 0000000000000084 0000000000000005 000000000088a97a
               0000000000000004 0000000000749dd0 000000000022087a 00000009c5237cc0
    Krnl Code: 0000000000220c44: a7f4ff54            brc     15,220aec
               0000000000220c48: 92011000           mvi     0(%r1),1
              #0000000000220c4c: a7f40001           brc     15,220c4e
              >0000000000220c50: a7f4ff12           brc     15,220a74
               0000000000220c54: 0707               bcr     0,%r7
               0000000000220c56: 0707               bcr     0,%r7
               0000000000220c58: ebdff0800024       stmg    %r13,%r15,128(%r15)
               0000000000220c5e: a7f13fe0           tmll    %r15,16352
    Call Trace:
    ([<000000000022087a>] perf_pmu_register+0x4a/0x428)
    ([<0000000000b2c25c>] init_cpum_sampling_pmu+0x14c/0x1f8)
    ([<0000000000100248>] do_one_initcall+0x48/0x140)
    ([<0000000000b25d26>] kernel_init_freeable+0x1e6/0x2a0)
    ([<000000000072bda4>] kernel_init+0x24/0x138)
    ([<000000000073495e>] kernel_thread_starter+0x6/0xc)
    ([<0000000000734958>] kernel_thread_starter+0x0/0xc)
    Last Breaking-Event-Address:
     [<0000000000220c4c>] perf_pmu_register+0x41c/0x428
    ---[ end trace 0c6ef9f5b771ad97 ]---

Using the perf_sw_context is an option because the cpum_cf PMU does
not use interrupts.  To make this more clear, initialize the
capabilities in the PMU structure.

Signed-off-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Suggested-by: Peter Zijlstra <peterz@infradead.org>
Acked-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
index 59215c5..7ec63b1 100644
--- a/arch/s390/kernel/perf_cpum_cf.c
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -649,6 +649,8 @@ static int cpumf_pmu_commit_txn(struct pmu *pmu)
 
 /* Performance monitoring unit for s390x */
 static struct pmu cpumf_pmu = {
+	.task_ctx_nr  = perf_sw_context,
+	.capabilities = PERF_PMU_CAP_NO_INTERRUPT,
 	.pmu_enable   = cpumf_pmu_enable,
 	.pmu_disable  = cpumf_pmu_disable,
 	.event_init   = cpumf_pmu_event_init,
@@ -708,12 +710,6 @@ static int __init cpumf_pmu_init(void)
 		goto out;
 	}
 
-	/* The CPU measurement counter facility does not have overflow
-	 * interrupts to do sampling.  Sampling must be provided by
-	 * external means, for example, by timers.
-	 */
-	cpumf_pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
-
 	cpumf_pmu.attr_groups = cpumf_cf_event_group();
 	rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", PERF_TYPE_RAW);
 	if (rc) {
-- 
cgit v0.10.2


From 362761299eea7dfc3a4870551de36e08758b9254 Mon Sep 17 00:00:00 2001
From: Marcin Niestroj <m.niestroj@grinn-global.com>
Date: Tue, 14 Jun 2016 15:29:24 +0200
Subject: power_supply: tps65217-charger: Fix NULL deref during property export

This bug leads to:

[    1.906411] Unable to handle kernel NULL pointer dereference at virtual address 0000000c
[    1.914878] pgd = c0004000
[    1.917786] [0000000c] *pgd=00000000
[    1.921536] Internal error: Oops: 5 [#1] SMP ARM
[    1.926357] Modules linked in:
[    1.929556] CPU: 0 PID: 14 Comm: kworker/0:1 Not tainted 4.4.5 #18
[    1.936006] Hardware name: Generic AM33XX (Flattened Device Tree)
[    1.942383] Workqueue: events power_supply_changed_work
[    1.947842] task: de2c41c0 ti: de2c8000 task.ti: de2c8000
[    1.953483] PC is at tps65217_ac_get_property+0x14/0x28
[    1.958937] LR is at tps65217_ac_get_property+0x10/0x28

Driver was trying to use drv_data in property get handler. However drv_data
was not set, so it caused NULL pointer dereference. This patch properly
sets drv_data during probe by power_supply_config parameter, so the
property get handler works as desired.

Signed-off-by: Marcin Niestroj <m.niestroj@grinn-global.com>
Fixes: 3636859b280c ("power_supply: Add support for tps65217-charger")
Signed-off-by: Sebastian Reichel <sre@kernel.org>

diff --git a/drivers/power/tps65217_charger.c b/drivers/power/tps65217_charger.c
index d9f5673..73dfae4 100644
--- a/drivers/power/tps65217_charger.c
+++ b/drivers/power/tps65217_charger.c
@@ -197,6 +197,7 @@ static int tps65217_charger_probe(struct platform_device *pdev)
 {
 	struct tps65217 *tps = dev_get_drvdata(pdev->dev.parent);
 	struct tps65217_charger *charger;
+	struct power_supply_config cfg = {};
 	int ret;
 
 	dev_dbg(&pdev->dev, "%s\n", __func__);
@@ -208,9 +209,12 @@ static int tps65217_charger_probe(struct platform_device *pdev)
 	charger->tps = tps;
 	charger->dev = &pdev->dev;
 
+	cfg.of_node = pdev->dev.of_node;
+	cfg.drv_data = charger;
+
 	charger->ac = devm_power_supply_register(&pdev->dev,
 						 &tps65217_charger_desc,
-						 NULL);
+						 &cfg);
 	if (IS_ERR(charger->ac)) {
 		dev_err(&pdev->dev, "failed: power supply register\n");
 		return PTR_ERR(charger->ac);
-- 
cgit v0.10.2


From c70410cb91de70707a507ee7beef7021a5a89f0d Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Thu, 9 Jun 2016 14:38:50 -0400
Subject: rtl8xxxu: fix typo on variable name, compare against correct variable

path_b_ok is being assigned but immediately after path_a_ok is being
compared to the value 0x03.  This appears to be a typo on the
variable name, compare path_b_ok instead.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Jes Sorensen <Jes.Sorensen@redhat.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>

diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c
index fe19ace..b04cf30 100644
--- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c
+++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c
@@ -1149,7 +1149,7 @@ static void rtl8192eu_phy_iqcalibrate(struct rtl8xxxu_priv *priv,
 
 		for (i = 0; i < retry; i++) {
 			path_b_ok = rtl8192eu_rx_iqk_path_b(priv);
-			if (path_a_ok == 0x03) {
+			if (path_b_ok == 0x03) {
 				val32 = rtl8xxxu_read32(priv,
 							REG_RX_POWER_BEFORE_IQK_B_2);
 				result[t][6] = (val32 >> 16) & 0x3ff;
-- 
cgit v0.10.2


From 17471c7ba5115ed724d624577b21c166d2194272 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 15 Jun 2016 22:31:10 +0200
Subject: net: sfc: avoid -Wtype-limits warning

When building with -Wextra, we get a harmless warning from the
EFX_EXTRACT_OWORD32 macro:

ethernet/sfc/farch.c: In function 'efx_farch_test_registers':
ethernet/sfc/farch.c:119:30: error: comparison of unsigned expression < 0 is always false [-Werror=type-limits]
ethernet/sfc/farch.c:124:144: error: comparison of unsigned expression < 0 is always false [-Werror=type-limits]
ethernet/sfc/farch.c:124:392: error: comparison of unsigned expression < 0 is always false [-Werror=type-limits]
ethernet/sfc/farch.c:124:731: error: comparison of unsigned expression < 0 is always false [-Werror=type-limits]

The macro and the caller are both correct, but we can avoid the
warning by changing the index variable to a signed type.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Bert Kenward <bkenward@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/sfc/farch.c b/drivers/net/ethernet/sfc/farch.c
index 133e9e3..4c83739 100644
--- a/drivers/net/ethernet/sfc/farch.c
+++ b/drivers/net/ethernet/sfc/farch.c
@@ -104,7 +104,8 @@ int efx_farch_test_registers(struct efx_nic *efx,
 			     const struct efx_farch_register_test *regs,
 			     size_t n_regs)
 {
-	unsigned address = 0, i, j;
+	unsigned address = 0;
+	int i, j;
 	efx_oword_t mask, imask, original, reg, buf;
 
 	for (i = 0; i < n_regs; ++i) {
-- 
cgit v0.10.2


From a547224dceed4645139f7eff72ff51adb9938bcb Mon Sep 17 00:00:00 2001
From: Alexander Duyck <aduyck@mirantis.com>
Date: Wed, 15 Jun 2016 14:42:11 -0700
Subject: mlx4e: Do not attempt to offload VXLAN ports that are unrecognized

The mlx4e driver does not support more than one port for VXLAN offload.  As
such expecting the hardware to offload other ports is invalid since it
appears the parsing logic is used to perform Tx checksum and segmentation
offloads.  Use the vxlan_port number to determine in which cases we can
apply the offload and in which cases we can not.

Signed-off-by: Alexander Duyck <aduyck@mirantis.com>
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 19ceced..a2c2536 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -2447,9 +2447,14 @@ static netdev_features_t mlx4_en_features_check(struct sk_buff *skb,
 	 * strip that feature if this is an IPv6 encapsulated frame.
 	 */
 	if (skb->encapsulation &&
-	    (skb->ip_summed == CHECKSUM_PARTIAL) &&
-	    (ip_hdr(skb)->version != 4))
-		features &= ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
+	    (skb->ip_summed == CHECKSUM_PARTIAL)) {
+		struct mlx4_en_priv *priv = netdev_priv(dev);
+
+		if (!priv->vxlan_port ||
+		    (ip_hdr(skb)->version != 4) ||
+		    (udp_hdr(skb)->dest != priv->vxlan_port))
+			features &= ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
+	}
 
 	return features;
 }
-- 
cgit v0.10.2


From 8fa3b8d689a54d6d04ff7803c724fb7aca6ce98e Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 26 May 2016 15:42:13 -0400
Subject: cgroup: set css->id to -1 during init

If percpu_ref initialization fails during css_create(), the free path
can end up trying to free css->id of zero.  As ID 0 is unused, it
doesn't cause a critical breakage but it does trigger a warning
message.  Fix it by setting css->id to -1 from init_and_link_css().

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Wenwei Tao <ww.tao0320@gmail.com>
Fixes: 01e586598b22 ("cgroup: release css->id after css_free")
Cc: stable@vger.kernel.org # v4.0+
Signed-off-by: Tejun Heo <tj@kernel.org>

diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 789b84f..688eb0c 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -5063,6 +5063,7 @@ static void init_and_link_css(struct cgroup_subsys_state *css,
 	memset(css, 0, sizeof(*css));
 	css->cgroup = cgrp;
 	css->ss = ss;
+	css->id = -1;
 	INIT_LIST_HEAD(&css->sibling);
 	INIT_LIST_HEAD(&css->children);
 	css->serial_nr = css_serial_nr_next++;
-- 
cgit v0.10.2


From d5d8760b78d0cfafe292f965f599988138b06a70 Mon Sep 17 00:00:00 2001
From: Simon Horman <simon.horman@netronome.com>
Date: Thu, 16 Jun 2016 17:06:19 +0900
Subject: sit: correct IP protocol used in ipip6_err

Since 32b8a8e59c9c ("sit: add IPv4 over IPv4 support")
ipip6_err() may be called for packets whose IP protocol is
IPPROTO_IPIP as well as those whose IP protocol is IPPROTO_IPV6.

In the case of IPPROTO_IPIP packets the correct protocol value is not
passed to ipv4_update_pmtu() or ipv4_redirect().

This patch resolves this problem by using the IP protocol of the packet
rather than a hard-coded value. This appears to be consistent
with the usage of the protocol of a packet by icmp_socket_deliver()
the caller of ipip6_err().

I was able to exercise the redirect case by using a setup where an ICMP
redirect was received for the destination of the encapsulated packet.
However, it appears that although incorrect the protocol field is not used
in this case and thus no problem manifests.  On inspection it does not
appear that a problem will manifest in the fragmentation needed/update pmtu
case either.

In short I believe this is a cosmetic fix. None the less, the use of
IPPROTO_IPV6 seems wrong and confusing.

Reviewed-by: Dinan Gunawardena <dinan.gunawardena@netronome.com>
Signed-off-by: Simon Horman <simon.horman@netronome.com>
Acked-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 0a5a255..0619ac7 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -560,13 +560,13 @@ static int ipip6_err(struct sk_buff *skb, u32 info)
 
 	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
 		ipv4_update_pmtu(skb, dev_net(skb->dev), info,
-				 t->parms.link, 0, IPPROTO_IPV6, 0);
+				 t->parms.link, 0, iph->protocol, 0);
 		err = 0;
 		goto out;
 	}
 	if (type == ICMP_REDIRECT) {
 		ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0,
-			      IPPROTO_IPV6, 0);
+			      iph->protocol, 0);
 		err = 0;
 		goto out;
 	}
-- 
cgit v0.10.2


From ce449ba77a2271dce9771fb1f3eb37e4be6a8b81 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Thu, 16 Jun 2016 14:42:50 +0100
Subject: nfp: use correct index to mask link state irq

We were using an incorrect define to get the irq vector number.
NFP_NET_CFG_LSC is a control BAR offset, LSC interrupt vector
index is called NFP_NET_IRQ_LSC_IDX.  For machines with less
than 30 CPUs this meant that we were disabling/enabling IRQ 0.
For bigger hosts we were just playing with the 31st RX/TX
interrupt.

Fixes: 0ba40af963f0 ("nfp: move link state interrupt request/free calls")
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index fa47c14..ba26bb3 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -2015,7 +2015,7 @@ static void nfp_net_open_stack(struct nfp_net *nn)
 
 	netif_tx_wake_all_queues(nn->netdev);
 
-	enable_irq(nn->irq_entries[NFP_NET_CFG_LSC].vector);
+	enable_irq(nn->irq_entries[NFP_NET_IRQ_LSC_IDX].vector);
 	nfp_net_read_link_status(nn);
 }
 
@@ -2044,7 +2044,7 @@ static int nfp_net_netdev_open(struct net_device *netdev)
 				      NFP_NET_IRQ_LSC_IDX, nn->lsc_handler);
 	if (err)
 		goto err_free_exn;
-	disable_irq(nn->irq_entries[NFP_NET_CFG_LSC].vector);
+	disable_irq(nn->irq_entries[NFP_NET_IRQ_LSC_IDX].vector);
 
 	nn->rx_rings = kcalloc(nn->num_rx_rings, sizeof(*nn->rx_rings),
 			       GFP_KERNEL);
@@ -2133,7 +2133,7 @@ static void nfp_net_close_stack(struct nfp_net *nn)
 {
 	unsigned int r;
 
-	disable_irq(nn->irq_entries[NFP_NET_CFG_LSC].vector);
+	disable_irq(nn->irq_entries[NFP_NET_IRQ_LSC_IDX].vector);
 	netif_carrier_off(nn->netdev);
 	nn->link_up = false;
 
-- 
cgit v0.10.2


From e568006b9d828403397668864d9797dc4bfefd28 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Fri, 17 Jun 2016 11:32:00 +0530
Subject: powerpc/mm/hash: Don't add memory coherence if cache inhibited is set

H_ENTER hcall handling in qemu had assumptions that a cache inhibited
hpte entry won't have memory conference set. Also older kernel
mentioned that some version of pHyp required this (the code removed
by the below commit says:

    /* Make pHyp happy */
    if ((rflags & _PAGE_NO_CACHE) && !(rflags & _PAGE_WRITETHRU))
            hpte_r &= ~HPTE_R_M;

But with older kernel we had some inconsistent memory conherence
mapping. We always enabled memory conherence in the page fault path and
removed memory conherence is _PAGE_NO_CACHE was set when we mapped the
page via htab_bolt_mapping. The commit mentioned below tried to
consolidate that by always enabling memory conherence. But as mentioned
above that breaks Qemu H_ENTER handling.

This patch update this such that we enable memory conherence only if
cache inhibited is not set and bring fault handling, lpar and bolt
mapping in sync.

Fixes: commit 30bda41aba4e("powerpc/mm: Drop WIMG in favour of new constant")
Reported-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>

diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index b2740c6..5b22ba0 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -201,9 +201,8 @@ unsigned long htab_convert_pte_flags(unsigned long pteflags)
 	/*
 	 * We can't allow hardware to update hpte bits. Hence always
 	 * set 'R' bit and set 'C' if it is a write fault
-	 * Memory coherence is always enabled
 	 */
-	rflags |=  HPTE_R_R | HPTE_R_M;
+	rflags |=  HPTE_R_R;
 
 	if (pteflags & _PAGE_DIRTY)
 		rflags |= HPTE_R_C;
@@ -213,10 +212,15 @@ unsigned long htab_convert_pte_flags(unsigned long pteflags)
 
 	if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_TOLERANT)
 		rflags |= HPTE_R_I;
-	if ((pteflags & _PAGE_CACHE_CTL ) == _PAGE_NON_IDEMPOTENT)
+	else if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_NON_IDEMPOTENT)
 		rflags |= (HPTE_R_I | HPTE_R_G);
-	if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_SAO)
-		rflags |= (HPTE_R_I | HPTE_R_W);
+	else if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_SAO)
+		rflags |= (HPTE_R_W | HPTE_R_I | HPTE_R_M);
+	else
+		/*
+		 * Add memory coherence if cache inhibited is not set
+		 */
+		rflags |= HPTE_R_M;
 
 	return rflags;
 }
-- 
cgit v0.10.2


From b23d9c5b9c83c05e013aa52460f12a8365062cf4 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Fri, 17 Jun 2016 11:40:36 +0530
Subject: powerpc/mm/radix: Update Radix tree size as per ISA 3.0

ISA 3.0 updated it to be encoded as Radix tree size = 2^(RTS + 31). We
have it encoded as 2^(RTS + 28). Add a helper with the correct encoding
and use it instead of opencoding.

Fixes: 2bfd65e45e87 ("powerpc/mm/radix: Add radix callbacks for early init routines")
Reviewed-by: Balbir Singh <bsingharora@gmail.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>

diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h
index 937d4e2..df29422 100644
--- a/arch/powerpc/include/asm/book3s/64/radix.h
+++ b/arch/powerpc/include/asm/book3s/64/radix.h
@@ -228,5 +228,20 @@ extern void radix__vmemmap_remove_mapping(unsigned long start,
 
 extern int radix__map_kernel_page(unsigned long ea, unsigned long pa,
 				 pgprot_t flags, unsigned int psz);
+
+static inline unsigned long radix__get_tree_size(void)
+{
+	unsigned long rts_field;
+	/*
+	 * we support 52 bits, hence 52-31 = 21, 0b10101
+	 * RTS encoding details
+	 * bits 0 - 3 of rts -> bits 6 - 8 unsigned long
+	 * bits 4 - 5 of rts -> bits 62 - 63 of unsigned long
+	 */
+	rts_field = (0x5UL << 5); /* 6 - 8 bits */
+	rts_field |= (0x2UL << 61);
+
+	return rts_field;
+}
 #endif /* __ASSEMBLY__ */
 #endif
diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c
index 227b2a6..19622222 100644
--- a/arch/powerpc/mm/mmu_context_book3s64.c
+++ b/arch/powerpc/mm/mmu_context_book3s64.c
@@ -65,7 +65,7 @@ static int radix__init_new_context(struct mm_struct *mm, int index)
 	/*
 	 * set the process table entry,
 	 */
-	rts_field = 3ull << PPC_BITLSHIFT(2);
+	rts_field = radix__get_tree_size();
 	process_tb[index].prtb0 = cpu_to_be64(rts_field | __pa(mm->pgd) | RADIX_PGD_INDEX_SIZE);
 	return 0;
 }
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index c939e6e..e58707d 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -160,9 +160,8 @@ redo:
 	process_tb = early_alloc_pgtable(1UL << PRTB_SIZE_SHIFT);
 	/*
 	 * Fill in the process table.
-	 * we support 52 bits, hence 52-28 = 24, 11000
 	 */
-	rts_field = 3ull << PPC_BITLSHIFT(2);
+	rts_field = radix__get_tree_size();
 	process_tb->prtb0 = cpu_to_be64(rts_field | __pa(init_mm.pgd) | RADIX_PGD_INDEX_SIZE);
 	/*
 	 * Fill in the partition table. We are suppose to use effective address
@@ -176,10 +175,8 @@ redo:
 static void __init radix_init_partition_table(void)
 {
 	unsigned long rts_field;
-	/*
-	 * we support 52 bits, hence 52-28 = 24, 11000
-	 */
-	rts_field = 3ull << PPC_BITLSHIFT(2);
+
+	rts_field = radix__get_tree_size();
 
 	BUILD_BUG_ON_MSG((PATB_SIZE_SHIFT > 24), "Partition table size too large.");
 	partition_tb = early_alloc_pgtable(1UL << PATB_SIZE_SHIFT);
-- 
cgit v0.10.2


From a3aa256b7258b3d19f8b44557cc64525a993b941 Mon Sep 17 00:00:00 2001
From: Gavin Shan <gwshan@linux.vnet.ibm.com>
Date: Fri, 17 Jun 2016 13:05:11 +1000
Subject: powerpc/eeh: Fix invalid cached PE primary bus

The PE primary bus cannot be got from its child devices when having
full hotplug in error recovery. The PE primary bus is cached, which
is done in commit <05ba75f84864> ("powerpc/eeh: Fix stale cached primary
bus"). In eeh_reset_device(), the flag (EEH_PE_PRI_BUS) is cleared
before the PCI hot remove. eeh_pe_bus_get() then returns NULL as the
PE primary bus in pnv_eeh_reset() and it crashes the kernel eventually.

This fixes the issue by clearing the flag (EEH_PE_PRI_BUS) before the
PCI hot add. With it, the PowerNV EEH reset backend (pnv_eeh_reset())
can get valid PE primary bus through eeh_pe_bus_get().

Fixes: 67086e32b564 ("powerpc/eeh: powerpc/eeh: Support error recovery for VF PE")
Reported-by: Pridhiviraj Paidipeddi <ppaiddipe@in.ibm.com>
Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>

diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 2714a3b..b5f73cb 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -642,7 +642,6 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
 		if (pe->type & EEH_PE_VF) {
 			eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
 		} else {
-			eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
 			pci_lock_rescan_remove();
 			pci_hp_remove_devices(bus);
 			pci_unlock_rescan_remove();
@@ -692,10 +691,12 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
 		 */
 		edev = list_first_entry(&pe->edevs, struct eeh_dev, list);
 		eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL);
-		if (pe->type & EEH_PE_VF)
+		if (pe->type & EEH_PE_VF) {
 			eeh_add_virt_device(edev, NULL);
-		else
+		} else {
+			eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
 			pci_hp_add_devices(bus);
+		}
 	} else if (frozen_bus && rmv_data->removed) {
 		pr_info("EEH: Sleep 5s ahead of partial hotplug\n");
 		ssleep(5);
-- 
cgit v0.10.2


From 8a092e682f20f193f2070dba2ea1904e95814126 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mika=20B=C3=A5tsman?= <mbatsman@mvista.com>
Date: Fri, 17 Jun 2016 13:31:37 +0300
Subject: regulator: anatop: allow regulator to be in bypass mode
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Bypass support was added in commit d38018f2019c ("regulator: anatop: Add
bypass support to digital LDOs"). A check for valid voltage selectors was
added in commit da0607c8df5c ("regulator: anatop: Fail on invalid voltage
selector") but it also discards all regulators that are in bypass mode. Add
check for the bypass setting. Errors below were seen on a Variscite mx6
board.

anatop_regulator 20c8000.anatop:regulator-vddcore@140: Failed to read a valid default voltage selector.
anatop_regulator: probe of 20c8000.anatop:regulator-vddcore@140 failed with error -22
anatop_regulator 20c8000.anatop:regulator-vddsoc@140: Failed to read a valid default voltage selector.
anatop_regulator: probe of 20c8000.anatop:regulator-vddsoc@140 failed with error -22

Fixes: da0607c8df5c ("regulator: anatop: Fail on invalid voltage selector")
Signed-off-by: Mika Båtsman <mbatsman@mvista.com>
Signed-off-by: Mark Brown <broonie@kernel.org>

diff --git a/drivers/regulator/anatop-regulator.c b/drivers/regulator/anatop-regulator.c
index 63cd5e6..3a6d029 100644
--- a/drivers/regulator/anatop-regulator.c
+++ b/drivers/regulator/anatop-regulator.c
@@ -296,7 +296,7 @@ static int anatop_regulator_probe(struct platform_device *pdev)
 		if (!sreg->sel && !strcmp(sreg->name, "vddpu"))
 			sreg->sel = 22;
 
-		if (!sreg->sel) {
+		if (!sreg->bypass && !sreg->sel) {
 			dev_err(&pdev->dev, "Failed to read a valid default voltage selector.\n");
 			return -EINVAL;
 		}
-- 
cgit v0.10.2


From 8f45927c3cae4db85887700e5415286f766cbaf9 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 17 Jun 2016 12:54:18 +0200
Subject: netfilter: xt_SYNPROXY: add missing header to Kbuild

Matt Whitlock says:

 Without this line, the file xt_SYNPROXY.h does not get installed in
 /usr/include/linux/netfilter/, and thus user-space programs cannot make
 use of it.

Reported-by: Matt Whitlock <kernel@mattwhitlock.name>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>

diff --git a/include/uapi/linux/netfilter/Kbuild b/include/uapi/linux/netfilter/Kbuild
index 1d973d2..cd26d7a 100644
--- a/include/uapi/linux/netfilter/Kbuild
+++ b/include/uapi/linux/netfilter/Kbuild
@@ -33,6 +33,7 @@ header-y += xt_NFLOG.h
 header-y += xt_NFQUEUE.h
 header-y += xt_RATEEST.h
 header-y += xt_SECMARK.h
+header-y += xt_SYNPROXY.h
 header-y += xt_TCPMSS.h
 header-y += xt_TCPOPTSTRIP.h
 header-y += xt_TEE.h
-- 
cgit v0.10.2


From 1463847e93fe693e89c52b03ab4ede6800d717c1 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 17 Jun 2016 12:54:18 +0200
Subject: netfilter: xt_SYNPROXY: include missing <linux/types.h>

./usr/include/linux/netfilter/xt_SYNPROXY.h:11: found __[us]{8,16,32,64} type without #include <linux/types.h>

Reported-by: kbuild test robot <lkp@intel.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>

diff --git a/include/uapi/linux/netfilter/xt_SYNPROXY.h b/include/uapi/linux/netfilter/xt_SYNPROXY.h
index 2d59fba..ca67e61 100644
--- a/include/uapi/linux/netfilter/xt_SYNPROXY.h
+++ b/include/uapi/linux/netfilter/xt_SYNPROXY.h
@@ -1,6 +1,8 @@
 #ifndef _XT_SYNPROXY_H
 #define _XT_SYNPROXY_H
 
+#include <linux/types.h>
+
 #define XT_SYNPROXY_OPT_MSS		0x01
 #define XT_SYNPROXY_OPT_WSCALE		0x02
 #define XT_SYNPROXY_OPT_SACK_PERM	0x04
-- 
cgit v0.10.2


From 8198868f0a283eb23e264951632ce61ec2f82228 Mon Sep 17 00:00:00 2001
From: Jaroslav Kysela <perex@perex.cz>
Date: Fri, 17 Jun 2016 13:35:56 +0200
Subject: ALSA: hdac_regmap - fix the register access for runtime PM

Call path:

  1) snd_hdac_power_up_pm()
  2) snd_hdac_power_up()
  3) pm_runtime_get_sync()
  4) __pm_runtime_resume()
  5) rpm_resume()

The rpm_resume() returns 1 when the device is already active.
Because the return value is unmodified, the hdac regmap read/write
functions should allow this value for the retry I/O operation, too.

Signed-off-by: Jaroslav Kysela <perex@perex.cz>
Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/hda/hdac_regmap.c b/sound/hda/hdac_regmap.c
index 87041dd..47a358f 100644
--- a/sound/hda/hdac_regmap.c
+++ b/sound/hda/hdac_regmap.c
@@ -444,7 +444,7 @@ int snd_hdac_regmap_write_raw(struct hdac_device *codec, unsigned int reg,
 	err = reg_raw_write(codec, reg, val);
 	if (err == -EAGAIN) {
 		err = snd_hdac_power_up_pm(codec);
-		if (!err)
+		if (err >= 0)
 			err = reg_raw_write(codec, reg, val);
 		snd_hdac_power_down_pm(codec);
 	}
@@ -470,7 +470,7 @@ static int __snd_hdac_regmap_read_raw(struct hdac_device *codec,
 	err = reg_raw_read(codec, reg, val, uncached);
 	if (err == -EAGAIN) {
 		err = snd_hdac_power_up_pm(codec);
-		if (!err)
+		if (err >= 0)
 			err = reg_raw_read(codec, reg, val, uncached);
 		snd_hdac_power_down_pm(codec);
 	}
-- 
cgit v0.10.2


From 6762925df4642aec5629f7971ba477d6930f53f7 Mon Sep 17 00:00:00 2001
From: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Date: Tue, 31 May 2016 21:47:17 +0900
Subject: phy: rcar-gen3-usb2: fix unexpected repeat interrupts of VBUS change

This patch fixes an issue that the driver is possible to cause
unexpected repeat interrupts if a board condition is wrong
(e.g. even if the ID pin is as function, a board supplies the VBUS.)

The reason why unexpected repeat interrupts happen is:
 1) The driver changed the mode to function if it detected the ID pin
    is high and the VBUS is high.
 2) After the driver changed function mode, it disabled the "VBUS control"
    feature. Then, the VBUS signal will be low.
 3) Since the VBUS change interruption happened, the driver checked
    the ID pin and VBUS.
 4) Since VBUS was low, the driver changed the mode to host and enabled
    the "VBUS control" feature. Then the VBUS signal will be high.
 5) Since the VBUS change interruption happened, the driver did 1) above.

So, this patch modified the condition in rcar_gen3_device_recognition()
to check the ID pin only.

Fixes: 1114e2d (phy: rcar-gen3-usb2: change the mode to OTG on the combined channel)
Cc: <stable@vger.kernel.org> # v4.5+
Reported-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Signed-off-by: Kishon Vijay Abraham I <kishon@ti.com>

diff --git a/drivers/phy/phy-rcar-gen3-usb2.c b/drivers/phy/phy-rcar-gen3-usb2.c
index 76bb88f..4be3f5d 100644
--- a/drivers/phy/phy-rcar-gen3-usb2.c
+++ b/drivers/phy/phy-rcar-gen3-usb2.c
@@ -144,12 +144,6 @@ static void rcar_gen3_init_for_peri(struct rcar_gen3_chan *ch)
 	extcon_set_cable_state_(ch->extcon, EXTCON_USB, true);
 }
 
-static bool rcar_gen3_check_vbus(struct rcar_gen3_chan *ch)
-{
-	return !!(readl(ch->base + USB2_ADPCTRL) &
-		  USB2_ADPCTRL_OTGSESSVLD);
-}
-
 static bool rcar_gen3_check_id(struct rcar_gen3_chan *ch)
 {
 	return !!(readl(ch->base + USB2_ADPCTRL) & USB2_ADPCTRL_IDDIG);
@@ -157,13 +151,7 @@ static bool rcar_gen3_check_id(struct rcar_gen3_chan *ch)
 
 static void rcar_gen3_device_recognition(struct rcar_gen3_chan *ch)
 {
-	bool is_host = true;
-
-	/* B-device? */
-	if (rcar_gen3_check_id(ch) && rcar_gen3_check_vbus(ch))
-		is_host = false;
-
-	if (is_host)
+	if (!rcar_gen3_check_id(ch))
 		rcar_gen3_init_for_host(ch);
 	else
 		rcar_gen3_init_for_peri(ch);
-- 
cgit v0.10.2


From 075adb8046532d9642f411a92b4f385d04ced24d Mon Sep 17 00:00:00 2001
From: Wei Yongjun <yongjun_wei@trendmicro.com.cn>
Date: Mon, 13 Jun 2016 23:31:47 +0000
Subject: phy: rockchip-dp: fix return value check in rockchip_dp_phy_probe()

In case of error, the function devm_kzalloc() returns NULL pointer
not ERR_PTR(). The IS_ERR() test in the return value check should
be replaced with NULL test.

Signed-off-by: Wei Yongjun <yongjun_wei@trendmicro.com.cn>
Reviewed-by: Heiko Stuebner <heiko@sntech.de>
Signed-off-by: Kishon Vijay Abraham I <kishon@ti.com>

diff --git a/drivers/phy/phy-rockchip-dp.c b/drivers/phy/phy-rockchip-dp.c
index 793ecb6..8b267a7 100644
--- a/drivers/phy/phy-rockchip-dp.c
+++ b/drivers/phy/phy-rockchip-dp.c
@@ -90,7 +90,7 @@ static int rockchip_dp_phy_probe(struct platform_device *pdev)
 		return -ENODEV;
 
 	dp = devm_kzalloc(dev, sizeof(*dp), GFP_KERNEL);
-	if (IS_ERR(dp))
+	if (!dp)
 		return -ENOMEM;
 
 	dp->dev = dev;
-- 
cgit v0.10.2


From 5cf700ac9d50353dc5b8194a57c6f40bf1fc4424 Mon Sep 17 00:00:00 2001
From: Quentin Schulz <quentin.schulz@free-electrons.com>
Date: Mon, 13 Jun 2016 13:45:48 +0200
Subject: phy: phy-sun4i-usb: Fix optional gpios failing probe

The interrupt 0 is not a valid interrupt number. In the event where the
retrieval of the vbus-det gpio would return null, the gpiod_to_irq
callback would return 0, while the current code makes the assumption
that it is a valid interrupt, and would go on calling request_irq.
Obviously, this would fail, preventing the driver from probing properly,
while the vbus and id gpios are optional.

Signed-off-by: Quentin Schulz <quentin.schulz@free-electrons.com>
Signed-off-by: Kishon Vijay Abraham I <kishon@ti.com>

diff --git a/drivers/phy/phy-sun4i-usb.c b/drivers/phy/phy-sun4i-usb.c
index bae54f7..45f01d6 100644
--- a/drivers/phy/phy-sun4i-usb.c
+++ b/drivers/phy/phy-sun4i-usb.c
@@ -645,11 +645,11 @@ static int sun4i_usb_phy_probe(struct platform_device *pdev)
 
 	data->id_det_irq = gpiod_to_irq(data->id_det_gpio);
 	data->vbus_det_irq = gpiod_to_irq(data->vbus_det_gpio);
-	if ((data->id_det_gpio && data->id_det_irq < 0) ||
-	    (data->vbus_det_gpio && data->vbus_det_irq < 0))
+	if ((data->id_det_gpio && data->id_det_irq <= 0) ||
+	    (data->vbus_det_gpio && data->vbus_det_irq <= 0))
 		data->phy0_poll = true;
 
-	if (data->id_det_irq >= 0) {
+	if (data->id_det_irq > 0) {
 		ret = devm_request_irq(dev, data->id_det_irq,
 				sun4i_usb_phy0_id_vbus_det_irq,
 				IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING,
@@ -660,7 +660,7 @@ static int sun4i_usb_phy_probe(struct platform_device *pdev)
 		}
 	}
 
-	if (data->vbus_det_irq >= 0) {
+	if (data->vbus_det_irq > 0) {
 		ret = devm_request_irq(dev, data->vbus_det_irq,
 				sun4i_usb_phy0_id_vbus_det_irq,
 				IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING,
-- 
cgit v0.10.2


From d99cb37828a2fe344ecc95d1fad570bbe447cc06 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben.dooks@codethink.co.uk>
Date: Tue, 7 Jun 2016 18:14:56 +0100
Subject: phy-sun4i-usb: fix missing __iomem *

Fix the missing __iomem attribute in sun4i_usb_phy_write()
function. This fixes the following sparse warnings:

drivers/phy/phy-sun4i-usb.c:178:39: warning: incorrect type in initializer (different address spaces)
drivers/phy/phy-sun4i-usb.c:178:39:    expected void *phyctl
drivers/phy/phy-sun4i-usb.c:178:39:    got void [noderef] <asn:2>*
drivers/phy/phy-sun4i-usb.c:185:17: warning: incorrect type in argument 2 (different address spaces)
drivers/phy/phy-sun4i-usb.c:185:17:    expected void volatile [noderef] <asn:2>*addr
drivers/phy/phy-sun4i-usb.c:185:17:    got void *phyctl
drivers/phy/phy-sun4i-usb.c:189:24: warning: incorrect type in argument 1 (different address spaces)
drivers/phy/phy-sun4i-usb.c:189:24:    expected void const volatile [noderef] <asn:2>*addr
drivers/phy/phy-sun4i-usb.c:189:24:    got void *phyctl
drivers/phy/phy-sun4i-usb.c:196:17: warning: incorrect type in argument 2 (different address spaces)
drivers/phy/phy-sun4i-usb.c:196:17:    expected void volatile [noderef] <asn:2>*addr
drivers/phy/phy-sun4i-usb.c:196:17:    got void *phyctl
drivers/phy/phy-sun4i-usb.c:199:24: warning: incorrect type in argument 1 (different address spaces)
drivers/phy/phy-sun4i-usb.c:199:24:    expected void const volatile [noderef] <asn:2>*addr
drivers/phy/phy-sun4i-usb.c:199:24:    got void *phyctl
drivers/phy/phy-sun4i-usb.c:205:17: warning: incorrect type in argument 2 (different address spaces)
drivers/phy/phy-sun4i-usb.c:205:17:    expected void volatile [noderef] <asn:2>*addr
drivers/phy/phy-sun4i-usb.c:205:17:    got void *phyctl
drivers/phy/phy-sun4i-usb.c:208:24: warning: incorrect type in argument 1 (different address spaces)
drivers/phy/phy-sun4i-usb.c:208:24:    expected void const volatile [noderef] <asn:2>*addr
drivers/phy/phy-sun4i-usb.c:208:24:    got void *phyctl
drivers/phy/phy-sun4i-usb.c:210:17: warning: incorrect type in argument 2 (different address spaces)
drivers/phy/phy-sun4i-usb.c:210:17:    expected void volatile [noderef] <asn:2>*addr
drivers/phy/phy-sun4i-usb.c:210:17:    got void *phyctl
drivers/phy/phy-sun4i-usb.c:212:24: warning: incorrect type in argument 1 (different address spaces)
drivers/phy/phy-sun4i-usb.c:212:24:    expected void const volatile [noderef] <asn:2>*addr
drivers/phy/phy-sun4i-usb.c:212:24:    got void *phyctl
drivers/phy/phy-sun4i-usb.c:214:17: warning: incorrect type in argument 2 (different address spaces)
drivers/phy/phy-sun4i-usb.c:214:17:    expected void volatile [noderef] <asn:2>*addr
drivers/phy/phy-sun4i-usb.c:214:17:    got void *phyctl

Signed-off-by: Ben Dooks <ben.dooks@codethink.co.uk>
Acked-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Kishon Vijay Abraham I <kishon@ti.com>

diff --git a/drivers/phy/phy-sun4i-usb.c b/drivers/phy/phy-sun4i-usb.c
index 45f01d6..7390294 100644
--- a/drivers/phy/phy-sun4i-usb.c
+++ b/drivers/phy/phy-sun4i-usb.c
@@ -175,7 +175,7 @@ static void sun4i_usb_phy_write(struct sun4i_usb_phy *phy, u32 addr, u32 data,
 {
 	struct sun4i_usb_phy_data *phy_data = to_sun4i_usb_phy_data(phy);
 	u32 temp, usbc_bit = BIT(phy->index * 2);
-	void *phyctl = phy_data->base + phy_data->cfg->phyctl_offset;
+	void __iomem *phyctl = phy_data->base + phy_data->cfg->phyctl_offset;
 	int i;
 
 	mutex_lock(&phy_data->mutex);
-- 
cgit v0.10.2


From 6c081ff6fd5abd621797570be43d5e3c6acfcd58 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Tue, 10 May 2016 11:01:33 +0300
Subject: phy: bcm-ns-usb2: checking the wrong variable

We intended to test "usb2->phy" here instead of "dev".

Fixes: d3feb4067335 ('phy: bcm-ns-usb2: new driver for USB 2.0 PHY on Northstar')
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Kishon Vijay Abraham I <kishon@ti.com>

diff --git a/drivers/phy/phy-bcm-ns-usb2.c b/drivers/phy/phy-bcm-ns-usb2.c
index 95ab6b2..58dff80 100644
--- a/drivers/phy/phy-bcm-ns-usb2.c
+++ b/drivers/phy/phy-bcm-ns-usb2.c
@@ -109,8 +109,8 @@ static int bcm_ns_usb2_probe(struct platform_device *pdev)
 	}
 
 	usb2->phy = devm_phy_create(dev, NULL, &ops);
-	if (IS_ERR(dev))
-		return PTR_ERR(dev);
+	if (IS_ERR(usb2->phy))
+		return PTR_ERR(usb2->phy);
 
 	phy_set_drvdata(usb2->phy, usb2);
 	platform_set_drvdata(pdev, usb2);
-- 
cgit v0.10.2


From 79bb71bd1d93197ce227fa167b450b633f30a52b Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Wed, 15 Jun 2016 22:57:38 +0200
Subject: gpio: make sure gpiod_to_irq() returns negative on NULL desc

commit 54d77198fdfbc4f0fe11b4252c1d9c97d51a3264
("gpio: bail out silently on NULL descriptors")
doesn't work for gpiod_to_irq(): drivers assume that NULL
descriptors will give negative IRQ numbers in return.

It has been pointed out that returning 0 is NO_IRQ and that
drivers should be amended to treat this as an error, but that
is for the longer term: now let us repair the semantics.

Cc: Maxime Ripard <maxime.ripard@free-electrons.com>
Reported-by: Hans de Goede <hdegoede@redhat.com>
Tested-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>

diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 58d822d..f39bf05 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -2056,7 +2056,14 @@ int gpiod_to_irq(const struct gpio_desc *desc)
 	struct gpio_chip *chip;
 	int offset;
 
-	VALIDATE_DESC(desc);
+	/*
+	 * Cannot VALIDATE_DESC() here as gpiod_to_irq() consumer semantics
+	 * requires this function to not return zero on an invalid descriptor
+	 * but rather a negative error number.
+	 */
+	if (!desc || !desc->gdev || !desc->gdev->chip)
+		return -EINVAL;
+
 	chip = desc->gdev->chip;
 	offset = gpio_chip_hwgpio(desc);
 	if (chip->to_irq) {
-- 
cgit v0.10.2


From bfbbe44daf64d0ccf2de123179817f3557fb9237 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Thu, 16 Jun 2016 11:55:55 +0200
Subject: gpio: make library immune to error pointers

Most functions that take a GPIO descriptor in need to check the
descriptor for IS_ERR(). We do this mostly in the VALIDATE_DESC()
macro except for the gpiod_to_irq() function which needs special
handling.

Cc: stable@vger.kernel.org
Reported-by: Grygorii Strashko <grygorii.strashko@ti.com>
Reviewed-by: Grygorii Strashko <grygorii.strashko@ti.com>
Acked-by: Alexandre Courbot <acourbot@nvidia.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>

diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index f39bf05..570771e 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -1373,8 +1373,12 @@ done:
 #define VALIDATE_DESC(desc) do { \
 	if (!desc) \
 		return 0; \
+	if (IS_ERR(desc)) {						\
+		pr_warn("%s: invalid GPIO (errorpointer)\n", __func__); \
+		return PTR_ERR(desc); \
+	} \
 	if (!desc->gdev) { \
-		pr_warn("%s: invalid GPIO\n", __func__); \
+		pr_warn("%s: invalid GPIO (no device)\n", __func__); \
 		return -EINVAL; \
 	} \
 	if ( !desc->gdev->chip ) { \
@@ -1386,8 +1390,12 @@ done:
 #define VALIDATE_DESC_VOID(desc) do { \
 	if (!desc) \
 		return; \
+	if (IS_ERR(desc)) {						\
+		pr_warn("%s: invalid GPIO (errorpointer)\n", __func__); \
+		return; \
+	} \
 	if (!desc->gdev) { \
-		pr_warn("%s: invalid GPIO\n", __func__); \
+		pr_warn("%s: invalid GPIO (no device)\n", __func__); \
 		return; \
 	} \
 	if (!desc->gdev->chip) { \
@@ -2061,7 +2069,7 @@ int gpiod_to_irq(const struct gpio_desc *desc)
 	 * requires this function to not return zero on an invalid descriptor
 	 * but rather a negative error number.
 	 */
-	if (!desc || !desc->gdev || !desc->gdev->chip)
+	if (!desc || IS_ERR(desc) || !desc->gdev || !desc->gdev->chip)
 		return -EINVAL;
 
 	chip = desc->gdev->chip;
-- 
cgit v0.10.2


From 76bf3441ad6584ddc3aea1501927f21b21ba3a3a Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben.dooks@codethink.co.uk>
Date: Tue, 7 Jun 2016 17:49:09 +0100
Subject: ata: sata_mv: fix mis-conversion in mv_write_cached_reg()

Fix the signed issue in mv_write_cached_reg() where the laddr
is assigned from a (long)addr instead of (unsigned long)addr.

Fixes the following warnings:

drivers/ata/sata_mv.c:989:26: warning: cast removes address space of expression
drivers/ata/sata_mv.c:989:26: warning: cast removes address space of expression
drivers/ata/sata_mv.c:989:26: warning: cast removes address space of expression
drivers/ata/sata_mv.c:989:26: warning: cast removes address space of expression

Signed-off-by: Ben Dooks <ben.dooks@codethink.co.uk>
Signed-off-by: Tejun Heo <tj@kernel.org>

diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c
index bd74ee5..745489a 100644
--- a/drivers/ata/sata_mv.c
+++ b/drivers/ata/sata_mv.c
@@ -986,7 +986,7 @@ static inline void mv_write_cached_reg(void __iomem *addr, u32 *old, u32 new)
 		 * Looks like a lot of fuss, but it avoids an unnecessary
 		 * +1 usec read-after-write delay for unaffected registers.
 		 */
-		laddr = (long)addr & 0xffff;
+		laddr = (unsigned long)addr & 0xffff;
 		if (laddr >= 0x300 && laddr <= 0x33c) {
 			laddr &= 0x000f;
 			if (laddr == 0x4 || laddr == 0xc) {
-- 
cgit v0.10.2


From 041bf0225552044b85ce7ee7981e790d987d6ceb Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Thu, 16 Jun 2016 11:30:23 +0300
Subject: drm/amdgpu: missing bounds check in amdgpu_set_pp_force_state()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There is no limit on high "idx" can go.  It should be less than
ARRAY_SIZE(data.states) which is 16.

The "data" variable wasn't declared in that scope so I shifted the code
around a bit to make it work.  Also I made "idx" unsigned.

Fixes: f3898ea12fc1 ('drm/amd/powerplay: add some sysfs interfaces for powerplay.')
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
index 589b36e..0e13d80 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
@@ -270,30 +270,28 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev,
 	struct drm_device *ddev = dev_get_drvdata(dev);
 	struct amdgpu_device *adev = ddev->dev_private;
 	enum amd_pm_state_type state = 0;
-	long idx;
+	unsigned long idx;
 	int ret;
 
 	if (strlen(buf) == 1)
 		adev->pp_force_state_enabled = false;
-	else {
-		ret = kstrtol(buf, 0, &idx);
+	else if (adev->pp_enabled) {
+		struct pp_states_info data;
 
-		if (ret) {
+		ret = kstrtoul(buf, 0, &idx);
+		if (ret || idx >= ARRAY_SIZE(data.states)) {
 			count = -EINVAL;
 			goto fail;
 		}
 
-		if (adev->pp_enabled) {
-			struct pp_states_info data;
-			amdgpu_dpm_get_pp_num_states(adev, &data);
-			state = data.states[idx];
-			/* only set user selected power states */
-			if (state != POWER_STATE_TYPE_INTERNAL_BOOT &&
-				state != POWER_STATE_TYPE_DEFAULT) {
-				amdgpu_dpm_dispatch_task(adev,
-						AMD_PP_EVENT_ENABLE_USER_STATE, &state, NULL);
-				adev->pp_force_state_enabled = true;
-			}
+		amdgpu_dpm_get_pp_num_states(adev, &data);
+		state = data.states[idx];
+		/* only set user selected power states */
+		if (state != POWER_STATE_TYPE_INTERNAL_BOOT &&
+		    state != POWER_STATE_TYPE_DEFAULT) {
+			amdgpu_dpm_dispatch_task(adev,
+					AMD_PP_EVENT_ENABLE_USER_STATE, &state, NULL);
+			adev->pp_force_state_enabled = true;
 		}
 	}
 fail:
-- 
cgit v0.10.2


From 0b10029d826ed8c18a5f9d2f4abfa415d15cd1d3 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Fri, 17 Jun 2016 10:17:17 -0400
Subject: drm/amdgpu: fix num_rbs exposed to userspace (v2)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This was accidently broken for harvest cards when the
code was refactored for Polaris support.

v2: multiply by shader engines.  Noticed by Nicolai.

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 40a2370..d851ea1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -447,7 +447,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
 			dev_info.max_memory_clock = adev->pm.default_mclk * 10;
 		}
 		dev_info.enabled_rb_pipes_mask = adev->gfx.config.backend_enable_mask;
-		dev_info.num_rb_pipes = adev->gfx.config.num_rbs;
+		dev_info.num_rb_pipes = adev->gfx.config.max_backends_per_se *
+			adev->gfx.config.max_shader_engines;
 		dev_info.num_hw_gfx_contexts = adev->gfx.config.max_hw_contexts;
 		dev_info._pad = 0;
 		dev_info.ids_flags = 0;
-- 
cgit v0.10.2


From 8c5122e45a10a9262f872b53f151a592e870f905 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Date: Wed, 8 Jun 2016 17:28:29 -0600
Subject: IB/mlx4: Properly initialize GRH TClass and FlowLabel in AHs

When this code was reworked for IBoE support the order of assignments
for the sl_tclass_flowlabel got flipped around resulting in
TClass & FlowLabel being permanently set to 0 in the packet headers.

This breaks IB routers that rely on these headers, but only affects
kernel users - libmlx4 does this properly for user space.

Cc: stable@vger.kernel.org
Fixes: fa417f7b520e ("IB/mlx4: Add support for IBoE")
Signed-off-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c
index 105246f..5fc6233 100644
--- a/drivers/infiniband/hw/mlx4/ah.c
+++ b/drivers/infiniband/hw/mlx4/ah.c
@@ -47,6 +47,7 @@ static struct ib_ah *create_ib_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
 
 	ah->av.ib.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24));
 	ah->av.ib.g_slid  = ah_attr->src_path_bits;
+	ah->av.ib.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28);
 	if (ah_attr->ah_flags & IB_AH_GRH) {
 		ah->av.ib.g_slid   |= 0x80;
 		ah->av.ib.gid_index = ah_attr->grh.sgid_index;
@@ -64,7 +65,6 @@ static struct ib_ah *create_ib_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
 		       !(1 << ah->av.ib.stat_rate & dev->caps.stat_rate_support))
 			--ah->av.ib.stat_rate;
 	}
-	ah->av.ib.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28);
 
 	return &ah->ibah;
 }
-- 
cgit v0.10.2


From 37e07cdafc111dfb7ce27e70e73d900d7cf2920c Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@sandisk.com>
Date: Fri, 10 Jun 2016 11:08:25 -0700
Subject: IB/cma: Make the code easier to verify

Static source code analysis tools like smatch cannot handle functions
that lock or not lock a mutex depending on the value of the arguments.
Hence inline the function cma_disable_callback(). Additionally, this
patch realizes a small performance optimization by reducing the number of
mutex_lock() and mutex_unlock() calls in the modified functions. With
this patch applied smatch no longer complains about source file cma.c.
Without this patch smatch reports the following for this source file:

drivers/infiniband/core/cma.c:1959: cma_req_handler() warn: inconsistent returns 'mutex:&listen_id->handler_mutex'.
  Locked on:   line 1880
               line 1959
  Unlocked on: line 1941
drivers/infiniband/core/cma.c:2112: iw_conn_req_handler() warn: inconsistent returns 'mutex:&listen_id->handler_mutex'.
  Locked on:   line 2048
  Unlocked on: line 2112

Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Cc: Sean Hefty <sean.hefty@intel.com>
Cc: Steve Wise <swise@opengridcomputing.com>
Cc: Leon Romanovsky <leonro@mellanox.com>
Acked-by: Sean Hefty <sean.hefty@intel.com>
Reviewed-by: Steve Wise <swise@opengridcomputing.com>
Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index f0c91ba..c58ee77 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -708,17 +708,6 @@ static void cma_deref_id(struct rdma_id_private *id_priv)
 		complete(&id_priv->comp);
 }
 
-static int cma_disable_callback(struct rdma_id_private *id_priv,
-				enum rdma_cm_state state)
-{
-	mutex_lock(&id_priv->handler_mutex);
-	if (id_priv->state != state) {
-		mutex_unlock(&id_priv->handler_mutex);
-		return -EINVAL;
-	}
-	return 0;
-}
-
 struct rdma_cm_id *rdma_create_id(struct net *net,
 				  rdma_cm_event_handler event_handler,
 				  void *context, enum rdma_port_space ps,
@@ -1671,11 +1660,12 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
 	struct rdma_cm_event event;
 	int ret = 0;
 
+	mutex_lock(&id_priv->handler_mutex);
 	if ((ib_event->event != IB_CM_TIMEWAIT_EXIT &&
-		cma_disable_callback(id_priv, RDMA_CM_CONNECT)) ||
+	     id_priv->state != RDMA_CM_CONNECT) ||
 	    (ib_event->event == IB_CM_TIMEWAIT_EXIT &&
-		cma_disable_callback(id_priv, RDMA_CM_DISCONNECT)))
-		return 0;
+	     id_priv->state != RDMA_CM_DISCONNECT))
+		goto out;
 
 	memset(&event, 0, sizeof event);
 	switch (ib_event->event) {
@@ -1870,7 +1860,7 @@ static int cma_check_req_qp_type(struct rdma_cm_id *id, struct ib_cm_event *ib_e
 
 static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
 {
-	struct rdma_id_private *listen_id, *conn_id;
+	struct rdma_id_private *listen_id, *conn_id = NULL;
 	struct rdma_cm_event event;
 	struct net_device *net_dev;
 	int offset, ret;
@@ -1884,9 +1874,10 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
 		goto net_dev_put;
 	}
 
-	if (cma_disable_callback(listen_id, RDMA_CM_LISTEN)) {
+	mutex_lock(&listen_id->handler_mutex);
+	if (listen_id->state != RDMA_CM_LISTEN) {
 		ret = -ECONNABORTED;
-		goto net_dev_put;
+		goto err1;
 	}
 
 	memset(&event, 0, sizeof event);
@@ -1976,8 +1967,9 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
 	struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr;
 	struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr;
 
-	if (cma_disable_callback(id_priv, RDMA_CM_CONNECT))
-		return 0;
+	mutex_lock(&id_priv->handler_mutex);
+	if (id_priv->state != RDMA_CM_CONNECT)
+		goto out;
 
 	memset(&event, 0, sizeof event);
 	switch (iw_event->event) {
@@ -2029,6 +2021,7 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
 		return ret;
 	}
 
+out:
 	mutex_unlock(&id_priv->handler_mutex);
 	return ret;
 }
@@ -2039,13 +2032,15 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
 	struct rdma_cm_id *new_cm_id;
 	struct rdma_id_private *listen_id, *conn_id;
 	struct rdma_cm_event event;
-	int ret;
+	int ret = -ECONNABORTED;
 	struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr;
 	struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr;
 
 	listen_id = cm_id->context;
-	if (cma_disable_callback(listen_id, RDMA_CM_LISTEN))
-		return -ECONNABORTED;
+
+	mutex_lock(&listen_id->handler_mutex);
+	if (listen_id->state != RDMA_CM_LISTEN)
+		goto out;
 
 	/* Create a new RDMA id for the new IW CM ID */
 	new_cm_id = rdma_create_id(listen_id->id.route.addr.dev_addr.net,
@@ -3216,8 +3211,9 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
 	struct ib_cm_sidr_rep_event_param *rep = &ib_event->param.sidr_rep_rcvd;
 	int ret = 0;
 
-	if (cma_disable_callback(id_priv, RDMA_CM_CONNECT))
-		return 0;
+	mutex_lock(&id_priv->handler_mutex);
+	if (id_priv->state != RDMA_CM_CONNECT)
+		goto out;
 
 	memset(&event, 0, sizeof event);
 	switch (ib_event->event) {
@@ -3673,12 +3669,13 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
 	struct rdma_id_private *id_priv;
 	struct cma_multicast *mc = multicast->context;
 	struct rdma_cm_event event;
-	int ret;
+	int ret = 0;
 
 	id_priv = mc->id_priv;
-	if (cma_disable_callback(id_priv, RDMA_CM_ADDR_BOUND) &&
-	    cma_disable_callback(id_priv, RDMA_CM_ADDR_RESOLVED))
-		return 0;
+	mutex_lock(&id_priv->handler_mutex);
+	if (id_priv->state != RDMA_CM_ADDR_BOUND &&
+	    id_priv->state != RDMA_CM_ADDR_RESOLVED)
+		goto out;
 
 	if (!status)
 		status = cma_set_qkey(id_priv, be32_to_cpu(multicast->rec.qkey));
@@ -3720,6 +3717,7 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
 		return 0;
 	}
 
+out:
 	mutex_unlock(&id_priv->handler_mutex);
 	return 0;
 }
-- 
cgit v0.10.2


From b4ba6633ea153266429f16614029ab1578815390 Mon Sep 17 00:00:00 2001
From: Jubin John <jubin.john@intel.com>
Date: Thu, 9 Jun 2016 07:51:08 -0700
Subject: IB/hfi1: Fix credit return threshold adjustment

The credit return threshold adjustment on mtu change algorithm does not
take into account all the kernel send contexts that are assigned per VL.
Use the pio send context map to adjust the credit return thresholds for
all the allocated and assigned kernel send contexts based on the MTU
adjustment per VL.

The pio send context map can be changed dynamically based on the actual
number of operational vls which is set by the fabric manager. When this
happens update the credit return threshold values for all the remapped
kernel send contexts.

Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Reviewed-by: Jianxin Xiong <jianxin.xiong@intel.com>
Signed-off-by: Jubin John <jubin.john@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index 81619fb..c533d44 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -9777,7 +9777,7 @@ static void set_send_length(struct hfi1_pportdata *ppd)
 	u64 len1 = 0, len2 = (((dd->vld[15].mtu + max_hb) >> 2)
 			      & SEND_LEN_CHECK1_LEN_VL15_MASK) <<
 		SEND_LEN_CHECK1_LEN_VL15_SHIFT;
-	int i;
+	int i, j;
 	u32 thres;
 
 	for (i = 0; i < ppd->vls_supported; i++) {
@@ -9801,7 +9801,10 @@ static void set_send_length(struct hfi1_pportdata *ppd)
 			    sc_mtu_to_threshold(dd->vld[i].sc,
 						dd->vld[i].mtu,
 						dd->rcd[0]->rcvhdrqentsize));
-		sc_set_cr_threshold(dd->vld[i].sc, thres);
+		for (j = 0; j < INIT_SC_PER_VL; j++)
+			sc_set_cr_threshold(
+					pio_select_send_context_vl(dd, j, i),
+					    thres);
 	}
 	thres = min(sc_percent_to_threshold(dd->vld[15].sc, 50),
 		    sc_mtu_to_threshold(dd->vld[15].sc,
diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c
index d5edb1a..e0e1ff2 100644
--- a/drivers/infiniband/hw/hfi1/pio.c
+++ b/drivers/infiniband/hw/hfi1/pio.c
@@ -1798,6 +1798,21 @@ static void pio_map_rcu_callback(struct rcu_head *list)
 }
 
 /*
+ * Set credit return threshold for the kernel send context
+ */
+static void set_threshold(struct hfi1_devdata *dd, int scontext, int i)
+{
+	u32 thres;
+
+	thres = min(sc_percent_to_threshold(dd->kernel_send_context[scontext],
+					    50),
+		    sc_mtu_to_threshold(dd->kernel_send_context[scontext],
+					dd->vld[i].mtu,
+					dd->rcd[0]->rcvhdrqentsize));
+	sc_set_cr_threshold(dd->kernel_send_context[scontext], thres);
+}
+
+/*
  * pio_map_init - called when #vls change
  * @dd: hfi1_devdata
  * @port: port number
@@ -1872,11 +1887,16 @@ int pio_map_init(struct hfi1_devdata *dd, u8 port, u8 num_vls, u8 *vl_scontexts)
 			if (!newmap->map[i])
 				goto bail;
 			newmap->map[i]->mask = (1 << ilog2(sz)) - 1;
-			/* assign send contexts */
+			/*
+			 * assign send contexts and
+			 * adjust credit return threshold
+			 */
 			for (j = 0; j < sz; j++) {
-				if (dd->kernel_send_context[scontext])
+				if (dd->kernel_send_context[scontext]) {
 					newmap->map[i]->ksc[j] =
 					dd->kernel_send_context[scontext];
+					set_threshold(dd, scontext, i);
+				}
 				if (++scontext >= first_scontext +
 						  vl_scontexts[i])
 					/* wrap back to first send context */
-- 
cgit v0.10.2


From 96605672a4172f6e31f31ce29ee27fef68011de0 Mon Sep 17 00:00:00 2001
From: Brian Welty <brian.welty@intel.com>
Date: Thu, 9 Jun 2016 07:51:14 -0700
Subject: IB/rdmavt: Correct required callback functions for MODIFY_QP

Functions required for MODIFY_PORT were incorrectly being
required for MODIFY_QP.

Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Brian Welty <brian.welty@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c
index e1cc2cc..30c4fda 100644
--- a/drivers/infiniband/sw/rdmavt/vt.c
+++ b/drivers/infiniband/sw/rdmavt/vt.c
@@ -501,9 +501,7 @@ static noinline int check_support(struct rvt_dev_info *rdi, int verb)
 			    !rdi->driver_f.quiesce_qp ||
 			    !rdi->driver_f.notify_error_qp ||
 			    !rdi->driver_f.mtu_from_qp ||
-			    !rdi->driver_f.mtu_to_path_mtu ||
-			    !rdi->driver_f.shut_down_port ||
-			    !rdi->driver_f.cap_mask_chg)
+			    !rdi->driver_f.mtu_to_path_mtu)
 				return -EINVAL;
 		break;
 
-- 
cgit v0.10.2


From 501edc42446e89fa67fb6ef2c9afb50792c310c0 Mon Sep 17 00:00:00 2001
From: Brian Welty <brian.welty@intel.com>
Date: Thu, 9 Jun 2016 07:51:20 -0700
Subject: IB/rdmavt: Correct warning during QPN allocation

Correct calculation of the low order bits which should be unset
based on use of qos_shift parameter when assigning QPN.

Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Brian Welty <brian.welty@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index 7de5134..c3e0d61 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -369,8 +369,8 @@ static int alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt,
 			/* wrap to first map page, invert bit 0 */
 			offset = qpt->incr | ((offset & 1) ^ 1);
 		}
-		/* there can be no bits at shift and below */
-		WARN_ON(offset & (rdi->dparms.qos_shift - 1));
+		/* there can be no set bits in low-order QoS bits */
+		WARN_ON(offset & (BIT(rdi->dparms.qos_shift) - 1));
 		qpn = mk_qpn(qpt, map, offset);
 	}
 
-- 
cgit v0.10.2


From c3c64a951cbdb6096d02dcc339a9c807ce1e976a Mon Sep 17 00:00:00 2001
From: Jubin John <jubin.john@intel.com>
Date: Thu, 9 Jun 2016 07:51:27 -0700
Subject: IB/hfi1: Increase packet egress timeout

The current value of 500us for the packet egress timeout is too small
which causes the host to declare failure on draining packets too early
and unnecessarily bounces the link. Increase this to 50ms taking into
account the switch packet discard timer default and the worst case
per-VL package drainage rate.

Reviewed-by: Dean Luick <dean.luick@intel.com>
Signed-off-by: Jubin John <jubin.john@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c
index e0e1ff2..d402245 100644
--- a/drivers/infiniband/hw/hfi1/pio.c
+++ b/drivers/infiniband/hw/hfi1/pio.c
@@ -995,7 +995,7 @@ static void sc_wait_for_packet_egress(struct send_context *sc, int pause)
 		/* counter is reset if occupancy count changes */
 		if (reg != reg_prev)
 			loop = 0;
-		if (loop > 500) {
+		if (loop > 50000) {
 			/* timed out - bounce the link */
 			dd_dev_err(dd,
 				   "%s: context %u(%u) timeout waiting for packets to egress, remaining count %u, bouncing link\n",
-- 
cgit v0.10.2


From ca2f30a0a6786e6b08eeb8abb2bbb8df58709d6e Mon Sep 17 00:00:00 2001
From: Ira Weiny <ira.weiny@intel.com>
Date: Thu, 9 Jun 2016 07:51:33 -0700
Subject: IB/hfi1: Prevent context loss

If a context has already been assigned to an FD, prevent
another assignment.

Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Ira Weiny <ira.weiny@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c
index 7a5b0e6..c702a00 100644
--- a/drivers/infiniband/hw/hfi1/file_ops.c
+++ b/drivers/infiniband/hw/hfi1/file_ops.c
@@ -203,6 +203,9 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
 
 	switch (cmd) {
 	case HFI1_IOCTL_ASSIGN_CTXT:
+		if (uctxt)
+			return -EINVAL;
+
 		if (copy_from_user(&uinfo,
 				   (struct hfi1_user_info __user *)arg,
 				   sizeof(uinfo)))
-- 
cgit v0.10.2


From 5e9ef24619486213223053678eb9175630ef6bf9 Mon Sep 17 00:00:00 2001
From: Ira Weiny <ira.weiny@intel.com>
Date: Thu, 9 Jun 2016 07:51:39 -0700
Subject: IB/qib: Prevent context loss

If a context has already been assigned to an FD, prevent
another assignment.

Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Ira Weiny <ira.weiny@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c
index ff946d5..382466a 100644
--- a/drivers/infiniband/hw/qib/qib_file_ops.c
+++ b/drivers/infiniband/hw/qib/qib_file_ops.c
@@ -2178,6 +2178,11 @@ static ssize_t qib_write(struct file *fp, const char __user *data,
 
 	switch (cmd.type) {
 	case QIB_CMD_ASSIGN_CTXT:
+		if (rcd) {
+			ret = -EINVAL;
+			goto bail;
+		}
+
 		ret = qib_assign_ctxt(fp, &cmd.cmd.user_info);
 		if (ret)
 			goto bail;
-- 
cgit v0.10.2


From 93dd0a097859a174817ea94ec55bfc29c5706454 Mon Sep 17 00:00:00 2001
From: Tadeusz Struk <tadeusz.struk@intel.com>
Date: Thu, 9 Jun 2016 07:51:45 -0700
Subject: IB/hfi1: Fix potential NULL ptr dereference

This fixes potential NULL ptr dereference because IS_ERR(dd) doesn't
handle NULL. Fix the issue by initializing the pointer with a not NULL
error code.

Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Tadeusz Struk <tadeusz.struk@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
index 0d28a5a..eed971c 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -1383,7 +1383,7 @@ static void postinit_cleanup(struct hfi1_devdata *dd)
 static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
 	int ret = 0, j, pidx, initfail;
-	struct hfi1_devdata *dd = NULL;
+	struct hfi1_devdata *dd = ERR_PTR(-EINVAL);
 	struct hfi1_pportdata *ppd;
 
 	/* First, lock the non-writable module parameters */
-- 
cgit v0.10.2


From c078f0dd01b73c70b92a660cb1ce3dfc3cbf2903 Mon Sep 17 00:00:00 2001
From: Tadeusz Struk <tadeusz.struk@intel.com>
Date: Thu, 9 Jun 2016 07:51:51 -0700
Subject: IB/hfi1: Fix potential buffer overflow

This fixes potential buffer overflow because the sprintf function
doesn't check buffer boundaries. Use snprintf instead.

Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Tadeusz Struk <tadeusz.struk@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/hw/hfi1/qsfp.c b/drivers/infiniband/hw/hfi1/qsfp.c
index 2441669..9fb5616 100644
--- a/drivers/infiniband/hw/hfi1/qsfp.c
+++ b/drivers/infiniband/hw/hfi1/qsfp.c
@@ -579,7 +579,8 @@ int qsfp_dump(struct hfi1_pportdata *ppd, char *buf, int len)
 
 	if (ppd->qsfp_info.cache_valid) {
 		if (QSFP_IS_CU(cache[QSFP_MOD_TECH_OFFS]))
-			sprintf(lenstr, "%dM ", cache[QSFP_MOD_LEN_OFFS]);
+			snprintf(lenstr, sizeof(lenstr), "%dM ",
+				 cache[QSFP_MOD_LEN_OFFS]);
 
 		power_byte = cache[QSFP_MOD_PWR_OFFS];
 		sofar += scnprintf(buf + sofar, len - sofar, "PWR:%.3sW\n",
-- 
cgit v0.10.2


From 3ec5fa28c9ea54fb172859d2612a0be6526e4dc1 Mon Sep 17 00:00:00 2001
From: Sebastian Sanchez <sebastian.sanchez@intel.com>
Date: Thu, 9 Jun 2016 07:51:57 -0700
Subject: IB/hfi1: Remove FULL_MGMT_P_KEY from pkey table at link up

FULL_MGMT_P_KEY doesn't get cleared from the pkey table at link bounce
because the link down and link bounce code paths are different when
moving a QSFP cable on a switch. This causes an HFI unit connected to a
switch to try to be initialized to an FM node when the QSFP cable is
moved from a MgmtAllowed=NO port to a MgmtAllowed=YES port and back to a
MgmtAllowed=NO port. Remove FULL_MGMT_P_KEY from pkey table at link up.

Reviewed-by: Dean Luick <dean.luick@intel.com>
Signed-off-by: Sebastian Sanchez <sebastian.sanchez@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index c533d44..3487fdf 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -1037,7 +1037,7 @@ static void dc_shutdown(struct hfi1_devdata *);
 static void dc_start(struct hfi1_devdata *);
 static int qos_rmt_entries(struct hfi1_devdata *dd, unsigned int *mp,
 			   unsigned int *np);
-static void remove_full_mgmt_pkey(struct hfi1_pportdata *ppd);
+static void clear_full_mgmt_pkey(struct hfi1_pportdata *ppd);
 
 /*
  * Error interrupt table entry.  This is used as input to the interrupt
@@ -6962,8 +6962,6 @@ void handle_link_down(struct work_struct *work)
 	}
 
 	reset_neighbor_info(ppd);
-	if (ppd->mgmt_allowed)
-		remove_full_mgmt_pkey(ppd);
 
 	/* disable the port */
 	clear_rcvctrl(ppd->dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
@@ -7072,10 +7070,12 @@ static void add_full_mgmt_pkey(struct hfi1_pportdata *ppd)
 	(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0);
 }
 
-static void remove_full_mgmt_pkey(struct hfi1_pportdata *ppd)
+static void clear_full_mgmt_pkey(struct hfi1_pportdata *ppd)
 {
-	ppd->pkeys[2] = 0;
-	(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0);
+	if (ppd->pkeys[2] != 0) {
+		ppd->pkeys[2] = 0;
+		(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0);
+	}
 }
 
 /*
@@ -9168,6 +9168,13 @@ int start_link(struct hfi1_pportdata *ppd)
 		return 0;
 	}
 
+	/*
+	 * FULL_MGMT_P_KEY is cleared from the pkey table, so that the
+	 * pkey table can be configured properly if the HFI unit is connected
+	 * to switch port with MgmtAllowed=NO
+	 */
+	clear_full_mgmt_pkey(ppd);
+
 	return set_link_state(ppd, HLS_DN_POLL);
 }
 
-- 
cgit v0.10.2


From 34d351f8ddf6dee24d739c4b00a4404e48089a04 Mon Sep 17 00:00:00 2001
From: Sebastian Sanchez <sebastian.sanchez@intel.com>
Date: Thu, 9 Jun 2016 07:52:03 -0700
Subject: IB/hfi1: Send a pkey change event on driver pkey update

Swapping a cable from a "Mgmt Allowed=No" switch port to a
"Mgmt Allowed=Yes" switch port doesn't send a pkey change
notification. Therefore, the link doesn't become active as
the oib_utils layer uses an old pkey table cache.

Fix by ensuring the pkey change notification is sent when
the table is changed both explicitly by the FM and implicitly
by the driver via a cable swap.

Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Sebastian Sanchez <sebastian.sanchez@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index 3487fdf..f5de851 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -7068,6 +7068,7 @@ static void add_full_mgmt_pkey(struct hfi1_pportdata *ppd)
 			    __func__, ppd->pkeys[2], FULL_MGMT_P_KEY);
 	ppd->pkeys[2] = FULL_MGMT_P_KEY;
 	(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0);
+	hfi1_event_pkey_change(ppd->dd, ppd->port);
 }
 
 static void clear_full_mgmt_pkey(struct hfi1_pportdata *ppd)
@@ -7075,6 +7076,7 @@ static void clear_full_mgmt_pkey(struct hfi1_pportdata *ppd)
 	if (ppd->pkeys[2] != 0) {
 		ppd->pkeys[2] = 0;
 		(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0);
+		hfi1_event_pkey_change(ppd->dd, ppd->port);
 	}
 }
 
diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c
index 2190295..fca07a1 100644
--- a/drivers/infiniband/hw/hfi1/mad.c
+++ b/drivers/infiniband/hw/hfi1/mad.c
@@ -78,6 +78,16 @@ static inline void clear_opa_smp_data(struct opa_smp *smp)
 	memset(data, 0, size);
 }
 
+void hfi1_event_pkey_change(struct hfi1_devdata *dd, u8 port)
+{
+	struct ib_event event;
+
+	event.event = IB_EVENT_PKEY_CHANGE;
+	event.device = &dd->verbs_dev.rdi.ibdev;
+	event.element.port_num = port;
+	ib_dispatch_event(&event);
+}
+
 static void send_trap(struct hfi1_ibport *ibp, void *data, unsigned len)
 {
 	struct ib_mad_send_buf *send_buf;
@@ -1418,15 +1428,10 @@ static int set_pkeys(struct hfi1_devdata *dd, u8 port, u16 *pkeys)
 	}
 
 	if (changed) {
-		struct ib_event event;
-
 		(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0);
-
-		event.event = IB_EVENT_PKEY_CHANGE;
-		event.device = &dd->verbs_dev.rdi.ibdev;
-		event.element.port_num = port;
-		ib_dispatch_event(&event);
+		hfi1_event_pkey_change(dd, port);
 	}
+
 	return 0;
 }
 
diff --git a/drivers/infiniband/hw/hfi1/mad.h b/drivers/infiniband/hw/hfi1/mad.h
index 55ee086..8b734aa 100644
--- a/drivers/infiniband/hw/hfi1/mad.h
+++ b/drivers/infiniband/hw/hfi1/mad.h
@@ -434,4 +434,6 @@ struct sc2vlnt {
 		    COUNTER_MASK(1, 3) | \
 		    COUNTER_MASK(1, 4))
 
+void hfi1_event_pkey_change(struct hfi1_devdata *dd, u8 port);
+
 #endif				/* _HFI1_MAD_H */
-- 
cgit v0.10.2


From f1d048f24e66ba85d3dabf3d076cefa5f2b546b0 Mon Sep 17 00:00:00 2001
From: Jon Paul Maloy <jon.maloy@ericsson.com>
Date: Fri, 17 Jun 2016 06:35:57 -0400
Subject: tipc: fix socket timer deadlock

We sometimes observe a 'deadly embrace' type deadlock occurring
between mutually connected sockets on the same node. This happens
when the one-hour peer supervision timers happen to expire
simultaneously in both sockets.

The scenario is as follows:

CPU 1:                          CPU 2:
--------                        --------
tipc_sk_timeout(sk1)            tipc_sk_timeout(sk2)
  lock(sk1.slock)                 lock(sk2.slock)
  msg_create(probe)               msg_create(probe)
  unlock(sk1.slock)               unlock(sk2.slock)
  tipc_node_xmit_skb()            tipc_node_xmit_skb()
    tipc_node_xmit()                tipc_node_xmit()
      tipc_sk_rcv(sk2)                tipc_sk_rcv(sk1)
        lock(sk2.slock)                 lock((sk1.slock)
        filter_rcv()                    filter_rcv()
          tipc_sk_proto_rcv()             tipc_sk_proto_rcv()
            msg_create(probe_rsp)           msg_create(probe_rsp)
            tipc_sk_respond()               tipc_sk_respond()
              tipc_node_xmit_skb()            tipc_node_xmit_skb()
                tipc_node_xmit()                tipc_node_xmit()
                  tipc_sk_rcv(sk1)                tipc_sk_rcv(sk2)
                    lock((sk1.slock)                lock((sk2.slock)
                    ===> DEADLOCK                   ===> DEADLOCK

Further analysis reveals that there are three different locations in the
socket code where tipc_sk_respond() is called within the context of the
socket lock, with ensuing risk of similar deadlocks.

We now solve this by passing a buffer queue along with all upcalls where
sk_lock.slock may potentially be held. Response or rejected message
buffers are accumulated into this queue instead of being sent out
directly, and only sent once we know we are safely outside the slock
context.

Reported-by: GUNA <gbalasun@gmail.com>
Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 88bfcd7..c49b8df 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -796,9 +796,11 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
  * @tsk: receiving socket
  * @skb: pointer to message buffer.
  */
-static void tipc_sk_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb)
+static void tipc_sk_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb,
+			      struct sk_buff_head *xmitq)
 {
 	struct sock *sk = &tsk->sk;
+	u32 onode = tsk_own_node(tsk);
 	struct tipc_msg *hdr = buf_msg(skb);
 	int mtyp = msg_type(hdr);
 	bool conn_cong;
@@ -811,7 +813,8 @@ static void tipc_sk_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb)
 
 	if (mtyp == CONN_PROBE) {
 		msg_set_type(hdr, CONN_PROBE_REPLY);
-		tipc_sk_respond(sk, skb, TIPC_OK);
+		if (tipc_msg_reverse(onode, &skb, TIPC_OK))
+			__skb_queue_tail(xmitq, skb);
 		return;
 	} else if (mtyp == CONN_ACK) {
 		conn_cong = tsk_conn_cong(tsk);
@@ -1686,7 +1689,8 @@ static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *skb)
  *
  * Returns true if message was added to socket receive queue, otherwise false
  */
-static bool filter_rcv(struct sock *sk, struct sk_buff *skb)
+static bool filter_rcv(struct sock *sk, struct sk_buff *skb,
+		       struct sk_buff_head *xmitq)
 {
 	struct socket *sock = sk->sk_socket;
 	struct tipc_sock *tsk = tipc_sk(sk);
@@ -1696,7 +1700,7 @@ static bool filter_rcv(struct sock *sk, struct sk_buff *skb)
 	int usr = msg_user(hdr);
 
 	if (unlikely(msg_user(hdr) == CONN_MANAGER)) {
-		tipc_sk_proto_rcv(tsk, skb);
+		tipc_sk_proto_rcv(tsk, skb, xmitq);
 		return false;
 	}
 
@@ -1739,7 +1743,8 @@ static bool filter_rcv(struct sock *sk, struct sk_buff *skb)
 	return true;
 
 reject:
-	tipc_sk_respond(sk, skb, err);
+	if (tipc_msg_reverse(tsk_own_node(tsk), &skb, err))
+		__skb_queue_tail(xmitq, skb);
 	return false;
 }
 
@@ -1755,9 +1760,24 @@ reject:
 static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb)
 {
 	unsigned int truesize = skb->truesize;
+	struct sk_buff_head xmitq;
+	u32 dnode, selector;
 
-	if (likely(filter_rcv(sk, skb)))
+	__skb_queue_head_init(&xmitq);
+
+	if (likely(filter_rcv(sk, skb, &xmitq))) {
 		atomic_add(truesize, &tipc_sk(sk)->dupl_rcvcnt);
+		return 0;
+	}
+
+	if (skb_queue_empty(&xmitq))
+		return 0;
+
+	/* Send response/rejected message */
+	skb = __skb_dequeue(&xmitq);
+	dnode = msg_destnode(buf_msg(skb));
+	selector = msg_origport(buf_msg(skb));
+	tipc_node_xmit_skb(sock_net(sk), skb, dnode, selector);
 	return 0;
 }
 
@@ -1771,12 +1791,13 @@ static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb)
  * Caller must hold socket lock
  */
 static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk,
-			    u32 dport)
+			    u32 dport, struct sk_buff_head *xmitq)
 {
+	unsigned long time_limit = jiffies + 2;
+	struct sk_buff *skb;
 	unsigned int lim;
 	atomic_t *dcnt;
-	struct sk_buff *skb;
-	unsigned long time_limit = jiffies + 2;
+	u32 onode;
 
 	while (skb_queue_len(inputq)) {
 		if (unlikely(time_after_eq(jiffies, time_limit)))
@@ -1788,7 +1809,7 @@ static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk,
 
 		/* Add message directly to receive queue if possible */
 		if (!sock_owned_by_user(sk)) {
-			filter_rcv(sk, skb);
+			filter_rcv(sk, skb, xmitq);
 			continue;
 		}
 
@@ -1801,7 +1822,9 @@ static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk,
 			continue;
 
 		/* Overload => reject message back to sender */
-		tipc_sk_respond(sk, skb, TIPC_ERR_OVERLOAD);
+		onode = tipc_own_addr(sock_net(sk));
+		if (tipc_msg_reverse(onode, &skb, TIPC_ERR_OVERLOAD))
+			__skb_queue_tail(xmitq, skb);
 		break;
 	}
 }
@@ -1814,12 +1837,14 @@ static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk,
  */
 void tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq)
 {
+	struct sk_buff_head xmitq;
 	u32 dnode, dport = 0;
 	int err;
 	struct tipc_sock *tsk;
 	struct sock *sk;
 	struct sk_buff *skb;
 
+	__skb_queue_head_init(&xmitq);
 	while (skb_queue_len(inputq)) {
 		dport = tipc_skb_peek_port(inputq, dport);
 		tsk = tipc_sk_lookup(net, dport);
@@ -1827,9 +1852,14 @@ void tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq)
 		if (likely(tsk)) {
 			sk = &tsk->sk;
 			if (likely(spin_trylock_bh(&sk->sk_lock.slock))) {
-				tipc_sk_enqueue(inputq, sk, dport);
+				tipc_sk_enqueue(inputq, sk, dport, &xmitq);
 				spin_unlock_bh(&sk->sk_lock.slock);
 			}
+			/* Send pending response/rejected messages, if any */
+			while ((skb = __skb_dequeue(&xmitq))) {
+				dnode = msg_destnode(buf_msg(skb));
+				tipc_node_xmit_skb(net, skb, dnode, dport);
+			}
 			sock_put(sk);
 			continue;
 		}
-- 
cgit v0.10.2


From 63dcdd35c1552ca0c911e98ba3389a0729a457f4 Mon Sep 17 00:00:00 2001
From: Nogah Frankel <nogahf@mellanox.com>
Date: Fri, 17 Jun 2016 15:09:05 +0200
Subject: mlxsw: spectrum: Don't count internal TX header bytes to stats

Stop the SW TX counter from counting the TX header bytes
since they are not being sent out.

Fixes: 56ade8fe3fe1 ("mlxsw: spectrum: Add initial support for Spectrum ASIC")
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Nogah Frankel <nogahf@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 6f9e3dd..660429e 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -408,7 +408,11 @@ static netdev_tx_t mlxsw_sp_port_xmit(struct sk_buff *skb,
 	}
 
 	mlxsw_sp_txhdr_construct(skb, &tx_info);
-	len = skb->len;
+	/* TX header is consumed by HW on the way so we shouldn't count its
+	 * bytes as being sent.
+	 */
+	len = skb->len - MLXSW_TXHDR_LEN;
+
 	/* Due to a race we might fail here because of a full queue. In that
 	 * unlikely case we simply drop the packet.
 	 */
-- 
cgit v0.10.2


From 4e239fac7c6b9d703e83c81b52ec9fec00c50129 Mon Sep 17 00:00:00 2001
From: Nogah Frankel <nogahf@mellanox.com>
Date: Fri, 17 Jun 2016 15:09:06 +0200
Subject: mlxsw: switchx2: Don't count internal TX header bytes to stats

Stop the SW TX counter from counting the TX header bytes
since they are not being sent out.

Fixes: e577516b9db3 ("mlxsw: Fix use-after-free bug in mlxsw_sx_port_xmit")
Signed-off-by: Nogah Frankel <nogahf@mellanox.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
index 3842eab..25f658b 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
@@ -316,7 +316,10 @@ static netdev_tx_t mlxsw_sx_port_xmit(struct sk_buff *skb,
 		}
 	}
 	mlxsw_sx_txhdr_construct(skb, &tx_info);
-	len = skb->len;
+	/* TX header is consumed by HW on the way so we shouldn't count its
+	 * bytes as being sent.
+	 */
+	len = skb->len - MLXSW_TXHDR_LEN;
 	/* Due to a race we might fail here because of a full queue. In that
 	 * unlikely case we simply drop the packet.
 	 */
-- 
cgit v0.10.2


From a9836cbb5fc885d3b8f3b91b2d47525f7269dec1 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 17 Jun 2016 18:15:30 +0200
Subject: net: tilegx: use correct timespec64 type

The conversion to the 64-bit time based ptp methods left two instances
of 'struct timespec' in place. This is harmless because 64-bit
architectures define timespec64 as timespec, and this driver is
not used on 32-bit machines.

However, using 'struct timespec64' directly is obviously the right
thing to do, and will help us remove 'struct timespec' in the future.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Fixes: b9acf24f779c ("ptp: tilegx: convert to the 64 bit get/set time methods.")
Acked-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/tile/tilegx.c b/drivers/net/ethernet/tile/tilegx.c
index 0a15acc..11213a3 100644
--- a/drivers/net/ethernet/tile/tilegx.c
+++ b/drivers/net/ethernet/tile/tilegx.c
@@ -462,7 +462,7 @@ static void tile_tx_timestamp(struct sk_buff *skb, int instance)
 	if (unlikely((shtx->tx_flags & SKBTX_HW_TSTAMP) != 0)) {
 		struct mpipe_data *md = &mpipe_data[instance];
 		struct skb_shared_hwtstamps shhwtstamps;
-		struct timespec ts;
+		struct timespec64 ts;
 
 		shtx->tx_flags |= SKBTX_IN_PROGRESS;
 		gxio_mpipe_get_timestamp(&md->context, &ts);
@@ -886,9 +886,9 @@ static struct ptp_clock_info ptp_mpipe_caps = {
 /* Sync mPIPE's timestamp up with Linux system time and register PTP clock. */
 static void register_ptp_clock(struct net_device *dev, struct mpipe_data *md)
 {
-	struct timespec ts;
+	struct timespec64 ts;
 
-	getnstimeofday(&ts);
+	ktime_get_ts64(&ts);
 	gxio_mpipe_set_timestamp(&md->context, &ts);
 
 	mutex_init(&md->ptp_lock);
-- 
cgit v0.10.2


From 3bb549ae4c51028c1930528ae9fcd6eca0474724 Mon Sep 17 00:00:00 2001
From: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Date: Fri, 17 Jun 2016 16:12:12 -0400
Subject: RDS: TCP: rds_tcp_accept_one() should transition socket from
 RESETTING to UP

The state of the rds_connection after rds_tcp_reset_callbacks() would
be RDS_CONN_RESETTING and this is the value that should be passed
by rds_tcp_accept_one()  to rds_connect_path_complete() to transition
the socket to RDS_CONN_UP.

Fixes: b5c21c0947c1 ("RDS: TCP: fix race windows in send-path quiescence
by rds_tcp_accept_one()")
Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index 686b1d0..245542c 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -138,7 +138,7 @@ int rds_tcp_accept_one(struct socket *sock)
 			rds_tcp_reset_callbacks(new_sock, conn);
 			conn->c_outgoing = 0;
 			/* rds_connect_path_complete() marks RDS_CONN_UP */
-			rds_connect_path_complete(conn, RDS_CONN_DISCONNECTING);
+			rds_connect_path_complete(conn, RDS_CONN_RESETTING);
 		}
 	} else {
 		rds_tcp_set_callbacks(new_sock, conn);
-- 
cgit v0.10.2


From 3720b69bafe9ccb989b9a8604f3e3f89b3610685 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hans.verkuil@cisco.com>
Date: Sat, 18 Jun 2016 10:10:26 -0700
Subject: Input: add BUS_CEC type

Inputs can come in over the HDMI CEC bus, so add a new type for this.

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>

diff --git a/include/uapi/linux/input.h b/include/uapi/linux/input.h
index 0111384..c514941 100644
--- a/include/uapi/linux/input.h
+++ b/include/uapi/linux/input.h
@@ -247,6 +247,7 @@ struct input_mask {
 #define BUS_ATARI		0x1B
 #define BUS_SPI			0x1C
 #define BUS_RMI			0x1D
+#define BUS_CEC			0x1E
 
 /*
  * MT_TOOL types
-- 
cgit v0.10.2


From 488326947cd1f038da8d2c9068a0d07b913b7983 Mon Sep 17 00:00:00 2001
From: Kamil Debski <kamil@wypas.org>
Date: Sat, 18 Jun 2016 10:10:56 -0700
Subject: Input: add HDMI CEC specific keycodes

Add HDMI CEC specific keycodes to the keycodes definition.

Signed-off-by: Kamil Debski <kamil@wypas.org>
Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>

diff --git a/include/uapi/linux/input-event-codes.h b/include/uapi/linux/input-event-codes.h
index 87cf351..737fa32 100644
--- a/include/uapi/linux/input-event-codes.h
+++ b/include/uapi/linux/input-event-codes.h
@@ -611,6 +611,37 @@
 #define KEY_KBDINPUTASSIST_ACCEPT		0x264
 #define KEY_KBDINPUTASSIST_CANCEL		0x265
 
+/* Diagonal movement keys */
+#define KEY_RIGHT_UP			0x266
+#define KEY_RIGHT_DOWN			0x267
+#define KEY_LEFT_UP			0x268
+#define KEY_LEFT_DOWN			0x269
+
+#define KEY_ROOT_MENU			0x26a /* Show Device's Root Menu */
+/* Show Top Menu of the Media (e.g. DVD) */
+#define KEY_MEDIA_TOP_MENU		0x26b
+#define KEY_NUMERIC_11			0x26c
+#define KEY_NUMERIC_12			0x26d
+/*
+ * Toggle Audio Description: refers to an audio service that helps blind and
+ * visually impaired consumers understand the action in a program. Note: in
+ * some countries this is referred to as "Video Description".
+ */
+#define KEY_AUDIO_DESC			0x26e
+#define KEY_3D_MODE			0x26f
+#define KEY_NEXT_FAVORITE		0x270
+#define KEY_STOP_RECORD			0x271
+#define KEY_PAUSE_RECORD		0x272
+#define KEY_VOD				0x273 /* Video on Demand */
+#define KEY_UNMUTE			0x274
+#define KEY_FASTREVERSE			0x275
+#define KEY_SLOWREVERSE			0x276
+/*
+ * Control a data application associated with the currently viewed channel,
+ * e.g. teletext or data broadcast application (MHEG, MHP, HbbTV, etc.)
+ */
+#define KEY_DATA			0x275
+
 #define BTN_TRIGGER_HAPPY		0x2c0
 #define BTN_TRIGGER_HAPPY1		0x2c0
 #define BTN_TRIGGER_HAPPY2		0x2c1
-- 
cgit v0.10.2


From 053ea640818812313892ec4f370f5cfac42fd355 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pali=20Roh=C3=A1r?= <pali.rohar@gmail.com>
Date: Sat, 18 Jun 2016 00:54:44 +0200
Subject: hwmon: (dell-smm) Fail in ioctl I8K_BIOS_VERSION when bios version is
 not a number
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

ABI of I8K_BIOS_VERSION ioctl can return only number. But new BIOS versions
contain also other characters, which does not fit into that ABI. So in case
of non digit values return -EINVAL.

Reported-by: Mario Limonciello <Mario_Limonciello@dell.com>
Signed-off-by: Pali Rohár <pali.rohar@gmail.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>

diff --git a/drivers/hwmon/dell-smm-hwmon.c b/drivers/hwmon/dell-smm-hwmon.c
index c43318d..480b2fa 100644
--- a/drivers/hwmon/dell-smm-hwmon.c
+++ b/drivers/hwmon/dell-smm-hwmon.c
@@ -35,6 +35,7 @@
 #include <linux/uaccess.h>
 #include <linux/io.h>
 #include <linux/sched.h>
+#include <linux/ctype.h>
 
 #include <linux/i8k.h>
 
@@ -387,6 +388,10 @@ i8k_ioctl_unlocked(struct file *fp, unsigned int cmd, unsigned long arg)
 
 	switch (cmd) {
 	case I8K_BIOS_VERSION:
+		if (!isdigit(bios_version[0]) || !isdigit(bios_version[1]) ||
+		    !isdigit(bios_version[2]))
+			return -EINVAL;
+
 		val = (bios_version[0] << 16) |
 				(bios_version[1] << 8) | bios_version[2];
 		break;
-- 
cgit v0.10.2


From 7613663cc186f8f3c50279390ddc60286758001c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pali=20Roh=C3=A1r?= <pali.rohar@gmail.com>
Date: Sat, 18 Jun 2016 00:54:45 +0200
Subject: hwmon: (dell-smm) Restrict fan control and serial number to
 CAP_SYS_ADMIN by default
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

For security reasons ordinary user must not be able to control fan speed
via /proc/i8k by default. Some malicious software running under "nobody"
user could be able to turn fan off and cause HW problems. So this patch
changes default value of "restricted" parameter to 1.

Also restrict reading of DMI_PRODUCT_SERIAL from /proc/i8k via "restricted"
parameter. It is because non root user cannot read DMI_PRODUCT_SERIAL from
sysfs file /sys/class/dmi/id/product_serial.

Old non secure behaviour of file /proc/i8k can be achieved by loading this
module with "restricted" parameter set to 0.

Note that this patch has effects only for kernels compiled with CONFIG_I8K
and only for file /proc/i8k. Hwmon interface provided by this driver was
not changed and root access for setting fan speed was needed also before.

Reported-by: Mario Limonciello <Mario_Limonciello@dell.com>
Signed-off-by: Pali Rohár <pali.rohar@gmail.com>
Cc: stable@vger.kernel.org # will need backport
Signed-off-by: Guenter Roeck <linux@roeck-us.net>

diff --git a/drivers/hwmon/dell-smm-hwmon.c b/drivers/hwmon/dell-smm-hwmon.c
index 480b2fa..c8bd3fdd 100644
--- a/drivers/hwmon/dell-smm-hwmon.c
+++ b/drivers/hwmon/dell-smm-hwmon.c
@@ -67,6 +67,7 @@
 
 static DEFINE_MUTEX(i8k_mutex);
 static char bios_version[4];
+static char bios_machineid[16];
 static struct device *i8k_hwmon_dev;
 static u32 i8k_hwmon_flags;
 static uint i8k_fan_mult = I8K_FAN_MULT;
@@ -95,13 +96,13 @@ module_param(ignore_dmi, bool, 0);
 MODULE_PARM_DESC(ignore_dmi, "Continue probing hardware even if DMI data does not match");
 
 #if IS_ENABLED(CONFIG_I8K)
-static bool restricted;
+static bool restricted = true;
 module_param(restricted, bool, 0);
-MODULE_PARM_DESC(restricted, "Allow fan control if SYS_ADMIN capability set");
+MODULE_PARM_DESC(restricted, "Restrict fan control and serial number to CAP_SYS_ADMIN (default: 1)");
 
 static bool power_status;
 module_param(power_status, bool, 0600);
-MODULE_PARM_DESC(power_status, "Report power status in /proc/i8k");
+MODULE_PARM_DESC(power_status, "Report power status in /proc/i8k (default: 0)");
 #endif
 
 static uint fan_mult;
@@ -397,9 +398,11 @@ i8k_ioctl_unlocked(struct file *fp, unsigned int cmd, unsigned long arg)
 		break;
 
 	case I8K_MACHINE_ID:
-		memset(buff, 0, 16);
-		strlcpy(buff, i8k_get_dmi_data(DMI_PRODUCT_SERIAL),
-			sizeof(buff));
+		if (restricted && !capable(CAP_SYS_ADMIN))
+			return -EPERM;
+
+		memset(buff, 0, sizeof(buff));
+		strlcpy(buff, bios_machineid, sizeof(buff));
 		break;
 
 	case I8K_FN_STATUS:
@@ -516,7 +519,7 @@ static int i8k_proc_show(struct seq_file *seq, void *offset)
 	seq_printf(seq, "%s %s %s %d %d %d %d %d %d %d\n",
 		   I8K_PROC_FMT,
 		   bios_version,
-		   i8k_get_dmi_data(DMI_PRODUCT_SERIAL),
+		   (restricted && !capable(CAP_SYS_ADMIN)) ? "-1" : bios_machineid,
 		   cpu_temp,
 		   left_fan, right_fan, left_speed, right_speed,
 		   ac_power, fn_key);
@@ -985,6 +988,8 @@ static int __init i8k_probe(void)
 
 	strlcpy(bios_version, i8k_get_dmi_data(DMI_BIOS_VERSION),
 		sizeof(bios_version));
+	strlcpy(bios_machineid, i8k_get_dmi_data(DMI_PRODUCT_SERIAL),
+		sizeof(bios_machineid));
 
 	/*
 	 * Get SMM Dell signature
-- 
cgit v0.10.2


From 2744d2fde00dc8bcc3679eb72c81a63058e90faa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pali=20Roh=C3=A1r?= <pali.rohar@gmail.com>
Date: Sat, 18 Jun 2016 00:54:46 +0200
Subject: hwmon: (dell-smm) Disallow fan_type() calls on broken machines
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some Dell machines have especially broken SMM or BIOS which cause that once
fan_type() is called then CPU fan speed going randomly up and down. And for
fixing this behaviour reboot is required.

So this patch creates fan_type blacklist of affected Dell machines and
disallow fan_type() call on them to prevent that erratic behaviour.

Old blacklist which disabled loading driver on some machines added in
commits a4b45b25f18d ("hwmon: (dell-smm) Blacklist Dell Studio XPS 8100")
and 6220f4ebd7b4 ("hwmon: (dell-smm) Blacklist Dell Studio XPS 8000") were
moved to FAN_TYPE blacklist.

Reported-by: Jan C Peters <jcpeters89@gmail.com>
Signed-off-by: Pali Rohár <pali.rohar@gmail.com>
Link: https://bugzilla.kernel.org/show_bug.cgi?id=100121
Cc: stable@vger.kernel.org # v4.0+, will need backport
Signed-off-by: Guenter Roeck <linux@roeck-us.net>

diff --git a/drivers/hwmon/dell-smm-hwmon.c b/drivers/hwmon/dell-smm-hwmon.c
index c8bd3fdd..4bbc587 100644
--- a/drivers/hwmon/dell-smm-hwmon.c
+++ b/drivers/hwmon/dell-smm-hwmon.c
@@ -73,6 +73,7 @@ static u32 i8k_hwmon_flags;
 static uint i8k_fan_mult = I8K_FAN_MULT;
 static uint i8k_pwm_mult;
 static uint i8k_fan_max = I8K_FAN_HIGH;
+static bool disallow_fan_type_call;
 
 #define I8K_HWMON_HAVE_TEMP1	(1 << 0)
 #define I8K_HWMON_HAVE_TEMP2	(1 << 1)
@@ -241,6 +242,9 @@ static int i8k_get_fan_type(int fan)
 {
 	struct smm_regs regs = { .eax = I8K_SMM_GET_FAN_TYPE, };
 
+	if (disallow_fan_type_call)
+		return -EINVAL;
+
 	regs.ebx = fan & 0xff;
 	return i8k_smm(&regs) ? : regs.eax & 0xff;
 }
@@ -726,6 +730,9 @@ static struct attribute *i8k_attrs[] = {
 static umode_t i8k_is_visible(struct kobject *kobj, struct attribute *attr,
 			      int index)
 {
+	if (disallow_fan_type_call &&
+	    (index == 9 || index == 12))
+		return 0;
 	if (index >= 0 && index <= 1 &&
 	    !(i8k_hwmon_flags & I8K_HWMON_HAVE_TEMP1))
 		return 0;
@@ -937,12 +944,14 @@ static struct dmi_system_id i8k_dmi_table[] __initdata = {
 
 MODULE_DEVICE_TABLE(dmi, i8k_dmi_table);
 
-static struct dmi_system_id i8k_blacklist_dmi_table[] __initdata = {
+/*
+ * On some machines once I8K_SMM_GET_FAN_TYPE is issued then CPU fan speed
+ * randomly going up and down due to bug in Dell SMM or BIOS. Here is blacklist
+ * of affected Dell machines for which we disallow I8K_SMM_GET_FAN_TYPE call.
+ * See bug: https://bugzilla.kernel.org/show_bug.cgi?id=100121
+ */
+static struct dmi_system_id i8k_blacklist_fan_type_dmi_table[] __initdata = {
 	{
-		/*
-		 * CPU fan speed going up and down on Dell Studio XPS 8000
-		 * for unknown reasons.
-		 */
 		.ident = "Dell Studio XPS 8000",
 		.matches = {
 			DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
@@ -950,16 +959,19 @@ static struct dmi_system_id i8k_blacklist_dmi_table[] __initdata = {
 		},
 	},
 	{
-		/*
-		 * CPU fan speed going up and down on Dell Studio XPS 8100
-		 * for unknown reasons.
-		 */
 		.ident = "Dell Studio XPS 8100",
 		.matches = {
 			DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
 			DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Studio XPS 8100"),
 		},
 	},
+	{
+		.ident = "Dell Inspiron 580",
+		.matches = {
+			DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+			DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Inspiron 580 "),
+		},
+	},
 	{ }
 };
 
@@ -974,8 +986,7 @@ static int __init i8k_probe(void)
 	/*
 	 * Get DMI information
 	 */
-	if (!dmi_check_system(i8k_dmi_table) ||
-	    dmi_check_system(i8k_blacklist_dmi_table)) {
+	if (!dmi_check_system(i8k_dmi_table)) {
 		if (!ignore_dmi && !force)
 			return -ENODEV;
 
@@ -986,6 +997,9 @@ static int __init i8k_probe(void)
 			i8k_get_dmi_data(DMI_BIOS_VERSION));
 	}
 
+	if (dmi_check_system(i8k_blacklist_fan_type_dmi_table))
+		disallow_fan_type_call = true;
+
 	strlcpy(bios_version, i8k_get_dmi_data(DMI_BIOS_VERSION),
 		sizeof(bios_version));
 	strlcpy(bios_machineid, i8k_get_dmi_data(DMI_PRODUCT_SERIAL),
-- 
cgit v0.10.2


From 30172936eefb70c27fade1bc912a25fc90827b76 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pali=20Roh=C3=A1r?= <pali.rohar@gmail.com>
Date: Sat, 18 Jun 2016 17:47:38 -0700
Subject: =?UTF-8?q?MAINTAINERS:=20add=20Pali=20Roh=C3=A1r=20as=20reviewer?=
 =?UTF-8?q?=20of=20ALPS=20PS/2=20touchpad=20driver?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Pali Rohár <pali.rohar@gmail.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>

diff --git a/MAINTAINERS b/MAINTAINERS
index 9c567a4..630151f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -596,6 +596,10 @@ S:	Odd Fixes
 L:	linux-alpha@vger.kernel.org
 F:	arch/alpha/
 
+ALPS PS/2 TOUCHPAD DRIVER
+R:	Pali Rohár <pali.rohar@gmail.com>
+F:	drivers/input/mouse/alps.*
+
 ALTERA MAILBOX DRIVER
 M:	Ley Foon Tan <lftan@altera.com>
 L:	nios2-dev@lists.rocketboards.org (moderated for non-subscribers)
-- 
cgit v0.10.2


From 4a7d99ea1b27734558feb6833f180cd38a159940 Mon Sep 17 00:00:00 2001
From: Basil Gunn <basil@pacabunga.com>
Date: Thu, 16 Jun 2016 09:42:30 -0700
Subject: AX.25: Close socket connection on session completion

A socket connection made in ax.25 is not closed when session is
completed.  The heartbeat timer is stopped prematurely and this is
where the socket gets closed. Allow heatbeat timer to run to close
socket. Symptom occurs in kernels >= 4.2.0

Originally sent 6/15/2016. Resend with distribution list matching
scripts/maintainer.pl output.

Signed-off-by: Basil Gunn <basil@pacabunga.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index fbd0acf..2fdebab 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -976,7 +976,8 @@ static int ax25_release(struct socket *sock)
 			release_sock(sk);
 			ax25_disconnect(ax25, 0);
 			lock_sock(sk);
-			ax25_destroy_socket(ax25);
+			if (!sock_flag(ax25->sk, SOCK_DESTROY))
+				ax25_destroy_socket(ax25);
 			break;
 
 		case AX25_STATE_3:
diff --git a/net/ax25/ax25_ds_timer.c b/net/ax25/ax25_ds_timer.c
index 951cd57..5237dff 100644
--- a/net/ax25/ax25_ds_timer.c
+++ b/net/ax25/ax25_ds_timer.c
@@ -102,6 +102,7 @@ void ax25_ds_heartbeat_expiry(ax25_cb *ax25)
 	switch (ax25->state) {
 
 	case AX25_STATE_0:
+	case AX25_STATE_2:
 		/* Magic here: If we listen() and a new link dies before it
 		   is accepted() it isn't 'dead' so doesn't get removed. */
 		if (!sk || sock_flag(sk, SOCK_DESTROY) ||
@@ -111,6 +112,7 @@ void ax25_ds_heartbeat_expiry(ax25_cb *ax25)
 				sock_hold(sk);
 				ax25_destroy_socket(ax25);
 				bh_unlock_sock(sk);
+				/* Ungrab socket and destroy it */
 				sock_put(sk);
 			} else
 				ax25_destroy_socket(ax25);
@@ -213,7 +215,8 @@ void ax25_ds_t1_timeout(ax25_cb *ax25)
 	case AX25_STATE_2:
 		if (ax25->n2count == ax25->n2) {
 			ax25_send_control(ax25, AX25_DISC, AX25_POLLON, AX25_COMMAND);
-			ax25_disconnect(ax25, ETIMEDOUT);
+			if (!sock_flag(ax25->sk, SOCK_DESTROY))
+				ax25_disconnect(ax25, ETIMEDOUT);
 			return;
 		} else {
 			ax25->n2count++;
diff --git a/net/ax25/ax25_std_timer.c b/net/ax25/ax25_std_timer.c
index 004467c9..2c0d6ef 100644
--- a/net/ax25/ax25_std_timer.c
+++ b/net/ax25/ax25_std_timer.c
@@ -38,6 +38,7 @@ void ax25_std_heartbeat_expiry(ax25_cb *ax25)
 
 	switch (ax25->state) {
 	case AX25_STATE_0:
+	case AX25_STATE_2:
 		/* Magic here: If we listen() and a new link dies before it
 		   is accepted() it isn't 'dead' so doesn't get removed. */
 		if (!sk || sock_flag(sk, SOCK_DESTROY) ||
@@ -47,6 +48,7 @@ void ax25_std_heartbeat_expiry(ax25_cb *ax25)
 				sock_hold(sk);
 				ax25_destroy_socket(ax25);
 				bh_unlock_sock(sk);
+				/* Ungrab socket and destroy it */
 				sock_put(sk);
 			} else
 				ax25_destroy_socket(ax25);
@@ -144,7 +146,8 @@ void ax25_std_t1timer_expiry(ax25_cb *ax25)
 	case AX25_STATE_2:
 		if (ax25->n2count == ax25->n2) {
 			ax25_send_control(ax25, AX25_DISC, AX25_POLLON, AX25_COMMAND);
-			ax25_disconnect(ax25, ETIMEDOUT);
+			if (!sock_flag(ax25->sk, SOCK_DESTROY))
+				ax25_disconnect(ax25, ETIMEDOUT);
 			return;
 		} else {
 			ax25->n2count++;
diff --git a/net/ax25/ax25_subr.c b/net/ax25/ax25_subr.c
index 3b78e84..655a7d4 100644
--- a/net/ax25/ax25_subr.c
+++ b/net/ax25/ax25_subr.c
@@ -264,7 +264,8 @@ void ax25_disconnect(ax25_cb *ax25, int reason)
 {
 	ax25_clear_queues(ax25);
 
-	ax25_stop_heartbeat(ax25);
+	if (!sock_flag(ax25->sk, SOCK_DESTROY))
+		ax25_stop_heartbeat(ax25);
 	ax25_stop_t1timer(ax25);
 	ax25_stop_t2timer(ax25);
 	ax25_stop_t3timer(ax25);
-- 
cgit v0.10.2


From 5c3da57d70f1ef1d9b60900b84a74d77a9cf0774 Mon Sep 17 00:00:00 2001
From: Joshua Houghton <josh@awful.name>
Date: Sat, 18 Jun 2016 15:46:31 +0000
Subject: net: rds: fix coding style issues

Fix coding style issues in the following files:

ib_cm.c:      add space
loop.c:       convert spaces to tabs
sysctl.c:     add space
tcp.h:        convert spaces to tabs
tcp_connect.c:remove extra indentation in switch statement
tcp_recv.c:   convert spaces to tabs
tcp_send.c:   convert spaces to tabs
transport.c:  move brace up one line on for statement

Signed-off-by: Joshua Houghton <josh@awful.name>
Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index 310cabc..7c2a65a 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -111,7 +111,7 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even
 		}
 	}
 
-	if (conn->c_version < RDS_PROTOCOL(3,1)) {
+	if (conn->c_version < RDS_PROTOCOL(3, 1)) {
 		printk(KERN_NOTICE "RDS/IB: Connection to %pI4 version %u.%u failed,"
 		       " no longer supported\n",
 		       &conn->c_faddr,
diff --git a/net/rds/loop.c b/net/rds/loop.c
index 6b12b68..814173b 100644
--- a/net/rds/loop.c
+++ b/net/rds/loop.c
@@ -95,8 +95,9 @@ out:
  */
 static void rds_loop_inc_free(struct rds_incoming *inc)
 {
-        struct rds_message *rm = container_of(inc, struct rds_message, m_inc);
-        rds_message_put(rm);
+	struct rds_message *rm = container_of(inc, struct rds_message, m_inc);
+
+	rds_message_put(rm);
 }
 
 /* we need to at least give the thread something to succeed */
diff --git a/net/rds/sysctl.c b/net/rds/sysctl.c
index c173f69..e381bbc 100644
--- a/net/rds/sysctl.c
+++ b/net/rds/sysctl.c
@@ -102,7 +102,8 @@ int rds_sysctl_init(void)
 	rds_sysctl_reconnect_min = msecs_to_jiffies(1);
 	rds_sysctl_reconnect_min_jiffies = rds_sysctl_reconnect_min;
 
-	rds_sysctl_reg_table = register_net_sysctl(&init_net,"net/rds", rds_sysctl_rds_table);
+	rds_sysctl_reg_table =
+		register_net_sysctl(&init_net, "net/rds", rds_sysctl_rds_table);
 	if (!rds_sysctl_reg_table)
 		return -ENOMEM;
 	return 0;
diff --git a/net/rds/tcp.h b/net/rds/tcp.h
index ec0602b..7940bab 100644
--- a/net/rds/tcp.h
+++ b/net/rds/tcp.h
@@ -83,7 +83,7 @@ int rds_tcp_inc_copy_to_user(struct rds_incoming *inc, struct iov_iter *to);
 void rds_tcp_xmit_prepare(struct rds_connection *conn);
 void rds_tcp_xmit_complete(struct rds_connection *conn);
 int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm,
-	         unsigned int hdr_off, unsigned int sg, unsigned int off);
+		 unsigned int hdr_off, unsigned int sg, unsigned int off);
 void rds_tcp_write_space(struct sock *sk);
 
 /* tcp_stats.c */
diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c
index fba13d0..f6e95d6 100644
--- a/net/rds/tcp_connect.c
+++ b/net/rds/tcp_connect.c
@@ -54,19 +54,19 @@ void rds_tcp_state_change(struct sock *sk)
 
 	rdsdebug("sock %p state_change to %d\n", tc->t_sock, sk->sk_state);
 
-	switch(sk->sk_state) {
-		/* ignore connecting sockets as they make progress */
-		case TCP_SYN_SENT:
-		case TCP_SYN_RECV:
-			break;
-		case TCP_ESTABLISHED:
-			rds_connect_path_complete(conn, RDS_CONN_CONNECTING);
-			break;
-		case TCP_CLOSE_WAIT:
-		case TCP_CLOSE:
-			rds_conn_drop(conn);
-		default:
-			break;
+	switch (sk->sk_state) {
+	/* ignore connecting sockets as they make progress */
+	case TCP_SYN_SENT:
+	case TCP_SYN_RECV:
+		break;
+	case TCP_ESTABLISHED:
+		rds_connect_path_complete(conn, RDS_CONN_CONNECTING);
+		break;
+	case TCP_CLOSE_WAIT:
+	case TCP_CLOSE:
+		rds_conn_drop(conn);
+	default:
+		break;
 	}
 out:
 	read_unlock_bh(&sk->sk_callback_lock);
diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c
index c3196f9..6e6a711 100644
--- a/net/rds/tcp_recv.c
+++ b/net/rds/tcp_recv.c
@@ -171,7 +171,7 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb,
 	while (left) {
 		if (!tinc) {
 			tinc = kmem_cache_alloc(rds_tcp_incoming_slab,
-					        arg->gfp);
+						arg->gfp);
 			if (!tinc) {
 				desc->error = -ENOMEM;
 				goto out;
diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c
index 22d0f20..618be69 100644
--- a/net/rds/tcp_send.c
+++ b/net/rds/tcp_send.c
@@ -66,19 +66,19 @@ void rds_tcp_xmit_complete(struct rds_connection *conn)
 static int rds_tcp_sendmsg(struct socket *sock, void *data, unsigned int len)
 {
 	struct kvec vec = {
-                .iov_base = data,
-                .iov_len = len,
+		.iov_base = data,
+		.iov_len = len,
+	};
+	struct msghdr msg = {
+		.msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL,
 	};
-        struct msghdr msg = {
-                .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL,
-        };
 
 	return kernel_sendmsg(sock, &msg, &vec, 1, vec.iov_len);
 }
 
 /* the core send_sem serializes this with other xmit and shutdown */
 int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm,
-	         unsigned int hdr_off, unsigned int sg, unsigned int off)
+		 unsigned int hdr_off, unsigned int sg, unsigned int off)
 {
 	struct rds_tcp_connection *tc = conn->c_transport_data;
 	int done = 0;
@@ -196,7 +196,7 @@ void rds_tcp_write_space(struct sock *sk)
 	tc->t_last_seen_una = rds_tcp_snd_una(tc);
 	rds_send_drop_acked(conn, rds_tcp_snd_una(tc), rds_tcp_is_acked);
 
-        if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf)
+	if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf)
 		queue_delayed_work(rds_wq, &conn->c_send_w, 0);
 
 out:
diff --git a/net/rds/transport.c b/net/rds/transport.c
index f3afd1d..2ffd3e30 100644
--- a/net/rds/transport.c
+++ b/net/rds/transport.c
@@ -140,8 +140,7 @@ unsigned int rds_trans_stats_info_copy(struct rds_info_iterator *iter,
 	rds_info_iter_unmap(iter);
 	down_read(&rds_trans_sem);
 
-	for (i = 0; i < RDS_TRANS_COUNT; i++)
-	{
+	for (i = 0; i < RDS_TRANS_COUNT; i++) {
 		trans = transports[i];
 		if (!trans || !trans->stats_info_copy)
 			continue;
-- 
cgit v0.10.2


From d90efc9ee0df3f13f248aaf47086b1e0ed6035ae Mon Sep 17 00:00:00 2001
From: Yakir Yang <ykk@rock-chips.com>
Date: Wed, 8 Jun 2016 10:13:27 -0400
Subject: drm/exynos: dp: Fix NULL pointer dereference due uninitialized
 connector

Commit 3424e3a4f844 ("drm: bridge: analogix/dp: split exynos dp driver to
bridge directory") split the Exynos DP core driver into a core driver and
a bridge driver for the Analogix chip since that is also used by Rockchip.

But the change introduced a regression causing a NULL pointer dereference
when trying to access an uninitialized connector in the driver .get_modes:

Fix this by instead of having a connector struct for both the Exynos and
Analogix drivers, just use the connector initialized in the bridge driver.

Fixes: 3424e3a4f844 ("drm: bridge: analogix/dp: split exynos dp driver to bridge directory")

Reported-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Yakir Yang <ykk@rock-chips.com>
Tested-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Inki Dae <inki.dae@samsung.com>

diff --git a/drivers/gpu/drm/exynos/exynos_dp.c b/drivers/gpu/drm/exynos/exynos_dp.c
index 468498e..4c1fb3f 100644
--- a/drivers/gpu/drm/exynos/exynos_dp.c
+++ b/drivers/gpu/drm/exynos/exynos_dp.c
@@ -34,7 +34,7 @@
 
 struct exynos_dp_device {
 	struct drm_encoder         encoder;
-	struct drm_connector       connector;
+	struct drm_connector       *connector;
 	struct drm_bridge          *ptn_bridge;
 	struct drm_device          *drm_dev;
 	struct device              *dev;
@@ -70,7 +70,7 @@ static int exynos_dp_poweroff(struct analogix_dp_plat_data *plat_data)
 static int exynos_dp_get_modes(struct analogix_dp_plat_data *plat_data)
 {
 	struct exynos_dp_device *dp = to_dp(plat_data);
-	struct drm_connector *connector = &dp->connector;
+	struct drm_connector *connector = dp->connector;
 	struct drm_display_mode *mode;
 	int num_modes = 0;
 
@@ -103,6 +103,7 @@ static int exynos_dp_bridge_attach(struct analogix_dp_plat_data *plat_data,
 	int ret;
 
 	drm_connector_register(connector);
+	dp->connector = connector;
 
 	/* Pre-empt DP connector creation if there's a bridge */
 	if (dp->ptn_bridge) {
-- 
cgit v0.10.2


From 5e0b37634cbb6bae69b7b15e1ea5f054dfaaa413 Mon Sep 17 00:00:00 2001
From: Javier Martinez Canillas <javier@osg.samsung.com>
Date: Mon, 30 May 2016 20:20:22 -0400
Subject: drm/exynos: fimd: don't set .has_hw_trigger in s3c6400 driver data

The field value is only checked in fimd_setup_trigger() if .trg_type is
I80_HW_TRG so there's no point in setting this field for the s3c6400 if
is never going to be used since .trg_type is not set.

Signed-off-by: Javier Martinez Canillas <javier@osg.samsung.com>
Signed-off-by: Inki Dae <inki.dae@samsung.com>

diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c
index 3efe1aa..1c23a8f 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c
@@ -120,7 +120,6 @@ static struct fimd_driver_data s3c64xx_fimd_driver_data = {
 	.timing_base = 0x0,
 	.has_clksel = 1,
 	.has_limited_fmt = 1,
-	.has_hw_trigger = 1,
 };
 
 static struct fimd_driver_data exynos3_fimd_driver_data = {
-- 
cgit v0.10.2


From e0d7461ceb5bb9b50e534262eb23e92deaaae6f1 Mon Sep 17 00:00:00 2001
From: Javier Martinez Canillas <javier@osg.samsung.com>
Date: Thu, 2 Jun 2016 10:20:10 -0400
Subject: drm/exynos: don't use HW trigger for Exynos5420/5422/5800

Commit a6f75aa161c5 ("drm/exynos: fimd: add HW trigger support") added
hardware trigger support to the FIMD controller driver. But this broke
the display in at least the Exynos5800 Peach Pi Chromebook.

So until the issue is fixed, avoid using HW trigger for the Exynos5420
based boards and use SW trigger as it was before the mentioned commit.

Signed-off-by: Javier Martinez Canillas <javier@osg.samsung.com>
Signed-off-by: Inki Dae <inki.dae@samsung.com>

diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c
index 1c23a8f..f10030f 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c
@@ -170,14 +170,11 @@ static struct fimd_driver_data exynos5420_fimd_driver_data = {
 	.lcdblk_vt_shift = 24,
 	.lcdblk_bypass_shift = 15,
 	.lcdblk_mic_bypass_shift = 11,
-	.trg_type = I80_HW_TRG,
 	.has_shadowcon = 1,
 	.has_vidoutcon = 1,
 	.has_vtsel = 1,
 	.has_mic_bypass = 1,
 	.has_dp_clk = 1,
-	.has_hw_trigger = 1,
-	.has_trigger_per_te = 1,
 };
 
 struct fimd_context {
-- 
cgit v0.10.2


From ed4dc2718d3510f0fa5e6794f47b98549848f656 Mon Sep 17 00:00:00 2001
From: Tobias Jakobi <tjakobi@math.uni-bielefeld.de>
Date: Wed, 25 May 2016 14:42:56 +0200
Subject: drm/exynos: g2d: drop the _REG postfix from the stride defines

This makes the defines consistent with the rest.

Signed-off-by: Tobias Jakobi <tjakobi@math.uni-bielefeld.de>
Signed-off-by: Inki Dae <inki.dae@samsung.com>

diff --git a/drivers/gpu/drm/exynos/exynos_drm_g2d.c b/drivers/gpu/drm/exynos/exynos_drm_g2d.c
index 4935523..8564c3d 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_g2d.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_g2d.c
@@ -48,13 +48,13 @@
 
 /* registers for base address */
 #define G2D_SRC_BASE_ADDR		0x0304
-#define G2D_SRC_STRIDE_REG		0x0308
+#define G2D_SRC_STRIDE			0x0308
 #define G2D_SRC_COLOR_MODE		0x030C
 #define G2D_SRC_LEFT_TOP		0x0310
 #define G2D_SRC_RIGHT_BOTTOM		0x0314
 #define G2D_SRC_PLANE2_BASE_ADDR	0x0318
 #define G2D_DST_BASE_ADDR		0x0404
-#define G2D_DST_STRIDE_REG		0x0408
+#define G2D_DST_STRIDE			0x0408
 #define G2D_DST_COLOR_MODE		0x040C
 #define G2D_DST_LEFT_TOP		0x0410
 #define G2D_DST_RIGHT_BOTTOM		0x0414
@@ -563,7 +563,7 @@ static enum g2d_reg_type g2d_get_reg_type(int reg_offset)
 
 	switch (reg_offset) {
 	case G2D_SRC_BASE_ADDR:
-	case G2D_SRC_STRIDE_REG:
+	case G2D_SRC_STRIDE:
 	case G2D_SRC_COLOR_MODE:
 	case G2D_SRC_LEFT_TOP:
 	case G2D_SRC_RIGHT_BOTTOM:
@@ -573,7 +573,7 @@ static enum g2d_reg_type g2d_get_reg_type(int reg_offset)
 		reg_type = REG_TYPE_SRC_PLANE2;
 		break;
 	case G2D_DST_BASE_ADDR:
-	case G2D_DST_STRIDE_REG:
+	case G2D_DST_STRIDE:
 	case G2D_DST_COLOR_MODE:
 	case G2D_DST_LEFT_TOP:
 	case G2D_DST_RIGHT_BOTTOM:
@@ -968,8 +968,8 @@ static int g2d_check_reg_offset(struct device *dev,
 			} else
 				buf_info->types[reg_type] = BUF_TYPE_GEM;
 			break;
-		case G2D_SRC_STRIDE_REG:
-		case G2D_DST_STRIDE_REG:
+		case G2D_SRC_STRIDE:
+		case G2D_DST_STRIDE:
 			if (for_addr)
 				goto err;
 
-- 
cgit v0.10.2


From f0fcf43f285cdbc1bbc372919d68aea0cf4483d6 Mon Sep 17 00:00:00 2001
From: Tobias Jakobi <tjakobi@math.uni-bielefeld.de>
Date: Wed, 25 May 2016 14:35:41 +0200
Subject: drm/exynos: remove superfluous inclusions of fbdev header

Neither of these files issue any fbdev related calls.

Signed-off-by: Tobias Jakobi <tjakobi@math.uni-bielefeld.de>
Signed-off-by: Inki Dae <inki.dae@samsung.com>

diff --git a/drivers/gpu/drm/exynos/exynos7_drm_decon.c b/drivers/gpu/drm/exynos/exynos7_drm_decon.c
index f6223f9..7f9901b 100644
--- a/drivers/gpu/drm/exynos/exynos7_drm_decon.c
+++ b/drivers/gpu/drm/exynos/exynos7_drm_decon.c
@@ -31,7 +31,6 @@
 #include "exynos_drm_plane.h"
 #include "exynos_drm_drv.h"
 #include "exynos_drm_fb.h"
-#include "exynos_drm_fbdev.h"
 #include "exynos_drm_iommu.h"
 
 /*
diff --git a/drivers/gpu/drm/exynos/exynos_drm_core.c b/drivers/gpu/drm/exynos/exynos_drm_core.c
index 011211e..edbd98f 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_core.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_core.c
@@ -15,7 +15,6 @@
 #include <drm/drmP.h>
 #include "exynos_drm_drv.h"
 #include "exynos_drm_crtc.h"
-#include "exynos_drm_fbdev.h"
 
 static LIST_HEAD(exynos_drm_subdrv_list);
 
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c
index f10030f..d472164 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c
@@ -30,7 +30,6 @@
 
 #include "exynos_drm_drv.h"
 #include "exynos_drm_fb.h"
-#include "exynos_drm_fbdev.h"
 #include "exynos_drm_crtc.h"
 #include "exynos_drm_plane.h"
 #include "exynos_drm_iommu.h"
-- 
cgit v0.10.2


From 41abbf5afa51136bcb2aeefc80bf5c3a005d0aa3 Mon Sep 17 00:00:00 2001
From: Tobias Jakobi <tjakobi@math.uni-bielefeld.de>
Date: Wed, 25 May 2016 14:30:07 +0200
Subject: drm/exynos: use logical AND in exynos_drm_plane_check_size()

The current bitwise AND should result in the same assembler
but this is what the code is actually supposed to do.

Signed-off-by: Tobias Jakobi <tjakobi@math.uni-bielefeld.de>
Signed-off-by: Inki Dae <inki.dae@samsung.com>

diff --git a/drivers/gpu/drm/exynos/exynos_drm_plane.c b/drivers/gpu/drm/exynos/exynos_drm_plane.c
index 55f1d37..77f12c0 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_plane.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_plane.c
@@ -242,7 +242,7 @@ exynos_drm_plane_check_size(const struct exynos_drm_plane_config *config,
 	    state->v_ratio == (1 << 15))
 		height_ok = true;
 
-	if (width_ok & height_ok)
+	if (width_ok && height_ok)
 		return 0;
 
 	DRM_DEBUG_KMS("scaling mode is not supported");
-- 
cgit v0.10.2


From e6bd89232b4a4c9282da1ac8bdd798c2a30d9a46 Mon Sep 17 00:00:00 2001
From: Yuval Mintz <Yuval.Mintz@qlogic.com>
Date: Sun, 19 Jun 2016 15:18:11 +0300
Subject: qed: Correct default vlan behavior

When no vlan filter is configured, firmware has a configurable
default on whether to pass only untagged packets or all packets
regardless of their tagging. Driver currently doesn't set this
field in the necessary ramrod, causing the default to always be
'receive all'.

Signed-off-by: Yuval Mintz <Yuval.Mintz@qlogic.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c
index 8fba87dd..7c199a9 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_l2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c
@@ -72,6 +72,7 @@ int qed_sp_eth_vport_start(struct qed_hwfn *p_hwfn,
 	p_ramrod->mtu			= cpu_to_le16(p_params->mtu);
 	p_ramrod->inner_vlan_removal_en	= p_params->remove_inner_vlan;
 	p_ramrod->drop_ttl0_en		= p_params->drop_ttl0;
+	p_ramrod->untagged		= p_params->only_untagged;
 
 	SET_FIELD(rx_mode, ETH_VPORT_RX_MODE_UCAST_DROP_ALL, 1);
 	SET_FIELD(rx_mode, ETH_VPORT_RX_MODE_MCAST_DROP_ALL, 1);
-- 
cgit v0.10.2


From 326439883e17fca029f4ed05307480bdb6369877 Mon Sep 17 00:00:00 2001
From: Yuval Mintz <Yuval.Mintz@qlogic.com>
Date: Sun, 19 Jun 2016 15:18:12 +0300
Subject: qed: Prevent VF from Tx-switching 'promisc'

Internal loopback in driver is based on two things - first
is the willingness of transmitter to use it [in case of VFs,
this can be forced based on VEPA/VEB] and secondly on another
vport classification configuration which should match the
packet's destination.

Current code allows non-linux VFs to configure a 'promisc'
mode on Tx, meaning all traffic sent by VF would be loopbacked
internally by firmware; This isn't considered a valid mode and
as such should be prevented by PF.

Signed-off-by: Yuval Mintz <Yuval.Mintz@qlogic.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c
index 7c199a9..7e7ae83 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_l2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c
@@ -248,10 +248,6 @@ qed_sp_update_accept_mode(struct qed_hwfn *p_hwfn,
 		SET_FIELD(state, ETH_VPORT_TX_MODE_UCAST_DROP_ALL,
 			  !!(accept_filter & QED_ACCEPT_NONE));
 
-		SET_FIELD(state, ETH_VPORT_TX_MODE_UCAST_ACCEPT_ALL,
-			  (!!(accept_filter & QED_ACCEPT_UCAST_MATCHED) &&
-			   !!(accept_filter & QED_ACCEPT_UCAST_UNMATCHED)));
-
 		SET_FIELD(state, ETH_VPORT_TX_MODE_MCAST_DROP_ALL,
 			  !!(accept_filter & QED_ACCEPT_NONE));
 
-- 
cgit v0.10.2


From a0d26d5a4fc8e13993279f788deeb08069e73b69 Mon Sep 17 00:00:00 2001
From: Yuval Mintz <Yuval.Mintz@qlogic.com>
Date: Sun, 19 Jun 2016 15:18:13 +0300
Subject: qed*: Don't reset statistics on inner reload

Several user APIs can cause driver to perform an inner-reload.
Currently, doing this would cause the HW/FW statistics of the
adapter to reset, which isn't the expected behavior [statistics
should only reset on explicit unloads].

Signed-off-by: Yuval Mintz <Yuval.Mintz@qlogic.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c
index 7e7ae83..aada4c7 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_l2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c
@@ -1745,7 +1745,8 @@ static int qed_start_vport(struct qed_dev *cdev,
 			   start.vport_id, start.mtu);
 	}
 
-	qed_reset_vport_stats(cdev);
+	if (params->clear_stats)
+		qed_reset_vport_stats(cdev);
 
 	return 0;
 }
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index 5733d18..f8e11f9 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -3231,7 +3231,7 @@ static int qede_stop_queues(struct qede_dev *edev)
 	return rc;
 }
 
-static int qede_start_queues(struct qede_dev *edev)
+static int qede_start_queues(struct qede_dev *edev, bool clear_stats)
 {
 	int rc, tc, i;
 	int vlan_removal_en = 1;
@@ -3462,6 +3462,7 @@ out:
 
 enum qede_load_mode {
 	QEDE_LOAD_NORMAL,
+	QEDE_LOAD_RELOAD,
 };
 
 static int qede_load(struct qede_dev *edev, enum qede_load_mode mode)
@@ -3500,7 +3501,7 @@ static int qede_load(struct qede_dev *edev, enum qede_load_mode mode)
 		goto err3;
 	DP_INFO(edev, "Setup IRQs succeeded\n");
 
-	rc = qede_start_queues(edev);
+	rc = qede_start_queues(edev, mode != QEDE_LOAD_RELOAD);
 	if (rc)
 		goto err4;
 	DP_INFO(edev, "Start VPORT, RXQ and TXQ succeeded\n");
@@ -3555,7 +3556,7 @@ void qede_reload(struct qede_dev *edev,
 	if (func)
 		func(edev, args);
 
-	qede_load(edev, QEDE_LOAD_NORMAL);
+	qede_load(edev, QEDE_LOAD_RELOAD);
 
 	mutex_lock(&edev->qede_lock);
 	qede_config_rx_mode(edev->ndev);
diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h
index 6ae8cb4..6c876a6 100644
--- a/include/linux/qed/qed_eth_if.h
+++ b/include/linux/qed/qed_eth_if.h
@@ -49,6 +49,7 @@ struct qed_start_vport_params {
 	bool drop_ttl0;
 	u8 vport_id;
 	u16 mtu;
+	bool clear_stats;
 };
 
 struct qed_stop_rxq_params {
-- 
cgit v0.10.2


From db511c37d4eb2b4e7312791eb8f9b34ed867445e Mon Sep 17 00:00:00 2001
From: Yuval Mintz <Yuval.Mintz@qlogic.com>
Date: Sun, 19 Jun 2016 15:18:14 +0300
Subject: qed: Fix returning unlimited SPQ entries

Driver has 2 sets of entries for handling ramrod configurations
toward firmware - a regular pre-allocated set of entires and a
possible 'unlimited' list of additional pending entries.

In most scenarios the 'unlimited' list would not be used, but
when it does the handling of the ramrod completion doesn't
properly handle the release of the entry.

Signed-off-by: Yuval Mintz <Yuval.Mintz@qlogic.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/qlogic/qed/qed_spq.c b/drivers/net/ethernet/qlogic/qed/qed_spq.c
index acac662..67d9893 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_spq.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_spq.c
@@ -614,7 +614,9 @@ qed_spq_add_entry(struct qed_hwfn *p_hwfn,
 
 			*p_en2 = *p_ent;
 
-			kfree(p_ent);
+			/* EBLOCK responsible to free the allocated p_ent */
+			if (p_ent->comp_mode != QED_SPQ_MODE_EBLOCK)
+				kfree(p_ent);
 
 			p_ent = p_en2;
 		}
@@ -749,6 +751,15 @@ int qed_spq_post(struct qed_hwfn *p_hwfn,
 		 * Thus, after gaining the answer perform the cleanup here.
 		 */
 		rc = qed_spq_block(p_hwfn, p_ent, fw_return_code);
+
+		if (p_ent->queue == &p_spq->unlimited_pending) {
+			/* This is an allocated p_ent which does not need to
+			 * return to pool.
+			 */
+			kfree(p_ent);
+			return rc;
+		}
+
 		if (rc)
 			goto spq_post_fail2;
 
@@ -844,8 +855,12 @@ int qed_spq_completion(struct qed_hwfn *p_hwfn,
 		found->comp_cb.function(p_hwfn, found->comp_cb.cookie, p_data,
 					fw_return_code);
 
-	if (found->comp_mode != QED_SPQ_MODE_EBLOCK)
-		/* EBLOCK is responsible for freeing its own entry */
+	if ((found->comp_mode != QED_SPQ_MODE_EBLOCK) ||
+	    (found->queue == &p_spq->unlimited_pending))
+		/* EBLOCK  is responsible for returning its own entry into the
+		 * free list, unless it originally added the entry into the
+		 * unlimited pending list.
+		 */
 		qed_spq_return_entry(p_hwfn, found);
 
 	/* Attempt to post pending requests */
-- 
cgit v0.10.2


From b639f197210d37905a6018ae4297659eb3f48f8f Mon Sep 17 00:00:00 2001
From: Yuval Mintz <Yuval.Mintz@qlogic.com>
Date: Sun, 19 Jun 2016 15:18:15 +0300
Subject: qed: Add missing port-mode

The 'MODULE_FIBER' value replaced several other FIBER values
in newer management firmware images, so existing code would
fail to properly reflect its mode.

Signed-off-by: Yuval Mintz <Yuval.Mintz@qlogic.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
index 9afc15f..e29ed5a 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
@@ -3700,6 +3700,7 @@ struct public_port {
 #define MEDIA_DA_TWINAX         0x3
 #define MEDIA_BASE_T            0x4
 #define MEDIA_SFP_1G_FIBER      0x5
+#define MEDIA_MODULE_FIBER      0x6
 #define MEDIA_KR                0xf0
 #define MEDIA_NOT_PRESENT       0xff
 
diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
index 61cc686..c7e01b3 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -1085,6 +1085,7 @@ static int qed_get_port_type(u32 media_type)
 	case MEDIA_SFPP_10G_FIBER:
 	case MEDIA_SFP_1G_FIBER:
 	case MEDIA_XFP_FIBER:
+	case MEDIA_MODULE_FIBER:
 	case MEDIA_KR:
 		port_type = PORT_FIBRE;
 		break;
-- 
cgit v0.10.2


From 427460c83cdf55069eee49799a0caef7dde8df69 Mon Sep 17 00:00:00 2001
From: Thor Thayer <tthayer@opensource.altera.com>
Date: Thu, 16 Jun 2016 11:10:19 -0500
Subject: can: c_can: Update D_CAN TX and RX functions to 32 bit - fix Altera
 Cyclone access

When testing CAN write floods on Altera's CycloneV, the first 2 bytes
are sometimes 0x00, 0x00 or corrupted instead of the values sent. Also
observed bytes 4 & 5 were corrupted in some cases.

The D_CAN Data registers are 32 bits and changing from 16 bit writes to
32 bit writes fixes the problem.

Testing performed on Altera CycloneV (D_CAN).  Requesting tests on other
C_CAN & D_CAN platforms.

Reported-by: Richard Andrysek <richard.andrysek@gomtec.de>
Signed-off-by: Thor Thayer <tthayer@opensource.altera.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>

diff --git a/drivers/net/can/c_can/c_can.c b/drivers/net/can/c_can/c_can.c
index f91b094..e3dccd3 100644
--- a/drivers/net/can/c_can/c_can.c
+++ b/drivers/net/can/c_can/c_can.c
@@ -332,9 +332,23 @@ static void c_can_setup_tx_object(struct net_device *dev, int iface,
 
 	priv->write_reg(priv, C_CAN_IFACE(MSGCTRL_REG, iface), ctrl);
 
-	for (i = 0; i < frame->can_dlc; i += 2) {
-		priv->write_reg(priv, C_CAN_IFACE(DATA1_REG, iface) + i / 2,
-				frame->data[i] | (frame->data[i + 1] << 8));
+	if (priv->type == BOSCH_D_CAN) {
+		u32 data = 0, dreg = C_CAN_IFACE(DATA1_REG, iface);
+
+		for (i = 0; i < frame->can_dlc; i += 4, dreg += 2) {
+			data = (u32)frame->data[i];
+			data |= (u32)frame->data[i + 1] << 8;
+			data |= (u32)frame->data[i + 2] << 16;
+			data |= (u32)frame->data[i + 3] << 24;
+			priv->write_reg32(priv, dreg, data);
+		}
+	} else {
+		for (i = 0; i < frame->can_dlc; i += 2) {
+			priv->write_reg(priv,
+					C_CAN_IFACE(DATA1_REG, iface) + i / 2,
+					frame->data[i] |
+					(frame->data[i + 1] << 8));
+		}
 	}
 }
 
@@ -402,10 +416,20 @@ static int c_can_read_msg_object(struct net_device *dev, int iface, u32 ctrl)
 	} else {
 		int i, dreg = C_CAN_IFACE(DATA1_REG, iface);
 
-		for (i = 0; i < frame->can_dlc; i += 2, dreg ++) {
-			data = priv->read_reg(priv, dreg);
-			frame->data[i] = data;
-			frame->data[i + 1] = data >> 8;
+		if (priv->type == BOSCH_D_CAN) {
+			for (i = 0; i < frame->can_dlc; i += 4, dreg += 2) {
+				data = priv->read_reg32(priv, dreg);
+				frame->data[i] = data;
+				frame->data[i + 1] = data >> 8;
+				frame->data[i + 2] = data >> 16;
+				frame->data[i + 3] = data >> 24;
+			}
+		} else {
+			for (i = 0; i < frame->can_dlc; i += 2, dreg++) {
+				data = priv->read_reg(priv, dreg);
+				frame->data[i] = data;
+				frame->data[i + 1] = data >> 8;
+			}
 		}
 	}
 
-- 
cgit v0.10.2


From 43200a4480cbbe660309621817f54cbb93907108 Mon Sep 17 00:00:00 2001
From: Wolfgang Grandegger <wg@grandegger.com>
Date: Mon, 13 Jun 2016 15:44:19 +0200
Subject: can: at91_can: RX queue could get stuck at high bus load

At high bus load it could happen that "at91_poll()" enters with all RX
message boxes filled up. If then at the end the "quota" is exceeded as
well, "rx_next" will not be reset to the first RX mailbox and hence the
interrupts remain disabled.

Signed-off-by: Wolfgang Grandegger <wg@grandegger.com>
Tested-by: Amr Bekhit <amrbekhit@gmail.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>

diff --git a/drivers/net/can/at91_can.c b/drivers/net/can/at91_can.c
index 8b3275d..8f5e93c 100644
--- a/drivers/net/can/at91_can.c
+++ b/drivers/net/can/at91_can.c
@@ -712,9 +712,10 @@ static int at91_poll_rx(struct net_device *dev, int quota)
 
 	/* upper group completed, look again in lower */
 	if (priv->rx_next > get_mb_rx_low_last(priv) &&
-	    quota > 0 && mb > get_mb_rx_last(priv)) {
+	    mb > get_mb_rx_last(priv)) {
 		priv->rx_next = get_mb_rx_first(priv);
-		goto again;
+		if (quota > 0)
+			goto again;
 	}
 
 	return received;
-- 
cgit v0.10.2


From f155d9c0a138463c3c9380d35e54e433c1477336 Mon Sep 17 00:00:00 2001
From: Maximilian Schneider <max@schneidersoft.net>
Date: Wed, 8 Jun 2016 18:00:26 +0000
Subject: can: gs_usb: Add Basic support for the bytewerk.org candleLight
 interface

This patchs adds basic support for the bytewerk.org candleLight interface,
a open hardware (CERN OHL) USB CAN adapter.

Signed-off-by: Hubert Denkmair <hubert@denkmair.de>
Signed-off-by: Maximilian Schneider <max@schneidersoft.net>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>

diff --git a/drivers/net/can/usb/Kconfig b/drivers/net/can/usb/Kconfig
index bcb272f..2ff0df3 100644
--- a/drivers/net/can/usb/Kconfig
+++ b/drivers/net/can/usb/Kconfig
@@ -16,7 +16,8 @@ config CAN_ESD_USB2
 config CAN_GS_USB
 	tristate "Geschwister Schneider UG interfaces"
 	---help---
-	  This driver supports the Geschwister Schneider USB/CAN devices.
+	  This driver supports the Geschwister Schneider and bytewerk.org
+	  candleLight USB CAN interfaces USB/CAN devices
 	  If unsure choose N,
 	  choose Y for built in support,
 	  M to compile as module (module will be named: gs_usb).
diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c
index 1556d42..acb0c84 100644
--- a/drivers/net/can/usb/gs_usb.c
+++ b/drivers/net/can/usb/gs_usb.c
@@ -1,7 +1,9 @@
-/* CAN driver for Geschwister Schneider USB/CAN devices.
+/* CAN driver for Geschwister Schneider USB/CAN devices
+ * and bytewerk.org candleLight USB CAN interfaces.
  *
- * Copyright (C) 2013 Geschwister Schneider Technologie-,
+ * Copyright (C) 2013-2016 Geschwister Schneider Technologie-,
  * Entwicklungs- und Vertriebs UG (Haftungsbeschränkt).
+ * Copyright (C) 2016 Hubert Denkmair
  *
  * Many thanks to all socketcan devs!
  *
@@ -29,6 +31,9 @@
 #define USB_GSUSB_1_VENDOR_ID      0x1d50
 #define USB_GSUSB_1_PRODUCT_ID     0x606f
 
+#define USB_CANDLELIGHT_VENDOR_ID  0x1209
+#define USB_CANDLELIGHT_PRODUCT_ID 0x2323
+
 #define GSUSB_ENDPOINT_IN          1
 #define GSUSB_ENDPOINT_OUT         2
 
@@ -952,6 +957,8 @@ static void gs_usb_disconnect(struct usb_interface *intf)
 static const struct usb_device_id gs_usb_table[] = {
 	{ USB_DEVICE_INTERFACE_NUMBER(USB_GSUSB_1_VENDOR_ID,
 				      USB_GSUSB_1_PRODUCT_ID, 0) },
+	{ USB_DEVICE_INTERFACE_NUMBER(USB_CANDLELIGHT_VENDOR_ID,
+				      USB_CANDLELIGHT_PRODUCT_ID, 0) },
 	{} /* Terminating entry */
 };
 
@@ -969,5 +976,6 @@ module_usb_driver(gs_usb_driver);
 MODULE_AUTHOR("Maximilian Schneider <mws@schneidersoft.net>");
 MODULE_DESCRIPTION(
 "Socket CAN device driver for Geschwister Schneider Technologie-, "
-"Entwicklungs- und Vertriebs UG. USB2.0 to CAN interfaces.");
+"Entwicklungs- und Vertriebs UG. USB2.0 to CAN interfaces\n"
+"and bytewerk.org candleLight USB CAN interfaces.");
 MODULE_LICENSE("GPL v2");
-- 
cgit v0.10.2


From 8974189222159154c55f24ddad33e3613960521a Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 16 Jun 2016 10:50:40 +0200
Subject: sched/fair: Fix cfs_rq avg tracking underflow

As per commit:

  b7fa30c9cc48 ("sched/fair: Fix post_init_entity_util_avg() serialization")

> the code generated from update_cfs_rq_load_avg():
>
> 	if (atomic_long_read(&cfs_rq->removed_load_avg)) {
> 		s64 r = atomic_long_xchg(&cfs_rq->removed_load_avg, 0);
> 		sa->load_avg = max_t(long, sa->load_avg - r, 0);
> 		sa->load_sum = max_t(s64, sa->load_sum - r * LOAD_AVG_MAX, 0);
> 		removed_load = 1;
> 	}
>
> turns into:
>
> ffffffff81087064:       49 8b 85 98 00 00 00    mov    0x98(%r13),%rax
> ffffffff8108706b:       48 85 c0                test   %rax,%rax
> ffffffff8108706e:       74 40                   je     ffffffff810870b0 <update_blocked_averages+0xc0>
> ffffffff81087070:       4c 89 f8                mov    %r15,%rax
> ffffffff81087073:       49 87 85 98 00 00 00    xchg   %rax,0x98(%r13)
> ffffffff8108707a:       49 29 45 70             sub    %rax,0x70(%r13)
> ffffffff8108707e:       4c 89 f9                mov    %r15,%rcx
> ffffffff81087081:       bb 01 00 00 00          mov    $0x1,%ebx
> ffffffff81087086:       49 83 7d 70 00          cmpq   $0x0,0x70(%r13)
> ffffffff8108708b:       49 0f 49 4d 70          cmovns 0x70(%r13),%rcx
>
> Which you'll note ends up with sa->load_avg -= r in memory at
> ffffffff8108707a.

So I _should_ have looked at other unserialized users of ->load_avg,
but alas. Luckily nikbor reported a similar /0 from task_h_load() which
instantly triggered recollection of this here problem.

Aside from the intermediate value hitting memory and causing problems,
there's another problem: the underflow detection relies on the signed
bit. This reduces the effective width of the variables, IOW its
effectively the same as having these variables be of signed type.

This patch changes to a different means of unsigned underflow
detection to not rely on the signed bit. This allows the variables to
use the 'full' unsigned range. And it does so with explicit LOAD -
STORE to ensure any intermediate value will never be visible in
memory, allowing these unserialized loads.

Note: GCC generates crap code for this, might warrant a look later.

Note2: I say 'full' above, if we end up at U*_MAX we'll still explode;
       maybe we should do clamping on add too.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Yuyang Du <yuyang.du@intel.com>
Cc: bsegall@google.com
Cc: kernel@kyup.com
Cc: morten.rasmussen@arm.com
Cc: pjt@google.com
Cc: steve.muckle@linaro.org
Fixes: 9d89c257dfb9 ("sched/fair: Rewrite runnable load and utilization average tracking")
Link: http://lkml.kernel.org/r/20160617091948.GJ30927@twins.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index a2348de..2ae68f0 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2904,6 +2904,23 @@ static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq)
 	}
 }
 
+/*
+ * Unsigned subtract and clamp on underflow.
+ *
+ * Explicitly do a load-store to ensure the intermediate value never hits
+ * memory. This allows lockless observations without ever seeing the negative
+ * values.
+ */
+#define sub_positive(_ptr, _val) do {				\
+	typeof(_ptr) ptr = (_ptr);				\
+	typeof(*ptr) val = (_val);				\
+	typeof(*ptr) res, var = READ_ONCE(*ptr);		\
+	res = var - val;					\
+	if (res > var)						\
+		res = 0;					\
+	WRITE_ONCE(*ptr, res);					\
+} while (0)
+
 /* Group cfs_rq's load_avg is used for task_h_load and update_cfs_share */
 static inline int
 update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq, bool update_freq)
@@ -2913,15 +2930,15 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq, bool update_freq)
 
 	if (atomic_long_read(&cfs_rq->removed_load_avg)) {
 		s64 r = atomic_long_xchg(&cfs_rq->removed_load_avg, 0);
-		sa->load_avg = max_t(long, sa->load_avg - r, 0);
-		sa->load_sum = max_t(s64, sa->load_sum - r * LOAD_AVG_MAX, 0);
+		sub_positive(&sa->load_avg, r);
+		sub_positive(&sa->load_sum, r * LOAD_AVG_MAX);
 		removed_load = 1;
 	}
 
 	if (atomic_long_read(&cfs_rq->removed_util_avg)) {
 		long r = atomic_long_xchg(&cfs_rq->removed_util_avg, 0);
-		sa->util_avg = max_t(long, sa->util_avg - r, 0);
-		sa->util_sum = max_t(s32, sa->util_sum - r * LOAD_AVG_MAX, 0);
+		sub_positive(&sa->util_avg, r);
+		sub_positive(&sa->util_sum, r * LOAD_AVG_MAX);
 		removed_util = 1;
 	}
 
@@ -2994,10 +3011,10 @@ static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
 			  &se->avg, se->on_rq * scale_load_down(se->load.weight),
 			  cfs_rq->curr == se, NULL);
 
-	cfs_rq->avg.load_avg = max_t(long, cfs_rq->avg.load_avg - se->avg.load_avg, 0);
-	cfs_rq->avg.load_sum = max_t(s64,  cfs_rq->avg.load_sum - se->avg.load_sum, 0);
-	cfs_rq->avg.util_avg = max_t(long, cfs_rq->avg.util_avg - se->avg.util_avg, 0);
-	cfs_rq->avg.util_sum = max_t(s32,  cfs_rq->avg.util_sum - se->avg.util_sum, 0);
+	sub_positive(&cfs_rq->avg.load_avg, se->avg.load_avg);
+	sub_positive(&cfs_rq->avg.load_sum, se->avg.load_sum);
+	sub_positive(&cfs_rq->avg.util_avg, se->avg.util_avg);
+	sub_positive(&cfs_rq->avg.util_sum, se->avg.util_sum);
 
 	cfs_rq_util_change(cfs_rq);
 }
-- 
cgit v0.10.2


From 70c8217acd4383e069fe1898bbad36ea4fcdbdcc Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Fri, 17 Jun 2016 16:10:42 -0400
Subject: tracing: Handle NULL formats in hold_module_trace_bprintk_format()

If a task uses a non constant string for the format parameter in
trace_printk(), then the trace_printk_fmt variable is set to NULL. This
variable is then saved in the __trace_printk_fmt section.

The function hold_module_trace_bprintk_format() checks to see if duplicate
formats are used by modules, and reuses them if so (saves them to the list
if it is new). But this function calls lookup_format() that does a strcmp()
to the value (which is now NULL) and can cause a kernel oops.

This wasn't an issue till 3debb0a9ddb ("tracing: Fix trace_printk() to print
when not using bprintk()") which added "__used" to the trace_printk_fmt
variable, and before that, the kernel simply optimized it out (no NULL value
was saved).

The fix is simply to handle the NULL pointer in lookup_format() and have the
caller ignore the value if it was NULL.

Link: http://lkml.kernel.org/r/1464769870-18344-1-git-send-email-zhengjun.xing@intel.com

Reported-by: xingzhen <zhengjun.xing@intel.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Fixes: 3debb0a9ddb ("tracing: Fix trace_printk() to print when not using bprintk()")
Cc: stable@vger.kernel.org # v3.5+
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c
index f96f038..ad1d616 100644
--- a/kernel/trace/trace_printk.c
+++ b/kernel/trace/trace_printk.c
@@ -36,6 +36,10 @@ struct trace_bprintk_fmt {
 static inline struct trace_bprintk_fmt *lookup_format(const char *fmt)
 {
 	struct trace_bprintk_fmt *pos;
+
+	if (!fmt)
+		return ERR_PTR(-EINVAL);
+
 	list_for_each_entry(pos, &trace_bprintk_fmt_list, list) {
 		if (!strcmp(pos->fmt, fmt))
 			return pos;
@@ -57,7 +61,8 @@ void hold_module_trace_bprintk_format(const char **start, const char **end)
 	for (iter = start; iter < end; iter++) {
 		struct trace_bprintk_fmt *tb_fmt = lookup_format(*iter);
 		if (tb_fmt) {
-			*iter = tb_fmt->fmt;
+			if (!IS_ERR(tb_fmt))
+				*iter = tb_fmt->fmt;
 			continue;
 		}
 
-- 
cgit v0.10.2


From 0ded5174e976e2b2a354fe38abf1ebf4492c6dc3 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Mon, 23 May 2016 15:06:30 -0400
Subject: ftracetest: Fix hist unsupported result in hist selftests

When histograms are not configured in the kernel, the ftracetest histogram
selftests should return "unsupported" and not "Failed". To detect this, the
test scripts have:

 FEATURE=`grep hist events/sched/sched_process_fork/trigger`
 if [ -z "$FEATURE" ]; then
     echo "hist trigger is not supported"
     exit_unsupported
 fi

The problem is that '-e' is in effect and any error will cause the program
to terminate. The grep for 'hist' fails, because it is not compiled it (thus
unsupported), but because grep has an error code for failing to find the
string, it causes the program to terminate, and is marked as a failed test.

Namhyung Kim recommended to test for the "hist" file located in
events/sched/sched_process_fork/hist instead, as it is more inline with the
other checks. As the hist file is only created if the histogram feature is
enabled, that is a valid check.

Link: http://lkml.kernel.org/r/20160523151538.4ea9ce0c@gandalf.local.home

Suggested-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
Fixes: 76929ab51f0ee ("kselftests/ftrace: Add hist trigger testcases")
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc
index c2b61c4..0bf5085 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc
@@ -23,15 +23,14 @@ if [ ! -f events/sched/sched_process_fork/trigger ]; then
     exit_unsupported
 fi
 
-reset_tracer
-do_reset
-
-FEATURE=`grep hist events/sched/sched_process_fork/trigger`
-if [ -z "$FEATURE" ]; then
+if [ ! -f events/sched/sched_process_fork/hist ]; then
     echo "hist trigger is not supported"
     exit_unsupported
 fi
 
+reset_tracer
+do_reset
+
 echo "Test histogram with execname modifier"
 
 echo 'hist:keys=common_pid.execname' > events/sched/sched_process_fork/trigger
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc
index b2902d4..a00184c 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc
@@ -23,15 +23,14 @@ if [ ! -f events/sched/sched_process_fork/trigger ]; then
     exit_unsupported
 fi
 
-reset_tracer
-do_reset
-
-FEATURE=`grep hist events/sched/sched_process_fork/trigger`
-if [ -z "$FEATURE" ]; then
+if [ ! -f events/sched/sched_process_fork/hist ]; then
     echo "hist trigger is not supported"
     exit_unsupported
 fi
 
+reset_tracer
+do_reset
+
 echo "Test histogram basic tigger"
 
 echo 'hist:keys=parent_pid:vals=child_pid' > events/sched/sched_process_fork/trigger
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc
index 03c4a46..3478b00 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc
@@ -23,15 +23,14 @@ if [ ! -f events/sched/sched_process_fork/trigger ]; then
     exit_unsupported
 fi
 
-reset_tracer
-do_reset
-
-FEATURE=`grep hist events/sched/sched_process_fork/trigger`
-if [ -z "$FEATURE" ]; then
+if [ ! -f events/sched/sched_process_fork/hist ]; then
     echo "hist trigger is not supported"
     exit_unsupported
 fi
 
+reset_tracer
+do_reset
+
 reset_trigger
 
 echo "Test histogram multiple tiggers"
-- 
cgit v0.10.2


From e7d6ef9790bc281f5c29d0132b68031248523fe8 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 20 Jun 2016 01:35:59 -0400
Subject: fix idiotic braino in d_alloc_parallel()

Check for d_unhashed() while searching in in-lookup hash was absolutely
wrong.  Worse, it masked a deadlock on dget() done under bitlock that
nests inside ->d_lock.  Thanks to J. R. Okajima for spotting it.

Spotted-by: "J. R. Okajima" <hooanon05g@gmail.com>
Wearing-brown-paperbag: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/dcache.c b/fs/dcache.c
index b7eddfd..d6847d7 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2503,7 +2503,6 @@ retry:
 		rcu_read_unlock();
 		goto retry;
 	}
-	rcu_read_unlock();
 	/*
 	 * No changes for the parent since the beginning of d_lookup().
 	 * Since all removals from the chain happen with hlist_bl_lock(),
@@ -2516,8 +2515,6 @@ retry:
 			continue;
 		if (dentry->d_parent != parent)
 			continue;
-		if (d_unhashed(dentry))
-			continue;
 		if (parent->d_flags & DCACHE_OP_COMPARE) {
 			int tlen = dentry->d_name.len;
 			const char *tname = dentry->d_name.name;
@@ -2529,9 +2526,18 @@ retry:
 			if (dentry_cmp(dentry, str, len))
 				continue;
 		}
-		dget(dentry);
 		hlist_bl_unlock(b);
-		/* somebody is doing lookup for it right now; wait for it */
+		/* now we can try to grab a reference */
+		if (!lockref_get_not_dead(&dentry->d_lockref)) {
+			rcu_read_unlock();
+			goto retry;
+		}
+
+		rcu_read_unlock();
+		/*
+		 * somebody is likely to be still doing lookup for it;
+		 * wait for them to finish
+		 */
 		spin_lock(&dentry->d_lock);
 		d_wait_lookup(dentry);
 		/*
@@ -2562,6 +2568,7 @@ retry:
 		dput(new);
 		return dentry;
 	}
+	rcu_read_unlock();
 	/* we can't take ->d_lock here; it's OK, though. */
 	new->d_flags |= DCACHE_PAR_LOOKUP;
 	new->d_wait = wq;
-- 
cgit v0.10.2


From ef0dab4aae14e25efddf1577736f8450132800c5 Mon Sep 17 00:00:00 2001
From: David Miller <davem@davemloft.net>
Date: Sat, 18 Jun 2016 23:52:25 -0700
Subject: PCI: Fix unaligned accesses in VC code

The save/restore buffers for VC state is first composed of a 2-byte control
register, then a bunch of 4-byte words.

This causes unaligned accesses which trap on platform such as sparc.

This is easy to fix by simply moving the buffer pointer forward by 4 bytes
instead of 2 after dealing with the control register.  The length
adjustment needs to be changed likewise as well.

Fixes: 5f8fc43217a0 ("PCI: Include pci/pcie/Kconfig directly from pci/Kconfig")
Reported-by: Meelis Roos <mroos@linux.ee>
Reported-by: Anatoly Pugachev <matorola@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
CC: stable@vger.kernel.org	# v4.6+

diff --git a/drivers/pci/vc.c b/drivers/pci/vc.c
index dfbab61..1fa3a32 100644
--- a/drivers/pci/vc.c
+++ b/drivers/pci/vc.c
@@ -221,9 +221,9 @@ static int pci_vc_do_save_buffer(struct pci_dev *dev, int pos,
 		else
 			pci_write_config_word(dev, pos + PCI_VC_PORT_CTRL,
 					      *(u16 *)buf);
-		buf += 2;
+		buf += 4;
 	}
-	len += 2;
+	len += 4;
 
 	/*
 	 * If we have any Low Priority VCs and a VC Arbitration Table Offset
-- 
cgit v0.10.2


From 48a70e1ca85e3b484791e100bb403c05ef9d37c8 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Sat, 18 Jun 2016 11:38:44 +0300
Subject: drm/amdgpu: precedence bug in amdgpu_device_init()

! has higher precedence than bitwise & so we need to add parenthesis
for this to work as intended.

Fixes: 048765ad5af7 ('amdgpu: fix asic initialization for virtualized environments (v2)')
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 66482b4..6e92008 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1535,7 +1535,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	/* Post card if necessary */
 	if (!amdgpu_card_posted(adev) ||
 	    (adev->virtualization.is_virtual &&
-	     !adev->virtualization.caps & AMDGPU_VIRT_CAPS_SRIOV_EN)) {
+	     !(adev->virtualization.caps & AMDGPU_VIRT_CAPS_SRIOV_EN))) {
 		if (!adev->bios) {
 			dev_err(adev->dev, "Card not posted and no BIOS - ignoring\n");
 			return -EINVAL;
-- 
cgit v0.10.2


From 29b9c528b8c295911e8b1e515273e89a2b7fa2d8 Mon Sep 17 00:00:00 2001
From: Nicolas Iooss <nicolas.iooss_linux@m4x.org>
Date: Sat, 18 Jun 2016 22:55:00 +0200
Subject: drm/amdgpu: initialize amdgpu_cgs_acpi_eval_object result value

amdgpu_cgs_acpi_eval_object() returned the value of variable "result"
without initializing it first.

This bug has been found by compiling the kernel with clang.  The
compiler complained:

    drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c:972:14: error: variable
    'result' is used uninitialized whenever 'for' loop exits because its
    condition is false [-Werror,-Wsometimes-uninitialized]
            for (i = 0; i < count; i++) {
                        ^~~~~~~~~
    drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c:1011:9: note: uninitialized
    use occurs here
            return result;
                   ^~~~~~
    drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c:972:14: note: remove the
    condition if it is always true
            for (i = 0; i < count; i++) {
                        ^~~~~~~~~
    drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c:864:12: note: initialize the
    variable 'result' to silence this warning
            int result;
                      ^
                       = 0

Fixes: 3f1d35a03b3c ("drm/amdgpu: implement new cgs interface for acpi
function")
Signed-off-by: Nicolas Iooss <nicolas.iooss_linux@m4x.org>
Cc: stable@vger.kernel.org
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
index 8943099..cf6f49f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
@@ -909,7 +909,7 @@ static int amdgpu_cgs_acpi_eval_object(struct cgs_device *cgs_device,
 	struct cgs_acpi_method_argument *argument = NULL;
 	uint32_t i, count;
 	acpi_status status;
-	int result;
+	int result = 0;
 	uint32_t func_no = 0xFFFFFFFF;
 
 	handle = ACPI_HANDLE(&adev->pdev->dev);
-- 
cgit v0.10.2


From 274f5b041d3c9c0974a7dd1f66b67c33bb5b0f42 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 6 Jun 2016 18:55:57 -0400
Subject: dcache_{readdir,dir_lseek}(): don't bother with nested ->d_lock

Make sure that directory is locked shared in dcache_dir_lseek();
for dcache_readdir() it's already tru, and that's enough to make
simple_positive() stable.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/libfs.c b/fs/libfs.c
index cedeacb..f56acb1 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -103,6 +103,7 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence)
 			struct dentry *cursor = file->private_data;
 			loff_t n = file->f_pos - 2;
 
+			inode_lock_shared(dentry->d_inode);
 			spin_lock(&dentry->d_lock);
 			/* d_lock not required for cursor */
 			list_del(&cursor->d_child);
@@ -110,14 +111,13 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence)
 			while (n && p != &dentry->d_subdirs) {
 				struct dentry *next;
 				next = list_entry(p, struct dentry, d_child);
-				spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED);
 				if (simple_positive(next))
 					n--;
-				spin_unlock(&next->d_lock);
 				p = p->next;
 			}
 			list_add_tail(&cursor->d_child, p);
 			spin_unlock(&dentry->d_lock);
+			inode_unlock_shared(dentry->d_inode);
 		}
 	}
 	return offset;
@@ -150,22 +150,16 @@ int dcache_readdir(struct file *file, struct dir_context *ctx)
 
 	for (p = q->next; p != &dentry->d_subdirs; p = p->next) {
 		struct dentry *next = list_entry(p, struct dentry, d_child);
-		spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED);
-		if (!simple_positive(next)) {
-			spin_unlock(&next->d_lock);
+		if (!simple_positive(next))
 			continue;
-		}
 
-		spin_unlock(&next->d_lock);
 		spin_unlock(&dentry->d_lock);
 		if (!dir_emit(ctx, next->d_name.name, next->d_name.len,
 			      d_inode(next)->i_ino, dt_type(d_inode(next))))
 			return 0;
 		spin_lock(&dentry->d_lock);
-		spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED);
 		/* next is still alive */
 		list_move(q, p);
-		spin_unlock(&next->d_lock);
 		p = q;
 		ctx->pos++;
 	}
-- 
cgit v0.10.2


From 4f42c1b5b9c27b6228e6b9c57eee4beb3118b6b0 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 6 Jun 2016 19:37:13 -0400
Subject: libfs.c: new helper - next_positive()

Return nth positive child after given or NULL if there's
less than n left.  dcache_readdir() and dcache_dir_lseek()
switched to it.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/libfs.c b/fs/libfs.c
index f56acb1..b05b74a 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -84,6 +84,39 @@ int dcache_dir_close(struct inode *inode, struct file *file)
 }
 EXPORT_SYMBOL(dcache_dir_close);
 
+/* parent is locked at least shared */
+static struct dentry *next_positive(struct dentry *parent,
+				    struct list_head *from,
+				    int count)
+{
+	struct dentry *res = NULL;
+	struct list_head *p;
+
+	spin_lock(&parent->d_lock);
+	for (p = from->next; p != &parent->d_subdirs; p = p->next) {
+		struct dentry *d = list_entry(p, struct dentry, d_child);
+		if (simple_positive(d) && !--count) {
+			res = d;
+			break;
+		}
+	}
+	spin_unlock(&parent->d_lock);
+	return res;
+}
+
+static void move_cursor(struct dentry *cursor, struct list_head *after)
+{
+	struct dentry *parent = cursor->d_parent;
+
+	spin_lock(&parent->d_lock);
+	__list_del(cursor->d_child.prev, cursor->d_child.next);
+	if (after)
+		list_add(&cursor->d_child, after);
+	else
+		list_add_tail(&cursor->d_child, &parent->d_subdirs);
+	spin_unlock(&parent->d_lock);
+}
+
 loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence)
 {
 	struct dentry *dentry = file->f_path.dentry;
@@ -99,24 +132,13 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence)
 	if (offset != file->f_pos) {
 		file->f_pos = offset;
 		if (file->f_pos >= 2) {
-			struct list_head *p;
 			struct dentry *cursor = file->private_data;
+			struct dentry *to;
 			loff_t n = file->f_pos - 2;
 
 			inode_lock_shared(dentry->d_inode);
-			spin_lock(&dentry->d_lock);
-			/* d_lock not required for cursor */
-			list_del(&cursor->d_child);
-			p = dentry->d_subdirs.next;
-			while (n && p != &dentry->d_subdirs) {
-				struct dentry *next;
-				next = list_entry(p, struct dentry, d_child);
-				if (simple_positive(next))
-					n--;
-				p = p->next;
-			}
-			list_add_tail(&cursor->d_child, p);
-			spin_unlock(&dentry->d_lock);
+			to = next_positive(dentry, &dentry->d_subdirs, n);
+			move_cursor(cursor, to ? &to->d_child : NULL);
 			inode_unlock_shared(dentry->d_inode);
 		}
 	}
@@ -140,30 +162,25 @@ int dcache_readdir(struct file *file, struct dir_context *ctx)
 {
 	struct dentry *dentry = file->f_path.dentry;
 	struct dentry *cursor = file->private_data;
-	struct list_head *p, *q = &cursor->d_child;
+	struct list_head *p = &cursor->d_child;
+	struct dentry *next;
+	bool moved = false;
 
 	if (!dir_emit_dots(file, ctx))
 		return 0;
-	spin_lock(&dentry->d_lock);
-	if (ctx->pos == 2)
-		list_move(q, &dentry->d_subdirs);
 
-	for (p = q->next; p != &dentry->d_subdirs; p = p->next) {
-		struct dentry *next = list_entry(p, struct dentry, d_child);
-		if (!simple_positive(next))
-			continue;
-
-		spin_unlock(&dentry->d_lock);
+	if (ctx->pos == 2)
+		p = &dentry->d_subdirs;
+	while ((next = next_positive(dentry, p, 1)) != NULL) {
 		if (!dir_emit(ctx, next->d_name.name, next->d_name.len,
 			      d_inode(next)->i_ino, dt_type(d_inode(next))))
-			return 0;
-		spin_lock(&dentry->d_lock);
-		/* next is still alive */
-		list_move(q, p);
-		p = q;
+			break;
+		moved = true;
+		p = &next->d_child;
 		ctx->pos++;
 	}
-	spin_unlock(&dentry->d_lock);
+	if (moved)
+		move_cursor(cursor, p);
 	return 0;
 }
 EXPORT_SYMBOL(dcache_readdir);
-- 
cgit v0.10.2


From ebaaa80e8f20ff2cbbccd6823f73a99565487502 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 6 Jun 2016 20:55:34 -0400
Subject: lockless next_positive()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/libfs.c b/fs/libfs.c
index b05b74a..74dc8b9 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -89,31 +89,53 @@ static struct dentry *next_positive(struct dentry *parent,
 				    struct list_head *from,
 				    int count)
 {
-	struct dentry *res = NULL;
+	unsigned *seq = &parent->d_inode->i_dir_seq, n;
+	struct dentry *res;
 	struct list_head *p;
+	bool skipped;
+	int i;
 
-	spin_lock(&parent->d_lock);
+retry:
+	i = count;
+	skipped = false;
+	n = smp_load_acquire(seq) & ~1;
+	res = NULL;
+	rcu_read_lock();
 	for (p = from->next; p != &parent->d_subdirs; p = p->next) {
 		struct dentry *d = list_entry(p, struct dentry, d_child);
-		if (simple_positive(d) && !--count) {
+		if (!simple_positive(d)) {
+			skipped = true;
+		} else if (!--i) {
 			res = d;
 			break;
 		}
 	}
-	spin_unlock(&parent->d_lock);
+	rcu_read_unlock();
+	if (skipped) {
+		smp_rmb();
+		if (unlikely(*seq != n))
+			goto retry;
+	}
 	return res;
 }
 
 static void move_cursor(struct dentry *cursor, struct list_head *after)
 {
 	struct dentry *parent = cursor->d_parent;
-
+	unsigned n, *seq = &parent->d_inode->i_dir_seq;
 	spin_lock(&parent->d_lock);
+	for (;;) {
+		n = *seq;
+		if (!(n & 1) && cmpxchg(seq, n, n + 1) == n)
+			break;
+		cpu_relax();
+	}
 	__list_del(cursor->d_child.prev, cursor->d_child.next);
 	if (after)
 		list_add(&cursor->d_child, after);
 	else
 		list_add_tail(&cursor->d_child, &parent->d_subdirs);
+	smp_store_release(seq, n + 2);
 	spin_unlock(&parent->d_lock);
 }
 
-- 
cgit v0.10.2


From a5e9b85a6540df6c4074d3a56674f6fb6c5fc830 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <yongjun_wei@trendmicro.com.cn>
Date: Fri, 17 Jun 2016 17:24:23 +0000
Subject: clk: Fix return value check in oxnas_stdclk_probe()

In case of error, the function syscon_node_to_regmap() returns
ERR_PTR() and never returns NULL. The NULL test in the return
value check should be replaced with IS_ERR().

Signed-off-by: Wei Yongjun <yongjun_wei@trendmicro.com.cn>
Acked-by: Neil Armstrong <narmstrong@baylibre.com>
Fixes: 0bbd72b4c64f ("clk: Add Oxford Semiconductor OXNAS Standard Clocks")
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>

diff --git a/drivers/clk/clk-oxnas.c b/drivers/clk/clk-oxnas.c
index efba7d4..79bcb2e 100644
--- a/drivers/clk/clk-oxnas.c
+++ b/drivers/clk/clk-oxnas.c
@@ -144,9 +144,9 @@ static int oxnas_stdclk_probe(struct platform_device *pdev)
 		return -ENOMEM;
 
 	regmap = syscon_node_to_regmap(of_get_parent(np));
-	if (!regmap) {
+	if (IS_ERR(regmap)) {
 		dev_err(&pdev->dev, "failed to have parent regmap\n");
-		return -EINVAL;
+		return PTR_ERR(regmap);
 	}
 
 	for (i = 0; i < ARRAY_SIZE(clk_oxnas_init); i++) {
-- 
cgit v0.10.2


From 1b7e38b92b0bbd363369f5160f13f4d26140972d Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@free-electrons.com>
Date: Fri, 27 May 2016 16:09:25 +0200
Subject: drm: atmel-hlcdc: actually disable scaling when no scaling is
 required

The driver is only enabling scaling, but never disabling it, thus, if you
enable the scaling feature once it stays enabled forever.

Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Reported-by: Alex Vazquez <avazquez.dev@gmail.com>
Reviewed-by: Nicolas Ferre <nicolas.ferre@atmel.com>
Fixes: 1a396789f65a ("drm: add Atmel HLCDC Display Controller support")
Cc: <stable@vger.kernel.org>

diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c
index aef3ca8..016c191 100644
--- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c
+++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c
@@ -339,6 +339,8 @@ atmel_hlcdc_plane_update_pos_and_size(struct atmel_hlcdc_plane *plane,
 
 		atmel_hlcdc_layer_update_cfg(&plane->layer, 13, 0xffffffff,
 					     factor_reg);
+	} else {
+		atmel_hlcdc_layer_update_cfg(&plane->layer, 13, 0xffffffff, 0);
 	}
 }
 
-- 
cgit v0.10.2


From 0b1e1eb76220afa043b2733dfe61f5927cf0e458 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@free-electrons.com>
Date: Fri, 3 Jun 2016 09:17:36 +0200
Subject: drm: atmel-hlcdc: Fix OF graph parsing

atmel_hlcdc_create_outputs() iterates over OF graph nodes and releases
the node (using of_node_put()) after each iteration, which is wrong
since for_each_endpoint_of_node() is already taking care of that.

Move the of_node_put() call in the error path.

Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Reviewed-by: Nicolas Ferre <nicolas.ferre@atmel.com>
Fixes: 17a8e03e7e97 ("drm: atmel-hlcdc: rework the output code to support drm bridges")

diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_output.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_output.c
index 39802c0..3d34fc4 100644
--- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_output.c
+++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_output.c
@@ -266,9 +266,10 @@ int atmel_hlcdc_create_outputs(struct drm_device *dev)
 		if (!ret)
 			ret = atmel_hlcdc_check_endpoint(dev, &ep);
 
-		of_node_put(ep_np);
-		if (ret)
+		if (ret) {
+			of_node_put(ep_np);
 			return ret;
+		}
 	}
 
 	for_each_endpoint_of_node(dev->dev->of_node, ep_np) {
@@ -276,9 +277,10 @@ int atmel_hlcdc_create_outputs(struct drm_device *dev)
 		if (!ret)
 			ret = atmel_hlcdc_attach_endpoint(dev, &ep);
 
-		of_node_put(ep_np);
-		if (ret)
+		if (ret) {
+			of_node_put(ep_np);
 			return ret;
+		}
 	}
 
 	return 0;
-- 
cgit v0.10.2


From 1d7b84d12af8312b52316029f1fa0fa4eac3c9e4 Mon Sep 17 00:00:00 2001
From: Rex Zhu <Rex.Zhu@amd.com>
Date: Fri, 17 Jun 2016 18:21:01 +0800
Subject: drm/amd/powerplay: fix logic error.

the error lead powerplay can't get display info in DGPU case.
store_cc6_data just implement in APU.

Signed-off-by: Rex Zhu <Rex.Zhu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
index fa208ad..efb77ed 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
@@ -306,10 +306,14 @@ int phm_store_dal_configuration_data(struct pp_hwmgr *hwmgr,
 {
 	PHM_FUNC_CHECK(hwmgr);
 
-	if (hwmgr->hwmgr_func->store_cc6_data == NULL)
+	if (display_config == NULL)
 		return -EINVAL;
 
 	hwmgr->display_config = *display_config;
+
+	if (hwmgr->hwmgr_func->store_cc6_data == NULL)
+		return -EINVAL;
+
 	/* to do pass other display configuration in furture */
 
 	if (hwmgr->hwmgr_func->store_cc6_data)
-- 
cgit v0.10.2


From 576b4401b1971fe40be4cfd379430a61cd8426b2 Mon Sep 17 00:00:00 2001
From: Rex Zhu <Rex.Zhu@amd.com>
Date: Mon, 13 Jun 2016 17:39:19 +0800
Subject: drm/amd/powerplay: fix bug that function parameter was incorect.

Wrong value passed to acpi_pcie_perf_request.

Signed-off-by: Rex Zhu <Rex.Zhu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Ken Wang <Qingqing.Wang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/pp_acpi.c b/drivers/gpu/drm/amd/powerplay/hwmgr/pp_acpi.c
index 58742e0..d19a9b6 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/pp_acpi.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/pp_acpi.c
@@ -77,7 +77,7 @@ int acpi_pcie_perf_request(void *device, uint8_t perf_req, bool advertise)
 						ATCS_FUNCTION_PCIE_PERFORMANCE_REQUEST,
 						&atcs_input,
 						&atcs_output,
-						0,
+						1,
 						sizeof(atcs_input),
 						sizeof(atcs_output));
 		if (result != 0)
-- 
cgit v0.10.2


From 0a4fef559b69ae2e682c98f31d53a225fbda78bd Mon Sep 17 00:00:00 2001
From: Rex Zhu <Rex.Zhu@amd.com>
Date: Tue, 14 Jun 2016 18:36:36 +0800
Subject: drm/amd/powerplay: need to notify system bios pcie device ready

before request performance state.

Signed-off-by: Rex Zhu <Rex.Zhu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Ken Wang <Qingqing.Wang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/pp_acpi.c b/drivers/gpu/drm/amd/powerplay/hwmgr/pp_acpi.c
index d19a9b6..a3c38bb 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/pp_acpi.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/pp_acpi.c
@@ -44,6 +44,20 @@ bool acpi_atcs_functions_supported(void *device, uint32_t index)
 	return result == 0 ? (output_buf.function_bits & (1 << (index - 1))) != 0 : false;
 }
 
+bool acpi_atcs_notify_pcie_device_ready(void *device)
+{
+	int32_t temp_buffer = 1;
+
+	return cgs_call_acpi_method(device, CGS_ACPI_METHOD_ATCS,
+				ATCS_FUNCTION_PCIE_DEVICE_READY_NOTIFICATION,
+						&temp_buffer,
+						NULL,
+						0,
+						sizeof(temp_buffer),
+						0);
+}
+
+
 int acpi_pcie_perf_request(void *device, uint8_t perf_req, bool advertise)
 {
 	struct atcs_pref_req_input atcs_input;
@@ -52,7 +66,7 @@ int acpi_pcie_perf_request(void *device, uint8_t perf_req, bool advertise)
 	int result;
 	struct cgs_system_info info = {0};
 
-	if (!acpi_atcs_functions_supported(device, ATCS_FUNCTION_PCIE_PERFORMANCE_REQUEST))
+	if( 0 != acpi_atcs_notify_pcie_device_ready(device))
 		return -EINVAL;
 
 	info.size = sizeof(struct cgs_system_info);
diff --git a/drivers/gpu/drm/amd/powerplay/inc/pp_acpi.h b/drivers/gpu/drm/amd/powerplay/inc/pp_acpi.h
index 3bd5e69..3df5de2 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/pp_acpi.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/pp_acpi.h
@@ -26,3 +26,4 @@ extern bool acpi_atcs_functions_supported(void *device,
 extern int acpi_pcie_perf_request(void *device,
 						uint8_t perf_req,
 						bool advertise);
+extern bool acpi_atcs_notify_pcie_device_ready(void *device);
-- 
cgit v0.10.2


From 919e334dec283294acd99951d016e59ad725c9a7 Mon Sep 17 00:00:00 2001
From: Rex Zhu <Rex.Zhu@amd.com>
Date: Wed, 11 May 2016 17:04:07 +0800
Subject: drm/amd/powerplay: enable PowerContainment feature for polaris10/11.

Signed-off-by: Rex Zhu <Rex.Zhu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c
index 1400bc4..0715995 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c
@@ -2606,6 +2606,7 @@ int polaris10_set_features_platform_caps(struct pp_hwmgr *hwmgr)
 
 	phm_cap_set(hwmgr->platform_descriptor.platformCaps,
 						PHM_PlatformCaps_FanSpeedInTableIsRPM);
+
 	if (hwmgr->chip_id == CHIP_POLARIS11)
 		phm_cap_set(hwmgr->platform_descriptor.platformCaps,
 					PHM_PlatformCaps_SPLLShutdownSupport);
@@ -2938,6 +2939,9 @@ int polaris10_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 	data->vddci_control = POLARIS10_VOLTAGE_CONTROL_NONE;
 	data->mvdd_control = POLARIS10_VOLTAGE_CONTROL_NONE;
 
+	data->enable_tdc_limit_feature = true;
+	data->enable_pkg_pwr_tracking_feature = true;
+
 	if (atomctrl_is_voltage_controled_by_gpio_v3(hwmgr,
 			VOLTAGE_TYPE_VDDC, VOLTAGE_OBJ_SVID2))
 		data->voltage_control = POLARIS10_VOLTAGE_CONTROL_BY_SVID2;
-- 
cgit v0.10.2


From a2fb4934e960b11e4430dccc08d606c99910b447 Mon Sep 17 00:00:00 2001
From: Rex Zhu <Rex.Zhu@amd.com>
Date: Mon, 13 Jun 2016 17:46:31 +0800
Subject: drm/amd/powerplay: initialize variables which were missed.

Missing pcie dpm settings.

Signed-off-by: Rex Zhu <Rex.Zhu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Ken Wang <Qingqing.Wang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c
index 586f732..92912ab 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c
@@ -633,6 +633,8 @@ static int fiji_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 	data->vddci_control = FIJI_VOLTAGE_CONTROL_NONE;
 	data->mvdd_control = FIJI_VOLTAGE_CONTROL_NONE;
 
+	data->force_pcie_gen = PP_PCIEGenInvalid;
+
 	if (atomctrl_is_voltage_controled_by_gpio_v3(hwmgr,
 			VOLTAGE_TYPE_VDDC, VOLTAGE_OBJ_SVID2))
 		data->voltage_control = FIJI_VOLTAGE_CONTROL_BY_SVID2;
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c
index 0715995..643677f 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c
@@ -2941,6 +2941,7 @@ int polaris10_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 
 	data->enable_tdc_limit_feature = true;
 	data->enable_pkg_pwr_tracking_feature = true;
+	data->force_pcie_gen = PP_PCIEGenInvalid;
 
 	if (atomctrl_is_voltage_controled_by_gpio_v3(hwmgr,
 			VOLTAGE_TYPE_VDDC, VOLTAGE_OBJ_SVID2))
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c
index d27e8c4..233eb7f 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c
@@ -4489,6 +4489,7 @@ int tonga_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 	data->vdd_ci_control = TONGA_VOLTAGE_CONTROL_NONE;
 	data->vdd_gfx_control = TONGA_VOLTAGE_CONTROL_NONE;
 	data->mvdd_control = TONGA_VOLTAGE_CONTROL_NONE;
+	data->force_pcie_gen = PP_PCIEGenInvalid;
 
 	if (atomctrl_is_voltage_controled_by_gpio_v3(hwmgr,
 				VOLTAGE_TYPE_VDDC, VOLTAGE_OBJ_SVID2)) {
-- 
cgit v0.10.2


From 40787ef21c2889fc3d96a11775fa412e715d7d48 Mon Sep 17 00:00:00 2001
From: Rex Zhu <Rex.Zhu@amd.com>
Date: Wed, 8 Jun 2016 19:41:00 +0800
Subject: drm/amd/powerplay: disable UVD SMU handshake for MCLK.

sync up with internal programming recommendations.

Signed-off-by: Rex Zhu <Rex.Zhu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c
index 643677f..5ecde13 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c
@@ -2252,6 +2252,9 @@ static int polaris10_enable_deep_sleep_master_switch(struct pp_hwmgr *hwmgr)
 static int polaris10_enable_sclk_mclk_dpm(struct pp_hwmgr *hwmgr)
 {
 	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
+	uint32_t soft_register_value = 0;
+	uint32_t handshake_disables_offset = data->soft_regs_start
+				+ offsetof(SMU74_SoftRegisters, HandshakeDisables);
 
 	/* enable SCLK dpm */
 	if (!data->sclk_dpm_key_disabled)
@@ -2262,6 +2265,12 @@ static int polaris10_enable_sclk_mclk_dpm(struct pp_hwmgr *hwmgr)
 
 	/* enable MCLK dpm */
 	if (0 == data->mclk_dpm_key_disabled) {
+/* Disable UVD - SMU handshake for MCLK. */
+		soft_register_value = cgs_read_ind_register(hwmgr->device,
+					CGS_IND_REG__SMC, handshake_disables_offset);
+		soft_register_value |= SMU7_UVD_MCLK_HANDSHAKE_DISABLE;
+		cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC,
+				handshake_disables_offset, soft_register_value);
 
 		PP_ASSERT_WITH_CODE(
 				(0 == smum_send_msg_to_smc(hwmgr->smumgr,
-- 
cgit v0.10.2


From 9a3c1b342be28a14006f644528dd9baad43db443 Mon Sep 17 00:00:00 2001
From: Rex Zhu <Rex.Zhu@amd.com>
Date: Wed, 8 Jun 2016 19:42:48 +0800
Subject: drm/amd/powrplay: enable stutter_mode for polaris.

To minimize the dram power expenditure during static -screen

Signed-off-by: Rex Zhu <Rex.Zhu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c
index 5ecde13..c2f5bec 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c
@@ -1296,7 +1296,6 @@ static int polaris10_populate_single_memory_level(struct pp_hwmgr *hwmgr,
 	}
 
 	mem_level->MclkFrequency = clock;
-	mem_level->StutterEnable = 0;
 	mem_level->EnabledForThrottle = 1;
 	mem_level->EnabledForActivity = 0;
 	mem_level->UpHyst = 0;
@@ -1363,7 +1362,7 @@ static int polaris10_populate_all_memory_levels(struct pp_hwmgr *hwmgr)
 	 * a higher state by default such that we are not effected by
 	 * up threshold or and MCLK DPM latency.
 	 */
-	levels[0].ActivityLevel = (uint16_t)data->mclk_dpm0_activity_target;
+	levels[0].ActivityLevel = 0x1f;
 	CONVERT_FROM_HOST_TO_SMC_US(levels[0].ActivityLevel);
 
 	data->smc_state_table.MemoryDpmLevelCount =
@@ -2951,6 +2950,7 @@ int polaris10_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 	data->enable_tdc_limit_feature = true;
 	data->enable_pkg_pwr_tracking_feature = true;
 	data->force_pcie_gen = PP_PCIEGenInvalid;
+	data->mclk_stutter_mode_threshold = 40000;
 
 	if (atomctrl_is_voltage_controled_by_gpio_v3(hwmgr,
 			VOLTAGE_TYPE_VDDC, VOLTAGE_OBJ_SVID2))
-- 
cgit v0.10.2


From 31b21243776e1f41813f40ce16c465bf03acd9ba Mon Sep 17 00:00:00 2001
From: Rex Zhu <Rex.Zhu@amd.com>
Date: Tue, 7 Jun 2016 18:38:39 +0800
Subject: drm/amd/powerplay: add avfs related define for polaris

Signed-off-by: Rex Zhu <Rex.Zhu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu74.h b/drivers/gpu/drm/amd/powerplay/inc/smu74.h
index 1a12d85..fd10a9f 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/smu74.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/smu74.h
@@ -34,6 +34,30 @@
 #define SMU__NUM_LCLK_DPM_LEVELS 8
 #define SMU__NUM_PCIE_DPM_LEVELS 8
 
+#define EXP_M1  35
+#define EXP_M2  92821
+#define EXP_B   66629747
+
+#define EXP_M1_1  365
+#define EXP_M2_1  658700
+#define EXP_B_1   305506134
+
+#define EXP_M1_2  189
+#define EXP_M2_2  379692
+#define EXP_B_2   194609469
+
+#define EXP_M1_3  99
+#define EXP_M2_3  217915
+#define EXP_B_3   122255994
+
+#define EXP_M1_4  51
+#define EXP_M2_4  122643
+#define EXP_B_4   74893384
+
+#define EXP_M1_5  423
+#define EXP_M2_5  1103326
+#define EXP_B_5   728122621
+
 enum SID_OPTION {
 	SID_OPTION_HI,
 	SID_OPTION_LO,
@@ -548,20 +572,20 @@ struct SMU74_Firmware_Header {
 	uint32_t CacConfigTable;
 	uint32_t CacStatusTable;
 
-
 	uint32_t mcRegisterTable;
 
-
 	uint32_t mcArbDramTimingTable;
 
-
-
-
 	uint32_t PmFuseTable;
 	uint32_t Globals;
 	uint32_t ClockStretcherTable;
 	uint32_t VftTable;
-	uint32_t Reserved[21];
+	uint32_t Reserved1;
+	uint32_t AvfsTable;
+	uint32_t AvfsCksOffGbvTable;
+	uint32_t AvfsMeanNSigma;
+	uint32_t AvfsSclkOffsetTable;
+	uint32_t Reserved[16];
 	uint32_t Signature;
 };
 
@@ -701,8 +725,6 @@ VR Config info is contained in dpmTable.VRConfig */
 struct SMU_ClockStretcherDataTableEntry {
 	uint8_t minVID;
 	uint8_t maxVID;
-
-
 	uint16_t setting;
 };
 typedef struct SMU_ClockStretcherDataTableEntry SMU_ClockStretcherDataTableEntry;
@@ -769,6 +791,43 @@ struct VFT_TABLE_t {
 typedef struct VFT_TABLE_t VFT_TABLE_t;
 
 
+/* Total margin, root mean square of Fmax + DC + Platform */
+struct AVFS_Margin_t {
+	VFT_CELL_t Cell[NUM_VFT_COLUMNS];
+};
+typedef struct AVFS_Margin_t AVFS_Margin_t;
+
+#define BTCGB_VDROOP_TABLE_MAX_ENTRIES 2
+#define AVFSGB_VDROOP_TABLE_MAX_ENTRIES 2
+
+struct GB_VDROOP_TABLE_t {
+	int32_t a0;
+	int32_t a1;
+	int32_t a2;
+	uint32_t spare;
+};
+typedef struct GB_VDROOP_TABLE_t GB_VDROOP_TABLE_t;
+
+struct AVFS_CksOff_Gbv_t {
+	VFT_CELL_t Cell[NUM_VFT_COLUMNS];
+};
+typedef struct AVFS_CksOff_Gbv_t AVFS_CksOff_Gbv_t;
+
+struct AVFS_meanNsigma_t {
+	uint32_t Aconstant[3];
+	uint16_t DC_tol_sigma;
+	uint16_t Platform_mean;
+	uint16_t Platform_sigma;
+	uint16_t PSM_Age_CompFactor;
+	uint8_t  Static_Voltage_Offset[NUM_VFT_COLUMNS];
+};
+typedef struct AVFS_meanNsigma_t AVFS_meanNsigma_t;
+
+struct AVFS_Sclk_Offset_t {
+	uint16_t Sclk_Offset[8];
+};
+typedef struct AVFS_Sclk_Offset_t AVFS_Sclk_Offset_t;
+
 #endif
 
 
diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu74_discrete.h b/drivers/gpu/drm/amd/powerplay/inc/smu74_discrete.h
index 0dfe823..b85ff54 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/smu74_discrete.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/smu74_discrete.h
@@ -223,6 +223,16 @@ struct SMU74_Discrete_StateInfo {
 
 typedef struct SMU74_Discrete_StateInfo SMU74_Discrete_StateInfo;
 
+struct SMU_QuadraticCoeffs {
+	int32_t m1;
+	uint32_t b;
+
+	int16_t m2;
+	uint8_t m1_shift;
+	uint8_t m2_shift;
+};
+typedef struct SMU_QuadraticCoeffs SMU_QuadraticCoeffs;
+
 struct SMU74_Discrete_DpmTable {
 
 	SMU74_PIDController                  GraphicsPIDController;
@@ -258,7 +268,14 @@ struct SMU74_Discrete_DpmTable {
 	uint8_t                             ThermOutPolarity;
 	uint8_t                             ThermOutMode;
 	uint8_t                             BootPhases;
-	uint32_t                            Reserved[4];
+
+	uint8_t                             VRHotLevel;
+	uint8_t                             Reserved1[3];
+	uint16_t                            FanStartTemperature;
+	uint16_t                            FanStopTemperature;
+	uint16_t                            MaxVoltage;
+	uint16_t                            Reserved2;
+	uint32_t                            Reserved[1];
 
 	SMU74_Discrete_GraphicsLevel        GraphicsLevel[SMU74_MAX_LEVELS_GRAPHICS];
 	SMU74_Discrete_MemoryLevel          MemoryACPILevel;
@@ -347,6 +364,8 @@ struct SMU74_Discrete_DpmTable {
 
 	uint32_t                            CurrSclkPllRange;
 	sclkFcwRange_t                      SclkFcwRangeTable[NUM_SCLK_RANGE];
+	GB_VDROOP_TABLE_t                   BTCGB_VDROOP_TABLE[BTCGB_VDROOP_TABLE_MAX_ENTRIES];
+	SMU_QuadraticCoeffs                 AVFSGB_VDROOP_TABLE[AVFSGB_VDROOP_TABLE_MAX_ENTRIES];
 };
 
 typedef struct SMU74_Discrete_DpmTable SMU74_Discrete_DpmTable;
@@ -550,16 +569,6 @@ struct SMU7_AcpiScoreboard {
 
 typedef struct SMU7_AcpiScoreboard SMU7_AcpiScoreboard;
 
-struct SMU_QuadraticCoeffs {
-	int32_t m1;
-	uint32_t b;
-
-	int16_t m2;
-	uint8_t m1_shift;
-	uint8_t m2_shift;
-};
-typedef struct SMU_QuadraticCoeffs SMU_QuadraticCoeffs;
-
 struct SMU74_Discrete_PmFuses {
 	uint8_t BapmVddCVidHiSidd[8];
 	uint8_t BapmVddCVidLoSidd[8];
@@ -821,6 +830,17 @@ typedef struct SMU7_GfxCuPgScoreboard SMU7_GfxCuPgScoreboard;
 #define DB_PCC_SHIFT 26 
 #define DB_EDC_SHIFT 27
 
+#define BTCGB0_Vdroop_Enable_MASK  0x1
+#define BTCGB1_Vdroop_Enable_MASK  0x2
+#define AVFSGB0_Vdroop_Enable_MASK 0x4
+#define AVFSGB1_Vdroop_Enable_MASK 0x8
+
+#define BTCGB0_Vdroop_Enable_SHIFT  0
+#define BTCGB1_Vdroop_Enable_SHIFT  1
+#define AVFSGB0_Vdroop_Enable_SHIFT 2
+#define AVFSGB1_Vdroop_Enable_SHIFT 3
+
+
 #pragma pack(pop)
 
 
-- 
cgit v0.10.2


From c11cb70483c33404d1c0cf9aa48e7627eecbe14d Mon Sep 17 00:00:00 2001
From: Rex Zhu <Rex.Zhu@amd.com>
Date: Wed, 8 Jun 2016 19:17:31 +0800
Subject: drm/amdgpu/atombios: add avfs struct for Polaris10/11

Signed-off-by: Rex Zhu <Rex.Zhu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

diff --git a/drivers/gpu/drm/amd/include/atombios.h b/drivers/gpu/drm/amd/include/atombios.h
index 32f3e34..3493da5 100644
--- a/drivers/gpu/drm/amd/include/atombios.h
+++ b/drivers/gpu/drm/amd/include/atombios.h
@@ -5538,6 +5538,78 @@ typedef struct  _ATOM_ASIC_PROFILING_INFO_V3_5
   ULONG  ulReserved[12];
 }ATOM_ASIC_PROFILING_INFO_V3_5;
 
+/* for Polars10/11 AVFS parameters */
+typedef struct  _ATOM_ASIC_PROFILING_INFO_V3_6
+{
+  ATOM_COMMON_TABLE_HEADER         asHeader;
+  ULONG  ulMaxVddc;
+  ULONG  ulMinVddc;
+  USHORT usLkgEuseIndex;
+  UCHAR  ucLkgEfuseBitLSB;
+  UCHAR  ucLkgEfuseLength;
+  ULONG  ulLkgEncodeLn_MaxDivMin;
+  ULONG  ulLkgEncodeMax;
+  ULONG  ulLkgEncodeMin;
+  EFUSE_LINEAR_FUNC_PARAM sRoFuse;
+  ULONG  ulEvvDefaultVddc;
+  ULONG  ulEvvNoCalcVddc;
+  ULONG  ulSpeed_Model;
+  ULONG  ulSM_A0;
+  ULONG  ulSM_A1;
+  ULONG  ulSM_A2;
+  ULONG  ulSM_A3;
+  ULONG  ulSM_A4;
+  ULONG  ulSM_A5;
+  ULONG  ulSM_A6;
+  ULONG  ulSM_A7;
+  UCHAR  ucSM_A0_sign;
+  UCHAR  ucSM_A1_sign;
+  UCHAR  ucSM_A2_sign;
+  UCHAR  ucSM_A3_sign;
+  UCHAR  ucSM_A4_sign;
+  UCHAR  ucSM_A5_sign;
+  UCHAR  ucSM_A6_sign;
+  UCHAR  ucSM_A7_sign;
+  ULONG  ulMargin_RO_a;
+  ULONG  ulMargin_RO_b;
+  ULONG  ulMargin_RO_c;
+  ULONG  ulMargin_fixed;
+  ULONG  ulMargin_Fmax_mean;
+  ULONG  ulMargin_plat_mean;
+  ULONG  ulMargin_Fmax_sigma;
+  ULONG  ulMargin_plat_sigma;
+  ULONG  ulMargin_DC_sigma;
+  ULONG  ulLoadLineSlop;
+  ULONG  ulaTDClimitPerDPM[8];
+  ULONG  ulaNoCalcVddcPerDPM[8];
+  ULONG  ulAVFS_meanNsigma_Acontant0;
+  ULONG  ulAVFS_meanNsigma_Acontant1;
+  ULONG  ulAVFS_meanNsigma_Acontant2;
+  USHORT usAVFS_meanNsigma_DC_tol_sigma;
+  USHORT usAVFS_meanNsigma_Platform_mean;
+  USHORT usAVFS_meanNsigma_Platform_sigma;
+  ULONG  ulGB_VDROOP_TABLE_CKSOFF_a0;
+  ULONG  ulGB_VDROOP_TABLE_CKSOFF_a1;
+  ULONG  ulGB_VDROOP_TABLE_CKSOFF_a2;
+  ULONG  ulGB_VDROOP_TABLE_CKSON_a0;
+  ULONG  ulGB_VDROOP_TABLE_CKSON_a1;
+  ULONG  ulGB_VDROOP_TABLE_CKSON_a2;
+  ULONG  ulAVFSGB_FUSE_TABLE_CKSOFF_m1;
+  USHORT usAVFSGB_FUSE_TABLE_CKSOFF_m2;
+  ULONG  ulAVFSGB_FUSE_TABLE_CKSOFF_b;
+  ULONG  ulAVFSGB_FUSE_TABLE_CKSON_m1;
+  USHORT usAVFSGB_FUSE_TABLE_CKSON_m2;
+  ULONG  ulAVFSGB_FUSE_TABLE_CKSON_b;
+  USHORT usMaxVoltage_0_25mv;
+  UCHAR  ucEnableGB_VDROOP_TABLE_CKSOFF;
+  UCHAR  ucEnableGB_VDROOP_TABLE_CKSON;
+  UCHAR  ucEnableGB_FUSE_TABLE_CKSOFF;
+  UCHAR  ucEnableGB_FUSE_TABLE_CKSON;
+  USHORT usPSM_Age_ComFactor;
+  UCHAR  ucEnableApplyAVFS_CKS_OFF_Voltage;
+  UCHAR  ucReserved;
+}ATOM_ASIC_PROFILING_INFO_V3_6;
+
 
 typedef struct _ATOM_SCLK_FCW_RANGE_ENTRY_V1{
   ULONG  ulMaxSclkFreq;
-- 
cgit v0.10.2


From 432c3a3ca794bd2301425cb58bf097fc26690c17 Mon Sep 17 00:00:00 2001
From: Rex Zhu <Rex.Zhu@amd.com>
Date: Wed, 8 Jun 2016 19:39:42 +0800
Subject: drm/amd/powerplay: enable avfs feature for polaris

avfs feature is for voltage control based on
gpu system clock on polaris10

Signed-off-by: Rex Zhu <Rex.Zhu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c
index c2f5bec..f730ec8 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c
@@ -1303,7 +1303,6 @@ static int polaris10_populate_single_memory_level(struct pp_hwmgr *hwmgr,
 	mem_level->VoltageDownHyst = 0;
 	mem_level->ActivityLevel = (uint16_t)data->mclk_activity_target;
 	mem_level->StutterEnable = false;
-
 	mem_level->DisplayWatermark = PPSMC_DISPLAY_WATERMARK_LOW;
 
 	data->display_timing.num_existing_displays = info.display_count;
@@ -1955,6 +1954,90 @@ static int polaris10_populate_vr_config(struct pp_hwmgr *hwmgr,
 	return 0;
 }
 
+
+int polaris10_populate_avfs_parameters(struct pp_hwmgr *hwmgr)
+{
+	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
+	SMU74_Discrete_DpmTable  *table = &(data->smc_state_table);
+	int result = 0;
+	struct pp_atom_ctrl__avfs_parameters avfs_params = {0};
+	AVFS_meanNsigma_t AVFS_meanNsigma = { {0} };
+	AVFS_Sclk_Offset_t AVFS_SclkOffset = { {0} };
+	uint32_t tmp, i;
+	struct pp_smumgr *smumgr = hwmgr->smumgr;
+	struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(smumgr->backend);
+
+	struct phm_ppt_v1_information *table_info =
+			(struct phm_ppt_v1_information *)hwmgr->pptable;
+	struct phm_ppt_v1_clock_voltage_dependency_table *sclk_table =
+			table_info->vdd_dep_on_sclk;
+
+
+	if (smu_data->avfs.avfs_btc_status == AVFS_BTC_NOTSUPPORTED)
+		return result;
+
+	result = atomctrl_get_avfs_information(hwmgr, &avfs_params);
+
+	if (0 == result) {
+		table->BTCGB_VDROOP_TABLE[0].a0  = PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSON_a0);
+		table->BTCGB_VDROOP_TABLE[0].a1  = PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSON_a1);
+		table->BTCGB_VDROOP_TABLE[0].a2  = PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSON_a2);
+		table->BTCGB_VDROOP_TABLE[1].a0  = PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSOFF_a0);
+		table->BTCGB_VDROOP_TABLE[1].a1  = PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSOFF_a1);
+		table->BTCGB_VDROOP_TABLE[1].a2  = PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSOFF_a2);
+		table->AVFSGB_VDROOP_TABLE[0].m1 = PP_HOST_TO_SMC_UL(avfs_params.ulAVFSGB_FUSE_TABLE_CKSON_m1);
+		table->AVFSGB_VDROOP_TABLE[0].m2 = PP_HOST_TO_SMC_US(avfs_params.usAVFSGB_FUSE_TABLE_CKSON_m2);
+		table->AVFSGB_VDROOP_TABLE[0].b  = PP_HOST_TO_SMC_UL(avfs_params.ulAVFSGB_FUSE_TABLE_CKSON_b);
+		table->AVFSGB_VDROOP_TABLE[0].m1_shift = 24;
+		table->AVFSGB_VDROOP_TABLE[0].m2_shift  = 12;
+		table->AVFSGB_VDROOP_TABLE[1].m1 = PP_HOST_TO_SMC_UL(avfs_params.ulAVFSGB_FUSE_TABLE_CKSOFF_m1);
+		table->AVFSGB_VDROOP_TABLE[1].m2 = PP_HOST_TO_SMC_US(avfs_params.usAVFSGB_FUSE_TABLE_CKSOFF_m2);
+		table->AVFSGB_VDROOP_TABLE[1].b  = PP_HOST_TO_SMC_UL(avfs_params.ulAVFSGB_FUSE_TABLE_CKSOFF_b);
+		table->AVFSGB_VDROOP_TABLE[1].m1_shift = 24;
+		table->AVFSGB_VDROOP_TABLE[1].m2_shift  = 12;
+		table->MaxVoltage                = PP_HOST_TO_SMC_US(avfs_params.usMaxVoltage_0_25mv);
+		AVFS_meanNsigma.Aconstant[0]      = PP_HOST_TO_SMC_UL(avfs_params.ulAVFS_meanNsigma_Acontant0);
+		AVFS_meanNsigma.Aconstant[1]      = PP_HOST_TO_SMC_UL(avfs_params.ulAVFS_meanNsigma_Acontant1);
+		AVFS_meanNsigma.Aconstant[2]      = PP_HOST_TO_SMC_UL(avfs_params.ulAVFS_meanNsigma_Acontant2);
+		AVFS_meanNsigma.DC_tol_sigma      = PP_HOST_TO_SMC_US(avfs_params.usAVFS_meanNsigma_DC_tol_sigma);
+		AVFS_meanNsigma.Platform_mean     = PP_HOST_TO_SMC_US(avfs_params.usAVFS_meanNsigma_Platform_mean);
+		AVFS_meanNsigma.PSM_Age_CompFactor = PP_HOST_TO_SMC_US(avfs_params.usPSM_Age_ComFactor);
+		AVFS_meanNsigma.Platform_sigma     = PP_HOST_TO_SMC_US(avfs_params.usAVFS_meanNsigma_Platform_sigma);
+
+		for (i = 0; i < NUM_VFT_COLUMNS; i++) {
+			AVFS_meanNsigma.Static_Voltage_Offset[i] = (uint8_t)(sclk_table->entries[i].cks_voffset * 100 / 625);
+			AVFS_SclkOffset.Sclk_Offset[i] = PP_HOST_TO_SMC_US((uint16_t)(sclk_table->entries[i].sclk_offset) / 100);
+		}
+
+		result = polaris10_read_smc_sram_dword(smumgr,
+				SMU7_FIRMWARE_HEADER_LOCATION + offsetof(SMU74_Firmware_Header, AvfsMeanNSigma),
+				&tmp, data->sram_end);
+
+		polaris10_copy_bytes_to_smc(smumgr,
+					tmp,
+					(uint8_t *)&AVFS_meanNsigma,
+					sizeof(AVFS_meanNsigma_t),
+					data->sram_end);
+
+		result = polaris10_read_smc_sram_dword(smumgr,
+				SMU7_FIRMWARE_HEADER_LOCATION + offsetof(SMU74_Firmware_Header, AvfsSclkOffsetTable),
+				&tmp, data->sram_end);
+		polaris10_copy_bytes_to_smc(smumgr,
+					tmp,
+					(uint8_t *)&AVFS_SclkOffset,
+					sizeof(AVFS_Sclk_Offset_t),
+					data->sram_end);
+
+		data->avfs_vdroop_override_setting = (avfs_params.ucEnableGB_VDROOP_TABLE_CKSON << BTCGB0_Vdroop_Enable_SHIFT) |
+						(avfs_params.ucEnableGB_VDROOP_TABLE_CKSOFF << BTCGB1_Vdroop_Enable_SHIFT) |
+						(avfs_params.ucEnableGB_FUSE_TABLE_CKSON << AVFSGB0_Vdroop_Enable_SHIFT) |
+						(avfs_params.ucEnableGB_FUSE_TABLE_CKSOFF << AVFSGB1_Vdroop_Enable_SHIFT);
+		data->apply_avfs_cks_off_voltage = (avfs_params.ucEnableApplyAVFS_CKS_OFF_Voltage == 1) ? true : false;
+	}
+	return result;
+}
+
+
 /**
 * Initializes the SMC table and uploads it
 *
@@ -2055,6 +2138,10 @@ static int polaris10_init_smc_table(struct pp_hwmgr *hwmgr)
 				"Failed to populate Clock Stretcher Data Table!",
 				return result);
 	}
+
+	result = polaris10_populate_avfs_parameters(hwmgr);
+	PP_ASSERT_WITH_CODE(0 == result, "Failed to populate AVFS Parameters!", return result;);
+
 	table->CurrSclkPllRange = 0xff;
 	table->GraphicsVoltageChangeEnable  = 1;
 	table->GraphicsThermThrottleEnable  = 1;
@@ -2277,7 +2364,6 @@ static int polaris10_enable_sclk_mclk_dpm(struct pp_hwmgr *hwmgr)
 				"Failed to enable MCLK DPM during DPM Start Function!",
 				return -1);
 
-
 		PHM_WRITE_FIELD(hwmgr->device, MC_SEQ_CNTL_3, CAC_EN, 0x1);
 
 		cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixLCAC_MC0_CNTL, 0x5);
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.h b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.h
index beedf35..d717789 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.h
@@ -312,6 +312,9 @@ struct polaris10_hwmgr {
 
 	/* soft pptable for re-uploading into smu */
 	void *soft_pp_table;
+
+	uint32_t                              avfs_vdroop_override_setting;
+	bool                                  apply_avfs_cks_off_voltage;
 };
 
 /* To convert to Q8.8 format for firmware */
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_thermal.c b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_thermal.c
index aba167f..b206632 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_thermal.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_thermal.c
@@ -625,10 +625,14 @@ static int tf_polaris10_thermal_avfs_enable(struct pp_hwmgr *hwmgr,
 	int ret;
 	struct pp_smumgr *smumgr = (struct pp_smumgr *)(hwmgr->smumgr);
 	struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(smumgr->backend);
+	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
 
-	if (smu_data->avfs.avfs_btc_status != AVFS_BTC_ENABLEAVFS)
+	if (smu_data->avfs.avfs_btc_status == AVFS_BTC_NOTSUPPORTED)
 		return 0;
 
+	ret = smum_send_msg_to_smc_with_parameter(hwmgr->smumgr,
+			PPSMC_MSG_SetGBDroopSettings, data->avfs_vdroop_override_setting);
+
 	ret = (smum_send_msg_to_smc(smumgr, PPSMC_MSG_EnableAvfs) == 0) ?
 			0 : -1;
 
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.c b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.c
index da9f5f1..bf4e18f 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.c
@@ -1302,3 +1302,46 @@ int atomctrl_get_smc_sclk_range_table(struct pp_hwmgr *hwmgr, struct pp_atom_ctr
 
 	return 0;
 }
+
+int atomctrl_get_avfs_information(struct pp_hwmgr *hwmgr, struct pp_atom_ctrl__avfs_parameters *param)
+{
+	ATOM_ASIC_PROFILING_INFO_V3_6 *profile = NULL;
+
+	if (param == NULL)
+		return -EINVAL;
+
+	profile = (ATOM_ASIC_PROFILING_INFO_V3_6 *)
+			cgs_atom_get_data_table(hwmgr->device,
+					GetIndexIntoMasterTable(DATA, ASIC_ProfilingInfo),
+					NULL, NULL, NULL);
+	if (!profile)
+		return -1;
+
+	param->ulAVFS_meanNsigma_Acontant0 = profile->ulAVFS_meanNsigma_Acontant0;
+	param->ulAVFS_meanNsigma_Acontant1 = profile->ulAVFS_meanNsigma_Acontant1;
+	param->ulAVFS_meanNsigma_Acontant2 = profile->ulAVFS_meanNsigma_Acontant2;
+	param->usAVFS_meanNsigma_DC_tol_sigma = profile->usAVFS_meanNsigma_DC_tol_sigma;
+	param->usAVFS_meanNsigma_Platform_mean = profile->usAVFS_meanNsigma_Platform_mean;
+	param->usAVFS_meanNsigma_Platform_sigma = profile->usAVFS_meanNsigma_Platform_sigma;
+	param->ulGB_VDROOP_TABLE_CKSOFF_a0 = profile->ulGB_VDROOP_TABLE_CKSOFF_a0;
+	param->ulGB_VDROOP_TABLE_CKSOFF_a1 = profile->ulGB_VDROOP_TABLE_CKSOFF_a1;
+	param->ulGB_VDROOP_TABLE_CKSOFF_a2 = profile->ulGB_VDROOP_TABLE_CKSOFF_a2;
+	param->ulGB_VDROOP_TABLE_CKSON_a0 = profile->ulGB_VDROOP_TABLE_CKSON_a0;
+	param->ulGB_VDROOP_TABLE_CKSON_a1 = profile->ulGB_VDROOP_TABLE_CKSON_a1;
+	param->ulGB_VDROOP_TABLE_CKSON_a2 = profile->ulGB_VDROOP_TABLE_CKSON_a2;
+	param->ulAVFSGB_FUSE_TABLE_CKSOFF_m1 = profile->ulAVFSGB_FUSE_TABLE_CKSOFF_m1;
+	param->usAVFSGB_FUSE_TABLE_CKSOFF_m2 = profile->usAVFSGB_FUSE_TABLE_CKSOFF_m2;
+	param->ulAVFSGB_FUSE_TABLE_CKSOFF_b = profile->ulAVFSGB_FUSE_TABLE_CKSOFF_b;
+	param->ulAVFSGB_FUSE_TABLE_CKSON_m1 = profile->ulAVFSGB_FUSE_TABLE_CKSON_m1;
+	param->usAVFSGB_FUSE_TABLE_CKSON_m2 = profile->usAVFSGB_FUSE_TABLE_CKSON_m2;
+	param->ulAVFSGB_FUSE_TABLE_CKSON_b = profile->ulAVFSGB_FUSE_TABLE_CKSON_b;
+	param->usMaxVoltage_0_25mv = profile->usMaxVoltage_0_25mv;
+	param->ucEnableGB_VDROOP_TABLE_CKSOFF = profile->ucEnableGB_VDROOP_TABLE_CKSOFF;
+	param->ucEnableGB_VDROOP_TABLE_CKSON = profile->ucEnableGB_VDROOP_TABLE_CKSON;
+	param->ucEnableGB_FUSE_TABLE_CKSOFF = profile->ucEnableGB_FUSE_TABLE_CKSOFF;
+	param->ucEnableGB_FUSE_TABLE_CKSON = profile->ucEnableGB_FUSE_TABLE_CKSON;
+	param->usPSM_Age_ComFactor = profile->usPSM_Age_ComFactor;
+	param->ucEnableApplyAVFS_CKS_OFF_Voltage = profile->ucEnableApplyAVFS_CKS_OFF_Voltage;
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.h b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.h
index d24ebb5..248c5db 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.h
@@ -250,6 +250,35 @@ struct pp_atomctrl_gpio_pin_assignment {
 };
 typedef struct pp_atomctrl_gpio_pin_assignment pp_atomctrl_gpio_pin_assignment;
 
+struct pp_atom_ctrl__avfs_parameters {
+	uint32_t  ulAVFS_meanNsigma_Acontant0;
+	uint32_t  ulAVFS_meanNsigma_Acontant1;
+	uint32_t  ulAVFS_meanNsigma_Acontant2;
+	uint16_t usAVFS_meanNsigma_DC_tol_sigma;
+	uint16_t usAVFS_meanNsigma_Platform_mean;
+	uint16_t usAVFS_meanNsigma_Platform_sigma;
+	uint32_t  ulGB_VDROOP_TABLE_CKSOFF_a0;
+	uint32_t  ulGB_VDROOP_TABLE_CKSOFF_a1;
+	uint32_t  ulGB_VDROOP_TABLE_CKSOFF_a2;
+	uint32_t  ulGB_VDROOP_TABLE_CKSON_a0;
+	uint32_t  ulGB_VDROOP_TABLE_CKSON_a1;
+	uint32_t  ulGB_VDROOP_TABLE_CKSON_a2;
+	uint32_t  ulAVFSGB_FUSE_TABLE_CKSOFF_m1;
+	uint16_t  usAVFSGB_FUSE_TABLE_CKSOFF_m2;
+	uint32_t  ulAVFSGB_FUSE_TABLE_CKSOFF_b;
+	uint32_t  ulAVFSGB_FUSE_TABLE_CKSON_m1;
+	uint16_t  usAVFSGB_FUSE_TABLE_CKSON_m2;
+	uint32_t  ulAVFSGB_FUSE_TABLE_CKSON_b;
+	uint16_t  usMaxVoltage_0_25mv;
+	uint8_t  ucEnableGB_VDROOP_TABLE_CKSOFF;
+	uint8_t  ucEnableGB_VDROOP_TABLE_CKSON;
+	uint8_t  ucEnableGB_FUSE_TABLE_CKSOFF;
+	uint8_t  ucEnableGB_FUSE_TABLE_CKSON;
+	uint16_t usPSM_Age_ComFactor;
+	uint8_t  ucEnableApplyAVFS_CKS_OFF_Voltage;
+	uint8_t  ucReserved;
+};
+
 extern bool atomctrl_get_pp_assign_pin(struct pp_hwmgr *hwmgr, const uint32_t pinId, pp_atomctrl_gpio_pin_assignment *gpio_pin_assignment);
 extern int atomctrl_get_voltage_evv_on_sclk(struct pp_hwmgr *hwmgr, uint8_t voltage_type, uint32_t sclk, uint16_t virtual_voltage_Id, uint16_t *voltage);
 extern uint32_t atomctrl_get_mpll_reference_clock(struct pp_hwmgr *hwmgr);
@@ -278,5 +307,8 @@ extern int atomctrl_set_ac_timing_ai(struct pp_hwmgr *hwmgr, uint32_t memory_clo
 extern int atomctrl_get_voltage_evv_on_sclk_ai(struct pp_hwmgr *hwmgr, uint8_t voltage_type,
 				uint32_t sclk, uint16_t virtual_voltage_Id, uint16_t *voltage);
 extern int atomctrl_get_smc_sclk_range_table(struct pp_hwmgr *hwmgr, struct pp_atom_ctrl_sclk_range_table *table);
+
+extern int atomctrl_get_avfs_information(struct pp_hwmgr *hwmgr, struct pp_atom_ctrl__avfs_parameters *param);
+
 #endif
 
diff --git a/drivers/gpu/drm/amd/powerplay/inc/polaris10_ppsmc.h b/drivers/gpu/drm/amd/powerplay/inc/polaris10_ppsmc.h
index 0c6a413..d41d37a 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/polaris10_ppsmc.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/polaris10_ppsmc.h
@@ -27,6 +27,7 @@
 
 #pragma pack(push, 1)
 
+#define PPSMC_MSG_SetGBDroopSettings          ((uint16_t) 0x305)
 
 #define PPSMC_SWSTATE_FLAG_DC                           0x01
 #define PPSMC_SWSTATE_FLAG_UVD                          0x02
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c
index 043b6ac..5dba7c5 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c
@@ -52,19 +52,18 @@
 static const SMU74_Discrete_GraphicsLevel avfs_graphics_level_polaris10[8] = {
 	/*  Min      pcie   DeepSleep Activity  CgSpll      CgSpll    CcPwr  CcPwr  Sclk         Enabled      Enabled                       Voltage    Power */
 	/* Voltage, DpmLevel, DivId,  Level,  FuncCntl3,  FuncCntl4,  DynRm, DynRm1 Did, Padding,ForActivity, ForThrottle, UpHyst, DownHyst, DownHyst, Throttle */
-	{ 0x3c0fd047, 0x00, 0x03, 0x1e00, 0x00200410, 0x87020000, 0, 0, 0x16, 0, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, { 0x30750000, 0, 0, 0, 0, 0, 0, 0 } },
-	{ 0xa00fd047, 0x01, 0x04, 0x1e00, 0x00800510, 0x87020000, 0, 0, 0x16, 0, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, { 0x409c0000, 0, 0, 0, 0, 0, 0, 0 } },
-	{ 0x0410d047, 0x01, 0x00, 0x1e00, 0x00600410, 0x87020000, 0, 0, 0x0e, 0, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, { 0x50c30000, 0, 0, 0, 0, 0, 0, 0 } },
-	{ 0x6810d047, 0x01, 0x00, 0x1e00, 0x00800410, 0x87020000, 0, 0, 0x0c, 0, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, { 0x60ea0000, 0, 0, 0, 0, 0, 0, 0 } },
-	{ 0xcc10d047, 0x01, 0x00, 0x1e00, 0x00e00410, 0x87020000, 0, 0, 0x0c, 0, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, { 0xe8fd0000, 0, 0, 0, 0, 0, 0, 0 } },
-	{ 0x3011d047, 0x01, 0x00, 0x1e00, 0x00400510, 0x87020000, 0, 0, 0x0c, 0, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, { 0x70110100, 0, 0, 0, 0, 0, 0, 0 } },
-	{ 0x9411d047, 0x01, 0x00, 0x1e00, 0x00a00510, 0x87020000, 0, 0, 0x0c, 0, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, { 0xf8240100, 0, 0, 0, 0, 0, 0, 0 } },
-	{ 0xf811d047, 0x01, 0x00, 0x1e00, 0x00000610, 0x87020000, 0, 0, 0x0c, 0, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, { 0x80380100, 0, 0, 0, 0, 0, 0, 0 } }
+	{ 0x100ea446, 0x00, 0x03, 0x3200, 0, 0, 0, 0, 0, 0, 0x01, 0x01, 0x0a, 0x00, 0x00, 0x00, { 0x30750000, 0x3000, 0, 0x2600, 0, 0, 0x0004, 0x8f02, 0xffff, 0x2f00, 0x300e, 0x2700 } },
+	{ 0x400ea446, 0x01, 0x04, 0x3200, 0, 0, 0, 0, 0, 0, 0x01, 0x01, 0x0a, 0x00, 0x00, 0x00, { 0x409c0000, 0x2000, 0, 0x1e00, 1, 1, 0x0004, 0x8300, 0xffff, 0x1f00, 0xcb5e, 0x1a00 } },
+	{ 0x740ea446, 0x01, 0x00, 0x3200, 0, 0, 0, 0, 0, 0, 0x01, 0x01, 0x0a, 0x00, 0x00, 0x00, { 0x50c30000, 0x2800, 0, 0x2000, 1, 1, 0x0004, 0x0c02, 0xffff, 0x2700, 0x6433, 0x2100 } },
+	{ 0xa40ea446, 0x01, 0x00, 0x3200, 0, 0, 0, 0, 0, 0, 0x01, 0x01, 0x0a, 0x00, 0x00, 0x00, { 0x60ea0000, 0x3000, 0, 0x2600, 1, 1, 0x0004, 0x8f02, 0xffff, 0x2f00, 0x300e, 0x2700 } },
+	{ 0xd80ea446, 0x01, 0x00, 0x3200, 0, 0, 0, 0, 0, 0, 0x01, 0x01, 0x0a, 0x00, 0x00, 0x00, { 0x70110100, 0x3800, 0, 0x2c00, 1, 1, 0x0004, 0x1203, 0xffff, 0x3600, 0xc9e2, 0x2e00 } },
+	{ 0x3c0fa446, 0x01, 0x00, 0x3200, 0, 0, 0, 0, 0, 0, 0x01, 0x01, 0x0a, 0x00, 0x00, 0x00, { 0x80380100, 0x2000, 0, 0x1e00, 2, 1, 0x0004, 0x8300, 0xffff, 0x1f00, 0xcb5e, 0x1a00 } },
+	{ 0x6c0fa446, 0x01, 0x00, 0x3200, 0, 0, 0, 0, 0, 0, 0x01, 0x01, 0x0a, 0x00, 0x00, 0x00, { 0x905f0100, 0x2400, 0, 0x1e00, 2, 1, 0x0004, 0x8901, 0xffff, 0x2300, 0x314c, 0x1d00 } },
+	{ 0xa00fa446, 0x01, 0x00, 0x3200, 0, 0, 0, 0, 0, 0, 0x01, 0x01, 0x0a, 0x00, 0x00, 0x00, { 0xa0860100, 0x2800, 0, 0x2000, 2, 1, 0x0004, 0x0c02, 0xffff, 0x2700, 0x6433, 0x2100 } }
 };
 
 static const SMU74_Discrete_MemoryLevel avfs_memory_level_polaris10 =
-	{0x50140000, 0x50140000, 0x00320000, 0x00, 0x00,
-	 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x0000, 0x00, 0x00};
+	{0x100ea446, 0, 0x30750000, 0x01, 0x01, 0x01, 0x00, 0x00, 0x64, 0x00, 0x00, 0x1f00, 0x00, 0x00};
 
 /**
 * Set the address for reading/writing the SMC SRAM space.
@@ -219,6 +218,18 @@ bool polaris10_is_smc_ram_running(struct pp_smumgr *smumgr)
 	&& (0x20100 <= cgs_read_ind_register(smumgr->device, CGS_IND_REG__SMC, ixSMC_PC_C)));
 }
 
+static bool polaris10_is_hw_avfs_present(struct pp_smumgr *smumgr)
+{
+	uint32_t efuse;
+
+	efuse = cgs_read_ind_register(smumgr->device, CGS_IND_REG__SMC, ixSMU_EFUSE_0 + (49*4));
+	efuse &= 0x00000001;
+	if (efuse)
+		return true;
+
+	return false;
+}
+
 /**
 * Send a message to the SMC, and wait for its response.
 *
@@ -228,21 +239,27 @@ bool polaris10_is_smc_ram_running(struct pp_smumgr *smumgr)
 */
 int polaris10_send_msg_to_smc(struct pp_smumgr *smumgr, uint16_t msg)
 {
+	int ret;
+
 	if (!polaris10_is_smc_ram_running(smumgr))
 		return -1;
 
+
 	SMUM_WAIT_FIELD_UNEQUAL(smumgr, SMC_RESP_0, SMC_RESP, 0);
 
-	if (1 != SMUM_READ_FIELD(smumgr->device, SMC_RESP_0, SMC_RESP))
-		printk("Failed to send Previous Message.\n");
+	ret = SMUM_READ_FIELD(smumgr->device, SMC_RESP_0, SMC_RESP);
 
+	if (ret != 1)
+		printk("\n failed to send pre message %x ret is %d \n",  msg, ret);
 
 	cgs_write_register(smumgr->device, mmSMC_MESSAGE_0, msg);
 
 	SMUM_WAIT_FIELD_UNEQUAL(smumgr, SMC_RESP_0, SMC_RESP, 0);
 
-	if (1 != SMUM_READ_FIELD(smumgr->device, SMC_RESP_0, SMC_RESP))
-		printk("Failed to send Message.\n");
+	ret = SMUM_READ_FIELD(smumgr->device, SMC_RESP_0, SMC_RESP);
+
+	if (ret != 1)
+		printk("\n failed to send message %x ret is %d \n",  msg, ret);
 
 	return 0;
 }
@@ -953,6 +970,11 @@ static int polaris10_smu_init(struct pp_smumgr *smumgr)
 		(cgs_handle_t)smu_data->smu_buffer.handle);
 		return -1;);
 
+	if (polaris10_is_hw_avfs_present(smumgr))
+		smu_data->avfs.avfs_btc_status = AVFS_BTC_BOOT;
+	else
+		smu_data->avfs.avfs_btc_status = AVFS_BTC_NOTSUPPORTED;
+
 	return 0;
 }
 
-- 
cgit v0.10.2


From 92d1576859577b94eafaea9b64f78ab99fe20a78 Mon Sep 17 00:00:00 2001
From: Rex Zhu <Rex.Zhu@amd.com>
Date: Tue, 7 Jun 2016 14:09:56 +0800
Subject: drm/amdgpu/gfx8: update golden setting for polaris10

Signed-off-by: Rex Zhu <Rex.Zhu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 9f6f866..1a5cbaf 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -297,7 +297,8 @@ static const u32 polaris11_golden_common_all[] =
 static const u32 golden_settings_polaris10_a11[] =
 {
 	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
-	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00006208,
+	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
+	mmCB_HW_CONTROL_2, 0, 0x0f000000,
 	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
 	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
-- 
cgit v0.10.2


From 270d013659ddab52a6fd0eacae452c422d08aa39 Mon Sep 17 00:00:00 2001
From: Rex Zhu <Rex.Zhu@amd.com>
Date: Tue, 7 Jun 2016 18:39:06 +0800
Subject: drm/amd/powerplay: enable clock stretch feature for polaris

Power saving feature which reduces the amount of
voltage needed for specific engine clocks.

Signed-off-by: Rex Zhu <Rex.Zhu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c
index f730ec8..64ee78f 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c
@@ -1759,12 +1759,9 @@ static int polaris10_populate_smc_initailial_state(struct pp_hwmgr *hwmgr)
 
 static int polaris10_populate_clock_stretcher_data_table(struct pp_hwmgr *hwmgr)
 {
-	uint32_t ro, efuse, efuse2, clock_freq, volt_without_cks,
-			volt_with_cks, value;
-	uint16_t clock_freq_u16;
+	uint32_t ro, efuse, volt_without_cks, volt_with_cks, value, max, min;
 	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
-	uint8_t type, i, j, cks_setting, stretch_amount, stretch_amount2,
-			volt_offset = 0;
+	uint8_t i, stretch_amount, stretch_amount2, volt_offset = 0;
 	struct phm_ppt_v1_information *table_info =
 			(struct phm_ppt_v1_information *)(hwmgr->pptable);
 	struct phm_ppt_v1_clock_voltage_dependency_table *sclk_table =
@@ -1776,50 +1773,38 @@ static int polaris10_populate_clock_stretcher_data_table(struct pp_hwmgr *hwmgr)
 	 * if the part is SS or FF. if RO >= 1660MHz, part is FF.
 	 */
 	efuse = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC,
-			ixSMU_EFUSE_0 + (146 * 4));
-	efuse2 = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC,
-			ixSMU_EFUSE_0 + (148 * 4));
+			ixSMU_EFUSE_0 + (67 * 4));
 	efuse &= 0xFF000000;
 	efuse = efuse >> 24;
-	efuse2 &= 0xF;
 
-	if (efuse2 == 1)
-		ro = (2300 - 1350) * efuse / 255 + 1350;
-	else
-		ro = (2500 - 1000) * efuse / 255 + 1000;
-
-	if (ro >= 1660)
-		type = 0;
-	else
-		type = 1;
+	if (hwmgr->chip_id == CHIP_POLARIS10) {
+		min = 1000;
+		max = 2300;
+	} else {
+		min = 1100;
+		max = 2100;
+	}
 
-	/* Populate Stretch amount */
-	data->smc_state_table.ClockStretcherAmount = stretch_amount;
+	ro = efuse * (max -min)/255 + min;
 
 	/* Populate Sclk_CKS_masterEn0_7 and Sclk_voltageOffset */
 	for (i = 0; i < sclk_table->count; i++) {
 		data->smc_state_table.Sclk_CKS_masterEn0_7 |=
 				sclk_table->entries[i].cks_enable << i;
-		volt_without_cks = (uint32_t)((14041 *
-			(sclk_table->entries[i].clk/100) / 10000 + 3571 + 75 - ro) * 1000 /
-			(4026 - (13924 * (sclk_table->entries[i].clk/100) / 10000)));
-		volt_with_cks = (uint32_t)((13946 *
-			(sclk_table->entries[i].clk/100) / 10000 + 3320 + 45 - ro) * 1000 /
-			(3664 - (11454 * (sclk_table->entries[i].clk/100) / 10000)));
+
+		volt_without_cks =  (uint32_t)(((ro - 40) * 1000 - 2753594 - sclk_table->entries[i].clk/100 * 136418 /1000) / \
+					(sclk_table->entries[i].clk/100 * 1132925 /10000 - 242418)/100);
+
+		volt_with_cks = (uint32_t)((ro * 1000 -2396351 - sclk_table->entries[i].clk/100 * 329021/1000) / \
+				(sclk_table->entries[i].clk/10000 * 649434 /1000  - 18005)/10);
+
 		if (volt_without_cks >= volt_with_cks)
 			volt_offset = (uint8_t)(((volt_without_cks - volt_with_cks +
 					sclk_table->entries[i].cks_voffset) * 100 / 625) + 1);
+
 		data->smc_state_table.Sclk_voltageOffset[i] = volt_offset;
 	}
 
-	PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, PWR_CKS_ENABLE,
-			STRETCH_ENABLE, 0x0);
-	PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, PWR_CKS_ENABLE,
-			masterReset, 0x1);
-	/* PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, PWR_CKS_ENABLE, staticEnable, 0x1); */
-	PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, PWR_CKS_ENABLE,
-			masterReset, 0x0);
-
 	/* Populate CKS Lookup Table */
 	if (stretch_amount == 1 || stretch_amount == 2 || stretch_amount == 5)
 		stretch_amount2 = 0;
@@ -1833,69 +1818,6 @@ static int polaris10_populate_clock_stretcher_data_table(struct pp_hwmgr *hwmgr)
 				return -EINVAL);
 	}
 
-	value = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC,
-			ixPWR_CKS_CNTL);
-	value &= 0xFFC2FF87;
-	data->smc_state_table.CKS_LOOKUPTable.CKS_LOOKUPTableEntry[0].minFreq =
-			polaris10_clock_stretcher_lookup_table[stretch_amount2][0];
-	data->smc_state_table.CKS_LOOKUPTable.CKS_LOOKUPTableEntry[0].maxFreq =
-			polaris10_clock_stretcher_lookup_table[stretch_amount2][1];
-	clock_freq_u16 = (uint16_t)(PP_SMC_TO_HOST_UL(data->smc_state_table.
-			GraphicsLevel[data->smc_state_table.GraphicsDpmLevelCount - 1].SclkSetting.SclkFrequency) / 100);
-	if (polaris10_clock_stretcher_lookup_table[stretch_amount2][0] < clock_freq_u16
-	&& polaris10_clock_stretcher_lookup_table[stretch_amount2][1] > clock_freq_u16) {
-		/* Program PWR_CKS_CNTL. CKS_USE_FOR_LOW_FREQ */
-		value |= (polaris10_clock_stretcher_lookup_table[stretch_amount2][3]) << 16;
-		/* Program PWR_CKS_CNTL. CKS_LDO_REFSEL */
-		value |= (polaris10_clock_stretcher_lookup_table[stretch_amount2][2]) << 18;
-		/* Program PWR_CKS_CNTL. CKS_STRETCH_AMOUNT */
-		value |= (polaris10_clock_stretch_amount_conversion
-				[polaris10_clock_stretcher_lookup_table[stretch_amount2][3]]
-				 [stretch_amount]) << 3;
-	}
-	CONVERT_FROM_HOST_TO_SMC_US(data->smc_state_table.CKS_LOOKUPTable.CKS_LOOKUPTableEntry[0].minFreq);
-	CONVERT_FROM_HOST_TO_SMC_US(data->smc_state_table.CKS_LOOKUPTable.CKS_LOOKUPTableEntry[0].maxFreq);
-	data->smc_state_table.CKS_LOOKUPTable.CKS_LOOKUPTableEntry[0].setting =
-			polaris10_clock_stretcher_lookup_table[stretch_amount2][2] & 0x7F;
-	data->smc_state_table.CKS_LOOKUPTable.CKS_LOOKUPTableEntry[0].setting |=
-			(polaris10_clock_stretcher_lookup_table[stretch_amount2][3]) << 7;
-
-	cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC,
-			ixPWR_CKS_CNTL, value);
-
-	/* Populate DDT Lookup Table */
-	for (i = 0; i < 4; i++) {
-		/* Assign the minimum and maximum VID stored
-		 * in the last row of Clock Stretcher Voltage Table.
-		 */
-		data->smc_state_table.ClockStretcherDataTable.ClockStretcherDataTableEntry[i].minVID =
-				(uint8_t) polaris10_clock_stretcher_ddt_table[type][i][2];
-		data->smc_state_table.ClockStretcherDataTable.ClockStretcherDataTableEntry[i].maxVID =
-				(uint8_t) polaris10_clock_stretcher_ddt_table[type][i][3];
-		/* Loop through each SCLK and check the frequency
-		 * to see if it lies within the frequency for clock stretcher.
-		 */
-		for (j = 0; j < data->smc_state_table.GraphicsDpmLevelCount; j++) {
-			cks_setting = 0;
-			clock_freq = PP_SMC_TO_HOST_UL(
-					data->smc_state_table.GraphicsLevel[j].SclkSetting.SclkFrequency);
-			/* Check the allowed frequency against the sclk level[j].
-			 *  Sclk's endianness has already been converted,
-			 *  and it's in 10Khz unit,
-			 *  as opposed to Data table, which is in Mhz unit.
-			 */
-			if (clock_freq >= (polaris10_clock_stretcher_ddt_table[type][i][0]) * 100) {
-				cks_setting |= 0x2;
-				if (clock_freq < (polaris10_clock_stretcher_ddt_table[type][i][1]) * 100)
-					cks_setting |= 0x1;
-			}
-			data->smc_state_table.ClockStretcherDataTable.ClockStretcherDataTableEntry[i].setting
-							|= cks_setting << (j * 2);
-		}
-		CONVERT_FROM_HOST_TO_SMC_US(
-			data->smc_state_table.ClockStretcherDataTable.ClockStretcherDataTableEntry[i].setting);
-	}
-
 	value = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixPWR_CKS_CNTL);
 	value &= 0xFFFFFFFE;
 	cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixPWR_CKS_CNTL, value);
@@ -3062,6 +2984,10 @@ int polaris10_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 			data->vddci_control = POLARIS10_VOLTAGE_CONTROL_BY_SVID2;
 	}
 
+	if (table_info->cac_dtp_table->usClockStretchAmount != 0)
+		phm_cap_set(hwmgr->platform_descriptor.platformCaps,
+					PHM_PlatformCaps_ClockStretcher);
+
 	polaris10_set_features_platform_caps(hwmgr);
 
 	polaris10_init_dpm_defaults(hwmgr);
-- 
cgit v0.10.2


From 34511dce4b35685d3988d5c8b100d11a068db5bd Mon Sep 17 00:00:00 2001
From: Mika Kahola <mika.kahola@intel.com>
Date: Mon, 20 Jun 2016 11:10:26 +0300
Subject: drm/i915: Revert DisplayPort fast link training feature

It has been found out that in some HW combination the DisplayPort
fast link training feature caused screen flickering. Let's revert
this feature for now until we can ensure that the feature works for
all platforms.

This is a manual revert of commits 5fa836a9d859 ("drm/i915: DP link
training optimization") and 4e96c97742f4 ("drm/i915: eDP link training
optimization").

Fixes: 5fa836a9d859 ("drm/i915: DP link training optimization")
Fixes: 4e96c97742f4 ("drm/i915: eDP link training optimization")
Cc: <stable@vger.kernel.org> # v4.2+

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91393
Reviewed-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Mika Kahola <mika.kahola@intel.com>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1466410226-19543-1-git-send-email-mika.kahola@intel.com
(cherry picked from commit 91df09d92ad82c8778ca218097bf827f154292ca)

diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index ffe5f84..79cf2d5 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -4977,9 +4977,6 @@ intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd)
 	intel_display_power_get(dev_priv, power_domain);
 
 	if (long_hpd) {
-		/* indicate that we need to restart link training */
-		intel_dp->train_set_valid = false;
-
 		intel_dp_long_pulse(intel_dp->attached_connector);
 		if (intel_dp->is_mst)
 			ret = IRQ_HANDLED;
diff --git a/drivers/gpu/drm/i915/intel_dp_link_training.c b/drivers/gpu/drm/i915/intel_dp_link_training.c
index 0b8eefc..60fb39c 100644
--- a/drivers/gpu/drm/i915/intel_dp_link_training.c
+++ b/drivers/gpu/drm/i915/intel_dp_link_training.c
@@ -85,8 +85,7 @@ static bool
 intel_dp_reset_link_train(struct intel_dp *intel_dp,
 			uint8_t dp_train_pat)
 {
-	if (!intel_dp->train_set_valid)
-		memset(intel_dp->train_set, 0, sizeof(intel_dp->train_set));
+	memset(intel_dp->train_set, 0, sizeof(intel_dp->train_set));
 	intel_dp_set_signal_levels(intel_dp);
 	return intel_dp_set_link_train(intel_dp, dp_train_pat);
 }
@@ -161,23 +160,6 @@ intel_dp_link_training_clock_recovery(struct intel_dp *intel_dp)
 			break;
 		}
 
-		/*
-		 * if we used previously trained voltage and pre-emphasis values
-		 * and we don't get clock recovery, reset link training values
-		 */
-		if (intel_dp->train_set_valid) {
-			DRM_DEBUG_KMS("clock recovery not ok, reset");
-			/* clear the flag as we are not reusing train set */
-			intel_dp->train_set_valid = false;
-			if (!intel_dp_reset_link_train(intel_dp,
-						       DP_TRAINING_PATTERN_1 |
-						       DP_LINK_SCRAMBLING_DISABLE)) {
-				DRM_ERROR("failed to enable link training\n");
-				return;
-			}
-			continue;
-		}
-
 		/* Check to see if we've tried the max voltage */
 		for (i = 0; i < intel_dp->lane_count; i++)
 			if ((intel_dp->train_set[i] & DP_TRAIN_MAX_SWING_REACHED) == 0)
@@ -284,7 +266,6 @@ intel_dp_link_training_channel_equalization(struct intel_dp *intel_dp)
 		/* Make sure clock is still ok */
 		if (!drm_dp_clock_recovery_ok(link_status,
 					      intel_dp->lane_count)) {
-			intel_dp->train_set_valid = false;
 			intel_dp_link_training_clock_recovery(intel_dp);
 			intel_dp_set_link_train(intel_dp,
 						training_pattern |
@@ -301,7 +282,6 @@ intel_dp_link_training_channel_equalization(struct intel_dp *intel_dp)
 
 		/* Try 5 times, then try clock recovery if that fails */
 		if (tries > 5) {
-			intel_dp->train_set_valid = false;
 			intel_dp_link_training_clock_recovery(intel_dp);
 			intel_dp_set_link_train(intel_dp,
 						training_pattern |
@@ -322,10 +302,8 @@ intel_dp_link_training_channel_equalization(struct intel_dp *intel_dp)
 
 	intel_dp_set_idle_link_train(intel_dp);
 
-	if (channel_eq) {
-		intel_dp->train_set_valid = true;
+	if (channel_eq)
 		DRM_DEBUG_KMS("Channel EQ done. DP Training successful\n");
-	}
 }
 
 void intel_dp_stop_link_train(struct intel_dp *intel_dp)
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 4a24b00..f7f0f01 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -863,8 +863,6 @@ struct intel_dp {
 	/* This is called before a link training is starterd */
 	void (*prepare_link_retrain)(struct intel_dp *intel_dp);
 
-	bool train_set_valid;
-
 	/* Displayport compliance testing */
 	unsigned long compliance_test_type;
 	unsigned long compliance_test_data;
-- 
cgit v0.10.2


From 1e3fa0acfec677e915d7de5ac6e1f18cfa4f805b Mon Sep 17 00:00:00 2001
From: Lyude <cpaul@redhat.com>
Date: Thu, 9 Jun 2016 11:58:15 -0400
Subject: drm/i915/fbc: Disable on HSW by default for now

>From https://bugs.freedesktop.org/show_bug.cgi?id=96461 :

This was kind of a difficult bug to track down. If you're using a
Haswell system running GNOME and you have fbc completely enabled and
working, playing videos can result in video artifacts. Steps to
reproduce:

- Run GNOME
- Ensure FBC is enabled and active
- Download a movie, I used the ogg version of Big Buck Bunny for this
- Run `gst-launch-1.0 filesrc location='some_movie.ogg' ! decodebin !
  glimagesink` in a terminal
- Watch for about over a minute, you'll see small horizontal lines go
  down the screen.

For the time being, disable FBC for Haswell by default.

Stefan Richter reported kernel freezes (no video artifacts) when fbc
is on.  (E3-1245 v3 with HD P4600; openbox and some KDE and LXDE
applications, thread begins at https://lkml.org/lkml/2016/4/26/813).
We also got reports from Steven Honeyman on openbox+roxterm.

v2 (From Paulo):
  - Add extra information to the commit message
  - Add Fixes tag
  - Rebase

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=96461
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=96464
Fixes: a98ee79317b4 ("drm/i915/fbc: enable FBC by default on HSW and BDW")
Cc: stable@vger.kernel.org
Reviewed-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
Signed-off-by: Lyude <cpaul@redhat.com>
Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1465487895-7401-1-git-send-email-cpaul@redhat.com
(cherry picked from commit c7f7e2feffb0294302041507dfd5fc15f01afccc)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>

diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c
index d5a7cfe..647127f 100644
--- a/drivers/gpu/drm/i915/intel_fbc.c
+++ b/drivers/gpu/drm/i915/intel_fbc.c
@@ -824,8 +824,7 @@ static bool intel_fbc_can_choose(struct intel_crtc *crtc)
 {
 	struct drm_i915_private *dev_priv = crtc->base.dev->dev_private;
 	struct intel_fbc *fbc = &dev_priv->fbc;
-	bool enable_by_default = IS_HASWELL(dev_priv) ||
-				 IS_BROADWELL(dev_priv);
+	bool enable_by_default = IS_BROADWELL(dev_priv);
 
 	if (intel_vgpu_active(dev_priv->dev)) {
 		fbc->no_fbc_reason = "VGPU is active";
-- 
cgit v0.10.2


From f7e0efc9b50266f5317b50732efff98d40fc879a Mon Sep 17 00:00:00 2001
From: Jean-Philippe Brucker <jean-philippe.brucker@arm.com>
Date: Fri, 17 Jun 2016 18:33:00 +0100
Subject: arm64: update ASID limit

During a rollover, we mark the active ASID on each CPU as reserved, before
allocating a new ID for the task that caused the rollover. This means that
with N CPUs, we can only guarantee the new task to obtain a valid ASID if
we have at least N+1 ASIDs. Update this limit in the initcall check.

Note that this restriction was introduced by commit 8e648066 on the
arch/arm side, which disallow re-using the previously active ASID on the
local CPU, as it would introduce a TLB race.

In addition, we only dispose of NUM_USER_ASIDS-1, since ASID 0 is
reserved. Add this restriction as well.

Signed-off-by: Jean-Philippe Brucker <jean-philippe.brucker@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>

diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index b7b3978..efcf1f7 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -179,7 +179,7 @@ static u64 new_context(struct mm_struct *mm, unsigned int cpu)
 						 &asid_generation);
 	flush_context(cpu);
 
-	/* We have at least 1 ASID per CPU, so this will always succeed */
+	/* We have more ASIDs than CPUs, so this will always succeed */
 	asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, 1);
 
 set_asid:
@@ -227,8 +227,11 @@ switch_mm_fastpath:
 static int asids_init(void)
 {
 	asid_bits = get_cpu_asid_bits();
-	/* If we end up with more CPUs than ASIDs, expect things to crash */
-	WARN_ON(NUM_USER_ASIDS < num_possible_cpus());
+	/*
+	 * Expect allocation after rollover to fail if we don't have at least
+	 * one more ASID than CPUs. ASID #0 is reserved for init_mm.
+	 */
+	WARN_ON(NUM_USER_ASIDS - 1 <= num_possible_cpus());
 	atomic64_set(&asid_generation, ASID_FIRST_VERSION);
 	asid_map = kzalloc(BITS_TO_LONGS(NUM_USER_ASIDS) * sizeof(*asid_map),
 			   GFP_KERNEL);
-- 
cgit v0.10.2


From 9ca4e58c20d5cb2a5bc378238188c71317aceac5 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Tue, 21 Jun 2016 10:44:00 +0900
Subject: arm64: fix boot image dependencies to not generate invalid images

I fixed boot image dependencies for arch/arm in commit 3939f3345050
("ARM: 8418/1: add boot image dependencies to not generate invalid
images").

I see a similar problem for arch/arm64; "make -jN Image Image.gz"
would sometimes end up generating bad images where N > 1.

Fix the dependency in arch/arm64/Makefile to avoid the race
between "make Image" and "make Image.*".

Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>

diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 7085e32..648a32c 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -95,7 +95,7 @@ boot := arch/arm64/boot
 Image: vmlinux
 	$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
 
-Image.%: vmlinux
+Image.%: Image
 	$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
 
 zinstall install:
-- 
cgit v0.10.2


From 20c27a4270c775d7ed661491af8ac03264d60fc6 Mon Sep 17 00:00:00 2001
From: Shaokun Zhang <zhangshaokun@hisilicon.com>
Date: Tue, 21 Jun 2016 15:32:57 +0800
Subject: arm64: mm: remove page_mapping check in __sync_icache_dcache

__sync_icache_dcache unconditionally skips the cache maintenance for
anonymous pages, under the assumption that flushing is only required in
the presence of D-side aliases [see 7249b79f6b4cc ("arm64: Do not flush
the D-cache for anonymous pages")].

Unfortunately, this breaks migration of anonymous pages holding
self-modifying code, where userspace cannot be reasonably expected to
reissue maintenance instructions in response to a migration.

This patch fixes the problem by removing the broken page_mapping(page)
check from the cache syncing code, otherwise we may end up fetching and
executing stale instructions from the PoU.

Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: <stable@vger.kernel.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Shaokun Zhang <zhangshaokun@hisilicon.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>

diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c
index dbd12ea..43a76b0 100644
--- a/arch/arm64/mm/flush.c
+++ b/arch/arm64/mm/flush.c
@@ -71,10 +71,6 @@ void __sync_icache_dcache(pte_t pte, unsigned long addr)
 {
 	struct page *page = pte_page(pte);
 
-	/* no flushing needed for anonymous pages */
-	if (!page_mapping(page))
-		return;
-
 	if (!test_and_set_bit(PG_dcache_clean, &page->flags))
 		sync_icache_aliases(page_address(page),
 				    PAGE_SIZE << compound_order(page));
-- 
cgit v0.10.2


From 2f38b1b16d9280689e5cfa47a4c50956bf437f0d Mon Sep 17 00:00:00 2001
From: Lv Zheng <lv.zheng@intel.com>
Date: Tue, 21 Jun 2016 12:34:15 +0800
Subject: ACPICA: Namespace: Fix deadlock triggered by MLC support in dynamic
 table loading

The new module-level code (MLC) approach invokes MLC on the per-table
basis, but the dynamic loading support of this is incorrect because
of the lock order:

 acpi_ns_evaluate
   acpi_ex_enter_intperter
     acpi_ns_load_table (triggered by Load opcode)
       acpi_ns_exec_module_code_list
         acpi_ex_enter_intperter

The regression is introduced by the following commit:

  Commit: 2785ce8d0da1cac9d8f78615e116cf929e9a9123
  ACPICA Commit: 071eff738c59eda1792ac24b3b688b61691d7e7c
  Subject: ACPICA: Add per-table execution of module-level code

This patch fixes this regression by unlocking the interpreter lock
before invoking MLC.  However, the unlocking is done to the
acpi_ns_load_table(), in which the interpreter lock should be locked
by acpi_ns_parse_table() but it wasn't.

Fixes: 2785ce8d0da1 (ACPICA: Add per-table execution of module-level code)
Reported-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Tested-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Lv Zheng <lv.zheng@intel.com>
Cc: 4.5+ <stable@vger.kernel.org> # 4.5+
[ rjw : Subject ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/drivers/acpi/acpica/exconfig.c b/drivers/acpi/acpica/exconfig.c
index a1d177d..21932d6 100644
--- a/drivers/acpi/acpica/exconfig.c
+++ b/drivers/acpi/acpica/exconfig.c
@@ -108,7 +108,9 @@ acpi_ex_add_table(u32 table_index,
 
 	/* Add the table to the namespace */
 
+	acpi_ex_exit_interpreter();
 	status = acpi_ns_load_table(table_index, parent_node);
+	acpi_ex_enter_interpreter();
 	if (ACPI_FAILURE(status)) {
 		acpi_ut_remove_reference(obj_desc);
 		*ddb_handle = NULL;
diff --git a/drivers/acpi/acpica/nsparse.c b/drivers/acpi/acpica/nsparse.c
index f631a47..1783cd7 100644
--- a/drivers/acpi/acpica/nsparse.c
+++ b/drivers/acpi/acpica/nsparse.c
@@ -47,6 +47,7 @@
 #include "acparser.h"
 #include "acdispat.h"
 #include "actables.h"
+#include "acinterp.h"
 
 #define _COMPONENT          ACPI_NAMESPACE
 ACPI_MODULE_NAME("nsparse")
@@ -170,6 +171,8 @@ acpi_ns_parse_table(u32 table_index, struct acpi_namespace_node *start_node)
 
 	ACPI_FUNCTION_TRACE(ns_parse_table);
 
+	acpi_ex_enter_interpreter();
+
 	/*
 	 * AML Parse, pass 1
 	 *
@@ -185,7 +188,7 @@ acpi_ns_parse_table(u32 table_index, struct acpi_namespace_node *start_node)
 	status = acpi_ns_one_complete_parse(ACPI_IMODE_LOAD_PASS1,
 					    table_index, start_node);
 	if (ACPI_FAILURE(status)) {
-		return_ACPI_STATUS(status);
+		goto error_exit;
 	}
 
 	/*
@@ -201,8 +204,10 @@ acpi_ns_parse_table(u32 table_index, struct acpi_namespace_node *start_node)
 	status = acpi_ns_one_complete_parse(ACPI_IMODE_LOAD_PASS2,
 					    table_index, start_node);
 	if (ACPI_FAILURE(status)) {
-		return_ACPI_STATUS(status);
+		goto error_exit;
 	}
 
+error_exit:
+	acpi_ex_exit_interpreter();
 	return_ACPI_STATUS(status);
 }
-- 
cgit v0.10.2


From 3e1d7fb0d279fea19eb4e36cc9bddf89264ba03f Mon Sep 17 00:00:00 2001
From: MyungJoo Ham <myungjoo.ham@samsung.com>
Date: Fri, 2 Oct 2015 12:39:23 +0900
Subject: PM / devfreq: devm_kzalloc to have dev pointer more precisely

devm_kzalloc of devfreq's statistics data structure has been
using its parent device as the dev allocated for.
If a device's devfreq is disabled in run-time,
such allocated memory won't be freed.

Desginating more precisely with the devfreq device
pointer fixes the issue.

Signed-off-by: MyungJoo Ham <myungjoo.ham@samsung.com>

diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
index 1d6c803..3801737 100644
--- a/drivers/devfreq/devfreq.c
+++ b/drivers/devfreq/devfreq.c
@@ -561,15 +561,6 @@ struct devfreq *devfreq_add_device(struct device *dev,
 		mutex_lock(&devfreq->lock);
 	}
 
-	devfreq->trans_table =	devm_kzalloc(dev, sizeof(unsigned int) *
-						devfreq->profile->max_state *
-						devfreq->profile->max_state,
-						GFP_KERNEL);
-	devfreq->time_in_state = devm_kzalloc(dev, sizeof(unsigned long) *
-						devfreq->profile->max_state,
-						GFP_KERNEL);
-	devfreq->last_stat_updated = jiffies;
-
 	dev_set_name(&devfreq->dev, "%s", dev_name(dev));
 	err = device_register(&devfreq->dev);
 	if (err) {
@@ -578,6 +569,15 @@ struct devfreq *devfreq_add_device(struct device *dev,
 		goto err_out;
 	}
 
+	devfreq->trans_table =	devm_kzalloc(&devfreq->dev, sizeof(unsigned int) *
+						devfreq->profile->max_state *
+						devfreq->profile->max_state,
+						GFP_KERNEL);
+	devfreq->time_in_state = devm_kzalloc(&devfreq->dev, sizeof(unsigned long) *
+						devfreq->profile->max_state,
+						GFP_KERNEL);
+	devfreq->last_stat_updated = jiffies;
+
 	srcu_init_notifier_head(&devfreq->transition_notifier_list);
 
 	mutex_unlock(&devfreq->lock);
-- 
cgit v0.10.2


From ac4b281176a54d17dd3f91b7fb4a4656471d8730 Mon Sep 17 00:00:00 2001
From: MyungJoo Ham <myungjoo.ham@samsung.com>
Date: Fri, 2 Oct 2015 12:48:54 +0900
Subject: PM / devfreq: fix duplicated kfree on devfreq pointer

device_unregister() calls kfree already.

Signed-off-by: MyungJoo Ham <myungjoo.ham@samsung.com>

diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
index 3801737..6f33c1e 100644
--- a/drivers/devfreq/devfreq.c
+++ b/drivers/devfreq/devfreq.c
@@ -603,7 +603,6 @@ struct devfreq *devfreq_add_device(struct device *dev,
 err_init:
 	list_del(&devfreq->node);
 	device_unregister(&devfreq->dev);
-	kfree(devfreq);
 err_out:
 	return ERR_PTR(err);
 }
-- 
cgit v0.10.2


From a5e9b937fa91b3f404618b5281c3239bd1f09c7b Mon Sep 17 00:00:00 2001
From: Cai Zhiyong <caizhiyong@huawei.com>
Date: Sat, 14 May 2016 14:13:30 +0800
Subject: PM / devfreq: fix double call put_device

1295  */
1296 void device_unregister(struct device *dev)
1297 {
1298         pr_debug("device: '%s': %s\n", dev_name(dev), __func__);
1299         device_del(dev);
1300         put_device(dev);
1301 }
1302 EXPORT_SYMBOL_GPL(device_unregister);
1303

device_unregister is called put_device, there is no need to call
put_device(&devfreq->dev) again.

Signed-off-by: Cai Zhiyong <caizhiyong@huawei.com>
Reviewed-by: Chanwoo Choi <cw00.choi@samsung.com>
Signed-off-by: MyungJoo Ham <myungjoo.ham@samsung.com>

diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
index 6f33c1e..3e1afb4 100644
--- a/drivers/devfreq/devfreq.c
+++ b/drivers/devfreq/devfreq.c
@@ -620,7 +620,6 @@ int devfreq_remove_device(struct devfreq *devfreq)
 		return -EINVAL;
 
 	device_unregister(&devfreq->dev);
-	put_device(&devfreq->dev);
 
 	return 0;
 }
-- 
cgit v0.10.2


From 67ffdb529b4ec9833f73947dec01eaa78dd53c3d Mon Sep 17 00:00:00 2001
From: MyungJoo Ham <myungjoo.ham@samsung.com>
Date: Mon, 16 May 2016 11:41:57 +0900
Subject: PM / devfreq: remove double put_device

When device_register() returns with error, it has already
done put_device() on the input device pointer.

Signed-off-by: MyungJoo Ham <myungjoo.ham@samsung.com>

diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
index 3e1afb4..0ebd64d 100644
--- a/drivers/devfreq/devfreq.c
+++ b/drivers/devfreq/devfreq.c
@@ -564,7 +564,6 @@ struct devfreq *devfreq_add_device(struct device *dev,
 	dev_set_name(&devfreq->dev, "%s", dev_name(dev));
 	err = device_register(&devfreq->dev);
 	if (err) {
-		put_device(&devfreq->dev);
 		mutex_unlock(&devfreq->lock);
 		goto err_out;
 	}
-- 
cgit v0.10.2


From 674789dd2c4b53ed368dc81ae22d72ac4fdb92ec Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Thu, 26 May 2016 09:45:42 +0300
Subject: PM / devfreq: exynos-nocp: Remove incorrect IS_ERR() check

Smatch complains because platform_get_resource() returns NULL on error
and not an error pointer so the check is wrong.  Julia Lawall pointed
out that normally we don't check these, because devm_ioremap_resource()
has a check for NULL.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Julia Lawall <julia.lawall@lip6.fr>
Signed-off-by: MyungJoo Ham <myungjoo.ham@samsung.com>

diff --git a/drivers/devfreq/event/exynos-nocp.c b/drivers/devfreq/event/exynos-nocp.c
index 6b6a5f3..a584140 100644
--- a/drivers/devfreq/event/exynos-nocp.c
+++ b/drivers/devfreq/event/exynos-nocp.c
@@ -220,9 +220,6 @@ static int exynos_nocp_parse_dt(struct platform_device *pdev,
 
 	/* Maps the memory mapped IO to control nocp register */
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (IS_ERR(res))
-		return PTR_ERR(res);
-
 	base = devm_ioremap_resource(dev, res);
 	if (IS_ERR(base))
 		return PTR_ERR(base);
-- 
cgit v0.10.2


From 8d39fc085d268a56486066a3deca94745f804f2d Mon Sep 17 00:00:00 2001
From: Lukasz Luba <lukasz.luba@arm.com>
Date: Tue, 31 May 2016 11:25:09 +0100
Subject: PM / devfreq: fix initialization of current frequency in last status

Some systems need current frequency from last_status for calculation
but it is zeroed during initialization. When the device starts there is
no history, but we can assume that the last frequency was the
same as the initial frequency (which is also used in 'previous_freq').
The log shows the result of this misinterpreted value.
[    2.042847] ... Failed to get voltage for frequency 0: -34

Signed-off-by: Lukasz Luba <lukasz.luba@arm.com>
Reviewed-by: Javi Merino <javi.merino@arm.com>
Signed-off-by: MyungJoo Ham <myungjoo.ham@samsung.com>

diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
index 0ebd64d..c7f47e3 100644
--- a/drivers/devfreq/devfreq.c
+++ b/drivers/devfreq/devfreq.c
@@ -552,6 +552,7 @@ struct devfreq *devfreq_add_device(struct device *dev,
 	devfreq->profile = profile;
 	strncpy(devfreq->governor_name, governor_name, DEVFREQ_NAME_LEN);
 	devfreq->previous_freq = profile->initial_freq;
+	devfreq->last_status.current_frequency = profile->initial_freq;
 	devfreq->data = data;
 	devfreq->nb.notifier_call = devfreq_notifier_call;
 
-- 
cgit v0.10.2


From 04e59a0211ff012ba60c00baca673482570784e9 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Sat, 18 Jun 2016 11:31:33 +0200
Subject: phy-sun4i-usb: Fix irq free conditions to match request conditions

commit 5cf700ac9d50 ("phy: phy-sun4i-usb: Fix optional gpios failing
probe")
changed the condition under which irqs are requested, but omitted matching
changes to sun4i_usb_phy_remove(). This commit fixes this.

Fixes: 5cf700ac9d50 ("phy: phy-sun4i-usb: Fix optional gpios failing probe")
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Kishon Vijay Abraham I <kishon@ti.com>

diff --git a/drivers/phy/phy-sun4i-usb.c b/drivers/phy/phy-sun4i-usb.c
index 7390294..de3101f 100644
--- a/drivers/phy/phy-sun4i-usb.c
+++ b/drivers/phy/phy-sun4i-usb.c
@@ -514,9 +514,9 @@ static int sun4i_usb_phy_remove(struct platform_device *pdev)
 
 	if (data->vbus_power_nb_registered)
 		power_supply_unreg_notifier(&data->vbus_power_nb);
-	if (data->id_det_irq >= 0)
+	if (data->id_det_irq > 0)
 		devm_free_irq(dev, data->id_det_irq, data);
-	if (data->vbus_det_irq >= 0)
+	if (data->vbus_det_irq > 0)
 		devm_free_irq(dev, data->vbus_det_irq, data);
 
 	cancel_delayed_work_sync(&data->detect);
-- 
cgit v0.10.2


From c9058d43d99404986133112393be1d1a4daf1b69 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben.dooks@codethink.co.uk>
Date: Tue, 21 Jun 2016 19:18:32 +0100
Subject: ALSA: hda/tegra: iomem fixups for sparse warnings

The readl/writel are not being passed __iomem annotated
variables, so fix the following sparse warnings by adding
__iomem in:

sound/pci/hda/hda_tegra.c:120:9: warning: incorrect type in argument 2 (different address spaces)
sound/pci/hda/hda_tegra.c:120:9:    expected void volatile [noderef] <asn:2>*addr
sound/pci/hda/hda_tegra.c:120:9:    got unsigned int [usertype] *addr
sound/pci/hda/hda_tegra.c:125:16: warning: incorrect type in argument 1 (different address spaces)
sound/pci/hda/hda_tegra.c:125:16:    expected void const volatile [noderef] <asn:2>*addr
sound/pci/hda/hda_tegra.c:125:16:    got unsigned int [usertype] *addr
sound/pci/hda/hda_tegra.c:134:13: warning: incorrect type in argument 1 (different address spaces)
sound/pci/hda/hda_tegra.c:134:13:    expected void const volatile [noderef] <asn:2>*addr
sound/pci/hda/hda_tegra.c:134:13:    got void *dword_addr
sound/pci/hda/hda_tegra.c:137:9: warning: incorrect type in argument 2 (different address spaces)
sound/pci/hda/hda_tegra.c:137:9:    expected void volatile [noderef] <asn:2>*addr
sound/pci/hda/hda_tegra.c:137:9:    got void *dword_addr
sound/pci/hda/hda_tegra.c:146:13: warning: incorrect type in argument 1 (different address spaces)
sound/pci/hda/hda_tegra.c:146:13:    expected void const volatile [noderef] <asn:2>*addr
sound/pci/hda/hda_tegra.c:146:13:    got void *dword_addr
sound/pci/hda/hda_tegra.c:156:13: warning: incorrect type in argument 1 (different address spaces)
sound/pci/hda/hda_tegra.c:156:13:    expected void const volatile [noderef] <asn:2>*addr
sound/pci/hda/hda_tegra.c:156:13:    got void *dword_addr
sound/pci/hda/hda_tegra.c:159:9: warning: incorrect type in argument 2 (different address spaces)
sound/pci/hda/hda_tegra.c:159:9:    expected void volatile [noderef] <asn:2>*addr
sound/pci/hda/hda_tegra.c:159:9:    got void *dword_addr
sound/pci/hda/hda_tegra.c:168:13: warning: incorrect type in argument 1 (different address spaces)
sound/pci/hda/hda_tegra.c:168:13:    expected void const volatile [noderef] <asn:2>*addr
sound/pci/hda/hda_tegra.c:168:13:    got void *dword_addr
sound/pci/hda/hda_tegra.c:173:23: warning: incorrect type in initializer (incompatible argument 2 (different address spaces))
sound/pci/hda/hda_tegra.c:173:23:    expected void ( *reg_writel )( ... )
sound/pci/hda/hda_tegra.c:173:23:    got void ( static [toplevel] *<noident> )( ... )
sound/pci/hda/hda_tegra.c:174:22: warning: incorrect type in initializer (incompatible argument 1 (different address spaces))
sound/pci/hda/hda_tegra.c:174:22:    expected unsigned int ( *reg_readl )( ... )
sound/pci/hda/hda_tegra.c:174:22:    got unsigned int ( static [toplevel] *<noident> )( ... )
sound/pci/hda/hda_tegra.c:175:23: warning: incorrect type in initializer (incompatible argument 2 (different address spaces))
sound/pci/hda/hda_tegra.c:175:23:    expected void ( *reg_writew )( ... )
sound/pci/hda/hda_tegra.c:175:23:    got void ( static [toplevel] *<noident> )( ... )
sound/pci/hda/hda_tegra.c:176:22: warning: incorrect type in initializer (incompatible argument 1 (different address spaces))
sound/pci/hda/hda_tegra.c:176:22:    expected unsigned short ( *reg_readw )( ... )
sound/pci/hda/hda_tegra.c:176:22:    got unsigned short ( static [toplevel] *<noident> )( ... )
sound/pci/hda/hda_tegra.c:177:23: warning: incorrect type in initializer (incompatible argument 2 (different address spaces))
sound/pci/hda/hda_tegra.c:177:23:    expected void ( *reg_writeb )( ... )
sound/pci/hda/hda_tegra.c:177:23:    got void ( static [toplevel] *<noident> )( ... )
sound/pci/hda/hda_tegra.c:178:22: warning: incorrect type in initializer (incompatible argument 1 (different address spaces))
sound/pci/hda/hda_tegra.c:178:22:    expected unsigned char ( *reg_readb )( ... )
sound/pci/hda/hda_tegra.c:178:22:    got unsigned char ( static [toplevel] *<noident> )( ... )

Signed-off-by: Ben Dooks <ben.dooks@codethink.co.uk>
Acked-by: Thierry Reding <treding@nvidia.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/hda/hda_tegra.c b/sound/pci/hda/hda_tegra.c
index 17fd817..0621920 100644
--- a/sound/pci/hda/hda_tegra.c
+++ b/sound/pci/hda/hda_tegra.c
@@ -115,20 +115,20 @@ static int substream_free_pages(struct azx *chip,
 /*
  * Register access ops. Tegra HDA register access is DWORD only.
  */
-static void hda_tegra_writel(u32 value, u32 *addr)
+static void hda_tegra_writel(u32 value, u32 __iomem *addr)
 {
 	writel(value, addr);
 }
 
-static u32 hda_tegra_readl(u32 *addr)
+static u32 hda_tegra_readl(u32 __iomem *addr)
 {
 	return readl(addr);
 }
 
-static void hda_tegra_writew(u16 value, u16 *addr)
+static void hda_tegra_writew(u16 value, u16 __iomem  *addr)
 {
 	unsigned int shift = ((unsigned long)(addr) & 0x3) << 3;
-	void *dword_addr = (void *)((unsigned long)(addr) & ~0x3);
+	void __iomem *dword_addr = (void __iomem *)((unsigned long)(addr) & ~0x3);
 	u32 v;
 
 	v = readl(dword_addr);
@@ -137,20 +137,20 @@ static void hda_tegra_writew(u16 value, u16 *addr)
 	writel(v, dword_addr);
 }
 
-static u16 hda_tegra_readw(u16 *addr)
+static u16 hda_tegra_readw(u16 __iomem *addr)
 {
 	unsigned int shift = ((unsigned long)(addr) & 0x3) << 3;
-	void *dword_addr = (void *)((unsigned long)(addr) & ~0x3);
+	void __iomem *dword_addr = (void __iomem *)((unsigned long)(addr) & ~0x3);
 	u32 v;
 
 	v = readl(dword_addr);
 	return (v >> shift) & 0xffff;
 }
 
-static void hda_tegra_writeb(u8 value, u8 *addr)
+static void hda_tegra_writeb(u8 value, u8 __iomem *addr)
 {
 	unsigned int shift = ((unsigned long)(addr) & 0x3) << 3;
-	void *dword_addr = (void *)((unsigned long)(addr) & ~0x3);
+	void __iomem *dword_addr = (void __iomem *)((unsigned long)(addr) & ~0x3);
 	u32 v;
 
 	v = readl(dword_addr);
@@ -159,10 +159,10 @@ static void hda_tegra_writeb(u8 value, u8 *addr)
 	writel(v, dword_addr);
 }
 
-static u8 hda_tegra_readb(u8 *addr)
+static u8 hda_tegra_readb(u8 __iomem *addr)
 {
 	unsigned int shift = ((unsigned long)(addr) & 0x3) << 3;
-	void *dword_addr = (void *)((unsigned long)(addr) & ~0x3);
+	void __iomem *dword_addr = (void __iomem *)((unsigned long)(addr) & ~0x3);
 	u32 v;
 
 	v = readl(dword_addr);
-- 
cgit v0.10.2


From 5c492c3f5255bd34f7ff8867515ecf98dcba2a2e Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Wed, 22 Jun 2016 10:06:12 +0100
Subject: arm64: smp: Add function to determine if cpus are stuck in the kernel

kernel/smp.c has a fancy counter that keeps track of the number of CPUs
it marked as not-present and left in cpu_park_loop(). If there are any
CPUs spinning in here, features like kexec or hibernate may release them
by overwriting this memory.

This problem also occurs on machines using spin-tables to release
secondary cores.
After commit 44dbcc93ab67 ("arm64: Fix behavior of maxcpus=N")
we bring all known cpus into the secondary holding pen, meaning this
memory can't be re-used by kexec or hibernate.

Add a function cpus_are_stuck_in_kernel() to determine if either of these
cases have occurred.

Signed-off-by: James Morse <james.morse@arm.com>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>

diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h
index 433e504..0226447 100644
--- a/arch/arm64/include/asm/smp.h
+++ b/arch/arm64/include/asm/smp.h
@@ -124,6 +124,18 @@ static inline void cpu_panic_kernel(void)
 	cpu_park_loop();
 }
 
+/*
+ * If a secondary CPU enters the kernel but fails to come online,
+ * (e.g. due to mismatched features), and cannot exit the kernel,
+ * we increment cpus_stuck_in_kernel and leave the CPU in a
+ * quiesecent loop within the kernel text. The memory containing
+ * this loop must not be re-used for anything else as the 'stuck'
+ * core is executing it.
+ *
+ * This function is used to inhibit features like kexec and hibernate.
+ */
+bool cpus_are_stuck_in_kernel(void);
+
 #endif /* ifndef __ASSEMBLY__ */
 
 #endif /* ifndef __ASM_SMP_H */
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 678e084..62ff3c0 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -909,3 +909,21 @@ int setup_profiling_timer(unsigned int multiplier)
 {
 	return -EINVAL;
 }
+
+static bool have_cpu_die(void)
+{
+#ifdef CONFIG_HOTPLUG_CPU
+	int any_cpu = raw_smp_processor_id();
+
+	if (cpu_ops[any_cpu]->cpu_die)
+		return true;
+#endif
+	return false;
+}
+
+bool cpus_are_stuck_in_kernel(void)
+{
+	bool smp_spin_tables = (num_possible_cpus() > 1 && !have_cpu_die());
+
+	return !!cpus_stuck_in_kernel || smp_spin_tables;
+}
-- 
cgit v0.10.2


From d74b4e4f1a6dbe27acce723e071c86a6ed154bf2 Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Wed, 22 Jun 2016 10:06:13 +0100
Subject: arm64: hibernate: Don't hibernate on systems with stuck CPUs

Hibernate relies on cpu hotplug to prevent secondary cores executing
the kernel text while it is being restored.

Add a call to cpus_are_stuck_in_kernel() to determine if there are
CPUs not counted by 'num_online_cpus()', and prevent hibernate in this
case.

Fixes: 82869ac57b5 ("arm64: kernel: Add support for hibernate/suspend-to-disk")
Acked-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: James Morse <james.morse@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>

diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c
index f8df75d..21ab5df 100644
--- a/arch/arm64/kernel/hibernate.c
+++ b/arch/arm64/kernel/hibernate.c
@@ -33,6 +33,7 @@
 #include <asm/pgtable.h>
 #include <asm/pgtable-hwdef.h>
 #include <asm/sections.h>
+#include <asm/smp.h>
 #include <asm/suspend.h>
 #include <asm/virt.h>
 
@@ -236,6 +237,11 @@ int swsusp_arch_suspend(void)
 	unsigned long flags;
 	struct sleep_stack_data state;
 
+	if (cpus_are_stuck_in_kernel()) {
+		pr_err("Can't hibernate: no mechanism to offline secondary CPUs.\n");
+		return -EBUSY;
+	}
+
 	local_dbg_save(flags);
 
 	if (__cpu_suspend_enter(&state)) {
-- 
cgit v0.10.2


From ba562d5e54fd3136bfea0457add3675850247774 Mon Sep 17 00:00:00 2001
From: Alexander Shiyan <shc_work@mail.ru>
Date: Wed, 1 Jun 2016 22:21:53 +0300
Subject: pinctrl: imx: Do not treat a PIN without MUX register as an error

Some PINs do not have a MUX register, it is not an error.
It is necessary to allow the continuation of the PINs configuration,
otherwise the whole PIN-group will be configured incorrectly.

Cc: stable@vger.kernel.org
Signed-off-by: Alexander Shiyan <shc_work@mail.ru>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>

diff --git a/drivers/pinctrl/freescale/pinctrl-imx.c b/drivers/pinctrl/freescale/pinctrl-imx.c
index 47ccfcc..eccb474 100644
--- a/drivers/pinctrl/freescale/pinctrl-imx.c
+++ b/drivers/pinctrl/freescale/pinctrl-imx.c
@@ -209,9 +209,9 @@ static int imx_pmx_set(struct pinctrl_dev *pctldev, unsigned selector,
 		pin_reg = &info->pin_regs[pin_id];
 
 		if (pin_reg->mux_reg == -1) {
-			dev_err(ipctl->dev, "Pin(%s) does not support mux function\n",
+			dev_dbg(ipctl->dev, "Pin(%s) does not support mux function\n",
 				info->pins[pin_id].name);
-			return -EINVAL;
+			continue;
 		}
 
 		if (info->flags & SHARE_MUX_CONF_REG) {
-- 
cgit v0.10.2


From 0ac3c0a4025f41748a083bdd4970cb3ede802b15 Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Tue, 31 May 2016 14:17:06 -0700
Subject: pinctrl: single: Fix missing flush of posted write for a wakeirq

With many repeated suspend resume cycles, the pin specific wakeirq
may not always work on omaps. This is because the write to enable the
pin interrupt may not have reached the device over the interconnect
before suspend happens.

Let's fix the issue with a flush of posted write with a readback.

Cc: stable@vger.kernel.org
Reported-by: Nishanth Menon <nm@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>

diff --git a/drivers/pinctrl/pinctrl-single.c b/drivers/pinctrl/pinctrl-single.c
index cf9bafa..bfdf720 100644
--- a/drivers/pinctrl/pinctrl-single.c
+++ b/drivers/pinctrl/pinctrl-single.c
@@ -1580,6 +1580,9 @@ static inline void pcs_irq_set(struct pcs_soc_data *pcs_soc,
 		else
 			mask &= ~soc_mask;
 		pcs->write(mask, pcswi->reg);
+
+		/* flush posted write */
+		mask = pcs->read(pcswi->reg);
 		raw_spin_unlock(&pcs->lock);
 	}
 
-- 
cgit v0.10.2


From 9ee8ff4867d07a6429991a0b748fa9c1586a7764 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Mon, 6 Jun 2016 18:56:27 +0200
Subject: gpio: tegra: Make lockdep class file-scoped

Commit b546be0db955 ("gpio: tegra: Get rid of all file scoped global
variables") moved all file scoped variables into the driver-private
structure to allow potentially multiple instances of the driver. The
change also included turning the lockdep class into a driver-private
field, which doesn't work and produces error messages such as this:

	[    0.142310] BUG: key ffff8000fb3f7ab0 not in .data!

Make the lockdep class file-scoped again to fix this issue.

Signed-off-by: Thierry Reding <treding@nvidia.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>

diff --git a/drivers/gpio/gpio-tegra.c b/drivers/gpio/gpio-tegra.c
index ec891a27..661b0e3 100644
--- a/drivers/gpio/gpio-tegra.c
+++ b/drivers/gpio/gpio-tegra.c
@@ -98,7 +98,6 @@ struct tegra_gpio_info {
 	const struct tegra_gpio_soc_config	*soc;
 	struct gpio_chip			gc;
 	struct irq_chip				ic;
-	struct lock_class_key			lock_class;
 	u32					bank_count;
 };
 
@@ -547,6 +546,12 @@ static const struct dev_pm_ops tegra_gpio_pm_ops = {
 	SET_SYSTEM_SLEEP_PM_OPS(tegra_gpio_suspend, tegra_gpio_resume)
 };
 
+/*
+ * This lock class tells lockdep that GPIO irqs are in a different category
+ * than their parents, so it won't report false recursion.
+ */
+static struct lock_class_key gpio_lock_class;
+
 static int tegra_gpio_probe(struct platform_device *pdev)
 {
 	const struct tegra_gpio_soc_config *config;
@@ -660,7 +665,7 @@ static int tegra_gpio_probe(struct platform_device *pdev)
 
 		bank = &tgi->bank_info[GPIO_BANK(gpio)];
 
-		irq_set_lockdep_class(irq, &tgi->lock_class);
+		irq_set_lockdep_class(irq, &gpio_lock_class);
 		irq_set_chip_data(irq, bank);
 		irq_set_chip_and_handler(irq, &tgi->ic, handle_simple_irq);
 	}
-- 
cgit v0.10.2


From 19b5a91764fddcc51b2f8e54a81d4ef231980181 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Sat, 4 Jun 2016 14:35:56 +0800
Subject: pinctrl: tegra: Fix build dependency

I got below build error:
ERROR: "tegra_xusb_padctl_legacy_probe"
 [drivers/phy/tegra/phy-tegra-xusb.ko] undefined!
with below build configuration:
CONFIG_ARCH_TEGRA=y
CONFIG_PINCTRL_TEGRA_XUSB=y
CONFIG_PHY_TEGRA_XUSB=y

The problem is below line in drivers/pinctrl/Makefile
obj-$(CONFIG_PINCTRL_TEGRA)     += tegra/

So even CONFIG_PINCTRL_TEGRA_XUSB=y is set, kbuild still does not compile
the code in drivers/pinctrl/tegra folder if !CONFIG_PINCTRL_TEGRA.

phy-tegra-xusb.c does not use any symbol from pinctrl-tegra.c,
so build pinctrl-tegra.c only when CONFIG_PINCTRL_TEGRA is set.

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Acked-by: Jon Hunter <jonathanh@nvidia.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>

diff --git a/drivers/pinctrl/Makefile b/drivers/pinctrl/Makefile
index e4bc115..42a5c1d 100644
--- a/drivers/pinctrl/Makefile
+++ b/drivers/pinctrl/Makefile
@@ -23,7 +23,7 @@ obj-$(CONFIG_PINCTRL_PISTACHIO)	+= pinctrl-pistachio.o
 obj-$(CONFIG_PINCTRL_ROCKCHIP)	+= pinctrl-rockchip.o
 obj-$(CONFIG_PINCTRL_SINGLE)	+= pinctrl-single.o
 obj-$(CONFIG_PINCTRL_SIRF)	+= sirf/
-obj-$(CONFIG_PINCTRL_TEGRA)	+= tegra/
+obj-$(CONFIG_ARCH_TEGRA)	+= tegra/
 obj-$(CONFIG_PINCTRL_TZ1090)	+= pinctrl-tz1090.o
 obj-$(CONFIG_PINCTRL_TZ1090_PDC)	+= pinctrl-tz1090-pdc.o
 obj-$(CONFIG_PINCTRL_U300)	+= pinctrl-u300.o
diff --git a/drivers/pinctrl/tegra/Makefile b/drivers/pinctrl/tegra/Makefile
index a927379..d9ea2be 100644
--- a/drivers/pinctrl/tegra/Makefile
+++ b/drivers/pinctrl/tegra/Makefile
@@ -1,4 +1,4 @@
-obj-y					+= pinctrl-tegra.o
+obj-$(CONFIG_PINCTRL_TEGRA)		+= pinctrl-tegra.o
 obj-$(CONFIG_PINCTRL_TEGRA20)		+= pinctrl-tegra20.o
 obj-$(CONFIG_PINCTRL_TEGRA30)		+= pinctrl-tegra30.o
 obj-$(CONFIG_PINCTRL_TEGRA114)		+= pinctrl-tegra114.o
-- 
cgit v0.10.2


From 942f64c4c2be19a1b4a0c4295182b42d153899bf Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Mon, 20 Jun 2016 11:53:20 +0300
Subject: team: Fix possible deadlock during team enslave

Both dev_uc_sync_multiple() and dev_mc_sync_multiple() require the
source device to be locked by netif_addr_lock_bh(), but this is missing
in team's enslave function, so add it.

This fixes the following lockdep warning:

Possible interrupt unsafe locking scenario:

        CPU0                    CPU1
        ----                    ----
   lock(_xmit_ETHER/1);
                                local_irq_disable();
                                lock(&(&mc->mca_lock)->rlock);
                                lock(&team_netdev_addr_lock_key);
   <Interrupt>
     lock(&(&mc->mca_lock)->rlock);

  *** DEADLOCK ***

Fixes: cb41c997d444 ("team: team should sync the port's uc/mc addrs when add a port")
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index 2ace126..fdee772 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -1203,8 +1203,10 @@ static int team_port_add(struct team *team, struct net_device *port_dev)
 		goto err_dev_open;
 	}
 
+	netif_addr_lock_bh(dev);
 	dev_uc_sync_multiple(port_dev, dev);
 	dev_mc_sync_multiple(port_dev, dev);
+	netif_addr_unlock_bh(dev);
 
 	err = vlan_vids_add_by_dev(port_dev, dev);
 	if (err) {
-- 
cgit v0.10.2


From d19af0a76444fde629667ecb823c0ee28f9f67d8 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jslaby@suse.cz>
Date: Mon, 20 Jun 2016 11:36:28 +0200
Subject: kcm: fix /proc memory leak

Every open of /proc/net/kcm leaks 16 bytes of memory as is reported by
kmemleak:
unreferenced object 0xffff88059c0e3458 (size 192):
  comm "cat", pid 1401, jiffies 4294935742 (age 310.720s)
  hex dump (first 32 bytes):
    28 45 71 96 05 88 ff ff 00 10 00 00 00 00 00 00  (Eq.............
    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
  backtrace:
    [<ffffffff8156a2de>] kmem_cache_alloc_trace+0x16e/0x230
    [<ffffffff8162a479>] seq_open+0x79/0x1d0
    [<ffffffffa0578510>] kcm_seq_open+0x0/0x30 [kcm]
    [<ffffffff8162a479>] seq_open+0x79/0x1d0
    [<ffffffff8162a8cf>] __seq_open_private+0x2f/0xa0
    [<ffffffff81712548>] seq_open_net+0x38/0xa0
...

It is caused by a missing free in the ->release path. So fix it by
providing seq_release_net as the ->release method.

Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Fixes: cd6e111bf5 (kcm: Add statistics and proc interfaces)
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Tom Herbert <tom@herbertland.com>
Cc: netdev@vger.kernel.org
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/kcm/kcmproc.c b/net/kcm/kcmproc.c
index 7380087..fda7f47 100644
--- a/net/kcm/kcmproc.c
+++ b/net/kcm/kcmproc.c
@@ -241,6 +241,7 @@ static const struct file_operations kcm_seq_fops = {
 	.open		= kcm_seq_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
+	.release	= seq_release_net,
 };
 
 static struct kcm_seq_muxinfo kcm_seq_muxinfo = {
-- 
cgit v0.10.2


From 27777daa8b6df0c19aaf591d1536a586b3eb5e36 Mon Sep 17 00:00:00 2001
From: Jon Paul Maloy <jon.maloy@ericsson.com>
Date: Mon, 20 Jun 2016 09:20:46 -0400
Subject: tipc: unclone unbundled buffers before forwarding

When extracting an individual message from a received "bundle" buffer,
we just create a clone of the base buffer, and adjust it to point into
the right position of the linearized data area of the latter. This works
well for regular message reception, but during periods of extremely high
load it may happen that an extracted buffer, e.g, a connection probe, is
reversed and forwarded through an external interface while the preceding
extracted message is still unhandled. When this happens, the header or
data area of the preceding message will be partially overwritten by a
MAC header, leading to unpredicatable consequences, such as a link
reset.

We now fix this by ensuring that the msg_reverse() function never
returns a cloned buffer, and that the returned buffer always contains
sufficient valid head and tail room to be forwarded.

Reported-by: Erik Hugne <erik.hugne@gmail.com>
Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index 8740930..17201aa 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -41,6 +41,8 @@
 #include "name_table.h"
 
 #define MAX_FORWARD_SIZE 1024
+#define BUF_HEADROOM (LL_MAX_HEADER + 48)
+#define BUF_TAILROOM 16
 
 static unsigned int align(unsigned int i)
 {
@@ -505,6 +507,10 @@ bool tipc_msg_reverse(u32 own_node,  struct sk_buff **skb, int err)
 		msg_set_hdr_sz(hdr, BASIC_H_SIZE);
 	}
 
+	if (skb_cloned(_skb) &&
+	    pskb_expand_head(_skb, BUF_HEADROOM, BUF_TAILROOM, GFP_KERNEL))
+		goto exit;
+
 	/* Now reverse the concerned fields */
 	msg_set_errcode(hdr, err);
 	msg_set_origport(hdr, msg_destport(&ohdr));
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index 024da8a..7cf52fb 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -94,17 +94,6 @@ struct plist;
 
 #define TIPC_MEDIA_INFO_OFFSET	5
 
-/**
- * TIPC message buffer code
- *
- * TIPC message buffer headroom reserves space for the worst-case
- * link-level device header (in case the message is sent off-node).
- *
- * Note: Headroom should be a multiple of 4 to ensure the TIPC header fields
- *       are word aligned for quicker access
- */
-#define BUF_HEADROOM (LL_MAX_HEADER + 48)
-
 struct tipc_skb_cb {
 	void *handle;
 	struct sk_buff *tail;
-- 
cgit v0.10.2


From 93c098af09455ea7bdc6f0f6b08f6ac14fa06cf4 Mon Sep 17 00:00:00 2001
From: Kamal Heib <kamalh@mellanox.com>
Date: Tue, 21 Jun 2016 14:20:02 +0300
Subject: net/mlx4_en: Fix the return value of a failure in VLAN VID add/kill

Modify mlx4_en_vlan_rx_[add/kill]_vid to return error value in case of
failure.

Fixes: 8e586137e6b6 ('net: make vlan ndo_vlan_rx_[add/kill]_vid return error value')
Signed-off-by: Kamal Heib <kamalh@mellanox.com>
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index a2c2536..43f505e 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -406,14 +406,18 @@ static int mlx4_en_vlan_rx_add_vid(struct net_device *dev,
 	mutex_lock(&mdev->state_lock);
 	if (mdev->device_up && priv->port_up) {
 		err = mlx4_SET_VLAN_FLTR(mdev->dev, priv);
-		if (err)
+		if (err) {
 			en_err(priv, "Failed configuring VLAN filter\n");
+			goto out;
+		}
 	}
-	if (mlx4_register_vlan(mdev->dev, priv->port, vid, &idx))
-		en_dbg(HW, priv, "failed adding vlan %d\n", vid);
-	mutex_unlock(&mdev->state_lock);
+	err = mlx4_register_vlan(mdev->dev, priv->port, vid, &idx);
+	if (err)
+		en_dbg(HW, priv, "Failed adding vlan %d\n", vid);
 
-	return 0;
+out:
+	mutex_unlock(&mdev->state_lock);
+	return err;
 }
 
 static int mlx4_en_vlan_rx_kill_vid(struct net_device *dev,
@@ -421,7 +425,7 @@ static int mlx4_en_vlan_rx_kill_vid(struct net_device *dev,
 {
 	struct mlx4_en_priv *priv = netdev_priv(dev);
 	struct mlx4_en_dev *mdev = priv->mdev;
-	int err;
+	int err = 0;
 
 	en_dbg(HW, priv, "Killing VID:%d\n", vid);
 
@@ -438,7 +442,7 @@ static int mlx4_en_vlan_rx_kill_vid(struct net_device *dev,
 	}
 	mutex_unlock(&mdev->state_lock);
 
-	return 0;
+	return err;
 }
 
 static void mlx4_en_u64_to_mac(unsigned char dst_mac[ETH_ALEN + 2], u64 src_mac)
-- 
cgit v0.10.2


From 9d76931180557270796f9631e2c79b9c7bb3c9fb Mon Sep 17 00:00:00 2001
From: Eran Ben Elisha <eranbe@mellanox.com>
Date: Tue, 21 Jun 2016 14:20:03 +0300
Subject: net/mlx4_en: Avoid unregister_netdev at shutdown flow

This allows a clean shutdown, even if some netdev clients do not
release their reference from this netdev. It is enough to release
the HW resources only as the kernel is shutting down.

Fixes: 2ba5fbd62b25 ('net/mlx4_core: Handle AER flow properly')
Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 43f505e..0c0dfd6 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -2036,11 +2036,20 @@ err:
 	return -ENOMEM;
 }
 
+static void mlx4_en_shutdown(struct net_device *dev)
+{
+	rtnl_lock();
+	netif_device_detach(dev);
+	mlx4_en_close(dev);
+	rtnl_unlock();
+}
 
 void mlx4_en_destroy_netdev(struct net_device *dev)
 {
 	struct mlx4_en_priv *priv = netdev_priv(dev);
 	struct mlx4_en_dev *mdev = priv->mdev;
+	bool shutdown = mdev->dev->persist->interface_state &
+					    MLX4_INTERFACE_STATE_SHUTDOWN;
 
 	en_dbg(DRV, priv, "Destroying netdev on port:%d\n", priv->port);
 
@@ -2048,7 +2057,10 @@ void mlx4_en_destroy_netdev(struct net_device *dev)
 	if (priv->registered) {
 		devlink_port_type_clear(mlx4_get_devlink_port(mdev->dev,
 							      priv->port));
-		unregister_netdev(dev);
+		if (shutdown)
+			mlx4_en_shutdown(dev);
+		else
+			unregister_netdev(dev);
 	}
 
 	if (priv->allocated)
@@ -2073,7 +2085,8 @@ void mlx4_en_destroy_netdev(struct net_device *dev)
 	kfree(priv->tx_ring);
 	kfree(priv->tx_cq);
 
-	free_netdev(dev);
+	if (!shutdown)
+		free_netdev(dev);
 }
 
 static int mlx4_en_change_mtu(struct net_device *dev, int new_mtu)
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 372ebfa..546fab0 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -4135,8 +4135,11 @@ static void mlx4_shutdown(struct pci_dev *pdev)
 
 	mlx4_info(persist->dev, "mlx4_shutdown was called\n");
 	mutex_lock(&persist->interface_state_mutex);
-	if (persist->interface_state & MLX4_INTERFACE_STATE_UP)
+	if (persist->interface_state & MLX4_INTERFACE_STATE_UP) {
+		/* Notify mlx4 clients that the kernel is being shut down */
+		persist->interface_state |= MLX4_INTERFACE_STATE_SHUTDOWN;
 		mlx4_unload_one(pdev);
+	}
 	mutex_unlock(&persist->interface_state_mutex);
 }
 
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 80dec87..d46a0e7 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -466,6 +466,7 @@ enum {
 enum {
 	MLX4_INTERFACE_STATE_UP		= 1 << 0,
 	MLX4_INTERFACE_STATE_DELETION	= 1 << 1,
+	MLX4_INTERFACE_STATE_SHUTDOWN	= 1 << 2,
 };
 
 #define MSTR_SM_CHANGE_MASK (MLX4_EQ_PORT_INFO_MSTR_SM_SL_CHANGE_MASK | \
-- 
cgit v0.10.2


From 0b305ccc1b545d3389068ddc3548cbc877513b97 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Mon, 13 Jun 2016 13:48:53 +0800
Subject: gpio: 104-idi-48: Fix missing spin_lock_init for ack_lock

Fixes: 9ae482104cb9 ("gpio: 104-idi-48: Clear pending interrupt once in IRQ handler")
Signed-off-by: Axel Lin <axel.lin@ingics.com>
Acked-by: William Breathitt Gray <vilhelm.gray@gmail.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>

diff --git a/drivers/gpio/gpio-104-idi-48.c b/drivers/gpio/gpio-104-idi-48.c
index 6c75c83..2d2763e 100644
--- a/drivers/gpio/gpio-104-idi-48.c
+++ b/drivers/gpio/gpio-104-idi-48.c
@@ -247,6 +247,7 @@ static int idi_48_probe(struct device *dev, unsigned int id)
 	idi48gpio->irq = irq[id];
 
 	spin_lock_init(&idi48gpio->lock);
+	spin_lock_init(&idi48gpio->ack_lock);
 
 	dev_set_drvdata(dev, idi48gpio);
 
-- 
cgit v0.10.2


From 39243ee771666e02201ba89c1d76fdc28e9cf681 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Wed, 15 Jun 2016 22:57:38 +0200
Subject: gpio: make sure gpiod_to_irq() returns negative on NULL desc

commit 54d77198fdfbc4f0fe11b4252c1d9c97d51a3264
("gpio: bail out silently on NULL descriptors")
doesn't work for gpiod_to_irq(): drivers assume that NULL
descriptors will give negative IRQ numbers in return.

It has been pointed out that returning 0 is NO_IRQ and that
drivers should be amended to treat this as an error, but that
is for the longer term: now let us repair the semantics.

Cc: Maxime Ripard <maxime.ripard@free-electrons.com>
Reported-by: Hans de Goede <hdegoede@redhat.com>
Tested-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>

diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 58d822d..f39bf05 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -2056,7 +2056,14 @@ int gpiod_to_irq(const struct gpio_desc *desc)
 	struct gpio_chip *chip;
 	int offset;
 
-	VALIDATE_DESC(desc);
+	/*
+	 * Cannot VALIDATE_DESC() here as gpiod_to_irq() consumer semantics
+	 * requires this function to not return zero on an invalid descriptor
+	 * but rather a negative error number.
+	 */
+	if (!desc || !desc->gdev || !desc->gdev->chip)
+		return -EINVAL;
+
 	chip = desc->gdev->chip;
 	offset = gpio_chip_hwgpio(desc);
 	if (chip->to_irq) {
-- 
cgit v0.10.2


From c0a1ecb9f4e208f4b75d88fa9669748e3fd705ab Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Thu, 16 Jun 2016 11:55:55 +0200
Subject: gpio: make library immune to error pointers

Most functions that take a GPIO descriptor in need to check the
descriptor for IS_ERR(). We do this mostly in the VALIDATE_DESC()
macro except for the gpiod_to_irq() function which needs special
handling.

Cc: stable@vger.kernel.org
Reported-by: Grygorii Strashko <grygorii.strashko@ti.com>
Reviewed-by: Grygorii Strashko <grygorii.strashko@ti.com>
Acked-by: Alexandre Courbot <acourbot@nvidia.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>

diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index f39bf05..570771e 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -1373,8 +1373,12 @@ done:
 #define VALIDATE_DESC(desc) do { \
 	if (!desc) \
 		return 0; \
+	if (IS_ERR(desc)) {						\
+		pr_warn("%s: invalid GPIO (errorpointer)\n", __func__); \
+		return PTR_ERR(desc); \
+	} \
 	if (!desc->gdev) { \
-		pr_warn("%s: invalid GPIO\n", __func__); \
+		pr_warn("%s: invalid GPIO (no device)\n", __func__); \
 		return -EINVAL; \
 	} \
 	if ( !desc->gdev->chip ) { \
@@ -1386,8 +1390,12 @@ done:
 #define VALIDATE_DESC_VOID(desc) do { \
 	if (!desc) \
 		return; \
+	if (IS_ERR(desc)) {						\
+		pr_warn("%s: invalid GPIO (errorpointer)\n", __func__); \
+		return; \
+	} \
 	if (!desc->gdev) { \
-		pr_warn("%s: invalid GPIO\n", __func__); \
+		pr_warn("%s: invalid GPIO (no device)\n", __func__); \
 		return; \
 	} \
 	if (!desc->gdev->chip) { \
@@ -2061,7 +2069,7 @@ int gpiod_to_irq(const struct gpio_desc *desc)
 	 * requires this function to not return zero on an invalid descriptor
 	 * but rather a negative error number.
 	 */
-	if (!desc || !desc->gdev || !desc->gdev->chip)
+	if (!desc || IS_ERR(desc) || !desc->gdev || !desc->gdev->chip)
 		return -EINVAL;
 
 	chip = desc->gdev->chip;
-- 
cgit v0.10.2


From 972228d87445dc46c0a01f5f3de673ac017626f7 Mon Sep 17 00:00:00 2001
From: Richard Weinberger <richard@nod.at>
Date: Tue, 21 Jun 2016 00:31:50 +0200
Subject: ubi: Make recover_peb power cut aware
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

recover_peb() was never power cut aware,
if a power cut happened right after writing the VID header
upon next attach UBI would blindly use the new partial written
PEB and all data from the old PEB is lost.

In order to make recover_peb() power cut aware, write the new
VID with a proper crc and copy_flag set such that the UBI attach
process will detect whether the new PEB is completely written
or not.
We cannot directly use ubi_eba_atomic_leb_change() since we'd
have to unlock the LEB which is facing a write error.

Cc: stable@vger.kernel.org
Reported-by: Jörg Pfähler <pfaehler@isse.de>
Reviewed-by: Jörg Pfähler <pfaehler@isse.de>
Signed-off-by: Richard Weinberger <richard@nod.at>

diff --git a/drivers/mtd/ubi/eba.c b/drivers/mtd/ubi/eba.c
index 5780dd1..ebf5172 100644
--- a/drivers/mtd/ubi/eba.c
+++ b/drivers/mtd/ubi/eba.c
@@ -575,6 +575,7 @@ static int recover_peb(struct ubi_device *ubi, int pnum, int vol_id, int lnum,
 	int err, idx = vol_id2idx(ubi, vol_id), new_pnum, data_size, tries = 0;
 	struct ubi_volume *vol = ubi->volumes[idx];
 	struct ubi_vid_hdr *vid_hdr;
+	uint32_t crc;
 
 	vid_hdr = ubi_zalloc_vid_hdr(ubi, GFP_NOFS);
 	if (!vid_hdr)
@@ -599,14 +600,8 @@ retry:
 		goto out_put;
 	}
 
-	vid_hdr->sqnum = cpu_to_be64(ubi_next_sqnum(ubi));
-	err = ubi_io_write_vid_hdr(ubi, new_pnum, vid_hdr);
-	if (err) {
-		up_read(&ubi->fm_eba_sem);
-		goto write_error;
-	}
+	ubi_assert(vid_hdr->vol_type == UBI_VID_DYNAMIC);
 
-	data_size = offset + len;
 	mutex_lock(&ubi->buf_mutex);
 	memset(ubi->peb_buf + offset, 0xFF, len);
 
@@ -621,6 +616,19 @@ retry:
 
 	memcpy(ubi->peb_buf + offset, buf, len);
 
+	data_size = offset + len;
+	crc = crc32(UBI_CRC32_INIT, ubi->peb_buf, data_size);
+	vid_hdr->sqnum = cpu_to_be64(ubi_next_sqnum(ubi));
+	vid_hdr->copy_flag = 1;
+	vid_hdr->data_size = cpu_to_be32(data_size);
+	vid_hdr->data_crc = cpu_to_be32(crc);
+	err = ubi_io_write_vid_hdr(ubi, new_pnum, vid_hdr);
+	if (err) {
+		mutex_unlock(&ubi->buf_mutex);
+		up_read(&ubi->fm_eba_sem);
+		goto write_error;
+	}
+
 	err = ubi_io_write_data(ubi, ubi->peb_buf, new_pnum, 0, data_size);
 	if (err) {
 		mutex_unlock(&ubi->buf_mutex);
-- 
cgit v0.10.2


From 1118dce773d84f39ebd51a9fe7261f9169cb056e Mon Sep 17 00:00:00 2001
From: Richard Weinberger <richard@nod.at>
Date: Thu, 16 Jun 2016 23:26:14 +0200
Subject: mm: Export migrate_page_move_mapping and migrate_page_copy

Export these symbols such that UBIFS can implement
->migratepage.

Cc: stable@vger.kernel.org
Signed-off-by: Richard Weinberger <richard@nod.at>
Acked-by: Christoph Hellwig <hch@lst.de>

diff --git a/mm/migrate.c b/mm/migrate.c
index 9baf41c..bd3fdc2 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -431,6 +431,7 @@ int migrate_page_move_mapping(struct address_space *mapping,
 
 	return MIGRATEPAGE_SUCCESS;
 }
+EXPORT_SYMBOL(migrate_page_move_mapping);
 
 /*
  * The expected number of remaining references is the same as that
@@ -586,6 +587,7 @@ void migrate_page_copy(struct page *newpage, struct page *page)
 
 	mem_cgroup_migrate(page, newpage);
 }
+EXPORT_SYMBOL(migrate_page_copy);
 
 /************************************************************
  *                    Migration functions
-- 
cgit v0.10.2


From 4ac1c17b2044a1b4b2fbed74451947e905fc2992 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Thu, 16 Jun 2016 23:26:15 +0200
Subject: UBIFS: Implement ->migratepage()

During page migrations UBIFS might get confused
and the following assert triggers:
[  213.480000] UBIFS assert failed in ubifs_set_page_dirty at 1451 (pid 436)
[  213.490000] CPU: 0 PID: 436 Comm: drm-stress-test Not tainted 4.4.4-00176-geaa802524636-dirty #1008
[  213.490000] Hardware name: Allwinner sun4i/sun5i Families
[  213.490000] [<c0015e70>] (unwind_backtrace) from [<c0012cdc>] (show_stack+0x10/0x14)
[  213.490000] [<c0012cdc>] (show_stack) from [<c02ad834>] (dump_stack+0x8c/0xa0)
[  213.490000] [<c02ad834>] (dump_stack) from [<c0236ee8>] (ubifs_set_page_dirty+0x44/0x50)
[  213.490000] [<c0236ee8>] (ubifs_set_page_dirty) from [<c00fa0bc>] (try_to_unmap_one+0x10c/0x3a8)
[  213.490000] [<c00fa0bc>] (try_to_unmap_one) from [<c00fadb4>] (rmap_walk+0xb4/0x290)
[  213.490000] [<c00fadb4>] (rmap_walk) from [<c00fb1bc>] (try_to_unmap+0x64/0x80)
[  213.490000] [<c00fb1bc>] (try_to_unmap) from [<c010dc28>] (migrate_pages+0x328/0x7a0)
[  213.490000] [<c010dc28>] (migrate_pages) from [<c00d0cb0>] (alloc_contig_range+0x168/0x2f4)
[  213.490000] [<c00d0cb0>] (alloc_contig_range) from [<c010ec00>] (cma_alloc+0x170/0x2c0)
[  213.490000] [<c010ec00>] (cma_alloc) from [<c001a958>] (__alloc_from_contiguous+0x38/0xd8)
[  213.490000] [<c001a958>] (__alloc_from_contiguous) from [<c001ad44>] (__dma_alloc+0x23c/0x274)
[  213.490000] [<c001ad44>] (__dma_alloc) from [<c001ae08>] (arm_dma_alloc+0x54/0x5c)
[  213.490000] [<c001ae08>] (arm_dma_alloc) from [<c035cecc>] (drm_gem_cma_create+0xb8/0xf0)
[  213.490000] [<c035cecc>] (drm_gem_cma_create) from [<c035cf20>] (drm_gem_cma_create_with_handle+0x1c/0xe8)
[  213.490000] [<c035cf20>] (drm_gem_cma_create_with_handle) from [<c035d088>] (drm_gem_cma_dumb_create+0x3c/0x48)
[  213.490000] [<c035d088>] (drm_gem_cma_dumb_create) from [<c0341ed8>] (drm_ioctl+0x12c/0x444)
[  213.490000] [<c0341ed8>] (drm_ioctl) from [<c0121adc>] (do_vfs_ioctl+0x3f4/0x614)
[  213.490000] [<c0121adc>] (do_vfs_ioctl) from [<c0121d30>] (SyS_ioctl+0x34/0x5c)
[  213.490000] [<c0121d30>] (SyS_ioctl) from [<c000f2c0>] (ret_fast_syscall+0x0/0x34)

UBIFS is using PagePrivate() which can have different meanings across
filesystems. Therefore the generic page migration code cannot handle this
case correctly.
We have to implement our own migration function which basically does a
plain copy but also duplicates the page private flag.
UBIFS is not a block device filesystem and cannot use buffer_migrate_page().

Cc: stable@vger.kernel.org
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
[rw: Massaged changelog, build fixes, etc...]
Signed-off-by: Richard Weinberger <richard@nod.at>
Acked-by: Christoph Hellwig <hch@lst.de>

diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 0831697..7bbf420 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -52,6 +52,7 @@
 #include "ubifs.h"
 #include <linux/mount.h>
 #include <linux/slab.h>
+#include <linux/migrate.h>
 
 static int read_block(struct inode *inode, void *addr, unsigned int block,
 		      struct ubifs_data_node *dn)
@@ -1452,6 +1453,26 @@ static int ubifs_set_page_dirty(struct page *page)
 	return ret;
 }
 
+#ifdef CONFIG_MIGRATION
+static int ubifs_migrate_page(struct address_space *mapping,
+		struct page *newpage, struct page *page, enum migrate_mode mode)
+{
+	int rc;
+
+	rc = migrate_page_move_mapping(mapping, newpage, page, NULL, mode, 0);
+	if (rc != MIGRATEPAGE_SUCCESS)
+		return rc;
+
+	if (PagePrivate(page)) {
+		ClearPagePrivate(page);
+		SetPagePrivate(newpage);
+	}
+
+	migrate_page_copy(newpage, page);
+	return MIGRATEPAGE_SUCCESS;
+}
+#endif
+
 static int ubifs_releasepage(struct page *page, gfp_t unused_gfp_flags)
 {
 	/*
@@ -1591,6 +1612,9 @@ const struct address_space_operations ubifs_file_address_operations = {
 	.write_end      = ubifs_write_end,
 	.invalidatepage = ubifs_invalidatepage,
 	.set_page_dirty = ubifs_set_page_dirty,
+#ifdef CONFIG_MIGRATION
+	.migratepage	= ubifs_migrate_page,
+#endif
 	.releasepage    = ubifs_releasepage,
 };
 
-- 
cgit v0.10.2


From 6e914ee629c411d9c6d160399ce7d3472d2c0ec7 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Mon, 20 Jun 2016 19:23:43 +1000
Subject: powerpc: Fix faults caused by radix patching of SLB miss handler

As part of the Radix MMU support we added some feature sections in the
SLB miss handler. These are intended to catch the case that we
incorrectly take an SLB miss when Radix is enabled, and instead of
crashing weirdly they bail out to a well defined exit path and trigger
an oops.

However the way they were written meant the bailout case was enabled by
default until we did CPU feature patching.

On powermacs the early debug prints in setup_system() can cause an SLB
miss, which happens before code patching, and so the SLB miss handler
would incorrectly bailout and crash during boot.

Fix it by inverting the sense of the feature section, so that the code
which is in place at boot is correct for the hash case. Once we
determine we are using Radix - which will never happen on a powermac -
only then do we patch in the bailout case which unconditionally jumps.

Fixes: caca285e5ab4 ("powerpc/mm/radix: Use STD_MMU_64 to properly isolate hash related code")
Reported-by: Denis Kirjanov <kda@linux-powerpc.org>
Tested-by: Denis Kirjanov <kda@linux-powerpc.org>
Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>

diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 4c94406..8bcc1b4 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1399,11 +1399,12 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_RADIX)
 	lwz	r9,PACA_EXSLB+EX_CCR(r13)	/* get saved CR */
 
 	mtlr	r10
-BEGIN_MMU_FTR_SECTION
-	b	2f
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_RADIX)
 	andi.	r10,r12,MSR_RI	/* check for unrecoverable exception */
+BEGIN_MMU_FTR_SECTION
 	beq-	2f
+FTR_SECTION_ELSE
+	b	2f
+ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_RADIX)
 
 .machine	push
 .machine	"power4"
-- 
cgit v0.10.2


From 844e3be47693f92a108cb1fb3b0606bf25e9c7a6 Mon Sep 17 00:00:00 2001
From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com>
Date: Wed, 22 Jun 2016 21:55:01 +0530
Subject: powerpc/bpf/jit: Disable classic BPF JIT on ppc64le

Classic BPF JIT was never ported completely to work on little endian
powerpc. However, it can be enabled and will crash the system when used.
As such, disable use of BPF JIT on ppc64le.

Fixes: 7c105b63bd98 ("powerpc: Add CONFIG_CPU_LITTLE_ENDIAN kernel config option.")
Reported-by: Thadeu Lima de Souza Cascardo <cascardo@redhat.com>
Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Acked-by: Thadeu Lima de Souza Cascardo <cascardo@redhat.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 01f7464..0a9d439 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -128,7 +128,7 @@ config PPC
 	select IRQ_FORCED_THREADING
 	select HAVE_RCU_TABLE_FREE if SMP
 	select HAVE_SYSCALL_TRACEPOINTS
-	select HAVE_CBPF_JIT
+	select HAVE_CBPF_JIT if CPU_BIG_ENDIAN
 	select HAVE_ARCH_JUMP_LABEL
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	select ARCH_HAS_GCOV_PROFILE_ALL
-- 
cgit v0.10.2


From 31b9655f439a26856edca0f3f8daa368a61f16d5 Mon Sep 17 00:00:00 2001
From: Josef Bacik <jbacik@fb.com>
Date: Mon, 11 Apr 2016 17:37:40 -0400
Subject: Btrfs: track transid for delayed ref flushing

Using the offwakecputime bpf script I noticed most of our time was spent waiting
on the delayed ref throttling.  This is what is supposed to happen, but
sometimes the transaction can commit and then we're waiting for throttling that
doesn't matter anymore.  So change this stuff to be a little smarter by tracking
the transid we were in when we initiated the throttling.  If the transaction we
get is different then we can just bail out.  This resulted in a 50% speedup in
my fs_mark test, and reduced the amount of time spent throttling by 60 seconds
over the entire run (which is about 30 minutes).  Thanks,

Signed-off-by: Josef Bacik <jbacik@fb.com>
Signed-off-by: Chris Mason <clm@fb.com>

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 101c3cf..4274a7b 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2518,7 +2518,7 @@ void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
 int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
 			   struct btrfs_root *root, unsigned long count);
 int btrfs_async_run_delayed_refs(struct btrfs_root *root,
-				 unsigned long count, int wait);
+				 unsigned long count, u64 transid, int wait);
 int btrfs_lookup_data_extent(struct btrfs_root *root, u64 start, u64 len);
 int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
 			     struct btrfs_root *root, u64 bytenr,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 29e5d00..ecfa520 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2835,6 +2835,7 @@ int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans,
 
 struct async_delayed_refs {
 	struct btrfs_root *root;
+	u64 transid;
 	int count;
 	int error;
 	int sync;
@@ -2850,9 +2851,16 @@ static void delayed_ref_async_start(struct btrfs_work *work)
 
 	async = container_of(work, struct async_delayed_refs, work);
 
-	trans = btrfs_join_transaction(async->root);
+	trans = btrfs_attach_transaction(async->root);
 	if (IS_ERR(trans)) {
-		async->error = PTR_ERR(trans);
+		if (PTR_ERR(trans) != -ENOENT)
+			async->error = PTR_ERR(trans);
+		goto done;
+	}
+
+	/* Don't bother flushing if we got into a different transaction */
+	if (trans->transid != async->transid) {
+		btrfs_end_transaction(trans, async->root);
 		goto done;
 	}
 
@@ -2876,7 +2884,7 @@ done:
 }
 
 int btrfs_async_run_delayed_refs(struct btrfs_root *root,
-				 unsigned long count, int wait)
+				 unsigned long count, u64 transid, int wait)
 {
 	struct async_delayed_refs *async;
 	int ret;
@@ -2888,6 +2896,7 @@ int btrfs_async_run_delayed_refs(struct btrfs_root *root,
 	async->root = root->fs_info->tree_root;
 	async->count = count;
 	async->error = 0;
+	async->transid = transid;
 	if (wait)
 		async->sync = 1;
 	else
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index bb62418..78582e3 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4558,6 +4558,7 @@ delete:
 			BUG_ON(ret);
 			if (btrfs_should_throttle_delayed_refs(trans, root))
 				btrfs_async_run_delayed_refs(root,
+							     trans->transid,
 					trans->delayed_ref_updates * 2, 0);
 			if (be_nice) {
 				if (truncate_space_check(trans, root,
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 7658457..948aa18 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -818,6 +818,7 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
 {
 	struct btrfs_transaction *cur_trans = trans->transaction;
 	struct btrfs_fs_info *info = root->fs_info;
+	u64 transid = trans->transid;
 	unsigned long cur = trans->delayed_ref_updates;
 	int lock = (trans->type != TRANS_JOIN_NOLOCK);
 	int err = 0;
@@ -905,7 +906,7 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
 
 	kmem_cache_free(btrfs_trans_handle_cachep, trans);
 	if (must_run_delayed_refs) {
-		btrfs_async_run_delayed_refs(root, cur,
+		btrfs_async_run_delayed_refs(root, cur, transid,
 					     must_run_delayed_refs == 1);
 	}
 	return err;
-- 
cgit v0.10.2


From 0f873eca82a0bee45f38862e0ea2ac7b1c2a31bd Mon Sep 17 00:00:00 2001
From: Chris Mason <clm@fb.com>
Date: Wed, 27 Apr 2016 09:59:38 -0400
Subject: btrfs: fix deadlock in delayed_ref_async_start

"Btrfs: track transid for delayed ref flushing" was deadlocking on
btrfs_attach_transaction because its not safe to call from the async
delayed ref start code.  This commit brings back btrfs_join_transaction
instead and checks for a blocked commit.

Signed-off-by: Josef Bacik <jbacik@fb.com>
Signed-off-by: Chris Mason <clm@fb.com>

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index ecfa520..82b912a 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2851,16 +2851,13 @@ static void delayed_ref_async_start(struct btrfs_work *work)
 
 	async = container_of(work, struct async_delayed_refs, work);
 
-	trans = btrfs_attach_transaction(async->root);
-	if (IS_ERR(trans)) {
-		if (PTR_ERR(trans) != -ENOENT)
-			async->error = PTR_ERR(trans);
+	/* if the commit is already started, we don't need to wait here */
+	if (btrfs_transaction_blocked(async->root->fs_info))
 		goto done;
-	}
 
-	/* Don't bother flushing if we got into a different transaction */
-	if (trans->transid != async->transid) {
-		btrfs_end_transaction(trans, async->root);
+	trans = btrfs_join_transaction(async->root);
+	if (IS_ERR(trans)) {
+		async->error = PTR_ERR(trans);
 		goto done;
 	}
 
@@ -2869,10 +2866,15 @@ static void delayed_ref_async_start(struct btrfs_work *work)
 	 * wait on delayed refs
 	 */
 	trans->sync = true;
+
+	/* Don't bother flushing if we got into a different transaction */
+	if (trans->transid > async->transid)
+		goto end;
+
 	ret = btrfs_run_delayed_refs(trans, async->root, async->count);
 	if (ret)
 		async->error = ret;
-
+end:
 	ret = btrfs_end_transaction(trans, async->root);
 	if (ret && !async->error)
 		async->error = ret;
-- 
cgit v0.10.2


From c6887cd11149d7325328749f06719071e6c725c6 Mon Sep 17 00:00:00 2001
From: Josef Bacik <jbacik@fb.com>
Date: Fri, 25 Mar 2016 13:26:00 -0400
Subject: Btrfs: don't do nocow check unless we have to

Before we write into prealloc/nocow space we have to make sure that there are no
references to the extents we are writing into, which means checking the extent
tree and csum tree in the case of nocow.  So we don't want to do the nocow dance
unless we can't reserve data space, since it's a serious drag on performance.
With the following sequence

fallocate -l10737418240 /mnt/btrfs-test/file
cp --reflink /mnt/btrfs-test/file /mnt/btrfs-test/link
fio --name=randwrite --rw=randwrite --bs=4k --filename=/mnt/btrfs-test/file \
	--end_fsync=1

we get the worst case scenario where we have to fall back on to doing the check
anyway.

Without this patch
lat (usec): min=5, max=111598, avg=27.65, stdev=124.51
write: io=10240MB, bw=126876KB/s, iops=31718, runt= 82646msec

With this patch
lat (usec): min=3, max=91210, avg=14.09, stdev=110.62
write: io=10240MB, bw=212753KB/s, iops=53188, runt= 49286msec

We get twice the throughput, half of the runtime, and half of the average
latency.  Thanks,

Signed-off-by: Josef Bacik <jbacik@fb.com>
[ PAGE_CACHE_ removal related fixups ]
Signed-off-by: David Sterba <dsterba@suse.com>
Signed-off-by: Chris Mason <clm@fb.com>

diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 159a934..8a538ab 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1534,30 +1534,30 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
 		reserve_bytes = round_up(write_bytes + sector_offset,
 				root->sectorsize);
 
-		if ((BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
-					      BTRFS_INODE_PREALLOC)) &&
-		    check_can_nocow(inode, pos, &write_bytes) > 0) {
-			/*
-			 * For nodata cow case, no need to reserve
-			 * data space.
-			 */
-			only_release_metadata = true;
-			/*
-			 * our prealloc extent may be smaller than
-			 * write_bytes, so scale down.
-			 */
-			num_pages = DIV_ROUND_UP(write_bytes + offset,
-						 PAGE_SIZE);
-			reserve_bytes = round_up(write_bytes + sector_offset,
-					root->sectorsize);
-			goto reserve_metadata;
-		}
-
 		ret = btrfs_check_data_free_space(inode, pos, write_bytes);
-		if (ret < 0)
-			break;
+		if (ret < 0) {
+			if ((BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
+						      BTRFS_INODE_PREALLOC)) &&
+			    check_can_nocow(inode, pos, &write_bytes) > 0) {
+				/*
+				 * For nodata cow case, no need to reserve
+				 * data space.
+				 */
+				only_release_metadata = true;
+				/*
+				 * our prealloc extent may be smaller than
+				 * write_bytes, so scale down.
+				 */
+				num_pages = DIV_ROUND_UP(write_bytes + offset,
+							 PAGE_SIZE);
+				reserve_bytes = round_up(write_bytes +
+							 sector_offset,
+							 root->sectorsize);
+			} else {
+				break;
+			}
+		}
 
-reserve_metadata:
 		ret = btrfs_delalloc_reserve_metadata(inode, reserve_bytes);
 		if (ret) {
 			if (!only_release_metadata)
-- 
cgit v0.10.2


From fefb62455f6becc87d2d8f25ba2df961add5d06c Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Tue, 21 Jun 2016 01:52:36 +0300
Subject: MAINTAINERS: belong Documentation/pinctrl.txt properly

I'm pretty sure that Documentation/pinctrl.txt would be better maintained by
pinctrl subsystem.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>

diff --git a/MAINTAINERS b/MAINTAINERS
index e1b090f..c447bdc 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8959,6 +8959,7 @@ L:	linux-gpio@vger.kernel.org
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-pinctrl.git
 S:	Maintained
 F:	Documentation/devicetree/bindings/pinctrl/
+F:	Documentation/pinctrl.txt
 F:	drivers/pinctrl/
 F:	include/linux/pinctrl/
 
-- 
cgit v0.10.2


From bce271f255dae8335dc4d2ee2c4531e09cc67f5a Mon Sep 17 00:00:00 2001
From: Oliver Hartkopp <socketcan@hartkopp.net>
Date: Tue, 21 Jun 2016 12:14:07 +0200
Subject: can: fix handling of unmodifiable configuration options fix

With upstream commit bb208f144cf3f59 (can: fix handling of unmodifiable
configuration options) a new can_validate() function was introduced.

When invoking 'ip link set can0 type can' without any configuration data
can_validate() tries to validate the content without taking into account that
there's totally no content. This patch adds a check for missing content.

Reported-by: ajneu <ajneu1@gmail.com>
Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
Cc: <stable@vger.kernel.org>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>

diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c
index 910c12e..348dd50 100644
--- a/drivers/net/can/dev.c
+++ b/drivers/net/can/dev.c
@@ -798,6 +798,9 @@ static int can_validate(struct nlattr *tb[], struct nlattr *data[])
 	 * - control mode with CAN_CTRLMODE_FD set
 	 */
 
+	if (!data)
+		return 0;
+
 	if (data[IFLA_CAN_CTRLMODE]) {
 		struct can_ctrlmode *cm = nla_data(data[IFLA_CAN_CTRLMODE]);
 
-- 
cgit v0.10.2


From 25e1ed6e64f52a692ba3191c4fde650aab3ecc07 Mon Sep 17 00:00:00 2001
From: Oliver Hartkopp <socketcan@hartkopp.net>
Date: Tue, 21 Jun 2016 15:45:47 +0200
Subject: can: fix oops caused by wrong rtnl dellink usage

For 'real' hardware CAN devices the netlink interface is used to set CAN
specific communication parameters. Real CAN hardware can not be created nor
removed with the ip tool ...

This patch adds a private dellink function for the CAN device driver interface
that does just nothing.

It's a follow up to commit 993e6f2fd ("can: fix oops caused by wrong rtnl
newlink usage") but for dellink.

Reported-by: ajneu <ajneu1@gmail.com>
Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
Cc: <stable@vger.kernel.org>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>

diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c
index 348dd50..ad535a8 100644
--- a/drivers/net/can/dev.c
+++ b/drivers/net/can/dev.c
@@ -1011,6 +1011,11 @@ static int can_newlink(struct net *src_net, struct net_device *dev,
 	return -EOPNOTSUPP;
 }
 
+static void can_dellink(struct net_device *dev, struct list_head *head)
+{
+	return;
+}
+
 static struct rtnl_link_ops can_link_ops __read_mostly = {
 	.kind		= "can",
 	.maxtype	= IFLA_CAN_MAX,
@@ -1019,6 +1024,7 @@ static struct rtnl_link_ops can_link_ops __read_mostly = {
 	.validate	= can_validate,
 	.newlink	= can_newlink,
 	.changelink	= can_changelink,
+	.dellink	= can_dellink,
 	.get_size	= can_get_size,
 	.fill_info	= can_fill_info,
 	.get_xstats_size = can_get_xstats_size,
-- 
cgit v0.10.2


From b41aa4f8476545e2b663b1549759a8c3a66f47b0 Mon Sep 17 00:00:00 2001
From: Cristina Ciocan <cristina.ciocan@intel.com>
Date: Wed, 22 Jun 2016 14:17:19 +0300
Subject: pinctrl: baytrail: Fix mingled clock pins

Fix plt clock 3, 4 and 5 pins, which were not in the proper order.

Signed-off-by: Cristina Ciocan <cristina.ciocan@intel.com>
Acked-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>

diff --git a/drivers/pinctrl/intel/pinctrl-baytrail.c b/drivers/pinctrl/intel/pinctrl-baytrail.c
index 677a811..7abfd42 100644
--- a/drivers/pinctrl/intel/pinctrl-baytrail.c
+++ b/drivers/pinctrl/intel/pinctrl-baytrail.c
@@ -401,9 +401,9 @@ static const struct byt_simple_func_mux byt_score_sata_mux[] = {
 static const unsigned int byt_score_plt_clk0_pins[] = { 96 };
 static const unsigned int byt_score_plt_clk1_pins[] = { 97 };
 static const unsigned int byt_score_plt_clk2_pins[] = { 98 };
-static const unsigned int byt_score_plt_clk4_pins[] = { 99 };
-static const unsigned int byt_score_plt_clk5_pins[] = { 100 };
-static const unsigned int byt_score_plt_clk3_pins[] = { 101 };
+static const unsigned int byt_score_plt_clk3_pins[] = { 99 };
+static const unsigned int byt_score_plt_clk4_pins[] = { 100 };
+static const unsigned int byt_score_plt_clk5_pins[] = { 101 };
 static const struct byt_simple_func_mux byt_score_plt_clk_mux[] = {
 	SIMPLE_FUNC("plt_clk", 1),
 };
-- 
cgit v0.10.2


From 71873a9b38d1cc6c93e2962149a7bb7272a7cb66 Mon Sep 17 00:00:00 2001
From: Jimmy Assarsson <jimmyassarsson@gmail.com>
Date: Thu, 23 Jun 2016 07:57:21 +0200
Subject: can: kvaser_usb: Add support for more Kvaser Leaf v2 devices

This patch adds support for Kvaser Leaf Light HS v2 OEM, Mini PCI
Express 2xHS and USBcan Light 2xHS.

Signed-off-by: Jimmy Assarsson <jimmyassarsson@gmail.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>

diff --git a/drivers/net/can/usb/Kconfig b/drivers/net/can/usb/Kconfig
index 2ff0df3..8483a40 100644
--- a/drivers/net/can/usb/Kconfig
+++ b/drivers/net/can/usb/Kconfig
@@ -47,6 +47,8 @@ config CAN_KVASER_USB
 	    - Kvaser USBcan R
 	    - Kvaser Leaf Light v2
 	    - Kvaser Mini PCI Express HS
+	    - Kvaser Mini PCI Express 2xHS
+	    - Kvaser USBcan Light 2xHS
 	    - Kvaser USBcan II HS/HS
 	    - Kvaser USBcan II HS/LS
 	    - Kvaser USBcan Rugged ("USBcan Rev B")
diff --git a/drivers/net/can/usb/kvaser_usb.c b/drivers/net/can/usb/kvaser_usb.c
index 022bfa1..6f1f3b6 100644
--- a/drivers/net/can/usb/kvaser_usb.c
+++ b/drivers/net/can/usb/kvaser_usb.c
@@ -59,11 +59,14 @@
 #define USB_CAN_R_PRODUCT_ID		39
 #define USB_LEAF_LITE_V2_PRODUCT_ID	288
 #define USB_MINI_PCIE_HS_PRODUCT_ID	289
+#define USB_LEAF_LIGHT_HS_V2_OEM_PRODUCT_ID 290
+#define USB_USBCAN_LIGHT_2HS_PRODUCT_ID	291
+#define USB_MINI_PCIE_2HS_PRODUCT_ID	292
 
 static inline bool kvaser_is_leaf(const struct usb_device_id *id)
 {
 	return id->idProduct >= USB_LEAF_DEVEL_PRODUCT_ID &&
-	       id->idProduct <= USB_MINI_PCIE_HS_PRODUCT_ID;
+	       id->idProduct <= USB_MINI_PCIE_2HS_PRODUCT_ID;
 }
 
 /* Kvaser USBCan-II devices */
@@ -537,6 +540,9 @@ static const struct usb_device_id kvaser_usb_table[] = {
 		.driver_info = KVASER_HAS_TXRX_ERRORS },
 	{ USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LITE_V2_PRODUCT_ID) },
 	{ USB_DEVICE(KVASER_VENDOR_ID, USB_MINI_PCIE_HS_PRODUCT_ID) },
+	{ USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LIGHT_HS_V2_OEM_PRODUCT_ID) },
+	{ USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_LIGHT_2HS_PRODUCT_ID) },
+	{ USB_DEVICE(KVASER_VENDOR_ID, USB_MINI_PCIE_2HS_PRODUCT_ID) },
 
 	/* USBCANII family IDs */
 	{ USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN2_PRODUCT_ID),
-- 
cgit v0.10.2


From 055ddaace03580455a7b7dbea8e93d62acee61fc Mon Sep 17 00:00:00 2001
From: Mathias Krause <minipli@googlemail.com>
Date: Wed, 22 Jun 2016 20:29:37 +0200
Subject: crypto: user - re-add size check for CRYPTO_MSG_GETALG

Commit 9aa867e46565 ("crypto: user - Add CRYPTO_MSG_DELRNG")
accidentally removed the minimum size check for CRYPTO_MSG_GETALG
netlink messages. This allows userland to send a truncated
CRYPTO_MSG_GETALG message as short as a netlink header only making
crypto_report() operate on uninitialized memory by accessing data
beyond the end of the netlink message.

Fix this be re-adding the minimum required size of CRYPTO_MSG_GETALG
messages to the crypto_msg_min[] array.

Fixes: 9aa867e46565 ("crypto: user - Add CRYPTO_MSG_DELRNG")
Cc: stable@vger.kernel.org	# v4.2
Signed-off-by: Mathias Krause <minipli@googlemail.com>
Cc: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

diff --git a/crypto/crypto_user.c b/crypto/crypto_user.c
index 43fe85f..7097a33 100644
--- a/crypto/crypto_user.c
+++ b/crypto/crypto_user.c
@@ -455,6 +455,7 @@ static const int crypto_msg_min[CRYPTO_NR_MSGTYPES] = {
 	[CRYPTO_MSG_NEWALG	- CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg),
 	[CRYPTO_MSG_DELALG	- CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg),
 	[CRYPTO_MSG_UPDATEALG	- CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg),
+	[CRYPTO_MSG_GETALG	- CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg),
 	[CRYPTO_MSG_DELRNG	- CRYPTO_MSG_BASE] = 0,
 };
 
-- 
cgit v0.10.2


From 842775f1509054ea969f1787f38d6a0ec2ccfaba Mon Sep 17 00:00:00 2001
From: Ross Lagerwall <ross.lagerwall@citrix.com>
Date: Tue, 10 May 2016 10:27:54 +0100
Subject: xen/balloon: Fix declared-but-not-defined warning

Fix a declared-but-not-defined warning when building with
XEN_BALLOON_MEMORY_HOTPLUG=n. This fixes a regression introduced by
commit dfd74a1edfab ("xen/balloon: Fix crash when ballooning on x86 32
bit PAE").

Signed-off-by: Ross Lagerwall <ross.lagerwall@citrix.com>
Acked-by: Juergen Gross <jgross@suse.com>
Signed-off-by: David Vrabel <david.vrabel@citrix.com>

diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index d46839f..e4db19e 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -151,8 +151,6 @@ static DECLARE_WAIT_QUEUE_HEAD(balloon_wq);
 static void balloon_process(struct work_struct *work);
 static DECLARE_DELAYED_WORK(balloon_worker, balloon_process);
 
-static void release_memory_resource(struct resource *resource);
-
 /* When ballooning out (allocating memory to return to Xen) we don't really
    want the kernel to try too hard since that can trigger the oom killer. */
 #define GFP_BALLOON \
@@ -248,6 +246,19 @@ static enum bp_state update_schedule(enum bp_state state)
 }
 
 #ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
+static void release_memory_resource(struct resource *resource)
+{
+	if (!resource)
+		return;
+
+	/*
+	 * No need to reset region to identity mapped since we now
+	 * know that no I/O can be in this region
+	 */
+	release_resource(resource);
+	kfree(resource);
+}
+
 static struct resource *additional_memory_resource(phys_addr_t size)
 {
 	struct resource *res;
@@ -286,19 +297,6 @@ static struct resource *additional_memory_resource(phys_addr_t size)
 	return res;
 }
 
-static void release_memory_resource(struct resource *resource)
-{
-	if (!resource)
-		return;
-
-	/*
-	 * No need to reset region to identity mapped since we now
-	 * know that no I/O can be in this region
-	 */
-	release_resource(resource);
-	kfree(resource);
-}
-
 static enum bp_state reserve_additional_memory(void)
 {
 	long credit;
-- 
cgit v0.10.2


From 1cf38741308c64d08553602b3374fb39224eeb5a Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Thu, 23 Jun 2016 07:12:27 +0200
Subject: x86/xen: fix upper bound of pmd loop in xen_cleanhighmap()

xen_cleanhighmap() is operating on level2_kernel_pgt only. The upper
bound of the loop setting non-kernel-image entries to zero should not
exceed the size of level2_kernel_pgt.

Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Juergen Gross <jgross@suse.com>
Signed-off-by: David Vrabel <david.vrabel@citrix.com>

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 478a2de..2693b7e 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1113,7 +1113,7 @@ static void __init xen_cleanhighmap(unsigned long vaddr,
 
 	/* NOTE: The loop is more greedy than the cleanup_highmap variant.
 	 * We include the PMD passed in on _both_ boundaries. */
-	for (; vaddr <= vaddr_end && (pmd < (level2_kernel_pgt + PAGE_SIZE));
+	for (; vaddr <= vaddr_end && (pmd < (level2_kernel_pgt + PTRS_PER_PMD));
 			pmd++, vaddr += PMD_SIZE) {
 		if (pmd_none(*pmd))
 			continue;
-- 
cgit v0.10.2


From 02ef871ecac290919ea0c783d05da7eedeffc10e Mon Sep 17 00:00:00 2001
From: Andrey Grodzovsky <andrey2805@gmail.com>
Date: Tue, 21 Jun 2016 14:26:36 -0400
Subject: xen/pciback: Fix conf_space read/write overlap check.

Current overlap check is evaluating to false a case where a filter
field is fully contained (proper subset) of a r/w request.  This
change applies classical overlap check instead to include all the
scenarios.

More specifically, for (Hilscher GmbH CIFX 50E-DP(M/S)) device driver
the logic is such that the entire confspace is read and written in 4
byte chunks. In this case as an example, CACHE_LINE_SIZE,
LATENCY_TIMER and PCI_BIST are arriving together in one call to
xen_pcibk_config_write() with offset == 0xc and size == 4.  With the
exsisting overlap check the LATENCY_TIMER field (offset == 0xd, length
== 1) is fully contained in the write request and hence is excluded
from write, which is incorrect.

Signed-off-by: Andrey Grodzovsky <andrey2805@gmail.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Reviewed-by: Jan Beulich <JBeulich@suse.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: David Vrabel <david.vrabel@citrix.com>

diff --git a/drivers/xen/xen-pciback/conf_space.c b/drivers/xen/xen-pciback/conf_space.c
index 8e67336..6a25533 100644
--- a/drivers/xen/xen-pciback/conf_space.c
+++ b/drivers/xen/xen-pciback/conf_space.c
@@ -183,8 +183,7 @@ int xen_pcibk_config_read(struct pci_dev *dev, int offset, int size,
 		field_start = OFFSET(cfg_entry);
 		field_end = OFFSET(cfg_entry) + field->size;
 
-		if ((req_start >= field_start && req_start < field_end)
-		    || (req_end > field_start && req_end <= field_end)) {
+		 if (req_end > field_start && field_end > req_start) {
 			err = conf_space_read(dev, cfg_entry, field_start,
 					      &tmp_val);
 			if (err)
@@ -230,8 +229,7 @@ int xen_pcibk_config_write(struct pci_dev *dev, int offset, int size, u32 value)
 		field_start = OFFSET(cfg_entry);
 		field_end = OFFSET(cfg_entry) + field->size;
 
-		if ((req_start >= field_start && req_start < field_end)
-		    || (req_end > field_start && req_end <= field_end)) {
+		 if (req_end > field_start && field_end > req_start) {
 			tmp_val = 0;
 
 			err = xen_pcibk_config_read(dev, field_start,
-- 
cgit v0.10.2


From 5ce91714b0d8c0a3ff9b858966721f508351cf4c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pali=20Roh=C3=A1r?= <pali.rohar@gmail.com>
Date: Sat, 18 Jun 2016 00:54:47 +0200
Subject: hwmon: (dell-smm) Cache fan_type() calls and change fan detection
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

On more Dell machines (e.g. Dell Precision M3800) fan_type() call is too
expensive (CPU is too long in SMM mode) and cause kernel to hang. This is
bug in Dell SMM or BIOS.

This patch caches type for each fan (as it should not change) and changes
the way how fan presense is detected. First it try function fan_status()
as was before commit f989e55452c7 ("i8k: Add support for fan labels"). And
if that fails fallback to fan_type(). *_status() functions can fail in case
fan is not currently accessible (e.g. present on GPU which is currently
turned off).

Reported-by: Tolga Cakir <cevelnet@gmail.com>
Signed-off-by: Pali Rohár <pali.rohar@gmail.com>
Link: https://bugzilla.kernel.org/show_bug.cgi?id=112021
Cc: stable@vger.kernel.org # v4.0+, will need backport
Tested-by: Tolga Cakir <cevelnet@gmail.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>

diff --git a/drivers/hwmon/dell-smm-hwmon.c b/drivers/hwmon/dell-smm-hwmon.c
index 4bbc587..2ac87d5 100644
--- a/drivers/hwmon/dell-smm-hwmon.c
+++ b/drivers/hwmon/dell-smm-hwmon.c
@@ -238,7 +238,7 @@ static int i8k_get_fan_speed(int fan)
 /*
  * Read the fan type.
  */
-static int i8k_get_fan_type(int fan)
+static int _i8k_get_fan_type(int fan)
 {
 	struct smm_regs regs = { .eax = I8K_SMM_GET_FAN_TYPE, };
 
@@ -249,6 +249,17 @@ static int i8k_get_fan_type(int fan)
 	return i8k_smm(&regs) ? : regs.eax & 0xff;
 }
 
+static int i8k_get_fan_type(int fan)
+{
+	/* I8K_SMM_GET_FAN_TYPE SMM call is expensive, so cache values */
+	static int types[2] = { INT_MIN, INT_MIN };
+
+	if (types[fan] == INT_MIN)
+		types[fan] = _i8k_get_fan_type(fan);
+
+	return types[fan];
+}
+
 /*
  * Read the fan nominal rpm for specific fan speed.
  */
@@ -782,13 +793,17 @@ static int __init i8k_init_hwmon(void)
 	if (err >= 0)
 		i8k_hwmon_flags |= I8K_HWMON_HAVE_TEMP4;
 
-	/* First fan attributes, if fan type is OK */
-	err = i8k_get_fan_type(0);
+	/* First fan attributes, if fan status or type is OK */
+	err = i8k_get_fan_status(0);
+	if (err < 0)
+		err = i8k_get_fan_type(0);
 	if (err >= 0)
 		i8k_hwmon_flags |= I8K_HWMON_HAVE_FAN1;
 
-	/* Second fan attributes, if fan type is OK */
-	err = i8k_get_fan_type(1);
+	/* Second fan attributes, if fan status or type is OK */
+	err = i8k_get_fan_status(1);
+	if (err < 0)
+		err = i8k_get_fan_type(1);
 	if (err >= 0)
 		i8k_hwmon_flags |= I8K_HWMON_HAVE_FAN2;
 
-- 
cgit v0.10.2


From d6b186c1e2d852a92c43f090d0d8fad4704d51ef Mon Sep 17 00:00:00 2001
From: David Vrabel <david.vrabel@citrix.com>
Date: Tue, 17 May 2016 15:54:50 +0100
Subject: x86/xen: avoid m2p lookup when setting early page table entries

When page tables entries are set using xen_set_pte_init() during early
boot there is no page fault handler that could handle a fault when
performing an M2P lookup.

In 64 bit guests (usually dom0) early_ioremap() would fault in
xen_set_pte_init() because an M2P lookup faults because the MFN is in
MMIO space and not mapped in the M2P.  This lookup is done to see if
the PFN in in the range used for the initial page table pages, so that
the PTE may be set as read-only.

The M2P lookup can be avoided by moving the check (and clear of RW)
earlier when the PFN is still available.

Reported-by: Kevin Moraga <kmoragas@riseup.net>
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Reviewed-by: Juergen Gross <jgross@suse.com>

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 2693b7e..6743371 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1551,41 +1551,6 @@ static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd)
 #endif
 }
 
-#ifdef CONFIG_X86_32
-static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte)
-{
-	/* If there's an existing pte, then don't allow _PAGE_RW to be set */
-	if (pte_val_ma(*ptep) & _PAGE_PRESENT)
-		pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) &
-			       pte_val_ma(pte));
-
-	return pte;
-}
-#else /* CONFIG_X86_64 */
-static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte)
-{
-	unsigned long pfn;
-
-	if (xen_feature(XENFEAT_writable_page_tables) ||
-	    xen_feature(XENFEAT_auto_translated_physmap) ||
-	    xen_start_info->mfn_list >= __START_KERNEL_map)
-		return pte;
-
-	/*
-	 * Pages belonging to the initial p2m list mapped outside the default
-	 * address range must be mapped read-only. This region contains the
-	 * page tables for mapping the p2m list, too, and page tables MUST be
-	 * mapped read-only.
-	 */
-	pfn = pte_pfn(pte);
-	if (pfn >= xen_start_info->first_p2m_pfn &&
-	    pfn < xen_start_info->first_p2m_pfn + xen_start_info->nr_p2m_frames)
-		pte = __pte_ma(pte_val_ma(pte) & ~_PAGE_RW);
-
-	return pte;
-}
-#endif /* CONFIG_X86_64 */
-
 /*
  * Init-time set_pte while constructing initial pagetables, which
  * doesn't allow RO page table pages to be remapped RW.
@@ -1600,13 +1565,37 @@ static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte)
  * so always write the PTE directly and rely on Xen trapping and
  * emulating any updates as necessary.
  */
-static void __init xen_set_pte_init(pte_t *ptep, pte_t pte)
+__visible pte_t xen_make_pte_init(pteval_t pte)
 {
-	if (pte_mfn(pte) != INVALID_P2M_ENTRY)
-		pte = mask_rw_pte(ptep, pte);
-	else
-		pte = __pte_ma(0);
+#ifdef CONFIG_X86_64
+	unsigned long pfn;
+
+	/*
+	 * Pages belonging to the initial p2m list mapped outside the default
+	 * address range must be mapped read-only. This region contains the
+	 * page tables for mapping the p2m list, too, and page tables MUST be
+	 * mapped read-only.
+	 */
+	pfn = (pte & PTE_PFN_MASK) >> PAGE_SHIFT;
+	if (xen_start_info->mfn_list < __START_KERNEL_map &&
+	    pfn >= xen_start_info->first_p2m_pfn &&
+	    pfn < xen_start_info->first_p2m_pfn + xen_start_info->nr_p2m_frames)
+		pte &= ~_PAGE_RW;
+#endif
+	pte = pte_pfn_to_mfn(pte);
+	return native_make_pte(pte);
+}
+PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte_init);
 
+static void __init xen_set_pte_init(pte_t *ptep, pte_t pte)
+{
+#ifdef CONFIG_X86_32
+	/* If there's an existing pte, then don't allow _PAGE_RW to be set */
+	if (pte_mfn(pte) != INVALID_P2M_ENTRY
+	    && pte_val_ma(*ptep) & _PAGE_PRESENT)
+		pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) &
+			       pte_val_ma(pte));
+#endif
 	native_set_pte(ptep, pte);
 }
 
@@ -2407,6 +2396,7 @@ static void __init xen_post_allocator_init(void)
 	pv_mmu_ops.alloc_pud = xen_alloc_pud;
 	pv_mmu_ops.release_pud = xen_release_pud;
 #endif
+	pv_mmu_ops.make_pte = PV_CALLEE_SAVE(xen_make_pte);
 
 #ifdef CONFIG_X86_64
 	pv_mmu_ops.write_cr3 = &xen_write_cr3;
@@ -2455,7 +2445,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
 	.pte_val = PV_CALLEE_SAVE(xen_pte_val),
 	.pgd_val = PV_CALLEE_SAVE(xen_pgd_val),
 
-	.make_pte = PV_CALLEE_SAVE(xen_make_pte),
+	.make_pte = PV_CALLEE_SAVE(xen_make_pte_init),
 	.make_pgd = PV_CALLEE_SAVE(xen_make_pgd),
 
 #ifdef CONFIG_X86_PAE
-- 
cgit v0.10.2


From f336ae03149725bb5844166c7b04f7f65f17eec9 Mon Sep 17 00:00:00 2001
From: Talat Batheesh <talatb@mellanox.com>
Date: Wed, 22 Jun 2016 17:27:22 +0300
Subject: IB/core: Fix no default GIDs when netdevice reregisters

Currently, when the netdevice returned by get_netdev is unregistered,
we delete all GIDs (including the default GIDs) and reset their
attributes. Therefore, when we re-register it, no default GIDs
will be assigned (as their "default GID") attribute will be reset.
Fixing this by keeping "default GID" attribute.

Fixes: 03db3a2d81e6 ('IB/core: Add RoCE GID table management')
Signed-off-by: Talat Batheesh <talatb@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index 0409667..1a2984c 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -411,7 +411,9 @@ int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
 
 	for (ix = 0; ix < table->sz; ix++)
 		if (table->data_vec[ix].attr.ndev == ndev)
-			if (!del_gid(ib_dev, port, table, ix, false))
+			if (!del_gid(ib_dev, port, table, ix,
+				     !!(table->data_vec[ix].props &
+					GID_TABLE_ENTRY_DEFAULT)))
 				deleted = true;
 
 	write_unlock_irq(&table->rwlock);
-- 
cgit v0.10.2


From c65f6c5a3650876a69d1041a9d3c90986e9ca233 Mon Sep 17 00:00:00 2001
From: Alex Vesker <valex@mellanox.com>
Date: Wed, 22 Jun 2016 17:27:23 +0300
Subject: IB/core: Fix RoCE v1 multicast join logic issue

During multicast join of RoCEv1, IGMP join state and max hop limit
were updated incorrectly. IGMP join should be sent and marked as
joined only on RoCEv2 after a successful join. Max hops should be
updated to the hop limit on RoCEv2 regardless of the join state.

Fixes: bee3c3c91865 ('IB/cma: Join and leave multicast groups...')
Signed-off-by: Alex Vesker <valex@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index f0c91ba..dcde870 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -3878,12 +3878,12 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
 	gid_type = id_priv->cma_dev->default_gid_type[id_priv->id.port_num -
 		   rdma_start_port(id_priv->cma_dev->device)];
 	if (addr->sa_family == AF_INET) {
-		if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
+		if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) {
+			mc->multicast.ib->rec.hop_limit = IPV6_DEFAULT_HOPLIMIT;
 			err = cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid,
 					    true);
-		if (!err) {
-			mc->igmp_joined = true;
-			mc->multicast.ib->rec.hop_limit = IPV6_DEFAULT_HOPLIMIT;
+			if (!err)
+				mc->igmp_joined = true;
 		}
 	} else {
 		if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
-- 
cgit v0.10.2


From b3556005c5f319285610e3eae88b4078e1a4d3bc Mon Sep 17 00:00:00 2001
From: Eli Cohen <eli@mellanox.com>
Date: Wed, 22 Jun 2016 17:27:24 +0300
Subject: IB/core: Fix false search of the IB_SA_WELL_KNOWN_GUID

When virtualziation is supported, VFs may send SA MADs to a GID formed
by the concatenation of the subnet prefix with the
IB_SA_WELL_KNOWN_GUID. When a response is required, the current code
will search the local HCA's port for the received GID to figure out the
GID index of the entry containing this GID. However, since this is not a
real GID it will not be found and error will be printed.

We change the logic to check if the destination GID is this special GID
and avoid lookup in this case and use GID index 0.

Fixes: a0c1b2a35087 ('IB/core: Support accessing SA in virtualized environment')
Signed-off-by: Eli Cohen <eli@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 1d7d4cf..6298f54 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -511,12 +511,16 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
 		ah_attr->grh.dgid = sgid;
 
 		if (!rdma_cap_eth_ah(device, port_num)) {
-			ret = ib_find_cached_gid_by_port(device, &dgid,
-							 IB_GID_TYPE_IB,
-							 port_num, NULL,
-							 &gid_index);
-			if (ret)
-				return ret;
+			if (dgid.global.interface_id != cpu_to_be64(IB_SA_WELL_KNOWN_GUID)) {
+				ret = ib_find_cached_gid_by_port(device, &dgid,
+								 IB_GID_TYPE_IB,
+								 port_num, NULL,
+								 &gid_index);
+				if (ret)
+					return ret;
+			} else {
+				gid_index = 0;
+			}
 		}
 
 		ah_attr->grh.sgid_index = (u8) gid_index;
-- 
cgit v0.10.2


From b57141c1abe41e26c09b1b1b7ece463464ba6de2 Mon Sep 17 00:00:00 2001
From: Maor Gottlieb <maorg@mellanox.com>
Date: Wed, 22 Jun 2016 17:27:25 +0300
Subject: IB/uverbs: Initialize ib_qp_init_attr with zeros

Initialize ib_qp_init_attr with zeros in order to avoid from garbage
in fields that won't be set with user values.

Fixes: a060b5629ab06 ('IB/core: generic RDMA READ/WRITE API')
Signed-off-by: Maor Gottlieb <maorg@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 1a8babb..825021d 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -1747,7 +1747,7 @@ static int create_qp(struct ib_uverbs_file *file,
 	struct ib_srq			*srq = NULL;
 	struct ib_qp			*qp;
 	char				*buf;
-	struct ib_qp_init_attr		attr;
+	struct ib_qp_init_attr		attr = {};
 	struct ib_uverbs_ex_create_qp_resp resp;
 	int				ret;
 
-- 
cgit v0.10.2


From c9b254955b9f8814966f5dabd34c39d0e0a2b437 Mon Sep 17 00:00:00 2001
From: Eli Cohen <eli@mellanox.com>
Date: Wed, 22 Jun 2016 17:27:26 +0300
Subject: IB/mlx5: Fix post send fence logic

If the caller specified IB_SEND_FENCE in the send flags of the work
request and no previous work request stated that the successive one
should be fenced, the work request would be executed without a fence.
This could result in RDMA read or atomic operations failure due to a MR
being invalidated. Fix this by adding the mlx5 enumeration for fencing
RDMA/atomic operations and fix the logic to apply this.

Fixes: e126ba97dba9 ('mlx5: Add driver for Mellanox Connect-IB adapters')
Signed-off-by: Eli Cohen <eli@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index ce43422..ce0a7ab 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -3332,10 +3332,11 @@ static u8 get_fence(u8 fence, struct ib_send_wr *wr)
 			return MLX5_FENCE_MODE_SMALL_AND_FENCE;
 		else
 			return fence;
-
-	} else {
-		return 0;
+	} else if (unlikely(wr->send_flags & IB_SEND_FENCE)) {
+		return MLX5_FENCE_MODE_FENCE;
 	}
+
+	return 0;
 }
 
 static int begin_wqe(struct mlx5_ib_qp *qp, void **seg,
diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h
index e4e2988..630f66a 100644
--- a/include/linux/mlx5/qp.h
+++ b/include/linux/mlx5/qp.h
@@ -172,6 +172,7 @@ enum {
 enum {
 	MLX5_FENCE_MODE_NONE			= 0 << 5,
 	MLX5_FENCE_MODE_INITIATOR_SMALL		= 1 << 5,
+	MLX5_FENCE_MODE_FENCE			= 2 << 5,
 	MLX5_FENCE_MODE_STRONG_ORDERING		= 3 << 5,
 	MLX5_FENCE_MODE_SMALL_AND_FENCE		= 4 << 5,
 };
-- 
cgit v0.10.2


From 00bf534fce23048aa0e6fd8dbceedf097ee65508 Mon Sep 17 00:00:00 2001
From: Talat Batheesh <talatb@mellanox.com>
Date: Wed, 22 Jun 2016 17:27:27 +0300
Subject: IB/mlx5: Fix wrong naming of port_rcv_data counter

port_xmit_data is written instead of port_rcv_data.

Fixes: 3efd9a11212d ('IB/mlx5: Modify MAD reading counters method to use counter registers')
Signed-off-by: Talat Batheesh <talatb@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c
index 1534af1..364aab9 100644
--- a/drivers/infiniband/hw/mlx5/mad.c
+++ b/drivers/infiniband/hw/mlx5/mad.c
@@ -121,7 +121,7 @@ static void pma_cnt_ext_assign(struct ib_pma_portcounters_ext *pma_cnt_ext,
 	pma_cnt_ext->port_xmit_data =
 		cpu_to_be64(MLX5_SUM_CNT(out, transmitted_ib_unicast.octets,
 					 transmitted_ib_multicast.octets) >> 2);
-	pma_cnt_ext->port_xmit_data =
+	pma_cnt_ext->port_rcv_data =
 		cpu_to_be64(MLX5_SUM_CNT(out, received_ib_unicast.octets,
 					 received_ib_multicast.octets) >> 2);
 	pma_cnt_ext->port_xmit_packets =
-- 
cgit v0.10.2


From f2940e2c76bb554a7fbdd28ca5b90904117a9e96 Mon Sep 17 00:00:00 2001
From: Yishai Hadas <yishaih@mellanox.com>
Date: Wed, 22 Jun 2016 17:27:28 +0300
Subject: IB/mlx4: Fix the SQ size of an RC QP

When calculating the required size of an RC QP send queue, leave
enough space for masked atomic operations, which require more space than
"regular" atomic operation.

Fixes: 6fa8f719844b ("IB/mlx4: Add support for masked atomic operations")
Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
Reviewed-by: Jack Morgenstein <jackm@mellanox.co.il>
Reviewed-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 81b0e1f..5196117 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -362,7 +362,7 @@ static int send_wqe_overhead(enum mlx4_ib_qp_type type, u32 flags)
 			sizeof (struct mlx4_wqe_raddr_seg);
 	case MLX4_IB_QPT_RC:
 		return sizeof (struct mlx4_wqe_ctrl_seg) +
-			sizeof (struct mlx4_wqe_atomic_seg) +
+			sizeof (struct mlx4_wqe_masked_atomic_seg) +
 			sizeof (struct mlx4_wqe_raddr_seg);
 	case MLX4_IB_QPT_SMI:
 	case MLX4_IB_QPT_GSI:
-- 
cgit v0.10.2


From a6100603a4a87fc436199362bdb81cb849faaf6e Mon Sep 17 00:00:00 2001
From: Yishai Hadas <yishaih@mellanox.com>
Date: Wed, 22 Jun 2016 17:27:29 +0300
Subject: IB/mlx4: Fix error flow when sending mads under SRIOV

Fix mad send error flow to prevent double freeing address handles,
and leaking tx_ring entries when SRIOV is active.

If ib_mad_post_send fails, the address handle pointer in the tx_ring entry
must be set to NULL (or there will be a double-free) and tx_tail must be
incremented (or there will be a leak of tx_ring entries).
The tx_ring is handled the same way in the send-completion handler.

Fixes: 37bfc7c1e83f ("IB/mlx4: SR-IOV multiplex and demultiplex MADs")
Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
Reviewed-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index d68f506..9c2e53d 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -527,7 +527,7 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
 		tun_tx_ix = (++tun_qp->tx_ix_head) & (MLX4_NUM_TUNNEL_BUFS - 1);
 	spin_unlock(&tun_qp->tx_lock);
 	if (ret)
-		goto out;
+		goto end;
 
 	tun_mad = (struct mlx4_rcv_tunnel_mad *) (tun_qp->tx_ring[tun_tx_ix].buf.addr);
 	if (tun_qp->tx_ring[tun_tx_ix].ah)
@@ -596,9 +596,15 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
 	wr.wr.send_flags = IB_SEND_SIGNALED;
 
 	ret = ib_post_send(src_qp, &wr.wr, &bad_wr);
-out:
-	if (ret)
-		ib_destroy_ah(ah);
+	if (!ret)
+		return 0;
+ out:
+	spin_lock(&tun_qp->tx_lock);
+	tun_qp->tx_ix_tail++;
+	spin_unlock(&tun_qp->tx_lock);
+	tun_qp->tx_ring[tun_tx_ix].ah = NULL;
+end:
+	ib_destroy_ah(ah);
 	return ret;
 }
 
@@ -1326,9 +1332,15 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
 
 
 	ret = ib_post_send(send_qp, &wr.wr, &bad_wr);
+	if (!ret)
+		return 0;
+
+	spin_lock(&sqp->tx_lock);
+	sqp->tx_ix_tail++;
+	spin_unlock(&sqp->tx_lock);
+	sqp->tx_ring[wire_tx_ix].ah = NULL;
 out:
-	if (ret)
-		ib_destroy_ah(ah);
+	ib_destroy_ah(ah);
 	return ret;
 }
 
-- 
cgit v0.10.2


From 5533c18ab02b17a7f2ac11908e2d97d4b421617d Mon Sep 17 00:00:00 2001
From: Yishai Hadas <yishaih@mellanox.com>
Date: Wed, 22 Jun 2016 17:27:30 +0300
Subject: IB/mlx4: Verify port number in flow steering create flow

In procedure mlx4_ib_create_flow, passing an invalid port number
will cause an out-of-bounds array access. Data passed to this procedure
can come from user-space.  Therefore, need to validate port number
before proceeding onwards.

Note that we check against the number of physical ports declared at
the verbs (ib core) level; When bonding is active, the verbs level
sees one physical port, even though the low-level driver sees two ports.

Fixes: f77c0162a339 ("IB/mlx4: Add receive flow steering support")
Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
Reviewed-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Reviewed-by: Moni Shoua <monis@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 0eb09e1..42a4607 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -1704,6 +1704,9 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
 	struct mlx4_dev *dev = (to_mdev(qp->device))->dev;
 	int is_bonded = mlx4_is_bonded(dev);
 
+	if (flow_attr->port < 1 || flow_attr->port > qp->device->phys_port_cnt)
+		return ERR_PTR(-EINVAL);
+
 	if ((flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) &&
 	    (flow_attr->type != IB_FLOW_ATTR_NORMAL))
 		return ERR_PTR(-EOPNOTSUPP);
-- 
cgit v0.10.2


From 5b420d9cf7382c6e1512e96e02d18842d272049c Mon Sep 17 00:00:00 2001
From: Dotan Barak <dotanb@dev.mellanox.co.il>
Date: Wed, 22 Jun 2016 17:27:31 +0300
Subject: IB/mlx4: Fix memory leak if QP creation failed

When RC, UC, or RAW QPs are created, a qp object is allocated (kzalloc).
If at a later point (in procedure create_qp_common) the qp creation fails,
this qp object must be freed.

Fixes: 1ffeb2eb8be99 ("IB/mlx4: SR-IOV IB context objects and proxy/tunnel SQP support")
Signed-off-by: Dotan Barak <dotanb@dev.mellanox.co.il>
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 5196117..8db8405 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -1191,8 +1191,10 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd,
 	{
 		err = create_qp_common(to_mdev(pd->device), pd, init_attr,
 				       udata, 0, &qp, gfp);
-		if (err)
+		if (err) {
+			kfree(qp);
 			return ERR_PTR(err);
+		}
 
 		qp->ibqp.qp_num = qp->mqp.qpn;
 		qp->xrcdn = xrcdn;
-- 
cgit v0.10.2


From cbc9355a939b90263c58beb855f8151b56634c42 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Wed, 22 Jun 2016 17:27:32 +0300
Subject: IB/mlx4: Prevent cross page boundary allocation

Prevent cross page boundary allocation by allocating
new page, this is required to be aligned with ConnectX-3 HW
requirements.

Not doing that might cause to "RDMA read local protection" error.

Fixes: 1b2cd0fc673c ('IB/mlx4: Support the new memory registration API')
Suggested-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 6c5ac5d..29acda2 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -139,7 +139,7 @@ struct mlx4_ib_mr {
 	u32			max_pages;
 	struct mlx4_mr		mmr;
 	struct ib_umem	       *umem;
-	void			*pages_alloc;
+	size_t			page_map_size;
 };
 
 struct mlx4_ib_mw {
diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
index 6312721..5d73989 100644
--- a/drivers/infiniband/hw/mlx4/mr.c
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -277,20 +277,23 @@ mlx4_alloc_priv_pages(struct ib_device *device,
 		      struct mlx4_ib_mr *mr,
 		      int max_pages)
 {
-	int size = max_pages * sizeof(u64);
-	int add_size;
 	int ret;
 
-	add_size = max_t(int, MLX4_MR_PAGES_ALIGN - ARCH_KMALLOC_MINALIGN, 0);
+	/* Ensure that size is aligned to DMA cacheline
+	 * requirements.
+	 * max_pages is limited to MLX4_MAX_FAST_REG_PAGES
+	 * so page_map_size will never cross PAGE_SIZE.
+	 */
+	mr->page_map_size = roundup(max_pages * sizeof(u64),
+				    MLX4_MR_PAGES_ALIGN);
 
-	mr->pages_alloc = kzalloc(size + add_size, GFP_KERNEL);
-	if (!mr->pages_alloc)
+	/* Prevent cross page boundary allocation. */
+	mr->pages = (__be64 *)get_zeroed_page(GFP_KERNEL);
+	if (!mr->pages)
 		return -ENOMEM;
 
-	mr->pages = PTR_ALIGN(mr->pages_alloc, MLX4_MR_PAGES_ALIGN);
-
 	mr->page_map = dma_map_single(device->dma_device, mr->pages,
-				      size, DMA_TO_DEVICE);
+				      mr->page_map_size, DMA_TO_DEVICE);
 
 	if (dma_mapping_error(device->dma_device, mr->page_map)) {
 		ret = -ENOMEM;
@@ -298,9 +301,9 @@ mlx4_alloc_priv_pages(struct ib_device *device,
 	}
 
 	return 0;
-err:
-	kfree(mr->pages_alloc);
 
+err:
+	free_page((unsigned long)mr->pages);
 	return ret;
 }
 
@@ -309,11 +312,10 @@ mlx4_free_priv_pages(struct mlx4_ib_mr *mr)
 {
 	if (mr->pages) {
 		struct ib_device *device = mr->ibmr.device;
-		int size = mr->max_pages * sizeof(u64);
 
 		dma_unmap_single(device->dma_device, mr->page_map,
-				 size, DMA_TO_DEVICE);
-		kfree(mr->pages_alloc);
+				 mr->page_map_size, DMA_TO_DEVICE);
+		free_page((unsigned long)mr->pages);
 		mr->pages = NULL;
 	}
 }
@@ -537,14 +539,12 @@ int mlx4_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
 	mr->npages = 0;
 
 	ib_dma_sync_single_for_cpu(ibmr->device, mr->page_map,
-				   sizeof(u64) * mr->max_pages,
-				   DMA_TO_DEVICE);
+				   mr->page_map_size, DMA_TO_DEVICE);
 
 	rc = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, mlx4_set_page);
 
 	ib_dma_sync_single_for_device(ibmr->device, mr->page_map,
-				      sizeof(u64) * mr->max_pages,
-				      DMA_TO_DEVICE);
+				      mr->page_map_size, DMA_TO_DEVICE);
 
 	return rc;
 }
-- 
cgit v0.10.2


From 2aee309d3e01447c55fdf89cef05a0e2be372655 Mon Sep 17 00:00:00 2001
From: Mike Marciniszyn <mike.marciniszyn@intel.com>
Date: Fri, 17 Jun 2016 19:17:49 -0700
Subject: IB/hfi1: Fix deadlock with txreq allocation slow path

A failure in the get_txreq() inline will result in a
slow path retry using __get_txreq().

__get_txreq() attempts to procure the qp s_lock, which
is already held in all callers.

Fix by deleting the s_lock maintenance in __get_txreq()
and add sparse syntax hooks to future proof the code.

Cc: Stable <stable@vger.kernel.org> # 4.6+
Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/hw/hfi1/verbs_txreq.c b/drivers/infiniband/hw/hfi1/verbs_txreq.c
index bc95c41..d8fb056 100644
--- a/drivers/infiniband/hw/hfi1/verbs_txreq.c
+++ b/drivers/infiniband/hw/hfi1/verbs_txreq.c
@@ -92,11 +92,10 @@ void hfi1_put_txreq(struct verbs_txreq *tx)
 
 struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev,
 				struct rvt_qp *qp)
+	__must_hold(&qp->s_lock)
 {
 	struct verbs_txreq *tx = ERR_PTR(-EBUSY);
-	unsigned long flags;
 
-	spin_lock_irqsave(&qp->s_lock, flags);
 	write_seqlock(&dev->iowait_lock);
 	if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
 		struct hfi1_qp_priv *priv;
@@ -116,7 +115,6 @@ struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev,
 	}
 out:
 	write_sequnlock(&dev->iowait_lock);
-	spin_unlock_irqrestore(&qp->s_lock, flags);
 	return tx;
 }
 
diff --git a/drivers/infiniband/hw/hfi1/verbs_txreq.h b/drivers/infiniband/hw/hfi1/verbs_txreq.h
index 1cf69b2..a1d6e08 100644
--- a/drivers/infiniband/hw/hfi1/verbs_txreq.h
+++ b/drivers/infiniband/hw/hfi1/verbs_txreq.h
@@ -73,6 +73,7 @@ struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev,
 
 static inline struct verbs_txreq *get_txreq(struct hfi1_ibdev *dev,
 					    struct rvt_qp *qp)
+	__must_hold(&qp->slock)
 {
 	struct verbs_txreq *tx;
 	struct hfi1_qp_priv *priv = qp->priv;
-- 
cgit v0.10.2


From 8ae84f7c56044ee17ef6b700cb34d11ad9428a2e Mon Sep 17 00:00:00 2001
From: Ashutosh Dixit <ashutosh.dixit@intel.com>
Date: Fri, 17 Jun 2016 19:17:54 -0700
Subject: IB/hfi1: Don't zero out qp->s_ack_queue in rvt_reset_qp

Since rvt_reset_qp already zero's out qp->s_ack_queue head and tail
pointers, there is no need to zero out qp->s_ack_queue itself.

Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index c3e0d61..d04f4fe 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -576,12 +576,6 @@ static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp,
 	qp->s_ssn = 1;
 	qp->s_lsn = 0;
 	qp->s_mig_state = IB_MIG_MIGRATED;
-	if (qp->s_ack_queue)
-		memset(
-			qp->s_ack_queue,
-			0,
-			rvt_max_atomic(rdi) *
-				sizeof(*qp->s_ack_queue));
 	qp->r_head_ack_queue = 0;
 	qp->s_tail_ack_queue = 0;
 	qp->s_num_rd_atomic = 0;
-- 
cgit v0.10.2


From c755f4afa66ad3ed98870bd3254f37c47fb2c800 Mon Sep 17 00:00:00 2001
From: Mike Marciniszyn <mike.marciniszyn@intel.com>
Date: Wed, 22 Jun 2016 13:29:33 -0700
Subject: IB/rdmavt: Correct qp_priv_alloc() return value test

The current drivers return errors from this calldown
wrapped in an ERR_PTR().

The rdmavt code incorrectly tests for NULL.

The code is fixed to use IS_ERR() and change ret according
to the driver return value.

Cc: Stable <stable@vger.kernel.org> # 4.6+
Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index d04f4fe..41ba7e9 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -699,8 +699,10 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
 		 * initialization that is needed.
 		 */
 		priv = rdi->driver_f.qp_priv_alloc(rdi, qp, gfp);
-		if (!priv)
+		if (IS_ERR(priv)) {
+			ret = priv;
 			goto bail_qp;
+		}
 		qp->priv = priv;
 		qp->timeout_jiffies =
 			usecs_to_jiffies((4096UL * (1UL << qp->timeout)) /
diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h
index 16274e2..9c9a27d 100644
--- a/include/rdma/rdma_vt.h
+++ b/include/rdma/rdma_vt.h
@@ -203,7 +203,9 @@ struct rvt_driver_provided {
 
 	/*
 	 * Allocate a private queue pair data structure for driver specific
-	 * information which is opaque to rdmavt.
+	 * information which is opaque to rdmavt.  Errors are returned via
+	 * ERR_PTR(err).  The driver is free to return NULL or a valid
+	 * pointer.
 	 */
 	void * (*qp_priv_alloc)(struct rvt_dev_info *rdi, struct rvt_qp *qp,
 				gfp_t gfp);
-- 
cgit v0.10.2


From 747f1c6d9be749a29612fc78c321b97099906008 Mon Sep 17 00:00:00 2001
From: Shiraz Saleem <shiraz.saleem@intel.com>
Date: Tue, 14 Jun 2016 16:54:16 -0500
Subject: i40iw: Correct CQ arming

CQ is armed for solicited events only, ignoring other notification
flags. Correct this by arming for next and arming for solicited
event if IB_CQ_SOLICITED is set. Also protect CQ shadow area update
with spinlock.

Signed-off-by: Shiraz Saleem <shiraz.saleem@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
index 02a735b..c6e75ac 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
@@ -2327,13 +2327,16 @@ static int i40iw_req_notify_cq(struct ib_cq *ibcq,
 {
 	struct i40iw_cq *iwcq;
 	struct i40iw_cq_uk *ukcq;
-	enum i40iw_completion_notify cq_notify = IW_CQ_COMPL_SOLICITED;
+	unsigned long flags;
+	enum i40iw_completion_notify cq_notify = IW_CQ_COMPL_EVENT;
 
 	iwcq = (struct i40iw_cq *)ibcq;
 	ukcq = &iwcq->sc_cq.cq_uk;
-	if (notify_flags == IB_CQ_NEXT_COMP)
-		cq_notify = IW_CQ_COMPL_EVENT;
+	if (notify_flags == IB_CQ_SOLICITED)
+		cq_notify = IW_CQ_COMPL_SOLICITED;
+	spin_lock_irqsave(&iwcq->lock, flags);
 	ukcq->ops.iw_cq_request_notification(ukcq, cq_notify);
+	spin_unlock_irqrestore(&iwcq->lock, flags);
 	return 0;
 }
 
-- 
cgit v0.10.2


From ee23abd75c5076e51061c275e8f659d754a63c9d Mon Sep 17 00:00:00 2001
From: Faisal Latif <faisal.latif@intel.com>
Date: Tue, 14 Jun 2016 16:54:17 -0500
Subject: i40iw: Correct status check on i40iw_get_pble

i40iw_get_pble returns 0 on success. Correct the check on return
code.

Signed-off-by: Faisal Latif <faisal.latif@intel.com>
Signed-off-by: Shiraz Saleem <shiraz.saleem@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
index c6e75ac..65bea9c 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
@@ -1527,7 +1527,7 @@ static struct ib_mr *i40iw_alloc_mr(struct ib_pd *pd,
 	mutex_lock(&iwdev->pbl_mutex);
 	status = i40iw_get_pble(&iwdev->sc_dev, iwdev->pble_rsrc, palloc, iwmr->page_cnt);
 	mutex_unlock(&iwdev->pbl_mutex);
-	if (!status)
+	if (status)
 		goto err1;
 
 	if (palloc->level != I40IW_LEVEL_1)
-- 
cgit v0.10.2


From 0477e18145c565f9ca74c6df4112f818f673fcaa Mon Sep 17 00:00:00 2001
From: Faisal Latif <faisal.latif@intel.com>
Date: Tue, 14 Jun 2016 16:54:18 -0500
Subject: i40iw: Return correct max_fast_reg_page_list_len

Return correct value for max_fast_reg_page_list_len from
i40iw_query_device().

Signed-off-by: Faisal Latif <faisal.latif@intel.com>
Signed-off-by: Shiraz Saleem <shiraz.saleem@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/hw/i40iw/i40iw.h b/drivers/infiniband/hw/i40iw/i40iw.h
index 8b95320..14f16a2 100644
--- a/drivers/infiniband/hw/i40iw/i40iw.h
+++ b/drivers/infiniband/hw/i40iw/i40iw.h
@@ -113,6 +113,7 @@
 
 #define IW_HMC_OBJ_TYPE_NUM ARRAY_SIZE(iw_hmc_obj_types)
 #define IW_CFG_FPM_QP_COUNT		32768
+#define I40IW_MAX_PAGES_PER_FMR		512
 
 #define I40IW_MTU_TO_MSS		40
 #define I40IW_DEFAULT_MSS		1460
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
index 65bea9c..31eda323 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
@@ -79,6 +79,7 @@ static int i40iw_query_device(struct ib_device *ibdev,
 	props->max_qp_init_rd_atom = props->max_qp_rd_atom;
 	props->atomic_cap = IB_ATOMIC_NONE;
 	props->max_map_per_fmr = 1;
+	props->max_fast_reg_page_list_len = I40IW_MAX_PAGES_PER_FMR;
 	return 0;
 }
 
-- 
cgit v0.10.2


From 7748e4990de42ea796543c0ffd34118c3a5e6a98 Mon Sep 17 00:00:00 2001
From: Shiraz Saleem <shiraz.saleem@intel.com>
Date: Tue, 14 Jun 2016 16:54:19 -0500
Subject: i40iw: Enable level-1 PBL for fast memory registration

Set the chunk_size to enable level-1 PBL support when the fast memory
page count is more than one.

Signed-off-by: Shiraz Saleem <shiraz.saleem@intel.com>
Signed-off-by: Faisal Latif <faisal.latif@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/hw/i40iw/i40iw.h b/drivers/infiniband/hw/i40iw/i40iw.h
index 14f16a2..b738acd 100644
--- a/drivers/infiniband/hw/i40iw/i40iw.h
+++ b/drivers/infiniband/hw/i40iw/i40iw.h
@@ -114,6 +114,7 @@
 #define IW_HMC_OBJ_TYPE_NUM ARRAY_SIZE(iw_hmc_obj_types)
 #define IW_CFG_FPM_QP_COUNT		32768
 #define I40IW_MAX_PAGES_PER_FMR		512
+#define I40IW_MIN_PAGES_PER_FMR		1
 
 #define I40IW_MTU_TO_MSS		40
 #define I40IW_DEFAULT_MSS		1460
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
index 31eda323..33959ed 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
@@ -2150,6 +2150,7 @@ static int i40iw_post_send(struct ib_qp *ibqp,
 			struct i40iw_sc_dev *dev = &iwqp->iwdev->sc_dev;
 			struct i40iw_fast_reg_stag_info info;
 
+			memset(&info, 0, sizeof(info));
 			info.access_rights = I40IW_ACCESS_FLAGS_LOCALREAD;
 			info.access_rights |= i40iw_get_user_access(flags);
 			info.stag_key = reg_wr(ib_wr)->key & 0xff;
@@ -2159,10 +2160,14 @@ static int i40iw_post_send(struct ib_qp *ibqp,
 			info.addr_type = I40IW_ADDR_TYPE_VA_BASED;
 			info.va = (void *)(uintptr_t)iwmr->ibmr.iova;
 			info.total_len = iwmr->ibmr.length;
+			info.reg_addr_pa = *(u64 *)palloc->level1.addr;
 			info.first_pm_pbl_index = palloc->level1.idx;
 			info.local_fence = ib_wr->send_flags & IB_SEND_FENCE;
 			info.signaled = ib_wr->send_flags & IB_SEND_SIGNALED;
 
+			if (iwmr->npages > I40IW_MIN_PAGES_PER_FMR)
+				info.chunk_size = 1;
+
 			if (page_shift == 21)
 				info.page_size = 1; /* 2M page */
 
-- 
cgit v0.10.2


From 962fcef33b03395051367181a0549d29d109d9a4 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sat, 18 Jun 2016 13:03:36 +0800
Subject: esp: Fix ESN generation under UDP encapsulation

Blair Steven noticed that ESN in conjunction with UDP encapsulation
is broken because we set the temporary ESP header to the wrong spot.

This patch fixes this by first of all using the right spot, i.e.,
4 bytes off the real ESP header, and then saving this information
so that after encryption we can restore it properly.

Fixes: 7021b2e1cddd ("esp4: Switch to new AEAD interface")
Reported-by: Blair Steven <Blair.Steven@alliedtelesis.co.nz>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Acked-by: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 4779374..d95631d 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -23,6 +23,11 @@ struct esp_skb_cb {
 	void *tmp;
 };
 
+struct esp_output_extra {
+	__be32 seqhi;
+	u32 esphoff;
+};
+
 #define ESP_SKB_CB(__skb) ((struct esp_skb_cb *)&((__skb)->cb[0]))
 
 static u32 esp4_get_mtu(struct xfrm_state *x, int mtu);
@@ -35,11 +40,11 @@ static u32 esp4_get_mtu(struct xfrm_state *x, int mtu);
  *
  * TODO: Use spare space in skb for this where possible.
  */
-static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags, int seqhilen)
+static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags, int extralen)
 {
 	unsigned int len;
 
-	len = seqhilen;
+	len = extralen;
 
 	len += crypto_aead_ivsize(aead);
 
@@ -57,15 +62,16 @@ static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags, int seqhilen)
 	return kmalloc(len, GFP_ATOMIC);
 }
 
-static inline __be32 *esp_tmp_seqhi(void *tmp)
+static inline void *esp_tmp_extra(void *tmp)
 {
-	return PTR_ALIGN((__be32 *)tmp, __alignof__(__be32));
+	return PTR_ALIGN(tmp, __alignof__(struct esp_output_extra));
 }
-static inline u8 *esp_tmp_iv(struct crypto_aead *aead, void *tmp, int seqhilen)
+
+static inline u8 *esp_tmp_iv(struct crypto_aead *aead, void *tmp, int extralen)
 {
 	return crypto_aead_ivsize(aead) ?
-	       PTR_ALIGN((u8 *)tmp + seqhilen,
-			 crypto_aead_alignmask(aead) + 1) : tmp + seqhilen;
+	       PTR_ALIGN((u8 *)tmp + extralen,
+			 crypto_aead_alignmask(aead) + 1) : tmp + extralen;
 }
 
 static inline struct aead_request *esp_tmp_req(struct crypto_aead *aead, u8 *iv)
@@ -99,7 +105,7 @@ static void esp_restore_header(struct sk_buff *skb, unsigned int offset)
 {
 	struct ip_esp_hdr *esph = (void *)(skb->data + offset);
 	void *tmp = ESP_SKB_CB(skb)->tmp;
-	__be32 *seqhi = esp_tmp_seqhi(tmp);
+	__be32 *seqhi = esp_tmp_extra(tmp);
 
 	esph->seq_no = esph->spi;
 	esph->spi = *seqhi;
@@ -107,7 +113,11 @@ static void esp_restore_header(struct sk_buff *skb, unsigned int offset)
 
 static void esp_output_restore_header(struct sk_buff *skb)
 {
-	esp_restore_header(skb, skb_transport_offset(skb) - sizeof(__be32));
+	void *tmp = ESP_SKB_CB(skb)->tmp;
+	struct esp_output_extra *extra = esp_tmp_extra(tmp);
+
+	esp_restore_header(skb, skb_transport_offset(skb) + extra->esphoff -
+				sizeof(__be32));
 }
 
 static void esp_output_done_esn(struct crypto_async_request *base, int err)
@@ -121,6 +131,7 @@ static void esp_output_done_esn(struct crypto_async_request *base, int err)
 static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
 {
 	int err;
+	struct esp_output_extra *extra;
 	struct ip_esp_hdr *esph;
 	struct crypto_aead *aead;
 	struct aead_request *req;
@@ -137,8 +148,7 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
 	int tfclen;
 	int nfrags;
 	int assoclen;
-	int seqhilen;
-	__be32 *seqhi;
+	int extralen;
 	__be64 seqno;
 
 	/* skb is pure payload to encrypt */
@@ -166,21 +176,21 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
 	nfrags = err;
 
 	assoclen = sizeof(*esph);
-	seqhilen = 0;
+	extralen = 0;
 
 	if (x->props.flags & XFRM_STATE_ESN) {
-		seqhilen += sizeof(__be32);
-		assoclen += seqhilen;
+		extralen += sizeof(*extra);
+		assoclen += sizeof(__be32);
 	}
 
-	tmp = esp_alloc_tmp(aead, nfrags, seqhilen);
+	tmp = esp_alloc_tmp(aead, nfrags, extralen);
 	if (!tmp) {
 		err = -ENOMEM;
 		goto error;
 	}
 
-	seqhi = esp_tmp_seqhi(tmp);
-	iv = esp_tmp_iv(aead, tmp, seqhilen);
+	extra = esp_tmp_extra(tmp);
+	iv = esp_tmp_iv(aead, tmp, extralen);
 	req = esp_tmp_req(aead, iv);
 	sg = esp_req_sg(aead, req);
 
@@ -247,8 +257,10 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
 	 * encryption.
 	 */
 	if ((x->props.flags & XFRM_STATE_ESN)) {
-		esph = (void *)(skb_transport_header(skb) - sizeof(__be32));
-		*seqhi = esph->spi;
+		extra->esphoff = (unsigned char *)esph -
+				 skb_transport_header(skb);
+		esph = (struct ip_esp_hdr *)((unsigned char *)esph - 4);
+		extra->seqhi = esph->spi;
 		esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.hi);
 		aead_request_set_callback(req, 0, esp_output_done_esn, skb);
 	}
@@ -445,7 +457,7 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
 		goto out;
 
 	ESP_SKB_CB(skb)->tmp = tmp;
-	seqhi = esp_tmp_seqhi(tmp);
+	seqhi = esp_tmp_extra(tmp);
 	iv = esp_tmp_iv(aead, tmp, seqhilen);
 	req = esp_tmp_req(aead, iv);
 	sg = esp_req_sg(aead, req);
-- 
cgit v0.10.2


From 067a7cd06f7bf860f2e3415394b065b9a0983802 Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Mon, 20 Jun 2016 13:37:18 -0700
Subject: act_ife: only acquire tcf_lock for existing actions

Alexey reported that we have GFP_KERNEL allocation when
holding the spinlock tcf_lock. Actually we don't have
to take that spinlock for all the cases, especially
for the new one we just create. To modify the existing
actions, we still need this spinlock to make sure
the whole update is atomic.

For net-next, we can get rid of this spinlock because
we already hold the RTNL lock on slow path, and on fast
path we can use RCU to protect the metalist.

Joint work with Jamal.

Reported-by: Alexey Khoroshilov <khoroshilov@ispras.ru>
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/net/tc_act/tc_ife.h b/include/net/tc_act/tc_ife.h
index dc9a09a..c55facd 100644
--- a/include/net/tc_act/tc_ife.h
+++ b/include/net/tc_act/tc_ife.h
@@ -36,7 +36,7 @@ struct tcf_meta_ops {
 	int	(*encode)(struct sk_buff *, void *, struct tcf_meta_info *);
 	int	(*decode)(struct sk_buff *, void *, u16 len);
 	int	(*get)(struct sk_buff *skb, struct tcf_meta_info *mi);
-	int	(*alloc)(struct tcf_meta_info *, void *);
+	int	(*alloc)(struct tcf_meta_info *, void *, gfp_t);
 	void	(*release)(struct tcf_meta_info *);
 	int	(*validate)(void *val, int len);
 	struct module	*owner;
@@ -48,8 +48,8 @@ int ife_get_meta_u32(struct sk_buff *skb, struct tcf_meta_info *mi);
 int ife_get_meta_u16(struct sk_buff *skb, struct tcf_meta_info *mi);
 int ife_tlv_meta_encode(void *skbdata, u16 attrtype, u16 dlen,
 			const void *dval);
-int ife_alloc_meta_u32(struct tcf_meta_info *mi, void *metaval);
-int ife_alloc_meta_u16(struct tcf_meta_info *mi, void *metaval);
+int ife_alloc_meta_u32(struct tcf_meta_info *mi, void *metaval, gfp_t gfp);
+int ife_alloc_meta_u16(struct tcf_meta_info *mi, void *metaval, gfp_t gfp);
 int ife_check_meta_u32(u32 metaval, struct tcf_meta_info *mi);
 int ife_encode_meta_u32(u32 metaval, void *skbdata, struct tcf_meta_info *mi);
 int ife_validate_meta_u32(void *val, int len);
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 658046d..e407659 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -106,9 +106,9 @@ int ife_get_meta_u16(struct sk_buff *skb, struct tcf_meta_info *mi)
 }
 EXPORT_SYMBOL_GPL(ife_get_meta_u16);
 
-int ife_alloc_meta_u32(struct tcf_meta_info *mi, void *metaval)
+int ife_alloc_meta_u32(struct tcf_meta_info *mi, void *metaval, gfp_t gfp)
 {
-	mi->metaval = kmemdup(metaval, sizeof(u32), GFP_KERNEL);
+	mi->metaval = kmemdup(metaval, sizeof(u32), gfp);
 	if (!mi->metaval)
 		return -ENOMEM;
 
@@ -116,9 +116,9 @@ int ife_alloc_meta_u32(struct tcf_meta_info *mi, void *metaval)
 }
 EXPORT_SYMBOL_GPL(ife_alloc_meta_u32);
 
-int ife_alloc_meta_u16(struct tcf_meta_info *mi, void *metaval)
+int ife_alloc_meta_u16(struct tcf_meta_info *mi, void *metaval, gfp_t gfp)
 {
-	mi->metaval = kmemdup(metaval, sizeof(u16), GFP_KERNEL);
+	mi->metaval = kmemdup(metaval, sizeof(u16), gfp);
 	if (!mi->metaval)
 		return -ENOMEM;
 
@@ -240,10 +240,10 @@ static int ife_validate_metatype(struct tcf_meta_ops *ops, void *val, int len)
 }
 
 /* called when adding new meta information
- * under ife->tcf_lock
+ * under ife->tcf_lock for existing action
 */
 static int load_metaops_and_vet(struct tcf_ife_info *ife, u32 metaid,
-				void *val, int len)
+				void *val, int len, bool exists)
 {
 	struct tcf_meta_ops *ops = find_ife_oplist(metaid);
 	int ret = 0;
@@ -251,11 +251,13 @@ static int load_metaops_and_vet(struct tcf_ife_info *ife, u32 metaid,
 	if (!ops) {
 		ret = -ENOENT;
 #ifdef CONFIG_MODULES
-		spin_unlock_bh(&ife->tcf_lock);
+		if (exists)
+			spin_unlock_bh(&ife->tcf_lock);
 		rtnl_unlock();
 		request_module("ifemeta%u", metaid);
 		rtnl_lock();
-		spin_lock_bh(&ife->tcf_lock);
+		if (exists)
+			spin_lock_bh(&ife->tcf_lock);
 		ops = find_ife_oplist(metaid);
 #endif
 	}
@@ -272,10 +274,10 @@ static int load_metaops_and_vet(struct tcf_ife_info *ife, u32 metaid,
 }
 
 /* called when adding new meta information
- * under ife->tcf_lock
+ * under ife->tcf_lock for existing action
 */
 static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval,
-			int len)
+			int len, bool exists)
 {
 	struct tcf_meta_info *mi = NULL;
 	struct tcf_meta_ops *ops = find_ife_oplist(metaid);
@@ -284,7 +286,7 @@ static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval,
 	if (!ops)
 		return -ENOENT;
 
-	mi = kzalloc(sizeof(*mi), GFP_KERNEL);
+	mi = kzalloc(sizeof(*mi), exists ? GFP_ATOMIC : GFP_KERNEL);
 	if (!mi) {
 		/*put back what find_ife_oplist took */
 		module_put(ops->owner);
@@ -294,7 +296,7 @@ static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval,
 	mi->metaid = metaid;
 	mi->ops = ops;
 	if (len > 0) {
-		ret = ops->alloc(mi, metaval);
+		ret = ops->alloc(mi, metaval, exists ? GFP_ATOMIC : GFP_KERNEL);
 		if (ret != 0) {
 			kfree(mi);
 			module_put(ops->owner);
@@ -307,14 +309,14 @@ static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval,
 	return ret;
 }
 
-static int use_all_metadata(struct tcf_ife_info *ife)
+static int use_all_metadata(struct tcf_ife_info *ife, bool exists)
 {
 	struct tcf_meta_ops *o;
 	int rc = 0;
 	int installed = 0;
 
 	list_for_each_entry(o, &ifeoplist, list) {
-		rc = add_metainfo(ife, o->metaid, NULL, 0);
+		rc = add_metainfo(ife, o->metaid, NULL, 0, exists);
 		if (rc == 0)
 			installed += 1;
 	}
@@ -385,8 +387,9 @@ static void tcf_ife_cleanup(struct tc_action *a, int bind)
 	spin_unlock_bh(&ife->tcf_lock);
 }
 
-/* under ife->tcf_lock */
-static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb)
+/* under ife->tcf_lock for existing action */
+static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb,
+			     bool exists)
 {
 	int len = 0;
 	int rc = 0;
@@ -398,11 +401,11 @@ static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb)
 			val = nla_data(tb[i]);
 			len = nla_len(tb[i]);
 
-			rc = load_metaops_and_vet(ife, i, val, len);
+			rc = load_metaops_and_vet(ife, i, val, len, exists);
 			if (rc != 0)
 				return rc;
 
-			rc = add_metainfo(ife, i, val, len);
+			rc = add_metainfo(ife, i, val, len, exists);
 			if (rc)
 				return rc;
 		}
@@ -474,7 +477,8 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
 			saddr = nla_data(tb[TCA_IFE_SMAC]);
 	}
 
-	spin_lock_bh(&ife->tcf_lock);
+	if (exists)
+		spin_lock_bh(&ife->tcf_lock);
 	ife->tcf_action = parm->action;
 
 	if (parm->flags & IFE_ENCODE) {
@@ -504,11 +508,12 @@ metadata_parse_err:
 			if (ret == ACT_P_CREATED)
 				_tcf_ife_cleanup(a, bind);
 
-			spin_unlock_bh(&ife->tcf_lock);
+			if (exists)
+				spin_unlock_bh(&ife->tcf_lock);
 			return err;
 		}
 
-		err = populate_metalist(ife, tb2);
+		err = populate_metalist(ife, tb2, exists);
 		if (err)
 			goto metadata_parse_err;
 
@@ -518,17 +523,19 @@ metadata_parse_err:
 		 * as we can. You better have at least one else we are
 		 * going to bail out
 		 */
-		err = use_all_metadata(ife);
+		err = use_all_metadata(ife, exists);
 		if (err) {
 			if (ret == ACT_P_CREATED)
 				_tcf_ife_cleanup(a, bind);
 
-			spin_unlock_bh(&ife->tcf_lock);
+			if (exists)
+				spin_unlock_bh(&ife->tcf_lock);
 			return err;
 		}
 	}
 
-	spin_unlock_bh(&ife->tcf_lock);
+	if (exists)
+		spin_unlock_bh(&ife->tcf_lock);
 
 	if (ret == ACT_P_CREATED)
 		tcf_hash_insert(tn, a);
-- 
cgit v0.10.2


From 817e9f2c5c262b2716f5d77020d118ad53315f3e Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Mon, 20 Jun 2016 13:37:19 -0700
Subject: act_ife: acquire ife_mod_lock before reading ifeoplist

Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index e407659..ea4a2fe 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -277,7 +277,7 @@ static int load_metaops_and_vet(struct tcf_ife_info *ife, u32 metaid,
  * under ife->tcf_lock for existing action
 */
 static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval,
-			int len, bool exists)
+			int len, bool atomic)
 {
 	struct tcf_meta_info *mi = NULL;
 	struct tcf_meta_ops *ops = find_ife_oplist(metaid);
@@ -286,7 +286,7 @@ static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval,
 	if (!ops)
 		return -ENOENT;
 
-	mi = kzalloc(sizeof(*mi), exists ? GFP_ATOMIC : GFP_KERNEL);
+	mi = kzalloc(sizeof(*mi), atomic ? GFP_ATOMIC : GFP_KERNEL);
 	if (!mi) {
 		/*put back what find_ife_oplist took */
 		module_put(ops->owner);
@@ -296,7 +296,7 @@ static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval,
 	mi->metaid = metaid;
 	mi->ops = ops;
 	if (len > 0) {
-		ret = ops->alloc(mi, metaval, exists ? GFP_ATOMIC : GFP_KERNEL);
+		ret = ops->alloc(mi, metaval, atomic ? GFP_ATOMIC : GFP_KERNEL);
 		if (ret != 0) {
 			kfree(mi);
 			module_put(ops->owner);
@@ -309,17 +309,19 @@ static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval,
 	return ret;
 }
 
-static int use_all_metadata(struct tcf_ife_info *ife, bool exists)
+static int use_all_metadata(struct tcf_ife_info *ife)
 {
 	struct tcf_meta_ops *o;
 	int rc = 0;
 	int installed = 0;
 
+	read_lock(&ife_mod_lock);
 	list_for_each_entry(o, &ifeoplist, list) {
-		rc = add_metainfo(ife, o->metaid, NULL, 0, exists);
+		rc = add_metainfo(ife, o->metaid, NULL, 0, true);
 		if (rc == 0)
 			installed += 1;
 	}
+	read_unlock(&ife_mod_lock);
 
 	if (installed)
 		return 0;
@@ -523,7 +525,7 @@ metadata_parse_err:
 		 * as we can. You better have at least one else we are
 		 * going to bail out
 		 */
-		err = use_all_metadata(ife, exists);
+		err = use_all_metadata(ife);
 		if (err) {
 			if (ret == ACT_P_CREATED)
 				_tcf_ife_cleanup(a, bind);
-- 
cgit v0.10.2


From c0cf4512a31eb3cec70b066bc36ed55f7d05b8c0 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@sandisk.com>
Date: Thu, 23 Jun 2016 09:35:48 +0200
Subject: IB/srpt: Reduce QP buffer size

The memory needed for the send and receive queues associated with
a QP is proportional to the max_sge parameter. The current value
of that parameter is such that with an mlx4 HCA the QP buffer size
is 8 MB. Since DMA is used for communication between HCA and CPU
that buffer either has to be allocated coherently or map_single()
must succeed for that buffer. Since large contiguous allocations
are fragile and since the maximum segment size for e.g. swiotlb
is 256 KB, reduce the max_sge parameter. This patch avoids that
the following text appears on the console after SRP logout and
relogin on a system equipped with multiple IB HCAs:

mlx4_core 0000:05:00.0: swiotlb buffer is full (sz: 8388608 bytes)
swiotlb: coherent allocation failed for device 0000:05:00.0 size=8388608
CPU: 11 PID: 148 Comm: kworker/11:1 Not tainted 4.7.0-rc4-dbg+ #1
Call Trace:
 [<ffffffff812c6d35>] dump_stack+0x67/0x92
 [<ffffffff812efe71>] swiotlb_alloc_coherent+0x141/0x150
 [<ffffffff810458be>] x86_swiotlb_alloc_coherent+0x3e/0x50
 [<ffffffffa03861fa>] mlx4_buf_direct_alloc.isra.5+0x9a/0x120 [mlx4_core]
 [<ffffffffa0386545>] mlx4_buf_alloc+0x165/0x1a0 [mlx4_core]
 [<ffffffffa035053d>] create_qp_common.isra.29+0x57d/0xff0 [mlx4_ib]
 [<ffffffffa03510da>] mlx4_ib_create_qp+0x12a/0x3f0 [mlx4_ib]
 [<ffffffffa031154a>] ib_create_qp+0x3a/0x250 [ib_core]
 [<ffffffffa055dd4b>] srpt_cm_handler+0x4bb/0xcad [ib_srpt]
 [<ffffffffa02c1ab0>] cm_process_work+0x20/0xf0 [ib_cm]
 [<ffffffffa02c3640>] cm_work_handler+0x1ac0/0x2059 [ib_cm]
 [<ffffffff810737ed>] process_one_work+0x19d/0x490
 [<ffffffff81073b29>] worker_thread+0x49/0x490
 [<ffffffff8107a0ea>] kthread+0xea/0x100
 [<ffffffff815b25af>] ret_from_fork+0x1f/0x40

Fixes: b99f8e4d7bcd ("IB/srpt: convert to the generic RDMA READ/WRITE API")
Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Cc: Laurence Oberman <loberman@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Doug Ledford <dledford@redhat.com>

diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index e68b20cb..4a41556 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -1638,8 +1638,7 @@ retry:
 	 */
 	qp_init->cap.max_send_wr = srp_sq_size / 2;
 	qp_init->cap.max_rdma_ctxs = srp_sq_size / 2;
-	qp_init->cap.max_send_sge = max(sdev->device->attrs.max_sge_rd,
-					sdev->device->attrs.max_sge);
+	qp_init->cap.max_send_sge = SRPT_DEF_SG_PER_WQE;
 	qp_init->port_num = ch->sport->port;
 
 	ch->qp = ib_create_qp(sdev->pd, qp_init);
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h
index fee6bfd..3890304 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.h
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.h
@@ -106,6 +106,7 @@ enum {
 	SRP_LOGIN_RSP_MULTICHAN_MAINTAINED = 0x2,
 
 	SRPT_DEF_SG_TABLESIZE = 128,
+	SRPT_DEF_SG_PER_WQE = 16,
 
 	MIN_SRPT_SQ_SIZE = 16,
 	DEF_SRPT_SQ_SIZE = 4096,
-- 
cgit v0.10.2


From 33cdcee04be3b4482be97393167e7561b2584e1e Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@free-electrons.com>
Date: Wed, 22 Jun 2016 09:25:14 +0200
Subject: pwm: Fix pwm_apply_args()

Commit 5ec803edcb70 ("pwm: Add core infrastructure to allow atomic
updates"), implemented pwm_disable() as a wrapper around
pwm_apply_state(), and then, commit ef2bf4997f7d ("pwm: Improve args
checking in pwm_apply_state()") added missing checks on the ->period
value in pwm_apply_state() to ensure we were not passing inappropriate
values to the ->config() or ->apply() methods.

The conjunction of these 2 commits led to a case where pwm_disable()
was no longer succeeding, thus preventing the polarity setting done
in pwm_apply_args().

Set a valid period in pwm_apply_args() to ensure polarity setting
won't be rejected.

Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Reported-by: Geert Uytterhoeven <geert@linux-m68k.org>
Suggested-by: Brian Norris <briannorris@chromium.org>
Fixes: 5ec803edcb70 ("pwm: Add core infrastructure to allow atomic updates")
Tested-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Brian Norris <briannorris@chromium.org>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>

diff --git a/include/linux/pwm.h b/include/linux/pwm.h
index 908b67c..c038ae3 100644
--- a/include/linux/pwm.h
+++ b/include/linux/pwm.h
@@ -464,6 +464,8 @@ static inline bool pwm_can_sleep(struct pwm_device *pwm)
 
 static inline void pwm_apply_args(struct pwm_device *pwm)
 {
+	struct pwm_state state = { };
+
 	/*
 	 * PWM users calling pwm_apply_args() expect to have a fresh config
 	 * where the polarity and period are set according to pwm_args info.
@@ -476,18 +478,20 @@ static inline void pwm_apply_args(struct pwm_device *pwm)
 	 * at startup (even if they are actually enabled), thus authorizing
 	 * polarity setting.
 	 *
-	 * Instead of setting ->enabled to false, we call pwm_disable()
-	 * before pwm_set_polarity() to ensure that everything is configured
-	 * as expected, and the PWM is really disabled when the user request
-	 * it.
+	 * To fulfill this requirement, we apply a new state which disables
+	 * the PWM device and set the reference period and polarity config.
 	 *
 	 * Note that PWM users requiring a smooth handover between the
 	 * bootloader and the kernel (like critical regulators controlled by
 	 * PWM devices) will have to switch to the atomic API and avoid calling
 	 * pwm_apply_args().
 	 */
-	pwm_disable(pwm);
-	pwm_set_polarity(pwm, pwm->args.polarity);
+
+	state.enabled = false;
+	state.polarity = pwm->args.polarity;
+	state.period = pwm->args.period;
+
+	pwm_apply_state(pwm, &state);
 }
 
 struct pwm_lookup {
-- 
cgit v0.10.2


From c7f1429389ec1aa25e042bb13451385fbb596f8c Mon Sep 17 00:00:00 2001
From: Cameron Gutman <aicommander@gmail.com>
Date: Thu, 23 Jun 2016 10:24:42 -0700
Subject: Input: xpad - fix oops when attaching an unknown Xbox One gamepad

Xbox One controllers have multiple interfaces which all have the
same class, subclass, and protocol. One of the these interfaces
has only a single endpoint. When Xpad attempts to bind to this
interface, it causes an oops when trying initialize the output URB
by trying to access the second endpoint's descriptor.

This situation was avoided for known Xbox One devices by checking
the XTYPE constant associated with the VID and PID tuple. However,
this breaks when new or previously unknown Xbox One controllers
are attached to the system.

This change addresses the problem by deriving the XTYPE for Xbox
One controllers based on the interface protocol before checking
the interface number.

Fixes: 1a48ff81b391 ("Input: xpad - add support for Xbox One controllers")
Signed-off-by: Cameron Gutman <aicommander@gmail.com>
Cc: stable@vger.kernel.org
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>

diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c
index 923c572..3438e98 100644
--- a/drivers/input/joystick/xpad.c
+++ b/drivers/input/joystick/xpad.c
@@ -1437,16 +1437,6 @@ static int xpad_probe(struct usb_interface *intf, const struct usb_device_id *id
 			break;
 	}
 
-	if (xpad_device[i].xtype == XTYPE_XBOXONE &&
-	    intf->cur_altsetting->desc.bInterfaceNumber != 0) {
-		/*
-		 * The Xbox One controller lists three interfaces all with the
-		 * same interface class, subclass and protocol. Differentiate by
-		 * interface number.
-		 */
-		return -ENODEV;
-	}
-
 	xpad = kzalloc(sizeof(struct usb_xpad), GFP_KERNEL);
 	if (!xpad)
 		return -ENOMEM;
@@ -1478,6 +1468,8 @@ static int xpad_probe(struct usb_interface *intf, const struct usb_device_id *id
 		if (intf->cur_altsetting->desc.bInterfaceClass == USB_CLASS_VENDOR_SPEC) {
 			if (intf->cur_altsetting->desc.bInterfaceProtocol == 129)
 				xpad->xtype = XTYPE_XBOX360W;
+			else if (intf->cur_altsetting->desc.bInterfaceProtocol == 208)
+				xpad->xtype = XTYPE_XBOXONE;
 			else
 				xpad->xtype = XTYPE_XBOX360;
 		} else {
@@ -1492,6 +1484,17 @@ static int xpad_probe(struct usb_interface *intf, const struct usb_device_id *id
 			xpad->mapping |= MAP_STICKS_TO_NULL;
 	}
 
+	if (xpad->xtype == XTYPE_XBOXONE &&
+	    intf->cur_altsetting->desc.bInterfaceNumber != 0) {
+		/*
+		 * The Xbox One controller lists three interfaces all with the
+		 * same interface class, subclass and protocol. Differentiate by
+		 * interface number.
+		 */
+		error = -ENODEV;
+		goto err_free_in_urb;
+	}
+
 	error = xpad_init_output(intf, xpad);
 	if (error)
 		goto err_free_in_urb;
-- 
cgit v0.10.2


From 04e1b65af2085d4102b2b5d2fd1e050f8ee63092 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <yongjun_wei@trendmicro.com.cn>
Date: Fri, 17 Jun 2016 17:20:40 +0000
Subject: Btrfs: fix error return code in btrfs_init_test_fs()

Fix to return a negative error code from the kern_mount() error handling
case instead of 0(ret is set to 0 by register_filesystem), as done
elsewhere in this function.

Signed-off-by: Wei Yongjun <yongjun_wei@trendmicro.com.cn>
Reviewed-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Signed-off-by: Chris Mason <clm@fb.com>

diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c
index 10eb249..02223f3 100644
--- a/fs/btrfs/tests/btrfs-tests.c
+++ b/fs/btrfs/tests/btrfs-tests.c
@@ -68,7 +68,7 @@ int btrfs_init_test_fs(void)
 	if (IS_ERR(test_mnt)) {
 		printk(KERN_ERR "btrfs: cannot mount test file system\n");
 		unregister_filesystem(&test_type);
-		return ret;
+		return PTR_ERR(test_mnt);
 	}
 	return 0;
 }
-- 
cgit v0.10.2


From 415b35a55b57a701afe7391d32a6bb0193b7d3da Mon Sep 17 00:00:00 2001
From: Liu Bo <bo.li.liu@oracle.com>
Date: Fri, 17 Jun 2016 19:16:21 -0700
Subject: Btrfs: fix error handling in map_private_extent_buffer

map_private_extent_buffer() can return -EINVAL in two different cases,
1. when the requested contents span two pages if nodesize is larger
   than pagesize,
2. when it detects something insane.

The 2nd one used to be only a WARN_ON(1), and we decided to return a error
to callers, but we didn't fix up all its callers, which will be
addressed by this patch.

Without this, btrfs may end up with 'general protection', ie.
reading invalid memory.

Reported-by: Vegard Nossum <vegard.nossum@oracle.com>
Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Signed-off-by: Chris Mason <clm@fb.com>

diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 6276add..a85cf7d 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1786,10 +1786,12 @@ static noinline int generic_bin_search(struct extent_buffer *eb,
 			if (!err) {
 				tmp = (struct btrfs_disk_key *)(kaddr + offset -
 							map_start);
-			} else {
+			} else if (err == 1) {
 				read_extent_buffer(eb, &unaligned,
 						   offset, sizeof(unaligned));
 				tmp = &unaligned;
+			} else {
+				return err;
 			}
 
 		} else {
@@ -2830,6 +2832,8 @@ cow_done:
 		}
 
 		ret = key_search(b, key, level, &prev_cmp, &slot);
+		if (ret < 0)
+			goto done;
 
 		if (level != 0) {
 			int dec = 0;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index aaee3ef..75533ad 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -5342,6 +5342,11 @@ int read_extent_buffer_to_user(struct extent_buffer *eb, void __user *dstv,
 	return ret;
 }
 
+/*
+ * return 0 if the item is found within a page.
+ * return 1 if the item spans two pages.
+ * return -EINVAL otherwise.
+ */
 int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
 			       unsigned long min_len, char **map,
 			       unsigned long *map_start,
@@ -5356,7 +5361,7 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
 		PAGE_SHIFT;
 
 	if (i != end_i)
-		return -EINVAL;
+		return 1;
 
 	if (i == 0) {
 		offset = start_offset;
-- 
cgit v0.10.2


From c0d2f6104e8ab2eb75e58e72494ad4b69c5227f8 Mon Sep 17 00:00:00 2001
From: Wang Xiaoguang <wangxg.fnst@cn.fujitsu.com>
Date: Wed, 22 Jun 2016 09:57:01 +0800
Subject: btrfs: fix disk_i_size update bug when fallocate() fails

When doing truncate operation, btrfs_setsize() will first call
truncate_setsize() to set new inode->i_size, but if later
btrfs_truncate() fails, btrfs_setsize() will call
"i_size_write(inode, BTRFS_I(inode)->disk_i_size)" to reset the
inmemory inode size, now bug occurs. It's because for truncate
case btrfs_ordered_update_i_size() directly uses inode->i_size
to update BTRFS_I(inode)->disk_i_size, indeed we should use the
"offset" argument to update disk_i_size. Here is the call graph:
==>btrfs_truncate()
====>btrfs_truncate_inode_items()
======>btrfs_ordered_update_i_size(inode, last_size, NULL);
Here btrfs_ordered_update_i_size()'s offset argument is last_size.

And below test case can reveal this bug:

dd if=/dev/zero of=fs.img bs=$((1024*1024)) count=100
dev=$(losetup --show -f fs.img)
mkdir -p /mnt/mntpoint
mkfs.btrfs  -f $dev
mount $dev /mnt/mntpoint
cd /mnt/mntpoint

echo "workdir is: /mnt/mntpoint"
blocksize=$((128 * 1024))
dd if=/dev/zero of=testfile bs=$blocksize count=1
sync
count=$((17*1024*1024*1024/blocksize))
echo "file size is:" $((count*blocksize))
for ((i = 1; i <= $count; i++)); do
	i=$((i + 1))
	dst_offset=$((blocksize * i))
	xfs_io -f -c "reflink testfile 0 $dst_offset $blocksize"\
		testfile > /dev/null
done
sync

truncate --size 0 testfile
ls -l testfile
du -sh testfile
exit

In this case, truncate operation will fail for enospc reason and
"du -sh testfile" returns value greater than 0, but testfile's
size is 0, we need to reflect correct inode->i_size.

Signed-off-by: Wang Xiaoguang <wangxg.fnst@cn.fujitsu.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Signed-off-by: Chris Mason <clm@fb.com>

diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index e96634a..aca8264 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -968,6 +968,7 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
 	struct rb_node *prev = NULL;
 	struct btrfs_ordered_extent *test;
 	int ret = 1;
+	u64 orig_offset = offset;
 
 	spin_lock_irq(&tree->lock);
 	if (ordered) {
@@ -983,7 +984,7 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
 
 	/* truncate file */
 	if (disk_i_size > i_size) {
-		BTRFS_I(inode)->disk_i_size = i_size;
+		BTRFS_I(inode)->disk_i_size = orig_offset;
 		ret = 0;
 		goto out;
 	}
-- 
cgit v0.10.2


From b7f67055d2df9b8f68f02e49d256ee3973999bd2 Mon Sep 17 00:00:00 2001
From: Chandan Rajendra <chandan@linux.vnet.ibm.com>
Date: Thu, 23 Jun 2016 15:16:44 +0530
Subject: Btrfs: Force stripesize to the value of sectorsize

Btrfs code currently assumes stripesize to be same as
sectorsize. However Btrfs-progs (until commit
df05c7ed455f519e6e15e46196392e4757257305) has been setting
btrfs_super_block->stripesize to a value of 4096.

This commit makes sure that the value of btrfs_super_block->stripesize
is a power of 2. Later, it unconditionally sets btrfs_root->stripesize
to sectorsize.

Signed-off-by: Chandan Rajendra <chandan@linux.vnet.ibm.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Signed-off-by: Chris Mason <clm@fb.com>

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 54cca7a..60ce119 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2806,7 +2806,7 @@ int open_ctree(struct super_block *sb,
 
 	nodesize = btrfs_super_nodesize(disk_super);
 	sectorsize = btrfs_super_sectorsize(disk_super);
-	stripesize = btrfs_super_stripesize(disk_super);
+	stripesize = sectorsize;
 	fs_info->dirty_metadata_batch = nodesize * (1 + ilog2(nr_cpu_ids));
 	fs_info->delalloc_batch = sectorsize * 512 * (1 + ilog2(nr_cpu_ids));
 
@@ -4133,9 +4133,7 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
 		       btrfs_super_bytes_used(sb));
 		ret = -EINVAL;
 	}
-	if (!is_power_of_2(btrfs_super_stripesize(sb)) ||
-		((btrfs_super_stripesize(sb) != sectorsize) &&
-			(btrfs_super_stripesize(sb) != 4096))) {
+	if (!is_power_of_2(btrfs_super_stripesize(sb))) {
 		btrfs_err(fs_info, "invalid stripesize %u",
 		       btrfs_super_stripesize(sb));
 		ret = -EINVAL;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index c3a2900..64eec2c 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -4694,12 +4694,12 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
 
 	if (type & BTRFS_BLOCK_GROUP_RAID5) {
 		raid_stripe_len = find_raid56_stripe_len(ndevs - 1,
-				 btrfs_super_stripesize(info->super_copy));
+						extent_root->stripesize);
 		data_stripes = num_stripes - 1;
 	}
 	if (type & BTRFS_BLOCK_GROUP_RAID6) {
 		raid_stripe_len = find_raid56_stripe_len(ndevs - 2,
-				 btrfs_super_stripesize(info->super_copy));
+						extent_root->stripesize);
 		data_stripes = num_stripes - 2;
 	}
 
-- 
cgit v0.10.2


From 02bae045f3b3bc5681f075be0a8b1313147d1df2 Mon Sep 17 00:00:00 2001
From: Rex Zhu <Rex.Zhu@amd.com>
Date: Thu, 23 Jun 2016 10:58:26 +0800
Subject: drm/amd/powerplay: add some definition for FFC feature on polaris.

Signed-off-by: Rex Zhu <Rex.Zhu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

diff --git a/drivers/gpu/drm/amd/powerplay/inc/polaris10_ppsmc.h b/drivers/gpu/drm/amd/powerplay/inc/polaris10_ppsmc.h
index d41d37a..b8f4b73 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/polaris10_ppsmc.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/polaris10_ppsmc.h
@@ -392,6 +392,8 @@ typedef uint16_t PPSMC_Result;
 #define PPSMC_MSG_SetGpuPllDfsForSclk         ((uint16_t) 0x300)
 #define PPSMC_MSG_Didt_Block_Function		  ((uint16_t) 0x301)
 
+#define PPSMC_MSG_SetVBITimeout               ((uint16_t) 0x306)
+
 #define PPSMC_MSG_SecureSRBMWrite             ((uint16_t) 0x600)
 #define PPSMC_MSG_SecureSRBMRead              ((uint16_t) 0x601)
 #define PPSMC_MSG_SetAddress                  ((uint16_t) 0x800)
diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu74_discrete.h b/drivers/gpu/drm/amd/powerplay/inc/smu74_discrete.h
index b85ff54..899d6d8 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/smu74_discrete.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/smu74_discrete.h
@@ -270,7 +270,8 @@ struct SMU74_Discrete_DpmTable {
 	uint8_t                             BootPhases;
 
 	uint8_t                             VRHotLevel;
-	uint8_t                             Reserved1[3];
+	uint8_t                             LdoRefSel;
+	uint8_t                             Reserved1[2];
 	uint16_t                            FanStartTemperature;
 	uint16_t                            FanStopTemperature;
 	uint16_t                            MaxVoltage;
-- 
cgit v0.10.2


From 83a7af6dcfd2d84066c6d19bf2bd837f7be4a5ca Mon Sep 17 00:00:00 2001
From: Rex Zhu <Rex.Zhu@amd.com>
Date: Thu, 23 Jun 2016 11:05:00 +0800
Subject: drm/amd/powerplay: disable FFC.

SMC need use VBI signal for MCLK switching
Send 2 x frame time as vbi timeout

Signed-off-by: Rex Zhu <Rex.Zhu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c
index 64ee78f..20ccbc7 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c
@@ -1805,6 +1805,7 @@ static int polaris10_populate_clock_stretcher_data_table(struct pp_hwmgr *hwmgr)
 		data->smc_state_table.Sclk_voltageOffset[i] = volt_offset;
 	}
 
+	data->smc_state_table.LdoRefSel = (table_info->cac_dtp_table->ucCKS_LDO_REFSEL != 0) ? table_info->cac_dtp_table->ucCKS_LDO_REFSEL : 6;
 	/* Populate CKS Lookup Table */
 	if (stretch_amount == 1 || stretch_amount == 2 || stretch_amount == 5)
 		stretch_amount2 = 0;
@@ -2487,6 +2488,8 @@ int polaris10_enable_dpm_tasks(struct pp_hwmgr *hwmgr)
 	PP_ASSERT_WITH_CODE((0 == tmp_result),
 			"Failed to enable VR hot GPIO interrupt!", result = tmp_result);
 
+	smum_send_msg_to_smc(hwmgr->smumgr, (PPSMC_Msg)PPSMC_HasDisplay);
+
 	tmp_result = polaris10_enable_sclk_control(hwmgr);
 	PP_ASSERT_WITH_CODE((0 == tmp_result),
 			"Failed to enable SCLK control!", result = tmp_result);
@@ -4359,6 +4362,15 @@ static int polaris10_notify_link_speed_change_after_state_change(
 	return 0;
 }
 
+static int polaris10_notify_smc_display(struct pp_hwmgr *hwmgr)
+{
+	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
+
+	smum_send_msg_to_smc_with_parameter(hwmgr->smumgr,
+		(PPSMC_Msg)PPSMC_MSG_SetVBITimeout, data->frame_time_x2);
+	return (smum_send_msg_to_smc(hwmgr->smumgr, (PPSMC_Msg)PPSMC_HasDisplay) == 0) ?  0 : -EINVAL;
+}
+
 static int polaris10_set_power_state_tasks(struct pp_hwmgr *hwmgr, const void *input)
 {
 	int tmp_result, result = 0;
@@ -4407,6 +4419,11 @@ static int polaris10_set_power_state_tasks(struct pp_hwmgr *hwmgr, const void *i
 			"Failed to program memory timing parameters!",
 			result = tmp_result);
 
+	tmp_result = polaris10_notify_smc_display(hwmgr);
+	PP_ASSERT_WITH_CODE((0 == tmp_result),
+			"Failed to notify smc display settings!",
+			result = tmp_result);
+
 	tmp_result = polaris10_unfreeze_sclk_mclk_dpm(hwmgr);
 	PP_ASSERT_WITH_CODE((0 == tmp_result),
 			"Failed to unfreeze SCLK MCLK DPM!",
@@ -4441,6 +4458,7 @@ static int polaris10_set_max_fan_pwm_output(struct pp_hwmgr *hwmgr, uint16_t us_
 			PPSMC_MSG_SetFanPwmMax, us_max_fan_pwm);
 }
 
+
 int polaris10_notify_smc_display_change(struct pp_hwmgr *hwmgr, bool has_display)
 {
 	PPSMC_Msg msg = has_display ? (PPSMC_Msg)PPSMC_HasDisplay : (PPSMC_Msg)PPSMC_NoDisplay;
@@ -4460,8 +4478,6 @@ int polaris10_notify_smc_display_config_after_ps_adjustment(struct pp_hwmgr *hwm
 
 	if (num_active_displays > 1)  /* to do && (pHwMgr->pPECI->displayConfiguration.bMultiMonitorInSync != TRUE)) */
 		polaris10_notify_smc_display_change(hwmgr, false);
-	else
-		polaris10_notify_smc_display_change(hwmgr, true);
 
 	return 0;
 }
@@ -4502,6 +4518,8 @@ int polaris10_program_display_gap(struct pp_hwmgr *hwmgr)
 	frame_time_in_us = 1000000 / refresh_rate;
 
 	pre_vbi_time_in_us = frame_time_in_us - 200 - mode_info.vblank_time_us;
+	data->frame_time_x2 = frame_time_in_us * 2 / 100;
+
 	display_gap2 = pre_vbi_time_in_us * (ref_clock / 100);
 
 	cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixCG_DISPLAY_GAP_CNTL2, display_gap2);
@@ -4510,8 +4528,6 @@ int polaris10_program_display_gap(struct pp_hwmgr *hwmgr)
 
 	cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, data->soft_regs_start + offsetof(SMU74_SoftRegisters, VBlankTimeout), (frame_time_in_us - pre_vbi_time_in_us));
 
-	polaris10_notify_smc_display_change(hwmgr, num_active_displays != 0);
-
 	return 0;
 }
 
@@ -4623,7 +4639,7 @@ int polaris10_upload_mc_firmware(struct pp_hwmgr *hwmgr)
 		return 0;
 	}
 
-	data->need_long_memory_training = true;
+	data->need_long_memory_training = false;
 
 /*
  *	PPMCME_FirmwareDescriptorEntry *pfd = NULL;
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.h b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.h
index d717789..afc3434 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.h
@@ -315,6 +315,7 @@ struct polaris10_hwmgr {
 
 	uint32_t                              avfs_vdroop_override_setting;
 	bool                                  apply_avfs_cks_off_voltage;
+	uint32_t                              frame_time_x2;
 };
 
 /* To convert to Q8.8 format for firmware */
diff --git a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
index 28f5714..77e8e33 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
@@ -411,6 +411,8 @@ struct phm_cac_tdp_table {
 	uint8_t  ucVr_I2C_Line;
 	uint8_t  ucPlx_I2C_address;
 	uint8_t  ucPlx_I2C_Line;
+	uint32_t usBoostPowerLimit;
+	uint8_t  ucCKS_LDO_REFSEL;
 };
 
 struct phm_ppm_table {
-- 
cgit v0.10.2


From 0812a945fbb814e7946fbe6ddcc81d054c8b6c91 Mon Sep 17 00:00:00 2001
From: Rex Zhu <Rex.Zhu@amd.com>
Date: Wed, 22 Jun 2016 21:00:09 +0800
Subject: drm/amd/powerplay: Update CKS on/ CKS off voltage offset calculation

CKS on/off voltage offset calculation algorithm takes in a few coefficients.
We need to update them for polaris to latest coefficients to align with BB.

Signed-off-by: Rex Zhu <Rex.Zhu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c
index 20ccbc7..d23dcce 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c
@@ -98,6 +98,7 @@
 #define PCIE_BUS_CLK                10000
 #define TCLK                        (PCIE_BUS_CLK / 10)
 
+#define CEILING_UCHAR(double) ((double-(uint8_t)(double)) > 0 ? (uint8_t)(double+1) : (uint8_t)(double))
 
 static const uint16_t polaris10_clock_stretcher_lookup_table[2][4] =
 { {600, 1050, 3, 0}, {600, 1050, 6, 1} };
@@ -1787,20 +1788,27 @@ static int polaris10_populate_clock_stretcher_data_table(struct pp_hwmgr *hwmgr)
 
 	ro = efuse * (max -min)/255 + min;
 
-	/* Populate Sclk_CKS_masterEn0_7 and Sclk_voltageOffset */
+	/* Populate Sclk_CKS_masterEn0_7 and Sclk_voltageOffset
+	 * there is a little difference in calculating
+	 * volt_with_cks with windows */
 	for (i = 0; i < sclk_table->count; i++) {
 		data->smc_state_table.Sclk_CKS_masterEn0_7 |=
 				sclk_table->entries[i].cks_enable << i;
-
-		volt_without_cks =  (uint32_t)(((ro - 40) * 1000 - 2753594 - sclk_table->entries[i].clk/100 * 136418 /1000) / \
-					(sclk_table->entries[i].clk/100 * 1132925 /10000 - 242418)/100);
-
-		volt_with_cks = (uint32_t)((ro * 1000 -2396351 - sclk_table->entries[i].clk/100 * 329021/1000) / \
-				(sclk_table->entries[i].clk/10000 * 649434 /1000  - 18005)/10);
+		if (hwmgr->chip_id == CHIP_POLARIS10) {
+			volt_without_cks = (uint32_t)((2753594000 + (sclk_table->entries[i].clk/100) * 136418 -(ro - 70) * 1000000) / \
+						(2424180 - (sclk_table->entries[i].clk/100) * 1132925/1000));
+			volt_with_cks = (uint32_t)((279720200 + sclk_table->entries[i].clk * 3232 - (ro - 65) * 100000000) / \
+					(252248000 - sclk_table->entries[i].clk/100 * 115764));
+		} else {
+			volt_without_cks = (uint32_t)((2416794800 + (sclk_table->entries[i].clk/100) * 1476925/10 -(ro - 50) * 1000000) / \
+						(2625416 - (sclk_table->entries[i].clk/100) * 12586807/10000));
+			volt_with_cks = (uint32_t)((2999656000 + sclk_table->entries[i].clk * 392803/100 - (ro - 44) * 1000000) / \
+					(3422454 - sclk_table->entries[i].clk/100 * 18886376/10000));
+		}
 
 		if (volt_without_cks >= volt_with_cks)
-			volt_offset = (uint8_t)(((volt_without_cks - volt_with_cks +
-					sclk_table->entries[i].cks_voffset) * 100 / 625) + 1);
+			volt_offset = (uint8_t)CEILING_UCHAR((volt_without_cks - volt_with_cks +
+					sclk_table->entries[i].cks_voffset) * 100 / 625);
 
 		data->smc_state_table.Sclk_voltageOffset[i] = volt_offset;
 	}
-- 
cgit v0.10.2


From 12afb34400eb2b301f06b2aa3535497d14faee59 Mon Sep 17 00:00:00 2001
From: Ping Cheng <pinglinux@gmail.com>
Date: Thu, 23 Jun 2016 10:54:17 -0700
Subject: Input: wacom_w8001 - w8001_MAX_LENGTH should be 13

Somehow the patch that added two-finger touch support forgot to update
W8001_MAX_LENGTH from 11 to 13.

Signed-off-by: Ping Cheng <pingc@wacom.com>
Reviewed-by: Peter Hutterer <peter.hutterer@who-t.net>
Cc: stable@vger.kernel.org
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>

diff --git a/drivers/input/touchscreen/wacom_w8001.c b/drivers/input/touchscreen/wacom_w8001.c
index bab3c6a..b1b4127 100644
--- a/drivers/input/touchscreen/wacom_w8001.c
+++ b/drivers/input/touchscreen/wacom_w8001.c
@@ -27,7 +27,7 @@ MODULE_AUTHOR("Jaya Kumar <jayakumar.lkml@gmail.com>");
 MODULE_DESCRIPTION(DRIVER_DESC);
 MODULE_LICENSE("GPL");
 
-#define W8001_MAX_LENGTH	11
+#define W8001_MAX_LENGTH	13
 #define W8001_LEAD_MASK		0x80
 #define W8001_LEAD_BYTE		0x80
 #define W8001_TAB_MASK		0x40
-- 
cgit v0.10.2


From 21de12ee5568fd1aec47890c72967abf791ac80a Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 20 Jun 2016 15:00:43 -0700
Subject: netem: fix a use after free

If the packet was dropped by lower qdisc, then we must not
access it later.

Save qdisc_pkt_len(skb) in a temp variable.

Fixes: 2ccccf5fb43f ("net_sched: update hierarchical backlog too")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: WANG Cong <xiyou.wangcong@gmail.com>
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 205bed0..178f163 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -650,14 +650,14 @@ deliver:
 #endif
 
 			if (q->qdisc) {
+				unsigned int pkt_len = qdisc_pkt_len(skb);
 				int err = qdisc_enqueue(skb, q->qdisc);
 
-				if (unlikely(err != NET_XMIT_SUCCESS)) {
-					if (net_xmit_drop_count(err)) {
-						qdisc_qstats_drop(sch);
-						qdisc_tree_reduce_backlog(sch, 1,
-									  qdisc_pkt_len(skb));
-					}
+				if (err != NET_XMIT_SUCCESS &&
+				    net_xmit_drop_count(err)) {
+					qdisc_qstats_drop(sch);
+					qdisc_tree_reduce_backlog(sch, 1,
+								  pkt_len);
 				}
 				goto tfifo_dequeue;
 			}
-- 
cgit v0.10.2


From 6720a305df74ca30bcc10fc316881641b6ff0c80 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Thu, 23 Jun 2016 12:11:17 -0700
Subject: locking: avoid passing around 'thread_info' in mutex debugging code

None of the code actually wants a thread_info, it all wants a
task_struct, and it's just converting back and forth between the two
("ti->task" to get the task_struct from the thread_info, and
"task_thread_info(task)" to go the other way).

No semantic change.

Acked-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/kernel/locking/mutex-debug.c b/kernel/locking/mutex-debug.c
index 3ef3736..9c951fa 100644
--- a/kernel/locking/mutex-debug.c
+++ b/kernel/locking/mutex-debug.c
@@ -49,21 +49,21 @@ void debug_mutex_free_waiter(struct mutex_waiter *waiter)
 }
 
 void debug_mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter,
-			    struct thread_info *ti)
+			    struct task_struct *task)
 {
 	SMP_DEBUG_LOCKS_WARN_ON(!spin_is_locked(&lock->wait_lock));
 
 	/* Mark the current thread as blocked on the lock: */
-	ti->task->blocked_on = waiter;
+	task->blocked_on = waiter;
 }
 
 void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
-			 struct thread_info *ti)
+			 struct task_struct *task)
 {
 	DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list));
-	DEBUG_LOCKS_WARN_ON(waiter->task != ti->task);
-	DEBUG_LOCKS_WARN_ON(ti->task->blocked_on != waiter);
-	ti->task->blocked_on = NULL;
+	DEBUG_LOCKS_WARN_ON(waiter->task != task);
+	DEBUG_LOCKS_WARN_ON(task->blocked_on != waiter);
+	task->blocked_on = NULL;
 
 	list_del_init(&waiter->list);
 	waiter->task = NULL;
diff --git a/kernel/locking/mutex-debug.h b/kernel/locking/mutex-debug.h
index 0799fd3..d06ae3b 100644
--- a/kernel/locking/mutex-debug.h
+++ b/kernel/locking/mutex-debug.h
@@ -20,9 +20,9 @@ extern void debug_mutex_wake_waiter(struct mutex *lock,
 extern void debug_mutex_free_waiter(struct mutex_waiter *waiter);
 extern void debug_mutex_add_waiter(struct mutex *lock,
 				   struct mutex_waiter *waiter,
-				   struct thread_info *ti);
+				   struct task_struct *task);
 extern void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
-				struct thread_info *ti);
+				struct task_struct *task);
 extern void debug_mutex_unlock(struct mutex *lock);
 extern void debug_mutex_init(struct mutex *lock, const char *name,
 			     struct lock_class_key *key);
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index 79d2d76..a70b90d 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -537,7 +537,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 		goto skip_wait;
 
 	debug_mutex_lock_common(lock, &waiter);
-	debug_mutex_add_waiter(lock, &waiter, task_thread_info(task));
+	debug_mutex_add_waiter(lock, &waiter, task);
 
 	/* add waiting tasks to the end of the waitqueue (FIFO): */
 	list_add_tail(&waiter.list, &lock->wait_list);
@@ -584,7 +584,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 	}
 	__set_task_state(task, TASK_RUNNING);
 
-	mutex_remove_waiter(lock, &waiter, current_thread_info());
+	mutex_remove_waiter(lock, &waiter, task);
 	/* set it to 0 if there are no waiters left: */
 	if (likely(list_empty(&lock->wait_list)))
 		atomic_set(&lock->count, 0);
@@ -605,7 +605,7 @@ skip_wait:
 	return 0;
 
 err:
-	mutex_remove_waiter(lock, &waiter, task_thread_info(task));
+	mutex_remove_waiter(lock, &waiter, task);
 	spin_unlock_mutex(&lock->wait_lock, flags);
 	debug_mutex_free_waiter(&waiter);
 	mutex_release(&lock->dep_map, 1, ip);
diff --git a/kernel/locking/mutex.h b/kernel/locking/mutex.h
index 5cda397..a68bae5 100644
--- a/kernel/locking/mutex.h
+++ b/kernel/locking/mutex.h
@@ -13,7 +13,7 @@
 		do { spin_lock(lock); (void)(flags); } while (0)
 #define spin_unlock_mutex(lock, flags) \
 		do { spin_unlock(lock); (void)(flags); } while (0)
-#define mutex_remove_waiter(lock, waiter, ti) \
+#define mutex_remove_waiter(lock, waiter, task) \
 		__list_del((waiter)->list.prev, (waiter)->list.next)
 
 #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
-- 
cgit v0.10.2


From 52fe705b493d40b472b9e7220a71bdca8537c6b2 Mon Sep 17 00:00:00 2001
From: Chris Packham <chris.packham@alliedtelesis.co.nz>
Date: Tue, 21 Jun 2016 15:39:43 +1200
Subject: net: vrf: replace hard tab with space in assignment

The assignment of rth->dst.output in vrf_rt6_create() and
vrf_rtable_create() used a hard tab before the '='. The neighboring
assignments did not. Make the assignment of rth->dst.output consistent
with the surrounding code.

Signed-off-by: Chris Packham <chris.packham@alliedtelesis.co.nz>
Acked-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index dff0884..8bd8c7e 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -304,7 +304,7 @@ static int vrf_rt6_create(struct net_device *dev)
 	dst_hold(&rt6->dst);
 
 	rt6->rt6i_table = rt6i_table;
-	rt6->dst.output	= vrf_output6;
+	rt6->dst.output = vrf_output6;
 	rcu_assign_pointer(vrf->rt6, rt6);
 
 	rc = 0;
@@ -403,7 +403,7 @@ static int vrf_rtable_create(struct net_device *dev)
 	if (!rth)
 		return -ENOMEM;
 
-	rth->dst.output	= vrf_output;
+	rth->dst.output = vrf_output;
 	rth->rt_table_id = vrf->tb_id;
 
 	rcu_assign_pointer(vrf->rth, rth);
-- 
cgit v0.10.2


From efeb2267bba8aa893afdadfc9bae4790777c600c Mon Sep 17 00:00:00 2001
From: Haishuang Yan <yanhaishuang@cmss.chinamobile.com>
Date: Tue, 21 Jun 2016 16:26:49 +0800
Subject: geneve: fix tx_errors statistics

Tx errors present summation of errors encountered while transmitting
packets.

Signed-off-by: Haishuang Yan <yanhaishuang@cmss.chinamobile.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index 305a04e..cc39cef 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -958,8 +958,8 @@ tx_error:
 		dev->stats.collisions++;
 	else if (err == -ENETUNREACH)
 		dev->stats.tx_carrier_errors++;
-	else
-		dev->stats.tx_errors++;
+
+	dev->stats.tx_errors++;
 	return NETDEV_TX_OK;
 }
 
@@ -1048,8 +1048,8 @@ tx_error:
 		dev->stats.collisions++;
 	else if (err == -ENETUNREACH)
 		dev->stats.tx_carrier_errors++;
-	else
-		dev->stats.tx_errors++;
+
+	dev->stats.tx_errors++;
 	return NETDEV_TX_OK;
 }
 #endif
-- 
cgit v0.10.2


From da01e18a37a57f360222d3a123b8f6994aa1ad14 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Thu, 23 Jun 2016 12:20:01 -0700
Subject: x86: avoid avoid passing around 'thread_info' in stack dumping code

None of the code actually wants a thread_info, it all wants a
task_struct, and it's just converting to a thread_info pointer much too
early.

No semantic change.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
index 7c247e7..0944218 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -14,7 +14,7 @@ extern int kstack_depth_to_print;
 struct thread_info;
 struct stacktrace_ops;
 
-typedef unsigned long (*walk_stack_t)(struct thread_info *tinfo,
+typedef unsigned long (*walk_stack_t)(struct task_struct *task,
 				      unsigned long *stack,
 				      unsigned long bp,
 				      const struct stacktrace_ops *ops,
@@ -23,13 +23,13 @@ typedef unsigned long (*walk_stack_t)(struct thread_info *tinfo,
 				      int *graph);
 
 extern unsigned long
-print_context_stack(struct thread_info *tinfo,
+print_context_stack(struct task_struct *task,
 		    unsigned long *stack, unsigned long bp,
 		    const struct stacktrace_ops *ops, void *data,
 		    unsigned long *end, int *graph);
 
 extern unsigned long
-print_context_stack_bp(struct thread_info *tinfo,
+print_context_stack_bp(struct task_struct *task,
 		       unsigned long *stack, unsigned long bp,
 		       const struct stacktrace_ops *ops, void *data,
 		       unsigned long *end, int *graph);
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 2bb25c3..d6209f3 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -42,16 +42,14 @@ void printk_address(unsigned long address)
 static void
 print_ftrace_graph_addr(unsigned long addr, void *data,
 			const struct stacktrace_ops *ops,
-			struct thread_info *tinfo, int *graph)
+			struct task_struct *task, int *graph)
 {
-	struct task_struct *task;
 	unsigned long ret_addr;
 	int index;
 
 	if (addr != (unsigned long)return_to_handler)
 		return;
 
-	task = tinfo->task;
 	index = task->curr_ret_stack;
 
 	if (!task->ret_stack || index < *graph)
@@ -68,7 +66,7 @@ print_ftrace_graph_addr(unsigned long addr, void *data,
 static inline void
 print_ftrace_graph_addr(unsigned long addr, void *data,
 			const struct stacktrace_ops *ops,
-			struct thread_info *tinfo, int *graph)
+			struct task_struct *task, int *graph)
 { }
 #endif
 
@@ -79,10 +77,10 @@ print_ftrace_graph_addr(unsigned long addr, void *data,
  * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
  */
 
-static inline int valid_stack_ptr(struct thread_info *tinfo,
+static inline int valid_stack_ptr(struct task_struct *task,
 			void *p, unsigned int size, void *end)
 {
-	void *t = tinfo;
+	void *t = task_thread_info(task);
 	if (end) {
 		if (p < end && p >= (end-THREAD_SIZE))
 			return 1;
@@ -93,14 +91,14 @@ static inline int valid_stack_ptr(struct thread_info *tinfo,
 }
 
 unsigned long
-print_context_stack(struct thread_info *tinfo,
+print_context_stack(struct task_struct *task,
 		unsigned long *stack, unsigned long bp,
 		const struct stacktrace_ops *ops, void *data,
 		unsigned long *end, int *graph)
 {
 	struct stack_frame *frame = (struct stack_frame *)bp;
 
-	while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) {
+	while (valid_stack_ptr(task, stack, sizeof(*stack), end)) {
 		unsigned long addr;
 
 		addr = *stack;
@@ -112,7 +110,7 @@ print_context_stack(struct thread_info *tinfo,
 			} else {
 				ops->address(data, addr, 0);
 			}
-			print_ftrace_graph_addr(addr, data, ops, tinfo, graph);
+			print_ftrace_graph_addr(addr, data, ops, task, graph);
 		}
 		stack++;
 	}
@@ -121,7 +119,7 @@ print_context_stack(struct thread_info *tinfo,
 EXPORT_SYMBOL_GPL(print_context_stack);
 
 unsigned long
-print_context_stack_bp(struct thread_info *tinfo,
+print_context_stack_bp(struct task_struct *task,
 		       unsigned long *stack, unsigned long bp,
 		       const struct stacktrace_ops *ops, void *data,
 		       unsigned long *end, int *graph)
@@ -129,7 +127,7 @@ print_context_stack_bp(struct thread_info *tinfo,
 	struct stack_frame *frame = (struct stack_frame *)bp;
 	unsigned long *ret_addr = &frame->return_address;
 
-	while (valid_stack_ptr(tinfo, ret_addr, sizeof(*ret_addr), end)) {
+	while (valid_stack_ptr(task, ret_addr, sizeof(*ret_addr), end)) {
 		unsigned long addr = *ret_addr;
 
 		if (!__kernel_text_address(addr))
@@ -139,7 +137,7 @@ print_context_stack_bp(struct thread_info *tinfo,
 			break;
 		frame = frame->next_frame;
 		ret_addr = &frame->return_address;
-		print_ftrace_graph_addr(addr, data, ops, tinfo, graph);
+		print_ftrace_graph_addr(addr, data, ops, task, graph);
 	}
 
 	return (unsigned long)frame;
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index 464ffd6..fef917e 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -61,15 +61,13 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
 		bp = stack_frame(task, regs);
 
 	for (;;) {
-		struct thread_info *context;
 		void *end_stack;
 
 		end_stack = is_hardirq_stack(stack, cpu);
 		if (!end_stack)
 			end_stack = is_softirq_stack(stack, cpu);
 
-		context = task_thread_info(task);
-		bp = ops->walk_stack(context, stack, bp, ops, data,
+		bp = ops->walk_stack(task, stack, bp, ops, data,
 				     end_stack, &graph);
 
 		/* Stop if not on irq stack */
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 5f1c626..d558a8a 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -153,7 +153,6 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
 		const struct stacktrace_ops *ops, void *data)
 {
 	const unsigned cpu = get_cpu();
-	struct thread_info *tinfo;
 	unsigned long *irq_stack = (unsigned long *)per_cpu(irq_stack_ptr, cpu);
 	unsigned long dummy;
 	unsigned used = 0;
@@ -179,7 +178,6 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
 	 * current stack address. If the stacks consist of nested
 	 * exceptions
 	 */
-	tinfo = task_thread_info(task);
 	while (!done) {
 		unsigned long *stack_end;
 		enum stack_type stype;
@@ -202,7 +200,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
 			if (ops->stack(data, id) < 0)
 				break;
 
-			bp = ops->walk_stack(tinfo, stack, bp, ops,
+			bp = ops->walk_stack(task, stack, bp, ops,
 					     data, stack_end, &graph);
 			ops->stack(data, "<EOE>");
 			/*
@@ -218,7 +216,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
 
 			if (ops->stack(data, "IRQ") < 0)
 				break;
-			bp = ops->walk_stack(tinfo, stack, bp,
+			bp = ops->walk_stack(task, stack, bp,
 				     ops, data, stack_end, &graph);
 			/*
 			 * We link to the next stack (which would be
@@ -240,7 +238,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
 	/*
 	 * This handles the process stack:
 	 */
-	bp = ops->walk_stack(tinfo, stack, bp, ops, data, NULL, &graph);
+	bp = ops->walk_stack(task, stack, bp, ops, data, NULL, &graph);
 	put_cpu();
 }
 EXPORT_SYMBOL(dump_trace);
-- 
cgit v0.10.2


From c40e4096bf0a12a76c349cbc4d1b2aa40a3d68cd Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@mellanox.com>
Date: Tue, 21 Jun 2016 16:36:06 +0300
Subject: MAINTAINERS: Update Mellanox's mlx4 Eth NIC driver entry

Tariq Toukan is replacing Eugenia (Jenny) Emantayev as the mlx4
Ethernet driver maintainer, thanks to Jenny and good luck to him.

Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/MAINTAINERS b/MAINTAINERS
index 2ebe195..8cffceb 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7406,7 +7406,7 @@ F:	drivers/scsi/megaraid.*
 F:	drivers/scsi/megaraid/
 
 MELLANOX ETHERNET DRIVER (mlx4_en)
-M: 	Eugenia Emantayev <eugenia@mellanox.com>
+M:	Tariq Toukan <tariqt@mellanox.com>
 L:	netdev@vger.kernel.org
 S:	Supported
 W:	http://www.mellanox.com
-- 
cgit v0.10.2


From 9e72ac7492149a229ce9039c680849cb682d7092 Mon Sep 17 00:00:00 2001
From: Ping Cheng <pinglinux@gmail.com>
Date: Thu, 23 Jun 2016 10:55:11 -0700
Subject: Input: wacom_w8001 - ignore invalid pen data packets

ThinkPad X60 Tablet PC (pen only device) sometime posts
packets that are larger than W8001_PKTLEN_TPCPEN.

Reported-by: Chris J Arges <christopherarges@gmail.com>
Tested-by: Chris J Arges <christopherarges@gmail.com>
Signed-off-by: Ping Cheng <pingc@wacom.com>
Reviewed-by: Peter Hutterer <peter.hutterer@who-t.net>
Cc: stable@vger.kernel.org
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>

diff --git a/drivers/input/touchscreen/wacom_w8001.c b/drivers/input/touchscreen/wacom_w8001.c
index b1b4127..0c9191c 100644
--- a/drivers/input/touchscreen/wacom_w8001.c
+++ b/drivers/input/touchscreen/wacom_w8001.c
@@ -339,6 +339,15 @@ static irqreturn_t w8001_interrupt(struct serio *serio,
 		w8001->idx = 0;
 		parse_multi_touch(w8001);
 		break;
+
+	default:
+		/*
+		 * ThinkPad X60 Tablet PC (pen only device) sometimes
+		 * sends invalid data packets that are larger than
+		 * W8001_PKTLEN_TPCPEN. Let's start over again.
+		 */
+		if (!w8001->touch_dev && w8001->idx > W8001_PKTLEN_TPCPEN - 1)
+			w8001->idx = 0;
 	}
 
 	return IRQ_HANDLED;
-- 
cgit v0.10.2


From 226ba707744a51acb4244724e09caacb1d96aed9 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Tue, 21 Jun 2016 16:09:00 -0700
Subject: Input: elantech - add more IC body types to the list

The touchpad in HP Pavilion 14-ab057ca reports it's version as 12 and
according to Elan both 11 and 12 are valid IC types and should be
identified as hw_version 4.

Reported-by: Patrick Lessard <Patrick.Lessard@cogeco.com>
Tested-by: Patrick Lessard <Patrick.Lessard@cogeco.com>
Cc: stable@vger.kernel.org
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>

diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c
index 78f93cf..be5b399 100644
--- a/drivers/input/mouse/elantech.c
+++ b/drivers/input/mouse/elantech.c
@@ -1568,13 +1568,7 @@ static int elantech_set_properties(struct elantech_data *etd)
 		case 5:
 			etd->hw_version = 3;
 			break;
-		case 6:
-		case 7:
-		case 8:
-		case 9:
-		case 10:
-		case 13:
-		case 14:
+		case 6 ... 14:
 			etd->hw_version = 4;
 			break;
 		default:
-- 
cgit v0.10.2


From 3c67a829bd45f99b5c03580bb898c99fcc023356 Mon Sep 17 00:00:00 2001
From: Mike Galbraith <umgwanakikbuti@gmail.com>
Date: Thu, 23 Jun 2016 08:45:42 +0200
Subject: cpufreq: pcc-cpufreq: Fix doorbell.access_width

Commit 920de6ebfab8 (ACPICA: Hardware: Enhance
acpi_hw_validate_register() with access_width/bit_offset awareness)
apparently exposed a latent bug, doorbell.access_width is initialized
to 64, but per Lv Zheng, it should be 4, and indeed, making that
change does bring pcc-cpufreq back to life.

Fixes: 920de6ebfab8 (ACPICA: Hardware: Enhance acpi_hw_validate_register() with access_width/bit_offset awareness)
Suggested-by: Lv Zheng <lv.zheng@intel.com>
Signed-off-by: Mike Galbraith <umgwanakikbuti@gmail.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/drivers/cpufreq/pcc-cpufreq.c b/drivers/cpufreq/pcc-cpufreq.c
index 808a320..a7ecb9a 100644
--- a/drivers/cpufreq/pcc-cpufreq.c
+++ b/drivers/cpufreq/pcc-cpufreq.c
@@ -487,7 +487,7 @@ static int __init pcc_cpufreq_probe(void)
 	doorbell.space_id = reg_resource->space_id;
 	doorbell.bit_width = reg_resource->bit_width;
 	doorbell.bit_offset = reg_resource->bit_offset;
-	doorbell.access_width = 64;
+	doorbell.access_width = 4;
 	doorbell.address = reg_resource->address;
 
 	pr_debug("probe: doorbell: space_id is %d, bit_width is %d, "
-- 
cgit v0.10.2


From 0d37189e80163c653771916afe28fae1a8d14daa Mon Sep 17 00:00:00 2001
From: Chanwoo Choi <cw00.choi@samsung.com>
Date: Thu, 23 Jun 2016 11:18:43 +0900
Subject: PM / devfreq: Send the DEVFREQ_POSTCHANGE notification when target()
 is failed

This patch sends the DEVFREQ_POSTCHANGE notification when
devfreq->profile->targer() is failed. The PRECHANGE/POSTCHANGE
should be paired.

Fixes: 0fe3a66410a3 (PM / devfreq: Add new DEVFREQ_TRANSITION_NOTIFIER notifier)
Reported-by: Lin Huang <hl@rock-chips.com>
Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
index c7f47e3..e92418f 100644
--- a/drivers/devfreq/devfreq.c
+++ b/drivers/devfreq/devfreq.c
@@ -268,8 +268,11 @@ int update_devfreq(struct devfreq *devfreq)
 	devfreq_notify_transition(devfreq, &freqs, DEVFREQ_PRECHANGE);
 
 	err = devfreq->profile->target(devfreq->dev.parent, &freq, flags);
-	if (err)
+	if (err) {
+		freqs.new = cur_freq;
+		devfreq_notify_transition(devfreq, &freqs, DEVFREQ_POSTCHANGE);
 		return err;
+	}
 
 	freqs.new = freq;
 	devfreq_notify_transition(devfreq, &freqs, DEVFREQ_POSTCHANGE);
-- 
cgit v0.10.2


From 82d6489d0fed2ec8a8c48c19e8d8a04ac8e5bb26 Mon Sep 17 00:00:00 2001
From: Daniel Bristot de Oliveira <bristot@redhat.com>
Date: Wed, 22 Jun 2016 17:28:41 -0300
Subject: cgroup: Disable IRQs while holding css_set_lock

While testing the deadline scheduler + cgroup setup I hit this
warning.

[  132.612935] ------------[ cut here ]------------
[  132.612951] WARNING: CPU: 5 PID: 0 at kernel/softirq.c:150 __local_bh_enable_ip+0x6b/0x80
[  132.612952] Modules linked in: (a ton of modules...)
[  132.612981] CPU: 5 PID: 0 Comm: swapper/5 Not tainted 4.7.0-rc2 #2
[  132.612981] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.8.2-20150714_191134- 04/01/2014
[  132.612982]  0000000000000086 45c8bb5effdd088b ffff88013fd43da0 ffffffff813d229e
[  132.612984]  0000000000000000 0000000000000000 ffff88013fd43de0 ffffffff810a652b
[  132.612985]  00000096811387b5 0000000000000200 ffff8800bab29d80 ffff880034c54c00
[  132.612986] Call Trace:
[  132.612987]  <IRQ>  [<ffffffff813d229e>] dump_stack+0x63/0x85
[  132.612994]  [<ffffffff810a652b>] __warn+0xcb/0xf0
[  132.612997]  [<ffffffff810e76a0>] ? push_dl_task.part.32+0x170/0x170
[  132.612999]  [<ffffffff810a665d>] warn_slowpath_null+0x1d/0x20
[  132.613000]  [<ffffffff810aba5b>] __local_bh_enable_ip+0x6b/0x80
[  132.613008]  [<ffffffff817d6c8a>] _raw_write_unlock_bh+0x1a/0x20
[  132.613010]  [<ffffffff817d6c9e>] _raw_spin_unlock_bh+0xe/0x10
[  132.613015]  [<ffffffff811388ac>] put_css_set+0x5c/0x60
[  132.613016]  [<ffffffff8113dc7f>] cgroup_free+0x7f/0xa0
[  132.613017]  [<ffffffff810a3912>] __put_task_struct+0x42/0x140
[  132.613018]  [<ffffffff810e776a>] dl_task_timer+0xca/0x250
[  132.613027]  [<ffffffff810e76a0>] ? push_dl_task.part.32+0x170/0x170
[  132.613030]  [<ffffffff8111371e>] __hrtimer_run_queues+0xee/0x270
[  132.613031]  [<ffffffff81113ec8>] hrtimer_interrupt+0xa8/0x190
[  132.613034]  [<ffffffff81051a58>] local_apic_timer_interrupt+0x38/0x60
[  132.613035]  [<ffffffff817d9b0d>] smp_apic_timer_interrupt+0x3d/0x50
[  132.613037]  [<ffffffff817d7c5c>] apic_timer_interrupt+0x8c/0xa0
[  132.613038]  <EOI>  [<ffffffff81063466>] ? native_safe_halt+0x6/0x10
[  132.613043]  [<ffffffff81037a4e>] default_idle+0x1e/0xd0
[  132.613044]  [<ffffffff810381cf>] arch_cpu_idle+0xf/0x20
[  132.613046]  [<ffffffff810e8fda>] default_idle_call+0x2a/0x40
[  132.613047]  [<ffffffff810e92d7>] cpu_startup_entry+0x2e7/0x340
[  132.613048]  [<ffffffff81050235>] start_secondary+0x155/0x190
[  132.613049] ---[ end trace f91934d162ce9977 ]---

The warn is the spin_(lock|unlock)_bh(&css_set_lock) in the interrupt
context. Converting the spin_lock_bh to spin_lock_irq(save) to avoid
this problem - and other problems of sharing a spinlock with an
interrupt.

Cc: Tejun Heo <tj@kernel.org>
Cc: Li Zefan <lizefan@huawei.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Juri Lelli <juri.lelli@arm.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: cgroups@vger.kernel.org
Cc: stable@vger.kernel.org # 4.5+
Cc: linux-kernel@vger.kernel.org
Reviewed-by: Rik van Riel <riel@redhat.com>
Reviewed-by: "Luis Claudio R. Goncalves" <lgoncalv@redhat.com>
Signed-off-by: Daniel Bristot de Oliveira <bristot@redhat.com>
Acked-by: Zefan Li <lizefan@huawei.com>
Signed-off-by: Tejun Heo <tj@kernel.org>

diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 688eb0c..75c0ff0 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -837,6 +837,8 @@ static void put_css_set_locked(struct css_set *cset)
 
 static void put_css_set(struct css_set *cset)
 {
+	unsigned long flags;
+
 	/*
 	 * Ensure that the refcount doesn't hit zero while any readers
 	 * can see it. Similar to atomic_dec_and_lock(), but for an
@@ -845,9 +847,9 @@ static void put_css_set(struct css_set *cset)
 	if (atomic_add_unless(&cset->refcount, -1, 1))
 		return;
 
-	spin_lock_bh(&css_set_lock);
+	spin_lock_irqsave(&css_set_lock, flags);
 	put_css_set_locked(cset);
-	spin_unlock_bh(&css_set_lock);
+	spin_unlock_irqrestore(&css_set_lock, flags);
 }
 
 /*
@@ -1070,11 +1072,11 @@ static struct css_set *find_css_set(struct css_set *old_cset,
 
 	/* First see if we already have a cgroup group that matches
 	 * the desired set */
-	spin_lock_bh(&css_set_lock);
+	spin_lock_irq(&css_set_lock);
 	cset = find_existing_css_set(old_cset, cgrp, template);
 	if (cset)
 		get_css_set(cset);
-	spin_unlock_bh(&css_set_lock);
+	spin_unlock_irq(&css_set_lock);
 
 	if (cset)
 		return cset;
@@ -1102,7 +1104,7 @@ static struct css_set *find_css_set(struct css_set *old_cset,
 	 * find_existing_css_set() */
 	memcpy(cset->subsys, template, sizeof(cset->subsys));
 
-	spin_lock_bh(&css_set_lock);
+	spin_lock_irq(&css_set_lock);
 	/* Add reference counts and links from the new css_set. */
 	list_for_each_entry(link, &old_cset->cgrp_links, cgrp_link) {
 		struct cgroup *c = link->cgrp;
@@ -1128,7 +1130,7 @@ static struct css_set *find_css_set(struct css_set *old_cset,
 		css_get(css);
 	}
 
-	spin_unlock_bh(&css_set_lock);
+	spin_unlock_irq(&css_set_lock);
 
 	return cset;
 }
@@ -1192,7 +1194,7 @@ static void cgroup_destroy_root(struct cgroup_root *root)
 	 * Release all the links from cset_links to this hierarchy's
 	 * root cgroup
 	 */
-	spin_lock_bh(&css_set_lock);
+	spin_lock_irq(&css_set_lock);
 
 	list_for_each_entry_safe(link, tmp_link, &cgrp->cset_links, cset_link) {
 		list_del(&link->cset_link);
@@ -1200,7 +1202,7 @@ static void cgroup_destroy_root(struct cgroup_root *root)
 		kfree(link);
 	}
 
-	spin_unlock_bh(&css_set_lock);
+	spin_unlock_irq(&css_set_lock);
 
 	if (!list_empty(&root->root_list)) {
 		list_del(&root->root_list);
@@ -1600,11 +1602,11 @@ static int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask)
 		ss->root = dst_root;
 		css->cgroup = dcgrp;
 
-		spin_lock_bh(&css_set_lock);
+		spin_lock_irq(&css_set_lock);
 		hash_for_each(css_set_table, i, cset, hlist)
 			list_move_tail(&cset->e_cset_node[ss->id],
 				       &dcgrp->e_csets[ss->id]);
-		spin_unlock_bh(&css_set_lock);
+		spin_unlock_irq(&css_set_lock);
 
 		/* default hierarchy doesn't enable controllers by default */
 		dst_root->subsys_mask |= 1 << ssid;
@@ -1640,10 +1642,10 @@ static int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node,
 	if (!buf)
 		return -ENOMEM;
 
-	spin_lock_bh(&css_set_lock);
+	spin_lock_irq(&css_set_lock);
 	ns_cgroup = current_cgns_cgroup_from_root(kf_cgroot);
 	len = kernfs_path_from_node(kf_node, ns_cgroup->kn, buf, PATH_MAX);
-	spin_unlock_bh(&css_set_lock);
+	spin_unlock_irq(&css_set_lock);
 
 	if (len >= PATH_MAX)
 		len = -ERANGE;
@@ -1897,7 +1899,7 @@ static void cgroup_enable_task_cg_lists(void)
 {
 	struct task_struct *p, *g;
 
-	spin_lock_bh(&css_set_lock);
+	spin_lock_irq(&css_set_lock);
 
 	if (use_task_css_set_links)
 		goto out_unlock;
@@ -1922,8 +1924,12 @@ static void cgroup_enable_task_cg_lists(void)
 		 * entry won't be deleted though the process has exited.
 		 * Do it while holding siglock so that we don't end up
 		 * racing against cgroup_exit().
+		 *
+		 * Interrupts were already disabled while acquiring
+		 * the css_set_lock, so we do not need to disable it
+		 * again when acquiring the sighand->siglock here.
 		 */
-		spin_lock_irq(&p->sighand->siglock);
+		spin_lock(&p->sighand->siglock);
 		if (!(p->flags & PF_EXITING)) {
 			struct css_set *cset = task_css_set(p);
 
@@ -1932,11 +1938,11 @@ static void cgroup_enable_task_cg_lists(void)
 			list_add_tail(&p->cg_list, &cset->tasks);
 			get_css_set(cset);
 		}
-		spin_unlock_irq(&p->sighand->siglock);
+		spin_unlock(&p->sighand->siglock);
 	} while_each_thread(g, p);
 	read_unlock(&tasklist_lock);
 out_unlock:
-	spin_unlock_bh(&css_set_lock);
+	spin_unlock_irq(&css_set_lock);
 }
 
 static void init_cgroup_housekeeping(struct cgroup *cgrp)
@@ -2043,13 +2049,13 @@ static int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask)
 	 * Link the root cgroup in this hierarchy into all the css_set
 	 * objects.
 	 */
-	spin_lock_bh(&css_set_lock);
+	spin_lock_irq(&css_set_lock);
 	hash_for_each(css_set_table, i, cset, hlist) {
 		link_css_set(&tmp_links, cset, root_cgrp);
 		if (css_set_populated(cset))
 			cgroup_update_populated(root_cgrp, true);
 	}
-	spin_unlock_bh(&css_set_lock);
+	spin_unlock_irq(&css_set_lock);
 
 	BUG_ON(!list_empty(&root_cgrp->self.children));
 	BUG_ON(atomic_read(&root->nr_cgrps) != 1);
@@ -2256,11 +2262,11 @@ out_mount:
 		struct cgroup *cgrp;
 
 		mutex_lock(&cgroup_mutex);
-		spin_lock_bh(&css_set_lock);
+		spin_lock_irq(&css_set_lock);
 
 		cgrp = cset_cgroup_from_root(ns->root_cset, root);
 
-		spin_unlock_bh(&css_set_lock);
+		spin_unlock_irq(&css_set_lock);
 		mutex_unlock(&cgroup_mutex);
 
 		nsdentry = kernfs_node_dentry(cgrp->kn, dentry->d_sb);
@@ -2337,11 +2343,11 @@ char *cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen,
 	char *ret;
 
 	mutex_lock(&cgroup_mutex);
-	spin_lock_bh(&css_set_lock);
+	spin_lock_irq(&css_set_lock);
 
 	ret = cgroup_path_ns_locked(cgrp, buf, buflen, ns);
 
-	spin_unlock_bh(&css_set_lock);
+	spin_unlock_irq(&css_set_lock);
 	mutex_unlock(&cgroup_mutex);
 
 	return ret;
@@ -2369,7 +2375,7 @@ char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
 	char *path = NULL;
 
 	mutex_lock(&cgroup_mutex);
-	spin_lock_bh(&css_set_lock);
+	spin_lock_irq(&css_set_lock);
 
 	root = idr_get_next(&cgroup_hierarchy_idr, &hierarchy_id);
 
@@ -2382,7 +2388,7 @@ char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
 			path = buf;
 	}
 
-	spin_unlock_bh(&css_set_lock);
+	spin_unlock_irq(&css_set_lock);
 	mutex_unlock(&cgroup_mutex);
 	return path;
 }
@@ -2557,7 +2563,7 @@ static int cgroup_taskset_migrate(struct cgroup_taskset *tset,
 	 * the new cgroup.  There are no failure cases after here, so this
 	 * is the commit point.
 	 */
-	spin_lock_bh(&css_set_lock);
+	spin_lock_irq(&css_set_lock);
 	list_for_each_entry(cset, &tset->src_csets, mg_node) {
 		list_for_each_entry_safe(task, tmp_task, &cset->mg_tasks, cg_list) {
 			struct css_set *from_cset = task_css_set(task);
@@ -2568,7 +2574,7 @@ static int cgroup_taskset_migrate(struct cgroup_taskset *tset,
 			put_css_set_locked(from_cset);
 		}
 	}
-	spin_unlock_bh(&css_set_lock);
+	spin_unlock_irq(&css_set_lock);
 
 	/*
 	 * Migration is committed, all target tasks are now on dst_csets.
@@ -2597,13 +2603,13 @@ out_cancel_attach:
 		}
 	} while_each_subsys_mask();
 out_release_tset:
-	spin_lock_bh(&css_set_lock);
+	spin_lock_irq(&css_set_lock);
 	list_splice_init(&tset->dst_csets, &tset->src_csets);
 	list_for_each_entry_safe(cset, tmp_cset, &tset->src_csets, mg_node) {
 		list_splice_tail_init(&cset->mg_tasks, &cset->tasks);
 		list_del_init(&cset->mg_node);
 	}
-	spin_unlock_bh(&css_set_lock);
+	spin_unlock_irq(&css_set_lock);
 	return ret;
 }
 
@@ -2634,7 +2640,7 @@ static void cgroup_migrate_finish(struct list_head *preloaded_csets)
 
 	lockdep_assert_held(&cgroup_mutex);
 
-	spin_lock_bh(&css_set_lock);
+	spin_lock_irq(&css_set_lock);
 	list_for_each_entry_safe(cset, tmp_cset, preloaded_csets, mg_preload_node) {
 		cset->mg_src_cgrp = NULL;
 		cset->mg_dst_cgrp = NULL;
@@ -2642,7 +2648,7 @@ static void cgroup_migrate_finish(struct list_head *preloaded_csets)
 		list_del_init(&cset->mg_preload_node);
 		put_css_set_locked(cset);
 	}
-	spin_unlock_bh(&css_set_lock);
+	spin_unlock_irq(&css_set_lock);
 }
 
 /**
@@ -2783,7 +2789,7 @@ static int cgroup_migrate(struct task_struct *leader, bool threadgroup,
 	 * already PF_EXITING could be freed from underneath us unless we
 	 * take an rcu_read_lock.
 	 */
-	spin_lock_bh(&css_set_lock);
+	spin_lock_irq(&css_set_lock);
 	rcu_read_lock();
 	task = leader;
 	do {
@@ -2792,7 +2798,7 @@ static int cgroup_migrate(struct task_struct *leader, bool threadgroup,
 			break;
 	} while_each_thread(leader, task);
 	rcu_read_unlock();
-	spin_unlock_bh(&css_set_lock);
+	spin_unlock_irq(&css_set_lock);
 
 	return cgroup_taskset_migrate(&tset, root);
 }
@@ -2816,7 +2822,7 @@ static int cgroup_attach_task(struct cgroup *dst_cgrp,
 		return -EBUSY;
 
 	/* look up all src csets */
-	spin_lock_bh(&css_set_lock);
+	spin_lock_irq(&css_set_lock);
 	rcu_read_lock();
 	task = leader;
 	do {
@@ -2826,7 +2832,7 @@ static int cgroup_attach_task(struct cgroup *dst_cgrp,
 			break;
 	} while_each_thread(leader, task);
 	rcu_read_unlock();
-	spin_unlock_bh(&css_set_lock);
+	spin_unlock_irq(&css_set_lock);
 
 	/* prepare dst csets and commit */
 	ret = cgroup_migrate_prepare_dst(&preloaded_csets);
@@ -2859,9 +2865,9 @@ static int cgroup_procs_write_permission(struct task_struct *task,
 		struct cgroup *cgrp;
 		struct inode *inode;
 
-		spin_lock_bh(&css_set_lock);
+		spin_lock_irq(&css_set_lock);
 		cgrp = task_cgroup_from_root(task, &cgrp_dfl_root);
-		spin_unlock_bh(&css_set_lock);
+		spin_unlock_irq(&css_set_lock);
 
 		while (!cgroup_is_descendant(dst_cgrp, cgrp))
 			cgrp = cgroup_parent(cgrp);
@@ -2962,9 +2968,9 @@ int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
 		if (root == &cgrp_dfl_root)
 			continue;
 
-		spin_lock_bh(&css_set_lock);
+		spin_lock_irq(&css_set_lock);
 		from_cgrp = task_cgroup_from_root(from, root);
-		spin_unlock_bh(&css_set_lock);
+		spin_unlock_irq(&css_set_lock);
 
 		retval = cgroup_attach_task(from_cgrp, tsk, false);
 		if (retval)
@@ -3080,7 +3086,7 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp)
 	percpu_down_write(&cgroup_threadgroup_rwsem);
 
 	/* look up all csses currently attached to @cgrp's subtree */
-	spin_lock_bh(&css_set_lock);
+	spin_lock_irq(&css_set_lock);
 	cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) {
 		struct cgrp_cset_link *link;
 
@@ -3088,14 +3094,14 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp)
 			cgroup_migrate_add_src(link->cset, dsct,
 					       &preloaded_csets);
 	}
-	spin_unlock_bh(&css_set_lock);
+	spin_unlock_irq(&css_set_lock);
 
 	/* NULL dst indicates self on default hierarchy */
 	ret = cgroup_migrate_prepare_dst(&preloaded_csets);
 	if (ret)
 		goto out_finish;
 
-	spin_lock_bh(&css_set_lock);
+	spin_lock_irq(&css_set_lock);
 	list_for_each_entry(src_cset, &preloaded_csets, mg_preload_node) {
 		struct task_struct *task, *ntask;
 
@@ -3107,7 +3113,7 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp)
 		list_for_each_entry_safe(task, ntask, &src_cset->tasks, cg_list)
 			cgroup_taskset_add(task, &tset);
 	}
-	spin_unlock_bh(&css_set_lock);
+	spin_unlock_irq(&css_set_lock);
 
 	ret = cgroup_taskset_migrate(&tset, cgrp->root);
 out_finish:
@@ -3908,10 +3914,10 @@ static int cgroup_task_count(const struct cgroup *cgrp)
 	int count = 0;
 	struct cgrp_cset_link *link;
 
-	spin_lock_bh(&css_set_lock);
+	spin_lock_irq(&css_set_lock);
 	list_for_each_entry(link, &cgrp->cset_links, cset_link)
 		count += atomic_read(&link->cset->refcount);
-	spin_unlock_bh(&css_set_lock);
+	spin_unlock_irq(&css_set_lock);
 	return count;
 }
 
@@ -4249,7 +4255,7 @@ void css_task_iter_start(struct cgroup_subsys_state *css,
 
 	memset(it, 0, sizeof(*it));
 
-	spin_lock_bh(&css_set_lock);
+	spin_lock_irq(&css_set_lock);
 
 	it->ss = css->ss;
 
@@ -4262,7 +4268,7 @@ void css_task_iter_start(struct cgroup_subsys_state *css,
 
 	css_task_iter_advance_css_set(it);
 
-	spin_unlock_bh(&css_set_lock);
+	spin_unlock_irq(&css_set_lock);
 }
 
 /**
@@ -4280,7 +4286,7 @@ struct task_struct *css_task_iter_next(struct css_task_iter *it)
 		it->cur_task = NULL;
 	}
 
-	spin_lock_bh(&css_set_lock);
+	spin_lock_irq(&css_set_lock);
 
 	if (it->task_pos) {
 		it->cur_task = list_entry(it->task_pos, struct task_struct,
@@ -4289,7 +4295,7 @@ struct task_struct *css_task_iter_next(struct css_task_iter *it)
 		css_task_iter_advance(it);
 	}
 
-	spin_unlock_bh(&css_set_lock);
+	spin_unlock_irq(&css_set_lock);
 
 	return it->cur_task;
 }
@@ -4303,10 +4309,10 @@ struct task_struct *css_task_iter_next(struct css_task_iter *it)
 void css_task_iter_end(struct css_task_iter *it)
 {
 	if (it->cur_cset) {
-		spin_lock_bh(&css_set_lock);
+		spin_lock_irq(&css_set_lock);
 		list_del(&it->iters_node);
 		put_css_set_locked(it->cur_cset);
-		spin_unlock_bh(&css_set_lock);
+		spin_unlock_irq(&css_set_lock);
 	}
 
 	if (it->cur_task)
@@ -4338,10 +4344,10 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from)
 	mutex_lock(&cgroup_mutex);
 
 	/* all tasks in @from are being moved, all csets are source */
-	spin_lock_bh(&css_set_lock);
+	spin_lock_irq(&css_set_lock);
 	list_for_each_entry(link, &from->cset_links, cset_link)
 		cgroup_migrate_add_src(link->cset, to, &preloaded_csets);
-	spin_unlock_bh(&css_set_lock);
+	spin_unlock_irq(&css_set_lock);
 
 	ret = cgroup_migrate_prepare_dst(&preloaded_csets);
 	if (ret)
@@ -5449,10 +5455,10 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
 	 */
 	cgrp->self.flags &= ~CSS_ONLINE;
 
-	spin_lock_bh(&css_set_lock);
+	spin_lock_irq(&css_set_lock);
 	list_for_each_entry(link, &cgrp->cset_links, cset_link)
 		link->cset->dead = true;
-	spin_unlock_bh(&css_set_lock);
+	spin_unlock_irq(&css_set_lock);
 
 	/* initiate massacre of all css's */
 	for_each_css(css, ssid, cgrp)
@@ -5723,7 +5729,7 @@ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
 		goto out;
 
 	mutex_lock(&cgroup_mutex);
-	spin_lock_bh(&css_set_lock);
+	spin_lock_irq(&css_set_lock);
 
 	for_each_root(root) {
 		struct cgroup_subsys *ss;
@@ -5776,7 +5782,7 @@ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
 
 	retval = 0;
 out_unlock:
-	spin_unlock_bh(&css_set_lock);
+	spin_unlock_irq(&css_set_lock);
 	mutex_unlock(&cgroup_mutex);
 	kfree(buf);
 out:
@@ -5921,13 +5927,13 @@ void cgroup_post_fork(struct task_struct *child)
 	if (use_task_css_set_links) {
 		struct css_set *cset;
 
-		spin_lock_bh(&css_set_lock);
+		spin_lock_irq(&css_set_lock);
 		cset = task_css_set(current);
 		if (list_empty(&child->cg_list)) {
 			get_css_set(cset);
 			css_set_move_task(child, NULL, cset, false);
 		}
-		spin_unlock_bh(&css_set_lock);
+		spin_unlock_irq(&css_set_lock);
 	}
 
 	/*
@@ -5972,9 +5978,9 @@ void cgroup_exit(struct task_struct *tsk)
 	cset = task_css_set(tsk);
 
 	if (!list_empty(&tsk->cg_list)) {
-		spin_lock_bh(&css_set_lock);
+		spin_lock_irq(&css_set_lock);
 		css_set_move_task(tsk, cset, NULL, false);
-		spin_unlock_bh(&css_set_lock);
+		spin_unlock_irq(&css_set_lock);
 	} else {
 		get_css_set(cset);
 	}
@@ -6042,9 +6048,9 @@ static void cgroup_release_agent(struct work_struct *work)
 	if (!pathbuf || !agentbuf)
 		goto out;
 
-	spin_lock_bh(&css_set_lock);
+	spin_lock_irq(&css_set_lock);
 	path = cgroup_path_ns_locked(cgrp, pathbuf, PATH_MAX, &init_cgroup_ns);
-	spin_unlock_bh(&css_set_lock);
+	spin_unlock_irq(&css_set_lock);
 	if (!path)
 		goto out;
 
@@ -6304,12 +6310,12 @@ struct cgroup_namespace *copy_cgroup_ns(unsigned long flags,
 		return ERR_PTR(-EPERM);
 
 	mutex_lock(&cgroup_mutex);
-	spin_lock_bh(&css_set_lock);
+	spin_lock_irq(&css_set_lock);
 
 	cset = task_css_set(current);
 	get_css_set(cset);
 
-	spin_unlock_bh(&css_set_lock);
+	spin_unlock_irq(&css_set_lock);
 	mutex_unlock(&cgroup_mutex);
 
 	new_ns = alloc_cgroup_ns();
@@ -6433,7 +6439,7 @@ static int current_css_set_cg_links_read(struct seq_file *seq, void *v)
 	if (!name_buf)
 		return -ENOMEM;
 
-	spin_lock_bh(&css_set_lock);
+	spin_lock_irq(&css_set_lock);
 	rcu_read_lock();
 	cset = rcu_dereference(current->cgroups);
 	list_for_each_entry(link, &cset->cgrp_links, cgrp_link) {
@@ -6444,7 +6450,7 @@ static int current_css_set_cg_links_read(struct seq_file *seq, void *v)
 			   c->root->hierarchy_id, name_buf);
 	}
 	rcu_read_unlock();
-	spin_unlock_bh(&css_set_lock);
+	spin_unlock_irq(&css_set_lock);
 	kfree(name_buf);
 	return 0;
 }
@@ -6455,7 +6461,7 @@ static int cgroup_css_links_read(struct seq_file *seq, void *v)
 	struct cgroup_subsys_state *css = seq_css(seq);
 	struct cgrp_cset_link *link;
 
-	spin_lock_bh(&css_set_lock);
+	spin_lock_irq(&css_set_lock);
 	list_for_each_entry(link, &css->cgroup->cset_links, cset_link) {
 		struct css_set *cset = link->cset;
 		struct task_struct *task;
@@ -6478,7 +6484,7 @@ static int cgroup_css_links_read(struct seq_file *seq, void *v)
 	overflow:
 		seq_puts(seq, "  ...\n");
 	}
-	spin_unlock_bh(&css_set_lock);
+	spin_unlock_irq(&css_set_lock);
 	return 0;
 }
 
-- 
cgit v0.10.2


From 52dfcc5ccfbb6697ac3cac7f7ff1e712760e1216 Mon Sep 17 00:00:00 2001
From: Dmitrii Tcvetkov <demfloro@demfloro.ru>
Date: Mon, 20 Jun 2016 13:52:14 +0300
Subject: drm/nouveau: fix for disabled fbdev emulation

Hello,

after this commit:

commit f045f459d925138fe7d6193a8c86406bda7e49da
Author: Ben Skeggs <bskeggs@redhat.com>
Date:   Thu Jun 2 12:23:31 2016 +1000
    drm/nouveau/fbcon: fix out-of-bounds memory accesses

kernel started to oops when loading nouveau module when using GTX 780 Ti
video adapter. This patch fixes the problem.

Bug report: https://bugzilla.kernel.org/show_bug.cgi?id=120591

Signed-off-by: Dmitrii Tcvetkov <demfloro@demfloro.ru>
Suggested-by: Ilia Mirkin <imirkin@alum.mit.edu>
Fixes: f045f459d925 ("nouveau_fbcon_init()")
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
Cc: stable@vger.kernel.org

diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
index 300ea03..d1f248f 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
@@ -552,7 +552,8 @@ nouveau_fbcon_init(struct drm_device *dev)
 	if (ret)
 		goto fini;
 
-	fbcon->helper.fbdev->pixmap.buf_align = 4;
+	if (fbcon->helper.fbdev)
+		fbcon->helper.fbdev->pixmap.buf_align = 4;
 	return 0;
 
 fini:
-- 
cgit v0.10.2


From 60842ef8128e7bf58c024814cd0dc14319232b6c Mon Sep 17 00:00:00 2001
From: Sinclair Yeh <syeh@vmware.com>
Date: Thu, 23 Jun 2016 17:37:34 -0700
Subject: Input: vmmouse - remove port reservation

The VMWare EFI BIOS will expose port 0x5658 as an ACPI resource.  This
causes the port to be reserved by the APCI module as the system comes up,
making it unavailable to be reserved again by other drivers, thus
preserving this VMWare port for special use in a VMWare guest.

This port is designed to be shared among multiple VMWare services, such as
the VMMOUSE.  Because of this, VMMOUSE should not try to reserve this port
on its own.

The VMWare non-EFI BIOS does not do this to preserve compatibility with
existing/legacy VMs.  It is known that there is small chance a VM may be
configured such that these ports get reserved by other non-VMWare devices,
and if this ever happens, the result is undefined.

Signed-off-by: Sinclair Yeh <syeh@vmware.com>
Reviewed-by: Thomas Hellstrom <thellstrom@vmware.com>
Cc: <stable@vger.kernel.org> # 4.1-
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>

diff --git a/drivers/input/mouse/vmmouse.c b/drivers/input/mouse/vmmouse.c
index a3f0f5a..0f58678 100644
--- a/drivers/input/mouse/vmmouse.c
+++ b/drivers/input/mouse/vmmouse.c
@@ -355,18 +355,11 @@ int vmmouse_detect(struct psmouse *psmouse, bool set_properties)
 		return -ENXIO;
 	}
 
-	if (!request_region(VMMOUSE_PROTO_PORT, 4, "vmmouse")) {
-		psmouse_dbg(psmouse, "VMMouse port in use.\n");
-		return -EBUSY;
-	}
-
 	/* Check if the device is present */
 	response = ~VMMOUSE_PROTO_MAGIC;
 	VMMOUSE_CMD(GETVERSION, 0, version, response, dummy1, dummy2);
-	if (response != VMMOUSE_PROTO_MAGIC || version == 0xffffffffU) {
-		release_region(VMMOUSE_PROTO_PORT, 4);
+	if (response != VMMOUSE_PROTO_MAGIC || version == 0xffffffffU)
 		return -ENXIO;
-	}
 
 	if (set_properties) {
 		psmouse->vendor = VMMOUSE_VENDOR;
@@ -374,8 +367,6 @@ int vmmouse_detect(struct psmouse *psmouse, bool set_properties)
 		psmouse->model = version;
 	}
 
-	release_region(VMMOUSE_PROTO_PORT, 4);
-
 	return 0;
 }
 
@@ -394,7 +385,6 @@ static void vmmouse_disconnect(struct psmouse *psmouse)
 	psmouse_reset(psmouse);
 	input_unregister_device(priv->abs_dev);
 	kfree(priv);
-	release_region(VMMOUSE_PROTO_PORT, 4);
 }
 
 /**
@@ -438,15 +428,10 @@ int vmmouse_init(struct psmouse *psmouse)
 	struct input_dev *rel_dev = psmouse->dev, *abs_dev;
 	int error;
 
-	if (!request_region(VMMOUSE_PROTO_PORT, 4, "vmmouse")) {
-		psmouse_dbg(psmouse, "VMMouse port in use.\n");
-		return -EBUSY;
-	}
-
 	psmouse_reset(psmouse);
 	error = vmmouse_enable(psmouse);
 	if (error)
-		goto release_region;
+		return error;
 
 	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
 	abs_dev = input_allocate_device();
@@ -502,8 +487,5 @@ init_fail:
 	kfree(priv);
 	psmouse->private = NULL;
 
-release_region:
-	release_region(VMMOUSE_PROTO_PORT, 4);
-
 	return error;
 }
-- 
cgit v0.10.2


From 1ee6667cd8d183b2fed12f97285f184431d2caf9 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Thu, 23 Jun 2016 17:50:39 -0700
Subject: libnvdimm, pfn, dax: fix initialization vs autodetect for mode +
 alignment

The updated ndctl unit tests discovered that if a pfn configuration with
a 4K alignment is read from the namespace, that alignment will be
ignored in favor of the default 2M alignment.  The result is that the
configuration will fail initialization with a message like:

    dax6.1: bad offset: 0x22000 dax disabled align: 0x200000

Fix this by allowing the alignment read from the info block to override
the default which is 2M not 0 in the autodetect path.  This also fixes a
similar problem with the mode and alignment settings silently being
overwritten by the kernel when userspace has changed it.  We now will
either overwrite the info block if userspace changes the uuid or fail
and warn if a live setting disagrees with the info block.

Cc: <stable@vger.kernel.org>
Cc: Micah Parrish <micah.parrish@hpe.com>
Cc: Toshi Kani <toshi.kani@hpe.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>

diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c
index f7718ec..cea8350 100644
--- a/drivers/nvdimm/pfn_devs.c
+++ b/drivers/nvdimm/pfn_devs.c
@@ -344,6 +344,8 @@ struct device *nd_pfn_create(struct nd_region *nd_region)
 int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
 {
 	u64 checksum, offset;
+	unsigned long align;
+	enum nd_pfn_mode mode;
 	struct nd_namespace_io *nsio;
 	struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
 	struct nd_namespace_common *ndns = nd_pfn->ndns;
@@ -386,22 +388,50 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
 		return -ENXIO;
 	}
 
+	align = le32_to_cpu(pfn_sb->align);
+	offset = le64_to_cpu(pfn_sb->dataoff);
+	if (align == 0)
+		align = 1UL << ilog2(offset);
+	mode = le32_to_cpu(pfn_sb->mode);
+
 	if (!nd_pfn->uuid) {
-		/* from probe we allocate */
+		/*
+		 * When probing a namepace via nd_pfn_probe() the uuid
+		 * is NULL (see: nd_pfn_devinit()) we init settings from
+		 * pfn_sb
+		 */
 		nd_pfn->uuid = kmemdup(pfn_sb->uuid, 16, GFP_KERNEL);
 		if (!nd_pfn->uuid)
 			return -ENOMEM;
+		nd_pfn->align = align;
+		nd_pfn->mode = mode;
 	} else {
-		/* from init we validate */
+		/*
+		 * When probing a pfn / dax instance we validate the
+		 * live settings against the pfn_sb
+		 */
 		if (memcmp(nd_pfn->uuid, pfn_sb->uuid, 16) != 0)
 			return -ENODEV;
+
+		/*
+		 * If the uuid validates, but other settings mismatch
+		 * return EINVAL because userspace has managed to change
+		 * the configuration without specifying new
+		 * identification.
+		 */
+		if (nd_pfn->align != align || nd_pfn->mode != mode) {
+			dev_err(&nd_pfn->dev,
+					"init failed, settings mismatch\n");
+			dev_dbg(&nd_pfn->dev, "align: %lx:%lx mode: %d:%d\n",
+					nd_pfn->align, align, nd_pfn->mode,
+					mode);
+			return -EINVAL;
+		}
 	}
 
-	if (nd_pfn->align == 0)
-		nd_pfn->align = le32_to_cpu(pfn_sb->align);
-	if (nd_pfn->align > nvdimm_namespace_capacity(ndns)) {
+	if (align > nvdimm_namespace_capacity(ndns)) {
 		dev_err(&nd_pfn->dev, "alignment: %lx exceeds capacity %llx\n",
-				nd_pfn->align, nvdimm_namespace_capacity(ndns));
+				align, nvdimm_namespace_capacity(ndns));
 		return -EINVAL;
 	}
 
@@ -411,7 +441,6 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
 	 * namespace has changed since the pfn superblock was
 	 * established.
 	 */
-	offset = le64_to_cpu(pfn_sb->dataoff);
 	nsio = to_nd_namespace_io(&ndns->dev);
 	if (offset >= resource_size(&nsio->res)) {
 		dev_err(&nd_pfn->dev, "pfn array size exceeds capacity of %s\n",
@@ -419,10 +448,11 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
 		return -EBUSY;
 	}
 
-	if ((nd_pfn->align && !IS_ALIGNED(offset, nd_pfn->align))
+	if ((align && !IS_ALIGNED(offset, align))
 			|| !IS_ALIGNED(offset, PAGE_SIZE)) {
-		dev_err(&nd_pfn->dev, "bad offset: %#llx dax disabled\n",
-				offset);
+		dev_err(&nd_pfn->dev,
+				"bad offset: %#llx dax disabled align: %#lx\n",
+				offset, align);
 		return -ENXIO;
 	}
 
@@ -502,7 +532,6 @@ static struct vmem_altmap *__nvdimm_setup_pfn(struct nd_pfn *nd_pfn,
 	res->start += start_pad;
 	res->end -= end_trunc;
 
-	nd_pfn->mode = le32_to_cpu(nd_pfn->pfn_sb->mode);
 	if (nd_pfn->mode == PFN_MODE_RAM) {
 		if (offset < SZ_8K)
 			return ERR_PTR(-EINVAL);
-- 
cgit v0.10.2


From 81e257e964268d050f8e9188becd44d50f241d72 Mon Sep 17 00:00:00 2001
From: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Date: Thu, 23 Jun 2016 13:45:06 +0200
Subject: drm/atomic: Make drm_atomic_legacy_backoff reset crtc->acquire_ctx

Atomic updates may acquire more state than initially locked through
drm_modeset_lock_crtc, running with heavy stress can cause a
WARN_ON(crtc->acquire_ctx) in drm_modeset_lock_crtc:

[  601.491296] ------------[ cut here ]------------
[  601.491366] WARNING: CPU: 0 PID: 2411 at
drivers/gpu/drm/drm_modeset_lock.c:191 drm_modeset_lock_crtc+0xeb/0xf0 [drm]
[  601.491369] Modules linked in: drm i915 drm_kms_helper
[  601.491414] CPU: 0 PID: 2411 Comm: kms_cursor_lega Tainted: G     U 4.7.0-rc4-patser+ #4798
[  601.491417] Hardware name: Intel Corporation Skylake Client
[  601.491420]  0000000000000000 ffff88044d153c98 ffffffff812ead28 0000000000000000
[  601.491425]  0000000000000000 ffff88044d153cd8 ffffffff810868e6 000000bf58058030
[  601.491431]  ffff880088b415e8 ffff880458058030 ffff88008a271548 ffff88008a271568
[  601.491436] Call Trace:
[  601.491443]  [<ffffffff812ead28>] dump_stack+0x4d/0x65
[  601.491447]  [<ffffffff810868e6>] __warn+0xc6/0xe0
[  601.491452]  [<ffffffff81086968>] warn_slowpath_null+0x18/0x20
[  601.491472]  [<ffffffffc00d4ffb>] drm_modeset_lock_crtc+0xeb/0xf0 [drm]
[  601.491491]  [<ffffffffc00c5526>] drm_mode_cursor_common+0x66/0x180 [drm]
[  601.491509]  [<ffffffffc00c91cc>] drm_mode_cursor_ioctl+0x3c/0x40 [drm]
[  601.491524]  [<ffffffffc00bc94d>] drm_ioctl+0x14d/0x530 [drm]
[  601.491540]  [<ffffffffc00c9190>] ? drm_mode_setcrtc+0x520/0x520 [drm]
[  601.491545]  [<ffffffff81176aeb>] ? handle_mm_fault+0x106b/0x1430
[  601.491550]  [<ffffffff81108441>] ? stop_one_cpu+0x61/0x70
[  601.491556]  [<ffffffff811bb71d>] do_vfs_ioctl+0x8d/0x570
[  601.491560]  [<ffffffff81290d7e>] ? security_file_ioctl+0x3e/0x60
[  601.491565]  [<ffffffff811bbc74>] SyS_ioctl+0x74/0x80
[  601.491571]  [<ffffffff810e321c>] ? posix_get_monotonic_raw+0xc/0x10
[  601.491576]  [<ffffffff8175b11b>] entry_SYSCALL_64_fastpath+0x13/0x8f
[  601.491581] ---[ end trace 56f3d3d85f000d00 ]---

For good measure, test mode_config.acquire_ctx too, although this should
never happen.

Testcase: kms_cursor_legacy
Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: stable@vger.kernel.org
Signed-off-by: Dave Airlie <airlied@redhat.com>

diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
index c204ef3..9bb99e2 100644
--- a/drivers/gpu/drm/drm_atomic.c
+++ b/drivers/gpu/drm/drm_atomic.c
@@ -1296,14 +1296,39 @@ EXPORT_SYMBOL(drm_atomic_add_affected_planes);
  */
 void drm_atomic_legacy_backoff(struct drm_atomic_state *state)
 {
+	struct drm_device *dev = state->dev;
+	unsigned crtc_mask = 0;
+	struct drm_crtc *crtc;
 	int ret;
+	bool global = false;
+
+	drm_for_each_crtc(crtc, dev) {
+		if (crtc->acquire_ctx != state->acquire_ctx)
+			continue;
+
+		crtc_mask |= drm_crtc_mask(crtc);
+		crtc->acquire_ctx = NULL;
+	}
+
+	if (WARN_ON(dev->mode_config.acquire_ctx == state->acquire_ctx)) {
+		global = true;
+
+		dev->mode_config.acquire_ctx = NULL;
+	}
 
 retry:
 	drm_modeset_backoff(state->acquire_ctx);
 
-	ret = drm_modeset_lock_all_ctx(state->dev, state->acquire_ctx);
+	ret = drm_modeset_lock_all_ctx(dev, state->acquire_ctx);
 	if (ret)
 		goto retry;
+
+	drm_for_each_crtc(crtc, dev)
+		if (drm_crtc_mask(crtc) & crtc_mask)
+			crtc->acquire_ctx = state->acquire_ctx;
+
+	if (global)
+		dev->mode_config.acquire_ctx = state->acquire_ctx;
 }
 EXPORT_SYMBOL(drm_atomic_legacy_backoff);
 
-- 
cgit v0.10.2


From 3d22462ae915743f3be5bf1ab3d4a6b72c2bb6c9 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@poochiereds.net>
Date: Tue, 24 May 2016 06:27:44 -0400
Subject: cifs: stuff the fl_owner into "pid" field in the lock request

Right now, we send the tgid cross the wire. What we really want to send
though is a hashed fl_owner_t since samba treats this field as a generic
lockowner.

It turns out that because we enforce and release locks locally before
they are ever sent to the server, this patch makes no difference in
behavior. Still, setting OFD locks on the server using the process
pid seems wrong, so I think this patch still makes sense.

Signed-off-by: Jeff Layton <jlayton@poochiereds.net>
Signed-off-by: Steve French <smfrench@gmail.com>
Acked-by: Pavel Shilovsky <pshilovsky@samba.org>
Acked-by: Sachin Prabhu <sprabhu@redhat.com>

diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 5d8b7ed..5d841f3 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -87,6 +87,7 @@ extern mempool_t *cifs_req_poolp;
 extern mempool_t *cifs_mid_poolp;
 
 struct workqueue_struct	*cifsiod_wq;
+__u32 cifs_lock_secret;
 
 /*
  * Bumps refcount for cifs super block.
@@ -1266,6 +1267,8 @@ init_cifs(void)
 	spin_lock_init(&cifs_file_list_lock);
 	spin_lock_init(&GlobalMid_Lock);
 
+	get_random_bytes(&cifs_lock_secret, sizeof(cifs_lock_secret));
+
 	if (cifs_max_pending < 2) {
 		cifs_max_pending = 2;
 		cifs_dbg(FYI, "cifs_max_pending set to min of 2\n");
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index bba106c..8f1d8c1 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -1619,6 +1619,7 @@ void cifs_oplock_break(struct work_struct *work);
 
 extern const struct slow_work_ops cifs_oplock_break_ops;
 extern struct workqueue_struct *cifsiod_wq;
+extern __u32 cifs_lock_secret;
 
 extern mempool_t *cifs_mid_poolp;
 
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 9793ae0..d4890b6 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1112,6 +1112,12 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
 	return rc;
 }
 
+static __u32
+hash_lockowner(fl_owner_t owner)
+{
+	return cifs_lock_secret ^ hash32_ptr((const void *)owner);
+}
+
 struct lock_to_push {
 	struct list_head llist;
 	__u64 offset;
@@ -1178,7 +1184,7 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile)
 		else
 			type = CIFS_WRLCK;
 		lck = list_entry(el, struct lock_to_push, llist);
-		lck->pid = flock->fl_pid;
+		lck->pid = hash_lockowner(flock->fl_owner);
 		lck->netfid = cfile->fid.netfid;
 		lck->length = length;
 		lck->type = type;
@@ -1305,7 +1311,8 @@ cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
 			posix_lock_type = CIFS_RDLCK;
 		else
 			posix_lock_type = CIFS_WRLCK;
-		rc = CIFSSMBPosixLock(xid, tcon, netfid, current->tgid,
+		rc = CIFSSMBPosixLock(xid, tcon, netfid,
+				      hash_lockowner(flock->fl_owner),
 				      flock->fl_start, length, flock,
 				      posix_lock_type, wait_flag);
 		return rc;
@@ -1505,7 +1512,8 @@ cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
 			posix_lock_type = CIFS_UNLCK;
 
 		rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
-				      current->tgid, flock->fl_start, length,
+				      hash_lockowner(flock->fl_owner),
+				      flock->fl_start, length,
 				      NULL, posix_lock_type, wait_flag);
 		goto out;
 	}
-- 
cgit v0.10.2


From 202d772ba02b1deb8835a631cd8255943d1906a0 Mon Sep 17 00:00:00 2001
From: Jerome Marchand <jmarchan@redhat.com>
Date: Thu, 26 May 2016 11:52:24 +0200
Subject: cifs: use CIFS_MAX_DOMAINNAME_LEN when converting the domain name

Currently in build_ntlmssp_auth_blob(), when converting the domain
name to UTF16, CIFS_MAX_USERNAME_LEN limit is used. It should be
CIFS_MAX_DOMAINNAME_LEN. This patch fixes this.

Signed-off-by: Jerome Marchand <jmarchan@redhat.com>
Signed-off-by: Steve French <smfrench@gmail.com>

diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index af0ec2d..c3d086e 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -430,7 +430,7 @@ int build_ntlmssp_auth_blob(unsigned char *pbuffer,
 	} else {
 		int len;
 		len = cifs_strtoUTF16((__le16 *)tmp, ses->domainName,
-				      CIFS_MAX_USERNAME_LEN, nls_cp);
+				      CIFS_MAX_DOMAINNAME_LEN, nls_cp);
 		len *= 2; /* unicode is 2 bytes each */
 		sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - pbuffer);
 		sec_blob->DomainName.Length = cpu_to_le16(len);
-- 
cgit v0.10.2


From b8da344b74c822e966c6d19d6b2321efe82c5d97 Mon Sep 17 00:00:00 2001
From: Jerome Marchand <jmarchan@redhat.com>
Date: Thu, 26 May 2016 11:52:25 +0200
Subject: cifs: dynamic allocation of ntlmssp blob

In sess_auth_rawntlmssp_authenticate(), the ntlmssp blob is allocated
statically and its size is an "empirical" 5*sizeof(struct
_AUTHENTICATE_MESSAGE) (320B on x86_64). I don't know where this value
comes from or if it was ever appropriate, but it is currently
insufficient: the user and domain name in UTF16 could take 1kB by
themselves. Because of that, build_ntlmssp_auth_blob() might corrupt
memory (out-of-bounds write). The size of ntlmssp_blob in
SMB2_sess_setup() is too small too (sizeof(struct _NEGOTIATE_MESSAGE)
+ 500).

This patch allocates the blob dynamically in
build_ntlmssp_auth_blob().

Signed-off-by: Jerome Marchand <jmarchan@redhat.com>
Signed-off-by: Steve French <smfrench@gmail.com>
CC: Stable <stable@vger.kernel.org>

diff --git a/fs/cifs/ntlmssp.h b/fs/cifs/ntlmssp.h
index 848249f..3079b38 100644
--- a/fs/cifs/ntlmssp.h
+++ b/fs/cifs/ntlmssp.h
@@ -133,6 +133,6 @@ typedef struct _AUTHENTICATE_MESSAGE {
 
 int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len, struct cifs_ses *ses);
 void build_ntlmssp_negotiate_blob(unsigned char *pbuffer, struct cifs_ses *ses);
-int build_ntlmssp_auth_blob(unsigned char *pbuffer, u16 *buflen,
+int build_ntlmssp_auth_blob(unsigned char **pbuffer, u16 *buflen,
 			struct cifs_ses *ses,
 			const struct nls_table *nls_cp);
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index c3d086e..a42e99c 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -364,19 +364,43 @@ void build_ntlmssp_negotiate_blob(unsigned char *pbuffer,
 	sec_blob->DomainName.MaximumLength = 0;
 }
 
-/* We do not malloc the blob, it is passed in pbuffer, because its
-   maximum possible size is fixed and small, making this approach cleaner.
-   This function returns the length of the data in the blob */
-int build_ntlmssp_auth_blob(unsigned char *pbuffer,
+static int size_of_ntlmssp_blob(struct cifs_ses *ses)
+{
+	int sz = sizeof(AUTHENTICATE_MESSAGE) + ses->auth_key.len
+		- CIFS_SESS_KEY_SIZE + CIFS_CPHTXT_SIZE + 2;
+
+	if (ses->domainName)
+		sz += 2 * strnlen(ses->domainName, CIFS_MAX_DOMAINNAME_LEN);
+	else
+		sz += 2;
+
+	if (ses->user_name)
+		sz += 2 * strnlen(ses->user_name, CIFS_MAX_USERNAME_LEN);
+	else
+		sz += 2;
+
+	return sz;
+}
+
+int build_ntlmssp_auth_blob(unsigned char **pbuffer,
 					u16 *buflen,
 				   struct cifs_ses *ses,
 				   const struct nls_table *nls_cp)
 {
 	int rc;
-	AUTHENTICATE_MESSAGE *sec_blob = (AUTHENTICATE_MESSAGE *)pbuffer;
+	AUTHENTICATE_MESSAGE *sec_blob;
 	__u32 flags;
 	unsigned char *tmp;
 
+	rc = setup_ntlmv2_rsp(ses, nls_cp);
+	if (rc) {
+		cifs_dbg(VFS, "Error %d during NTLMSSP authentication\n", rc);
+		*buflen = 0;
+		goto setup_ntlmv2_ret;
+	}
+	*pbuffer = kmalloc(size_of_ntlmssp_blob(ses), GFP_KERNEL);
+	sec_blob = (AUTHENTICATE_MESSAGE *)*pbuffer;
+
 	memcpy(sec_blob->Signature, NTLMSSP_SIGNATURE, 8);
 	sec_blob->MessageType = NtLmAuthenticate;
 
@@ -391,7 +415,7 @@ int build_ntlmssp_auth_blob(unsigned char *pbuffer,
 			flags |= NTLMSSP_NEGOTIATE_KEY_XCH;
 	}
 
-	tmp = pbuffer + sizeof(AUTHENTICATE_MESSAGE);
+	tmp = *pbuffer + sizeof(AUTHENTICATE_MESSAGE);
 	sec_blob->NegotiateFlags = cpu_to_le32(flags);
 
 	sec_blob->LmChallengeResponse.BufferOffset =
@@ -399,13 +423,9 @@ int build_ntlmssp_auth_blob(unsigned char *pbuffer,
 	sec_blob->LmChallengeResponse.Length = 0;
 	sec_blob->LmChallengeResponse.MaximumLength = 0;
 
-	sec_blob->NtChallengeResponse.BufferOffset = cpu_to_le32(tmp - pbuffer);
+	sec_blob->NtChallengeResponse.BufferOffset =
+				cpu_to_le32(tmp - *pbuffer);
 	if (ses->user_name != NULL) {
-		rc = setup_ntlmv2_rsp(ses, nls_cp);
-		if (rc) {
-			cifs_dbg(VFS, "Error %d during NTLMSSP authentication\n", rc);
-			goto setup_ntlmv2_ret;
-		}
 		memcpy(tmp, ses->auth_key.response + CIFS_SESS_KEY_SIZE,
 				ses->auth_key.len - CIFS_SESS_KEY_SIZE);
 		tmp += ses->auth_key.len - CIFS_SESS_KEY_SIZE;
@@ -423,7 +443,7 @@ int build_ntlmssp_auth_blob(unsigned char *pbuffer,
 	}
 
 	if (ses->domainName == NULL) {
-		sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - pbuffer);
+		sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - *pbuffer);
 		sec_blob->DomainName.Length = 0;
 		sec_blob->DomainName.MaximumLength = 0;
 		tmp += 2;
@@ -432,14 +452,14 @@ int build_ntlmssp_auth_blob(unsigned char *pbuffer,
 		len = cifs_strtoUTF16((__le16 *)tmp, ses->domainName,
 				      CIFS_MAX_DOMAINNAME_LEN, nls_cp);
 		len *= 2; /* unicode is 2 bytes each */
-		sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - pbuffer);
+		sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - *pbuffer);
 		sec_blob->DomainName.Length = cpu_to_le16(len);
 		sec_blob->DomainName.MaximumLength = cpu_to_le16(len);
 		tmp += len;
 	}
 
 	if (ses->user_name == NULL) {
-		sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - pbuffer);
+		sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - *pbuffer);
 		sec_blob->UserName.Length = 0;
 		sec_blob->UserName.MaximumLength = 0;
 		tmp += 2;
@@ -448,13 +468,13 @@ int build_ntlmssp_auth_blob(unsigned char *pbuffer,
 		len = cifs_strtoUTF16((__le16 *)tmp, ses->user_name,
 				      CIFS_MAX_USERNAME_LEN, nls_cp);
 		len *= 2; /* unicode is 2 bytes each */
-		sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - pbuffer);
+		sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - *pbuffer);
 		sec_blob->UserName.Length = cpu_to_le16(len);
 		sec_blob->UserName.MaximumLength = cpu_to_le16(len);
 		tmp += len;
 	}
 
-	sec_blob->WorkstationName.BufferOffset = cpu_to_le32(tmp - pbuffer);
+	sec_blob->WorkstationName.BufferOffset = cpu_to_le32(tmp - *pbuffer);
 	sec_blob->WorkstationName.Length = 0;
 	sec_blob->WorkstationName.MaximumLength = 0;
 	tmp += 2;
@@ -463,19 +483,19 @@ int build_ntlmssp_auth_blob(unsigned char *pbuffer,
 		(ses->ntlmssp->server_flags & NTLMSSP_NEGOTIATE_EXTENDED_SEC))
 			&& !calc_seckey(ses)) {
 		memcpy(tmp, ses->ntlmssp->ciphertext, CIFS_CPHTXT_SIZE);
-		sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - pbuffer);
+		sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - *pbuffer);
 		sec_blob->SessionKey.Length = cpu_to_le16(CIFS_CPHTXT_SIZE);
 		sec_blob->SessionKey.MaximumLength =
 				cpu_to_le16(CIFS_CPHTXT_SIZE);
 		tmp += CIFS_CPHTXT_SIZE;
 	} else {
-		sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - pbuffer);
+		sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - *pbuffer);
 		sec_blob->SessionKey.Length = 0;
 		sec_blob->SessionKey.MaximumLength = 0;
 	}
 
+	*buflen = tmp - *pbuffer;
 setup_ntlmv2_ret:
-	*buflen = tmp - pbuffer;
 	return rc;
 }
 
@@ -1266,7 +1286,7 @@ sess_auth_rawntlmssp_authenticate(struct sess_data *sess_data)
 	struct cifs_ses *ses = sess_data->ses;
 	__u16 bytes_remaining;
 	char *bcc_ptr;
-	char *ntlmsspblob = NULL;
+	unsigned char *ntlmsspblob = NULL;
 	u16 blob_len;
 
 	cifs_dbg(FYI, "rawntlmssp session setup authenticate phase\n");
@@ -1279,19 +1299,7 @@ sess_auth_rawntlmssp_authenticate(struct sess_data *sess_data)
 	/* Build security blob before we assemble the request */
 	pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base;
 	smb_buf = (struct smb_hdr *)pSMB;
-	/*
-	 * 5 is an empirical value, large enough to hold
-	 * authenticate message plus max 10 of av paris,
-	 * domain, user, workstation names, flags, etc.
-	 */
-	ntlmsspblob = kzalloc(5*sizeof(struct _AUTHENTICATE_MESSAGE),
-				GFP_KERNEL);
-	if (!ntlmsspblob) {
-		rc = -ENOMEM;
-		goto out;
-	}
-
-	rc = build_ntlmssp_auth_blob(ntlmsspblob,
+	rc = build_ntlmssp_auth_blob(&ntlmsspblob,
 					&blob_len, ses, sess_data->nls_cp);
 	if (rc)
 		goto out_free_ntlmsspblob;
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 8f38e33..c3e61a7 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -588,7 +588,7 @@ SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses,
 	u16 blob_length = 0;
 	struct key *spnego_key = NULL;
 	char *security_blob = NULL;
-	char *ntlmssp_blob = NULL;
+	unsigned char *ntlmssp_blob = NULL;
 	bool use_spnego = false; /* else use raw ntlmssp */
 
 	cifs_dbg(FYI, "Session Setup\n");
@@ -713,13 +713,7 @@ ssetup_ntlmssp_authenticate:
 		iov[1].iov_len = blob_length;
 	} else if (phase == NtLmAuthenticate) {
 		req->hdr.SessionId = ses->Suid;
-		ntlmssp_blob = kzalloc(sizeof(struct _NEGOTIATE_MESSAGE) + 500,
-				       GFP_KERNEL);
-		if (ntlmssp_blob == NULL) {
-			rc = -ENOMEM;
-			goto ssetup_exit;
-		}
-		rc = build_ntlmssp_auth_blob(ntlmssp_blob, &blob_length, ses,
+		rc = build_ntlmssp_auth_blob(&ntlmssp_blob, &blob_length, ses,
 					     nls_cp);
 		if (rc) {
 			cifs_dbg(FYI, "build_ntlmssp_auth_blob failed %d\n",
-- 
cgit v0.10.2


From a6b6befbb2806697461962edb044e3376a771ebb Mon Sep 17 00:00:00 2001
From: Luis de Bethencourt <luisbg@osg.samsung.com>
Date: Wed, 8 Jun 2016 17:02:32 +0100
Subject: cifs: check hash calculating succeeded

calc_lanman_hash() could return -ENOMEM or other errors, we should check
that everything went fine before using the calculated key.

Signed-off-by: Luis de Bethencourt <luisbg@osg.samsung.com>
Signed-off-by: Steve French <smfrench@gmail.com>

diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index a42e99c..538d9b5 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -710,6 +710,8 @@ sess_auth_lanman(struct sess_data *sess_data)
 		rc = calc_lanman_hash(ses->password, ses->server->cryptkey,
 				      ses->server->sec_mode & SECMODE_PW_ENCRYPT ?
 				      true : false, lnm_session_key);
+		if (rc)
+			goto out;
 
 		memcpy(bcc_ptr, (char *)lnm_session_key, CIFS_AUTH_RESP_SIZE);
 		bcc_ptr += CIFS_AUTH_RESP_SIZE;
-- 
cgit v0.10.2


From 4c5ea0a9cd02d6aa8adc86e100b2a4cff8d614ff Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 21 Jun 2016 18:52:17 +0200
Subject: locking/static_key: Fix concurrent static_key_slow_inc()

The following scenario is possible:

    CPU 1                                   CPU 2
    static_key_slow_inc()
     atomic_inc_not_zero()
      -> key.enabled == 0, no increment
     jump_label_lock()
     atomic_inc_return()
      -> key.enabled == 1 now
                                            static_key_slow_inc()
                                             atomic_inc_not_zero()
                                              -> key.enabled == 1, inc to 2
                                             return
                                            ** static key is wrong!
     jump_label_update()
     jump_label_unlock()

Testing the static key at the point marked by (**) will follow the
wrong path for jumps that have not been patched yet.  This can
actually happen when creating many KVM virtual machines with userspace
LAPIC emulation; just run several copies of the following program:

    #include <fcntl.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    int main(void)
    {
        for (;;) {
            int kvmfd = open("/dev/kvm", O_RDONLY);
            int vmfd = ioctl(kvmfd, KVM_CREATE_VM, 0);
            close(ioctl(vmfd, KVM_CREATE_VCPU, 1));
            close(vmfd);
            close(kvmfd);
        }
        return 0;
    }

Every KVM_CREATE_VCPU ioctl will attempt a static_key_slow_inc() call.
The static key's purpose is to skip NULL pointer checks and indeed one
of the processes eventually dereferences NULL.

As explained in the commit that introduced the bug:

  706249c222f6 ("locking/static_keys: Rework update logic")

jump_label_update() needs key.enabled to be true.  The solution adopted
here is to temporarily make key.enabled == -1, and use go down the
slow path when key.enabled <= 0.

Reported-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: <stable@vger.kernel.org> # v4.3+
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: 706249c222f6 ("locking/static_keys: Rework update logic")
Link: http://lkml.kernel.org/r/1466527937-69798-1-git-send-email-pbonzini@redhat.com
[ Small stylistic edits to the changelog and the code. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index 0536524..6890446 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -117,13 +117,18 @@ struct module;
 
 #include <linux/atomic.h>
 
+#ifdef HAVE_JUMP_LABEL
+
 static inline int static_key_count(struct static_key *key)
 {
-	return atomic_read(&key->enabled);
+	/*
+	 * -1 means the first static_key_slow_inc() is in progress.
+	 *  static_key_enabled() must return true, so return 1 here.
+	 */
+	int n = atomic_read(&key->enabled);
+	return n >= 0 ? n : 1;
 }
 
-#ifdef HAVE_JUMP_LABEL
-
 #define JUMP_TYPE_FALSE	0UL
 #define JUMP_TYPE_TRUE	1UL
 #define JUMP_TYPE_MASK	1UL
@@ -162,6 +167,11 @@ extern void jump_label_apply_nops(struct module *mod);
 
 #else  /* !HAVE_JUMP_LABEL */
 
+static inline int static_key_count(struct static_key *key)
+{
+	return atomic_read(&key->enabled);
+}
+
 static __always_inline void jump_label_init(void)
 {
 	static_key_initialized = true;
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index 05254ee..4b353e0 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -58,13 +58,36 @@ static void jump_label_update(struct static_key *key);
 
 void static_key_slow_inc(struct static_key *key)
 {
+	int v, v1;
+
 	STATIC_KEY_CHECK_USE();
-	if (atomic_inc_not_zero(&key->enabled))
-		return;
+
+	/*
+	 * Careful if we get concurrent static_key_slow_inc() calls;
+	 * later calls must wait for the first one to _finish_ the
+	 * jump_label_update() process.  At the same time, however,
+	 * the jump_label_update() call below wants to see
+	 * static_key_enabled(&key) for jumps to be updated properly.
+	 *
+	 * So give a special meaning to negative key->enabled: it sends
+	 * static_key_slow_inc() down the slow path, and it is non-zero
+	 * so it counts as "enabled" in jump_label_update().  Note that
+	 * atomic_inc_unless_negative() checks >= 0, so roll our own.
+	 */
+	for (v = atomic_read(&key->enabled); v > 0; v = v1) {
+		v1 = atomic_cmpxchg(&key->enabled, v, v + 1);
+		if (likely(v1 == v))
+			return;
+	}
 
 	jump_label_lock();
-	if (atomic_inc_return(&key->enabled) == 1)
+	if (atomic_read(&key->enabled) == 0) {
+		atomic_set(&key->enabled, -1);
 		jump_label_update(key);
+		atomic_set(&key->enabled, 1);
+	} else {
+		atomic_inc(&key->enabled);
+	}
 	jump_label_unlock();
 }
 EXPORT_SYMBOL_GPL(static_key_slow_inc);
@@ -72,6 +95,13 @@ EXPORT_SYMBOL_GPL(static_key_slow_inc);
 static void __static_key_slow_dec(struct static_key *key,
 		unsigned long rate_limit, struct delayed_work *work)
 {
+	/*
+	 * The negative count check is valid even when a negative
+	 * key->enabled is in use by static_key_slow_inc(); a
+	 * __static_key_slow_dec() before the first static_key_slow_inc()
+	 * returns is unbalanced, because all other static_key_slow_inc()
+	 * instances block while the update is in progress.
+	 */
 	if (!atomic_dec_and_mutex_lock(&key->enabled, &jump_label_mutex)) {
 		WARN(atomic_read(&key->enabled) < 0,
 		     "jump label: negative count!\n");
-- 
cgit v0.10.2


From 094f469172e00d6ab0a3130b0e01c83b3cf3a98d Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Date: Thu, 16 Jun 2016 15:57:01 +0300
Subject: sched/fair: Initialize throttle_count for new task-groups lazily

Cgroup created inside throttled group must inherit current throttle_count.
Broken throttle_count allows to nominate throttled entries as a next buddy,
later this leads to null pointer dereference in pick_next_task_fair().

This patch initialize cfs_rq->throttle_count at first enqueue: laziness
allows to skip locking all rq at group creation. Lazy approach also allows
to skip full sub-tree scan at throttling hierarchy (not in this patch).

Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: bsegall@google.com
Link: http://lkml.kernel.org/r/146608182119.21870.8439834428248129633.stgit@buzz
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 2ae68f0..8c5d8c0 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4202,6 +4202,26 @@ static void check_enqueue_throttle(struct cfs_rq *cfs_rq)
 	if (!cfs_bandwidth_used())
 		return;
 
+	/* Synchronize hierarchical throttle counter: */
+	if (unlikely(!cfs_rq->throttle_uptodate)) {
+		struct rq *rq = rq_of(cfs_rq);
+		struct cfs_rq *pcfs_rq;
+		struct task_group *tg;
+
+		cfs_rq->throttle_uptodate = 1;
+
+		/* Get closest up-to-date node, because leaves go first: */
+		for (tg = cfs_rq->tg->parent; tg; tg = tg->parent) {
+			pcfs_rq = tg->cfs_rq[cpu_of(rq)];
+			if (pcfs_rq->throttle_uptodate)
+				break;
+		}
+		if (tg) {
+			cfs_rq->throttle_count = pcfs_rq->throttle_count;
+			cfs_rq->throttled_clock_task = rq_clock_task(rq);
+		}
+	}
+
 	/* an active group must be handled by the update_curr()->put() path */
 	if (!cfs_rq->runtime_enabled || cfs_rq->curr)
 		return;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 72f1f30..7cbeb92 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -437,7 +437,7 @@ struct cfs_rq {
 
 	u64 throttled_clock, throttled_clock_task;
 	u64 throttled_clock_task_time;
-	int throttled, throttle_count;
+	int throttled, throttle_count, throttle_uptodate;
 	struct list_head throttled_list;
 #endif /* CONFIG_CFS_BANDWIDTH */
 #endif /* CONFIG_FAIR_GROUP_SCHED */
-- 
cgit v0.10.2


From 754bd598be9bbc953bc709a9e8ed7f3188bfb9d7 Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Date: Thu, 16 Jun 2016 15:57:15 +0300
Subject: sched/fair: Do not announce throttled next buddy in
 dequeue_task_fair()

Hierarchy could be already throttled at this point. Throttled next
buddy could trigger a NULL pointer dereference in pick_next_task_fair().

Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Ben Segall <bsegall@google.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/146608183552.21905.15924473394414832071.stgit@buzz
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 8c5d8c0..bdcbeea 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4537,15 +4537,14 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 
 		/* Don't dequeue parent if it has other entities besides us */
 		if (cfs_rq->load.weight) {
+			/* Avoid re-evaluating load for this entity: */
+			se = parent_entity(se);
 			/*
 			 * Bias pick_next to pick a task from this cfs_rq, as
 			 * p is sleeping when it is within its sched_slice.
 			 */
-			if (task_sleep && parent_entity(se))
-				set_next_buddy(parent_entity(se));
-
-			/* avoid re-evaluating load for this entity */
-			se = parent_entity(se);
+			if (task_sleep && se && !throttled_hierarchy(cfs_rq))
+				set_next_buddy(se);
 			break;
 		}
 		flags |= DEQUEUE_SLEEP;
-- 
cgit v0.10.2


From feb245e304f343cf5e4f9123db36354144dce8a4 Mon Sep 17 00:00:00 2001
From: Tejun Heo <htejun@gmail.com>
Date: Thu, 16 Jun 2016 15:35:04 -0400
Subject: sched/core: Allow kthreads to fall back to online && !active cpus

During CPU hotplug, CPU_ONLINE callbacks are run while the CPU is
online but not active.  A CPU_ONLINE callback may create or bind a
kthread so that its cpus_allowed mask only allows the CPU which is
being brought online.  The kthread may start executing before the CPU
is made active and can end up in select_fallback_rq().

In such cases, the expected behavior is selecting the CPU which is
coming online; however, because select_fallback_rq() only chooses from
active CPUs, it determines that the task doesn't have any viable CPU
in its allowed mask and ends up overriding it to cpu_possible_mask.

CPU_ONLINE callbacks should be able to put kthreads on the CPU which
is coming online.  Update select_fallback_rq() so that it follows
cpu_online() rather than cpu_active() for kthreads.

Reported-by: Gautham R Shenoy <ego@linux.vnet.ibm.com>
Tested-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Abdul Haleem <abdhalee@linux.vnet.ibm.com>
Cc: Aneesh Kumar <aneesh.kumar@linux.vnet.ibm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: kernel-team@fb.com
Cc: linuxppc-dev@lists.ozlabs.org
Link: http://lkml.kernel.org/r/20160616193504.GB3262@mtj.duckdns.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 4135ac8..51d7105 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1536,7 +1536,9 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
 	for (;;) {
 		/* Any allowed, online CPU? */
 		for_each_cpu(dest_cpu, tsk_cpus_allowed(p)) {
-			if (!cpu_active(dest_cpu))
+			if (!(p->flags & PF_KTHREAD) && !cpu_active(dest_cpu))
+				continue;
+			if (!cpu_online(dest_cpu))
 				continue;
 			goto out;
 		}
-- 
cgit v0.10.2


From 93a2001bdfd5376c3dc2158653034c20392d15c5 Mon Sep 17 00:00:00 2001
From: Scott Bauer <sbauer@plzdonthack.me>
Date: Thu, 23 Jun 2016 08:59:47 -0600
Subject: HID: hiddev: validate num_values for HIDIOCGUSAGES, HIDIOCSUSAGES
 commands

This patch validates the num_values parameter from userland during the
HIDIOCGUSAGES and HIDIOCSUSAGES commands. Previously, if the report id was set
to HID_REPORT_ID_UNKNOWN, we would fail to validate the num_values parameter
leading to a heap overflow.

Cc: stable@vger.kernel.org
Signed-off-by: Scott Bauer <sbauer@plzdonthack.me>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/drivers/hid/usbhid/hiddev.c b/drivers/hid/usbhid/hiddev.c
index 2f1ddca..700145b 100644
--- a/drivers/hid/usbhid/hiddev.c
+++ b/drivers/hid/usbhid/hiddev.c
@@ -516,13 +516,13 @@ static noinline int hiddev_ioctl_usage(struct hiddev *hiddev, unsigned int cmd,
 					goto inval;
 			} else if (uref->usage_index >= field->report_count)
 				goto inval;
-
-			else if ((cmd == HIDIOCGUSAGES || cmd == HIDIOCSUSAGES) &&
-				 (uref_multi->num_values > HID_MAX_MULTI_USAGES ||
-				  uref->usage_index + uref_multi->num_values > field->report_count))
-				goto inval;
 		}
 
+		if ((cmd == HIDIOCGUSAGES || cmd == HIDIOCSUSAGES) &&
+		    (uref_multi->num_values > HID_MAX_MULTI_USAGES ||
+		     uref->usage_index + uref_multi->num_values > field->report_count))
+			goto inval;
+
 		switch (cmd) {
 		case HIDIOCGUSAGE:
 			uref->value = field->value[uref->usage_index];
-- 
cgit v0.10.2


From f83c32925d45926cd0e0f18bf28e6039116c4486 Mon Sep 17 00:00:00 2001
From: Woodrow Shen <woodrow.shen@gmail.com>
Date: Fri, 24 Jun 2016 15:58:34 +0800
Subject: ALSA: hda - Fix the headset mic jack detection on Dell machine

The new Dell laptop with codec 3246 can't detect headset mic when
headset was inserted on the machine. So adding pin configurations
into quirk table makes headset mic work correctly.

Codec: Realtek ALC3246
Vendor Id: 0x10ec0256
Subsystem Id: 0x10280781

Signed-off-by: Woodrow Shen <woodrow.shen@canonical.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 0fe18ed..f28363e 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -5832,6 +5832,10 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
 		{0x14, 0x90170120},
 		{0x21, 0x02211030}),
 	SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
+		{0x12, 0x90a60170},
+		{0x14, 0x90170120},
+		{0x21, 0x02211030}),
+	SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
 		ALC256_STANDARD_PINS),
 	SND_HDA_PIN_QUIRK(0x10ec0280, 0x103c, "HP", ALC280_FIXUP_HP_GPIO4,
 		{0x12, 0x90a60130},
-- 
cgit v0.10.2


From d2bd05d88d245c13b64c3bf9c8927a1c56453d8c Mon Sep 17 00:00:00 2001
From: Jan Beulich <JBeulich@suse.com>
Date: Fri, 24 Jun 2016 03:13:34 -0600
Subject: xen-pciback: return proper values during BAR sizing

Reads following writes with all address bits set to 1 should return all
changeable address bits as one, not the BAR size (nor, as was the case
for the upper half of 64-bit BARs, the high half of the region's end
address). Presumably this didn't cause any problems so far because
consumers use the value to calculate the size (usually via val & -val),
and do nothing else with it.

But also consider the exception here: Unimplemented BARs should always
return all zeroes.

And finally, the check for whether to return the sizing address on read
for the ROM BAR should ignore all non-address bits, not just the ROM
Enable one.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Signed-off-by: David Vrabel <david.vrabel@citrix.com>

diff --git a/drivers/xen/xen-pciback/conf_space_header.c b/drivers/xen/xen-pciback/conf_space_header.c
index ad3d17d..9ead1c2 100644
--- a/drivers/xen/xen-pciback/conf_space_header.c
+++ b/drivers/xen/xen-pciback/conf_space_header.c
@@ -145,7 +145,7 @@ static int rom_write(struct pci_dev *dev, int offset, u32 value, void *data)
 	/* A write to obtain the length must happen as a 32-bit write.
 	 * This does not (yet) support writing individual bytes
 	 */
-	if (value == ~PCI_ROM_ADDRESS_ENABLE)
+	if ((value | ~PCI_ROM_ADDRESS_MASK) == ~0U)
 		bar->which = 1;
 	else {
 		u32 tmpval;
@@ -225,38 +225,42 @@ static inline void read_dev_bar(struct pci_dev *dev,
 			   (PCI_BASE_ADDRESS_SPACE_MEMORY |
 				PCI_BASE_ADDRESS_MEM_TYPE_64))) {
 			bar_info->val = res[pos - 1].start >> 32;
-			bar_info->len_val = res[pos - 1].end >> 32;
+			bar_info->len_val = -resource_size(&res[pos - 1]) >> 32;
 			return;
 		}
 	}
 
+	if (!res[pos].flags ||
+	    (res[pos].flags & (IORESOURCE_DISABLED | IORESOURCE_UNSET |
+			       IORESOURCE_BUSY)))
+		return;
+
 	bar_info->val = res[pos].start |
 			(res[pos].flags & PCI_REGION_FLAG_MASK);
-	bar_info->len_val = resource_size(&res[pos]);
+	bar_info->len_val = -resource_size(&res[pos]) |
+			    (res[pos].flags & PCI_REGION_FLAG_MASK);
 }
 
 static void *bar_init(struct pci_dev *dev, int offset)
 {
-	struct pci_bar_info *bar = kmalloc(sizeof(*bar), GFP_KERNEL);
+	struct pci_bar_info *bar = kzalloc(sizeof(*bar), GFP_KERNEL);
 
 	if (!bar)
 		return ERR_PTR(-ENOMEM);
 
 	read_dev_bar(dev, bar, offset, ~0);
-	bar->which = 0;
 
 	return bar;
 }
 
 static void *rom_init(struct pci_dev *dev, int offset)
 {
-	struct pci_bar_info *bar = kmalloc(sizeof(*bar), GFP_KERNEL);
+	struct pci_bar_info *bar = kzalloc(sizeof(*bar), GFP_KERNEL);
 
 	if (!bar)
 		return ERR_PTR(-ENOMEM);
 
 	read_dev_bar(dev, bar, offset, ~PCI_ROM_ADDRESS_ENABLE);
-	bar->which = 0;
 
 	return bar;
 }
-- 
cgit v0.10.2


From 0f087ee3f3b86a4507db4ff1d2d5a3880e4cfd16 Mon Sep 17 00:00:00 2001
From: Jaroslav Kysela <perex@perex.cz>
Date: Fri, 24 Jun 2016 15:13:16 +0200
Subject: ALSA: hda / realtek - add two more Thinkpad IDs (5050,5053) for
 tpt460 fixup

  See: https://bugzilla.redhat.com/show_bug.cgi?id=1349539
  See: https://bugzilla.kernel.org/show_bug.cgi?id=120961

Signed-off-by: Jaroslav Kysela <perex@perex.cz>
Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index f28363e..900bfbc 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -5650,6 +5650,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x17aa, 0x503c, "Thinkpad L450", ALC292_FIXUP_TPT440_DOCK),
 	SND_PCI_QUIRK(0x17aa, 0x504a, "ThinkPad X260", ALC292_FIXUP_TPT440_DOCK),
 	SND_PCI_QUIRK(0x17aa, 0x504b, "Thinkpad", ALC293_FIXUP_LENOVO_SPK_NOISE),
+	SND_PCI_QUIRK(0x17aa, 0x5050, "Thinkpad T560p", ALC292_FIXUP_TPT460),
+	SND_PCI_QUIRK(0x17aa, 0x5053, "Thinkpad T460", ALC292_FIXUP_TPT460),
 	SND_PCI_QUIRK(0x17aa, 0x5109, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
 	SND_PCI_QUIRK(0x17aa, 0x3bf8, "Quanta FL1", ALC269_FIXUP_PCM_44K),
 	SND_PCI_QUIRK(0x17aa, 0x9e54, "LENOVO NB", ALC269_FIXUP_LENOVO_EAPD),
-- 
cgit v0.10.2


From d5dbbe6569481bf12dcbe3e12cff72c5f78d272c Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Fri, 24 Jun 2016 15:15:26 +0200
Subject: ALSA: dummy: Fix a use-after-free at closing

syzkaller fuzzer spotted a potential use-after-free case in snd-dummy
driver when hrtimer is used as backend:
> ==================================================================
> BUG: KASAN: use-after-free in rb_erase+0x1b17/0x2010 at addr ffff88005e5b6f68
>  Read of size 8 by task syz-executor/8984
> =============================================================================
> BUG kmalloc-192 (Not tainted): kasan: bad access detected
> -----------------------------------------------------------------------------
>
> Disabling lock debugging due to kernel taint
> INFO: Allocated in 0xbbbbbbbbbbbbbbbb age=18446705582212484632
> ....
> [<      none      >] dummy_hrtimer_create+0x49/0x1a0 sound/drivers/dummy.c:464
> ....
> INFO: Freed in 0xfffd8e09 age=18446705496313138713 cpu=2164287125 pid=-1
> [<      none      >] dummy_hrtimer_free+0x68/0x80 sound/drivers/dummy.c:481
> ....
> Call Trace:
>  [<ffffffff8179e59e>] __asan_report_load8_noabort+0x3e/0x40 mm/kasan/report.c:333
>  [<     inline     >] rb_set_parent include/linux/rbtree_augmented.h:111
>  [<     inline     >] __rb_erase_augmented include/linux/rbtree_augmented.h:218
>  [<ffffffff82ca5787>] rb_erase+0x1b17/0x2010 lib/rbtree.c:427
>  [<ffffffff82cb02e8>] timerqueue_del+0x78/0x170 lib/timerqueue.c:86
>  [<ffffffff814d0c80>] __remove_hrtimer+0x90/0x220 kernel/time/hrtimer.c:903
>  [<     inline     >] remove_hrtimer kernel/time/hrtimer.c:945
>  [<ffffffff814d23da>] hrtimer_try_to_cancel+0x22a/0x570 kernel/time/hrtimer.c:1046
>  [<ffffffff814d2742>] hrtimer_cancel+0x22/0x40 kernel/time/hrtimer.c:1066
>  [<ffffffff85420531>] dummy_hrtimer_stop+0x91/0xb0 sound/drivers/dummy.c:417
>  [<ffffffff854228bf>] dummy_pcm_trigger+0x17f/0x1e0 sound/drivers/dummy.c:507
>  [<ffffffff85392170>] snd_pcm_do_stop+0x160/0x1b0 sound/core/pcm_native.c:1106
>  [<ffffffff85391b26>] snd_pcm_action_single+0x76/0x120 sound/core/pcm_native.c:956
>  [<ffffffff85391e01>] snd_pcm_action+0x231/0x290 sound/core/pcm_native.c:974
>  [<     inline     >] snd_pcm_stop sound/core/pcm_native.c:1139
>  [<ffffffff8539754d>] snd_pcm_drop+0x12d/0x1d0 sound/core/pcm_native.c:1784
>  [<ffffffff8539d3be>] snd_pcm_common_ioctl1+0xfae/0x2150 sound/core/pcm_native.c:2805
>  [<ffffffff8539ee91>] snd_pcm_capture_ioctl1+0x2a1/0x5e0 sound/core/pcm_native.c:2976
>  [<ffffffff8539f2ec>] snd_pcm_kernel_ioctl+0x11c/0x160 sound/core/pcm_native.c:3020
>  [<ffffffff853d9a44>] snd_pcm_oss_sync+0x3a4/0xa30 sound/core/oss/pcm_oss.c:1693
>  [<ffffffff853da27d>] snd_pcm_oss_release+0x1ad/0x280 sound/core/oss/pcm_oss.c:2483
>  .....

A workaround is to call hrtimer_cancel() in dummy_hrtimer_sync() which
is called certainly before other blocking ops.

Reported-by: Dmitry Vyukov <dvyukov@google.com>
Tested-by: Dmitry Vyukov <dvyukov@google.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/drivers/dummy.c b/sound/drivers/dummy.c
index c0f8f61..172dacd 100644
--- a/sound/drivers/dummy.c
+++ b/sound/drivers/dummy.c
@@ -420,6 +420,7 @@ static int dummy_hrtimer_stop(struct snd_pcm_substream *substream)
 
 static inline void dummy_hrtimer_sync(struct dummy_hrtimer_pcm *dpcm)
 {
+	hrtimer_cancel(&dpcm->timer);
 	tasklet_kill(&dpcm->tasklet);
 }
 
-- 
cgit v0.10.2


From bc23676caf54c9b6e2521ef065dfddf6c50211de Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Fri, 17 Jun 2016 16:48:18 -0400
Subject: NFSv4.1/pnfs: Ensure we handle delegation errors in
 nfs4_proc_layoutget()

nfs4_handle_exception() relies on the caller setting the 'inode' field
in the struct nfs4_exception argument when the error applies to a
delegation.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Reviewed-by: Jeff Layton <jlayton@poochiereds.net>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index de97567..27fe63b 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -8036,7 +8036,10 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout, gfp_t gfp_flags)
 		.flags = RPC_TASK_ASYNC,
 	};
 	struct pnfs_layout_segment *lseg = NULL;
-	struct nfs4_exception exception = { .timeout = *timeout };
+	struct nfs4_exception exception = {
+		.inode = inode,
+		.timeout = *timeout,
+	};
 	int status = 0;
 
 	dprintk("--> %s\n", __func__);
-- 
cgit v0.10.2


From 67a3b721462c9b3bdc36ad6a583f41706402b3ea Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Fri, 17 Jun 2016 16:48:19 -0400
Subject: NFSv4.1/pnfs: Layout stateids start out as being invalid

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Reviewed-by: Jeff Layton <jlayton@poochiereds.net>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>

diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 0c7e0d4..f619139 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1290,6 +1290,7 @@ alloc_init_layout_hdr(struct inode *ino,
 	INIT_LIST_HEAD(&lo->plh_bulk_destroy);
 	lo->plh_inode = ino;
 	lo->plh_lc_cred = get_rpccred(ctx->cred);
+	lo->plh_flags |= 1 << NFS_LAYOUT_INVALID_STID;
 	return lo;
 }
 
@@ -1565,8 +1566,7 @@ lookup_again:
 	 * stateid, or it has been invalidated, then we must use the open
 	 * stateid.
 	 */
-	if (lo->plh_stateid.seqid == 0 ||
-	    test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags)) {
+	if (test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags)) {
 
 		/*
 		 * The first layoutget for the file. Need to serialize per
-- 
cgit v0.10.2


From e5241e43883058b61a955b4bbd677fe4ffd3ae4e Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Fri, 17 Jun 2016 16:48:20 -0400
Subject: NFSv4.1/pnfs: Add sparse lock annotations for pnfs_find_alloc_layout

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Reviewed-by: Jeff Layton <jlayton@poochiereds.net>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>

diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index f619139..ca488b5 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1298,6 +1298,8 @@ static struct pnfs_layout_hdr *
 pnfs_find_alloc_layout(struct inode *ino,
 		       struct nfs_open_context *ctx,
 		       gfp_t gfp_flags)
+	__releases(&ino->i_lock)
+	__acquires(&ino->i_lock)
 {
 	struct nfs_inode *nfsi = NFS_I(ino);
 	struct pnfs_layout_hdr *new = NULL;
-- 
cgit v0.10.2


From dd1beb3d16f6a10683b84b89a4644065c43910f3 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Fri, 17 Jun 2016 16:48:21 -0400
Subject: NFS/pnfs: handle bad delegation stateids in
 nfs4_layoutget_handle_exception

We must call nfs4_handle_exception() on BAD_STATEID errors. The only
exception is if the stateid argument turns out to be a layout stateid
that is declared invalid.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Reviewed-by: Jeff Layton <jlayton@poochiereds.net>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 27fe63b..406dd3e 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -7924,8 +7924,8 @@ nfs4_layoutget_handle_exception(struct rpc_task *task,
 			break;
 		}
 		lo = NFS_I(inode)->layout;
-		if (lo && nfs4_stateid_match(&lgp->args.stateid,
-					&lo->plh_stateid)) {
+		if (lo && !test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags) &&
+		    nfs4_stateid_match_other(&lgp->args.stateid, &lo->plh_stateid)) {
 			LIST_HEAD(head);
 
 			/*
@@ -7936,10 +7936,10 @@ nfs4_layoutget_handle_exception(struct rpc_task *task,
 			pnfs_mark_matching_lsegs_invalid(lo, &head, NULL, 0);
 			spin_unlock(&inode->i_lock);
 			pnfs_free_lseg_list(&head);
+			status = -EAGAIN;
+			goto out;
 		} else
 			spin_unlock(&inode->i_lock);
-		status = -EAGAIN;
-		goto out;
 	}
 
 	status = nfs4_handle_exception(server, status, exception);
-- 
cgit v0.10.2


From d2a7de0b34cd255f23d4b7a7f065677f4b1c15f2 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Fri, 17 Jun 2016 16:48:22 -0400
Subject: NFS: Fix up O_DIRECT results

if we read or wrote something, we must report it

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Reviewed-by: Jeff Layton <jlayton@poochiereds.net>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 979b3c4..c7326c2 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -353,10 +353,12 @@ static ssize_t nfs_direct_wait(struct nfs_direct_req *dreq)
 
 	result = wait_for_completion_killable(&dreq->completion);
 
+	if (!result) {
+		result = dreq->count;
+		WARN_ON_ONCE(dreq->count < 0);
+	}
 	if (!result)
 		result = dreq->error;
-	if (!result)
-		result = dreq->count;
 
 out:
 	return (ssize_t) result;
@@ -386,8 +388,10 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq, bool write)
 
 	if (dreq->iocb) {
 		long res = (long) dreq->error;
-		if (!res)
+		if (dreq->count != 0) {
 			res = (long) dreq->count;
+			WARN_ON_ONCE(dreq->count < 0);
+		}
 		dreq->iocb->ki_complete(dreq->iocb, res, 0);
 	}
 
-- 
cgit v0.10.2


From cea7f829d3ea6e87a67220bc417260b858e278ff Mon Sep 17 00:00:00 2001
From: Oleg Drokin <green@linuxhacker.ru>
Date: Fri, 17 Jun 2016 16:48:23 -0400
Subject: nfs4: Fix potential use after free of state in nfs4_do_reclaim.

Commit e8d975e73e5f ("fixing infinite OPEN loop in 4.0 stateid recovery")
introduced access to state after it was just potentially freed by
nfs4_put_open_state leading to a random data corruption somewhere.

BUG: unable to handle kernel paging request at ffff88004941ee40
IP: [<ffffffff813baf01>] nfs4_do_reclaim+0x461/0x740
PGD 3501067 PUD 3504067 PMD 6ff37067 PTE 800000004941e060
Oops: 0002 [#1] SMP DEBUG_PAGEALLOC
Modules linked in: loop rpcsec_gss_krb5 acpi_cpufreq tpm_tis joydev i2c_piix4 pcspkr tpm virtio_console nfsd ttm drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops floppy serio_raw virtio_blk drm
CPU: 6 PID: 2161 Comm: 192.168.10.253- Not tainted 4.7.0-rc1-vm-nfs+ #112
Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
task: ffff8800463dcd00 ti: ffff88003ff48000 task.ti: ffff88003ff48000
RIP: 0010:[<ffffffff813baf01>]  [<ffffffff813baf01>] nfs4_do_reclaim+0x461/0x740
RSP: 0018:ffff88003ff4bd68  EFLAGS: 00010246
RAX: 0000000000000000 RBX: ffffffff81a49900 RCX: 00000000000000e8
RDX: 00000000000000e8 RSI: ffff8800418b9930 RDI: ffff880040c96c88
RBP: ffff88003ff4bdf8 R08: 0000000000000001 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000000 R12: ffff880040c96c98
R13: ffff88004941ee20 R14: ffff88004941ee40 R15: ffff88004941ee00
FS:  0000000000000000(0000) GS:ffff88006d000000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: ffff88004941ee40 CR3: 0000000060b0b000 CR4: 00000000000006e0
Stack:
 ffffffff813baad5 ffff8800463dcd00 ffff880000000001 ffffffff810e6b68
 ffff880043ddbc88 ffff8800418b9800 ffff8800418b98c8 ffff88004941ee48
 ffff880040c96c90 ffff880040c96c00 ffff880040c96c20 ffff880040c96c40
Call Trace:
 [<ffffffff813baad5>] ? nfs4_do_reclaim+0x35/0x740
 [<ffffffff810e6b68>] ? trace_hardirqs_on_caller+0x128/0x1b0
 [<ffffffff813bb7cd>] nfs4_run_state_manager+0x5ed/0xa40
 [<ffffffff813bb1e0>] ? nfs4_do_reclaim+0x740/0x740
 [<ffffffff813bb1e0>] ? nfs4_do_reclaim+0x740/0x740
 [<ffffffff810af0d1>] kthread+0x101/0x120
 [<ffffffff810e6b68>] ? trace_hardirqs_on_caller+0x128/0x1b0
 [<ffffffff818843af>] ret_from_fork+0x1f/0x40
 [<ffffffff810aefd0>] ? kthread_create_on_node+0x250/0x250
Code: 65 80 4c 8b b5 78 ff ff ff e8 fc 88 4c 00 48 8b 7d 88 e8 13 67 d2 ff 49 8b 47 40 a8 02 0f 84 d3 01 00 00 4c 89 ff e8 7f f9 ff ff <f0> 41 80 26 7f 48 8b 7d c8 e8 b1 84 4c 00 e9 39 fd ff ff 3d e6
RIP  [<ffffffff813baf01>] nfs4_do_reclaim+0x461/0x740
 RSP <ffff88003ff4bd68>
CR2: ffff88004941ee40

Signed-off-by: Oleg Drokin <green@linuxhacker.ru>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>

diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 9679f47..834b875 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1488,9 +1488,9 @@ restart:
 					}
 					spin_unlock(&state->state_lock);
 				}
-				nfs4_put_open_state(state);
 				clear_bit(NFS_STATE_RECLAIM_NOGRACE,
 					&state->flags);
+				nfs4_put_open_state(state);
 				spin_lock(&sp->so_lock);
 				goto restart;
 			}
-- 
cgit v0.10.2


From 5e3a98883e7ebdd1440f829a9e9dd5c3d2c5903b Mon Sep 17 00:00:00 2001
From: Weston Andros Adamson <dros@monkey.org>
Date: Fri, 17 Jun 2016 16:48:24 -0400
Subject: pnfs_nfs: fix _cancel_empty_pagelist

pnfs_generic_commit_cancel_empty_pagelist calls nfs_commitdata_release,
but that is wrong: nfs_commitdata_release puts the open context, something
that isn't valid until nfs_init_commit is called, which is never the case
when pnfs_generic_commit_cancel_empty_pagelist is called.

This was introduced in "nfs: avoid race that crashes nfs_init_commit".

Signed-off-by: Weston Andros Adamson <dros@primarydata.com>
Cc: stable@vger.kernel.org
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>

diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c
index 0dfc476..b38e3c0 100644
--- a/fs/nfs/pnfs_nfs.c
+++ b/fs/nfs/pnfs_nfs.c
@@ -247,7 +247,11 @@ void pnfs_fetch_commit_bucket_list(struct list_head *pages,
 }
 
 /* Helper function for pnfs_generic_commit_pagelist to catch an empty
- * page list. This can happen when two commits race. */
+ * page list. This can happen when two commits race.
+ *
+ * This must be called instead of nfs_init_commit - call one or the other, but
+ * not both!
+ */
 static bool
 pnfs_generic_commit_cancel_empty_pagelist(struct list_head *pages,
 					  struct nfs_commit_data *data,
@@ -256,7 +260,11 @@ pnfs_generic_commit_cancel_empty_pagelist(struct list_head *pages,
 	if (list_empty(pages)) {
 		if (atomic_dec_and_test(&cinfo->mds->rpcs_out))
 			wake_up_atomic_t(&cinfo->mds->rpcs_out);
-		nfs_commitdata_release(data);
+		/* don't call nfs_commitdata_release - it tries to put
+		 * the open_context which is not acquired until nfs_init_commit
+		 * which has not been called on @data */
+		WARN_ON_ONCE(data->context);
+		nfs_commit_free(data);
 		return true;
 	}
 
-- 
cgit v0.10.2


From cbebaf897e5c4862567eb799dc84acc5d7ee2678 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Fri, 17 Jun 2016 16:48:25 -0400
Subject: NFS: Fix a double page unlock

Since commit 0bcbf039f6b2, nfs_readpage_release() has been used to
unlock the page in the read code.

Fixes: 0bcbf039f6b2 ("nfs: handle request add failure properly")
Cc: stable@vger.kernel.org # v4.5+
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>

diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 6776d7a..572e5b3 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -367,13 +367,13 @@ readpage_async_filler(void *data, struct page *page)
 		nfs_list_remove_request(new);
 		nfs_readpage_release(new);
 		error = desc->pgio->pg_error;
-		goto out_unlock;
+		goto out;
 	}
 	return 0;
 out_error:
 	error = PTR_ERR(new);
-out_unlock:
 	unlock_page(page);
+out:
 	return error;
 }
 
-- 
cgit v0.10.2


From 2d148c7e84962429a79092a56d3736a8d984f595 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Fri, 17 Jun 2016 16:48:26 -0400
Subject: NFSv4.1/pnfs: Mark the layout stateid invalid when all segments are
 removed

According to RFC5661, section 12.5.3. the layout stateid is no longer
valid once the client no longer holds any layout segments. Ensure that
we mark it invalid.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>

diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index ca488b5..0fbe734 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -361,8 +361,10 @@ pnfs_layout_remove_lseg(struct pnfs_layout_hdr *lo,
 	list_del_init(&lseg->pls_list);
 	/* Matched by pnfs_get_layout_hdr in pnfs_layout_insert_lseg */
 	atomic_dec(&lo->plh_refcount);
-	if (list_empty(&lo->plh_segs))
+	if (list_empty(&lo->plh_segs)) {
+		set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
 		clear_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
+	}
 	rpc_wake_up(&NFS_SERVER(inode)->roc_rpcwaitq);
 }
 
-- 
cgit v0.10.2


From d8fdb47fae5febc02e62da121f85625244b98b2e Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Fri, 17 Jun 2016 16:48:27 -0400
Subject: NFS: Don't let readdirplus revalidate an inode that was marked as
 stale

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index aaf7bd0..a924d66 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -424,12 +424,17 @@ static int xdr_decode(nfs_readdir_descriptor_t *desc,
 static
 int nfs_same_file(struct dentry *dentry, struct nfs_entry *entry)
 {
+	struct inode *inode;
 	struct nfs_inode *nfsi;
 
 	if (d_really_is_negative(dentry))
 		return 0;
 
-	nfsi = NFS_I(d_inode(dentry));
+	inode = d_inode(dentry);
+	if (is_bad_inode(inode) || NFS_STALE(inode))
+		return 0;
+
+	nfsi = NFS_I(inode);
 	if (entry->fattr->fileid == nfsi->fileid)
 		return 1;
 	if (nfs_compare_fh(entry->fh, &nfsi->fh) == 0)
-- 
cgit v0.10.2


From 916ec34d0bafe95b977908acd8b3153c9df6f9d6 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Fri, 17 Jun 2016 16:48:28 -0400
Subject: NFS: Fix potential race in nfs_fhget()

If we don't set the mode correctly in nfs_init_locked(), then there is
potential for a race with a second call to nfs_fhget that will cause
inode aliasing.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 52e7d68..dda689d 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -282,6 +282,7 @@ nfs_init_locked(struct inode *inode, void *opaque)
 	struct nfs_fattr	*fattr = desc->fattr;
 
 	set_nfs_fileid(inode, fattr->fileid);
+	inode->i_mode = fattr->mode;
 	nfs_copy_fh(NFS_FH(inode), desc->fh);
 	return 0;
 }
-- 
cgit v0.10.2


From 1b982ea2ca398bdaeab6cf2aba459a9ca808f1f3 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Fri, 17 Jun 2016 16:48:29 -0400
Subject: NFS: Fix an unused variable warning

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index a924d66..2817cce 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1368,7 +1368,6 @@ EXPORT_SYMBOL_GPL(nfs_dentry_operations);
 struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags)
 {
 	struct dentry *res;
-	struct dentry *parent;
 	struct inode *inode = NULL;
 	struct nfs_fh *fhandle = NULL;
 	struct nfs_fattr *fattr = NULL;
@@ -1398,7 +1397,6 @@ struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned in
 	if (IS_ERR(label))
 		goto out;
 
-	parent = dentry->d_parent;
 	/* Protect against concurrent sillydeletes */
 	trace_nfs_lookup_enter(dir, dentry, flags);
 	error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label);
-- 
cgit v0.10.2


From 4995734e973a2c2e9c6f6413cbad9913fc4df0dc Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Fri, 24 Jun 2016 09:07:39 -0700
Subject: acpi, nfit: fix acpi_check_dsm() vs zero functions implemented

QEMU 2.6 implements nascent support for nvdimm DSMs. Depending on
configuration it may only implement the function0 dsm to indicate that
no other DSMs are available. Commit 31eca76ba2fc "nfit, libnvdimm:
limited/whitelisted dimm command marshaling mechanism" breaks QEMU, but
QEMU is spec compliant.  Per the spec the way to indicate that no
functions are supported is:

    If Function Index is zero, the return is a buffer containing one bit
    for each function index, starting with zero. Bit 0 indicates whether
    there is support for any functions other than function 0 for the
    specified UUID and Revision ID. If set to zero, no functions are
    supported (other than function zero) for the specified UUID and
    Revision ID.

Update the nfit driver to determine the family (interface UUID) without
requiring the implementation to define any other functions, i.e.
short-circuit acpi_check_dsm() to succeed per the spec.  The nfit driver
appears to be the only user passing funcs==0 to acpi_check_dsm(), so
this behavior change of the common routine should be limited to the
probing done by the nfit driver.

Cc: Len Brown <lenb@kernel.org>
Cc: Jerry Hoemann <jerry.hoemann@hpe.com>
Acked-by: "Rafael J. Wysocki" <rafael@kernel.org>
Fixes: 31eca76ba2fc ("nfit, libnvdimm: limited/whitelisted dimm command marshaling mechanism")
Reported-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
Tested-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>

diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c
index 2215fc8..32579a7 100644
--- a/drivers/acpi/nfit.c
+++ b/drivers/acpi/nfit.c
@@ -1131,11 +1131,11 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
 
 	/*
 	 * Until standardization materializes we need to consider up to 3
-	 * different command sets.  Note, that checking for function0 (bit0)
-	 * tells us if any commands are reachable through this uuid.
+	 * different command sets.  Note, that checking for zero functions
+	 * tells us if any commands might be reachable through this uuid.
 	 */
 	for (i = NVDIMM_FAMILY_INTEL; i <= NVDIMM_FAMILY_HPE2; i++)
-		if (acpi_check_dsm(adev_dimm->handle, to_nfit_uuid(i), 1, 1))
+		if (acpi_check_dsm(adev_dimm->handle, to_nfit_uuid(i), 1, 0))
 			break;
 
 	/* limit the supported commands to those that are publicly documented */
diff --git a/drivers/acpi/utils.c b/drivers/acpi/utils.c
index 22c0995..b4de130 100644
--- a/drivers/acpi/utils.c
+++ b/drivers/acpi/utils.c
@@ -680,9 +680,6 @@ bool acpi_check_dsm(acpi_handle handle, const u8 *uuid, u64 rev, u64 funcs)
 	u64 mask = 0;
 	union acpi_object *obj;
 
-	if (funcs == 0)
-		return false;
-
 	obj = acpi_evaluate_dsm(handle, uuid, rev, 0, NULL);
 	if (!obj)
 		return false;
@@ -695,6 +692,9 @@ bool acpi_check_dsm(acpi_handle handle, const u8 *uuid, u64 rev, u64 funcs)
 			mask |= (((u64)obj->buffer.pointer[i]) << (i * 8));
 	ACPI_FREE(obj);
 
+	if (funcs == 0)
+		return true;
+
 	/*
 	 * Bit 0 indicates whether there's support for any functions other than
 	 * function 0 for the specified UUID and revision.
-- 
cgit v0.10.2


From 485e71e8fb6356c08c7fc6bcce4bf02c9a9a663f Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Wed, 22 Jun 2016 23:57:25 +0200
Subject: posix_acl: Add set_posix_acl

Factor out part of posix_acl_xattr_set into a common function that takes
a posix_acl, which nfsd can also call.

The prototype already exists in include/linux/posix_acl.h.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
Cc: stable@vger.kernel.org
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>

diff --git a/fs/posix_acl.c b/fs/posix_acl.c
index 8a4a266..edc452c 100644
--- a/fs/posix_acl.c
+++ b/fs/posix_acl.c
@@ -820,39 +820,43 @@ posix_acl_xattr_get(const struct xattr_handler *handler,
 	return error;
 }
 
-static int
-posix_acl_xattr_set(const struct xattr_handler *handler,
-		    struct dentry *unused, struct inode *inode,
-		    const char *name, const void *value,
-		    size_t size, int flags)
+int
+set_posix_acl(struct inode *inode, int type, struct posix_acl *acl)
 {
-	struct posix_acl *acl = NULL;
-	int ret;
-
 	if (!IS_POSIXACL(inode))
 		return -EOPNOTSUPP;
 	if (!inode->i_op->set_acl)
 		return -EOPNOTSUPP;
 
-	if (handler->flags == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode))
-		return value ? -EACCES : 0;
+	if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode))
+		return acl ? -EACCES : 0;
 	if (!inode_owner_or_capable(inode))
 		return -EPERM;
 
+	if (acl) {
+		int ret = posix_acl_valid(acl);
+		if (ret)
+			return ret;
+	}
+	return inode->i_op->set_acl(inode, acl, type);
+}
+EXPORT_SYMBOL(set_posix_acl);
+
+static int
+posix_acl_xattr_set(const struct xattr_handler *handler,
+		    struct dentry *unused, struct inode *inode,
+		    const char *name, const void *value,
+		    size_t size, int flags)
+{
+	struct posix_acl *acl = NULL;
+	int ret;
+
 	if (value) {
 		acl = posix_acl_from_xattr(&init_user_ns, value, size);
 		if (IS_ERR(acl))
 			return PTR_ERR(acl);
-
-		if (acl) {
-			ret = posix_acl_valid(acl);
-			if (ret)
-				goto out;
-		}
 	}
-
-	ret = inode->i_op->set_acl(inode, acl, handler->flags);
-out:
+	ret = set_posix_acl(inode, handler->flags, acl);
 	posix_acl_release(acl);
 	return ret;
 }
-- 
cgit v0.10.2


From 999653786df6954a31044528ac3f7a5dadca08f4 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Wed, 22 Jun 2016 19:43:35 +0100
Subject: nfsd: check permissions when setting ACLs

Use set_posix_acl, which includes proper permission checks, instead of
calling ->set_acl directly.  Without this anyone may be able to grant
themselves permissions to a file by setting the ACL.

Lock the inode to make the new checks atomic with respect to set_acl.
(Also, nfsd was the only caller of set_acl not locking the inode, so I
suspect this may fix other races.)

This also simplifies the code, and ensures our ACLs are checked by
posix_acl_valid.

The permission checks and the inode locking were lost with commit
4ac7249e, which changed nfsd to use the set_acl inode operation directly
instead of going through xattr handlers.

Reported-by: David Sinquin <david@sinquin.eu>
[agreunba@redhat.com: use set_posix_acl]
Fixes: 4ac7249e
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: stable@vger.kernel.org
Signed-off-by: J. Bruce Fields <bfields@redhat.com>

diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index 1580ea6..d08cd88 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -104,22 +104,21 @@ static __be32 nfsacld_proc_setacl(struct svc_rqst * rqstp,
 		goto out;
 
 	inode = d_inode(fh->fh_dentry);
-	if (!IS_POSIXACL(inode) || !inode->i_op->set_acl) {
-		error = -EOPNOTSUPP;
-		goto out_errno;
-	}
 
 	error = fh_want_write(fh);
 	if (error)
 		goto out_errno;
 
-	error = inode->i_op->set_acl(inode, argp->acl_access, ACL_TYPE_ACCESS);
+	fh_lock(fh);
+
+	error = set_posix_acl(inode, ACL_TYPE_ACCESS, argp->acl_access);
 	if (error)
-		goto out_drop_write;
-	error = inode->i_op->set_acl(inode, argp->acl_default,
-				     ACL_TYPE_DEFAULT);
+		goto out_drop_lock;
+	error = set_posix_acl(inode, ACL_TYPE_DEFAULT, argp->acl_default);
 	if (error)
-		goto out_drop_write;
+		goto out_drop_lock;
+
+	fh_unlock(fh);
 
 	fh_drop_write(fh);
 
@@ -131,7 +130,8 @@ out:
 	posix_acl_release(argp->acl_access);
 	posix_acl_release(argp->acl_default);
 	return nfserr;
-out_drop_write:
+out_drop_lock:
+	fh_unlock(fh);
 	fh_drop_write(fh);
 out_errno:
 	nfserr = nfserrno(error);
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index 01df4cd..0c89034 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -95,22 +95,20 @@ static __be32 nfsd3_proc_setacl(struct svc_rqst * rqstp,
 		goto out;
 
 	inode = d_inode(fh->fh_dentry);
-	if (!IS_POSIXACL(inode) || !inode->i_op->set_acl) {
-		error = -EOPNOTSUPP;
-		goto out_errno;
-	}
 
 	error = fh_want_write(fh);
 	if (error)
 		goto out_errno;
 
-	error = inode->i_op->set_acl(inode, argp->acl_access, ACL_TYPE_ACCESS);
+	fh_lock(fh);
+
+	error = set_posix_acl(inode, ACL_TYPE_ACCESS, argp->acl_access);
 	if (error)
-		goto out_drop_write;
-	error = inode->i_op->set_acl(inode, argp->acl_default,
-				     ACL_TYPE_DEFAULT);
+		goto out_drop_lock;
+	error = set_posix_acl(inode, ACL_TYPE_DEFAULT, argp->acl_default);
 
-out_drop_write:
+out_drop_lock:
+	fh_unlock(fh);
 	fh_drop_write(fh);
 out_errno:
 	nfserr = nfserrno(error);
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
index 6adabd6..71292a0 100644
--- a/fs/nfsd/nfs4acl.c
+++ b/fs/nfsd/nfs4acl.c
@@ -770,9 +770,6 @@ nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp,
 	dentry = fhp->fh_dentry;
 	inode = d_inode(dentry);
 
-	if (!inode->i_op->set_acl || !IS_POSIXACL(inode))
-		return nfserr_attrnotsupp;
-
 	if (S_ISDIR(inode->i_mode))
 		flags = NFS4_ACL_DIR;
 
@@ -782,16 +779,19 @@ nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp,
 	if (host_error < 0)
 		goto out_nfserr;
 
-	host_error = inode->i_op->set_acl(inode, pacl, ACL_TYPE_ACCESS);
+	fh_lock(fhp);
+
+	host_error = set_posix_acl(inode, ACL_TYPE_ACCESS, pacl);
 	if (host_error < 0)
-		goto out_release;
+		goto out_drop_lock;
 
 	if (S_ISDIR(inode->i_mode)) {
-		host_error = inode->i_op->set_acl(inode, dpacl,
-						  ACL_TYPE_DEFAULT);
+		host_error = set_posix_acl(inode, ACL_TYPE_DEFAULT, dpacl);
 	}
 
-out_release:
+out_drop_lock:
+	fh_unlock(fhp);
+
 	posix_acl_release(pacl);
 	posix_acl_release(dpacl);
 out_nfserr:
-- 
cgit v0.10.2


From 4fcd1813e6404dd4420c7d12fb483f9320f0bf93 Mon Sep 17 00:00:00 2001
From: Steve French <smfrench@gmail.com>
Date: Wed, 22 Jun 2016 20:12:05 -0500
Subject: Fix reconnect to not defer smb3 session reconnect long after socket
 reconnect

Azure server blocks clients that open a socket and don't do anything on it.
In our reconnect scenarios, we can reconnect the tcp session and
detect the socket is available but we defer the negprot and SMB3 session
setup and tree connect reconnection until the next i/o is requested, but
this looks suspicous to some servers who expect SMB3 negprog and session
setup soon after a socket is created.

In the echo thread, reconnect SMB3 sessions and tree connections
that are disconnected.  A later patch will replay persistent (and
resilient) handle opens.

CC: Stable <stable@vger.kernel.org>
Signed-off-by: Steve French <steve.french@primarydata.com>
Acked-by: Pavel Shilovsky <pshilovsky@samba.org>

diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 66736f5..7d2b15c 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -428,7 +428,9 @@ cifs_echo_request(struct work_struct *work)
 	 * server->ops->need_neg() == true. Also, no need to ping if
 	 * we got a response recently.
 	 */
-	if (!server->ops->need_neg || server->ops->need_neg(server) ||
+
+	if (server->tcpStatus == CifsNeedReconnect ||
+	    server->tcpStatus == CifsExiting || server->tcpStatus == CifsNew ||
 	    (server->ops->can_echo && !server->ops->can_echo(server)) ||
 	    time_before(jiffies, server->lstrp + echo_interval - HZ))
 		goto requeue_echo;
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index c3e61a7..29e06db 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -1812,6 +1812,33 @@ SMB2_echo(struct TCP_Server_Info *server)
 
 	cifs_dbg(FYI, "In echo request\n");
 
+	if (server->tcpStatus == CifsNeedNegotiate) {
+		struct list_head *tmp, *tmp2;
+		struct cifs_ses *ses;
+		struct cifs_tcon *tcon;
+
+		cifs_dbg(FYI, "Need negotiate, reconnecting tcons\n");
+		spin_lock(&cifs_tcp_ses_lock);
+		list_for_each(tmp, &server->smb_ses_list) {
+			ses = list_entry(tmp, struct cifs_ses, smb_ses_list);
+			list_for_each(tmp2, &ses->tcon_list) {
+				tcon = list_entry(tmp2, struct cifs_tcon,
+						  tcon_list);
+				/* add check for persistent handle reconnect */
+				if (tcon && tcon->need_reconnect) {
+					spin_unlock(&cifs_tcp_ses_lock);
+					rc = smb2_reconnect(SMB2_ECHO, tcon);
+					spin_lock(&cifs_tcp_ses_lock);
+				}
+			}
+		}
+		spin_unlock(&cifs_tcp_ses_lock);
+	}
+
+	/* if no session, renegotiate failed above */
+	if (server->tcpStatus == CifsNeedNegotiate)
+		return -EIO;
+
 	rc = small_smb2_init(SMB2_ECHO, NULL, (void **)&req);
 	if (rc)
 		return rc;
-- 
cgit v0.10.2


From 45e8a2583d97ca758a55c608f78c4cef562644d1 Mon Sep 17 00:00:00 2001
From: Steve French <smfrench@gmail.com>
Date: Wed, 22 Jun 2016 21:07:32 -0500
Subject: File names with trailing period or space need special case conversion

POSIX allows files with trailing spaces or a trailing period but
SMB3 does not, so convert these using the normal Services For Mac
mapping as we do for other reserved characters such as
	: < > | ? *
This is similar to what Macs do for the same problem over SMB3.

CC: Stable <stable@vger.kernel.org>
Signed-off-by: Steve French <steve.french@primarydata.com>
Acked-by: Pavel Shilovsky <pshilovsky@samba.org>

diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c
index 5a53ac6..02b071bf 100644
--- a/fs/cifs/cifs_unicode.c
+++ b/fs/cifs/cifs_unicode.c
@@ -101,6 +101,12 @@ convert_sfm_char(const __u16 src_char, char *target)
 	case SFM_SLASH:
 		*target = '\\';
 		break;
+	case SFM_SPACE:
+		*target = ' ';
+		break;
+	case SFM_PERIOD:
+		*target = '.';
+		break;
 	default:
 		return false;
 	}
@@ -404,7 +410,7 @@ static __le16 convert_to_sfu_char(char src_char)
 	return dest_char;
 }
 
-static __le16 convert_to_sfm_char(char src_char)
+static __le16 convert_to_sfm_char(char src_char, bool end_of_string)
 {
 	__le16 dest_char;
 
@@ -427,6 +433,18 @@ static __le16 convert_to_sfm_char(char src_char)
 	case '|':
 		dest_char = cpu_to_le16(SFM_PIPE);
 		break;
+	case '.':
+		if (end_of_string)
+			dest_char = cpu_to_le16(SFM_PERIOD);
+		else
+			dest_char = 0;
+		break;
+	case ' ':
+		if (end_of_string)
+			dest_char = cpu_to_le16(SFM_SPACE);
+		else
+			dest_char = 0;
+		break;
 	default:
 		dest_char = 0;
 	}
@@ -469,9 +487,16 @@ cifsConvertToUTF16(__le16 *target, const char *source, int srclen,
 		/* see if we must remap this char */
 		if (map_chars == SFU_MAP_UNI_RSVD)
 			dst_char = convert_to_sfu_char(src_char);
-		else if (map_chars == SFM_MAP_UNI_RSVD)
-			dst_char = convert_to_sfm_char(src_char);
-		else
+		else if (map_chars == SFM_MAP_UNI_RSVD) {
+			bool end_of_string;
+
+			if (i == srclen - 1)
+				end_of_string = true;
+			else
+				end_of_string = false;
+
+			dst_char = convert_to_sfm_char(src_char, end_of_string);
+		} else
 			dst_char = 0;
 		/*
 		 * FIXME: We can not handle remapping backslash (UNI_SLASH)
diff --git a/fs/cifs/cifs_unicode.h b/fs/cifs/cifs_unicode.h
index bdc52cb..479bc0a 100644
--- a/fs/cifs/cifs_unicode.h
+++ b/fs/cifs/cifs_unicode.h
@@ -64,6 +64,8 @@
 #define SFM_LESSTHAN    ((__u16) 0xF023)
 #define SFM_PIPE        ((__u16) 0xF027)
 #define SFM_SLASH       ((__u16) 0xF026)
+#define SFM_PERIOD	((__u16) 0xF028)
+#define SFM_SPACE	((__u16) 0xF029)
 
 /*
  * Mapping mechanism to use when one of the seven reserved characters is
-- 
cgit v0.10.2


From b235beea9e996a4d36fed6cfef4801a3e7d7a9a5 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Fri, 24 Jun 2016 15:09:37 -0700
Subject: Clarify naming of thread info/stack allocators

We've had the thread info allocated together with the thread stack for
most architectures for a long time (since the thread_info was split off
from the task struct), but that is about to change.

But the patches that move the thread info to be off-stack (and a part of
the task struct instead) made it clear how confused the allocator and
freeing functions are.

Because the common case was that we share an allocation with the thread
stack and the thread_info, the two pointers were identical.  That
identity then meant that we would have things like

	ti = alloc_thread_info_node(tsk, node);
	...
	tsk->stack = ti;

which certainly _worked_ (since stack and thread_info have the same
value), but is rather confusing: why are we assigning a thread_info to
the stack? And if we move the thread_info away, the "confusing" code
just gets to be entirely bogus.

So remove all this confusion, and make it clear that we are doing the
stack allocation by renaming and clarifying the function names to be
about the stack.  The fact that the thread_info then shares the
allocation is an implementation detail, and not really about the
allocation itself.

This is a pure renaming and type fix: we pass in the same pointer, it's
just that we clarify what the pointer means.

The ia64 code that actually only has one single allocation (for all of
task_struct, thread_info and kernel thread stack) now looks a bit odd,
but since "tsk->stack" is actually not even used there, that oddity
doesn't matter.  It would be a separate thing to clean that up, I
intentionally left the ia64 changes as a pure brute-force renaming and
type change.

Acked-by: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/Kconfig b/arch/Kconfig
index e973479..1599629 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -226,8 +226,8 @@ config ARCH_INIT_TASK
 config ARCH_TASK_STRUCT_ALLOCATOR
 	bool
 
-# Select if arch has its private alloc_thread_info() function
-config ARCH_THREAD_INFO_ALLOCATOR
+# Select if arch has its private alloc_thread_stack() function
+config ARCH_THREAD_STACK_ALLOCATOR
 	bool
 
 # Select if arch wants to size task_struct dynamically via arch_task_struct_size:
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index f80758c..e109ee9 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -45,7 +45,7 @@ config IA64
 	select GENERIC_SMP_IDLE_THREAD
 	select ARCH_INIT_TASK
 	select ARCH_TASK_STRUCT_ALLOCATOR
-	select ARCH_THREAD_INFO_ALLOCATOR
+	select ARCH_THREAD_STACK_ALLOCATOR
 	select ARCH_CLOCKSOURCE_DATA
 	select GENERIC_TIME_VSYSCALL_OLD
 	select SYSCTL_ARCH_UNALIGN_NO_WARN
diff --git a/arch/ia64/include/asm/thread_info.h b/arch/ia64/include/asm/thread_info.h
index aa995b6..d1212b8 100644
--- a/arch/ia64/include/asm/thread_info.h
+++ b/arch/ia64/include/asm/thread_info.h
@@ -48,15 +48,15 @@ struct thread_info {
 #ifndef ASM_OFFSETS_C
 /* how to get the thread information struct from C */
 #define current_thread_info()	((struct thread_info *) ((char *) current + IA64_TASK_SIZE))
-#define alloc_thread_info_node(tsk, node)	\
-		((struct thread_info *) ((char *) (tsk) + IA64_TASK_SIZE))
+#define alloc_thread_stack_node(tsk, node)	\
+		((unsigned long *) ((char *) (tsk) + IA64_TASK_SIZE))
 #define task_thread_info(tsk)	((struct thread_info *) ((char *) (tsk) + IA64_TASK_SIZE))
 #else
 #define current_thread_info()	((struct thread_info *) 0)
-#define alloc_thread_info_node(tsk, node)	((struct thread_info *) 0)
+#define alloc_thread_stack_node(tsk, node)	((unsigned long *) 0)
 #define task_thread_info(tsk)	((struct thread_info *) 0)
 #endif
-#define free_thread_info(ti)	/* nothing */
+#define free_thread_stack(ti)	/* nothing */
 #define task_stack_page(tsk)	((void *)(tsk))
 
 #define __HAVE_THREAD_FUNCTIONS
diff --git a/arch/mn10300/include/asm/thread_info.h b/arch/mn10300/include/asm/thread_info.h
index 4861a78..f5f90bb 100644
--- a/arch/mn10300/include/asm/thread_info.h
+++ b/arch/mn10300/include/asm/thread_info.h
@@ -115,7 +115,7 @@ static inline unsigned long current_stack_pointer(void)
 }
 
 #ifndef CONFIG_KGDB
-void arch_release_thread_info(struct thread_info *ti);
+void arch_release_thread_stack(unsigned long *stack);
 #endif
 #define get_thread_info(ti)	get_task_struct((ti)->task)
 #define put_thread_info(ti)	put_task_struct((ti)->task)
diff --git a/arch/mn10300/kernel/kgdb.c b/arch/mn10300/kernel/kgdb.c
index 9977082..2d7986c 100644
--- a/arch/mn10300/kernel/kgdb.c
+++ b/arch/mn10300/kernel/kgdb.c
@@ -397,8 +397,9 @@ static bool kgdb_arch_undo_singlestep(struct pt_regs *regs)
  * single-step state is cleared.  At this point the breakpoints should have
  * been removed by __switch_to().
  */
-void arch_release_thread_info(struct thread_info *ti)
+void arch_release_thread_stack(unsigned long *stack)
 {
+	struct thread_info *ti = (void *)stack;
 	if (kgdb_sstep_thread == ti) {
 		kgdb_sstep_thread = NULL;
 
diff --git a/arch/tile/include/asm/thread_info.h b/arch/tile/include/asm/thread_info.h
index 4b7cef9..c1467ac 100644
--- a/arch/tile/include/asm/thread_info.h
+++ b/arch/tile/include/asm/thread_info.h
@@ -78,7 +78,7 @@ struct thread_info {
 
 #ifndef __ASSEMBLY__
 
-void arch_release_thread_info(struct thread_info *info);
+void arch_release_thread_stack(unsigned long *stack);
 
 /* How to get the thread information struct from C. */
 register unsigned long stack_pointer __asm__("sp");
diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c
index 6b705cc..a465d83 100644
--- a/arch/tile/kernel/process.c
+++ b/arch/tile/kernel/process.c
@@ -73,8 +73,9 @@ void arch_cpu_idle(void)
 /*
  * Release a thread_info structure
  */
-void arch_release_thread_info(struct thread_info *info)
+void arch_release_thread_stack(unsigned long *stack)
 {
+	struct thread_info *info = (void *)stack;
 	struct single_step_state *step_state = info->step_state;
 
 	if (step_state) {
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6e42ada..253538f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -3007,7 +3007,7 @@ static inline int object_is_on_stack(void *obj)
 	return (obj >= stack) && (obj < (stack + THREAD_SIZE));
 }
 
-extern void thread_info_cache_init(void);
+extern void thread_stack_cache_init(void);
 
 #ifdef CONFIG_DEBUG_STACK_USAGE
 static inline unsigned long stack_not_used(struct task_struct *p)
diff --git a/init/main.c b/init/main.c
index 4c17fda..826fd57 100644
--- a/init/main.c
+++ b/init/main.c
@@ -453,7 +453,7 @@ void __init __weak smp_setup_processor_id(void)
 }
 
 # if THREAD_SIZE >= PAGE_SIZE
-void __init __weak thread_info_cache_init(void)
+void __init __weak thread_stack_cache_init(void)
 {
 }
 #endif
@@ -627,7 +627,7 @@ asmlinkage __visible void __init start_kernel(void)
 	/* Should be run before the first non-init thread is created */
 	init_espfix_bsp();
 #endif
-	thread_info_cache_init();
+	thread_stack_cache_init();
 	cred_init();
 	fork_init();
 	proc_caches_init();
diff --git a/kernel/fork.c b/kernel/fork.c
index 5c2c355..37b9439 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -148,18 +148,18 @@ static inline void free_task_struct(struct task_struct *tsk)
 }
 #endif
 
-void __weak arch_release_thread_info(struct thread_info *ti)
+void __weak arch_release_thread_stack(unsigned long *stack)
 {
 }
 
-#ifndef CONFIG_ARCH_THREAD_INFO_ALLOCATOR
+#ifndef CONFIG_ARCH_THREAD_STACK_ALLOCATOR
 
 /*
  * Allocate pages if THREAD_SIZE is >= PAGE_SIZE, otherwise use a
  * kmemcache based allocator.
  */
 # if THREAD_SIZE >= PAGE_SIZE
-static struct thread_info *alloc_thread_info_node(struct task_struct *tsk,
+static unsigned long *alloc_thread_stack_node(struct task_struct *tsk,
 						  int node)
 {
 	struct page *page = alloc_kmem_pages_node(node, THREADINFO_GFP,
@@ -172,33 +172,33 @@ static struct thread_info *alloc_thread_info_node(struct task_struct *tsk,
 	return page ? page_address(page) : NULL;
 }
 
-static inline void free_thread_info(struct thread_info *ti)
+static inline void free_thread_stack(unsigned long *stack)
 {
-	struct page *page = virt_to_page(ti);
+	struct page *page = virt_to_page(stack);
 
 	memcg_kmem_update_page_stat(page, MEMCG_KERNEL_STACK,
 				    -(1 << THREAD_SIZE_ORDER));
 	__free_kmem_pages(page, THREAD_SIZE_ORDER);
 }
 # else
-static struct kmem_cache *thread_info_cache;
+static struct kmem_cache *thread_stack_cache;
 
-static struct thread_info *alloc_thread_info_node(struct task_struct *tsk,
+static struct thread_info *alloc_thread_stack_node(struct task_struct *tsk,
 						  int node)
 {
-	return kmem_cache_alloc_node(thread_info_cache, THREADINFO_GFP, node);
+	return kmem_cache_alloc_node(thread_stack_cache, THREADINFO_GFP, node);
 }
 
-static void free_thread_info(struct thread_info *ti)
+static void free_stack(unsigned long *stack)
 {
-	kmem_cache_free(thread_info_cache, ti);
+	kmem_cache_free(thread_stack_cache, stack);
 }
 
-void thread_info_cache_init(void)
+void thread_stack_cache_init(void)
 {
-	thread_info_cache = kmem_cache_create("thread_info", THREAD_SIZE,
+	thread_stack_cache = kmem_cache_create("thread_stack", THREAD_SIZE,
 					      THREAD_SIZE, 0, NULL);
-	BUG_ON(thread_info_cache == NULL);
+	BUG_ON(thread_stack_cache == NULL);
 }
 # endif
 #endif
@@ -221,9 +221,9 @@ struct kmem_cache *vm_area_cachep;
 /* SLAB cache for mm_struct structures (tsk->mm) */
 static struct kmem_cache *mm_cachep;
 
-static void account_kernel_stack(struct thread_info *ti, int account)
+static void account_kernel_stack(unsigned long *stack, int account)
 {
-	struct zone *zone = page_zone(virt_to_page(ti));
+	struct zone *zone = page_zone(virt_to_page(stack));
 
 	mod_zone_page_state(zone, NR_KERNEL_STACK, account);
 }
@@ -231,8 +231,8 @@ static void account_kernel_stack(struct thread_info *ti, int account)
 void free_task(struct task_struct *tsk)
 {
 	account_kernel_stack(tsk->stack, -1);
-	arch_release_thread_info(tsk->stack);
-	free_thread_info(tsk->stack);
+	arch_release_thread_stack(tsk->stack);
+	free_thread_stack(tsk->stack);
 	rt_mutex_debug_task_free(tsk);
 	ftrace_graph_exit_task(tsk);
 	put_seccomp_filter(tsk);
@@ -343,7 +343,7 @@ void set_task_stack_end_magic(struct task_struct *tsk)
 static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 {
 	struct task_struct *tsk;
-	struct thread_info *ti;
+	unsigned long *stack;
 	int err;
 
 	if (node == NUMA_NO_NODE)
@@ -352,15 +352,15 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 	if (!tsk)
 		return NULL;
 
-	ti = alloc_thread_info_node(tsk, node);
-	if (!ti)
+	stack = alloc_thread_stack_node(tsk, node);
+	if (!stack)
 		goto free_tsk;
 
 	err = arch_dup_task_struct(tsk, orig);
 	if (err)
-		goto free_ti;
+		goto free_stack;
 
-	tsk->stack = ti;
+	tsk->stack = stack;
 #ifdef CONFIG_SECCOMP
 	/*
 	 * We must handle setting up seccomp filters once we're under
@@ -392,14 +392,14 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 	tsk->task_frag.page = NULL;
 	tsk->wake_q.next = NULL;
 
-	account_kernel_stack(ti, 1);
+	account_kernel_stack(stack, 1);
 
 	kcov_task_init(tsk);
 
 	return tsk;
 
-free_ti:
-	free_thread_info(ti);
+free_stack:
+	free_thread_stack(stack);
 free_tsk:
 	free_task_struct(tsk);
 	return NULL;
-- 
cgit v0.10.2


From aca9c293d098292579e345b2b39b394778d41526 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Fri, 24 Jun 2016 16:55:53 -0700
Subject: x86: fix up a few misc stack pointer vs thread_info confusions

As the actual pointer value is the same for the thread stack allocation
and the thread_info, code that confused the two worked fine, but will
break when the thread info is moved away from the stack allocation.  It
also looks very confusing.

For example, the kprobe code wanted to know the current top of stack.
To do that, it used this:

	(unsigned long)current_thread_info() + THREAD_SIZE

which did indeed give the correct value.  But it's not only a fairly
nonsensical expression, it's also rather complex, especially since we
actually have this:

	static inline unsigned long current_top_of_stack(void)

which not only gives us the value we are interested in, but happens to
be how "current_thread_info()" is currently defined as:

	(struct thread_info *)(current_top_of_stack() - THREAD_SIZE);

so using current_thread_info() to figure out the top of the stack really
is a very round-about thing to do.

The other cases are just simpler confusion about task_thread_info() vs
task_stack_page(), which currently return the same pointer - but if you
want the stack page, you really should be using the latter one.

And there was one entirely unused assignment of the current stack to a
thread_info pointer.

All cleaned up to make more sense today, and make it easier to move the
thread_info away from the stack in the future.

No semantic changes.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h
index 4421b5d..d1d1e50 100644
--- a/arch/x86/include/asm/kprobes.h
+++ b/arch/x86/include/asm/kprobes.h
@@ -38,12 +38,11 @@ typedef u8 kprobe_opcode_t;
 #define RELATIVECALL_OPCODE 0xe8
 #define RELATIVE_ADDR_SIZE 4
 #define MAX_STACK_SIZE 64
-#define MIN_STACK_SIZE(ADDR)					       \
-	(((MAX_STACK_SIZE) < (((unsigned long)current_thread_info()) + \
-			      THREAD_SIZE - (unsigned long)(ADDR)))    \
-	 ? (MAX_STACK_SIZE)					       \
-	 : (((unsigned long)current_thread_info()) +		       \
-	    THREAD_SIZE - (unsigned long)(ADDR)))
+#define CUR_STACK_SIZE(ADDR) \
+	(current_top_of_stack() - (unsigned long)(ADDR))
+#define MIN_STACK_SIZE(ADDR)				\
+	(MAX_STACK_SIZE < CUR_STACK_SIZE(ADDR) ?	\
+	 MAX_STACK_SIZE : CUR_STACK_SIZE(ADDR))
 
 #define flush_insn_slot(p)	do { } while (0)
 
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index d6209f3..ef8017c 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -80,7 +80,7 @@ print_ftrace_graph_addr(unsigned long addr, void *data,
 static inline int valid_stack_ptr(struct task_struct *task,
 			void *p, unsigned int size, void *end)
 {
-	void *t = task_thread_info(task);
+	void *t = task_stack_page(task);
 	if (end) {
 		if (p < end && p >= (end-THREAD_SIZE))
 			return 1;
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 38da8f2..c627bf8 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -130,11 +130,9 @@ void irq_ctx_init(int cpu)
 
 void do_softirq_own_stack(void)
 {
-	struct thread_info *curstk;
 	struct irq_stack *irqstk;
 	u32 *isp, *prev_esp;
 
-	curstk = current_stack();
 	irqstk = __this_cpu_read(softirq_stack);
 
 	/* build the stack frame on the softirq stack */
-- 
cgit v0.10.2


From 7e8b3dfef16375dbfeb1f36a83eb9f27117c51fd Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Thu, 23 Jun 2016 14:54:37 -0400
Subject: USB: EHCI: declare hostpc register as zero-length array

The HOSTPC extension registers found in some EHCI implementations form
a variable-length array, with one element for each port.  Therefore
the hostpc field in struct ehci_regs should be declared as a
zero-length array, not a single-element array.

This fixes a problem reported by UBSAN.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Reported-by: Wilfried Klaebe <linux-kernel@lebenslange-mailadresse.de>
Tested-by: Wilfried Klaebe <linux-kernel@lebenslange-mailadresse.de>
CC: <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

diff --git a/include/linux/usb/ehci_def.h b/include/linux/usb/ehci_def.h
index 966889a..e479033 100644
--- a/include/linux/usb/ehci_def.h
+++ b/include/linux/usb/ehci_def.h
@@ -180,11 +180,11 @@ struct ehci_regs {
  * PORTSCx
  */
 	/* HOSTPC: offset 0x84 */
-	u32		hostpc[1];	/* HOSTPC extension */
+	u32		hostpc[0];	/* HOSTPC extension */
 #define HOSTPC_PHCD	(1<<22)		/* Phy clock disable */
 #define HOSTPC_PSPD	(3<<25)		/* Port speed detection */
 
-	u32		reserved5[16];
+	u32		reserved5[17];
 
 	/* USBMODE_EX: offset 0xc8 */
 	u32		usbmode_ex;	/* USB Device mode extension */
-- 
cgit v0.10.2


From 7f1a00b6fcd0e3c19beba2e92d157dc0c2cf3494 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Fri, 24 Jun 2016 17:07:33 -0700
Subject: fix up initial thread stack pointer vs thread_info confusion

The INIT_TASK() initializer was similarly confused about the stack vs
thread_info allocation that the allocators had, and that were fixed in
commit b235beea9e99 ("Clarify naming of thread info/stack allocators").

The task ->stack pointer only incidentally ends up having the same value
as the thread_info, and in fact that will change.

So fix the initial task struct initializer to point to 'init_stack'
instead of 'init_thread_info', and make sure the ia64 definition for
that exists.

This actually makes the ia64 tsk->stack pointer be sensible for the
initial task, but not for any other task.  As mentioned in commit
b235beea9e99, that whole pointer isn't actually used on ia64, since
task_stack_page() there just points to the (single) allocation.

All the other architectures seem to have copied the 'init_stack'
definition, even if it tended to be generally unusued.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/ia64/kernel/init_task.c b/arch/ia64/kernel/init_task.c
index f9efe97..0eaa89f 100644
--- a/arch/ia64/kernel/init_task.c
+++ b/arch/ia64/kernel/init_task.c
@@ -26,6 +26,7 @@ static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
  * handled. This is done by having a special ".data..init_task" section...
  */
 #define init_thread_info	init_task_mem.s.thread_info
+#define init_stack		init_task_mem.stack
 
 union {
 	struct {
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index f2cb8d4..f8834f8 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -190,7 +190,7 @@ extern struct task_group root_task_group;
 #define INIT_TASK(tsk)	\
 {									\
 	.state		= 0,						\
-	.stack		= &init_thread_info,				\
+	.stack		= init_stack,					\
 	.usage		= ATOMIC_INIT(2),				\
 	.flags		= PF_KTHREAD,					\
 	.prio		= MAX_PRIO-20,					\
-- 
cgit v0.10.2


From 491a1c65ae498dea0e39b24a46e528a78a8532ed Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Date: Fri, 24 Jun 2016 14:48:35 -0700
Subject: mm,oom_reaper: don't call mmput_async() without atomic_inc_not_zero()

Commit e2fe14564d33 ("oom_reaper: close race with exiting task") reduced
frequency of needlessly selecting next OOM victim, but was calling
mmput_async() when atomic_inc_not_zero() failed.

Link: http://lkml.kernel.org/r/1464423365-5555-1-git-send-email-penguin-kernel@I-love.SAKURA.ne.jp
Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index acbc432..be67df3 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -478,6 +478,7 @@ static bool __oom_reap_task(struct task_struct *tsk)
 	mm = p->mm;
 	if (!atomic_inc_not_zero(&mm->mm_users)) {
 		task_unlock(p);
+		mm = NULL;
 		goto unlock_oom;
 	}
 
-- 
cgit v0.10.2


From 9df10fb7b80bc2f540956ba01b5e7ee1012001a5 Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Date: Fri, 24 Jun 2016 14:48:38 -0700
Subject: oom_reaper: avoid pointless atomic_inc_not_zero usage.

Since commit 36324a990cf5 ("oom: clear TIF_MEMDIE after oom_reaper
managed to unmap the address space") changed to use find_lock_task_mm()
for finding a mm_struct to reap, it is guaranteed that mm->mm_users > 0
because find_lock_task_mm() returns a task_struct with ->mm != NULL.
Therefore, we can safely use atomic_inc().

Link: http://lkml.kernel.org/r/1465024759-8074-1-git-send-email-penguin-kernel@I-love.SAKURA.ne.jp
Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index be67df3..ddf7448 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -474,14 +474,8 @@ static bool __oom_reap_task(struct task_struct *tsk)
 	p = find_lock_task_mm(tsk);
 	if (!p)
 		goto unlock_oom;
-
 	mm = p->mm;
-	if (!atomic_inc_not_zero(&mm->mm_users)) {
-		task_unlock(p);
-		mm = NULL;
-		goto unlock_oom;
-	}
-
+	atomic_inc(&mm->mm_users);
 	task_unlock(p);
 
 	if (!down_read_trylock(&mm->mmap_sem)) {
-- 
cgit v0.10.2


From a7b50abc90afb2e3c27e1bd212643cc53eaf0b60 Mon Sep 17 00:00:00 2001
From: Mike Kravetz <mike.kravetz@oracle.com>
Date: Fri, 24 Jun 2016 14:48:40 -0700
Subject: selftests/vm/compaction_test: fix write to restore nr_hugepages

The write at the end of the test to restore nr_hugepages to its previous
value is failing.  This is because it is trying to write the number of
bytes in the char array as opposed to the number of bytes in the string.

Link: http://lkml.kernel.org/r/1465331205-3284-1-git-send-email-mike.kravetz@oracle.com
Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Shuah Khan <shuahkh@osg.samsung.com>
Cc: Sri Jayaramappa <sjayaram@akamai.com>
Cc: Eric B Munson <emunson@akamai.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/tools/testing/selftests/vm/compaction_test.c b/tools/testing/selftests/vm/compaction_test.c
index 932ff57..00c4f65 100644
--- a/tools/testing/selftests/vm/compaction_test.c
+++ b/tools/testing/selftests/vm/compaction_test.c
@@ -136,7 +136,7 @@ int check_compaction(unsigned long mem_free, unsigned int hugepage_size)
 	printf("No of huge pages allocated = %d\n",
 	       (atoi(nr_hugepages)));
 
-	if (write(fd, initial_nr_hugepages, sizeof(initial_nr_hugepages))
+	if (write(fd, initial_nr_hugepages, strlen(initial_nr_hugepages))
 	    != strlen(initial_nr_hugepages)) {
 		perror("Failed to write to /proc/sys/vm/nr_hugepages\n");
 		goto close_fd;
-- 
cgit v0.10.2


From b9b4bb26af017dbe930cd4df7f9b2fc3a0497bfe Mon Sep 17 00:00:00 2001
From: Anthony Romano <anthony.romano@coreos.com>
Date: Fri, 24 Jun 2016 14:48:43 -0700
Subject: tmpfs: don't undo fallocate past its last page

When fallocate is interrupted it will undo a range that extends one byte
past its range of allocated pages.  This can corrupt an in-use page by
zeroing out its first byte.  Instead, undo using the inclusive byte
range.

Fixes: 1635f6a74152f1d ("tmpfs: undo fallocation on failure")
Link: http://lkml.kernel.org/r/1462713387-16724-1-git-send-email-anthony.romano@coreos.com
Signed-off-by: Anthony Romano <anthony.romano@coreos.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Hugh Dickins <hughd@google.com>
Cc: Brandon Philips <brandon@ifup.co>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/shmem.c b/mm/shmem.c
index a361449..24463b6 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2227,7 +2227,7 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
 			/* Remove the !PageUptodate pages we added */
 			shmem_undo_range(inode,
 				(loff_t)start << PAGE_SHIFT,
-				(loff_t)index << PAGE_SHIFT, true);
+				((loff_t)index << PAGE_SHIFT) - 1, true);
 			goto undone;
 		}
 
-- 
cgit v0.10.2


From 32d6bd9059f265f617f6502c68dfbcae7e515add Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Fri, 24 Jun 2016 14:48:47 -0700
Subject: tree wide: get rid of __GFP_REPEAT for order-0 allocations part I

This is the third version of the patchset previously sent [1].  I have
basically only rebased it on top of 4.7-rc1 tree and dropped "dm: get
rid of superfluous gfp flags" which went through dm tree.  I am sending
it now because it is tree wide and chances for conflicts are reduced
considerably when we want to target rc2.  I plan to send the next step
and rename the flag and move to a better semantic later during this
release cycle so we will have a new semantic ready for 4.8 merge window
hopefully.

Motivation:

While working on something unrelated I've checked the current usage of
__GFP_REPEAT in the tree.  It seems that a majority of the usage is and
always has been bogus because __GFP_REPEAT has always been about costly
high order allocations while we are using it for order-0 or very small
orders very often.  It seems that a big pile of them is just a
copy&paste when a code has been adopted from one arch to another.

I think it makes some sense to get rid of them because they are just
making the semantic more unclear.  Please note that GFP_REPEAT is
documented as

* __GFP_REPEAT: Try hard to allocate the memory, but the allocation attempt

* _might_ fail.  This depends upon the particular VM implementation.
  while !costly requests have basically nofail semantic.  So one could
  reasonably expect that order-0 request with __GFP_REPEAT will not loop
  for ever.  This is not implemented right now though.

I would like to move on with __GFP_REPEAT and define a better semantic
for it.

  $ git grep __GFP_REPEAT origin/master | wc -l
  111
  $ git grep __GFP_REPEAT | wc -l
  36

So we are down to the third after this patch series.  The remaining
places really seem to be relying on __GFP_REPEAT due to large allocation
requests.  This still needs some double checking which I will do later
after all the simple ones are sorted out.

I am touching a lot of arch specific code here and I hope I got it right
but as a matter of fact I even didn't compile test for some archs as I
do not have cross compiler for them.  Patches should be quite trivial to
review for stupid compile mistakes though.  The tricky parts are usually
hidden by macro definitions and thats where I would appreciate help from
arch maintainers.

[1] http://lkml.kernel.org/r/1461849846-27209-1-git-send-email-mhocko@kernel.org

This patch (of 19):

__GFP_REPEAT has a rather weak semantic but since it has been introduced
around 2.6.12 it has been ignored for low order allocations.  Yet we
have the full kernel tree with its usage for apparently order-0
allocations.  This is really confusing because __GFP_REPEAT is
explicitly documented to allow allocation failures which is a weaker
semantic than the current order-0 has (basically nofail).

Let's simply drop __GFP_REPEAT from those places.  This would allow to
identify place which really need allocator to retry harder and formulate
a more specific semantic for what the flag is supposed to do actually.

Link: http://lkml.kernel.org/r/1464599699-30131-2-git-send-email-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: "James E.J. Bottomley" <jejb@parisc-linux.org>
Cc: "Theodore Ts'o" <tytso@mit.edu>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Chen Liqin <liqin.linux@gmail.com>
Cc: Chris Metcalf <cmetcalf@mellanox.com> [for tile]
Cc: Guan Xuetao <gxt@mprc.pku.edu.cn>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Helge Deller <deller@gmx.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jan Kara <jack@suse.cz>
Cc: John Crispin <blogic@openwrt.org>
Cc: Lennox Wu <lennox.wu@gmail.com>
Cc: Ley Foon Tan <lftan@altera.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Matt Fleming <matt@codeblueprint.co.uk>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Rich Felker <dalias@libc.org>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vineet Gupta <vgupta@synopsys.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/alpha/include/asm/pgalloc.h b/arch/alpha/include/asm/pgalloc.h
index aab14a0..c2ebb6f 100644
--- a/arch/alpha/include/asm/pgalloc.h
+++ b/arch/alpha/include/asm/pgalloc.h
@@ -40,7 +40,7 @@ pgd_free(struct mm_struct *mm, pgd_t *pgd)
 static inline pmd_t *
 pmd_alloc_one(struct mm_struct *mm, unsigned long address)
 {
-	pmd_t *ret = (pmd_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
+	pmd_t *ret = (pmd_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
 	return ret;
 }
 
@@ -53,7 +53,7 @@ pmd_free(struct mm_struct *mm, pmd_t *pmd)
 static inline pte_t *
 pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
 {
-	pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
+	pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
 	return pte;
 }
 
diff --git a/arch/arm/include/asm/pgalloc.h b/arch/arm/include/asm/pgalloc.h
index 19cfab5..20febb3 100644
--- a/arch/arm/include/asm/pgalloc.h
+++ b/arch/arm/include/asm/pgalloc.h
@@ -29,7 +29,7 @@
 
 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
-	return (pmd_t *)get_zeroed_page(GFP_KERNEL | __GFP_REPEAT);
+	return (pmd_t *)get_zeroed_page(GFP_KERNEL);
 }
 
 static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
diff --git a/arch/avr32/include/asm/pgalloc.h b/arch/avr32/include/asm/pgalloc.h
index 1aba19d..db039cb 100644
--- a/arch/avr32/include/asm/pgalloc.h
+++ b/arch/avr32/include/asm/pgalloc.h
@@ -43,7 +43,7 @@ static inline void pgd_ctor(void *x)
  */
 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 {
-	return quicklist_alloc(QUICK_PGD, GFP_KERNEL | __GFP_REPEAT, pgd_ctor);
+	return quicklist_alloc(QUICK_PGD, GFP_KERNEL, pgd_ctor);
 }
 
 static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
@@ -54,7 +54,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 					  unsigned long address)
 {
-	return quicklist_alloc(QUICK_PT, GFP_KERNEL | __GFP_REPEAT, NULL);
+	return quicklist_alloc(QUICK_PT, GFP_KERNEL, NULL);
 }
 
 static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
@@ -63,7 +63,7 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
 	struct page *page;
 	void *pg;
 
-	pg = quicklist_alloc(QUICK_PT, GFP_KERNEL | __GFP_REPEAT, NULL);
+	pg = quicklist_alloc(QUICK_PT, GFP_KERNEL, NULL);
 	if (!pg)
 		return NULL;
 
diff --git a/arch/cris/include/asm/pgalloc.h b/arch/cris/include/asm/pgalloc.h
index 235ece4..42f1aff 100644
--- a/arch/cris/include/asm/pgalloc.h
+++ b/arch/cris/include/asm/pgalloc.h
@@ -24,14 +24,14 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 
 static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
 {
-  	pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
+	pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
  	return pte;
 }
 
 static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
 {
 	struct page *pte;
-	pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
+	pte = alloc_pages(GFP_KERNEL|__GFP_ZERO, 0);
 	if (!pte)
 		return NULL;
 	if (!pgtable_page_ctor(pte)) {
diff --git a/arch/frv/mm/pgalloc.c b/arch/frv/mm/pgalloc.c
index 41907d2..c9ed14f 100644
--- a/arch/frv/mm/pgalloc.c
+++ b/arch/frv/mm/pgalloc.c
@@ -22,7 +22,7 @@ pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__((aligned(PAGE_SIZE)));
 
 pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
 {
-	pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT);
+	pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL);
 	if (pte)
 		clear_page(pte);
 	return pte;
@@ -33,9 +33,9 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
 	struct page *page;
 
 #ifdef CONFIG_HIGHPTE
-	page = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT, 0);
+	page = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM, 0);
 #else
-	page = alloc_pages(GFP_KERNEL|__GFP_REPEAT, 0);
+	page = alloc_pages(GFP_KERNEL, 0);
 #endif
 	if (!page)
 		return NULL;
diff --git a/arch/hexagon/include/asm/pgalloc.h b/arch/hexagon/include/asm/pgalloc.h
index 77da3b0..eeebf86 100644
--- a/arch/hexagon/include/asm/pgalloc.h
+++ b/arch/hexagon/include/asm/pgalloc.h
@@ -64,7 +64,7 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm,
 {
 	struct page *pte;
 
-	pte = alloc_page(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO);
+	pte = alloc_page(GFP_KERNEL | __GFP_ZERO);
 	if (!pte)
 		return NULL;
 	if (!pgtable_page_ctor(pte)) {
@@ -78,7 +78,7 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm,
 static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 					  unsigned long address)
 {
-	gfp_t flags =  GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO;
+	gfp_t flags =  GFP_KERNEL | __GFP_ZERO;
 	return (pte_t *) __get_free_page(flags);
 }
 
diff --git a/arch/m68k/include/asm/mcf_pgalloc.h b/arch/m68k/include/asm/mcf_pgalloc.h
index f9924fb..fb95aed 100644
--- a/arch/m68k/include/asm/mcf_pgalloc.h
+++ b/arch/m68k/include/asm/mcf_pgalloc.h
@@ -14,7 +14,7 @@ extern const char bad_pmd_string[];
 extern inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 	unsigned long address)
 {
-	unsigned long page = __get_free_page(GFP_DMA|__GFP_REPEAT);
+	unsigned long page = __get_free_page(GFP_DMA);
 
 	if (!page)
 		return NULL;
@@ -51,7 +51,7 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t page,
 static inline struct page *pte_alloc_one(struct mm_struct *mm,
 	unsigned long address)
 {
-	struct page *page = alloc_pages(GFP_DMA|__GFP_REPEAT, 0);
+	struct page *page = alloc_pages(GFP_DMA, 0);
 	pte_t *pte;
 
 	if (!page)
diff --git a/arch/m68k/include/asm/motorola_pgalloc.h b/arch/m68k/include/asm/motorola_pgalloc.h
index 24bcba4..c895b98 100644
--- a/arch/m68k/include/asm/motorola_pgalloc.h
+++ b/arch/m68k/include/asm/motorola_pgalloc.h
@@ -11,7 +11,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long ad
 {
 	pte_t *pte;
 
-	pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
+	pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
 	if (pte) {
 		__flush_page_to_ram(pte);
 		flush_tlb_kernel_page(pte);
@@ -32,7 +32,7 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addres
 	struct page *page;
 	pte_t *pte;
 
-	page = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
+	page = alloc_pages(GFP_KERNEL|__GFP_ZERO, 0);
 	if(!page)
 		return NULL;
 	if (!pgtable_page_ctor(page)) {
diff --git a/arch/m68k/include/asm/sun3_pgalloc.h b/arch/m68k/include/asm/sun3_pgalloc.h
index 0931388..1901f61 100644
--- a/arch/m68k/include/asm/sun3_pgalloc.h
+++ b/arch/m68k/include/asm/sun3_pgalloc.h
@@ -37,7 +37,7 @@ do {							\
 static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 					  unsigned long address)
 {
-	unsigned long page = __get_free_page(GFP_KERNEL|__GFP_REPEAT);
+	unsigned long page = __get_free_page(GFP_KERNEL);
 
 	if (!page)
 		return NULL;
@@ -49,7 +49,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
 					unsigned long address)
 {
-        struct page *page = alloc_pages(GFP_KERNEL|__GFP_REPEAT, 0);
+        struct page *page = alloc_pages(GFP_KERNEL, 0);
 
 	if (page == NULL)
 		return NULL;
diff --git a/arch/metag/include/asm/pgalloc.h b/arch/metag/include/asm/pgalloc.h
index 3104df0..c2caa1e 100644
--- a/arch/metag/include/asm/pgalloc.h
+++ b/arch/metag/include/asm/pgalloc.h
@@ -42,8 +42,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 					  unsigned long address)
 {
-	pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL | __GFP_REPEAT |
-					      __GFP_ZERO);
+	pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
 	return pte;
 }
 
@@ -51,7 +50,7 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
 				      unsigned long address)
 {
 	struct page *pte;
-	pte = alloc_pages(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO, 0);
+	pte = alloc_pages(GFP_KERNEL  | __GFP_ZERO, 0);
 	if (!pte)
 		return NULL;
 	if (!pgtable_page_ctor(pte)) {
diff --git a/arch/microblaze/include/asm/pgalloc.h b/arch/microblaze/include/asm/pgalloc.h
index 61436d6..7c89390 100644
--- a/arch/microblaze/include/asm/pgalloc.h
+++ b/arch/microblaze/include/asm/pgalloc.h
@@ -116,9 +116,9 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm,
 	struct page *ptepage;
 
 #ifdef CONFIG_HIGHPTE
-	int flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_REPEAT;
+	int flags = GFP_KERNEL | __GFP_HIGHMEM;
 #else
-	int flags = GFP_KERNEL | __GFP_REPEAT;
+	int flags = GFP_KERNEL;
 #endif
 
 	ptepage = alloc_pages(flags, 0);
diff --git a/arch/microblaze/mm/pgtable.c b/arch/microblaze/mm/pgtable.c
index 4f4520e..eb99fcc 100644
--- a/arch/microblaze/mm/pgtable.c
+++ b/arch/microblaze/mm/pgtable.c
@@ -239,8 +239,7 @@ __init_refok pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 {
 	pte_t *pte;
 	if (mem_init_done) {
-		pte = (pte_t *)__get_free_page(GFP_KERNEL |
-					__GFP_REPEAT | __GFP_ZERO);
+		pte = (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
 	} else {
 		pte = (pte_t *)early_get_page();
 		if (pte)
diff --git a/arch/mn10300/mm/pgtable.c b/arch/mn10300/mm/pgtable.c
index e77a7c7..9577cf7 100644
--- a/arch/mn10300/mm/pgtable.c
+++ b/arch/mn10300/mm/pgtable.c
@@ -63,7 +63,7 @@ void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags)
 
 pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
 {
-	pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT);
+	pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL);
 	if (pte)
 		clear_page(pte);
 	return pte;
@@ -74,9 +74,9 @@ struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
 	struct page *pte;
 
 #ifdef CONFIG_HIGHPTE
-	pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT, 0);
+	pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM, 0);
 #else
-	pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT, 0);
+	pte = alloc_pages(GFP_KERNEL, 0);
 #endif
 	if (!pte)
 		return NULL;
diff --git a/arch/openrisc/include/asm/pgalloc.h b/arch/openrisc/include/asm/pgalloc.h
index 21484e5b..87eebd1 100644
--- a/arch/openrisc/include/asm/pgalloc.h
+++ b/arch/openrisc/include/asm/pgalloc.h
@@ -77,7 +77,7 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm,
 					 unsigned long address)
 {
 	struct page *pte;
-	pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT, 0);
+	pte = alloc_pages(GFP_KERNEL, 0);
 	if (!pte)
 		return NULL;
 	clear_page(page_address(pte));
diff --git a/arch/openrisc/mm/ioremap.c b/arch/openrisc/mm/ioremap.c
index 62b08ef..5b2a9511 100644
--- a/arch/openrisc/mm/ioremap.c
+++ b/arch/openrisc/mm/ioremap.c
@@ -122,7 +122,7 @@ pte_t __init_refok *pte_alloc_one_kernel(struct mm_struct *mm,
 	pte_t *pte;
 
 	if (likely(mem_init_done)) {
-		pte = (pte_t *) __get_free_page(GFP_KERNEL | __GFP_REPEAT);
+		pte = (pte_t *) __get_free_page(GFP_KERNEL);
 	} else {
 		pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
 #if 0
diff --git a/arch/parisc/include/asm/pgalloc.h b/arch/parisc/include/asm/pgalloc.h
index f2fd327..52c3def 100644
--- a/arch/parisc/include/asm/pgalloc.h
+++ b/arch/parisc/include/asm/pgalloc.h
@@ -124,7 +124,7 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte)
 static inline pgtable_t
 pte_alloc_one(struct mm_struct *mm, unsigned long address)
 {
-	struct page *page = alloc_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
+	struct page *page = alloc_page(GFP_KERNEL|__GFP_ZERO);
 	if (!page)
 		return NULL;
 	if (!pgtable_page_ctor(page)) {
@@ -137,7 +137,7 @@ pte_alloc_one(struct mm_struct *mm, unsigned long address)
 static inline pte_t *
 pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
 {
-	pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
+	pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
 	return pte;
 }
 
diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h
index 488279e..049b803 100644
--- a/arch/powerpc/include/asm/book3s/64/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h
@@ -151,7 +151,7 @@ static inline pgtable_t pmd_pgtable(pmd_t pmd)
 static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 					  unsigned long address)
 {
-	return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO);
+	return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
 }
 
 static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
diff --git a/arch/powerpc/include/asm/nohash/64/pgalloc.h b/arch/powerpc/include/asm/nohash/64/pgalloc.h
index 069369f..8a8a7d9 100644
--- a/arch/powerpc/include/asm/nohash/64/pgalloc.h
+++ b/arch/powerpc/include/asm/nohash/64/pgalloc.h
@@ -88,7 +88,7 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
 static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 					  unsigned long address)
 {
-	return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO);
+	return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
 }
 
 static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index bf7bf32..7f922f5 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -84,7 +84,7 @@ __init_refok pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long add
 	pte_t *pte;
 
 	if (slab_is_available()) {
-		pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
+		pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
 	} else {
 		pte = __va(memblock_alloc(PAGE_SIZE, PAGE_SIZE));
 		if (pte)
@@ -97,7 +97,7 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
 {
 	struct page *ptepage;
 
-	gfp_t flags = GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO;
+	gfp_t flags = GFP_KERNEL | __GFP_ZERO;
 
 	ptepage = alloc_pages(flags, 0);
 	if (!ptepage)
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index e009e06..f5e8d4e 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -350,8 +350,7 @@ static pte_t *get_from_cache(struct mm_struct *mm)
 static pte_t *__alloc_for_cache(struct mm_struct *mm, int kernel)
 {
 	void *ret = NULL;
-	struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK |
-				       __GFP_REPEAT | __GFP_ZERO);
+	struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
 	if (!page)
 		return NULL;
 	if (!kernel && !pgtable_page_ctor(page)) {
diff --git a/arch/sh/include/asm/pgalloc.h b/arch/sh/include/asm/pgalloc.h
index a33673b..f3f42c8 100644
--- a/arch/sh/include/asm/pgalloc.h
+++ b/arch/sh/include/asm/pgalloc.h
@@ -34,7 +34,7 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
 static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 					  unsigned long address)
 {
-	return quicklist_alloc(QUICK_PT, GFP_KERNEL | __GFP_REPEAT, NULL);
+	return quicklist_alloc(QUICK_PT, GFP_KERNEL, NULL);
 }
 
 static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
@@ -43,7 +43,7 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
 	struct page *page;
 	void *pg;
 
-	pg = quicklist_alloc(QUICK_PT, GFP_KERNEL | __GFP_REPEAT, NULL);
+	pg = quicklist_alloc(QUICK_PT, GFP_KERNEL, NULL);
 	if (!pg)
 		return NULL;
 	page = virt_to_page(pg);
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index 14bb0d5..aec508e 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -2704,8 +2704,7 @@ void __flush_tlb_all(void)
 pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 			    unsigned long address)
 {
-	struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK |
-				       __GFP_REPEAT | __GFP_ZERO);
+	struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
 	pte_t *pte = NULL;
 
 	if (page)
@@ -2717,8 +2716,7 @@ pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 pgtable_t pte_alloc_one(struct mm_struct *mm,
 			unsigned long address)
 {
-	struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK |
-				       __GFP_REPEAT | __GFP_ZERO);
+	struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
 	if (!page)
 		return NULL;
 	if (!pgtable_page_ctor(page)) {
diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
index b2a2dff..e7437ec 100644
--- a/arch/um/kernel/mem.c
+++ b/arch/um/kernel/mem.c
@@ -204,7 +204,7 @@ pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
 {
 	pte_t *pte;
 
-	pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
+	pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
 	return pte;
 }
 
@@ -212,7 +212,7 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
 {
 	struct page *pte;
 
-	pte = alloc_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
+	pte = alloc_page(GFP_KERNEL|__GFP_ZERO);
 	if (!pte)
 		return NULL;
 	if (!pgtable_page_ctor(pte)) {
diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
index bf7f8b5..574c23c 100644
--- a/arch/x86/include/asm/pgalloc.h
+++ b/arch/x86/include/asm/pgalloc.h
@@ -81,7 +81,7 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
 	struct page *page;
-	page = alloc_pages(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO, 0);
+	page = alloc_pages(GFP_KERNEL |  __GFP_ZERO, 0);
 	if (!page)
 		return NULL;
 	if (!pgtable_pmd_page_ctor(page)) {
@@ -125,7 +125,7 @@ static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
 
 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
-	return (pud_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
+	return (pud_t *)get_zeroed_page(GFP_KERNEL);
 }
 
 static inline void pud_free(struct mm_struct *mm, pud_t *pud)
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index cab9f76..dd2a49a 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -182,7 +182,7 @@ static void * __ref alloc_p2m_page(void)
 	if (unlikely(!slab_is_available()))
 		return alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE);
 
-	return (void *)__get_free_page(GFP_KERNEL | __GFP_REPEAT);
+	return (void *)__get_free_page(GFP_KERNEL);
 }
 
 static void __ref free_p2m_page(void *p)
diff --git a/arch/xtensa/include/asm/pgalloc.h b/arch/xtensa/include/asm/pgalloc.h
index d38eb92..1065bc8 100644
--- a/arch/xtensa/include/asm/pgalloc.h
+++ b/arch/xtensa/include/asm/pgalloc.h
@@ -44,7 +44,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 	pte_t *ptep;
 	int i;
 
-	ptep = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT);
+	ptep = (pte_t *)__get_free_page(GFP_KERNEL);
 	if (!ptep)
 		return NULL;
 	for (i = 0; i < 1024; i++)
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index d597e43..ab19adb 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -1750,7 +1750,7 @@ aoecmd_init(void)
 	int ret;
 
 	/* get_zeroed_page returns page with ref count 1 */
-	p = (void *) get_zeroed_page(GFP_KERNEL | __GFP_REPEAT);
+	p = (void *) get_zeroed_page(GFP_KERNEL);
 	if (!p)
 		return -ENOMEM;
 	empty_page = virt_to_page(p);
-- 
cgit v0.10.2


From a3a9a59d206779dc0c4ca5a6de6a2ff40382732b Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Fri, 24 Jun 2016 14:48:50 -0700
Subject: x86: get rid of superfluous __GFP_REPEAT

__GFP_REPEAT has a rather weak semantic but since it has been introduced
around 2.6.12 it has been ignored for low order allocations.

PGALLOC_GFP uses __GFP_REPEAT but none of the allocation which uses this
flag is for more than order-0.  This means that this flag has never been
actually useful here because it has always been used only for
PAGE_ALLOC_COSTLY requests.

Link: http://lkml.kernel.org/r/1464599699-30131-3-git-send-email-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c
index 4d38416..04f89ca 100644
--- a/arch/x86/kernel/espfix_64.c
+++ b/arch/x86/kernel/espfix_64.c
@@ -57,7 +57,7 @@
 # error "Need more than one PGD for the ESPFIX hack"
 #endif
 
-#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO)
+#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO)
 
 /* This contains the *bottom* address of the espfix stack */
 DEFINE_PER_CPU_READ_MOSTLY(unsigned long, espfix_stack);
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 4eb287e..aa0ff4b 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -6,7 +6,7 @@
 #include <asm/fixmap.h>
 #include <asm/mtrr.h>
 
-#define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO
+#define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO
 
 #ifdef CONFIG_HIGHPTE
 #define PGALLOC_USER_GFP __GFP_HIGHMEM
-- 
cgit v0.10.2


From f58f230a832ba8220a64f44aaafcce4b7358d826 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Fri, 24 Jun 2016 14:48:53 -0700
Subject: x86/efi: get rid of superfluous __GFP_REPEAT

__GFP_REPEAT has a rather weak semantic but since it has been introduced
around 2.6.12 it has been ignored for low order allocations.

efi_alloc_page_tables uses __GFP_REPEAT but it allocates an order-0
page.  This means that this flag has never been actually useful here
because it has always been used only for PAGE_ALLOC_COSTLY requests.

Link: http://lkml.kernel.org/r/1464599699-30131-4-git-send-email-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Acked-by: Matt Fleming <matt@codeblueprint.co.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index 6e7242b..b226b3f 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -139,7 +139,7 @@ int __init efi_alloc_page_tables(void)
 	if (efi_enabled(EFI_OLD_MEMMAP))
 		return 0;
 
-	gfp_mask = GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO;
+	gfp_mask = GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO;
 	efi_pgd = (pgd_t *)__get_free_page(gfp_mask);
 	if (!efi_pgd)
 		return -ENOMEM;
-- 
cgit v0.10.2


From f3610a6aff7dd70b788364255c0cbc128488ef72 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Fri, 24 Jun 2016 14:48:56 -0700
Subject: arm64: get rid of superfluous __GFP_REPEAT

__GFP_REPEAT has a rather weak semantic but since it has been introduced
around 2.6.12 it has been ignored for low order allocations.

{pte,pmd,pud}_alloc_one{_kernel}, late_pgtable_alloc use PGALLOC_GFP for
__get_free_page (aka order-0).

pgd_alloc is slightly more complex because it allocates from pgd_cache
if PGD_SIZE != PAGE_SIZE and PGD_SIZE depends on the configuration
(CONFIG_ARM64_VA_BITS, PAGE_SHIFT and CONFIG_PGTABLE_LEVELS).

As per
config PGTABLE_LEVELS
	int
	default 2 if ARM64_16K_PAGES && ARM64_VA_BITS_36
	default 2 if ARM64_64K_PAGES && ARM64_VA_BITS_42
	default 3 if ARM64_64K_PAGES && ARM64_VA_BITS_48
	default 3 if ARM64_4K_PAGES && ARM64_VA_BITS_39
	default 3 if ARM64_16K_PAGES && ARM64_VA_BITS_47
	default 4 if !ARM64_64K_PAGES && ARM64_VA_BITS_48

we should have the following options

  CONFIG_ARM64_VA_BITS:48 CONFIG_PGTABLE_LEVELS:4 PAGE_SIZE:4k size:4096 pages:1
  CONFIG_ARM64_VA_BITS:48 CONFIG_PGTABLE_LEVELS:4 PAGE_SIZE:16k size:16 pages:1
  CONFIG_ARM64_VA_BITS:48 CONFIG_PGTABLE_LEVELS:3 PAGE_SIZE:64k size:512 pages:1
  CONFIG_ARM64_VA_BITS:47 CONFIG_PGTABLE_LEVELS:3 PAGE_SIZE:16k size:16384 pages:1
  CONFIG_ARM64_VA_BITS:42 CONFIG_PGTABLE_LEVELS:2 PAGE_SIZE:64k size:65536 pages:1
  CONFIG_ARM64_VA_BITS:39 CONFIG_PGTABLE_LEVELS:3 PAGE_SIZE:4k size:4096 pages:1
  CONFIG_ARM64_VA_BITS:36 CONFIG_PGTABLE_LEVELS:2 PAGE_SIZE:16k size:16384 pages:1

All of them fit into a single page (aka order-0).  This means that this
flag has never been actually useful here because it has always been used
only for PAGE_ALLOC_COSTLY requests.

Link: http://lkml.kernel.org/r/1464599699-30131-6-git-send-email-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h
index ff98585..d25f4f1 100644
--- a/arch/arm64/include/asm/pgalloc.h
+++ b/arch/arm64/include/asm/pgalloc.h
@@ -26,7 +26,7 @@
 
 #define check_pgt_cache()		do { } while (0)
 
-#define PGALLOC_GFP	(GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO)
+#define PGALLOC_GFP	(GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO)
 #define PGD_SIZE	(PTRS_PER_PGD * sizeof(pgd_t))
 
 #if CONFIG_PGTABLE_LEVELS > 2
-- 
cgit v0.10.2


From 54d87d600adbe9889bccaff38420cec02250993b Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Fri, 24 Jun 2016 14:48:58 -0700
Subject: arc: get rid of superfluous __GFP_REPEAT

__GFP_REPEAT has a rather weak semantic but since it has been introduced
around 2.6.12 it has been ignored for low order allocations.

pte_alloc_one_kernel uses __get_order_pte but this is obviously always
zero because BITS_FOR_PTE is not larger than 9 yet the page size is
always larger than 4K.  This means that this flag has never been
actually useful here because it has always been used only for
PAGE_ALLOC_COSTLY requests.

Link: http://lkml.kernel.org/r/1464599699-30131-7-git-send-email-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Acked-by: Vineet Gupta <vgupta@synopsys.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/arc/include/asm/pgalloc.h b/arch/arc/include/asm/pgalloc.h
index 86ed671..3749234 100644
--- a/arch/arc/include/asm/pgalloc.h
+++ b/arch/arc/include/asm/pgalloc.h
@@ -95,7 +95,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 {
 	pte_t *pte;
 
-	pte = (pte_t *) __get_free_pages(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO,
+	pte = (pte_t *) __get_free_pages(GFP_KERNEL | __GFP_ZERO,
 					 __get_order_pte());
 
 	return pte;
@@ -107,7 +107,7 @@ pte_alloc_one(struct mm_struct *mm, unsigned long address)
 	pgtable_t pte_pg;
 	struct page *page;
 
-	pte_pg = (pgtable_t)__get_free_pages(GFP_KERNEL | __GFP_REPEAT, __get_order_pte());
+	pte_pg = (pgtable_t)__get_free_pages(GFP_KERNEL, __get_order_pte());
 	if (!pte_pg)
 		return 0;
 	memzero((void *)pte_pg, PTRS_PER_PTE * sizeof(pte_t));
-- 
cgit v0.10.2


From 65f84656ff7c24177c43652bc88cc2a06f9a48b1 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Fri, 24 Jun 2016 14:49:01 -0700
Subject: mips: get rid of superfluous __GFP_REPEAT

__GFP_REPEAT has a rather weak semantic but since it has been introduced
around 2.6.12 it has been ignored for low order allocations.

pte_alloc_one{_kernel}, pmd_alloc_one allocate PTE_ORDER resp.
PMD_ORDER but both are not larger than 1.  This means that this flag has
never been actually useful here because it has always been used only for
PAGE_ALLOC_COSTLY requests.

Link: http://lkml.kernel.org/r/1464599699-30131-8-git-send-email-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Cc: John Crispin <blogic@openwrt.org>
Cc: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h
index b336037..93c079a 100644
--- a/arch/mips/include/asm/pgalloc.h
+++ b/arch/mips/include/asm/pgalloc.h
@@ -69,7 +69,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 {
 	pte_t *pte;
 
-	pte = (pte_t *) __get_free_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, PTE_ORDER);
+	pte = (pte_t *) __get_free_pages(GFP_KERNEL|__GFP_ZERO, PTE_ORDER);
 
 	return pte;
 }
@@ -79,7 +79,7 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm,
 {
 	struct page *pte;
 
-	pte = alloc_pages(GFP_KERNEL | __GFP_REPEAT, PTE_ORDER);
+	pte = alloc_pages(GFP_KERNEL, PTE_ORDER);
 	if (!pte)
 		return NULL;
 	clear_highpage(pte);
@@ -113,7 +113,7 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
 {
 	pmd_t *pmd;
 
-	pmd = (pmd_t *) __get_free_pages(GFP_KERNEL|__GFP_REPEAT, PMD_ORDER);
+	pmd = (pmd_t *) __get_free_pages(GFP_KERNEL, PMD_ORDER);
 	if (pmd)
 		pmd_init((unsigned long)pmd, (unsigned long)invalid_pte_table);
 	return pmd;
-- 
cgit v0.10.2


From 565299d03363c71d5bcf7edabb41b2b36a9ea36e Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Fri, 24 Jun 2016 14:49:04 -0700
Subject: nios2: get rid of superfluous __GFP_REPEAT

__GFP_REPEAT has a rather weak semantic but since it has been introduced
around 2.6.12 it has been ignored for low order allocations.

pte_alloc_one{_kernel} allocate PTE_ORDER which is 0.  This means that
this flag has never been actually useful here because it has always been
used only for PAGE_ALLOC_COSTLY requests.

Link: http://lkml.kernel.org/r/1464599699-30131-9-git-send-email-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Cc: Ley Foon Tan <lftan@altera.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/nios2/include/asm/pgalloc.h b/arch/nios2/include/asm/pgalloc.h
index 6e2985e..bb47d08 100644
--- a/arch/nios2/include/asm/pgalloc.h
+++ b/arch/nios2/include/asm/pgalloc.h
@@ -42,8 +42,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 {
 	pte_t *pte;
 
-	pte = (pte_t *) __get_free_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO,
-					PTE_ORDER);
+	pte = (pte_t *) __get_free_pages(GFP_KERNEL|__GFP_ZERO, PTE_ORDER);
 
 	return pte;
 }
@@ -53,7 +52,7 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
 {
 	struct page *pte;
 
-	pte = alloc_pages(GFP_KERNEL | __GFP_REPEAT, PTE_ORDER);
+	pte = alloc_pages(GFP_KERNEL, PTE_ORDER);
 	if (pte) {
 		if (!pgtable_page_ctor(pte)) {
 			__free_page(pte);
-- 
cgit v0.10.2


From aade311a50b0be5d5ee93bac7ebc2da9a16556d7 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Fri, 24 Jun 2016 14:49:06 -0700
Subject: parisc: get rid of superfluous __GFP_REPEAT

__GFP_REPEAT has a rather weak semantic but since it has been introduced
around 2.6.12 it has been ignored for low order allocations.

pmd_alloc_one allocate PMD_ORDER which is 1.  This means that this flag
has never been actually useful here because it has always been used only
for PAGE_ALLOC_COSTLY requests.

Link: http://lkml.kernel.org/r/1464599699-30131-10-git-send-email-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Cc: "James E.J. Bottomley" <jejb@parisc-linux.org>
Cc: Helge Deller <deller@gmx.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/parisc/include/asm/pgalloc.h b/arch/parisc/include/asm/pgalloc.h
index 52c3def..f08dda3 100644
--- a/arch/parisc/include/asm/pgalloc.h
+++ b/arch/parisc/include/asm/pgalloc.h
@@ -63,8 +63,7 @@ static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmd)
 
 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
 {
-	pmd_t *pmd = (pmd_t *)__get_free_pages(GFP_KERNEL|__GFP_REPEAT,
-					       PMD_ORDER);
+	pmd_t *pmd = (pmd_t *)__get_free_pages(GFP_KERNEL, PMD_ORDER);
 	if (pmd)
 		memset(pmd, 0, PAGE_SIZE<<PMD_ORDER);
 	return pmd;
-- 
cgit v0.10.2


From a4135b93898fe38ea4136c6b03b29b746e08c83a Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Fri, 24 Jun 2016 14:49:09 -0700
Subject: score: get rid of superfluous __GFP_REPEAT

__GFP_REPEAT has a rather weak semantic but since it has been introduced
around 2.6.12 it has been ignored for low order allocations.

pte_alloc_one{_kernel} allocate PTE_ORDER which is 0.  This means that
this flag has never been actually useful here because it has always been
used only for PAGE_ALLOC_COSTLY requests.

Link: http://lkml.kernel.org/r/1464599699-30131-11-git-send-email-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Cc: Chen Liqin <liqin.linux@gmail.com>
Cc: Lennox Wu <lennox.wu@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/score/include/asm/pgalloc.h b/arch/score/include/asm/pgalloc.h
index 2e06765..49b012d 100644
--- a/arch/score/include/asm/pgalloc.h
+++ b/arch/score/include/asm/pgalloc.h
@@ -42,8 +42,7 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 {
 	pte_t *pte;
 
-	pte = (pte_t *) __get_free_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO,
-					PTE_ORDER);
+	pte = (pte_t *) __get_free_pages(GFP_KERNEL|__GFP_ZERO, PTE_ORDER);
 
 	return pte;
 }
@@ -53,7 +52,7 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm,
 {
 	struct page *pte;
 
-	pte = alloc_pages(GFP_KERNEL | __GFP_REPEAT, PTE_ORDER);
+	pte = alloc_pages(GFP_KERNEL, PTE_ORDER);
 	if (!pte)
 		return NULL;
 	clear_highpage(pte);
-- 
cgit v0.10.2


From 2379a23e34b58520dfc8f4909f116a08393138e4 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Fri, 24 Jun 2016 14:49:12 -0700
Subject: powerpc: get rid of superfluous __GFP_REPEAT

__GFP_REPEAT has a rather weak semantic but since it has been introduced
around 2.6.12 it has been ignored for low order allocations.

{pud,pmd}_alloc_one are allocating from {PGT,PUD}_CACHE initialized in
pgtable_cache_init which doesn't have larger than sizeof(void *) << 12
size and that fits into !costly allocation request size.

PGALLOC_GFP is used only in radix__pgd_alloc which uses either order-0
or order-4 requests.  The first one doesn't need the flag while the
second does.  Drop __GFP_REPEAT from PGALLOC_GFP and add it for the
order-4 one.

This means that this flag has never been actually useful here because it
has always been used only for PAGE_ALLOC_COSTLY requests.

Link: http://lkml.kernel.org/r/1464599699-30131-12-git-send-email-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h
index 049b803..d14fcf8 100644
--- a/arch/powerpc/include/asm/book3s/64/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h
@@ -41,7 +41,7 @@ extern struct kmem_cache *pgtable_cache[];
 			pgtable_cache[(shift) - 1];	\
 		})
 
-#define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO
+#define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO
 
 extern pte_t *pte_fragment_alloc(struct mm_struct *, unsigned long, int);
 extern void pte_fragment_free(unsigned long *, int);
@@ -56,7 +56,7 @@ static inline pgd_t *radix__pgd_alloc(struct mm_struct *mm)
 	return (pgd_t *)__get_free_page(PGALLOC_GFP);
 #else
 	struct page *page;
-	page = alloc_pages(PGALLOC_GFP, 4);
+	page = alloc_pages(PGALLOC_GFP | __GFP_REPEAT, 4);
 	if (!page)
 		return NULL;
 	return (pgd_t *) page_address(page);
@@ -93,8 +93,7 @@ static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
 
 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
-	return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE),
-				GFP_KERNEL|__GFP_REPEAT);
+	return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE), GFP_KERNEL);
 }
 
 static inline void pud_free(struct mm_struct *mm, pud_t *pud)
@@ -115,8 +114,7 @@ static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
 
 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
-	return kmem_cache_alloc(PGT_CACHE(PMD_CACHE_INDEX),
-				GFP_KERNEL|__GFP_REPEAT);
+	return kmem_cache_alloc(PGT_CACHE(PMD_CACHE_INDEX), GFP_KERNEL);
 }
 
 static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
diff --git a/arch/powerpc/include/asm/nohash/64/pgalloc.h b/arch/powerpc/include/asm/nohash/64/pgalloc.h
index 8a8a7d9..897d2e1 100644
--- a/arch/powerpc/include/asm/nohash/64/pgalloc.h
+++ b/arch/powerpc/include/asm/nohash/64/pgalloc.h
@@ -57,8 +57,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 
 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
-	return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE),
-				GFP_KERNEL|__GFP_REPEAT);
+	return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE), GFP_KERNEL);
 }
 
 static inline void pud_free(struct mm_struct *mm, pud_t *pud)
@@ -190,8 +189,7 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
 
 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
-	return kmem_cache_alloc(PGT_CACHE(PMD_CACHE_INDEX),
-				GFP_KERNEL|__GFP_REPEAT);
+	return kmem_cache_alloc(PGT_CACHE(PMD_CACHE_INDEX), GFP_KERNEL);
 }
 
 static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 5aac1a3..119d186 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -73,7 +73,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
 	cachep = PGT_CACHE(pdshift - pshift);
 #endif
 
-	new = kmem_cache_zalloc(cachep, GFP_KERNEL|__GFP_REPEAT);
+	new = kmem_cache_zalloc(cachep, GFP_KERNEL);
 
 	BUG_ON(pshift > HUGEPD_SHIFT_MASK);
 	BUG_ON((unsigned long)new & HUGEPD_SHIFT_MASK);
-- 
cgit v0.10.2


From 45eeff260d40ff02af3d5b8e2919033ee59f9ff6 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Fri, 24 Jun 2016 14:49:14 -0700
Subject: sparc: get rid of superfluous __GFP_REPEAT

__GFP_REPEAT has a rather weak semantic but since it has been introduced
around 2.6.12 it has been ignored for low order allocations.

{pud,pmd}_alloc_one is using __GFP_REPEAT but it always allocates from
pgtable_cache which is initialzed to PAGE_SIZE objects.  This means that
this flag has never been actually useful here because it has always been
used only for PAGE_ALLOC_COSTLY requests.

Link: http://lkml.kernel.org/r/1464599699-30131-13-git-send-email-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/sparc/include/asm/pgalloc_64.h b/arch/sparc/include/asm/pgalloc_64.h
index 5e31871..3529f13 100644
--- a/arch/sparc/include/asm/pgalloc_64.h
+++ b/arch/sparc/include/asm/pgalloc_64.h
@@ -41,8 +41,7 @@ static inline void __pud_populate(pud_t *pud, pmd_t *pmd)
 
 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
-	return kmem_cache_alloc(pgtable_cache,
-				GFP_KERNEL|__GFP_REPEAT);
+	return kmem_cache_alloc(pgtable_cache, GFP_KERNEL);
 }
 
 static inline void pud_free(struct mm_struct *mm, pud_t *pud)
@@ -52,8 +51,7 @@ static inline void pud_free(struct mm_struct *mm, pud_t *pud)
 
 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
-	return kmem_cache_alloc(pgtable_cache,
-				GFP_KERNEL|__GFP_REPEAT);
+	return kmem_cache_alloc(pgtable_cache, GFP_KERNEL);
 }
 
 static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
-- 
cgit v0.10.2


From 10d58bf297e2cba0cfa2cd143d4f0df26e129040 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Fri, 24 Jun 2016 14:49:17 -0700
Subject: s390: get rid of superfluous __GFP_REPEAT

__GFP_REPEAT has a rather weak semantic but since it has been introduced
around 2.6.12 it has been ignored for low order allocations.

page_table_alloc then uses the flag for a single page allocation.  This
means that this flag has never been actually useful here because it has
always been used only for PAGE_ALLOC_COSTLY requests.

Link: http://lkml.kernel.org/r/1464599699-30131-14-git-send-email-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Acked-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
index e8b5962..e2565d2 100644
--- a/arch/s390/mm/pgalloc.c
+++ b/arch/s390/mm/pgalloc.c
@@ -169,7 +169,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
 			return table;
 	}
 	/* Allocate a fresh page */
-	page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
+	page = alloc_page(GFP_KERNEL);
 	if (!page)
 		return NULL;
 	if (!pgtable_page_ctor(page)) {
-- 
cgit v0.10.2


From 884ed4cb8aa19ccff32f5c5586257c56e56f91a4 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Fri, 24 Jun 2016 14:49:20 -0700
Subject: sh: get rid of superfluous __GFP_REPEAT

__GFP_REPEAT has a rather weak semantic but since it has been introduced
around 2.6.12 it has been ignored for low order allocations.

PGALLOC_GFP uses __GFP_REPEAT but {pgd,pmd}_alloc allocate from
{pgd,pmd}_cache but both caches are allocating up to PAGE_SIZE objects.
This means that this flag has never been actually useful here because it
has always been used only for PAGE_ALLOC_COSTLY requests.

Link: http://lkml.kernel.org/r/1464599699-30131-15-git-send-email-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Cc: Rich Felker <dalias@libc.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/sh/mm/pgtable.c b/arch/sh/mm/pgtable.c
index 26e03a1..a62bd86 100644
--- a/arch/sh/mm/pgtable.c
+++ b/arch/sh/mm/pgtable.c
@@ -1,7 +1,7 @@
 #include <linux/mm.h>
 #include <linux/slab.h>
 
-#define PGALLOC_GFP GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO
+#define PGALLOC_GFP GFP_KERNEL | __GFP_ZERO
 
 static struct kmem_cache *pgd_cachep;
 #if PAGETABLE_LEVELS > 2
-- 
cgit v0.10.2


From f45eebc25e78991ef6a6d784ab54151d3003cfdf Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Fri, 24 Jun 2016 14:49:22 -0700
Subject: tile: get rid of superfluous __GFP_REPEAT

__GFP_REPEAT has a rather weak semantic but since it has been introduced
around 2.6.12 it has been ignored for low order allocations.

pgtable_alloc_one uses __GFP_REPEAT flag for L2_USER_PGTABLE_ORDER but
the order is either 0 or 3 if L2_KERNEL_PGTABLE_SHIFT for HPAGE_SHIFT.
This means that this flag has never been actually useful here because it
has always been used only for PAGE_ALLOC_COSTLY requests.

Link: http://lkml.kernel.org/r/1464599699-30131-16-git-send-email-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Acked-by: Chris Metcalf <cmetcalf@mellanox.com> [for tile]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/tile/mm/pgtable.c b/arch/tile/mm/pgtable.c
index 7bf2491..c4d5bf8 100644
--- a/arch/tile/mm/pgtable.c
+++ b/arch/tile/mm/pgtable.c
@@ -231,7 +231,7 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 struct page *pgtable_alloc_one(struct mm_struct *mm, unsigned long address,
 			       int order)
 {
-	gfp_t flags = GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO;
+	gfp_t flags = GFP_KERNEL|__GFP_ZERO;
 	struct page *p;
 	int i;
 
-- 
cgit v0.10.2


From a830627b01b26452a13abb7e7b37d39365be4b05 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Fri, 24 Jun 2016 14:49:25 -0700
Subject: unicore32: get rid of superfluous __GFP_REPEAT

__GFP_REPEAT has a rather weak semantic but since it has been introduced
around 2.6.12 it has been ignored for low order allocations.

PGALLOC_GFP uses __GFP_REPEAT but it is only used in pte_alloc_one,
pte_alloc_one_kernel which does order-0 request.  This means that this
flag has never been actually useful here because it has always been used
only for PAGE_ALLOC_COSTLY requests.

Link: http://lkml.kernel.org/r/1464599699-30131-17-git-send-email-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Cc: Guan Xuetao <gxt@mprc.pku.edu.cn>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/unicore32/include/asm/pgalloc.h b/arch/unicore32/include/asm/pgalloc.h
index 2e02d13..2677579 100644
--- a/arch/unicore32/include/asm/pgalloc.h
+++ b/arch/unicore32/include/asm/pgalloc.h
@@ -28,7 +28,7 @@ extern void free_pgd_slow(struct mm_struct *mm, pgd_t *pgd);
 #define pgd_alloc(mm)			get_pgd_slow(mm)
 #define pgd_free(mm, pgd)		free_pgd_slow(mm, pgd)
 
-#define PGALLOC_GFP	(GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO)
+#define PGALLOC_GFP	(GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO)
 
 /*
  * Allocate one PTE table.
-- 
cgit v0.10.2


From f2db19719a4e789a58ac024b43f12eeb9e458074 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Fri, 24 Jun 2016 14:49:28 -0700
Subject: jbd2: get rid of superfluous __GFP_REPEAT

jbd2_alloc is explicit about its allocation preferences wrt.  the
allocation size.  Sub page allocations go to the slab allocator and
larger are using either the page allocator or vmalloc.  This is all good
but the logic is unnecessarily complex.

1) as per Ted, the vmalloc fallback is a left-over:

 : jbd2_alloc is only passed in the bh->b_size, which can't be PAGE_SIZE, so
 : the code path that calls vmalloc() should never get called.  When we
 : conveted jbd2_alloc() to suppor sub-page size allocations in commit
 : d2eecb039368, there was an assumption that it could be called with a size
 : greater than PAGE_SIZE, but that's certaily not true today.

Moreover vmalloc allocation might even lead to a deadlock because the
callers expect GFP_NOFS context while vmalloc is GFP_KERNEL.

2) __GFP_REPEAT for requests <= PAGE_ALLOC_COSTLY_ORDER is ignored
   since the flag was introduced.

Let's simplify the code flow and use the slab allocator for sub-page
requests and the page allocator for others.  Even though order > 0 is
not currently used as per above leave that option open.

Link: http://lkml.kernel.org/r/1464599699-30131-18-git-send-email-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Cc: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index b31852f..e3ca4b4 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -2329,18 +2329,10 @@ void *jbd2_alloc(size_t size, gfp_t flags)
 
 	BUG_ON(size & (size-1)); /* Must be a power of 2 */
 
-	flags |= __GFP_REPEAT;
-	if (size == PAGE_SIZE)
-		ptr = (void *)__get_free_pages(flags, 0);
-	else if (size > PAGE_SIZE) {
-		int order = get_order(size);
-
-		if (order < 3)
-			ptr = (void *)__get_free_pages(flags, order);
-		else
-			ptr = vmalloc(size);
-	} else
+	if (size < PAGE_SIZE)
 		ptr = kmem_cache_alloc(get_slab(size), flags);
+	else
+		ptr = (void *)__get_free_pages(flags, get_order(size));
 
 	/* Check alignment; SLUB has gotten this wrong in the past,
 	 * and this can lead to user data corruption! */
@@ -2351,20 +2343,10 @@ void *jbd2_alloc(size_t size, gfp_t flags)
 
 void jbd2_free(void *ptr, size_t size)
 {
-	if (size == PAGE_SIZE) {
-		free_pages((unsigned long)ptr, 0);
-		return;
-	}
-	if (size > PAGE_SIZE) {
-		int order = get_order(size);
-
-		if (order < 3)
-			free_pages((unsigned long)ptr, order);
-		else
-			vfree(ptr);
-		return;
-	}
-	kmem_cache_free(get_slab(size), ptr);
+	if (size < PAGE_SIZE)
+		kmem_cache_free(get_slab(size), ptr);
+	else
+		free_pages((unsigned long)ptr, get_order(size));
 };
 
 /*
-- 
cgit v0.10.2


From a6921c2974a09bfe8d039980c0b14a305644930b Mon Sep 17 00:00:00 2001
From: Jon Mason <jdmason@kudzu.us>
Date: Fri, 24 Jun 2016 14:49:31 -0700
Subject: MAINTAINERS: update Calgary IOMMU

Update the contact info for Muli, clean-up my name, and update the
mailing list to the IOMMU mailing list.

Link: http://lkml.kernel.org/r/1465493059-11840-2-git-send-email-jdmason@kudzu.us
Signed-off-by: Jon Mason <jdmason@kudzu.us>
Cc: Muli Ben-Yehuda <mulix@mulix.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/MAINTAINERS b/MAINTAINERS
index e1b090f..952fd2a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2776,9 +2776,9 @@ F:	include/net/caif/
 F:	net/caif/
 
 CALGARY x86-64 IOMMU
-M:	Muli Ben-Yehuda <muli@il.ibm.com>
-M:	"Jon D. Mason" <jdmason@kudzu.us>
-L:	discuss@x86-64.org
+M:	Muli Ben-Yehuda <mulix@mulix.org>
+M:	Jon Mason <jdmason@kudzu.us>
+L:	iommu@lists.linux-foundation.org
 S:	Maintained
 F:	arch/x86/kernel/pci-calgary_64.c
 F:	arch/x86/kernel/tce_64.c
-- 
cgit v0.10.2


From 9b75a867cc9ddbafcaf35029358ac500f2635ff3 Mon Sep 17 00:00:00 2001
From: Andrey Ryabinin <aryabinin@virtuozzo.com>
Date: Fri, 24 Jun 2016 14:49:34 -0700
Subject: mm: mempool: kasan: don't poot mempool objects in quarantine

Currently we may put reserved by mempool elements into quarantine via
kasan_kfree().  This is totally wrong since quarantine may really free
these objects.  So when mempool will try to use such element,
use-after-free will happen.  Or mempool may decide that it no longer
need that element and double-free it.

So don't put object into quarantine in kasan_kfree(), just poison it.
Rename kasan_kfree() to kasan_poison_kfree() to respect that.

Also, we shouldn't use kasan_slab_alloc()/kasan_krealloc() in
kasan_unpoison_element() because those functions may update allocation
stacktrace.  This would be wrong for the most of the remove_element call
sites.

(The only call site where we may want to update alloc stacktrace is
 in mempool_alloc(). Kmemleak solves this by calling
 kmemleak_update_trace(), so we could make something like that too.
 But this is out of scope of this patch).

Fixes: 55834c59098d ("mm: kasan: initial memory quarantine implementation")
Link: http://lkml.kernel.org/r/575977C3.1010905@virtuozzo.com
Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Reported-by: Kuthonuzo Luruo <kuthonuzo.luruo@hpe.com>
Acked-by: Alexander Potapenko <glider@google.com>
Cc: Dmitriy Vyukov <dvyukov@google.com>
Cc: Kostya Serebryany <kcc@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index 611927f..ac4b3c4 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -59,14 +59,13 @@ void kasan_poison_object_data(struct kmem_cache *cache, void *object);
 
 void kasan_kmalloc_large(const void *ptr, size_t size, gfp_t flags);
 void kasan_kfree_large(const void *ptr);
-void kasan_kfree(void *ptr);
+void kasan_poison_kfree(void *ptr);
 void kasan_kmalloc(struct kmem_cache *s, const void *object, size_t size,
 		  gfp_t flags);
 void kasan_krealloc(const void *object, size_t new_size, gfp_t flags);
 
 void kasan_slab_alloc(struct kmem_cache *s, void *object, gfp_t flags);
 bool kasan_slab_free(struct kmem_cache *s, void *object);
-void kasan_poison_slab_free(struct kmem_cache *s, void *object);
 
 struct kasan_cache {
 	int alloc_meta_offset;
@@ -76,6 +75,9 @@ struct kasan_cache {
 int kasan_module_alloc(void *addr, size_t size);
 void kasan_free_shadow(const struct vm_struct *vm);
 
+size_t ksize(const void *);
+static inline void kasan_unpoison_slab(const void *ptr) { ksize(ptr); }
+
 #else /* CONFIG_KASAN */
 
 static inline void kasan_unpoison_shadow(const void *address, size_t size) {}
@@ -102,7 +104,7 @@ static inline void kasan_poison_object_data(struct kmem_cache *cache,
 
 static inline void kasan_kmalloc_large(void *ptr, size_t size, gfp_t flags) {}
 static inline void kasan_kfree_large(const void *ptr) {}
-static inline void kasan_kfree(void *ptr) {}
+static inline void kasan_poison_kfree(void *ptr) {}
 static inline void kasan_kmalloc(struct kmem_cache *s, const void *object,
 				size_t size, gfp_t flags) {}
 static inline void kasan_krealloc(const void *object, size_t new_size,
@@ -114,11 +116,12 @@ static inline bool kasan_slab_free(struct kmem_cache *s, void *object)
 {
 	return false;
 }
-static inline void kasan_poison_slab_free(struct kmem_cache *s, void *object) {}
 
 static inline int kasan_module_alloc(void *addr, size_t size) { return 0; }
 static inline void kasan_free_shadow(const struct vm_struct *vm) {}
 
+static inline void kasan_unpoison_slab(const void *ptr) { }
+
 #endif /* CONFIG_KASAN */
 
 #endif /* LINUX_KASAN_H */
diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c
index 28439ac..6845f92 100644
--- a/mm/kasan/kasan.c
+++ b/mm/kasan/kasan.c
@@ -508,7 +508,7 @@ void kasan_slab_alloc(struct kmem_cache *cache, void *object, gfp_t flags)
 	kasan_kmalloc(cache, object, cache->object_size, flags);
 }
 
-void kasan_poison_slab_free(struct kmem_cache *cache, void *object)
+static void kasan_poison_slab_free(struct kmem_cache *cache, void *object)
 {
 	unsigned long size = cache->object_size;
 	unsigned long rounded_up_size = round_up(size, KASAN_SHADOW_SCALE_SIZE);
@@ -626,7 +626,7 @@ void kasan_krealloc(const void *object, size_t size, gfp_t flags)
 		kasan_kmalloc(page->slab_cache, object, size, flags);
 }
 
-void kasan_kfree(void *ptr)
+void kasan_poison_kfree(void *ptr)
 {
 	struct page *page;
 
@@ -636,7 +636,7 @@ void kasan_kfree(void *ptr)
 		kasan_poison_shadow(ptr, PAGE_SIZE << compound_order(page),
 				KASAN_FREE_PAGE);
 	else
-		kasan_slab_free(page->slab_cache, ptr);
+		kasan_poison_slab_free(page->slab_cache, ptr);
 }
 
 void kasan_kfree_large(const void *ptr)
diff --git a/mm/mempool.c b/mm/mempool.c
index 9e075f8..8f65464 100644
--- a/mm/mempool.c
+++ b/mm/mempool.c
@@ -104,20 +104,16 @@ static inline void poison_element(mempool_t *pool, void *element)
 
 static void kasan_poison_element(mempool_t *pool, void *element)
 {
-	if (pool->alloc == mempool_alloc_slab)
-		kasan_poison_slab_free(pool->pool_data, element);
-	if (pool->alloc == mempool_kmalloc)
-		kasan_kfree(element);
+	if (pool->alloc == mempool_alloc_slab || pool->alloc == mempool_kmalloc)
+		kasan_poison_kfree(element);
 	if (pool->alloc == mempool_alloc_pages)
 		kasan_free_pages(element, (unsigned long)pool->pool_data);
 }
 
 static void kasan_unpoison_element(mempool_t *pool, void *element, gfp_t flags)
 {
-	if (pool->alloc == mempool_alloc_slab)
-		kasan_slab_alloc(pool->pool_data, element, flags);
-	if (pool->alloc == mempool_kmalloc)
-		kasan_krealloc(element, (size_t)pool->pool_data, flags);
+	if (pool->alloc == mempool_alloc_slab || pool->alloc == mempool_kmalloc)
+		kasan_unpoison_slab(element);
 	if (pool->alloc == mempool_alloc_pages)
 		kasan_alloc_pages(element, (unsigned long)pool->pool_data);
 }
-- 
cgit v0.10.2


From e838a45f9392a5bd2be1cd3ab0b16ae85857461c Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@techsingularity.net>
Date: Fri, 24 Jun 2016 14:49:37 -0700
Subject: mm, sl[au]b: add __GFP_ATOMIC to the GFP reclaim mask

Commit d0164adc89f6 ("mm, page_alloc: distinguish between being unable
to sleep, unwilling to sleep and avoiding waking kswapd") modified
__GFP_WAIT to explicitly identify the difference between atomic callers
and those that were unwilling to sleep.  Later the definition was
removed entirely.

The GFP_RECLAIM_MASK is the set of flags that affect watermark checking
and reclaim behaviour but __GFP_ATOMIC was never added.  Without it,
atomic users of the slab allocator strip the __GFP_ATOMIC flag and
cannot access the page allocator atomic reserves.  This patch addresses
the problem.

The user-visible impact depends on the workload but potentially atomic
allocations unnecessarily fail without this path.

Link: http://lkml.kernel.org/r/20160610093832.GK2527@techsingularity.net
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Reported-by: Marcin Wojtas <mw@semihalf.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: <stable@vger.kernel.org>	[4.4+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/internal.h b/mm/internal.h
index a37e5b6..2524ec8 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -24,7 +24,8 @@
  */
 #define GFP_RECLAIM_MASK (__GFP_RECLAIM|__GFP_HIGH|__GFP_IO|__GFP_FS|\
 			__GFP_NOWARN|__GFP_REPEAT|__GFP_NOFAIL|\
-			__GFP_NORETRY|__GFP_MEMALLOC|__GFP_NOMEMALLOC)
+			__GFP_NORETRY|__GFP_MEMALLOC|__GFP_NOMEMALLOC|\
+			__GFP_ATOMIC)
 
 /* The GFP flags allowed during early boot */
 #define GFP_BOOT_MASK (__GFP_BITS_MASK & ~(__GFP_RECLAIM|__GFP_IO|__GFP_FS))
-- 
cgit v0.10.2


From a8a47ff53462c3043778c04b3ba7230a39c476bf Mon Sep 17 00:00:00 2001
From: Antoine Tenart <antoine.tenart@free-electrons.com>
Date: Fri, 24 Jun 2016 14:49:39 -0700
Subject: mailmap: add Antoine Tenart's email
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

I used "Antoine TÃ©nart" at first but then moved to a name without accent
as this cause some issues from time to time...  Add my email in the
mailmap file to have a consistent shortlog output.

Link: http://lkml.kernel.org/r/20160609130323.27706-1-antoine.tenart@free-electrons.com
Signed-off-by: Antoine Tenart <antoine.tenart@free-electrons.com>
Cc: Antoine Tenart <antoine.tenart@free-electrons.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/.mailmap b/.mailmap
index 779a9ca..726b494 100644
--- a/.mailmap
+++ b/.mailmap
@@ -21,6 +21,7 @@ Andrey Ryabinin <ryabinin.a.a@gmail.com> <a.ryabinin@samsung.com>
 Andrew Morton <akpm@linux-foundation.org>
 Andrew Vasquez <andrew.vasquez@qlogic.com>
 Andy Adamson <andros@citi.umich.edu>
+Antoine Tenart <antoine.tenart@free-electrons.com>
 Antonio Ospite <ao2@ao2.it> <ao2@amarulasolutions.com>
 Archit Taneja <archit@ti.com>
 Arnaud Patard <arnaud.patard@rtp-net.org>
-- 
cgit v0.10.2


From 1f08fe266560fc2d1383fd9c8c08fdd432ea302b Mon Sep 17 00:00:00 2001
From: Antoine Tenart <antoine.tenart@free-electrons.com>
Date: Fri, 24 Jun 2016 14:49:42 -0700
Subject: mailmap: add Boris Brezillon's email

There are different versions of Boris' name and email in the log, and
one typo.  Add his emails in mailmap to have all of his contributions
under the same name/email tuple.

Link: http://lkml.kernel.org/r/20160609130323.27706-2-antoine.tenart@free-electrons.com
Signed-off-by: Antoine Tenart <antoine.tenart@free-electrons.com>
Acked-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/.mailmap b/.mailmap
index 726b494..52489f5 100644
--- a/.mailmap
+++ b/.mailmap
@@ -31,6 +31,9 @@ Axel Lin <axel.lin@gmail.com>
 Ben Gardner <bgardner@wabtec.com>
 Ben M Cahill <ben.m.cahill@intel.com>
 Björn Steinbrink <B.Steinbrink@gmx.de>
+Boris Brezillon <boris.brezillon@free-electrons.com>
+Boris Brezillon <boris.brezillon@free-electrons.com> <b.brezillon.dev@gmail.com>
+Boris Brezillon <boris.brezillon@free-electrons.com> <b.brezillon@overkiz.com>
 Brian Avery <b.avery@hp.com>
 Brian King <brking@us.ibm.com>
 Christoph Hellwig <hch@lst.de>
-- 
cgit v0.10.2


From 315d09bf30c2b436a1fdac86d31c24380cd56c4f Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Fri, 24 Jun 2016 14:49:45 -0700
Subject: Revert "mm: make faultaround produce old ptes"

This reverts commit 5c0a85fad949212b3e059692deecdeed74ae7ec7.

The commit causes ~6% regression in unixbench.

Let's revert it for now and consider other solution for reclaim problem
later.

Link: http://lkml.kernel.org/r/1465893750-44080-2-git-send-email-kirill.shutemov@linux.intel.com
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Reported-by: "Huang, Ying" <ying.huang@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Vinayak Menon <vinmenon@codeaurora.org>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 5df5feb..ece042d 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -602,7 +602,7 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
 }
 
 void do_set_pte(struct vm_area_struct *vma, unsigned long address,
-		struct page *page, pte_t *pte, bool write, bool anon, bool old);
+		struct page *page, pte_t *pte, bool write, bool anon);
 #endif
 
 /*
diff --git a/mm/filemap.c b/mm/filemap.c
index 00ae878..20f3b1f 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2186,7 +2186,7 @@ repeat:
 		if (file->f_ra.mmap_miss > 0)
 			file->f_ra.mmap_miss--;
 		addr = address + (page->index - vmf->pgoff) * PAGE_SIZE;
-		do_set_pte(vma, addr, page, pte, false, false, true);
+		do_set_pte(vma, addr, page, pte, false, false);
 		unlock_page(page);
 		goto next;
 unlock:
diff --git a/mm/memory.c b/mm/memory.c
index 15322b7..61fe7e7 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2877,7 +2877,7 @@ static int __do_fault(struct vm_area_struct *vma, unsigned long address,
  * vm_ops->map_pages.
  */
 void do_set_pte(struct vm_area_struct *vma, unsigned long address,
-		struct page *page, pte_t *pte, bool write, bool anon, bool old)
+		struct page *page, pte_t *pte, bool write, bool anon)
 {
 	pte_t entry;
 
@@ -2885,8 +2885,6 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address,
 	entry = mk_pte(page, vma->vm_page_prot);
 	if (write)
 		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
-	if (old)
-		entry = pte_mkold(entry);
 	if (anon) {
 		inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
 		page_add_new_anon_rmap(page, vma, address, false);
@@ -3032,20 +3030,9 @@ static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	 */
 	if (vma->vm_ops->map_pages && fault_around_bytes >> PAGE_SHIFT > 1) {
 		pte = pte_offset_map_lock(mm, pmd, address, &ptl);
-		if (!pte_same(*pte, orig_pte))
-			goto unlock_out;
 		do_fault_around(vma, address, pte, pgoff, flags);
-		/* Check if the fault is handled by faultaround */
-		if (!pte_same(*pte, orig_pte)) {
-			/*
-			 * Faultaround produce old pte, but the pte we've
-			 * handler fault for should be young.
-			 */
-			pte_t entry = pte_mkyoung(*pte);
-			if (ptep_set_access_flags(vma, address, pte, entry, 0))
-				update_mmu_cache(vma, address, pte);
+		if (!pte_same(*pte, orig_pte))
 			goto unlock_out;
-		}
 		pte_unmap_unlock(pte, ptl);
 	}
 
@@ -3060,7 +3047,7 @@ static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 		put_page(fault_page);
 		return ret;
 	}
-	do_set_pte(vma, address, fault_page, pte, false, false, false);
+	do_set_pte(vma, address, fault_page, pte, false, false);
 	unlock_page(fault_page);
 unlock_out:
 	pte_unmap_unlock(pte, ptl);
@@ -3111,7 +3098,7 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 		}
 		goto uncharge_out;
 	}
-	do_set_pte(vma, address, new_page, pte, true, true, false);
+	do_set_pte(vma, address, new_page, pte, true, true);
 	mem_cgroup_commit_charge(new_page, memcg, false, false);
 	lru_cache_add_active_or_unevictable(new_page, vma);
 	pte_unmap_unlock(pte, ptl);
@@ -3164,7 +3151,7 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 		put_page(fault_page);
 		return ret;
 	}
-	do_set_pte(vma, address, fault_page, pte, true, false, false);
+	do_set_pte(vma, address, fault_page, pte, true, false);
 	pte_unmap_unlock(pte, ptl);
 
 	if (set_page_dirty(fault_page))
-- 
cgit v0.10.2


From 06d8fbc7cfd655d202cfac9870bd79c18c2db698 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Fri, 24 Jun 2016 14:49:48 -0700
Subject: Revert "mm: disable fault around on emulated access bit architecture"

This reverts commit d0834a6c2c5b0c76cfb806bd7dba6556d8b4edbb.

After revert of 5c0a85fad949 ("mm: make faultaround produce old ptes")
faultaround doesn't have dependencies on hardware accessed bit, so let's
revert this one too.

Link: http://lkml.kernel.org/r/1465893750-44080-3-git-send-email-kirill.shutemov@linux.intel.com
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Reported-by: "Huang, Ying" <ying.huang@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Vinayak Menon <vinmenon@codeaurora.org>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/memory.c b/mm/memory.c
index 61fe7e7..cd1f29e 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2898,16 +2898,8 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address,
 	update_mmu_cache(vma, address, pte);
 }
 
-/*
- * If architecture emulates "accessed" or "young" bit without HW support,
- * there is no much gain with fault_around.
- */
 static unsigned long fault_around_bytes __read_mostly =
-#ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
-	PAGE_SIZE;
-#else
 	rounddown_pow_of_two(65536);
-#endif
 
 #ifdef CONFIG_DEBUG_FS
 static int fault_around_bytes_get(void *data, u64 *val)
-- 
cgit v0.10.2


From c17b1f42594eb71b8d3eb5a6dfc907a7eb88a51d Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Fri, 24 Jun 2016 14:49:51 -0700
Subject: hugetlb: fix nr_pmds accounting with shared page tables

We account HugeTLB's shared page table to all processes who share it.
The accounting happens during huge_pmd_share().

If somebody populates pud entry under us, we should decrease pagetable's
refcount and decrease nr_pmds of the process.

By mistake, I increase nr_pmds again in this case.  :-/ It will lead to
"BUG: non-zero nr_pmds on freeing mm: 2" on process' exit.

Let's fix this by increasing nr_pmds only when we're sure that the page
table will be used.

Link: http://lkml.kernel.org/r/20160617122506.GC6534@node.shutemov.name
Fixes: dc6c9a35b66b ("mm: account pmd page tables to the process")
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Reported-by: zhongjiang <zhongjiang@huawei.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 388c2bb..ee03021 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4228,7 +4228,6 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
 		if (saddr) {
 			spte = huge_pte_offset(svma->vm_mm, saddr);
 			if (spte) {
-				mm_inc_nr_pmds(mm);
 				get_page(virt_to_page(spte));
 				break;
 			}
@@ -4243,9 +4242,9 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
 	if (pud_none(*pud)) {
 		pud_populate(mm, pud,
 				(pmd_t *)((unsigned long)spte & PAGE_MASK));
+		mm_inc_nr_pmds(mm);
 	} else {
 		put_page(virt_to_page(spte));
-		mm_inc_nr_pmds(mm);
 	}
 	spin_unlock(ptl);
 out:
-- 
cgit v0.10.2


From d93c4130a7d049b234b5d5a15808eaf5406f2789 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 24 Jun 2016 14:49:54 -0700
Subject: memcg: mem_cgroup_migrate() may be called with irq disabled

mem_cgroup_migrate() uses local_irq_disable/enable() but can be called
with irq disabled from migrate_page_copy().  This ends up enabling irq
while holding a irq context lock triggering the following lockdep
warning.  Fix it by using irq_save/restore instead.

  =================================
  [ INFO: inconsistent lock state ]
  4.7.0-rc1+ #52 Tainted: G        W
  ---------------------------------
  inconsistent {IN-SOFTIRQ-W} -> {SOFTIRQ-ON-W} usage.
  kcompactd0/151 [HC0[0]:SC0[0]:HE1:SE1] takes:
   (&(&ctx->completion_lock)->rlock){+.?.-.}, at: [<000000000038fd96>] aio_migratepage+0x156/0x1e8
  {IN-SOFTIRQ-W} state was registered at:
     __lock_acquire+0x5b6/0x1930
     lock_acquire+0xee/0x270
     _raw_spin_lock_irqsave+0x66/0xb0
     aio_complete+0x98/0x328
     dio_complete+0xe4/0x1e0
     blk_update_request+0xd4/0x450
     scsi_end_request+0x48/0x1c8
     scsi_io_completion+0x272/0x698
     blk_done_softirq+0xca/0xe8
     __do_softirq+0xc8/0x518
     irq_exit+0xee/0x110
     do_IRQ+0x6a/0x88
     io_int_handler+0x11a/0x25c
     __mutex_unlock_slowpath+0x144/0x1d8
     __mutex_unlock_slowpath+0x140/0x1d8
     kernfs_iop_permission+0x64/0x80
     __inode_permission+0x9e/0xf0
     link_path_walk+0x6e/0x510
     path_lookupat+0xc4/0x1a8
     filename_lookup+0x9c/0x160
     user_path_at_empty+0x5c/0x70
     SyS_readlinkat+0x68/0x140
     system_call+0xd6/0x270
  irq event stamp: 971410
  hardirqs last  enabled at (971409):  migrate_page_move_mapping+0x3ea/0x588
  hardirqs last disabled at (971410):  _raw_spin_lock_irqsave+0x3c/0xb0
  softirqs last  enabled at (970526):  __do_softirq+0x460/0x518
  softirqs last disabled at (970519):  irq_exit+0xee/0x110

  other info that might help us debug this:
   Possible unsafe locking scenario:

	 CPU0
	 ----
    lock(&(&ctx->completion_lock)->rlock);
    <Interrupt>
      lock(&(&ctx->completion_lock)->rlock);

    *** DEADLOCK ***

  3 locks held by kcompactd0/151:
   #0:  (&(&mapping->private_lock)->rlock){+.+.-.}, at:  aio_migratepage+0x42/0x1e8
   #1:  (&ctx->ring_lock){+.+.+.}, at:  aio_migratepage+0x5a/0x1e8
   #2:  (&(&ctx->completion_lock)->rlock){+.?.-.}, at:  aio_migratepage+0x156/0x1e8

  stack backtrace:
  CPU: 20 PID: 151 Comm: kcompactd0 Tainted: G        W       4.7.0-rc1+ #52
  Call Trace:
    show_trace+0xea/0xf0
    show_stack+0x72/0xf0
    dump_stack+0x9a/0xd8
    print_usage_bug.part.27+0x2d4/0x2e8
    mark_lock+0x17e/0x758
    mark_held_locks+0xa2/0xd0
    trace_hardirqs_on_caller+0x140/0x1c0
    mem_cgroup_migrate+0x266/0x370
    aio_migratepage+0x16a/0x1e8
    move_to_new_page+0xb0/0x260
    migrate_pages+0x8f4/0x9f0
    compact_zone+0x4dc/0xdc8
    kcompactd_do_work+0x1aa/0x358
    kcompactd+0xba/0x2c8
    kthread+0x10a/0x110
    kernel_thread_starter+0x6/0xc
    kernel_thread_starter+0x0/0xc
  INFO: lockdep is turned off.

Link: http://lkml.kernel.org/r/20160620184158.GO3262@mtj.duckdns.org
Link: http://lkml.kernel.org/g/5767CFE5.7080904@de.ibm.com
Fixes: 74485cf2bc85 ("mm: migrate: consolidate mem_cgroup_migrate() calls")
Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Christian Borntraeger <borntraeger@de.ibm.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@suse.com>
Reviewed-by: Vladimir Davydov <vdavydov@virtuozzo.com>
Cc: <stable@vger.kernel.org>	[4.5+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 75e7440..d4a33e1 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5544,6 +5544,7 @@ void mem_cgroup_migrate(struct page *oldpage, struct page *newpage)
 	struct mem_cgroup *memcg;
 	unsigned int nr_pages;
 	bool compound;
+	unsigned long flags;
 
 	VM_BUG_ON_PAGE(!PageLocked(oldpage), oldpage);
 	VM_BUG_ON_PAGE(!PageLocked(newpage), newpage);
@@ -5574,10 +5575,10 @@ void mem_cgroup_migrate(struct page *oldpage, struct page *newpage)
 
 	commit_charge(newpage, memcg, false);
 
-	local_irq_disable();
+	local_irq_save(flags);
 	mem_cgroup_charge_statistics(memcg, newpage, compound, nr_pages);
 	memcg_check_events(memcg, newpage);
-	local_irq_enable();
+	local_irq_restore(flags);
 }
 
 DEFINE_STATIC_KEY_FALSE(memcg_sockets_enabled_key);
-- 
cgit v0.10.2


From ea3a9645866e12d2b198434f03df3c3e96fb86ce Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 24 Jun 2016 14:49:58 -0700
Subject: memcg: css_alloc should return an ERR_PTR value on error

mem_cgroup_css_alloc() was returning NULL on failure while cgroup core
expected it to return an ERR_PTR value leading to the following NULL
deref after a css allocation failure.  Fix it by return
ERR_PTR(-ENOMEM) instead.  I'll also update cgroup core so that it
can handle NULL returns.

  mkdir: page allocation failure: order:6, mode:0x240c0c0(GFP_KERNEL|__GFP_COMP|__GFP_ZERO)
  CPU: 0 PID: 8738 Comm: mkdir Not tainted 4.7.0-rc3+ #123
  ...
  Call Trace:
    dump_stack+0x68/0xa1
    warn_alloc_failed+0xd6/0x130
    __alloc_pages_nodemask+0x4c6/0xf20
    alloc_pages_current+0x66/0xe0
    alloc_kmem_pages+0x14/0x80
    kmalloc_order_trace+0x2a/0x1a0
    __kmalloc+0x291/0x310
    memcg_update_all_caches+0x6c/0x130
    mem_cgroup_css_alloc+0x590/0x610
    cgroup_apply_control_enable+0x18b/0x370
    cgroup_mkdir+0x1de/0x2e0
    kernfs_iop_mkdir+0x55/0x80
    vfs_mkdir+0xb9/0x150
    SyS_mkdir+0x66/0xd0
    do_syscall_64+0x53/0x120
    entry_SYSCALL64_slow_path+0x25/0x25
  ...
  BUG: unable to handle kernel NULL pointer dereference at 00000000000000d0
  IP:  init_and_link_css+0x37/0x220
  PGD 34b1e067 PUD 3a109067 PMD 0
  Oops: 0002 [#1] SMP
  Modules linked in:
  CPU: 0 PID: 8738 Comm: mkdir Not tainted 4.7.0-rc3+ #123
  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.9.2-20160422_131301-anatol 04/01/2014
  task: ffff88007cbc5200 ti: ffff8800666d4000 task.ti: ffff8800666d4000
  RIP: 0010:[<ffffffff810f2ca7>]  [<ffffffff810f2ca7>] init_and_link_css+0x37/0x220
  RSP: 0018:ffff8800666d7d90  EFLAGS: 00010246
  RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000
  RDX: ffffffff810f2499 RSI: 0000000000000000 RDI: 0000000000000008
  RBP: ffff8800666d7db8 R08: 0000000000000003 R09: 0000000000000000
  R10: 0000000000000001 R11: 0000000000000000 R12: ffff88005a5fb400
  R13: ffffffff81f0f8a0 R14: ffff88005a5fb400 R15: 0000000000000010
  FS:  00007fc944689700(0000) GS:ffff88007fc00000(0000) knlGS:0000000000000000
  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  CR2: 00007f3aed0d2b80 CR3: 000000003a1e8000 CR4: 00000000000006f0
  DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
  DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
  Call Trace:
    cgroup_apply_control_enable+0x1ac/0x370
    cgroup_mkdir+0x1de/0x2e0
    kernfs_iop_mkdir+0x55/0x80
    vfs_mkdir+0xb9/0x150
    SyS_mkdir+0x66/0xd0
    do_syscall_64+0x53/0x120
    entry_SYSCALL64_slow_path+0x25/0x25
  Code: 89 f5 48 89 fb 49 89 d4 48 83 ec 08 8b 05 72 3b d8 00 85 c0 0f 85 60 01 00 00 4c 89 e7 e8 72 f7 ff ff 48 8d 7b 08 48 89 d9 31 c0 <48> c7 83 d0 00 00 00 00 00 00 00 48 83 e7 f8 48 29 f9 81 c1 d8
  RIP   init_and_link_css+0x37/0x220
   RSP <ffff8800666d7d90>
  CR2: 00000000000000d0
  ---[ end trace a2d8836ae1e852d1 ]---

Link: http://lkml.kernel.org/r/20160621165740.GJ3262@mtj.duckdns.org
Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Vladimir Davydov <vdavydov@virtuozzo.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index d4a33e1..ac8664db 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4203,7 +4203,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
 	return &memcg->css;
 fail:
 	mem_cgroup_free(memcg);
-	return NULL;
+	return ERR_PTR(-ENOMEM);
 }
 
 static int
-- 
cgit v0.10.2


From 8f182270dfec432e93fae14f9208a6b9af01009f Mon Sep 17 00:00:00 2001
From: Lukasz Odzioba <lukasz.odzioba@intel.com>
Date: Fri, 24 Jun 2016 14:50:01 -0700
Subject: mm/swap.c: flush lru pvecs on compound page arrival

Currently we can have compound pages held on per cpu pagevecs, which
leads to a lot of memory unavailable for reclaim when needed.  In the
systems with hundreads of processors it can be GBs of memory.

On of the way of reproducing the problem is to not call munmap
explicitly on all mapped regions (i.e.  after receiving SIGTERM).  After
that some pages (with THP enabled also huge pages) may end up on
lru_add_pvec, example below.

  void main() {
  #pragma omp parallel
  {
	size_t size = 55 * 1000 * 1000; // smaller than  MEM/CPUS
	void *p = mmap(NULL, size, PROT_READ | PROT_WRITE,
		MAP_PRIVATE | MAP_ANONYMOUS , -1, 0);
	if (p != MAP_FAILED)
		memset(p, 0, size);
	//munmap(p, size); // uncomment to make the problem go away
  }
  }

When we run it with THP enabled it will leave significant amount of
memory on lru_add_pvec.  This memory will be not reclaimed if we hit
OOM, so when we run above program in a loop:

	for i in `seq 100`; do ./a.out; done

many processes (95% in my case) will be killed by OOM.

The primary point of the LRU add cache is to save the zone lru_lock
contention with a hope that more pages will belong to the same zone and
so their addition can be batched.  The huge page is already a form of
batched addition (it will add 512 worth of memory in one go) so skipping
the batching seems like a safer option when compared to a potential
excess in the caching which can be quite large and much harder to fix
because lru_add_drain_all is way to expensive and it is not really clear
what would be a good moment to call it.

Similarly we can reproduce the problem on lru_deactivate_pvec by adding:
madvise(p, size, MADV_FREE); after memset.

This patch flushes lru pvecs on compound page arrival making the problem
less severe - after applying it kill rate of above example drops to 0%,
due to reducing maximum amount of memory held on pvec from 28MB (with
THP) to 56kB per CPU.

Suggested-by: Michal Hocko <mhocko@suse.com>
Link: http://lkml.kernel.org/r/1466180198-18854-1-git-send-email-lukasz.odzioba@intel.com
Signed-off-by: Lukasz Odzioba <lukasz.odzioba@intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Kirill Shutemov <kirill.shutemov@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Vladimir Davydov <vdavydov@parallels.com>
Cc: Ming Li <mingli199x@qq.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/swap.c b/mm/swap.c
index 59f5faf..90530ff 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -242,7 +242,7 @@ void rotate_reclaimable_page(struct page *page)
 		get_page(page);
 		local_irq_save(flags);
 		pvec = this_cpu_ptr(&lru_rotate_pvecs);
-		if (!pagevec_add(pvec, page))
+		if (!pagevec_add(pvec, page) || PageCompound(page))
 			pagevec_move_tail(pvec);
 		local_irq_restore(flags);
 	}
@@ -296,7 +296,7 @@ void activate_page(struct page *page)
 		struct pagevec *pvec = &get_cpu_var(activate_page_pvecs);
 
 		get_page(page);
-		if (!pagevec_add(pvec, page))
+		if (!pagevec_add(pvec, page) || PageCompound(page))
 			pagevec_lru_move_fn(pvec, __activate_page, NULL);
 		put_cpu_var(activate_page_pvecs);
 	}
@@ -391,9 +391,8 @@ static void __lru_cache_add(struct page *page)
 	struct pagevec *pvec = &get_cpu_var(lru_add_pvec);
 
 	get_page(page);
-	if (!pagevec_space(pvec))
+	if (!pagevec_add(pvec, page) || PageCompound(page))
 		__pagevec_lru_add(pvec);
-	pagevec_add(pvec, page);
 	put_cpu_var(lru_add_pvec);
 }
 
@@ -628,7 +627,7 @@ void deactivate_file_page(struct page *page)
 	if (likely(get_page_unless_zero(page))) {
 		struct pagevec *pvec = &get_cpu_var(lru_deactivate_file_pvecs);
 
-		if (!pagevec_add(pvec, page))
+		if (!pagevec_add(pvec, page) || PageCompound(page))
 			pagevec_lru_move_fn(pvec, lru_deactivate_file_fn, NULL);
 		put_cpu_var(lru_deactivate_file_pvecs);
 	}
@@ -648,7 +647,7 @@ void deactivate_page(struct page *page)
 		struct pagevec *pvec = &get_cpu_var(lru_deactivate_pvecs);
 
 		get_page(page);
-		if (!pagevec_add(pvec, page))
+		if (!pagevec_add(pvec, page) || PageCompound(page))
 			pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);
 		put_cpu_var(lru_deactivate_pvecs);
 	}
-- 
cgit v0.10.2


From c8cc708a340cc7c5445565079fd4d1c28898d7a2 Mon Sep 17 00:00:00 2001
From: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Date: Fri, 24 Jun 2016 14:50:04 -0700
Subject: mm/hugetlb: clear compound_mapcount when freeing gigantic pages

While working on s390 support for gigantic hugepages I ran into the
following "Bad page state" warning when freeing gigantic pages:

  BUG: Bad page state in process bash  pfn:580001
  page:000003d116000040 count:0 mapcount:0 mapping:ffffffff00000000 index:0x0
  flags: 0x7fffc0000000000()
  page dumped because: non-NULL mapping

This is because page->compound_mapcount, which is part of a union with
page->mapping, is initialized with -1 in prep_compound_gigantic_page(),
and not cleared again during destroy_compound_gigantic_page().  Fix this
by clearing the compound_mapcount in destroy_compound_gigantic_page()
before clearing compound_head.

Interestingly enough, the warning will not show up on x86_64, although
this should not be architecture specific.  Apparently there is an
endianness issue, combined with the fact that the union contains both a
64 bit ->mapping pointer and a 32 bit atomic_t ->compound_mapcount as
members.  The resulting bogus page->mapping on x86_64 therefore contains
00000000ffffffff instead of ffffffff00000000 on s390, which will falsely
trigger the PageAnon() check in free_pages_prepare() because
page->mapping & PAGE_MAPPING_ANON is true on little-endian architectures
like x86_64 in this case (the page is not compound anymore,
->compound_head was already cleared before).  As a result, page->mapping
will be cleared before doing the checks in free_pages_check().

Not sure if the bogus "PageAnon() returning true" on x86_64 for the
first tail page of a gigantic page (at this stage) has other theoretical
implications, but they would also be fixed with this patch.

Link: http://lkml.kernel.org/r/1466612719-5642-1-git-send-email-gerald.schaefer@de.ibm.com
Signed-off-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Luiz Capitulino <lcapitulino@redhat.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: "Kirill A . Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: "Aneesh Kumar K . V" <aneesh.kumar@linux.vnet.ibm.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index ee03021..c1f3c0b 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1030,6 +1030,7 @@ static void destroy_compound_gigantic_page(struct page *page,
 	int nr_pages = 1 << order;
 	struct page *p = page + 1;
 
+	atomic_set(compound_mapcount_ptr(page), 0);
 	for (i = 1; i < nr_pages; i++, p = mem_map_next(p, page, i)) {
 		clear_compound_head(p);
 		set_page_refcounted(p);
-- 
cgit v0.10.2


From 5c335fe020ea287b2b49cc4dfca9f6756b88bb71 Mon Sep 17 00:00:00 2001
From: Dmitry Vyukov <dvyukov@google.com>
Date: Fri, 24 Jun 2016 14:50:07 -0700
Subject: mm: prevent KASAN false positives in kmemleak

When kmemleak dumps contents of leaked objects it reads whole objects
regardless of user-requested size.  This upsets KASAN.  Disable KASAN
checks around object dump.

Link: http://lkml.kernel.org/r/1466617631-68387-1-git-send-email-dvyukov@google.com
Signed-off-by: Dmitry Vyukov <dvyukov@google.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Cc: Alexander Potapenko <glider@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index e642992..04320d3 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -307,8 +307,10 @@ static void hex_dump_object(struct seq_file *seq,
 	len = min_t(size_t, object->size, HEX_MAX_LINES * HEX_ROW_SIZE);
 
 	seq_printf(seq, "  hex dump (first %zu bytes):\n", len);
+	kasan_disable_current();
 	seq_hex_dump(seq, "    ", DUMP_PREFIX_NONE, HEX_ROW_SIZE,
 		     HEX_GROUP_SIZE, ptr, len, HEX_ASCII);
+	kasan_enable_current();
 }
 
 /*
-- 
cgit v0.10.2


From a4f04f2c6955aff5e2c08dcb40aca247ff4d7370 Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Fri, 24 Jun 2016 14:50:10 -0700
Subject: mm, compaction: abort free scanner if split fails

If the memory compaction free scanner cannot successfully split a free
page (only possible due to per-zone low watermark), terminate the free
scanner rather than continuing to scan memory needlessly.  If the
watermark is insufficient for a free page of order <= cc->order, then
terminate the scanner since all future splits will also likely fail.

This prevents the compaction freeing scanner from scanning all memory on
very large zones (very noticeable for zones > 128GB, for instance) when
all splits will likely fail while holding zone->lock.

compaction_alloc() iterating a 128GB zone has been benchmarked to take
over 400ms on some systems whereas any free page isolated and ready to
be split ends up failing in split_free_page() because of the low
watermark check and thus the iteration continues.

The next time compaction occurs, the freeing scanner will likely start
at the end of the zone again since no success was made previously and we
get the same lengthy iteration until the zone is brought above the low
watermark.  All thp page faults can take >400ms in such a state without
this fix.

Link: http://lkml.kernel.org/r/alpine.DEB.2.10.1606211820350.97086@chino.kir.corp.google.com
Signed-off-by: David Rientjes <rientjes@google.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Hugh Dickins <hughd@google.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/compaction.c b/mm/compaction.c
index 1427366..79bfe0e 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -441,25 +441,23 @@ static unsigned long isolate_freepages_block(struct compact_control *cc,
 
 		/* Found a free page, break it into order-0 pages */
 		isolated = split_free_page(page);
+		if (!isolated)
+			break;
+
 		total_isolated += isolated;
+		cc->nr_freepages += isolated;
 		for (i = 0; i < isolated; i++) {
 			list_add(&page->lru, freelist);
 			page++;
 		}
-
-		/* If a page was split, advance to the end of it */
-		if (isolated) {
-			cc->nr_freepages += isolated;
-			if (!strict &&
-				cc->nr_migratepages <= cc->nr_freepages) {
-				blockpfn += isolated;
-				break;
-			}
-
-			blockpfn += isolated - 1;
-			cursor += isolated - 1;
-			continue;
+		if (!strict && cc->nr_migratepages <= cc->nr_freepages) {
+			blockpfn += isolated;
+			break;
 		}
+		/* Advance to the end of split page */
+		blockpfn += isolated - 1;
+		cursor += isolated - 1;
+		continue;
 
 isolate_fail:
 		if (strict)
@@ -469,6 +467,9 @@ isolate_fail:
 
 	}
 
+	if (locked)
+		spin_unlock_irqrestore(&cc->zone->lock, flags);
+
 	/*
 	 * There is a tiny chance that we have read bogus compound_order(),
 	 * so be careful to not go outside of the pageblock.
@@ -490,9 +491,6 @@ isolate_fail:
 	if (strict && blockpfn < end_pfn)
 		total_isolated = 0;
 
-	if (locked)
-		spin_unlock_irqrestore(&cc->zone->lock, flags);
-
 	/* Update the pageblock-skip if the whole pageblock was scanned */
 	if (blockpfn == end_pfn)
 		update_pageblock_skip(cc, valid_page, total_isolated, false);
@@ -1011,6 +1009,7 @@ static void isolate_freepages(struct compact_control *cc)
 				block_end_pfn = block_start_pfn,
 				block_start_pfn -= pageblock_nr_pages,
 				isolate_start_pfn = block_start_pfn) {
+		unsigned long isolated;
 
 		/*
 		 * This can iterate a massively long zone without finding any
@@ -1035,8 +1034,12 @@ static void isolate_freepages(struct compact_control *cc)
 			continue;
 
 		/* Found a block suitable for isolating free pages from. */
-		isolate_freepages_block(cc, &isolate_start_pfn,
-					block_end_pfn, freelist, false);
+		isolated = isolate_freepages_block(cc, &isolate_start_pfn,
+						block_end_pfn, freelist, false);
+		/* If isolation failed early, do not continue needlessly */
+		if (!isolated && isolate_start_pfn < block_end_pfn &&
+		    cc->nr_migratepages > cc->nr_freepages)
+			break;
 
 		/*
 		 * If we isolated enough freepages, or aborted due to async
-- 
cgit v0.10.2


From 7186ee06b66313dae0d34ec5241fda7c4a537cb9 Mon Sep 17 00:00:00 2001
From: Gang He <ghe@suse.com>
Date: Fri, 24 Jun 2016 14:50:13 -0700
Subject: ocfs2: disable BUG assertions in reading blocks

According to some high-load testing, these two BUG assertions were
encountered, this led system panic.  Actually, there were some
discussions about removing these two BUG() assertions, it would not
bring any side effect.

Then, I did the the following changes,

1) use the existing macro CATCH_BH_JBD_RACES to wrap BUG() in the
   ocfs2_read_blocks_sync function like before.

2) disable the macro CATCH_BH_JBD_RACES in Makefile by default.

Link: http://lkml.kernel.org/r/1466574294-26863-1-git-send-email-ghe@suse.com
Signed-off-by: Gang He <ghe@suse.com>
Cc: Mark Fasheh <mfasheh@suse.de>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Junxiao Bi <junxiao.bi@oracle.com>
Cc: Joseph Qi <joseph.qi@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile
index e27e652..4342c7e 100644
--- a/fs/ocfs2/Makefile
+++ b/fs/ocfs2/Makefile
@@ -1,7 +1,5 @@
 ccflags-y := -Ifs/ocfs2
 
-ccflags-y += -DCATCH_BH_JBD_RACES
-
 obj-$(CONFIG_OCFS2_FS) += 	\
 	ocfs2.o			\
 	ocfs2_stackglue.o
diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c
index fe50ded..498641e 100644
--- a/fs/ocfs2/buffer_head_io.c
+++ b/fs/ocfs2/buffer_head_io.c
@@ -139,11 +139,16 @@ int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
 
 		lock_buffer(bh);
 		if (buffer_jbd(bh)) {
+#ifdef CATCH_BH_JBD_RACES
 			mlog(ML_ERROR,
 			     "block %llu had the JBD bit set "
 			     "while I was in lock_buffer!",
 			     (unsigned long long)bh->b_blocknr);
 			BUG();
+#else
+			unlock_buffer(bh);
+			continue;
+#endif
 		}
 
 		clear_buffer_uptodate(bh);
-- 
cgit v0.10.2


From 74070542099c66d87aebeacd7b54dc0e8b6a73f9 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Fri, 24 Jun 2016 14:50:16 -0700
Subject: oom, suspend: fix oom_reaper vs. oom_killer_disable race

Tetsuo has reported the following potential oom_killer_disable vs.
oom_reaper race:

 (1) freeze_processes() starts freezing user space threads.
 (2) Somebody (maybe a kenrel thread) calls out_of_memory().
 (3) The OOM killer calls mark_oom_victim() on a user space thread
     P1 which is already in __refrigerator().
 (4) oom_killer_disable() sets oom_killer_disabled = true.
 (5) P1 leaves __refrigerator() and enters do_exit().
 (6) The OOM reaper calls exit_oom_victim(P1) before P1 can call
     exit_oom_victim(P1).
 (7) oom_killer_disable() returns while P1 not yet finished
 (8) P1 perform IO/interfere with the freezer.

This situation is unfortunate.  We cannot move oom_killer_disable after
all the freezable kernel threads are frozen because the oom victim might
depend on some of those kthreads to make a forward progress to exit so
we could deadlock.  It is also far from trivial to teach the oom_reaper
to not call exit_oom_victim() because then we would lose a guarantee of
the OOM killer and oom_killer_disable forward progress because
exit_mm->mmput might block and never call exit_oom_victim.

It seems the easiest way forward is to workaround this race by calling
try_to_freeze_tasks again after oom_killer_disable.  This will make sure
that all the tasks are frozen or it bails out.

Fixes: 449d777d7ad6 ("mm, oom_reaper: clear TIF_MEMDIE for all tasks queued for oom_reaper")
Link: http://lkml.kernel.org/r/1466597634-16199-1-git-send-email-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Reported-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/kernel/power/process.c b/kernel/power/process.c
index df058be..0c2ee97 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -146,6 +146,18 @@ int freeze_processes(void)
 	if (!error && !oom_killer_disable())
 		error = -EBUSY;
 
+	/*
+	 * There is a hard to fix race between oom_reaper kernel thread
+	 * and oom_killer_disable. oom_reaper calls exit_oom_victim
+	 * before the victim reaches exit_mm so try to freeze all the tasks
+	 * again and catch such a left over task.
+	 */
+	if (!error) {
+		pr_info("Double checking all user space processes after OOM killer disable... ");
+		error = try_to_freeze_tasks(true);
+		pr_cont("\n");
+	}
+
 	if (error)
 		thaw_processes();
 	return error;
-- 
cgit v0.10.2


From 63d2f95d63396059200c391ca87161897b99e74a Mon Sep 17 00:00:00 2001
From: Torsten Hilbrich <torsten.hilbrich@secunet.com>
Date: Fri, 24 Jun 2016 14:50:18 -0700
Subject: fs/nilfs2: fix potential underflow in call to crc32_le

The value `bytes' comes from the filesystem which is about to be
mounted.  We cannot trust that the value is always in the range we
expect it to be.

Check its value before using it to calculate the length for the crc32_le
call.  It value must be larger (or equal) sumoff + 4.

This fixes a kernel bug when accidentially mounting an image file which
had the nilfs2 magic value 0x3434 at the right offset 0x406 by chance.
The bytes 0x01 0x00 were stored at 0x408 and were interpreted as a
s_bytes value of 1.  This caused an underflow when substracting sumoff +
4 (20) in the call to crc32_le.

  BUG: unable to handle kernel paging request at ffff88021e600000
  IP:  crc32_le+0x36/0x100
  ...
  Call Trace:
    nilfs_valid_sb.part.5+0x52/0x60 [nilfs2]
    nilfs_load_super_block+0x142/0x300 [nilfs2]
    init_nilfs+0x60/0x390 [nilfs2]
    nilfs_mount+0x302/0x520 [nilfs2]
    mount_fs+0x38/0x160
    vfs_kern_mount+0x67/0x110
    do_mount+0x269/0xe00
    SyS_mount+0x9f/0x100
    entry_SYSCALL_64_fastpath+0x16/0x71

Link: http://lkml.kernel.org/r/1466778587-5184-2-git-send-email-konishi.ryusuke@lab.ntt.co.jp
Signed-off-by: Torsten Hilbrich <torsten.hilbrich@secunet.com>
Tested-by: Torsten Hilbrich <torsten.hilbrich@secunet.com>
Signed-off-by: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index 809bd2d..e9fd241 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -439,7 +439,7 @@ static int nilfs_valid_sb(struct nilfs_super_block *sbp)
 	if (!sbp || le16_to_cpu(sbp->s_magic) != NILFS_SUPER_MAGIC)
 		return 0;
 	bytes = le16_to_cpu(sbp->s_bytes);
-	if (bytes > BLOCK_SIZE)
+	if (bytes < sumoff + 4 || bytes > BLOCK_SIZE)
 		return 0;
 	crc = crc32_le(le32_to_cpu(sbp->s_crc_seed), (unsigned char *)sbp,
 		       sumoff);
-- 
cgit v0.10.2


From 7c5b7239465932400ee0825bcc90624717c1af19 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Fri, 24 Jun 2016 14:50:21 -0700
Subject: tools/vm/slabinfo: fix spelling mistake: "Ocurrences" ->
 "Occurrences"

trivial fix to spelling mistake

Link: http://lkml.kernel.org/r/1466672144-831-1-git-send-email-colin.king@canonical.com
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Acked-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/tools/vm/slabinfo.c b/tools/vm/slabinfo.c
index 1889163..7cf6e17 100644
--- a/tools/vm/slabinfo.c
+++ b/tools/vm/slabinfo.c
@@ -492,7 +492,7 @@ static void slab_stats(struct slabinfo *s)
 			s->deactivate_to_head + s->deactivate_to_tail + s->deactivate_bypass;
 
 	if (total) {
-		printf("\nSlab Deactivation             Ocurrences  %%\n");
+		printf("\nSlab Deactivation             Occurrences %%\n");
 		printf("-------------------------------------------------\n");
 		printf("Slab full                     %7lu  %3lu%%\n",
 			s->deactivate_full, (s->deactivate_full * 100) / total);
-- 
cgit v0.10.2


From 8285027fc479949a7a166bc1b26ce57e894878a7 Mon Sep 17 00:00:00 2001
From: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
Date: Fri, 24 Jun 2016 14:50:24 -0700
Subject: mm/page_owner: avoid null pointer dereference

We have dereferenced page_ext before checking it.  Lets check it first
and then used it.

Fixes: f86e4271978b ("mm: check the return value of lookup_page_ext for all call sites")
Link: http://lkml.kernel.org/r/1465249059-7883-1-git-send-email-sudipm.mukherjee@gmail.com
Signed-off-by: Sudip Mukherjee <sudip.mukherjee@codethink.co.uk>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/page_owner.c b/mm/page_owner.c
index c6cda3e..fedeba8 100644
--- a/mm/page_owner.c
+++ b/mm/page_owner.c
@@ -207,13 +207,15 @@ void __dump_page_owner(struct page *page)
 		.nr_entries = page_ext->nr_entries,
 		.entries = &page_ext->trace_entries[0],
 	};
-	gfp_t gfp_mask = page_ext->gfp_mask;
-	int mt = gfpflags_to_migratetype(gfp_mask);
+	gfp_t gfp_mask;
+	int mt;
 
 	if (unlikely(!page_ext)) {
 		pr_alert("There is not page extension available.\n");
 		return;
 	}
+	gfp_mask = page_ext->gfp_mask;
+	mt = gfpflags_to_migratetype(gfp_mask);
 
 	if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags)) {
 		pr_alert("page_owner info is not active (free page?)\n");
-- 
cgit v0.10.2


From 5a9294e5c535deab69831076af15cd35e1c95f8b Mon Sep 17 00:00:00 2001
From: Andrey Vagin <avagin@openvz.org>
Date: Fri, 24 Jun 2016 14:50:27 -0700
Subject: autofs: don't get stuck in a loop if vfs_write() returns an error

__vfs_write() returns a negative value in a error case.

Link: http://lkml.kernel.org/r/20160616083108.6278.65815.stgit@pluto.themaw.net
Signed-off-by: Andrey Vagin <avagin@openvz.org>
Signed-off-by: Ian Kent <raven@themaw.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 0146d91..631f155 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -66,11 +66,12 @@ static int autofs4_write(struct autofs_sb_info *sbi,
 	set_fs(KERNEL_DS);
 
 	mutex_lock(&sbi->pipe_mutex);
-	wr = __vfs_write(file, data, bytes, &file->f_pos);
-	while (bytes && wr) {
+	while (bytes) {
+		wr = __vfs_write(file, data, bytes, &file->f_pos);
+		if (wr <= 0)
+			break;
 		data += wr;
 		bytes -= wr;
-		wr = __vfs_write(file, data, bytes, &file->f_pos);
 	}
 	mutex_unlock(&sbi->pipe_mutex);
 
-- 
cgit v0.10.2


From 0fd5ed8d897cffdc74903931bd7fcc9d8d154460 Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Fri, 24 Jun 2016 14:50:30 -0700
Subject: init/main.c: fix initcall_blacklisted on ia64, ppc64 and parisc64

When I replaced kasprintf("%pf") with a direct call to
sprint_symbol_no_offset I must have broken the initcall blacklisting
feature on the arches where dereference_function_descriptor() is
non-trivial.

Fixes: c8cdd2be213f (init/main.c: simplify initcall_blacklisted())
Link: http://lkml.kernel.org/r/1466027283-4065-1-git-send-email-linux@rasmusvillemoes.dk
Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Yang Shi <yang.shi@linaro.org>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Petr Mladek <pmladek@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/init/main.c b/init/main.c
index 4c17fda..63a5afb 100644
--- a/init/main.c
+++ b/init/main.c
@@ -708,11 +708,13 @@ static bool __init_or_module initcall_blacklisted(initcall_t fn)
 {
 	struct blacklist_entry *entry;
 	char fn_name[KSYM_SYMBOL_LEN];
+	unsigned long addr;
 
 	if (list_empty(&blacklisted_initcalls))
 		return false;
 
-	sprint_symbol_no_offset(fn_name, (unsigned long)fn);
+	addr = (unsigned long) dereference_function_descriptor(fn);
+	sprint_symbol_no_offset(fn_name, addr);
 
 	list_for_each_entry(entry, &blacklisted_initcalls, next) {
 		if (!strcmp(fn_name, entry->buf)) {
-- 
cgit v0.10.2


From b42b90d177d0c7f46f9c888c10c7900578ae7e09 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 24 Jun 2016 23:49:03 -0400
Subject: ceph: fix d_obtain_alias() misuses

on failure d_obtain_alias() will have done iput()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index 6e72c98..1780218 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -95,10 +95,8 @@ static struct dentry *__fh_to_dentry(struct super_block *sb, u64 ino)
 	}
 
 	dentry = d_obtain_alias(inode);
-	if (IS_ERR(dentry)) {
-		iput(inode);
+	if (IS_ERR(dentry))
 		return dentry;
-	}
 	err = ceph_init_dentry(dentry);
 	if (err < 0) {
 		dput(dentry);
@@ -167,10 +165,8 @@ static struct dentry *__get_parent(struct super_block *sb,
 		return ERR_PTR(-ENOENT);
 
 	dentry = d_obtain_alias(inode);
-	if (IS_ERR(dentry)) {
-		iput(inode);
+	if (IS_ERR(dentry))
 		return dentry;
-	}
 	err = ceph_init_dentry(dentry);
 	if (err < 0) {
 		dput(dentry);
@@ -210,7 +206,7 @@ static struct dentry *ceph_fh_to_parent(struct super_block *sb,
 
 	dout("fh_to_parent %llx\n", cfh->parent_ino);
 	dentry = __get_parent(sb, NULL, cfh->ino);
-	if (IS_ERR(dentry) && PTR_ERR(dentry) == -ENOENT)
+	if (unlikely(dentry == ERR_PTR(-ENOENT)))
 		dentry = __fh_to_dentry(sb, cfh->parent_ino);
 	return dentry;
 }
-- 
cgit v0.10.2


From 9521d39976db20f8ef9b56af66661482a17d5364 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Sat, 25 Jun 2016 21:53:30 +1000
Subject: Fix build break in fork.c when THREAD_SIZE < PAGE_SIZE

Commit b235beea9e99 ("Clarify naming of thread info/stack allocators")
breaks the build on some powerpc configs, where THREAD_SIZE < PAGE_SIZE:

  kernel/fork.c:235:2: error: implicit declaration of function 'free_thread_stack'
  kernel/fork.c:355:8: error: assignment from incompatible pointer type
    stack = alloc_thread_stack_node(tsk, node);
    ^

Fix it by renaming free_stack() to free_thread_stack(), and updating the
return type of alloc_thread_stack_node().

Fixes: b235beea9e99 ("Clarify naming of thread info/stack allocators")
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/kernel/fork.c b/kernel/fork.c
index 37b9439..4a7ec0c 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -183,13 +183,13 @@ static inline void free_thread_stack(unsigned long *stack)
 # else
 static struct kmem_cache *thread_stack_cache;
 
-static struct thread_info *alloc_thread_stack_node(struct task_struct *tsk,
+static unsigned long *alloc_thread_stack_node(struct task_struct *tsk,
 						  int node)
 {
 	return kmem_cache_alloc_node(thread_stack_cache, THREADINFO_GFP, node);
 }
 
-static void free_stack(unsigned long *stack)
+static void free_thread_stack(unsigned long *stack)
 {
 	kmem_cache_free(thread_stack_cache, stack);
 }
-- 
cgit v0.10.2


From 02dbfc99b424dde3cf0a492ed3bec4f222441754 Mon Sep 17 00:00:00 2001
From: Omar Sandoval <osandov@fb.com>
Date: Fri, 20 May 2016 13:50:33 -0700
Subject: Btrfs: fix ->iterate_shared() by upgrading i_rwsem for delayed nodes

Commit fe742fd4f90f ("Revert "btrfs: switch to ->iterate_shared()"")
backed out the conversion to ->iterate_shared() for Btrfs because the
delayed inode handling in btrfs_real_readdir() is racy. However, we can
still do readdir in parallel if there are no delayed nodes.

This is a temporary fix which upgrades the shared inode lock to an
exclusive lock only when we have delayed items until we come up with a
more complete solution. While we're here, rename the
btrfs_{get,put}_delayed_items functions to make it very clear that
they're just for readdir.

Tested with xfstests and by doing a parallel kernel build:

	while make tinyconfig && make -j4 && git clean dqfx; do
		:
	done

along with a bunch of parallel finds in another shell:

	while true; do
		for ((i=0; i<4; i++)); do
			find . >/dev/null &
		done
		wait
	done

Signed-off-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Signed-off-by: Chris Mason <clm@fb.com>

diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 61561c2..d3aaabb 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1606,15 +1606,23 @@ int btrfs_inode_delayed_dir_index_count(struct inode *inode)
 	return 0;
 }
 
-void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list,
-			     struct list_head *del_list)
+bool btrfs_readdir_get_delayed_items(struct inode *inode,
+				     struct list_head *ins_list,
+				     struct list_head *del_list)
 {
 	struct btrfs_delayed_node *delayed_node;
 	struct btrfs_delayed_item *item;
 
 	delayed_node = btrfs_get_delayed_node(inode);
 	if (!delayed_node)
-		return;
+		return false;
+
+	/*
+	 * We can only do one readdir with delayed items at a time because of
+	 * item->readdir_list.
+	 */
+	inode_unlock_shared(inode);
+	inode_lock(inode);
 
 	mutex_lock(&delayed_node->mutex);
 	item = __btrfs_first_delayed_insertion_item(delayed_node);
@@ -1641,10 +1649,13 @@ void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list,
 	 * requeue or dequeue this delayed node.
 	 */
 	atomic_dec(&delayed_node->refs);
+
+	return true;
 }
 
-void btrfs_put_delayed_items(struct list_head *ins_list,
-			     struct list_head *del_list)
+void btrfs_readdir_put_delayed_items(struct inode *inode,
+				     struct list_head *ins_list,
+				     struct list_head *del_list)
 {
 	struct btrfs_delayed_item *curr, *next;
 
@@ -1659,6 +1670,12 @@ void btrfs_put_delayed_items(struct list_head *ins_list,
 		if (atomic_dec_and_test(&curr->refs))
 			kfree(curr);
 	}
+
+	/*
+	 * The VFS is going to do up_read(), so we need to downgrade back to a
+	 * read lock.
+	 */
+	downgrade_write(&inode->i_rwsem);
 }
 
 int btrfs_should_delete_dir_index(struct list_head *del_list,
diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h
index 0167853..2495b3d 100644
--- a/fs/btrfs/delayed-inode.h
+++ b/fs/btrfs/delayed-inode.h
@@ -137,10 +137,12 @@ void btrfs_kill_all_delayed_nodes(struct btrfs_root *root);
 void btrfs_destroy_delayed_inodes(struct btrfs_root *root);
 
 /* Used for readdir() */
-void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list,
-			     struct list_head *del_list);
-void btrfs_put_delayed_items(struct list_head *ins_list,
-			     struct list_head *del_list);
+bool btrfs_readdir_get_delayed_items(struct inode *inode,
+				     struct list_head *ins_list,
+				     struct list_head *del_list);
+void btrfs_readdir_put_delayed_items(struct inode *inode,
+				     struct list_head *ins_list,
+				     struct list_head *del_list);
 int btrfs_should_delete_dir_index(struct list_head *del_list,
 				  u64 index);
 int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index d2be95c..969a25c 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -5757,6 +5757,7 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
 	int name_len;
 	int is_curr = 0;	/* ctx->pos points to the current index? */
 	bool emitted;
+	bool put = false;
 
 	/* FIXME, use a real flag for deciding about the key type */
 	if (root->fs_info->tree_root == root)
@@ -5774,7 +5775,8 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
 	if (key_type == BTRFS_DIR_INDEX_KEY) {
 		INIT_LIST_HEAD(&ins_list);
 		INIT_LIST_HEAD(&del_list);
-		btrfs_get_delayed_items(inode, &ins_list, &del_list);
+		put = btrfs_readdir_get_delayed_items(inode, &ins_list,
+						      &del_list);
 	}
 
 	key.type = key_type;
@@ -5921,8 +5923,8 @@ next:
 nopos:
 	ret = 0;
 err:
-	if (key_type == BTRFS_DIR_INDEX_KEY)
-		btrfs_put_delayed_items(&ins_list, &del_list);
+	if (put)
+		btrfs_readdir_put_delayed_items(inode, &ins_list, &del_list);
 	btrfs_free_path(path);
 	return ret;
 }
@@ -10534,7 +10536,7 @@ static const struct inode_operations btrfs_dir_ro_inode_operations = {
 static const struct file_operations btrfs_dir_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= generic_read_dir,
-	.iterate	= btrfs_real_readdir,
+	.iterate_shared	= btrfs_real_readdir,
 	.unlocked_ioctl	= btrfs_ioctl,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl	= btrfs_compat_ioctl,
-- 
cgit v0.10.2


From 1b45996d2ebf9680ccd0db875fc668aa025f40fd Mon Sep 17 00:00:00 2001
From: David Daney <david.daney@cavium.com>
Date: Tue, 17 May 2016 11:41:04 -0700
Subject: tty: vt: Fix soft lockup in fbcon cursor blink timer.

We are getting somewhat random soft lockups with this signature:

[   86.992215] [<fffffc00080935e0>] el1_irq+0xa0/0x10c
[   86.997082] [<fffffc000841822c>] cursor_timer_handler+0x30/0x54
[   87.002991] [<fffffc000810ec44>] call_timer_fn+0x54/0x1a8
[   87.008378] [<fffffc000810ef88>] run_timer_softirq+0x1c4/0x2bc
[   87.014200] [<fffffc000809077c>] __do_softirq+0x114/0x344
[   87.019590] [<fffffc00080af45c>] irq_exit+0x74/0x98
[   87.024458] [<fffffc00080fac20>] __handle_domain_irq+0x98/0xfc
[   87.030278] [<fffffc000809056c>] gic_handle_irq+0x94/0x190

This is caused by the vt visual_init() function calling into
fbcon_init() with a vc_cur_blink_ms value of zero.  This is a
transient condition, as it is later set to a non-zero value.  But, if
the timer happens to expire while the blink rate is zero, it goes into
an endless loop, and we get soft lockup.

The fix is to initialize vc_cur_blink_ms before calling the con_init()
function.

Signed-off-by: David Daney <david.daney@cavium.com>
Cc: stable@vger.kernel.org
Acked-by: Pavel Machek <pavel@ucw.cz>
Tested-by: Ming Lei <ming.lei@canonical.com>
Acked-by: Scot Doyle <lkml14@scotdoyle.com>
Tested-by: Henrique de Moraes Holschuh <hmh@hmh.eng.br>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c
index dc12532..5b0fe97 100644
--- a/drivers/tty/vt/vt.c
+++ b/drivers/tty/vt/vt.c
@@ -750,6 +750,7 @@ static void visual_init(struct vc_data *vc, int num, int init)
 	vc->vc_complement_mask = 0;
 	vc->vc_can_do_color = 0;
 	vc->vc_panic_force_write = false;
+	vc->vc_cur_blink_ms = DEFAULT_CURSOR_BLINK_MS;
 	vc->vc_sw->con_init(vc, init);
 	if (!vc->vc_complement_mask)
 		vc->vc_complement_mask = vc->vc_can_do_color ? 0x7700 : 0x0800;
-- 
cgit v0.10.2


From ef3149eb3ddb7f9125e11c90f8330e371b55cffd Mon Sep 17 00:00:00 2001
From: Luis de Bethencourt <luisbg@osg.samsung.com>
Date: Wed, 22 Jun 2016 20:43:30 +0100
Subject: staging: iio: accel: fix error check

sca3000_read_ctrl_reg() returns a negative number on failure, check for
this instead of zero.

Signed-off-by: Luis de Bethencourt <luisbg@osg.samsung.com>
Cc: <Stable@vger.kernel.org>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>

diff --git a/drivers/staging/iio/accel/sca3000_core.c b/drivers/staging/iio/accel/sca3000_core.c
index a8f533a..ec12181 100644
--- a/drivers/staging/iio/accel/sca3000_core.c
+++ b/drivers/staging/iio/accel/sca3000_core.c
@@ -594,7 +594,7 @@ static ssize_t sca3000_read_frequency(struct device *dev,
 		goto error_ret_mut;
 	ret = sca3000_read_ctrl_reg(st, SCA3000_REG_CTRL_SEL_OUT_CTRL);
 	mutex_unlock(&st->lock);
-	if (ret)
+	if (ret < 0)
 		goto error_ret;
 	val = ret;
 	if (base_freq > 0)
-- 
cgit v0.10.2


From 0c1f91b98552da49d9d8eed32b3132a58d2f4598 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Fri, 17 Jun 2016 15:22:24 +0200
Subject: iio: accel: kxsd9: fix the usage of spi_w8r8()

These two spi_w8r8() calls return a value with is used by the code
following the error check. The dubious use was caused by a cleanup
patch.

Fixes: d34dbee8ac8e ("staging:iio:accel:kxsd9 cleanup and conversion to iio_chan_spec.")
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Cc: <Stable@vger.kernel.org>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>

diff --git a/drivers/iio/accel/kxsd9.c b/drivers/iio/accel/kxsd9.c
index 923f565..3a9f106 100644
--- a/drivers/iio/accel/kxsd9.c
+++ b/drivers/iio/accel/kxsd9.c
@@ -81,7 +81,7 @@ static int kxsd9_write_scale(struct iio_dev *indio_dev, int micro)
 
 	mutex_lock(&st->buf_lock);
 	ret = spi_w8r8(st->us, KXSD9_READ(KXSD9_REG_CTRL_C));
-	if (ret)
+	if (ret < 0)
 		goto error_ret;
 	st->tx[0] = KXSD9_WRITE(KXSD9_REG_CTRL_C);
 	st->tx[1] = (ret & ~KXSD9_FS_MASK) | i;
@@ -163,7 +163,7 @@ static int kxsd9_read_raw(struct iio_dev *indio_dev,
 		break;
 	case IIO_CHAN_INFO_SCALE:
 		ret = spi_w8r8(st->us, KXSD9_READ(KXSD9_REG_CTRL_C));
-		if (ret)
+		if (ret < 0)
 			goto error_ret;
 		*val2 = kxsd9_micro_scales[ret & KXSD9_FS_MASK];
 		ret = IIO_VAL_INT_PLUS_MICRO;
-- 
cgit v0.10.2


From 6b7f4e25f3309f106a5c7ff42c8231494cf285d3 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Mon, 20 Jun 2016 13:53:32 +0100
Subject: iio:ad7266: Fix broken regulator error handling

All regulator_get() variants return either a pointer to a regulator or an
ERR_PTR() so testing for NULL makes no sense and may lead to bugs if we
use NULL as a valid regulator. Fix this by using IS_ERR() as expected.

Signed-off-by: Mark Brown <broonie@kernel.org>
Cc: <Stable@vger.kernel.org>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>

diff --git a/drivers/iio/adc/ad7266.c b/drivers/iio/adc/ad7266.c
index 21e19b6..835d45d 100644
--- a/drivers/iio/adc/ad7266.c
+++ b/drivers/iio/adc/ad7266.c
@@ -397,7 +397,7 @@ static int ad7266_probe(struct spi_device *spi)
 	st = iio_priv(indio_dev);
 
 	st->reg = devm_regulator_get(&spi->dev, "vref");
-	if (!IS_ERR_OR_NULL(st->reg)) {
+	if (!IS_ERR(st->reg)) {
 		ret = regulator_enable(st->reg);
 		if (ret)
 			return ret;
-- 
cgit v0.10.2


From e5511c816e5ac4909bdd38e85ac344e2b9b8e984 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Mon, 20 Jun 2016 13:53:33 +0100
Subject: iio:ad7266: Fix support for optional regulators

The ad7266 driver attempts to support deciding between the use of internal
and external power supplies by checking to see if an error is returned when
requesting the regulator. This doesn't work with the current code since the
driver uses a normal regulator_get() which is for non-optional supplies
and so assumes that if a regulator is not provided by the platform then
this is a bug in the platform integration and so substitutes a dummy
regulator. Use regulator_get_optional() instead which indicates to the
framework that the regulator may be absent and provides a dummy regulator
instead.

Signed-off-by: Mark Brown <broonie@kernel.org>
Cc: <Stable@vger.kernel.org>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>

diff --git a/drivers/iio/adc/ad7266.c b/drivers/iio/adc/ad7266.c
index 835d45d..655b36b 100644
--- a/drivers/iio/adc/ad7266.c
+++ b/drivers/iio/adc/ad7266.c
@@ -396,7 +396,7 @@ static int ad7266_probe(struct spi_device *spi)
 
 	st = iio_priv(indio_dev);
 
-	st->reg = devm_regulator_get(&spi->dev, "vref");
+	st->reg = devm_regulator_get_optional(&spi->dev, "vref");
 	if (!IS_ERR(st->reg)) {
 		ret = regulator_enable(st->reg);
 		if (ret)
-- 
cgit v0.10.2


From 68b356eb3d9f5e38910fb62e22a78e2a18d544ae Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Mon, 20 Jun 2016 13:53:34 +0100
Subject: iio:ad7266: Fix probe deferral for vref

Currently the ad7266 driver treats any failure to get vref as though the
regulator were not present but this means that if probe deferral is
triggered the driver will act as though the regulator were not present.
Instead only use the internal reference if we explicitly got -ENODEV which
is what is returned for absent regulators.

Signed-off-by: Mark Brown <broonie@kernel.org>
Cc: <Stable@vger.kernel.org>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>

diff --git a/drivers/iio/adc/ad7266.c b/drivers/iio/adc/ad7266.c
index 655b36b..2123f0a 100644
--- a/drivers/iio/adc/ad7266.c
+++ b/drivers/iio/adc/ad7266.c
@@ -408,6 +408,9 @@ static int ad7266_probe(struct spi_device *spi)
 
 		st->vref_mv = ret / 1000;
 	} else {
+		/* Any other error indicates that the regulator does exist */
+		if (PTR_ERR(st->reg) != -ENODEV)
+			return PTR_ERR(st->reg);
 		/* Use internal reference */
 		st->vref_mv = 2500;
 	}
-- 
cgit v0.10.2


From 5353ed8deedee9e5acb9f896e9032158f5d998de Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Mon, 20 Jun 2016 15:40:27 +0100
Subject: devpts: fix null pointer dereference on failed memory allocation

An ENOMEM when creating a pair tty in tty_ldisc_setup causes a null
pointer dereference in devpts_kill_index because tty->link->driver_data
is NULL.  The oops was triggered with the pty stressor in stress-ng when
in a low memory condition.

tty_init_dev tries to clean up a tty_ldisc_setup ENOMEM error by calling
release_tty, however, this ultimately tries to clean up the NULL pair'd
tty in pty_unix98_remove, triggering the Oops.

Add check to pty_unix98_remove to only clean up fsi if it is not NULL.

Ooops:

[   23.020961] Oops: 0000 [#1] SMP
[   23.020976] Modules linked in: ppdev snd_hda_codec_generic snd_hda_intel snd_hda_codec parport_pc snd_hda_core snd_hwdep parport snd_pcm input_leds joydev snd_timer serio_raw snd soundcore i2c_piix4 mac_hid ib_iser rdma_cm iw_cm ib_cm ib_core configfs iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi autofs4 btrfs raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c raid1 raid0 multipath linear crct10dif_pclmul crc32_pclmul ghash_clmulni_intel aesni_intel qxl aes_x86_64 ttm lrw gf128mul glue_helper ablk_helper drm_kms_helper cryptd syscopyarea sysfillrect psmouse sysimgblt floppy fb_sys_fops drm pata_acpi jitterentropy_rng drbg ansi_cprng
[   23.020978] CPU: 0 PID: 1452 Comm: stress-ng-pty Not tainted 4.7.0-rc4+ #2
[   23.020978] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014
[   23.020979] task: ffff88007ba30000 ti: ffff880078ea8000 task.ti: ffff880078ea8000
[   23.020981] RIP: 0010:[<ffffffff813f11ff>]  [<ffffffff813f11ff>] ida_remove+0x1f/0x120
[   23.020981] RSP: 0018:ffff880078eabb60  EFLAGS: 00010a03
[   23.020982] RAX: 4444444444444567 RBX: 0000000000000000 RCX: 000000000000001f
[   23.020982] RDX: 000000000000014c RSI: 000000000000026f RDI: 0000000000000000
[   23.020982] RBP: ffff880078eabb70 R08: 0000000000000004 R09: 0000000000000036
[   23.020983] R10: 000000000000026f R11: 0000000000000000 R12: 000000000000026f
[   23.020983] R13: 000000000000026f R14: ffff88007c944b40 R15: 000000000000026f
[   23.020984] FS:  00007f9a2f3cc700(0000) GS:ffff88007fc00000(0000) knlGS:0000000000000000
[   23.020984] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   23.020985] CR2: 0000000000000010 CR3: 000000006c81b000 CR4: 00000000001406f0
[   23.020988] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[   23.020988] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[   23.020988] Stack:
[   23.020989]  0000000000000000 000000000000026f ffff880078eabb90 ffffffff812a5a99
[   23.020990]  0000000000000000 00000000fffffff4 ffff880078eabba8 ffffffff814f9cbe
[   23.020991]  ffff88007965c800 ffff880078eabbc8 ffffffff814eef43 fffffffffffffff4
[   23.020991] Call Trace:
[   23.021000]  [<ffffffff812a5a99>] devpts_kill_index+0x29/0x50
[   23.021002]  [<ffffffff814f9cbe>] pty_unix98_remove+0x2e/0x50
[   23.021006]  [<ffffffff814eef43>] release_tty+0xb3/0x1b0
[   23.021007]  [<ffffffff814f18d4>] tty_init_dev+0xd4/0x1c0
[   23.021011]  [<ffffffff814f9fae>] ptmx_open+0xae/0x190
[   23.021013]  [<ffffffff812254ef>] chrdev_open+0xbf/0x1b0
[   23.021015]  [<ffffffff8121d973>] do_dentry_open+0x203/0x310
[   23.021016]  [<ffffffff81225430>] ? cdev_put+0x30/0x30
[   23.021017]  [<ffffffff8121ee44>] vfs_open+0x54/0x80
[   23.021018]  [<ffffffff8122b8fc>] ? may_open+0x8c/0x100
[   23.021019]  [<ffffffff8122f26b>] path_openat+0x2eb/0x1440
[   23.021020]  [<ffffffff81230534>] ? putname+0x54/0x60
[   23.021022]  [<ffffffff814f6f97>] ? n_tty_ioctl_helper+0x27/0x100
[   23.021023]  [<ffffffff81231651>] do_filp_open+0x91/0x100
[   23.021024]  [<ffffffff81230596>] ? getname_flags+0x56/0x1f0
[   23.021026]  [<ffffffff8123fc66>] ? __alloc_fd+0x46/0x190
[   23.021027]  [<ffffffff8121f1e4>] do_sys_open+0x124/0x210
[   23.021028]  [<ffffffff8121f2ee>] SyS_open+0x1e/0x20
[   23.021035]  [<ffffffff81845576>] entry_SYSCALL_64_fastpath+0x1e/0xa8
[   23.021044] Code: 63 28 45 31 e4 eb dd 0f 1f 44 00 00 55 4c 63 d6 48 ba 89 88 88 88 88 88 88 88 4c 89 d0 b9 1f 00 00 00 48 f7 e2 48 89 e5 41 54 53 <8b> 47 10 48 89 fb 8d 3c c5 00 00 00 00 48 c1 ea 09 b8 01 00 00
[   23.021045] RIP  [<ffffffff813f11ff>] ida_remove+0x1f/0x120
[   23.021045]  RSP <ffff880078eabb60>
[   23.021046] CR2: 0000000000000010

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c
index f856c45..51e0d32 100644
--- a/drivers/tty/pty.c
+++ b/drivers/tty/pty.c
@@ -667,8 +667,11 @@ static void pty_unix98_remove(struct tty_driver *driver, struct tty_struct *tty)
 		fsi = tty->driver_data;
 	else
 		fsi = tty->link->driver_data;
-	devpts_kill_index(fsi, tty->index);
-	devpts_release(fsi);
+
+	if (fsi) {
+		devpts_kill_index(fsi, tty->index);
+		devpts_release(fsi);
+	}
 }
 
 static const struct tty_operations ptm_unix98_ops = {
-- 
cgit v0.10.2


From 4c2e07c6a29e0129e975727b9f57eede813eea85 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 26 Jun 2016 17:52:03 -0700
Subject: Linux 4.7-rc5


diff --git a/Makefile b/Makefile
index 4fb6bea..6471f20 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 4
 PATCHLEVEL = 7
 SUBLEVEL = 0
-EXTRAVERSION = -rc4
+EXTRAVERSION = -rc5
 NAME = Psychotic Stoned Sheep
 
 # *DOCUMENTATION*
-- 
cgit v0.10.2


From a37503bc387ce2434106d4bf4870bd73081c7355 Mon Sep 17 00:00:00 2001
From: Jeremy Linton <jeremy.linton@arm.com>
Date: Wed, 22 Jun 2016 12:40:50 -0500
Subject: net: smsc911x: Fix bug where PHY interrupts are overwritten by 0

By default, mdiobus_alloc() sets the PHYs to polling mode, but a
pointer size memcpy means that a couple IRQs end up being overwritten
with a value of 0. This means that PHY_POLL is disabled and results
in unpredictable behavior depending on the PHY's location on the
MDIO bus. Remove that memcpy and the now unused phy_irq member to
force the SMSC911x PHYs into polling mode 100% of the time.

Fixes: e7f4dc3536a4 ("mdio: Move allocation of interrupts into core")
Signed-off-by: Jeremy Linton <jeremy.linton@arm.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Acked-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c
index 8af2556..b5ab5e1 100644
--- a/drivers/net/ethernet/smsc/smsc911x.c
+++ b/drivers/net/ethernet/smsc/smsc911x.c
@@ -116,7 +116,6 @@ struct smsc911x_data {
 
 	struct phy_device *phy_dev;
 	struct mii_bus *mii_bus;
-	int phy_irq[PHY_MAX_ADDR];
 	unsigned int using_extphy;
 	int last_duplex;
 	int last_carrier;
@@ -1073,7 +1072,6 @@ static int smsc911x_mii_init(struct platform_device *pdev,
 	pdata->mii_bus->priv = pdata;
 	pdata->mii_bus->read = smsc911x_mii_read;
 	pdata->mii_bus->write = smsc911x_mii_write;
-	memcpy(pdata->mii_bus->irq, pdata->phy_irq, sizeof(pdata->mii_bus));
 
 	pdata->mii_bus->parent = &pdev->dev;
 
-- 
cgit v0.10.2


From 8e96a87c5431c256feb65bcfc5aec92d9f7839b6 Mon Sep 17 00:00:00 2001
From: Cyril Bur <cyrilbur@gmail.com>
Date: Fri, 17 Jun 2016 14:58:34 +1000
Subject: powerpc/tm: Always reclaim in start_thread() for exec() class
 syscalls

Userspace can quite legitimately perform an exec() syscall with a
suspended transaction. exec() does not return to the old process, rather
it load a new one and starts that, the expectation therefore is that the
new process starts not in a transaction. Currently exec() is not treated
any differently to any other syscall which creates problems.

Firstly it could allow a new process to start with a suspended
transaction for a binary that no longer exists. This means that the
checkpointed state won't be valid and if the suspended transaction were
ever to be resumed and subsequently aborted (a possibility which is
exceedingly likely as exec()ing will likely doom the transaction) the
new process will jump to invalid state.

Secondly the incorrect attempt to keep the transactional state while
still zeroing state for the new process creates at least two TM Bad
Things. The first triggers on the rfid to return to userspace as
start_thread() has given the new process a 'clean' MSR but the suspend
will still be set in the hardware MSR. The second TM Bad Thing triggers
in __switch_to() as the processor is still transactionally suspended but
__switch_to() wants to zero the TM sprs for the new process.

This is an example of the outcome of calling exec() with a suspended
transaction. Note the first 700 is likely the first TM bad thing
decsribed earlier only the kernel can't report it as we've loaded
userspace registers. c000000000009980 is the rfid in
fast_exception_return()

  Bad kernel stack pointer 3fffcfa1a370 at c000000000009980
  Oops: Bad kernel stack pointer, sig: 6 [#1]
  CPU: 0 PID: 2006 Comm: tm-execed Not tainted
  NIP: c000000000009980 LR: 0000000000000000 CTR: 0000000000000000
  REGS: c00000003ffefd40 TRAP: 0700   Not tainted
  MSR: 8000000300201031 <SF,ME,IR,DR,LE,TM[SE]>  CR: 00000000  XER: 00000000
  CFAR: c0000000000098b4 SOFTE: 0
  PACATMSCRATCH: b00000010000d033
  GPR00: 0000000000000000 00003fffcfa1a370 0000000000000000 0000000000000000
  GPR04: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
  GPR08: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
  GPR12: 00003fff966611c0 0000000000000000 0000000000000000 0000000000000000
  NIP [c000000000009980] fast_exception_return+0xb0/0xb8
  LR [0000000000000000]           (null)
  Call Trace:
  Instruction dump:
  f84d0278 e9a100d8 7c7b03a6 e84101a0 7c4ff120 e8410170 7c5a03a6 e8010070
  e8410080 e8610088 e8810090 e8210078 <4c000024> 48000000 e8610178 88ed023b

  Kernel BUG at c000000000043e80 [verbose debug info unavailable]
  Unexpected TM Bad Thing exception at c000000000043e80 (msr 0x201033)
  Oops: Unrecoverable exception, sig: 6 [#2]
  CPU: 0 PID: 2006 Comm: tm-execed Tainted: G      D
  task: c0000000fbea6d80 ti: c00000003ffec000 task.ti: c0000000fb7ec000
  NIP: c000000000043e80 LR: c000000000015a24 CTR: 0000000000000000
  REGS: c00000003ffef7e0 TRAP: 0700   Tainted: G      D
  MSR: 8000000300201033 <SF,ME,IR,DR,RI,LE,TM[SE]>  CR: 28002828  XER: 00000000
  CFAR: c000000000015a20 SOFTE: 0
  PACATMSCRATCH: b00000010000d033
  GPR00: 0000000000000000 c00000003ffefa60 c000000000db5500 c0000000fbead000
  GPR04: 8000000300001033 2222222222222222 2222222222222222 00000000ff160000
  GPR08: 0000000000000000 800000010000d033 c0000000fb7e3ea0 c00000000fe00004
  GPR12: 0000000000002200 c00000000fe00000 0000000000000000 0000000000000000
  GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
  GPR20: 0000000000000000 0000000000000000 c0000000fbea7410 00000000ff160000
  GPR24: c0000000ffe1f600 c0000000fbea8700 c0000000fbea8700 c0000000fbead000
  GPR28: c000000000e20198 c0000000fbea6d80 c0000000fbeab680 c0000000fbea6d80
  NIP [c000000000043e80] tm_restore_sprs+0xc/0x1c
  LR [c000000000015a24] __switch_to+0x1f4/0x420
  Call Trace:
  Instruction dump:
  7c800164 4e800020 7c0022a6 f80304a8 7c0222a6 f80304b0 7c0122a6 f80304b8
  4e800020 e80304a8 7c0023a6 e80304b0 <7c0223a6> e80304b8 7c0123a6 4e800020

This fixes CVE-2016-5828.

Fixes: bc2a9408fa65 ("powerpc: Hook in new transactional memory code")
Cc: stable@vger.kernel.org # v3.9+
Signed-off-by: Cyril Bur <cyrilbur@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>

diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index e2f12cb..0b93893 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1505,6 +1505,16 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
 		current->thread.regs = regs - 1;
 	}
 
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	/*
+	 * Clear any transactional state, we're exec()ing. The cause is
+	 * not important as there will never be a recheckpoint so it's not
+	 * user visible.
+	 */
+	if (MSR_TM_SUSPENDED(mfmsr()))
+		tm_reclaim_current(0);
+#endif
+
 	memset(regs->gpr, 0, sizeof(regs->gpr));
 	regs->ctr = 0;
 	regs->link = 0;
-- 
cgit v0.10.2


From 0efce9da1255f13d910842b62cb14371044a3c50 Mon Sep 17 00:00:00 2001
From: Sudeep Holla <sudeep.holla@arm.com>
Date: Wed, 8 Jun 2016 11:38:55 +0100
Subject: arm64: KVM: fix build with CONFIG_ARM_PMU disabled

When CONFIG_ARM_PMU is disabled, we get the following build error:

arch/arm64/kvm/sys_regs.c: In function 'pmu_counter_idx_valid':
arch/arm64/kvm/sys_regs.c:564:27: error: 'ARMV8_PMU_CYCLE_IDX' undeclared (first use in this function)
  if (idx >= val && idx != ARMV8_PMU_CYCLE_IDX)
                           ^
arch/arm64/kvm/sys_regs.c:564:27: note: each undeclared identifier is reported only once for each function it appears in
arch/arm64/kvm/sys_regs.c: In function 'access_pmu_evcntr':
arch/arm64/kvm/sys_regs.c:592:10: error: 'ARMV8_PMU_CYCLE_IDX' undeclared (first use in this function)
    idx = ARMV8_PMU_CYCLE_IDX;
          ^
arch/arm64/kvm/sys_regs.c: In function 'access_pmu_evtyper':
arch/arm64/kvm/sys_regs.c:638:14: error: 'ARMV8_PMU_CYCLE_IDX' undeclared (first use in this function)
   if (idx == ARMV8_PMU_CYCLE_IDX)
              ^
arch/arm64/kvm/hyp/switch.c:86:15: error: 'ARMV8_PMU_USERENR_MASK' undeclared (first use in this function)
  write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0);

This patch fixes the build with CONFIG_ARM_PMU disabled.

Cc: Christoffer Dall <christoffer.dall@linaro.org>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>

diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h
index fe389ac..92e7e97 100644
--- a/include/kvm/arm_pmu.h
+++ b/include/kvm/arm_pmu.h
@@ -18,13 +18,13 @@
 #ifndef __ASM_ARM_KVM_PMU_H
 #define __ASM_ARM_KVM_PMU_H
 
-#ifdef CONFIG_KVM_ARM_PMU
-
 #include <linux/perf_event.h>
 #include <asm/perf_event.h>
 
 #define ARMV8_PMU_CYCLE_IDX		(ARMV8_PMU_MAX_COUNTERS - 1)
 
+#ifdef CONFIG_KVM_ARM_PMU
+
 struct kvm_pmc {
 	u8 idx;	/* index into the pmu->pmc array */
 	struct perf_event *perf_event;
-- 
cgit v0.10.2


From 583248e6620a4726093295e2d6785fcbc2e86428 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Wed, 1 Jun 2016 12:10:08 +0100
Subject: iommu/iova: Disable preemption around use of this_cpu_ptr()

Between acquiring the this_cpu_ptr() and using it, ideally we don't want
to be preempted and work on another CPU's private data. this_cpu_ptr()
checks whether or not preemption is disable, and get_cpu_ptr() provides
a convenient wrapper for operating on the cpu ptr inside a preemption
disabled critical section (which currently is provided by the
spinlock).

[  167.997877] BUG: using smp_processor_id() in preemptible [00000000] code: usb-storage/216
[  167.997940] caller is debug_smp_processor_id+0x17/0x20
[  167.997945] CPU: 7 PID: 216 Comm: usb-storage Tainted: G     U          4.7.0-rc1-gfxbench-RO_Patchwork_1057+ #1
[  167.997948] Hardware name: Hewlett-Packard HP Pro 3500 Series/2ABF, BIOS 8.11 10/24/2012
[  167.997951]  0000000000000000 ffff880118b7f9c8 ffffffff8140dca5 0000000000000007
[  167.997958]  ffffffff81a3a7e9 ffff880118b7f9f8 ffffffff8142a927 0000000000000000
[  167.997965]  ffff8800d499ed58 0000000000000001 00000000000fffff ffff880118b7fa08
[  167.997971] Call Trace:
[  167.997977]  [<ffffffff8140dca5>] dump_stack+0x67/0x92
[  167.997981]  [<ffffffff8142a927>] check_preemption_disabled+0xd7/0xe0
[  167.997985]  [<ffffffff8142a947>] debug_smp_processor_id+0x17/0x20
[  167.997990]  [<ffffffff81507e17>] alloc_iova_fast+0xb7/0x210
[  167.997994]  [<ffffffff8150c55f>] intel_alloc_iova+0x7f/0xd0
[  167.997998]  [<ffffffff8151021d>] intel_map_sg+0xbd/0x240
[  167.998002]  [<ffffffff810e5efd>] ? debug_lockdep_rcu_enabled+0x1d/0x20
[  167.998009]  [<ffffffff81596059>] usb_hcd_map_urb_for_dma+0x4b9/0x5a0
[  167.998013]  [<ffffffff81596d19>] usb_hcd_submit_urb+0xe9/0xaa0
[  167.998017]  [<ffffffff810cff2f>] ? mark_held_locks+0x6f/0xa0
[  167.998022]  [<ffffffff810d525c>] ? __raw_spin_lock_init+0x1c/0x50
[  167.998025]  [<ffffffff810e5efd>] ? debug_lockdep_rcu_enabled+0x1d/0x20
[  167.998028]  [<ffffffff815988f3>] usb_submit_urb+0x3f3/0x5a0
[  167.998032]  [<ffffffff810d0082>] ? trace_hardirqs_on_caller+0x122/0x1b0
[  167.998035]  [<ffffffff81599ae7>] usb_sg_wait+0x67/0x150
[  167.998039]  [<ffffffff815dc202>] usb_stor_bulk_transfer_sglist.part.3+0x82/0xd0
[  167.998042]  [<ffffffff815dc29c>] usb_stor_bulk_srb+0x4c/0x60
[  167.998045]  [<ffffffff815dc42e>] usb_stor_Bulk_transport+0x17e/0x420
[  167.998049]  [<ffffffff815dcf32>] usb_stor_invoke_transport+0x242/0x540
[  167.998052]  [<ffffffff810e5efd>] ? debug_lockdep_rcu_enabled+0x1d/0x20
[  167.998058]  [<ffffffff815dba19>] usb_stor_transparent_scsi_command+0x9/0x10
[  167.998061]  [<ffffffff815de518>] usb_stor_control_thread+0x158/0x260
[  167.998064]  [<ffffffff815de3c0>] ? fill_inquiry_response+0x20/0x20
[  167.998067]  [<ffffffff815de3c0>] ? fill_inquiry_response+0x20/0x20
[  167.998071]  [<ffffffff8109ddfa>] kthread+0xea/0x100
[  167.998078]  [<ffffffff817ac6af>] ret_from_fork+0x1f/0x40
[  167.998081]  [<ffffffff8109dd10>] ? kthread_create_on_node+0x1f0/0x1f0

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=96293
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Joerg Roedel <joro@8bytes.org>
Cc: iommu@lists.linux-foundation.org
Cc: linux-kernel@vger.kernel.org
Fixes: 9257b4a206fc ('iommu/iova: introduce per-cpu caching to iova allocation')
Signed-off-by: Joerg Roedel <jroedel@suse.de>

diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index ba764a0..e23001b 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -420,8 +420,10 @@ retry:
 
 		/* Try replenishing IOVAs by flushing rcache. */
 		flushed_rcache = true;
+		preempt_disable();
 		for_each_online_cpu(cpu)
 			free_cpu_cached_iovas(cpu, iovad);
+		preempt_enable();
 		goto retry;
 	}
 
@@ -749,7 +751,7 @@ static bool __iova_rcache_insert(struct iova_domain *iovad,
 	bool can_insert = false;
 	unsigned long flags;
 
-	cpu_rcache = this_cpu_ptr(rcache->cpu_rcaches);
+	cpu_rcache = get_cpu_ptr(rcache->cpu_rcaches);
 	spin_lock_irqsave(&cpu_rcache->lock, flags);
 
 	if (!iova_magazine_full(cpu_rcache->loaded)) {
@@ -779,6 +781,7 @@ static bool __iova_rcache_insert(struct iova_domain *iovad,
 		iova_magazine_push(cpu_rcache->loaded, iova_pfn);
 
 	spin_unlock_irqrestore(&cpu_rcache->lock, flags);
+	put_cpu_ptr(rcache->cpu_rcaches);
 
 	if (mag_to_free) {
 		iova_magazine_free_pfns(mag_to_free, iovad);
@@ -812,7 +815,7 @@ static unsigned long __iova_rcache_get(struct iova_rcache *rcache,
 	bool has_pfn = false;
 	unsigned long flags;
 
-	cpu_rcache = this_cpu_ptr(rcache->cpu_rcaches);
+	cpu_rcache = get_cpu_ptr(rcache->cpu_rcaches);
 	spin_lock_irqsave(&cpu_rcache->lock, flags);
 
 	if (!iova_magazine_empty(cpu_rcache->loaded)) {
@@ -834,6 +837,7 @@ static unsigned long __iova_rcache_get(struct iova_rcache *rcache,
 		iova_pfn = iova_magazine_pop(cpu_rcache->loaded, limit_pfn);
 
 	spin_unlock_irqrestore(&cpu_rcache->lock, flags);
+	put_cpu_ptr(rcache->cpu_rcaches);
 
 	return iova_pfn;
 }
-- 
cgit v0.10.2


From 591d215afcc2f94e8e2c69a63c924c044677eb31 Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Wed, 8 Jun 2016 17:24:45 +0100
Subject: KVM: arm/arm64: Stop leaking vcpu pid references

kvm provides kvm_vcpu_uninit(), which amongst other things, releases the
last reference to the struct pid of the task that was last running the vcpu.

On arm64 built with CONFIG_DEBUG_KMEMLEAK, starting a guest with kvmtool,
then killing it with SIGKILL results (after some considerable time) in:
> cat /sys/kernel/debug/kmemleak
> unreferenced object 0xffff80007d5ea080 (size 128):
>  comm "lkvm", pid 2025, jiffies 4294942645 (age 1107.776s)
>  hex dump (first 32 bytes):
>    01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
>    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
>  backtrace:
>    [<ffff8000001b30ec>] create_object+0xfc/0x278
>    [<ffff80000071da34>] kmemleak_alloc+0x34/0x70
>    [<ffff80000019fa2c>] kmem_cache_alloc+0x16c/0x1d8
>    [<ffff8000000d0474>] alloc_pid+0x34/0x4d0
>    [<ffff8000000b5674>] copy_process.isra.6+0x79c/0x1338
>    [<ffff8000000b633c>] _do_fork+0x74/0x320
>    [<ffff8000000b66b0>] SyS_clone+0x18/0x20
>    [<ffff800000085cb0>] el0_svc_naked+0x24/0x28
>    [<ffffffffffffffff>] 0xffffffffffffffff

On x86 kvm_vcpu_uninit() is called on the path from kvm_arch_destroy_vm(),
on arm no equivalent call is made. Add the call to kvm_arch_vcpu_free().

Signed-off-by: James Morse <james.morse@arm.com>
Fixes: 749cf76c5a36 ("KVM: ARM: Initial skeleton to compile KVM support")
Cc: <stable@vger.kernel.org> # 3.10+
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 893941e..f1bde7c 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -263,6 +263,7 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
 	kvm_timer_vcpu_terminate(vcpu);
 	kvm_vgic_vcpu_destroy(vcpu);
 	kvm_pmu_vcpu_destroy(vcpu);
+	kvm_vcpu_uninit(vcpu);
 	kmem_cache_free(kvm_vcpu_cache, vcpu);
 }
 
-- 
cgit v0.10.2


From 3bd4f9112f87a9c65fe6e817272806167f0bc9ed Mon Sep 17 00:00:00 2001
From: Jan Niehusmann <jan@gondor.com>
Date: Mon, 6 Jun 2016 14:20:11 +0200
Subject: iommu/vt-d: Fix overflow of iommu->domains array

The valid range of 'did' in get_iommu_domain(*iommu, did)
is 0..cap_ndoms(iommu->cap), so don't exceed that
range in free_all_cpu_cached_iovas().

The user-visible impact of the out-of-bounds access is the machine
hanging on suspend-to-ram. It is, in fact, a kernel panic, but due
to already suspended devices, that's often not visible to the user.

Fixes: 22e2f9fa63b0 ("iommu/vt-d: Use per-cpu IOVA caching")
Signed-off-by: Jan Niehusmann <jan@gondor.com>
Tested-By: Marius Vlad <marius.c.vlad@intel.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>

diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 1070094..cfe410e 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -4607,7 +4607,7 @@ static void free_all_cpu_cached_iovas(unsigned int cpu)
 		if (!iommu)
 			continue;
 
-		for (did = 0; did < 0xffff; did++) {
+		for (did = 0; did < cap_ndoms(iommu->cap); did++) {
 			domain = get_iommu_domain(iommu, did);
 
 			if (!domain)
-- 
cgit v0.10.2


From 6082ee72e9d89e80a664418be06f47d728243e85 Mon Sep 17 00:00:00 2001
From: Nicolas Iooss <nicolas.iooss_linux@m4x.org>
Date: Sun, 26 Jun 2016 10:33:29 +0200
Subject: iommu/amd: Initialize devid variable before using it

Commit 2a0cb4e2d423 ("iommu/amd: Add new map for storing IVHD dev entry
type HID") added a call to DUMP_printk in init_iommu_from_acpi() which
used the value of devid before this variable was initialized.

Fixes: 2a0cb4e2d423 ('iommu/amd: Add new map for storing IVHD dev entry type HID')
Signed-off-by: Nicolas Iooss <nicolas.iooss_linux@m4x.org>
Signed-off-by: Joerg Roedel <jroedel@suse.de>

diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 9e00341..d091def 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -1107,13 +1107,13 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu,
 				break;
 			}
 
+			devid = e->devid;
 			DUMP_printk("  DEV_ACPI_HID(%s[%s])\t\tdevid: %02x:%02x.%x\n",
 				    hid, uid,
 				    PCI_BUS_NUM(devid),
 				    PCI_SLOT(devid),
 				    PCI_FUNC(devid));
 
-			devid  = e->devid;
 			flags = e->flags;
 
 			ret = add_acpi_hid_device(hid, uid, &devid, false);
-- 
cgit v0.10.2


From d20cb71dbf3487f24549ede1a8e2d67579b4632e Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ZenIV.linux.org.uk>
Date: Mon, 20 Jun 2016 13:14:36 -0400
Subject: make nfs_atomic_open() call d_drop() on all ->open_context() errors.

In "NFSv4: Move dentry instantiation into the NFSv4-specific atomic open code"
unconditional d_drop() after the ->open_context() had been removed.  It had
been correct for success cases (there ->open_context() itself had been doing
dcache manipulations), but not for error ones.  Only one of those (ENOENT)
got a compensatory d_drop() added in that commit, but in fact it should've
been done for all errors.  As it is, the case of O_CREAT non-exclusive open
on a hashed negative dentry racing with e.g. symlink creation from another
client ended up with ->open_context() getting an error and proceeding to
call nfs_lookup().  On a hashed dentry, which would've instantly triggered
BUG_ON() in d_materialise_unique() (or, these days, its equivalent in
d_splice_alias()).

Cc: stable@vger.kernel.org # v3.10+
Tested-by: Oleg Drokin <green@linuxhacker.ru>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 2817cce..d8015a0 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1539,9 +1539,9 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
 		err = PTR_ERR(inode);
 		trace_nfs_atomic_open_exit(dir, ctx, open_flags, err);
 		put_nfs_open_context(ctx);
+		d_drop(dentry);
 		switch (err) {
 		case -ENOENT:
-			d_drop(dentry);
 			d_add(dentry, NULL);
 			nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
 			break;
-- 
cgit v0.10.2


From 749d088b8e7f4b9826ede02b9a043e417fa84aa1 Mon Sep 17 00:00:00 2001
From: Minfei Huang <mnghuan@gmail.com>
Date: Fri, 27 May 2016 14:17:10 +0800
Subject: pvclock: Add CPU barriers to get correct version value

Protocol for the "version" fields is: hypervisor raises it (making it
uneven) before it starts updating the fields and raises it again (making
it even) when it is done.  Thus the guest can make sure the time values
it got are consistent by checking the version before and after reading
them.

Add CPU barries after getting version value just like what function
vread_pvclock does, because all of callees in this function is inline.

Fixes: 502dfeff239e8313bfbe906ca0a1a6827ac8481b
Cc: stable@vger.kernel.org
Signed-off-by: Minfei Huang <mnghuan@gmail.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h
index fdcc040..538ae94 100644
--- a/arch/x86/include/asm/pvclock.h
+++ b/arch/x86/include/asm/pvclock.h
@@ -85,6 +85,8 @@ unsigned __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src,
 	u8 ret_flags;
 
 	version = src->version;
+	/* Make the latest version visible */
+	smp_rmb();
 
 	offset = pvclock_get_nsec_offset(src);
 	ret = src->system_time + offset;
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
index 99bfc02..7f82fe0 100644
--- a/arch/x86/kernel/pvclock.c
+++ b/arch/x86/kernel/pvclock.c
@@ -66,6 +66,8 @@ u8 pvclock_read_flags(struct pvclock_vcpu_time_info *src)
 
 	do {
 		version = __pvclock_read_cycles(src, &ret, &flags);
+		/* Make sure that the version double-check is last. */
+		smp_rmb();
 	} while ((src->version & 1) || version != src->version);
 
 	return flags & valid_flags;
@@ -80,6 +82,8 @@ cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
 
 	do {
 		version = __pvclock_read_cycles(src, &ret, &flags);
+		/* Make sure that the version double-check is last. */
+		smp_rmb();
 	} while ((src->version & 1) || version != src->version);
 
 	if (unlikely((flags & PVCLOCK_GUEST_STOPPED) != 0)) {
-- 
cgit v0.10.2


From f7550d076d55c1b5f02f95012e3a5a84d264c47d Mon Sep 17 00:00:00 2001
From: Minfei Huang <mnghuan@gmail.com>
Date: Fri, 27 May 2016 14:17:11 +0800
Subject: pvclock: Cleanup to remove function pvclock_get_nsec_offset

Function __pvclock_read_cycles is short enough, so there is no need to
have another function pvclock_get_nsec_offset to calculate tsc delta.
It's better to combine it into function __pvclock_read_cycles.

Remove useless variables in function __pvclock_read_cycles.

Signed-off-by: Minfei Huang <mnghuan@gmail.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h
index 538ae94..7c1c895 100644
--- a/arch/x86/include/asm/pvclock.h
+++ b/arch/x86/include/asm/pvclock.h
@@ -69,31 +69,22 @@ static inline u64 pvclock_scale_delta(u64 delta, u32 mul_frac, int shift)
 }
 
 static __always_inline
-u64 pvclock_get_nsec_offset(const struct pvclock_vcpu_time_info *src)
-{
-	u64 delta = rdtsc_ordered() - src->tsc_timestamp;
-	return pvclock_scale_delta(delta, src->tsc_to_system_mul,
-				   src->tsc_shift);
-}
-
-static __always_inline
 unsigned __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src,
 			       cycle_t *cycles, u8 *flags)
 {
 	unsigned version;
-	cycle_t ret, offset;
-	u8 ret_flags;
+	cycle_t offset;
+	u64 delta;
 
 	version = src->version;
 	/* Make the latest version visible */
 	smp_rmb();
 
-	offset = pvclock_get_nsec_offset(src);
-	ret = src->system_time + offset;
-	ret_flags = src->flags;
-
-	*cycles = ret;
-	*flags = ret_flags;
+	delta = rdtsc_ordered() - src->tsc_timestamp;
+	offset = pvclock_scale_delta(delta, src->tsc_to_system_mul,
+				   src->tsc_shift);
+	*cycles = src->system_time + offset;
+	*flags = src->flags;
 	return version;
 }
 
-- 
cgit v0.10.2


From ed911b43adb889c37a37fa57a995f0b460c633b6 Mon Sep 17 00:00:00 2001
From: Minfei Huang <mnghuan@gmail.com>
Date: Sat, 28 May 2016 20:27:43 +0800
Subject: pvclock: Get rid of __pvclock_read_cycles in function
 pvclock_read_flags

There is a generic function __pvclock_read_cycles to be used to get both
flags and cycles. For function pvclock_read_flags, it's useless to get
cycles value. To make this function be more effective, get this variable
flags directly in function.

Signed-off-by: Minfei Huang <mnghuan@gmail.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
index 7f82fe0..06c58ce 100644
--- a/arch/x86/kernel/pvclock.c
+++ b/arch/x86/kernel/pvclock.c
@@ -61,11 +61,14 @@ void pvclock_resume(void)
 u8 pvclock_read_flags(struct pvclock_vcpu_time_info *src)
 {
 	unsigned version;
-	cycle_t ret;
 	u8 flags;
 
 	do {
-		version = __pvclock_read_cycles(src, &ret, &flags);
+		version = src->version;
+		/* Make the latest version visible */
+		smp_rmb();
+
+		flags = src->flags;
 		/* Make sure that the version double-check is last. */
 		smp_rmb();
 	} while ((src->version & 1) || version != src->version);
-- 
cgit v0.10.2


From 8d93c874ac899bfdf0ad3787baef684a0c878c2c Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Mon, 20 Jun 2016 22:28:02 -0300
Subject: KVM: x86: move nsec_to_cycles from x86.c to x86.h

Move the inline function nsec_to_cycles from x86.c to x86.h, as
the next patch uses it from lapic.c.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 902d9da..7da5dd2 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1244,12 +1244,6 @@ static atomic_t kvm_guest_has_master_clock = ATOMIC_INIT(0);
 static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz);
 static unsigned long max_tsc_khz;
 
-static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
-{
-	return pvclock_scale_delta(nsec, vcpu->arch.virtual_tsc_mult,
-				   vcpu->arch.virtual_tsc_shift);
-}
-
 static u32 adjust_tsc_khz(u32 khz, s32 ppm)
 {
 	u64 v = (u64)khz * (1000000 + ppm);
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 7ce3634..a82ca46 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -2,6 +2,7 @@
 #define ARCH_X86_KVM_X86_H
 
 #include <linux/kvm_host.h>
+#include <asm/pvclock.h>
 #include "kvm_cache_regs.h"
 
 #define MSR_IA32_CR_PAT_DEFAULT  0x0007040600070406ULL
@@ -195,6 +196,12 @@ extern unsigned int lapic_timer_advance_ns;
 
 extern struct static_key kvm_no_apic_vcpu;
 
+static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
+{
+	return pvclock_scale_delta(nsec, vcpu->arch.virtual_tsc_mult,
+				   vcpu->arch.virtual_tsc_shift);
+}
+
 /* Same "calling convention" as do_div:
  * - divide (n << 32) by base
  * - put result in n
-- 
cgit v0.10.2


From b606f189c7d5bf9b875bba168162fe05287880fe Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Mon, 20 Jun 2016 22:33:48 -0300
Subject: KVM: LAPIC: cap __delay at lapic_timer_advance_ns

The host timer which emulates the guest LAPIC TSC deadline
timer has its expiration diminished by lapic_timer_advance_ns
nanoseconds. Therefore if, at wait_lapic_expire, a difference
larger than lapic_timer_advance_ns is encountered, delay at most
lapic_timer_advance_ns.

This fixes a problem where the guest can cause the host
to delay for large amounts of time.

Reported-by: Alan Jenkins <alan.christopher.jenkins@gmail.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index bbb5b28..a397200 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1310,7 +1310,8 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu)
 
 	/* __delay is delay_tsc whenever the hardware has TSC, thus always.  */
 	if (guest_tsc < tsc_deadline)
-		__delay(tsc_deadline - guest_tsc);
+		__delay(min(tsc_deadline - guest_tsc,
+			nsec_to_cycles(vcpu, lapic_timer_advance_ns)));
 }
 
 static void start_apic_timer(struct kvm_lapic *apic)
-- 
cgit v0.10.2


From ff30ef40deca4658e27b0c596e7baf39115e858f Mon Sep 17 00:00:00 2001
From: Quentin Casasnovas <quentin.casasnovas@oracle.com>
Date: Sat, 18 Jun 2016 11:01:05 +0200
Subject: KVM: nVMX: VMX instructions: fix segment checks when L1 is in long
 mode.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

I couldn't get Xen to boot a L2 HVM when it was nested under KVM - it was
getting a GP(0) on a rather unspecial vmread from Xen:

     (XEN) ----[ Xen-4.7.0-rc  x86_64  debug=n  Not tainted ]----
     (XEN) CPU:    1
     (XEN) RIP:    e008:[<ffff82d0801e629e>] vmx_get_segment_register+0x14e/0x450
     (XEN) RFLAGS: 0000000000010202   CONTEXT: hypervisor (d1v0)
     (XEN) rax: ffff82d0801e6288   rbx: ffff83003ffbfb7c   rcx: fffffffffffab928
     (XEN) rdx: 0000000000000000   rsi: 0000000000000000   rdi: ffff83000bdd0000
     (XEN) rbp: ffff83000bdd0000   rsp: ffff83003ffbfab0   r8:  ffff830038813910
     (XEN) r9:  ffff83003faf3958   r10: 0000000a3b9f7640   r11: ffff83003f82d418
     (XEN) r12: 0000000000000000   r13: ffff83003ffbffff   r14: 0000000000004802
     (XEN) r15: 0000000000000008   cr0: 0000000080050033   cr4: 00000000001526e0
     (XEN) cr3: 000000003fc79000   cr2: 0000000000000000
     (XEN) ds: 0000   es: 0000   fs: 0000   gs: 0000   ss: 0000   cs: e008
     (XEN) Xen code around <ffff82d0801e629e> (vmx_get_segment_register+0x14e/0x450):
     (XEN)  00 00 41 be 02 48 00 00 <44> 0f 78 74 24 08 0f 86 38 56 00 00 b8 08 68 00
     (XEN) Xen stack trace from rsp=ffff83003ffbfab0:

     ...

     (XEN) Xen call trace:
     (XEN)    [<ffff82d0801e629e>] vmx_get_segment_register+0x14e/0x450
     (XEN)    [<ffff82d0801f3695>] get_page_from_gfn_p2m+0x165/0x300
     (XEN)    [<ffff82d0801bfe32>] hvmemul_get_seg_reg+0x52/0x60
     (XEN)    [<ffff82d0801bfe93>] hvm_emulate_prepare+0x53/0x70
     (XEN)    [<ffff82d0801ccacb>] handle_mmio+0x2b/0xd0
     (XEN)    [<ffff82d0801be591>] emulate.c#_hvm_emulate_one+0x111/0x2c0
     (XEN)    [<ffff82d0801cd6a4>] handle_hvm_io_completion+0x274/0x2a0
     (XEN)    [<ffff82d0801f334a>] __get_gfn_type_access+0xfa/0x270
     (XEN)    [<ffff82d08012f3bb>] timer.c#add_entry+0x4b/0xb0
     (XEN)    [<ffff82d08012f80c>] timer.c#remove_entry+0x7c/0x90
     (XEN)    [<ffff82d0801c8433>] hvm_do_resume+0x23/0x140
     (XEN)    [<ffff82d0801e4fe7>] vmx_do_resume+0xa7/0x140
     (XEN)    [<ffff82d080164aeb>] context_switch+0x13b/0xe40
     (XEN)    [<ffff82d080128e6e>] schedule.c#schedule+0x22e/0x570
     (XEN)    [<ffff82d08012c0cc>] softirq.c#__do_softirq+0x5c/0x90
     (XEN)    [<ffff82d0801602c5>] domain.c#idle_loop+0x25/0x50
     (XEN)
     (XEN)
     (XEN) ****************************************
     (XEN) Panic on CPU 1:
     (XEN) GENERAL PROTECTION FAULT
     (XEN) [error_code=0000]
     (XEN) ****************************************

Tracing my host KVM showed it was the one injecting the GP(0) when
emulating the VMREAD and checking the destination segment permissions in
get_vmx_mem_address():

     3)               |    vmx_handle_exit() {
     3)               |      handle_vmread() {
     3)               |        nested_vmx_check_permission() {
     3)               |          vmx_get_segment() {
     3)   0.074 us    |            vmx_read_guest_seg_base();
     3)   0.065 us    |            vmx_read_guest_seg_selector();
     3)   0.066 us    |            vmx_read_guest_seg_ar();
     3)   1.636 us    |          }
     3)   0.058 us    |          vmx_get_rflags();
     3)   0.062 us    |          vmx_read_guest_seg_ar();
     3)   3.469 us    |        }
     3)               |        vmx_get_cs_db_l_bits() {
     3)   0.058 us    |          vmx_read_guest_seg_ar();
     3)   0.662 us    |        }
     3)               |        get_vmx_mem_address() {
     3)   0.068 us    |          vmx_cache_reg();
     3)               |          vmx_get_segment() {
     3)   0.074 us    |            vmx_read_guest_seg_base();
     3)   0.068 us    |            vmx_read_guest_seg_selector();
     3)   0.071 us    |            vmx_read_guest_seg_ar();
     3)   1.756 us    |          }
     3)               |          kvm_queue_exception_e() {
     3)   0.066 us    |            kvm_multiple_exception();
     3)   0.684 us    |          }
     3)   4.085 us    |        }
     3)   9.833 us    |      }
     3) + 10.366 us   |    }

Cross-checking the KVM/VMX VMREAD emulation code with the Intel Software
Developper Manual Volume 3C - "VMREAD - Read Field from Virtual-Machine
Control Structure", I found that we're enforcing that the destination
operand is NOT located in a read-only data segment or any code segment when
the L1 is in long mode - BUT that check should only happen when it is in
protected mode.

Shuffling the code a bit to make our emulation follow the specification
allows me to boot a Xen dom0 in a nested KVM and start HVM L2 guests
without problems.

Fixes: f9eb4af67c9d ("KVM: nVMX: VMX instructions: add checks for #GP/#SS exceptions")
Signed-off-by: Quentin Casasnovas <quentin.casasnovas@oracle.com>
Cc: Eugene Korenevsky <ekorenevsky@gmail.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: linux-stable <stable@vger.kernel.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 003618e..64a79f2 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -6671,7 +6671,13 @@ static int get_vmx_mem_address(struct kvm_vcpu *vcpu,
 
 	/* Checks for #GP/#SS exceptions. */
 	exn = false;
-	if (is_protmode(vcpu)) {
+	if (is_long_mode(vcpu)) {
+		/* Long mode: #GP(0)/#SS(0) if the memory address is in a
+		 * non-canonical form. This is the only check on the memory
+		 * destination for long mode!
+		 */
+		exn = is_noncanonical_address(*ret);
+	} else if (is_protmode(vcpu)) {
 		/* Protected mode: apply checks for segment validity in the
 		 * following order:
 		 * - segment type check (#GP(0) may be thrown)
@@ -6688,17 +6694,10 @@ static int get_vmx_mem_address(struct kvm_vcpu *vcpu,
 			 * execute-only code segment
 			 */
 			exn = ((s.type & 0xa) == 8);
-	}
-	if (exn) {
-		kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
-		return 1;
-	}
-	if (is_long_mode(vcpu)) {
-		/* Long mode: #GP(0)/#SS(0) if the memory address is in a
-		 * non-canonical form. This is an only check for long mode.
-		 */
-		exn = is_noncanonical_address(*ret);
-	} else if (is_protmode(vcpu)) {
+		if (exn) {
+			kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
+			return 1;
+		}
 		/* Protected mode: #GP(0)/#SS(0) if the segment is unusable.
 		 */
 		exn = (s.unusable != 0);
-- 
cgit v0.10.2


From 48f1dcb55a7d29aeb8965c567660c14d0dfd1a42 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Thu, 23 Jun 2016 15:25:09 +0200
Subject: ipv6: enforce egress device match in per table nexthop lookups

with the commit 8c14586fc320 ("net: ipv6: Use passed in table for
nexthop lookups"), net hop lookup is first performed on route creation
in the passed-in table.
However device match is not enforced in table lookup, so the found
route can be later discarded due to egress device mismatch and no
global lookup will be performed.
This cause the following to fail:

ip link add dummy1 type dummy
ip link add dummy2 type dummy
ip link set dummy1 up
ip link set dummy2 up
ip route add 2001:db8:8086::/48 dev dummy1 metric 20
ip route add 2001:db8:d34d::/64 via 2001:db8:8086::2 dev dummy1 metric 20
ip route add 2001:db8:8086::/48 dev dummy2 metric 21
ip route add 2001:db8:d34d::/64 via 2001:db8:8086::2 dev dummy2 metric 21
RTNETLINK answers: No route to host

This change fixes the issue enforcing device lookup in
ip6_nh_lookup_table()

v1->v2: updated commit message title

Fixes: 8c14586fc320 ("net: ipv6: Use passed in table for nexthop lookups")
Reported-and-tested-by: Beniamino Galvani <bgalvani@redhat.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Acked-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 969913d..520b788 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1782,7 +1782,7 @@ static struct rt6_info *ip6_nh_lookup_table(struct net *net,
 	};
 	struct fib6_table *table;
 	struct rt6_info *rt;
-	int flags = 0;
+	int flags = RT6_LOOKUP_F_IFACE;
 
 	table = fib6_get_table(net, cfg->fc_table);
 	if (!table)
-- 
cgit v0.10.2


From 4192f672fae559f32d82de72a677701853cc98a7 Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Thu, 23 Jun 2016 16:28:58 +0100
Subject: vsock: make listener child lock ordering explicit

There are several places where the listener and pending or accept queue
child sockets are accessed at the same time.  Lockdep is unhappy that
two locks from the same class are held.

Tell lockdep that it is safe and document the lock ordering.

Originally Claudio Imbrenda <imbrenda@linux.vnet.ibm.com> sent a similar
patch asking whether this is safe.  I have audited the code and also
covered the vsock_pending_work() function.

Suggested-by: Claudio Imbrenda <imbrenda@linux.vnet.ibm.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index b5f1221..b96ac91 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -61,6 +61,14 @@
  * function will also cleanup rejected sockets, those that reach the connected
  * state but leave it before they have been accepted.
  *
+ * - Lock ordering for pending or accept queue sockets is:
+ *
+ *     lock_sock(listener);
+ *     lock_sock_nested(pending, SINGLE_DEPTH_NESTING);
+ *
+ * Using explicit nested locking keeps lockdep happy since normally only one
+ * lock of a given class may be taken at a time.
+ *
  * - Sockets created by user action will be cleaned up when the user process
  * calls close(2), causing our release implementation to be called. Our release
  * implementation will perform some cleanup then drop the last reference so our
@@ -443,7 +451,7 @@ void vsock_pending_work(struct work_struct *work)
 	cleanup = true;
 
 	lock_sock(listener);
-	lock_sock(sk);
+	lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
 
 	if (vsock_is_pending(sk)) {
 		vsock_remove_pending(listener, sk);
@@ -1292,7 +1300,7 @@ static int vsock_accept(struct socket *sock, struct socket *newsock, int flags)
 	if (connected) {
 		listener->sk_ack_backlog--;
 
-		lock_sock(connected);
+		lock_sock_nested(connected, SINGLE_DEPTH_NESTING);
 		vconnected = vsock_sk(connected);
 
 		/* If the listener socket has received an error, then we should
-- 
cgit v0.10.2


From ab2a4bf83902c170d29ba130a8abb5f9d90559e1 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Mon, 27 Jun 2016 10:23:10 -0400
Subject: USB: don't free bandwidth_mutex too early

The USB core contains a bug that can show up when a USB-3 host
controller is removed.  If the primary (USB-2) hcd structure is
released before the shared (USB-3) hcd, the core will try to do a
double-free of the common bandwidth_mutex.

The problem was described in graphical form by Chung-Geol Kim, who
first reported it:

=================================================
     At *remove USB(3.0) Storage
     sequence <1> --> <5> ((Problem Case))
=================================================
                                  VOLD
------------------------------------|------------
                                 (uevent)
                            ________|_________
                           |<1>               |
                           |dwc3_otg_sm_work  |
                           |usb_put_hcd       |
                           |peer_hcd(kref=2)|
                           |__________________|
                            ________|_________
                           |<2>               |
                           |New USB BUS #2    |
                           |                  |
                           |peer_hcd(kref=1)  |
                           |                  |
                         --(Link)-bandXX_mutex|
                         | |__________________|
                         |
    ___________________  |
   |<3>                | |
   |dwc3_otg_sm_work   | |
   |usb_put_hcd        | |
   |primary_hcd(kref=1)| |
   |___________________| |
    _________|_________  |
   |<4>                | |
   |New USB BUS #1     | |
   |hcd_release        | |
   |primary_hcd(kref=0)| |
   |                   | |
   |bandXX_mutex(free) |<-
   |___________________|
                               (( VOLD ))
                            ______|___________
                           |<5>               |
                           |      SCSI        |
                           |usb_put_hcd       |
                           |peer_hcd(kref=0)  |
                           |*hcd_release      |
                           |bandXX_mutex(free*)|<- double free
                           |__________________|

=================================================

This happens because hcd_release() frees the bandwidth_mutex whenever
it sees a primary hcd being released (which is not a very good idea
in any case), but in the course of releasing the primary hcd, it
changes the pointers in the shared hcd in such a way that the shared
hcd will appear to be primary when it gets released.

This patch fixes the problem by changing hcd_release() so that it
deallocates the bandwidth_mutex only when the _last_ hcd structure
referencing it is released.  The patch also removes an unnecessary
test, so that when an hcd is released, both the shared_hcd and
primary_hcd pointers in the hcd's peer will be cleared.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Reported-by: Chung-Geol Kim <chunggeol.kim@samsung.com>
Tested-by: Chung-Geol Kim <chunggeol.kim@samsung.com>
CC: <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
index 34b837a..d2e3f65 100644
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -2598,26 +2598,23 @@ EXPORT_SYMBOL_GPL(usb_create_hcd);
  * Don't deallocate the bandwidth_mutex until the last shared usb_hcd is
  * deallocated.
  *
- * Make sure to only deallocate the bandwidth_mutex when the primary HCD is
- * freed.  When hcd_release() is called for either hcd in a peer set
- * invalidate the peer's ->shared_hcd and ->primary_hcd pointers to
- * block new peering attempts
+ * Make sure to deallocate the bandwidth_mutex only when the last HCD is
+ * freed.  When hcd_release() is called for either hcd in a peer set,
+ * invalidate the peer's ->shared_hcd and ->primary_hcd pointers.
  */
 static void hcd_release(struct kref *kref)
 {
 	struct usb_hcd *hcd = container_of (kref, struct usb_hcd, kref);
 
 	mutex_lock(&usb_port_peer_mutex);
-	if (usb_hcd_is_primary_hcd(hcd)) {
-		kfree(hcd->address0_mutex);
-		kfree(hcd->bandwidth_mutex);
-	}
 	if (hcd->shared_hcd) {
 		struct usb_hcd *peer = hcd->shared_hcd;
 
 		peer->shared_hcd = NULL;
-		if (peer->primary_hcd == hcd)
-			peer->primary_hcd = NULL;
+		peer->primary_hcd = NULL;
+	} else {
+		kfree(hcd->address0_mutex);
+		kfree(hcd->bandwidth_mutex);
 	}
 	mutex_unlock(&usb_port_peer_mutex);
 	kfree(hcd);
-- 
cgit v0.10.2


From 023954351fae0e34ba247cff4d798c98290b20a4 Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@redhat.com>
Date: Thu, 23 Jun 2016 16:54:46 -0500
Subject: dax: fix offset overflow in dax_io

This isn't functionally apparent for some reason, but
when we test io at extreme offsets at the end of the loff_t
rang, such as in fstests xfs/071, the calculation of
"max" in dax_io() can be wrong due to pos + size overflowing.

For example,

# xfs_io -c "pwrite 9223372036854771712 512" /mnt/test/file

enters dax_io with:

start 0x7ffffffffffff000
end   0x7ffffffffffff200

and the rounded up "size" variable is 0x1000.  This yields:

pos + size 0x8000000000000000 (overflows loff_t)
       end 0x7ffffffffffff200

Due to the overflow, the min() function picks the wrong
value for the "max" variable, and when we send (max - pos)
into i.e. copy_from_iter_pmem() it is also the wrong value.

This somehow(tm) gets magically absorbed without incident,
probably because iter->count is correct.  But it seems best
to fix it up properly by comparing the two values as
unsigned.

Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>

diff --git a/fs/dax.c b/fs/dax.c
index 761495b..e207f8f 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -208,7 +208,12 @@ static ssize_t dax_io(struct inode *inode, struct iov_iter *iter,
 				dax.addr += first;
 				size = map_len - first;
 			}
-			max = min(pos + size, end);
+			/*
+			 * pos + size is one past the last offset for IO,
+			 * so pos + size can overflow loff_t at extreme offsets.
+			 * Cast to u64 to catch this and get the true minimum.
+			 */
+			max = min_t(u64, pos + size, end);
 		}
 
 		if (iov_iter_rw(iter) == WRITE) {
-- 
cgit v0.10.2


From 5ab666e09541e64ce2fd73411c3b5b9e4ad334b1 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Mon, 27 Jun 2016 23:47:15 +0200
Subject: intel_pstate: Do not clear utilization update hooks on policy changes

intel_pstate_set_policy() is invoked by the cpufreq core during
driver initialization, on changes of policy attributes (minimim and
maximum frequency, for example) via sysfs and via CPU notifications
from the platform firmware.  On some platforms the latter may occur
relatively often.

Commit bb6ab52f2bef (intel_pstate: Do not set utilization update hook
too early) made intel_pstate_set_policy() clear the CPU's utilization
update hook before updating the policy attributes for it (and set the
hook again after doind that), but that involves invoking
synchronize_sched() and adds overhead to the CPU notifications
mentioned above and to the sched-RCU handling in general.

That extra overhead is arguably not necessary, because updating
policy attributes when the CPU's utilization update hook is active
should not lead to any adverse effects, so drop the clearing of
the hook from intel_pstate_set_policy() and make it check if
the hook has been set already when attempting to set it.

Fixes: bb6ab52f2bef (intel_pstate: Do not set utilization update hook too early)
Reported-by: Jisheng Zhang <jszhang@marvell.com>
Tested-by: Jisheng Zhang <jszhang@marvell.com>
Tested-by: Doug Smythies <dsmythies@telus.net>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index fe9dc17..1fa1a32 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -1400,6 +1400,9 @@ static void intel_pstate_set_update_util_hook(unsigned int cpu_num)
 {
 	struct cpudata *cpu = all_cpu_data[cpu_num];
 
+	if (cpu->update_util_set)
+		return;
+
 	/* Prevent intel_pstate_update_util() from using stale data. */
 	cpu->sample.time = 0;
 	cpufreq_add_update_util_hook(cpu_num, &cpu->update_util,
@@ -1440,8 +1443,6 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
 	if (!policy->cpuinfo.max_freq)
 		return -ENODEV;
 
-	intel_pstate_clear_update_util_hook(policy->cpu);
-
 	pr_debug("set_policy cpuinfo.max %u policy->max %u\n",
 		 policy->cpuinfo.max_freq, policy->max);
 
-- 
cgit v0.10.2


From ca5eda5d3db6026080cff267459625c87c43e9ab Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Mon, 27 Jun 2016 14:50:13 +0900
Subject: cpufreq: dt: call of_node_put() before error out

If of_match_node() fails, this init function bails out without
calling of_node_put().

Also change of_node_put(of_root) to of_node_put(np); both of them
hold the same pointer, but it seems better to call of_node_put()
against the node returned by of_find_node_by_path().

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c
index 3646b14..0bb44d5 100644
--- a/drivers/cpufreq/cpufreq-dt-platdev.c
+++ b/drivers/cpufreq/cpufreq-dt-platdev.c
@@ -79,15 +79,16 @@ static const struct of_device_id machines[] __initconst = {
 static int __init cpufreq_dt_platdev_init(void)
 {
 	struct device_node *np = of_find_node_by_path("/");
+	const struct of_device_id *match;
 
 	if (!np)
 		return -ENODEV;
 
-	if (!of_match_node(machines, np))
+	match = of_match_node(machines, np);
+	of_node_put(np);
+	if (!match)
 		return -ENODEV;
 
-	of_node_put(of_root);
-
 	return PTR_ERR_OR_ZERO(platform_device_register_simple("cpufreq-dt", -1,
 							       NULL, 0));
 }
-- 
cgit v0.10.2


From 742c87bf27d3b715820da6f8a81d6357adbf18f8 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 28 Jun 2016 03:29:29 +0200
Subject: cpufreq: Avoid false-positive WARN_ON()s in cpufreq_update_policy()

CPU notifications from the firmware coming in when cpufreq is
suspended cause cpufreq_update_current_freq() to return 0 which
triggers the WARN_ON() in cpufreq_update_policy() for no reason.

Avoid that by checking cpufreq_suspended before calling
cpufreq_update_current_freq().

Fixes: c9d9c929e674 (cpufreq: Abort cpufreq_update_current_freq() for cpufreq_suspended set)
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Cc: 4.6+ <stable@vger.kernel.org> # 4.6+

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 9009295..5617c70 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -2261,6 +2261,10 @@ int cpufreq_update_policy(unsigned int cpu)
 	 * -> ask driver for current freq and notify governors about a change
 	 */
 	if (cpufreq_driver->get && !cpufreq_driver->setpolicy) {
+		if (cpufreq_suspended) {
+			ret = -EAGAIN;
+			goto unlock;
+		}
 		new_policy.cur = cpufreq_update_current_freq(policy);
 		if (WARN_ON(!new_policy.cur)) {
 			ret = -EIO;
-- 
cgit v0.10.2


From f52e126cc7476196f44f3c313b7d9f0699a881fc Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Tue, 28 Jun 2016 09:42:25 +0530
Subject: ARC: unwind: ensure that .debug_frame is generated (vs. .eh_frame)

With recent binutils update to support dwarf CFI pseudo-ops in gas, we
now get .eh_frame vs. .debug_frame. Although the call frame info is
exactly the same in both, the CIE differs, which the current kernel
unwinder can't cope with.

This broke both the kernel unwinder as well as loadable modules (latter
because of a new unhandled relo R_ARC_32_PCREL from .rela.eh_frame in
the module loader)

The ideal solution would be to switch unwinder to .eh_frame.
For now however we can make do by just ensureing .debug_frame is
generated by removing -fasynchronous-unwind-tables

 .eh_frame    generated with -gdwarf-2 -fasynchronous-unwind-tables
 .debug_frame generated with -gdwarf-2

Fixes STAR 9001058196

Cc: stable@vger.kernel.org
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>

diff --git a/arch/arc/Makefile b/arch/arc/Makefile
index d4df6be..85814e7 100644
--- a/arch/arc/Makefile
+++ b/arch/arc/Makefile
@@ -66,8 +66,6 @@ endif
 
 endif
 
-cflags-$(CONFIG_ARC_DW2_UNWIND)		+= -fasynchronous-unwind-tables
-
 # By default gcc 4.8 generates dwarf4 which kernel unwinder can't grok
 ifeq ($(atleast_gcc48),y)
 cflags-$(CONFIG_ARC_DW2_UNWIND)		+= -gdwarf-2
-- 
cgit v0.10.2


From 9bd54517ee86cb164c734f72ea95aeba4804f10b Mon Sep 17 00:00:00 2001
From: Alexey Brodkin <Alexey.Brodkin@synopsys.com>
Date: Thu, 23 Jun 2016 11:00:39 +0300
Subject: arc: unwind: warn only once if DW2_UNWIND is disabled

If CONFIG_ARC_DW2_UNWIND is disabled every time arc_unwind_core()
gets called following message gets printed in debug console:
----------------->8---------------
CONFIG_ARC_DW2_UNWIND needs to be enabled
----------------->8---------------

That message makes sense if user indeed wants to see a backtrace or
get nice function call-graphs in perf but what if user disabled
unwinder for the purpose? Why pollute his debug console?

So instead we'll warn user about possibly missing feature once and
let him decide if that was what he or she really wanted.

Signed-off-by: Alexey Brodkin <abrodkin@synopsys.com>
Cc: stable@vger.kernel.org
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>

diff --git a/arch/arc/kernel/stacktrace.c b/arch/arc/kernel/stacktrace.c
index e0efff1..b9192a6 100644
--- a/arch/arc/kernel/stacktrace.c
+++ b/arch/arc/kernel/stacktrace.c
@@ -142,7 +142,7 @@ arc_unwind_core(struct task_struct *tsk, struct pt_regs *regs,
 	 * prelogue is setup (callee regs saved and then fp set and not other
 	 * way around
 	 */
-	pr_warn("CONFIG_ARC_DW2_UNWIND needs to be enabled\n");
+	pr_warn_once("CONFIG_ARC_DW2_UNWIND needs to be enabled\n");
 	return 0;
 
 #endif
-- 
cgit v0.10.2


From 5419447e2142d6ed68c9f5c1a28630b3a290a845 Mon Sep 17 00:00:00 2001
From: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Date: Mon, 13 Jun 2016 17:03:48 +0200
Subject: Revert "s390/kdump: Clear subchannel ID to signal non-CCW/SCSI IPL"

This reverts commit 852ffd0f4e23248b47531058e531066a988434b5.

There are use cases where an intermediate boot kernel (1) uses kexec
to boot the final production kernel (2). For this scenario we should
provide the original boot information to the production kernel (2).
Therefore clearing the boot information during kexec() should not
be done.

Cc: stable@vger.kernel.org # v3.17+
Reported-by: Steffen Maier <maier@linux.vnet.ibm.com>
Signed-off-by: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Reviewed-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index f20abdb..d14069d 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -2064,12 +2064,5 @@ void s390_reset_system(void)
 	S390_lowcore.program_new_psw.addr =
 		(unsigned long) s390_base_pgm_handler;
 
-	/*
-	 * Clear subchannel ID and number to signal new kernel that no CCW or
-	 * SCSI IPL has been done (for kexec and kdump)
-	 */
-	S390_lowcore.subchannel_id = 0;
-	S390_lowcore.subchannel_nr = 0;
-
 	do_reset_calls();
 }
-- 
cgit v0.10.2


From bcf4dd5f9ee096bd1510f838dd4750c35df4e38b Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Mon, 27 Jun 2016 17:06:45 +0200
Subject: s390: fix test_fp_ctl inline assembly contraints

The test_fp_ctl function is used to test if a given value is a valid
floating-point control. The inline assembly in test_fp_ctl uses an
incorrect constraint for the 'orig_fpc' variable. If the compiler
chooses the same register for 'fpc' and 'orig_fpc' the test_fp_ctl()
function always returns true. This allows user space to trigger
kernel oopses with invalid floating-point control values on the
signal stack.

This problem has been introduced with git commit 4725c86055f5bbdcdf
"s390: fix save and restore of the floating-point-control register"

Cc: stable@vger.kernel.org # v3.13+
Reviewed-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

diff --git a/arch/s390/include/asm/fpu/api.h b/arch/s390/include/asm/fpu/api.h
index 5e04f3c..8ae236b0 100644
--- a/arch/s390/include/asm/fpu/api.h
+++ b/arch/s390/include/asm/fpu/api.h
@@ -22,7 +22,7 @@ static inline int test_fp_ctl(u32 fpc)
 		"	la	%0,0\n"
 		"1:\n"
 		EX_TABLE(0b,1b)
-		: "=d" (rc), "=d" (orig_fpc)
+		: "=d" (rc), "=&d" (orig_fpc)
 		: "d" (fpc), "0" (-EINVAL));
 	return rc;
 }
-- 
cgit v0.10.2


From 70a0dec45174c976c64b4c8c1d0898581f759948 Mon Sep 17 00:00:00 2001
From: Tom Goff <thomas.goff@ll.mit.edu>
Date: Thu, 23 Jun 2016 16:11:57 -0400
Subject: ipmr/ip6mr: Initialize the last assert time of mfc entries.

This fixes wrong-interface signaling on 32-bit platforms for entries
created when jiffies > 2^31 + MFC_ASSERT_THRESH.

Signed-off-by: Tom Goff <thomas.goff@ll.mit.edu>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 21a38e2..5ad48ec 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -891,8 +891,10 @@ static struct mfc_cache *ipmr_cache_alloc(void)
 {
 	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
 
-	if (c)
+	if (c) {
+		c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
 		c->mfc_un.res.minvif = MAXVIFS;
+	}
 	return c;
 }
 
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index f2e2013f8..487ef3b 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -1074,6 +1074,7 @@ static struct mfc6_cache *ip6mr_cache_alloc(void)
 	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
 	if (!c)
 		return NULL;
+	c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
 	c->mfc_un.res.minvif = MAXMIFS;
 	return c;
 }
-- 
cgit v0.10.2


From 0622cab0341cac6b30da177b0faa39fae0680e71 Mon Sep 17 00:00:00 2001
From: Jay Vosburgh <jay.vosburgh@canonical.com>
Date: Thu, 23 Jun 2016 14:20:51 -0700
Subject: bonding: fix 802.3ad aggregator reselection

Since commit 7bb11dc9f59d ("bonding: unify all places where
actor-oper key needs to be updated."), the logic in bonding to handle
selection between multiple aggregators has not functioned.

	This affects only configurations wherein the bonding slaves
connect to two discrete aggregators (e.g., two independent switches, each
with LACP enabled), thus creating two separate aggregation groups within a
single bond.

	The cause is a change in 7bb11dc9f59d to no longer set
AD_PORT_BEGIN on a port after a link state change, which would cause the
port to be reselected for attachment to an aggregator as if were newly
added to the bond.  We cannot restore the prior behavior, as it
contradicts IEEE 802.1AX 5.4.12, which requires ports that "become
inoperable" (lose carrier, setting port_enabled=false as per 802.1AX
5.4.7) to remain selected (i.e., assigned to the aggregator).  As the port
now remains selected, the aggregator selection logic is not invoked.

	A side effect of this change is that aggregators in bonding will
now contain ports that are link down.  The aggregator selection logic
does not currently handle this situation correctly, causing incorrect
aggregator selection.

	This patch makes two changes to repair the aggregator selection
logic in bonding to function as documented and within the confines of the
standard:

	First, the aggregator selection and related logic now utilizes the
number of active ports per aggregator, not the number of selected ports
(as some selected ports may be down).  The ad_select "bandwidth" and
"count" options only consider ports that are link up.

	Second, on any carrier state change of any slave, the aggregator
selection logic is explicitly called to insure the correct aggregator is
active.

Reported-by: Veli-Matti Lintu <veli-matti.lintu@opinsys.fi>
Fixes: 7bb11dc9f59d ("bonding: unify all places where actor-oper key needs to be updated.")
Signed-off-by: Jay Vosburgh <jay.vosburgh@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index b9304a2..ca81f46 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -657,6 +657,20 @@ static void __set_agg_ports_ready(struct aggregator *aggregator, int val)
 	}
 }
 
+static int __agg_active_ports(struct aggregator *agg)
+{
+	struct port *port;
+	int active = 0;
+
+	for (port = agg->lag_ports; port;
+	     port = port->next_port_in_aggregator) {
+		if (port->is_enabled)
+			active++;
+	}
+
+	return active;
+}
+
 /**
  * __get_agg_bandwidth - get the total bandwidth of an aggregator
  * @aggregator: the aggregator we're looking at
@@ -664,39 +678,40 @@ static void __set_agg_ports_ready(struct aggregator *aggregator, int val)
  */
 static u32 __get_agg_bandwidth(struct aggregator *aggregator)
 {
+	int nports = __agg_active_ports(aggregator);
 	u32 bandwidth = 0;
 
-	if (aggregator->num_of_ports) {
+	if (nports) {
 		switch (__get_link_speed(aggregator->lag_ports)) {
 		case AD_LINK_SPEED_1MBPS:
-			bandwidth = aggregator->num_of_ports;
+			bandwidth = nports;
 			break;
 		case AD_LINK_SPEED_10MBPS:
-			bandwidth = aggregator->num_of_ports * 10;
+			bandwidth = nports * 10;
 			break;
 		case AD_LINK_SPEED_100MBPS:
-			bandwidth = aggregator->num_of_ports * 100;
+			bandwidth = nports * 100;
 			break;
 		case AD_LINK_SPEED_1000MBPS:
-			bandwidth = aggregator->num_of_ports * 1000;
+			bandwidth = nports * 1000;
 			break;
 		case AD_LINK_SPEED_2500MBPS:
-			bandwidth = aggregator->num_of_ports * 2500;
+			bandwidth = nports * 2500;
 			break;
 		case AD_LINK_SPEED_10000MBPS:
-			bandwidth = aggregator->num_of_ports * 10000;
+			bandwidth = nports * 10000;
 			break;
 		case AD_LINK_SPEED_20000MBPS:
-			bandwidth = aggregator->num_of_ports * 20000;
+			bandwidth = nports * 20000;
 			break;
 		case AD_LINK_SPEED_40000MBPS:
-			bandwidth = aggregator->num_of_ports * 40000;
+			bandwidth = nports * 40000;
 			break;
 		case AD_LINK_SPEED_56000MBPS:
-			bandwidth = aggregator->num_of_ports * 56000;
+			bandwidth = nports * 56000;
 			break;
 		case AD_LINK_SPEED_100000MBPS:
-			bandwidth = aggregator->num_of_ports * 100000;
+			bandwidth = nports * 100000;
 			break;
 		default:
 			bandwidth = 0; /* to silence the compiler */
@@ -1530,10 +1545,10 @@ static struct aggregator *ad_agg_selection_test(struct aggregator *best,
 
 	switch (__get_agg_selection_mode(curr->lag_ports)) {
 	case BOND_AD_COUNT:
-		if (curr->num_of_ports > best->num_of_ports)
+		if (__agg_active_ports(curr) > __agg_active_ports(best))
 			return curr;
 
-		if (curr->num_of_ports < best->num_of_ports)
+		if (__agg_active_ports(curr) < __agg_active_ports(best))
 			return best;
 
 		/*FALLTHROUGH*/
@@ -1561,8 +1576,14 @@ static int agg_device_up(const struct aggregator *agg)
 	if (!port)
 		return 0;
 
-	return netif_running(port->slave->dev) &&
-	       netif_carrier_ok(port->slave->dev);
+	for (port = agg->lag_ports; port;
+	     port = port->next_port_in_aggregator) {
+		if (netif_running(port->slave->dev) &&
+		    netif_carrier_ok(port->slave->dev))
+			return 1;
+	}
+
+	return 0;
 }
 
 /**
@@ -1610,7 +1631,7 @@ static void ad_agg_selection_logic(struct aggregator *agg,
 
 		agg->is_active = 0;
 
-		if (agg->num_of_ports && agg_device_up(agg))
+		if (__agg_active_ports(agg) && agg_device_up(agg))
 			best = ad_agg_selection_test(best, agg);
 	}
 
@@ -1622,7 +1643,7 @@ static void ad_agg_selection_logic(struct aggregator *agg,
 		 * answering partner.
 		 */
 		if (active && active->lag_ports &&
-		    active->lag_ports->is_enabled &&
+		    __agg_active_ports(active) &&
 		    (__agg_has_partner(active) ||
 		     (!__agg_has_partner(active) &&
 		     !__agg_has_partner(best)))) {
@@ -2133,7 +2154,7 @@ void bond_3ad_unbind_slave(struct slave *slave)
 				else
 					temp_aggregator->lag_ports = temp_port->next_port_in_aggregator;
 				temp_aggregator->num_of_ports--;
-				if (temp_aggregator->num_of_ports == 0) {
+				if (__agg_active_ports(temp_aggregator) == 0) {
 					select_new_active_agg = temp_aggregator->is_active;
 					ad_clear_agg(temp_aggregator);
 					if (select_new_active_agg) {
@@ -2432,7 +2453,9 @@ void bond_3ad_adapter_speed_duplex_changed(struct slave *slave)
  */
 void bond_3ad_handle_link_change(struct slave *slave, char link)
 {
+	struct aggregator *agg;
 	struct port *port;
+	bool dummy;
 
 	port = &(SLAVE_AD_INFO(slave)->port);
 
@@ -2459,6 +2482,9 @@ void bond_3ad_handle_link_change(struct slave *slave, char link)
 		port->is_enabled = false;
 		ad_update_actor_keys(port, true);
 	}
+	agg = __get_first_agg(port);
+	ad_agg_selection_logic(agg, &dummy);
+
 	netdev_dbg(slave->bond->dev, "Port %d changed link status to %s\n",
 		   port->actor_port_number,
 		   link == BOND_LINK_UP ? "UP" : "DOWN");
@@ -2499,7 +2525,7 @@ int bond_3ad_set_carrier(struct bonding *bond)
 	active = __get_active_agg(&(SLAVE_AD_INFO(first_slave)->aggregator));
 	if (active) {
 		/* are enough slaves available to consider link up? */
-		if (active->num_of_ports < bond->params.min_links) {
+		if (__agg_active_ports(active) < bond->params.min_links) {
 			if (netif_carrier_ok(bond->dev)) {
 				netif_carrier_off(bond->dev);
 				goto out;
-- 
cgit v0.10.2


From d2b13233879ca1268a1c027d4573109e5a777811 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Thu, 23 Jun 2016 14:23:12 -0700
Subject: net: bgmac: Fix SOF bit checking

We are checking for the Start of Frame bit in the ctl1 word, while this
bit is set in the ctl0 word instead. Read the ctl0 word and update the
check to verify that.

Fixes: 9cde94506eac ("bgmac: implement scatter/gather support")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c
index ee5f431..70926c6 100644
--- a/drivers/net/ethernet/broadcom/bgmac.c
+++ b/drivers/net/ethernet/broadcom/bgmac.c
@@ -267,15 +267,16 @@ static void bgmac_dma_tx_free(struct bgmac *bgmac, struct bgmac_dma_ring *ring)
 	while (ring->start != ring->end) {
 		int slot_idx = ring->start % BGMAC_TX_RING_SLOTS;
 		struct bgmac_slot_info *slot = &ring->slots[slot_idx];
-		u32 ctl1;
+		u32 ctl0, ctl1;
 		int len;
 
 		if (slot_idx == empty_slot)
 			break;
 
+		ctl0 = le32_to_cpu(ring->cpu_base[slot_idx].ctl0);
 		ctl1 = le32_to_cpu(ring->cpu_base[slot_idx].ctl1);
 		len = ctl1 & BGMAC_DESC_CTL1_LEN;
-		if (ctl1 & BGMAC_DESC_CTL0_SOF)
+		if (ctl0 & BGMAC_DESC_CTL0_SOF)
 			/* Unmap no longer used buffer */
 			dma_unmap_single(dma_dev, slot->dma_addr, len,
 					 DMA_TO_DEVICE);
-- 
cgit v0.10.2


From c3897f2a69e54dd113fc9abd2daf872e5b495798 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Thu, 23 Jun 2016 14:25:32 -0700
Subject: net: bgmac: Start transmit queue in bgmac_open

The driver does not start the transmit queue in bgmac_open(). If the
queue was stopped prior to closing then re-opening the interface, we
would never be able to wake-up again.

Fixes: dd4544f05469 ("bgmac: driver for GBit MAC core on BCMA bus")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c
index 70926c6..85cd07f 100644
--- a/drivers/net/ethernet/broadcom/bgmac.c
+++ b/drivers/net/ethernet/broadcom/bgmac.c
@@ -1314,6 +1314,9 @@ static int bgmac_open(struct net_device *net_dev)
 	phy_start(bgmac->phy_dev);
 
 	netif_carrier_on(net_dev);
+
+	netif_start_queue(net_dev);
+
 	return 0;
 }
 
-- 
cgit v0.10.2


From 3894396e64994f31c3ef5c7e6f63dded0593e567 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Thu, 23 Jun 2016 14:25:33 -0700
Subject: net: bgmac: Remove superflous netif_carrier_on()

bgmac_open() calls phy_start() to initialize the PHY state machine,
which will set the interface's carrier state accordingly, no need to
force that as this could be conflicting with the PHY state determined by
PHYLIB.

Fixes: dd4544f05469 ("bgmac: driver for GBit MAC core on BCMA bus")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c
index 85cd07f..a6333d3 100644
--- a/drivers/net/ethernet/broadcom/bgmac.c
+++ b/drivers/net/ethernet/broadcom/bgmac.c
@@ -1313,8 +1313,6 @@ static int bgmac_open(struct net_device *net_dev)
 
 	phy_start(bgmac->phy_dev);
 
-	netif_carrier_on(net_dev);
-
 	netif_start_queue(net_dev);
 
 	return 0;
-- 
cgit v0.10.2


From f299a02d5f13c4deb52c1a7ddf2b42630fe6294a Mon Sep 17 00:00:00 2001
From: Wang Sheng-Hui <shhuiw@foxmail.com>
Date: Fri, 24 Jun 2016 08:52:11 +0800
Subject: net/mlx5: use mlx5_buf_alloc_node instead of mlx5_buf_alloc in
 mlx5_wq_ll_create

Commit 311c7c71c9bb ("net/mlx5e: Allocate DMA coherent memory on
reader NUMA node") introduced mlx5_*_alloc_node() but missed changing
some calling and warn messages. This patch introduces 2 changes:
	* Use mlx5_buf_alloc_node() instead of mlx5_buf_alloc() in
	  mlx5_wq_ll_create()
	* Update the failure warn messages with _node postfix for
	  mlx5_*_alloc function names

Fixes: 311c7c71c9bb ("net/mlx5e: Allocate DMA coherent memory on reader NUMA node")
Signed-off-by: Wang Sheng-Hui <shhuiw@foxmail.com>
Acked-By: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.c b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
index ce21ee5..821a087 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/wq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
@@ -75,14 +75,14 @@ int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 
 	err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node);
 	if (err) {
-		mlx5_core_warn(mdev, "mlx5_db_alloc() failed, %d\n", err);
+		mlx5_core_warn(mdev, "mlx5_db_alloc_node() failed, %d\n", err);
 		return err;
 	}
 
 	err = mlx5_buf_alloc_node(mdev, mlx5_wq_cyc_get_byte_size(wq),
 				  &wq_ctrl->buf, param->buf_numa_node);
 	if (err) {
-		mlx5_core_warn(mdev, "mlx5_buf_alloc() failed, %d\n", err);
+		mlx5_core_warn(mdev, "mlx5_buf_alloc_node() failed, %d\n", err);
 		goto err_db_free;
 	}
 
@@ -111,14 +111,14 @@ int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 
 	err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node);
 	if (err) {
-		mlx5_core_warn(mdev, "mlx5_db_alloc() failed, %d\n", err);
+		mlx5_core_warn(mdev, "mlx5_db_alloc_node() failed, %d\n", err);
 		return err;
 	}
 
 	err = mlx5_buf_alloc_node(mdev, mlx5_cqwq_get_byte_size(wq),
 				  &wq_ctrl->buf, param->buf_numa_node);
 	if (err) {
-		mlx5_core_warn(mdev, "mlx5_buf_alloc() failed, %d\n", err);
+		mlx5_core_warn(mdev, "mlx5_buf_alloc_node() failed, %d\n", err);
 		goto err_db_free;
 	}
 
@@ -148,13 +148,14 @@ int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 
 	err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node);
 	if (err) {
-		mlx5_core_warn(mdev, "mlx5_db_alloc() failed, %d\n", err);
+		mlx5_core_warn(mdev, "mlx5_db_alloc_node() failed, %d\n", err);
 		return err;
 	}
 
-	err = mlx5_buf_alloc(mdev, mlx5_wq_ll_get_byte_size(wq), &wq_ctrl->buf);
+	err = mlx5_buf_alloc_node(mdev, mlx5_wq_ll_get_byte_size(wq),
+				  &wq_ctrl->buf, param->buf_numa_node);
 	if (err) {
-		mlx5_core_warn(mdev, "mlx5_buf_alloc() failed, %d\n", err);
+		mlx5_core_warn(mdev, "mlx5_buf_alloc_node() failed, %d\n", err);
 		goto err_db_free;
 	}
 
-- 
cgit v0.10.2


From 126e7557328a1cd576be4fca95b133a2695283ff Mon Sep 17 00:00:00 2001
From: Jouni Malinen <j@w1.fi>
Date: Sun, 19 Jun 2016 23:51:02 +0300
Subject: mac80211: Fix mesh estab_plinks counting in STA removal case

If a user space program (e.g., wpa_supplicant) deletes a STA entry that
is currently in NL80211_PLINK_ESTAB state, the number of established
plinks counter was not decremented and this could result in rejecting
new plink establishment before really hitting the real maximum plink
limit. For !user_mpm case, this decrementation is handled by
mesh_plink_deactive().

Fix this by decrementing estab_plinks on STA deletion
(mesh_sta_cleanup() gets called from there) so that the counter has a
correct value and the Beacon frame advertisement in Mesh Configuration
element shows the proper value for capability to accept additional
peers.

Cc: stable@vger.kernel.org
Signed-off-by: Jouni Malinen <j@w1.fi>
Signed-off-by: Johannes Berg <johannes@sipsolutions.net>

diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 21b1fdf..6a1603b 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -148,14 +148,17 @@ u32 mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata)
 void mesh_sta_cleanup(struct sta_info *sta)
 {
 	struct ieee80211_sub_if_data *sdata = sta->sdata;
-	u32 changed;
+	u32 changed = 0;
 
 	/*
 	 * maybe userspace handles peer allocation and peering, but in either
 	 * case the beacon is still generated by the kernel and we might need
 	 * an update.
 	 */
-	changed = mesh_accept_plinks_update(sdata);
+	if (sdata->u.mesh.user_mpm &&
+	    sta->mesh->plink_state == NL80211_PLINK_ESTAB)
+		changed |= mesh_plink_dec_estab_count(sdata);
+	changed |= mesh_accept_plinks_update(sdata);
 	if (!sdata->u.mesh.user_mpm) {
 		changed |= mesh_plink_deactivate(sta);
 		del_timer_sync(&sta->mesh->plink_timer);
-- 
cgit v0.10.2


From cca0e542e02e48cce541a49c4046ec094ec27c1e Mon Sep 17 00:00:00 2001
From: Gavin Shan <gwshan@linux.vnet.ibm.com>
Date: Fri, 24 Jun 2016 14:49:02 +1000
Subject: powerpc/eeh: Fix wrong argument passed to eeh_rmv_device()

When calling eeh_rmv_device() in eeh_reset_device() for partial hotplug
case, @rmv_data instead of its address is the proper argument.
Otherwise, the stack frame is corrupted when writing to
@rmv_data (actually its address) in eeh_rmv_device(). It results in
kernel crash as observed.

This fixes the issue by passing @rmv_data, not its address to
eeh_rmv_device() in eeh_reset_device().

Fixes: 67086e32b564 ("powerpc/eeh: powerpc/eeh: Support error recovery for VF PE")
Reported-by: Pridhiviraj Paidipeddi <ppaidipe@in.ibm.com>
Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>

diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index b5f73cb..d70101e 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -647,7 +647,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
 			pci_unlock_rescan_remove();
 		}
 	} else if (frozen_bus) {
-		eeh_pe_dev_traverse(pe, eeh_rmv_device, &rmv_data);
+		eeh_pe_dev_traverse(pe, eeh_rmv_device, rmv_data);
 	}
 
 	/*
-- 
cgit v0.10.2


From 62630ea768869beeb1e514b0bf5607a0c9b93d12 Mon Sep 17 00:00:00 2001
From: Allen Hung <allen_hung@dell.com>
Date: Thu, 23 Jun 2016 16:31:29 +0800
Subject: Revert "HID: multitouch: enable palm rejection if device implements
 confidence usage"

This reverts commit 25a84db15b3f ("HID: multitouch: enable palm rejection
if device implements confidence usage")

The commit enables palm rejection for Win8 Precision Touchpad devices but
the quirk MT_QUIRK_VALID_IS_CONFIDENCE it is using is not working very
properly. This quirk is originally designed for some WIn7 touchscreens. Use
of this for a Win8 Precision Touchpad will cause unexpected pointer jumping
problem.

Cc: stable@vger.kernel.org # v4.5+
Reviewed-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Tested-by: Andy Lutomirski <luto@kernel.org> # XPS 13 9350, BIOS 1.4.3
Signed-off-by: Allen Hung <allen_hung@dell.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c
index 95b7d61..4ef7006 100644
--- a/drivers/hid/hid-multitouch.c
+++ b/drivers/hid/hid-multitouch.c
@@ -502,11 +502,6 @@ static int mt_touch_input_mapping(struct hid_device *hdev, struct hid_input *hi,
 			mt_store_field(usage, td, hi);
 			return 1;
 		case HID_DG_CONFIDENCE:
-			if (cls->name == MT_CLS_WIN_8 &&
-				field->application == HID_DG_TOUCHPAD) {
-				cls->quirks &= ~MT_QUIRK_ALWAYS_VALID;
-				cls->quirks |= MT_QUIRK_VALID_IS_CONFIDENCE;
-			}
 			mt_store_field(usage, td, hi);
 			return 1;
 		case HID_DG_TIPSWITCH:
-- 
cgit v0.10.2


From 6dd2e27a103d716921cc4a1a96a9adc0a8e3ab57 Mon Sep 17 00:00:00 2001
From: Allen Hung <allen_hung@dell.com>
Date: Thu, 23 Jun 2016 16:31:30 +0800
Subject: HID: multitouch: enable palm rejection for Windows Precision Touchpad

The usage Confidence is mandary to Windows Precision Touchpad devices. If
it is examined in input_mapping on a WIndows Precision Touchpad, a new add
quirk MT_QUIRK_CONFIDENCE desgned for such devices will be applied to the
device. A touch with the confidence bit is not set is determined as
invalid.

Tested on Dell XPS13 9343

Cc: stable@vger.kernel.org # v4.5+
Reviewed-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Tested-by: Andy Lutomirski <luto@kernel.org> # XPS 13 9350, BIOS 1.4.3
Signed-off-by: Allen Hung <allen_hung@dell.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>

diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c
index 4ef7006..fb6f1f4 100644
--- a/drivers/hid/hid-multitouch.c
+++ b/drivers/hid/hid-multitouch.c
@@ -61,6 +61,7 @@ MODULE_LICENSE("GPL");
 #define MT_QUIRK_ALWAYS_VALID		(1 << 4)
 #define MT_QUIRK_VALID_IS_INRANGE	(1 << 5)
 #define MT_QUIRK_VALID_IS_CONFIDENCE	(1 << 6)
+#define MT_QUIRK_CONFIDENCE		(1 << 7)
 #define MT_QUIRK_SLOT_IS_CONTACTID_MINUS_ONE	(1 << 8)
 #define MT_QUIRK_NO_AREA		(1 << 9)
 #define MT_QUIRK_IGNORE_DUPLICATES	(1 << 10)
@@ -78,6 +79,7 @@ struct mt_slot {
 	__s32 contactid;	/* the device ContactID assigned to this slot */
 	bool touch_state;	/* is the touch valid? */
 	bool inrange_state;	/* is the finger in proximity of the sensor? */
+	bool confidence_state;  /* is the touch made by a finger? */
 };
 
 struct mt_class {
@@ -502,6 +504,9 @@ static int mt_touch_input_mapping(struct hid_device *hdev, struct hid_input *hi,
 			mt_store_field(usage, td, hi);
 			return 1;
 		case HID_DG_CONFIDENCE:
+			if (cls->name == MT_CLS_WIN_8 &&
+				field->application == HID_DG_TOUCHPAD)
+				cls->quirks |= MT_QUIRK_CONFIDENCE;
 			mt_store_field(usage, td, hi);
 			return 1;
 		case HID_DG_TIPSWITCH:
@@ -614,6 +619,7 @@ static void mt_complete_slot(struct mt_device *td, struct input_dev *input)
 		return;
 
 	if (td->curvalid || (td->mtclass.quirks & MT_QUIRK_ALWAYS_VALID)) {
+		int active;
 		int slotnum = mt_compute_slot(td, input);
 		struct mt_slot *s = &td->curdata;
 		struct input_mt *mt = input->mt;
@@ -628,10 +634,14 @@ static void mt_complete_slot(struct mt_device *td, struct input_dev *input)
 				return;
 		}
 
+		if (!(td->mtclass.quirks & MT_QUIRK_CONFIDENCE))
+			s->confidence_state = 1;
+		active = (s->touch_state || s->inrange_state) &&
+							s->confidence_state;
+
 		input_mt_slot(input, slotnum);
-		input_mt_report_slot_state(input, MT_TOOL_FINGER,
-			s->touch_state || s->inrange_state);
-		if (s->touch_state || s->inrange_state) {
+		input_mt_report_slot_state(input, MT_TOOL_FINGER, active);
+		if (active) {
 			/* this finger is in proximity of the sensor */
 			int wide = (s->w > s->h);
 			/* divided by two to match visual scale of touch */
@@ -696,6 +706,8 @@ static void mt_process_mt_event(struct hid_device *hid, struct hid_field *field,
 			td->curdata.touch_state = value;
 			break;
 		case HID_DG_CONFIDENCE:
+			if (quirks & MT_QUIRK_CONFIDENCE)
+				td->curdata.confidence_state = value;
 			if (quirks & MT_QUIRK_VALID_IS_CONFIDENCE)
 				td->curvalid = value;
 			break;
-- 
cgit v0.10.2


From 0888d5f3c0f183ea6177355752ada433d370ac89 Mon Sep 17 00:00:00 2001
From: daniel <daniel@dd-wrt.com>
Date: Fri, 24 Jun 2016 12:35:18 +0200
Subject: Bridge: Fix ipv6 mc snooping if bridge has no ipv6 address
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The bridge is falsly dropping ipv6 mulitcast packets if there is:
 1. No ipv6 address assigned on the brigde.
 2. No external mld querier present.
 3. The internal querier enabled.

When the bridge fails to build mld queries, because it has no
ipv6 address, it slilently returns, but keeps the local querier enabled.
This specific case causes confusing packet loss.

Ipv6 multicast snooping can only work if:
 a) An external querier is present
 OR
 b) The bridge has an ipv6 address an is capable of sending own queries

Otherwise it has to forward/flood the ipv6 multicast traffic,
because snooping cannot work.

This patch fixes the issue by adding a flag to the bridge struct that
indicates that there is currently no ipv6 address assinged to the bridge
and returns a false state for the local querier in
__br_multicast_querier_exists().

Special thanks to Linus Lüssing.

Fixes: d1d81d4c3dd8 ("bridge: check return value of ipv6_dev_get_saddr()")
Signed-off-by: Daniel Danzberger <daniel@dd-wrt.com>
Acked-by: Linus Lüssing <linus.luessing@c0d3.blue>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 6852f3c..4384414 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -464,8 +464,11 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
 	if (ipv6_dev_get_saddr(dev_net(br->dev), br->dev, &ip6h->daddr, 0,
 			       &ip6h->saddr)) {
 		kfree_skb(skb);
+		br->has_ipv6_addr = 0;
 		return NULL;
 	}
+
+	br->has_ipv6_addr = 1;
 	ipv6_eth_mc_map(&ip6h->daddr, eth->h_dest);
 
 	hopopt = (u8 *)(ip6h + 1);
@@ -1745,6 +1748,7 @@ void br_multicast_init(struct net_bridge *br)
 	br->ip6_other_query.delay_time = 0;
 	br->ip6_querier.port = NULL;
 #endif
+	br->has_ipv6_addr = 1;
 
 	spin_lock_init(&br->multicast_lock);
 	setup_timer(&br->multicast_router_timer,
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index c7fb5d7..52edecf 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -314,6 +314,7 @@ struct net_bridge
 	u8				multicast_disabled:1;
 	u8				multicast_querier:1;
 	u8				multicast_query_use_ifaddr:1;
+	u8				has_ipv6_addr:1;
 
 	u32				hash_elasticity;
 	u32				hash_max;
@@ -588,10 +589,22 @@ static inline bool br_multicast_is_router(struct net_bridge *br)
 
 static inline bool
 __br_multicast_querier_exists(struct net_bridge *br,
-			      struct bridge_mcast_other_query *querier)
+				struct bridge_mcast_other_query *querier,
+				const bool is_ipv6)
 {
+	bool own_querier_enabled;
+
+	if (br->multicast_querier) {
+		if (is_ipv6 && !br->has_ipv6_addr)
+			own_querier_enabled = false;
+		else
+			own_querier_enabled = true;
+	} else {
+		own_querier_enabled = false;
+	}
+
 	return time_is_before_jiffies(querier->delay_time) &&
-	       (br->multicast_querier || timer_pending(&querier->timer));
+	       (own_querier_enabled || timer_pending(&querier->timer));
 }
 
 static inline bool br_multicast_querier_exists(struct net_bridge *br,
@@ -599,10 +612,12 @@ static inline bool br_multicast_querier_exists(struct net_bridge *br,
 {
 	switch (eth->h_proto) {
 	case (htons(ETH_P_IP)):
-		return __br_multicast_querier_exists(br, &br->ip4_other_query);
+		return __br_multicast_querier_exists(br,
+			&br->ip4_other_query, false);
 #if IS_ENABLED(CONFIG_IPV6)
 	case (htons(ETH_P_IPV6)):
-		return __br_multicast_querier_exists(br, &br->ip6_other_query);
+		return __br_multicast_querier_exists(br,
+			&br->ip6_other_query, true);
 #endif
 	default:
 		return false;
-- 
cgit v0.10.2


From ab8ed951080e8f59b1da4ea10ac7c05ba24a3cbd Mon Sep 17 00:00:00 2001
From: Aaron Campbell <aaron@monkey.org>
Date: Fri, 24 Jun 2016 10:05:32 -0300
Subject: connector: fix out-of-order cn_proc netlink message delivery

The proc connector messages include a sequence number, allowing userspace
programs to detect lost messages.  However, performing this detection is
currently more difficult than necessary, since netlink messages can be
delivered to the application out-of-order.  To fix this, leave pre-emption
disabled during cn_netlink_send(), and use GFP_NOWAIT.

The following was written as a test case.  Building the kernel w/ make -j32
proved a reliable way to generate out-of-order cn_proc messages.

int
main(int argc, char *argv[])
{
	static uint32_t last_seq[CPU_SETSIZE], seq;
	int cpu, fd;
	struct sockaddr_nl sa;
	struct __attribute__((aligned(NLMSG_ALIGNTO))) {
		struct nlmsghdr nl_hdr;
		struct __attribute__((__packed__)) {
			struct cn_msg cn_msg;
			struct proc_event cn_proc;
		};
	} rmsg;
	struct __attribute__((aligned(NLMSG_ALIGNTO))) {
		struct nlmsghdr nl_hdr;
		struct __attribute__((__packed__)) {
			struct cn_msg cn_msg;
			enum proc_cn_mcast_op cn_mcast;
		};
	} smsg;

	fd = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_CONNECTOR);
	if (fd < 0) {
		perror("socket");
	}

	sa.nl_family = AF_NETLINK;
	sa.nl_groups = CN_IDX_PROC;
	sa.nl_pid = getpid();
	if (bind(fd, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
		perror("bind");
	}

	memset(&smsg, 0, sizeof(smsg));
	smsg.nl_hdr.nlmsg_len = sizeof(smsg);
	smsg.nl_hdr.nlmsg_pid = getpid();
	smsg.nl_hdr.nlmsg_type = NLMSG_DONE;
	smsg.cn_msg.id.idx = CN_IDX_PROC;
	smsg.cn_msg.id.val = CN_VAL_PROC;
	smsg.cn_msg.len = sizeof(enum proc_cn_mcast_op);
	smsg.cn_mcast = PROC_CN_MCAST_LISTEN;
	if (send(fd, &smsg, sizeof(smsg), 0) != sizeof(smsg)) {
		perror("send");
	}

	while (recv(fd, &rmsg, sizeof(rmsg), 0) == sizeof(rmsg)) {
		cpu = rmsg.cn_proc.cpu;
		if (cpu < 0) {
			continue;
		}
		seq = rmsg.cn_msg.seq;
		if ((last_seq[cpu] != 0) && (seq != last_seq[cpu] + 1)) {
			printf("out-of-order seq=%d on cpu=%d\n", seq, cpu);
		}
		last_seq[cpu] = seq;
	}

	/* NOTREACHED */

	perror("recv");

	return -1;
}

Signed-off-by: Aaron Campbell <aaron@monkey.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/connector/cn_proc.c b/drivers/connector/cn_proc.c
index 15d06fc..b02f9c6 100644
--- a/drivers/connector/cn_proc.c
+++ b/drivers/connector/cn_proc.c
@@ -56,11 +56,21 @@ static struct cb_id cn_proc_event_id = { CN_IDX_PROC, CN_VAL_PROC };
 /* proc_event_counts is used as the sequence number of the netlink message */
 static DEFINE_PER_CPU(__u32, proc_event_counts) = { 0 };
 
-static inline void get_seq(__u32 *ts, int *cpu)
+static inline void send_msg(struct cn_msg *msg)
 {
 	preempt_disable();
-	*ts = __this_cpu_inc_return(proc_event_counts) - 1;
-	*cpu = smp_processor_id();
+
+	msg->seq = __this_cpu_inc_return(proc_event_counts) - 1;
+	((struct proc_event *)msg->data)->cpu = smp_processor_id();
+
+	/*
+	 * Preemption remains disabled during send to ensure the messages are
+	 * ordered according to their sequence numbers.
+	 *
+	 * If cn_netlink_send() fails, the data is not sent.
+	 */
+	cn_netlink_send(msg, 0, CN_IDX_PROC, GFP_NOWAIT);
+
 	preempt_enable();
 }
 
@@ -77,7 +87,6 @@ void proc_fork_connector(struct task_struct *task)
 	msg = buffer_to_cn_msg(buffer);
 	ev = (struct proc_event *)msg->data;
 	memset(&ev->event_data, 0, sizeof(ev->event_data));
-	get_seq(&msg->seq, &ev->cpu);
 	ev->timestamp_ns = ktime_get_ns();
 	ev->what = PROC_EVENT_FORK;
 	rcu_read_lock();
@@ -92,8 +101,7 @@ void proc_fork_connector(struct task_struct *task)
 	msg->ack = 0; /* not used */
 	msg->len = sizeof(*ev);
 	msg->flags = 0; /* not used */
-	/*  If cn_netlink_send() failed, the data is not sent */
-	cn_netlink_send(msg, 0, CN_IDX_PROC, GFP_KERNEL);
+	send_msg(msg);
 }
 
 void proc_exec_connector(struct task_struct *task)
@@ -108,7 +116,6 @@ void proc_exec_connector(struct task_struct *task)
 	msg = buffer_to_cn_msg(buffer);
 	ev = (struct proc_event *)msg->data;
 	memset(&ev->event_data, 0, sizeof(ev->event_data));
-	get_seq(&msg->seq, &ev->cpu);
 	ev->timestamp_ns = ktime_get_ns();
 	ev->what = PROC_EVENT_EXEC;
 	ev->event_data.exec.process_pid = task->pid;
@@ -118,7 +125,7 @@ void proc_exec_connector(struct task_struct *task)
 	msg->ack = 0; /* not used */
 	msg->len = sizeof(*ev);
 	msg->flags = 0; /* not used */
-	cn_netlink_send(msg, 0, CN_IDX_PROC, GFP_KERNEL);
+	send_msg(msg);
 }
 
 void proc_id_connector(struct task_struct *task, int which_id)
@@ -150,14 +157,13 @@ void proc_id_connector(struct task_struct *task, int which_id)
 		return;
 	}
 	rcu_read_unlock();
-	get_seq(&msg->seq, &ev->cpu);
 	ev->timestamp_ns = ktime_get_ns();
 
 	memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id));
 	msg->ack = 0; /* not used */
 	msg->len = sizeof(*ev);
 	msg->flags = 0; /* not used */
-	cn_netlink_send(msg, 0, CN_IDX_PROC, GFP_KERNEL);
+	send_msg(msg);
 }
 
 void proc_sid_connector(struct task_struct *task)
@@ -172,7 +178,6 @@ void proc_sid_connector(struct task_struct *task)
 	msg = buffer_to_cn_msg(buffer);
 	ev = (struct proc_event *)msg->data;
 	memset(&ev->event_data, 0, sizeof(ev->event_data));
-	get_seq(&msg->seq, &ev->cpu);
 	ev->timestamp_ns = ktime_get_ns();
 	ev->what = PROC_EVENT_SID;
 	ev->event_data.sid.process_pid = task->pid;
@@ -182,7 +187,7 @@ void proc_sid_connector(struct task_struct *task)
 	msg->ack = 0; /* not used */
 	msg->len = sizeof(*ev);
 	msg->flags = 0; /* not used */
-	cn_netlink_send(msg, 0, CN_IDX_PROC, GFP_KERNEL);
+	send_msg(msg);
 }
 
 void proc_ptrace_connector(struct task_struct *task, int ptrace_id)
@@ -197,7 +202,6 @@ void proc_ptrace_connector(struct task_struct *task, int ptrace_id)
 	msg = buffer_to_cn_msg(buffer);
 	ev = (struct proc_event *)msg->data;
 	memset(&ev->event_data, 0, sizeof(ev->event_data));
-	get_seq(&msg->seq, &ev->cpu);
 	ev->timestamp_ns = ktime_get_ns();
 	ev->what = PROC_EVENT_PTRACE;
 	ev->event_data.ptrace.process_pid  = task->pid;
@@ -215,7 +219,7 @@ void proc_ptrace_connector(struct task_struct *task, int ptrace_id)
 	msg->ack = 0; /* not used */
 	msg->len = sizeof(*ev);
 	msg->flags = 0; /* not used */
-	cn_netlink_send(msg, 0, CN_IDX_PROC, GFP_KERNEL);
+	send_msg(msg);
 }
 
 void proc_comm_connector(struct task_struct *task)
@@ -230,7 +234,6 @@ void proc_comm_connector(struct task_struct *task)
 	msg = buffer_to_cn_msg(buffer);
 	ev = (struct proc_event *)msg->data;
 	memset(&ev->event_data, 0, sizeof(ev->event_data));
-	get_seq(&msg->seq, &ev->cpu);
 	ev->timestamp_ns = ktime_get_ns();
 	ev->what = PROC_EVENT_COMM;
 	ev->event_data.comm.process_pid  = task->pid;
@@ -241,7 +244,7 @@ void proc_comm_connector(struct task_struct *task)
 	msg->ack = 0; /* not used */
 	msg->len = sizeof(*ev);
 	msg->flags = 0; /* not used */
-	cn_netlink_send(msg, 0, CN_IDX_PROC, GFP_KERNEL);
+	send_msg(msg);
 }
 
 void proc_coredump_connector(struct task_struct *task)
@@ -256,7 +259,6 @@ void proc_coredump_connector(struct task_struct *task)
 	msg = buffer_to_cn_msg(buffer);
 	ev = (struct proc_event *)msg->data;
 	memset(&ev->event_data, 0, sizeof(ev->event_data));
-	get_seq(&msg->seq, &ev->cpu);
 	ev->timestamp_ns = ktime_get_ns();
 	ev->what = PROC_EVENT_COREDUMP;
 	ev->event_data.coredump.process_pid = task->pid;
@@ -266,7 +268,7 @@ void proc_coredump_connector(struct task_struct *task)
 	msg->ack = 0; /* not used */
 	msg->len = sizeof(*ev);
 	msg->flags = 0; /* not used */
-	cn_netlink_send(msg, 0, CN_IDX_PROC, GFP_KERNEL);
+	send_msg(msg);
 }
 
 void proc_exit_connector(struct task_struct *task)
@@ -281,7 +283,6 @@ void proc_exit_connector(struct task_struct *task)
 	msg = buffer_to_cn_msg(buffer);
 	ev = (struct proc_event *)msg->data;
 	memset(&ev->event_data, 0, sizeof(ev->event_data));
-	get_seq(&msg->seq, &ev->cpu);
 	ev->timestamp_ns = ktime_get_ns();
 	ev->what = PROC_EVENT_EXIT;
 	ev->event_data.exit.process_pid = task->pid;
@@ -293,7 +294,7 @@ void proc_exit_connector(struct task_struct *task)
 	msg->ack = 0; /* not used */
 	msg->len = sizeof(*ev);
 	msg->flags = 0; /* not used */
-	cn_netlink_send(msg, 0, CN_IDX_PROC, GFP_KERNEL);
+	send_msg(msg);
 }
 
 /*
@@ -325,7 +326,7 @@ static void cn_proc_ack(int err, int rcvd_seq, int rcvd_ack)
 	msg->ack = rcvd_ack + 1;
 	msg->len = sizeof(*ev);
 	msg->flags = 0; /* not used */
-	cn_netlink_send(msg, 0, CN_IDX_PROC, GFP_KERNEL);
+	send_msg(msg);
 }
 
 /**
-- 
cgit v0.10.2


From 9a0fee2b552b1235fb1706ae1fc664ae74573be8 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Fri, 24 Jun 2016 16:02:35 -0400
Subject: sock_diag: do not broadcast raw socket destruction

Diag intends to broadcast tcp_sk and udp_sk socket destruction.
Testing sk->sk_protocol for IPPROTO_TCP/IPPROTO_UDP alone is not
sufficient for this. Raw sockets can have the same type.

Add a test for sk->sk_type.

Fixes: eb4cb008529c ("sock_diag: define destruction multicast groups")
Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h
index 4018b48..a0596ca0 100644
--- a/include/linux/sock_diag.h
+++ b/include/linux/sock_diag.h
@@ -36,6 +36,9 @@ enum sknetlink_groups sock_diag_destroy_group(const struct sock *sk)
 {
 	switch (sk->sk_family) {
 	case AF_INET:
+		if (sk->sk_type == SOCK_RAW)
+			return SKNLGRP_NONE;
+
 		switch (sk->sk_protocol) {
 		case IPPROTO_TCP:
 			return SKNLGRP_INET_TCP_DESTROY;
@@ -45,6 +48,9 @@ enum sknetlink_groups sock_diag_destroy_group(const struct sock *sk)
 			return SKNLGRP_NONE;
 		}
 	case AF_INET6:
+		if (sk->sk_type == SOCK_RAW)
+			return SKNLGRP_NONE;
+
 		switch (sk->sk_protocol) {
 		case IPPROTO_TCP:
 			return SKNLGRP_INET6_TCP_DESTROY;
-- 
cgit v0.10.2


From 76a658c20efd541a62838d9ff68ce94170d7a549 Mon Sep 17 00:00:00 2001
From: Richard Guy Briggs <rgb@redhat.com>
Date: Tue, 28 Jun 2016 12:06:58 -0400
Subject: audit: move calcs after alloc and check when logging set loginuid

Move the calculations of values after the allocation in case the
allocation fails.  This avoids wasting effort in the rare case that it
fails, but more importantly saves us extra logic to release the tty
ref.

Signed-off-by: Richard Guy Briggs <rgb@redhat.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>

diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 71e14d8..33dafa7 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1985,14 +1985,15 @@ static void audit_log_set_loginuid(kuid_t koldloginuid, kuid_t kloginuid,
 	if (!audit_enabled)
 		return;
 
+	ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_LOGIN);
+	if (!ab)
+		return;
+
 	uid = from_kuid(&init_user_ns, task_uid(current));
 	oldloginuid = from_kuid(&init_user_ns, koldloginuid);
 	loginuid = from_kuid(&init_user_ns, kloginuid),
 	tty = audit_get_tty(current);
 
-	ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_LOGIN);
-	if (!ab)
-		return;
 	audit_log_format(ab, "pid=%d uid=%u", task_pid_nr(current), uid);
 	audit_log_task_context(ab);
 	audit_log_format(ab, " old-auid=%u auid=%u tty=%s old-ses=%u ses=%u res=%d",
-- 
cgit v0.10.2


From 3f5be2da8565c1cce5655bb0948fcc957c6eb6c6 Mon Sep 17 00:00:00 2001
From: Richard Guy Briggs <rgb@redhat.com>
Date: Tue, 28 Jun 2016 12:07:50 -0400
Subject: audit: move audit_get_tty to reduce scope and kabi changes

The only users of audit_get_tty and audit_put_tty are internal to
audit, so move it out of include/linux/audit.h to kernel.h and create
a proper function rather than inlining it.  This also reduces kABI
changes.

Suggested-by: Paul Moore <pmoore@redhat.com>
Signed-off-by: Richard Guy Briggs <rgb@redhat.com>
[PM: line wrapped description]
Signed-off-by: Paul Moore <paul@paul-moore.com>

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 32cdafb..b40ed5d 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -26,7 +26,6 @@
 #include <linux/sched.h>
 #include <linux/ptrace.h>
 #include <uapi/linux/audit.h>
-#include <linux/tty.h>
 
 #define AUDIT_INO_UNSET ((unsigned long)-1)
 #define AUDIT_DEV_UNSET ((dev_t)-1)
@@ -344,23 +343,6 @@ static inline unsigned int audit_get_sessionid(struct task_struct *tsk)
 	return tsk->sessionid;
 }
 
-static inline struct tty_struct *audit_get_tty(struct task_struct *tsk)
-{
-	struct tty_struct *tty = NULL;
-	unsigned long flags;
-
-	spin_lock_irqsave(&tsk->sighand->siglock, flags);
-	if (tsk->signal)
-		tty = tty_kref_get(tsk->signal->tty);
-	spin_unlock_irqrestore(&tsk->sighand->siglock, flags);
-	return tty;
-}
-
-static inline void audit_put_tty(struct tty_struct *tty)
-{
-	tty_kref_put(tty);
-}
-
 extern void __audit_ipc_obj(struct kern_ipc_perm *ipcp);
 extern void __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, umode_t mode);
 extern void __audit_bprm(struct linux_binprm *bprm);
@@ -518,12 +500,6 @@ static inline unsigned int audit_get_sessionid(struct task_struct *tsk)
 {
 	return -1;
 }
-static inline struct tty_struct *audit_get_tty(struct task_struct *tsk)
-{
-	return NULL;
-}
-static inline void audit_put_tty(struct tty_struct *tty)
-{ }
 static inline void audit_ipc_obj(struct kern_ipc_perm *ipcp)
 { }
 static inline void audit_ipc_set_perm(unsigned long qbytes, uid_t uid,
diff --git a/kernel/audit.c b/kernel/audit.c
index 384374a..d597101 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -1866,6 +1866,23 @@ out_null:
 	audit_log_format(ab, " exe=(null)");
 }
 
+struct tty_struct *audit_get_tty(struct task_struct *tsk)
+{
+	struct tty_struct *tty = NULL;
+	unsigned long flags;
+
+	spin_lock_irqsave(&tsk->sighand->siglock, flags);
+	if (tsk->signal)
+		tty = tty_kref_get(tsk->signal->tty);
+	spin_unlock_irqrestore(&tsk->sighand->siglock, flags);
+	return tty;
+}
+
+void audit_put_tty(struct tty_struct *tty)
+{
+	tty_kref_put(tty);
+}
+
 void audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk)
 {
 	const struct cred *cred;
diff --git a/kernel/audit.h b/kernel/audit.h
index cbbe6bb..a492f4c 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -23,6 +23,7 @@
 #include <linux/audit.h>
 #include <linux/skbuff.h>
 #include <uapi/linux/mqueue.h>
+#include <linux/tty.h>
 
 /* AUDIT_NAMES is the number of slots we reserve in the audit_context
  * for saving names from getname().  If we get more names we will allocate
@@ -262,6 +263,9 @@ extern struct audit_entry *audit_dupe_rule(struct audit_krule *old);
 extern void audit_log_d_path_exe(struct audit_buffer *ab,
 				 struct mm_struct *mm);
 
+extern struct tty_struct *audit_get_tty(struct task_struct *tsk);
+extern void audit_put_tty(struct tty_struct *tty);
+
 /* audit watch functions */
 #ifdef CONFIG_AUDIT_WATCH
 extern void audit_put_watch(struct audit_watch *watch);
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 33dafa7..60a354e 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -63,7 +63,6 @@
 #include <asm/unistd.h>
 #include <linux/security.h>
 #include <linux/list.h>
-#include <linux/tty.h>
 #include <linux/binfmts.h>
 #include <linux/highmem.h>
 #include <linux/syscalls.h>
-- 
cgit v0.10.2


From e547f2628327fec6afd2e03b46f113f614cca05b Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Sat, 25 Jun 2016 19:19:28 -0400
Subject: NFS: Fix another OPEN_DOWNGRADE bug

Olga Kornievskaia reports that the following test fails to trigger
an OPEN_DOWNGRADE on the wire, and only triggers the final CLOSE.

	fd0 = open(foo, RDRW)   -- should be open on the wire for "both"
	fd1 = open(foo, RDONLY)  -- should be open on the wire for "read"
	close(fd0) -- should trigger an open_downgrade
	read(fd1)
	close(fd1)

The issue is that we're missing a check for whether or not the current
state transitioned from an O_RDWR state as opposed to having transitioned
from a combination of O_RDONLY and O_WRONLY.

Reported-by: Olga Kornievskaia <aglo@umich.edu>
Fixes: cd9288ffaea4 ("NFSv4: Fix another bug in the close/open_downgrade code")
Cc: stable@vger.kernel.org # 2.6.33+
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 406dd3e..ff416d0 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2882,12 +2882,11 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
 			call_close |= is_wronly;
 		else if (is_wronly)
 			calldata->arg.fmode |= FMODE_WRITE;
+		if (calldata->arg.fmode != (FMODE_READ|FMODE_WRITE))
+			call_close |= is_rdwr;
 	} else if (is_rdwr)
 		calldata->arg.fmode |= FMODE_READ|FMODE_WRITE;
 
-	if (calldata->arg.fmode == 0)
-		call_close |= is_rdwr;
-
 	if (!nfs4_valid_open_stateid(state))
 		call_close = 0;
 	spin_unlock(&state->owner->so_lock);
-- 
cgit v0.10.2


From 190ce8693c23eae09ba5f303a83bf2fbeb6478b1 Mon Sep 17 00:00:00 2001
From: Michael Neuling <mikey@neuling.org>
Date: Tue, 28 Jun 2016 13:01:04 +1000
Subject: powerpc/tm: Avoid SLB faults in treclaim/trecheckpoint when RI=0

Currently we have 2 segments that are bolted for the kernel linear
mapping (ie 0xc000... addresses). This is 0 to 1TB and also the kernel
stacks. Anything accessed outside of these regions may need to be
faulted in. (In practice machines with TM always have 1T segments)

If a machine has < 2TB of memory we never fault on the kernel linear
mapping as these two segments cover all physical memory. If a machine
has > 2TB of memory, there may be structures outside of these two
segments that need to be faulted in. This faulting can occur when
running as a guest as the hypervisor may remove any SLB that's not
bolted.

When we treclaim and trecheckpoint we have a window where we need to
run with the userspace GPRs. This means that we no longer have a valid
stack pointer in r1. For this window we therefore clear MSR RI to
indicate that any exceptions taken at this point won't be able to be
handled. This means that we can't take segment misses in this RI=0
window.

In this RI=0 region, we currently access the thread_struct for the
process being context switched to or from. This thread_struct access
may cause a segment fault since it's not guaranteed to be covered by
the two bolted segment entries described above.

We've seen this with a crash when running as a guest with > 2TB of
memory on PowerVM:

  Unrecoverable exception 4100 at c00000000004f138
  Oops: Unrecoverable exception, sig: 6 [#1]
  SMP NR_CPUS=2048 NUMA pSeries
  CPU: 1280 PID: 7755 Comm: kworker/1280:1 Tainted: G                 X 4.4.13-46-default #1
  task: c000189001df4210 ti: c000189001d5c000 task.ti: c000189001d5c000
  NIP: c00000000004f138 LR: 0000000010003a24 CTR: 0000000010001b20
  REGS: c000189001d5f730 TRAP: 4100   Tainted: G                 X  (4.4.13-46-default)
  MSR: 8000000100001031 <SF,ME,IR,DR,LE>  CR: 24000048  XER: 00000000
  CFAR: c00000000004ed18 SOFTE: 0
  GPR00: ffffffffc58d7b60 c000189001d5f9b0 00000000100d7d00 000000003a738288
  GPR04: 0000000000002781 0000000000000006 0000000000000000 c0000d1f4d889620
  GPR08: 000000000000c350 00000000000008ab 00000000000008ab 00000000100d7af0
  GPR12: 00000000100d7ae8 00003ffe787e67a0 0000000000000000 0000000000000211
  GPR16: 0000000010001b20 0000000000000000 0000000000800000 00003ffe787df110
  GPR20: 0000000000000001 00000000100d1e10 0000000000000000 00003ffe787df050
  GPR24: 0000000000000003 0000000000010000 0000000000000000 00003fffe79e2e30
  GPR28: 00003fffe79e2e68 00000000003d0f00 00003ffe787e67a0 00003ffe787de680
  NIP [c00000000004f138] restore_gprs+0xd0/0x16c
  LR [0000000010003a24] 0x10003a24
  Call Trace:
  [c000189001d5f9b0] [c000189001d5f9f0] 0xc000189001d5f9f0 (unreliable)
  [c000189001d5fb90] [c00000000001583c] tm_recheckpoint+0x6c/0xa0
  [c000189001d5fbd0] [c000000000015c40] __switch_to+0x2c0/0x350
  [c000189001d5fc30] [c0000000007e647c] __schedule+0x32c/0x9c0
  [c000189001d5fcb0] [c0000000007e6b58] schedule+0x48/0xc0
  [c000189001d5fce0] [c0000000000deabc] worker_thread+0x22c/0x5b0
  [c000189001d5fd80] [c0000000000e7000] kthread+0x110/0x130
  [c000189001d5fe30] [c000000000009538] ret_from_kernel_thread+0x5c/0xa4
  Instruction dump:
  7cb103a6 7cc0e3a6 7ca222a6 78a58402 38c00800 7cc62838 08860000 7cc000a6
  38a00006 78c60022 7cc62838 0b060000 <e8c701a0> 7ccff120 e8270078 e8a70098
  ---[ end trace 602126d0a1dedd54 ]---

This fixes this by copying the required data from the thread_struct to
the stack before we clear MSR RI. Then once we clear RI, we only access
the stack, guaranteeing there's no segment miss.

We also tighten the region over which we set RI=0 on the treclaim()
path. This may have a slight performance impact since we're adding an
mtmsr instruction.

Fixes: 090b9284d725 ("powerpc/tm: Clear MSR RI in non-recoverable TM code")
Signed-off-by: Michael Neuling <mikey@neuling.org>
Reviewed-by: Cyril Bur <cyrilbur@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>

diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S
index bf8f34a..b7019b5 100644
--- a/arch/powerpc/kernel/tm.S
+++ b/arch/powerpc/kernel/tm.S
@@ -110,17 +110,11 @@ _GLOBAL(tm_reclaim)
 	std	r3, STK_PARAM(R3)(r1)
 	SAVE_NVGPRS(r1)
 
-	/* We need to setup MSR for VSX register save instructions.  Here we
-	 * also clear the MSR RI since when we do the treclaim, we won't have a
-	 * valid kernel pointer for a while.  We clear RI here as it avoids
-	 * adding another mtmsr closer to the treclaim.  This makes the region
-	 * maked as non-recoverable wider than it needs to be but it saves on
-	 * inserting another mtmsrd later.
-	 */
+	/* We need to setup MSR for VSX register save instructions. */
 	mfmsr	r14
 	mr	r15, r14
 	ori	r15, r15, MSR_FP
-	li	r16, MSR_RI
+	li	r16, 0
 	ori	r16, r16, MSR_EE /* IRQs hard off */
 	andc	r15, r15, r16
 	oris	r15, r15, MSR_VEC@h
@@ -176,7 +170,17 @@ dont_backup_fp:
 1:	tdeqi   r6, 0
 	EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0
 
-	/* The moment we treclaim, ALL of our GPRs will switch
+	/* Clear MSR RI since we are about to change r1, EE is already off. */
+	li	r4, 0
+	mtmsrd	r4, 1
+
+	/*
+	 * BE CAREFUL HERE:
+	 * At this point we can't take an SLB miss since we have MSR_RI
+	 * off. Load only to/from the stack/paca which are in SLB bolted regions
+	 * until we turn MSR RI back on.
+	 *
+	 * The moment we treclaim, ALL of our GPRs will switch
 	 * to user register state.  (FPRs, CCR etc. also!)
 	 * Use an sprg and a tm_scratch in the PACA to shuffle.
 	 */
@@ -197,6 +201,11 @@ dont_backup_fp:
 
 	/* Store the PPR in r11 and reset to decent value */
 	std	r11, GPR11(r1)			/* Temporary stash */
+
+	/* Reset MSR RI so we can take SLB faults again */
+	li	r11, MSR_RI
+	mtmsrd	r11, 1
+
 	mfspr	r11, SPRN_PPR
 	HMT_MEDIUM
 
@@ -397,11 +406,6 @@ restore_gprs:
 	ld	r5, THREAD_TM_DSCR(r3)
 	ld	r6, THREAD_TM_PPR(r3)
 
-	/* Clear the MSR RI since we are about to change R1.  EE is already off
-	 */
-	li	r4, 0
-	mtmsrd	r4, 1
-
 	REST_GPR(0, r7)				/* GPR0 */
 	REST_2GPRS(2, r7)			/* GPR2-3 */
 	REST_GPR(4, r7)				/* GPR4 */
@@ -439,10 +443,33 @@ restore_gprs:
 	ld	r6, _CCR(r7)
 	mtcr    r6
 
-	REST_GPR(1, r7)				/* GPR1 */
-	REST_GPR(5, r7)				/* GPR5-7 */
 	REST_GPR(6, r7)
-	ld	r7, GPR7(r7)
+
+	/*
+	 * Store r1 and r5 on the stack so that we can access them
+	 * after we clear MSR RI.
+	 */
+
+	REST_GPR(5, r7)
+	std	r5, -8(r1)
+	ld	r5, GPR1(r7)
+	std	r5, -16(r1)
+
+	REST_GPR(7, r7)
+
+	/* Clear MSR RI since we are about to change r1. EE is already off */
+	li	r5, 0
+	mtmsrd	r5, 1
+
+	/*
+	 * BE CAREFUL HERE:
+	 * At this point we can't take an SLB miss since we have MSR_RI
+	 * off. Load only to/from the stack/paca which are in SLB bolted regions
+	 * until we turn MSR RI back on.
+	 */
+
+	ld	r5, -8(r1)
+	ld	r1, -16(r1)
 
 	/* Commit register state as checkpointed state: */
 	TRECHKPT
-- 
cgit v0.10.2


From a4859d75944a726533ab86d24bb5ffd1b2b7d6cc Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Wed, 29 Jun 2016 08:26:59 +0200
Subject: ovl: fix dentry leak for default_permissions

When using the 'default_permissions' mount option, ovl_permission() on
non-directories was missing a dput(alias), resulting in "BUG Dentry still
in use".

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Fixes: 8d3095f4ad47 ("ovl: default permissions")
Cc: <stable@vger.kernel.org> # v4.5+

diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index 1dbeab6..8514d69 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -121,16 +121,18 @@ int ovl_permission(struct inode *inode, int mask)
 
 		err = vfs_getattr(&realpath, &stat);
 		if (err)
-			return err;
+			goto out_dput;
 
+		err = -ESTALE;
 		if ((stat.mode ^ inode->i_mode) & S_IFMT)
-			return -ESTALE;
+			goto out_dput;
 
 		inode->i_mode = stat.mode;
 		inode->i_uid = stat.uid;
 		inode->i_gid = stat.gid;
 
-		return generic_permission(inode, mask);
+		err = generic_permission(inode, mask);
+		goto out_dput;
 	}
 
 	/* Careful in RCU walk mode */
-- 
cgit v0.10.2


From 69fc58a57e56bf5e39b48809aefffdaa1b04c945 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Fri, 24 Jun 2016 16:25:24 -0700
Subject: net: phy: Manage fixed PHY address space using IDA

If we have a system which uses fixed PHY devices and calls
fixed_phy_register() then fixed_phy_unregister() we can exhaust the
number of fixed PHYs available after a while, since we keep incrementing
the variable phy_fixed_addr, but we never decrement it.

This patch fixes that by converting the fixed PHY allocation to using
IDA, which takes care of the allocation/dealloaction of the PHY
addresses for us.

Fixes: a75951217472 ("net: phy: extend fixed driver with fixed_phy_register()")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/phy/fixed_phy.c b/drivers/net/phy/fixed_phy.c
index 2d2e433..9ec7f73 100644
--- a/drivers/net/phy/fixed_phy.c
+++ b/drivers/net/phy/fixed_phy.c
@@ -23,6 +23,7 @@
 #include <linux/slab.h>
 #include <linux/of.h>
 #include <linux/gpio.h>
+#include <linux/idr.h>
 
 #define MII_REGS_NUM 29
 
@@ -286,6 +287,8 @@ err_regs:
 }
 EXPORT_SYMBOL_GPL(fixed_phy_add);
 
+static DEFINE_IDA(phy_fixed_ida);
+
 static void fixed_phy_del(int phy_addr)
 {
 	struct fixed_mdio_bus *fmb = &platform_fmb;
@@ -297,14 +300,12 @@ static void fixed_phy_del(int phy_addr)
 			if (gpio_is_valid(fp->link_gpio))
 				gpio_free(fp->link_gpio);
 			kfree(fp);
+			ida_simple_remove(&phy_fixed_ida, phy_addr);
 			return;
 		}
 	}
 }
 
-static int phy_fixed_addr;
-static DEFINE_SPINLOCK(phy_fixed_addr_lock);
-
 struct phy_device *fixed_phy_register(unsigned int irq,
 				      struct fixed_phy_status *status,
 				      int link_gpio,
@@ -319,17 +320,15 @@ struct phy_device *fixed_phy_register(unsigned int irq,
 		return ERR_PTR(-EPROBE_DEFER);
 
 	/* Get the next available PHY address, up to PHY_MAX_ADDR */
-	spin_lock(&phy_fixed_addr_lock);
-	if (phy_fixed_addr == PHY_MAX_ADDR) {
-		spin_unlock(&phy_fixed_addr_lock);
-		return ERR_PTR(-ENOSPC);
-	}
-	phy_addr = phy_fixed_addr++;
-	spin_unlock(&phy_fixed_addr_lock);
+	phy_addr = ida_simple_get(&phy_fixed_ida, 0, PHY_MAX_ADDR, GFP_KERNEL);
+	if (phy_addr < 0)
+		return ERR_PTR(phy_addr);
 
 	ret = fixed_phy_add(irq, phy_addr, status, link_gpio);
-	if (ret < 0)
+	if (ret < 0) {
+		ida_simple_remove(&phy_fixed_ida, phy_addr);
 		return ERR_PTR(ret);
+	}
 
 	phy = get_phy_device(fmb->mii_bus, phy_addr, false);
 	if (IS_ERR(phy)) {
@@ -434,6 +433,7 @@ static void __exit fixed_mdio_bus_exit(void)
 		list_del(&fp->node);
 		kfree(fp);
 	}
+	ida_destroy(&phy_fixed_ida);
 }
 module_exit(fixed_mdio_bus_exit);
 
-- 
cgit v0.10.2


From 0b3dd7dfb81ad8af53791ea2bb64b83bac1b7d32 Mon Sep 17 00:00:00 2001
From: Simon Wunderlich <sw@simonwunderlich.de>
Date: Sun, 26 Jun 2016 11:16:09 +0200
Subject: batman-adv: replace WARN with rate limited output on non-existing
 VLAN

If a VLAN tagged frame is received and the corresponding VLAN is not
configured on the soft interface, it will splat a WARN on every packet
received. This is a quite annoying behaviour for some scenarios, e.g. if
bat0 is bridged with eth0, and there are arbitrary VLAN tagged frames
from Ethernet coming in without having any VLAN configuration on bat0.

The code should probably create vlan objects on the fly and
transparently transport these VLAN-tagged Ethernet frames, but until
this is done, at least the WARN splat should be replaced by a rate
limited output.

Fixes: 354136bcc3c4 ("batman-adv: fix kernel crash due to missing NULL checks")
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
Signed-off-by: Marek Lindner <mareklindner@neomailbox.ch>
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index feaf492b..72abab7 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -650,8 +650,10 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr,
 
 	/* increase the refcounter of the related vlan */
 	vlan = batadv_softif_vlan_get(bat_priv, vid);
-	if (WARN(!vlan, "adding TT local entry %pM to non-existent VLAN %d",
-		 addr, BATADV_PRINT_VID(vid))) {
+	if (!vlan) {
+		net_ratelimited_function(batadv_info, soft_iface,
+					 "adding TT local entry %pM to non-existent VLAN %d\n",
+					 addr, BATADV_PRINT_VID(vid));
 		kfree(tt_local);
 		tt_local = NULL;
 		goto out;
-- 
cgit v0.10.2


From 9c4604a298e0a9807eaf2cd912d1ebf24d98fbeb Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven@narfation.org>
Date: Sun, 26 Jun 2016 11:16:10 +0200
Subject: batman-adv: Fix use-after-free/double-free of tt_req_node

The tt_req_node is added and removed from a list inside a spinlock. But the
locking is sometimes removed even when the object is still referenced and
will be used later via this reference. For example batadv_send_tt_request
can create a new tt_req_node (including add to a list) and later
re-acquires the lock to remove it from the list and to free it. But at this
time another context could have already removed this tt_req_node from the
list and freed it.

CPU#0

    batadv_batman_skb_recv from net_device 0
    -> batadv_iv_ogm_receive
      -> batadv_iv_ogm_process
        -> batadv_iv_ogm_process_per_outif
          -> batadv_tvlv_ogm_receive
            -> batadv_tvlv_ogm_receive
              -> batadv_tvlv_containers_process
                -> batadv_tvlv_call_handler
                  -> batadv_tt_tvlv_ogm_handler_v1
                    -> batadv_tt_update_orig
                      -> batadv_send_tt_request
                        -> batadv_tt_req_node_new
                           spin_lock(...)
                           allocates new tt_req_node and adds it to list
                           spin_unlock(...)
                           return tt_req_node

CPU#1

    batadv_batman_skb_recv from net_device 1
    -> batadv_recv_unicast_tvlv
      -> batadv_tvlv_containers_process
        -> batadv_tvlv_call_handler
          -> batadv_tt_tvlv_unicast_handler_v1
            -> batadv_handle_tt_response
               spin_lock(...)
               tt_req_node gets removed from list and is freed
               spin_unlock(...)

CPU#0

                      <- returned to batadv_send_tt_request
                         spin_lock(...)
                         tt_req_node gets removed from list and is freed
                         MEMORY CORRUPTION/SEGFAULT/...
                         spin_unlock(...)

This can only be solved via reference counting to allow multiple contexts
to handle the list manipulation while making sure that only the last
context holding a reference will free the object.

Fixes: a73105b8d4c7 ("batman-adv: improved client announcement mechanism")
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Tested-by: Martin Weinelt <martin@darmstadt.freifunk.net>
Tested-by: Amadeus Alfa <amadeus@chemnitz.freifunk.net>
Signed-off-by: Marek Lindner <mareklindner@neomailbox.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 72abab7..cfb5ccd 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -2271,6 +2271,29 @@ static u32 batadv_tt_local_crc(struct batadv_priv *bat_priv,
 	return crc;
 }
 
+/**
+ * batadv_tt_req_node_release - free tt_req node entry
+ * @ref: kref pointer of the tt req_node entry
+ */
+static void batadv_tt_req_node_release(struct kref *ref)
+{
+	struct batadv_tt_req_node *tt_req_node;
+
+	tt_req_node = container_of(ref, struct batadv_tt_req_node, refcount);
+
+	kfree(tt_req_node);
+}
+
+/**
+ * batadv_tt_req_node_put - decrement the tt_req_node refcounter and
+ *  possibly release it
+ * @tt_req_node: tt_req_node to be free'd
+ */
+static void batadv_tt_req_node_put(struct batadv_tt_req_node *tt_req_node)
+{
+	kref_put(&tt_req_node->refcount, batadv_tt_req_node_release);
+}
+
 static void batadv_tt_req_list_free(struct batadv_priv *bat_priv)
 {
 	struct batadv_tt_req_node *node;
@@ -2280,7 +2303,7 @@ static void batadv_tt_req_list_free(struct batadv_priv *bat_priv)
 
 	hlist_for_each_entry_safe(node, safe, &bat_priv->tt.req_list, list) {
 		hlist_del_init(&node->list);
-		kfree(node);
+		batadv_tt_req_node_put(node);
 	}
 
 	spin_unlock_bh(&bat_priv->tt.req_list_lock);
@@ -2317,7 +2340,7 @@ static void batadv_tt_req_purge(struct batadv_priv *bat_priv)
 		if (batadv_has_timed_out(node->issued_at,
 					 BATADV_TT_REQUEST_TIMEOUT)) {
 			hlist_del_init(&node->list);
-			kfree(node);
+			batadv_tt_req_node_put(node);
 		}
 	}
 	spin_unlock_bh(&bat_priv->tt.req_list_lock);
@@ -2349,9 +2372,11 @@ batadv_tt_req_node_new(struct batadv_priv *bat_priv,
 	if (!tt_req_node)
 		goto unlock;
 
+	kref_init(&tt_req_node->refcount);
 	ether_addr_copy(tt_req_node->addr, orig_node->orig);
 	tt_req_node->issued_at = jiffies;
 
+	kref_get(&tt_req_node->refcount);
 	hlist_add_head(&tt_req_node->list, &bat_priv->tt.req_list);
 unlock:
 	spin_unlock_bh(&bat_priv->tt.req_list_lock);
@@ -2615,13 +2640,19 @@ static bool batadv_send_tt_request(struct batadv_priv *bat_priv,
 out:
 	if (primary_if)
 		batadv_hardif_put(primary_if);
+
 	if (ret && tt_req_node) {
 		spin_lock_bh(&bat_priv->tt.req_list_lock);
-		/* hlist_del_init() verifies tt_req_node still is in the list */
-		hlist_del_init(&tt_req_node->list);
+		if (!hlist_unhashed(&tt_req_node->list)) {
+			hlist_del_init(&tt_req_node->list);
+			batadv_tt_req_node_put(tt_req_node);
+		}
 		spin_unlock_bh(&bat_priv->tt.req_list_lock);
-		kfree(tt_req_node);
 	}
+
+	if (tt_req_node)
+		batadv_tt_req_node_put(tt_req_node);
+
 	kfree(tvlv_tt_data);
 	return ret;
 }
@@ -3057,7 +3088,7 @@ static void batadv_handle_tt_response(struct batadv_priv *bat_priv,
 		if (!batadv_compare_eth(node->addr, resp_src))
 			continue;
 		hlist_del_init(&node->list);
-		kfree(node);
+		batadv_tt_req_node_put(node);
 	}
 
 	spin_unlock_bh(&bat_priv->tt.req_list_lock);
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index 6a577f4..ba846b0 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -1137,11 +1137,13 @@ struct batadv_tt_change_node {
  * struct batadv_tt_req_node - data to keep track of the tt requests in flight
  * @addr: mac address address of the originator this request was sent to
  * @issued_at: timestamp used for purging stale tt requests
+ * @refcount: number of contexts the object is used by
  * @list: list node for batadv_priv_tt::req_list
  */
 struct batadv_tt_req_node {
 	u8 addr[ETH_ALEN];
 	unsigned long issued_at;
+	struct kref refcount;
 	struct hlist_node list;
 };
 
-- 
cgit v0.10.2


From baceced93274ff2f846eae991664f9094425ffa8 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Sun, 26 Jun 2016 11:16:11 +0200
Subject: batman-adv: Fix double-put of vlan object

Each batadv_tt_local_entry hold a single reference to a
batadv_softif_vlan.  In case a new entry cannot be added to the hash
table, the error path puts the reference, but the reference will also
now be dropped by batadv_tt_local_entry_release().

Fixes: a33d970d0b54 ("batman-adv: Fix reference counting of vlan object for tt_local_entry")
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: Marek Lindner <mareklindner@neomailbox.ch>
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index cfb5ccd..57ec87f 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -693,7 +693,6 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr,
 	if (unlikely(hash_added != 0)) {
 		/* remove the reference for the hash */
 		batadv_tt_local_entry_put(tt_local);
-		batadv_softif_vlan_put(vlan);
 		goto out;
 	}
 
-- 
cgit v0.10.2


From 3b55e4422087f9f7b241031d758a0c65584e4297 Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven@narfation.org>
Date: Sun, 26 Jun 2016 11:16:12 +0200
Subject: batman-adv: Fix ICMP RR ethernet access after skb_linearize

The skb_linearize may reallocate the skb. This makes the calculated pointer
for ethhdr invalid. But it the pointer is used later to fill in the RR
field of the batadv_icmp_packet_rr packet.

Instead re-evaluate eth_hdr after the skb_linearize+skb_cow to fix the
pointer and avoid the invalid read.

Fixes: da6b8c20a5b8 ("batman-adv: generalize batman-adv icmp packet handling")
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Marek Lindner <mareklindner@neomailbox.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index e3857ed..6c2901a 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -374,6 +374,7 @@ int batadv_recv_icmp_packet(struct sk_buff *skb,
 		if (skb_cow(skb, ETH_HLEN) < 0)
 			goto out;
 
+		ethhdr = eth_hdr(skb);
 		icmph = (struct batadv_icmp_header *)skb->data;
 		icmp_packet_rr = (struct batadv_icmp_packet_rr *)icmph;
 		if (icmp_packet_rr->rr_cur >= BATADV_RR_LEN)
-- 
cgit v0.10.2


From 420cb1b764f9169c5d2601b4af90e4a1702345ee Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven@narfation.org>
Date: Sun, 26 Jun 2016 11:16:13 +0200
Subject: batman-adv: Clean up untagged vlan when destroying via rtnl-link

The untagged vlan object is only destroyed when the interface is removed
via the legacy sysfs interface. But it also has to be destroyed when the
standard rtnl-link interface is used.

Fixes: 5d2c05b21337 ("batman-adv: add per VLAN interface attribute framework")
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Acked-by: Antonio Quartulli <a@unstable.cc>
Signed-off-by: Marek Lindner <mareklindner@neomailbox.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 343d2c9..287a387 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -1033,7 +1033,9 @@ void batadv_softif_destroy_sysfs(struct net_device *soft_iface)
 static void batadv_softif_destroy_netlink(struct net_device *soft_iface,
 					  struct list_head *head)
 {
+	struct batadv_priv *bat_priv = netdev_priv(soft_iface);
 	struct batadv_hard_iface *hard_iface;
+	struct batadv_softif_vlan *vlan;
 
 	list_for_each_entry(hard_iface, &batadv_hardif_list, list) {
 		if (hard_iface->soft_iface == soft_iface)
@@ -1041,6 +1043,13 @@ static void batadv_softif_destroy_netlink(struct net_device *soft_iface,
 							BATADV_IF_CLEANUP_KEEP);
 	}
 
+	/* destroy the "untagged" VLAN */
+	vlan = batadv_softif_vlan_get(bat_priv, BATADV_NO_FLAGS);
+	if (vlan) {
+		batadv_softif_destroy_vlan(bat_priv, vlan);
+		batadv_softif_vlan_put(vlan);
+	}
+
 	batadv_sysfs_del_meshif(soft_iface);
 	unregister_netdevice_queue(soft_iface, head);
 }
-- 
cgit v0.10.2


From 3ec0a0f10ceb77c78f540ded1d13bf0cf7f89a07 Mon Sep 17 00:00:00 2001
From: Harini Katakam <harini.katakam@xilinx.com>
Date: Mon, 27 Jun 2016 13:09:59 +0530
Subject: net: marvell: Add separate config ANEG function for Marvell 88E1111

Marvell 88E1111 currently uses the generic marvell config ANEG function.
This function has a sequence accessing Page 5 and Register 31,
both of which are not defined or reserved for this PHY.
Hence this patch adds a new config ANEG function for Marvell 88E1111
without these erroneous accesses.

Signed-off-by: Harini Katakam <harinik@xilinx.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index 79ccc11..ec2c1ee 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -285,6 +285,48 @@ static int marvell_config_aneg(struct phy_device *phydev)
 	return 0;
 }
 
+static int m88e1111_config_aneg(struct phy_device *phydev)
+{
+	int err;
+
+	/* The Marvell PHY has an errata which requires
+	 * that certain registers get written in order
+	 * to restart autonegotiation
+	 */
+	err = phy_write(phydev, MII_BMCR, BMCR_RESET);
+
+	err = marvell_set_polarity(phydev, phydev->mdix);
+	if (err < 0)
+		return err;
+
+	err = phy_write(phydev, MII_M1111_PHY_LED_CONTROL,
+			MII_M1111_PHY_LED_DIRECT);
+	if (err < 0)
+		return err;
+
+	err = genphy_config_aneg(phydev);
+	if (err < 0)
+		return err;
+
+	if (phydev->autoneg != AUTONEG_ENABLE) {
+		int bmcr;
+
+		/* A write to speed/duplex bits (that is performed by
+		 * genphy_config_aneg() call above) must be followed by
+		 * a software reset. Otherwise, the write has no effect.
+		 */
+		bmcr = phy_read(phydev, MII_BMCR);
+		if (bmcr < 0)
+			return bmcr;
+
+		err = phy_write(phydev, MII_BMCR, bmcr | BMCR_RESET);
+		if (err < 0)
+			return err;
+	}
+
+	return 0;
+}
+
 #ifdef CONFIG_OF_MDIO
 /*
  * Set and/or override some configuration registers based on the
@@ -1175,7 +1217,7 @@ static struct phy_driver marvell_drivers[] = {
 		.flags = PHY_HAS_INTERRUPT,
 		.probe = marvell_probe,
 		.config_init = &m88e1111_config_init,
-		.config_aneg = &marvell_config_aneg,
+		.config_aneg = &m88e1111_config_aneg,
 		.read_status = &marvell_read_status,
 		.ack_interrupt = &marvell_ack_interrupt,
 		.config_intr = &marvell_config_intr,
-- 
cgit v0.10.2


From 664a84d2c77cbff2945ed7f96d08afbba42b6293 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Fri, 13 May 2016 20:53:56 +0300
Subject: drm/i915: Refresh cached DP port register value on resume
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

During hibernation the cached DP port register value will be left with
whatever value we have there when we create the hibernation image.
Currently that means the port (and eDP PLL) will be off in the cached
value. However when we resume there is no guarantee that the value
in the actual register will match the cached value. If i915 isn't
loaded in the kernel that loads the hibernation image, the port may
well be on (eg. left on by the BIOS). The encoder state readout
does the right thing in this case and updates our encoder state
to reflect the actual hardware state. However the post-resume modeset
will then use the stale cached port register value in
intel_dp_link_down() and potentially confuse the hardware.

This was caught by the following assert
 WARNING: CPU: 3 PID: 5288 at ../drivers/gpu/drm/i915/intel_dp.c:2184 assert_edp_pll+0x99/0xa0 [i915]
 eDP PLL state assertion failure (expected on, current off)
on account of the eDP PLL getting prematurely turned off when
shutting down the port, since the DP_PLL_ENABLE bit wasn't set
in the cached register value.

Presumably I introduced this problem in
commit 6fec76628333 ("drm/i915: Use intel_dp->DP in eDP PLL setup")
as before that we didn't update the cached value after shuttting the
port down. That's assuming the port got enabled at least once prior
to hibernating. If that didn't happen then the cached value would
still have been totally out of sync with reality (eg. first boot w/o
eDP on, then hibernate, and then resume with eDP on).

So, let's fix this properly and refresh the cached register value from
the hardware register during resume.

DDI platforms shouldn't use the cached value during port disable at
least, so shouldn't have this particular issue. They might still have
issues if we skip the initial modeset and then try to retrain the link
or something. But untangling this DP vs. DDI mess is a bigger topic,
so let's jut punt on DDI for now.

Cc: Jani Nikula <jani.nikula@intel.com>
Cc: stable@vger.kernel.org
Fixes: 6fec76628333 ("drm/i915: Use intel_dp->DP in eDP PLL setup")
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1463162036-27931-1-git-send-email-ville.syrjala@linux.intel.com
Reviewed-by: Imre Deak <imre.deak@intel.com>
(cherry picked from commit 64989ca4b27acb026b6496ec21e43bee66f86a5b)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>

diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index 79cf2d5..49c582d 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -4899,13 +4899,15 @@ static void intel_edp_panel_vdd_sanitize(struct intel_dp *intel_dp)
 
 void intel_dp_encoder_reset(struct drm_encoder *encoder)
 {
-	struct intel_dp *intel_dp;
+	struct drm_i915_private *dev_priv = to_i915(encoder->dev);
+	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
+
+	if (!HAS_DDI(dev_priv))
+		intel_dp->DP = I915_READ(intel_dp->output_reg);
 
 	if (to_intel_encoder(encoder)->type != INTEL_OUTPUT_EDP)
 		return;
 
-	intel_dp = enc_to_intel_dp(encoder);
-
 	pps_lock(intel_dp);
 
 	/*
-- 
cgit v0.10.2


From ea12e2a0cad62f6d634da62e7bf50ffe077de13b Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@intel.com>
Date: Tue, 28 Jun 2016 13:37:30 +0300
Subject: drm/i915/bxt: Avoid early timeout during PLL enable

Since wait_for_atomic doesn't re-check the wait-for condition after
expiry of the timeout it can fail when called from non-atomic context
even if the condition is set correctly before the expiry. Fix this by
using the non-atomic wait_for instead.

I noticed this via the PLL locking timing out incorrectly, with this fix
I couldn't reproduce the problem.

Fixes: 0351b93992aa ("drm/i915: Do not lie about atomic timeout granularity")
CC: Chris Wilson <chris@chris-wilson.co.uk>
CC: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Signed-off-by: Imre Deak <imre.deak@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
CC: drm-intel-fixes@lists.freedesktop.org
Link: http://patchwork.freedesktop.org/patch/msgid/1467110253-16046-2-git-send-email-imre.deak@intel.com
(cherry picked from commit 0b786e41c73956126f6297764459021deef8aba7)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>

diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c b/drivers/gpu/drm/i915/intel_dpll_mgr.c
index baf6f55..58f60b2 100644
--- a/drivers/gpu/drm/i915/intel_dpll_mgr.c
+++ b/drivers/gpu/drm/i915/intel_dpll_mgr.c
@@ -1377,8 +1377,8 @@ static void bxt_ddi_pll_enable(struct drm_i915_private *dev_priv,
 	I915_WRITE(BXT_PORT_PLL_ENABLE(port), temp);
 	POSTING_READ(BXT_PORT_PLL_ENABLE(port));
 
-	if (wait_for_atomic_us((I915_READ(BXT_PORT_PLL_ENABLE(port)) &
-			PORT_PLL_LOCK), 200))
+	if (wait_for_us((I915_READ(BXT_PORT_PLL_ENABLE(port)) & PORT_PLL_LOCK),
+			200))
 		DRM_ERROR("PLL %d not locked\n", port);
 
 	/*
-- 
cgit v0.10.2


From 11f6145791775ce41b8993fd29e19eb53ef0ff14 Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@intel.com>
Date: Tue, 28 Jun 2016 13:37:31 +0300
Subject: drm/i915/lpt: Avoid early timeout during FDI PHY reset

Since wait_for_atomic doesn't re-check the wait-for condition after
expiry of the timeout it can fail when called from non-atomic context
even if the condition is set correctly before the expiry. Fix this by
using the non-atomic wait_for instead.

Fixes: 0351b93992aa ("drm/i915: Do not lie about atomic timeout granularity")
CC: Chris Wilson <chris@chris-wilson.co.uk>
CC: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Signed-off-by: Imre Deak <imre.deak@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
CC: drm-intel-fixes@lists.freedesktop.org
Link: http://patchwork.freedesktop.org/patch/msgid/1467110253-16046-3-git-send-email-imre.deak@intel.com
(cherry picked from commit cf3598c23cd09d5f063fa8c12fe9ddd5a352d3d5)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>

diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 56a1637..a164ea4 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -8447,16 +8447,16 @@ static void lpt_reset_fdi_mphy(struct drm_i915_private *dev_priv)
 	tmp |= FDI_MPHY_IOSFSB_RESET_CTL;
 	I915_WRITE(SOUTH_CHICKEN2, tmp);
 
-	if (wait_for_atomic_us(I915_READ(SOUTH_CHICKEN2) &
-			       FDI_MPHY_IOSFSB_RESET_STATUS, 100))
+	if (wait_for_us(I915_READ(SOUTH_CHICKEN2) &
+			FDI_MPHY_IOSFSB_RESET_STATUS, 100))
 		DRM_ERROR("FDI mPHY reset assert timeout\n");
 
 	tmp = I915_READ(SOUTH_CHICKEN2);
 	tmp &= ~FDI_MPHY_IOSFSB_RESET_CTL;
 	I915_WRITE(SOUTH_CHICKEN2, tmp);
 
-	if (wait_for_atomic_us((I915_READ(SOUTH_CHICKEN2) &
-				FDI_MPHY_IOSFSB_RESET_STATUS) == 0, 100))
+	if (wait_for_us((I915_READ(SOUTH_CHICKEN2) &
+			 FDI_MPHY_IOSFSB_RESET_STATUS) == 0, 100))
 		DRM_ERROR("FDI mPHY reset de-assert timeout\n");
 }
 
-- 
cgit v0.10.2


From fa7c13e5b1e2076b0ec716ed584ab76f9e65b7a6 Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@intel.com>
Date: Tue, 28 Jun 2016 13:37:32 +0300
Subject: drm/i915/hsw: Avoid early timeout during LCPLL disable/restore

Since wait_for_atomic doesn't re-check the wait-for condition after
expiry of the timeout it can fail when called from non-atomic context
even if the condition is set correctly before the expiry. Fix this by
using the non-atomic wait_for instead.

Fixes: 0351b93992aa ("drm/i915: Do not lie about atomic timeout granularity")
CC: Chris Wilson <chris@chris-wilson.co.uk>
CC: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Signed-off-by: Imre Deak <imre.deak@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
CC: drm-intel-fixes@lists.freedesktop.org
Link: http://patchwork.freedesktop.org/patch/msgid/1467110253-16046-4-git-send-email-imre.deak@intel.com
(cherry picked from commit f53dd63f1119a98a16d1a5a7cb3277a2f1ff483d)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>

diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index a164ea4..04452cf 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -9440,8 +9440,8 @@ static void hsw_disable_lcpll(struct drm_i915_private *dev_priv,
 		val |= LCPLL_CD_SOURCE_FCLK;
 		I915_WRITE(LCPLL_CTL, val);
 
-		if (wait_for_atomic_us(I915_READ(LCPLL_CTL) &
-				       LCPLL_CD_SOURCE_FCLK_DONE, 1))
+		if (wait_for_us(I915_READ(LCPLL_CTL) &
+				LCPLL_CD_SOURCE_FCLK_DONE, 1))
 			DRM_ERROR("Switching to FCLK failed\n");
 
 		val = I915_READ(LCPLL_CTL);
@@ -9514,8 +9514,8 @@ static void hsw_restore_lcpll(struct drm_i915_private *dev_priv)
 		val &= ~LCPLL_CD_SOURCE_FCLK;
 		I915_WRITE(LCPLL_CTL, val);
 
-		if (wait_for_atomic_us((I915_READ(LCPLL_CTL) &
-					LCPLL_CD_SOURCE_FCLK_DONE) == 0, 1))
+		if (wait_for_us((I915_READ(LCPLL_CTL) &
+				 LCPLL_CD_SOURCE_FCLK_DONE) == 0, 1))
 			DRM_ERROR("Switching back to LCPLL failed\n");
 	}
 
-- 
cgit v0.10.2


From ba34a65324259082dc6d2924cb82d562db1c6a6b Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@intel.com>
Date: Tue, 28 Jun 2016 13:37:33 +0300
Subject: drm/i915: Avoid early timeout during AUX transfers

Since wait_for_atomic doesn't re-check the wait-for condition after
expiry of the timeout it can fail when called from non-atomic context
even if the condition is set correctly before the expiry. Fix this by
using the non-atomic wait_for instead.

Due to the relatively long 10ms timeout, probably this didn't cause any
real problems, but fix it in any case for consistency.

Fixes: 0351b93992aa ("drm/i915: Do not lie about atomic timeout granularity")
CC: Chris Wilson <chris@chris-wilson.co.uk>
CC: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Signed-off-by: Imre Deak <imre.deak@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
CC: drm-intel-fixes@lists.freedesktop.org
Link: http://patchwork.freedesktop.org/patch/msgid/1467110253-16046-5-git-send-email-imre.deak@intel.com
(cherry picked from commit 713a6b668932213247b394559bc229cd0fec2777)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>

diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index 49c582d..40745e38d 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -663,7 +663,7 @@ intel_dp_aux_wait_done(struct intel_dp *intel_dp, bool has_aux_irq)
 		done = wait_event_timeout(dev_priv->gmbus_wait_queue, C,
 					  msecs_to_jiffies_timeout(10));
 	else
-		done = wait_for_atomic(C, 10) == 0;
+		done = wait_for(C, 10) == 0;
 	if (!done)
 		DRM_ERROR("dp aux hw did not signal timeout (has irq: %i)!\n",
 			  has_aux_irq);
-- 
cgit v0.10.2


From 5be1ea899da4f11de92897d2ea706e0820609b9e Mon Sep 17 00:00:00 2001
From: Eli Cohen <eli@mellanox.com>
Date: Mon, 27 Jun 2016 12:08:32 +0300
Subject: net/mlx5: Update command strings

Add command string for MODIFY_FLOW_TABLE which is used by the driver.

Signed-off-by: Eli Cohen <eli@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index dcd2df6..0b49862 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -545,6 +545,7 @@ const char *mlx5_command_str(int command)
 	MLX5_COMMAND_STR_CASE(ALLOC_FLOW_COUNTER);
 	MLX5_COMMAND_STR_CASE(DEALLOC_FLOW_COUNTER);
 	MLX5_COMMAND_STR_CASE(QUERY_FLOW_COUNTER);
+	MLX5_COMMAND_STR_CASE(MODIFY_FLOW_TABLE);
 	default: return "unknown command opcode";
 	}
 }
-- 
cgit v0.10.2


From 7092fe866907f4f243122ab388cbf0e77305afaa Mon Sep 17 00:00:00 2001
From: Majd Dibbiny <majd@mellanox.com>
Date: Mon, 27 Jun 2016 12:08:33 +0300
Subject: net/mlx5: Add ConnectX-5 PCIe 4.0 to list of supported devices

Add the upcoming ConnectX-5 PCIe 4.0 device to the list of
supported devices by the mlx5 driver.

Signed-off-by: Majd Dibbiny <majd@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index a19b593..c65f4a1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1508,8 +1508,9 @@ static const struct pci_device_id mlx5_core_pci_table[] = {
 	{ PCI_VDEVICE(MELLANOX, 0x1014), MLX5_PCI_DEV_IS_VF},	/* ConnectX-4 VF */
 	{ PCI_VDEVICE(MELLANOX, 0x1015) },			/* ConnectX-4LX */
 	{ PCI_VDEVICE(MELLANOX, 0x1016), MLX5_PCI_DEV_IS_VF},	/* ConnectX-4LX VF */
-	{ PCI_VDEVICE(MELLANOX, 0x1017) },			/* ConnectX-5 */
+	{ PCI_VDEVICE(MELLANOX, 0x1017) },			/* ConnectX-5, PCIe 3.0 */
 	{ PCI_VDEVICE(MELLANOX, 0x1018), MLX5_PCI_DEV_IS_VF},	/* ConnectX-5 VF */
+	{ PCI_VDEVICE(MELLANOX, 0x1019) },			/* ConnectX-5, PCIe 4.0 */
 	{ 0, }
 };
 
-- 
cgit v0.10.2


From e0f46eb9f64ca2e97d242b6e02b8ae2c7fe6dc56 Mon Sep 17 00:00:00 2001
From: Eli Cohen <eli@mellanox.com>
Date: Mon, 27 Jun 2016 12:08:34 +0300
Subject: net/mlx5e: Change enum to better reflect usage

Change MLX5E_STATE_ASYNC_EVENTS_ENABLE to
MLX5E_STATE_ASYNC_EVENTS_ENABLED since it represent a state and not an
operation.

Fixes: acff797cd1874 ('net/mlx5: Extend mlx5_core to support ConnectX-4 Ethernet functionality')
Signed-off-by: Eli Cohen <eli@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index e8a6c33..baa991a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -401,7 +401,7 @@ enum mlx5e_traffic_types {
 };
 
 enum {
-	MLX5E_STATE_ASYNC_EVENTS_ENABLE,
+	MLX5E_STATE_ASYNC_EVENTS_ENABLED,
 	MLX5E_STATE_OPENED,
 	MLX5E_STATE_DESTROYING,
 };
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index f5c8d5d..4383f8c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -244,7 +244,7 @@ static void mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv,
 {
 	struct mlx5e_priv *priv = vpriv;
 
-	if (!test_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state))
+	if (!test_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, &priv->state))
 		return;
 
 	switch (event) {
@@ -260,12 +260,12 @@ static void mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv,
 
 static void mlx5e_enable_async_events(struct mlx5e_priv *priv)
 {
-	set_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state);
+	set_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, &priv->state);
 }
 
 static void mlx5e_disable_async_events(struct mlx5e_priv *priv)
 {
-	clear_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state);
+	clear_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, &priv->state);
 	synchronize_irq(mlx5_get_msix_vec(priv->mdev, MLX5_EQ_VEC_ASYNC));
 }
 
-- 
cgit v0.10.2


From fd4782c21359cfd52af4c3180a2bb6bad55c1eba Mon Sep 17 00:00:00 2001
From: Gal Pressman <galp@mellanox.com>
Date: Mon, 27 Jun 2016 12:08:35 +0300
Subject: net/mlx5e: Check for BlueFlame capability before allocating SQ uar

Previous to this patch mapping was always set to write combining without
checking whether BlueFlame is supported in the device.

Fixes: 0ba422410bbf ('net/mlx5: Fix global UAR mapping')
Signed-off-by: Gal Pressman <galp@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 4383f8c..793d7a1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -580,7 +580,7 @@ static int mlx5e_create_sq(struct mlx5e_channel *c,
 	void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq);
 	int err;
 
-	err = mlx5_alloc_map_uar(mdev, &sq->uar, true);
+	err = mlx5_alloc_map_uar(mdev, &sq->uar, !!MLX5_CAP_GEN(mdev, bf));
 	if (err)
 		return err;
 
-- 
cgit v0.10.2


From 9ceec359e4ebdbbfd35375203c5bd06d602225f7 Mon Sep 17 00:00:00 2001
From: Matthew Finlay <matt@mellanox.com>
Date: Mon, 27 Jun 2016 12:08:36 +0300
Subject: net/mlx5e: Prevent adding the same vxlan port

Do not allow the same vxlan udp port to be added to the device more than
once.

Fixes: b3f63c3d5e2c ("net/mlx5e: Add netdev support for VXLAN tunneling")
Signed-off-by: Matthew Finlay <matt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c
index f2fd1ef..05de772 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c
@@ -105,6 +105,9 @@ static void mlx5e_vxlan_add_port(struct work_struct *work)
 	struct mlx5e_vxlan *vxlan;
 	int err;
 
+	if (mlx5e_vxlan_lookup_port(priv, port))
+		goto free_work;
+
 	if (mlx5e_vxlan_core_add_port_cmd(priv->mdev, port))
 		goto free_work;
 
-- 
cgit v0.10.2


From ed80ec4c179d1f44cc1b36c7a102353ae6103793 Mon Sep 17 00:00:00 2001
From: Gal Pressman <galp@mellanox.com>
Date: Mon, 27 Jun 2016 12:08:37 +0300
Subject: net/mlx5e: Fix number of PFC counters reported to ethtool

Number of PFC counters used to count only number of priorities with PFC
enabled, but each priority has more than one counter, hence the need to
multiply it by the number of PFC counters per priority.

Fixes: cf678570d5a1 ('net/mlx5e: Add per priority group to PPort counters')
Signed-off-by: Gal Pressman <galp@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index fc7dcc0..c709d41 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -184,7 +184,9 @@ static unsigned long mlx5e_query_pfc_combined(struct mlx5e_priv *priv)
 #define MLX5E_NUM_SQ_STATS(priv) \
 	(NUM_SQ_STATS * priv->params.num_channels * priv->params.num_tc * \
 	 test_bit(MLX5E_STATE_OPENED, &priv->state))
-#define MLX5E_NUM_PFC_COUNTERS(priv) hweight8(mlx5e_query_pfc_combined(priv))
+#define MLX5E_NUM_PFC_COUNTERS(priv) \
+	(hweight8(mlx5e_query_pfc_combined(priv)) * \
+	 NUM_PPORT_PER_PRIO_PFC_COUNTERS)
 
 static int mlx5e_get_sset_count(struct net_device *dev, int sset)
 {
-- 
cgit v0.10.2


From bfe6d8d1d433cbd5513a93132695e6dbdd79e6f2 Mon Sep 17 00:00:00 2001
From: Gal Pressman <galp@mellanox.com>
Date: Mon, 27 Jun 2016 12:08:38 +0300
Subject: net/mlx5e: Reorganize ethtool statistics

Categorize and reorganize ethtool statistics counters by renaming to
"rx_*" and "tx_*" and removing redundant and duplicated counters, this
way they are easier to grasp and more user friendly.

Signed-off-by: Gal Pressman <galp@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index c709d41..e667a87 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -213,42 +213,41 @@ static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, uint8_t *data)
 
 	/* SW counters */
 	for (i = 0; i < NUM_SW_COUNTERS; i++)
-		strcpy(data + (idx++) * ETH_GSTRING_LEN, sw_stats_desc[i].name);
+		strcpy(data + (idx++) * ETH_GSTRING_LEN, sw_stats_desc[i].format);
 
 	/* Q counters */
 	for (i = 0; i < MLX5E_NUM_Q_CNTRS(priv); i++)
-		strcpy(data + (idx++) * ETH_GSTRING_LEN, q_stats_desc[i].name);
+		strcpy(data + (idx++) * ETH_GSTRING_LEN, q_stats_desc[i].format);
 
 	/* VPORT counters */
 	for (i = 0; i < NUM_VPORT_COUNTERS; i++)
 		strcpy(data + (idx++) * ETH_GSTRING_LEN,
-		       vport_stats_desc[i].name);
+		       vport_stats_desc[i].format);
 
 	/* PPORT counters */
 	for (i = 0; i < NUM_PPORT_802_3_COUNTERS; i++)
 		strcpy(data + (idx++) * ETH_GSTRING_LEN,
-		       pport_802_3_stats_desc[i].name);
+		       pport_802_3_stats_desc[i].format);
 
 	for (i = 0; i < NUM_PPORT_2863_COUNTERS; i++)
 		strcpy(data + (idx++) * ETH_GSTRING_LEN,
-		       pport_2863_stats_desc[i].name);
+		       pport_2863_stats_desc[i].format);
 
 	for (i = 0; i < NUM_PPORT_2819_COUNTERS; i++)
 		strcpy(data + (idx++) * ETH_GSTRING_LEN,
-		       pport_2819_stats_desc[i].name);
+		       pport_2819_stats_desc[i].format);
 
 	for (prio = 0; prio < NUM_PPORT_PRIO; prio++) {
 		for (i = 0; i < NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS; i++)
-			sprintf(data + (idx++) * ETH_GSTRING_LEN, "prio%d_%s",
-				prio,
-				pport_per_prio_traffic_stats_desc[i].name);
+			sprintf(data + (idx++) * ETH_GSTRING_LEN,
+				pport_per_prio_traffic_stats_desc[i].format, prio);
 	}
 
 	pfc_combined = mlx5e_query_pfc_combined(priv);
 	for_each_set_bit(prio, &pfc_combined, NUM_PPORT_PRIO) {
 		for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) {
-			sprintf(data + (idx++) * ETH_GSTRING_LEN, "prio%d_%s",
-				prio, pport_per_prio_pfc_stats_desc[i].name);
+			sprintf(data + (idx++) * ETH_GSTRING_LEN,
+				pport_per_prio_pfc_stats_desc[i].format, prio);
 		}
 	}
 
@@ -258,16 +257,15 @@ static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, uint8_t *data)
 	/* per channel counters */
 	for (i = 0; i < priv->params.num_channels; i++)
 		for (j = 0; j < NUM_RQ_STATS; j++)
-			sprintf(data + (idx++) * ETH_GSTRING_LEN, "rx%d_%s", i,
-				rq_stats_desc[j].name);
+			sprintf(data + (idx++) * ETH_GSTRING_LEN,
+				rq_stats_desc[j].format, i);
 
 	for (tc = 0; tc < priv->params.num_tc; tc++)
 		for (i = 0; i < priv->params.num_channels; i++)
 			for (j = 0; j < NUM_SQ_STATS; j++)
 				sprintf(data + (idx++) * ETH_GSTRING_LEN,
-					"tx%d_%s",
-					priv->channeltc_to_txq_map[i][tc],
-					sq_stats_desc[j].name);
+					sq_stats_desc[j].format,
+					priv->channeltc_to_txq_map[i][tc]);
 }
 
 static void mlx5e_get_strings(struct net_device *dev,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 793d7a1..cb6defd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -105,11 +105,11 @@ static void mlx5e_update_sw_counters(struct mlx5e_priv *priv)
 
 		s->rx_packets	+= rq_stats->packets;
 		s->rx_bytes	+= rq_stats->bytes;
-		s->lro_packets	+= rq_stats->lro_packets;
-		s->lro_bytes	+= rq_stats->lro_bytes;
+		s->rx_lro_packets += rq_stats->lro_packets;
+		s->rx_lro_bytes	+= rq_stats->lro_bytes;
 		s->rx_csum_none	+= rq_stats->csum_none;
-		s->rx_csum_sw	+= rq_stats->csum_sw;
-		s->rx_csum_inner += rq_stats->csum_inner;
+		s->rx_csum_complete += rq_stats->csum_complete;
+		s->rx_csum_unnecessary_inner += rq_stats->csum_unnecessary_inner;
 		s->rx_wqe_err   += rq_stats->wqe_err;
 		s->rx_mpwqe_filler += rq_stats->mpwqe_filler;
 		s->rx_mpwqe_frag   += rq_stats->mpwqe_frag;
@@ -122,24 +122,23 @@ static void mlx5e_update_sw_counters(struct mlx5e_priv *priv)
 
 			s->tx_packets		+= sq_stats->packets;
 			s->tx_bytes		+= sq_stats->bytes;
-			s->tso_packets		+= sq_stats->tso_packets;
-			s->tso_bytes		+= sq_stats->tso_bytes;
-			s->tso_inner_packets	+= sq_stats->tso_inner_packets;
-			s->tso_inner_bytes	+= sq_stats->tso_inner_bytes;
+			s->tx_tso_packets	+= sq_stats->tso_packets;
+			s->tx_tso_bytes		+= sq_stats->tso_bytes;
+			s->tx_tso_inner_packets	+= sq_stats->tso_inner_packets;
+			s->tx_tso_inner_bytes	+= sq_stats->tso_inner_bytes;
 			s->tx_queue_stopped	+= sq_stats->stopped;
 			s->tx_queue_wake	+= sq_stats->wake;
 			s->tx_queue_dropped	+= sq_stats->dropped;
-			s->tx_csum_inner	+= sq_stats->csum_offload_inner;
-			tx_offload_none		+= sq_stats->csum_offload_none;
+			s->tx_csum_partial_inner += sq_stats->csum_partial_inner;
+			tx_offload_none		+= sq_stats->csum_none;
 		}
 	}
 
 	/* Update calculated offload counters */
-	s->tx_csum_offload = s->tx_packets - tx_offload_none - s->tx_csum_inner;
-	s->rx_csum_good    = s->rx_packets - s->rx_csum_none -
-			     s->rx_csum_sw;
+	s->tx_csum_partial = s->tx_packets - tx_offload_none - s->tx_csum_partial_inner;
+	s->rx_csum_unnecessary = s->rx_packets - s->rx_csum_none - s->rx_csum_complete;
 
-	s->link_down_events = MLX5_GET(ppcnt_reg,
+	s->link_down_events_phy = MLX5_GET(ppcnt_reg,
 				priv->stats.pport.phy_counters,
 				counter_set.phys_layer_cntrs.link_down_events);
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index bd94770..022acc2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -689,7 +689,7 @@ static inline void mlx5e_handle_csum(struct net_device *netdev,
 	if (is_first_ethertype_ip(skb)) {
 		skb->ip_summed = CHECKSUM_COMPLETE;
 		skb->csum = csum_unfold((__force __sum16)cqe->check_sum);
-		rq->stats.csum_sw++;
+		rq->stats.csum_complete++;
 		return;
 	}
 
@@ -699,7 +699,7 @@ static inline void mlx5e_handle_csum(struct net_device *netdev,
 		if (cqe_is_tunneled(cqe)) {
 			skb->csum_level = 1;
 			skb->encapsulation = 1;
-			rq->stats.csum_inner++;
+			rq->stats.csum_unnecessary_inner++;
 		}
 		return;
 	}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
index 83bc32b..fcd490c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
@@ -42,9 +42,11 @@
 	be64_to_cpu(*(__be32 *)((char *)ptr + dsc[i].offset))
 
 #define MLX5E_DECLARE_STAT(type, fld) #fld, offsetof(type, fld)
+#define MLX5E_DECLARE_RX_STAT(type, fld) "rx%d_"#fld, offsetof(type, fld)
+#define MLX5E_DECLARE_TX_STAT(type, fld) "tx%d_"#fld, offsetof(type, fld)
 
 struct counter_desc {
-	char		name[ETH_GSTRING_LEN];
+	char		format[ETH_GSTRING_LEN];
 	int		offset; /* Byte offset */
 };
 
@@ -53,18 +55,18 @@ struct mlx5e_sw_stats {
 	u64 rx_bytes;
 	u64 tx_packets;
 	u64 tx_bytes;
-	u64 tso_packets;
-	u64 tso_bytes;
-	u64 tso_inner_packets;
-	u64 tso_inner_bytes;
-	u64 lro_packets;
-	u64 lro_bytes;
-	u64 rx_csum_good;
+	u64 tx_tso_packets;
+	u64 tx_tso_bytes;
+	u64 tx_tso_inner_packets;
+	u64 tx_tso_inner_bytes;
+	u64 rx_lro_packets;
+	u64 rx_lro_bytes;
+	u64 rx_csum_unnecessary;
 	u64 rx_csum_none;
-	u64 rx_csum_sw;
-	u64 rx_csum_inner;
-	u64 tx_csum_offload;
-	u64 tx_csum_inner;
+	u64 rx_csum_complete;
+	u64 rx_csum_unnecessary_inner;
+	u64 tx_csum_partial;
+	u64 tx_csum_partial_inner;
 	u64 tx_queue_stopped;
 	u64 tx_queue_wake;
 	u64 tx_queue_dropped;
@@ -76,7 +78,7 @@ struct mlx5e_sw_stats {
 	u64 rx_cqe_compress_pkts;
 
 	/* Special handling counters */
-	u64 link_down_events;
+	u64 link_down_events_phy;
 };
 
 static const struct counter_desc sw_stats_desc[] = {
@@ -84,18 +86,18 @@ static const struct counter_desc sw_stats_desc[] = {
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_bytes) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_packets) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_bytes) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tso_packets) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tso_bytes) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tso_inner_packets) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tso_inner_bytes) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, lro_packets) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, lro_bytes) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_good) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_packets) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_bytes) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_inner_packets) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_inner_bytes) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_lro_packets) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_lro_bytes) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_none) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_sw) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_inner) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_offload) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_inner) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary_inner) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_partial) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_partial_inner) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_stopped) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_wake) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_dropped) },
@@ -105,7 +107,7 @@ static const struct counter_desc sw_stats_desc[] = {
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_buff_alloc_err) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_blks) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_pkts) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, link_down_events) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, link_down_events_phy) },
 };
 
 struct mlx5e_qcounter_stats {
@@ -125,12 +127,6 @@ struct mlx5e_vport_stats {
 };
 
 static const struct counter_desc vport_stats_desc[] = {
-	{ "rx_vport_error_packets",
-		VPORT_COUNTER_OFF(received_errors.packets) },
-	{ "rx_vport_error_bytes", VPORT_COUNTER_OFF(received_errors.octets) },
-	{ "tx_vport_error_packets",
-		VPORT_COUNTER_OFF(transmit_errors.packets) },
-	{ "tx_vport_error_bytes", VPORT_COUNTER_OFF(transmit_errors.octets) },
 	{ "rx_vport_unicast_packets",
 		VPORT_COUNTER_OFF(received_eth_unicast.packets) },
 	{ "rx_vport_unicast_bytes",
@@ -192,94 +188,68 @@ struct mlx5e_pport_stats {
 };
 
 static const struct counter_desc pport_802_3_stats_desc[] = {
-	{ "frames_tx", PPORT_802_3_OFF(a_frames_transmitted_ok) },
-	{ "frames_rx", PPORT_802_3_OFF(a_frames_received_ok) },
-	{ "check_seq_err", PPORT_802_3_OFF(a_frame_check_sequence_errors) },
-	{ "alignment_err", PPORT_802_3_OFF(a_alignment_errors) },
-	{ "octets_tx", PPORT_802_3_OFF(a_octets_transmitted_ok) },
-	{ "octets_received", PPORT_802_3_OFF(a_octets_received_ok) },
-	{ "multicast_xmitted", PPORT_802_3_OFF(a_multicast_frames_xmitted_ok) },
-	{ "broadcast_xmitted", PPORT_802_3_OFF(a_broadcast_frames_xmitted_ok) },
-	{ "multicast_rx", PPORT_802_3_OFF(a_multicast_frames_received_ok) },
-	{ "broadcast_rx", PPORT_802_3_OFF(a_broadcast_frames_received_ok) },
-	{ "in_range_len_errors", PPORT_802_3_OFF(a_in_range_length_errors) },
-	{ "out_of_range_len", PPORT_802_3_OFF(a_out_of_range_length_field) },
-	{ "too_long_errors", PPORT_802_3_OFF(a_frame_too_long_errors) },
-	{ "symbol_err", PPORT_802_3_OFF(a_symbol_error_during_carrier) },
-	{ "mac_control_tx", PPORT_802_3_OFF(a_mac_control_frames_transmitted) },
-	{ "mac_control_rx", PPORT_802_3_OFF(a_mac_control_frames_received) },
-	{ "unsupported_op_rx",
-		PPORT_802_3_OFF(a_unsupported_opcodes_received) },
-	{ "pause_ctrl_rx", PPORT_802_3_OFF(a_pause_mac_ctrl_frames_received) },
-	{ "pause_ctrl_tx",
-		PPORT_802_3_OFF(a_pause_mac_ctrl_frames_transmitted) },
+	{ "tx_packets_phy", PPORT_802_3_OFF(a_frames_transmitted_ok) },
+	{ "rx_packets_phy", PPORT_802_3_OFF(a_frames_received_ok) },
+	{ "rx_crc_errors_phy", PPORT_802_3_OFF(a_frame_check_sequence_errors) },
+	{ "tx_bytes_phy", PPORT_802_3_OFF(a_octets_transmitted_ok) },
+	{ "rx_bytes_phy", PPORT_802_3_OFF(a_octets_received_ok) },
+	{ "tx_multicast_phy", PPORT_802_3_OFF(a_multicast_frames_xmitted_ok) },
+	{ "tx_broadcast_phy", PPORT_802_3_OFF(a_broadcast_frames_xmitted_ok) },
+	{ "rx_multicast_phy", PPORT_802_3_OFF(a_multicast_frames_received_ok) },
+	{ "rx_broadcast_phy", PPORT_802_3_OFF(a_broadcast_frames_received_ok) },
+	{ "rx_in_range_len_errors_phy", PPORT_802_3_OFF(a_in_range_length_errors) },
+	{ "rx_out_of_range_len_phy", PPORT_802_3_OFF(a_out_of_range_length_field) },
+	{ "rx_oversize_pkts_phy", PPORT_802_3_OFF(a_frame_too_long_errors) },
+	{ "rx_symbol_err_phy", PPORT_802_3_OFF(a_symbol_error_during_carrier) },
+	{ "tx_mac_control_phy", PPORT_802_3_OFF(a_mac_control_frames_transmitted) },
+	{ "rx_mac_control_phy", PPORT_802_3_OFF(a_mac_control_frames_received) },
+	{ "rx_unsupported_op_phy", PPORT_802_3_OFF(a_unsupported_opcodes_received) },
+	{ "rx_pause_ctrl_phy", PPORT_802_3_OFF(a_pause_mac_ctrl_frames_received) },
+	{ "tx_pause_ctrl_phy", PPORT_802_3_OFF(a_pause_mac_ctrl_frames_transmitted) },
 };
 
 static const struct counter_desc pport_2863_stats_desc[] = {
-	{ "in_octets", PPORT_2863_OFF(if_in_octets) },
-	{ "in_ucast_pkts", PPORT_2863_OFF(if_in_ucast_pkts) },
-	{ "in_discards", PPORT_2863_OFF(if_in_discards) },
-	{ "in_errors", PPORT_2863_OFF(if_in_errors) },
-	{ "in_unknown_protos", PPORT_2863_OFF(if_in_unknown_protos) },
-	{ "out_octets", PPORT_2863_OFF(if_out_octets) },
-	{ "out_ucast_pkts", PPORT_2863_OFF(if_out_ucast_pkts) },
-	{ "out_discards", PPORT_2863_OFF(if_out_discards) },
-	{ "out_errors", PPORT_2863_OFF(if_out_errors) },
-	{ "in_multicast_pkts", PPORT_2863_OFF(if_in_multicast_pkts) },
-	{ "in_broadcast_pkts", PPORT_2863_OFF(if_in_broadcast_pkts) },
-	{ "out_multicast_pkts", PPORT_2863_OFF(if_out_multicast_pkts) },
-	{ "out_broadcast_pkts", PPORT_2863_OFF(if_out_broadcast_pkts) },
+	{ "rx_discards_phy", PPORT_2863_OFF(if_in_discards) },
+	{ "tx_discards_phy", PPORT_2863_OFF(if_out_discards) },
+	{ "tx_errors_phy", PPORT_2863_OFF(if_out_errors) },
 };
 
 static const struct counter_desc pport_2819_stats_desc[] = {
-	{ "drop_events", PPORT_2819_OFF(ether_stats_drop_events) },
-	{ "octets", PPORT_2819_OFF(ether_stats_octets) },
-	{ "pkts", PPORT_2819_OFF(ether_stats_pkts) },
-	{ "broadcast_pkts", PPORT_2819_OFF(ether_stats_broadcast_pkts) },
-	{ "multicast_pkts", PPORT_2819_OFF(ether_stats_multicast_pkts) },
-	{ "crc_align_errors", PPORT_2819_OFF(ether_stats_crc_align_errors) },
-	{ "undersize_pkts", PPORT_2819_OFF(ether_stats_undersize_pkts) },
-	{ "oversize_pkts", PPORT_2819_OFF(ether_stats_oversize_pkts) },
-	{ "fragments", PPORT_2819_OFF(ether_stats_fragments) },
-	{ "jabbers", PPORT_2819_OFF(ether_stats_jabbers) },
-	{ "collisions", PPORT_2819_OFF(ether_stats_collisions) },
-	{ "p64octets", PPORT_2819_OFF(ether_stats_pkts64octets) },
-	{ "p65to127octets", PPORT_2819_OFF(ether_stats_pkts65to127octets) },
-	{ "p128to255octets", PPORT_2819_OFF(ether_stats_pkts128to255octets) },
-	{ "p256to511octets", PPORT_2819_OFF(ether_stats_pkts256to511octets) },
-	{ "p512to1023octets", PPORT_2819_OFF(ether_stats_pkts512to1023octets) },
-	{ "p1024to1518octets",
-		PPORT_2819_OFF(ether_stats_pkts1024to1518octets) },
-	{ "p1519to2047octets",
-		PPORT_2819_OFF(ether_stats_pkts1519to2047octets) },
-	{ "p2048to4095octets",
-		PPORT_2819_OFF(ether_stats_pkts2048to4095octets) },
-	{ "p4096to8191octets",
-		PPORT_2819_OFF(ether_stats_pkts4096to8191octets) },
-	{ "p8192to10239octets",
-		PPORT_2819_OFF(ether_stats_pkts8192to10239octets) },
+	{ "rx_undersize_pkts_phy", PPORT_2819_OFF(ether_stats_undersize_pkts) },
+	{ "rx_fragments_phy", PPORT_2819_OFF(ether_stats_fragments) },
+	{ "rx_jabbers_phy", PPORT_2819_OFF(ether_stats_jabbers) },
+	{ "rx_64_bytes_phy", PPORT_2819_OFF(ether_stats_pkts64octets) },
+	{ "rx_65_to_127_bytes_phy", PPORT_2819_OFF(ether_stats_pkts65to127octets) },
+	{ "rx_128_to_255_bytes_phy", PPORT_2819_OFF(ether_stats_pkts128to255octets) },
+	{ "rx_256_to_511_bytes_phy", PPORT_2819_OFF(ether_stats_pkts256to511octets) },
+	{ "rx_512_to_1023_bytes_phy", PPORT_2819_OFF(ether_stats_pkts512to1023octets) },
+	{ "rx_1024_to_1518_bytes_phy", PPORT_2819_OFF(ether_stats_pkts1024to1518octets) },
+	{ "rx_1519_to_2047_bytes_phy", PPORT_2819_OFF(ether_stats_pkts1519to2047octets) },
+	{ "rx_2048_to_4095_bytes_phy", PPORT_2819_OFF(ether_stats_pkts2048to4095octets) },
+	{ "rx_4096_to_8191_bytes_phy", PPORT_2819_OFF(ether_stats_pkts4096to8191octets) },
+	{ "rx_8192_to_10239_bytes_phy", PPORT_2819_OFF(ether_stats_pkts8192to10239octets) },
 };
 
 static const struct counter_desc pport_per_prio_traffic_stats_desc[] = {
-	{ "rx_octets", PPORT_PER_PRIO_OFF(rx_octets) },
-	{ "rx_frames", PPORT_PER_PRIO_OFF(rx_frames) },
-	{ "tx_octets", PPORT_PER_PRIO_OFF(tx_octets) },
-	{ "tx_frames", PPORT_PER_PRIO_OFF(tx_frames) },
+	{ "rx_prio%d_bytes", PPORT_PER_PRIO_OFF(rx_octets) },
+	{ "rx_prio%d_packets", PPORT_PER_PRIO_OFF(rx_frames) },
+	{ "tx_prio%d_bytes", PPORT_PER_PRIO_OFF(tx_octets) },
+	{ "tx_prio%d_packets", PPORT_PER_PRIO_OFF(tx_frames) },
 };
 
 static const struct counter_desc pport_per_prio_pfc_stats_desc[] = {
-	{ "rx_pause", PPORT_PER_PRIO_OFF(rx_pause) },
-	{ "rx_pause_duration", PPORT_PER_PRIO_OFF(rx_pause_duration) },
-	{ "tx_pause", PPORT_PER_PRIO_OFF(tx_pause) },
-	{ "tx_pause_duration", PPORT_PER_PRIO_OFF(tx_pause_duration) },
-	{ "rx_pause_transition", PPORT_PER_PRIO_OFF(rx_pause_transition) },
+	{ "rx_prio%d_pause", PPORT_PER_PRIO_OFF(rx_pause) },
+	{ "rx_prio%d_pause_duration", PPORT_PER_PRIO_OFF(rx_pause_duration) },
+	{ "tx_prio%d_pause", PPORT_PER_PRIO_OFF(tx_pause) },
+	{ "tx_prio%d_pause_duration", PPORT_PER_PRIO_OFF(tx_pause_duration) },
+	{ "rx_prio%d_pause_transition", PPORT_PER_PRIO_OFF(rx_pause_transition) },
 };
 
 struct mlx5e_rq_stats {
 	u64 packets;
 	u64 bytes;
-	u64 csum_sw;
-	u64 csum_inner;
+	u64 csum_complete;
+	u64 csum_unnecessary_inner;
 	u64 csum_none;
 	u64 lro_packets;
 	u64 lro_bytes;
@@ -292,19 +262,19 @@ struct mlx5e_rq_stats {
 };
 
 static const struct counter_desc rq_stats_desc[] = {
-	{ MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, packets) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, bytes) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, csum_sw) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, csum_inner) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, csum_none) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, lro_packets) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, lro_bytes) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, wqe_err) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, mpwqe_filler) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, mpwqe_frag) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, buff_alloc_err) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, cqe_compress_blks) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, packets) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, bytes) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_unnecessary_inner) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_none) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_packets) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_bytes) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, wqe_err) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, mpwqe_filler) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, mpwqe_frag) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, buff_alloc_err) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_blks) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) },
 };
 
 struct mlx5e_sq_stats {
@@ -315,28 +285,28 @@ struct mlx5e_sq_stats {
 	u64 tso_bytes;
 	u64 tso_inner_packets;
 	u64 tso_inner_bytes;
-	u64 csum_offload_inner;
+	u64 csum_partial_inner;
 	u64 nop;
 	/* less likely accessed in data path */
-	u64 csum_offload_none;
+	u64 csum_none;
 	u64 stopped;
 	u64 wake;
 	u64 dropped;
 };
 
 static const struct counter_desc sq_stats_desc[] = {
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, packets) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, bytes) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, tso_packets) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, tso_bytes) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, tso_inner_packets) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, tso_inner_bytes) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, csum_offload_inner) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, nop) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, csum_offload_none) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, stopped) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, wake) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, dropped) },
+	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, packets) },
+	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, bytes) },
+	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_packets) },
+	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_bytes) },
+	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_inner_packets) },
+	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_inner_bytes) },
+	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_partial_inner) },
+	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, nop) },
+	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_none) },
+	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, stopped) },
+	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, wake) },
+	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, dropped) },
 };
 
 #define NUM_SW_COUNTERS			ARRAY_SIZE(sw_stats_desc)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index b000ddc..5a750b9cd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -192,12 +192,12 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb)
 		if (skb->encapsulation) {
 			eseg->cs_flags |= MLX5_ETH_WQE_L3_INNER_CSUM |
 					  MLX5_ETH_WQE_L4_INNER_CSUM;
-			sq->stats.csum_offload_inner++;
+			sq->stats.csum_partial_inner++;
 		} else {
 			eseg->cs_flags |= MLX5_ETH_WQE_L4_CSUM;
 		}
 	} else
-		sq->stats.csum_offload_none++;
+		sq->stats.csum_none++;
 
 	if (sq->cc != sq->prev_cc) {
 		sq->prev_cc = sq->cc;
-- 
cgit v0.10.2


From 3f4c68cfde30caa1f6d8368fd19590671411ade2 Mon Sep 17 00:00:00 2001
From: Sunil Goutham <sgoutham@cavium.com>
Date: Mon, 27 Jun 2016 15:30:02 +0530
Subject: net: thunderx: Fix link status reporting

Check for SMU RX local/remote faults along with SPU LINK
status. Otherwise at times link is UP at our end but DOWN
at link partner's side. Also due to an issue in BGX it's
rarely seen that initialization doesn't happen properly
and SMU RX reports faults with everything fine at SPU.
This patch tries to reinitialize LMAC to fix it.

Also fixed LMAC disable sequence to properly bring down link.

Signed-off-by: Sunil Goutham <sgoutham@cavium.com>
Signed-off-by: Tao Wang <tao.wang@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
index 3ed2198..63a39ac 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
@@ -551,7 +551,9 @@ static int bgx_xaui_check_link(struct lmac *lmac)
 	}
 
 	/* Clear rcvflt bit (latching high) and read it back */
-	bgx_reg_modify(bgx, lmacid, BGX_SPUX_STATUS2, SPU_STATUS2_RCVFLT);
+	if (bgx_reg_read(bgx, lmacid, BGX_SPUX_STATUS2) & SPU_STATUS2_RCVFLT)
+		bgx_reg_modify(bgx, lmacid,
+			       BGX_SPUX_STATUS2, SPU_STATUS2_RCVFLT);
 	if (bgx_reg_read(bgx, lmacid, BGX_SPUX_STATUS2) & SPU_STATUS2_RCVFLT) {
 		dev_err(&bgx->pdev->dev, "Receive fault, retry training\n");
 		if (bgx->use_training) {
@@ -570,13 +572,6 @@ static int bgx_xaui_check_link(struct lmac *lmac)
 		return -1;
 	}
 
-	/* Wait for MAC RX to be ready */
-	if (bgx_poll_reg(bgx, lmacid, BGX_SMUX_RX_CTL,
-			 SMU_RX_CTL_STATUS, true)) {
-		dev_err(&bgx->pdev->dev, "SMU RX link not okay\n");
-		return -1;
-	}
-
 	/* Wait for BGX RX to be idle */
 	if (bgx_poll_reg(bgx, lmacid, BGX_SMUX_CTL, SMU_CTL_RX_IDLE, false)) {
 		dev_err(&bgx->pdev->dev, "SMU RX not idle\n");
@@ -589,29 +584,30 @@ static int bgx_xaui_check_link(struct lmac *lmac)
 		return -1;
 	}
 
-	if (bgx_reg_read(bgx, lmacid, BGX_SPUX_STATUS2) & SPU_STATUS2_RCVFLT) {
-		dev_err(&bgx->pdev->dev, "Receive fault\n");
-		return -1;
-	}
-
-	/* Receive link is latching low. Force it high and verify it */
-	bgx_reg_modify(bgx, lmacid, BGX_SPUX_STATUS1, SPU_STATUS1_RCV_LNK);
-	if (bgx_poll_reg(bgx, lmacid, BGX_SPUX_STATUS1,
-			 SPU_STATUS1_RCV_LNK, false)) {
-		dev_err(&bgx->pdev->dev, "SPU receive link down\n");
-		return -1;
-	}
-
+	/* Clear receive packet disable */
 	cfg = bgx_reg_read(bgx, lmacid, BGX_SPUX_MISC_CONTROL);
 	cfg &= ~SPU_MISC_CTL_RX_DIS;
 	bgx_reg_write(bgx, lmacid, BGX_SPUX_MISC_CONTROL, cfg);
-	return 0;
+
+	/* Check for MAC RX faults */
+	cfg = bgx_reg_read(bgx, lmacid, BGX_SMUX_RX_CTL);
+	/* 0 - Link is okay, 1 - Local fault, 2 - Remote fault */
+	cfg &= SMU_RX_CTL_STATUS;
+	if (!cfg)
+		return 0;
+
+	/* Rx local/remote fault seen.
+	 * Do lmac reinit to see if condition recovers
+	 */
+	bgx_lmac_xaui_init(bgx, lmacid, bgx->lmac_type);
+
+	return -1;
 }
 
 static void bgx_poll_for_link(struct work_struct *work)
 {
 	struct lmac *lmac;
-	u64 link;
+	u64 spu_link, smu_link;
 
 	lmac = container_of(work, struct lmac, dwork.work);
 
@@ -621,8 +617,11 @@ static void bgx_poll_for_link(struct work_struct *work)
 	bgx_poll_reg(lmac->bgx, lmac->lmacid, BGX_SPUX_STATUS1,
 		     SPU_STATUS1_RCV_LNK, false);
 
-	link = bgx_reg_read(lmac->bgx, lmac->lmacid, BGX_SPUX_STATUS1);
-	if (link & SPU_STATUS1_RCV_LNK) {
+	spu_link = bgx_reg_read(lmac->bgx, lmac->lmacid, BGX_SPUX_STATUS1);
+	smu_link = bgx_reg_read(lmac->bgx, lmac->lmacid, BGX_SMUX_RX_CTL);
+
+	if ((spu_link & SPU_STATUS1_RCV_LNK) &&
+	    !(smu_link & SMU_RX_CTL_STATUS)) {
 		lmac->link_up = 1;
 		if (lmac->bgx->lmac_type == BGX_MODE_XLAUI)
 			lmac->last_speed = 40000;
@@ -636,9 +635,15 @@ static void bgx_poll_for_link(struct work_struct *work)
 	}
 
 	if (lmac->last_link != lmac->link_up) {
+		if (lmac->link_up) {
+			if (bgx_xaui_check_link(lmac)) {
+				/* Errors, clear link_up state */
+				lmac->link_up = 0;
+				lmac->last_speed = SPEED_UNKNOWN;
+				lmac->last_duplex = DUPLEX_UNKNOWN;
+			}
+		}
 		lmac->last_link = lmac->link_up;
-		if (lmac->link_up)
-			bgx_xaui_check_link(lmac);
 	}
 
 	queue_delayed_work(lmac->check_link, &lmac->dwork, HZ * 2);
@@ -710,7 +715,7 @@ static int bgx_lmac_enable(struct bgx *bgx, u8 lmacid)
 static void bgx_lmac_disable(struct bgx *bgx, u8 lmacid)
 {
 	struct lmac *lmac;
-	u64 cmrx_cfg;
+	u64 cfg;
 
 	lmac = &bgx->lmac[lmacid];
 	if (lmac->check_link) {
@@ -719,9 +724,33 @@ static void bgx_lmac_disable(struct bgx *bgx, u8 lmacid)
 		destroy_workqueue(lmac->check_link);
 	}
 
-	cmrx_cfg = bgx_reg_read(bgx, lmacid, BGX_CMRX_CFG);
-	cmrx_cfg &= ~(1 << 15);
-	bgx_reg_write(bgx, lmacid, BGX_CMRX_CFG, cmrx_cfg);
+	/* Disable packet reception */
+	cfg = bgx_reg_read(bgx, lmacid, BGX_CMRX_CFG);
+	cfg &= ~CMR_PKT_RX_EN;
+	bgx_reg_write(bgx, lmacid, BGX_CMRX_CFG, cfg);
+
+	/* Give chance for Rx/Tx FIFO to get drained */
+	bgx_poll_reg(bgx, lmacid, BGX_CMRX_RX_FIFO_LEN, (u64)0x1FFF, true);
+	bgx_poll_reg(bgx, lmacid, BGX_CMRX_TX_FIFO_LEN, (u64)0x3FFF, true);
+
+	/* Disable packet transmission */
+	cfg = bgx_reg_read(bgx, lmacid, BGX_CMRX_CFG);
+	cfg &= ~CMR_PKT_TX_EN;
+	bgx_reg_write(bgx, lmacid, BGX_CMRX_CFG, cfg);
+
+	/* Disable serdes lanes */
+        if (!lmac->is_sgmii)
+                bgx_reg_modify(bgx, lmacid,
+                               BGX_SPUX_CONTROL1, SPU_CTL_LOW_POWER);
+        else
+                bgx_reg_modify(bgx, lmacid,
+                               BGX_GMP_PCS_MRX_CTL, PCS_MRX_CTL_PWR_DN);
+
+	/* Disable LMAC */
+	cfg = bgx_reg_read(bgx, lmacid, BGX_CMRX_CFG);
+	cfg &= ~CMR_EN;
+	bgx_reg_write(bgx, lmacid, BGX_CMRX_CFG, cfg);
+
 	bgx_flush_dmac_addrs(bgx, lmacid);
 
 	if ((bgx->lmac_type != BGX_MODE_XFI) &&
diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
index 149e179..42010d2 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
@@ -41,6 +41,7 @@
 #define BGX_CMRX_RX_STAT10		0xC0
 #define BGX_CMRX_RX_BP_DROP		0xC8
 #define BGX_CMRX_RX_DMAC_CTL		0x0E8
+#define BGX_CMRX_RX_FIFO_LEN		0x108
 #define BGX_CMR_RX_DMACX_CAM		0x200
 #define  RX_DMACX_CAM_EN			BIT_ULL(48)
 #define  RX_DMACX_CAM_LMACID(x)			(x << 49)
@@ -50,6 +51,7 @@
 #define BGX_CMR_CHAN_MSK_AND		0x450
 #define BGX_CMR_BIST_STATUS		0x460
 #define BGX_CMR_RX_LMACS		0x468
+#define BGX_CMRX_TX_FIFO_LEN		0x518
 #define BGX_CMRX_TX_STAT0		0x600
 #define BGX_CMRX_TX_STAT1		0x608
 #define BGX_CMRX_TX_STAT2		0x610
-- 
cgit v0.10.2


From 3e29adba567306504e99b8363edf2d47c19dbe1b Mon Sep 17 00:00:00 2001
From: Sunil Goutham <sgoutham@cavium.com>
Date: Mon, 27 Jun 2016 15:30:03 +0530
Subject: net: thunderx: Fix TL4 configuration for secondary Qsets

TL4 calculation for a given SQ of secondary Qsets is incorrect
and goes out of bounds and also for some SQ's TL4 chosen will
transmit data via a different BGX interface and not same as
primary Qset's interface.

This patch fixes this issue.

Signed-off-by: Sunil Goutham <sgoutham@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c
index 95f17f8..16ed203 100644
--- a/drivers/net/ethernet/cavium/thunder/nic_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nic_main.c
@@ -499,6 +499,7 @@ static void nic_tx_channel_cfg(struct nicpf *nic, u8 vnic,
 	u32 rr_quantum;
 	u8 sq_idx = sq->sq_num;
 	u8 pqs_vnic;
+	int svf;
 
 	if (sq->sqs_mode)
 		pqs_vnic = nic->pqs_vf[vnic];
@@ -511,10 +512,19 @@ static void nic_tx_channel_cfg(struct nicpf *nic, u8 vnic,
 	/* 24 bytes for FCS, IPG and preamble */
 	rr_quantum = ((NIC_HW_MAX_FRS + 24) / 4);
 
-	tl4 = (lmac * NIC_TL4_PER_LMAC) + (bgx * NIC_TL4_PER_BGX);
+	if (!sq->sqs_mode) {
+		tl4 = (lmac * NIC_TL4_PER_LMAC) + (bgx * NIC_TL4_PER_BGX);
+	} else {
+		for (svf = 0; svf < MAX_SQS_PER_VF; svf++) {
+			if (nic->vf_sqs[pqs_vnic][svf] == vnic)
+				break;
+		}
+		tl4 = (MAX_LMAC_PER_BGX * NIC_TL4_PER_LMAC);
+		tl4 += (lmac * NIC_TL4_PER_LMAC * MAX_SQS_PER_VF);
+		tl4 += (svf * NIC_TL4_PER_LMAC);
+		tl4 += (bgx * NIC_TL4_PER_BGX);
+	}
 	tl4 += sq_idx;
-	if (sq->sqs_mode)
-		tl4 += vnic * 8;
 
 	tl3 = tl4 / (NIC_MAX_TL4 / NIC_MAX_TL3);
 	nic_reg_write(nic, NIC_PF_QSET_0_127_SQ_0_7_CFG2 |
-- 
cgit v0.10.2


From 42482b4546336ecd93acdd95e435bafd7a4c581c Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Thu, 23 Jun 2016 14:50:35 -0700
Subject: drm/i915: Add more Kabylake PCI IDs.

The spec has been updated adding new PCI IDs.

Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Dhinakaran Pandiyan <dhinakaran.pandiyan@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1466718636-19675-1-git-send-email-rodrigo.vivi@intel.com
(cherry picked from commit 33d9391d3020e069dca98fa87a604c037beb2b9e)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>

diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h
index 9094599..87dde1c 100644
--- a/include/drm/i915_pciids.h
+++ b/include/drm/i915_pciids.h
@@ -309,6 +309,7 @@
 	INTEL_VGA_DEVICE(0x5906, info), /* ULT GT1 */ \
 	INTEL_VGA_DEVICE(0x590E, info), /* ULX GT1 */ \
 	INTEL_VGA_DEVICE(0x5902, info), /* DT  GT1 */ \
+	INTEL_VGA_DEVICE(0x5908, info), /* Halo GT1 */ \
 	INTEL_VGA_DEVICE(0x590B, info), /* Halo GT1 */ \
 	INTEL_VGA_DEVICE(0x590A, info) /* SRV GT1 */
 
@@ -322,7 +323,9 @@
 	INTEL_VGA_DEVICE(0x591D, info) /* WKS GT2 */
 
 #define INTEL_KBL_GT3_IDS(info) \
+	INTEL_VGA_DEVICE(0x5923, info), /* ULT GT3 */ \
 	INTEL_VGA_DEVICE(0x5926, info), /* ULT GT3 */ \
+	INTEL_VGA_DEVICE(0x5927, info), /* ULT GT3 */ \
 	INTEL_VGA_DEVICE(0x592B, info), /* Halo GT3 */ \
 	INTEL_VGA_DEVICE(0x592A, info) /* SRV GT3 */
 
-- 
cgit v0.10.2


From 6b9dc6a474fd4e025c615ed4582735360005727c Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Thu, 23 Jun 2016 14:50:36 -0700
Subject: drm/i915: Removing PCI IDs that are no longer listed as Kabylake.

This is unusual. Usually IDs listed on early stages of platform
definition are kept there as reserved for later use.

However these IDs here are not listed anymore in any of steppings
and devices IDs tables for Kabylake on configurations overview
section of BSpec.

So it is better removing them before they become used in any
other future platform.

Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Dhinakaran Pandiyan <dhinakaran.pandiyan@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1466718636-19675-2-git-send-email-rodrigo.vivi@intel.com
(cherry picked from commit a922eb8d4581c883c37ce6e12dca9ff2cb1ea723)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>

diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h
index 87dde1c..33466bf 100644
--- a/include/drm/i915_pciids.h
+++ b/include/drm/i915_pciids.h
@@ -325,15 +325,10 @@
 #define INTEL_KBL_GT3_IDS(info) \
 	INTEL_VGA_DEVICE(0x5923, info), /* ULT GT3 */ \
 	INTEL_VGA_DEVICE(0x5926, info), /* ULT GT3 */ \
-	INTEL_VGA_DEVICE(0x5927, info), /* ULT GT3 */ \
-	INTEL_VGA_DEVICE(0x592B, info), /* Halo GT3 */ \
-	INTEL_VGA_DEVICE(0x592A, info) /* SRV GT3 */
+	INTEL_VGA_DEVICE(0x5927, info) /* ULT GT3 */
 
 #define INTEL_KBL_GT4_IDS(info) \
-	INTEL_VGA_DEVICE(0x5932, info), /* DT  GT4 */ \
-	INTEL_VGA_DEVICE(0x593B, info), /* Halo GT4 */ \
-	INTEL_VGA_DEVICE(0x593A, info), /* SRV GT4 */ \
-	INTEL_VGA_DEVICE(0x593D, info)  /* WKS GT4 */
+	INTEL_VGA_DEVICE(0x593B, info) /* Halo GT4 */
 
 #define INTEL_KBL_IDS(info) \
 	INTEL_KBL_GT1_IDS(info), \
-- 
cgit v0.10.2


From 96183182ad05d1ce31b9048921c12bf4ad621eaf Mon Sep 17 00:00:00 2001
From: Wei Yongjun <yongjun_wei@trendmicro.com.cn>
Date: Mon, 27 Jun 2016 20:48:53 +0800
Subject: ibmvnic: fix to use list_for_each_safe() when delete items

Since we will remove items off the list using list_del() we need
to use a safe version of the list_for_each() macro aptly named
list_for_each_safe().

Signed-off-by: Wei Yongjun <yongjun_wei@trendmicro.com.cn>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 864cb21..ecdb685 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -2121,7 +2121,7 @@ static void handle_error_info_rsp(union ibmvnic_crq *crq,
 				  struct ibmvnic_adapter *adapter)
 {
 	struct device *dev = &adapter->vdev->dev;
-	struct ibmvnic_error_buff *error_buff;
+	struct ibmvnic_error_buff *error_buff, *tmp;
 	unsigned long flags;
 	bool found = false;
 	int i;
@@ -2133,7 +2133,7 @@ static void handle_error_info_rsp(union ibmvnic_crq *crq,
 	}
 
 	spin_lock_irqsave(&adapter->error_list_lock, flags);
-	list_for_each_entry(error_buff, &adapter->errors, list)
+	list_for_each_entry_safe(error_buff, tmp, &adapter->errors, list)
 		if (error_buff->error_id == crq->request_error_rsp.error_id) {
 			found = true;
 			list_del(&error_buff->list);
@@ -3141,14 +3141,14 @@ static void handle_request_ras_comp_num_rsp(union ibmvnic_crq *crq,
 
 static void ibmvnic_free_inflight(struct ibmvnic_adapter *adapter)
 {
-	struct ibmvnic_inflight_cmd *inflight_cmd;
+	struct ibmvnic_inflight_cmd *inflight_cmd, *tmp1;
 	struct device *dev = &adapter->vdev->dev;
-	struct ibmvnic_error_buff *error_buff;
+	struct ibmvnic_error_buff *error_buff, *tmp2;
 	unsigned long flags;
 	unsigned long flags2;
 
 	spin_lock_irqsave(&adapter->inflight_lock, flags);
-	list_for_each_entry(inflight_cmd, &adapter->inflight, list) {
+	list_for_each_entry_safe(inflight_cmd, tmp1, &adapter->inflight, list) {
 		switch (inflight_cmd->crq.generic.cmd) {
 		case LOGIN:
 			dma_unmap_single(dev, adapter->login_buf_token,
@@ -3165,8 +3165,8 @@ static void ibmvnic_free_inflight(struct ibmvnic_adapter *adapter)
 			break;
 		case REQUEST_ERROR_INFO:
 			spin_lock_irqsave(&adapter->error_list_lock, flags2);
-			list_for_each_entry(error_buff, &adapter->errors,
-					    list) {
+			list_for_each_entry_safe(error_buff, tmp2,
+						 &adapter->errors, list) {
 				dma_unmap_single(dev, error_buff->dma,
 						 error_buff->len,
 						 DMA_FROM_DEVICE);
-- 
cgit v0.10.2


From a3d2e9f8eb1487f4191ff08ce2d3d63702c65a90 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 27 Jun 2016 17:38:50 +0200
Subject: tcp: do not send too big packets at retransmit time

Arjun reported a bug in TCP stack and bisected it to a recent commit.

In case where we process SACK, we can coalesce multiple skbs
into fat ones (tcp_shift_skb_data()), to lower write queue
overhead, because we do not expect to retransmit these packets.

However, SACK reneging can happen, forcing the sender to retransmit
all these packets. If skb->len is above 64KB, we then send buggy
IP packets that could hang TSO engine on cxgb4.

Neal suggested to use tcp_tso_autosize() instead of tp->gso_segs
so that we cook packets of optimal size vs TCP/pacing.

Thanks to Arjun for reporting the bug and running the tests !

Fixes: 10d3be569243 ("tcp-tso: do not split TSO packets at retransmit time")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Arjun V <arjun@chelsio.com>
Tested-by: Arjun V <arjun@chelsio.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 8bd9911..e00e972 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2751,7 +2751,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
 	struct sk_buff *hole = NULL;
-	u32 last_lost;
+	u32 max_segs, last_lost;
 	int mib_idx;
 	int fwd_rexmitting = 0;
 
@@ -2771,6 +2771,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 		last_lost = tp->snd_una;
 	}
 
+	max_segs = tcp_tso_autosize(sk, tcp_current_mss(sk));
 	tcp_for_write_queue_from(skb, sk) {
 		__u8 sacked = TCP_SKB_CB(skb)->sacked;
 		int segs;
@@ -2784,6 +2785,10 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 		segs = tp->snd_cwnd - tcp_packets_in_flight(tp);
 		if (segs <= 0)
 			return;
+		/* In case tcp_shift_skb_data() have aggregated large skbs,
+		 * we need to make sure not sending too bigs TSO packets
+		 */
+		segs = min_t(int, segs, max_segs);
 
 		if (fwd_rexmitting) {
 begin_fwd:
-- 
cgit v0.10.2


From 565ce8f32ac4a233b474f401e1d3e7e1de0a31fd Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Date: Mon, 27 Jun 2016 18:34:42 +0200
Subject: net: bridge: fix vlan stats continue counter

I made a dumb off-by-one mistake when I added the vlan stats counter
dumping code. The increment should happen before the check, not after
otherwise we miss one entry when we continue dumping.

Fixes: a60c090361ea ("bridge: netlink: export per-vlan stats")
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index a5343c7..85e89f6 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -1273,7 +1273,7 @@ static int br_fill_linkxstats(struct sk_buff *skb, const struct net_device *dev,
 		struct bridge_vlan_xstats vxi;
 		struct br_vlan_stats stats;
 
-		if (vl_idx++ < *prividx)
+		if (++vl_idx < *prividx)
 			continue;
 		memset(&vxi, 0, sizeof(vxi));
 		vxi.vid = v->vid;
-- 
cgit v0.10.2


From ceb56070359b7329b5678b5d95a376fcb24767be Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Mon, 27 Jun 2016 21:38:11 +0200
Subject: bpf, perf: delay release of BPF prog after grace period

Commit dead9f29ddcc ("perf: Fix race in BPF program unregister") moved
destruction of BPF program from free_event_rcu() callback to __free_event(),
which is problematic if used with tail calls: if prog A is attached as
trace event directly, but at the same time present in a tail call map used
by another trace event program elsewhere, then we need to delay destruction
via RCU grace period since it can still be in use by the program doing the
tail call (the prog first needs to be dropped from the tail call map, then
trace event with prog A attached destroyed, so we get immediate destruction).

Fixes: dead9f29ddcc ("perf: Fix race in BPF program unregister")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Cc: Jann Horn <jann@thejh.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 8269caf..0de4de6 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -264,6 +264,10 @@ static inline struct bpf_prog *bpf_prog_get(u32 ufd)
 static inline void bpf_prog_put(struct bpf_prog *prog)
 {
 }
+
+static inline void bpf_prog_put_rcu(struct bpf_prog *prog)
+{
+}
 #endif /* CONFIG_BPF_SYSCALL */
 
 /* verifier prototypes for helper functions called from eBPF programs */
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 274450e..d00c47b 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7531,7 +7531,7 @@ static void perf_event_free_bpf_prog(struct perf_event *event)
 	prog = event->tp_event->prog;
 	if (prog) {
 		event->tp_event->prog = NULL;
-		bpf_prog_put(prog);
+		bpf_prog_put_rcu(prog);
 	}
 }
 
-- 
cgit v0.10.2


From 5b4d10f5e0369ed79434593b7cd8e85eebbe473f Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Mon, 27 Jun 2016 23:50:29 +0300
Subject: qlcnic: use the correct ring in qlcnic_83xx_process_rcv_ring_diag()

There is a static checker warning here "warn: mask and shift to zero"
and the code sets "ring" to zero every time.  From looking at how
QLCNIC_FETCH_RING_ID() is used in qlcnic_83xx_process_rcv_ring() the
qlcnic_83xx_hndl() should be removed.

Fixes: 4be41e92f7c6 ('qlcnic: 83xx data path routines')
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c
index 7bd6f25..607bb7d 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c
@@ -2220,7 +2220,7 @@ void qlcnic_83xx_process_rcv_ring_diag(struct qlcnic_host_sds_ring *sds_ring)
 	if (!opcode)
 		return;
 
-	ring = QLCNIC_FETCH_RING_ID(qlcnic_83xx_hndl(sts_data[0]));
+	ring = QLCNIC_FETCH_RING_ID(sts_data[0]);
 	qlcnic_83xx_process_rcv_diag(adapter, ring, sts_data);
 	desc = &sds_ring->desc_head[consumer];
 	desc->status_desc_data[0] = cpu_to_le64(STATUS_OWNER_PHANTOM);
-- 
cgit v0.10.2


From c041778c966c92c964033f1cdfee60a9f2b5e465 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Wed, 29 Jun 2016 10:36:39 +0200
Subject: cfg80211: fix proto in ieee80211_data_to_8023 for frames without LLC
 header

The PDU length of incoming LLC frames is set to the total skb payload size
in __ieee80211_data_to_8023() of net/wireless/util.c which incorrectly
includes the length of the IEEE 802.11 header.

The resulting LLC frame header has a too large PDU length, causing the
llc_fixup_skb() function of net/llc/llc_input.c to reject the incoming
skb, effectively breaking STP.

Solve the problem by properly substracting the IEEE 802.11 frame header size
from the PDU length, allowing the LLC processor to pick up the incoming
control messages.

Special thanks to Gerry Rozema for tracking down the regression and proposing
a suitable patch.

Fixes: 2d1c304cb2d5 ("cfg80211: add function for 802.3 conversion with separate output buffer")
Cc: stable@vger.kernel.org
Reported-by: Gerry Rozema <gerryr@rozeware.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Johannes Berg <johannes@sipsolutions.net>

diff --git a/net/wireless/util.c b/net/wireless/util.c
index 4e809e9..2443ee3 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -509,7 +509,7 @@ static int __ieee80211_data_to_8023(struct sk_buff *skb, struct ethhdr *ehdr,
 		 * replace EtherType */
 		hdrlen += ETH_ALEN + 2;
 	else
-		tmp.h_proto = htons(skb->len);
+		tmp.h_proto = htons(skb->len - hdrlen);
 
 	pskb_pull(skb, hdrlen);
 
-- 
cgit v0.10.2


From 889ad4566604610804df984e1a3dd5e2c66256e5 Mon Sep 17 00:00:00 2001
From: Jarod Wilson <jarod@redhat.com>
Date: Tue, 28 Jun 2016 20:41:31 -0700
Subject: e1000e: keep VLAN interfaces functional after rxvlan off

I've got a bug report about an e1000e interface, where a VLAN interface is
set up on top of it:

$ ip link add link ens1f0 name ens1f0.99 type vlan id 99
$ ip link set ens1f0 up
$ ip link set ens1f0.99 up
$ ip addr add 192.168.99.92 dev ens1f0.99

At this point, I can ping another host on vlan 99, ip 192.168.99.91.
However, if I do the following:

$ ethtool -K ens1f0 rxvlan off

Then no traffic passes on ens1f0.99. It comes back if I toggle rxvlan on
again. I'm not sure if this is actually intended behavior, or if there's a
lack of software VLAN stripping fallback, or what, but things continue to
work if I simply don't call e1000e_vlan_strip_disable() if there are
active VLANs (plagiarizing a function from the e1000 driver here) on the
interface.

Also slipped a related-ish fix to the kerneldoc text for
e1000e_vlan_strip_disable here...

Signed-off-by: Jarod Wilson <jarod@redhat.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index 75e6089..73f7452 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -154,6 +154,16 @@ void __ew32(struct e1000_hw *hw, unsigned long reg, u32 val)
 	writel(val, hw->hw_addr + reg);
 }
 
+static bool e1000e_vlan_used(struct e1000_adapter *adapter)
+{
+	u16 vid;
+
+	for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
+		return true;
+
+	return false;
+}
+
 /**
  * e1000_regdump - register printout routine
  * @hw: pointer to the HW structure
@@ -2789,7 +2799,7 @@ static void e1000e_vlan_filter_enable(struct e1000_adapter *adapter)
 }
 
 /**
- * e1000e_vlan_strip_enable - helper to disable HW VLAN stripping
+ * e1000e_vlan_strip_disable - helper to disable HW VLAN stripping
  * @adapter: board private structure to initialize
  **/
 static void e1000e_vlan_strip_disable(struct e1000_adapter *adapter)
@@ -3443,7 +3453,8 @@ static void e1000e_set_rx_mode(struct net_device *netdev)
 
 	ew32(RCTL, rctl);
 
-	if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX)
+	if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX ||
+	    e1000e_vlan_used(adapter))
 		e1000e_vlan_strip_enable(adapter);
 	else
 		e1000e_vlan_strip_disable(adapter);
-- 
cgit v0.10.2


From b560f03ddfb072bca65e9440ff0dc4f9b1d1f056 Mon Sep 17 00:00:00 2001
From: David Barroso <dbarroso@fastly.com>
Date: Tue, 28 Jun 2016 11:16:43 +0300
Subject: neigh: Explicitly declare RCU-bh read side critical section in
 neigh_xmit()

neigh_xmit() expects to be called inside an RCU-bh read side critical
section, and while one of its two current callers gets this right, the
other one doesn't.

More specifically, neigh_xmit() has two callers, mpls_forward() and
mpls_output(), and while both callers call neigh_xmit() under
rcu_read_lock(), this provides sufficient protection for neigh_xmit()
only in the case of mpls_forward(), as that is always called from
softirq context and therefore doesn't need explicit BH protection,
while mpls_output() can be called from process context with softirqs
enabled.

When mpls_output() is called from process context, with softirqs
enabled, we can be preempted by a softirq at any time, and RCU-bh
considers the completion of a softirq as signaling the end of any
pending read-side critical sections, so if we do get a softirq
while we are in the part of neigh_xmit() that expects to be run inside
an RCU-bh read side critical section, we can end up with an unexpected
RCU grace period running right in the middle of that critical section,
making things go boom.

This patch fixes this impedance mismatch in the callee, by making
neigh_xmit() always take rcu_read_{,un}lock_bh() around the code that
expects to be treated as an RCU-bh read side critical section, as this
seems a safer option than fixing it in the callers.

Fixes: 4fd3d7d9e868f ("neigh: Add helper function neigh_xmit")
Signed-off-by: David Barroso <dbarroso@fastly.com>
Signed-off-by: Lennert Buytenhek <lbuytenhek@fastly.com>
Acked-by: David Ahern <dsa@cumulusnetworks.com>
Acked-by: Robert Shearman <rshearma@brocade.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 29dd8cc..510cd62 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -2469,13 +2469,17 @@ int neigh_xmit(int index, struct net_device *dev,
 		tbl = neigh_tables[index];
 		if (!tbl)
 			goto out;
+		rcu_read_lock_bh();
 		neigh = __neigh_lookup_noref(tbl, addr, dev);
 		if (!neigh)
 			neigh = __neigh_create(tbl, addr, dev, false);
 		err = PTR_ERR(neigh);
-		if (IS_ERR(neigh))
+		if (IS_ERR(neigh)) {
+			rcu_read_unlock_bh();
 			goto out_kfree_skb;
+		}
 		err = neigh->output(neigh, skb);
+		rcu_read_unlock_bh();
 	}
 	else if (index == NEIGH_LINK_TABLE) {
 		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
-- 
cgit v0.10.2


From 34c7bb4705a0a2d344b0c82eaf3d3bfa2bc9da45 Mon Sep 17 00:00:00 2001
From: Sudarsana Reddy Kalluru <sudarsana.kalluru@qlogic.com>
Date: Tue, 28 Jun 2016 07:46:03 -0400
Subject: qed: Protect the doorbell BAR with the write barriers.

SPQ doorbell is currently protected with the compilation barrier. Under the
stress scenarios, we may get into a state where (due to the weak ordering)
several ramrod doorbells were written to the BAR with an out-of-order
producer values. Need to change the barrier type to a write barrier to make
sure that the write buffer is flushed after each doorbell.

Signed-off-by: Sudarsana Reddy Kalluru <sudarsana.kalluru@qlogic.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/drivers/net/ethernet/qlogic/qed/qed_spq.c b/drivers/net/ethernet/qlogic/qed/qed_spq.c
index 67d9893..b122f60 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_spq.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_spq.c
@@ -213,19 +213,15 @@ static int qed_spq_hw_post(struct qed_hwfn *p_hwfn,
 	SET_FIELD(db.params, CORE_DB_DATA_AGG_VAL_SEL,
 		  DQ_XCM_CORE_SPQ_PROD_CMD);
 	db.agg_flags = DQ_XCM_CORE_DQ_CF_CMD;
-
-	/* validate producer is up to-date */
-	rmb();
-
 	db.spq_prod = cpu_to_le16(qed_chain_get_prod_idx(p_chain));
 
-	/* do not reorder */
-	barrier();
+	/* make sure the SPQE is updated before the doorbell */
+	wmb();
 
 	DOORBELL(p_hwfn, qed_db_addr(p_spq->cid, DQ_DEMS_LEGACY), *(u32 *)&db);
 
 	/* make sure doorbell is rang */
-	mmiowb();
+	wmb();
 
 	DP_VERBOSE(p_hwfn, QED_MSG_SPQ,
 		   "Doorbelled [0x%08x, CID 0x%08x] with Flags: %02x agg_params: %02x, prod: %04x\n",
-- 
cgit v0.10.2


From d913d3a763a6f66a862a6eafcf6da89a7905832a Mon Sep 17 00:00:00 2001
From: Samuel Gauthier <samuel.gauthier@6wind.com>
Date: Tue, 28 Jun 2016 17:22:26 +0200
Subject: openvswitch: fix conntrack netlink event delivery

Only the first and last netlink message for a particular conntrack are
actually sent. The first message is sent through nf_conntrack_confirm when
the conntrack is committed. The last one is sent when the conntrack is
destroyed on timeout. The other conntrack state change messages are not
advertised.

When the conntrack subsystem is used from netfilter, nf_conntrack_confirm
is called for each packet, from the postrouting hook, which in turn calls
nf_ct_deliver_cached_events to send the state change netlink messages.

This commit fixes the problem by calling nf_ct_deliver_cached_events in the
non-commit case as well.

Fixes: 7f8a436eaa2c ("openvswitch: Add conntrack action")
CC: Joe Stringer <joestringer@nicira.com>
CC: Justin Pettit <jpettit@nicira.com>
CC: Andy Zhou <azhou@nicira.com>
CC: Thomas Graf <tgraf@suug.ch>
Signed-off-by: Samuel Gauthier <samuel.gauthier@6wind.com>
Acked-by: Joe Stringer <joe@ovn.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index 3d5feed..d843125 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -818,8 +818,18 @@ static int ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
 		 */
 		state = OVS_CS_F_TRACKED | OVS_CS_F_NEW | OVS_CS_F_RELATED;
 		__ovs_ct_update_key(key, state, &info->zone, exp->master);
-	} else
-		return __ovs_ct_lookup(net, key, info, skb);
+	} else {
+		struct nf_conn *ct;
+		int err;
+
+		err = __ovs_ct_lookup(net, key, info, skb);
+		if (err)
+			return err;
+
+		ct = (struct nf_conn *)skb->nfct;
+		if (ct)
+			nf_ct_deliver_cached_events(ct);
+	}
 
 	return 0;
 }
-- 
cgit v0.10.2


From 03bea60409328de54e4ff7ec41672e12a9cb0908 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Wed, 29 Jun 2016 16:03:55 +0200
Subject: ovl: get_write_access() in truncate

When truncating a file we should check write access on the underlying
inode.  And we should do so on the lower file as well (before copy-up) for
consistency.

Original patch and test case by Aihua Zhang.

 - - >o >o - - test.c - - >o >o - -
#include <stdio.h>
#include <errno.h>
#include <unistd.h>

int main(int argc, char *argv[])
{
	int ret;

	ret = truncate(argv[0], 4096);
	if (ret != -1) {
		fprintf(stderr, "truncate(argv[0]) should have failed\n");
		return 1;
	}
	if (errno != ETXTBSY) {
		perror("truncate(argv[0])");
		return 1;
	}

	return 0;
}
 - - >o >o - - >o >o - - >o >o - -

Reported-by: Aihua Zhang <zhangaihua1@huawei.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Cc: <stable@vger.kernel.org>

diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index 8514d69..c831c2e 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -59,16 +59,37 @@ int ovl_setattr(struct dentry *dentry, struct iattr *attr)
 	if (err)
 		goto out;
 
+	if (attr->ia_valid & ATTR_SIZE) {
+		struct inode *realinode = d_inode(ovl_dentry_real(dentry));
+
+		err = -ETXTBSY;
+		if (atomic_read(&realinode->i_writecount) < 0)
+			goto out_drop_write;
+	}
+
 	err = ovl_copy_up(dentry);
 	if (!err) {
+		struct inode *winode = NULL;
+
 		upperdentry = ovl_dentry_upper(dentry);
 
+		if (attr->ia_valid & ATTR_SIZE) {
+			winode = d_inode(upperdentry);
+			err = get_write_access(winode);
+			if (err)
+				goto out_drop_write;
+		}
+
 		inode_lock(upperdentry->d_inode);
 		err = notify_change(upperdentry, attr, NULL);
 		if (!err)
 			ovl_copyattr(upperdentry->d_inode, dentry->d_inode);
 		inode_unlock(upperdentry->d_inode);
+
+		if (winode)
+			put_write_access(winode);
 	}
+out_drop_write:
 	ovl_drop_write(dentry);
 out:
 	return err;
-- 
cgit v0.10.2


From 3a8bd717ee3e0508fc0dd517b97e950989e15f8c Mon Sep 17 00:00:00 2001
From: Rex Zhu <Rex.Zhu@amd.com>
Date: Mon, 27 Jun 2016 14:46:47 +0800
Subject: drm/amd/powerplay: Workaround for Memory EDC Error on Polaris10.

Signed-off-by: Rex Zhu <Rex.Zhu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c
index d23dcce..d15584c 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c
@@ -2924,6 +2924,31 @@ static int polaris10_set_private_data_based_on_pptable(struct pp_hwmgr *hwmgr)
 	return 0;
 }
 
+int polaris10_patch_voltage_workaround(struct pp_hwmgr *hwmgr)
+{
+	struct phm_ppt_v1_information *table_info =
+		       (struct phm_ppt_v1_information *)(hwmgr->pptable);
+	struct phm_ppt_v1_clock_voltage_dependency_table *dep_mclk_table =
+			table_info->vdd_dep_on_mclk;
+	struct phm_ppt_v1_voltage_lookup_table *lookup_table =
+			table_info->vddc_lookup_table;
+	uint32_t i;
+
+	if (hwmgr->chip_id == CHIP_POLARIS10 && hwmgr->hw_revision == 0xC7) {
+		if (lookup_table->entries[dep_mclk_table->entries[dep_mclk_table->count-1].vddInd].us_vdd >= 1000)
+			return 0;
+
+		for (i = 0; i < lookup_table->count; i++) {
+			if (lookup_table->entries[i].us_vdd < 0xff01 && lookup_table->entries[i].us_vdd >= 1000) {
+				dep_mclk_table->entries[dep_mclk_table->count-1].vddInd = (uint8_t) i;
+				return 0;
+			}
+		}
+	}
+	return 0;
+}
+
+
 int polaris10_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 {
 	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
@@ -3001,6 +3026,7 @@ int polaris10_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 
 	polaris10_set_features_platform_caps(hwmgr);
 
+	polaris10_patch_voltage_workaround(hwmgr);
 	polaris10_init_dpm_defaults(hwmgr);
 
 	/* Get leakage voltage based on leakage ID. */
-- 
cgit v0.10.2


From 0636e0d666e0238fa22348172c20a49f42a94395 Mon Sep 17 00:00:00 2001
From: Rex Zhu <Rex.Zhu@amd.com>
Date: Mon, 27 Jun 2016 17:30:24 +0800
Subject: drm/amd/powerplay: fix issue uvd dpm can't enabled on Polaris11.

1. Populate correct value of VDDCI voltage for SMC SAMU, VCE,
   and UVD levels depending on whether VDDCi control is SVI2 or GPIO.
2. Populate SMC ACPI minimum voltage using VBIOS boot SCLK and MCLK

When static voltage is configured as VDDCI, driver still tries to program
a voltage for MM minVoltage using VDDC-VDDCI delta requirement.
minVoltage should be set as boot up voltage.

Signed-off-by: Rex Zhu <Rex.Zhu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c
index d15584c..ec2a7ad 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c
@@ -1423,22 +1423,19 @@ static int polaris10_populate_smc_acpi_level(struct pp_hwmgr *hwmgr,
 
 	table->ACPILevel.Flags &= ~PPSMC_SWSTATE_FLAG_DC;
 
-	if (!data->sclk_dpm_key_disabled) {
-		/* Get MinVoltage and Frequency from DPM0,
-		 * already converted to SMC_UL */
-		sclk_frequency = data->dpm_table.sclk_table.dpm_levels[0].value;
-		result = polaris10_get_dependency_volt_by_clk(hwmgr,
-				table_info->vdd_dep_on_sclk,
-				table->ACPILevel.SclkFrequency,
-				&table->ACPILevel.MinVoltage, &mvdd);
-		PP_ASSERT_WITH_CODE((0 == result),
-				"Cannot find ACPI VDDC voltage value "
-				"in Clock Dependency Table", );
-	} else {
-		sclk_frequency = data->vbios_boot_state.sclk_bootup_value;
-		table->ACPILevel.MinVoltage =
-				data->vbios_boot_state.vddc_bootup_value * VOLTAGE_SCALE;
-	}
+
+	/* Get MinVoltage and Frequency from DPM0,
+	 * already converted to SMC_UL */
+	sclk_frequency = data->dpm_table.sclk_table.dpm_levels[0].value;
+	result = polaris10_get_dependency_volt_by_clk(hwmgr,
+			table_info->vdd_dep_on_sclk,
+			sclk_frequency,
+			&table->ACPILevel.MinVoltage, &mvdd);
+	PP_ASSERT_WITH_CODE((0 == result),
+			"Cannot find ACPI VDDC voltage value "
+			"in Clock Dependency Table",
+			);
+
 
 	result = polaris10_calculate_sclk_params(hwmgr, sclk_frequency,  &(table->ACPILevel.SclkSetting));
 	PP_ASSERT_WITH_CODE(result == 0, "Error retrieving Engine Clock dividers from VBIOS.", return result);
@@ -1463,24 +1460,18 @@ static int polaris10_populate_smc_acpi_level(struct pp_hwmgr *hwmgr,
 	CONVERT_FROM_HOST_TO_SMC_US(table->ACPILevel.SclkSetting.Fcw1_frac);
 	CONVERT_FROM_HOST_TO_SMC_US(table->ACPILevel.SclkSetting.Sclk_ss_slew_rate);
 
-	if (!data->mclk_dpm_key_disabled) {
-		/* Get MinVoltage and Frequency from DPM0, already converted to SMC_UL */
-		table->MemoryACPILevel.MclkFrequency =
-				data->dpm_table.mclk_table.dpm_levels[0].value;
-		result = polaris10_get_dependency_volt_by_clk(hwmgr,
-				table_info->vdd_dep_on_mclk,
-				table->MemoryACPILevel.MclkFrequency,
-				&table->MemoryACPILevel.MinVoltage, &mvdd);
-		PP_ASSERT_WITH_CODE((0 == result),
-				"Cannot find ACPI VDDCI voltage value "
-				"in Clock Dependency Table",
-				);
-	} else {
-		table->MemoryACPILevel.MclkFrequency =
-				data->vbios_boot_state.mclk_bootup_value;
-		table->MemoryACPILevel.MinVoltage =
-				data->vbios_boot_state.vddci_bootup_value * VOLTAGE_SCALE;
-	}
+
+	/* Get MinVoltage and Frequency from DPM0, already converted to SMC_UL */
+	table->MemoryACPILevel.MclkFrequency =
+			data->dpm_table.mclk_table.dpm_levels[0].value;
+	result = polaris10_get_dependency_volt_by_clk(hwmgr,
+			table_info->vdd_dep_on_mclk,
+			table->MemoryACPILevel.MclkFrequency,
+			&table->MemoryACPILevel.MinVoltage, &mvdd);
+	PP_ASSERT_WITH_CODE((0 == result),
+			"Cannot find ACPI VDDCI voltage value "
+			"in Clock Dependency Table",
+			);
 
 	us_mvdd = 0;
 	if ((POLARIS10_VOLTAGE_CONTROL_NONE == data->mvdd_control) ||
@@ -1525,6 +1516,7 @@ static int polaris10_populate_smc_vce_level(struct pp_hwmgr *hwmgr,
 	struct phm_ppt_v1_mm_clock_voltage_dependency_table *mm_table =
 			table_info->mm_dep_table;
 	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
+	uint32_t vddci;
 
 	table->VceLevelCount = (uint8_t)(mm_table->count);
 	table->VceBootLevel = 0;
@@ -1534,9 +1526,18 @@ static int polaris10_populate_smc_vce_level(struct pp_hwmgr *hwmgr,
 		table->VceLevel[count].MinVoltage = 0;
 		table->VceLevel[count].MinVoltage |=
 				(mm_table->entries[count].vddc * VOLTAGE_SCALE) << VDDC_SHIFT;
+
+		if (POLARIS10_VOLTAGE_CONTROL_BY_GPIO == data->vddci_control)
+			vddci = (uint32_t)phm_find_closest_vddci(&(data->vddci_voltage_table),
+						mm_table->entries[count].vddc - VDDC_VDDCI_DELTA);
+		else if (POLARIS10_VOLTAGE_CONTROL_BY_SVID2 == data->vddci_control)
+			vddci = mm_table->entries[count].vddc - VDDC_VDDCI_DELTA;
+		else
+			vddci = (data->vbios_boot_state.vddci_bootup_value * VOLTAGE_SCALE) << VDDCI_SHIFT;
+
+
 		table->VceLevel[count].MinVoltage |=
-				((mm_table->entries[count].vddc - data->vddc_vddci_delta) *
-						VOLTAGE_SCALE) << VDDCI_SHIFT;
+				(vddci * VOLTAGE_SCALE) << VDDCI_SHIFT;
 		table->VceLevel[count].MinVoltage |= 1 << PHASES_SHIFT;
 
 		/*retrieve divider value for VBIOS */
@@ -1565,6 +1566,7 @@ static int polaris10_populate_smc_samu_level(struct pp_hwmgr *hwmgr,
 	struct phm_ppt_v1_mm_clock_voltage_dependency_table *mm_table =
 			table_info->mm_dep_table;
 	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
+	uint32_t vddci;
 
 	table->SamuBootLevel = 0;
 	table->SamuLevelCount = (uint8_t)(mm_table->count);
@@ -1575,8 +1577,16 @@ static int polaris10_populate_smc_samu_level(struct pp_hwmgr *hwmgr,
 		table->SamuLevel[count].Frequency = mm_table->entries[count].samclock;
 		table->SamuLevel[count].MinVoltage |= (mm_table->entries[count].vddc *
 				VOLTAGE_SCALE) << VDDC_SHIFT;
-		table->SamuLevel[count].MinVoltage |= ((mm_table->entries[count].vddc -
-				data->vddc_vddci_delta) * VOLTAGE_SCALE) << VDDCI_SHIFT;
+
+		if (POLARIS10_VOLTAGE_CONTROL_BY_GPIO == data->vddci_control)
+			vddci = (uint32_t)phm_find_closest_vddci(&(data->vddci_voltage_table),
+						mm_table->entries[count].vddc - VDDC_VDDCI_DELTA);
+		else if (POLARIS10_VOLTAGE_CONTROL_BY_SVID2 == data->vddci_control)
+			vddci = mm_table->entries[count].vddc - VDDC_VDDCI_DELTA;
+		else
+			vddci = (data->vbios_boot_state.vddci_bootup_value * VOLTAGE_SCALE) << VDDCI_SHIFT;
+
+		table->SamuLevel[count].MinVoltage |= (vddci * VOLTAGE_SCALE) << VDDCI_SHIFT;
 		table->SamuLevel[count].MinVoltage |= 1 << PHASES_SHIFT;
 
 		/* retrieve divider value for VBIOS */
@@ -1659,6 +1669,7 @@ static int polaris10_populate_smc_uvd_level(struct pp_hwmgr *hwmgr,
 	struct phm_ppt_v1_mm_clock_voltage_dependency_table *mm_table =
 			table_info->mm_dep_table;
 	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
+	uint32_t vddci;
 
 	table->UvdLevelCount = (uint8_t)(mm_table->count);
 	table->UvdBootLevel = 0;
@@ -1669,8 +1680,16 @@ static int polaris10_populate_smc_uvd_level(struct pp_hwmgr *hwmgr,
 		table->UvdLevel[count].DclkFrequency = mm_table->entries[count].dclk;
 		table->UvdLevel[count].MinVoltage |= (mm_table->entries[count].vddc *
 				VOLTAGE_SCALE) << VDDC_SHIFT;
-		table->UvdLevel[count].MinVoltage |= ((mm_table->entries[count].vddc -
-				data->vddc_vddci_delta) * VOLTAGE_SCALE) << VDDCI_SHIFT;
+
+		if (POLARIS10_VOLTAGE_CONTROL_BY_GPIO == data->vddci_control)
+			vddci = (uint32_t)phm_find_closest_vddci(&(data->vddci_voltage_table),
+						mm_table->entries[count].vddc - VDDC_VDDCI_DELTA);
+		else if (POLARIS10_VOLTAGE_CONTROL_BY_SVID2 == data->vddci_control)
+			vddci = mm_table->entries[count].vddc - VDDC_VDDCI_DELTA;
+		else
+			vddci = (data->vbios_boot_state.vddci_bootup_value * VOLTAGE_SCALE) << VDDCI_SHIFT;
+
+		table->UvdLevel[count].MinVoltage |= (vddci * VOLTAGE_SCALE) << VDDCI_SHIFT;
 		table->UvdLevel[count].MinVoltage |= 1 << PHASES_SHIFT;
 
 		/* retrieve divider value for VBIOS */
@@ -1691,8 +1710,8 @@ static int polaris10_populate_smc_uvd_level(struct pp_hwmgr *hwmgr,
 		CONVERT_FROM_HOST_TO_SMC_UL(table->UvdLevel[count].VclkFrequency);
 		CONVERT_FROM_HOST_TO_SMC_UL(table->UvdLevel[count].DclkFrequency);
 		CONVERT_FROM_HOST_TO_SMC_UL(table->UvdLevel[count].MinVoltage);
-
 	}
+
 	return result;
 }
 
-- 
cgit v0.10.2


From d9d533c1483c4daf76e7e720c35896a430563ff8 Mon Sep 17 00:00:00 2001
From: Ken Wang <Qingqing.Wang@amd.com>
Date: Tue, 28 Jun 2016 13:28:50 +0800
Subject: drm/amdgpu: add ACLK_CNTL setting for polaris10

This is a temporary workaround for early boards.

Signed-off-by: Ken Wang <Qingqing.Wang@amd.com>
Reviewed-by: Rex Zhu <Rex.Zhu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 1a5cbaf..b2ebd4f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -47,6 +47,8 @@
 #include "dce/dce_10_0_d.h"
 #include "dce/dce_10_0_sh_mask.h"
 
+#include "smu/smu_7_1_3_d.h"
+
 #define GFX8_NUM_GFX_RINGS     1
 #define GFX8_NUM_COMPUTE_RINGS 8
 
@@ -693,6 +695,7 @@ static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
 		amdgpu_program_register_sequence(adev,
 						 polaris10_golden_common_all,
 						 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
+		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
 		break;
 	case CHIP_CARRIZO:
 		amdgpu_program_register_sequence(adev,
-- 
cgit v0.10.2


From a7f14a184e0e8e94becfc3f9608f6b0f9c339572 Mon Sep 17 00:00:00 2001
From: Rex Zhu <Rex.Zhu@amd.com>
Date: Tue, 28 Jun 2016 16:22:07 +0800
Subject: drm/amd/powerplay: workaround for UVD clock issue

workaround issue that when uvd dpm disabled,
uvd clock remain high on polaris10. Manually turn
off the clocks.

Signed-off-by: Rex Zhu <Rex.Zhu@amd.com>
Reviewed-by: Ken Wang <Qingqing.Wang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index e19520c..d9c88d13 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -1106,6 +1106,10 @@ static void amdgpu_uvd_idle_work_handler(struct work_struct *work)
 	if (fences == 0 && handles == 0) {
 		if (adev->pm.dpm_enabled) {
 			amdgpu_dpm_enable_uvd(adev, false);
+			/* just work around for uvd clock remain high even
+			 * when uvd dpm disabled on Polaris10 */
+			if (adev->asic_type == CHIP_POLARIS10)
+				amdgpu_asic_set_uvd_clocks(adev, 0, 0);
 		} else {
 			amdgpu_asic_set_uvd_clocks(adev, 0, 0);
 		}
-- 
cgit v0.10.2


From 91ff811f32763ea3135e832f7c1aeafc85ae1c98 Mon Sep 17 00:00:00 2001
From: Venkat Reddy Talla <vreddytalla@nvidia.com>
Date: Wed, 29 Jun 2016 15:31:27 +0530
Subject: regulator: max77620: check for valid regulator info

SD4 regulator is not registered with regulator core
framework in probe as there is no support in MAX77620 PMIC,
removing SD4 entry from MAX77620 regulator information list
and checking for valid regulator information data before
configuring FPS source and FPS power up/down period to avoid
NULL pointer exception if regulator not registered with core.

Signed-off-by: Venkat Reddy Talla <vreddytalla@nvidia.com>
Signed-off-by: Mark Brown <broonie@kernel.org>

diff --git a/drivers/regulator/max77620-regulator.c b/drivers/regulator/max77620-regulator.c
index 321e804..a1b49a6 100644
--- a/drivers/regulator/max77620-regulator.c
+++ b/drivers/regulator/max77620-regulator.c
@@ -123,6 +123,9 @@ static int max77620_regulator_set_fps_src(struct max77620_regulator *pmic,
 	unsigned int val;
 	int ret;
 
+	if (!rinfo)
+		return 0;
+
 	switch (fps_src) {
 	case MAX77620_FPS_SRC_0:
 	case MAX77620_FPS_SRC_1:
@@ -171,6 +174,9 @@ static int max77620_regulator_set_fps_slots(struct max77620_regulator *pmic,
 	int pd = rpdata->active_fps_pd_slot;
 	int ret = 0;
 
+	if (!rinfo)
+		return 0;
+
 	if (is_suspend) {
 		pu = rpdata->suspend_fps_pu_slot;
 		pd = rpdata->suspend_fps_pd_slot;
@@ -680,7 +686,6 @@ static struct max77620_regulator_info max77620_regs_info[MAX77620_NUM_REGS] = {
 	RAIL_SD(SD1, sd1, "in-sd1", SD1, 600000, 1550000, 12500, 0x22, SD1),
 	RAIL_SD(SD2, sd2, "in-sd2", SDX, 600000, 3787500, 12500, 0xFF, NONE),
 	RAIL_SD(SD3, sd3, "in-sd3", SDX, 600000, 3787500, 12500, 0xFF, NONE),
-	RAIL_SD(SD4, sd4, "in-sd4", SDX, 600000, 3787500, 12500, 0xFF, NONE),
 
 	RAIL_LDO(LDO0, ldo0, "in-ldo0-1", N, 800000, 2375000, 25000),
 	RAIL_LDO(LDO1, ldo1, "in-ldo0-1", N, 800000, 2375000, 25000),
-- 
cgit v0.10.2


From 1bcbf42d2732b3fdaa8559b0dfc91567769e23c8 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Wed, 29 Jun 2016 11:19:32 -0700
Subject: nfit: fix format interface code byte order

Per JEDEC Annex L Release 3 the SPD data is:

Bits 9~5 00 000 = Function Undefined
         00 001 = Byte addressable energy backed
         00 010 = Block addressed
         00 011 = Byte addressable, no energy backed
         All other codes reserved
Bits 4~0 0 0000 = Proprietary interface
         0 0001 = Standard interface 1
         All other codes reserved; see Definitions of Functions

...and per the ACPI 6.1 spec:

    byte0: Bits 4~0 (0 or 1)
    byte1: Bits 9~5 (1, 2, or 3)

...so a format interface code displayed as 0x301 should be stored in the
nfit as (0x1, 0x3), little-endian.

Cc: Toshi Kani <toshi.kani@hpe.com>
Cc: Rafael J. Wysocki <rjw@rjwysocki.net>
Cc: Robert Moore <robert.moore@intel.com>
Cc: Robert Elliott <elliott@hpe.com>
Link: https://bugzilla.kernel.org/show_bug.cgi?id=121161
Fixes: 30ec5fd464d5 ("nfit: fix format interface code byte order per ACPI6.1")
Fixes: 5ad9a7fde07a ("acpi/nfit: Update nfit driver to comply with ACPI 6.1")
Reported-by: Kristin Jacque <kristin.jacque@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>

diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c
index 32579a7..ac6ddcc0 100644
--- a/drivers/acpi/nfit.c
+++ b/drivers/acpi/nfit.c
@@ -928,7 +928,7 @@ static ssize_t format_show(struct device *dev,
 {
 	struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev);
 
-	return sprintf(buf, "0x%04x\n", be16_to_cpu(dcr->code));
+	return sprintf(buf, "0x%04x\n", le16_to_cpu(dcr->code));
 }
 static DEVICE_ATTR_RO(format);
 
@@ -961,8 +961,8 @@ static ssize_t format1_show(struct device *dev,
 				continue;
 			if (nfit_dcr->dcr->code == dcr->code)
 				continue;
-			rc = sprintf(buf, "%#x\n",
-					be16_to_cpu(nfit_dcr->dcr->code));
+			rc = sprintf(buf, "0x%04x\n",
+					le16_to_cpu(nfit_dcr->dcr->code));
 			break;
 		}
 		if (rc != ENXIO)
diff --git a/drivers/acpi/nfit.h b/drivers/acpi/nfit.h
index 11cb383..02b9ea1 100644
--- a/drivers/acpi/nfit.h
+++ b/drivers/acpi/nfit.h
@@ -53,12 +53,12 @@ enum nfit_uuids {
 };
 
 /*
- * Region format interface codes are stored as an array of bytes in the
- * NFIT DIMM Control Region structure
+ * Region format interface codes are stored with the interface as the
+ * LSB and the function as the MSB.
  */
-#define NFIT_FIC_BYTE cpu_to_be16(0x101) /* byte-addressable energy backed */
-#define NFIT_FIC_BLK cpu_to_be16(0x201) /* block-addressable non-energy backed */
-#define NFIT_FIC_BYTEN cpu_to_be16(0x301) /* byte-addressable non-energy backed */
+#define NFIT_FIC_BYTE cpu_to_le16(0x101) /* byte-addressable energy backed */
+#define NFIT_FIC_BLK cpu_to_le16(0x201) /* block-addressable non-energy backed */
+#define NFIT_FIC_BYTEN cpu_to_le16(0x301) /* byte-addressable non-energy backed */
 
 enum {
 	NFIT_BLK_READ_FLUSH = 1,
-- 
cgit v0.10.2


From 3c35f6edc09b239a60de87a5aeb78563fc372704 Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Mon, 6 Jun 2016 16:56:49 +0100
Subject: reset: Reorder inline reset_control_get*() wrappers

We're about to split the current API into two, where consumers will
be forced to be explicit when requesting reset lines.  The choice
will be to either the call the *_exclusive or *_shared variant
depending on whether they can actually tolorate not being asserted
when that request is made.

The new API will look like this once reorded and complete:

  reset_control_get_exclusive()
  reset_control_get_shared()
  reset_control_get_optional_exclusive()
  reset_control_get_optional_shared()
  of_reset_control_get_exclusive()
  of_reset_control_get_shared()
  of_reset_control_get_exclusive_by_index()
  of_reset_control_get_shared_by_index()
  devm_reset_control_get_exclusive()
  devm_reset_control_get_shared()
  devm_reset_control_get_optional_exclusive()
  devm_reset_control_get_optional_shared()
  devm_reset_control_get_exclusive_by_index()
  devm_reset_control_get_shared_by_index()

Signed-off-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>

diff --git a/include/linux/reset.h b/include/linux/reset.h
index ec0306ce..33eaf11 100644
--- a/include/linux/reset.h
+++ b/include/linux/reset.h
@@ -107,12 +107,6 @@ static inline struct reset_control *__must_check reset_control_get(
 	return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, 0);
 }
 
-static inline struct reset_control *reset_control_get_optional(
-					struct device *dev, const char *id)
-{
-	return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, 0);
-}
-
 /**
  * reset_control_get_shared - Lookup and obtain a shared reference to a
  *                            reset controller.
@@ -141,6 +135,12 @@ static inline struct reset_control *reset_control_get_shared(
 	return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, 1);
 }
 
+static inline struct reset_control *reset_control_get_optional(
+					struct device *dev, const char *id)
+{
+	return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, 0);
+}
+
 /**
  * of_reset_control_get - Lookup and obtain an exclusive reference to a
  *                        reset controller.
@@ -191,6 +191,21 @@ static inline struct reset_control *__must_check devm_reset_control_get(
 	return __devm_reset_control_get(dev, id, 0, 0);
 }
 
+/**
+ * devm_reset_control_get_shared - resource managed reset_control_get_shared()
+ * @dev: device to be reset by the controller
+ * @id: reset line name
+ *
+ * Managed reset_control_get_shared(). For reset controllers returned from
+ * this function, reset_control_put() is called automatically on driver detach.
+ * See reset_control_get_shared() for more information.
+ */
+static inline struct reset_control *devm_reset_control_get_shared(
+					struct device *dev, const char *id)
+{
+	return __devm_reset_control_get(dev, id, 0, 1);
+}
+
 static inline struct reset_control *devm_reset_control_get_optional(
 					struct device *dev, const char *id)
 {
@@ -213,21 +228,6 @@ static inline struct reset_control *devm_reset_control_get_by_index(
 }
 
 /**
- * devm_reset_control_get_shared - resource managed reset_control_get_shared()
- * @dev: device to be reset by the controller
- * @id: reset line name
- *
- * Managed reset_control_get_shared(). For reset controllers returned from
- * this function, reset_control_put() is called automatically on driver detach.
- * See reset_control_get_shared() for more information.
- */
-static inline struct reset_control *devm_reset_control_get_shared(
-					struct device *dev, const char *id)
-{
-	return __devm_reset_control_get(dev, id, 0, 1);
-}
-
-/**
  * devm_reset_control_get_shared_by_index - resource managed
  * reset_control_get_shared
  * @dev: device to be reset by the controller
-- 
cgit v0.10.2


From a53e35db70d1638002e40d495d096181dbb0fe16 Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Mon, 6 Jun 2016 16:56:50 +0100
Subject: reset: Ensure drivers are explicit when requesting reset lines

Phasing out generic reset line requests enables us to make some better
decisions on when and how to (de)assert said lines.  If an 'exclusive'
line is requested, we know a device *requires* a reset and that it's
preferable to act upon a request right away.  However, if a 'shared'
reset line is requested, we can reasonably assume sure that placing a
device into reset isn't a hard requirement, but probably a measure to
save power and is thus able to cope with not being asserted if another
device is still in use.

In order allow gentle adoption and not to forcing all consumers to
move to the API immediately, causing administration headache between
subsystems, this patch adds some temporary stand-in shim-calls.  This
will ease the burden at merge time and allow subsystems to migrate over
to the new API in a more realistic time-frame.

Signed-off-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>

diff --git a/include/linux/reset.h b/include/linux/reset.h
index 33eaf11..9cf4cf3 100644
--- a/include/linux/reset.h
+++ b/include/linux/reset.h
@@ -84,8 +84,8 @@ static inline struct reset_control *__devm_reset_control_get(
 #endif /* CONFIG_RESET_CONTROLLER */
 
 /**
- * reset_control_get - Lookup and obtain an exclusive reference to a
- *                     reset controller.
+ * reset_control_get_exclusive - Lookup and obtain an exclusive reference
+ *                               to a reset controller.
  * @dev: device to be reset by the controller
  * @id: reset line name
  *
@@ -98,8 +98,8 @@ static inline struct reset_control *__devm_reset_control_get(
  *
  * Use of id names is optional.
  */
-static inline struct reset_control *__must_check reset_control_get(
-					struct device *dev, const char *id)
+static inline struct reset_control *
+__must_check reset_control_get_exclusive(struct device *dev, const char *id)
 {
 #ifndef CONFIG_RESET_CONTROLLER
 	WARN_ON(1);
@@ -135,15 +135,15 @@ static inline struct reset_control *reset_control_get_shared(
 	return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, 1);
 }
 
-static inline struct reset_control *reset_control_get_optional(
+static inline struct reset_control *reset_control_get_optional_exclusive(
 					struct device *dev, const char *id)
 {
 	return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, 0);
 }
 
 /**
- * of_reset_control_get - Lookup and obtain an exclusive reference to a
- *                        reset controller.
+ * of_reset_control_get_exclusive - Lookup and obtain an exclusive reference
+ *                                  to a reset controller.
  * @node: device to be reset by the controller
  * @id: reset line name
  *
@@ -151,15 +151,16 @@ static inline struct reset_control *reset_control_get_optional(
  *
  * Use of id names is optional.
  */
-static inline struct reset_control *of_reset_control_get(
+static inline struct reset_control *of_reset_control_get_exclusive(
 				struct device_node *node, const char *id)
 {
 	return __of_reset_control_get(node, id, 0, 0);
 }
 
 /**
- * of_reset_control_get_by_index - Lookup and obtain an exclusive reference to
- *                                 a reset controller by index.
+ * of_reset_control_get_exclusive_by_index - Lookup and obtain an exclusive
+ *                                           reference to a reset controller
+ *                                           by index.
  * @node: device to be reset by the controller
  * @index: index of the reset controller
  *
@@ -167,23 +168,27 @@ static inline struct reset_control *of_reset_control_get(
  * in whatever order. Returns a struct reset_control or IS_ERR() condition
  * containing errno.
  */
-static inline struct reset_control *of_reset_control_get_by_index(
+static inline struct reset_control *of_reset_control_get_exclusive_by_index(
 					struct device_node *node, int index)
 {
 	return __of_reset_control_get(node, NULL, index, 0);
 }
 
 /**
- * devm_reset_control_get - resource managed reset_control_get()
+ * devm_reset_control_get_exclusive - resource managed
+ *                                    reset_control_get_exclusive()
  * @dev: device to be reset by the controller
  * @id: reset line name
  *
- * Managed reset_control_get(). For reset controllers returned from this
- * function, reset_control_put() is called automatically on driver detach.
- * See reset_control_get() for more information.
+ * Managed reset_control_get_exclusive(). For reset controllers returned
+ * from this function, reset_control_put() is called automatically on driver
+ * detach.
+ *
+ * See reset_control_get_exclusive() for more information.
  */
-static inline struct reset_control *__must_check devm_reset_control_get(
-					struct device *dev, const char *id)
+static inline struct reset_control *
+__must_check devm_reset_control_get_exclusive(struct device *dev,
+					      const char *id)
 {
 #ifndef CONFIG_RESET_CONTROLLER
 	WARN_ON(1);
@@ -206,23 +211,26 @@ static inline struct reset_control *devm_reset_control_get_shared(
 	return __devm_reset_control_get(dev, id, 0, 1);
 }
 
-static inline struct reset_control *devm_reset_control_get_optional(
+static inline struct reset_control *devm_reset_control_get_optional_exclusive(
 					struct device *dev, const char *id)
 {
 	return __devm_reset_control_get(dev, id, 0, 0);
 }
 
 /**
- * devm_reset_control_get_by_index - resource managed reset_control_get
+ * devm_reset_control_get_exclusive_by_index - resource managed
+ *                                             reset_control_get_exclusive()
  * @dev: device to be reset by the controller
  * @index: index of the reset controller
  *
- * Managed reset_control_get(). For reset controllers returned from this
- * function, reset_control_put() is called automatically on driver detach.
- * See reset_control_get() for more information.
+ * Managed reset_control_get_exclusive(). For reset controllers returned from
+ * this function, reset_control_put() is called automatically on driver
+ * detach.
+ *
+ * See reset_control_get_exclusive() for more information.
  */
-static inline struct reset_control *devm_reset_control_get_by_index(
-					struct device *dev, int index)
+static inline struct reset_control *
+devm_reset_control_get_exclusive_by_index(struct device *dev, int index)
 {
 	return __devm_reset_control_get(dev, NULL, index, 0);
 }
@@ -243,4 +251,54 @@ static inline struct reset_control *devm_reset_control_get_shared_by_index(
 	return __devm_reset_control_get(dev, NULL, index, 1);
 }
 
+/*
+ * TEMPORARY calls to use during transition:
+ *
+ *   of_reset_control_get() => of_reset_control_get_exclusive()
+ *
+ * These inline function calls will be removed once all consumers
+ * have been moved over to the new explicit API.
+ */
+static inline struct reset_control *reset_control_get(
+				struct device *dev, const char *id)
+{
+	return reset_control_get_exclusive(dev, id);
+}
+
+static inline struct reset_control *reset_control_get_optional(
+					struct device *dev, const char *id)
+{
+	return reset_control_get_optional_exclusive(dev, id);
+}
+
+static inline struct reset_control *of_reset_control_get(
+				struct device_node *node, const char *id)
+{
+	return of_reset_control_get_exclusive(node, id);
+}
+
+static inline struct reset_control *of_reset_control_get_by_index(
+				struct device_node *node, int index)
+{
+	return of_reset_control_get_exclusive_by_index(node, index);
+}
+
+static inline struct reset_control *devm_reset_control_get(
+				struct device *dev, const char *id)
+{
+	return devm_reset_control_get_exclusive(dev, id);
+}
+
+static inline struct reset_control *devm_reset_control_get_optional(
+				struct device *dev, const char *id)
+{
+	return devm_reset_control_get_optional_exclusive(dev, id);
+
+}
+
+static inline struct reset_control *devm_reset_control_get_by_index(
+				struct device *dev, int index)
+{
+	return devm_reset_control_get_exclusive_by_index(dev, index);
+}
 #endif
-- 
cgit v0.10.2


From 40faee8ee471ae526344cd5c62ff520d356de841 Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Mon, 6 Jun 2016 16:56:51 +0100
Subject: reset: Supply *_shared variant calls when using of_* API

Consumers need to be able to specify whether they are requesting an
'exclusive' or 'shared' reset line no matter which API (of_*, devm_*,
etc) they are using.  This change allows users of the of_* API in
particular to specify that their request is for a 'shared' line.

Signed-off-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>

diff --git a/include/linux/reset.h b/include/linux/reset.h
index 9cf4cf3..fd69240 100644
--- a/include/linux/reset.h
+++ b/include/linux/reset.h
@@ -158,6 +158,31 @@ static inline struct reset_control *of_reset_control_get_exclusive(
 }
 
 /**
+ * of_reset_control_get_shared - Lookup and obtain an shared reference
+ *                               to a reset controller.
+ * @node: device to be reset by the controller
+ * @id: reset line name
+ *
+ * When a reset-control is shared, the behavior of reset_control_assert /
+ * deassert is changed, the reset-core will keep track of a deassert_count
+ * and only (re-)assert the reset after reset_control_assert has been called
+ * as many times as reset_control_deassert was called. Also see the remark
+ * about shared reset-controls in the reset_control_assert docs.
+ *
+ * Calling reset_control_assert without first calling reset_control_deassert
+ * is not allowed on a shared reset control. Calling reset_control_reset is
+ * also not allowed on a shared reset control.
+ * Returns a struct reset_control or IS_ERR() condition containing errno.
+ *
+ * Use of id names is optional.
+ */
+static inline struct reset_control *of_reset_control_get_shared(
+				struct device_node *node, const char *id)
+{
+	return __of_reset_control_get(node, id, 0, 1);
+}
+
+/**
  * of_reset_control_get_exclusive_by_index - Lookup and obtain an exclusive
  *                                           reference to a reset controller
  *                                           by index.
@@ -175,6 +200,34 @@ static inline struct reset_control *of_reset_control_get_exclusive_by_index(
 }
 
 /**
+ * of_reset_control_get_shared_by_index - Lookup and obtain an shared
+ *                                        reference to a reset controller
+ *                                        by index.
+ * @node: device to be reset by the controller
+ * @index: index of the reset controller
+ *
+ * When a reset-control is shared, the behavior of reset_control_assert /
+ * deassert is changed, the reset-core will keep track of a deassert_count
+ * and only (re-)assert the reset after reset_control_assert has been called
+ * as many times as reset_control_deassert was called. Also see the remark
+ * about shared reset-controls in the reset_control_assert docs.
+ *
+ * Calling reset_control_assert without first calling reset_control_deassert
+ * is not allowed on a shared reset control. Calling reset_control_reset is
+ * also not allowed on a shared reset control.
+ * Returns a struct reset_control or IS_ERR() condition containing errno.
+ *
+ * This is to be used to perform a list of resets for a device or power domain
+ * in whatever order. Returns a struct reset_control or IS_ERR() condition
+ * containing errno.
+ */
+static inline struct reset_control *of_reset_control_get_shared_by_index(
+					struct device_node *node, int index)
+{
+	return __of_reset_control_get(node, NULL, index, 1);
+}
+
+/**
  * devm_reset_control_get_exclusive - resource managed
  *                                    reset_control_get_exclusive()
  * @dev: device to be reset by the controller
-- 
cgit v0.10.2


From c33d61a0c400c50f378ef03a386e646abca292ca Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Mon, 6 Jun 2016 16:56:52 +0100
Subject: reset: Supply *_shared variant calls when using *_optional APIs

Consumers need to be able to specify whether they are requesting an
'exclusive' or 'shared' reset line no matter which API (of_*, devm_*,
etc) they are using.  This change allows users of the optional_* API
in particular to specify that their request is for a 'shared' line.

Signed-off-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>

diff --git a/include/linux/reset.h b/include/linux/reset.h
index fd69240..c358106 100644
--- a/include/linux/reset.h
+++ b/include/linux/reset.h
@@ -141,6 +141,12 @@ static inline struct reset_control *reset_control_get_optional_exclusive(
 	return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, 0);
 }
 
+static inline struct reset_control *reset_control_get_optional_shared(
+					struct device *dev, const char *id)
+{
+	return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, 1);
+}
+
 /**
  * of_reset_control_get_exclusive - Lookup and obtain an exclusive reference
  *                                  to a reset controller.
@@ -270,6 +276,12 @@ static inline struct reset_control *devm_reset_control_get_optional_exclusive(
 	return __devm_reset_control_get(dev, id, 0, 0);
 }
 
+static inline struct reset_control *devm_reset_control_get_optional_shared(
+					struct device *dev, const char *id)
+{
+	return __devm_reset_control_get(dev, id, 0, 1);
+}
+
 /**
  * devm_reset_control_get_exclusive_by_index - resource managed
  *                                             reset_control_get_exclusive()
-- 
cgit v0.10.2


From 0bcc0eab363aa80f79769324bf3f2ab7781840f2 Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Mon, 6 Jun 2016 16:56:53 +0100
Subject: reset: TRIVIAL: Add line break at same place for similar APIs

Standardise the way inline functions:

  devm_reset_control_get_shared_by_index
  devm_reset_control_get_exclusive_by_index

... are formatted.

Signed-off-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>

diff --git a/include/linux/reset.h b/include/linux/reset.h
index c358106..45a4abe 100644
--- a/include/linux/reset.h
+++ b/include/linux/reset.h
@@ -310,8 +310,8 @@ devm_reset_control_get_exclusive_by_index(struct device *dev, int index)
  * this function, reset_control_put() is called automatically on driver detach.
  * See reset_control_get_shared() for more information.
  */
-static inline struct reset_control *devm_reset_control_get_shared_by_index(
-					struct device *dev, int index)
+static inline struct reset_control *
+devm_reset_control_get_shared_by_index(struct device *dev, int index)
 {
 	return __devm_reset_control_get(dev, NULL, index, 1);
 }
-- 
cgit v0.10.2


From f5ee3f2b5ad6881a8194b9f684ce11640d1062f7 Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Mon, 6 Jun 2016 18:08:54 +0100
Subject: usb: host: ohci-st: Inform the reset framework that our reset line
 may be shared

On the STiH410 B2120 development board the ST EHCI IP shares its reset
line with the OHCI IP.  New functionality in the reset subsystems forces
consumers to be explicit when requesting shared/exclusive reset lines.

Acked-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Lee Jones <lee.jones@linaro.org>

diff --git a/drivers/usb/host/ohci-st.c b/drivers/usb/host/ohci-st.c
index acf2eb2..02816a1 100644
--- a/drivers/usb/host/ohci-st.c
+++ b/drivers/usb/host/ohci-st.c
@@ -188,13 +188,15 @@ static int st_ohci_platform_probe(struct platform_device *dev)
 		priv->clk48 = NULL;
 	}
 
-	priv->pwr = devm_reset_control_get_optional(&dev->dev, "power");
+	priv->pwr =
+		devm_reset_control_get_optional_shared(&dev->dev, "power");
 	if (IS_ERR(priv->pwr)) {
 		err = PTR_ERR(priv->pwr);
 		goto err_put_clks;
 	}
 
-	priv->rst = devm_reset_control_get_optional(&dev->dev, "softreset");
+	priv->rst =
+		devm_reset_control_get_optional_shared(&dev->dev, "softreset");
 	if (IS_ERR(priv->rst)) {
 		err = PTR_ERR(priv->rst);
 		goto err_put_clks;
-- 
cgit v0.10.2


From 19599304625b74c95bff318c735928eec668b1ca Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Mon, 6 Jun 2016 18:08:53 +0100
Subject: usb: host: ehci-st: Inform the reset framework that our reset line
 may be shared

On the STiH410 B2120 development board the ST EHCI IP shares its reset
line with the OHCI IP.  New functionality in the reset subsystems forces
consumers to be explicit when requesting shared/exclusive reset lines.

Acked-by: Peter Griffin <peter.griffin@linaro.org>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Lee Jones <lee.jones@linaro.org>

diff --git a/drivers/usb/host/ehci-st.c b/drivers/usb/host/ehci-st.c
index a94ed67..be4a278 100644
--- a/drivers/usb/host/ehci-st.c
+++ b/drivers/usb/host/ehci-st.c
@@ -206,7 +206,8 @@ static int st_ehci_platform_probe(struct platform_device *dev)
 		priv->clk48 = NULL;
 	}
 
-	priv->pwr = devm_reset_control_get_optional(&dev->dev, "power");
+	priv->pwr =
+		devm_reset_control_get_optional_shared(&dev->dev, "power");
 	if (IS_ERR(priv->pwr)) {
 		err = PTR_ERR(priv->pwr);
 		if (err == -EPROBE_DEFER)
@@ -214,7 +215,8 @@ static int st_ehci_platform_probe(struct platform_device *dev)
 		priv->pwr = NULL;
 	}
 
-	priv->rst = devm_reset_control_get_optional(&dev->dev, "softreset");
+	priv->rst =
+		devm_reset_control_get_optional_shared(&dev->dev, "softreset");
 	if (IS_ERR(priv->rst)) {
 		err = PTR_ERR(priv->rst);
 		if (err == -EPROBE_DEFER)
-- 
cgit v0.10.2


From 002f17bc54ff4005260d8e6e6700de97f5b25679 Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Tue, 28 Jun 2016 09:23:58 +0100
Subject: usb: dwc3: st: Inform the reset framework that our reset line may be
 shared

On the STiH410 B2120 development board the MiPHY28lp shares its reset
line with the Synopsys DWC3 SuperSpeed (SS) USB 3.0 Dual-Role-Device
(DRD).  New functionality in the reset subsystems forces consumers to
be explicit when requesting shared/exclusive reset lines.

Acked-by: Felipe Balbi <felipe.balbi@linux.intel.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>

diff --git a/drivers/usb/dwc3/dwc3-st.c b/drivers/usb/dwc3/dwc3-st.c
index 5c0adb9..b204617 100644
--- a/drivers/usb/dwc3/dwc3-st.c
+++ b/drivers/usb/dwc3/dwc3-st.c
@@ -237,7 +237,8 @@ static int st_dwc3_probe(struct platform_device *pdev)
 	/* Manage PowerDown */
 	reset_control_deassert(dwc3_data->rstc_pwrdn);
 
-	dwc3_data->rstc_rst = devm_reset_control_get(dev, "softreset");
+	dwc3_data->rstc_rst =
+		devm_reset_control_get_shared(dev, "softreset");
 	if (IS_ERR(dwc3_data->rstc_rst)) {
 		dev_err(&pdev->dev, "could not get reset controller\n");
 		ret = PTR_ERR(dwc3_data->rstc_rst);
-- 
cgit v0.10.2


From 9278e707f4e187df2b4d9eeb2bc78a1724fbe4ac Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Tue, 28 Jun 2016 09:32:12 +0100
Subject: phy: phy-stih407-usb: Inform the reset framework that our reset line
 may be shared

On the STiH410 B2120 development board the ports on the Generic PHY
share their reset lines with each other.  New functionality in the
reset subsystems forces consumers to be explicit when requesting
shared/exclusive reset lines.

Signed-off-by: Lee Jones <lee.jones@linaro.org>

diff --git a/drivers/phy/phy-stih407-usb.c b/drivers/phy/phy-stih407-usb.c
index 1d5ae5f..53cf8d1 100644
--- a/drivers/phy/phy-stih407-usb.c
+++ b/drivers/phy/phy-stih407-usb.c
@@ -105,7 +105,7 @@ static int stih407_usb2_picophy_probe(struct platform_device *pdev)
 	phy_dev->dev = dev;
 	dev_set_drvdata(dev, phy_dev);
 
-	phy_dev->rstc = devm_reset_control_get(dev, "global");
+	phy_dev->rstc = devm_reset_control_get_shared(dev, "global");
 	if (IS_ERR(phy_dev->rstc)) {
 		dev_err(dev, "failed to ctrl picoPHY reset\n");
 		return PTR_ERR(phy_dev->rstc);
-- 
cgit v0.10.2


From 82d8eb40bab10082050be945c8e7096df8e9f989 Mon Sep 17 00:00:00 2001
From: Rhyland Klein <rklein@nvidia.com>
Date: Thu, 12 May 2016 13:45:04 -0400
Subject: mfd: max77620: Fix FPS switch statements

When configuring FPS during probe, assuming a DT node is present for
FPS, the code can run into a problem with the switch statements in
max77620_config_fps() and max77620_get_fps_period_reg_value(). Namely,
in the case of chip->chip_id == MAX77620, it will set
fps_[mix|max]_period but then fall through to the default switch case
and return -EINVAL. Returning this from max77620_config_fps() will
cause probe to fail.

Signed-off-by: Rhyland Klein <rklein@nvidia.com>
Reviewed-by: Laxman Dewangan <ldewangan@nvidia.com>
Reviewed-by: Thierry Reding <treding@nvidia.com>
Tested-by: Thierry Reding <treding@nvidia.com>
Tested-by: Alexandre Courbot <acourbot@nvidia.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>

diff --git a/drivers/mfd/max77620.c b/drivers/mfd/max77620.c
index 199d261..f32fbb8 100644
--- a/drivers/mfd/max77620.c
+++ b/drivers/mfd/max77620.c
@@ -203,6 +203,7 @@ static int max77620_get_fps_period_reg_value(struct max77620_chip *chip,
 		break;
 	case MAX77620:
 		fps_min_period = MAX77620_FPS_PERIOD_MIN_US;
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -236,6 +237,7 @@ static int max77620_config_fps(struct max77620_chip *chip,
 		break;
 	case MAX77620:
 		fps_max_period = MAX77620_FPS_PERIOD_MAX_US;
+		break;
 	default:
 		return -EINVAL;
 	}
-- 
cgit v0.10.2


From ba4a1c28a92df55b9263e838068b92eaa80c7850 Mon Sep 17 00:00:00 2001
From: Steve Twiss <stwiss.opensource@diasemi.com>
Date: Mon, 27 Jun 2016 16:06:36 +0100
Subject: mfd: da9053: Fix compiler warning message for uninitialised variable

Fix compiler warning caused by an uninitialised variable inside
da9052_group_write() function. Defaulting the value to zero covers
the trivial case.

Signed-off-by: Steve Twiss <stwiss.opensource@diasemi.com>
Reported-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>

diff --git a/include/linux/mfd/da9052/da9052.h b/include/linux/mfd/da9052/da9052.h
index c18a4c1..ce9230a 100644
--- a/include/linux/mfd/da9052/da9052.h
+++ b/include/linux/mfd/da9052/da9052.h
@@ -171,7 +171,7 @@ static inline int da9052_group_read(struct da9052 *da9052, unsigned char reg,
 static inline int da9052_group_write(struct da9052 *da9052, unsigned char reg,
 				      unsigned reg_cnt, unsigned char *val)
 {
-	int ret;
+	int ret = 0;
 	int i;
 
 	for (i = 0; i < reg_cnt; i++) {
-- 
cgit v0.10.2


From bfa37087aa04e45f56c41142dfceecb79b8e6ef9 Mon Sep 17 00:00:00 2001
From: Darren Stevens <darren@stevens-zone.net>
Date: Wed, 29 Jun 2016 21:06:28 +0100
Subject: powerpc: Initialise pci_io_base as early as possible

Commit d6a9996e84ac ("powerpc/mm: vmalloc abstraction in preparation for
radix") turned kernel memory and IO addresses from #defined constants to
variables initialised at runtime.

On PA6T (pasemi) systems the setup_arch() machine call initialises the
onboard PCI-e root-ports, and uses pci_io_base to do this, which is now
before its value has been set, resulting in a panic early in boot before
console IO is initialised.

Move the pci_io_base initialisation to the same place as vmalloc ranges
are set (hash__early_init_mmu()/radix__early_init_mmu()) - this is the
earliest possible place we can initialise it.

Fixes: d6a9996e84ac ("powerpc/mm: vmalloc abstraction in preparation for radix")
Reported-by: Christian Zigotzky <chzigotzky@xenosoft.de>
Signed-off-by: Darren Stevens <darren@stevens-zone.net>
Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
[mpe: Add #ifdef CONFIG_PCI, massage change log slightly]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>

diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 88a5eca..ab84c89 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -230,6 +230,7 @@ extern unsigned long __kernel_virt_size;
 #define KERN_VIRT_SIZE  __kernel_virt_size
 extern struct page *vmemmap;
 extern unsigned long ioremap_bot;
+extern unsigned long pci_io_base;
 #endif /* __ASSEMBLY__ */
 
 #include <asm/book3s/64/hash.h>
diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c
index 3759df5..a5ae49a 100644
--- a/arch/powerpc/kernel/pci_64.c
+++ b/arch/powerpc/kernel/pci_64.c
@@ -47,7 +47,6 @@ static int __init pcibios_init(void)
 
 	printk(KERN_INFO "PCI: Probing PCI hardware\n");
 
-	pci_io_base = ISA_IO_BASE;
 	/* For now, override phys_mem_access_prot. If we need it,g
 	 * later, we may move that initialization to each ppc_md
 	 */
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 5b22ba0..2971ea1 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -922,6 +922,10 @@ void __init hash__early_init_mmu(void)
 	vmemmap = (struct page *)H_VMEMMAP_BASE;
 	ioremap_bot = IOREMAP_BASE;
 
+#ifdef CONFIG_PCI
+	pci_io_base = ISA_IO_BASE;
+#endif
+
 	/* Initialize the MMU Hash table and create the linear mapping
 	 * of memory. Has to be done before SLB initialization as this is
 	 * currently where the page size encoding is obtained.
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index e58707d..7931e14 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -328,6 +328,11 @@ void __init radix__early_init_mmu(void)
 	__vmalloc_end = RADIX_VMALLOC_END;
 	vmemmap = (struct page *)RADIX_VMEMMAP_BASE;
 	ioremap_bot = IOREMAP_BASE;
+
+#ifdef CONFIG_PCI
+	pci_io_base = ISA_IO_BASE;
+#endif
+
 	/*
 	 * For now radix also use the same frag size
 	 */
-- 
cgit v0.10.2


From cab103274352721b77fc5923a631fc63350bef8e Mon Sep 17 00:00:00 2001
From: Wei Yongjun <yongjun_wei@trendmicro.com.cn>
Date: Mon, 13 Jun 2016 23:42:00 +0000
Subject: drm/i915: Fix missing unlock on error in i915_ppgtt_info()

Add the missing unlock before return from function i915_ppgtt_info()
in the error handling case.

Fixes: 1d2ac403ae3b(drm: Protect dev->filelist with its own mutex)
Signed-off-by: Wei Yongjun <yongjun_wei@trendmicro.com.cn>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: http://patchwork.freedesktop.org/patch/msgid/1465861320-26221-1-git-send-email-weiyj_lk@163.com
(cherry picked from commit b0212486909de4f239ca9f20d032de1b1f2dc52e)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 3269033..1035468 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2365,16 +2365,16 @@ static int i915_ppgtt_info(struct seq_file *m, void *data)
 		task = get_pid_task(file->pid, PIDTYPE_PID);
 		if (!task) {
 			ret = -ESRCH;
-			goto out_put;
+			goto out_unlock;
 		}
 		seq_printf(m, "\nproc: %s\n", task->comm);
 		put_task_struct(task);
 		idr_for_each(&file_priv->context_idr, per_file_ctx,
 			     (void *)(unsigned long)m);
 	}
+out_unlock:
 	mutex_unlock(&dev->filelist_mutex);
 
-out_put:
 	intel_runtime_pm_put(dev_priv);
 	mutex_unlock(&dev->struct_mutex);
 
-- 
cgit v0.10.2


From 5c672ab3f0ee0f78f7acad183f34db0f8781a200 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Thu, 30 Jun 2016 13:10:49 +0200
Subject: fuse: serialize dirops by default

Negotiate with userspace filesystems whether they support parallel readdir
and lookup.  Disable parallelism by default for fear of breaking fuse
filesystems.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Fixes: 9902af79c01a ("parallel lookups: actual switch to rwsem")
Fixes: d9b3dbdcfd62 ("fuse: switch to ->iterate_shared()")

diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index ccd4971..264f07c 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -341,8 +341,10 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
 	struct dentry *newent;
 	bool outarg_valid = true;
 
+	fuse_lock_inode(dir);
 	err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name,
 			       &outarg, &inode);
+	fuse_unlock_inode(dir);
 	if (err == -ENOENT) {
 		outarg_valid = false;
 		err = 0;
@@ -1341,7 +1343,9 @@ static int fuse_readdir(struct file *file, struct dir_context *ctx)
 		fuse_read_fill(req, file, ctx->pos, PAGE_SIZE,
 			       FUSE_READDIR);
 	}
+	fuse_lock_inode(inode);
 	fuse_request_send(fc, req);
+	fuse_unlock_inode(inode);
 	nbytes = req->out.args[0].size;
 	err = req->out.h.error;
 	fuse_put_request(fc, req);
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index eddbe02..929c383 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -110,6 +110,9 @@ struct fuse_inode {
 
 	/** Miscellaneous bits describing inode state */
 	unsigned long state;
+
+	/** Lock for serializing lookup and readdir for back compatibility*/
+	struct mutex mutex;
 };
 
 /** FUSE inode state bits */
@@ -540,6 +543,9 @@ struct fuse_conn {
 	/** write-back cache policy (default is write-through) */
 	unsigned writeback_cache:1;
 
+	/** allow parallel lookups and readdir (default is serialized) */
+	unsigned parallel_dirops:1;
+
 	/*
 	 * The following bitfields are only for optimization purposes
 	 * and hence races in setting them will not cause malfunction
@@ -956,4 +962,7 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr,
 
 void fuse_set_initialized(struct fuse_conn *fc);
 
+void fuse_unlock_inode(struct inode *inode);
+void fuse_lock_inode(struct inode *inode);
+
 #endif /* _FS_FUSE_I_H */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 1ce6766..9961d843 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -97,6 +97,7 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
 	INIT_LIST_HEAD(&fi->queued_writes);
 	INIT_LIST_HEAD(&fi->writepages);
 	init_waitqueue_head(&fi->page_waitq);
+	mutex_init(&fi->mutex);
 	fi->forget = fuse_alloc_forget();
 	if (!fi->forget) {
 		kmem_cache_free(fuse_inode_cachep, inode);
@@ -117,6 +118,7 @@ static void fuse_destroy_inode(struct inode *inode)
 	struct fuse_inode *fi = get_fuse_inode(inode);
 	BUG_ON(!list_empty(&fi->write_files));
 	BUG_ON(!list_empty(&fi->queued_writes));
+	mutex_destroy(&fi->mutex);
 	kfree(fi->forget);
 	call_rcu(&inode->i_rcu, fuse_i_callback);
 }
@@ -351,6 +353,18 @@ int fuse_reverse_inval_inode(struct super_block *sb, u64 nodeid,
 	return 0;
 }
 
+void fuse_lock_inode(struct inode *inode)
+{
+	if (!get_fuse_conn(inode)->parallel_dirops)
+		mutex_lock(&get_fuse_inode(inode)->mutex);
+}
+
+void fuse_unlock_inode(struct inode *inode)
+{
+	if (!get_fuse_conn(inode)->parallel_dirops)
+		mutex_unlock(&get_fuse_inode(inode)->mutex);
+}
+
 static void fuse_umount_begin(struct super_block *sb)
 {
 	fuse_abort_conn(get_fuse_conn_super(sb));
@@ -898,6 +912,8 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
 				fc->async_dio = 1;
 			if (arg->flags & FUSE_WRITEBACK_CACHE)
 				fc->writeback_cache = 1;
+			if (arg->flags & FUSE_PARALLEL_DIROPS)
+				fc->parallel_dirops = 1;
 			if (arg->time_gran && arg->time_gran <= 1000000000)
 				fc->sb->s_time_gran = arg->time_gran;
 		} else {
@@ -928,7 +944,8 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
 		FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ |
 		FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA |
 		FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO |
-		FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT;
+		FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT |
+		FUSE_PARALLEL_DIROPS;
 	req->in.h.opcode = FUSE_INIT;
 	req->in.numargs = 1;
 	req->in.args[0].size = sizeof(*arg);
diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index 5974fae..27e1736 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -105,6 +105,9 @@
  *
  *  7.24
  *  - add FUSE_LSEEK for SEEK_HOLE and SEEK_DATA support
+ *
+ *  7.25
+ *  - add FUSE_PARALLEL_DIROPS
  */
 
 #ifndef _LINUX_FUSE_H
@@ -140,7 +143,7 @@
 #define FUSE_KERNEL_VERSION 7
 
 /** Minor version number of this interface */
-#define FUSE_KERNEL_MINOR_VERSION 24
+#define FUSE_KERNEL_MINOR_VERSION 25
 
 /** The node ID of the root inode */
 #define FUSE_ROOT_ID 1
@@ -234,6 +237,7 @@ struct fuse_file_lock {
  * FUSE_ASYNC_DIO: asynchronous direct I/O submission
  * FUSE_WRITEBACK_CACHE: use writeback cache for buffered writes
  * FUSE_NO_OPEN_SUPPORT: kernel supports zero-message opens
+ * FUSE_PARALLEL_DIROPS: allow parallel lookups and readdir
  */
 #define FUSE_ASYNC_READ		(1 << 0)
 #define FUSE_POSIX_LOCKS	(1 << 1)
@@ -253,6 +257,7 @@ struct fuse_file_lock {
 #define FUSE_ASYNC_DIO		(1 << 15)
 #define FUSE_WRITEBACK_CACHE	(1 << 16)
 #define FUSE_NO_OPEN_SUPPORT	(1 << 17)
+#define FUSE_PARALLEL_DIROPS    (1 << 18)
 
 /**
  * CUSE INIT request/reply flags
-- 
cgit v0.10.2


From 54794580f5949253520265e46c903878ab222d84 Mon Sep 17 00:00:00 2001
From: Sinan Kaya <okaya@codeaurora.org>
Date: Wed, 29 Jun 2016 04:27:38 -0400
Subject: ACPI,PCI,IRQ: correct operator precedence

The omitted parenthesis prevents the addition operation when
acpi_penalize_isa_irq function is called.

Fixes: 103544d86976 (ACPI,PCI,IRQ: reduce resource requirements)
Signed-off-by: Sinan Kaya <okaya@codeaurora.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

diff --git a/drivers/acpi/pci_link.c b/drivers/acpi/pci_link.c
index 8fc7323..4ed4061 100644
--- a/drivers/acpi/pci_link.c
+++ b/drivers/acpi/pci_link.c
@@ -839,7 +839,7 @@ void acpi_penalize_isa_irq(int irq, int active)
 {
 	if ((irq >= 0) && (irq < ARRAY_SIZE(acpi_isa_irq_penalty)))
 		acpi_isa_irq_penalty[irq] = acpi_irq_get_penalty(irq) +
-			active ? PIRQ_PENALTY_ISA_USED : PIRQ_PENALTY_PCI_USING;
+		  (active ? PIRQ_PENALTY_ISA_USED : PIRQ_PENALTY_PCI_USING);
 }
 
 bool acpi_isa_irq_available(int irq)
-- 
cgit v0.10.2


From cb7d224f82e41d82518e7f9ea271d215d4d08e6e Mon Sep 17 00:00:00 2001
From: Scott Mayhew <smayhew@redhat.com>
Date: Thu, 30 Jun 2016 10:39:32 -0400
Subject: lockd: unregister notifier blocks if the service fails to come up
 completely

If the lockd service fails to start up then we need to be sure that the
notifier blocks are not registered, otherwise a subsequent start of the
service could cause the same notifier to be registered twice, leading to
soft lockups.

Signed-off-by: Scott Mayhew <smayhew@redhat.com>
Cc: stable@vger.kernel.org
Fixes: 0751ddf77b6a "lockd: Register callbacks on the inetaddr_chain..."
Signed-off-by: J. Bruce Fields <bfields@redhat.com>

diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 154a107..fc4084e 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -335,12 +335,17 @@ static struct notifier_block lockd_inet6addr_notifier = {
 };
 #endif
 
-static void lockd_svc_exit_thread(void)
+static void lockd_unregister_notifiers(void)
 {
 	unregister_inetaddr_notifier(&lockd_inetaddr_notifier);
 #if IS_ENABLED(CONFIG_IPV6)
 	unregister_inet6addr_notifier(&lockd_inet6addr_notifier);
 #endif
+}
+
+static void lockd_svc_exit_thread(void)
+{
+	lockd_unregister_notifiers();
 	svc_exit_thread(nlmsvc_rqst);
 }
 
@@ -462,7 +467,7 @@ int lockd_up(struct net *net)
 	 * Note: svc_serv structures have an initial use count of 1,
 	 * so we exit through here on both success and failure.
 	 */
-err_net:
+err_put:
 	svc_destroy(serv);
 err_create:
 	mutex_unlock(&nlmsvc_mutex);
@@ -470,7 +475,9 @@ err_create:
 
 err_start:
 	lockd_down_net(serv, net);
-	goto err_net;
+err_net:
+	lockd_unregister_notifiers();
+	goto err_put;
 }
 EXPORT_SYMBOL_GPL(lockd_up);
 
-- 
cgit v0.10.2


From b403f0e37a11f84f7ceaf40b0075499e5bcfd220 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Wed, 29 Jun 2016 10:54:23 +0200
Subject: 9p: use file_dentry()

v9fs may be used as lower layer of overlayfs and accessing f_path.dentry
can lead to a crash.  In this case it's a NULL pointer dereference in
p9_fid_create().

Fix by replacing direct access of file->f_path.dentry with the
file_dentry() accessor, which will always return a native object.

Reported-by: Alessio Igor Bogani <alessioigorbogani@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Tested-by: Alessio Igor Bogani <alessioigorbogani@gmail.com>
Fixes: 4bacc9c9234c ("overlayfs: Make f_path always point to the overlay and f_inode to the underlay")
Cc: <stable@vger.kernel.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index b84c291..d7b78d5 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -74,7 +74,7 @@ int v9fs_file_open(struct inode *inode, struct file *file)
 					v9fs_proto_dotu(v9ses));
 	fid = file->private_data;
 	if (!fid) {
-		fid = v9fs_fid_clone(file->f_path.dentry);
+		fid = v9fs_fid_clone(file_dentry(file));
 		if (IS_ERR(fid))
 			return PTR_ERR(fid);
 
@@ -100,7 +100,7 @@ int v9fs_file_open(struct inode *inode, struct file *file)
 		 * because we want write after unlink usecase
 		 * to work.
 		 */
-		fid = v9fs_writeback_fid(file->f_path.dentry);
+		fid = v9fs_writeback_fid(file_dentry(file));
 		if (IS_ERR(fid)) {
 			err = PTR_ERR(fid);
 			mutex_unlock(&v9inode->v_mutex);
@@ -516,7 +516,7 @@ v9fs_mmap_file_mmap(struct file *filp, struct vm_area_struct *vma)
 		 * because we want write after unlink usecase
 		 * to work.
 		 */
-		fid = v9fs_writeback_fid(filp->f_path.dentry);
+		fid = v9fs_writeback_fid(file_dentry(filp));
 		if (IS_ERR(fid)) {
 			retval = PTR_ERR(fid);
 			mutex_unlock(&v9inode->v_mutex);
-- 
cgit v0.10.2


From e06b933e6ded42384164d28a2060b7f89243b895 Mon Sep 17 00:00:00 2001
From: Andrey Ulanov <andreyu@google.com>
Date: Fri, 15 Apr 2016 14:24:41 -0700
Subject: namespace: update event counter when umounting a deleted dentry

- m_start() in fs/namespace.c expects that ns->event is incremented each
  time a mount added or removed from ns->list.
- umount_tree() removes items from the list but does not increment event
  counter, expecting that it's done before the function is called.
- There are some codepaths that call umount_tree() without updating
  "event" counter. e.g. from __detach_mounts().
- When this happens m_start may reuse a cached mount structure that no
  longer belongs to ns->list (i.e. use after free which usually leads
  to infinite loop).

This change fixes the above problem by incrementing global event counter
before invoking umount_tree().

Change-Id: I622c8e84dcb9fb63542372c5dbf0178ee86bb589
Cc: stable@vger.kernel.org
Signed-off-by: Andrey Ulanov <andreyu@google.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

diff --git a/fs/namespace.c b/fs/namespace.c
index 4fb1691..298618b 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1562,6 +1562,7 @@ void __detach_mounts(struct dentry *dentry)
 		goto out_unlock;
 
 	lock_mount_hash();
+	event++;
 	while (!hlist_empty(&mp->m_list)) {
 		mnt = hlist_entry(mp->m_list.first, struct mount, mnt_mp_list);
 		if (mnt->mnt.mnt_flags & MNT_UMOUNT) {
-- 
cgit v0.10.2


From 8293c8a3bb068bd2d2dfe00b6b0000a8fc5c860a Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Fri, 3 Jun 2016 11:44:28 +0100
Subject: phy: miphy28lp: Inform the reset framework that our reset line may be
 shared

On the STiH410 B2120 development board the MiPHY28lp shares its reset
line with the Synopsys DWC3 SuperSpeed (SS) USB 3.0 Dual-Role-Device
(DRD).  New functionality in the reset subsystems forces consumers to
be explicit when requesting shared/exclusive reset lines.

Acked-by: Kishon Vijay Abraham I <kishon@ti.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>

diff --git a/drivers/phy/phy-miphy28lp.c b/drivers/phy/phy-miphy28lp.c
index 3acd2a1..213e2e1 100644
--- a/drivers/phy/phy-miphy28lp.c
+++ b/drivers/phy/phy-miphy28lp.c
@@ -1143,7 +1143,8 @@ static int miphy28lp_probe_resets(struct device_node *node,
 	struct miphy28lp_dev *miphy_dev = miphy_phy->phydev;
 	int err;
 
-	miphy_phy->miphy_rst = of_reset_control_get(node, "miphy-sw-rst");
+	miphy_phy->miphy_rst =
+		of_reset_control_get_shared(node, "miphy-sw-rst");
 
 	if (IS_ERR(miphy_phy->miphy_rst)) {
 		dev_err(miphy_dev->dev,
-- 
cgit v0.10.2


From f5f35830fb84364a24794fe6c7101f85318481d2 Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Tue, 28 Jun 2016 09:33:55 +0100
Subject: phy: phy-stih407-usb: Use explicit reset_control_get_exclusive() API

We're making all reset line users specify whether their lines are
shared with other IP or they operate them exclusively.  In this case
the line is exclusively used only by this IP, so use the *_exclusive()
API accordingly.

Acked-by: Kishon Vijay Abraham I <kishon@ti.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>

diff --git a/drivers/phy/phy-stih407-usb.c b/drivers/phy/phy-stih407-usb.c
index 53cf8d1..b1f44ab 100644
--- a/drivers/phy/phy-stih407-usb.c
+++ b/drivers/phy/phy-stih407-usb.c
@@ -111,7 +111,7 @@ static int stih407_usb2_picophy_probe(struct platform_device *pdev)
 		return PTR_ERR(phy_dev->rstc);
 	}
 
-	phy_dev->rstport = devm_reset_control_get(dev, "port");
+	phy_dev->rstport = devm_reset_control_get_exclusive(dev, "port");
 	if (IS_ERR(phy_dev->rstport)) {
 		dev_err(dev, "failed to ctrl picoPHY reset\n");
 		return PTR_ERR(phy_dev->rstport);
-- 
cgit v0.10.2


From 5baaf3b9efe127d239038de9219d381f4d882b26 Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Tue, 28 Jun 2016 09:24:40 +0100
Subject: usb: dwc3: st: Use explicit reset_control_get_exclusive() API

We're making all reset line users specify whether their lines are
shared with other IP or they operate them exclusively.  In this case
the line is exclusively used only by this IP, so use the *_exclusive()
API accordingly.

Acked-by: Felipe Balbi <felipe.balbi@linux.intel.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>

diff --git a/drivers/usb/dwc3/dwc3-st.c b/drivers/usb/dwc3/dwc3-st.c
index b204617..e77bacb 100644
--- a/drivers/usb/dwc3/dwc3-st.c
+++ b/drivers/usb/dwc3/dwc3-st.c
@@ -227,7 +227,8 @@ static int st_dwc3_probe(struct platform_device *pdev)
 	dev_vdbg(&pdev->dev, "glue-logic addr 0x%p, syscfg-reg offset 0x%x\n",
 		 dwc3_data->glue_base, dwc3_data->syscfg_reg_off);
 
-	dwc3_data->rstc_pwrdn = devm_reset_control_get(dev, "powerdown");
+	dwc3_data->rstc_pwrdn =
+		devm_reset_control_get_exclusive(dev, "powerdown");
 	if (IS_ERR(dwc3_data->rstc_pwrdn)) {
 		dev_err(&pdev->dev, "could not get power controller\n");
 		ret = PTR_ERR(dwc3_data->rstc_pwrdn);
-- 
cgit v0.10.2


From 6343a2120862f7023006c8091ad95c1f16a32077 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Fri, 1 Jul 2016 14:56:07 +0200
Subject: locks: use file_inode()

(Another one for the f_path debacle.)

ltp fcntl33 testcase caused an Oops in selinux_file_send_sigiotask.

The reason is that generic_add_lease() used filp->f_path.dentry->inode
while all the others use file_inode().  This makes a difference for files
opened on overlayfs since the former will point to the overlay inode the
latter to the underlying inode.

So generic_add_lease() added the lease to the overlay inode and
generic_delete_lease() removed it from the underlying inode.  When the file
was released the lease remained on the overlay inode's lock list, resulting
in use after free.

Reported-by: Eryu Guan <eguan@redhat.com>
Fixes: 4bacc9c9234c ("overlayfs: Make f_path always point to the overlay and f_inode to the underlay")
Cc: <stable@vger.kernel.org>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>

diff --git a/fs/locks.c b/fs/locks.c
index 7c5f91b..ee1b15f 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1628,7 +1628,7 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr
 {
 	struct file_lock *fl, *my_fl = NULL, *lease;
 	struct dentry *dentry = filp->f_path.dentry;
-	struct inode *inode = dentry->d_inode;
+	struct inode *inode = file_inode(filp);
 	struct file_lock_context *ctx;
 	bool is_deleg = (*flp)->fl_flags & FL_DELEG;
 	int error;
-- 
cgit v0.10.2


From 6d037de90a1fd7b4879b48d4dd5c4839b271be98 Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Fri, 1 Jul 2016 15:01:01 +0200
Subject: MIPS: Fix possible corruption of cache mode by mprotect.

The following testcase may result in a page table entries with a invalid
CCA field being generated:

static void *bindstack;

static int sysrqfd;

static void protect_low(int protect)
{
	mprotect(bindstack, BINDSTACK_SIZE, protect);
}

static void sigbus_handler(int signal, siginfo_t * info, void *context)
{
	void *addr = info->si_addr;

	write(sysrqfd, "x", 1);

	printf("sigbus, fault address %p (should not happen, but might)\n",
	       addr);
	abort();
}

static void run_bind_test(void)
{
	unsigned int *p = bindstack;

	p[0] = 0xf001f001;

	write(sysrqfd, "x", 1);

	/* Set trap on access to p[0] */
	protect_low(PROT_NONE);

	write(sysrqfd, "x", 1);

	/* Clear trap on access to p[0] */
	protect_low(PROT_READ | PROT_WRITE | PROT_EXEC);

	write(sysrqfd, "x", 1);

	/* Check the contents of p[0] */
	if (p[0] != 0xf001f001) {
		write(sysrqfd, "x", 1);

		/* Reached, but shouldn't be */
		printf("badness, shouldn't happen but does\n");
		abort();
	}
}

int main(void)
{
	struct sigaction sa;

	sysrqfd = open("/proc/sysrq-trigger", O_WRONLY);

	if (sigprocmask(SIG_BLOCK, NULL, &sa.sa_mask)) {
		perror("sigprocmask");
		return 0;
	}

	sa.sa_sigaction = sigbus_handler;
	sa.sa_flags = SA_SIGINFO | SA_NODEFER | SA_RESTART;
	if (sigaction(SIGBUS, &sa, NULL)) {
		perror("sigaction");
		return 0;
	}

	bindstack = mmap(NULL,
			 BINDSTACK_SIZE,
			 PROT_READ | PROT_WRITE | PROT_EXEC,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (bindstack == MAP_FAILED) {
		perror("mmap bindstack");
		return 0;
	}

	printf("bindstack: %p\n", bindstack);

	run_bind_test();

	printf("done\n");

	return 0;
}

There are multiple ingredients for this:

 1) PAGE_NONE is defined to _CACHE_CACHABLE_NONCOHERENT, which is CCA 3
    on all platforms except SB1 where it's CCA 5.
 2) _page_cachable_default must have bits set which are not set
    _CACHE_CACHABLE_NONCOHERENT.
 3) Either the defective version of pte_modify for XPA or the standard
    version must be in used.  However pte_modify for the 36 bit address
    space support is no affected.

In that case additional bits in the final CCA mode may generate an invalid
value for the CCA field.  On the R10000 system where this was tracked
down for example a CCA 7 has been observed, which is Uncached Accelerated.

Fixed by:

 1) Using the proper CCA mode for PAGE_NONE just like for all the other
    PAGE_* pte/pmd bits.
 2) Fix the two affected variants of pte_modify.

Further code inspection also shows the same issue to exist in pmd_modify
which would affect huge page systems.

Issue in pte_modify tracked down by Alastair Bridgewater, PAGE_NONE
and pmd_modify issue found by me.

The history of this goes back beyond Linus' git history.  Chris Dearman's
commit 351336929ccf222ae38ff0cb7a8dd5fd5c6236a0 ("[MIPS] Allow setting of
the cache attribute at run time.") missed the opportunity to fix this
but it was originally introduced in lmo commit
d523832cf12007b3242e50bb77d0c9e63e0b6518 ("Missing from last commit.")
and 32cc38229ac7538f2346918a09e75413e8861f87 ("New configuration option
CONFIG_MIPS_UNCACHED.")

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Reported-by: Alastair Bridgewater <alastair.bridgewater@gmail.com>

diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
index a6b611f..f538167 100644
--- a/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h
@@ -24,7 +24,7 @@ struct mm_struct;
 struct vm_area_struct;
 
 #define PAGE_NONE	__pgprot(_PAGE_PRESENT | _PAGE_NO_READ | \
-				 _CACHE_CACHABLE_NONCOHERENT)
+				 _page_cachable_default)
 #define PAGE_SHARED	__pgprot(_PAGE_PRESENT | _PAGE_WRITE | \
 				 _page_cachable_default)
 #define PAGE_COPY	__pgprot(_PAGE_PRESENT | _PAGE_NO_EXEC | \
@@ -476,7 +476,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 	pte.pte_low  &= (_PAGE_MODIFIED | _PAGE_ACCESSED | _PFNX_MASK);
 	pte.pte_high &= (_PFN_MASK | _CACHE_MASK);
 	pte.pte_low  |= pgprot_val(newprot) & ~_PFNX_MASK;
-	pte.pte_high |= pgprot_val(newprot) & ~_PFN_MASK;
+	pte.pte_high |= pgprot_val(newprot) & ~(_PFN_MASK | _CACHE_MASK);
 	return pte;
 }
 #elif defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32)
@@ -491,7 +491,8 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 #else
 static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 {
-	return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot));
+	return __pte((pte_val(pte) & _PAGE_CHG_MASK) |
+		     (pgprot_val(newprot) & ~_PAGE_CHG_MASK));
 }
 #endif
 
@@ -632,7 +633,8 @@ static inline struct page *pmd_page(pmd_t pmd)
 
 static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
 {
-	pmd_val(pmd) = (pmd_val(pmd) & _PAGE_CHG_MASK) | pgprot_val(newprot);
+	pmd_val(pmd) = (pmd_val(pmd) & _PAGE_CHG_MASK) |
+		       (pgprot_val(newprot) & ~_PAGE_CHG_MASK);
 	return pmd;
 }
 
-- 
cgit v0.10.2


From e7c0b5991dd1be7b6f6dc2b54a15a0f47b64b007 Mon Sep 17 00:00:00 2001
From: Vivek Goyal <vgoyal@redhat.com>
Date: Fri, 1 Jul 2016 10:02:44 -0400
Subject: ovl: warn instead of error if d_type is not supported

overlay needs underlying fs to support d_type. Recently I put in a
patch in to detect this condition and started failing mount if
underlying fs did not support d_type.

But this breaks existing configurations over kernel upgrade. Those who
are running docker (partially broken configuration) with xfs not
supporting d_type, are surprised that after kernel upgrade docker does
not run anymore.

https://github.com/docker/docker/issues/22937#issuecomment-229881315

So instead of erroring out, detect broken configuration and warn
about it. This should allow existing docker setups to continue
working after kernel upgrade.

Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Fixes: 45aebeaf4f67 ("ovl: Ensure upper filesystem supports d_type")
Cc: <stable@vger.kernel.org> 4.6

diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index ce02f46..9a7693d 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -1082,11 +1082,13 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 			if (err < 0)
 				goto out_put_workdir;
 
-			if (!err) {
-				pr_err("overlayfs: upper fs needs to support d_type.\n");
-				err = -EINVAL;
-				goto out_put_workdir;
-			}
+			/*
+			 * We allowed this configuration and don't want to
+			 * break users over kernel upgrade. So warn instead
+			 * of erroring out.
+			 */
+			if (!err)
+				pr_warn("overlayfs: upper fs needs to support d_type.\n");
 		}
 	}
 
-- 
cgit v0.10.2


From a99cde438de0c4c0cecc1d1af1a55a75b10bfdef Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 3 Jul 2016 23:01:00 -0700
Subject: Linux 4.7-rc6


diff --git a/Makefile b/Makefile
index 2d24bab..0d50489 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 4
 PATCHLEVEL = 7
 SUBLEVEL = 0
-EXTRAVERSION = -rc5
+EXTRAVERSION = -rc6
 NAME = Psychotic Stoned Sheep
 
 # *DOCUMENTATION*
-- 
cgit v0.10.2