From 8ce7da474f3063f57ac446eb428945a9852e102d Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Mon, 8 Jun 2015 11:26:30 +0300 Subject: drm/i915: Properly initialize SDVO analog connectors In the commit below, I missed the connector allocation in the function intel_sdvo_analog_init(), leading to those connectors to have a NULL state pointer. commit 08d9bc920d465bbbbd762cac9383249c19bf69a2 Author: Ander Conselvan de Oliveira Date: Fri Apr 10 10:59:10 2015 +0300 drm/i915: Allocate connector state together with the connectors Reported-by: Stefan Lippers-Hollmann Tested-by: Stefan Lippers-Hollmann Signed-off-by: Ander Conselvan de Oliveira Signed-off-by: Jani Nikula diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c index e87d2f4..987b81f 100644 --- a/drivers/gpu/drm/i915/intel_sdvo.c +++ b/drivers/gpu/drm/i915/intel_sdvo.c @@ -2550,7 +2550,7 @@ intel_sdvo_analog_init(struct intel_sdvo *intel_sdvo, int device) DRM_DEBUG_KMS("initialising analog device %d\n", device); - intel_sdvo_connector = kzalloc(sizeof(*intel_sdvo_connector), GFP_KERNEL); + intel_sdvo_connector = intel_sdvo_connector_alloc(); if (!intel_sdvo_connector) return false; -- cgit v0.10.2 From 7f2ca8b55aeff1fe51ed3570200ef88a96060917 Mon Sep 17 00:00:00 2001 From: Peter Hutterer Date: Mon, 8 Jun 2015 10:17:32 -0700 Subject: Input: synaptics - add min/max quirk for Lenovo S540 https://bugzilla.redhat.com/show_bug.cgi?id=1223051#c2 Cc: stable@vger.kernel.org Tested-by: tommy.gagnes@gmail.com Signed-off-by: Peter Hutterer Signed-off-by: Dmitry Torokhov diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index 630af73..35c8d0c 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -151,6 +151,11 @@ static const struct min_max_quirk min_max_pnpid_table[] = { 1024, 5112, 2024, 4832 }, { + (const char * const []){"LEN2000", NULL}, + {ANY_BOARD_ID, ANY_BOARD_ID}, + 1024, 5113, 2021, 4832 + }, + { (const char * const []){"LEN2001", NULL}, {ANY_BOARD_ID, ANY_BOARD_ID}, 1024, 5022, 2508, 4832 @@ -191,7 +196,7 @@ static const char * const topbuttonpad_pnp_ids[] = { "LEN0045", "LEN0047", "LEN0049", - "LEN2000", + "LEN2000", /* S540 */ "LEN2001", /* Edge E431 */ "LEN2002", /* Edge E531 */ "LEN2003", -- cgit v0.10.2 From 3f5f1554ee715639e78d9be87623ee82772537e0 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 2 Jun 2015 19:21:15 +0300 Subject: drm/i915: Fix DDC probe for passive adapters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Passive DP->DVI/HDMI dongles on DP++ ports show up to the system as HDMI devices, as they do not have a sink device in them to respond to any AUX traffic. When probing these dongles over the DDC, sometimes they will NAK the first attempt even though the transaction is valid and they support the DDC protocol. The retry loop inside of drm_do_probe_ddc_edid() would normally catch this case and try the transaction again, resulting in success. That, however, was thwarted by the fix for [1]: commit 9292f37e1f5c79400254dca46f83313488093825 Author: Eugeni Dodonov Date: Thu Jan 5 09:34:28 2012 -0200 drm: give up on edid retries when i2c bus is not responding This added code to exit immediately if the return code from the i2c_transfer function was -ENXIO in order to reduce the amount of time spent in waiting for unresponsive or disconnected devices. That was possible because the underlying i2c bit banging algorithm had retries of its own (which, of course, were part of the reason for the bug the commit fixes). Since its introduction in commit f899fc64cda8569d0529452aafc0da31c042df2e Author: Chris Wilson Date: Tue Jul 20 15:44:45 2010 -0700 drm/i915: use GMBUS to manage i2c links we've been flipping back and forth enabling the GMBUS transfers, but we've settled since then. The GMBUS implementation does not do any retries, however, bailing out of the drm_do_probe_ddc_edid() retry loop on first encounter of -ENXIO. This, combined with Eugeni's commit, broke the retry on -ENXIO. Retry GMBUS once on -ENXIO on first message to mitigate the issues with passive adapters. This patch is based on the work, and commit message, by Todd Previte . [1] https://bugs.freedesktop.org/show_bug.cgi?id=41059 v2: Don't retry if using bit banging. v3: Move retry within gmbux_xfer, retry only on first message. v4: Initialize GMBUS0 on retry (Ville). v5: Take index reads into account (Ville). Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=85924 Cc: Todd Previte Cc: stable@vger.kernel.org Tested-by: Oliver Grafe (v2) Tested-by: Jim Bride Reviewed-by: Ville Syrjälä Signed-off-by: Jani Nikula diff --git a/drivers/gpu/drm/i915/intel_i2c.c b/drivers/gpu/drm/i915/intel_i2c.c index 56e437e..ae62800 100644 --- a/drivers/gpu/drm/i915/intel_i2c.c +++ b/drivers/gpu/drm/i915/intel_i2c.c @@ -435,7 +435,7 @@ gmbus_xfer(struct i2c_adapter *adapter, struct intel_gmbus, adapter); struct drm_i915_private *dev_priv = bus->dev_priv; - int i, reg_offset; + int i = 0, inc, try = 0, reg_offset; int ret = 0; intel_aux_display_runtime_get(dev_priv); @@ -448,12 +448,14 @@ gmbus_xfer(struct i2c_adapter *adapter, reg_offset = dev_priv->gpio_mmio_base; +retry: I915_WRITE(GMBUS0 + reg_offset, bus->reg0); - for (i = 0; i < num; i++) { + for (; i < num; i += inc) { + inc = 1; if (gmbus_is_index_read(msgs, i, num)) { ret = gmbus_xfer_index_read(dev_priv, &msgs[i]); - i += 1; /* set i to the index of the read xfer */ + inc = 2; /* an index read is two msgs */ } else if (msgs[i].flags & I2C_M_RD) { ret = gmbus_xfer_read(dev_priv, &msgs[i], 0); } else { @@ -525,6 +527,18 @@ clear_err: adapter->name, msgs[i].addr, (msgs[i].flags & I2C_M_RD) ? 'r' : 'w', msgs[i].len); + /* + * Passive adapters sometimes NAK the first probe. Retry the first + * message once on -ENXIO for GMBUS transfers; the bit banging algorithm + * has retries internally. See also the retry loop in + * drm_do_probe_ddc_edid, which bails out on the first -ENXIO. + */ + if (ret == -ENXIO && i == 0 && try++ == 0) { + DRM_DEBUG_KMS("GMBUS [%s] NAK on first message, retry\n", + adapter->name); + goto retry; + } + goto out; timeout: -- cgit v0.10.2 From bd00c606a6f60ca015a62bdbf671eadd48a4ca82 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 9 Jun 2015 15:06:55 +0100 Subject: iommu/vt-d: Change PASID support to bit 40 of Extended Capability Register The existing hardware implementations with PASID support advertised in bit 28? Forget them. They do not exist. Bit 28 means nothing. When we have something that works, it'll use bit 40. Do not attempt to infer anything meaningful from bit 28. This will be reflected in an updated VT-d spec in the extremely near future. Signed-off-by: David Woodhouse diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 796ef96..a240e61 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -115,13 +115,14 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) * Extended Capability Register */ +#define ecap_pasid(e) ((e >> 40) & 0x1) #define ecap_pss(e) ((e >> 35) & 0x1f) #define ecap_eafs(e) ((e >> 34) & 0x1) #define ecap_nwfs(e) ((e >> 33) & 0x1) #define ecap_srs(e) ((e >> 31) & 0x1) #define ecap_ers(e) ((e >> 30) & 0x1) #define ecap_prs(e) ((e >> 29) & 0x1) -#define ecap_pasid(e) ((e >> 28) & 0x1) +/* PASID support used to be on bit 28 */ #define ecap_dis(e) ((e >> 27) & 0x1) #define ecap_nest(e) ((e >> 26) & 0x1) #define ecap_mts(e) ((e >> 25) & 0x1) -- cgit v0.10.2 From 9c5a18a31b321f120efda412281bb9f610f84aa0 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 9 Jun 2015 21:35:44 +0200 Subject: cfg80211: wext: clear sinfo struct before calling driver Until recently, mac80211 overwrote all the statistics it could provide when getting called, but it now relies on the struct having been zeroed by the caller. This was always the case in nl80211, but wext used a static struct which could even cause values from one device leak to another. Using a static struct is OK (as even documented in a comment) since the whole usage of this function and its return value is always locked under RTNL. Not clearing the struct for calling the driver has always been wrong though, since drivers were free to only fill values they could report, so calling this for one device and then for another would always have leaked values from one to the other. Fix this by initializing the structure in question before the driver method call. This fixes https://bugzilla.kernel.org/show_bug.cgi?id=99691 Cc: stable@vger.kernel.org Reported-by: Gerrit Renker Reported-by: Alexander Kaltsas Signed-off-by: Johannes Berg Signed-off-by: David S. Miller diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index fff1bef..fd68283 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -1333,6 +1333,8 @@ static struct iw_statistics *cfg80211_wireless_stats(struct net_device *dev) memcpy(bssid, wdev->current_bss->pub.bssid, ETH_ALEN); wdev_unlock(wdev); + memset(&sinfo, 0, sizeof(sinfo)); + if (rdev_get_station(rdev, dev, bssid, &sinfo)) return NULL; -- cgit v0.10.2 From c3b4afca7023b5aa0531912364246e67f79b3010 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 4 Jun 2015 22:25:04 +0800 Subject: blk-mq: free hctx->ctxs in queue's release handler Now blk_cleanup_queue() can be called before calling del_gendisk()[1], inside which hctx->ctxs is touched from blk_mq_unregister_hctx(), but the variable has been freed by blk_cleanup_queue() at that time. So this patch moves freeing of hctx->ctxs into queue's release handler for fixing the oops reported by Stefan. [1], 6cd18e711dd8075 (block: destroy bdi before blockdev is unregistered) Reported-by: Stefan Seyfried Cc: NeilBrown Cc: Christoph Hellwig Cc: stable@vger.kernel.org (v4.0) Signed-off-by: Ming Lei Signed-off-by: Jens Axboe diff --git a/block/blk-mq.c b/block/blk-mq.c index e68b71b..594eea0 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1600,6 +1600,7 @@ static int blk_mq_hctx_notify(void *data, unsigned long action, return NOTIFY_OK; } +/* hctx->ctxs will be freed in queue's release handler */ static void blk_mq_exit_hctx(struct request_queue *q, struct blk_mq_tag_set *set, struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx) @@ -1618,7 +1619,6 @@ static void blk_mq_exit_hctx(struct request_queue *q, blk_mq_unregister_cpu_notifier(&hctx->cpu_notifier); blk_free_flush_queue(hctx->fq); - kfree(hctx->ctxs); blk_mq_free_bitmap(&hctx->ctx_map); } @@ -1891,8 +1891,12 @@ void blk_mq_release(struct request_queue *q) unsigned int i; /* hctx kobj stays in hctx */ - queue_for_each_hw_ctx(q, hctx, i) + queue_for_each_hw_ctx(q, hctx, i) { + if (!hctx) + continue; + kfree(hctx->ctxs); kfree(hctx); + } kfree(q->queue_hw_ctx); -- cgit v0.10.2 From 5eea900389f73ad715649e3c3e78fc4482248635 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Fri, 1 May 2015 05:59:35 -0700 Subject: blackfin: Fix build error Fix include/asm-generic/io.h: In function 'readb': include/asm-generic/io.h:113:2: error: implicit declaration of function 'bfin_read8' include/asm-generic/io.h: In function 'readw': include/asm-generic/io.h:121:2: error: implicit declaration of function 'bfin_read16' include/asm-generic/io.h: In function 'readl': include/asm-generic/io.h:129:2: error: implicit declaration of function 'bfin_read32' include/asm-generic/io.h: In function 'writeb': include/asm-generic/io.h:147:2: error: implicit declaration of function 'bfin_write8' include/asm-generic/io.h: In function 'writew': include/asm-generic/io.h:155:2: error: implicit declaration of function 'bfin_write16' include/asm-generic/io.h: In function 'writel': include/asm-generic/io.h:163:2: error: implicit declaration of function 'bfin_write32' Reported-by: Geert Uytterhoeven Fixes: 1a3372bc522ef ("blackfin: io: define __raw_readx/writex with bfin_readx/writex") Cc: Steven Miao Signed-off-by: Guenter Roeck diff --git a/arch/blackfin/include/asm/io.h b/arch/blackfin/include/asm/io.h index 4e8ad05..6abebe8 100644 --- a/arch/blackfin/include/asm/io.h +++ b/arch/blackfin/include/asm/io.h @@ -10,6 +10,7 @@ #include #include #include +#include #define __raw_readb bfin_read8 #define __raw_readw bfin_read16 -- cgit v0.10.2 From 80524e933658077842271fbf34e6861c41095a32 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 15 Apr 2015 08:33:50 -0700 Subject: score: Fix exception handler label The latest version of modinfo fails to compile score architecture targets with the following error. FATAL: The relocation at __ex_table+0x634 references section "__ex_table" which is not executable, IOW the kernel will fault if it ever tries to jump to it. Something is seriously wrong and should be fixed. The probem is caused by a bad label in an __ex_table entry. Acked-by: Lennox Wu Cc: Quentin Casasnovas Signed-off-by: Guenter Roeck diff --git a/arch/score/lib/string.S b/arch/score/lib/string.S index 00b7d3a..16efa3a 100644 --- a/arch/score/lib/string.S +++ b/arch/score/lib/string.S @@ -175,10 +175,10 @@ ENTRY(__clear_user) br r3 .section .fixup, "ax" +99: br r3 .previous .section __ex_table, "a" .align 2 -99: .word 0b, 99b .previous -- cgit v0.10.2 From 1b0ccfe54a6abd1bc4d7bdd1c33e61e2c58f72c7 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 10 Jun 2015 15:29:31 -0700 Subject: Revert "ipv6: Fix protocol resubmission" This reverts commit 0243508edd317ff1fa63b495643a7c192fbfcd92. It introduces new regressions. Signed-off-by: David S. Miller diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 41a73da..f2e464e 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -212,13 +212,13 @@ static int ip6_input_finish(struct sock *sk, struct sk_buff *skb) */ rcu_read_lock(); +resubmit: idev = ip6_dst_idev(skb_dst(skb)); if (!pskb_pull(skb, skb_transport_offset(skb))) goto discard; nhoff = IP6CB(skb)->nhoff; nexthdr = skb_network_header(skb)[nhoff]; -resubmit: raw = raw6_local_deliver(skb, nexthdr); ipprot = rcu_dereference(inet6_protos[nexthdr]); if (ipprot) { @@ -246,12 +246,10 @@ resubmit: goto discard; ret = ipprot->handler(skb); - if (ret < 0) { - nexthdr = -ret; + if (ret > 0) goto resubmit; - } else if (ret == 0) { + else if (ret == 0) IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INDELIVERS); - } } else { if (!raw) { if (xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { -- cgit v0.10.2 From 85bd839983778fcd0c1c043327b14a046e979b39 Mon Sep 17 00:00:00 2001 From: Gu Zheng Date: Wed, 10 Jun 2015 11:14:43 -0700 Subject: mm/memory_hotplug.c: set zone->wait_table to null after freeing it Izumi found the following oops when hot re-adding a node: BUG: unable to handle kernel paging request at ffffc90008963690 IP: __wake_up_bit+0x20/0x70 Oops: 0000 [#1] SMP CPU: 68 PID: 1237 Comm: rs:main Q:Reg Not tainted 4.1.0-rc5 #80 Hardware name: FUJITSU PRIMEQUEST2800E/SB, BIOS PRIMEQUEST 2000 Series BIOS Version 1.87 04/28/2015 task: ffff880838df8000 ti: ffff880017b94000 task.ti: ffff880017b94000 RIP: 0010:[] [] __wake_up_bit+0x20/0x70 RSP: 0018:ffff880017b97be8 EFLAGS: 00010246 RAX: ffffc90008963690 RBX: 00000000003c0000 RCX: 000000000000a4c9 RDX: 0000000000000000 RSI: ffffea101bffd500 RDI: ffffc90008963648 RBP: ffff880017b97c08 R08: 0000000002000020 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffff8a0797c73800 R13: ffffea101bffd500 R14: 0000000000000001 R15: 00000000003c0000 FS: 00007fcc7ffff700(0000) GS:ffff880874800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffc90008963690 CR3: 0000000836761000 CR4: 00000000001407e0 Call Trace: unlock_page+0x6d/0x70 generic_write_end+0x53/0xb0 xfs_vm_write_end+0x29/0x80 [xfs] generic_perform_write+0x10a/0x1e0 xfs_file_buffered_aio_write+0x14d/0x3e0 [xfs] xfs_file_write_iter+0x79/0x120 [xfs] __vfs_write+0xd4/0x110 vfs_write+0xac/0x1c0 SyS_write+0x58/0xd0 system_call_fastpath+0x12/0x76 Code: 5d c3 66 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 55 48 89 e5 48 83 ec 20 65 48 8b 04 25 28 00 00 00 48 89 45 f8 31 c0 48 8d 47 48 <48> 39 47 48 48 c7 45 e8 00 00 00 00 48 c7 45 f0 00 00 00 00 48 RIP [] __wake_up_bit+0x20/0x70 RSP CR2: ffffc90008963690 Reproduce method (re-add a node):: Hot-add nodeA --> remove nodeA --> hot-add nodeA (panic) This seems an use-after-free problem, and the root cause is zone->wait_table was not set to *NULL* after free it in try_offline_node. When hot re-add a node, we will reuse the pgdat of it, so does the zone struct, and when add pages to the target zone, it will init the zone first (including the wait_table) if the zone is not initialized. The judgement of zone initialized is based on zone->wait_table: static inline bool zone_is_initialized(struct zone *zone) { return !!zone->wait_table; } so if we do not set the zone->wait_table to *NULL* after free it, the memory hotplug routine will skip the init of new zone when hot re-add the node, and the wait_table still points to the freed memory, then we will access the invalid address when trying to wake up the waiting people after the i/o operation with the page is done, such as mentioned above. Signed-off-by: Gu Zheng Reported-by: Taku Izumi Reviewed by: Yasuaki Ishimatsu Cc: KAMEZAWA Hiroyuki Cc: Tang Chen Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 457bde5..9e88f74 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1969,8 +1969,10 @@ void try_offline_node(int nid) * wait_table may be allocated from boot memory, * here only free if it's allocated by vmalloc. */ - if (is_vmalloc_addr(zone->wait_table)) + if (is_vmalloc_addr(zone->wait_table)) { vfree(zone->wait_table); + zone->wait_table = NULL; + } } } EXPORT_SYMBOL(try_offline_node); -- cgit v0.10.2 From 7d638093d4b0e9ef15bd78f38f11f126e773cc14 Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Wed, 10 Jun 2015 11:14:46 -0700 Subject: memcg: do not call reclaim if !__GFP_WAIT When trimming memcg consumption excess (see memory.high), we call try_to_free_mem_cgroup_pages without checking if we are allowed to sleep in the current context, which can result in a deadlock. Fix this. Fixes: 241994ed8649 ("mm: memcontrol: default hierarchy interface for memory") Signed-off-by: Vladimir Davydov Cc: Johannes Weiner Acked-by: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 14c2f20..9da23a7 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2323,6 +2323,8 @@ done_restock: css_get_many(&memcg->css, batch); if (batch > nr_pages) refill_stock(memcg, batch - nr_pages); + if (!(gfp_mask & __GFP_WAIT)) + goto done; /* * If the hierarchy is above the normal consumption range, * make the charging task trim their excess contribution. -- cgit v0.10.2 From d7ad41a1c498729b7584c257710b1b437a0c1470 Mon Sep 17 00:00:00 2001 From: Weijie Yang Date: Wed, 10 Jun 2015 11:14:49 -0700 Subject: zram: clear disk io accounting when reset zram device Clear zram disk io accounting when resetting the zram device. Otherwise the residual io accounting stat will affect the diskstat in the next zram active cycle. Signed-off-by: Weijie Yang Acked-by: Sergey Senozhatsky Acked-by: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 8dcbced..6e134f4 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -805,7 +805,9 @@ static void zram_reset_device(struct zram *zram) memset(&zram->stats, 0, sizeof(zram->stats)); zram->disksize = 0; zram->max_comp_streams = 1; + set_capacity(zram->disk, 0); + part_stat_set_all(&zram->disk->part0, 0); up_write(&zram->init_lock); /* I/O operation under all of CPU are done so let's free */ -- cgit v0.10.2 From 5129e87cb81996d4d7e8f5a86b5355c5f2d0383b Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 10 Jun 2015 11:14:52 -0700 Subject: checkpatch: fix "GLOBAL_INITIALISERS" test Commit d5e616fc1c1d ("checkpatch: add a few more --fix corrections") broke the GLOBAL_INITIALISERS test with bad parentheses and optional leading spaces. Fix it. Signed-off-by: Joe Perches Reported-by: Bandan Das Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 89b1df4..c5ec977 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -3169,12 +3169,12 @@ sub process { } # check for global initialisers. - if ($line =~ /^\+(\s*$Type\s*$Ident\s*(?:\s+$Modifier))*\s*=\s*(0|NULL|false)\s*;/) { + if ($line =~ /^\+$Type\s*$Ident(?:\s+$Modifier)*\s*=\s*(?:0|NULL|false)\s*;/) { if (ERROR("GLOBAL_INITIALISERS", "do not initialise globals to 0 or NULL\n" . $herecurr) && $fix) { - $fixed[$fixlinenr] =~ s/($Type\s*$Ident\s*(?:\s+$Modifier))*\s*=\s*(0|NULL|false)\s*;/$1;/; + $fixed[$fixlinenr] =~ s/(^.$Type\s*$Ident(?:\s+$Modifier)*)\s*=\s*(0|NULL|false)\s*;/$1;/; } } # check for static initialisers. -- cgit v0.10.2 From f371763a79d5212c2cb216b46fa8af46ba56cee3 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 10 Jun 2015 11:14:54 -0700 Subject: mm: memcontrol: fix false-positive VM_BUG_ON() on -rt On -rt, the VM_BUG_ON(!irqs_disabled()) triggers inside the memcg swapout path because the spin_lock_irq(&mapping->tree_lock) in the caller doesn't actually disable the hardware interrupts - which is fine, because on -rt the tophalves run in process context and so we are still safe from preemption while updating the statistics. Remove the VM_BUG_ON() but keep the comment of what we rely on. Signed-off-by: Johannes Weiner Reported-by: Clark Williams Cc: Fernando Lopez-Lezcano Cc: Steven Rostedt Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 9da23a7..a04225d 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -5835,9 +5835,7 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry) if (!mem_cgroup_is_root(memcg)) page_counter_uncharge(&memcg->memory, 1); - /* XXX: caller holds IRQ-safe mapping->tree_lock */ - VM_BUG_ON(!irqs_disabled()); - + /* Caller disabled preemption with mapping->tree_lock */ mem_cgroup_charge_statistics(memcg, page, -1); memcg_check_events(memcg, page); } -- cgit v0.10.2 From 02f7b4145da113683ad64c74bf64605e16b71789 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Wed, 10 Jun 2015 11:14:57 -0700 Subject: zsmalloc: fix a null pointer dereference in destroy_handle_cache() If zs_create_pool()->create_handle_cache()->kmem_cache_create() or pool->name allocation fails, zs_create_pool()->destroy_handle_cache() will dereference the NULL pool->handle_cachep. Modify destroy_handle_cache() to avoid this. Signed-off-by: Sergey Senozhatsky Cc: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 08bd7a3..a8b5e74 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -289,7 +289,8 @@ static int create_handle_cache(struct zs_pool *pool) static void destroy_handle_cache(struct zs_pool *pool) { - kmem_cache_destroy(pool->handle_cachep); + if (pool->handle_cachep) + kmem_cache_destroy(pool->handle_cachep); } static unsigned long alloc_handle(struct zs_pool *pool) -- cgit v0.10.2 From 8e76d4eecf7afeec9328e21cd5880e281838d0d6 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 10 Jun 2015 11:15:00 -0700 Subject: sched, numa: do not hint for NUMA balancing on VM_MIXEDMAP mappings Jovi Zhangwei reported the following problem Below kernel vm bug can be triggered by tcpdump which mmaped a lot of pages with GFP_COMP flag. [Mon May 25 05:29:33 2015] page:ffffea0015414000 count:66 mapcount:1 mapping: (null) index:0x0 [Mon May 25 05:29:33 2015] flags: 0x20047580004000(head) [Mon May 25 05:29:33 2015] page dumped because: VM_BUG_ON_PAGE(compound_order(page) && !PageTransHuge(page)) [Mon May 25 05:29:33 2015] ------------[ cut here ]------------ [Mon May 25 05:29:33 2015] kernel BUG at mm/migrate.c:1661! [Mon May 25 05:29:33 2015] invalid opcode: 0000 [#1] SMP In this case it was triggered by running tcpdump but it's not necessary reproducible on all systems. sudo tcpdump -i bond0.100 'tcp port 4242' -c 100000000000 -w 4242.pcap Compound pages cannot be migrated and it was not expected that such pages be marked for NUMA balancing. This did not take into account that drivers such as net/packet/af_packet.c may insert compound pages into userspace with vm_insert_page. This patch tells the NUMA balancing protection scanner to skip all VM_MIXEDMAP mappings which avoids the possibility that compound pages are marked for migration. Signed-off-by: Mel Gorman Reported-by: Jovi Zhangwei Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index ffeaa41..c2980e8 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -2181,7 +2181,7 @@ void task_numa_work(struct callback_head *work) } for (; vma; vma = vma->vm_next) { if (!vma_migratable(vma) || !vma_policy_mof(vma) || - is_vm_hugetlb_page(vma)) { + is_vm_hugetlb_page(vma) || (vma->vm_flags & VM_MIXEDMAP)) { continue; } -- cgit v0.10.2 From 5ec45a192fe6e287f0fc06d5ca4f3bd446d94803 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 10 Jun 2015 11:15:02 -0700 Subject: arch/x86/kvm/mmu.c: work around gcc-4.4.4 bug Fix this compile issue with gcc-4.4.4: arch/x86/kvm/mmu.c: In function 'kvm_mmu_pte_write': arch/x86/kvm/mmu.c:4256: error: unknown field 'cr0_wp' specified in initializer arch/x86/kvm/mmu.c:4257: error: unknown field 'cr4_pae' specified in initializer arch/x86/kvm/mmu.c:4257: warning: excess elements in union initializer ... gcc-4.4.4 (at least) has issues when using anonymous unions in initializers. Fixes: edc90b7dc4ceef6 ("KVM: MMU: fix SMAP virtualization") Cc: Xiao Guangrong Cc: Paolo Bonzini Cc: Davidlohr Bueso Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 44a7d25..b733376 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -4215,13 +4215,13 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, u64 entry, gentry, *spte; int npte; bool remote_flush, local_flush, zap_page; - union kvm_mmu_page_role mask = (union kvm_mmu_page_role) { - .cr0_wp = 1, - .cr4_pae = 1, - .nxe = 1, - .smep_andnot_wp = 1, - .smap_andnot_wp = 1, - }; + union kvm_mmu_page_role mask = { }; + + mask.cr0_wp = 1; + mask.cr4_pae = 1; + mask.nxe = 1; + mask.smep_andnot_wp = 1; + mask.smap_andnot_wp = 1; /* * If we don't have indirect shadow pages, it means no page is -- cgit v0.10.2 From b3be5e3e726a6cc849f40c70c3ae525e4146e9df Mon Sep 17 00:00:00 2001 From: Erik Hugne Date: Tue, 9 Jun 2015 17:27:12 +0200 Subject: tipc: disconnect socket directly after probe failure If the TIPC connection timer expires in a probing state, a self abort message is supposed to be generated and delivered to the local socket. This is currently broken, and the abort message is actually sent out to the peer node with invalid addressing information. This will cause the link to enter a constant retransmission state and eventually reset. We fix this by removing the self-abort message creation and tear down connection immediately instead. Signed-off-by: Erik Hugne Reviewed-by: Ying Xue Reviewed-by: Jon Maloy Signed-off-by: David S. Miller diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 9074b5c..f485600 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2142,11 +2142,17 @@ static void tipc_sk_timeout(unsigned long data) peer_node = tsk_peer_node(tsk); if (tsk->probing_state == TIPC_CONN_PROBING) { - /* Previous probe not answered -> self abort */ - skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, - TIPC_CONN_MSG, SHORT_H_SIZE, 0, - own_node, peer_node, tsk->portid, - peer_port, TIPC_ERR_NO_PORT); + if (!sock_owned_by_user(sk)) { + sk->sk_socket->state = SS_DISCONNECTING; + tsk->connected = 0; + tipc_node_remove_conn(sock_net(sk), tsk_peer_node(tsk), + tsk_peer_port(tsk)); + sk->sk_state_change(sk); + } else { + /* Try again later */ + sk_reset_timer(sk, &sk->sk_timer, (HZ / 20)); + } + } else { skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE, INT_H_SIZE, 0, peer_node, own_node, -- cgit v0.10.2 From 1a040eaca1a22f8da8285ceda6b5e4a2cb704867 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 9 Jun 2015 10:23:57 -0700 Subject: bridge: fix multicast router rlist endless loop Since the addition of sysfs multicast router support if one set multicast_router to "2" more than once, then the port would be added to the hlist every time and could end up linking to itself and thus causing an endless loop for rlist walkers. So to reproduce just do: echo 2 > multicast_router; echo 2 > multicast_router; in a bridge port and let some igmp traffic flow, for me it hangs up in br_multicast_flood(). Fix this by adding a check in br_multicast_add_router() if the port is already linked. The reason this didn't happen before the addition of multicast_router sysfs entries is because there's a !hlist_unhashed check that prevents it. Signed-off-by: Nikolay Aleksandrov Fixes: 0909e11758bd ("bridge: Add multicast_router sysfs entries") Acked-by: Herbert Xu Signed-off-by: David S. Miller diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 22fd041..ff667e1 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1167,6 +1167,9 @@ static void br_multicast_add_router(struct net_bridge *br, struct net_bridge_port *p; struct hlist_node *slot = NULL; + if (!hlist_unhashed(&port->rlist)) + return; + hlist_for_each_entry(p, &br->router_list, rlist) { if ((unsigned long) port >= (unsigned long) p) break; @@ -1194,12 +1197,8 @@ static void br_multicast_mark_router(struct net_bridge *br, if (port->multicast_router != 1) return; - if (!hlist_unhashed(&port->rlist)) - goto timer; - br_multicast_add_router(br, port); -timer: mod_timer(&port->multicast_router_timer, now + br->multicast_querier_interval); } -- cgit v0.10.2 From 5d753610277862fd8050901f38b6571b9500cdb6 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 10 Jun 2015 21:02:04 -0400 Subject: net, swap: Remove a warning and clarify why sk_mem_reclaim is required when deactivating swap Jeff Layton reported the following; [ 74.232485] ------------[ cut here ]------------ [ 74.233354] WARNING: CPU: 2 PID: 754 at net/core/sock.c:364 sk_clear_memalloc+0x51/0x80() [ 74.234790] Modules linked in: cts rpcsec_gss_krb5 nfsv4 dns_resolver nfs fscache xfs libcrc32c snd_hda_codec_generic snd_hda_intel snd_hda_controller snd_hda_codec snd_hda_core snd_hwdep snd_seq snd_seq_device nfsd snd_pcm snd_timer snd e1000 ppdev parport_pc joydev parport pvpanic soundcore floppy serio_raw i2c_piix4 pcspkr nfs_acl lockd virtio_balloon acpi_cpufreq auth_rpcgss grace sunrpc qxl drm_kms_helper ttm drm virtio_console virtio_blk virtio_pci ata_generic virtio_ring pata_acpi virtio [ 74.243599] CPU: 2 PID: 754 Comm: swapoff Not tainted 4.1.0-rc6+ #5 [ 74.244635] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 [ 74.245546] 0000000000000000 0000000079e69e31 ffff8800d066bde8 ffffffff8179263d [ 74.246786] 0000000000000000 0000000000000000 ffff8800d066be28 ffffffff8109e6fa [ 74.248175] 0000000000000000 ffff880118d48000 ffff8800d58f5c08 ffff880036e380a8 [ 74.249483] Call Trace: [ 74.249872] [] dump_stack+0x45/0x57 [ 74.250703] [] warn_slowpath_common+0x8a/0xc0 [ 74.251655] [] warn_slowpath_null+0x1a/0x20 [ 74.252585] [] sk_clear_memalloc+0x51/0x80 [ 74.253519] [] xs_disable_swap+0x42/0x80 [sunrpc] [ 74.254537] [] rpc_clnt_swap_deactivate+0x7e/0xc0 [sunrpc] [ 74.255610] [] nfs_swap_deactivate+0x27/0x30 [nfs] [ 74.256582] [] destroy_swap_extents+0x74/0x80 [ 74.257496] [] SyS_swapoff+0x222/0x5c0 [ 74.258318] [] ? syscall_trace_leave+0xc7/0x140 [ 74.259253] [] system_call_fastpath+0x12/0x71 [ 74.260158] ---[ end trace 2530722966429f10 ]--- The warning in question was unnecessary but with Jeff's series the rules are also clearer. This patch removes the warning and updates the comment to explain why sk_mem_reclaim() may still be called. [jlayton: remove if (sk->sk_forward_alloc) conditional. As Leon points out that it's not needed.] Cc: Leon Romanovsky Signed-off-by: Mel Gorman Signed-off-by: Jeff Layton Signed-off-by: David S. Miller diff --git a/net/core/sock.c b/net/core/sock.c index 292f422..469d603 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -354,15 +354,12 @@ void sk_clear_memalloc(struct sock *sk) /* * SOCK_MEMALLOC is allowed to ignore rmem limits to ensure forward - * progress of swapping. However, if SOCK_MEMALLOC is cleared while - * it has rmem allocations there is a risk that the user of the - * socket cannot make forward progress due to exceeding the rmem - * limits. By rights, sk_clear_memalloc() should only be called - * on sockets being torn down but warn and reset the accounting if - * that assumption breaks. + * progress of swapping. SOCK_MEMALLOC may be cleared while + * it has rmem allocations due to the last swapfile being deactivated + * but there is a risk that the socket is unusable due to exceeding + * the rmem limits. Reclaim the reserves and obey rmem limits again. */ - if (WARN_ON(sk->sk_forward_alloc)) - sk_mem_reclaim(sk); + sk_mem_reclaim(sk); } EXPORT_SYMBOL_GPL(sk_clear_memalloc); -- cgit v0.10.2 From 6286e8285078ab02b215bba1d42a05ecfd864515 Mon Sep 17 00:00:00 2001 From: Govindarajulu Varadarajan <_govind@gmx.com> Date: Thu, 11 Jun 2015 11:52:54 +0530 Subject: enic: unlock napi busy poll before unmasking intr There is a small window between vnic_intr_unmask() and enic_poll_unlock_napi(). In this window if an irq occurs and napi is scheduled on different cpu, it tries to acquire enic_poll_lock_napi() and hits the following WARN_ON message. Fix is to unlock napi_poll before unmasking the interrupt. [ 781.121746] ------------[ cut here ]------------ [ 781.121789] WARNING: CPU: 1 PID: 0 at drivers/net/ethernet/cisco/enic/vnic_rq.h:228 enic_poll_msix_rq+0x36a/0x3c0 [enic]() [ 781.121834] Modules linked in: nfsv3 nfs_acl rpcsec_gss_krb5 auth_rpcgss oid_registry nfsv4 dns_resolver coretemp intel_rapl iosf_mbi x86_pkg_temp_thermal intel_powerclamp kvm_intel kvm crct10dif_pclmul crc32_pclmul ghash_clmulni_intel aesni_intel mgag200 ttm drm_kms_helper joydev aes_x86_64 lrw drm gf128mul mousedev glue_helper sb_edac ablk_helper iTCO_wdt iTCO_vendor_support evdev ipmi_si syscopyarea sysfillrect sysimgblt i2c_algo_bit i2c_core edac_core lpc_ich mac_hid cryptd pcspkr ipmi_msghandler shpchp tpm_tis acpi_power_meter tpm wmi processor hwmon button ac sch_fq_codel nfs lockd grace sunrpc fscache hid_generic usbhid hid ehci_pci ehci_hcd sd_mod megaraid_sas usbcore scsi_mod usb_common enic crc32c_generic crc32c_intel btrfs xor raid6_pq ext4 crc16 mbcache jbd2 [ 781.122176] CPU: 1 PID: 0 Comm: swapper/1 Not tainted 4.1.0-rc6-ARCH-00040-gc46a024-dirty #106 [ 781.122210] Hardware name: Cisco Systems Inc UCSB-B200-M4/UCSB-B200-M4, BIOS B200M4.2.2.2.23.061220140128 06/12/2014 [ 781.122252] 0000000000000000 bddbbc9d655ec96e ffff880277e43da8 ffffffff81583fe8 [ 781.122286] 0000000000000000 0000000000000000 ffff880277e43de8 ffffffff8107acfa [ 781.122319] ffff880272c01000 ffff880273f18000 ffff880273f1a100 0000000000000000 [ 781.122352] Call Trace: [ 781.122364] [] dump_stack+0x4f/0x7b [ 781.122399] [] warn_slowpath_common+0x8a/0xc0 [ 781.122425] [] warn_slowpath_null+0x1a/0x20 [ 781.122455] [] enic_poll_msix_rq+0x36a/0x3c0 [enic] [ 781.122487] [] net_rx_action+0x22a/0x370 [ 781.122512] [] __do_softirq+0xed/0x2d0 [ 781.122537] [] irq_exit+0x7e/0xa0 [ 781.122560] [] do_IRQ+0x64/0x100 [ 781.122582] [] common_interrupt+0x6e/0x6e [ 781.122605] [] ? cpu_startup_entry+0x121/0x480 [ 781.122638] [] ? cpu_startup_entry+0xec/0x480 [ 781.122667] [] ? clockevents_register_device+0x113/0x1f0 [ 781.122698] [] start_secondary+0x196/0x1e0 [ 781.122723] ---[ end trace cec2e9dd3af7b9db ]--- Signed-off-by: Govindarajulu Varadarajan <_govind@gmx.com> Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index 204bd182..0e5a01d 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -1407,6 +1407,7 @@ static int enic_poll_msix_rq(struct napi_struct *napi, int budget) */ enic_calc_int_moderation(enic, &enic->rq[rq]); + enic_poll_unlock_napi(&enic->rq[rq]); if (work_done < work_to_do) { /* Some work done, but not enough to stay in polling, @@ -1418,7 +1419,6 @@ static int enic_poll_msix_rq(struct napi_struct *napi, int budget) enic_set_int_moderation(enic, &enic->rq[rq]); vnic_intr_unmask(&enic->intr[intr]); } - enic_poll_unlock_napi(&enic->rq[rq]); return work_done; } -- cgit v0.10.2 From 19b596bda1c5400808635fde0d521c1f89a6c1a3 Mon Sep 17 00:00:00 2001 From: Govindarajulu Varadarajan <_govind@gmx.com> Date: Thu, 11 Jun 2015 11:52:55 +0530 Subject: enic: check return value for stat dump We do not check the return value of enic_dev_stats_dump(). If allocation fails, we will hit NULL pointer reference. Return only if memory allocation fails. For other failures, we return the previously recorded values. Signed-off-by: Govindarajulu Varadarajan <_govind@gmx.com> Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/cisco/enic/enic_ethtool.c b/drivers/net/ethernet/cisco/enic/enic_ethtool.c index 28d9ca6..68d47b1 100644 --- a/drivers/net/ethernet/cisco/enic/enic_ethtool.c +++ b/drivers/net/ethernet/cisco/enic/enic_ethtool.c @@ -131,8 +131,15 @@ static void enic_get_drvinfo(struct net_device *netdev, { struct enic *enic = netdev_priv(netdev); struct vnic_devcmd_fw_info *fw_info; + int err; - enic_dev_fw_info(enic, &fw_info); + err = enic_dev_fw_info(enic, &fw_info); + /* return only when pci_zalloc_consistent fails in vnic_dev_fw_info + * For other failures, like devcmd failure, we return previously + * recorded info. + */ + if (err == -ENOMEM) + return; strlcpy(drvinfo->driver, DRV_NAME, sizeof(drvinfo->driver)); strlcpy(drvinfo->version, DRV_VERSION, sizeof(drvinfo->version)); @@ -181,8 +188,15 @@ static void enic_get_ethtool_stats(struct net_device *netdev, struct enic *enic = netdev_priv(netdev); struct vnic_stats *vstats; unsigned int i; - - enic_dev_stats_dump(enic, &vstats); + int err; + + err = enic_dev_stats_dump(enic, &vstats); + /* return only when pci_zalloc_consistent fails in vnic_dev_stats_dump + * For other failures, like devcmd failure, we return previously + * recorded stats. + */ + if (err == -ENOMEM) + return; for (i = 0; i < enic_n_tx_stats; i++) *(data++) = ((u64 *)&vstats->tx)[enic_tx_stats[i].index]; diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index 0e5a01d..eadae1b 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -615,8 +615,15 @@ static struct rtnl_link_stats64 *enic_get_stats(struct net_device *netdev, { struct enic *enic = netdev_priv(netdev); struct vnic_stats *stats; + int err; - enic_dev_stats_dump(enic, &stats); + err = enic_dev_stats_dump(enic, &stats); + /* return only when pci_zalloc_consistent fails in vnic_dev_stats_dump + * For other failures, like devcmd failure, we return previously + * recorded stats. + */ + if (err == -ENOMEM) + return net_stats; net_stats->tx_packets = stats->tx.tx_frames_ok; net_stats->tx_bytes = stats->tx.tx_bytes_ok; -- cgit v0.10.2 From 8b13b4e0bc884ba7dc8ee4de3ee915b7d30e7f78 Mon Sep 17 00:00:00 2001 From: Govindarajulu Varadarajan <_govind@gmx.com> Date: Thu, 11 Jun 2015 11:52:56 +0530 Subject: enic: fix memory leak in rq_clean When incoming packet qualifies for rx_copybreak, we copy the data to newly allocated skb. We do not free/unmap the original buffer. At this point driver assumes this buffer is unallocated. When enic_rq_alloc_buf() is called for buffer allocation, it checks if buf->os_buf is NULL. If its not NULL that means buffer can be re-used. When vnic_rq_clean() is called for freeing all rq buffers, and if the rx_copybreak reused buffer falls outside the used desc, we do not free the buffer. The following trace is observer when dma-debug is enabled. Fix is to walk through complete ring and clean if buffer is present. [ 40.555386] ------------[ cut here ]------------ [ 40.555396] WARNING: CPU: 0 PID: 491 at lib/dma-debug.c:971 dma_debug_device_change+0x188/0x1f0() [ 40.555400] pci 0000:06:00.0: DMA-API: device driver has pending DMA allocations while released from device [count=4] One of leaked entries details: [device address=0x00000000ff4cc040] [size=9018 bytes] [mapped with DMA_FROM_DEVICE] [mapped as single] [ 40.555402] Modules linked in: nfsv3 nfs_acl rpcsec_gss_krb5 auth_rpcgss oid_registry nfsv4 dns_resolver coretemp intel_rapl iosf_mbi x86_pkg_temp_thermal intel_powerclamp kvm_intel kvm crct10dif_pclmul crc32_pclmul ghash_clmulni_intel aesni_intel aes_x86_64 lrw joydev mousedev gf128mul hid_generic glue_helper mgag200 usbhid ttm hid drm_kms_helper drm ablk_helper syscopyarea sysfillrect sysimgblt i2c_algo_bit i2c_core iTCO_wdt cryptd mac_hid evdev pcspkr sb_edac edac_core tpm_tis iTCO_vendor_support ipmi_si wmi tpm ipmi_msghandler shpchp lpc_ich processor acpi_power_meter hwmon button ac sch_fq_codel nfs lockd grace sunrpc fscache sd_mod ehci_pci ehci_hcd megaraid_sas usbcore scsi_mod usb_common enic(-) crc32c_generic crc32c_intel btrfs xor raid6_pq ext4 crc16 mbcache jbd2 [ 40.555467] CPU: 0 PID: 491 Comm: rmmod Not tainted 4.1.0-rc7-ARCH-01305-gf59b71f #118 [ 40.555469] Hardware name: Cisco Systems Inc UCSB-B200-M4/UCSB-B200-M4, BIOS B200M4.2.2.2.23.061220140128 06/12/2014 [ 40.555471] 0000000000000000 00000000e2f8a5b7 ffff880275f8bc48 ffffffff8158d6f0 [ 40.555474] 0000000000000000 ffff880275f8bca0 ffff880275f8bc88 ffffffff8107b04a [ 40.555477] ffff8802734e0000 0000000000000004 ffff8804763fb3c0 ffff88027600b650 [ 40.555480] Call Trace: [ 40.555488] [] dump_stack+0x4f/0x7b [ 40.555492] [] warn_slowpath_common+0x8a/0xc0 [ 40.555494] [] warn_slowpath_fmt+0x55/0x70 [ 40.555498] [] dma_debug_device_change+0x188/0x1f0 [ 40.555503] [] notifier_call_chain+0x4f/0x80 [ 40.555506] [] __blocking_notifier_call_chain+0x4b/0x70 [ 40.555510] [] blocking_notifier_call_chain+0x16/0x20 [ 40.555514] [] __device_release_driver+0xf6/0x120 [ 40.555518] [] driver_detach+0xc8/0xd0 [ 40.555523] [] bus_remove_driver+0x59/0xe0 [ 40.555527] [] driver_unregister+0x30/0x70 [ 40.555534] [] pci_unregister_driver+0x2d/0xa0 [ 40.555542] [] enic_cleanup_module+0x10/0x14e [enic] [ 40.555547] [] SyS_delete_module+0x1cf/0x280 [ 40.555551] [] ? ____fput+0xe/0x10 [ 40.555554] [] ? task_work_run+0xbc/0xf0 [ 40.555558] [] system_call_fastpath+0x12/0x71 [ 40.555561] ---[ end trace 4988cadc77c2b236 ]--- [ 40.555562] Mapped at: [ 40.555563] [] debug_dma_map_page+0x95/0x150 [ 40.555566] [] enic_rq_alloc_buf+0x1b8/0x360 [enic] [ 40.555570] [] enic_open+0xf8/0x820 [enic] [ 40.555574] [] __dev_open+0xce/0x150 [ 40.555579] [] __dev_change_flags+0xa1/0x170 Signed-off-by: Govindarajulu Varadarajan <_govind@gmx.com> Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/cisco/enic/vnic_rq.c b/drivers/net/ethernet/cisco/enic/vnic_rq.c index 36a2ed6..c4b2183 100644 --- a/drivers/net/ethernet/cisco/enic/vnic_rq.c +++ b/drivers/net/ethernet/cisco/enic/vnic_rq.c @@ -188,16 +188,15 @@ void vnic_rq_clean(struct vnic_rq *rq, struct vnic_rq_buf *buf; u32 fetch_index; unsigned int count = rq->ring.desc_count; + int i; buf = rq->to_clean; - while (vnic_rq_desc_used(rq) > 0) { - + for (i = 0; i < rq->ring.desc_count; i++) { (*buf_clean)(rq, buf); - - buf = rq->to_clean = buf->next; - rq->ring.desc_avail++; + buf = buf->next; } + rq->ring.desc_avail = rq->ring.desc_count - 1; /* Use current fetch_index as the ring starting point */ fetch_index = ioread32(&rq->ctrl->fetch_index); -- cgit v0.10.2 From 108029323910c5dd1ef8fa2d10da1ce5fbce6e12 Mon Sep 17 00:00:00 2001 From: Wang Long Date: Wed, 10 Jun 2015 08:12:37 +0000 Subject: ring-buffer-benchmark: Fix the wrong sched_priority of producer The producer should be used producer_fifo as its sched_priority, so correct it. Link: http://lkml.kernel.org/r/1433923957-67842-1-git-send-email-long.wanglong@huawei.com Cc: stable@vger.kernel.org # 2.6.33+ Signed-off-by: Wang Long Signed-off-by: Steven Rostedt diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c index 13d945c..1b28df2 100644 --- a/kernel/trace/ring_buffer_benchmark.c +++ b/kernel/trace/ring_buffer_benchmark.c @@ -450,7 +450,7 @@ static int __init ring_buffer_benchmark_init(void) if (producer_fifo >= 0) { struct sched_param param = { - .sched_priority = consumer_fifo + .sched_priority = producer_fifo }; sched_setscheduler(producer, SCHED_FIFO, ¶m); } else -- cgit v0.10.2 From 6dfd197283bffc23a2b046a7f065588de7e1fc1e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Glisse?= Date: Fri, 5 Jun 2015 13:33:57 -0400 Subject: drm/radeon: fix freeze for laptop with Turks/Thames GPU. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Laptop with Turks/Thames GPU will freeze if dpm is enabled. It seems the SMC engine is relying on some state inside the CP engine. CP needs to chew at least one packet for it to get in good state for dynamic power management. This patch simply disabled and re-enable DPM after the ring test which is enough to avoid the freeze. Signed-off-by: Jérôme Glisse Cc: stable@vger.kernel.org Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index b7ca4c5..a7fdfa4 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -1463,6 +1463,21 @@ int radeon_device_init(struct radeon_device *rdev, if (r) DRM_ERROR("ib ring test failed (%d).\n", r); + /* + * Turks/Thames GPU will freeze whole laptop if DPM is not restarted + * after the CP ring have chew one packet at least. Hence here we stop + * and restart DPM after the radeon_ib_ring_tests(). + */ + if (rdev->pm.dpm_enabled && + (rdev->pm.pm_method == PM_METHOD_DPM) && + (rdev->family == CHIP_TURKS) && + (rdev->flags & RADEON_IS_MOBILITY)) { + mutex_lock(&rdev->pm.mutex); + radeon_dpm_disable(rdev); + radeon_dpm_enable(rdev); + mutex_unlock(&rdev->pm.mutex); + } + if ((radeon_testing & 1)) { if (rdev->accel_working) radeon_test_moves(rdev); -- cgit v0.10.2 From 6fb3c025fee16f11ebd73f84f5aba1ee9ce7f8c6 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 10 Jun 2015 01:29:14 -0400 Subject: Revert "drm/radeon: don't share plls if monitors differ in audio support" This reverts commit a10f0df0615abb194968fc08147f3cdd70fd5aa5. Fixes some systems at the expense of others. Need to properly fix the pll divider selection. bug: https://bugzilla.kernel.org/show_bug.cgi?id=99651 Cc: stable@vger.kernel.org diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c index e597ffc..42b2ea3 100644 --- a/drivers/gpu/drm/radeon/atombios_crtc.c +++ b/drivers/gpu/drm/radeon/atombios_crtc.c @@ -1798,9 +1798,7 @@ static int radeon_get_shared_nondp_ppll(struct drm_crtc *crtc) if ((crtc->mode.clock == test_crtc->mode.clock) && (adjusted_clock == test_adjusted_clock) && (radeon_crtc->ss_enabled == test_radeon_crtc->ss_enabled) && - (test_radeon_crtc->pll_id != ATOM_PPLL_INVALID) && - (drm_detect_monitor_audio(radeon_connector_edid(test_radeon_crtc->connector)) == - drm_detect_monitor_audio(radeon_connector_edid(radeon_crtc->connector)))) + (test_radeon_crtc->pll_id != ATOM_PPLL_INVALID)) return test_radeon_crtc->pll_id; } } -- cgit v0.10.2 From ebb9bf18636926d5da97136c22e882c5d91fda73 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 10 Jun 2015 01:30:54 -0400 Subject: Revert "drm/radeon: adjust pll when audio is not enabled" This reverts commit 7fe04d6fa824ccea704535a597dc417c8687f990. Fixes some systems at the expense of others. Need to properly fix the pll divider selection. bug: https://bugzilla.kernel.org/show_bug.cgi?id=99651 Cc: stable@vger.kernel.org diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c index 42b2ea3..dac78ad 100644 --- a/drivers/gpu/drm/radeon/atombios_crtc.c +++ b/drivers/gpu/drm/radeon/atombios_crtc.c @@ -580,9 +580,6 @@ static u32 atombios_adjust_pll(struct drm_crtc *crtc, else radeon_crtc->pll_flags |= RADEON_PLL_PREFER_LOW_REF_DIV; - /* if there is no audio, set MINM_OVER_MAXP */ - if (!drm_detect_monitor_audio(radeon_connector_edid(connector))) - radeon_crtc->pll_flags |= RADEON_PLL_PREFER_MINM_OVER_MAXP; if (rdev->family < CHIP_RV770) radeon_crtc->pll_flags |= RADEON_PLL_PREFER_MINM_OVER_MAXP; /* use frac fb div on APUs */ -- cgit v0.10.2 From ee18e599251ed06bf0c8ade7c434a0de311342ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Thu, 11 Jun 2015 18:38:38 +0900 Subject: drm/radeon: Make sure radeon_vm_bo_set_addr always unreserves the BO MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some error paths didn't unreserve the BO. This resulted in a deadlock down the road on the next attempt to reserve the (still reserved) BO. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90873 Cc: stable@vger.kernel.org Reviewed-by: Christian König Signed-off-by: Michel Dänzer Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index de42fc4..9c3377c 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -458,14 +458,16 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev, /* make sure object fit at this offset */ eoffset = soffset + size; if (soffset >= eoffset) { - return -EINVAL; + r = -EINVAL; + goto error_unreserve; } last_pfn = eoffset / RADEON_GPU_PAGE_SIZE; if (last_pfn > rdev->vm_manager.max_pfn) { dev_err(rdev->dev, "va above limit (0x%08X > 0x%08X)\n", last_pfn, rdev->vm_manager.max_pfn); - return -EINVAL; + r = -EINVAL; + goto error_unreserve; } } else { @@ -486,7 +488,8 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev, "(bo %p 0x%010lx 0x%010lx)\n", bo_va->bo, soffset, tmp->bo, tmp->it.start, tmp->it.last); mutex_unlock(&vm->mutex); - return -EINVAL; + r = -EINVAL; + goto error_unreserve; } } @@ -497,7 +500,8 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev, tmp = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL); if (!tmp) { mutex_unlock(&vm->mutex); - return -ENOMEM; + r = -ENOMEM; + goto error_unreserve; } tmp->it.start = bo_va->it.start; tmp->it.last = bo_va->it.last; @@ -555,7 +559,6 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev, r = radeon_vm_clear_bo(rdev, pt); if (r) { radeon_bo_unref(&pt); - radeon_bo_reserve(bo_va->bo, false); return r; } @@ -575,6 +578,10 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev, mutex_unlock(&vm->mutex); return 0; + +error_unreserve: + radeon_bo_unreserve(bo_va->bo); + return r; } /** -- cgit v0.10.2 From 4d66e5e9b6d720d8463e11d027bd4ad91c8b1318 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 10 Jun 2015 23:47:14 -0400 Subject: block: fix ext_dev_lock lockdep report ================================= [ INFO: inconsistent lock state ] 4.1.0-rc7+ #217 Tainted: G O --------------------------------- inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage. swapper/6/0 [HC0[0]:SC1[1]:HE1:SE0] takes: (ext_devt_lock){+.?...}, at: [] blk_free_devt+0x3c/0x70 {SOFTIRQ-ON-W} state was registered at: [] __lock_acquire+0x461/0x1e70 [] lock_acquire+0xb7/0x290 [] _raw_spin_lock+0x38/0x50 [] blk_alloc_devt+0x6d/0xd0 <-- take the lock in process context [..] [] __lock_acquire+0x3fe/0x1e70 [] ? __lock_acquire+0xe5d/0x1e70 [] lock_acquire+0xb7/0x290 [] ? blk_free_devt+0x3c/0x70 [] _raw_spin_lock+0x38/0x50 [] ? blk_free_devt+0x3c/0x70 [] blk_free_devt+0x3c/0x70 <-- take the lock in softirq [] part_release+0x1c/0x50 [] device_release+0x36/0xb0 [] kobject_cleanup+0x7b/0x1a0 [] kobject_put+0x30/0x70 [] put_device+0x17/0x20 [] delete_partition_rcu_cb+0x16c/0x180 [] ? read_dev_sector+0xa0/0xa0 [] rcu_process_callbacks+0x2ff/0xa90 [] ? rcu_process_callbacks+0x2bf/0xa90 [] __do_softirq+0xde/0x600 Neil sees this in his tests and it also triggers on pmem driver unbind for the libnvdimm tests. This fix is on top of an initial fix by Keith for incorrect usage of mutex_lock() in this path: 2da78092dda1 "block: Fix dev_t minor allocation lifetime". Both this and 2da78092dda1 are candidates for -stable. Fixes: 2da78092dda1 ("block: Fix dev_t minor allocation lifetime") Cc: Cc: Keith Busch Reported-by: NeilBrown Signed-off-by: Dan Williams Signed-off-by: Jens Axboe diff --git a/block/genhd.c b/block/genhd.c index 666e11b..ea982ea 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -422,9 +422,9 @@ int blk_alloc_devt(struct hd_struct *part, dev_t *devt) /* allocate ext devt */ idr_preload(GFP_KERNEL); - spin_lock(&ext_devt_lock); + spin_lock_bh(&ext_devt_lock); idx = idr_alloc(&ext_devt_idr, part, 0, NR_EXT_DEVT, GFP_NOWAIT); - spin_unlock(&ext_devt_lock); + spin_unlock_bh(&ext_devt_lock); idr_preload_end(); if (idx < 0) @@ -449,9 +449,9 @@ void blk_free_devt(dev_t devt) return; if (MAJOR(devt) == BLOCK_EXT_MAJOR) { - spin_lock(&ext_devt_lock); + spin_lock_bh(&ext_devt_lock); idr_remove(&ext_devt_idr, blk_mangle_minor(MINOR(devt))); - spin_unlock(&ext_devt_lock); + spin_unlock_bh(&ext_devt_lock); } } @@ -690,13 +690,13 @@ struct gendisk *get_gendisk(dev_t devt, int *partno) } else { struct hd_struct *part; - spin_lock(&ext_devt_lock); + spin_lock_bh(&ext_devt_lock); part = idr_find(&ext_devt_idr, blk_mangle_minor(MINOR(devt))); if (part && get_disk(part_to_disk(part))) { *partno = part->partno; disk = part_to_disk(part); } - spin_unlock(&ext_devt_lock); + spin_unlock_bh(&ext_devt_lock); } return disk; -- cgit v0.10.2 From b6f2098fb708ce935a59763178c4e8cd942fcf88 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Mon, 4 May 2015 20:58:57 +0200 Subject: block: pmem: Add dependency on HAS_IOMEM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Not all architectures have io memory. Fixes: drivers/block/pmem.c: In function ‘pmem_alloc’: drivers/block/pmem.c:146:2: error: implicit declaration of function ‘ioremap_nocache’ [-Werror=implicit-function-declaration] pmem->virt_addr = ioremap_nocache(pmem->phys_addr, pmem->size); ^ drivers/block/pmem.c:146:18: warning: assignment makes pointer from integer without a cast [enabled by default] pmem->virt_addr = ioremap_nocache(pmem->phys_addr, pmem->size); ^ drivers/block/pmem.c:182:2: error: implicit declaration of function ‘iounmap’ [-Werror=implicit-function-declaration] iounmap(pmem->virt_addr); ^ Signed-off-by: Richard Weinberger Reviewed-by: Ross Zwisler Signed-off-by: Jens Axboe diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index eb1fed5..3ccef9e 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -406,6 +406,7 @@ config BLK_DEV_RAM_DAX config BLK_DEV_PMEM tristate "Persistent memory block device support" + depends on HAS_IOMEM help Saying Y here will allow you to use a contiguous range of reserved memory as one or more persistent block devices. -- cgit v0.10.2 From 58c98be137830d34b79024cc5dc95ef54fcd7ffe Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Thu, 11 Jun 2015 14:51:30 +0200 Subject: net: igb: fix the start time for periodic output signals When programming the start of a periodic output, the code wrongly places the seconds value into the "low" register and the nanoseconds into the "high" register. Even though this is backwards, it slipped through my testing, because the re-arming code in the interrupt service routine is correct, and the signal does appear starting with the second edge. This patch fixes the issue by programming the registers correctly. Signed-off-by: Richard Cochran Reviewed-by: Jacob Keller Acked-by: Jeff Kirsher Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c index e3b9b63..c3a9392c 100644 --- a/drivers/net/ethernet/intel/igb/igb_ptp.c +++ b/drivers/net/ethernet/intel/igb/igb_ptp.c @@ -538,8 +538,8 @@ static int igb_ptp_feature_enable_i210(struct ptp_clock_info *ptp, igb->perout[i].start.tv_nsec = rq->perout.start.nsec; igb->perout[i].period.tv_sec = ts.tv_sec; igb->perout[i].period.tv_nsec = ts.tv_nsec; - wr32(trgttiml, rq->perout.start.sec); - wr32(trgttimh, rq->perout.start.nsec); + wr32(trgttimh, rq->perout.start.sec); + wr32(trgttiml, rq->perout.start.nsec); tsauxc |= tsauxc_mask; tsim |= tsim_mask; } else { -- cgit v0.10.2 From 0fae3bf018d97b210051c8797a49d66d31071847 Mon Sep 17 00:00:00 2001 From: Robert Shearman Date: Thu, 11 Jun 2015 19:58:26 +0100 Subject: mpls: handle device renames for per-device sysctls If a device is renamed and the original name is subsequently reused for a new device, the following warning is generated: sysctl duplicate entry: /net/mpls/conf/veth0//input CPU: 3 PID: 1379 Comm: ip Not tainted 4.1.0-rc4+ #20 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.7.5-20140531_083030-gandalf 04/01/2014 0000000000000000 0000000000000000 ffffffff81566aaf 0000000000000000 ffffffff81236279 ffff88002f7d7f00 0000000000000000 ffff88000db336d8 ffff88000db33698 0000000000000005 ffff88002e046000 ffff8800168c9280 Call Trace: [] ? dump_stack+0x40/0x50 [] ? __register_sysctl_table+0x289/0x5a0 [] ? mpls_dev_notify+0x1ff/0x300 [mpls_router] [] ? notifier_call_chain+0x4f/0x70 [] ? register_netdevice+0x2b2/0x480 [] ? veth_newlink+0x178/0x2d3 [veth] [] ? rtnl_newlink+0x73c/0x8e0 [] ? rtnl_newlink+0x16a/0x8e0 [] ? __kmalloc_reserve.isra.30+0x32/0x90 [] ? rtnetlink_rcv_msg+0x8d/0x250 [] ? __alloc_skb+0x47/0x1f0 [] ? __netlink_lookup+0xab/0xe0 [] ? rtnetlink_rcv+0x30/0x30 [] ? netlink_rcv_skb+0xb0/0xd0 [] ? rtnetlink_rcv+0x24/0x30 [] ? netlink_unicast+0x107/0x1a0 [] ? netlink_sendmsg+0x50e/0x630 [] ? sock_sendmsg+0x3c/0x50 [] ? ___sys_sendmsg+0x27b/0x290 [] ? mem_cgroup_try_charge+0x88/0x110 [] ? mem_cgroup_commit_charge+0x56/0xa0 [] ? do_filp_open+0x30/0xa0 [] ? __sys_sendmsg+0x3e/0x80 [] ? system_call_fastpath+0x16/0x75 Fix this by unregistering the previous sysctl table (registered for the path containing the original device name) and re-registering the table for the path containing the new device name. Fixes: 37bde79979c3 ("mpls: Per-device enabling of packet input") Reported-by: Scott Feldman Signed-off-by: Robert Shearman Signed-off-by: David S. Miller diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index bff427f..1f93a59 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -564,6 +564,17 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event, case NETDEV_UNREGISTER: mpls_ifdown(dev); break; + case NETDEV_CHANGENAME: + mdev = mpls_dev_get(dev); + if (mdev) { + int err; + + mpls_dev_sysctl_unregister(mdev); + err = mpls_dev_sysctl_register(dev, mdev); + if (err) + return notifier_from_errno(err); + } + break; } return NOTIFY_OK; } -- cgit v0.10.2 From fb05e7a89f500cfc06ae277bdc911b281928995d Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Thu, 11 Jun 2015 16:50:48 -0700 Subject: net: don't wait for order-3 page allocation We saw excessive direct memory compaction triggered by skb_page_frag_refill. This causes performance issues and add latency. Commit 5640f7685831e0 introduces the order-3 allocation. According to the changelog, the order-3 allocation isn't a must-have but to improve performance. But direct memory compaction has high overhead. The benefit of order-3 allocation can't compensate the overhead of direct memory compaction. This patch makes the order-3 page allocation atomic. If there is no memory pressure and memory isn't fragmented, the alloction will still success, so we don't sacrifice the order-3 benefit here. If the atomic allocation fails, direct memory compaction will not be triggered, skb_page_frag_refill will fallback to order-0 immediately, hence the direct memory compaction overhead is avoided. In the allocation failure case, kswapd is waken up and doing compaction, so chances are allocation could success next time. alloc_skb_with_frags is the same. The mellanox driver does similar thing, if this is accepted, we must fix the driver too. V3: fix the same issue in alloc_skb_with_frags as pointed out by Eric V2: make the changelog clearer Cc: Eric Dumazet Cc: Chris Mason Cc: Debabrata Banerjee Signed-off-by: Shaohua Li Acked-by: Eric Dumazet Signed-off-by: David S. Miller diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 3cfff2a..41ec022 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4398,7 +4398,7 @@ struct sk_buff *alloc_skb_with_frags(unsigned long header_len, while (order) { if (npages >= 1 << order) { - page = alloc_pages(gfp_mask | + page = alloc_pages((gfp_mask & ~__GFP_WAIT) | __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY, diff --git a/net/core/sock.c b/net/core/sock.c index 469d603..dc30dc5 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1880,7 +1880,7 @@ bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t gfp) pfrag->offset = 0; if (SKB_FRAG_PAGE_ORDER) { - pfrag->page = alloc_pages(gfp | __GFP_COMP | + pfrag->page = alloc_pages((gfp & ~__GFP_WAIT) | __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY, SKB_FRAG_PAGE_ORDER); if (likely(pfrag->page)) { -- cgit v0.10.2 From c008f1d356277a5b7561040596a073d87e56b0c8 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 12 Jun 2015 19:46:44 +1000 Subject: md: don't return 0 from array_state_store Returning zero from a 'store' function is bad. The return value should be either len length of the string or an error. So use 'len' if 'err' is zero. Fixes: 6791875e2e53 ("md: make reconfig_mutex optional for writes to md sysfs files.") Signed-off-by: NeilBrown Cc: stable@vger.kernel (v4.0+) diff --git a/drivers/md/md.c b/drivers/md/md.c index 2750630..dd59d71 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -3834,7 +3834,7 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len) err = -EBUSY; } spin_unlock(&mddev->lock); - return err; + return err ?: len; } err = mddev_lock(mddev); if (err) -- cgit v0.10.2 From 8e8e2518fceca407bb8fc2a6710d19d2e217892e Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 12 Jun 2015 19:51:27 +1000 Subject: md: Close race when setting 'action' to 'idle'. Checking ->sync_thread without holding the mddev_lock() isn't really safe, even after flushing the workqueue which ensures md_start_sync() has been run. While this code is waiting for the lock, md_check_recovery could reap the thread itself, and then start another thread (e.g. recovery might finish, then reshape starts). When this thread gets the lock md_start_sync() hasn't run so it doesn't get reaped, but MD_RECOVERY_RUNNING gets cleared. This allows two threads to start which leads to confusion. So don't both if MD_RECOVERY_RUNNING isn't set, but if it is do the flush and the test and the reap all under the mddev_lock to avoid any race with md_check_recovery. Signed-off-by: NeilBrown Fixes: 6791875e2e53 ("md: make reconfig_mutex optional for writes to md sysfs files.") Cc: stable@vger.kernel.org (v4.0+) diff --git a/drivers/md/md.c b/drivers/md/md.c index dd59d71..8d4408b 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -4217,13 +4217,14 @@ action_store(struct mddev *mddev, const char *page, size_t len) set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); else clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); - flush_workqueue(md_misc_wq); - if (mddev->sync_thread) { - set_bit(MD_RECOVERY_INTR, &mddev->recovery); - if (mddev_lock(mddev) == 0) { + if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) && + mddev_lock(mddev) == 0) { + flush_workqueue(md_misc_wq); + if (mddev->sync_thread) { + set_bit(MD_RECOVERY_INTR, &mddev->recovery); md_reap_sync_thread(mddev); - mddev_unlock(mddev); } + mddev_unlock(mddev); } } else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) || test_bit(MD_RECOVERY_NEEDED, &mddev->recovery)) -- cgit v0.10.2 From ea358cd0d2c634ff1379a1392edcdf2289f31e13 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 12 Jun 2015 20:05:04 +1000 Subject: md: make sure MD_RECOVERY_DONE is clear before starting recovery/resync MD_RECOVERY_DONE is normally cleared by md_check_recovery after a resync etc finished. However it is possible for raid5_start_reshape to race and start a reshape before MD_RECOVERY_DONE is cleared. This can lean to multiple reshapes running at the same time, which isn't good. To make sure it is cleared before starting a reshape, and also clear it when reaping a thread, just to be safe. Signed-off-by: NeilBrown diff --git a/drivers/md/md.c b/drivers/md/md.c index 8d4408b..4dbed4a 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -8262,6 +8262,7 @@ void md_reap_sync_thread(struct mddev *mddev) if (mddev_is_clustered(mddev)) md_cluster_ops->metadata_update_finish(mddev); clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery); + clear_bit(MD_RECOVERY_DONE, &mddev->recovery); clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery); diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index e793ab6..f55c3f3 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -4156,6 +4156,7 @@ static int raid10_start_reshape(struct mddev *mddev) clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); + clear_bit(MD_RECOVERY_DONE, &mddev->recovery); set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 553d54b..b6793d2 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -7354,6 +7354,7 @@ static int raid5_start_reshape(struct mddev *mddev) clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); + clear_bit(MD_RECOVERY_DONE, &mddev->recovery); set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); mddev->sync_thread = md_register_thread(md_do_sync, mddev, -- cgit v0.10.2 From c83b2f20fdde578bded3dfc4405c5db7a039c694 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Fri, 12 Jun 2015 10:15:49 +0100 Subject: iommu/vt-d: Only enable extended context tables if PASID is supported Although the extended tables are theoretically a completely orthogonal feature to PASID and anything else that *uses* the newly-available bits, some of the early hardware has problems even when all we do is enable them and use only the same bits that were in the old context tables. For now, there's no motivation to support extended tables unless we're going to use PASID support to do SVM. So just don't use them unless PASID support is advertised too. Also add a command-line bailout just in case later chips also have issues. The equivalent problem for PASID support has already been fixed with the upcoming VT-d spec update and commit bd00c606a ("iommu/vt-d: Change PASID support to bit 40 of Extended Capability Register"), because the problematic platforms use the old definition of the PASID-capable bit, which is now marked as reserved and meaningless. So with this change, we'll magically start using ECS again only when we see the new hardware advertising "hey, we have PASID support and we actually tested it this time" on bit 40. The VT-d hardware architect has promised that we are not going to have any reason to support ECS *without* PASID any time soon, and he'll make sure he checks with us before changing that. In the future, if hypothetical new features also use new bits in the context tables and can be seen on implementations *without* PASID support, we might need to add their feature bits to the ecs_enabled() macro. Signed-off-by: David Woodhouse diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index f6befa9..491bb15 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1481,6 +1481,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted. By default, super page will be supported if Intel IOMMU has the capability. With this option, super page will not be supported. + ecs_off [Default Off] + By default, extended context tables will be supported if + the hardware advertises that it has support both for the + extended tables themselves, and also PASID support. With + this option set, extended tables will not be used even + on hardware which claims to support them. intel_idle.max_cstate= [KNL,HW,ACPI,X86] 0 disables intel_idle and fall back on acpi_idle. diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 2ffe589..5ecfaf2 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -422,6 +422,14 @@ static int dmar_map_gfx = 1; static int dmar_forcedac; static int intel_iommu_strict; static int intel_iommu_superpage = 1; +static int intel_iommu_ecs = 1; + +/* We only actually use ECS when PASID support (on the new bit 40) + * is also advertised. Some early implementations — the ones with + * PASID support on bit 28 — have issues even when we *only* use + * extended root/context tables. */ +#define ecs_enabled(iommu) (intel_iommu_ecs && ecap_ecs(iommu->ecap) && \ + ecap_pasid(iommu->ecap)) int intel_iommu_gfx_mapped; EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped); @@ -465,6 +473,10 @@ static int __init intel_iommu_setup(char *str) printk(KERN_INFO "Intel-IOMMU: disable supported super page\n"); intel_iommu_superpage = 0; + } else if (!strncmp(str, "ecs_off", 7)) { + printk(KERN_INFO + "Intel-IOMMU: disable extended context table support\n"); + intel_iommu_ecs = 0; } str += strcspn(str, ","); @@ -669,7 +681,7 @@ static inline struct context_entry *iommu_context_addr(struct intel_iommu *iommu struct context_entry *context; u64 *entry; - if (ecap_ecs(iommu->ecap)) { + if (ecs_enabled(iommu)) { if (devfn >= 0x80) { devfn -= 0x80; entry = &root->hi; @@ -806,7 +818,7 @@ static void free_context_table(struct intel_iommu *iommu) if (context) free_pgtable_page(context); - if (!ecap_ecs(iommu->ecap)) + if (!ecs_enabled(iommu)) continue; context = iommu_context_addr(iommu, i, 0x80, 0); @@ -1141,7 +1153,7 @@ static void iommu_set_root_entry(struct intel_iommu *iommu) unsigned long flag; addr = virt_to_phys(iommu->root_entry); - if (ecap_ecs(iommu->ecap)) + if (ecs_enabled(iommu)) addr |= DMA_RTADDR_RTT; raw_spin_lock_irqsave(&iommu->register_lock, flag); -- cgit v0.10.2 From ae36806a622aea5ac79f279cfccc82144967b6e7 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Thu, 11 Jun 2015 14:49:46 -0300 Subject: sctp: allow authenticating DATA chunks that are bundled with COOKIE_ECHO Currently, we can ask to authenticate DATA chunks and we can send DATA chunks on the same packet as COOKIE_ECHO, but if you try to combine both, the DATA chunk will be sent unauthenticated and peer won't accept it, leading to a communication failure. This happens because even though the data was queued after it was requested to authenticate DATA chunks, it was also queued before we could know that remote peer can handle authenticating, so sctp_auth_send_cid() returns false. The fix is whenever we set up an active key, re-check send queue for chunks that now should be authenticated. As a result, such packet will now contain COOKIE_ECHO + AUTH + DATA chunks, in that order. Reported-by: Liu Wei Signed-off-by: Marcelo Ricardo Leitner Acked-by: Neil Horman Acked-by: Vlad Yasevich Signed-off-by: David S. Miller diff --git a/net/sctp/auth.c b/net/sctp/auth.c index fb7976a..4f15b7d 100644 --- a/net/sctp/auth.c +++ b/net/sctp/auth.c @@ -381,13 +381,14 @@ nomem: } -/* Public interface to creat the association shared key. +/* Public interface to create the association shared key. * See code above for the algorithm. */ int sctp_auth_asoc_init_active_key(struct sctp_association *asoc, gfp_t gfp) { struct sctp_auth_bytes *secret; struct sctp_shared_key *ep_key; + struct sctp_chunk *chunk; /* If we don't support AUTH, or peer is not capable * we don't need to do anything. @@ -410,6 +411,14 @@ int sctp_auth_asoc_init_active_key(struct sctp_association *asoc, gfp_t gfp) sctp_auth_key_put(asoc->asoc_shared_key); asoc->asoc_shared_key = secret; + /* Update send queue in case any chunk already in there now + * needs authenticating + */ + list_for_each_entry(chunk, &asoc->outqueue.out_chunk_list, list) { + if (sctp_auth_send_cid(chunk->chunk_hdr->type, asoc)) + chunk->auth = 1; + } + return 0; } -- cgit v0.10.2 From b07d496177cd3bc4b70fb8a5e85ede24cb403a11 Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Sat, 13 Jun 2015 00:23:21 +0900 Subject: Doc: networking: Fix URL for wiki.wireshark.org in udplite.txt This patch fix URL (http to https) for wiki.wireshark.org. Signed-off-by: Masanari Iida Signed-off-by: David S. Miller diff --git a/Documentation/networking/udplite.txt b/Documentation/networking/udplite.txt index d727a38..53a7268 100644 --- a/Documentation/networking/udplite.txt +++ b/Documentation/networking/udplite.txt @@ -20,7 +20,7 @@ files/UDP-Lite-HOWTO.txt o The Wireshark UDP-Lite WiKi (with capture files): - http://wiki.wireshark.org/Lightweight_User_Datagram_Protocol + https://wiki.wireshark.org/Lightweight_User_Datagram_Protocol o The Protocol Spec, RFC 3828, http://www.ietf.org/rfc/rfc3828.txt -- cgit v0.10.2